diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index c614d3f0e..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,892 +0,0 @@ -version: 2.1 - -orbs: - win: circleci/windows@5.0.0 - -commands: - install-cmake-on-macos: - steps: - - run: - name: Install cmake on macos - command: | - HOMEBREW_NO_AUTO_UPDATE=1 brew install cmake - - install-jdk8-on-macos: - steps: - - run: - name: Install JDK 8 on macos - command: | - brew install --cask adoptopenjdk/openjdk/adoptopenjdk8 - - increase-max-open-files-on-macos: - steps: - - run: - name: Increase max open files - command: | - sudo sysctl -w kern.maxfiles=1048576 - sudo sysctl -w kern.maxfilesperproc=1048576 - sudo launchctl limit maxfiles 1048576 - - pre-steps: - steps: - - checkout - - run: - name: Setup Environment Variables - command: | - echo "export GTEST_THROW_ON_FAILURE=0" >> $BASH_ENV - echo "export GTEST_OUTPUT=\"xml:/tmp/test-results/\"" >> $BASH_ENV - echo "export SKIP_FORMAT_BUCK_CHECKS=1" >> $BASH_ENV - echo "export GTEST_COLOR=1" >> $BASH_ENV - echo "export CTEST_OUTPUT_ON_FAILURE=1" >> $BASH_ENV - echo "export CTEST_TEST_TIMEOUT=300" >> $BASH_ENV - echo "export ZLIB_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zlib" >> $BASH_ENV - echo "export BZIP2_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/bzip2" >> $BASH_ENV - echo "export SNAPPY_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/snappy" >> $BASH_ENV - echo "export LZ4_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/lz4" >> $BASH_ENV - echo "export ZSTD_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zstd" >> $BASH_ENV - - windows-build-steps: - steps: - - checkout - - run: - name: "Install thirdparty dependencies" - command: | - echo "Installing CMake..." - choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' -y - mkdir $Env:THIRDPARTY_HOME - cd $Env:THIRDPARTY_HOME - echo "Building Snappy dependency..." - curl https://github.com/google/snappy/archive/refs/tags/1.1.8.zip -O snappy-1.1.8.zip - unzip -q snappy-1.1.8.zip - cd snappy-1.1.8 - mkdir build - cd build - & $Env:CMAKE_BIN -G "$Env:CMAKE_GENERATOR" .. - msbuild.exe Snappy.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64 - - run: - name: "Build RocksDB" - command: | - mkdir build - cd build - & $Env:CMAKE_BIN -G "$Env:CMAKE_GENERATOR" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DJNI=1 .. - cd .. 
- echo "Building with VS version: $Env:CMAKE_GENERATOR" - msbuild.exe build/rocksdb.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64 - - run: - name: "Test RocksDB" - shell: powershell.exe - command: | - build_tools\run_ci_db_test.ps1 -SuiteRun arena_test,db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16 - pre-steps-macos: - steps: - - pre-steps - - post-steps: - steps: - - store_test_results: # store test result if there's any - path: /tmp/test-results - - store_artifacts: # store LOG for debugging if there's any - path: LOG - - run: # on fail, compress Test Logs for diagnosing the issue - name: Compress Test Logs - command: tar -cvzf t.tar.gz t - when: on_fail - - store_artifacts: # on fail, store Test Logs for diagnosing the issue - path: t.tar.gz - destination: test_logs - when: on_fail - - run: # store core dumps if there's any - command: | - mkdir -p /tmp/core_dumps - cp core.* /tmp/core_dumps - when: on_fail - - store_artifacts: - path: /tmp/core_dumps - when: on_fail - - upgrade-cmake: - steps: - - run: - name: Upgrade cmake - command: | - sudo apt remove --purge cmake - sudo snap install cmake --classic - - install-gflags: - steps: - - run: - name: Install gflags - command: | - sudo apt-get update -y && sudo apt-get install -y libgflags-dev - - install-gflags-on-macos: - steps: - - run: - name: Install gflags on macos - command: | - HOMEBREW_NO_AUTO_UPDATE=1 brew install gflags - - setup-folly: - steps: - - run: - name: Checkout folly sources - command: | - make checkout_folly - - build-folly: - steps: - - run: - name: Build folly and dependencies - command: | - make build_folly - - build-for-benchmarks: - steps: - - pre-steps - - run: - name: "Linux build for benchmarks" - command: #sized for the resource-class rocksdb-benchmark-sys1 - make V=1 J=8 -j8 release - - perform-benchmarks: - steps: - - run: - name: "Test low-variance benchmarks" - command: ./tools/benchmark_ci.py --db_dir /tmp/rocksdb-benchmark-datadir --output_dir /tmp/benchmark-results --num_keys 20000000 - environment: - LD_LIBRARY_PATH: /usr/local/lib - # How long to run parts of the test(s) - DURATION_RO: 300 - DURATION_RW: 500 - # Keep threads within physical capacity of server (much lower than default) - NUM_THREADS: 1 - MAX_BACKGROUND_JOBS: 4 - # Don't run a couple of "optional" initial tests - CI_TESTS_ONLY: "true" - # Reduce configured size of levels to ensure more levels in the leveled compaction LSM tree - WRITE_BUFFER_SIZE_MB: 16 - TARGET_FILE_SIZE_BASE_MB: 16 - MAX_BYTES_FOR_LEVEL_BASE_MB: 64 - # The benchmark host has 32GB memory - # The following values are tailored to work with that - # Note, tests may not exercise the targeted issues if the memory is increased on new test hosts. 
- COMPRESSION_TYPE: "none" - CACHE_INDEX_AND_FILTER_BLOCKS: 1 - MIN_LEVEL_TO_COMPRESS: 3 - CACHE_SIZE_MB: 10240 - MB_WRITE_PER_SEC: 2 - - post-benchmarks: - steps: - - store_artifacts: # store the benchmark output - path: /tmp/benchmark-results - destination: test_logs - - run: - name: Send benchmark report to visualisation - command: | - set +e - set +o pipefail - ./build_tools/benchmark_log_tool.py --tsvfile /tmp/benchmark-results/report.tsv --esdocument https://search-rocksdb-bench-k2izhptfeap2hjfxteolsgsynm.us-west-2.es.amazonaws.com/bench_test3_rix/_doc - true - -executors: - linux-docker: - docker: - # The image configuration is build_tools/ubuntu20_image/Dockerfile - # To update and build the image: - # $ cd build_tools/ubuntu20_image - # $ docker build -t zjay437/rocksdb:0.5 . - # $ docker push zjay437/rocksdb:0.5 - # `zjay437` is the account name for zjay@meta.com which readwrite token is shared internally. To login: - # $ docker login --username zjay437 - # Or please feel free to change it to your docker hub account for hosting the image, meta employee should already have the account and able to login with SSO. - # To avoid impacting the existing CI runs, please bump the version every time creating a new image - # to run the CI image environment locally: - # $ docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -it zjay437/rocksdb:0.5 bash - # option `--cap-add=SYS_PTRACE --security-opt seccomp=unconfined` is used to enable gdb to attach an existing process - - image: zjay437/rocksdb:0.6 - -jobs: - build-macos: - macos: - xcode: 12.5.1 - resource_class: large - environment: - ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc cause env_test hang, disable it for now - steps: - - increase-max-open-files-on-macos - - install-gflags-on-macos - - pre-steps-macos - - run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=32 -j32 all - - post-steps - - build-macos-cmake: - macos: - xcode: 12.5.1 - resource_class: large - parameters: - run_even_tests: - description: run even or odd tests, used to split tests to 2 groups - type: boolean - default: true - steps: - - increase-max-open-files-on-macos - - install-cmake-on-macos - - install-gflags-on-macos - - pre-steps-macos - - run: - name: "cmake generate project file" - command: ulimit -S -n `ulimit -H -n` && mkdir build && cd build && cmake -DWITH_GFLAGS=1 .. 
- - run: - name: "Build tests" - command: cd build && make V=1 -j32 - - when: - condition: << parameters.run_even_tests >> - steps: - - run: - name: "Run even tests" - command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 0,,2 - - when: - condition: - not: << parameters.run_even_tests >> - steps: - - run: - name: "Run odd tests" - command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 1,,2 - - post-steps - - build-linux: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: make V=1 J=32 -j32 check - - post-steps - - build-linux-encrypted_env-no_compression: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check - - run: | - ./sst_dump --help | grep -E -q 'Supported compression types: kNoCompression$' # Verify no compiled in compression - - post-steps - - build-linux-static_lib-alt_namespace-status_checked: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=static OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j24 check - - post-steps - - build-linux-release: - executor: linux-docker - resource_class: 2xlarge - steps: - - checkout # check out the code in the project directory - - run: make V=1 -j32 LIB_MODE=shared release - - run: ls librocksdb.so # ensure shared lib built - - run: ./db_stress --version # ensure with gflags - - run: make clean - - run: make V=1 -j32 release - - run: ls librocksdb.a # ensure static lib built - - run: ./db_stress --version # ensure with gflags - - run: make clean - - run: apt-get remove -y libgflags-dev - - run: make V=1 -j32 LIB_MODE=shared release - - run: ls librocksdb.so # ensure shared lib built - - run: if ./db_stress --version; then false; else true; fi # ensure without gflags - - run: make clean - - run: make V=1 -j32 release - - run: ls librocksdb.a # ensure static lib built - - run: if ./db_stress --version; then false; else true; fi # ensure without gflags - - post-steps - - build-linux-release-rtti: - executor: linux-docker - resource_class: xlarge - steps: - - checkout # check out the code in the project directory - - run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench - - run: ./db_stress --version # ensure with gflags - - run: make clean - - run: apt-get remove -y libgflags-dev - - run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench - - run: if ./db_stress --version; then false; else true; fi # ensure without gflags - - build-linux-clang-no_test_run: - executor: linux-docker - resource_class: xlarge - steps: - - checkout # check out the code in the project directory - - run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 make V=1 -j16 all - - post-steps - - build-linux-clang10-asan: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out - - post-steps - - build-linux-clang10-mini-tsan: - executor: linux-docker - resource_class: 2xlarge+ - steps: - - pre-steps - - run: COMPILE_WITH_TSAN=1 CC=clang-13 CXX=clang++-13 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check - - post-steps - - build-linux-clang10-ubsan: - executor: linux-docker - resource_class: 
2xlarge - steps: - - pre-steps - - run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reason we haven't figured out - - post-steps - - build-linux-valgrind: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: PORTABLE=1 make V=1 -j32 valgrind_test - - post-steps - - build-linux-clang10-clang-analyze: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path. - - post-steps - - run: - name: "compress test report" - command: tar -cvzf scan_build_report.tar.gz scan_build_report - when: on_fail - - store_artifacts: - path: scan_build_report.tar.gz - destination: scan_build_report - when: on_fail - - build-linux-runner: - machine: true - resource_class: facebook/rocksdb-benchmark-sys1 - steps: - - pre-steps - - run: - name: "Checked Linux build (Runner)" - command: make V=1 J=8 -j8 check - environment: - LD_LIBRARY_PATH: /usr/local/lib - - post-steps - - build-linux-cmake-with-folly: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - setup-folly - - build-folly - - run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. && make V=1 -j20 && ctest -j20) - - post-steps - - build-linux-cmake-with-folly-lite-no-test: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - setup-folly - - run: (mkdir build && cd build && cmake -DUSE_FOLLY_LITE=1 -DWITH_GFLAGS=1 .. && make V=1 -j20) - - post-steps - - build-linux-cmake-with-benchmark: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. 
&& make V=1 -j20 && ctest -j20 - - post-steps - - build-linux-unity-and-headers: - docker: # executor type - - image: gcc:latest - environment: - EXTRA_CXXFLAGS: -mno-avx512f # Warnings-as-error in avx512fintrin.h, would be used on newer hardware - resource_class: large - steps: - - checkout # check out the code in the project directory - - run: apt-get update -y && apt-get install -y libgflags-dev - - run: - name: "Unity build" - command: make V=1 -j8 unity_test - no_output_timeout: 20m - - run: make V=1 -j8 -k check-headers # could be moved to a different build - - post-steps - - build-linux-gcc-7-with-folly: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - setup-folly - - build-folly - - run: USE_FOLLY=1 LIB_MODE=static CC=gcc-7 CXX=g++-7 V=1 make -j32 check # TODO: LIB_MODE only to work around unresolved linker failures - - post-steps - - build-linux-gcc-7-with-folly-lite-no-test: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - setup-folly - - run: USE_FOLLY_LITE=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 all - - post-steps - - build-linux-gcc-8-no_test_run: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: CC=gcc-8 CXX=g++-8 V=1 make -j32 all - - post-steps - - build-linux-cmake-with-folly-coroutines: - executor: linux-docker - resource_class: 2xlarge - environment: - CC: gcc-10 - CXX: g++-10 - steps: - - pre-steps - - setup-folly - - build-folly - - run: (mkdir build && cd build && cmake -DUSE_COROUTINES=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. && make V=1 -j20 && ctest -j20) - - post-steps - - build-linux-gcc-10-cxx20-no_test_run: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: CC=gcc-10 CXX=g++-10 V=1 ROCKSDB_CXX_STANDARD=c++20 make -j32 all - - post-steps - - build-linux-gcc-11-no_test_run: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: LIB_MODE=static CC=gcc-11 CXX=g++-11 V=1 make -j32 all microbench # TODO: LIB_MODE only to work around unresolved linker failures - - post-steps - - build-linux-clang-13-no_test_run: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j32 all microbench - - post-steps - - # Ensure ASAN+UBSAN with folly, and full testsuite with clang 13 - build-linux-clang-13-asan-ubsan-with-folly: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - setup-folly - - build-folly - - run: CC=clang-13 CXX=clang++-13 LIB_MODE=static USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check # TODO: LIB_MODE only to work around unresolved linker failures - - post-steps - - # This job is only to make sure the microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing. 
- build-linux-run-microbench: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: DEBUG_LEVEL=0 make -j32 run_microbench - - post-steps - - build-linux-mini-crashtest: - executor: linux-docker - resource_class: large - steps: - - pre-steps - - run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS='--duration=960 --max_key=2500000 --use_io_uring=0' blackbox_crash_test_with_atomic_flush - - post-steps - - build-linux-crashtest-tiered-storage-bb: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: - name: "run crashtest" - command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' blackbox_crash_test_with_tiered_storage - no_output_timeout: 100m - - post-steps - - build-linux-crashtest-tiered-storage-wb: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: - name: "run crashtest" - command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' whitebox_crash_test_with_tiered_storage - no_output_timeout: 100m - - post-steps - - build-windows-vs2022: - executor: - name: win/server-2022 - size: 2xlarge - environment: - THIRDPARTY_HOME: C:/Users/circleci/thirdparty - CMAKE_HOME: C:/Program Files/CMake - CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe - SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8 - SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build - SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib - CMAKE_GENERATOR: Visual Studio 17 2022 - steps: - - windows-build-steps - - build-windows-vs2019: - executor: - name: win/server-2019 - size: 2xlarge - environment: - THIRDPARTY_HOME: C:/Users/circleci/thirdparty - CMAKE_HOME: C:/Program Files/CMake - CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe - SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8 - SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build - SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib - CMAKE_GENERATOR: Visual Studio 16 2019 - steps: - - windows-build-steps - - build-linux-java: - executor: linux-docker - resource_class: large - steps: - - pre-steps - - run: - name: "Set Java Environment" - command: | - echo "JAVA_HOME=${JAVA_HOME}" - echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV - which java && java -version - which javac && javac -version - - run: - name: "Test RocksDBJava" - command: make V=1 J=8 -j8 jtest - - post-steps - - build-linux-java-static: - executor: linux-docker - resource_class: large - steps: - - pre-steps - - run: - name: "Set Java Environment" - command: | - echo "JAVA_HOME=${JAVA_HOME}" - echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV - which java && java -version - which javac && javac -version - - run: - name: "Build RocksDBJava Static Library" - command: make V=1 J=8 -j8 rocksdbjavastatic - - post-steps - - build-macos-java: - macos: - xcode: 12.5.1 - resource_class: large - environment: - JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home - ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes java 8 crash - steps: - - increase-max-open-files-on-macos - - install-gflags-on-macos - - install-jdk8-on-macos - - pre-steps-macos - - run: - name: "Set Java Environment" - command: | - echo "JAVA_HOME=${JAVA_HOME}" - echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV - which java && java 
-version - which javac && javac -version - - run: - name: "Test RocksDBJava" - command: make V=1 J=16 -j16 jtest - no_output_timeout: 20m - - post-steps - - build-macos-java-static: - macos: - xcode: 12.5.1 - resource_class: large - environment: - JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home - steps: - - increase-max-open-files-on-macos - - install-gflags-on-macos - - install-cmake-on-macos - - install-jdk8-on-macos - - pre-steps-macos - - run: - name: "Set Java Environment" - command: | - echo "JAVA_HOME=${JAVA_HOME}" - echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV - which java && java -version - which javac && javac -version - - run: - name: "Build RocksDBJava x86 and ARM Static Libraries" - command: make V=1 J=16 -j16 rocksdbjavastaticosx - no_output_timeout: 20m - - post-steps - - build-macos-java-static-universal: - macos: - xcode: 12.5.1 - resource_class: large - environment: - JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home - steps: - - increase-max-open-files-on-macos - - install-gflags-on-macos - - install-cmake-on-macos - - install-jdk8-on-macos - - pre-steps-macos - - run: - name: "Set Java Environment" - command: | - echo "JAVA_HOME=${JAVA_HOME}" - echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV - which java && java -version - which javac && javac -version - - run: - name: "Build RocksDBJava Universal Binary Static Library" - command: make V=1 J=16 -j16 rocksdbjavastaticosx_ub - no_output_timeout: 20m - - post-steps - - build-examples: - executor: linux-docker - resource_class: large - steps: - - pre-steps - - run: - name: "Build examples" - command: | - make V=1 -j4 static_lib && cd examples && make V=1 -j4 - - post-steps - - build-cmake-mingw: - executor: linux-docker - resource_class: large - steps: - - pre-steps - - run: update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix - - run: - name: "Build cmake-mingw" - command: | - export PATH=$JAVA_HOME/bin:$PATH - echo "JAVA_HOME=${JAVA_HOME}" - which java && java -version - which javac && javac -version - mkdir build && cd build && cmake -DJNI=1 -DWITH_GFLAGS=OFF .. 
-DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++ -DCMAKE_SYSTEM_NAME=Windows && make -j4 rocksdb rocksdbjni - - post-steps - - build-linux-non-shm: - executor: linux-docker - resource_class: 2xlarge - environment: - TEST_TMPDIR: /tmp/rocksdb_test_tmp - steps: - - pre-steps - - run: make V=1 -j32 check - - post-steps - - build-linux-arm-test-full: - machine: - image: ubuntu-2004:202111-02 - resource_class: arm.large - steps: - - pre-steps - - install-gflags - - run: make V=1 J=4 -j4 check - - post-steps - - build-linux-arm: - machine: - image: ubuntu-2004:202111-02 - resource_class: arm.large - steps: - - pre-steps - - install-gflags - - run: ROCKSDBTESTS_PLATFORM_DEPENDENT=only make V=1 J=4 -j4 all_but_some_tests check_some - - post-steps - - build-linux-arm-cmake-no_test_run: - machine: - image: ubuntu-2004:202111-02 - resource_class: arm.large - environment: - JAVA_HOME: /usr/lib/jvm/java-8-openjdk-arm64 - steps: - - pre-steps - - install-gflags - - run: - name: "Set Java Environment" - command: | - echo "JAVA_HOME=${JAVA_HOME}" - echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV - which java && java -version - which javac && javac -version - - run: - name: "Build with cmake" - command: | - mkdir build - cd build - cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=1 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 .. - make -j4 - - run: - name: "Build Java with cmake" - command: | - rm -rf build - mkdir build - cd build - cmake -DJNI=1 -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=1 .. - make -j4 rocksdb rocksdbjni - - post-steps - - build-format-compatible: - executor: linux-docker - resource_class: 2xlarge - steps: - - pre-steps - - run: - name: "test" - command: | - export TEST_TMPDIR=/dev/shm/rocksdb - rm -rf /dev/shm/rocksdb - mkdir /dev/shm/rocksdb - tools/check_format_compatible.sh - - post-steps - - build-fuzzers: - executor: linux-docker - resource_class: large - steps: - - pre-steps - - run: - name: "Build rocksdb lib" - command: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j4 static_lib - - run: - name: "Build fuzzers" - command: cd fuzz && make sst_file_writer_fuzzer db_fuzzer db_map_fuzzer - - post-steps - - benchmark-linux: #use a private Circle CI runner (resource_class) to run the job - machine: true - resource_class: facebook/rocksdb-benchmark-sys1 - steps: - - build-for-benchmarks - - perform-benchmarks - - post-benchmarks - -workflows: - version: 2 - jobs-linux-run-tests: - jobs: - - build-linux - - build-linux-cmake-with-folly - - build-linux-cmake-with-folly-lite-no-test - - build-linux-gcc-7-with-folly - - build-linux-gcc-7-with-folly-lite-no-test - - build-linux-cmake-with-folly-coroutines - - build-linux-cmake-with-benchmark - - build-linux-encrypted_env-no_compression - jobs-linux-run-tests-san: - jobs: - - build-linux-clang10-asan - - build-linux-clang10-ubsan - - build-linux-clang10-mini-tsan - - build-linux-static_lib-alt_namespace-status_checked - jobs-linux-no-test-run: - jobs: - - build-linux-release - - build-linux-release-rtti - - build-examples - - build-fuzzers - - build-linux-clang-no_test_run - - build-linux-clang-13-no_test_run - - build-linux-gcc-8-no_test_run - - build-linux-gcc-10-cxx20-no_test_run - - build-linux-gcc-11-no_test_run - - build-linux-arm-cmake-no_test_run - jobs-linux-other-checks: - jobs: - - build-linux-clang10-clang-analyze - - build-linux-unity-and-headers - - build-linux-mini-crashtest - jobs-windows: - jobs: - - build-windows-vs2022 - - build-windows-vs2019 - - 
build-cmake-mingw - jobs-java: - jobs: - - build-linux-java - - build-linux-java-static - - build-macos-java - - build-macos-java-static - - build-macos-java-static-universal - jobs-macos: - jobs: - - build-macos - - build-macos-cmake: - run_even_tests: true - - build-macos-cmake: - run_even_tests: false - jobs-linux-arm: - jobs: - - build-linux-arm - build-fuzzers: - jobs: - - build-fuzzers - benchmark-linux: - triggers: - - schedule: - cron: "0 * * * *" - filters: - branches: - only: - - main - jobs: - - benchmark-linux - nightly: - triggers: - - schedule: - cron: "0 9 * * *" - filters: - branches: - only: - - main - jobs: - - build-format-compatible - - build-linux-arm-test-full - - build-linux-run-microbench - - build-linux-non-shm - - build-linux-clang-13-asan-ubsan-with-folly - - build-linux-valgrind diff --git a/.circleci/ubsan_suppression_list.txt b/.circleci/ubsan_suppression_list.txt deleted file mode 100644 index d7db81806..000000000 --- a/.circleci/ubsan_suppression_list.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Supress UBSAN warnings related to stl_tree.h, e.g. -# UndefinedBehaviorSanitizer: undefined-behavior /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43 in -# /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43: -# runtime error: upcast of address 0x000001fa8820 with insufficient space for an object of type -# 'std::_Rb_tree_node, rocksdb::(anonymous namespace)::LockHoldingInfo> >' -src:*bits/stl_tree.h diff --git a/.github/workflows/sanity_check.yml b/.github/workflows/sanity_check.yml deleted file mode 100644 index 6ee53ce1b..000000000 --- a/.github/workflows/sanity_check.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: Check buck targets and code format -on: [push, pull_request] -permissions: - contents: read - -jobs: - check: - name: Check TARGETS file and code format - runs-on: ubuntu-latest - steps: - - name: Checkout feature branch - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Fetch from upstream - run: | - git remote add upstream https://github.com/facebook/rocksdb.git && git fetch upstream - - - name: Where am I - run: | - echo git status && git status - echo "git remote -v" && git remote -v - echo git branch && git branch - - - name: Setup Python - uses: actions/setup-python@v1 - - - name: Install Dependencies - run: python -m pip install --upgrade pip - - - name: Install argparse - run: pip install argparse - - - name: Download clang-format-diff.py - uses: wei/wget@v1 - with: - args: https://raw.githubusercontent.com/llvm/llvm-project/release/12.x/clang/tools/clang-format/clang-format-diff.py - - - name: Check format - run: VERBOSE_CHECK=1 make check-format - - - name: Compare buckify output - run: make check-buck-targets - - - name: Simple source code checks - run: make check-sources diff --git a/build_tools/amalgamate.py b/build_tools/amalgamate.py deleted file mode 100755 index f79e9075e..000000000 --- a/build_tools/amalgamate.py +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -# amalgamate.py creates an amalgamation from a unity build. -# It can be run with either Python 2 or 3. -# An amalgamation consists of a header that includes the contents of all public -# headers and a source file that includes the contents of all source files and -# private headers. -# -# This script works by starting with the unity build file and recursively expanding -# #include directives. 
If the #include is found in a public include directory, -# that header is expanded into the amalgamation header. -# -# A particular header is only expanded once, so this script will -# break if there are multiple inclusions of the same header that are expected to -# expand differently. Similarly, this type of code causes issues: -# -# #ifdef FOO -# #include "bar.h" -# // code here -# #else -# #include "bar.h" // oops, doesn't get expanded -# // different code here -# #endif -# -# The solution is to move the include out of the #ifdef. - -from __future__ import print_function - -import argparse -import re -import sys -from os import path - -include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$') -included = set() -excluded = set() - - -def find_header(name, abs_path, include_paths): - samedir = path.join(path.dirname(abs_path), name) - if path.exists(samedir): - return samedir - for include_path in include_paths: - include_path = path.join(include_path, name) - if path.exists(include_path): - return include_path - return None - - -def expand_include( - include_path, - f, - abs_path, - source_out, - header_out, - include_paths, - public_include_paths, -): - if include_path in included: - return False - - included.add(include_path) - with open(include_path) as f: - print('#line 1 "{}"'.format(include_path), file=source_out) - process_file( - f, include_path, source_out, header_out, include_paths, public_include_paths - ) - return True - - -def process_file( - f, abs_path, source_out, header_out, include_paths, public_include_paths -): - for (line, text) in enumerate(f): - m = include_re.match(text) - if m: - filename = m.groups()[0] - # first check private headers - include_path = find_header(filename, abs_path, include_paths) - if include_path: - if include_path in excluded: - source_out.write(text) - expanded = False - else: - expanded = expand_include( - include_path, - f, - abs_path, - source_out, - header_out, - include_paths, - public_include_paths, - ) - else: - # now try public headers - include_path = find_header(filename, abs_path, public_include_paths) - if include_path: - # found public header - expanded = False - if include_path in excluded: - source_out.write(text) - else: - expand_include( - include_path, - f, - abs_path, - header_out, - None, - public_include_paths, - [], - ) - else: - sys.exit( - "unable to find {}, included in {} on line {}".format( - filename, abs_path, line - ) - ) - - if expanded: - print('#line {} "{}"'.format(line + 1, abs_path), file=source_out) - elif text != "#pragma once\n": - source_out.write(text) - - -def main(): - parser = argparse.ArgumentParser( - description="Transform a unity build into an amalgamation" - ) - parser.add_argument("source", help="source file") - parser.add_argument( - "-I", - action="append", - dest="include_paths", - help="include paths for private headers", - ) - parser.add_argument( - "-i", - action="append", - dest="public_include_paths", - help="include paths for public headers", - ) - parser.add_argument( - "-x", action="append", dest="excluded", help="excluded header files" - ) - parser.add_argument("-o", dest="source_out", help="output C++ file", required=True) - parser.add_argument( - "-H", dest="header_out", help="output C++ header file", required=True - ) - args = parser.parse_args() - - include_paths = list(map(path.abspath, args.include_paths or [])) - public_include_paths = list(map(path.abspath, args.public_include_paths or [])) - excluded.update(map(path.abspath, args.excluded or [])) - filename = 
args.source - abs_path = path.abspath(filename) - with open(filename) as f, open(args.source_out, "w") as source_out, open( - args.header_out, "w" - ) as header_out: - print('#line 1 "{}"'.format(filename), file=source_out) - print('#include "{}"'.format(header_out.name), file=source_out) - process_file( - f, abs_path, source_out, header_out, include_paths, public_include_paths - ) - - -if __name__ == "__main__": - main() diff --git a/build_tools/benchmark_log_tool.py b/build_tools/benchmark_log_tool.py deleted file mode 100755 index d1ad45911..000000000 --- a/build_tools/benchmark_log_tool.py +++ /dev/null @@ -1,238 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -# This source code is licensed under both the GPLv2 (found in the -# COPYING file in the root directory) and Apache 2.0 License -# (found in the LICENSE.Apache file in the root directory). - -"""Access the results of benchmark runs -Send these results on to OpenSearch graphing service -""" - -import argparse -import itertools -import logging -import os -import re -import sys - -import requests -from dateutil import parser - -logging.basicConfig(level=logging.DEBUG) - - -class Configuration: - opensearch_user = os.environ["ES_USER"] - opensearch_pass = os.environ["ES_PASS"] - - -class BenchmarkResultException(Exception): - def __init__(self, message, content): - super().__init__(self, message) - self.content = content - - -class BenchmarkUtils: - - expected_keys = [ - "ops_sec", - "mb_sec", - "lsm_sz", - "blob_sz", - "c_wgb", - "w_amp", - "c_mbps", - "c_wsecs", - "c_csecs", - "b_rgb", - "b_wgb", - "usec_op", - "p50", - "p99", - "p99.9", - "p99.99", - "pmax", - "uptime", - "stall%", - "Nstall", - "u_cpu", - "s_cpu", - "rss", - "test", - "date", - "version", - "job_id", - ] - - def sanity_check(row): - if "test" not in row: - logging.debug(f"not 'test' in row: {row}") - return False - if row["test"] == "": - logging.debug(f"row['test'] == '': {row}") - return False - if "date" not in row: - logging.debug(f"not 'date' in row: {row}") - return False - if "ops_sec" not in row: - logging.debug(f"not 'ops_sec' in row: {row}") - return False - try: - _ = int(row["ops_sec"]) - except (ValueError, TypeError): - logging.debug(f"int(row['ops_sec']): {row}") - return False - try: - (_, _) = parser.parse(row["date"], fuzzy_with_tokens=True) - except (parser.ParserError): - logging.error( - f"parser.parse((row['date']): not a valid format for date in row: {row}" - ) - return False - return True - - def conform_opensearch(row): - (dt, _) = parser.parse(row["date"], fuzzy_with_tokens=True) - # create a test_date field, which was previously what was expected - # repair the date field, which has what can be a WRONG ISO FORMAT, (no leading 0 on single-digit day-of-month) - # e.g. 
2022-07-1T00:14:55 should be 2022-07-01T00:14:55 - row["test_date"] = dt.isoformat() - row["date"] = dt.isoformat() - return {key.replace(".", "_"): value for key, value in row.items()} - - -class ResultParser: - def __init__(self, field="(\w|[+-:.%])+", intrafield="(\s)+", separator="\t"): - self.field = re.compile(field) - self.intra = re.compile(intrafield) - self.sep = re.compile(separator) - - def ignore(self, l_in: str): - if len(l_in) == 0: - return True - if l_in[0:1] == "#": - return True - return False - - def line(self, line_in: str): - """Parse a line into items - Being clever about separators - """ - line = line_in - row = [] - while line != "": - match_item = self.field.match(line) - if match_item: - item = match_item.group(0) - row.append(item) - line = line[len(item) :] - else: - match_intra = self.intra.match(line) - if match_intra: - intra = match_intra.group(0) - # Count the separators - # If there are >1 then generate extra blank fields - # White space with no true separators fakes up a single separator - tabbed = self.sep.split(intra) - sep_count = len(tabbed) - 1 - if sep_count == 0: - sep_count = 1 - for _ in range(sep_count - 1): - row.append("") - line = line[len(intra) :] - else: - raise BenchmarkResultException( - "Invalid TSV line", f"{line_in} at {line}" - ) - return row - - def parse(self, lines): - """Parse something that iterates lines""" - rows = [self.line(line) for line in lines if not self.ignore(line)] - header = rows[0] - width = len(header) - records = [ - {k: v for (k, v) in itertools.zip_longest(header, row[:width])} - for row in rows[1:] - ] - return records - - -def load_report_from_tsv(filename: str): - file = open(filename, "r") - contents = file.readlines() - file.close() - parser = ResultParser() - report = parser.parse(contents) - logging.debug(f"Loaded TSV Report: {report}") - return report - - -def push_report_to_opensearch(report, esdocument): - sanitized = [ - BenchmarkUtils.conform_opensearch(row) - for row in report - if BenchmarkUtils.sanity_check(row) - ] - logging.debug( - f"upload {len(sanitized)} sane of {len(report)} benchmarks to opensearch" - ) - for single_benchmark in sanitized: - logging.debug(f"upload benchmark: {single_benchmark}") - response = requests.post( - esdocument, - json=single_benchmark, - auth=(os.environ["ES_USER"], os.environ["ES_PASS"]), - ) - logging.debug( - f"Sent to OpenSearch, status: {response.status_code}, result: {response.text}" - ) - response.raise_for_status() - - -def push_report_to_null(report): - - for row in report: - if BenchmarkUtils.sanity_check(row): - logging.debug(f"row {row}") - conformed = BenchmarkUtils.conform_opensearch(row) - logging.debug(f"conformed row {conformed}") - - -def main(): - """Tool for fetching, parsing and uploading benchmark results to OpenSearch / ElasticSearch - This tool will - - (1) Open a local tsv benchmark report file - (2) Upload to OpenSearch document, via https/JSON - """ - - parser = argparse.ArgumentParser(description="CircleCI benchmark scraper.") - - # --tsvfile is the name of the file to read results from - # --esdocument is the ElasticSearch document to push these results into - # - parser.add_argument( - "--tsvfile", - default="build_tools/circle_api_scraper_input.txt", - help="File from which to read tsv report", - ) - parser.add_argument( - "--esdocument", - help="ElasticSearch/OpenSearch document URL to upload report into", - ) - parser.add_argument( - "--upload", choices=["opensearch", "none"], default="opensearch" - ) - - args = 
parser.parse_args() - logging.debug(f"Arguments: {args}") - reports = load_report_from_tsv(args.tsvfile) - if args.upload == "opensearch": - push_report_to_opensearch(reports, args.esdocument) - else: - push_report_to_null(reports) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform deleted file mode 100755 index c03d9ae41..000000000 --- a/build_tools/build_detect_platform +++ /dev/null @@ -1,900 +0,0 @@ -#!/usr/bin/env bash -# -# Detects OS we're compiling on and outputs a file specified by the first -# argument, which in turn gets read while processing Makefile. -# -# The output will set the following variables: -# CC C Compiler path -# CXX C++ Compiler path -# PLATFORM_LDFLAGS Linker flags -# JAVA_LDFLAGS Linker flags for RocksDBJava -# JAVA_STATIC_LDFLAGS Linker flags for RocksDBJava static build -# JAVAC_ARGS Arguments for javac -# PLATFORM_SHARED_EXT Extension for shared libraries -# PLATFORM_SHARED_LDFLAGS Flags for building shared library -# PLATFORM_SHARED_CFLAGS Flags for compiling objects for shared library -# PLATFORM_CCFLAGS C compiler flags -# PLATFORM_CXXFLAGS C++ compiler flags. Will contain: -# PLATFORM_SHARED_VERSIONED Set to 'true' if platform supports versioned -# shared libraries, empty otherwise. -# FIND Command for the find utility -# WATCH Command for the watch utility -# -# The PLATFORM_CCFLAGS and PLATFORM_CXXFLAGS might include the following: -# -# -DROCKSDB_PLATFORM_POSIX if posix-platform based -# -DSNAPPY if the Snappy library is present -# -DLZ4 if the LZ4 library is present -# -DZSTD if the ZSTD library is present -# -DNUMA if the NUMA library is present -# -DTBB if the TBB library is present -# -DMEMKIND if the memkind library is present -# -# Using gflags in rocksdb: -# Our project depends on gflags, which requires users to take some extra steps -# before they can compile the whole repository: -# 1. Install gflags. You may download it from here: -# https://gflags.github.io/gflags/ (Mac users can `brew install gflags`) -# 2. Once installed, add the include path for gflags to your CPATH env var and -# the lib path to LIBRARY_PATH. 
If installed with default settings, the lib -# will be /usr/local/lib and the include path will be /usr/local/include - -OUTPUT=$1 -if test -z "$OUTPUT"; then - echo "usage: $0 " >&2 - exit 1 -fi - -# we depend on C++17, but should be compatible with newer standards -if [ "$ROCKSDB_CXX_STANDARD" ]; then - PLATFORM_CXXFLAGS="-std=$ROCKSDB_CXX_STANDARD" -else - PLATFORM_CXXFLAGS="-std=c++17" -fi - -# we currently depend on POSIX platform -COMMON_FLAGS="-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX" - -# Default to fbcode gcc on internal fb machines -if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then - FBCODE_BUILD="true" - # If we're compiling with TSAN or shared lib, we need pic build - PIC_BUILD=$COMPILE_WITH_TSAN - if [ "$LIB_MODE" == "shared" ]; then - PIC_BUILD=1 - fi - source "$PWD/build_tools/fbcode_config_platform010.sh" -fi - -# Delete existing output, if it exists -rm -f "$OUTPUT" -touch "$OUTPUT" - -if test -z "$CC"; then - if [ -x "$(command -v cc)" ]; then - CC=cc - elif [ -x "$(command -v clang)" ]; then - CC=clang - else - CC=cc - fi -fi - -if test -z "$CXX"; then - if [ -x "$(command -v g++)" ]; then - CXX=g++ - elif [ -x "$(command -v clang++)" ]; then - CXX=clang++ - else - CXX=g++ - fi -fi - -if test -z "$AR"; then - if [ -x "$(command -v gcc-ar)" ]; then - AR=gcc-ar - elif [ -x "$(command -v llvm-ar)" ]; then - AR=llvm-ar - else - AR=ar - fi -fi - -# Detect OS -if test -z "$TARGET_OS"; then - TARGET_OS=`uname -s` -fi - -if test -z "$TARGET_ARCHITECTURE"; then - TARGET_ARCHITECTURE=`uname -m` -fi - -if test -z "$CLANG_SCAN_BUILD"; then - CLANG_SCAN_BUILD=scan-build -fi - -if test -z "$CLANG_ANALYZER"; then - CLANG_ANALYZER=$(command -v clang++ 2> /dev/null) -fi - -if test -z "$FIND"; then - FIND=find -fi - -if test -z "$WATCH"; then - WATCH=watch -fi - -COMMON_FLAGS="$COMMON_FLAGS ${CFLAGS}" -CROSS_COMPILE= -PLATFORM_CCFLAGS= -PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS" -PLATFORM_SHARED_EXT="so" -PLATFORM_SHARED_LDFLAGS="-Wl,--no-as-needed -shared -Wl,-soname -Wl," -PLATFORM_SHARED_CFLAGS="-fPIC" -PLATFORM_SHARED_VERSIONED=true - -# generic port files (working on all platform by #ifdef) go directly in /port -GENERIC_PORT_FILES=`cd "$ROCKSDB_ROOT"; find port -name '*.cc' | tr "\n" " "` - -# On GCC, we pick libc's memcmp over GCC's memcmp via -fno-builtin-memcmp -case "$TARGET_OS" in - Darwin) - PLATFORM=OS_MACOSX - COMMON_FLAGS="$COMMON_FLAGS -DOS_MACOSX" - PLATFORM_SHARED_EXT=dylib - PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name " - # PORT_FILES=port/darwin/darwin_specific.cc - ;; - IOS) - PLATFORM=IOS - COMMON_FLAGS="$COMMON_FLAGS -DOS_MACOSX -DIOS_CROSS_COMPILE " - PLATFORM_SHARED_EXT=dylib - PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name " - CROSS_COMPILE=true - PLATFORM_SHARED_VERSIONED= - ;; - Linux) - PLATFORM=OS_LINUX - COMMON_FLAGS="$COMMON_FLAGS -DOS_LINUX" - if [ -z "$USE_CLANG" ]; then - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp" - else - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" - fi - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt -ldl" - if test -z "$ROCKSDB_USE_IO_URING"; then - ROCKSDB_USE_IO_URING=1 - fi - if test "$ROCKSDB_USE_IO_URING" -ne 0; then - # check for liburing - $CXX $PLATFORM_CXXFLAGS -x c++ - -luring -o test.o 2>/dev/null < - int main() { - struct io_uring ring; - io_uring_queue_init(1, &ring, 0); - return 0; - } -EOF - if [ "$?" 
= 0 ]; then - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -luring" - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_IOURING_PRESENT" - fi - fi - # PORT_FILES=port/linux/linux_specific.cc - ;; - SunOS) - PLATFORM=OS_SOLARIS - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_SOLARIS -m64" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt -static-libstdc++ -static-libgcc -m64" - # PORT_FILES=port/sunos/sunos_specific.cc - ;; - AIX) - PLATFORM=OS_AIX - CC=gcc - COMMON_FLAGS="$COMMON_FLAGS -maix64 -pthread -fno-builtin-memcmp -D_REENTRANT -DOS_AIX -D__STDC_FORMAT_MACROS" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -pthread -lpthread -lrt -maix64 -static-libstdc++ -static-libgcc" - # PORT_FILES=port/aix/aix_specific.cc - ;; - FreeBSD) - PLATFORM=OS_FREEBSD - CXX=clang++ - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_FREEBSD" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" - # PORT_FILES=port/freebsd/freebsd_specific.cc - ;; - GNU/kFreeBSD) - PLATFORM=OS_GNU_KFREEBSD - COMMON_FLAGS="$COMMON_FLAGS -DOS_GNU_KFREEBSD" - if [ -z "$USE_CLANG" ]; then - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp" - else - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" - fi - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt" - # PORT_FILES=port/gnu_kfreebsd/gnu_kfreebsd_specific.cc - ;; - NetBSD) - PLATFORM=OS_NETBSD - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lgcc_s" - # PORT_FILES=port/netbsd/netbsd_specific.cc - ;; - OpenBSD) - PLATFORM=OS_OPENBSD - CXX=clang++ - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_OPENBSD" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -pthread" - # PORT_FILES=port/openbsd/openbsd_specific.cc - FIND=gfind - WATCH=gnuwatch - ;; - DragonFly) - PLATFORM=OS_DRAGONFLYBSD - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_DRAGONFLYBSD" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" - # PORT_FILES=port/dragonfly/dragonfly_specific.cc - ;; - Cygwin) - PLATFORM=CYGWIN - PLATFORM_SHARED_CFLAGS="" - PLATFORM_CXXFLAGS="-std=gnu++11" - COMMON_FLAGS="$COMMON_FLAGS -DCYGWIN" - if [ -z "$USE_CLANG" ]; then - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp" - else - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" - fi - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt" - # PORT_FILES=port/linux/linux_specific.cc - ;; - OS_ANDROID_CROSSCOMPILE) - PLATFORM=OS_ANDROID - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_ANDROID -DROCKSDB_PLATFORM_POSIX" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS " # All pthread features are in the Android C library - # PORT_FILES=port/android/android.cc - CROSS_COMPILE=true - ;; - *) - echo "Unknown platform!" >&2 - exit 1 -esac - -PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}" -JAVA_LDFLAGS="$PLATFORM_LDFLAGS" -JAVA_STATIC_LDFLAGS="$PLATFORM_LDFLAGS" -JAVAC_ARGS="-source 8" - -if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then - # Cross-compiling; do not try any compilation tests. - # Also don't need any compilation tests if compiling on fbcode - if [ "$FBCODE_BUILD" = "true" ]; then - # Enable backtrace on fbcode since the necessary libraries are present - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" - FOLLY_DIR="third-party/folly" - fi - true -else - if ! 
test $ROCKSDB_DISABLE_FALLOCATE; then - # Test whether fallocate is available - $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null < - #include - int main() { - int fd = open("/dev/null", 0); - fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024); - } -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_FALLOCATE_PRESENT" - fi - fi - - if ! test $ROCKSDB_DISABLE_SNAPPY; then - # Test whether Snappy library is installed - # http://code.google.com/p/snappy/ - $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null < - int main() {} -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DSNAPPY" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lsnappy" - JAVA_LDFLAGS="$JAVA_LDFLAGS -lsnappy" - fi - fi - - if ! test $ROCKSDB_DISABLE_GFLAGS; then - # Test whether gflags library is installed - # http://gflags.github.io/gflags/ - # check if the namespace is gflags - if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF - #include - using namespace GFLAGS_NAMESPACE; - int main() {} -EOF - then - COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" - # check if namespace is gflags - elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF - #include - using namespace gflags; - int main() {} -EOF - then - COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=gflags" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" - # check if namespace is google - elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF - #include - using namespace google; - int main() {} -EOF - then - COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=google" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" - fi - fi - - if ! test $ROCKSDB_DISABLE_ZLIB; then - # Test whether zlib library is installed - $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null < - int main() {} -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DZLIB" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lz" - JAVA_LDFLAGS="$JAVA_LDFLAGS -lz" - fi - fi - - if ! test $ROCKSDB_DISABLE_BZIP; then - # Test whether bzip library is installed - $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null < - int main() {} -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DBZIP2" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbz2" - JAVA_LDFLAGS="$JAVA_LDFLAGS -lbz2" - fi - fi - - if ! test $ROCKSDB_DISABLE_LZ4; then - # Test whether lz4 library is installed - $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null < - #include - int main() {} -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DLZ4" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -llz4" - JAVA_LDFLAGS="$JAVA_LDFLAGS -llz4" - fi - fi - - if ! test $ROCKSDB_DISABLE_ZSTD; then - # Test whether zstd library is installed - $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < - int main() {} -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DZSTD" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lzstd" - JAVA_LDFLAGS="$JAVA_LDFLAGS -lzstd" - fi - fi - - if ! test $ROCKSDB_DISABLE_NUMA; then - # Test whether numa is available - $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -lnuma 2>/dev/null < - #include - int main() {} -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DNUMA" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lnuma" - JAVA_LDFLAGS="$JAVA_LDFLAGS -lnuma" - fi - fi - - if ! test $ROCKSDB_DISABLE_TBB; then - # Test whether tbb is available - $CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -ltbb 2>/dev/null < - int main() {} -EOF - if [ "$?" 
= 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DTBB" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltbb" - JAVA_LDFLAGS="$JAVA_LDFLAGS -ltbb" - fi - fi - - if ! test $ROCKSDB_DISABLE_JEMALLOC; then - # Test whether jemalloc is available - if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -ljemalloc \ - 2>/dev/null; then - # This will enable some preprocessor identifiers in the Makefile - JEMALLOC=1 - # JEMALLOC can be enabled either using the flag (like here) or by - # providing direct link to the jemalloc library - WITH_JEMALLOC_FLAG=1 - # check for JEMALLOC installed with HomeBrew - if [ "$PLATFORM" == "OS_MACOSX" ]; then - if hash brew 2>/dev/null && brew ls --versions jemalloc > /dev/null; then - JEMALLOC_VER=$(brew ls --versions jemalloc | tail -n 1 | cut -f 2 -d ' ') - JEMALLOC_INCLUDE="-I/usr/local/Cellar/jemalloc/${JEMALLOC_VER}/include" - JEMALLOC_LIB="/usr/local/Cellar/jemalloc/${JEMALLOC_VER}/lib/libjemalloc_pic.a" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS $JEMALLOC_LIB" - JAVA_STATIC_LDFLAGS="$JAVA_STATIC_LDFLAGS $JEMALLOC_LIB" - fi - fi - fi - fi - if ! test $JEMALLOC && ! test $ROCKSDB_DISABLE_TCMALLOC; then - # jemalloc is not available. Let's try tcmalloc - if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o \ - -ltcmalloc 2>/dev/null; then - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltcmalloc" - JAVA_LDFLAGS="$JAVA_LDFLAGS -ltcmalloc" - fi - fi - - if ! test $ROCKSDB_DISABLE_MALLOC_USABLE_SIZE; then - # Test whether malloc_usable_size is available - $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null < - int main() { - size_t res = malloc_usable_size(0); - (void)res; - return 0; - } -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_MALLOC_USABLE_SIZE" - fi - fi - - if ! test $ROCKSDB_DISABLE_MEMKIND; then - # Test whether memkind library is installed - $CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -lmemkind 2>/dev/null < - int main() { - memkind_malloc(MEMKIND_DAX_KMEM, 1024); - return 0; - } -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DMEMKIND" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lmemkind" - JAVA_LDFLAGS="$JAVA_LDFLAGS -lmemkind" - fi - fi - - if ! test $ROCKSDB_DISABLE_PTHREAD_MUTEX_ADAPTIVE_NP; then - # Test whether PTHREAD_MUTEX_ADAPTIVE_NP mutex type is available - $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null < - int main() { - int x = PTHREAD_MUTEX_ADAPTIVE_NP; - (void)x; - return 0; - } -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX" - fi - fi - - if ! test $ROCKSDB_DISABLE_BACKTRACE; then - # Test whether backtrace is available - $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null < - int main() { - void* frames[1]; - backtrace_symbols(frames, backtrace(frames, 1)); - return 0; - } -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" - else - # Test whether execinfo library is installed - $CXX $PLATFORM_CXXFLAGS -lexecinfo -x c++ - -o test.o 2>/dev/null < - int main() { - void* frames[1]; - backtrace_symbols(frames, backtrace(frames, 1)); - } -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lexecinfo" - JAVA_LDFLAGS="$JAVA_LDFLAGS -lexecinfo" - fi - fi - fi - - if ! test $ROCKSDB_DISABLE_PG; then - # Test if -pg is supported - $CXX $PLATFORM_CXXFLAGS -pg -x c++ - -o test.o 2>/dev/null </dev/null < - int main() { - int fd = open("/dev/null", 0); - sync_file_range(fd, 0, 1024, SYNC_FILE_RANGE_WRITE); - } -EOF - if [ "$?" 
= 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_RANGESYNC_PRESENT" - fi - fi - - if ! test $ROCKSDB_DISABLE_SCHED_GETCPU; then - # Test whether sched_getcpu is supported - $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null < - int main() { - int cpuid = sched_getcpu(); - (void)cpuid; - } -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SCHED_GETCPU_PRESENT" - fi - fi - - if ! test $ROCKSDB_DISABLE_AUXV_GETAUXVAL; then - # Test whether getauxval is supported - $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null < - int main() { - uint64_t auxv = getauxval(AT_HWCAP); - (void)auxv; - } -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_AUXV_GETAUXVAL_PRESENT" - fi - fi - - if ! test $ROCKSDB_DISABLE_ALIGNED_NEW; then - # Test whether c++17 aligned-new is supported - $CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o test.o 2>/dev/null </dev/null < - int main() {} -EOF - if [ "$?" = 0 ]; then - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbenchmark" - fi - fi - if test $USE_FOLLY; then - # Test whether libfolly library is installed - $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < - int main() {} -EOF - if [ "$?" != 0 ]; then - FOLLY_DIR="./third-party/folly" - fi - fi - -fi - -# TODO(tec): Fix -Wshorten-64-to-32 errors on FreeBSD and enable the warning. -# -Wshorten-64-to-32 breaks compilation on FreeBSD aarch64 and i386 -if ! { [ "$TARGET_OS" = FreeBSD ] && [ "$TARGET_ARCHITECTURE" = arm64 -o "$TARGET_ARCHITECTURE" = i386 ]; }; then - # Test whether -Wshorten-64-to-32 is available - $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -Wshorten-64-to-32 2>/dev/null </dev/null; then - COMMON_FLAGS="$COMMON_FLAGS -march=native " - else - COMMON_FLAGS="$COMMON_FLAGS -march=z196 " - fi - COMMON_FLAGS="$COMMON_FLAGS" - elif test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then - RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-) - COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}" - elif [ "$TARGET_OS" == "IOS" ]; then - COMMON_FLAGS="$COMMON_FLAGS" - elif [ "$TARGET_OS" == "AIX" ] || [ "$TARGET_OS" == "SunOS" ]; then - # TODO: Not sure why we don't use -march=native on these OSes - if test "$USE_SSE"; then - TRY_SSE_ETC="1" - fi - else - COMMON_FLAGS="$COMMON_FLAGS -march=native " - fi -else - # PORTABLE=1 - if test "$USE_SSE"; then - TRY_SSE_ETC="1" - fi - - if test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then - COMMON_FLAGS="$COMMON_FLAGS -march=z196 " - fi - - if test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then - RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-) - COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}" - fi - - if [[ "${PLATFORM}" == "OS_MACOSX" ]]; then - # For portability compile for macOS 10.13 (2017) or newer - COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.13" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.13" - # -mmacosx-version-min must come first here. 
- PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.13 $PLATFORM_SHARED_LDFLAGS" - PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.13" - JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.13" - JAVA_STATIC_DEPS_LDFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS" - JAVA_STATIC_DEPS_CCFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS" - JAVA_STATIC_DEPS_CXXFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS" - fi -fi - -if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then - # check for GNU libc on ppc64 - $CXX -x c++ - -o /dev/null 2>/dev/null < - #include - #include - - int main(int argc, char *argv[]) { - printf("GNU libc version: %s\n", gnu_get_libc_version()); - return 0; - } -EOF - if [ "$?" != 0 ]; then - PPC_LIBC_IS_GNU=0 - fi -fi - -if test "$TRY_SSE_ETC"; then - # The USE_SSE flag now means "attempt to compile with widely-available - # Intel architecture extensions utilized by specific optimizations in the - # source code." It's a qualifier on PORTABLE=1 that means "mostly portable." - # It doesn't even really check that your current CPU is compatible. - # - # SSE4.2 available since nehalem, ca. 2008-2010 - # Includes POPCNT for BitsSetToOne, BitParity - TRY_SSE42="-msse4.2" - # PCLMUL available since westmere, ca. 2010-2011 - TRY_PCLMUL="-mpclmul" - # AVX2 available since haswell, ca. 2013-2015 - TRY_AVX2="-mavx2" - # BMI available since haswell, ca. 2013-2015 - # Primarily for TZCNT for CountTrailingZeroBits - TRY_BMI="-mbmi" - # LZCNT available since haswell, ca. 2013-2015 - # For FloorLog2 - TRY_LZCNT="-mlzcnt" -fi - -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o test.o 2>/dev/null < - #include - int main() { - volatile uint32_t x = _mm_crc32_u32(0, 0); - (void)x; - } -EOF -if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS $TRY_SSE42 -DHAVE_SSE42" -elif test "$USE_SSE"; then - echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling" >&2 -fi - -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_PCLMUL -x c++ - -o test.o 2>/dev/null < - #include - int main() { - const auto a = _mm_set_epi64x(0, 0); - const auto b = _mm_set_epi64x(0, 0); - const auto c = _mm_clmulepi64_si128(a, b, 0x00); - auto d = _mm_cvtsi128_si64(c); - (void)d; - } -EOF -if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS $TRY_PCLMUL -DHAVE_PCLMUL" -elif test "$USE_SSE"; then - echo "warning: USE_SSE specified but compiler could not use PCLMUL intrinsics, disabling" >&2 -fi - -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_AVX2 -x c++ - -o test.o 2>/dev/null < - #include - int main() { - const auto a = _mm256_setr_epi32(0, 1, 2, 3, 4, 7, 6, 5); - const auto b = _mm256_permutevar8x32_epi32(a, a); - (void)b; - } -EOF -if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS $TRY_AVX2 -DHAVE_AVX2" -elif test "$USE_SSE"; then - echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2 -fi - -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_BMI -x c++ - -o test.o 2>/dev/null < - #include - int main(int argc, char *argv[]) { - (void)argv; - return (int)_tzcnt_u64((uint64_t)argc); - } -EOF -if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS $TRY_BMI -DHAVE_BMI" -elif test "$USE_SSE"; then - echo "warning: USE_SSE specified but compiler could not use BMI intrinsics, disabling" >&2 -fi - -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_LZCNT -x c++ - -o test.o 2>/dev/null < - #include - int main(int argc, char *argv[]) { - (void)argv; - return (int)_lzcnt_u64((uint64_t)argc); - } -EOF -if [ "$?" 
= 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS $TRY_LZCNT -DHAVE_LZCNT" -elif test "$USE_SSE"; then - echo "warning: USE_SSE specified but compiler could not use LZCNT intrinsics, disabling" >&2 -fi - -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null < - int main() { - uint64_t a = 0xffffFFFFffffFFFF; - __uint128_t b = __uint128_t(a) * a; - a = static_cast(b >> 64); - (void)a; - } -EOF -if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DHAVE_UINT128_EXTENSION" -fi - -if [ "$FBCODE_BUILD" != "true" -a "$PLATFORM" = OS_LINUX ]; then - $CXX $COMMON_FLAGS $PLATFORM_SHARED_CFLAGS -x c++ -c - -o test_dl.o 2>/dev/null </dev/null - if [ "$?" = 0 ]; then - EXEC_LDFLAGS+="-ldl" - rm -f test_dl.o - fi - fi -fi - -# check for F_FULLFSYNC -$CXX $PLATFORM_CXXFALGS -x c++ - -o test.o 2>/dev/null < - int main() { - fcntl(0, F_FULLFSYNC); - return 0; - } -EOF -if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DHAVE_FULLFSYNC" -fi - -rm -f test.o test_dl.o - -# Get the path for the folly installation dir -if [ "$USE_FOLLY" ]; then - if [ "$FOLLY_DIR" ]; then - FOLLY_PATH=`cd $FOLLY_DIR && $PYTHON build/fbcode_builder/getdeps.py show-inst-dir folly` - fi -fi - -PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS" -PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS" - -VALGRIND_VER="$VALGRIND_VER" - -ROCKSDB_MAJOR=`build_tools/version.sh major` -ROCKSDB_MINOR=`build_tools/version.sh minor` -ROCKSDB_PATCH=`build_tools/version.sh patch` - -echo "CC=$CC" >> "$OUTPUT" -echo "CXX=$CXX" >> "$OUTPUT" -echo "AR=$AR" >> "$OUTPUT" -echo "PLATFORM=$PLATFORM" >> "$OUTPUT" -echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> "$OUTPUT" -echo "PLATFORM_CMAKE_FLAGS=$PLATFORM_CMAKE_FLAGS" >> "$OUTPUT" -echo "JAVA_LDFLAGS=$JAVA_LDFLAGS" >> "$OUTPUT" -echo "JAVA_STATIC_LDFLAGS=$JAVA_STATIC_LDFLAGS" >> "$OUTPUT" -echo "JAVA_STATIC_DEPS_CCFLAGS=$JAVA_STATIC_DEPS_CCFLAGS" >> "$OUTPUT" -echo "JAVA_STATIC_DEPS_CXXFLAGS=$JAVA_STATIC_DEPS_CXXFLAGS" >> "$OUTPUT" -echo "JAVA_STATIC_DEPS_LDFLAGS=$JAVA_STATIC_DEPS_LDFLAGS" >> "$OUTPUT" -echo "JAVAC_ARGS=$JAVAC_ARGS" >> "$OUTPUT" -echo "VALGRIND_VER=$VALGRIND_VER" >> "$OUTPUT" -echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> "$OUTPUT" -echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> "$OUTPUT" -echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> "$OUTPUT" -echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> "$OUTPUT" -echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> "$OUTPUT" -echo "PLATFORM_SHARED_VERSIONED=$PLATFORM_SHARED_VERSIONED" >> "$OUTPUT" -echo "EXEC_LDFLAGS=$EXEC_LDFLAGS" >> "$OUTPUT" -echo "JEMALLOC_INCLUDE=$JEMALLOC_INCLUDE" >> "$OUTPUT" -echo "JEMALLOC_LIB=$JEMALLOC_LIB" >> "$OUTPUT" -echo "ROCKSDB_MAJOR=$ROCKSDB_MAJOR" >> "$OUTPUT" -echo "ROCKSDB_MINOR=$ROCKSDB_MINOR" >> "$OUTPUT" -echo "ROCKSDB_PATCH=$ROCKSDB_PATCH" >> "$OUTPUT" -echo "CLANG_SCAN_BUILD=$CLANG_SCAN_BUILD" >> "$OUTPUT" -echo "CLANG_ANALYZER=$CLANG_ANALYZER" >> "$OUTPUT" -echo "PROFILING_FLAGS=$PROFILING_FLAGS" >> "$OUTPUT" -echo "FIND=$FIND" >> "$OUTPUT" -echo "WATCH=$WATCH" >> "$OUTPUT" -echo "FOLLY_PATH=$FOLLY_PATH" >> "$OUTPUT" - -# This will enable some related identifiers for the preprocessor -if test -n "$JEMALLOC"; then - echo "JEMALLOC=1" >> "$OUTPUT" -fi -# Indicates that jemalloc should be enabled using -ljemalloc flag -# The alternative is to porvide a direct link to the library via JEMALLOC_LIB -# and JEMALLOC_INCLUDE -if test -n "$WITH_JEMALLOC_FLAG"; then - echo "WITH_JEMALLOC_FLAG=$WITH_JEMALLOC_FLAG" >> "$OUTPUT" -fi -echo "LUA_PATH=$LUA_PATH" >> "$OUTPUT" -if test -n 
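Everything the probes detect is serialized as plain KEY=VALUE lines appended to "$OUTPUT" and later consumed by the Makefile. A quick, hedged way to see which optional features were enabled on a given machine; the file name make_config.mk is the conventional value of $OUTPUT and is assumed here rather than stated in the excerpt:

    # list the feature macros that ended up in the generated config
    grep -oE -- '-D(HAVE|ROCKSDB)_[A-Z0-9_]+' make_config.mk | sort -u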
"$USE_FOLLY"; then - echo "USE_FOLLY=$USE_FOLLY" >> "$OUTPUT" -fi -if test -n "$PPC_LIBC_IS_GNU"; then - echo "PPC_LIBC_IS_GNU=$PPC_LIBC_IS_GNU" >> "$OUTPUT" -fi diff --git a/build_tools/check-sources.sh b/build_tools/check-sources.sh deleted file mode 100755 index 5672f7b2b..000000000 --- a/build_tools/check-sources.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# Check for some simple mistakes that should prevent commit or push - -BAD="" - -git grep -n 'namespace rocksdb' -- '*.[ch]*' -if [ "$?" != "1" ]; then - echo "^^^^^ Do not hardcode namespace rocksdb. Use ROCKSDB_NAMESPACE" - BAD=1 -fi - -git grep -n -i 'nocommit' -- ':!build_tools/check-sources.sh' -if [ "$?" != "1" ]; then - echo "^^^^^ Code was not intended to be committed" - BAD=1 -fi - -git grep -n 'include :: Failure' - _GTEST_FAIL_PATTERN = re.compile(r"(unknown file|\S+:\d+): Failure$") - - def __init__(self): - self._last_gtest_name = "Unknown test" - - def parse_error(self, line): - gtest_name_match = self._GTEST_NAME_PATTERN.match(line) - if gtest_name_match: - self._last_gtest_name = gtest_name_match.group(1) - return None - gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line) - if gtest_fail_match: - return "%s failed: %s" % (self._last_gtest_name, gtest_fail_match.group(1)) - return None - - -class MatchErrorParser(ErrorParserBase): - """A simple parser that returns the whole line if it matches the pattern.""" - - def __init__(self, pattern): - self._pattern = re.compile(pattern) - - def parse_error(self, line): - if self._pattern.match(line): - return line - return None - - -class CompilerErrorParser(MatchErrorParser): - def __init__(self): - # format (compile error): - # '::: error: ' - # format (link error): - # ':: error: ' - # The below regex catches both - super(CompilerErrorParser, self).__init__(r"\S+:\d+: error:") - - -class ScanBuildErrorParser(MatchErrorParser): - def __init__(self): - super(ScanBuildErrorParser, self).__init__(r"scan-build: \d+ bugs found.$") - - -class DbCrashErrorParser(MatchErrorParser): - def __init__(self): - super(DbCrashErrorParser, self).__init__(r"\*\*\*.*\^$|TEST FAILED.") - - -class WriteStressErrorParser(MatchErrorParser): - def __init__(self): - super(WriteStressErrorParser, self).__init__( - r"ERROR: write_stress died with exitcode=\d+" - ) - - -class AsanErrorParser(MatchErrorParser): - def __init__(self): - super(AsanErrorParser, self).__init__(r"==\d+==ERROR: AddressSanitizer:") - - -class UbsanErrorParser(MatchErrorParser): - def __init__(self): - # format: '::: runtime error: ' - super(UbsanErrorParser, self).__init__(r"\S+:\d+:\d+: runtime error:") - - -class ValgrindErrorParser(MatchErrorParser): - def __init__(self): - # just grab the summary, valgrind doesn't clearly distinguish errors - # from other log messages. 
- super(ValgrindErrorParser, self).__init__(r"==\d+== ERROR SUMMARY:") - - -class CompatErrorParser(MatchErrorParser): - def __init__(self): - super(CompatErrorParser, self).__init__(r"==== .*[Ee]rror.* ====$") - - -class TsanErrorParser(MatchErrorParser): - def __init__(self): - super(TsanErrorParser, self).__init__(r"WARNING: ThreadSanitizer:") - - -_TEST_NAME_TO_PARSERS = { - "punit": [CompilerErrorParser, GTestErrorParser], - "unit": [CompilerErrorParser, GTestErrorParser], - "release": [CompilerErrorParser, GTestErrorParser], - "unit_481": [CompilerErrorParser, GTestErrorParser], - "release_481": [CompilerErrorParser, GTestErrorParser], - "clang_unit": [CompilerErrorParser, GTestErrorParser], - "clang_release": [CompilerErrorParser, GTestErrorParser], - "clang_analyze": [CompilerErrorParser, ScanBuildErrorParser], - "code_cov": [CompilerErrorParser, GTestErrorParser], - "unity": [CompilerErrorParser, GTestErrorParser], - "lite": [CompilerErrorParser], - "lite_test": [CompilerErrorParser, GTestErrorParser], - "stress_crash": [CompilerErrorParser, DbCrashErrorParser], - "stress_crash_with_atomic_flush": [CompilerErrorParser, DbCrashErrorParser], - "stress_crash_with_txn": [CompilerErrorParser, DbCrashErrorParser], - "write_stress": [CompilerErrorParser, WriteStressErrorParser], - "asan": [CompilerErrorParser, GTestErrorParser, AsanErrorParser], - "asan_crash": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], - "asan_crash_with_atomic_flush": [ - CompilerErrorParser, - AsanErrorParser, - DbCrashErrorParser, - ], - "asan_crash_with_txn": [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], - "ubsan": [CompilerErrorParser, GTestErrorParser, UbsanErrorParser], - "ubsan_crash": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], - "ubsan_crash_with_atomic_flush": [ - CompilerErrorParser, - UbsanErrorParser, - DbCrashErrorParser, - ], - "ubsan_crash_with_txn": [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], - "valgrind": [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser], - "tsan": [CompilerErrorParser, GTestErrorParser, TsanErrorParser], - "format_compatible": [CompilerErrorParser, CompatErrorParser], - "run_format_compatible": [CompilerErrorParser, CompatErrorParser], - "no_compression": [CompilerErrorParser, GTestErrorParser], - "run_no_compression": [CompilerErrorParser, GTestErrorParser], - "regression": [CompilerErrorParser], - "run_regression": [CompilerErrorParser], -} - - -def main(): - if len(sys.argv) != 2: - return "Usage: %s " % sys.argv[0] - test_name = sys.argv[1] - if test_name not in _TEST_NAME_TO_PARSERS: - return "Unknown test name: %s" % test_name - - error_parsers = [] - for parser_cls in _TEST_NAME_TO_PARSERS[test_name]: - error_parsers.append(parser_cls()) - - for line in sys.stdin: - line = line.strip() - for error_parser in error_parsers: - error_msg = error_parser.parse_error(line) - if error_msg is not None: - print(error_msg) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/build_tools/fb_compile_mongo.sh b/build_tools/fb_compile_mongo.sh deleted file mode 100755 index ec733cdf1..000000000 --- a/build_tools/fb_compile_mongo.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/sh -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
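The error-filter module above maps each CI job name to a list of line parsers and echoes only the lines that look like real failures; everything else on stdin is swallowed. A usage sketch, assuming the script is invoked as build_tools/error_filter.py under a Python 3 interpreter (both the path and the interpreter are assumptions; the log path is illustrative):

    # print compiler errors and gtest failures from a saved log
    python3 build_tools/error_filter.py unit < /tmp/rocksdb_unit_test.log
    # or filter a live build-and-test run
    make check 2>&1 | python3 build_tools/error_filter.py unit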
-# fail early -set -e - -if test -z $ROCKSDB_PATH; then - ROCKSDB_PATH=~/rocksdb -fi -source $ROCKSDB_PATH/build_tools/fbcode_config4.8.1.sh - -EXTRA_LDFLAGS="" - -if test -z $ALLOC; then - # default - ALLOC=tcmalloc -elif [[ $ALLOC == "jemalloc" ]]; then - ALLOC=system - EXTRA_LDFLAGS+=" -Wl,--whole-archive $JEMALLOC_LIB -Wl,--no-whole-archive" -fi - -# we need to force mongo to use static library, not shared -STATIC_LIB_DEP_DIR='build/static_library_dependencies' -test -d $STATIC_LIB_DEP_DIR || mkdir $STATIC_LIB_DEP_DIR -test -h $STATIC_LIB_DEP_DIR/`basename $SNAPPY_LIBS` || ln -s $SNAPPY_LIBS $STATIC_LIB_DEP_DIR -test -h $STATIC_LIB_DEP_DIR/`basename $LZ4_LIBS` || ln -s $LZ4_LIBS $STATIC_LIB_DEP_DIR - -EXTRA_LDFLAGS+=" -L $STATIC_LIB_DEP_DIR" - -set -x - -EXTRA_CMD="" -if ! test -e version.json; then - # this is Mongo 3.0 - EXTRA_CMD="--rocksdb \ - --variant-dir=linux2/norm - --cxx=${CXX} \ - --cc=${CC} \ - --use-system-zlib" # add this line back to normal code path - # when https://jira.mongodb.org/browse/SERVER-19123 is resolved -fi - -scons \ - LINKFLAGS="$EXTRA_LDFLAGS $EXEC_LDFLAGS $PLATFORM_LDFLAGS" \ - CCFLAGS="$CXXFLAGS -L $STATIC_LIB_DEP_DIR" \ - LIBS="lz4 gcc stdc++" \ - LIBPATH="$ROCKSDB_PATH" \ - CPPPATH="$ROCKSDB_PATH/include" \ - -j32 \ - --allocator=$ALLOC \ - --nostrip \ - --opt=on \ - --disable-minimum-compiler-version-enforcement \ - --use-system-snappy \ - --disable-warnings-as-errors \ - $EXTRA_CMD $* diff --git a/build_tools/fbcode_config.sh b/build_tools/fbcode_config.sh deleted file mode 100644 index cf3c355b1..000000000 --- a/build_tools/fbcode_config.sh +++ /dev/null @@ -1,175 +0,0 @@ -#!/bin/sh -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# Set environment variables so that we can compile rocksdb using -# fbcode settings. It uses the latest g++ and clang compilers and also -# uses jemalloc -# Environment variables that change the behavior of this script: -# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included - - -BASEDIR=`dirname $BASH_SOURCE` -source "$BASEDIR/dependencies.sh" - -CFLAGS="" - -# libgcc -LIBGCC_INCLUDE="$LIBGCC_BASE/include" -LIBGCC_LIBS=" -L $LIBGCC_BASE/lib" - -# glibc -GLIBC_INCLUDE="$GLIBC_BASE/include" -GLIBC_LIBS=" -L $GLIBC_BASE/lib" - -if ! test $ROCKSDB_DISABLE_SNAPPY; then - # snappy - SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/" - if test -z $PIC_BUILD; then - SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a" - else - SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a" - fi - CFLAGS+=" -DSNAPPY" -fi - -if test -z $PIC_BUILD; then - if ! test $ROCKSDB_DISABLE_ZLIB; then - # location of zlib headers and libraries - ZLIB_INCLUDE=" -I $ZLIB_BASE/include/" - ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a" - CFLAGS+=" -DZLIB" - fi - - if ! test $ROCKSDB_DISABLE_BZIP; then - # location of bzip headers and libraries - BZIP_INCLUDE=" -I $BZIP2_BASE/include/" - BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a" - CFLAGS+=" -DBZIP2" - fi - - if ! test $ROCKSDB_DISABLE_LZ4; then - LZ4_INCLUDE=" -I $LZ4_BASE/include/" - LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a" - CFLAGS+=" -DLZ4" - fi -fi - -if ! 
test $ROCKSDB_DISABLE_ZSTD; then - ZSTD_INCLUDE=" -I $ZSTD_BASE/include/" - if test -z $PIC_BUILD; then - ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a" - else - ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a" - fi - CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY" -fi - -# location of gflags headers and libraries -GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" -if test -z $PIC_BUILD; then - GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a" -else - GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a" -fi -CFLAGS+=" -DGFLAGS=gflags" - -# location of jemalloc -JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/" -JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a" - -if test -z $PIC_BUILD; then - # location of numa - NUMA_INCLUDE=" -I $NUMA_BASE/include/" - NUMA_LIB=" $NUMA_BASE/lib/libnuma.a" - CFLAGS+=" -DNUMA" - - # location of libunwind - LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a" -fi - -# location of TBB -TBB_INCLUDE=" -isystem $TBB_BASE/include/" -if test -z $PIC_BUILD; then - TBB_LIBS="$TBB_BASE/lib/libtbb.a" -else - TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a" -fi -CFLAGS+=" -DTBB" - -test "$USE_SSE" || USE_SSE=1 -export USE_SSE -test "$PORTABLE" || PORTABLE=1 -export PORTABLE - -BINUTILS="$BINUTILS_BASE/bin" -AR="$BINUTILS/ar" - -DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE" - -STDLIBS="-L $GCC_BASE/lib64" - -CLANG_BIN="$CLANG_BASE/bin" -CLANG_LIB="$CLANG_BASE/lib" -CLANG_SRC="$CLANG_BASE/../../src" - -CLANG_ANALYZER="$CLANG_BIN/clang++" -CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build" - -if [ -z "$USE_CLANG" ]; then - # gcc - CC="$GCC_BASE/bin/gcc" - CXX="$GCC_BASE/bin/g++" - AR="$GCC_BASE/bin/gcc-ar" - - CFLAGS+=" -B$BINUTILS/gold" - CFLAGS+=" -isystem $GLIBC_INCLUDE" - CFLAGS+=" -isystem $LIBGCC_INCLUDE" - JEMALLOC=1 -else - # clang - CLANG_INCLUDE="$CLANG_LIB/clang/stable/include" - CC="$CLANG_BIN/clang" - CXX="$CLANG_BIN/clang++" - AR="$CLANG_BIN/llvm-ar" - - KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include" - - CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib" - CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x " - CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x/x86_64-facebook-linux " - CFLAGS+=" -isystem $GLIBC_INCLUDE" - CFLAGS+=" -isystem $LIBGCC_INCLUDE" - CFLAGS+=" -isystem $CLANG_INCLUDE" - CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux " - CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE " - CFLAGS+=" -Wno-expansion-to-defined " - CXXFLAGS="-nostdinc++" -fi - -CFLAGS+=" $DEPS_INCLUDE" -CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DHAVE_SSE42" -CXXFLAGS+=" $CFLAGS" - -EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS" -EXEC_LDFLAGS+=" -B$BINUTILS/gold" -EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-5-glibc-2.23/lib/ld.so" -EXEC_LDFLAGS+=" $LIBUNWIND" -EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-5-glibc-2.23/lib" -# required by libtbb -EXEC_LDFLAGS+=" -ldl" - -PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" - -EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS" - -VALGRIND_VER="$VALGRIND_BASE/bin/" - -LUA_PATH="$LUA_BASE" - -if test -z $PIC_BUILD; then - LUA_LIB=" $LUA_PATH/lib/liblua.a" -else - LUA_LIB=" $LUA_PATH/lib/liblua_pic.a" -fi - -export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB 
JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB diff --git a/build_tools/fbcode_config_platform010.sh b/build_tools/fbcode_config_platform010.sh deleted file mode 100644 index babe92c41..000000000 --- a/build_tools/fbcode_config_platform010.sh +++ /dev/null @@ -1,175 +0,0 @@ -#!/bin/sh -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# Set environment variables so that we can compile rocksdb using -# fbcode settings. It uses the latest g++ and clang compilers and also -# uses jemalloc -# Environment variables that change the behavior of this script: -# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included - - -BASEDIR=`dirname $BASH_SOURCE` -source "$BASEDIR/dependencies_platform010.sh" - -# Disallow using libraries from default locations as they might not be compatible with platform010 libraries. -CFLAGS=" --sysroot=/DOES/NOT/EXIST" - -# libgcc -LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/trunk" -LIBGCC_LIBS=" -L $LIBGCC_BASE/lib -B$LIBGCC_BASE/lib/gcc/x86_64-facebook-linux/trunk/" - -# glibc -GLIBC_INCLUDE="$GLIBC_BASE/include" -GLIBC_LIBS=" -L $GLIBC_BASE/lib" -GLIBC_LIBS+=" -B$GLIBC_BASE/lib" - -if test -z $PIC_BUILD; then - MAYBE_PIC= -else - MAYBE_PIC=_pic -fi - -if ! test $ROCKSDB_DISABLE_SNAPPY; then - # snappy - SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/" - SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a" - CFLAGS+=" -DSNAPPY" -fi - -if ! test $ROCKSDB_DISABLE_ZLIB; then - # location of zlib headers and libraries - ZLIB_INCLUDE=" -I $ZLIB_BASE/include/" - ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a" - CFLAGS+=" -DZLIB" -fi - -if ! test $ROCKSDB_DISABLE_BZIP; then - # location of bzip headers and libraries - BZIP_INCLUDE=" -I $BZIP2_BASE/include/" - BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a" - CFLAGS+=" -DBZIP2" -fi - -if ! test $ROCKSDB_DISABLE_LZ4; then - LZ4_INCLUDE=" -I $LZ4_BASE/include/" - LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a" - CFLAGS+=" -DLZ4" -fi - -if ! 
test $ROCKSDB_DISABLE_ZSTD; then - ZSTD_INCLUDE=" -I $ZSTD_BASE/include/" - ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a" - CFLAGS+=" -DZSTD" -fi - -# location of gflags headers and libraries -GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" -GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a" -CFLAGS+=" -DGFLAGS=gflags" - -BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/" -BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a" - -# location of jemalloc -JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/" -JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a" - -# location of numa -NUMA_INCLUDE=" -I $NUMA_BASE/include/" -NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a" -CFLAGS+=" -DNUMA" - -# location of libunwind -LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a" - -# location of TBB -TBB_INCLUDE=" -isystem $TBB_BASE/include/" -TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a" -CFLAGS+=" -DTBB" - -# location of LIBURING -LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/" -LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a" -CFLAGS+=" -DLIBURING" - -test "$USE_SSE" || USE_SSE=1 -export USE_SSE -test "$PORTABLE" || PORTABLE=1 -export PORTABLE - -BINUTILS="$BINUTILS_BASE/bin" -AR="$BINUTILS/ar" -AS="$BINUTILS/as" - -DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE" - -STDLIBS="-L $GCC_BASE/lib64" - -CLANG_BIN="$CLANG_BASE/bin" -CLANG_LIB="$CLANG_BASE/lib" -CLANG_SRC="$CLANG_BASE/../../src" - -CLANG_ANALYZER="$CLANG_BIN/clang++" -CLANG_SCAN_BUILD="$CLANG_SRC/llvm/clang/tools/scan-build/bin/scan-build" - -if [ -z "$USE_CLANG" ]; then - # gcc - CC="$GCC_BASE/bin/gcc" - CXX="$GCC_BASE/bin/g++" - AR="$GCC_BASE/bin/gcc-ar" - - CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib" - CFLAGS+=" -I$GCC_BASE/include" - CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include" - CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/install-tools/include" - CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include-fixed/" - CFLAGS+=" -isystem $LIBGCC_INCLUDE" - CFLAGS+=" -isystem $GLIBC_INCLUDE" - CFLAGS+=" -I$GLIBC_INCLUDE" - CFLAGS+=" -I$LIBGCC_BASE/include" - CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/" - CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/x86_64-facebook-linux/" - CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/backward" - CFLAGS+=" -isystem $GLIBC_INCLUDE -I$GLIBC_INCLUDE" - JEMALLOC=1 -else - # clang - CLANG_INCLUDE="$CLANG_LIB/clang/stable/include" - CC="$CLANG_BIN/clang" - CXX="$CLANG_BIN/clang++" - AR="$CLANG_BIN/llvm-ar" - - CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib" - CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk " - CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk/x86_64-facebook-linux " - CFLAGS+=" -isystem $GLIBC_INCLUDE" - CFLAGS+=" -isystem $LIBGCC_INCLUDE" - CFLAGS+=" -isystem $CLANG_INCLUDE" - CFLAGS+=" -Wno-expansion-to-defined " - CXXFLAGS="-nostdinc++" -fi - -KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include" -CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux " -CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE " - -CFLAGS+=" $DEPS_INCLUDE" -CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT" -CXXFLAGS+=" $CFLAGS" - -EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS 
$BENCHMARK_LIBS" -EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform010/lib/ld.so" -EXEC_LDFLAGS+=" $LIBUNWIND" -EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform010/lib" -EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64" -# required by libtbb -EXEC_LDFLAGS+=" -ldl" - -PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" -PLATFORM_LDFLAGS+=" -B$BINUTILS" - -EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS" - -VALGRIND_VER="$VALGRIND_BASE/bin/" - -export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB diff --git a/build_tools/format-diff.sh b/build_tools/format-diff.sh deleted file mode 100755 index 62e8834f7..000000000 --- a/build_tools/format-diff.sh +++ /dev/null @@ -1,203 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# If clang_format_diff.py command is not specfied, we assume we are able to -# access directly without any path. - -print_usage () { - echo "Usage:" - echo "format-diff.sh [OPTIONS]" - echo "-c: check only." - echo "-h: print this message." -} - -while getopts ':ch' OPTION; do - case "$OPTION" in - c) - CHECK_ONLY=1 - ;; - h) - print_usage - exit 1 - ;; - ?) - print_usage - exit 1 - ;; - esac -done - -REPO_ROOT="$(git rev-parse --show-toplevel)" - -if [ "$CLANG_FORMAT_DIFF" ]; then - echo "Note: CLANG_FORMAT_DIFF='$CLANG_FORMAT_DIFF'" - # Dry run to confirm dependencies like argparse - if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then - true #Good - else - exit 128 - fi -else - # First try directly executing the possibilities - if clang-format-diff --help &> /dev/null < /dev/null; then - CLANG_FORMAT_DIFF=clang-format-diff - elif clang-format-diff.py --help &> /dev/null < /dev/null; then - CLANG_FORMAT_DIFF=clang-format-diff.py - elif $REPO_ROOT/clang-format-diff.py --help &> /dev/null < /dev/null; then - CLANG_FORMAT_DIFF=$REPO_ROOT/clang-format-diff.py - else - # This probably means we need to directly invoke the interpreter. - # But first find clang-format-diff.py - if [ -f "$REPO_ROOT/clang-format-diff.py" ]; then - CFD_PATH="$REPO_ROOT/clang-format-diff.py" - elif which clang-format-diff.py &> /dev/null; then - CFD_PATH="$(which clang-format-diff.py)" - else - echo "You didn't have clang-format-diff.py and/or clang-format available in your computer!" - echo "You can download clang-format-diff.py by running: " - echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py" - echo "You should make sure the downloaded script is not compromised." - echo "You can download clang-format by running:" - echo " brew install clang-format" - echo " Or" - echo " apt install clang-format" - echo " This might work too:" - echo " yum install git-clang-format" - echo "Then make sure clang-format is available and executable from \$PATH:" - echo " clang-format --version" - exit 128 - fi - # Check argparse pre-req on interpreter, or it will fail - if echo import argparse | ${PYTHON:-python3}; then - true # Good - else - echo "To run clang-format-diff.py, we'll need the library "argparse" to be" - echo "installed. You can try either of the follow ways to install it:" - echo " 1. Manually download argparse: https://pypi.python.org/pypi/argparse" - echo " 2. 
easy_install argparse (if you have easy_install)" - echo " 3. pip install argparse (if you have pip)" - exit 129 - fi - # Unfortunately, some machines have a Python2 clang-format-diff.py - # installed but only a Python3 interpreter installed. Unfortunately, - # automatic 2to3 migration is insufficient, so suggest downloading latest. - if grep -q "print '" "$CFD_PATH" && \ - ${PYTHON:-python3} --version | grep -q 'ython 3'; then - echo "You have clang-format-diff.py for Python 2 but are using a Python 3" - echo "interpreter (${PYTHON:-python3})." - echo "You can download clang-format-diff.py for Python 3 by running: " - echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py" - echo "You should make sure the downloaded script is not compromised." - exit 130 - fi - CLANG_FORMAT_DIFF="${PYTHON:-python3} $CFD_PATH" - # This had better work after all those checks - if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then - true #Good - else - exit 128 - fi - fi -fi - -# TODO(kailiu) following work is not complete since we still need to figure -# out how to add the modified files done pre-commit hook to git's commit index. -# -# Check if this script has already been added to pre-commit hook. -# Will suggest user to add this script to pre-commit hook if their pre-commit -# is empty. -# PRE_COMMIT_SCRIPT_PATH="`git rev-parse --show-toplevel`/.git/hooks/pre-commit" -# if ! ls $PRE_COMMIT_SCRIPT_PATH &> /dev/null -# then -# echo "Would you like to add this script to pre-commit hook, which will do " -# echo -n "the format check for all the affected lines before you check in (y/n):" -# read add_to_hook -# if [ "$add_to_hook" == "y" ] -# then -# ln -s `git rev-parse --show-toplevel`/build_tools/format-diff.sh $PRE_COMMIT_SCRIPT_PATH -# fi -# fi -set -e - -uncommitted_code=`git diff HEAD` - -# If there's no uncommitted changes, we assume user are doing post-commit -# format check, in which case we'll try to check the modified lines vs. the -# facebook/rocksdb.git main branch. Otherwise, we'll check format of the -# uncommitted code only. -if [ -z "$uncommitted_code" ] -then - # Attempt to get name of facebook/rocksdb.git remote. - [ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(LC_ALL=POSIX LANG=POSIX git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)" - # Fall back on 'origin' if that fails - [ "$FORMAT_REMOTE" ] || FORMAT_REMOTE=origin - # Use main branch from that remote - [ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/$(LC_ALL=POSIX LANG=POSIX git remote show $FORMAT_REMOTE | sed -n '/HEAD branch/s/.*: //p')" - # Get the common ancestor with that remote branch. Everything after that - # common ancestor would be considered the contents of a pull request, so - # should be relevant for formatting fixes. - FORMAT_UPSTREAM_MERGE_BASE="$(git merge-base "$FORMAT_UPSTREAM" HEAD)" - # Get the differences - diffs=$(git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -p 1) - echo "Checking format of changes not yet in $FORMAT_UPSTREAM..." -else - # Check the format of uncommitted lines, - diffs=$(git diff -U0 HEAD | $CLANG_FORMAT_DIFF -p 1) - echo "Checking format of uncommitted changes..." -fi - -if [ -z "$diffs" ] -then - echo "Nothing needs to be reformatted!" - exit 0 -elif [ $CHECK_ONLY ] -then - echo "Your change has unformatted code. Please run make format!" 
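The format checker is normally reached through make format (as the message above suggests), but it can also be run directly; -c comes from the getopts loop near the top of the script, and VERBOSE_CHECK is consulted a few lines below:

    # report unformatted lines only; exits non-zero when reformatting is needed
    build_tools/format-diff.sh -c
    # additionally print the clang-format version and the offending hunks
    VERBOSE_CHECK=1 build_tools/format-diff.sh -c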
- if [ $VERBOSE_CHECK ]; then - clang-format --version - echo "$diffs" - fi - exit 1 -fi - -# Highlight the insertions/deletions in the clang-format-diff.py output -COLOR_END="\033[0m" -COLOR_RED="\033[0;31m" -COLOR_GREEN="\033[0;32m" - -echo -e "Detected lines that don't follow the format rules:\r" -# Add color to the diff: lines added will be green; lines removed will be red. -echo "$diffs" | - sed -e "s/\(^-.*$\)/`echo -e \"$COLOR_RED\1$COLOR_END\"`/" | - sed -e "s/\(^+.*$\)/`echo -e \"$COLOR_GREEN\1$COLOR_END\"`/" - -echo -e "Would you like to fix the format automatically (y/n): \c" - -# Make sure we can read user input in any mode. -exec < /dev/tty -read to_fix - -if [ "$to_fix" != "y" ] -then - exit 1 -fi - -# Do in-place format adjustment. -if [ -z "$uncommitted_code" ] -then - git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -i -p 1 -else - git diff -U0 HEAD | $CLANG_FORMAT_DIFF -i -p 1 -fi -echo "Files reformatted!" - -# Amend the last commit if the user is doing a post-commit format check -if [ -z "$uncommitted_code" ]; then - echo -e "Would you like to amend the changes to last commit (`git log HEAD --oneline | head -1`)? (y/n): \c" - read to_amend - - if [ "$to_amend" == "y" ] - then - git commit -a --amend --reuse-message HEAD - echo "Amended to last commit" - fi -fi diff --git a/build_tools/gnu_parallel b/build_tools/gnu_parallel deleted file mode 100755 index 3365f46ba..000000000 --- a/build_tools/gnu_parallel +++ /dev/null @@ -1,7971 +0,0 @@ -#!/usr/bin/env perl - -# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and -# Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see <http://www.gnu.org/licenses/> -# or write to the Free Software Foundation, Inc., 51 Franklin St, -# Fifth Floor, Boston, MA 02110-1301 USA - -# open3 used in Job::start -use IPC::Open3; -# &WNOHANG used in reaper -use POSIX qw(:sys_wait_h setsid ceil :errno_h); -# gensym used in Job::start -use Symbol qw(gensym); -# tempfile used in Job::start -use File::Temp qw(tempfile tempdir); -# mkpath used in openresultsfile -use File::Path; -# GetOptions used in get_options_from_array -use Getopt::Long; -# Used to ensure code quality -use strict; -use File::Basename; - -if(not $ENV{HOME}) { - # $ENV{HOME} is sometimes not set if called from PHP - ::warning("\$HOME not set. 
Using /tmp\n"); - $ENV{HOME} = "/tmp"; -} - -save_stdin_stdout_stderr(); -save_original_signal_handler(); -parse_options(); -::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n"); -my $number_of_args; -if($Global::max_number_of_args) { - $number_of_args=$Global::max_number_of_args; -} elsif ($opt::X or $opt::m or $opt::xargs) { - $number_of_args = undef; -} else { - $number_of_args = 1; -} - -my @command; -@command = @ARGV; - -my @fhlist; -if($opt::pipepart) { - @fhlist = map { open_or_exit($_) } "/dev/null"; -} else { - @fhlist = map { open_or_exit($_) } @opt::a; - if(not @fhlist and not $opt::pipe) { - @fhlist = (*STDIN); - } -} - -if($opt::skip_first_line) { - # Skip the first line for the first file handle - my $fh = $fhlist[0]; - <$fh>; -} -if($opt::header and not $opt::pipe) { - my $fh = $fhlist[0]; - # split with colsep or \t - # $header force $colsep = \t if undef? - my $delimiter = $opt::colsep; - $delimiter ||= "\$"; - my $id = 1; - for my $fh (@fhlist) { - my $line = <$fh>; - chomp($line); - ::debug("init", "Delimiter: '$delimiter'"); - for my $s (split /$delimiter/o, $line) { - ::debug("init", "Colname: '$s'"); - # Replace {colname} with {2} - # TODO accept configurable short hands - # TODO how to deal with headers in {=...=} - for(@command) { - s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; - } - $Global::input_source_header{$id} = $s; - $id++; - } - } -} else { - my $id = 1; - for my $fh (@fhlist) { - $Global::input_source_header{$id} = $id; - $id++; - } -} - -if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { - # Parallel check all hosts are up. Remove hosts that are down - filter_hosts(); -} - -if($opt::nonall or $opt::onall) { - onall(@command); - wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); -} - -# TODO --transfer foo/./bar --cleanup -# multiple --transfer and --basefile with different /./ - -$Global::JobQueue = JobQueue->new( - \@command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files); - -if($opt::eta or $opt::bar) { - # Count the number of jobs before starting any - $Global::JobQueue->total_jobs(); -} -if($opt::pipepart) { - @Global::cat_partials = map { pipe_part_files($_) } @opt::a; - # Unget the command as many times as there are parts - $Global::JobQueue->{'commandlinequeue'}->unget( - map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials - ); -} -for my $sshlogin (values %Global::host) { - $sshlogin->max_jobs_running(); -} - -init_run_jobs(); -my $sem; -if($Global::semaphore) { - $sem = acquire_semaphore(); -} -$SIG{TERM} = \&start_no_new_jobs; - -start_more_jobs(); -if(not $opt::pipepart) { - if($opt::pipe) { - spreadstdin(); - } -} -::debug("init", "Start draining\n"); -drain_job_queue(); -::debug("init", "Done draining\n"); -reaper(); -::debug("init", "Done reaping\n"); -if($opt::pipe and @opt::a) { - for my $job (@Global::tee_jobs) { - unlink $job->fh(2,"name"); - $job->set_fh(2,"name",""); - $job->print(); - unlink $job->fh(1,"name"); - } -} -::debug("init", "Cleaning\n"); -cleanup(); -if($Global::semaphore) { - $sem->release(); -} -for(keys %Global::sshmaster) { - kill "TERM", $_; -} -::debug("init", "Halt\n"); -if($opt::halt_on_error) { - wait_and_exit($Global::halt_on_error_exitstatus); -} else { - wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); -} - -sub __PIPE_MODE__ {} - -sub pipe_part_files { - # Input: - # $file = the file to read - # Returns: - # @commands that will cat_partial each part - my ($file) = @_; - my $buf = ""; - my $header = 
find_header(\$buf,open_or_exit($file)); - # find positions - my @pos = find_split_positions($file,$opt::blocksize,length $header); - # Make @cat_partials - my @cat_partials = (); - for(my $i=0; $i<$#pos; $i++) { - push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]); - } - # Remote exec should look like: - # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\) - # ssh -tt not allowed. Remote will die due to broken pipe anyway. - # TODO test remote with --fifo / --cat - return @cat_partials; -} - -sub find_header { - # Input: - # $buf_ref = reference to read-in buffer - # $fh = filehandle to read from - # Uses: - # $opt::header - # $opt::blocksize - # Returns: - # $header string - my ($buf_ref, $fh) = @_; - my $header = ""; - if($opt::header) { - if($opt::header eq ":") { $opt::header = "(.*\n)"; } - # Number = number of lines - $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; - while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) { - if($$buf_ref=~s/^($opt::header)//) { - $header = $1; - last; - } - } - } - return $header; -} - -sub find_split_positions { - # Input: - # $file = the file to read - # $block = (minimal) --block-size of each chunk - # $headerlen = length of header to be skipped - # Uses: - # $opt::recstart - # $opt::recend - # Returns: - # @positions of block start/end - my($file, $block, $headerlen) = @_; - my $size = -s $file; - $block = int $block; - # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 - # The optimal dd blocksize for freebsd = 2^15..2^17 - my $dd_block_size = 131072; # 2^17 - my @pos; - my ($recstart,$recend) = recstartrecend(); - my $recendrecstart = $recend.$recstart; - my $fh = ::open_or_exit($file); - push(@pos,$headerlen); - for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) { - my $buf; - seek($fh, $pos, 0) || die; - while(read($fh,substr($buf,length $buf,0),$dd_block_size)) { - if($opt::regexp) { - # If match /$recend$recstart/ => Record position - if($buf =~ /(.*$recend)$recstart/os) { - my $i = length($1); - push(@pos,$pos+$i); - # Start looking for next record _after_ this match - $pos += $i; - last; - } - } else { - # If match $recend$recstart => Record position - my $i = index($buf,$recendrecstart); - if($i != -1) { - push(@pos,$pos+$i); - # Start looking for next record _after_ this match - $pos += $i; - last; - } - } - } - } - push(@pos,$size); - close $fh; - return @pos; -} - -sub cat_partial { - # Input: - # $file = the file to read - # ($start, $end, [$start2, $end2, ...]) = start byte, end byte - # Returns: - # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout - my($file, @start_end) = @_; - my($start, $i); - # Convert start_end to start_len - my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end; - return "<". shell_quote_scalar($file) . - q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } . 
- " @start_len"; -} - -sub spreadstdin { - # read a record - # Spawn a job and print the record to it. - # Uses: - # $opt::blocksize - # STDIN - # $opr::r - # $Global::max_lines - # $Global::max_number_of_args - # $opt::regexp - # $Global::start_no_new_jobs - # $opt::roundrobin - # %Global::running - - my $buf = ""; - my ($recstart,$recend) = recstartrecend(); - my $recendrecstart = $recend.$recstart; - my $chunk_number = 1; - my $one_time_through; - my $blocksize = $opt::blocksize; - my $in = *STDIN; - my $header = find_header(\$buf,$in); - while(1) { - my $anything_written = 0; - if(not read($in,substr($buf,length $buf,0),$blocksize)) { - # End-of-file - $chunk_number != 1 and last; - # Force the while-loop once if everything was read by header reading - $one_time_through++ and last; - } - if($opt::r) { - # Remove empty lines - $buf =~ s/^\s*\n//gm; - if(length $buf == 0) { - next; - } - } - if($Global::max_lines and not $Global::max_number_of_args) { - # Read n-line records - my $n_lines = $buf =~ tr/\n/\n/; - my $last_newline_pos = rindex($buf,"\n"); - while($n_lines % $Global::max_lines) { - $n_lines--; - $last_newline_pos = rindex($buf,"\n",$last_newline_pos-1); - } - # Chop at $last_newline_pos as that is where n-line record ends - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$buf, - $recstart,$recend,$last_newline_pos+1); - substr($buf,0,$last_newline_pos+1) = ""; - } elsif($opt::regexp) { - if($Global::max_number_of_args) { - # -N => (start..*?end){n} - # -L -N => (start..*?end){n*l} - my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); - while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) { - # Copy to modifiable variable - my $b = $1; - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$b, - $recstart,$recend,length $1); - } - } else { - # Find the last recend-recstart in $buf - if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { - # Copy to modifiable variable - my $b = $1; - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$b, - $recstart,$recend,length $1); - } - } - } else { - if($Global::max_number_of_args) { - # -N => (start..*?end){n} - my $i = 0; - my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); - while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { - $i += length $recend; # find the actual splitting location - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$buf, - $recstart,$recend,$i); - substr($buf,0,$i) = ""; - } - } else { - # Find the last recend-recstart in $buf - my $i = rindex($buf,$recendrecstart); - if($i != -1) { - $i += length $recend; # find the actual splitting location - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$buf, - $recstart,$recend,$i); - substr($buf,0,$i) = ""; - } - } - } - if(not $anything_written and not eof($in)) { - # Nothing was written - maybe the block size < record size? - # Increase blocksize exponentially - my $old_blocksize = $blocksize; - $blocksize = ceil($blocksize * 1.3 + 1); - ::warning("A record was longer than $old_blocksize. " . 
- "Increasing to --blocksize $blocksize\n"); - } - } - ::debug("init", "Done reading input\n"); - - # If there is anything left in the buffer write it - substr($buf,0,0) = ""; - write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf); - - $Global::start_no_new_jobs ||= 1; - if($opt::roundrobin) { - for my $job (values %Global::running) { - close $job->fh(0,"w"); - } - my %incomplete_jobs = %Global::running; - my $sleep = 1; - while(keys %incomplete_jobs) { - my $something_written = 0; - for my $pid (keys %incomplete_jobs) { - my $job = $incomplete_jobs{$pid}; - if($job->stdin_buffer_length()) { - $something_written += $job->non_block_write(); - } else { - delete $incomplete_jobs{$pid} - } - } - if($something_written) { - $sleep = $sleep/2+0.001; - } - $sleep = ::reap_usleep($sleep); - } - } -} - -sub recstartrecend { - # Uses: - # $opt::recstart - # $opt::recend - # Returns: - # $recstart,$recend with default values and regexp conversion - my($recstart,$recend); - if(defined($opt::recstart) and defined($opt::recend)) { - # If both --recstart and --recend is given then both must match - $recstart = $opt::recstart; - $recend = $opt::recend; - } elsif(defined($opt::recstart)) { - # If --recstart is given it must match start of record - $recstart = $opt::recstart; - $recend = ""; - } elsif(defined($opt::recend)) { - # If --recend is given then it must match end of record - $recstart = ""; - $recend = $opt::recend; - } - - if($opt::regexp) { - # If $recstart/$recend contains '|' this should only apply to the regexp - $recstart = "(?:".$recstart.")"; - $recend = "(?:".$recend.")"; - } else { - # $recstart/$recend = printf strings (\n) - $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; - $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; - } - return ($recstart,$recend); -} - -sub nindex { - # See if string is in buffer N times - # Returns: - # the position where the Nth copy is found - my ($buf_ref, $str, $n) = @_; - my $i = 0; - for(1..$n) { - $i = index($$buf_ref,$str,$i+1); - if($i == -1) { last } - } - return $i; -} - -{ - my @robin_queue; - - sub round_robin_write { - # Input: - # $header_ref = ref to $header string - # $block_ref = ref to $block to be written - # $recstart = record start string - # $recend = record end string - # $endpos = end position of $block - # Uses: - # %Global::running - my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_; - my $something_written = 0; - my $block_passed = 0; - my $sleep = 1; - while(not $block_passed) { - # Continue flushing existing buffers - # until one is empty and a new block is passed - # Make a queue to spread the blocks evenly - if(not @robin_queue) { - push @robin_queue, values %Global::running; - } - while(my $job = shift @robin_queue) { - if($job->stdin_buffer_length() > 0) { - $something_written += $job->non_block_write(); - } else { - $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend); - $block_passed = 1; - $job->set_virgin(0); - $something_written += $job->non_block_write(); - last; - } - } - $sleep = ::reap_usleep($sleep); - } - return $something_written; - } -} - -sub write_record_to_pipe { - # Fork then - # Write record from pos 0 .. 
$endpos to pipe - # Input: - # $chunk_number = sequence number - to see if already run - # $header_ref = reference to header string to prepend - # $record_ref = reference to record to write - # $recstart = start string of record - # $recend = end string of record - # $endpos = position in $record_ref where record ends - # Uses: - # $Global::job_already_run - # $opt::roundrobin - # @Global::virgin_jobs - # Returns: - # Number of chunks written (0 or 1) - my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_; - if($endpos == 0) { return 0; } - if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } - if($opt::roundrobin) { - return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos); - } - # If no virgin found, backoff - my $sleep = 0.0001; # 0.01 ms - better performance on highend - while(not @Global::virgin_jobs) { - ::debug("pipe", "No virgin jobs"); - $sleep = ::reap_usleep($sleep); - # Jobs may not be started because of loadavg - # or too little time between each ssh login. - start_more_jobs(); - } - my $job = shift @Global::virgin_jobs; - # Job is no longer virgin - $job->set_virgin(0); - if(fork()) { - # Skip - } else { - # Chop of at $endpos as we do not know how many rec_sep will - # be removed. - substr($$record_ref,$endpos,length $$record_ref) = ""; - # Remove rec_sep - if($opt::remove_rec_sep) { - Job::remove_rec_sep($record_ref,$recstart,$recend); - } - $job->write($header_ref); - $job->write($record_ref); - close $job->fh(0,"w"); - exit(0); - } - close $job->fh(0,"w"); - return 1; -} - -sub __SEM_MODE__ {} - -sub acquire_semaphore { - # Acquires semaphore. If needed: spawns to the background - # Uses: - # @Global::host - # Returns: - # The semaphore to be released when jobs is complete - $Global::host{':'} = SSHLogin->new(":"); - my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); - $sem->acquire(); - if($Semaphore::fg) { - # skip - } else { - # If run in the background, the PID will change - # therefore release and re-acquire the semaphore - $sem->release(); - if(fork()) { - exit(0); - } else { - # child - # Get a semaphore for this pid - ::die_bug("Can't start a new session: $!") if setsid() == -1; - $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); - $sem->acquire(); - } - } - return $sem; -} - -sub __PARSE_OPTIONS__ {} - -sub options_hash { - # Returns: - # %hash = the GetOptions config - return - ("debug|D=s" => \$opt::D, - "xargs" => \$opt::xargs, - "m" => \$opt::m, - "X" => \$opt::X, - "v" => \@opt::v, - "joblog=s" => \$opt::joblog, - "results|result|res=s" => \$opt::results, - "resume" => \$opt::resume, - "resume-failed|resumefailed" => \$opt::resume_failed, - "silent" => \$opt::silent, - #"silent-error|silenterror" => \$opt::silent_error, - "keep-order|keeporder|k" => \$opt::keeporder, - "group" => \$opt::group, - "g" => \$opt::retired, - "ungroup|u" => \$opt::ungroup, - "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer, - "tmux" => \$opt::tmux, - "null|0" => \$opt::0, - "quote|q" => \$opt::q, - # Replacement strings - "parens=s" => \$opt::parens, - "rpl=s" => \@opt::rpl, - "plus" => \$opt::plus, - "I=s" => \$opt::I, - "extensionreplace|er=s" => \$opt::U, - "U=s" => \$opt::retired, - "basenamereplace|bnr=s" => \$opt::basenamereplace, - "dirnamereplace|dnr=s" => \$opt::dirnamereplace, - "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace, - "seqreplace=s" => \$opt::seqreplace, - "slotreplace=s" => \$opt::slotreplace, - 
"jobs|j=s" => \$opt::jobs, - "delay=f" => \$opt::delay, - "sshdelay=f" => \$opt::sshdelay, - "load=s" => \$opt::load, - "noswap" => \$opt::noswap, - "max-line-length-allowed" => \$opt::max_line_length_allowed, - "number-of-cpus" => \$opt::number_of_cpus, - "number-of-cores" => \$opt::number_of_cores, - "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores, - "shellquote|shell_quote|shell-quote" => \$opt::shellquote, - "nice=i" => \$opt::nice, - "timeout=s" => \$opt::timeout, - "tag" => \$opt::tag, - "tagstring|tag-string=s" => \$opt::tagstring, - "onall" => \$opt::onall, - "nonall" => \$opt::nonall, - "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts, - "sshlogin|S=s" => \@opt::sshlogin, - "sshloginfile|slf=s" => \@opt::sshloginfile, - "controlmaster|M" => \$opt::controlmaster, - "return=s" => \@opt::return, - "trc=s" => \@opt::trc, - "transfer" => \$opt::transfer, - "cleanup" => \$opt::cleanup, - "basefile|bf=s" => \@opt::basefile, - "B=s" => \$opt::retired, - "ctrlc|ctrl-c" => \$opt::ctrlc, - "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc, - "workdir|work-dir|wd=s" => \$opt::workdir, - "W=s" => \$opt::retired, - "tmpdir=s" => \$opt::tmpdir, - "tempdir=s" => \$opt::tmpdir, - "use-compress-program|compress-program=s" => \$opt::compress_program, - "use-decompress-program|decompress-program=s" => \$opt::decompress_program, - "compress" => \$opt::compress, - "tty" => \$opt::tty, - "T" => \$opt::retired, - "halt-on-error|halt=s" => \$opt::halt_on_error, - "H=i" => \$opt::retired, - "retries=i" => \$opt::retries, - "dry-run|dryrun" => \$opt::dryrun, - "progress" => \$opt::progress, - "eta" => \$opt::eta, - "bar" => \$opt::bar, - "arg-sep|argsep=s" => \$opt::arg_sep, - "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep, - "trim=s" => \$opt::trim, - "env=s" => \@opt::env, - "recordenv|record-env" => \$opt::record_env, - "plain" => \$opt::plain, - "profile|J=s" => \@opt::profile, - "pipe|spreadstdin" => \$opt::pipe, - "robin|round-robin|roundrobin" => \$opt::roundrobin, - "recstart=s" => \$opt::recstart, - "recend=s" => \$opt::recend, - "regexp|regex" => \$opt::regexp, - "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep, - "files|output-as-files|outputasfiles" => \$opt::files, - "block|block-size|blocksize=s" => \$opt::blocksize, - "tollef" => \$opt::retired, - "gnu" => \$opt::gnu, - "xapply" => \$opt::xapply, - "bibtex" => \$opt::bibtex, - "nn|nonotice|no-notice" => \$opt::no_notice, - # xargs-compatibility - implemented, man, testsuite - "max-procs|P=s" => \$opt::jobs, - "delimiter|d=s" => \$opt::d, - "max-chars|s=i" => \$opt::max_chars, - "arg-file|a=s" => \@opt::a, - "no-run-if-empty|r" => \$opt::r, - "replace|i:s" => \$opt::i, - "E=s" => \$opt::eof, - "eof|e:s" => \$opt::eof, - "max-args|n=i" => \$opt::max_args, - "max-replace-args|N=i" => \$opt::max_replace_args, - "colsep|col-sep|C=s" => \$opt::colsep, - "help|h" => \$opt::help, - "L=f" => \$opt::L, - "max-lines|l:f" => \$opt::max_lines, - "interactive|p" => \$opt::p, - "verbose|t" => \$opt::verbose, - "version|V" => \$opt::version, - "minversion|min-version=i" => \$opt::minversion, - "show-limits|showlimits" => \$opt::show_limits, - "exit|x" => \$opt::x, - # Semaphore - "semaphore" => \$opt::semaphore, - "semaphoretimeout=i" => \$opt::semaphoretimeout, - "semaphorename|id=s" => \$opt::semaphorename, - "fg" => \$opt::fg, - "bg" => \$opt::bg, - "wait" => \$opt::wait, - # Shebang #!/usr/bin/parallel --shebang - "shebang|hashbang" => \$opt::shebang, - "internal-pipe-means-argfiles" => 
\$opt::internal_pipe_means_argfiles, - "Y" => \$opt::retired, - "skip-first-line" => \$opt::skip_first_line, - "header=s" => \$opt::header, - "cat" => \$opt::cat, - "fifo" => \$opt::fifo, - "pipepart|pipe-part" => \$opt::pipepart, - "hgrp|hostgroup|hostgroups" => \$opt::hostgroups, - ); -} - -sub get_options_from_array { - # Run GetOptions on @array - # Input: - # $array_ref = ref to @ARGV to parse - # @keep_only = Keep only these options - # Uses: - # @ARGV - # Returns: - # true if parsing worked - # false if parsing failed - # @$array_ref is changed - my ($array_ref, @keep_only) = @_; - if(not @$array_ref) { - # Empty array: No need to look more at that - return 1; - } - # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not - # supported everywhere - my @save_argv; - my $this_is_ARGV = (\@::ARGV == $array_ref); - if(not $this_is_ARGV) { - @save_argv = @::ARGV; - @::ARGV = @{$array_ref}; - } - # If @keep_only set: Ignore all values except @keep_only - my %options = options_hash(); - if(@keep_only) { - my (%keep,@dummy); - @keep{@keep_only} = @keep_only; - for my $k (grep { not $keep{$_} } keys %options) { - # Store the value of the option in @dummy - $options{$k} = \@dummy; - } - } - my $retval = GetOptions(%options); - if(not $this_is_ARGV) { - @{$array_ref} = @::ARGV; - @::ARGV = @save_argv; - } - return $retval; -} - -sub parse_options { - # Returns: N/A - # Defaults: - $Global::version = 20141122; - $Global::progname = 'parallel'; - $Global::infinity = 2**31; - $Global::debug = 0; - $Global::verbose = 0; - $Global::quoting = 0; - # Read only table with default --rpl values - %Global::replace = - ( - '{}' => '', - '{#}' => '1 $_=$job->seq()', - '{%}' => '1 $_=$job->slot()', - '{/}' => 's:.*/::', - '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);', - '{/.}' => 's:.*/::; s:\.[^/.]+$::;', - '{.}' => 's:\.[^/.]+$::', - ); - %Global::plus = - ( - # {} = {+/}/{/} - # = {.}.{+.} = {+/}/{/.}.{+.} - # = {..}.{+..} = {+/}/{/..}.{+..} - # = {...}.{+...} = {+/}/{/...}.{+...} - '{+/}' => 's:/[^/]*$::', - '{+.}' => 's:.*\.::', - '{+..}' => 's:.*\.([^.]*\.):$1:', - '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:', - '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::', - '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', - '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::', - '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', - ); - # Modifiable copy of %Global::replace - %Global::rpl = %Global::replace; - $Global::parens = "{==}"; - $/="\n"; - $Global::ignore_empty = 0; - $Global::interactive = 0; - $Global::stderr_verbose = 0; - $Global::default_simultaneous_sshlogins = 9; - $Global::exitstatus = 0; - $Global::halt_on_error_exitstatus = 0; - $Global::arg_sep = ":::"; - $Global::arg_file_sep = "::::"; - $Global::trim = 'n'; - $Global::max_jobs_running = 0; - $Global::job_already_run = ''; - $ENV{'TMPDIR'} ||= "/tmp"; - - @ARGV=read_options(); - - if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 - $Global::debug = $opt::D; - $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh"; - if(defined $opt::X) { $Global::ContextReplace = 1; } - if(defined $opt::silent) { $Global::verbose = 0; } - if(defined $opt::0) { $/ = "\0"; } - if(defined $opt::d) { my $e="sprintf \"$opt::d\""; $/ = eval $e; } - if(defined $opt::p) { $Global::interactive = $opt::p; } - if(defined $opt::q) { $Global::quoting = 1; } - if(defined $opt::r) { $Global::ignore_empty = 1; } - if(defined $opt::verbose) { 
$Global::stderr_verbose = 1; } - # Deal with --rpl - sub rpl { - # Modify %Global::rpl - # Replace $old with $new - my ($old,$new) = @_; - if($old ne $new) { - $Global::rpl{$new} = $Global::rpl{$old}; - delete $Global::rpl{$old}; - } - } - if(defined $opt::parens) { $Global::parens = $opt::parens; } - my $parenslen = 0.5*length $Global::parens; - $Global::parensleft = substr($Global::parens,0,$parenslen); - $Global::parensright = substr($Global::parens,$parenslen); - if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } - if(defined $opt::I) { rpl('{}',$opt::I); } - if(defined $opt::U) { rpl('{.}',$opt::U); } - if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } - if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } - if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } - if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } - if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } - if(defined $opt::basenameextensionreplace) { - rpl('{/.}',$opt::basenameextensionreplace); - } - for(@opt::rpl) { - # Create $Global::rpl entries for --rpl options - # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" - my ($shorthand,$long) = split/ /,$_,2; - $Global::rpl{$shorthand} = $long; - } - if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } - if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; } - if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); } - if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } - if(defined $opt::help) { die_usage(); } - if(defined $opt::colsep) { $Global::trim = 'lr'; } - if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; } - if(defined $opt::trim) { $Global::trim = $opt::trim; } - if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } - if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; } - if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); } - if(defined $opt::number_of_cores) { - print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); - } - if(defined $opt::max_line_length_allowed) { - print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); - } - if(defined $opt::version) { version(); wait_and_exit(0); } - if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); } - if(defined $opt::record_env) { record_env(); wait_and_exit(0); } - if(defined $opt::show_limits) { show_limits(); } - if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } - if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } - if(@opt::return) { push @Global::ret_files, @opt::return; } - if(not defined $opt::recstart and - not defined $opt::recend) { $opt::recend = "\n"; } - if(not defined $opt::blocksize) { $opt::blocksize = "1M"; } - $opt::blocksize = multiply_binary_prefix($opt::blocksize); - if(defined $opt::controlmaster) { $opt::noctrlc = 1; } - if(defined $opt::semaphore) { $Global::semaphore = 1; } - if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } - if(defined $opt::semaphorename) { $Global::semaphore = 1; } - if(defined $opt::fg) { $Global::semaphore = 1; } - if(defined $opt::bg) { $Global::semaphore = 1; } - if(defined $opt::wait) { $Global::semaphore = 1; } - if(defined $opt::halt_on_error and - $opt::halt_on_error=~/%/) { $opt::halt_on_error /= 100; } - if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) { - ::error("--timeout must be seconds or percentage\n"); - wait_and_exit(255); - } - if(defined 
$opt::minversion) { - print $Global::version,"\n"; - if($Global::version < $opt::minversion) { - wait_and_exit(255); - } else { - wait_and_exit(0); - } - } - if(not defined $opt::delay) { - # Set --delay to --sshdelay if not set - $opt::delay = $opt::sshdelay; - } - if($opt::compress_program) { - $opt::compress = 1; - $opt::decompress_program ||= $opt::compress_program." -dc"; - } - if($opt::compress) { - my ($compress, $decompress) = find_compression_program(); - $opt::compress_program ||= $compress; - $opt::decompress_program ||= $decompress; - } - if(defined $opt::nonall) { - # Append a dummy empty argument - push @ARGV, $Global::arg_sep, ""; - } - if(defined $opt::tty) { - # Defaults for --tty: -j1 -u - # Can be overridden with -jXXX -g - if(not defined $opt::jobs) { - $opt::jobs = 1; - } - if(not defined $opt::group) { - $opt::ungroup = 0; - } - } - if(@opt::trc) { - push @Global::ret_files, @opt::trc; - $opt::transfer = 1; - $opt::cleanup = 1; - } - if(defined $opt::max_lines) { - if($opt::max_lines eq "-0") { - # -l -0 (swallowed -0) - $opt::max_lines = 1; - $opt::0 = 1; - $/ = "\0"; - } elsif ($opt::max_lines == 0) { - # If not given (or if 0 is given) => 1 - $opt::max_lines = 1; - } - $Global::max_lines = $opt::max_lines; - if(not $opt::pipe) { - # --pipe -L means length of record - not max_number_of_args - $Global::max_number_of_args ||= $Global::max_lines; - } - } - - # Read more than one arg at a time (-L, -N) - if(defined $opt::L) { - $Global::max_lines = $opt::L; - if(not $opt::pipe) { - # --pipe -L means length of record - not max_number_of_args - $Global::max_number_of_args ||= $Global::max_lines; - } - } - if(defined $opt::max_replace_args) { - $Global::max_number_of_args = $opt::max_replace_args; - $Global::ContextReplace = 1; - } - if((defined $opt::L or defined $opt::max_replace_args) - and - not ($opt::xargs or $opt::m)) { - $Global::ContextReplace = 1; - } - if(defined $opt::tag and not defined $opt::tagstring) { - $opt::tagstring = "\257<\257>"; # Default = {} - } - if(defined $opt::pipepart and - (defined $opt::L or defined $opt::max_lines - or defined $opt::max_replace_args)) { - ::error("--pipepart is incompatible with --max-replace-args, ", - "--max-lines, and -L.\n"); - wait_and_exit(255); - } - if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) { - # Deal with ::: and :::: - @ARGV=read_args_from_command_line(); - } - - # Semaphore defaults - # Must be done before computing number of processes and max_line_length - # because when running as a semaphore GNU Parallel does not read args - $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' - if($Global::semaphore) { - # A semaphore does not take input from neither stdin nor file - @opt::a = ("/dev/null"); - push(@Global::unget_argv, [Arg->new("")]); - $Semaphore::timeout = $opt::semaphoretimeout || 0; - if(defined $opt::semaphorename) { - $Semaphore::name = $opt::semaphorename; - } else { - $Semaphore::name = `tty`; - chomp $Semaphore::name; - } - $Semaphore::fg = $opt::fg; - $Semaphore::wait = $opt::wait; - $Global::default_simultaneous_sshlogins = 1; - if(not defined $opt::jobs) { - $opt::jobs = 1; - } - if($Global::interactive and $opt::bg) { - ::error("Jobs running in the ". - "background cannot be interactive.\n"); - ::wait_and_exit(255); - } - } - if(defined $opt::eta) { - $opt::progress = $opt::eta; - } - if(defined $opt::bar) { - $opt::progress = $opt::bar; - } - if(defined $opt::retired) { - ::error("-g has been retired. Use --group.\n"); - ::error("-B has been retired. 
Use --bf.\n"); - ::error("-T has been retired. Use --tty.\n"); - ::error("-U has been retired. Use --er.\n"); - ::error("-W has been retired. Use --wd.\n"); - ::error("-Y has been retired. Use --shebang.\n"); - ::error("-H has been retired. Use --halt.\n"); - ::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n"); - ::wait_and_exit(255); - } - citation_notice(); - - parse_sshlogin(); - parse_env_var(); - - if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { - # As we do not know the max line length on the remote machine - # long commands generated by xargs may fail - # If opt_N is set, it is probably safe - ::warning("Using -X or -m with --sshlogin may fail.\n"); - } - - if(not defined $opt::jobs) { - $opt::jobs = "100%"; - } - open_joblog(); -} - -sub env_quote { - # Input: - # $v = value to quote - # Returns: - # $v = value quoted as environment variable - my $v = $_[0]; - $v =~ s/([\\])/\\$1/g; - $v =~ s/([\[\] \#\'\&\<\>\(\)\;\{\}\t\"\$\`\*\174\!\?\~])/\\$1/g; - $v =~ s/\n/"\n"/g; - return $v; -} - -sub record_env { - # Record current %ENV-keys in ~/.parallel/ignored_vars - # Returns: N/A - my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars"; - if(open(my $vars_fh, ">", $ignore_filename)) { - print $vars_fh map { $_,"\n" } keys %ENV; - } else { - ::error("Cannot write to $ignore_filename\n"); - ::wait_and_exit(255); - } -} - -sub parse_env_var { - # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen - # - # Bash functions must be parsed to export them remotely - # Pre-shellshock style bash function: - # myfunc=() {... - # Post-shellshock style bash function: - # BASH_FUNC_myfunc()=() {... - # - # Uses: - # $Global::envvar = eval string that will set variables in both bash and csh - # $Global::envwarn = If functions are used: Give warning in csh - # $Global::envvarlen = length of $Global::envvar - # @opt::env - # $Global::shell - # %ENV - # Returns: N/A - $Global::envvar = ""; - $Global::envwarn = ""; - my @vars = ('parallel_bash_environment'); - for my $varstring (@opt::env) { - # Split up --env VAR1,VAR2 - push @vars, split /,/, $varstring; - } - if(grep { /^_$/ } @vars) { - # --env _ - # Include all vars that are not in a clean environment - if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) { - my @ignore = <$vars_fh>; - chomp @ignore; - my %ignore; - @ignore{@ignore} = @ignore; - close $vars_fh; - push @vars, grep { not defined $ignore{$_} } keys %ENV; - @vars = grep { not /^_$/ } @vars; - } else { - ::error("Run '$Global::progname --record-env' in a clean environment first.\n"); - ::wait_and_exit(255); - } - } - # Duplicate vars as BASH functions to include post-shellshock functions. 
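# --- Editor's illustration (not part of the original source) ---------------
# The two exported-function layouts that the code below has to recognize; the
# function name "myfunc" is hypothetical:
#   pre-shellshock bash:    $ENV{"myfunc"}              eq "() {  echo a\n}"
#   post-shellshock bash:   $ENV{"BASH_FUNC_myfunc()"}  eq "() {  echo a\n}"
# Both values start with "() {", which is what the greps further down key on,
# e.g.:
my @exported_fn_names_example =
    grep { defined $ENV{$_} and substr($ENV{$_},0,4) eq "() {" } keys %ENV;
# ----------------------------------------------------------------------------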
- # So --env myfunc should also look for BASH_FUNC_myfunc() - @vars = map { $_, "BASH_FUNC_$_()" } @vars; - # Keep only defined variables - @vars = grep { defined($ENV{$_}) } @vars; - # Pre-shellshock style bash function: - # myfunc=() { echo myfunc - # } - # Post-shellshock style bash function: - # BASH_FUNC_myfunc()=() { echo myfunc - # } - my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; - my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars; - if(@bash_functions) { - # Functions are not supported for all shells - if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) { - ::warning("Shell functions may not be supported in $Global::shell\n"); - } - } - - # Pre-shellschock names are without () - my @bash_pre_shellshock = grep { not /\(\)/ } @bash_functions; - # Post-shellschock names are with () - my @bash_post_shellshock = grep { /\(\)/ } @bash_functions; - - my @qcsh = (map { my $a=$_; "setenv $a " . env_quote($ENV{$a}) } - grep { not /^parallel_bash_environment$/ } @non_functions); - my @qbash = (map { my $a=$_; "export $a=" . env_quote($ENV{$a}) } - @non_functions, @bash_pre_shellshock); - - push @qbash, map { my $a=$_; "eval $a\"\$$a\"" } @bash_pre_shellshock; - push @qbash, map { /BASH_FUNC_(.*)\(\)/; "$1 $ENV{$_}" } @bash_post_shellshock; - - #ssh -tt -oLogLevel=quiet lo 'eval `echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ BASH_FUNC_myfunc\ \\\(\\\)\\\ \\\{\\\ \\\ echo\\\ a\"' - #'\"\\\}\ \|\|\ myfunc\(\)\ \{\ \ echo\ a' - #'\}\ \;myfunc\ 1; - - # Check if any variables contain \n - if(my @v = map { s/BASH_FUNC_(.*)\(\)/$1/; $_ } grep { $ENV{$_}=~/\n/ } @vars) { - # \n is bad for csh and will cause it to fail. - $Global::envwarn = ::shell_quote_scalar(q{echo $SHELL | grep -E "/t?csh" > /dev/null && echo CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset }."@v".q{ && exec false;}."\n\n") . $Global::envwarn; - } - - if(not @qcsh) { push @qcsh, "true"; } - if(not @qbash) { push @qbash, "true"; } - # Create lines like: - # echo $SHELL | grep "/t\\{0,1\\}csh" >/dev/null && setenv V1 val1 && setenv V2 val2 || export V1=val1 && export V2=val2 ; echo "$V1$V2" - if(@vars) { - $Global::envvar .= - join"", - (q{echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null && } - . join(" && ", @qcsh) - . q{ || } - . 
join(" && ", @qbash) - .q{;}); - if($ENV{'parallel_bash_environment'}) { - $Global::envvar .= 'eval "$parallel_bash_environment";'."\n"; - } - } - $Global::envvarlen = length $Global::envvar; -} - -sub open_joblog { - # Open joblog as specified by --joblog - # Uses: - # $opt::resume - # $opt::resume_failed - # $opt::joblog - # $opt::results - # $Global::job_already_run - # %Global::fd - my $append = 0; - if(($opt::resume or $opt::resume_failed) - and - not ($opt::joblog or $opt::results)) { - ::error("--resume and --resume-failed require --joblog or --results.\n"); - ::wait_and_exit(255); - } - if($opt::joblog) { - if($opt::resume || $opt::resume_failed) { - if(open(my $joblog_fh, "<", $opt::joblog)) { - # Read the joblog - $append = <$joblog_fh>; # If there is a header: Open as append later - my $joblog_regexp; - if($opt::resume_failed) { - # Make a regexp that only matches commands with exit+signal=0 - # 4 host 1360490623.067 3.445 1023 1222 0 0 command - $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; - } else { - # Just match the job number - $joblog_regexp='^(\d+)'; - } - while(<$joblog_fh>) { - if(/$joblog_regexp/o) { - # This is 30% faster than set_job_already_run($1); - vec($Global::job_already_run,($1||0),1) = 1; - } elsif(not /\d+\s+[^\s]+\s+([0-9.]+\s+){6}/) { - ::error("Format of '$opt::joblog' is wrong: $_"); - ::wait_and_exit(255); - } - } - close $joblog_fh; - } - } - if($append) { - # Append to joblog - if(not open($Global::joblog, ">>", $opt::joblog)) { - ::error("Cannot append to --joblog $opt::joblog.\n"); - ::wait_and_exit(255); - } - } else { - if($opt::joblog eq "-") { - # Use STDOUT as joblog - $Global::joblog = $Global::fd{1}; - } elsif(not open($Global::joblog, ">", $opt::joblog)) { - # Overwrite the joblog - ::error("Cannot write to --joblog $opt::joblog.\n"); - ::wait_and_exit(255); - } - print $Global::joblog - join("\t", "Seq", "Host", "Starttime", "JobRuntime", - "Send", "Receive", "Exitval", "Signal", "Command" - ). "\n"; - } - } -} - -sub find_compression_program { - # Find a fast compression program - # Returns: - # $compress_program = compress program with options - # $decompress_program = decompress program with options - - # Search for these. Sorted by speed - my @prg = qw(lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2); - for my $p (@prg) { - if(which($p)) { - return ("$p -c -1","$p -dc"); - } - } - # Fall back to cat - return ("cat","cat"); -} - - -sub read_options { - # Read options from command line, profile and $PARALLEL - # Uses: - # $opt::shebang_wrap - # $opt::shebang - # @ARGV - # $opt::plain - # @opt::profile - # $ENV{'HOME'} - # $ENV{'PARALLEL'} - # Returns: - # @ARGV_no_opt = @ARGV without --options - - # This must be done first as this may exec myself - if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or - $ARGV[0] =~ /^--shebang-?wrap/ or - $ARGV[0] =~ /^--hashbang/)) { - # Program is called from #! line in script - # remove --shebang-wrap if it is set - $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); - # remove --shebang if it is set - $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); - # remove --hashbang if it is set - $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); - if($opt::shebang) { - my $argfile = shell_quote_scalar(pop @ARGV); - # exec myself to split $ARGV[0] into separate fields - exec "$0 --skip-first-line -a $argfile @ARGV"; - } - if($opt::shebang_wrap) { - my @options; - my @parser; - if ($^O eq 'freebsd') { - # FreeBSD's #! puts different values in @ARGV than Linux' does. 
- my @nooptions = @ARGV; - get_options_from_array(\@nooptions); - while($#ARGV > $#nooptions) { - push @options, shift @ARGV; - } - while(@ARGV and $ARGV[0] ne ":::") { - push @parser, shift @ARGV; - } - if(@ARGV and $ARGV[0] eq ":::") { - shift @ARGV; - } - } else { - @options = shift @ARGV; - } - my $script = shell_quote_scalar(shift @ARGV); - # exec myself to split $ARGV[0] into separate fields - exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV"; - } - } - - Getopt::Long::Configure("bundling","require_order"); - my @ARGV_copy = @ARGV; - # Check if there is a --profile to set @opt::profile - get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); - my @ARGV_profile = (); - my @ARGV_env = (); - if(not $opt::plain) { - # Add options from .parallel/config and other profiles - my @config_profiles = ( - "/etc/parallel/config", - $ENV{'HOME'}."/.parallel/config", - $ENV{'HOME'}."/.parallelrc"); - my @profiles = @config_profiles; - if(@opt::profile) { - # --profile overrides default profiles - @profiles = (); - for my $profile (@opt::profile) { - if(-r $profile) { - push @profiles, $profile; - } else { - push @profiles, $ENV{'HOME'}."/.parallel/".$profile; - } - } - } - for my $profile (@profiles) { - if(-r $profile) { - open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile"); - while(<$in_fh>) { - /^\s*\#/ and next; - chomp; - push @ARGV_profile, shellwords($_); - } - close $in_fh; - } else { - if(grep /^$profile$/, @config_profiles) { - # config file is not required to exist - } else { - ::error("$profile not readable.\n"); - wait_and_exit(255); - } - } - } - # Add options from shell variable $PARALLEL - if($ENV{'PARALLEL'}) { - @ARGV_env = shellwords($ENV{'PARALLEL'}); - } - } - Getopt::Long::Configure("bundling","require_order"); - get_options_from_array(\@ARGV_profile) || die_usage(); - get_options_from_array(\@ARGV_env) || die_usage(); - get_options_from_array(\@ARGV) || die_usage(); - - # Prepend non-options to @ARGV (such as commands like 'nice') - unshift @ARGV, @ARGV_profile, @ARGV_env; - return @ARGV; -} - -sub read_args_from_command_line { - # Arguments given on the command line after: - # ::: ($Global::arg_sep) - # :::: ($Global::arg_file_sep) - # Removes the arguments from @ARGV and: - # - puts filenames into -a - # - puts arguments into files and add the files to -a - # Input: - # @::ARGV = command option ::: arg arg arg :::: argfiles - # Uses: - # $Global::arg_sep - # $Global::arg_file_sep - # $opt::internal_pipe_means_argfiles - # $opt::pipe - # @opt::a - # Returns: - # @argv_no_argsep = @::ARGV without ::: and :::: and following args - my @new_argv = (); - for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) { - if($arg eq $Global::arg_sep - or - $arg eq $Global::arg_file_sep) { - my $group = $arg; # This group of arguments is args or argfiles - my @group; - while(defined ($arg = shift @ARGV)) { - if($arg eq $Global::arg_sep - or - $arg eq $Global::arg_file_sep) { - # exit while loop if finding new separator - last; - } else { - # If not hitting ::: or :::: - # Append it to the group - push @group, $arg; - } - } - - if($group eq $Global::arg_file_sep - or ($opt::internal_pipe_means_argfiles and $opt::pipe) - ) { - # Group of file names on the command line. - # Append args into -a - push @opt::a, @group; - } elsif($group eq $Global::arg_sep) { - # Group of arguments on the command line. - # Put them into a file. 
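# --- Editor's illustration (not part of the original source) ----------------
# For a hypothetical invocation
#     parallel echo ::: a b c :::: args.txt
# the ":::" group (a b c) is written to an unlinked tempfile whose open handle
# is pushed onto @opt::a, while the "::::" group pushes the filename args.txt
# onto @opt::a directly.  A minimal sketch of the tempfile trick using core
# File::Temp instead of parallel's own ::tmpfile():
use File::Temp qw(tempfile);
my ($example_fh, $example_name) = tempfile(SUFFIX => ".arg", UNLINK => 1);
print $example_fh map { "$_\n" } qw(a b c);
seek $example_fh, 0, 0;   # rewind so the handle can be read like an argfile
# ------------------------------------------------------------------------------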
- # Create argfile - my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); - unlink($name); - # Put args into argfile - print $outfh map { $_,$/ } @group; - seek $outfh, 0, 0; - # Append filehandle to -a - push @opt::a, $outfh; - } else { - ::die_bug("Unknown command line group: $group"); - } - if(defined($arg)) { - # $arg is ::: or :::: - redo; - } else { - # $arg is undef -> @ARGV empty - last; - } - } - push @new_argv, $arg; - } - # Output: @ARGV = command to run with options - return @new_argv; -} - -sub cleanup { - # Returns: N/A - if(@opt::basefile) { cleanup_basefile(); } -} - -sub __QUOTING_ARGUMENTS_FOR_SHELL__ {} - -sub shell_quote { - # Input: - # @strings = strings to be quoted - # Output: - # @shell_quoted_strings = string quoted with \ as needed by the shell - my @strings = (@_); - for my $a (@strings) { - $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; - $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \' - } - return wantarray ? @strings : "@strings"; -} - -sub shell_quote_empty { - # Inputs: - # @strings = strings to be quoted - # Returns: - # @quoted_strings = empty strings quoted as ''. - my @strings = shell_quote(@_); - for my $a (@strings) { - if($a eq "") { - $a = "''"; - } - } - return wantarray ? @strings : "@strings"; -} - -sub shell_quote_scalar { - # Quote the string so shell will not expand any special chars - # Inputs: - # $string = string to be quoted - # Returns: - # $shell_quoted = string quoted with \ as needed by the shell - my $a = $_[0]; - if(defined $a) { - # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; - # This is 1% faster than the above - $a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go; - $a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \' - } - return $a; -} - -sub shell_quote_file { - # Quote the string so shell will not expand any special chars and prepend ./ if needed - # Input: - # $filename = filename to be shell quoted - # Returns: - # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed - my $a = shell_quote_scalar(shift); - if(defined $a) { - if($a =~ m:^/: or $a =~ m:^\./:) { - # /abs/path or ./rel/path => skip - } else { - # rel/path => ./rel/path - $a = "./".$a; - } - } - return $a; -} - -sub shellwords { - # Input: - # $string = shell line - # Returns: - # @shell_words = $string split into words as shell would do - $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; - return Text::ParseWords::shellwords(@_); -} - - -sub __FILEHANDLES__ {} - - -sub save_stdin_stdout_stderr { - # Remember the original STDIN, STDOUT and STDERR - # and file descriptors opened by the shell (e.g. 
3>/tmp/foo) - # Uses: - # %Global::fd - # $Global::original_stderr - # $Global::original_stdin - # Returns: N/A - - # Find file descriptors that are already opened (by the shell) - for my $fdno (1..61) { - # /dev/fd/62 and above are used by bash for <(cmd) - my $fh; - # 2-argument-open is used to be compatible with old perl 5.8.0 - # bug #43570: Perl 5.8.0 creates 61 files - if(open($fh,">&=$fdno")) { - $Global::fd{$fdno}=$fh; - } - } - open $Global::original_stderr, ">&", "STDERR" or - ::die_bug("Can't dup STDERR: $!"); - open $Global::original_stdin, "<&", "STDIN" or - ::die_bug("Can't dup STDIN: $!"); - $Global::is_terminal = (-t $Global::original_stderr) && !$ENV{'CIRCLECI'} && !$ENV{'TRAVIS'}; -} - -sub enough_file_handles { - # Check that we have enough filehandles available for starting - # another job - # Uses: - # $opt::ungroup - # %Global::fd - # Returns: - # 1 if ungrouped (thus not needing extra filehandles) - # 0 if too few filehandles - # 1 if enough filehandles - if(not $opt::ungroup) { - my %fh; - my $enough_filehandles = 1; - # perl uses 7 filehandles for something? - # open3 uses 2 extra filehandles temporarily - # We need a filehandle for each redirected file descriptor - # (normally just STDOUT and STDERR) - for my $i (1..(7+2+keys %Global::fd)) { - $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); - } - for (values %fh) { close $_; } - return $enough_filehandles; - } else { - # Ungrouped does not need extra file handles - return 1; - } -} - -sub open_or_exit { - # Open a file name or exit if the file cannot be opened - # Inputs: - # $file = filehandle or filename to open - # Uses: - # $Global::stdin_in_opt_a - # $Global::original_stdin - # Returns: - # $fh = file handle to read-opened file - my $file = shift; - if($file eq "-") { - $Global::stdin_in_opt_a = 1; - return ($Global::original_stdin || *STDIN); - } - if(ref $file eq "GLOB") { - # This is an open filehandle - return $file; - } - my $fh = gensym; - if(not open($fh, "<", $file)) { - ::error("Cannot open input file `$file': No such file or directory.\n"); - wait_and_exit(255); - } - return $fh; -} - -sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {} - -# Variable structure: -# -# $Global::running{$pid} = Pointer to Job-object -# @Global::virgin_jobs = Pointer to Job-object that have received no input -# $Global::host{$sshlogin} = Pointer to SSHLogin-object -# $Global::total_running = total number of running jobs -# $Global::total_started = total jobs started - -sub init_run_jobs { - $Global::total_running = 0; - $Global::total_started = 0; - $Global::tty_taken = 0; - $SIG{USR1} = \&list_running_jobs; - $SIG{USR2} = \&toggle_progress; - if(@opt::basefile) { setup_basefile(); } -} - -{ - my $last_time; - my %last_mtime; - -sub start_more_jobs { - # Run start_another_job() but only if: - # * not $Global::start_no_new_jobs set - # * not JobQueue is empty - # * not load on server is too high - # * not server swapping - # * not too short time since last remote login - # Uses: - # $Global::max_procs_file - # $Global::max_procs_file_last_mod - # %Global::host - # @opt::sshloginfile - # $Global::start_no_new_jobs - # $opt::filter_hosts - # $Global::JobQueue - # $opt::pipe - # $opt::load - # $opt::noswap - # $opt::delay - # $Global::newest_starttime - # Returns: - # $jobs_started = number of jobs started - my $jobs_started = 0; - my $jobs_started_this_round = 0; - if($Global::start_no_new_jobs) { - return $jobs_started; - } - if(time - ($last_time||0) > 1) { - # At most do this every second - $last_time = time; - 
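# --- Editor's note (illustration, not part of the original source) -----------
# The next block implements "-j procfile": when --jobs is given a filename,
# the file's content supplies the job-slot count and is re-read whenever the
# file's mtime advances, so the limit can be changed while jobs are running.
# Hypothetical shell session:
#     echo 8 > /tmp/slots ; parallel -j /tmp/slots dowork ::: *.in &
#     echo 2 > /tmp/slots     # lowers the number of job slots without a restart
# -------------------------------------------------------------------------------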
if($Global::max_procs_file) { - # --jobs filename - my $mtime = (stat($Global::max_procs_file))[9]; - if($mtime > $Global::max_procs_file_last_mod) { - # file changed: Force re-computing max_jobs_running - $Global::max_procs_file_last_mod = $mtime; - for my $sshlogin (values %Global::host) { - $sshlogin->set_max_jobs_running(undef); - } - } - } - if(@opt::sshloginfile) { - # Is --sshloginfile changed? - for my $slf (@opt::sshloginfile) { - my $actual_file = expand_slf_shorthand($slf); - my $mtime = (stat($actual_file))[9]; - $last_mtime{$actual_file} ||= $mtime; - if($mtime - $last_mtime{$actual_file} > 1) { - ::debug("run","--sshloginfile $actual_file changed. reload\n"); - $last_mtime{$actual_file} = $mtime; - # Reload $slf - # Empty sshlogins - @Global::sshlogin = (); - for (values %Global::host) { - # Don't start new jobs on any host - # except the ones added back later - $_->set_max_jobs_running(0); - } - # This will set max_jobs_running on the SSHlogins - read_sshloginfile($actual_file); - parse_sshlogin(); - $opt::filter_hosts and filter_hosts(); - setup_basefile(); - } - } - } - } - do { - $jobs_started_this_round = 0; - # This will start 1 job on each --sshlogin (if possible) - # thus distribute the jobs on the --sshlogins round robin - - for my $sshlogin (values %Global::host) { - if($Global::JobQueue->empty() and not $opt::pipe) { - # No more jobs in the queue - last; - } - debug("run", "Running jobs before on ", $sshlogin->string(), ": ", - $sshlogin->jobs_running(), "\n"); - if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { - if($opt::load and $sshlogin->loadavg_too_high()) { - # The load is too high or unknown - next; - } - if($opt::noswap and $sshlogin->swapping()) { - # The server is swapping - next; - } - if($sshlogin->too_fast_remote_login()) { - # It has been too short since - next; - } - if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) { - # It has been too short since last start - next; - } - debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(), - " out of ", $sshlogin->max_jobs_running(), - " jobs running. Start another.\n"); - if(start_another_job($sshlogin) == 0) { - # No more jobs to start on this $sshlogin - debug("run","No jobs started on ", $sshlogin->string(), "\n"); - next; - } - $sshlogin->inc_jobs_running(); - $sshlogin->set_last_login_at(::now()); - $jobs_started++; - $jobs_started_this_round++; - } - debug("run","Running jobs after on ", $sshlogin->string(), ": ", - $sshlogin->jobs_running(), " of ", - $sshlogin->max_jobs_running(), "\n"); - } - } while($jobs_started_this_round); - - return $jobs_started; -} -} - -{ - my $no_more_file_handles_warned; - -sub start_another_job { - # If there are enough filehandles - # and JobQueue not empty - # and not $job is in joblog - # Then grab a job from Global::JobQueue, - # start it at sshlogin - # mark it as virgin_job - # Inputs: - # $sshlogin = the SSHLogin to start the job on - # Uses: - # $Global::JobQueue - # $opt::pipe - # $opt::results - # $opt::resume - # @Global::virgin_jobs - # Returns: - # 1 if another jobs was started - # 0 otherwise - my $sshlogin = shift; - # Do we have enough file handles to start another job? 
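# --- Editor's sketch (not part of the original source) -----------------------
# enough_file_handles() above estimates its headroom by actually opening
# /dev/null 7 (perl) + 2 (open3) + one-per-redirected-fd times.  A stand-alone
# version of the same probe; the sub name is hypothetical:
sub can_open_n_more_example {
    my $n = shift;
    my (%fh, $ok);
    $ok = 1;
    $ok &&= open($fh{$_}, "<", "/dev/null") for 1 .. $n;
    close $_ for values %fh;
    return $ok;
}
# -------------------------------------------------------------------------------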
- if(enough_file_handles()) { - if($Global::JobQueue->empty() and not $opt::pipe) { - # No more commands to run - debug("start", "Not starting: JobQueue empty\n"); - return 0; - } else { - my $job; - # Skip jobs already in job log - # Skip jobs already in results - do { - $job = get_job_with_sshlogin($sshlogin); - if(not defined $job) { - # No command available for that sshlogin - debug("start", "Not starting: no jobs available for ", - $sshlogin->string(), "\n"); - return 0; - } - } while ($job->is_already_in_joblog() - or - ($opt::results and $opt::resume and $job->is_already_in_results())); - debug("start", "Command to run on '", $job->sshlogin()->string(), "': '", - $job->replaced(),"'\n"); - if($job->start()) { - if($opt::pipe) { - push(@Global::virgin_jobs,$job); - } - debug("start", "Started as seq ", $job->seq(), - " pid:", $job->pid(), "\n"); - return 1; - } else { - # Not enough processes to run the job. - # Put it back on the queue. - $Global::JobQueue->unget($job); - # Count down the number of jobs to run for this SSHLogin. - my $max = $sshlogin->max_jobs_running(); - if($max > 1) { $max--; } else { - ::error("No more processes: cannot run a single job. Something is wrong.\n"); - ::wait_and_exit(255); - } - $sshlogin->set_max_jobs_running($max); - # Sleep up to 300 ms to give other processes time to die - ::usleep(rand()*300); - ::warning("No more processes: ", - "Decreasing number of running jobs to $max. ", - "Raising ulimit -u or /etc/security/limits.conf may help.\n"); - return 0; - } - } - } else { - # No more file handles - $no_more_file_handles_warned++ or - ::warning("No more file handles. ", - "Raising ulimit -n or /etc/security/limits.conf may help.\n"); - return 0; - } -} -} - -$opt::min_progress_interval = 0; - -sub init_progress { - # Uses: - # $opt::bar - # Returns: - # list of computers for progress output - $|=1; - if (not $Global::is_terminal) { - $opt::min_progress_interval = 30; - } - if($opt::bar) { - return("",""); - } - my %progress = progress(); - return ("\nComputers / CPU cores / Max jobs to run\n", - $progress{'workerlist'}); -} - -sub drain_job_queue { - # Uses: - # $opt::progress - # $Global::original_stderr - # $Global::total_running - # $Global::max_jobs_running - # %Global::running - # $Global::JobQueue - # %Global::host - # $Global::start_no_new_jobs - # Returns: N/A - if($opt::progress) { - print $Global::original_stderr init_progress(); - } - my $last_header=""; - my $sleep = 0.2; - my $last_left = 1000000000; - my $last_progress_time = 0; - my $ps_reported = 0; - do { - while($Global::total_running > 0) { - debug($Global::total_running, "==", scalar - keys %Global::running," slots: ", $Global::max_jobs_running); - if($opt::pipe) { - # When using --pipe sometimes file handles are not closed properly - for my $job (values %Global::running) { - close $job->fh(0,"w"); - } - } - # When not connected to terminal, assume CI (e.g. CircleCI). In - # that case we want occasional progress output to prevent abort - # due to timeout with no output, but we also need to stop sending - # progress output if there has been no actual progress, so that - # the job can time out appropriately (CirecleCI: 10m) in case of - # a hung test. But without special output, it is extremely - # annoying to diagnose which test is hung, so we add that using - # `ps` below. 
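# --- Editor's worked example (not part of the original source) ---------------
# How the bookkeeping below behaves when stderr is not a terminal (the CI
# case), with hypothetical timestamps:
#   t= 0s   a job finishes, $Global::left drops   -> status line printed,
#                                                    $last_progress_time = 0s
#   t=35s   nothing has finished (>=30s elapsed)  -> nothing printed
#   t=65s   still nothing finished (>=60s since
#           last progress, $ps_reported false)    -> one "ps" dump is printed
#   t=80s   a job finishes                        -> status printed, timers reset
# -------------------------------------------------------------------------------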
- if($opt::progress and - ($Global::is_terminal or (time() - $last_progress_time) >= 30)) { - my %progress = progress(); - if($last_header ne $progress{'header'}) { - print $Global::original_stderr "\n", $progress{'header'}, "\n"; - $last_header = $progress{'header'}; - } - if ($Global::is_terminal) { - print $Global::original_stderr "\r",$progress{'status'}; - } - if ($last_left > $Global::left) { - if (not $Global::is_terminal) { - print $Global::original_stderr $progress{'status'},"\n"; - } - $last_progress_time = time(); - $ps_reported = 0; - } elsif (not $ps_reported and (time() - $last_progress_time) >= 60) { - # No progress in at least 60 seconds: run ps - print $Global::original_stderr "\n"; - my $script_dir = ::dirname($0); - system("$script_dir/ps_with_stack || ps -wwf"); - $ps_reported = 1; - } - $last_left = $Global::left; - flush $Global::original_stderr; - } - if($Global::total_running < $Global::max_jobs_running - and not $Global::JobQueue->empty()) { - # These jobs may not be started because of loadavg - # or too little time between each ssh login. - if(start_more_jobs() > 0) { - # Exponential back-on if jobs were started - $sleep = $sleep/2+0.001; - } - } - # Sometimes SIGCHLD is not registered, so force reaper - $sleep = ::reap_usleep($sleep); - } - if(not $Global::JobQueue->empty()) { - # These jobs may not be started: - # * because there the --filter-hosts has removed all - if(not %Global::host) { - ::error("There are no hosts left to run on.\n"); - ::wait_and_exit(255); - } - # * because of loadavg - # * because of too little time between each ssh login. - start_more_jobs(); - $sleep = ::reap_usleep($sleep); - if($Global::max_jobs_running == 0) { - ::warning("There are no job slots available. Increase --jobs.\n"); - } - } - } while ($Global::total_running > 0 - or - not $Global::start_no_new_jobs and not $Global::JobQueue->empty()); - if($opt::progress) { - my %progress = progress(); - print $Global::original_stderr $opt::progress_sep, $progress{'status'}, "\n"; - flush $Global::original_stderr; - } -} - -sub toggle_progress { - # Turn on/off progress view - # Uses: - # $opt::progress - # $Global::original_stderr - # Returns: N/A - $opt::progress = not $opt::progress; - if($opt::progress) { - print $Global::original_stderr init_progress(); - } -} - -sub progress { - # Uses: - # $opt::bar - # $opt::eta - # %Global::host - # $Global::total_started - # Returns: - # $workerlist = list of workers - # $header = that will fit on the screen - # $status = message that will fit on the screen - if($opt::bar) { - return ("workerlist" => "", "header" => "", "status" => bar()); - } - my $eta = ""; - my ($status,$header)=("",""); - if($opt::eta) { - my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = - compute_eta(); - $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", - $this_eta, $left, $avgtime); - $Global::left = $left; - } - my $termcols = terminal_columns(); - my @workers = sort keys %Global::host; - my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers; - my $workerno = 1; - my %workerno = map { ($_=>$workerno++) } @workers; - my $workerlist = ""; - for my $w (@workers) { - $workerlist .= - $workerno{$w}.":".$sshlogin{$w} ." / ". - ($Global::host{$w}->ncpus() || "-")." / ". 
- $Global::host{$w}->max_jobs_running()."\n"; - } - $status = "x"x($termcols+1); - if(length $status > $termcols) { - # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs - $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete"; - $status = $eta . - join(" ",map - { - if($Global::total_started) { - my $completed = ($Global::host{$_}->jobs_completed()||0); - my $running = $Global::host{$_}->jobs_running(); - my $time = $completed ? (time-$^T)/($completed) : "0"; - sprintf("%s:%d/%d/%d%%/%.1fs ", - $sshlogin{$_}, $running, $completed, - ($running+$completed)*100 - / $Global::total_started, $time); - } - } @workers); - } - if(length $status > $termcols) { - # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs - $header = "Computer:jobs running/jobs completed/%of started jobs"; - $status = $eta . - join(" ",map - { - my $completed = ($Global::host{$_}->jobs_completed()||0); - my $running = $Global::host{$_}->jobs_running(); - my $time = $completed ? (time-$^T)/($completed) : "0"; - sprintf("%s:%d/%d/%d%%/%.1fs ", - $workerno{$_}, $running, $completed, - ($running+$completed)*100 - / $Global::total_started, $time); - } @workers); - } - if(length $status > $termcols) { - # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% - $header = "Computer:jobs running/jobs completed/%of started jobs"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d/%d%%", - $sshlogin{$_}, - $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0), - ($Global::host{$_}->jobs_running()+ - ($Global::host{$_}->jobs_completed()||0))*100 - / $Global::total_started) } - @workers); - } - if(length $status > $termcols) { - # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX% - $header = "Computer:jobs running/jobs completed/%of started jobs"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d/%d%%", - $workerno{$_}, - $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0), - ($Global::host{$_}->jobs_running()+ - ($Global::host{$_}->jobs_completed()||0))*100 - / $Global::total_started) } - @workers); - } - if(length $status > $termcols) { - # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX - $header = "Computer:jobs running/jobs completed"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d", - $sshlogin{$_}, $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - if(length $status > $termcols) { - # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX - $header = "Computer:jobs running/jobs completed"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d", - $sshlogin{$_}, $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - if(length $status > $termcols) { - # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX - $header = "Computer:jobs running/jobs completed"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d", - $workerno{$_}, $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - if(length $status > $termcols) { - # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX - $header = "Computer:jobs completed"; - $status = $eta . - join(" ",map - { sprintf("%s:%d", - $sshlogin{$_}, - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - if(length $status > $termcols) { - # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX - $header = "Computer:jobs completed"; - $status = $eta . 
- join(" ",map - { sprintf("%s:%d", - $workerno{$_}, - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - return ("workerlist" => $workerlist, "header" => $header, "status" => $status); -} - -{ - my ($total, $first_completed, $smoothed_avg_time); - - sub compute_eta { - # Calculate important numbers for ETA - # Returns: - # $total = number of jobs in total - # $completed = number of jobs completed - # $left = number of jobs left - # $pctcomplete = percent of jobs completed - # $avgtime = averaged time - # $eta = smoothed eta - $total ||= $Global::JobQueue->total_jobs(); - my $completed = 0; - for(values %Global::host) { $completed += $_->jobs_completed() } - my $left = $total - $completed; - if(not $completed) { - return($total, $completed, $left, 0, 0, 0); - } - my $pctcomplete = $completed / $total; - $first_completed ||= time; - my $timepassed = (time - $first_completed); - my $avgtime = $timepassed / $completed; - $smoothed_avg_time ||= $avgtime; - # Smooth the eta so it does not jump wildly - $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + - $pctcomplete * $avgtime; - my $eta = int($left * $smoothed_avg_time); - return($total, $completed, $left, $pctcomplete, $avgtime, $eta); - } -} - -{ - my ($rev,$reset); - - sub bar { - # Return: - # $status = bar with eta, completed jobs, arg and pct - $rev ||= "\033[7m"; - $reset ||= "\033[0m"; - my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = - compute_eta(); - my $arg = $Global::newest_job ? - $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; - # These chars mess up display in the terminal - $arg =~ tr/[\011-\016\033\302-\365]//d; - my $bar_text = - sprintf("%d%% %d:%d=%ds %s", - $pctcomplete*100, $completed, $left, $eta, $arg); - my $terminal_width = terminal_columns(); - my $s = sprintf("%-${terminal_width}s", - substr($bar_text." "x$terminal_width, - 0,$terminal_width)); - my $width = int($terminal_width * $pctcomplete); - substr($s,$width,0) = $reset; - my $zenity = sprintf("%-${terminal_width}s", - substr("# $eta sec $arg", - 0,$terminal_width)); - $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header - "\r" . $rev . $s . 
$reset; - return $s; - } -} - -{ - my ($columns,$last_column_time); - - sub terminal_columns { - # Get the number of columns of the display - # Returns: - # number of columns of the screen - if(not $columns or $last_column_time < time) { - $last_column_time = time; - $columns = $ENV{'COLUMNS'}; - if(not $columns) { - my $resize = qx{ resize 2>/dev/null }; - $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; }; - } - $columns ||= 80; - } - return $columns; - } -} - -sub get_job_with_sshlogin { - # Returns: - # next job object for $sshlogin if any available - my $sshlogin = shift; - my $job = undef; - - if ($opt::hostgroups) { - my @other_hostgroup_jobs = (); - - while($job = $Global::JobQueue->get()) { - if($sshlogin->in_hostgroups($job->hostgroups())) { - # Found a job for this hostgroup - last; - } else { - # This job was not in the hostgroups of $sshlogin - push @other_hostgroup_jobs, $job; - } - } - $Global::JobQueue->unget(@other_hostgroup_jobs); - if(not defined $job) { - # No more jobs - return undef; - } - } else { - $job = $Global::JobQueue->get(); - if(not defined $job) { - # No more jobs - ::debug("start", "No more jobs: JobQueue empty\n"); - return undef; - } - } - - my $clean_command = $job->replaced(); - if($clean_command =~ /^\s*$/) { - # Do not run empty lines - if(not $Global::JobQueue->empty()) { - return get_job_with_sshlogin($sshlogin); - } else { - return undef; - } - } - $job->set_sshlogin($sshlogin); - if($opt::retries and $clean_command and - $job->failed_here()) { - # This command with these args failed for this sshlogin - my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); - # Only look at the Global::host that have > 0 jobslots - if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host - and $job->failed_here() == $min_failures) { - # It failed the same or more times on another host: - # run it on this host - } else { - # If it failed fewer times on another host: - # Find another job to run - my $nextjob; - if(not $Global::JobQueue->empty()) { - # This can potentially recurse for all args - no warnings 'recursion'; - $nextjob = get_job_with_sshlogin($sshlogin); - } - # Push the command back on the queue - $Global::JobQueue->unget($job); - return $nextjob; - } - } - return $job; -} - -sub __REMOTE_SSH__ {} - -sub read_sshloginfiles { - # Returns: N/A - for my $s (@_) { - read_sshloginfile(expand_slf_shorthand($s)); - } -} - -sub expand_slf_shorthand { - my $file = shift; - if($file eq "-") { - # skip: It is stdin - } elsif($file eq "..") { - $file = $ENV{'HOME'}."/.parallel/sshloginfile"; - } elsif($file eq ".") { - $file = "/etc/parallel/sshloginfile"; - } elsif(not -r $file) { - if(not -r $ENV{'HOME'}."/.parallel/".$file) { - # Try prepending ~/.parallel - ::error("Cannot open $file.\n"); - ::wait_and_exit(255); - } else { - $file = $ENV{'HOME'}."/.parallel/".$file; - } - } - return $file; -} - -sub read_sshloginfile { - # Returns: N/A - my $file = shift; - my $close = 1; - my $in_fh; - ::debug("init","--slf ",$file); - if($file eq "-") { - $in_fh = *STDIN; - $close = 0; - } else { - if(not open($in_fh, "<", $file)) { - # Try the filename - ::error("Cannot open $file.\n"); - ::wait_and_exit(255); - } - } - while(<$in_fh>) { - chomp; - /^\s*#/ and next; - /^\s*$/ and next; - push @Global::sshlogin, $_; - } - if($close) { - close $in_fh; - } -} - -sub parse_sshlogin { - # Returns: N/A - my @login; - if(not @Global::sshlogin) { @Global::sshlogin = (":"); } - for my $sshlogin (@Global::sshlogin) { - # Split up -S 
sshlogin,sshlogin - for my $s (split /,/, $sshlogin) { - if ($s eq ".." or $s eq "-") { - # This may add to @Global::sshlogin - possibly bug - read_sshloginfile(expand_slf_shorthand($s)); - } else { - push (@login, $s); - } - } - } - $Global::minimal_command_line_length = 8_000_000; - my @allowed_hostgroups; - for my $ncpu_sshlogin_string (::uniq(@login)) { - my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); - my $sshlogin_string = $sshlogin->string(); - if($sshlogin_string eq "") { - # This is an ssh group: -S @webservers - push @allowed_hostgroups, $sshlogin->hostgroups(); - next; - } - if($Global::host{$sshlogin_string}) { - # This sshlogin has already been added: - # It is probably a host that has come back - # Set the max_jobs_running back to the original - debug("run","Already seen $sshlogin_string\n"); - if($sshlogin->{'ncpus'}) { - # If ncpus set by '#/' of the sshlogin, overwrite it: - $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); - } - $Global::host{$sshlogin_string}->set_max_jobs_running(undef); - next; - } - if($sshlogin_string eq ":") { - $sshlogin->set_maxlength(Limits::Command::max_length()); - } else { - # If all chars needs to be quoted, every other character will be \ - $sshlogin->set_maxlength(int(Limits::Command::max_length()/2)); - } - $Global::minimal_command_line_length = - ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); - $Global::host{$sshlogin_string} = $sshlogin; - } - if(@allowed_hostgroups) { - # Remove hosts that are not in these groups - while (my ($string, $sshlogin) = each %Global::host) { - if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { - delete $Global::host{$string}; - } - } - } - - # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); - if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) { - if(not remote_hosts()) { - # There are no remote hosts - if(@opt::trc) { - ::warning("--trc ignored as there are no remote --sshlogin.\n"); - } elsif (defined $opt::transfer) { - ::warning("--transfer ignored as there are no remote --sshlogin.\n"); - } elsif (@opt::return) { - ::warning("--return ignored as there are no remote --sshlogin.\n"); - } elsif (defined $opt::cleanup) { - ::warning("--cleanup ignored as there are no remote --sshlogin.\n"); - } elsif (@opt::basefile) { - ::warning("--basefile ignored as there are no remote --sshlogin.\n"); - } - } - } -} - -sub remote_hosts { - # Return sshlogins that are not ':' - # Returns: - # list of sshlogins with ':' removed - return grep !/^:$/, keys %Global::host; -} - -sub setup_basefile { - # Transfer basefiles to each $sshlogin - # This needs to be done before first jobs on $sshlogin is run - # Returns: N/A - my $cmd = ""; - my $rsync_destdir; - my $workdir; - for my $sshlogin (values %Global::host) { - if($sshlogin->string() eq ":") { next } - for my $file (@opt::basefile) { - if($file !~ m:^/: and $opt::workdir eq "...") { - ::error("Work dir '...' will not work with relative basefiles\n"); - ::wait_and_exit(255); - } - $workdir ||= Job->new("")->workdir(); - $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . 
"&"; - } - } - $cmd .= "wait;"; - debug("init", "basesetup: $cmd\n"); - print `$cmd`; -} - -sub cleanup_basefile { - # Remove the basefiles transferred - # Returns: N/A - my $cmd=""; - my $workdir = Job->new("")->workdir(); - for my $sshlogin (values %Global::host) { - if($sshlogin->string() eq ":") { next } - for my $file (@opt::basefile) { - $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&"; - } - } - $cmd .= "wait;"; - debug("init", "basecleanup: $cmd\n"); - print `$cmd`; -} - -sub filter_hosts { - my(@cores, @cpus, @maxline, @echo); - my $envvar = ::shell_quote_scalar($Global::envvar); - while (my ($host, $sshlogin) = each %Global::host) { - if($host eq ":") { next } - # The 'true' is used to get the $host out later - my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin(); - push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0"); - push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0"); - push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0"); - # 'echo' is used to get the best possible value for an ssh login time - push(@echo, $host."\t".$sshcmd." echo\n\0"); - } - my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh"); - print $fh @cores, @cpus, @maxline, @echo; - close $fh; - # --timeout 5: Setting up an SSH connection and running a simple - # command should never take > 5 sec. - # --delay 0.1: If multiple sshlogins use the same proxy the delay - # will make it less likely to overload the ssh daemon. - # --retries 3: If the ssh daemon it overloaded, try 3 times - # -s 16000: Half of the max line on UnixWare - my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; - ::debug("init", $cmd, "\n"); - open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); - my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); - my $prepend = ""; - while(<$host_fh>) { - if(/\'$/) { - # if last char = ' then append next line - # This may be due to quoting of $Global::envvar - $prepend .= $_; - next; - } - $_ = $prepend . $_; - $prepend = ""; - chomp; - my @col = split /\t/, $_; - if(defined $col[6]) { - # This is a line from --joblog - # seq host time spent sent received exit signal command - # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores - if($col[0] eq "Seq" and $col[1] eq "Host" and - $col[2] eq "Starttime") { - # Header => skip - next; - } - # Get server from: eval true server\; - $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); - my $host = $1; - $host =~ tr/\\//d; - $Global::host{$host} or next; - if($col[6] eq "255" or $col[7] eq "15") { - # exit == 255 or signal == 15: ssh failed - # Remove sshlogin - ::debug("init", "--filtered $host\n"); - push(@down_hosts, $host); - @down_hosts = uniq(@down_hosts); - } elsif($col[6] eq "127") { - # signal == 127: parallel not installed remote - # Set ncpus and ncores = 1 - ::warning("Could not figure out ", - "number of cpus on $host. 
Using 1.\n"); - $ncores{$host} = 1; - $ncpus{$host} = 1; - $maxlen{$host} = Limits::Command::max_length(); - } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { - # Remember how log it took to log in - # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo - $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); - } else { - ::die_bug("host check unmatched long jobline: $_"); - } - } elsif($Global::host{$col[0]}) { - # This output from --number-of-cores, --number-of-cpus, - # --max-line-length-allowed - # ncores: server 8 - # ncpus: server 2 - # maxlen: server 131071 - if(not $ncores{$col[0]}) { - $ncores{$col[0]} = $col[1]; - } elsif(not $ncpus{$col[0]}) { - $ncpus{$col[0]} = $col[1]; - } elsif(not $maxlen{$col[0]}) { - $maxlen{$col[0]} = $col[1]; - } elsif(not $echo{$col[0]}) { - $echo{$col[0]} = $col[1]; - } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { - # Skip these: - # perl: warning: Setting locale failed. - # perl: warning: Please check that your locale settings: - # LANGUAGE = (unset), - # LC_ALL = (unset), - # LANG = "en_US.UTF-8" - # are supported and installed on your system. - # perl: warning: Falling back to the standard locale ("C"). - } else { - ::die_bug("host check too many col0: $_"); - } - } else { - ::die_bug("host check unmatched short jobline ($col[0]): $_"); - } - } - close $host_fh; - $Global::debug or unlink $tmpfile; - delete @Global::host{@down_hosts}; - @down_hosts and ::warning("Removed @down_hosts\n"); - $Global::minimal_command_line_length = 8_000_000; - while (my ($sshlogin, $obj) = each %Global::host) { - if($sshlogin eq ":") { next } - $ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); - $ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); - $time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); - $maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); - if($opt::use_cpus_instead_of_cores) { - $obj->set_ncpus($ncpus{$sshlogin}); - } else { - $obj->set_ncpus($ncores{$sshlogin}); - } - $obj->set_time_to_login($time_to_login{$sshlogin}); - $obj->set_maxlength($maxlen{$sshlogin}); - $Global::minimal_command_line_length = - ::min($Global::minimal_command_line_length, - int($maxlen{$sshlogin}/2)); - ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus{$sshlogin}, - " ncores:", $ncores{$sshlogin}, - " time_to_login:", $time_to_login{$sshlogin}, - " maxlen:", $maxlen{$sshlogin}, - " min_max_len:", $Global::minimal_command_line_length,"\n"); - } -} - -sub onall { - sub tmp_joblog { - my $joblog = shift; - if(not defined $joblog) { - return undef; - } - my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); - close $fh; - return $tmpfile; - } - my @command = @_; - if($Global::quoting) { - @command = shell_quote_empty(@command); - } - - # Copy all @fhlist into tempfiles - my @argfiles = (); - for my $fh (@fhlist) { - my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1); - print $outfh (<$fh>); - close $outfh; - push @argfiles, $name; - } - if(@opt::basefile) { setup_basefile(); } - # for each sshlogin do: - # parallel -S $sshlogin $command :::: @argfiles - # - # Pass some of the options to the sub-parallels, not all of them as - # -P should only go to the first, and -S should not be copied at all. - my $options = - join(" ", - ((defined $opt::jobs) ? "-P $opt::jobs" : ""), - ((defined $opt::linebuffer) ? "--linebuffer" : ""), - ((defined $opt::ungroup) ? "-u" : ""), - ((defined $opt::group) ? 
"-g" : ""), - ((defined $opt::keeporder) ? "--keeporder" : ""), - ((defined $opt::D) ? "-D $opt::D" : ""), - ((defined $opt::plain) ? "--plain" : ""), - ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), - ); - my $suboptions = - join(" ", - ((defined $opt::ungroup) ? "-u" : ""), - ((defined $opt::linebuffer) ? "--linebuffer" : ""), - ((defined $opt::group) ? "-g" : ""), - ((defined $opt::files) ? "--files" : ""), - ((defined $opt::keeporder) ? "--keeporder" : ""), - ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), - ((@opt::v) ? "-vv" : ""), - ((defined $opt::D) ? "-D $opt::D" : ""), - ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), - ((defined $opt::plain) ? "--plain" : ""), - ((defined $opt::retries) ? "--retries ".$opt::retries : ""), - ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), - ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), - ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), - (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), - ); - ::debug("init", "| $0 $options\n"); - open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") || - ::die_bug("This does not run GNU Parallel: $0 $options"); - my @joblogs; - for my $host (sort keys %Global::host) { - my $sshlogin = $Global::host{$host}; - my $joblog = tmp_joblog($opt::joblog); - if($joblog) { - push @joblogs, $joblog; - $joblog = "--joblog $joblog"; - } - my $quad = $opt::arg_file_sep || "::::"; - ::debug("init", "$0 $suboptions -j1 $joblog ", - ((defined $opt::tag) ? - "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), - " -S ", shell_quote_scalar($sshlogin->string())," ", - join(" ",shell_quote(@command))," $quad @argfiles\n"); - print $parallel_fh "$0 $suboptions -j1 $joblog ", - ((defined $opt::tag) ? - "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), - " -S ", shell_quote_scalar($sshlogin->string())," ", - join(" ",shell_quote(@command))," $quad @argfiles\n"; - } - close $parallel_fh; - $Global::exitstatus = $? >> 8; - debug("init", "--onall exitvalue ", $?); - if(@opt::basefile) { cleanup_basefile(); } - $Global::debug or unlink(@argfiles); - my %seen; - for my $joblog (@joblogs) { - # Append to $joblog - open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); - # Skip first line (header); - <$fh>; - print $Global::joblog (<$fh>); - close $fh; - unlink($joblog); - } -} - -sub __SIGNAL_HANDLING__ {} - -sub save_original_signal_handler { - # Remember the original signal handler - # Returns: N/A - $SIG{TERM} ||= sub { exit 0; }; # $SIG{TERM} is not set on Mac OS X - $SIG{INT} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; } - unlink keys %Global::unlink; exit -1 }; - $SIG{TERM} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; } - unlink keys %Global::unlink; exit -1 }; - %Global::original_sig = %SIG; - $SIG{TERM} = sub {}; # Dummy until jobs really start -} - -sub list_running_jobs { - # Returns: N/A - for my $v (values %Global::running) { - print $Global::original_stderr "$Global::progname: ",$v->replaced(),"\n"; - } -} - -sub start_no_new_jobs { - # Returns: N/A - $SIG{TERM} = $Global::original_sig{TERM}; - print $Global::original_stderr - ("$Global::progname: SIGTERM received. No new jobs will be started.\n", - "$Global::progname: Waiting for these ", scalar(keys %Global::running), - " jobs to finish. 
Send SIGTERM again to stop now.\n"); - list_running_jobs(); - $Global::start_no_new_jobs ||= 1; -} - -sub reaper { - # A job finished. - # Print the output. - # Start another job - # Returns: N/A - my $stiff; - my $children_reaped = 0; - debug("run", "Reaper "); - while (($stiff = waitpid(-1, &WNOHANG)) > 0) { - $children_reaped++; - if($Global::sshmaster{$stiff}) { - # This is one of the ssh -M: ignore - next; - } - my $job = $Global::running{$stiff}; - # '-a <(seq 10)' will give us a pid not in %Global::running - $job or next; - $job->set_exitstatus($? >> 8); - $job->set_exitsignal($? & 127); - debug("run", "died (", $job->exitstatus(), "): ", $job->seq()); - $job->set_endtime(::now()); - if($stiff == $Global::tty_taken) { - # The process that died had the tty => release it - $Global::tty_taken = 0; - } - - if(not $job->should_be_retried()) { - # The job is done - # Free the jobslot - push @Global::slots, $job->slot(); - if($opt::timeout) { - # Update average runtime for timeout - $Global::timeoutq->update_delta_time($job->runtime()); - } - # Force printing now if the job failed and we are going to exit - my $print_now = ($opt::halt_on_error and $opt::halt_on_error == 2 - and $job->exitstatus()); - if($opt::keeporder and not $print_now) { - print_earlier_jobs($job); - } else { - $job->print(); - } - if($job->exitstatus()) { - process_failed_job($job); - } - - } - my $sshlogin = $job->sshlogin(); - $sshlogin->dec_jobs_running(); - $sshlogin->inc_jobs_completed(); - $Global::total_running--; - delete $Global::running{$stiff}; - start_more_jobs(); - } - debug("run", "done "); - return $children_reaped; -} - -sub process_failed_job { - # The jobs had a exit status <> 0, so error - # Returns: N/A - my $job = shift; - $Global::exitstatus++; - $Global::total_failed++; - if($opt::halt_on_error) { - if($opt::halt_on_error == 1 - or - ($opt::halt_on_error < 1 and $Global::total_failed > 3 - and - $Global::total_failed / $Global::total_started > $opt::halt_on_error)) { - # If halt on error == 1 or --halt 10% - # we should gracefully exit - print $Global::original_stderr - ("$Global::progname: Starting no more jobs. ", - "Waiting for ", scalar(keys %Global::running), - " jobs to finish. 
This job failed:\n", - $job->replaced(),"\n"); - $Global::start_no_new_jobs ||= 1; - $Global::halt_on_error_exitstatus = $job->exitstatus(); - } elsif($opt::halt_on_error == 2) { - # If halt on error == 2 we should exit immediately - print $Global::original_stderr - ("$Global::progname: This job failed:\n", - $job->replaced(),"\n"); - exit ($job->exitstatus()); - } - } -} - -{ - my (%print_later,$job_end_sequence); - - sub print_earlier_jobs { - # Print jobs completed earlier - # Returns: N/A - my $job = shift; - $print_later{$job->seq()} = $job; - $job_end_sequence ||= 1; - debug("run", "Looking for: $job_end_sequence ", - "Current: ", $job->seq(), "\n"); - for(my $j = $print_later{$job_end_sequence}; - $j or vec($Global::job_already_run,$job_end_sequence,1); - $job_end_sequence++, - $j = $print_later{$job_end_sequence}) { - debug("run", "Found job end $job_end_sequence"); - if($j) { - $j->print(); - delete $print_later{$job_end_sequence}; - } - } - } -} - -sub __USAGE__ {} - -sub wait_and_exit { - # If we do not wait, we sometimes get segfault - # Returns: N/A - my $error = shift; - if($error) { - # Kill all without printing - for my $job (values %Global::running) { - $job->kill("TERM"); - $job->kill("TERM"); - } - } - for (keys %Global::unkilled_children) { - kill 9, $_; - waitpid($_,0); - delete $Global::unkilled_children{$_}; - } - wait(); - exit($error); -} - -sub die_usage { - # Returns: N/A - usage(); - wait_and_exit(255); -} - -sub usage { - # Returns: N/A - print join - ("\n", - "Usage:", - "", - "$Global::progname [options] [command [arguments]] < list_of_arguments", - "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", - "cat ... | $Global::progname --pipe [options] [command [arguments]]", - "", - "-j n Run n jobs in parallel", - "-k Keep same order", - "-X Multiple arguments with context replace", - "--colsep regexp Split input on regexp for positional replacements", - "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", - "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", - "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", - " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", - "", - "-S sshlogin Example: foo\@server.example.com", - "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", - "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", - "--onall Run the given command with argument on all sshlogins", - "--nonall Run the given command with no arguments on all sshlogins", - "", - "--pipe Split stdin (standard input) to multiple jobs.", - "--recend str Record end separator for --pipe.", - "--recstart str Record start separator for --pipe.", - "", - "See 'man $Global::progname' for details", - "", - "When using programs that use GNU Parallel to process data for publication please cite:", - "", - "O. 
Tange (2011): GNU Parallel - The Command-Line Power Tool,", - ";login: The USENIX Magazine, February 2011:42-47.", - "", - "Or you can get GNU Parallel without this requirement by paying 10000 EUR.", - ""); -} - - -sub citation_notice { - # if --no-notice or --plain: do nothing - # if stderr redirected: do nothing - # if ~/.parallel/will-cite: do nothing - # else: print citation notice to stderr - if($opt::no_notice - or - $opt::plain - or - not -t $Global::original_stderr - or - -e $ENV{'HOME'}."/.parallel/will-cite") { - # skip - } else { - print $Global::original_stderr - ("When using programs that use GNU Parallel to process data for publication please cite:\n", - "\n", - " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,\n", - " ;login: The USENIX Magazine, February 2011:42-47.\n", - "\n", - "This helps funding further development; and it won't cost you a cent.\n", - "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n", - "\n", - "To silence this citation notice run 'parallel --bibtex' once or use '--no-notice'.\n\n", - ); - flush $Global::original_stderr; - } -} - - -sub warning { - my @w = @_; - my $fh = $Global::original_stderr || *STDERR; - my $prog = $Global::progname || "parallel"; - print $fh $prog, ": Warning: ", @w; -} - - -sub error { - my @w = @_; - my $fh = $Global::original_stderr || *STDERR; - my $prog = $Global::progname || "parallel"; - print $fh $prog, ": Error: ", @w; -} - - -sub die_bug { - my $bugid = shift; - print STDERR - ("$Global::progname: This should not happen. You have found a bug.\n", - "Please contact and include:\n", - "* The version number: $Global::version\n", - "* The bugid: $bugid\n", - "* The command line being run\n", - "* The files being read (put the files on a webserver if they are big)\n", - "\n", - "If you get the error on smaller/fewer files, please include those instead.\n"); - ::wait_and_exit(255); -} - -sub version { - # Returns: N/A - if($opt::tollef and not $opt::gnu) { - print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n"; - } - print join("\n", - "GNU $Global::progname $Global::version", - "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and Free Software Foundation, Inc.", - "License GPLv3+: GNU GPL version 3 or later ", - "This is free software: you are free to change and redistribute it.", - "GNU $Global::progname comes with no warranty.", - "", - "Web site: http://www.gnu.org/software/${Global::progname}\n", - "When using programs that use GNU Parallel to process data for publication please cite:\n", - "O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ", - ";login: The USENIX Magazine, February 2011:42-47.\n", - "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n", - ); -} - -sub bibtex { - # Returns: N/A - if($opt::tollef and not $opt::gnu) { - print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n"; - } - print join("\n", - "When using programs that use GNU Parallel to process data for publication please cite:", - "", - "\@article{Tange2011a,", - " title = {GNU Parallel - The Command-Line Power Tool},", - " author = {O. 
Tange},", - " address = {Frederiksberg, Denmark},", - " journal = {;login: The USENIX Magazine},", - " month = {Feb},", - " number = {1},", - " volume = {36},", - " url = {http://www.gnu.org/s/parallel},", - " year = {2011},", - " pages = {42-47}", - "}", - "", - "(Feel free to use \\nocite{Tange2011a})", - "", - "This helps funding further development.", - "", - "Or you can get GNU Parallel without this requirement by paying 10000 EUR.", - "" - ); - while(not -e $ENV{'HOME'}."/.parallel/will-cite") { - print "\nType: 'will cite' and press enter.\n> "; - my $input = ; - if($input =~ /will cite/i) { - mkdir $ENV{'HOME'}."/.parallel"; - open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite") - || ::die_bug("Cannot write: ".$ENV{'HOME'}."/.parallel/will-cite"); - close $fh; - print "\nThank you for your support. It is much appreciated. The citation\n", - "notice is now silenced.\n"; - } - } -} - -sub show_limits { - # Returns: N/A - print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", - "Maximal used size of command: ",Limits::Command::max_length(),"\n", - "\n", - "Execution of will continue now, and it will try to read its input\n", - "and run commands; if this is not what you wanted to happen, please\n", - "press CTRL-D or CTRL-C\n"); -} - -sub __GENERIC_COMMON_FUNCTION__ {} - -sub uniq { - # Remove duplicates and return unique values - return keys %{{ map { $_ => 1 } @_ }}; -} - -sub min { - # Returns: - # Minimum value of array - my $min; - for (@_) { - # Skip undefs - defined $_ or next; - defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef - $min = ($min < $_) ? $min : $_; - } - return $min; -} - -sub max { - # Returns: - # Maximum value of array - my $max; - for (@_) { - # Skip undefs - defined $_ or next; - defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef - $max = ($max > $_) ? $max : $_; - } - return $max; -} - -sub sum { - # Returns: - # Sum of values of array - my @args = @_; - my $sum = 0; - for (@args) { - # Skip undefs - $_ and do { $sum += $_; } - } - return $sum; -} - -sub undef_as_zero { - my $a = shift; - return $a ? $a : 0; -} - -sub undef_as_empty { - my $a = shift; - return $a ? $a : ""; -} - -{ - my $hostname; - sub hostname { - if(not $hostname) { - $hostname = `hostname`; - chomp($hostname); - $hostname ||= "nohostname"; - } - return $hostname; - } -} - -sub which { - # Input: - # @programs = programs to find the path to - # Returns: - # @full_path = full paths to @programs. Nothing if not found - my @which; - for my $prg (@_) { - push @which, map { $_."/".$prg } grep { -x $_."/".$prg } split(":",$ENV{'PATH'}); - } - return @which; -} - -{ - my ($regexp,%fakename); - - sub parent_shell { - # Input: - # $pid = pid to see if (grand)*parent is a shell - # Returns: - # $shellpath = path to shell - undef if no shell found - my $pid = shift; - if(not $regexp) { - # All shells known to mankind - # - # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh - # posh rbash rush rzsh sash sh static-sh tcsh yash zsh - my @shells = qw(ash bash csh dash fdsh fish fizsh ksh - ksh93 mksh pdksh posh rbash rush rzsh - sash sh static-sh tcsh yash zsh -sh -csh); - # Can be formatted as: - # [sh] -sh sh busybox sh - # /bin/sh /sbin/sh /opt/csw/sh - # NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh - my $shell = "(?:".join("|",@shells).")"; - $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. 
'))($| )'; - %fakename = ( - # csh and tcsh disguise themselves as -sh/-csh - "-sh" => ["csh", "tcsh"], - "-csh" => ["tcsh", "csh"], - ); - } - my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); - my $shellpath; - my $testpid = $pid; - while($testpid) { - ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n"); - if($name_of_ref->{$testpid} =~ /$regexp/o) { - ::debug("init", "which ".($3||$6)." => "); - $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0]; - ::debug("init", "shell path $shellpath\n"); - $shellpath and last; - } - $testpid = $parent_of_ref->{$testpid}; - } - return $shellpath; - } -} - -{ - my %pid_parentpid_cmd; - - sub pid_table { - # Returns: - # %children_of = { pid -> children of pid } - # %parent_of = { pid -> pid of parent } - # %name_of = { pid -> commandname } - - if(not %pid_parentpid_cmd) { - # Filter for SysV-style `ps` - my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). - q(s/^.{$s}//; print "@F[1,2] $_"' ); - # BSD-style `ps` - my $bsd = q(ps -o pid,ppid,command -ax); - %pid_parentpid_cmd = - ( - 'aix' => $sysv, - 'cygwin' => $sysv, - 'msys' => $sysv, - 'dec_osf' => $sysv, - 'darwin' => $bsd, - 'dragonfly' => $bsd, - 'freebsd' => $bsd, - 'gnu' => $sysv, - 'hpux' => $sysv, - 'linux' => $sysv, - 'mirbsd' => $bsd, - 'netbsd' => $bsd, - 'nto' => $sysv, - 'openbsd' => $bsd, - 'solaris' => $sysv, - 'svr5' => $sysv, - ); - } - $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); - - my (@pidtable,%parent_of,%children_of,%name_of); - # Table with pid -> children of pid - @pidtable = `$pid_parentpid_cmd{$^O}`; - my $p=$$; - for (@pidtable) { - # must match: 24436 21224 busybox ash - /(\S+)\s+(\S+)\s+(\S+.*)/ or ::die_bug("pidtable format: $_"); - $parent_of{$1} = $2; - push @{$children_of{$2}}, $1; - $name_of{$1} = $3; - } - return(\%children_of, \%parent_of, \%name_of); - } -} - -sub reap_usleep { - # Reap dead children. - # If no dead children: Sleep specified amount with exponential backoff - # Input: - # $ms = milliseconds to sleep - # Returns: - # $ms/2+0.001 if children reaped - # $ms*1.1 if no children reaped - my $ms = shift; - if(reaper()) { - # Sleep exponentially shorter (1/2^n) if a job finished - return $ms/2+0.001; - } else { - if($opt::timeout) { - $Global::timeoutq->process_timeouts(); - } - usleep($ms); - Job::exit_if_disk_full(); - if($opt::linebuffer) { - for my $job (values %Global::running) { - $job->print(); - } - } - # Sleep exponentially longer (1.1^n) if a job did not finish - # though at most 1000 ms. - return (($ms < 1000) ? ($ms * 1.1) : ($ms)); - } -} - -sub usleep { - # Sleep this many milliseconds. 
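# A minimal stand-alone sketch of the adaptive polling used by reap_usleep()
# above: halve the delay when a child was reaped, otherwise sleep and grow the
# delay by 10%, capped at 1000 ms. child_finished() is a hypothetical stand-in
# for the real reaper().
use strict;
use warnings;
use POSIX ":sys_wait_h";

sub child_finished {
    # Reap one dead child without blocking; true if something was reaped.
    return waitpid(-1, WNOHANG) > 0;
}

sub next_delay_ms {
    my $ms = shift;
    if(child_finished()) {
        return $ms/2 + 0.001;                  # a job just ended: poll faster
    }
    select(undef, undef, undef, $ms/1000);     # sleep $ms milliseconds
    return $ms < 1000 ? $ms * 1.1 : $ms;       # nothing ended: back off slowly
}

my $delay = 10;
$delay = next_delay_ms($delay) for 1..5;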
- # Input: - # $ms = milliseconds to sleep - my $ms = shift; - ::debug(int($ms),"ms "); - select(undef, undef, undef, $ms/1000); -} - -sub now { - # Returns time since epoch as in seconds with 3 decimals - # Uses: - # @Global::use - # Returns: - # $time = time now with millisecond accuracy - if(not $Global::use{"Time::HiRes"}) { - if(eval "use Time::HiRes qw ( time );") { - eval "sub TimeHiRestime { return Time::HiRes::time };"; - } else { - eval "sub TimeHiRestime { return time() };"; - } - $Global::use{"Time::HiRes"} = 1; - } - - return (int(TimeHiRestime()*1000))/1000; -} - -sub multiply_binary_prefix { - # Evalualte numbers with binary prefix - # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 - # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 - # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 - # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 - # 13G = 13*1024*1024*1024 = 13958643712 - # Input: - # $s = string with prefixes - # Returns: - # $value = int with prefixes multiplied - my $s = shift; - $s =~ s/ki/*1024/gi; - $s =~ s/mi/*1024*1024/gi; - $s =~ s/gi/*1024*1024*1024/gi; - $s =~ s/ti/*1024*1024*1024*1024/gi; - $s =~ s/pi/*1024*1024*1024*1024*1024/gi; - $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; - - $s =~ s/K/*1024/g; - $s =~ s/M/*1024*1024/g; - $s =~ s/G/*1024*1024*1024/g; - $s =~ s/T/*1024*1024*1024*1024/g; - $s =~ s/P/*1024*1024*1024*1024*1024/g; - $s =~ s/E/*1024*1024*1024*1024*1024*1024/g; - $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g; - $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; - $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; - - $s =~ s/k/*1000/g; - $s =~ s/m/*1000*1000/g; - $s =~ s/g/*1000*1000*1000/g; - $s =~ s/t/*1000*1000*1000*1000/g; - $s =~ s/p/*1000*1000*1000*1000*1000/g; - $s =~ s/e/*1000*1000*1000*1000*1000*1000/g; - $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g; - $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; - $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; - - $s = eval $s; - ::debug($s); - return $s; -} - -sub tmpfile { - # Create tempfile as $TMPDIR/parXXXXX - # Returns: - # $filename = file name created - return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); -} - -sub __DEBUGGING__ {} - -sub debug { - # Uses: - # $Global::debug - # %Global::fd - # Returns: N/A - $Global::debug or return; - @_ = grep { defined $_ ? 
$_ : "" } @_; - if($Global::debug eq "all" or $Global::debug eq $_[0]) { - if($Global::fd{1}) { - # Original stdout was saved - my $stdout = $Global::fd{1}; - print $stdout @_[1..$#_]; - } else { - print @_[1..$#_]; - } - } -} - -sub my_memory_usage { - # Returns: - # memory usage if found - # 0 otherwise - use strict; - use FileHandle; - - my $pid = $$; - if(-e "/proc/$pid/stat") { - my $fh = FileHandle->new("; - chomp $data; - $fh->close; - - my @procinfo = split(/\s+/,$data); - - return undef_as_zero($procinfo[22]); - } else { - return 0; - } -} - -sub my_size { - # Returns: - # $size = size of object if Devel::Size is installed - # -1 otherwise - my @size_this = (@_); - eval "use Devel::Size qw(size total_size)"; - if ($@) { - return -1; - } else { - return total_size(@_); - } -} - -sub my_dump { - # Returns: - # ascii expression of object if Data::Dump(er) is installed - # error code otherwise - my @dump_this = (@_); - eval "use Data::Dump qw(dump);"; - if ($@) { - # Data::Dump not installed - eval "use Data::Dumper;"; - if ($@) { - my $err = "Neither Data::Dump nor Data::Dumper is installed\n". - "Not dumping output\n"; - print $Global::original_stderr $err; - return $err; - } else { - return Dumper(@dump_this); - } - } else { - # Create a dummy Data::Dump:dump as Hans Schou sometimes has - # it undefined - eval "sub Data::Dump:dump {}"; - eval "use Data::Dump qw(dump);"; - return (Data::Dump::dump(@dump_this)); - } -} - -sub my_croak { - eval "use Carp; 1"; - $Carp::Verbose = 1; - croak(@_); -} - -sub my_carp { - eval "use Carp; 1"; - $Carp::Verbose = 1; - carp(@_); -} - -sub __OBJECT_ORIENTED_PARTS__ {} - -package SSHLogin; - -sub new { - my $class = shift; - my $sshlogin_string = shift; - my $ncpus; - my %hostgroups; - # SSHLogins can have these formats: - # @grp+grp/ncpu//usr/bin/ssh user@server - # ncpu//usr/bin/ssh user@server - # /usr/bin/ssh user@server - # user@server - # ncpu/user@server - # @grp+grp/user@server - if($sshlogin_string =~ s:^\@([^/]+)/?::) { - # Look for SSHLogin hostgroups - %hostgroups = map { $_ => 1 } split(/\+/, $1); - } - if ($sshlogin_string =~ s:^(\d+)/::) { - # Override default autodetected ncpus unless missing - $ncpus = $1; - } - my $string = $sshlogin_string; - # An SSHLogin is always in the hostgroup of its $string-name - $hostgroups{$string} = 1; - @Global::hostgroups{keys %hostgroups} = values %hostgroups; - my @unget = (); - my $no_slash_string = $string; - $no_slash_string =~ s/[^-a-z0-9:]/_/gi; - return bless { - 'string' => $string, - 'jobs_running' => 0, - 'jobs_completed' => 0, - 'maxlength' => undef, - 'max_jobs_running' => undef, - 'orig_max_jobs_running' => undef, - 'ncpus' => $ncpus, - 'hostgroups' => \%hostgroups, - 'sshcommand' => undef, - 'serverlogin' => undef, - 'control_path_dir' => undef, - 'control_path' => undef, - 'time_to_login' => undef, - 'last_login_at' => undef, - 'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" . - $no_slash_string, - 'loadavg' => undef, - 'last_loadavg_update' => 0, - 'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" . - $no_slash_string, - 'swap_activity' => undef, - }, ref($class) || $class; -} - -sub DESTROY { - my $self = shift; - # Remove temporary files if they are created. 
- unlink $self->{'loadavg_file'}; - unlink $self->{'swap_activity_file'}; -} - -sub string { - my $self = shift; - return $self->{'string'}; -} - -sub jobs_running { - my $self = shift; - - return ($self->{'jobs_running'} || "0"); -} - -sub inc_jobs_running { - my $self = shift; - $self->{'jobs_running'}++; -} - -sub dec_jobs_running { - my $self = shift; - $self->{'jobs_running'}--; -} - -sub set_maxlength { - my $self = shift; - $self->{'maxlength'} = shift; -} - -sub maxlength { - my $self = shift; - return $self->{'maxlength'}; -} - -sub jobs_completed { - my $self = shift; - return $self->{'jobs_completed'}; -} - -sub in_hostgroups { - # Input: - # @hostgroups = the hostgroups to look for - # Returns: - # true if intersection of @hostgroups and the hostgroups of this - # SSHLogin is non-empty - my $self = shift; - return grep { defined $self->{'hostgroups'}{$_} } @_; -} - -sub hostgroups { - my $self = shift; - return keys %{$self->{'hostgroups'}}; -} - -sub inc_jobs_completed { - my $self = shift; - $self->{'jobs_completed'}++; -} - -sub set_max_jobs_running { - my $self = shift; - if(defined $self->{'max_jobs_running'}) { - $Global::max_jobs_running -= $self->{'max_jobs_running'}; - } - $self->{'max_jobs_running'} = shift; - if(defined $self->{'max_jobs_running'}) { - # max_jobs_running could be resat if -j is a changed file - $Global::max_jobs_running += $self->{'max_jobs_running'}; - } - # Initialize orig to the first non-zero value that comes around - $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; -} - -sub swapping { - my $self = shift; - my $swapping = $self->swap_activity(); - return (not defined $swapping or $swapping) -} - -sub swap_activity { - # If the currently known swap activity is too old: - # Recompute a new one in the background - # Returns: - # last swap activity computed - my $self = shift; - # Should we update the swap_activity file? - my $update_swap_activity_file = 0; - if(-r $self->{'swap_activity_file'}) { - open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r"); - my $swap_out = <$swap_fh>; - close $swap_fh; - if($swap_out =~ /^(\d+)$/) { - $self->{'swap_activity'} = $1; - ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); - } - ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); - if(time - $self->{'last_swap_activity_update'} > 10) { - # last swap activity update was started 10 seconds ago - ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); - $update_swap_activity_file = 1; - } - } else { - ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); - $self->{'swap_activity'} = undef; - $update_swap_activity_file = 1; - } - if($update_swap_activity_file) { - ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); - $self->{'last_swap_activity_update'} = time; - -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; - -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; - my $swap_activity; - $swap_activity = swapactivityscript(); - if($self->{'string'} ne ":") { - $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " . - ::shell_quote_scalar($swap_activity); - } - # Run swap_activity measuring. 
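# A minimal sketch of the "write to a temp file, then mv into place, in the
# background" pattern used just below for the swap-activity (and load-average)
# cache files: the measuring command may be slow, e.g. when it runs over ssh,
# so it must never block the scheduler, and readers must only ever see a
# complete file. refresh_cache() is a hypothetical helper.
use strict;
use warnings;
use File::Temp qw(tempfile);

sub refresh_cache {
    my ($cmd, $cachefile) = @_;
    my ($tmpfh, $tmpfile) = tempfile(SUFFIX => ".tmp", TMPDIR => 1);
    close $tmpfh;
    # On success mv replaces the cache in one step; on failure the
    # half-written temp file is removed instead.
    system("($cmd > $tmpfile && mv $tmpfile $cachefile || rm $tmpfile) &");
}

refresh_cache("uptime", "/tmp/loadavg-cache");   # returns immediately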
- # As the command can take long to run if run remote - # save it to a tmp file before moving it to the correct file - my $file = $self->{'swap_activity_file'}; - my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); - ::debug("swap", "\n", $swap_activity, "\n"); - qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; - } - return $self->{'swap_activity'}; -} - -{ - my $script; - - sub swapactivityscript { - # Returns: - # shellscript for detecting swap activity - # - # arguments for vmstat are OS dependant - # swap_in and swap_out are in different columns depending on OS - # - if(not $script) { - my %vmstat = ( - # linux: $7*$8 - # $ vmstat 1 2 - # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- - # r b swpd free buff cache si so bi bo in cs us sy id wa - # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 - # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 - 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], - - # solaris: $6*$7 - # $ vmstat -S 1 2 - # kthr memory page disk faults cpu - # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id - # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 - # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 - 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], - - # darwin (macosx): $21*$22 - # $ vm_stat -c 2 1 - # Mach Virtual Memory Statistics: (page size of 4096 bytes) - # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts - # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 - # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 - 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], - - # ultrix: $12*$13 - # $ vmstat -S 1 2 - # procs faults cpu memory page disk - # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 - # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 - # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 - 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], - - # aix: $6*$7 - # $ vmstat 1 2 - # System configuration: lcpu=1 mem=2048MB - # - # kthr memory page faults cpu - # ----- ----------- ------------------------ ------------ ----------- - # r b avm fre re pi po fr sr cy in sy cs us sy id wa - # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 - # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 - 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], - - # freebsd: $8*$9 - # $ vmstat -H 1 2 - # procs memory page disks faults cpu - # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id - # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 - # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 - 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], - - # mirbsd: $8*$9 - # $ vmstat 1 2 - # procs memory page disks traps cpu - # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id - # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 - # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 - 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], - - # netbsd: $7*$8 - # $ vmstat 1 2 - # procs memory page disks faults cpu - # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id - # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 - # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 - 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], - - # openbsd: $8*$9 - # $ vmstat 1 2 - # 
procs memory page disks traps cpu - # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id - # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 - # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 - 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], - - # hpux: $8*$9 - # $ vmstat 1 2 - # procs memory page faults cpu - # r b w avm free re at pi po fr de sr in sy cs us sy id - # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 - # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 - 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], - - # dec_osf (tru64): $11*$12 - # $ vmstat 1 2 - # Virtual Memory Statistics: (pagesize = 8192) - # procs memory pages intr cpu - # r w u act free wire fault cow zero react pin pout in sy cs us sy id - # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 - # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 - 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], - - # gnu (hurd): $7*$8 - # $ vmstat -k 1 2 - # (pagesize: 4, size: 512288, swap size: 894972) - # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree - # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 - # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 - 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], - - # -nto (qnx has no swap) - #-irix - #-svr5 (scosysv) - ); - my $perlscript = ""; - for my $os (keys %vmstat) { - #q[ { vmstat 1 2 2> /dev/null || vmstat -c 1 2; } | ]. - # q[ awk 'NR!=4{next} NF==17||NF==16{print $7*$8} NF==22{print $21*$22} {exit}' ]; - $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ - $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . - $vmstat{$os}[1] . '}"` }'; - } - $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); - $script = $Global::envvar. " " .$perlscript; - } - return $script; - } -} - -sub too_fast_remote_login { - my $self = shift; - if($self->{'last_login_at'} and $self->{'time_to_login'}) { - # sshd normally allows 10 simultaneous logins - # A login takes time_to_login - # So time_to_login/5 should be safe - # If now <= last_login + time_to_login/5: Then it is too soon. - my $too_fast = (::now() <= $self->{'last_login_at'} - + $self->{'time_to_login'}/5); - ::debug("run", "Too fast? $too_fast "); - return $too_fast; - } else { - # No logins so far (or time_to_login not computed): it is not too fast - return 0; - } -} - -sub last_login_at { - my $self = shift; - return $self->{'last_login_at'}; -} - -sub set_last_login_at { - my $self = shift; - $self->{'last_login_at'} = shift; -} - -sub loadavg_too_high { - my $self = shift; - my $loadavg = $self->loadavg(); - return (not defined $loadavg or - $loadavg > $self->max_loadavg()); -} - -sub loadavg { - # If the currently know loadavg is too old: - # Recompute a new one in the background - # The load average is computed as the number of processes waiting for disk - # or CPU right now. So it is the server load this instant and not averaged over - # several minutes. This is needed so GNU Parallel will at most start one job - # that will push the load over the limit. - # - # Returns: - # $last_loadavg = last load average computed (undef if none) - my $self = shift; - # Should we update the loadavg file? 
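# A minimal sketch of the instantaneous load measurement described above:
# count the processes that are running (R) or waiting for disk (D) in
# `ps ax -o state,command`, skip kernel threads ([kworker] etc.), and
# subtract one for the ps process itself. The state regex is a simplified
# variant of the one used in loadavg().
use strict;
use warnings;

sub instant_load {
    my @ps = `ps ax -o state,command`;
    my $load = grep { /^[DR]\S*\s+[^\[]/ } @ps;
    return $load > 0 ? $load - 1 : 0;
}

printf "load right now: %d\n", instant_load();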
- my $update_loadavg_file = 0; - if(open(my $load_fh, "<", $self->{'loadavg_file'})) { - local $/ = undef; - my $load_out = <$load_fh>; - close $load_fh; - my $load =()= ($load_out=~/(^[DR]....[^\[])/gm); - if($load > 0) { - # load is overestimated by 1 - $self->{'loadavg'} = $load - 1; - ::debug("load", "New loadavg: ", $self->{'loadavg'}); - } else { - ::die_bug("loadavg_invalid_content: $load_out"); - } - ::debug("load", "Last update: ", $self->{'last_loadavg_update'}); - if(time - $self->{'last_loadavg_update'} > 10) { - # last loadavg was started 10 seconds ago - ::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ", - $self->{'loadavg_file'}); - $update_loadavg_file = 1; - } - } else { - ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); - $self->{'loadavg'} = undef; - $update_loadavg_file = 1; - } - if($update_loadavg_file) { - ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); - $self->{'last_loadavg_update'} = time; - -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; - -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; - my $cmd = ""; - if($self->{'string'} ne ":") { - $cmd = $self->sshcommand() . " " . $self->serverlogin() . " "; - } - # TODO Is is called 'ps ax -o state,command' on other platforms? - $cmd .= "ps ax -o state,command"; - # As the command can take long to run if run remote - # save it to a tmp file before moving it to the correct file - my $file = $self->{'loadavg_file'}; - my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa"); - qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; - } - return $self->{'loadavg'}; -} - -sub max_loadavg { - my $self = shift; - # If --load is a file it might be changed - if($Global::max_load_file) { - my $mtime = (stat($Global::max_load_file))[9]; - if($mtime > $Global::max_load_file_last_mod) { - $Global::max_load_file_last_mod = $mtime; - for my $sshlogin (values %Global::host) { - $sshlogin->set_max_loadavg(undef); - } - } - } - if(not defined $self->{'max_loadavg'}) { - $self->{'max_loadavg'} = - $self->compute_max_loadavg($opt::load); - } - ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); - return $self->{'max_loadavg'}; -} - -sub set_max_loadavg { - my $self = shift; - $self->{'max_loadavg'} = shift; -} - -sub compute_max_loadavg { - # Parse the max loadaverage that the user asked for using --load - # Returns: - # max loadaverage - my $self = shift; - my $loadspec = shift; - my $load; - if(defined $loadspec) { - if($loadspec =~ /^\+(\d+)$/) { - # E.g. --load +2 - my $j = $1; - $load = - $self->ncpus() + $j; - } elsif ($loadspec =~ /^-(\d+)$/) { - # E.g. 
--load -2 - my $j = $1; - $load = - $self->ncpus() - $j; - } elsif ($loadspec =~ /^(\d+)\%$/) { - my $j = $1; - $load = - $self->ncpus() * $j / 100; - } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { - $load = $1; - } elsif (-f $loadspec) { - $Global::max_load_file = $loadspec; - $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; - if(open(my $in_fh, "<", $Global::max_load_file)) { - my $opt_load_file = join("",<$in_fh>); - close $in_fh; - $load = $self->compute_max_loadavg($opt_load_file); - } else { - print $Global::original_stderr "Cannot open $loadspec\n"; - ::wait_and_exit(255); - } - } else { - print $Global::original_stderr "Parsing of --load failed\n"; - ::die_usage(); - } - if($load < 0.01) { - $load = 0.01; - } - } - return $load; -} - -sub time_to_login { - my $self = shift; - return $self->{'time_to_login'}; -} - -sub set_time_to_login { - my $self = shift; - $self->{'time_to_login'} = shift; -} - -sub max_jobs_running { - my $self = shift; - if(not defined $self->{'max_jobs_running'}) { - my $nproc = $self->compute_number_of_processes($opt::jobs); - $self->set_max_jobs_running($nproc); - } - return $self->{'max_jobs_running'}; -} - -sub orig_max_jobs_running { - my $self = shift; - return $self->{'orig_max_jobs_running'}; -} - -sub compute_number_of_processes { - # Number of processes wanted and limited by system resources - # Returns: - # Number of processes - my $self = shift; - my $opt_P = shift; - my $wanted_processes = $self->user_requested_processes($opt_P); - if(not defined $wanted_processes) { - $wanted_processes = $Global::default_simultaneous_sshlogins; - } - ::debug("load", "Wanted procs: $wanted_processes\n"); - my $system_limit = - $self->processes_available_by_system_limit($wanted_processes); - ::debug("load", "Limited to procs: $system_limit\n"); - return $system_limit; -} - -sub processes_available_by_system_limit { - # If the wanted number of processes is bigger than the system limits: - # Limit them to the system limits - # Limits are: File handles, number of input lines, processes, - # and taking > 1 second to spawn 10 extra processes - # Returns: - # Number of processes - my $self = shift; - my $wanted_processes = shift; - - my $system_limit = 0; - my @jobs = (); - my $job; - my @args = (); - my $arg; - my $more_filehandles = 1; - my $max_system_proc_reached = 0; - my $slow_spawining_warning_printed = 0; - my $time = time; - my %fh; - my @children; - - # Reserve filehandles - # perl uses 7 filehandles for something? - # parallel uses 1 for memory_usage - # parallel uses 4 for ? 
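# A minimal sketch of the resource probing performed below: for every
# potential job slot, open a few file handles and fork a sleeping child;
# stop when the wanted number of slots is reached, a fork fails, or no more
# file handles can be opened, then clean everything up again.
# probe_job_slots() is a hypothetical stand-alone helper.
use strict;
use warnings;

sub probe_job_slots {
    my $wanted = shift;
    my $slots = 0;
    my (%fh, @children);
    while($slots < $wanted) {
        # Each grouped job needs a couple of handles for stdout/stderr.
        last unless open($fh{"$slots-out"}, "<", "/dev/null")
                and open($fh{"$slots-err"}, "<", "/dev/null");
        my $pid = fork();
        if(not defined $pid) { last }                 # process table exhausted
        if($pid == 0)        { sleep 10000; exit 0 }  # child only holds a slot
        push @children, $pid;
        $slots++;
    }
    close $_ for values %fh;                          # release the probe handles
    kill 9, @children;                                # and the probe children
    waitpid($_, 0) for @children;
    return $slots;
}

print probe_job_slots(100), " job slots available\n";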
- for my $i (1..12) { - open($fh{"init-$i"}, "<", "/dev/null"); - } - - for(1..2) { - # System process limit - my $child; - if($child = fork()) { - push (@children,$child); - $Global::unkilled_children{$child} = 1; - } elsif(defined $child) { - # The child takes one process slot - # It will be killed later - $SIG{TERM} = $Global::original_sig{TERM}; - sleep 10000000; - exit(0); - } else { - $max_system_proc_reached = 1; - } - } - my $count_jobs_already_read = $Global::JobQueue->next_seq(); - my $wait_time_for_getting_args = 0; - my $start_time = time; - while(1) { - $system_limit >= $wanted_processes and last; - not $more_filehandles and last; - $max_system_proc_reached and last; - my $before_getting_arg = time; - if($Global::semaphore or $opt::pipe) { - # Skip: No need to get args - } elsif(defined $opt::retries and $count_jobs_already_read) { - # For retries we may need to run all jobs on this sshlogin - # so include the already read jobs for this sshlogin - $count_jobs_already_read--; - } else { - if($opt::X or $opt::m) { - # The arguments may have to be re-spread over several jobslots - # So pessimistically only read one arg per jobslot - # instead of a full commandline - if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { - if($Global::JobQueue->empty()) { - last; - } else { - ($job) = $Global::JobQueue->get(); - push(@jobs, $job); - } - } else { - ($arg) = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); - push(@args, $arg); - } - } else { - # If there are no more command lines, then we have a process - # per command line, so no need to go further - $Global::JobQueue->empty() and last; - ($job) = $Global::JobQueue->get(); - push(@jobs, $job); - } - } - $wait_time_for_getting_args += time - $before_getting_arg; - $system_limit++; - - # Every simultaneous process uses 2 filehandles when grouping - # Every simultaneous process uses 2 filehandles when compressing - $more_filehandles = open($fh{$system_limit*10}, "<", "/dev/null") - && open($fh{$system_limit*10+2}, "<", "/dev/null") - && open($fh{$system_limit*10+3}, "<", "/dev/null") - && open($fh{$system_limit*10+4}, "<", "/dev/null"); - - # System process limit - my $child; - if($child = fork()) { - push (@children,$child); - $Global::unkilled_children{$child} = 1; - } elsif(defined $child) { - # The child takes one process slot - # It will be killed later - $SIG{TERM} = $Global::original_sig{TERM}; - sleep 10000000; - exit(0); - } else { - $max_system_proc_reached = 1; - } - my $forktime = time - $time - $wait_time_for_getting_args; - ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ", - $forktime, - " (processes so far: ", $system_limit,")\n"); - if($system_limit > 10 and - $forktime > 1 and - $forktime > $system_limit * 0.01 - and not $slow_spawining_warning_printed) { - # It took more than 0.01 second to fork a processes on avg. - # Give the user a warning. He can press Ctrl-C if this - # sucks. - print $Global::original_stderr - ("parallel: Warning: Starting $system_limit processes took > $forktime sec.\n", - "Consider adjusting -j. 
Press CTRL-C to stop.\n"); - $slow_spawining_warning_printed = 1; - } - } - # Cleanup: Close the files - for (values %fh) { close $_ } - # Cleanup: Kill the children - for my $pid (@children) { - kill 9, $pid; - waitpid($pid,0); - delete $Global::unkilled_children{$pid}; - } - # Cleanup: Unget the command_lines or the @args - $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args); - $Global::JobQueue->unget(@jobs); - if($system_limit < $wanted_processes) { - # The system_limit is less than the wanted_processes - if($system_limit < 1 and not $Global::JobQueue->empty()) { - ::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf\n", - "or /proc/sys/kernel/pid_max may help.\n"); - ::wait_and_exit(255); - } - if(not $more_filehandles) { - ::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n", - "Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ", - "raising ulimit -n or /etc/security/limits.conf may help.\n"); - } - if($max_system_proc_reached) { - ::warning("Only enough available processes to run ", $system_limit, - " jobs in parallel. Raising ulimit -u or /etc/security/limits.conf\n", - "or /proc/sys/kernel/pid_max may help.\n"); - } - } - if($] == 5.008008 and $system_limit > 1000) { - # https://savannah.gnu.org/bugs/?36942 - $system_limit = 1000; - } - if($Global::JobQueue->empty()) { - $system_limit ||= 1; - } - if($self->string() ne ":" and - $system_limit > $Global::default_simultaneous_sshlogins) { - $system_limit = - $self->simultaneous_sshlogin_limit($system_limit); - } - return $system_limit; -} - -sub simultaneous_sshlogin_limit { - # Test by logging in wanted number of times simultaneously - # Returns: - # min($wanted_processes,$working_simultaneous_ssh_logins-1) - my $self = shift; - my $wanted_processes = shift; - if($self->{'time_to_login'}) { - return $wanted_processes; - } - - # Try twice because it guesses wrong sometimes - # Choose the minimal - my $ssh_limit = - ::min($self->simultaneous_sshlogin($wanted_processes), - $self->simultaneous_sshlogin($wanted_processes)); - if($ssh_limit < $wanted_processes) { - my $serverlogin = $self->serverlogin(); - ::warning("ssh to $serverlogin only allows ", - "for $ssh_limit simultaneous logins.\n", - "You may raise this by changing ", - "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n", - "Using only ",$ssh_limit-1," connections ", - "to avoid race conditions.\n"); - } - # Race condition can cause problem if using all sshs. - if($ssh_limit > 1) { $ssh_limit -= 1; } - return $ssh_limit; -} - -sub simultaneous_sshlogin { - # Using $sshlogin try to see if we can do $wanted_processes - # simultaneous logins - # (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l - # Returns: - # Number of succesful logins - my $self = shift; - my $wanted_processes = shift; - my $sshcmd = $self->sshcommand(); - my $serverlogin = $self->serverlogin(); - my $sshdelay = $opt::sshdelay ? 
"sleep $opt::sshdelay;" : ""; - my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin &1 &"x$wanted_processes; - ::debug("init", "Trying $wanted_processes logins at $serverlogin\n"); - open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or - ::die_bug("simultaneouslogin"); - my $ssh_limit = <$simul_fh>; - close $simul_fh; - chomp $ssh_limit; - return $ssh_limit; -} - -sub set_ncpus { - my $self = shift; - $self->{'ncpus'} = shift; -} - -sub user_requested_processes { - # Parse the number of processes that the user asked for using -j - # Returns: - # the number of processes to run on this sshlogin - my $self = shift; - my $opt_P = shift; - my $processes; - if(defined $opt_P) { - if($opt_P =~ /^\+(\d+)$/) { - # E.g. -P +2 - my $j = $1; - $processes = - $self->ncpus() + $j; - } elsif ($opt_P =~ /^-(\d+)$/) { - # E.g. -P -2 - my $j = $1; - $processes = - $self->ncpus() - $j; - } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { - # E.g. -P 10.5% - my $j = $1; - $processes = - $self->ncpus() * $j / 100; - } elsif ($opt_P =~ /^(\d+)$/) { - $processes = $1; - if($processes == 0) { - # -P 0 = infinity (or at least close) - $processes = $Global::infinity; - } - } elsif (-f $opt_P) { - $Global::max_procs_file = $opt_P; - $Global::max_procs_file_last_mod = (stat($Global::max_procs_file))[9]; - if(open(my $in_fh, "<", $Global::max_procs_file)) { - my $opt_P_file = join("",<$in_fh>); - close $in_fh; - $processes = $self->user_requested_processes($opt_P_file); - } else { - ::error("Cannot open $opt_P.\n"); - ::wait_and_exit(255); - } - } else { - ::error("Parsing of --jobs/-j/--max-procs/-P failed.\n"); - ::die_usage(); - } - $processes = ::ceil($processes); - } - return $processes; -} - -sub ncpus { - my $self = shift; - if(not defined $self->{'ncpus'}) { - my $sshcmd = $self->sshcommand(); - my $serverlogin = $self->serverlogin(); - if($serverlogin eq ":") { - if($opt::use_cpus_instead_of_cores) { - $self->{'ncpus'} = no_of_cpus(); - } else { - $self->{'ncpus'} = no_of_cores(); - } - } else { - my $ncpu; - my $sqe = ::shell_quote_scalar($Global::envvar); - if($opt::use_cpus_instead_of_cores) { - $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus); - } else { - ::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n)); - $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores); - } - chomp $ncpu; - if($ncpu =~ /^\s*[0-9]+\s*$/s) { - $self->{'ncpus'} = $ncpu; - } else { - ::warning("Could not figure out ", - "number of cpus on $serverlogin ($ncpu). 
Using 1.\n"); - $self->{'ncpus'} = 1; - } - } - } - return $self->{'ncpus'}; -} - -sub no_of_cpus { - # Returns: - # Number of physical CPUs - local $/="\n"; # If delimiter is set, then $/ will be wrong - my $no_of_cpus; - if ($^O eq 'linux') { - $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux(); - } elsif ($^O eq 'freebsd') { - $no_of_cpus = no_of_cpus_freebsd(); - } elsif ($^O eq 'netbsd') { - $no_of_cpus = no_of_cpus_netbsd(); - } elsif ($^O eq 'openbsd') { - $no_of_cpus = no_of_cpus_openbsd(); - } elsif ($^O eq 'gnu') { - $no_of_cpus = no_of_cpus_hurd(); - } elsif ($^O eq 'darwin') { - $no_of_cpus = no_of_cpus_darwin(); - } elsif ($^O eq 'solaris') { - $no_of_cpus = no_of_cpus_solaris(); - } elsif ($^O eq 'aix') { - $no_of_cpus = no_of_cpus_aix(); - } elsif ($^O eq 'hpux') { - $no_of_cpus = no_of_cpus_hpux(); - } elsif ($^O eq 'nto') { - $no_of_cpus = no_of_cpus_qnx(); - } elsif ($^O eq 'svr5') { - $no_of_cpus = no_of_cpus_openserver(); - } elsif ($^O eq 'irix') { - $no_of_cpus = no_of_cpus_irix(); - } elsif ($^O eq 'dec_osf') { - $no_of_cpus = no_of_cpus_tru64(); - } else { - $no_of_cpus = (no_of_cpus_gnu_linux() - || no_of_cpus_freebsd() - || no_of_cpus_netbsd() - || no_of_cpus_openbsd() - || no_of_cpus_hurd() - || no_of_cpus_darwin() - || no_of_cpus_solaris() - || no_of_cpus_aix() - || no_of_cpus_hpux() - || no_of_cpus_qnx() - || no_of_cpus_openserver() - || no_of_cpus_irix() - || no_of_cpus_tru64() - # Number of cores is better than no guess for #CPUs - || nproc() - ); - } - if($no_of_cpus) { - chomp $no_of_cpus; - return $no_of_cpus; - } else { - ::warning("Cannot figure out number of cpus. Using 1.\n"); - return 1; - } -} - -sub no_of_cores { - # Returns: - # Number of CPU cores - local $/="\n"; # If delimiter is set, then $/ will be wrong - my $no_of_cores; - if ($^O eq 'linux') { - $no_of_cores = no_of_cores_gnu_linux(); - } elsif ($^O eq 'freebsd') { - $no_of_cores = no_of_cores_freebsd(); - } elsif ($^O eq 'netbsd') { - $no_of_cores = no_of_cores_netbsd(); - } elsif ($^O eq 'openbsd') { - $no_of_cores = no_of_cores_openbsd(); - } elsif ($^O eq 'gnu') { - $no_of_cores = no_of_cores_hurd(); - } elsif ($^O eq 'darwin') { - $no_of_cores = no_of_cores_darwin(); - } elsif ($^O eq 'solaris') { - $no_of_cores = no_of_cores_solaris(); - } elsif ($^O eq 'aix') { - $no_of_cores = no_of_cores_aix(); - } elsif ($^O eq 'hpux') { - $no_of_cores = no_of_cores_hpux(); - } elsif ($^O eq 'nto') { - $no_of_cores = no_of_cores_qnx(); - } elsif ($^O eq 'svr5') { - $no_of_cores = no_of_cores_openserver(); - } elsif ($^O eq 'irix') { - $no_of_cores = no_of_cores_irix(); - } elsif ($^O eq 'dec_osf') { - $no_of_cores = no_of_cores_tru64(); - } else { - $no_of_cores = (no_of_cores_gnu_linux() - || no_of_cores_freebsd() - || no_of_cores_netbsd() - || no_of_cores_openbsd() - || no_of_cores_hurd() - || no_of_cores_darwin() - || no_of_cores_solaris() - || no_of_cores_aix() - || no_of_cores_hpux() - || no_of_cores_qnx() - || no_of_cores_openserver() - || no_of_cores_irix() - || no_of_cores_tru64() - || nproc() - ); - } - if($no_of_cores) { - chomp $no_of_cores; - return $no_of_cores; - } else { - ::warning("Cannot figure out number of CPU cores. 
Using 1.\n"); - return 1; - } -} - -sub nproc { - # Returns: - # Number of cores using `nproc` - my $no_of_cores = `nproc 2>/dev/null`; - return $no_of_cores; -} - -sub no_of_cpus_gnu_linux { - # Returns: - # Number of physical CPUs on GNU/Linux - # undef if not GNU/Linux - my $no_of_cpus; - my $no_of_cores; - if(-e "/proc/cpuinfo") { - $no_of_cpus = 0; - $no_of_cores = 0; - my %seen; - open(my $in_fh, "<", "/proc/cpuinfo") || return undef; - while(<$in_fh>) { - if(/^physical id.*[:](.*)/ and not $seen{$1}++) { - $no_of_cpus++; - } - /^processor.*[:]/i and $no_of_cores++; - } - close $in_fh; - } - return ($no_of_cpus||$no_of_cores); -} - -sub no_of_cores_gnu_linux { - # Returns: - # Number of CPU cores on GNU/Linux - # undef if not GNU/Linux - my $no_of_cores; - if(-e "/proc/cpuinfo") { - $no_of_cores = 0; - open(my $in_fh, "<", "/proc/cpuinfo") || return undef; - while(<$in_fh>) { - /^processor.*[:]/i and $no_of_cores++; - } - close $in_fh; - } - return $no_of_cores; -} - -sub no_of_cpus_freebsd { - # Returns: - # Number of physical CPUs on FreeBSD - # undef if not FreeBSD - my $no_of_cpus = - (`sysctl -a dev.cpu 2>/dev/null | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }'` - or - `sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`); - chomp $no_of_cpus; - return $no_of_cpus; -} - -sub no_of_cores_freebsd { - # Returns: - # Number of CPU cores on FreeBSD - # undef if not FreeBSD - my $no_of_cores = - (`sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'` - or - `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`); - chomp $no_of_cores; - return $no_of_cores; -} - -sub no_of_cpus_netbsd { - # Returns: - # Number of physical CPUs on NetBSD - # undef if not NetBSD - my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`; - chomp $no_of_cpus; - return $no_of_cpus; -} - -sub no_of_cores_netbsd { - # Returns: - # Number of CPU cores on NetBSD - # undef if not NetBSD - my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`; - chomp $no_of_cores; - return $no_of_cores; -} - -sub no_of_cpus_openbsd { - # Returns: - # Number of physical CPUs on OpenBSD - # undef if not OpenBSD - my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`; - chomp $no_of_cpus; - return $no_of_cpus; -} - -sub no_of_cores_openbsd { - # Returns: - # Number of CPU cores on OpenBSD - # undef if not OpenBSD - my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`; - chomp $no_of_cores; - return $no_of_cores; -} - -sub no_of_cpus_hurd { - # Returns: - # Number of physical CPUs on HURD - # undef if not HURD - my $no_of_cpus = `nproc`; - chomp $no_of_cpus; - return $no_of_cpus; -} - -sub no_of_cores_hurd { - # Returns: - # Number of physical CPUs on HURD - # undef if not HURD - my $no_of_cores = `nproc`; - chomp $no_of_cores; - return $no_of_cores; -} - -sub no_of_cpus_darwin { - # Returns: - # Number of physical CPUs on Mac Darwin - # undef if not Mac Darwin - my $no_of_cpus = - (`sysctl -n hw.physicalcpu 2>/dev/null` - or - `sysctl -a hw 2>/dev/null | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }'`); - return $no_of_cpus; -} - -sub no_of_cores_darwin { - # Returns: - # Number of CPU cores on Mac Darwin - # undef if not Mac Darwin - my $no_of_cores = - (`sysctl -n hw.logicalcpu 2>/dev/null` - or - `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`); - return $no_of_cores; -} - -sub no_of_cpus_solaris { - # Returns: - # Number of physical CPUs on Solaris - # undef if not Solaris - if(-x "/usr/sbin/psrinfo") { - my @psrinfo = `/usr/sbin/psrinfo`; - if($#psrinfo >= 0) { - 
return $#psrinfo +1; - } - } - if(-x "/usr/sbin/prtconf") { - my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; - if($#prtconf >= 0) { - return $#prtconf +1; - } - } - return undef; -} - -sub no_of_cores_solaris { - # Returns: - # Number of CPU cores on Solaris - # undef if not Solaris - if(-x "/usr/sbin/psrinfo") { - my @psrinfo = `/usr/sbin/psrinfo`; - if($#psrinfo >= 0) { - return $#psrinfo +1; - } - } - if(-x "/usr/sbin/prtconf") { - my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; - if($#prtconf >= 0) { - return $#prtconf +1; - } - } - return undef; -} - -sub no_of_cpus_aix { - # Returns: - # Number of physical CPUs on AIX - # undef if not AIX - my $no_of_cpus = 0; - if(-x "/usr/sbin/lscfg") { - open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '") - || return undef; - $no_of_cpus = <$in_fh>; - chomp ($no_of_cpus); - close $in_fh; - } - return $no_of_cpus; -} - -sub no_of_cores_aix { - # Returns: - # Number of CPU cores on AIX - # undef if not AIX - my $no_of_cores; - if(-x "/usr/bin/vmstat") { - open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef; - while(<$in_fh>) { - /lcpu=([0-9]*) / and $no_of_cores = $1; - } - close $in_fh; - } - return $no_of_cores; -} - -sub no_of_cpus_hpux { - # Returns: - # Number of physical CPUs on HP-UX - # undef if not HP-UX - my $no_of_cpus = - (`/usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'`); - return $no_of_cpus; -} - -sub no_of_cores_hpux { - # Returns: - # Number of CPU cores on HP-UX - # undef if not HP-UX - my $no_of_cores = - (`/usr/bin/mpsched -s 2>&1 | grep 'Processor Count' | awk '{ print \$3 }'`); - return $no_of_cores; -} - -sub no_of_cpus_qnx { - # Returns: - # Number of physical CPUs on QNX - # undef if not QNX - # BUG: It is now known how to calculate this. - my $no_of_cpus = 0; - return $no_of_cpus; -} - -sub no_of_cores_qnx { - # Returns: - # Number of CPU cores on QNX - # undef if not QNX - # BUG: It is now known how to calculate this. 
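# A minimal sketch of the GNU/Linux counting shown above: physical CPUs are
# the number of distinct "physical id" values in /proc/cpuinfo, while cores
# are simply the number of "processor" entries; the other platforms fall
# back to tools such as sysctl, psrinfo, lscfg or mpsched instead.
use strict;
use warnings;

sub linux_cpus_and_cores {
    my %physical;
    my $cores = 0;
    open(my $fh, "<", "/proc/cpuinfo") or return;
    while(<$fh>) {
        $physical{$1}++ if /^physical id\s*:\s*(\d+)/;
        $cores++        if /^processor\s*:/;
    }
    close $fh;
    return (scalar(keys %physical) || $cores, $cores);
}

my ($cpus, $cores) = linux_cpus_and_cores();
print "physical CPUs: $cpus, cores: $cores\n" if defined $cpus;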
- my $no_of_cores = 0; - return $no_of_cores; -} - -sub no_of_cpus_openserver { - # Returns: - # Number of physical CPUs on SCO OpenServer - # undef if not SCO OpenServer - my $no_of_cpus = 0; - if(-x "/usr/sbin/psrinfo") { - my @psrinfo = `/usr/sbin/psrinfo`; - if($#psrinfo >= 0) { - return $#psrinfo +1; - } - } - return $no_of_cpus; -} - -sub no_of_cores_openserver { - # Returns: - # Number of CPU cores on SCO OpenServer - # undef if not SCO OpenServer - my $no_of_cores = 0; - if(-x "/usr/sbin/psrinfo") { - my @psrinfo = `/usr/sbin/psrinfo`; - if($#psrinfo >= 0) { - return $#psrinfo +1; - } - } - return $no_of_cores; -} - -sub no_of_cpus_irix { - # Returns: - # Number of physical CPUs on IRIX - # undef if not IRIX - my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; - return $no_of_cpus; -} - -sub no_of_cores_irix { - # Returns: - # Number of CPU cores on IRIX - # undef if not IRIX - my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; - return $no_of_cores; -} - -sub no_of_cpus_tru64 { - # Returns: - # Number of physical CPUs on Tru64 - # undef if not Tru64 - my $no_of_cpus = `sizer -pr`; - return $no_of_cpus; -} - -sub no_of_cores_tru64 { - # Returns: - # Number of CPU cores on Tru64 - # undef if not Tru64 - my $no_of_cores = `sizer -pr`; - return $no_of_cores; -} - -sub sshcommand { - my $self = shift; - if (not defined $self->{'sshcommand'}) { - $self->sshcommand_of_sshlogin(); - } - return $self->{'sshcommand'}; -} - -sub serverlogin { - my $self = shift; - if (not defined $self->{'serverlogin'}) { - $self->sshcommand_of_sshlogin(); - } - return $self->{'serverlogin'}; -} - -sub sshcommand_of_sshlogin { - # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server') - # 'user@server' -> ('ssh','user@server') - # 'myssh user@server' -> ('myssh','user@server') - # 'myssh -l user server' -> ('myssh -l user','server') - # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server') - # Returns: - # sshcommand - defaults to 'ssh' - # login@host - my $self = shift; - my ($sshcmd, $serverlogin); - if($self->{'string'} =~ /(.+) (\S+)$/) { - # Own ssh command - $sshcmd = $1; $serverlogin = $2; - } else { - # Normal ssh - if($opt::controlmaster) { - # Use control_path to make ssh faster - my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; - $sshcmd = "ssh -S ".$control_path; - $serverlogin = $self->{'string'}; - if(not $self->{'control_path'}{$control_path}++) { - # Master is not running for this control_path - # Start it - my $pid = fork(); - if($pid) { - $Global::sshmaster{$pid} ||= 1; - } else { - $SIG{'TERM'} = undef; - # Ignore the 'foo' being printed - open(STDOUT,">","/dev/null"); - # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt - # STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument" - open(STDERR,">","/dev/null"); - open(STDIN,"<","/dev/null"); - # Run a sleep that outputs data, so it will discover if the ssh connection closes. 
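# A minimal sketch of the ControlMaster setup used by sshcommand_of_sshlogin()
# here: one master connection per host is started with -M and a shared socket
# (-S), kept alive by a remote loop that prints a byte every second so a dead
# link is noticed; every later ssh reuses the socket and skips the login
# handshake. The host name is a placeholder.
use strict;
use warnings;
use File::Path qw(make_path);

my $host         = 'user@server.example.com';             # placeholder
my $control_dir  = "/tmp/parallel-ssh-$$";
make_path($control_dir);
my $control_path = "$control_dir/ssh-%r\@%h:%p";

my $pid = fork();
if(defined $pid and $pid == 0) {
    # Master process: its output is discarded; the perl loop keeps it busy.
    open(STDOUT, ">", "/dev/null");
    open(STDERR, ">", "/dev/null");
    open(STDIN,  "<", "/dev/null");
    exec("ssh", "-tt", "-MTS", $control_path, $host,
         "perl", "-e", '$|=1;while(1){sleep 1;print "foo\n"}');
}
sleep 3;                                   # give the master time to connect
# Every later login reuses the master's socket.
system("ssh", "-S", $control_path, $host, "hostname");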
- my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}'); - my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep); - exec(@master); - } - } - } else { - $sshcmd = "ssh"; $serverlogin = $self->{'string'}; - } - } - $self->{'sshcommand'} = $sshcmd; - $self->{'serverlogin'} = $serverlogin; -} - -sub control_path_dir { - # Returns: - # path to directory - my $self = shift; - if(not defined $self->{'control_path_dir'}) { - -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; - -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; - $self->{'control_path_dir'} = - File::Temp::tempdir($ENV{'HOME'} - . "/.parallel/tmp/control_path_dir-XXXX", - CLEANUP => 1); - } - return $self->{'control_path_dir'}; -} - -sub rsync_transfer_cmd { - # Command to run to transfer a file - # Input: - # $file = filename of file to transfer - # $workdir = destination dir - # Returns: - # $cmd = rsync command to run to transfer $file ("" if unreadable) - my $self = shift; - my $file = shift; - my $workdir = shift; - if(not -r $file) { - ::warning($file, " is not readable and will not be transferred.\n"); - return "true"; - } - my $rsync_destdir; - if($file =~ m:^/:) { - # rsync /foo/bar / - $rsync_destdir = "/"; - } else { - $rsync_destdir = ::shell_quote_file($workdir); - } - $file = ::shell_quote_file($file); - my $sshcmd = $self->sshcommand(); - my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd); - my $serverlogin = $self->serverlogin(); - # Make dir if it does not exist - return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" . - rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )"; -} - -sub cleanup_cmd { - # Command to run to remove the remote file - # Input: - # $file = filename to remove - # $workdir = destination dir - # Returns: - # $cmd = ssh command to run to remove $file and empty parent dirs - my $self = shift; - my $file = shift; - my $workdir = shift; - my $f = $file; - if($f =~ m:/\./:) { - # foo/bar/./baz/quux => workdir/baz/quux - # /foo/bar/./baz/quux => workdir/baz/quux - $f =~ s:.*/\./:$workdir/:; - } elsif($f =~ m:^[^/]:) { - # foo/bar => workdir/foo/bar - $f = $workdir."/".$f; - } - my @subdirs = split m:/:, ::dirname($f); - my @rmdir; - my $dir = ""; - for(@subdirs) { - $dir .= $_."/"; - unshift @rmdir, ::shell_quote_file($dir); - } - my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : ""; - if(defined $opt::workdir and $opt::workdir eq "...") { - $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; - } - - $f = ::shell_quote_file($f); - my $sshcmd = $self->sshcommand(); - my $serverlogin = $self->serverlogin(); - return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)"); -} - -{ - my $rsync; - - sub rsync { - # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. 
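# A minimal sketch of the version check performed in rsync() just below:
# parse `rsync --version` and, for 3.1 or newer, force --protocol 30 so the
# transfer still works against hosts running rsync 2.5.7.
use strict;
use warnings;

sub rsync_cmd {
    my @out = `rsync --version`;
    for (@out) {
        if(/version (\d+\.\d+)/) {
            return $1 >= 3.1 ? "rsync --protocol 30" : "rsync";
        }
    }
    die "Cannot figure out version of rsync: @out";
}

print rsync_cmd(), "\n";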
- # If the version >= 3.1.0: downgrade to protocol 30 - if(not $rsync) { - my @out = `rsync --version`; - for (@out) { - if(/version (\d+.\d+)(.\d+)?/) { - if($1 >= 3.1) { - # Version 3.1.0 or later: Downgrade to protocol 30 - $rsync = "rsync --protocol 30"; - } else { - $rsync = "rsync"; - } - } - } - $rsync or ::die_bug("Cannot figure out version of rsync: @out"); - } - return $rsync; - } -} - - -package JobQueue; - -sub new { - my $class = shift; - my $commandref = shift; - my $read_from = shift; - my $context_replace = shift; - my $max_number_of_args = shift; - my $return_files = shift; - my $commandlinequeue = CommandLineQueue->new - ($commandref, $read_from, $context_replace, $max_number_of_args, - $return_files); - my @unget = (); - return bless { - 'unget' => \@unget, - 'commandlinequeue' => $commandlinequeue, - 'total_jobs' => undef, - }, ref($class) || $class; -} - -sub get { - my $self = shift; - - if(@{$self->{'unget'}}) { - my $job = shift @{$self->{'unget'}}; - return ($job); - } else { - my $commandline = $self->{'commandlinequeue'}->get(); - if(defined $commandline) { - my $job = Job->new($commandline); - return $job; - } else { - return undef; - } - } -} - -sub unget { - my $self = shift; - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = (not @{$self->{'unget'}}) - && $self->{'commandlinequeue'}->empty(); - ::debug("run", "JobQueue->empty $empty "); - return $empty; -} - -sub total_jobs { - my $self = shift; - if(not defined $self->{'total_jobs'}) { - my $job; - my @queue; - my $start = time; - while($job = $self->get()) { - if(time - $start > 10) { - ::warning("Reading all arguments takes longer than 10 seconds.\n"); - $opt::eta && ::warning("Consider removing --eta.\n"); - $opt::bar && ::warning("Consider removing --bar.\n"); - last; - } - push @queue, $job; - } - while($job = $self->get()) { - push @queue, $job; - } - - $self->unget(@queue); - $self->{'total_jobs'} = $#queue+1; - } - return $self->{'total_jobs'}; -} - -sub next_seq { - my $self = shift; - - return $self->{'commandlinequeue'}->seq(); -} - -sub quote_args { - my $self = shift; - return $self->{'commandlinequeue'}->quote_args(); -} - - -package Job; - -sub new { - my $class = shift; - my $commandlineref = shift; - return bless { - 'commandline' => $commandlineref, # CommandLine object - 'workdir' => undef, # --workdir - 'stdin' => undef, # filehandle for stdin (used for --pipe) - # filename for writing stdout to (used for --files) - 'remaining' => "", # remaining data not sent to stdin (used for --pipe) - 'datawritten' => 0, # amount of data sent via stdin (used for --pipe) - 'transfersize' => 0, # size of files using --transfer - 'returnsize' => 0, # size of files using --return - 'pid' => undef, - # hash of { SSHLogins => number of times the command failed there } - 'failed' => undef, - 'sshlogin' => undef, - # The commandline wrapped with rsync and ssh - 'sshlogin_wrap' => undef, - 'exitstatus' => undef, - 'exitsignal' => undef, - # Timestamp for timeout if any - 'timeout' => undef, - 'virgin' => 1, - }, ref($class) || $class; -} - -sub replaced { - my $self = shift; - $self->{'commandline'} or ::die_bug("commandline empty"); - return $self->{'commandline'}->replaced(); -} - -sub seq { - my $self = shift; - return $self->{'commandline'}->seq(); -} - -sub slot { - my $self = shift; - return $self->{'commandline'}->slot(); -} - -{ - my($cattail); - - sub cattail { - # Returns: - # $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress 
or stdin] [file_to_unlink] - if(not $cattail) { - $cattail = q{ - # cat followed by tail. - # If $writerpid dead: finish after this round - use Fcntl; - - $|=1; - - my ($cmd, $writerpid, $read_file, $unlink_file) = @ARGV; - if($read_file) { - open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); - } else { - *IN = *STDIN; - } - - my $flags; - fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle - $flags |= O_NONBLOCK; # Add non-blocking to the flags - fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle - open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); - - while(1) { - # clear EOF - seek(IN,0,1); - my $writer_running = kill 0, $writerpid; - $read = sysread(IN,$buf,32768); - if($read) { - # We can unlink the file now: The writer has written something - -e $unlink_file and unlink $unlink_file; - # Blocking print - while($buf) { - my $bytes_written = syswrite(OUT,$buf); - # syswrite may be interrupted by SIGHUP - substr($buf,0,$bytes_written) = ""; - } - # Something printed: Wait less next time - $sleep /= 2; - } else { - if(eof(IN) and not $writer_running) { - # Writer dead: There will never be more to read => exit - exit; - } - # TODO This could probably be done more efficiently using select(2) - # Nothing read: Wait longer before next read - # Up to 30 milliseconds - $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); - usleep($sleep); - } - } - - sub usleep { - # Sleep this many milliseconds. - my $secs = shift; - select(undef, undef, undef, $secs/1000); - } - }; - $cattail =~ s/#.*//mg; - $cattail =~ s/\s+/ /g; - } - return $cattail; - } -} - -sub openoutputfiles { - # Open files for STDOUT and STDERR - # Set file handles in $self->fh - my $self = shift; - my ($outfhw, $errfhw, $outname, $errname); - if($opt::results) { - my $args_as_dirname = $self->{'commandline'}->args_as_dirname(); - # Output in: prefix/name1/val1/name2/val2/stdout - my $dir = $opt::results."/".$args_as_dirname; - if(eval{ File::Path::mkpath($dir); }) { - # OK - } else { - # mkpath failed: Argument probably too long. 
- # Set $Global::max_file_length, which will keep the individual - # dir names shorter than the max length - max_file_name_length($opt::results); - $args_as_dirname = $self->{'commandline'}->args_as_dirname(); - # prefix/name1/val1/name2/val2/ - $dir = $opt::results."/".$args_as_dirname; - File::Path::mkpath($dir); - } - # prefix/name1/val1/name2/val2/stdout - $outname = "$dir/stdout"; - if(not open($outfhw, "+>", $outname)) { - ::error("Cannot write to `$outname'.\n"); - ::wait_and_exit(255); - } - # prefix/name1/val1/name2/val2/stderr - $errname = "$dir/stderr"; - if(not open($errfhw, "+>", $errname)) { - ::error("Cannot write to `$errname'.\n"); - ::wait_and_exit(255); - } - $self->set_fh(1,"unlink",""); - $self->set_fh(2,"unlink",""); - } elsif(not $opt::ungroup) { - # To group we create temporary files for STDOUT and STDERR - # To avoid the cleanup unlink the files immediately (but keep them open) - if(@Global::tee_jobs) { - # files must be removed when the tee is done - } elsif($opt::files) { - ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); - ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); - # --files => only remove stderr - $self->set_fh(1,"unlink",""); - $self->set_fh(2,"unlink",$errname); - } else { - ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); - ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); - $self->set_fh(1,"unlink",$outname); - $self->set_fh(2,"unlink",$errname); - } - } else { - # --ungroup - open($outfhw,">&",$Global::fd{1}) || die; - open($errfhw,">&",$Global::fd{2}) || die; - # File name must be empty as it will otherwise be printed - $outname = ""; - $errname = ""; - $self->set_fh(1,"unlink",$outname); - $self->set_fh(2,"unlink",$errname); - } - # Set writing FD - $self->set_fh(1,'w',$outfhw); - $self->set_fh(2,'w',$errfhw); - $self->set_fh(1,'name',$outname); - $self->set_fh(2,'name',$errname); - if($opt::compress) { - # Send stdout to stdin for $opt::compress_program(1) - # Send stderr to stdin for $opt::compress_program(2) - # cattail get pid: $pid = $self->fh($fdno,'rpid'); - my $cattail = cattail(); - for my $fdno (1,2) { - my $wpid = open(my $fdw,"|-","$opt::compress_program >>". 
- $self->fh($fdno,'name')) || die $?; - $self->set_fh($fdno,'w',$fdw); - $self->set_fh($fdno,'wpid',$wpid); - my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, - $opt::decompress_program, $wpid, - $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?; - $self->set_fh($fdno,'r',$fdr); - $self->set_fh($fdno,'rpid',$rpid); - } - } elsif(not $opt::ungroup) { - # Set reading FD if using --group (--ungroup does not need) - for my $fdno (1,2) { - # Re-open the file for reading - # so fdw can be closed separately - # and fdr can be seeked separately (for --line-buffer) - open(my $fdr,"<", $self->fh($fdno,'name')) || - ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name')); - $self->set_fh($fdno,'r',$fdr); - # Unlink if required - $Global::debug or unlink $self->fh($fdno,"unlink"); - } - } - if($opt::linebuffer) { - # Set non-blocking when using --linebuffer - $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; - for my $fdno (1,2) { - my $fdr = $self->fh($fdno,'r'); - my $flags; - fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle - $flags |= &O_NONBLOCK; # Add non-blocking to the flags - fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle - } - } -} - -sub max_file_name_length { - # Figure out the max length of a subdir - # TODO and the max total length - # Ext4 = 255,130816 - my $testdir = shift; - - my $upper = 8_000_000; - my $len = 8; - my $dir="x"x$len; - do { - rmdir($testdir."/".$dir); - $len *= 16; - $dir="x"x$len; - } while (mkdir $testdir."/".$dir); - # Then search for the actual max length between $len/16 and $len - my $min = $len/16; - my $max = $len; - while($max-$min > 5) { - # If we are within 5 chars of the exact value: - # it is not worth the extra time to find the exact value - my $test = int(($min+$max)/2); - $dir="x"x$test; - if(mkdir $testdir."/".$dir) { - rmdir($testdir."/".$dir); - $min = $test; - } else { - $max = $test; - } - } - $Global::max_file_length = $min; - return $min; -} - -sub set_fh { - # Set file handle - my ($self, $fd_no, $key, $fh) = @_; - $self->{'fd'}{$fd_no,$key} = $fh; -} - -sub fh { - # Get file handle - my ($self, $fd_no, $key) = @_; - return $self->{'fd'}{$fd_no,$key}; -} - -sub write { - my $self = shift; - my $remaining_ref = shift; - my $stdin_fh = $self->fh(0,"w"); - syswrite($stdin_fh,$$remaining_ref); -} - -sub set_stdin_buffer { - # Copy stdin buffer from $block_ref up to $endpos - # Prepend with $header_ref - # Remove $recstart and $recend if needed - # Input: - # $header_ref = ref to $header to prepend - # $block_ref = ref to $block to pass on - # $endpos = length of $block to pass on - # $recstart = --recstart regexp - # $recend = --recend regexp - # Returns: - # N/A - my $self = shift; - my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_; - $self->{'stdin_buffer'} = ($self->virgin() ? 
$$header_ref : "").substr($$block_ref,0,$endpos); - if($opt::remove_rec_sep) { - remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend); - } - $self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'}; - $self->{'stdin_buffer_pos'} = 0; -} - -sub stdin_buffer_length { - my $self = shift; - return $self->{'stdin_buffer_length'}; -} - -sub remove_rec_sep { - my ($block_ref,$recstart,$recend) = @_; - # Remove record separator - $$block_ref =~ s/$recend$recstart//gos; - $$block_ref =~ s/^$recstart//os; - $$block_ref =~ s/$recend$//os; -} - -sub non_block_write { - my $self = shift; - my $something_written = 0; - use POSIX qw(:errno_h); -# use Fcntl; -# my $flags = ''; - for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) { - my $in = $self->fh(0,"w"); -# fcntl($in, F_GETFL, $flags) -# or die "Couldn't get flags for HANDLE : $!\n"; -# $flags |= O_NONBLOCK; -# fcntl($in, F_SETFL, $flags) -# or die "Couldn't set flags for HANDLE: $!\n"; - my $rv = syswrite($in, $buf); - if (!defined($rv) && $! == EAGAIN) { - # would block - $something_written = 0; - } elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) { - # incomplete write - # Remove the written part - $self->{'stdin_buffer_pos'} += $rv; - $something_written = $rv; - } else { - # successfully wrote everything - my $a=""; - $self->set_stdin_buffer(\$a,\$a,"",""); - $something_written = $rv; - } - } - - ::debug("pipe", "Non-block: ", $something_written); - return $something_written; -} - - -sub virgin { - my $self = shift; - return $self->{'virgin'}; -} - -sub set_virgin { - my $self = shift; - $self->{'virgin'} = shift; -} - -sub pid { - my $self = shift; - return $self->{'pid'}; -} - -sub set_pid { - my $self = shift; - $self->{'pid'} = shift; -} - -sub starttime { - # Returns: - # UNIX-timestamp this job started - my $self = shift; - return sprintf("%.3f",$self->{'starttime'}); -} - -sub set_starttime { - my $self = shift; - my $starttime = shift || ::now(); - $self->{'starttime'} = $starttime; -} - -sub runtime { - # Returns: - # Run time in seconds - my $self = shift; - return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000); -} - -sub endtime { - # Returns: - # UNIX-timestamp this job ended - # 0 if not ended yet - my $self = shift; - return ($self->{'endtime'} || 0); -} - -sub set_endtime { - my $self = shift; - my $endtime = shift; - $self->{'endtime'} = $endtime; -} - -sub timedout { - # Is the job timedout? - # Input: - # $delta_time = time that the job may run - # Returns: - # True or false - my $self = shift; - my $delta_time = shift; - return time > $self->{'starttime'} + $delta_time; -} - -sub kill { - # Kill the job. - # Send the signals to (grand)*children and pid. - # If no signals: TERM TERM KILL - # Wait 200 ms after each TERM. 
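The kill() method below escalates TERM, TERM, KILL, pausing up to 200 ms after each TERM so the job can clean up before it is forcibly killed. A minimal standalone sketch of that escalation idea (it ignores the walk over (grand)children that family_pids() adds, and is not the code used here):

    use strict;
    use warnings;
    use Time::HiRes qw(usleep);

    sub kill_gently {
        # Escalate signals to $pid, pausing 200 ms after each TERM so the
        # process can clean up before the final KILL.
        my ($pid, @signals) = @_;
        @signals = ("TERM", "TERM", "KILL") unless @signals;
        for my $signal (@signals) {
            last unless kill 0, $pid;            # process already gone
            kill $signal, $pid;
            usleep(200_000) if $signal eq "TERM";
        }
    }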
- # Input: - # @signals = signals to send - my $self = shift; - my @signals = @_; - my @family_pids = $self->family_pids(); - # Record this jobs as failed - $self->set_exitstatus(-1); - # Send two TERMs to give time to clean up - ::debug("run", "Kill seq ", $self->seq(), "\n"); - my @send_signals = @signals || ("TERM", "TERM", "KILL"); - for my $signal (@send_signals) { - my $alive = 0; - for my $pid (@family_pids) { - if(kill 0, $pid) { - # The job still running - kill $signal, $pid; - $alive = 1; - } - } - # If a signal was given as input, do not do the sleep below - @signals and next; - - if($signal eq "TERM" and $alive) { - # Wait up to 200 ms between TERMs - but only if any pids are alive - my $sleep = 1; - for (my $sleepsum = 0; kill 0, $family_pids[0] and $sleepsum < 200; - $sleepsum += $sleep) { - $sleep = ::reap_usleep($sleep); - } - } - } -} - -sub family_pids { - # Find the pids with this->pid as (grand)*parent - # Returns: - # @pids = pids of (grand)*children - my $self = shift; - my $pid = $self->pid(); - my @pids; - - my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table(); - - my @more = ($pid); - # While more (grand)*children - while(@more) { - my @m; - push @pids, @more; - for my $parent (@more) { - if($children_of_ref->{$parent}) { - # add the children of this parent - push @m, @{$children_of_ref->{$parent}}; - } - } - @more = @m; - } - return (@pids); -} - -sub failed { - # return number of times failed for this $sshlogin - # Input: - # $sshlogin - # Returns: - # Number of times failed for $sshlogin - my $self = shift; - my $sshlogin = shift; - return $self->{'failed'}{$sshlogin}; -} - -sub failed_here { - # return number of times failed for the current $sshlogin - # Returns: - # Number of times failed for this sshlogin - my $self = shift; - return $self->{'failed'}{$self->sshlogin()}; -} - -sub add_failed { - # increase the number of times failed for this $sshlogin - my $self = shift; - my $sshlogin = shift; - $self->{'failed'}{$sshlogin}++; -} - -sub add_failed_here { - # increase the number of times failed for the current $sshlogin - my $self = shift; - $self->{'failed'}{$self->sshlogin()}++; -} - -sub reset_failed { - # increase the number of times failed for this $sshlogin - my $self = shift; - my $sshlogin = shift; - delete $self->{'failed'}{$sshlogin}; -} - -sub reset_failed_here { - # increase the number of times failed for this $sshlogin - my $self = shift; - delete $self->{'failed'}{$self->sshlogin()}; -} - -sub min_failed { - # Returns: - # the number of sshlogins this command has failed on - # the minimal number of times this command has failed - my $self = shift; - my $min_failures = - ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); - my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; - return ($number_of_sshlogins_failed_on,$min_failures); -} - -sub total_failed { - # Returns: - # $total_failures = the number of times this command has failed - my $self = shift; - my $total_failures = 0; - for (values %{$self->{'failed'}}) { - $total_failures += $_; - } - return $total_failures; -} - -sub wrapped { - # Wrap command with: - # * --shellquote - # * --nice - # * --cat - # * --fifo - # * --sshlogin - # * --pipepart (@Global::cat_partials) - # * --pipe - # * --tmux - # The ordering of the wrapping is important: - # * --nice/--cat/--fifo should be done on the remote machine - # * --pipepart/--pipe should be done on the local machine inside --tmux - # Uses: - # $Global::envvar - # $opt::shellquote - # $opt::nice - # 
$Global::shell - # $opt::cat - # $opt::fifo - # @Global::cat_partials - # $opt::pipe - # $opt::tmux - # Returns: - # $self->{'wrapped'} = the command wrapped with the above - my $self = shift; - if(not defined $self->{'wrapped'}) { - my $command = $Global::envvar.$self->replaced(); - if($opt::shellquote) { - # Prepend echo - # and quote twice - $command = "echo " . - ::shell_quote_scalar(::shell_quote_scalar($command)); - } - if($opt::nice) { - # Prepend \nice -n19 $SHELL -c - # and quote. - # The '\' before nice is needed to avoid tcsh's built-in - $command = '\nice'. " -n". $opt::nice. " ". - $Global::shell. " -c ". - ::shell_quote_scalar($command); - } - if($opt::cat) { - # Prepend 'cat > {};' - # Append '_EXIT=$?;(rm {};exit $_EXIT)' - $command = - $self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0). - $command. - $self->{'commandline'}->replace_placeholders( - ["; _EXIT=\$?; rm \257<\257>; exit \$_EXIT"], 0, 0); - } elsif($opt::fifo) { - # Prepend 'mkfifo {}; (' - # Append ') & _PID=$!; cat > {}; wait $_PID; _EXIT=$?;(rm {};exit $_EXIT)' - $command = - $self->{'commandline'}->replace_placeholders(["mkfifo \257<\257>; ("], 0, 0). - $command. - $self->{'commandline'}->replace_placeholders([") & _PID=\$!; cat > \257<\257>; ", - "wait \$_PID; _EXIT=\$?; ", - "rm \257<\257>; exit \$_EXIT"], - 0,0); - } - # Wrap with ssh + tranferring of files - $command = $self->sshlogin_wrap($command); - if(@Global::cat_partials) { - # Prepend: - # < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 11 | - $command = (shift @Global::cat_partials). "|". "(". $command. ")"; - } elsif($opt::pipe) { - # Prepend EOF-detector to avoid starting $command if EOF. - # The $tmpfile might exist if run on a remote system - we accept that risk - my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr"); - # Unlink to avoid leaving files if --dry-run or --sshlogin - unlink $tmpfile; - $command = - # Exit value: - # empty input = true - # some input = exit val from command - qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }. - qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }. - qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }. - "($command);"; - } - if($opt::tmux) { - # Wrap command with 'tmux' - $command = $self->tmux_wrap($command); - } - $self->{'wrapped'} = $command; - } - return $self->{'wrapped'}; -} - -sub set_sshlogin { - my $self = shift; - my $sshlogin = shift; - $self->{'sshlogin'} = $sshlogin; - delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong - delete $self->{'wrapped'}; -} - -sub sshlogin { - my $self = shift; - return $self->{'sshlogin'}; -} - -sub sshlogin_wrap { - # Wrap the command with the commands needed to run remotely - # Returns: - # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands - my $self = shift; - my $command = shift; - if(not defined $self->{'sshlogin_wrap'}) { - my $sshlogin = $self->sshlogin(); - my $sshcmd = $sshlogin->sshcommand(); - my $serverlogin = $sshlogin->serverlogin(); - my ($pre,$post,$cleanup)=("","",""); - - if($serverlogin eq ":") { - # No transfer neeeded - $self->{'sshlogin_wrap'} = $command; - } else { - # --transfer - $pre .= $self->sshtransfer(); - # --return - $post .= $self->sshreturn(); - # --cleanup - $post .= $self->sshcleanup(); - if($post) { - # We need to save the exit status of the job - $post = '_EXIT_status=$?; ' . $post . 
' exit $_EXIT_status;'; - } - # If the remote login shell is (t)csh then use 'setenv' - # otherwise use 'export' - # We cannot use parse_env_var(), as PARALLEL_SEQ changes - # for each command - my $parallel_env = - ($Global::envwarn - . q{ 'eval `echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null } - . q{ && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; } - . q{ setenv PARALLEL_PID '$PARALLEL_PID' } - . q{ || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; } - . q{ PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' }); - my $remote_pre = ""; - my $ssh_options = ""; - if(($opt::pipe or $opt::pipepart) and $opt::ctrlc - or - not ($opt::pipe or $opt::pipepart) and not $opt::noctrlc) { - # TODO Determine if this is needed - # Propagating CTRL-C to kill remote jobs requires - # remote jobs to be run with a terminal. - $ssh_options = "-tt -oLogLevel=quiet"; -# $ssh_options = ""; - # tty - check if we have a tty. - # stty: - # -onlcr - make output 8-bit clean - # isig - pass CTRL-C as signal - # -echo - do not echo input - $remote_pre .= ::shell_quote_scalar('tty >/dev/null && stty isig -onlcr -echo;'); - } - if($opt::workdir) { - my $wd = ::shell_quote_file($self->workdir()); - $remote_pre .= ::shell_quote_scalar("mkdir -p ") . $wd . - ::shell_quote_scalar("; cd ") . $wd . - # exit 255 (instead of exec false) would be the correct thing, - # but that fails on tcsh - ::shell_quote_scalar(qq{ || exec false;}); - } - # This script is to solve the problem of - # * not mixing STDERR and STDOUT - # * terminating with ctrl-c - # It works on Linux but not Solaris - # Finishes on Solaris, but wrong exit code: - # $SIG{CHLD} = sub {exit ($?&127 ? 128+($?&127) : 1+$?>>8)}; - # Hangs on Solaris, but correct exit code on Linux: - # $SIG{CHLD} = sub { $done = 1 }; - # $p->poll; - my $signal_script = "perl -e '". - q{ - use IO::Poll; - $SIG{CHLD} = sub { $done = 1 }; - $p = IO::Poll->new; - $p->mask(STDOUT, POLLHUP); - $pid=fork; unless($pid) {setpgrp; exec $ENV{SHELL}, "-c", @ARGV; die "exec: $!\n"} - $p->poll; - kill SIGHUP, -${pid} unless $done; - wait; exit ($?&127 ? 128+($?&127) : 1+$?>>8) - } . "' "; - $signal_script =~ s/\s+/ /g; - - $self->{'sshlogin_wrap'} = - ($pre - . "$sshcmd $ssh_options $serverlogin $parallel_env " - . $remote_pre -# . ::shell_quote_scalar($signal_script . ::shell_quote_scalar($command)) - . ::shell_quote_scalar($command) - . ";" - . 
$post); - } - } - return $self->{'sshlogin_wrap'}; -} - -sub transfer { - # Files to transfer - # Returns: - # @transfer - File names of files to transfer - my $self = shift; - my @transfer = (); - $self->{'transfersize'} = 0; - if($opt::transfer) { - for my $record (@{$self->{'commandline'}{'arg_list'}}) { - # Merge arguments from records into args - for my $arg (@$record) { - CORE::push @transfer, $arg->orig(); - # filesize - if(-e $arg->orig()) { - $self->{'transfersize'} += (stat($arg->orig()))[7]; - } - } - } - } - return @transfer; -} - -sub transfersize { - my $self = shift; - return $self->{'transfersize'}; -} - -sub sshtransfer { - # Returns for each transfer file: - # rsync $file remote:$workdir - my $self = shift; - my @pre; - my $sshlogin = $self->sshlogin(); - my $workdir = $self->workdir(); - for my $file ($self->transfer()) { - push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; - } - return join("",@pre); -} - -sub return { - # Files to return - # Non-quoted and with {...} substituted - # Returns: - # @non_quoted_filenames - my $self = shift; - return $self->{'commandline'}-> - replace_placeholders($self->{'commandline'}{'return_files'},0,0); -} - -sub returnsize { - # This is called after the job has finished - # Returns: - # $number_of_bytes transferred in return - my $self = shift; - for my $file ($self->return()) { - if(-e $file) { - $self->{'returnsize'} += (stat($file))[7]; - } - } - return $self->{'returnsize'}; -} - -sub sshreturn { - # Returns for each return-file: - # rsync remote:$workdir/$file . - my $self = shift; - my $sshlogin = $self->sshlogin(); - my $sshcmd = $sshlogin->sshcommand(); - my $serverlogin = $sshlogin->serverlogin(); - my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd); - my $pre = ""; - for my $file ($self->return()) { - $file =~ s:^\./::g; # Remove ./ if any - my $relpath = ($file !~ m:^/:); # Is the path relative? - my $cd = ""; - my $wd = ""; - if($relpath) { - # rsync -avR /foo/./bar/baz.c remote:/tmp/ - # == (on old systems) - # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ - $wd = ::shell_quote_file($self->workdir()."/"); - } - # Only load File::Basename if actually needed - $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; - # dir/./file means relative to dir, so remove dir on remote - $file =~ m:(.*)/\./:; - my $basedir = $1 ? ::shell_quote_file($1."/") : ""; - my $nobasedir = $file; - $nobasedir =~ s:.*/\./::; - $cd = ::shell_quote_file(::dirname($nobasedir)); - my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync"); - my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file))); - # --return - # mkdir -p /home/tange/dir/subdir/; - # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync" - # server:file.gz /home/tange/dir/subdir/ - $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:". - $basename . 
" ".$basedir.$cd.";"; - } - return $pre; -} - -sub sshcleanup { - # Return the sshcommand needed to remove the file - # Returns: - # ssh command needed to remove files from sshlogin - my $self = shift; - my $sshlogin = $self->sshlogin(); - my $sshcmd = $sshlogin->sshcommand(); - my $serverlogin = $sshlogin->serverlogin(); - my $workdir = $self->workdir(); - my $cleancmd = ""; - - for my $file ($self->cleanup()) { - my @subworkdirs = parentdirs_of($file); - $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; - } - if(defined $opt::workdir and $opt::workdir eq "...") { - $cleancmd .= "$sshcmd $serverlogin rm -rf " . ::shell_quote_scalar($workdir).';'; - } - return $cleancmd; -} - -sub cleanup { - # Returns: - # Files to remove at cleanup - my $self = shift; - if($opt::cleanup) { - my @transfer = $self->transfer(); - my @return = $self->return(); - return (@transfer,@return); - } else { - return (); - } -} - -sub workdir { - # Returns: - # the workdir on a remote machine - my $self = shift; - if(not defined $self->{'workdir'}) { - my $workdir; - if(defined $opt::workdir) { - if($opt::workdir eq ".") { - # . means current dir - my $home = $ENV{'HOME'}; - eval 'use Cwd'; - my $cwd = cwd(); - $workdir = $cwd; - if($home) { - # If homedir exists: remove the homedir from - # workdir if cwd starts with homedir - # E.g. /home/foo/my/dir => my/dir - # E.g. /tmp/my/dir => /tmp/my/dir - my ($home_dev, $home_ino) = (stat($home))[0,1]; - my $parent = ""; - my @dir_parts = split(m:/:,$cwd); - my $part; - while(defined ($part = shift @dir_parts)) { - $part eq "" and next; - $parent .= "/".$part; - my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; - if($parent_dev == $home_dev and $parent_ino == $home_ino) { - # dev and ino is the same: We found the homedir. - $workdir = join("/",@dir_parts); - last; - } - } - } - if($workdir eq "") { - $workdir = "."; - } - } elsif($opt::workdir eq "...") { - $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ - . "-" . $self->seq(); - } else { - $workdir = $opt::workdir; - # Rsync treats /./ special. We don't want that - $workdir =~ s:/\./:/:g; # Remove /./ - $workdir =~ s:/+$::; # Remove ending / if any - $workdir =~ s:^\./::g; # Remove starting ./ if any - } - } else { - $workdir = "."; - } - $self->{'workdir'} = ::shell_quote_scalar($workdir); - } - return $self->{'workdir'}; -} - -sub parentdirs_of { - # Return: - # all parentdirs except . of this dir or file - sorted desc by length - my $d = shift; - my @parents = (); - while($d =~ s:/[^/]+$::) { - if($d ne ".") { - push @parents, $d; - } - } - return @parents; -} - -sub start { - # Setup STDOUT and STDERR for a job and start it. - # Returns: - # job-object or undef if job not to run - my $job = shift; - # Get the shell command to be executed (possibly with ssh infront). 
- my $command = $job->wrapped(); - - if($Global::interactive or $Global::stderr_verbose) { - if($Global::interactive) { - print $Global::original_stderr "$command ?..."; - open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty"); - my $answer = <$tty_fh>; - close $tty_fh; - my $run_yes = ($answer =~ /^\s*y/i); - if (not $run_yes) { - $command = "true"; # Run the command 'true' - } - } else { - print $Global::original_stderr "$command\n"; - } - } - - my $pid; - $job->openoutputfiles(); - my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); - local (*IN,*OUT,*ERR); - open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!"); - open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDOUT: $!"); - - if(($opt::dryrun or $Global::verbose) and $opt::ungroup) { - if($Global::verbose <= 1) { - print $stdout_fh $job->replaced(),"\n"; - } else { - # Verbose level > 1: Print the rsync and stuff - print $stdout_fh $command,"\n"; - } - } - if($opt::dryrun) { - $command = "true"; - } - $ENV{'PARALLEL_SEQ'} = $job->seq(); - $ENV{'PARALLEL_PID'} = $$; - ::debug("run", $Global::total_running, " processes . Starting (", - $job->seq(), "): $command\n"); - if($opt::pipe) { - my ($stdin_fh); - # The eval is needed to catch exception from open3 - eval { - $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || - ::die_bug("open3-pipe"); - 1; - }; - $job->set_fh(0,"w",$stdin_fh); - } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1 - and $job->sshlogin()->string() eq ":") { - # Give STDIN to the first job if using -a (but only if running - # locally - otherwise CTRL-C does not work for other jobs Bug#36585) - *IN = *STDIN; - # The eval is needed to catch exception from open3 - eval { - $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || - ::die_bug("open3-a"); - 1; - }; - # Re-open to avoid complaining - open(STDIN, "<&", $Global::original_stdin) - or ::die_bug("dup-\$Global::original_stdin: $!"); - } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and - open(my $devtty_fh, "<", "/dev/tty")) { - # Give /dev/tty to the command if no one else is using it - *IN = $devtty_fh; - # The eval is needed to catch exception from open3 - eval { - $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || - ::die_bug("open3-/dev/tty"); - $Global::tty_taken = $pid; - close $devtty_fh; - 1; - }; - } else { - # The eval is needed to catch exception from open3 - eval { - $pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || - ::die_bug("open3-gensym"); - 1; - }; - } - if($pid) { - # A job was started - $Global::total_running++; - $Global::total_started++; - $job->set_pid($pid); - $job->set_starttime(); - $Global::running{$job->pid()} = $job; - if($opt::timeout) { - $Global::timeoutq->insert($job); - } - $Global::newest_job = $job; - $Global::newest_starttime = ::now(); - return $job; - } else { - # No more processes - ::debug("run", "Cannot spawn more jobs.\n"); - return undef; - } -} - -sub tmux_wrap { - # Wrap command with tmux for session pPID - # Input: - # $actual_command = the actual command being run (incl ssh wrap) - my $self = shift; - my $actual_command = shift; - # Temporary file name. 
Used for fifo to communicate exit val - my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".tmx"); - $Global::unlink{$tmpfile}=1; - close $fh; - unlink $tmpfile; - my $visual_command = $self->replaced(); - my $title = $visual_command; - # ; causes problems - # ascii 194-245 annoys tmux - $title =~ tr/[\011-\016;\302-\365]//d; - - my $tmux; - if($Global::total_running == 0) { - $tmux = "tmux new-session -s p$$ -d -n ". - ::shell_quote_scalar($title); - print $Global::original_stderr "See output with: tmux attach -t p$$\n"; - } else { - $tmux = "tmux new-window -t p$$ -n ".::shell_quote_scalar($title); - } - return "mkfifo $tmpfile; $tmux ". - # Run in tmux - ::shell_quote_scalar( - "(".$actual_command.');(echo $?$status;echo 255) >'.$tmpfile."&". - "echo ".::shell_quote_scalar($visual_command).";". - "echo \007Job finished at: `date`;sleep 10"). - # Run outside tmux - # Read the first line from the fifo and use that as status code - "; exit `perl -ne 'unlink \$ARGV; 1..1 and print' $tmpfile` "; -} - -sub is_already_in_results { - # Do we already have results for this job? - # Returns: - # $job_already_run = bool whether there is output for this or not - my $job = $_[0]; - my $args_as_dirname = $job->{'commandline'}->args_as_dirname(); - # prefix/name1/val1/name2/val2/ - my $dir = $opt::results."/".$args_as_dirname; - ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n"); - return -e "$dir/stdout"; -} - -sub is_already_in_joblog { - my $job = shift; - return vec($Global::job_already_run,$job->seq(),1); -} - -sub set_job_in_joblog { - my $job = shift; - vec($Global::job_already_run,$job->seq(),1) = 1; -} - -sub should_be_retried { - # Should this job be retried? - # Returns - # 0 - do not retry - # 1 - job queued for retry - my $self = shift; - if (not $opt::retries) { - return 0; - } - if(not $self->exitstatus()) { - # Completed with success. If there is a recorded failure: forget it - $self->reset_failed_here(); - return 0 - } else { - # The job failed. Should it be retried? - $self->add_failed_here(); - if($self->total_failed() == $opt::retries) { - # This has been retried enough - return 0; - } else { - # This command should be retried - $self->set_endtime(undef); - $Global::JobQueue->unget($self); - ::debug("run", "Retry ", $self->seq(), "\n"); - return 1; - } - } -} - -sub print { - # Print the output of the jobs - # Returns: N/A - - my $self = shift; - ::debug("print", ">>joboutput ", $self->replaced(), "\n"); - if($opt::dryrun) { - # Nothing was printed to this job: - # cleanup tmp files if --files was set - unlink $self->fh(1,"name"); - } - if($opt::pipe and $self->virgin()) { - # Skip --joblog, --dryrun, --verbose - } else { - if($Global::joblog and defined $self->{'exitstatus'}) { - # Add to joblog when finished - $self->print_joblog(); - } - - # Printing is only relevant for grouped/--line-buffer output. 
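The fifo created in tmux_wrap above is what carries the exit status out of the detached tmux session: the wrapped command writes its status into the fifo, and the wrapper outside tmux reads the first line back and reuses it as its own exit code. The same trick in isolation, with a hypothetical path and a dummy command (a sketch, not the actual wrapper):

    use strict;
    use warnings;
    use POSIX qw(mkfifo);

    my $fifo = "/tmp/exit-status-demo.$$";     # hypothetical scratch path
    mkfifo($fifo, 0700) or die "mkfifo: $!";
    # The "detached" side: run a command and write its exit status to the fifo.
    system(qq{(sleep 1; false; echo \$? > $fifo) &});
    # The "outside" side: block until the status arrives, then reuse it.
    open(my $fh, "<", $fifo) or die "open: $!";
    chomp(my $status = <$fh>);
    close $fh;
    unlink $fifo;
    print "wrapped command exited with $status\n";   # prints 1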
- $opt::ungroup and return; - # Check for disk full - exit_if_disk_full(); - - if(($opt::dryrun or $Global::verbose) - and - not $self->{'verbose_printed'}) { - $self->{'verbose_printed'}++; - if($Global::verbose <= 1) { - print STDOUT $self->replaced(),"\n"; - } else { - # Verbose level > 1: Print the rsync and stuff - print STDOUT $self->wrapped(),"\n"; - } - # If STDOUT and STDERR are merged, - # we want the command to be printed first - # so flush to avoid STDOUT being buffered - flush STDOUT; - } - } - for my $fdno (sort { $a <=> $b } keys %Global::fd) { - # Sort by file descriptor numerically: 1,2,3,..,9,10,11 - $fdno == 0 and next; - my $out_fd = $Global::fd{$fdno}; - my $in_fh = $self->fh($fdno,"r"); - if(not $in_fh) { - if(not $Job::file_descriptor_warning_printed{$fdno}++) { - # ::warning("File descriptor $fdno not defined\n"); - } - next; - } - ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):"); - if($opt::files) { - # If --compress: $in_fh must be closed first. - close $self->fh($fdno,"w"); - close $in_fh; - if($opt::pipe and $self->virgin()) { - # Nothing was printed to this job: - # cleanup unused tmp files if --files was set - for my $fdno (1,2) { - unlink $self->fh($fdno,"name"); - unlink $self->fh($fdno,"unlink"); - } - } elsif($fdno == 1 and $self->fh($fdno,"name")) { - print $out_fd $self->fh($fdno,"name"),"\n"; - } - } elsif($opt::linebuffer) { - # Line buffered print out - $self->linebuffer_print($fdno,$in_fh,$out_fd); - } else { - my $buf; - close $self->fh($fdno,"w"); - seek $in_fh, 0, 0; - # $in_fh is now ready for reading at position 0 - if($opt::tag or defined $opt::tagstring) { - my $tag = $self->tag(); - if($fdno == 2) { - # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt - # This is a crappy way of ignoring it. - while(<$in_fh>) { - if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) { - # Skip - } else { - print $out_fd $tag,$_; - } - # At most run the loop once - last; - } - } - while(<$in_fh>) { - print $out_fd $tag,$_; - } - } else { - my $buf; - if($fdno == 2) { - # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt - # This is a crappy way of ignoring it. - sysread($in_fh,$buf,1_000); - $buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; - print $out_fd $buf; - } - while(sysread($in_fh,$buf,32768)) { - print $out_fd $buf; - } - } - close $in_fh; - } - flush $out_fd; - } - ::debug("print", "<{'partial_line',$fdno}; - - if(defined $self->{'exitstatus'}) { - # If the job is dead: close printing fh. Needed for --compress - close $self->fh($fdno,"w"); - if($opt::compress) { - # Blocked reading in final round - $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; - for my $fdno (1,2) { - my $fdr = $self->fh($fdno,'r'); - my $flags; - fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle - $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags - fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle - } - } - } - # This seek will clear EOF - seek $in_fh, tell($in_fh), 0; - # The read is non-blocking: The $in_fh is set to non-blocking. 
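Both --compress and --linebuffer rely on toggling O_NONBLOCK on the reading filehandle with fcntl, as in openoutputfiles() earlier and in the read loop that follows. A small standalone helper showing the same pattern (not the exact code used here, which keeps the flags handling inline):

    use strict;
    use warnings;
    use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);

    sub set_nonblocking {
        # Switch a filehandle to non-blocking mode so reads return
        # immediately instead of waiting for the writer.
        my ($fh) = @_;
        my $flags = fcntl($fh, F_GETFL, 0) or die "F_GETFL: $!";
        fcntl($fh, F_SETFL, $flags | O_NONBLOCK) or die "F_SETFL: $!";
    }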
- # 32768 --tag = 5.1s - # 327680 --tag = 4.4s - # 1024000 --tag = 4.4s - # 3276800 --tag = 4.3s - # 32768000 --tag = 4.7s - # 10240000 --tag = 4.3s - while(read($in_fh,substr($$partial,length $$partial),3276800)) { - # Append to $$partial - # Find the last \n - my $i = rindex($$partial,"\n"); - if($i != -1) { - # One or more complete lines were found - if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) { - # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt - # This is a crappy way of ignoring it. - $$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; - # Length of partial line has changed: Find the last \n again - $i = rindex($$partial,"\n"); - } - if($opt::tag or defined $opt::tagstring) { - # Replace ^ with $tag within the full line - my $tag = $self->tag(); - substr($$partial,0,$i+1) =~ s/^/$tag/gm; - # Length of partial line has changed: Find the last \n again - $i = rindex($$partial,"\n"); - } - # Print up to and including the last \n - print $out_fd substr($$partial,0,$i+1); - # Remove the printed part - substr($$partial,0,$i+1)=""; - } - } - if(defined $self->{'exitstatus'}) { - # If the job is dead: print the remaining partial line - # read remaining - if($$partial and ($opt::tag or defined $opt::tagstring)) { - my $tag = $self->tag(); - $$partial =~ s/^/$tag/gm; - } - print $out_fd $$partial; - # Release the memory - $$partial = undef; - if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) { - # decompress still running - } else { - # decompress done: close fh - close $in_fh; - } - } -} - -sub print_joblog { - my $self = shift; - my $cmd; - if($Global::verbose <= 1) { - $cmd = $self->replaced(); - } else { - # Verbose level > 1: Print the rsync and stuff - $cmd = "@command"; - } - print $Global::joblog - join("\t", $self->seq(), $self->sshlogin()->string(), - $self->starttime(), sprintf("%10.3f",$self->runtime()), - $self->transfersize(), $self->returnsize(), - $self->exitstatus(), $self->exitsignal(), $cmd - ). "\n"; - flush $Global::joblog; - $self->set_job_in_joblog(); -} - -sub tag { - my $self = shift; - if(not defined $self->{'tag'}) { - $self->{'tag'} = $self->{'commandline'}-> - replace_placeholders([$opt::tagstring],0,0)."\t"; - } - return $self->{'tag'}; -} - -sub hostgroups { - my $self = shift; - if(not defined $self->{'hostgroups'}) { - $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; - } - return @{$self->{'hostgroups'}}; -} - -sub exitstatus { - my $self = shift; - return $self->{'exitstatus'}; -} - -sub set_exitstatus { - my $self = shift; - my $exitstatus = shift; - if($exitstatus) { - # Overwrite status if non-zero - $self->{'exitstatus'} = $exitstatus; - } else { - # Set status but do not overwrite - # Status may have been set by --timeout - $self->{'exitstatus'} ||= $exitstatus; - } -} - -sub exitsignal { - my $self = shift; - return $self->{'exitsignal'}; -} - -sub set_exitsignal { - my $self = shift; - my $exitsignal = shift; - $self->{'exitsignal'} = $exitsignal; -} - -{ - my ($disk_full_fh, $b8193, $name); - sub exit_if_disk_full { - # Checks if $TMPDIR is full by writing 8kb to a tmpfile - # If the disk is full: Exit immediately. 
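exit_if_disk_full() below detects a full $TMPDIR by appending a block larger than 8 KiB (Linux may not report a full disk for writes of 8192 bytes or less) and checking whether the file position advanced. A simplified standalone variant of the same idea, using an unbuffered syswrite on a throwaway temp file instead of the reusable filehandle kept by the real check:

    use strict;
    use warnings;
    use File::Temp qw(tempfile);

    sub tmpdir_seems_full {
        # Append >8 KiB to a scratch file; if nothing could be written,
        # assume the filesystem holding $TMPDIR is full.
        my ($fh, $name) = tempfile(DIR => $ENV{TMPDIR} // "/tmp", UNLINK => 1);
        my $written = syswrite($fh, "x" x 8193) // 0;
        close $fh;
        return $written == 0;
    }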
- # Returns: - # N/A - if(not $disk_full_fh) { - ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df"); - unlink $name; - $b8193 = "x"x8193; - } - # Linux does not discover if a disk is full if writing <= 8192 - # Tested on: - # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos - # ntfs reiserfs tmpfs ubifs vfat xfs - # TODO this should be tested on different OS similar to this: - # - # doit() { - # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop - # seq 100000 | parallel --tmpdir /mnt/loop/ true & - # seq 6900000 > /mnt/loop/i && echo seq OK - # seq 6980868 > /mnt/loop/i - # seq 10000 > /mnt/loop/ii - # sleep 3 - # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ - # echo >&2 - # } - print $disk_full_fh $b8193; - if(not $disk_full_fh - or - tell $disk_full_fh == 0) { - ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n"); - ::error("Change \$TMPDIR with --tmpdir or use --compress.\n"); - ::wait_and_exit(255); - } - truncate $disk_full_fh, 0; - seek($disk_full_fh, 0, 0) || die; - } -} - - -package CommandLine; - -sub new { - my $class = shift; - my $seq = shift; - my $commandref = shift; - $commandref || die; - my $arg_queue = shift; - my $context_replace = shift; - my $max_number_of_args = shift; # for -N and normal (-n1) - my $return_files = shift; - my $replacecount_ref = shift; - my $len_ref = shift; - my %replacecount = %$replacecount_ref; - my %len = %$len_ref; - for (keys %$replacecount_ref) { - # Total length of this replacement string {} replaced with all args - $len{$_} = 0; - } - return bless { - 'command' => $commandref, - 'seq' => $seq, - 'len' => \%len, - 'arg_list' => [], - 'arg_queue' => $arg_queue, - 'max_number_of_args' => $max_number_of_args, - 'replacecount' => \%replacecount, - 'context_replace' => $context_replace, - 'return_files' => $return_files, - 'replaced' => undef, - }, ref($class) || $class; -} - -sub seq { - my $self = shift; - return $self->{'seq'}; -} - -{ - my $max_slot_number; - - sub slot { - # Find the number of a free job slot and return it - # Uses: - # @Global::slots - # Returns: - # $jobslot = number of jobslot - my $self = shift; - if(not $self->{'slot'}) { - if(not @Global::slots) { - # $Global::max_slot_number will typically be $Global::max_jobs_running - push @Global::slots, ++$max_slot_number; - } - $self->{'slot'} = shift @Global::slots; - } - return $self->{'slot'}; - } -} - -sub populate { - # Add arguments from arg_queue until the number of arguments or - # max line length is reached - # Uses: - # $Global::minimal_command_line_length - # $opt::cat - # $opt::fifo - # $Global::JobQueue - # $opt::m - # $opt::X - # $CommandLine::already_spread - # $Global::max_jobs_running - # Returns: N/A - my $self = shift; - my $next_arg; - my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length(); - - if($opt::cat or $opt::fifo) { - # Generate a tempfile name that will be used as {} - my($outfh,$name) = ::tmpfile(SUFFIX => ".pip"); - close $outfh; - # Unlink is needed if: ssh otheruser@localhost - unlink $name; - $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget([Arg->new($name)]); - } - - while (not $self->{'arg_queue'}->empty()) { - $next_arg = $self->{'arg_queue'}->get(); - if(not defined $next_arg) { - next; - } - $self->push($next_arg); - if($self->len() >= $max_len) { - # Command length is now > max_length - # If there are arguments: remove the last - # If there are no arguments: Error - # TODO stuff about -x opt_x - if($self->number_of_args() 
> 1) { - # There is something to work on - $self->{'arg_queue'}->unget($self->pop()); - last; - } else { - my $args = join(" ", map { $_->orig() } @$next_arg); - ::error("Command line too long (", - $self->len(), " >= ", - $max_len, - ") at number ", - $self->{'arg_queue'}->arg_number(), - ": ". - (substr($args,0,50))."...\n"); - $self->{'arg_queue'}->unget($self->pop()); - ::wait_and_exit(255); - } - } - - if(defined $self->{'max_number_of_args'}) { - if($self->number_of_args() >= $self->{'max_number_of_args'}) { - last; - } - } - } - if(($opt::m or $opt::X) and not $CommandLine::already_spread - and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { - # -m or -X and EOF => Spread the arguments over all jobslots - # (unless they are already spread) - $CommandLine::already_spread ||= 1; - if($self->number_of_args() > 1) { - $self->{'max_number_of_args'} = - ::ceil($self->number_of_args()/$Global::max_jobs_running); - $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = - $self->{'max_number_of_args'}; - $self->{'arg_queue'}->unget($self->pop_all()); - while($self->number_of_args() < $self->{'max_number_of_args'}) { - $self->push($self->{'arg_queue'}->get()); - } - } - } -} - -sub push { - # Add one or more records as arguments - # Returns: N/A - my $self = shift; - my $record = shift; - push @{$self->{'arg_list'}}, $record; - - my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; - my $rep; - for my $arg (@$record) { - if(defined $arg) { - for my $perlexpr (keys %{$self->{'replacecount'}}) { - # 50% faster than below - $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self); - # $rep = $arg->replace($perlexpr,$quote_arg,$self); - # $self->{'len'}{$perlexpr} += length $rep; - # ::debug("length", "Length: ", length $rep, - # "(", $perlexpr, "=>", $rep, ")\n"); - } - } - } -} - -sub pop { - # Remove last argument - # Returns: - # the last record - my $self = shift; - my $record = pop @{$self->{'arg_list'}}; - my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; - for my $arg (@$record) { - if(defined $arg) { - for my $perlexpr (keys %{$self->{'replacecount'}}) { - $self->{'len'}{$perlexpr} -= - length $arg->replace($perlexpr,$quote_arg,$self); - } - } - } - return $record; -} - -sub pop_all { - # Remove all arguments and zeros the length of replacement strings - # Returns: - # all records - my $self = shift; - my @popped = @{$self->{'arg_list'}}; - for my $replacement_string (keys %{$self->{'replacecount'}}) { - $self->{'len'}{$replacement_string} = 0; - } - $self->{'arg_list'} = []; - return @popped; -} - -sub number_of_args { - # The number of records - # Returns: - # number of records - my $self = shift; - # Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd - # Mdzrvadp Nvmdymeaz az 2011-04-10. Ftue oaawuq dqxqmeqp az - # 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq - # qymux oaawuq@fmzsq.pw itqz kag dqmp ftue. - # - # U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG - # Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq - # oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq - # eagdoq oapq. - # - # Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz - # ftqk dqmp ftue. Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq - # ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq - # eagdoq oapq qcguhmxqzf fa m pgefk oadzqd. 
Fa ymwq egdq ftq - # oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq - # eagdoq oapq U daf13'qp ftq eagdoq oapq - # tffb://qz.iuwubqpum.ads/iuwu/DAF13 - # - # 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk - # ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq - # tmp fa nq daf13'qp. - # - # Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita - # mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq - # oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz. - # - # This is really the number of records - return $#{$self->{'arg_list'}}+1; -} - -sub number_of_recargs { - # The number of args in records - # Returns: - # number of args records - my $self = shift; - my $sum = 0; - my $nrec = scalar @{$self->{'arg_list'}}; - if($nrec) { - $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); - } - return $sum; -} - -sub args_as_string { - # Returns: - # all unmodified arguments joined with ' ' (similar to {}) - my $self = shift; - return (join " ", map { $_->orig() } - map { @$_ } @{$self->{'arg_list'}}); -} - -sub args_as_dirname { - # Returns: - # all unmodified arguments joined with '/' (similar to {}) - # \t \0 \\ and / are quoted as: \t \0 \\ \_ - # If $Global::max_file_length: Keep subdirs < $Global::max_file_length - my $self = shift; - my @res = (); - - for my $rec_ref (@{$self->{'arg_list'}}) { - # If headers are used, sort by them. - # Otherwise keep the order from the command line. - my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); - for my $n (@header_indexes_sorted) { - CORE::push(@res, - $Global::input_source_header{$n}, - map { my $s = $_; - # \t \0 \\ and / are quoted as: \t \0 \\ \_ - $s =~ s/\\/\\\\/g; - $s =~ s/\t/\\t/g; - $s =~ s/\0/\\0/g; - $s =~ s:/:\\_:g; - if($Global::max_file_length) { - # Keep each subdir shorter than the longest - # allowed file name - $s = substr($s,0,$Global::max_file_length); - } - $s; } - $rec_ref->[$n-1]->orig()); - } - } - return join "/", @res; -} - -sub header_indexes_sorted { - # Sort headers first by number then by name. - # E.g.: 1a 1b 11a 11b - # Returns: - # Indexes of %Global::input_source_header sorted - my $max_col = shift; - - no warnings 'numeric'; - for my $col (1 .. $max_col) { - # Make sure the header is defined. If it is not: use column number - if(not defined $Global::input_source_header{$col}) { - $Global::input_source_header{$col} = $col; - } - } - my @header_indexes_sorted = sort { - # Sort headers numerically then asciibetically - $Global::input_source_header{$a} <=> $Global::input_source_header{$b} - or - $Global::input_source_header{$a} cmp $Global::input_source_header{$b} - } 1 .. 
$max_col; - return @header_indexes_sorted; -} - -sub len { - # Uses: - # $opt::shellquote - # The length of the command line with args substituted - my $self = shift; - my $len = 0; - # Add length of the original command with no args - # Length of command w/ all replacement args removed - $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; - ::debug("length", "noncontext + command: $len\n"); - my $recargs = $self->number_of_recargs(); - if($self->{'context_replace'}) { - # Context is duplicated for each arg - $len += $recargs * $self->{'len'}{'context'}; - for my $replstring (keys %{$self->{'replacecount'}}) { - # If the replacements string is more than once: mulitply its length - $len += $self->{'len'}{$replstring} * - $self->{'replacecount'}{$replstring}; - ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", - $self->{'replacecount'}{$replstring}, "\n"); - } - # echo 11 22 33 44 55 66 77 88 99 1010 - # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 - # 5 + ctxgrp*arg - ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, - " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); - # Add space between context groups - $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); - } else { - # Each replacement string may occur several times - # Add the length for each time - $len += 1*$self->{'len'}{'context'}; - ::debug("length", "context+noncontext + command: $len\n"); - for my $replstring (keys %{$self->{'replacecount'}}) { - # (space between regargs + length of replacement) - # * number this replacement is used - $len += ($recargs -1 + $self->{'len'}{$replstring}) * - $self->{'replacecount'}{$replstring}; - } - } - if($opt::nice) { - # Pessimistic length if --nice is set - # Worse than worst case: every char needs to be quoted with \ - $len *= 2; - } - if($Global::quoting) { - # Pessimistic length if -q is set - # Worse than worst case: every char needs to be quoted with \ - $len *= 2; - } - if($opt::shellquote) { - # Pessimistic length if --shellquote is set - # Worse than worst case: every char needs to be quoted with \ twice - $len *= 4; - } - # If we are using --env, add the prefix for that, too. - $len += $Global::envvarlen; - - return $len; -} - -sub replaced { - # Uses: - # $Global::noquote - # $Global::quoting - # Returns: - # $replaced = command with place holders replaced and prepended - my $self = shift; - if(not defined $self->{'replaced'}) { - # Don't quote arguments if the input is the full command line - my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; - $self->{'replaced'} = $self->replace_placeholders($self->{'command'},$Global::quoting,$quote_arg); - my $len = length $self->{'replaced'}; - if ($len != $self->len()) { - ::debug("length", $len, " != ", $self->len(), " ", $self->{'replaced'}, "\n"); - } else { - ::debug("length", $len, " == ", $self->len(), " ", $self->{'replaced'}, "\n"); - } - } - return $self->{'replaced'}; -} - -sub replace_placeholders { - # Replace foo{}bar with fooargbar - # Input: - # $targetref = command as shell words - # $quote = should everything be quoted? - # $quote_arg = should replaced arguments be quoted? 
- # Returns: - # @target with placeholders replaced - my $self = shift; - my $targetref = shift; - my $quote = shift; - my $quote_arg = shift; - my $context_replace = $self->{'context_replace'}; - my @target = @$targetref; - ::debug("replace", "Replace @target\n"); - # -X = context replace - # maybe multiple input sources - # maybe --xapply - if(not @target) { - # @target is empty: Return empty array - return @target; - } - # Fish out the words that have replacement strings in them - my %word; - for (@target) { - my $tt = $_; - ::debug("replace", "Target: $tt"); - # a{1}b{}c{}d - # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d - # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d - # A B C => aAbA B CcA B Cd - # -X A B C => aAbAcAd aAbBcBd aAbCcCd - - if($context_replace) { - while($tt =~ s/([^\s\257]* # before {= - (?: - \257< # {= - [^\257]*? # The perl expression - \257> # =} - [^\s\257]* # after =} - )+)/ /x) { - # $1 = pre \257 perlexpr \257 post - $word{"$1"} ||= 1; - } - } else { - while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) { - # $f = \257 perlexpr \257 - $word{$1} ||= 1; - } - } - } - my @word = keys %word; - - my %replace; - my @arg; - for my $record (@{$self->{'arg_list'}}) { - # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] - # Merge arg-objects from records into @arg for easy access - CORE::push @arg, @$record; - } - # Add one arg if empty to allow {#} and {%} to be computed only once - if(not @arg) { @arg = (Arg->new("")); } - # Number of arguments - used for positional arguments - my $n = $#_+1; - - # This is actually a CommandLine-object, - # but it looks nice to be able to say {= $job->slot() =} - my $job = $self; - for my $word (@word) { - # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF - my $w = $word; - ::debug("replace", "Replacing in $w\n"); - - # Replace positional arguments - $w =~ s< ([^\s\257]*) # before {= - \257< # {= - (-?\d+) # Position (eg. -2 or 3) - ([^\257]*?) # The perl expression - \257> # =} - ([^\s\257]*) # after =} - > - { $1. # Context (pre) - ( - $arg[$2 > 0 ? $2-1 : $n+$2] ? # If defined: replace - $arg[$2 > 0 ? $2-1 : $n+$2]->replace($3,$quote_arg,$self) - : "") - .$4 }egx;# Context (post) - ::debug("replace", "Positional replaced $word with: $w\n"); - - if($w !~ /\257/) { - # No more replacement strings in $w: No need to do more - if($quote) { - CORE::push(@{$replace{::shell_quote($word)}}, $w); - } else { - CORE::push(@{$replace{$word}}, $w); - } - next; - } - # for each arg: - # compute replacement for each string - # replace replacement strings with replacement in the word value - # push to replace word value - ::debug("replace", "Positional done: $w\n"); - for my $arg (@arg) { - my $val = $w; - my $number_of_replacements = 0; - for my $perlexpr (keys %{$self->{'replacecount'}}) { - # Replace {= perl expr =} with value for each arg - $number_of_replacements += - $val =~ s{\257<\Q$perlexpr\E\257>} - {$arg ? 
$arg->replace($perlexpr,$quote_arg,$self) : ""}eg; - } - my $ww = $word; - if($quote) { - $ww = ::shell_quote_scalar($word); - $val = ::shell_quote_scalar($val); - } - if($number_of_replacements) { - CORE::push(@{$replace{$ww}}, $val); - } - } - } - - if($quote) { - @target = ::shell_quote(@target); - } - # ::debug("replace", "%replace=",::my_dump(%replace),"\n"); - if(%replace) { - # Substitute the replace strings with the replacement values - # Must be sorted by length if a short word is a substring of a long word - my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s } - sort { length $b <=> length $a } keys %replace); - for(@target) { - s/($regexp)/join(" ",@{$replace{$1}})/ge; - } - } - ::debug("replace", "Return @target\n"); - return wantarray ? @target : "@target"; -} - - -package CommandLineQueue; - -sub new { - my $class = shift; - my $commandref = shift; - my $read_from = shift; - my $context_replace = shift; - my $max_number_of_args = shift; - my $return_files = shift; - my @unget = (); - my ($count,%replacecount,$posrpl,$perlexpr,%len); - my @command = @$commandref; - # If the first command start with '-' it is probably an option - if($command[0] =~ /^\s*(-\S+)/) { - # Is this really a command in $PATH starting with '-'? - my $cmd = $1; - if(not ::which($cmd)) { - ::error("Command ($cmd) starts with '-'. Is this a wrong option?\n"); - ::wait_and_exit(255); - } - } - # Replace replacement strings with {= perl expr =} - # Protect matching inside {= perl expr =} - # by replacing {= and =} with \257< and \257> - for(@command) { - if(/\257/) { - ::error("Command cannot contain the character \257. Use a function for that.\n"); - ::wait_and_exit(255); - } - s/\Q$Global::parensleft\E(.*?)\Q$Global::parensright\E/\257<$1\257>/gx; - } - for my $rpl (keys %Global::rpl) { - # Replace the short hand string with the {= perl expr =} in $command and $opt::tagstring - # Avoid replacing inside existing {= perl expr =} - for(@command,@Global::ret_files) { - while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257> - \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/xg) { - } - } - if(defined $opt::tagstring) { - for($opt::tagstring) { - while(s/((^|\257>)[^\257]*?) 
# Don't replace after \257 unless \257> - \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/x) {} - } - } - # Do the same for the positional replacement strings - # A bit harder as we have to put in the position number - $posrpl = $rpl; - if($posrpl =~ s/^\{//) { - # Only do this if the shorthand start with { - for(@command,@Global::ret_files) { - s/\{(-?\d+)\Q$posrpl\E/\257<$1 $Global::rpl{$rpl}\257>/g; - } - if(defined $opt::tagstring) { - $opt::tagstring =~ s/\{(-?\d+)\Q$posrpl\E/\257<$1 $perlexpr\257>/g; - } - } - } - my $sum = 0; - while($sum == 0) { - # Count how many times each replacement string is used - my @cmd = @command; - my $contextlen = 0; - my $noncontextlen = 0; - my $contextgroups = 0; - for my $c (@cmd) { - while($c =~ s/ \257<([^\257]*?)\257> /\000/x) { - # %replacecount = { "perlexpr" => number of times seen } - # e.g { "$_++" => 2 } - $replacecount{$1} ++; - $sum++; - } - # Measure the length of the context around the {= perl expr =} - # Use that {=...=} has been replaced with \000 above - # So there is no need to deal with \257< - while($c =~ s/ (\S*\000\S*) //x) { - my $w = $1; - $w =~ tr/\000//d; # Remove all \000's - $contextlen += length($w); - $contextgroups++; - } - # All {= perl expr =} have been removed: The rest is non-context - $noncontextlen += length $c; - } - if($opt::tagstring) { - my $t = $opt::tagstring; - while($t =~ s/ \257<([^\257]*)\257> //x) { - # %replacecount = { "perlexpr" => number of times seen } - # e.g { "$_++" => 2 } - # But for tagstring we just need to mark it as seen - $replacecount{$1}||=1; - } - } - - $len{'context'} = 0+$contextlen; - $len{'noncontext'} = $noncontextlen; - $len{'contextgroups'} = $contextgroups; - $len{'noncontextgroups'} = @cmd-$contextgroups; - ::debug("length", "@command Context: ", $len{'context'}, - " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, - " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); - if($sum == 0) { - # Default command = {} - # If not replacement string: append {} - if(not @command) { - @command = ("\257<\257>"); - $Global::noquote = 1; - } elsif(($opt::pipe or $opt::pipepart) - and not $opt::fifo and not $opt::cat) { - # With --pipe / --pipe-part you can have no replacement - last; - } else { - # Append {} to the command if there are no {...}'s and no {=...=} - push @command, ("\257<\257>"); - } - } - } - - return bless { - 'unget' => \@unget, - 'command' => \@command, - 'replacecount' => \%replacecount, - 'arg_queue' => RecordQueue->new($read_from,$opt::colsep), - 'context_replace' => $context_replace, - 'len' => \%len, - 'max_number_of_args' => $max_number_of_args, - 'size' => undef, - 'return_files' => $return_files, - 'seq' => 1, - }, ref($class) || $class; -} - -sub get { - my $self = shift; - if(@{$self->{'unget'}}) { - my $cmd_line = shift @{$self->{'unget'}}; - return ($cmd_line); - } else { - my $cmd_line; - $cmd_line = CommandLine->new($self->seq(), - $self->{'command'}, - $self->{'arg_queue'}, - $self->{'context_replace'}, - $self->{'max_number_of_args'}, - $self->{'return_files'}, - $self->{'replacecount'}, - $self->{'len'}, - ); - $cmd_line->populate(); - ::debug("init","cmd_line->number_of_args ", - $cmd_line->number_of_args(), "\n"); - if($opt::pipe or $opt::pipepart) { - if($cmd_line->replaced() eq "") { - # Empty command - pipe requires a command - ::error("--pipe must have a command to pipe into (e.g. 'cat').\n"); - ::wait_and_exit(255); - } - } else { - if($cmd_line->number_of_args() == 0) { - # We did not get more args - maybe at EOF string? 
- return undef; - } elsif($cmd_line->replaced() eq "") { - # Empty command - get the next instead - return $self->get(); - } - } - $self->set_seq($self->seq()+1); - return $cmd_line; - } -} - -sub unget { - my $self = shift; - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty(); - ::debug("run", "CommandLineQueue->empty $empty"); - return $empty; -} - -sub seq { - my $self = shift; - return $self->{'seq'}; -} - -sub set_seq { - my $self = shift; - $self->{'seq'} = shift; -} - -sub quote_args { - my $self = shift; - # If there is not command emulate |bash - return $self->{'command'}; -} - -sub size { - my $self = shift; - if(not $self->{'size'}) { - my @all_lines = (); - while(not $self->{'arg_queue'}->empty()) { - push @all_lines, CommandLine->new($self->{'command'}, - $self->{'arg_queue'}, - $self->{'context_replace'}, - $self->{'max_number_of_args'}); - } - $self->{'size'} = @all_lines; - $self->unget(@all_lines); - } - return $self->{'size'}; -} - - -package Limits::Command; - -# Maximal command line length (for -m and -X) -sub max_length { - # Find the max_length of a command line and cache it - # Returns: - # number of chars on the longest command line allowed - if(not $Limits::Command::line_max_len) { - # Disk cache of max command line length - my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname(); - my $cached_limit; - if(-e $len_cache) { - open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache"); - $cached_limit = <$fh>; - close $fh; - } else { - $cached_limit = real_max_length(); - # If $HOME is write protected: Do not fail - mkdir($ENV{'HOME'} . "/.parallel"); - mkdir($ENV{'HOME'} . "/.parallel/tmp"); - open(my $fh, ">", $len_cache); - print $fh $cached_limit; - close $fh; - } - $Limits::Command::line_max_len = $cached_limit; - if($opt::max_chars) { - if($opt::max_chars <= $cached_limit) { - $Limits::Command::line_max_len = $opt::max_chars; - } else { - ::warning("Value for -s option ", - "should be < $cached_limit.\n"); - } - } - } - return $Limits::Command::line_max_len; -} - -sub real_max_length { - # Find the max_length of a command line - # Returns: - # The maximal command line length - # Use an upper bound of 8 MB if the shell allows for for infinite long lengths - my $upper = 8_000_000; - my $len = 8; - do { - if($len > $upper) { return $len }; - $len *= 16; - } while (is_acceptable_command_line_length($len)); - # Then search for the actual max length between 0 and upper bound - return binary_find_max_length(int($len/16),$len); -} - -sub binary_find_max_length { - # Given a lower and upper bound find the max_length of a command line - # Returns: - # number of chars on the longest command line allowed - my ($lower, $upper) = (@_); - if($lower == $upper or $lower == $upper-1) { return $lower; } - my $middle = int (($upper-$lower)/2 + $lower); - ::debug("init", "Maxlen: $lower,$upper,$middle : "); - if (is_acceptable_command_line_length($middle)) { - return binary_find_max_length($middle,$upper); - } else { - return binary_find_max_length($lower,$middle); - } -} - -sub is_acceptable_command_line_length { - # Test if a command line of this length can run - # Returns: - # 0 if the command line length is too long - # 1 otherwise - my $len = shift; - - local *STDERR; - open (STDERR, ">", "/dev/null"); - system "true "."x"x$len; - close STDERR; - ::debug("init", "$len=$? 
"); - return not $?; -} - - -package RecordQueue; - -sub new { - my $class = shift; - my $fhs = shift; - my $colsep = shift; - my @unget = (); - my $arg_sub_queue; - if($colsep) { - # Open one file with colsep - $arg_sub_queue = RecordColQueue->new($fhs); - } else { - # Open one or more files if multiple -a - $arg_sub_queue = MultifileQueue->new($fhs); - } - return bless { - 'unget' => \@unget, - 'arg_number' => 0, - 'arg_sub_queue' => $arg_sub_queue, - }, ref($class) || $class; -} - -sub get { - # Returns: - # reference to array of Arg-objects - my $self = shift; - if(@{$self->{'unget'}}) { - $self->{'arg_number'}++; - return shift @{$self->{'unget'}}; - } - my $ret = $self->{'arg_sub_queue'}->get(); - if(defined $Global::max_number_of_args - and $Global::max_number_of_args == 0) { - ::debug("run", "Read 1 but return 0 args\n"); - return [Arg->new("")]; - } else { - return $ret; - } -} - -sub unget { - my $self = shift; - ::debug("run", "RecordQueue-unget '@_'\n"); - $self->{'arg_number'} -= @_; - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = not @{$self->{'unget'}}; - $empty &&= $self->{'arg_sub_queue'}->empty(); - ::debug("run", "RecordQueue->empty $empty"); - return $empty; -} - -sub arg_number { - my $self = shift; - return $self->{'arg_number'}; -} - - -package RecordColQueue; - -sub new { - my $class = shift; - my $fhs = shift; - my @unget = (); - my $arg_sub_queue = MultifileQueue->new($fhs); - return bless { - 'unget' => \@unget, - 'arg_sub_queue' => $arg_sub_queue, - }, ref($class) || $class; -} - -sub get { - # Returns: - # reference to array of Arg-objects - my $self = shift; - if(@{$self->{'unget'}}) { - return shift @{$self->{'unget'}}; - } - my $unget_ref=$self->{'unget'}; - if($self->{'arg_sub_queue'}->empty()) { - return undef; - } - my $in_record = $self->{'arg_sub_queue'}->get(); - if(defined $in_record) { - my @out_record = (); - for my $arg (@$in_record) { - ::debug("run", "RecordColQueue::arg $arg\n"); - my $line = $arg->orig(); - ::debug("run", "line='$line'\n"); - if($line ne "") { - for my $s (split /$opt::colsep/o, $line, -1) { - push @out_record, Arg->new($s); - } - } else { - push @out_record, Arg->new(""); - } - } - return \@out_record; - } else { - return undef; - } -} - -sub unget { - my $self = shift; - ::debug("run", "RecordColQueue-unget '@_'\n"); - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty()); - ::debug("run", "RecordColQueue->empty $empty"); - return $empty; -} - - -package MultifileQueue; - -@Global::unget_argv=(); - -sub new { - my $class = shift; - my $fhs = shift; - for my $fh (@$fhs) { - if(-t $fh) { - ::warning("Input is read from the terminal. ". - "Only experts do this on purpose. ". 
- "Press CTRL-D to exit.\n"); - } - } - return bless { - 'unget' => \@Global::unget_argv, - 'fhs' => $fhs, - 'arg_matrix' => undef, - }, ref($class) || $class; -} - -sub get { - my $self = shift; - if($opt::xapply) { - return $self->xapply_get(); - } else { - return $self->nest_get(); - } -} - -sub unget { - my $self = shift; - ::debug("run", "MultifileQueue-unget '@_'\n"); - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = (not @Global::unget_argv - and not @{$self->{'unget'}}); - for my $fh (@{$self->{'fhs'}}) { - $empty &&= eof($fh); - } - ::debug("run", "MultifileQueue->empty $empty "); - return $empty; -} - -sub xapply_get { - my $self = shift; - if(@{$self->{'unget'}}) { - return shift @{$self->{'unget'}}; - } - my @record = (); - my $prepend = undef; - my $empty = 1; - for my $fh (@{$self->{'fhs'}}) { - my $arg = read_arg_from_fh($fh); - if(defined $arg) { - # Record $arg for recycling at end of file - push @{$self->{'arg_matrix'}{$fh}}, $arg; - push @record, $arg; - $empty = 0; - } else { - ::debug("run", "EOA "); - # End of file: Recycle arguments - push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}}; - # return last @{$args->{'args'}{$fh}}; - push @record, @{$self->{'arg_matrix'}{$fh}}[-1]; - } - } - if($empty) { - return undef; - } else { - return \@record; - } -} - -sub nest_get { - my $self = shift; - if(@{$self->{'unget'}}) { - return shift @{$self->{'unget'}}; - } - my @record = (); - my $prepend = undef; - my $empty = 1; - my $no_of_inputsources = $#{$self->{'fhs'}} + 1; - if(not $self->{'arg_matrix'}) { - # Initialize @arg_matrix with one arg from each file - # read one line from each file - my @first_arg_set; - my $all_empty = 1; - for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { - my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); - if(defined $arg) { - $all_empty = 0; - } - $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); - push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; - } - if($all_empty) { - # All filehandles were at eof or eof-string - return undef; - } - return [@first_arg_set]; - } - - # Treat the case with one input source special. For multiple - # input sources we need to remember all previously read values to - # generate all combinations. But for one input source we can - # forget the value after first use. - if($no_of_inputsources == 1) { - my $arg = read_arg_from_fh($self->{'fhs'}[0]); - if(defined($arg)) { - return [$arg]; - } - return undef; - } - for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { - if(eof($self->{'fhs'}[$fhno])) { - next; - } else { - # read one - my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); - defined($arg) || next; # If we just read an EOF string: Treat this as EOF - my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; - $self->{'arg_matrix'}[$fhno][$len] = $arg; - # make all new combinations - my @combarg = (); - for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { - push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}]; - } - $combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry - # map combinations - # [ 1, 3, 7 ], [ 2, 4, 1 ] - # => - # [ m[0][1], m[1][3], m[3][7] ], [ m[0][2], m[1][4], m[2][1] ] - my @mapped; - for my $c (expand_combinations(@combarg)) { - my @a; - for my $n (0 .. 
$no_of_inputsources - 1 ) { - push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; - } - push @mapped, \@a; - } - # append the mapped to the ungotten arguments - push @{$self->{'unget'}}, @mapped; - # get the first - return shift @{$self->{'unget'}}; - } - } - # all are eof or at EOF string; return from the unget queue - return shift @{$self->{'unget'}}; -} - -sub read_arg_from_fh { - # Read one Arg from filehandle - # Returns: - # Arg-object with one read line - # undef if end of file - my $fh = shift; - my $prepend = undef; - my $arg; - do {{ - # This makes 10% faster - if(not ($arg = <$fh>)) { - if(defined $prepend) { - return Arg->new($prepend); - } else { - return undef; - } - } -# ::debug("run", "read $arg\n"); - # Remove delimiter - $arg =~ s:$/$::; - if($Global::end_of_file_string and - $arg eq $Global::end_of_file_string) { - # Ignore the rest of input file - close $fh; - ::debug("run", "EOF-string ($arg) met\n"); - if(defined $prepend) { - return Arg->new($prepend); - } else { - return undef; - } - } - if(defined $prepend) { - $arg = $prepend.$arg; # For line continuation - $prepend = undef; #undef; - } - if($Global::ignore_empty) { - if($arg =~ /^\s*$/) { - redo; # Try the next line - } - } - if($Global::max_lines) { - if($arg =~ /\s$/) { - # Trailing space => continued on next line - $prepend = $arg; - redo; - } - } - }} while (1 == 0); # Dummy loop {{}} for redo - if(defined $arg) { - return Arg->new($arg); - } else { - ::die_bug("multiread arg undefined"); - } -} - -sub expand_combinations { - # Input: - # ([xmin,xmax], [ymin,ymax], ...) - # Returns: ([x,y,...],[x,y,...]) - # where xmin <= x <= xmax and ymin <= y <= ymax - my $minmax_ref = shift; - my $xmin = $$minmax_ref[0]; - my $xmax = $$minmax_ref[1]; - my @p; - if(@_) { - # If there are more columns: Compute those recursively - my @rest = expand_combinations(@_); - for(my $x = $xmin; $x <= $xmax; $x++) { - push @p, map { [$x, @$_] } @rest; - } - } else { - for(my $x = $xmin; $x <= $xmax; $x++) { - push @p, [$x]; - } - } - return @p; -} - - -package Arg; - -sub new { - my $class = shift; - my $orig = shift; - my @hostgroups; - if($opt::hostgroups) { - if($orig =~ s:@(.+)::) { - # We found hostgroups on the arg - @hostgroups = split(/\+/, $1); - if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { - ::warning("No such hostgroup (@hostgroups)\n"); - @hostgroups = (keys %Global::hostgroups); - } - } else { - @hostgroups = (keys %Global::hostgroups); - } - } - return bless { - 'orig' => $orig, - 'hostgroups' => \@hostgroups, - }, ref($class) || $class; -} - -sub replace { - # Calculates the corresponding value for a given perl expression - # Returns: - # The calculated string (quoted if asked for) - my $self = shift; - my $perlexpr = shift; # E.g. $_=$_ or s/.gz// - my $quote = (shift) ? 1 : 0; # should the string be quoted? - # This is actually a CommandLine-object, - # but it looks nice to be able to say {= $job->slot() =} - my $job = shift; - $perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace - if(not defined $self->{"rpl",0,$perlexpr}) { - local $_; - if($Global::trim eq "n") { - $_ = $self->{'orig'}; - } else { - $_ = trim_of($self->{'orig'}); - } - ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); - if(not $Global::perleval{$perlexpr}) { - # Make an anonymous function of the $perlexpr - # And more importantly: Compile it only once - if($Global::perleval{$perlexpr} = - eval('sub { no strict; no warnings; my $job = shift; '. - $perlexpr.' }')) { - # All is good - } else { - # The eval failed. 
Maybe $perlexpr is invalid perl? - ::error("Cannot use $perlexpr: $@\n"); - ::wait_and_exit(255); - } - } - # Execute the function - $Global::perleval{$perlexpr}->($job); - $self->{"rpl",0,$perlexpr} = $_; - } - if(not defined $self->{"rpl",$quote,$perlexpr}) { - $self->{"rpl",1,$perlexpr} = - ::shell_quote_scalar($self->{"rpl",0,$perlexpr}); - } - return $self->{"rpl",$quote,$perlexpr}; -} - -sub orig { - my $self = shift; - return $self->{'orig'}; -} - -sub trim_of { - # Removes white space as specifed by --trim: - # n = nothing - # l = start - # r = end - # lr|rl = both - # Returns: - # string with white space removed as needed - my @strings = map { defined $_ ? $_ : "" } (@_); - my $arg; - if($Global::trim eq "n") { - # skip - } elsif($Global::trim eq "l") { - for my $arg (@strings) { $arg =~ s/^\s+//; } - } elsif($Global::trim eq "r") { - for my $arg (@strings) { $arg =~ s/\s+$//; } - } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { - for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } - } else { - ::error("--trim must be one of: r l rl lr.\n"); - ::wait_and_exit(255); - } - return wantarray ? @strings : "@strings"; -} - - -package TimeoutQueue; - -sub new { - my $class = shift; - my $delta_time = shift; - my ($pct); - if($delta_time =~ /(\d+(\.\d+)?)%/) { - # Timeout in percent - $pct = $1/100; - $delta_time = 1_000_000; - } - return bless { - 'queue' => [], - 'delta_time' => $delta_time, - 'pct' => $pct, - 'remedian_idx' => 0, - 'remedian_arr' => [], - 'remedian' => undef, - }, ref($class) || $class; -} - -sub delta_time { - my $self = shift; - return $self->{'delta_time'}; -} - -sub set_delta_time { - my $self = shift; - $self->{'delta_time'} = shift; -} - -sub remedian { - my $self = shift; - return $self->{'remedian'}; -} - -sub set_remedian { - # Set median of the last 999^3 (=997002999) values using Remedian - # - # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A - # robust averaging method for large data sets." Journal of the - # American Statistical Association 85.409 (1990): 97-104. - my $self = shift; - my $val = shift; - my $i = $self->{'remedian_idx'}++; - my $rref = $self->{'remedian_arr'}; - $rref->[0][$i%999] = $val; - $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; - $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; - $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; -} - -sub update_delta_time { - # Update delta_time based on runtime of finished job if timeout is - # a percentage - my $self = shift; - my $runtime = shift; - if($self->{'pct'}) { - $self->set_remedian($runtime); - $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); - ::debug("run", "Timeout: $self->{'delta_time'}s "); - } -} - -sub process_timeouts { - # Check if there was a timeout - my $self = shift; - # $self->{'queue'} is sorted by start time - while (@{$self->{'queue'}}) { - my $job = $self->{'queue'}[0]; - if($job->endtime()) { - # Job already finished. 
No need to timeout the job - # This could be because of --keep-order - shift @{$self->{'queue'}}; - } elsif($job->timedout($self->{'delta_time'})) { - # Need to shift off queue before kill - # because kill calls usleep that calls process_timeouts - shift @{$self->{'queue'}}; - $job->kill(); - } else { - # Because they are sorted by start time the rest are later - last; - } - } -} - -sub insert { - my $self = shift; - my $in = shift; - push @{$self->{'queue'}}, $in; -} - - -package Semaphore; - -# This package provides a counting semaphore -# -# If a process dies without releasing the semaphore the next process -# that needs that entry will clean up dead semaphores -# -# The semaphores are stored in ~/.parallel/semaphores/id- Each -# file in ~/.parallel/semaphores/id-/ is the process ID of the -# process holding the entry. If the process dies, the entry can be -# taken by another process. - -sub new { - my $class = shift; - my $id = shift; - my $count = shift; - $id=~s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex - $id="id-".$id; # To distinguish it from a process id - my $parallel_dir = $ENV{'HOME'}."/.parallel"; - -d $parallel_dir or mkdir_or_die($parallel_dir); - my $parallel_locks = $parallel_dir."/semaphores"; - -d $parallel_locks or mkdir_or_die($parallel_locks); - my $lockdir = "$parallel_locks/$id"; - my $lockfile = $lockdir.".lock"; - if($count < 1) { ::die_bug("semaphore-count: $count"); } - return bless { - 'lockfile' => $lockfile, - 'lockfh' => Symbol::gensym(), - 'lockdir' => $lockdir, - 'id' => $id, - 'idfile' => $lockdir."/".$id, - 'pid' => $$, - 'pidfile' => $lockdir."/".$$.'@'.::hostname(), - 'count' => $count + 1 # nlinks returns a link for the 'id-' as well - }, ref($class) || $class; -} - -sub acquire { - my $self = shift; - my $sleep = 1; # 1 ms - my $start_time = time; - while(1) { - $self->atomic_link_if_count_less_than() and last; - ::debug("sem", "Remove dead locks"); - my $lockdir = $self->{'lockdir'}; - for my $d (glob "$lockdir/*") { - ::debug("sem", "Lock $d $lockdir\n"); - $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; - my ($pid, $host) = ($1, $2); - if($host eq ::hostname()) { - if(not kill 0, $1) { - ::debug("sem", "Dead: $d"); - unlink $d; - } else { - ::debug("sem", "Alive: $d"); - } - } - } - # try again - $self->atomic_link_if_count_less_than() and last; - # Retry slower and slower up to 1 second - $sleep = ($sleep < 1000) ? 
($sleep * 1.1) : ($sleep); - # Random to avoid every sleeping job waking up at the same time - ::usleep(rand()*$sleep); - if(defined($opt::timeout) and - $start_time + $opt::timeout > time) { - # Acquire the lock anyway - if(not -e $self->{'idfile'}) { - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("timeout_write_idfile: $self->{'idfile'}"); - close $fh; - } - link $self->{'idfile'}, $self->{'pidfile'}; - last; - } - } - ::debug("sem", "acquired $self->{'pid'}\n"); -} - -sub release { - my $self = shift; - unlink $self->{'pidfile'}; - if($self->nlinks() == 1) { - # This is the last link, so atomic cleanup - $self->lock(); - if($self->nlinks() == 1) { - unlink $self->{'idfile'}; - rmdir $self->{'lockdir'}; - } - $self->unlock(); - } - ::debug("run", "released $self->{'pid'}\n"); -} - -sub _release { - my $self = shift; - - unlink $self->{'pidfile'}; - $self->lock(); - my $nlinks = $self->nlinks(); - ::debug("sem", $nlinks, "<", $self->{'count'}); - if($nlinks-- > 1) { - unlink $self->{'idfile'}; - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("write_idfile: $self->{'idfile'}"); - print $fh "#"x$nlinks; - close $fh; - } else { - unlink $self->{'idfile'}; - rmdir $self->{'lockdir'}; - } - $self->unlock(); - ::debug("sem", "released $self->{'pid'}\n"); -} - -sub atomic_link_if_count_less_than { - # Link $file1 to $file2 if nlinks to $file1 < $count - my $self = shift; - my $retval = 0; - $self->lock(); - ::debug($self->nlinks(), "<", $self->{'count'}); - if($self->nlinks() < $self->{'count'}) { - -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); - if(not -e $self->{'idfile'}) { - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("write_idfile: $self->{'idfile'}"); - close $fh; - } - $retval = link $self->{'idfile'}, $self->{'pidfile'}; - } - $self->unlock(); - ::debug("run", "atomic $retval"); - return $retval; -} - -sub _atomic_link_if_count_less_than { - # Link $file1 to $file2 if nlinks to $file1 < $count - my $self = shift; - my $retval = 0; - $self->lock(); - my $nlinks = $self->nlinks(); - ::debug("sem", $nlinks, "<", $self->{'count'}); - if($nlinks++ < $self->{'count'}) { - -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); - if(not -e $self->{'idfile'}) { - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("write_idfile: $self->{'idfile'}"); - close $fh; - } - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("write_idfile: $self->{'idfile'}"); - print $fh "#"x$nlinks; - close $fh; - $retval = link $self->{'idfile'}, $self->{'pidfile'}; - } - $self->unlock(); - ::debug("sem", "atomic $retval"); - return $retval; -} - -sub nlinks { - my $self = shift; - if(-e $self->{'idfile'}) { - ::debug("sem", "nlinks", (stat(_))[3], "size", (stat(_))[7], "\n"); - return (stat(_))[3]; - } else { - return 0; - } -} - -sub lock { - my $self = shift; - my $sleep = 100; # 100 ms - my $total_sleep = 0; - $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; - my $locked = 0; - while(not $locked) { - if(tell($self->{'lockfh'}) == -1) { - # File not open - open($self->{'lockfh'}, ">", $self->{'lockfile'}) - or ::debug("run", "Cannot open $self->{'lockfile'}"); - } - if($self->{'lockfh'}) { - # File is open - chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw - if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { - # The file is locked: No need to retry - $locked = 1; - last; - } else { - if ($! 
=~ m/Function not implemented/) { - ::warning("flock: $!"); - ::warning("Will wait for a random while\n"); - ::usleep(rand(5000)); - # File cannot be locked: No need to retry - $locked = 2; - last; - } - } - } - # Locking failed in first round - # Sleep and try again - $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); - # Random to avoid every sleeping job waking up at the same time - ::usleep(rand()*$sleep); - $total_sleep += $sleep; - if($opt::semaphoretimeout) { - if($total_sleep/1000 > $opt::semaphoretimeout) { - # Timeout: bail out - ::warning("Semaphore timed out. Ignoring timeout."); - $locked = 3; - last; - } - } else { - if($total_sleep/1000 > 30) { - ::warning("Semaphore stuck for 30 seconds. Consider using --semaphoretimeout."); - } - } - } - ::debug("run", "locked $self->{'lockfile'}"); -} - -sub unlock { - my $self = shift; - unlink $self->{'lockfile'}; - close $self->{'lockfh'}; - ::debug("run", "unlocked\n"); -} - -sub mkdir_or_die { - # If dir is not writable: die - my $dir = shift; - my @dir_parts = split(m:/:,$dir); - my ($ddir,$part); - while(defined ($part = shift @dir_parts)) { - $part eq "" and next; - $ddir .= "/".$part; - -d $ddir and next; - mkdir $ddir; - } - if(not -w $dir) { - ::error("Cannot write to $dir: $!\n"); - ::wait_and_exit(255); - } -} - -# Keep perl -w happy -$opt::x = $Semaphore::timeout = $Semaphore::wait = -$Job::file_descriptor_warning_printed = 0; diff --git a/build_tools/make_package.sh b/build_tools/make_package.sh deleted file mode 100755 index 68a5d8a72..000000000 --- a/build_tools/make_package.sh +++ /dev/null @@ -1,129 +0,0 @@ -# shellcheck disable=SC1113 -#/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -set -e - -function log() { - echo "[+] $1" -} - -function fatal() { - echo "[!] $1" - exit 1 -} - -function platform() { - local __resultvar=$1 - if [[ -f "/etc/yum.conf" ]]; then - eval $__resultvar="centos" - elif [[ -f "/etc/dpkg/dpkg.cfg" ]]; then - eval $__resultvar="ubuntu" - else - fatal "Unknwon operating system" - fi -} -platform OS - -function package() { - if [[ $OS = "ubuntu" ]]; then - if dpkg --get-selections | grep --quiet $1; then - log "$1 is already installed. skipping." - else - # shellcheck disable=SC2068 - apt-get install $@ -y - fi - elif [[ $OS = "centos" ]]; then - if rpm -qa | grep --quiet $1; then - log "$1 is already installed. skipping." - else - # shellcheck disable=SC2068 - yum install $@ -y - fi - fi -} - -function detect_fpm_output() { - if [[ $OS = "ubuntu" ]]; then - export FPM_OUTPUT=deb - elif [[ $OS = "centos" ]]; then - export FPM_OUTPUT=rpm - fi -} -detect_fpm_output - -function gem_install() { - if gem list | grep --quiet $1; then - log "$1 is already installed. skipping." - else - # shellcheck disable=SC2068 - gem install $@ - fi -} - -function main() { - if [[ $# -ne 1 ]]; then - fatal "Usage: $0 " - else - log "using rocksdb version: $1" - fi - - if [[ -d /vagrant ]]; then - if [[ $OS = "ubuntu" ]]; then - package g++-4.8 - export CXX=g++-4.8 - - # the deb would depend on libgflags2, but the static lib is the only thing - # installed by make install - package libgflags-dev - - package ruby-all-dev - elif [[ $OS = "centos" ]]; then - pushd /etc/yum.repos.d - if [[ ! 
-f /etc/yum.repos.d/devtools-1.1.repo ]]; then - wget http://people.centos.org/tru/devtools-1.1/devtools-1.1.repo - fi - package devtoolset-1.1-gcc --enablerepo=testing-1.1-devtools-6 - package devtoolset-1.1-gcc-c++ --enablerepo=testing-1.1-devtools-6 - export CC=/opt/centos/devtoolset-1.1/root/usr/bin/gcc - export CPP=/opt/centos/devtoolset-1.1/root/usr/bin/cpp - export CXX=/opt/centos/devtoolset-1.1/root/usr/bin/c++ - export PATH=$PATH:/opt/centos/devtoolset-1.1/root/usr/bin - popd - if ! rpm -qa | grep --quiet gflags; then - rpm -i https://github.com/schuhschuh/gflags/releases/download/v2.1.0/gflags-devel-2.1.0-1.amd64.rpm - fi - - package ruby - package ruby-devel - package rubygems - package rpm-build - fi - fi - gem_install fpm - - make static_lib - LIBDIR=/usr/lib - if [[ $FPM_OUTPUT = "rpm" ]]; then - LIBDIR=$(rpm --eval '%_libdir') - fi - - rm -rf package - make install DESTDIR=package PREFIX=/usr LIBDIR=$LIBDIR - - fpm \ - -s dir \ - -t $FPM_OUTPUT \ - -C package \ - -n rocksdb \ - -v $1 \ - --url http://rocksdb.org/ \ - -m rocksdb@fb.com \ - --license BSD \ - --vendor Facebook \ - --description "RocksDB is an embeddable persistent key-value store for fast storage." \ - usr -} - -# shellcheck disable=SC2068 -main $@ diff --git a/build_tools/ps_with_stack b/build_tools/ps_with_stack deleted file mode 100755 index ee4256965..000000000 --- a/build_tools/ps_with_stack +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env perl - -use strict; - -open(my $ps, "-|", "ps -wwf"); -my $cols_known = 0; -my $cmd_col = 0; -my $pid_col = 0; -while (<$ps>) { - print; - my @cols = split(/\s+/); - - if (!$cols_known && /CMD/) { - # Parse relevant ps column headers - for (my $i = 0; $i <= $#cols; $i++) { - if ($cols[$i] eq "CMD") { - $cmd_col = $i; - } - if ($cols[$i] eq "PID") { - $pid_col = $i; - } - } - $cols_known = 1; - } else { - my $pid = $cols[$pid_col]; - my $cmd = $cols[$cmd_col]; - # Match numeric PID and relative path command - # -> The intention is only to dump stack traces for hangs in code under - # test, which means we probably just built it and are executing by - # relative path (e.g. ./my_test or foo/bar_test) rather then by absolute - # path (e.g. /usr/bin/time) or PATH search (e.g. grep). - if ($pid =~ /^[0-9]+$/ && $cmd =~ /^[^\/ ]+[\/]/) { - print "Dumping stacks for $pid...\n"; - system("pstack $pid || gdb -batch -p $pid -ex 'thread apply all bt'"); - } - } -} -close $ps; diff --git a/build_tools/regression_build_test.sh b/build_tools/regression_build_test.sh deleted file mode 100755 index 5ecdb1d21..000000000 --- a/build_tools/regression_build_test.sh +++ /dev/null @@ -1,396 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -set -e - -NUM=10000000 - -if [ $# -eq 1 ];then - DATA_DIR=$1 -elif [ $# -eq 2 ];then - DATA_DIR=$1 - STAT_FILE=$2 -fi - -# On the production build servers, set data and stat -# files/directories not in /tmp or else the tempdir cleaning -# scripts will make you very unhappy. 
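-# The two assignments below use bash default-value expansion: ${DATA_DIR:-...} and -# ${STAT_FILE:-...} fall back to a fresh mktemp path only when the corresponding -# positional argument above was not supplied (or was empty); either way the cleanup -# trap defined next removes $DATA_DIR and $STAT_FILE.* when the script exits.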
-DATA_DIR=${DATA_DIR:-$(mktemp -t -d rocksdb_XXXX)} -STAT_FILE=${STAT_FILE:-$(mktemp -t -u rocksdb_test_stats_XXXX)} - -function cleanup { - rm -rf $DATA_DIR - rm -f $STAT_FILE.* -} - -trap cleanup EXIT - -make release - -# measure fillseq + fill up the DB for overwrite benchmark -./db_bench \ - --benchmarks=fillseq \ - --db=$DATA_DIR \ - --use_existing_db=0 \ - --bloom_bits=10 \ - --num=$NUM \ - --writes=$NUM \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 > ${STAT_FILE}.fillseq - -# measure overwrite performance -./db_bench \ - --benchmarks=overwrite \ - --db=$DATA_DIR \ - --use_existing_db=1 \ - --bloom_bits=10 \ - --num=$NUM \ - --writes=$((NUM / 10)) \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=8 > ${STAT_FILE}.overwrite - -# fill up the db for readrandom benchmark (1GB total size) -./db_bench \ - --benchmarks=fillseq \ - --db=$DATA_DIR \ - --use_existing_db=0 \ - --bloom_bits=10 \ - --num=$NUM \ - --writes=$NUM \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=1 > /dev/null - -# measure readrandom with 6GB block cache -./db_bench \ - --benchmarks=readrandom \ - --db=$DATA_DIR \ - --use_existing_db=1 \ - --bloom_bits=10 \ - --num=$NUM \ - --reads=$((NUM / 5)) \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=16 > ${STAT_FILE}.readrandom - -# measure readrandom with 6GB block cache and tailing iterator -./db_bench \ - --benchmarks=readrandom \ - --db=$DATA_DIR \ - --use_existing_db=1 \ - --bloom_bits=10 \ - --num=$NUM \ - --reads=$((NUM / 5)) \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --open_files=55000 \ - --use_tailing_iterator=1 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=16 > ${STAT_FILE}.readrandomtailing - -# measure readrandom with 100MB block cache -./db_bench \ - --benchmarks=readrandom \ - --db=$DATA_DIR \ - --use_existing_db=1 \ - --bloom_bits=10 \ - --num=$NUM \ - --reads=$((NUM / 5)) \ - --cache_size=104857600 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=16 > ${STAT_FILE}.readrandomsmallblockcache - -# measure readrandom with 8k data in memtable -./db_bench \ - --benchmarks=overwrite,readrandom \ - --db=$DATA_DIR \ - --use_existing_db=1 \ - --bloom_bits=10 \ - --num=$NUM \ - --reads=$((NUM / 5)) \ - --writes=512 \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --write_buffer_size=1000000000 \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=16 > ${STAT_FILE}.readrandom_mem_sst - - -# fill up the db for readrandom benchmark with filluniquerandom (1GB total size) -./db_bench \ - --benchmarks=filluniquerandom \ - --db=$DATA_DIR \ - --use_existing_db=0 \ - --bloom_bits=10 \ - --num=$((NUM / 4)) \ - --writes=$((NUM / 4)) \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - 
--open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=1 > /dev/null - -# dummy test just to compact the data -./db_bench \ - --benchmarks=readrandom \ - --db=$DATA_DIR \ - --use_existing_db=1 \ - --bloom_bits=10 \ - --num=$((NUM / 1000)) \ - --reads=$((NUM / 1000)) \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=16 > /dev/null - -# measure readrandom after load with filluniquerandom with 6GB block cache -./db_bench \ - --benchmarks=readrandom \ - --db=$DATA_DIR \ - --use_existing_db=1 \ - --bloom_bits=10 \ - --num=$((NUM / 4)) \ - --reads=$((NUM / 4)) \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --open_files=55000 \ - --disable_auto_compactions=1 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=16 > ${STAT_FILE}.readrandom_filluniquerandom - -# measure readwhilewriting after load with filluniquerandom with 6GB block cache -./db_bench \ - --benchmarks=readwhilewriting \ - --db=$DATA_DIR \ - --use_existing_db=1 \ - --bloom_bits=10 \ - --num=$((NUM / 4)) \ - --reads=$((NUM / 4)) \ - --benchmark_write_rate_limit=$(( 110 * 1024 )) \ - --write_buffer_size=100000000 \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=16 > ${STAT_FILE}.readwhilewriting - -# measure memtable performance -- none of the data gets flushed to disk -./db_bench \ - --benchmarks=fillrandom,readrandom, \ - --db=$DATA_DIR \ - --use_existing_db=0 \ - --num=$((NUM / 10)) \ - --reads=$NUM \ - --cache_size=6442450944 \ - --cache_numshardbits=6 \ - --table_cache_numshardbits=4 \ - --write_buffer_size=1000000000 \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --value_size=10 \ - --threads=16 > ${STAT_FILE}.memtablefillreadrandom - -common_in_mem_args="--db=/dev/shm/rocksdb \ - --num_levels=6 \ - --key_size=20 \ - --prefix_size=12 \ - --keys_per_prefix=10 \ - --value_size=100 \ - --compression_type=none \ - --compression_ratio=1 \ - --write_buffer_size=134217728 \ - --max_write_buffer_number=4 \ - --level0_file_num_compaction_trigger=8 \ - --level0_slowdown_writes_trigger=16 \ - --level0_stop_writes_trigger=24 \ - --target_file_size_base=134217728 \ - --max_bytes_for_level_base=1073741824 \ - --disable_wal=0 \ - --wal_dir=/dev/shm/rocksdb \ - --sync=0 \ - --verify_checksum=1 \ - --delete_obsolete_files_period_micros=314572800 \ - --use_plain_table=1 \ - --open_files=-1 \ - --mmap_read=1 \ - --mmap_write=0 \ - --bloom_bits=10 \ - --bloom_locality=1 \ - --perf_level=0" - -# prepare a in-memory DB with 50M keys, total DB size is ~6G -./db_bench \ - $common_in_mem_args \ - --statistics=0 \ - --max_background_compactions=16 \ - --max_background_flushes=16 \ - --benchmarks=filluniquerandom \ - --use_existing_db=0 \ - --num=52428800 \ - --threads=1 > /dev/null - -# Readwhilewriting -./db_bench \ - $common_in_mem_args \ - --statistics=1 \ - --max_background_compactions=4 \ - --max_background_flushes=0 \ - --benchmarks=readwhilewriting\ - --use_existing_db=1 \ - --duration=600 \ - --threads=32 \ - --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.readwhilewriting_in_ram - -# Seekrandomwhilewriting -./db_bench \ - $common_in_mem_args \ - --statistics=1 \ - 
--max_background_compactions=4 \ - --max_background_flushes=0 \ - --benchmarks=seekrandomwhilewriting \ - --use_existing_db=1 \ - --use_tailing_iterator=1 \ - --duration=600 \ - --threads=32 \ - --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.seekwhilewriting_in_ram - -# measure fillseq with bunch of column families -./db_bench \ - --benchmarks=fillseq \ - --num_column_families=500 \ - --write_buffer_size=1048576 \ - --db=$DATA_DIR \ - --use_existing_db=0 \ - --num=$NUM \ - --writes=$NUM \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 > ${STAT_FILE}.fillseq_lots_column_families - -# measure overwrite performance with bunch of column families -./db_bench \ - --benchmarks=overwrite \ - --num_column_families=500 \ - --write_buffer_size=1048576 \ - --db=$DATA_DIR \ - --use_existing_db=1 \ - --num=$NUM \ - --writes=$((NUM / 10)) \ - --open_files=55000 \ - --statistics=1 \ - --histogram=1 \ - --disable_wal=1 \ - --sync=0 \ - --threads=8 > ${STAT_FILE}.overwrite_lots_column_families - -# send data to ods -function send_to_ods { - key="$1" - value="$2" - - if [ -z $JENKINS_HOME ]; then - # running on devbox, just print out the values - echo $1 $2 - return - fi - - if [ -z "$value" ];then - echo >&2 "ERROR: Key $key doesn't have a value." - return - fi - curl --silent "https://www.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value" \ - --connect-timeout 60 -} - -function send_benchmark_to_ods { - bench="$1" - bench_key="$2" - file="$3" - - QPS=$(grep $bench $file | awk '{print $5}') - P50_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $3}' ) - P75_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $5}' ) - P99_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $7}' ) - - send_to_ods rocksdb.build.$bench_key.qps $QPS - send_to_ods rocksdb.build.$bench_key.p50_micros $P50_MICROS - send_to_ods rocksdb.build.$bench_key.p75_micros $P75_MICROS - send_to_ods rocksdb.build.$bench_key.p99_micros $P99_MICROS -} - -send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite -send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq -send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom -send_benchmark_to_ods readrandom readrandom_tailing $STAT_FILE.readrandomtailing -send_benchmark_to_ods readrandom readrandom_smallblockcache $STAT_FILE.readrandomsmallblockcache -send_benchmark_to_ods readrandom readrandom_memtable_sst $STAT_FILE.readrandom_mem_sst -send_benchmark_to_ods readrandom readrandom_fillunique_random $STAT_FILE.readrandom_filluniquerandom -send_benchmark_to_ods fillrandom memtablefillrandom $STAT_FILE.memtablefillreadrandom -send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadrandom -send_benchmark_to_ods readwhilewriting readwhilewriting $STAT_FILE.readwhilewriting -send_benchmark_to_ods readwhilewriting readwhilewriting_in_ram ${STAT_FILE}.readwhilewriting_in_ram -send_benchmark_to_ods seekrandomwhilewriting seekwhilewriting_in_ram ${STAT_FILE}.seekwhilewriting_in_ram -send_benchmark_to_ods fillseq fillseq_lots_column_families ${STAT_FILE}.fillseq_lots_column_families -send_benchmark_to_ods overwrite overwrite_lots_column_families ${STAT_FILE}.overwrite_lots_column_families diff --git a/build_tools/run_ci_db_test.ps1 b/build_tools/run_ci_db_test.ps1 deleted file mode 100644 index f20d3213f..000000000 --- a/build_tools/run_ci_db_test.ps1 +++ /dev/null @@ -1,493 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. -# This script enables you to run RocksDB tests by running -# all the tests concurrently and utilizing all the cores -Param( - [switch]$EnableJE = $false, # Look for and use test executable, append _je to listed exclusions - [switch]$RunAll = $false, # Will attempt to discover all *_test[_je].exe binaries and run all - # of them as Google suites. I.e. it will run test cases concurrently - # except those mentioned as $Run, those will run as individual test cases - # And any excluded with $ExcludeExes or $ExcludeCases - # It will also not run any individual test cases - # excluded by $ExcludeCases - [switch]$RunAllExe = $false, # Look for and use test executables, append _je to exclusions automatically - # It will attempt to run them in parallel w/o breaking them up on individual - # test cases. Those listed with $ExcludeExes will be excluded - [string]$SuiteRun = "", # Split test suites into test cases and run in parallel, not compatible with $RunAll - [string]$Run = "", # Run specified executables in parallel but do not split to test cases - [string]$ExcludeCases = "", # Exclude test cases, expects a comma separated list, no spaces - # Takes effect when $RunAll or $SuiteRun is specified. Must have the full - # test case name including a group and a parameter if any - [string]$ExcludeExes = "", # Exclude exes from consideration, expects a comma separated list, - # no spaces. Takes effect only when $RunAll is specified - [string]$WorkFolder = "", # Direct tests to use that folder. SSD or RAM drive are better options. - # Number of async tasks that would run concurrently. Recommend a number below 64. - # However, CPU utilization really depends on the storage media. Recommend a RAM based disk. - # a value of 1 will run everything serially - [int]$Concurrency = 8, - [int]$Limit = -1 # -1 means do not limit for test purposes -) - -# Folders and commands must be full paths to run, assuming -# the current folder is at the root of the git enlistment -$StartDate = (Get-Date) -$StartDate - - -$DebugPreference = "Continue" - -# These tests are not Google Test suites and we should guard -# against running them as suites -$RunOnly = New-Object System.Collections.Generic.HashSet[string] -$RunOnly.Add("c_test") | Out-Null -$RunOnly.Add("compact_on_deletion_collector_test") | Out-Null -$RunOnly.Add("merge_test") | Out-Null -$RunOnly.Add("stringappend_test") | Out-Null # Apparently incorrectly written -$RunOnly.Add("backup_engine_test") | Out-Null # Disabled -$RunOnly.Add("timer_queue_test") | Out-Null # Not a gtest - -if($RunAll -and $SuiteRun -ne "") { - Write-Error "$RunAll and $SuiteRun are not compatible" - exit 1 -} - -if($RunAllExe -and $Run -ne "") { - Write-Error "$RunAllExe and $Run are not compatible" - exit 1 -} - -# If running under Appveyor, assume its build folder is the root -[string]$Appveyor = $Env:APPVEYOR_BUILD_FOLDER -if($Appveyor -ne "") { - $RootFolder = $Appveyor -} else { - $RootFolder = $PSScriptRoot -replace '\\build_tools', '' -} - -$LogFolder = -Join($RootFolder, "\db_logs\") -$BinariesFolder = -Join($RootFolder, "\build\Debug\") - -if($WorkFolder -eq "") { - - # If TEST_TMPDIR is set use it - [string]$var = $Env:TEST_TMPDIR - if($var -eq "") { - $WorkFolder = -Join($RootFolder, "\db_tests\") - $Env:TEST_TMPDIR = $WorkFolder - } else { - $WorkFolder = $var - } -} else { -# Override from the command line - $Env:TEST_TMPDIR = $WorkFolder -} - -Write-Output "Root: $RootFolder, WorkFolder: $WorkFolder" -Write-Output "BinariesFolder: $BinariesFolder, LogFolder: $LogFolder" - -# Create test
directories in the current folder -md -Path $WorkFolder -ErrorAction Ignore | Out-Null -md -Path $LogFolder -ErrorAction Ignore | Out-Null - - -$ExcludeCasesSet = New-Object System.Collections.Generic.HashSet[string] -if($ExcludeCases -ne "") { - Write-Host "ExcludeCases: $ExcludeCases" - $l = $ExcludeCases -split ' ' - ForEach($t in $l) { - $ExcludeCasesSet.Add($t) | Out-Null - } -} - -$ExcludeExesSet = New-Object System.Collections.Generic.HashSet[string] -if($ExcludeExes -ne "") { - Write-Host "ExcludeExe: $ExcludeExes" - $l = $ExcludeExes -split ' ' - ForEach($t in $l) { - $ExcludeExesSet.Add($t) | Out-Null - } -} - - -# Extract the names of its tests by running db_test with --gtest_list_tests. -# This filter removes the "#"-introduced comments, and expands to -# fully-qualified names by changing input like this: -# -# DBTest. -# Empty -# WriteEmptyBatch -# MultiThreaded/MultiThreadedDBTest. -# MultiThreaded/0 # GetParam() = 0 -# MultiThreaded/1 # GetParam() = 1 -# RibbonTypeParamTest/0. # TypeParam = struct DefaultTypesAndSettings -# CompactnessAndBacktrackAndFpRate -# Extremes -# FindOccupancyForSuccessRate -# -# into this: -# -# DBTest.Empty -# DBTest.WriteEmptyBatch -# MultiThreaded/MultiThreadedDBTest.MultiThreaded/0 -# MultiThreaded/MultiThreadedDBTest.MultiThreaded/1 -# RibbonTypeParamTest/0.CompactnessAndBacktrackAndFpRate -# RibbonTypeParamTest/0.Extremes -# RibbonTypeParamTest/0.FindOccupancyForSuccessRate -# -# Output into the parameter in a form TestName -> Log File Name -function ExtractTestCases([string]$GTestExe, $HashTable) { - - $Tests = @() -# Run db_test to get a list of tests and store it into $a array - &$GTestExe --gtest_list_tests | tee -Variable Tests | Out-Null - - # Current group - $Group="" - - ForEach( $l in $Tests) { - - # remove trailing comment if any - $l = $l -replace '\s+\#.*','' - # Leading whitespace is fine - $l = $l -replace '^\s+','' - # Trailing dot is a test group but no whitespace - if ($l -match "\.$" -and $l -notmatch "\s+") { - $Group = $l - } else { - # Otherwise it is a test name, remove leading space - $test = $l - # create a log name - $test = "$Group$test" - - if($ExcludeCasesSet.Contains($test)) { - Write-Warning "$test case is excluded" - continue - } - - $test_log = $test -replace '[\./]','_' - $test_log += ".log" - $log_path = -join ($LogFolder, $test_log) - - # Add to a hashtable - $HashTable.Add($test, $log_path); - } - } -} - -# The function removes trailing .exe siffix if any, -# creates a name for the log file -# Then adds the test name if it was not excluded into -# a HashTable in a form of test_name -> log_path -function MakeAndAdd([string]$token, $HashTable) { - - $test_name = $token -replace '.exe$', '' - $log_name = -join ($test_name, ".log") - $log_path = -join ($LogFolder, $log_name) - $HashTable.Add($test_name, $log_path) -} - -# This function takes a list of Suites to run -# Lists all the test cases in each of the suite -# and populates HashOfHashes -# Ordered by suite(exe) @{ Exe = @{ TestCase = LogName }} -function ProcessSuites($ListOfSuites, $HashOfHashes) { - - $suite_list = $ListOfSuites - # Problem: if you run --gtest_list_tests on - # a non Google Test executable then it will start executing - # and we will get nowhere - ForEach($suite in $suite_list) { - - if($RunOnly.Contains($suite)) { - Write-Warning "$suite is excluded from running as Google test suite" - continue - } - - if($EnableJE) { - $suite += "_je" - } - - $Cases = [ordered]@{} - $Cases.Clear() - $suite_exe = -Join ($BinariesFolder, $suite) - 
ExtractTestCases -GTestExe $suite_exe -HashTable $Cases - if($Cases.Count -gt 0) { - $HashOfHashes.Add($suite, $Cases); - } - } - - # Make logs and run - if($CasesToRun.Count -lt 1) { - Write-Error "Failed to extract tests from $SuiteRun" - exit 1 - } - -} - -# This will contain all test executables to run - -# Hash table that contains all non suite -# Test executable to run -$TestExes = [ordered]@{} - -# Check for test exe that are not -# Google Test Suites -# Since this is explicitely mentioned it is not subject -# for exclusions -if($Run -ne "") { - - $test_list = $Run -split ' ' - ForEach($t in $test_list) { - - if($EnableJE) { - $t += "_je" - } - MakeAndAdd -token $t -HashTable $TestExes - } - - if($TestExes.Count -lt 1) { - Write-Error "Failed to extract tests from $Run" - exit 1 - } -} elseif($RunAllExe) { - # Discover all the test binaries - if($EnableJE) { - $pattern = "*_test_je.exe" - } else { - $pattern = "*_test.exe" - } - - $search_path = -join ($BinariesFolder, $pattern) - Write-Host "Binaries Search Path: $search_path" - - $DiscoveredExe = @() - dir -Path $search_path | ForEach-Object { - $DiscoveredExe += ($_.Name) - } - - # Remove exclusions - ForEach($e in $DiscoveredExe) { - $e = $e -replace '.exe$', '' - $bare_name = $e -replace '_je$', '' - - if($ExcludeExesSet.Contains($bare_name)) { - Write-Warning "Test $e is excluded" - continue - } - MakeAndAdd -token $e -HashTable $TestExes - } - - if($TestExes.Count -lt 1) { - Write-Error "Failed to discover test executables" - exit 1 - } -} - -# Ordered by exe @{ Exe = @{ TestCase = LogName }} -$CasesToRun = [ordered]@{} - -if($SuiteRun -ne "") { - $suite_list = $SuiteRun -split ' ' - ProcessSuites -ListOfSuites $suite_list -HashOfHashes $CasesToRun -} elseif ($RunAll) { -# Discover all the test binaries - if($EnableJE) { - $pattern = "*_test_je.exe" - } else { - $pattern = "*_test.exe" - } - - $search_path = -join ($BinariesFolder, $pattern) - Write-Host "Binaries Search Path: $search_path" - - $ListOfExe = @() - dir -Path $search_path | ForEach-Object { - $ListOfExe += ($_.Name) - } - - # Exclude those in RunOnly from running as suites - $ListOfSuites = @() - ForEach($e in $ListOfExe) { - - $e = $e -replace '.exe$', '' - $bare_name = $e -replace '_je$', '' - - if($ExcludeExesSet.Contains($bare_name)) { - Write-Warning "Test $e is excluded" - continue - } - - if($RunOnly.Contains($bare_name)) { - MakeAndAdd -token $e -HashTable $TestExes - } else { - $ListOfSuites += $bare_name - } - } - - ProcessSuites -ListOfSuites $ListOfSuites -HashOfHashes $CasesToRun -} - - -# Invoke a test with a filter and redirect all output -$InvokeTestCase = { - param($exe, $test, $log); - &$exe --gtest_filter=$test > $log 2>&1 -} - -# Invoke all tests and redirect output -$InvokeTestAsync = { - param($exe, $log) - &$exe > $log 2>&1 -} - -# Hash that contains tests to rerun if any failed -# Those tests will be rerun sequentially -# $Rerun = [ordered]@{} -# Test limiting factor here -[int]$count = 0 -# Overall status -[bool]$script:success = $true; - -function RunJobs($Suites, $TestCmds, [int]$ConcurrencyVal) -{ - # Array to wait for any of the running jobs - $jobs = @() - # Hash JobToLog - $JobToLog = @{} - - # Wait for all to finish and get the results - while(($JobToLog.Count -gt 0) -or - ($TestCmds.Count -gt 0) -or - ($Suites.Count -gt 0)) { - - # Make sure we have maximum concurrent jobs running if anything - # and the $Limit either not set or allows to proceed - while(($JobToLog.Count -lt $ConcurrencyVal) -and - ((($TestCmds.Count -gt 0) -or 
($Suites.Count -gt 0)) -and - (($Limit -lt 0) -or ($count -lt $Limit)))) { - - # We always favor suites to run if available - [string]$exe_name = "" - [string]$log_path = "" - $Cases = @{} - - if($Suites.Count -gt 0) { - # Pick the first one - ForEach($e in $Suites.Keys) { - $exe_name = $e - $Cases = $Suites[$e] - break - } - [string]$test_case = "" - [string]$log_path = "" - ForEach($c in $Cases.Keys) { - $test_case = $c - $log_path = $Cases[$c] - break - } - - Write-Host "Starting $exe_name::$test_case" - [string]$Exe = -Join ($BinariesFolder, $exe_name) - $job = Start-Job -Name "$exe_name::$test_case" -ArgumentList @($Exe,$test_case,$log_path) -ScriptBlock $InvokeTestCase - $JobToLog.Add($job, $log_path) - - $Cases.Remove($test_case) - if($Cases.Count -lt 1) { - $Suites.Remove($exe_name) - } - - } elseif ($TestCmds.Count -gt 0) { - - ForEach($e in $TestCmds.Keys) { - $exe_name = $e - $log_path = $TestCmds[$e] - break - } - - Write-Host "Starting $exe_name" - [string]$Exe = -Join ($BinariesFolder, $exe_name) - $job = Start-Job -Name $exe_name -ScriptBlock $InvokeTestAsync -ArgumentList @($Exe,$log_path) - $JobToLog.Add($job, $log_path) - - $TestCmds.Remove($exe_name) - - } else { - Write-Error "In the job loop but nothing to run" - exit 1 - } - - ++$count - } # End of Job starting loop - - if($JobToLog.Count -lt 1) { - break - } - - $jobs = @() - foreach($k in $JobToLog.Keys) { $jobs += $k } - - $completed = Wait-Job -Job $jobs -Any - $log = $JobToLog[$completed] - $JobToLog.Remove($completed) - - $message = -join @($completed.Name, " State: ", ($completed.State)) - - $log_content = @(Get-Content $log) - - if($completed.State -ne "Completed") { - $script:success = $false - Write-Warning $message - $log_content | Write-Warning - } else { - # Scan the log. If we find PASSED and no occurrence of FAILED - # then it is a success - [bool]$pass_found = $false - ForEach($l in $log_content) { - - if(($l -match "^\[\s+FAILED") -or - ($l -match "Assertion failed:")) { - $pass_found = $false - break - } - - if(($l -match "^\[\s+PASSED") -or - ($l -match " : PASSED$") -or - ($l -match "^PASS$") -or # Special c_test case - ($l -match "Passed all tests!") ) { - $pass_found = $true - } - } - - if(!$pass_found) { - $script:success = $false; - Write-Warning $message - $log_content | Write-Warning - } else { - Write-Host $message - } - } - - # Remove cached job info from the system - # Should be no output - Receive-Job -Job $completed | Out-Null - } -} - -RunJobs -Suites $CasesToRun -TestCmds $TestExes -ConcurrencyVal $Concurrency - -$EndDate = (Get-Date) - -New-TimeSpan -Start $StartDate -End $EndDate | - ForEach-Object { - "Elapsed time: {0:g}" -f $_ - } - - -if(!$script:success) { -# This does not succeed in killing off jobs quickly -# So we simply exit -# Remove-Job -Job $jobs -Force -# indicate failure using this exit code - exit 1 - } - - exit 0 diff --git a/build_tools/setup_centos7.sh b/build_tools/setup_centos7.sh deleted file mode 100755 index 474d91a3d..000000000 --- a/build_tools/setup_centos7.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
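-# set -ex below makes the script exit on the first failing command (-e) and echo -# every command before it runs (-x), so a failed setup step is easy to spot in the -# provisioning log.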
-set -ex - -ROCKSDB_VERSION="6.7.3" -ZSTD_VERSION="1.4.4" - -echo "This script configures CentOS with everything needed to build and run RocksDB" - -yum update -y && yum install epel-release -y - -yum install -y \ - wget \ - gcc-c++ \ - snappy snappy-devel \ - zlib zlib-devel \ - bzip2 bzip2-devel \ - lz4-devel \ - libasan \ - gflags - -mkdir -pv /usr/local/rocksdb-${ROCKSDB_VERSION} -ln -sfT /usr/local/rocksdb-${ROCKSDB_VERSION} /usr/local/rocksdb - -wget -qO /tmp/zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz -wget -qO /tmp/rocksdb-${ROCKSDB_VERSION}.tar.gz https://github.com/facebook/rocksdb/archive/v${ROCKSDB_VERSION}.tar.gz - -cd /tmp - -tar xzvf zstd-${ZSTD_VERSION}.tar.gz -tar xzvf rocksdb-${ROCKSDB_VERSION}.tar.gz -C /usr/local/ - -echo "Installing ZSTD..." -pushd zstd-${ZSTD_VERSION} -make && make install -popd - -echo "Compiling RocksDB..." -cd /usr/local/rocksdb -chown -R vagrant:vagrant /usr/local/rocksdb/ -sudo -u vagrant make static_lib -cd examples/ -sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ make all -sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ ./c_simple_example - diff --git a/build_tools/ubuntu20_image/Dockerfile b/build_tools/ubuntu20_image/Dockerfile deleted file mode 100644 index d81a5e4b2..000000000 --- a/build_tools/ubuntu20_image/Dockerfile +++ /dev/null @@ -1,57 +0,0 @@ -# from official ubuntu 20.04 -FROM ubuntu:20.04 -# update system -RUN apt-get update && apt-get upgrade -y -# install basic tools -RUN apt-get install -y vim wget curl -# install tzdata noninteractive -RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata -# install git and default compilers -RUN apt-get install -y git gcc g++ clang clang-tools -# install basic package -RUN apt-get install -y lsb-release software-properties-common gnupg -# install gflags, tbb -RUN apt-get install -y libgflags-dev libtbb-dev -# install compression libs -RUN apt-get install -y libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev -# install cmake -RUN apt-get install -y cmake -RUN apt-get install -y libssl-dev -# install clang-13 -WORKDIR /root -RUN wget https://apt.llvm.org/llvm.sh -RUN chmod +x llvm.sh -RUN ./llvm.sh 13 all -# install gcc-7, 8, 10, 11, default is 9 -RUN apt-get install -y gcc-7 g++-7 -RUN apt-get install -y gcc-8 g++-8 -RUN apt-get install -y gcc-10 g++-10 -RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test -RUN apt-get install -y gcc-11 g++-11 -# install apt-get install -y valgrind -RUN apt-get install -y valgrind -# install folly depencencies -RUN apt-get install -y libgoogle-glog-dev -# install openjdk 8 -RUN apt-get install -y openjdk-8-jdk -ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64 -# install mingw -RUN apt-get install -y mingw-w64 - -# install gtest-parallel package -RUN git clone --single-branch --branch master --depth 1 https://github.com/google/gtest-parallel.git ~/gtest-parallel -ENV PATH $PATH:/root/gtest-parallel - -# install libprotobuf for fuzzers test -RUN apt-get install -y ninja-build binutils liblzma-dev libz-dev pkg-config autoconf libtool -RUN git clone --branch v1.0 https://github.com/google/libprotobuf-mutator.git ~/libprotobuf-mutator && cd ~/libprotobuf-mutator && git checkout ffd86a32874e5c08a143019aad1aaf0907294c9f && mkdir build && cd build && cmake .. 
-GNinja -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_BUILD_TYPE=Release -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON && ninja && ninja install -ENV PKG_CONFIG_PATH /usr/local/OFF/:/root/libprotobuf-mutator/build/external.protobuf/lib/pkgconfig/ -ENV PROTOC_BIN /root/libprotobuf-mutator/build/external.protobuf/bin/protoc - -# install the latest google benchmark -RUN git clone --depth 1 --branch v1.7.0 https://github.com/google/benchmark.git ~/benchmark -RUN cd ~/benchmark && mkdir build && cd build && cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0 && ninja && ninja install - -# clean up -RUN rm -rf /var/lib/apt/lists/* -RUN rm -rf /root/benchmark diff --git a/build_tools/update_dependencies.sh b/build_tools/update_dependencies.sh deleted file mode 100755 index c549e5b6e..000000000 --- a/build_tools/update_dependencies.sh +++ /dev/null @@ -1,106 +0,0 @@ -#!/bin/sh -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# -# Update dependencies.sh file with the latest avaliable versions - -BASEDIR=$(dirname $0) -OUTPUT="" - -function log_header() -{ - echo "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved." >> "$OUTPUT" - echo "# The file is generated using update_dependencies.sh." >> "$OUTPUT" -} - - -function log_variable() -{ - echo "$1=${!1}" >> "$OUTPUT" -} - - -TP2_LATEST="/data/users/$USER/fbsource/fbcode/third-party2/" -## $1 => lib name -## $2 => lib version (if not provided, will try to pick latest) -## $3 => platform (if not provided, will try to pick latest gcc) -## -## get_lib_base will set a variable named ${LIB_NAME}_BASE to the lib location -function get_lib_base() -{ - local lib_name=$1 - local lib_version=$2 - local lib_platform=$3 - - local result="$TP2_LATEST/$lib_name/" - - # Lib Version - if [ -z "$lib_version" ] || [ "$lib_version" = "LATEST" ]; then - # version is not provided, use latest - result=`ls -dr1v $result/*/ | head -n1` - else - result="$result/$lib_version/" - fi - - # Lib Platform - if [ -z "$lib_platform" ]; then - # platform is not provided, use latest gcc - result=`ls -dr1v $result/gcc-*[^fb]/ | head -n1` - else - echo $lib_platform - result="$result/$lib_platform/" - fi - - result=`ls -1d $result/*/ | head -n1` - - echo Finding link $result - - # lib_name => LIB_NAME_BASE - local __res_var=${lib_name^^}"_BASE" - __res_var=`echo $__res_var | tr - _` - # LIB_NAME_BASE=$result - eval $__res_var=`readlink -f $result` - - log_variable $__res_var -} - -########################################################### -# platform010 dependencies # -########################################################### - -OUTPUT="$BASEDIR/dependencies_platform010.sh" - -rm -f "$OUTPUT" -touch "$OUTPUT" - -echo "Writing dependencies to $OUTPUT" - -# Compilers locations -GCC_BASE=`readlink -f $TP2_LATEST/gcc/11.x/centos7-native/*/` -CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/12/platform010/*/` - -log_header -log_variable GCC_BASE -log_variable CLANG_BASE - -# Libraries locations -get_lib_base libgcc 11.x platform010 -get_lib_base glibc 2.34 platform010 -get_lib_base snappy LATEST platform010 -get_lib_base zlib LATEST platform010 -get_lib_base bzip2 LATEST platform010 -get_lib_base lz4 LATEST platform010 -get_lib_base zstd LATEST platform010 -get_lib_base gflags LATEST platform010 -get_lib_base jemalloc LATEST platform010 -get_lib_base numa LATEST platform010 -get_lib_base libunwind LATEST platform010 -get_lib_base tbb 2018_U5 platform010 -get_lib_base liburing LATEST platform010 
-get_lib_base benchmark LATEST platform010 - -get_lib_base kernel-headers fb platform010 -get_lib_base binutils LATEST centos7-native -get_lib_base valgrind LATEST platform010 -get_lib_base lua 5.3.4 platform010 - -git diff $OUTPUT diff --git a/build_tools/version.sh b/build_tools/version.sh deleted file mode 100755 index dbc1a9296..000000000 --- a/build_tools/version.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -if [ "$#" = "0" ]; then - echo "Usage: $0 major|minor|patch|full" - exit 1 -fi - -if [ "$1" = "major" ]; then - cat include/rocksdb/version.h | grep MAJOR | head -n1 | awk '{print $3}' -fi -if [ "$1" = "minor" ]; then - cat include/rocksdb/version.h | grep MINOR | head -n1 | awk '{print $3}' -fi -if [ "$1" = "patch" ]; then - cat include/rocksdb/version.h | grep PATCH | head -n1 | awk '{print $3}' -fi -if [ "$1" = "full" ]; then - awk '/#define ROCKSDB/ { env[$2] = $3 } - END { printf "%s.%s.%s\n", env["ROCKSDB_MAJOR"], - env["ROCKSDB_MINOR"], - env["ROCKSDB_PATCH"] }' \ - include/rocksdb/version.h -fi diff --git a/cache/cache_reservation_manager_test.cc b/cache/cache_reservation_manager_test.cc deleted file mode 100644 index 2a0c318e0..000000000 --- a/cache/cache_reservation_manager_test.cc +++ /dev/null @@ -1,469 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
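The tests that follow exercise RocksDB's internal cache reservation machinery. As a rough orientation, here is a minimal standalone sketch of the reservation cycle those tests drive, assuming the internal cache/cache_reservation_manager.h API used below; the CacheEntryRole::kMisc template argument and the 64MB/10MB sizes are illustrative choices, not taken from the tests.

#include <cassert>
#include <cstddef>
#include <memory>

#include "cache/cache_entry_roles.h"
#include "cache/cache_reservation_manager.h"
#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  // Single-shard LRU cache that the reservation manager charges against.
  std::shared_ptr<Cache> cache =
      NewLRUCache(/*capacity=*/64 << 20, /*num_shard_bits=*/0);

  // The template argument is the entry role used for the dummy entries;
  // kMisc is assumed here purely for illustration.
  auto crm =
      std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(cache);

  // Reserving memory inserts dummy entries into the cache, so the reserved
  // amount shows up as pinned usage in the cache.
  Status s = crm->UpdateCacheReservation(/*new_mem_used=*/10 << 20);
  assert(s.ok());
  assert(crm->GetTotalMemoryUsed() == static_cast<std::size_t>(10 << 20));

  // Dropping the reservation back to zero releases the dummy entries again.
  s = crm->UpdateCacheReservation(0);
  assert(s.ok());
  return 0;
}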
-#include "cache/cache_reservation_manager.h" - -#include -#include -#include - -#include "cache/cache_entry_roles.h" -#include "rocksdb/cache.h" -#include "rocksdb/slice.h" -#include "test_util/testharness.h" -#include "util/coding.h" - -namespace ROCKSDB_NAMESPACE { -class CacheReservationManagerTest : public ::testing::Test { - protected: - static constexpr std::size_t kSizeDummyEntry = - CacheReservationManagerImpl::GetDummyEntrySize(); - static constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry; - static constexpr int kNumShardBits = 0; // 2^0 shard - static constexpr std::size_t kMetaDataChargeOverhead = 10000; - - std::shared_ptr cache = NewLRUCache(kCacheCapacity, kNumShardBits); - std::shared_ptr test_cache_rev_mng; - - CacheReservationManagerTest() { - test_cache_rev_mng = - std::make_shared>( - cache); - } -}; - -TEST_F(CacheReservationManagerTest, GenerateCacheKey) { - std::size_t new_mem_used = 1 * kSizeDummyEntry; - Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - ASSERT_EQ(s, Status::OK()); - ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry); - ASSERT_LT(cache->GetPinnedUsage(), - 1 * kSizeDummyEntry + kMetaDataChargeOverhead); - - // Next unique Cache key - CacheKey ckey = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - // Get to the underlying values - uint64_t* ckey_data = reinterpret_cast(&ckey); - // Back it up to the one used by CRM (using CacheKey implementation details) - ckey_data[1]--; - - // Specific key (subject to implementation details) - EXPECT_EQ(ckey_data[0], 0); - EXPECT_EQ(ckey_data[1], 2); - - Cache::Handle* handle = cache->Lookup(ckey.AsSlice()); - EXPECT_NE(handle, nullptr) - << "Failed to generate the cache key for the dummy entry correctly"; - // Clean up the returned handle from Lookup() to prevent memory leak - cache->Release(handle); -} - -TEST_F(CacheReservationManagerTest, KeepCacheReservationTheSame) { - std::size_t new_mem_used = 1 * kSizeDummyEntry; - Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - ASSERT_EQ(s, Status::OK()); - ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 1 * kSizeDummyEntry); - ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used); - std::size_t initial_pinned_usage = cache->GetPinnedUsage(); - ASSERT_GE(initial_pinned_usage, 1 * kSizeDummyEntry); - ASSERT_LT(initial_pinned_usage, - 1 * kSizeDummyEntry + kMetaDataChargeOverhead); - - s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) - << "Failed to keep cache reservation the same when new_mem_used equals " - "to current cache reservation"; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 1 * kSizeDummyEntry) - << "Failed to bookkeep correctly when new_mem_used equals to current " - "cache reservation"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly when new_mem_used " - "equals to current cache reservation"; - EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage) - << "Failed to keep underlying dummy entries the same when new_mem_used " - "equals to current cache reservation"; -} - -TEST_F(CacheReservationManagerTest, - IncreaseCacheReservationByMultiplesOfDummyEntrySize) { - std::size_t new_mem_used = 2 * kSizeDummyEntry; - Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) - << "Failed to increase cache reservation correctly"; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 2 * 
kSizeDummyEntry) - << "Failed to bookkeep cache reservation increase correctly"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry) - << "Failed to increase underlying dummy entries in cache correctly"; - EXPECT_LT(cache->GetPinnedUsage(), - 2 * kSizeDummyEntry + kMetaDataChargeOverhead) - << "Failed to increase underlying dummy entries in cache correctly"; -} - -TEST_F(CacheReservationManagerTest, - IncreaseCacheReservationNotByMultiplesOfDummyEntrySize) { - std::size_t new_mem_used = 2 * kSizeDummyEntry + kSizeDummyEntry / 2; - Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) - << "Failed to increase cache reservation correctly"; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 3 * kSizeDummyEntry) - << "Failed to bookkeep cache reservation increase correctly"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_GE(cache->GetPinnedUsage(), 3 * kSizeDummyEntry) - << "Failed to increase underlying dummy entries in cache correctly"; - EXPECT_LT(cache->GetPinnedUsage(), - 3 * kSizeDummyEntry + kMetaDataChargeOverhead) - << "Failed to increase underlying dummy entries in cache correctly"; -} - -TEST(CacheReservationManagerIncreaseReservcationOnFullCacheTest, - IncreaseCacheReservationOnFullCache) { - ; - constexpr std::size_t kSizeDummyEntry = - CacheReservationManagerImpl::GetDummyEntrySize(); - constexpr std::size_t kSmallCacheCapacity = 4 * kSizeDummyEntry; - constexpr std::size_t kBigCacheCapacity = 4096 * kSizeDummyEntry; - constexpr std::size_t kMetaDataChargeOverhead = 10000; - - LRUCacheOptions lo; - lo.capacity = kSmallCacheCapacity; - lo.num_shard_bits = 0; // 2^0 shard - lo.strict_capacity_limit = true; - std::shared_ptr cache = NewLRUCache(lo); - std::shared_ptr test_cache_rev_mng = - std::make_shared>( - cache); - - std::size_t new_mem_used = kSmallCacheCapacity + 1; - Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::MemoryLimit()) - << "Failed to return status to indicate failure of dummy entry insertion " - "during cache reservation on full cache"; - EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(), - 1 * kSizeDummyEntry) - << "Failed to bookkeep correctly before cache resevation failure happens " - "due to full cache"; - EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(), - kSmallCacheCapacity) - << "Failed to bookkeep correctly (i.e, bookkeep only successful dummy " - "entry insertions) when encountering cache resevation failure due to " - "full cache"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry) - << "Failed to insert underlying dummy entries correctly when " - "encountering cache resevation failure due to full cache"; - EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity) - << "Failed to insert underlying dummy entries correctly when " - "encountering cache resevation failure due to full cache"; - - new_mem_used = kSmallCacheCapacity / 2; // 2 dummy entries - s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) - << "Failed to decrease cache reservation after encountering cache " - "reservation failure due to full cache"; - 
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 2 * kSizeDummyEntry) - << "Failed to bookkeep cache reservation decrease correctly after " - "encountering cache reservation due to full cache"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry) - << "Failed to release underlying dummy entries correctly on cache " - "reservation decrease after encountering cache resevation failure due " - "to full cache"; - EXPECT_LT(cache->GetPinnedUsage(), - 2 * kSizeDummyEntry + kMetaDataChargeOverhead) - << "Failed to release underlying dummy entries correctly on cache " - "reservation decrease after encountering cache resevation failure due " - "to full cache"; - - // Create cache full again for subsequent tests - new_mem_used = kSmallCacheCapacity + 1; - s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::MemoryLimit()) - << "Failed to return status to indicate failure of dummy entry insertion " - "during cache reservation on full cache"; - EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(), - 1 * kSizeDummyEntry) - << "Failed to bookkeep correctly before cache resevation failure happens " - "due to full cache"; - EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(), - kSmallCacheCapacity) - << "Failed to bookkeep correctly (i.e, bookkeep only successful dummy " - "entry insertions) when encountering cache resevation failure due to " - "full cache"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry) - << "Failed to insert underlying dummy entries correctly when " - "encountering cache resevation failure due to full cache"; - EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity) - << "Failed to insert underlying dummy entries correctly when " - "encountering cache resevation failure due to full cache"; - - // Increase cache capacity so the previously failed insertion can fully - // succeed - cache->SetCapacity(kBigCacheCapacity); - new_mem_used = kSmallCacheCapacity + 1; - s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) - << "Failed to increase cache reservation after increasing cache capacity " - "and mitigating cache full error"; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 5 * kSizeDummyEntry) - << "Failed to bookkeep cache reservation increase correctly after " - "increasing cache capacity and mitigating cache full error"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_GE(cache->GetPinnedUsage(), 5 * kSizeDummyEntry) - << "Failed to insert underlying dummy entries correctly after increasing " - "cache capacity and mitigating cache full error"; - EXPECT_LT(cache->GetPinnedUsage(), - 5 * kSizeDummyEntry + kMetaDataChargeOverhead) - << "Failed to insert underlying dummy entries correctly after increasing " - "cache capacity and mitigating cache full error"; -} - -TEST_F(CacheReservationManagerTest, - DecreaseCacheReservationByMultiplesOfDummyEntrySize) { - std::size_t new_mem_used = 2 * kSizeDummyEntry; - Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - ASSERT_EQ(s, Status::OK()); - ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 2 * kSizeDummyEntry); - ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), 
new_mem_used); - ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry); - ASSERT_LT(cache->GetPinnedUsage(), - 2 * kSizeDummyEntry + kMetaDataChargeOverhead); - - new_mem_used = 1 * kSizeDummyEntry; - s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) - << "Failed to decrease cache reservation correctly"; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 1 * kSizeDummyEntry) - << "Failed to bookkeep cache reservation decrease correctly"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry) - << "Failed to decrease underlying dummy entries in cache correctly"; - EXPECT_LT(cache->GetPinnedUsage(), - 1 * kSizeDummyEntry + kMetaDataChargeOverhead) - << "Failed to decrease underlying dummy entries in cache correctly"; -} - -TEST_F(CacheReservationManagerTest, - DecreaseCacheReservationNotByMultiplesOfDummyEntrySize) { - std::size_t new_mem_used = 2 * kSizeDummyEntry; - Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - ASSERT_EQ(s, Status::OK()); - ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 2 * kSizeDummyEntry); - ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used); - ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry); - ASSERT_LT(cache->GetPinnedUsage(), - 2 * kSizeDummyEntry + kMetaDataChargeOverhead); - - new_mem_used = kSizeDummyEntry / 2; - s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) - << "Failed to decrease cache reservation correctly"; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 1 * kSizeDummyEntry) - << "Failed to bookkeep cache reservation decrease correctly"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry) - << "Failed to decrease underlying dummy entries in cache correctly"; - EXPECT_LT(cache->GetPinnedUsage(), - 1 * kSizeDummyEntry + kMetaDataChargeOverhead) - << "Failed to decrease underlying dummy entries in cache correctly"; -} - -TEST(CacheReservationManagerWithDelayedDecreaseTest, - DecreaseCacheReservationWithDelayedDecrease) { - constexpr std::size_t kSizeDummyEntry = - CacheReservationManagerImpl::GetDummyEntrySize(); - constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry; - constexpr std::size_t kMetaDataChargeOverhead = 10000; - - LRUCacheOptions lo; - lo.capacity = kCacheCapacity; - lo.num_shard_bits = 0; - std::shared_ptr cache = NewLRUCache(lo); - std::shared_ptr test_cache_rev_mng = - std::make_shared>( - cache, true /* delayed_decrease */); - - std::size_t new_mem_used = 8 * kSizeDummyEntry; - Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - ASSERT_EQ(s, Status::OK()); - ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 8 * kSizeDummyEntry); - ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used); - std::size_t initial_pinned_usage = cache->GetPinnedUsage(); - ASSERT_GE(initial_pinned_usage, 8 * kSizeDummyEntry); - ASSERT_LT(initial_pinned_usage, - 8 * kSizeDummyEntry + kMetaDataChargeOverhead); - - new_mem_used = 6 * kSizeDummyEntry; - s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation"; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 8 * kSizeDummyEntry) - << "Failed to bookkeep 
correctly when delaying cache reservation " - "decrease"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage) - << "Failed to delay decreasing underlying dummy entries in cache"; - - new_mem_used = 7 * kSizeDummyEntry; - s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation"; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 8 * kSizeDummyEntry) - << "Failed to bookkeep correctly when delaying cache reservation " - "decrease"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage) - << "Failed to delay decreasing underlying dummy entries in cache"; - - new_mem_used = 6 * kSizeDummyEntry - 1; - s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - EXPECT_EQ(s, Status::OK()) - << "Failed to decrease cache reservation correctly when new_mem_used < " - "GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode"; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), - 6 * kSizeDummyEntry) - << "Failed to bookkeep correctly when new_mem_used < " - "GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode"; - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) - << "Failed to bookkeep the used memory correctly"; - EXPECT_GE(cache->GetPinnedUsage(), 6 * kSizeDummyEntry) - << "Failed to decrease underlying dummy entries in cache when " - "new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed " - "decrease mode"; - EXPECT_LT(cache->GetPinnedUsage(), - 6 * kSizeDummyEntry + kMetaDataChargeOverhead) - << "Failed to decrease underlying dummy entries in cache when " - "new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed " - "decrease mode"; -} - -TEST(CacheReservationManagerDestructorTest, - ReleaseRemainingDummyEntriesOnDestruction) { - constexpr std::size_t kSizeDummyEntry = - CacheReservationManagerImpl::GetDummyEntrySize(); - constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry; - constexpr std::size_t kMetaDataChargeOverhead = 10000; - - LRUCacheOptions lo; - lo.capacity = kCacheCapacity; - lo.num_shard_bits = 0; - std::shared_ptr cache = NewLRUCache(lo); - { - std::shared_ptr test_cache_rev_mng = - std::make_shared>( - cache); - std::size_t new_mem_used = 1 * kSizeDummyEntry; - Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used); - ASSERT_EQ(s, Status::OK()); - ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry); - ASSERT_LT(cache->GetPinnedUsage(), - 1 * kSizeDummyEntry + kMetaDataChargeOverhead); - } - EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry) - << "Failed to release remaining underlying dummy entries in cache in " - "CacheReservationManager's destructor"; -} - -TEST(CacheReservationHandleTest, HandleTest) { - constexpr std::size_t kOneGigabyte = 1024 * 1024 * 1024; - constexpr std::size_t kSizeDummyEntry = 256 * 1024; - constexpr std::size_t kMetaDataChargeOverhead = 10000; - - LRUCacheOptions lo; - lo.capacity = kOneGigabyte; - lo.num_shard_bits = 0; - std::shared_ptr cache = NewLRUCache(lo); - - std::shared_ptr test_cache_rev_mng( - std::make_shared>( - cache)); - - std::size_t mem_used = 0; - const std::size_t incremental_mem_used_handle_1 = 1 * kSizeDummyEntry; - const std::size_t incremental_mem_used_handle_2 = 2 * kSizeDummyEntry; - 
std::unique_ptr handle_1, - handle_2; - - // To test consecutive CacheReservationManager::MakeCacheReservation works - // correctly in terms of returning the handle as well as updating cache - // reservation and the latest total memory used - Status s = test_cache_rev_mng->MakeCacheReservation( - incremental_mem_used_handle_1, &handle_1); - mem_used = mem_used + incremental_mem_used_handle_1; - ASSERT_EQ(s, Status::OK()); - EXPECT_TRUE(handle_1 != nullptr); - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used); - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used); - EXPECT_GE(cache->GetPinnedUsage(), mem_used); - EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead); - - s = test_cache_rev_mng->MakeCacheReservation(incremental_mem_used_handle_2, - &handle_2); - mem_used = mem_used + incremental_mem_used_handle_2; - ASSERT_EQ(s, Status::OK()); - EXPECT_TRUE(handle_2 != nullptr); - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used); - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used); - EXPECT_GE(cache->GetPinnedUsage(), mem_used); - EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead); - - // To test - // CacheReservationManager::CacheReservationHandle::~CacheReservationHandle() - // works correctly in releasing the cache reserved for the handle - handle_1.reset(); - EXPECT_TRUE(handle_1 == nullptr); - mem_used = mem_used - incremental_mem_used_handle_1; - EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used); - EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used); - EXPECT_GE(cache->GetPinnedUsage(), mem_used); - EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead); - - // To test the actual CacheReservationManager object won't be deallocated - // as long as there remain handles pointing to it. - // We strongly recommend deallocating CacheReservationManager object only - // after all its handles are deallocated to keep things easy to reasonate - test_cache_rev_mng.reset(); - EXPECT_GE(cache->GetPinnedUsage(), mem_used); - EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead); - - handle_2.reset(); - // The CacheReservationManager object is now deallocated since all the handles - // and its original pointer is gone - mem_used = mem_used - incremental_mem_used_handle_2; - EXPECT_EQ(mem_used, 0); - EXPECT_EQ(cache->GetPinnedUsage(), mem_used); -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/cache/cache_test.cc b/cache/cache_test.cc deleted file mode 100644 index febed5b42..000000000 --- a/cache/cache_test.cc +++ /dev/null @@ -1,969 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
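The cache_test.cc cases below all revolve around the same public Cache calls: Insert with a CacheItemHelper and a charge, Lookup returning a pinned handle, Value to read it back, and Release to unpin. A minimal standalone sketch of that cycle, using only the calls that appear in the tests and assuming the CacheItemHelper-based API lives in rocksdb/advanced_cache.h in recent releases; the key, value, and capacity are made up for illustration.

#include <cassert>
#include <iostream>
#include <memory>
#include <string>

#include "rocksdb/advanced_cache.h"  // Cache, CacheItemHelper (assumed location)
#include "rocksdb/cache.h"           // NewLRUCache
#include "rocksdb/memory_allocator.h"

using ROCKSDB_NAMESPACE::Cache;
using ROCKSDB_NAMESPACE::CacheEntryRole;
using ROCKSDB_NAMESPACE::MemoryAllocator;
using ROCKSDB_NAMESPACE::NewLRUCache;
using ROCKSDB_NAMESPACE::Status;

// No-op deleter: in this sketch the cache does not own the cached object.
const Cache::CacheItemHelper kNoopHelper{
    CacheEntryRole::kMisc,
    [](Cache::ObjectPtr /*value*/, MemoryAllocator* /*alloc*/) {}};

int main() {
  // Small single-shard LRU cache.
  std::shared_ptr<Cache> cache =
      NewLRUCache(/*capacity=*/1024, /*num_shard_bits=*/0);

  std::string value = "hello";
  // Insert charges the entry against capacity; no handle is requested here.
  Status s = cache->Insert("key", &value, &kNoopHelper, /*charge=*/value.size());
  assert(s.ok());

  // Lookup pins the entry; a successful lookup must be paired with Release.
  Cache::Handle* h = cache->Lookup("key");
  if (h != nullptr) {
    std::cout << *static_cast<std::string*>(cache->Value(h)) << "\n";
    cache->Release(h);
  }
  return 0;
}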
- -#include "rocksdb/cache.h" - -#include -#include -#include -#include -#include - -#include "cache/lru_cache.h" -#include "cache/typed_cache.h" -#include "port/stack_trace.h" -#include "test_util/secondary_cache_test_util.h" -#include "test_util/testharness.h" -#include "util/coding.h" -#include "util/string_util.h" - -// HyperClockCache only supports 16-byte keys, so some of the tests -// originally written for LRUCache do not work on the other caches. -// Those tests were adapted to use 16-byte keys. We kept the original ones. -// TODO: Remove the original tests if they ever become unused. - -namespace ROCKSDB_NAMESPACE { - -namespace { - -// Conversions between numeric keys/values and the types expected by Cache. -std::string EncodeKey16Bytes(int k) { - std::string result; - PutFixed32(&result, k); - result.append(std::string(12, 'a')); // Because we need a 16B output, we - // add a 12-byte padding. - return result; -} - -int DecodeKey16Bytes(const Slice& k) { - assert(k.size() == 16); - return DecodeFixed32(k.data()); // Decodes only the first 4 bytes of k. -} - -std::string EncodeKey32Bits(int k) { - std::string result; - PutFixed32(&result, k); - return result; -} - -int DecodeKey32Bits(const Slice& k) { - assert(k.size() == 4); - return DecodeFixed32(k.data()); -} - -Cache::ObjectPtr EncodeValue(uintptr_t v) { - return reinterpret_cast(v); -} - -int DecodeValue(void* v) { - return static_cast(reinterpret_cast(v)); -} - -const Cache::CacheItemHelper kDumbHelper{ - CacheEntryRole::kMisc, - [](Cache::ObjectPtr /*value*/, MemoryAllocator* /*alloc*/) {}}; - -const Cache::CacheItemHelper kEraseOnDeleteHelper1{ - CacheEntryRole::kMisc, - [](Cache::ObjectPtr value, MemoryAllocator* /*alloc*/) { - Cache* cache = static_cast(value); - cache->Erase("foo"); - }}; - -const Cache::CacheItemHelper kEraseOnDeleteHelper2{ - CacheEntryRole::kMisc, - [](Cache::ObjectPtr value, MemoryAllocator* /*alloc*/) { - Cache* cache = static_cast(value); - cache->Erase(EncodeKey16Bytes(1234)); - }}; -} // anonymous namespace - -class CacheTest : public testing::Test, - public secondary_cache_test_util::WithCacheTypeParam { - public: - static CacheTest* current_; - static std::string type_; - - static void Deleter(Cache::ObjectPtr v, MemoryAllocator*) { - current_->deleted_values_.push_back(DecodeValue(v)); - } - static const Cache::CacheItemHelper kHelper; - - static const int kCacheSize = 1000; - static const int kNumShardBits = 4; - - static const int kCacheSize2 = 100; - static const int kNumShardBits2 = 2; - - std::vector deleted_values_; - std::shared_ptr cache_; - std::shared_ptr cache2_; - - CacheTest() - : cache_(NewCache(kCacheSize, kNumShardBits, false)), - cache2_(NewCache(kCacheSize2, kNumShardBits2, false)) { - current_ = this; - type_ = GetParam(); - } - - ~CacheTest() override {} - - // These functions encode/decode keys in tests cases that use - // int keys. - // Currently, HyperClockCache requires keys to be 16B long, whereas - // LRUCache doesn't, so the encoding depends on the cache type. - std::string EncodeKey(int k) { - auto type = GetParam(); - if (type == kHyperClock) { - return EncodeKey16Bytes(k); - } else { - return EncodeKey32Bits(k); - } - } - - int DecodeKey(const Slice& k) { - auto type = GetParam(); - if (type == kHyperClock) { - return DecodeKey16Bytes(k); - } else { - return DecodeKey32Bits(k); - } - } - - int Lookup(std::shared_ptr cache, int key) { - Cache::Handle* handle = cache->Lookup(EncodeKey(key)); - const int r = (handle == nullptr) ? 
-1 : DecodeValue(cache->Value(handle)); - if (handle != nullptr) { - cache->Release(handle); - } - return r; - } - - void Insert(std::shared_ptr cache, int key, int value, - int charge = 1) { - EXPECT_OK(cache->Insert(EncodeKey(key), EncodeValue(value), &kHelper, - charge, /*handle*/ nullptr, Cache::Priority::HIGH)); - } - - void Erase(std::shared_ptr cache, int key) { - cache->Erase(EncodeKey(key)); - } - - int Lookup(int key) { return Lookup(cache_, key); } - - void Insert(int key, int value, int charge = 1) { - Insert(cache_, key, value, charge); - } - - void Erase(int key) { Erase(cache_, key); } - - int Lookup2(int key) { return Lookup(cache2_, key); } - - void Insert2(int key, int value, int charge = 1) { - Insert(cache2_, key, value, charge); - } - - void Erase2(int key) { Erase(cache2_, key); } -}; - -const Cache::CacheItemHelper CacheTest::kHelper{CacheEntryRole::kMisc, - &CacheTest::Deleter}; - -CacheTest* CacheTest::current_; -std::string CacheTest::type_; - -class LRUCacheTest : public CacheTest {}; - -TEST_P(CacheTest, UsageTest) { - auto type = GetParam(); - - // cache is std::shared_ptr and will be automatically cleaned up. - const size_t kCapacity = 100000; - auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); - auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata); - ASSERT_EQ(0, cache->GetUsage()); - size_t baseline_meta_usage = precise_cache->GetUsage(); - if (type != kHyperClock) { - ASSERT_EQ(0, baseline_meta_usage); - } - - size_t usage = 0; - char value[10] = "abcdef"; - // make sure everything will be cached - for (int i = 1; i < 100; ++i) { - std::string key; - if (type == kLRU) { - key = std::string(i, 'a'); - } else { - key = EncodeKey(i); - } - auto kv_size = key.size() + 5; - ASSERT_OK(cache->Insert(key, value, &kDumbHelper, kv_size)); - ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, kv_size)); - usage += kv_size; - ASSERT_EQ(usage, cache->GetUsage()); - if (type == kHyperClock) { - ASSERT_EQ(baseline_meta_usage + usage, precise_cache->GetUsage()); - } else { - ASSERT_LT(usage, precise_cache->GetUsage()); - } - } - - cache->EraseUnRefEntries(); - precise_cache->EraseUnRefEntries(); - ASSERT_EQ(0, cache->GetUsage()); - ASSERT_EQ(baseline_meta_usage, precise_cache->GetUsage()); - - // make sure the cache will be overloaded - for (size_t i = 1; i < kCapacity; ++i) { - std::string key; - if (type == kLRU) { - key = std::to_string(i); - } else { - key = EncodeKey(static_cast(1000 + i)); - } - ASSERT_OK(cache->Insert(key, value, &kDumbHelper, key.size() + 5)); - ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, key.size() + 5)); - } - - // the usage should be close to the capacity - ASSERT_GT(kCapacity, cache->GetUsage()); - ASSERT_GT(kCapacity, precise_cache->GetUsage()); - ASSERT_LT(kCapacity * 0.95, cache->GetUsage()); - if (type != kHyperClock) { - ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage()); - } else { - // estimated value size of 1 is weird for clock cache, because - // almost all of the capacity will be used for metadata, and due to only - // using power of 2 table sizes, we might hit strict occupancy limit - // before hitting capacity limit. - ASSERT_LT(kCapacity * 0.80, precise_cache->GetUsage()); - } -} - -// TODO: This test takes longer than expected on ClockCache. This is -// because the values size estimate at construction is too sloppy. -// Fix this. -// Why is it so slow? The cache is constructed with an estimate of 1, but -// then the charge is claimed to be 21. 
This will cause the hash table -// to be extremely sparse, which in turn means clock needs to scan too -// many slots to find victims. -TEST_P(CacheTest, PinnedUsageTest) { - auto type = GetParam(); - - // cache is std::shared_ptr and will be automatically cleaned up. - const size_t kCapacity = 200000; - auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); - auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata); - size_t baseline_meta_usage = precise_cache->GetUsage(); - if (type != kHyperClock) { - ASSERT_EQ(0, baseline_meta_usage); - } - - size_t pinned_usage = 0; - char value[10] = "abcdef"; - - std::forward_list unreleased_handles; - std::forward_list unreleased_handles_in_precise_cache; - - // Add entries. Unpin some of them after insertion. Then, pin some of them - // again. Check GetPinnedUsage(). - for (int i = 1; i < 100; ++i) { - std::string key; - if (type == kLRU) { - key = std::string(i, 'a'); - } else { - key = EncodeKey(i); - } - auto kv_size = key.size() + 5; - Cache::Handle* handle; - Cache::Handle* handle_in_precise_cache; - ASSERT_OK(cache->Insert(key, value, &kDumbHelper, kv_size, &handle)); - assert(handle); - ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, kv_size, - &handle_in_precise_cache)); - assert(handle_in_precise_cache); - pinned_usage += kv_size; - ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); - ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage()); - if (i % 2 == 0) { - cache->Release(handle); - precise_cache->Release(handle_in_precise_cache); - pinned_usage -= kv_size; - ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); - ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage()); - } else { - unreleased_handles.push_front(handle); - unreleased_handles_in_precise_cache.push_front(handle_in_precise_cache); - } - if (i % 3 == 0) { - unreleased_handles.push_front(cache->Lookup(key)); - auto x = precise_cache->Lookup(key); - assert(x); - unreleased_handles_in_precise_cache.push_front(x); - // If i % 2 == 0, then the entry was unpinned before Lookup, so pinned - // usage increased - if (i % 2 == 0) { - pinned_usage += kv_size; - } - ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); - ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage()); - } - } - auto precise_cache_pinned_usage = precise_cache->GetPinnedUsage(); - ASSERT_LT(pinned_usage, precise_cache_pinned_usage); - - // check that overloading the cache does not change the pinned usage - for (size_t i = 1; i < 2 * kCapacity; ++i) { - std::string key; - if (type == kLRU) { - key = std::to_string(i); - } else { - key = EncodeKey(static_cast(1000 + i)); - } - ASSERT_OK(cache->Insert(key, value, &kDumbHelper, key.size() + 5)); - ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, key.size() + 5)); - } - ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); - ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage()); - - cache->EraseUnRefEntries(); - precise_cache->EraseUnRefEntries(); - ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); - ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage()); - - // release handles for pinned entries to prevent memory leaks - for (auto handle : unreleased_handles) { - cache->Release(handle); - } - for (auto handle : unreleased_handles_in_precise_cache) { - precise_cache->Release(handle); - } - ASSERT_EQ(0, cache->GetPinnedUsage()); - ASSERT_EQ(0, precise_cache->GetPinnedUsage()); - cache->EraseUnRefEntries(); - precise_cache->EraseUnRefEntries(); - ASSERT_EQ(0, cache->GetUsage()); - 
ASSERT_EQ(baseline_meta_usage, precise_cache->GetUsage()); -} - -TEST_P(CacheTest, HitAndMiss) { - ASSERT_EQ(-1, Lookup(100)); - - Insert(100, 101); - ASSERT_EQ(101, Lookup(100)); - ASSERT_EQ(-1, Lookup(200)); - ASSERT_EQ(-1, Lookup(300)); - - Insert(200, 201); - ASSERT_EQ(101, Lookup(100)); - ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(-1, Lookup(300)); - - Insert(100, 102); - if (GetParam() == kHyperClock) { - // ClockCache usually doesn't overwrite on Insert - ASSERT_EQ(101, Lookup(100)); - } else { - ASSERT_EQ(102, Lookup(100)); - } - ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(-1, Lookup(300)); - - ASSERT_EQ(1U, deleted_values_.size()); - if (GetParam() == kHyperClock) { - ASSERT_EQ(102, deleted_values_[0]); - } else { - ASSERT_EQ(101, deleted_values_[0]); - } -} - -TEST_P(CacheTest, InsertSameKey) { - if (GetParam() == kHyperClock) { - ROCKSDB_GTEST_BYPASS( - "ClockCache doesn't guarantee Insert overwrite same key."); - return; - } - Insert(1, 1); - Insert(1, 2); - ASSERT_EQ(2, Lookup(1)); -} - -TEST_P(CacheTest, Erase) { - Erase(200); - ASSERT_EQ(0U, deleted_values_.size()); - - Insert(100, 101); - Insert(200, 201); - Erase(100); - ASSERT_EQ(-1, Lookup(100)); - ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(1U, deleted_values_.size()); - ASSERT_EQ(101, deleted_values_[0]); - - Erase(100); - ASSERT_EQ(-1, Lookup(100)); - ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(1U, deleted_values_.size()); -} - -TEST_P(CacheTest, EntriesArePinned) { - if (GetParam() == kHyperClock) { - ROCKSDB_GTEST_BYPASS( - "ClockCache doesn't guarantee Insert overwrite same key."); - return; - } - Insert(100, 101); - Cache::Handle* h1 = cache_->Lookup(EncodeKey(100)); - ASSERT_EQ(101, DecodeValue(cache_->Value(h1))); - ASSERT_EQ(1U, cache_->GetUsage()); - - Insert(100, 102); - Cache::Handle* h2 = cache_->Lookup(EncodeKey(100)); - ASSERT_EQ(102, DecodeValue(cache_->Value(h2))); - ASSERT_EQ(0U, deleted_values_.size()); - ASSERT_EQ(2U, cache_->GetUsage()); - - cache_->Release(h1); - ASSERT_EQ(1U, deleted_values_.size()); - ASSERT_EQ(101, deleted_values_[0]); - ASSERT_EQ(1U, cache_->GetUsage()); - - Erase(100); - ASSERT_EQ(-1, Lookup(100)); - ASSERT_EQ(1U, deleted_values_.size()); - ASSERT_EQ(1U, cache_->GetUsage()); - - cache_->Release(h2); - ASSERT_EQ(2U, deleted_values_.size()); - ASSERT_EQ(102, deleted_values_[1]); - ASSERT_EQ(0U, cache_->GetUsage()); -} - -TEST_P(CacheTest, EvictionPolicy) { - Insert(100, 101); - Insert(200, 201); - // Frequently used entry must be kept around - for (int i = 0; i < 2 * kCacheSize; i++) { - Insert(1000 + i, 2000 + i); - ASSERT_EQ(101, Lookup(100)); - } - ASSERT_EQ(101, Lookup(100)); - ASSERT_EQ(-1, Lookup(200)); -} - -TEST_P(CacheTest, ExternalRefPinsEntries) { - Insert(100, 101); - Cache::Handle* h = cache_->Lookup(EncodeKey(100)); - ASSERT_TRUE(cache_->Ref(h)); - ASSERT_EQ(101, DecodeValue(cache_->Value(h))); - ASSERT_EQ(1U, cache_->GetUsage()); - - for (int i = 0; i < 3; ++i) { - if (i > 0) { - // First release (i == 1) corresponds to Ref(), second release (i == 2) - // corresponds to Lookup(). Then, since all external refs are released, - // the below insertions should push out the cache entry. 
- cache_->Release(h); - } - // double cache size because the usage bit in block cache prevents 100 from - // being evicted in the first kCacheSize iterations - for (int j = 0; j < 2 * kCacheSize + 100; j++) { - Insert(1000 + j, 2000 + j); - } - // Clock cache is even more stateful and needs more churn to evict - if (GetParam() == kHyperClock) { - for (int j = 0; j < kCacheSize; j++) { - Insert(11000 + j, 11000 + j); - } - } - if (i < 2) { - ASSERT_EQ(101, Lookup(100)); - } - } - ASSERT_EQ(-1, Lookup(100)); -} - -TEST_P(CacheTest, EvictionPolicyRef) { - Insert(100, 101); - Insert(101, 102); - Insert(102, 103); - Insert(103, 104); - Insert(200, 101); - Insert(201, 102); - Insert(202, 103); - Insert(203, 104); - Cache::Handle* h201 = cache_->Lookup(EncodeKey(200)); - Cache::Handle* h202 = cache_->Lookup(EncodeKey(201)); - Cache::Handle* h203 = cache_->Lookup(EncodeKey(202)); - Cache::Handle* h204 = cache_->Lookup(EncodeKey(203)); - Insert(300, 101); - Insert(301, 102); - Insert(302, 103); - Insert(303, 104); - - // Insert entries much more than cache capacity. - for (int i = 0; i < 100 * kCacheSize; i++) { - Insert(1000 + i, 2000 + i); - } - - // Check whether the entries inserted in the beginning - // are evicted. Ones without extra ref are evicted and - // those with are not. - ASSERT_EQ(-1, Lookup(100)); - ASSERT_EQ(-1, Lookup(101)); - ASSERT_EQ(-1, Lookup(102)); - ASSERT_EQ(-1, Lookup(103)); - - ASSERT_EQ(-1, Lookup(300)); - ASSERT_EQ(-1, Lookup(301)); - ASSERT_EQ(-1, Lookup(302)); - ASSERT_EQ(-1, Lookup(303)); - - ASSERT_EQ(101, Lookup(200)); - ASSERT_EQ(102, Lookup(201)); - ASSERT_EQ(103, Lookup(202)); - ASSERT_EQ(104, Lookup(203)); - - // Cleaning up all the handles - cache_->Release(h201); - cache_->Release(h202); - cache_->Release(h203); - cache_->Release(h204); -} - -TEST_P(CacheTest, EvictEmptyCache) { - auto type = GetParam(); - - // Insert item large than capacity to trigger eviction on empty cache. - auto cache = NewCache(1, 0, false); - if (type == kLRU) { - ASSERT_OK(cache->Insert("foo", nullptr, &kDumbHelper, 10)); - } else { - ASSERT_OK(cache->Insert(EncodeKey(1000), nullptr, &kDumbHelper, 10)); - } -} - -TEST_P(CacheTest, EraseFromDeleter) { - auto type = GetParam(); - - // Have deleter which will erase item from cache, which will re-enter - // the cache at that point. 
- std::shared_ptr cache = NewCache(10, 0, false); - std::string foo, bar; - const Cache::CacheItemHelper* erase_helper; - if (type == kLRU) { - foo = "foo"; - bar = "bar"; - erase_helper = &kEraseOnDeleteHelper1; - } else { - foo = EncodeKey(1234); - bar = EncodeKey(5678); - erase_helper = &kEraseOnDeleteHelper2; - } - - ASSERT_OK(cache->Insert(foo, nullptr, &kDumbHelper, 1)); - ASSERT_OK(cache->Insert(bar, cache.get(), erase_helper, 1)); - - cache->Erase(bar); - ASSERT_EQ(nullptr, cache->Lookup(foo)); - ASSERT_EQ(nullptr, cache->Lookup(bar)); -} - -TEST_P(CacheTest, ErasedHandleState) { - // insert a key and get two handles - Insert(100, 1000); - Cache::Handle* h1 = cache_->Lookup(EncodeKey(100)); - Cache::Handle* h2 = cache_->Lookup(EncodeKey(100)); - ASSERT_EQ(h1, h2); - ASSERT_EQ(DecodeValue(cache_->Value(h1)), 1000); - ASSERT_EQ(DecodeValue(cache_->Value(h2)), 1000); - - // delete the key from the cache - Erase(100); - // can no longer find in the cache - ASSERT_EQ(-1, Lookup(100)); - - // release one handle - cache_->Release(h1); - // still can't find in cache - ASSERT_EQ(-1, Lookup(100)); - - cache_->Release(h2); -} - -TEST_P(CacheTest, HeavyEntries) { - // Add a bunch of light and heavy entries and then count the combined - // size of items still in the cache, which must be approximately the - // same as the total capacity. - const int kLight = 1; - const int kHeavy = 10; - int added = 0; - int index = 0; - while (added < 2 * kCacheSize) { - const int weight = (index & 1) ? kLight : kHeavy; - Insert(index, 1000 + index, weight); - added += weight; - index++; - } - - int cached_weight = 0; - for (int i = 0; i < index; i++) { - const int weight = (i & 1 ? kLight : kHeavy); - int r = Lookup(i); - if (r >= 0) { - cached_weight += weight; - ASSERT_EQ(1000 + i, r); - } - } - ASSERT_LE(cached_weight, kCacheSize + kCacheSize / 10); -} - -TEST_P(CacheTest, NewId) { - uint64_t a = cache_->NewId(); - uint64_t b = cache_->NewId(); - ASSERT_NE(a, b); -} - -TEST_P(CacheTest, ReleaseAndErase) { - std::shared_ptr cache = NewCache(5, 0, false); - Cache::Handle* handle; - Status s = - cache->Insert(EncodeKey(100), EncodeValue(100), &kHelper, 1, &handle); - ASSERT_TRUE(s.ok()); - ASSERT_EQ(5U, cache->GetCapacity()); - ASSERT_EQ(1U, cache->GetUsage()); - ASSERT_EQ(0U, deleted_values_.size()); - auto erased = cache->Release(handle, true); - ASSERT_TRUE(erased); - // This tests that deleter has been called - ASSERT_EQ(1U, deleted_values_.size()); -} - -TEST_P(CacheTest, ReleaseWithoutErase) { - std::shared_ptr cache = NewCache(5, 0, false); - Cache::Handle* handle; - Status s = - cache->Insert(EncodeKey(100), EncodeValue(100), &kHelper, 1, &handle); - ASSERT_TRUE(s.ok()); - ASSERT_EQ(5U, cache->GetCapacity()); - ASSERT_EQ(1U, cache->GetUsage()); - ASSERT_EQ(0U, deleted_values_.size()); - auto erased = cache->Release(handle); - ASSERT_FALSE(erased); - // This tests that deleter is not called. When cache has free capacity it is - // not expected to immediately erase the released items. 
- ASSERT_EQ(0U, deleted_values_.size()); -} - -namespace { -class Value { - public: - explicit Value(int v) : v_(v) {} - - int v_; - - static constexpr auto kCacheEntryRole = CacheEntryRole::kMisc; -}; - -using SharedCache = BasicTypedSharedCacheInterface; -using TypedHandle = SharedCache::TypedHandle; -} // namespace - -TEST_P(CacheTest, SetCapacity) { - auto type = GetParam(); - if (type == kHyperClock) { - ROCKSDB_GTEST_BYPASS( - "FastLRUCache and HyperClockCache don't support arbitrary capacity " - "adjustments."); - return; - } - // test1: increase capacity - // lets create a cache with capacity 5, - // then, insert 5 elements, then increase capacity - // to 10, returned capacity should be 10, usage=5 - SharedCache cache{NewCache(5, 0, false)}; - std::vector handles(10); - // Insert 5 entries, but not releasing. - for (int i = 0; i < 5; i++) { - std::string key = EncodeKey(i + 1); - Status s = cache.Insert(key, new Value(i + 1), 1, &handles[i]); - ASSERT_TRUE(s.ok()); - } - ASSERT_EQ(5U, cache.get()->GetCapacity()); - ASSERT_EQ(5U, cache.get()->GetUsage()); - cache.get()->SetCapacity(10); - ASSERT_EQ(10U, cache.get()->GetCapacity()); - ASSERT_EQ(5U, cache.get()->GetUsage()); - - // test2: decrease capacity - // insert 5 more elements to cache, then release 5, - // then decrease capacity to 7, final capacity should be 7 - // and usage should be 7 - for (int i = 5; i < 10; i++) { - std::string key = EncodeKey(i + 1); - Status s = cache.Insert(key, new Value(i + 1), 1, &handles[i]); - ASSERT_TRUE(s.ok()); - } - ASSERT_EQ(10U, cache.get()->GetCapacity()); - ASSERT_EQ(10U, cache.get()->GetUsage()); - for (int i = 0; i < 5; i++) { - cache.Release(handles[i]); - } - ASSERT_EQ(10U, cache.get()->GetCapacity()); - ASSERT_EQ(10U, cache.get()->GetUsage()); - cache.get()->SetCapacity(7); - ASSERT_EQ(7, cache.get()->GetCapacity()); - ASSERT_EQ(7, cache.get()->GetUsage()); - - // release remaining 5 to keep valgrind happy - for (int i = 5; i < 10; i++) { - cache.Release(handles[i]); - } - - // Make sure this doesn't crash or upset ASAN/valgrind - cache.get()->DisownData(); -} - -TEST_P(LRUCacheTest, SetStrictCapacityLimit) { - // test1: set the flag to false. Insert more keys than capacity. See if they - // all go through. - SharedCache cache{NewCache(5, 0, false)}; - std::vector handles(10); - Status s; - for (int i = 0; i < 10; i++) { - std::string key = EncodeKey(i + 1); - s = cache.Insert(key, new Value(i + 1), 1, &handles[i]); - ASSERT_OK(s); - ASSERT_NE(nullptr, handles[i]); - } - ASSERT_EQ(10, cache.get()->GetUsage()); - - // test2: set the flag to true. Insert and check if it fails. - std::string extra_key = EncodeKey(100); - Value* extra_value = new Value(0); - cache.get()->SetStrictCapacityLimit(true); - TypedHandle* handle; - s = cache.Insert(extra_key, extra_value, 1, &handle); - ASSERT_TRUE(s.IsMemoryLimit()); - ASSERT_EQ(nullptr, handle); - ASSERT_EQ(10, cache.get()->GetUsage()); - - for (int i = 0; i < 10; i++) { - cache.Release(handles[i]); - } - - // test3: init with flag being true. - SharedCache cache2{NewCache(5, 0, true)}; - for (int i = 0; i < 5; i++) { - std::string key = EncodeKey(i + 1); - s = cache2.Insert(key, new Value(i + 1), 1, &handles[i]); - ASSERT_OK(s); - ASSERT_NE(nullptr, handles[i]); - } - s = cache2.Insert(extra_key, extra_value, 1, &handle); - ASSERT_TRUE(s.IsMemoryLimit()); - ASSERT_EQ(nullptr, handle); - // test insert without handle - s = cache2.Insert(extra_key, extra_value, 1); - // AS if the key have been inserted into cache but get evicted immediately. 
- ASSERT_OK(s); - ASSERT_EQ(5, cache2.get()->GetUsage()); - ASSERT_EQ(nullptr, cache2.Lookup(extra_key)); - - for (int i = 0; i < 5; i++) { - cache2.Release(handles[i]); - } -} - -TEST_P(CacheTest, OverCapacity) { - size_t n = 10; - - // a LRUCache with n entries and one shard only - SharedCache cache{NewCache(n, 0, false)}; - std::vector handles(n + 1); - - // Insert n+1 entries, but not releasing. - for (int i = 0; i < static_cast(n + 1); i++) { - std::string key = EncodeKey(i + 1); - Status s = cache.Insert(key, new Value(i + 1), 1, &handles[i]); - ASSERT_TRUE(s.ok()); - } - - // Guess what's in the cache now? - for (int i = 0; i < static_cast(n + 1); i++) { - std::string key = EncodeKey(i + 1); - auto h = cache.Lookup(key); - ASSERT_TRUE(h != nullptr); - if (h) cache.Release(h); - } - - // the cache is over capacity since nothing could be evicted - ASSERT_EQ(n + 1U, cache.get()->GetUsage()); - for (int i = 0; i < static_cast(n + 1); i++) { - cache.Release(handles[i]); - } - - if (GetParam() == kHyperClock) { - // Make sure eviction is triggered. - ASSERT_OK(cache.Insert(EncodeKey(-1), nullptr, 1, &handles[0])); - - // cache is under capacity now since elements were released - ASSERT_GE(n, cache.get()->GetUsage()); - - // clean up - cache.Release(handles[0]); - } else { - // LRUCache checks for over-capacity in Release. - - // cache is exactly at capacity now with minimal eviction - ASSERT_EQ(n, cache.get()->GetUsage()); - - // element 0 is evicted and the rest is there - // This is consistent with the LRU policy since the element 0 - // was released first - for (int i = 0; i < static_cast(n + 1); i++) { - std::string key = EncodeKey(i + 1); - auto h = cache.Lookup(key); - if (h) { - ASSERT_NE(static_cast(i), 0U); - cache.Release(h); - } else { - ASSERT_EQ(static_cast(i), 0U); - } - } - } -} - -TEST_P(CacheTest, ApplyToAllEntriesTest) { - std::vector callback_state; - const auto callback = [&](const Slice& key, Cache::ObjectPtr value, - size_t charge, - const Cache::CacheItemHelper* helper) { - callback_state.push_back(std::to_string(DecodeKey(key)) + "," + - std::to_string(DecodeValue(value)) + "," + - std::to_string(charge)); - assert(helper == &CacheTest::kHelper); - }; - - std::vector inserted; - callback_state.clear(); - - for (int i = 0; i < 10; ++i) { - Insert(i, i * 2, i + 1); - inserted.push_back(std::to_string(i) + "," + std::to_string(i * 2) + "," + - std::to_string(i + 1)); - } - cache_->ApplyToAllEntries(callback, /*opts*/ {}); - - std::sort(inserted.begin(), inserted.end()); - std::sort(callback_state.begin(), callback_state.end()); - ASSERT_EQ(inserted.size(), callback_state.size()); - for (int i = 0; i < static_cast(inserted.size()); ++i) { - EXPECT_EQ(inserted[i], callback_state[i]); - } -} - -TEST_P(CacheTest, ApplyToAllEntriesDuringResize) { - // This is a mini-stress test of ApplyToAllEntries, to ensure - // items in the cache that are neither added nor removed - // during ApplyToAllEntries are counted exactly once. - - // Insert some entries that we expect to be seen exactly once - // during iteration. 
- constexpr int kSpecialCharge = 2; - constexpr int kNotSpecialCharge = 1; - constexpr int kSpecialCount = 100; - size_t expected_usage = 0; - for (int i = 0; i < kSpecialCount; ++i) { - Insert(i, i * 2, kSpecialCharge); - expected_usage += kSpecialCharge; - } - - // For callback - int special_count = 0; - const auto callback = [&](const Slice&, Cache::ObjectPtr, size_t charge, - const Cache::CacheItemHelper*) { - if (charge == static_cast(kSpecialCharge)) { - ++special_count; - } - }; - - // Start counting - std::thread apply_thread([&]() { - // Use small average_entries_per_lock to make the problem difficult - Cache::ApplyToAllEntriesOptions opts; - opts.average_entries_per_lock = 2; - cache_->ApplyToAllEntries(callback, opts); - }); - - // In parallel, add more entries, enough to cause resize but not enough - // to cause ejections. (Note: if any cache shard is over capacity, there - // will be ejections) - for (int i = kSpecialCount * 1; i < kSpecialCount * 5; ++i) { - Insert(i, i * 2, kNotSpecialCharge); - expected_usage += kNotSpecialCharge; - } - - apply_thread.join(); - // verify no evictions - ASSERT_EQ(cache_->GetUsage(), expected_usage); - // verify everything seen in ApplyToAllEntries - ASSERT_EQ(special_count, kSpecialCount); -} - -TEST_P(CacheTest, DefaultShardBits) { - // Prevent excessive allocation (to save time & space) - estimated_value_size_ = 100000; - // Implementations use different minimum shard sizes - size_t min_shard_size = - (GetParam() == kHyperClock ? 32U * 1024U : 512U) * 1024U; - - std::shared_ptr cache = NewCache(32U * min_shard_size); - ShardedCacheBase* sc = dynamic_cast(cache.get()); - ASSERT_EQ(5, sc->GetNumShardBits()); - - cache = NewCache(min_shard_size / 1000U * 999U); - sc = dynamic_cast(cache.get()); - ASSERT_EQ(0, sc->GetNumShardBits()); - - cache = NewCache(3U * 1024U * 1024U * 1024U); - sc = dynamic_cast(cache.get()); - // current maximum of 6 - ASSERT_EQ(6, sc->GetNumShardBits()); - - if constexpr (sizeof(size_t) > 4) { - cache = NewCache(128U * min_shard_size); - sc = dynamic_cast(cache.get()); - // current maximum of 6 - ASSERT_EQ(6, sc->GetNumShardBits()); - } -} - -TEST_P(CacheTest, GetChargeAndDeleter) { - Insert(1, 2); - Cache::Handle* h1 = cache_->Lookup(EncodeKey(1)); - ASSERT_EQ(2, DecodeValue(cache_->Value(h1))); - ASSERT_EQ(1, cache_->GetCharge(h1)); - ASSERT_EQ(&CacheTest::kHelper, cache_->GetCacheItemHelper(h1)); - cache_->Release(h1); -} - -INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, - secondary_cache_test_util::GetTestingCacheTypes()); -INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, - testing::Values(secondary_cache_test_util::kLRU)); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc deleted file mode 100644 index 1e41fc142..000000000 --- a/cache/compressed_secondary_cache_test.cc +++ /dev/null @@ -1,980 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
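The compressed_secondary_cache_test.cc cases below drive the secondary cache directly through Insert and Lookup. In normal use it is instead attached to a primary block cache; a minimal sketch of that wiring follows, assuming the CompressedSecondaryCacheOptions / NewCompressedSecondaryCache factory used by the tests and the secondary_cache field on LRUCacheOptions, with illustrative capacities.

#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/secondary_cache.h"  // SecondaryCache (assumed public header)

using namespace ROCKSDB_NAMESPACE;

int main() {
  // Compressed tier: holds entries evicted from the primary cache in
  // compressed form.
  CompressedSecondaryCacheOptions sec_opts;
  sec_opts.capacity = 32 << 20;
  sec_opts.num_shard_bits = 0;
  std::shared_ptr<SecondaryCache> sec_cache =
      NewCompressedSecondaryCache(sec_opts);

  // Primary tier: an ordinary LRU block cache with the secondary cache
  // attached, so evicted entries can be demoted and later promoted back.
  LRUCacheOptions lru_opts;
  lru_opts.capacity = 64 << 20;
  lru_opts.num_shard_bits = 0;
  lru_opts.secondary_cache = sec_cache;
  std::shared_ptr<Cache> block_cache = NewLRUCache(lru_opts);
  (void)block_cache;
  return 0;
}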
- -#include "cache/compressed_secondary_cache.h" - -#include -#include -#include -#include - -#include "memory/jemalloc_nodump_allocator.h" -#include "rocksdb/convenience.h" -#include "test_util/secondary_cache_test_util.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -using secondary_cache_test_util::GetTestingCacheTypes; -using secondary_cache_test_util::WithCacheType; - -// 16 bytes for HCC compatibility -const std::string key0 = "____ ____key0"; -const std::string key1 = "____ ____key1"; -const std::string key2 = "____ ____key2"; -const std::string key3 = "____ ____key3"; - -class CompressedSecondaryCacheTestBase : public testing::Test, - public WithCacheType { - public: - CompressedSecondaryCacheTestBase() {} - ~CompressedSecondaryCacheTestBase() override = default; - - protected: - void BasicTestHelper(std::shared_ptr sec_cache, - bool sec_cache_is_compressed) { - get_perf_context()->Reset(); - bool kept_in_sec_cache{true}; - // Lookup an non-existent key. - std::unique_ptr handle0 = - sec_cache->Lookup(key0, GetHelper(), this, true, /*advise_erase=*/true, - kept_in_sec_cache); - ASSERT_EQ(handle0, nullptr); - - Random rnd(301); - // Insert and Lookup the item k1 for the first time. - std::string str1(rnd.RandomString(1000)); - TestItem item1(str1.data(), str1.length()); - // A dummy handle is inserted if the item is inserted for the first time. - ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 1); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0); - - std::unique_ptr handle1_1 = - sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); - ASSERT_EQ(handle1_1, nullptr); - - // Insert and Lookup the item k1 for the second time and advise erasing it. - ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 1); - - std::unique_ptr handle1_2 = - sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/true, - kept_in_sec_cache); - ASSERT_NE(handle1_2, nullptr); - ASSERT_FALSE(kept_in_sec_cache); - if (sec_cache_is_compressed) { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, - 1000); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, - 1007); - } else { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0); - } - - std::unique_ptr val1 = - std::unique_ptr(static_cast(handle1_2->Value())); - ASSERT_NE(val1, nullptr); - ASSERT_EQ(memcmp(val1->Buf(), item1.Buf(), item1.Size()), 0); - - // Lookup the item k1 again. - std::unique_ptr handle1_3 = - sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/true, - kept_in_sec_cache); - ASSERT_EQ(handle1_3, nullptr); - - // Insert and Lookup the item k2. 
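// (As with k1, the first Insert of k2 below only records a dummy entry and
// the second Insert stores the real, possibly compressed, payload; the perf
// context dummy/real insert counters are checked accordingly.)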
- std::string str2(rnd.RandomString(1000)); - TestItem item2(str2.data(), str2.length()); - ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 2); - std::unique_ptr handle2_1 = - sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); - ASSERT_EQ(handle2_1, nullptr); - - ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 2); - if (sec_cache_is_compressed) { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, - 2000); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, - 2014); - } else { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0); - } - std::unique_ptr handle2_2 = - sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); - ASSERT_NE(handle2_2, nullptr); - std::unique_ptr val2 = - std::unique_ptr(static_cast(handle2_2->Value())); - ASSERT_NE(val2, nullptr); - ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0); - - std::vector handles = {handle1_2.get(), - handle2_2.get()}; - sec_cache->WaitAll(handles); - - sec_cache.reset(); - } - - void BasicTest(bool sec_cache_is_compressed, bool use_jemalloc) { - CompressedSecondaryCacheOptions opts; - opts.capacity = 2048; - opts.num_shard_bits = 0; - - if (sec_cache_is_compressed) { - if (!LZ4_Supported()) { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - opts.compression_type = CompressionType::kNoCompression; - sec_cache_is_compressed = false; - } - } else { - opts.compression_type = CompressionType::kNoCompression; - } - - if (use_jemalloc) { - JemallocAllocatorOptions jopts; - std::shared_ptr allocator; - std::string msg; - if (JemallocNodumpAllocator::IsSupported(&msg)) { - Status s = NewJemallocNodumpAllocator(jopts, &allocator); - if (s.ok()) { - opts.memory_allocator = allocator; - } - } else { - ROCKSDB_GTEST_BYPASS("JEMALLOC not supported"); - } - } - std::shared_ptr sec_cache = - NewCompressedSecondaryCache(opts); - - BasicTestHelper(sec_cache, sec_cache_is_compressed); - } - - void FailsTest(bool sec_cache_is_compressed) { - CompressedSecondaryCacheOptions secondary_cache_opts; - if (sec_cache_is_compressed) { - if (!LZ4_Supported()) { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - } else { - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - - secondary_cache_opts.capacity = 1100; - secondary_cache_opts.num_shard_bits = 0; - std::shared_ptr sec_cache = - NewCompressedSecondaryCache(secondary_cache_opts); - - // Insert and Lookup the first item. - Random rnd(301); - std::string str1(rnd.RandomString(1000)); - TestItem item1(str1.data(), str1.length()); - // Insert a dummy handle. - ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); - // Insert k1. - ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); - - // Insert and Lookup the second item. - std::string str2(rnd.RandomString(200)); - TestItem item2(str2.data(), str2.length()); - // Insert a dummy handle, k1 is not evicted. 
- ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper())); - bool kept_in_sec_cache{false}; - std::unique_ptr handle1 = - sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); - ASSERT_EQ(handle1, nullptr); - - // Insert k2 and k1 is evicted. - ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper())); - std::unique_ptr handle2 = - sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); - ASSERT_NE(handle2, nullptr); - std::unique_ptr val2 = - std::unique_ptr(static_cast(handle2->Value())); - ASSERT_NE(val2, nullptr); - ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0); - - // Insert k1 again and a dummy handle is inserted. - ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); - - std::unique_ptr handle1_1 = - sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); - ASSERT_EQ(handle1_1, nullptr); - - // Create Fails. - SetFailCreate(true); - std::unique_ptr handle2_1 = - sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/true, - kept_in_sec_cache); - ASSERT_EQ(handle2_1, nullptr); - - // Save Fails. - std::string str3 = rnd.RandomString(10); - TestItem item3(str3.data(), str3.length()); - // The Status is OK because a dummy handle is inserted. - ASSERT_OK(sec_cache->Insert(key3, &item3, GetHelperFail())); - ASSERT_NOK(sec_cache->Insert(key3, &item3, GetHelperFail())); - - sec_cache.reset(); - } - - void BasicIntegrationTest(bool sec_cache_is_compressed, - bool enable_custom_split_merge) { - CompressedSecondaryCacheOptions secondary_cache_opts; - - if (sec_cache_is_compressed) { - if (!LZ4_Supported()) { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - sec_cache_is_compressed = false; - } - } else { - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - - secondary_cache_opts.capacity = 6000; - secondary_cache_opts.num_shard_bits = 0; - secondary_cache_opts.enable_custom_split_merge = enable_custom_split_merge; - std::shared_ptr secondary_cache = - NewCompressedSecondaryCache(secondary_cache_opts); - std::shared_ptr cache = NewCache( - /*_capacity =*/1300, /*_num_shard_bits =*/0, - /*_strict_capacity_limit =*/true, secondary_cache); - std::shared_ptr stats = CreateDBStatistics(); - - get_perf_context()->Reset(); - Random rnd(301); - std::string str1 = rnd.RandomString(1001); - auto item1_1 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(key1, item1_1, GetHelper(), str1.length())); - - std::string str2 = rnd.RandomString(1012); - auto item2_1 = new TestItem(str2.data(), str2.length()); - // After this Insert, primary cache contains k2 and secondary cache contains - // k1's dummy item. - ASSERT_OK(cache->Insert(key2, item2_1, GetHelper(), str2.length())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 1); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0); - - std::string str3 = rnd.RandomString(1024); - auto item3_1 = new TestItem(str3.data(), str3.length()); - // After this Insert, primary cache contains k3 and secondary cache contains - // k1's dummy item and k2's dummy item. 
- ASSERT_OK(cache->Insert(key3, item3_1, GetHelper(), str3.length())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 2); - - // After this Insert, primary cache contains k1 and secondary cache contains - // k1's dummy item, k2's dummy item, and k3's dummy item. - auto item1_2 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(key1, item1_2, GetHelper(), str1.length())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 3); - - // After this Insert, primary cache contains k2 and secondary cache contains - // k1's item, k2's dummy item, and k3's dummy item. - auto item2_2 = new TestItem(str2.data(), str2.length()); - ASSERT_OK(cache->Insert(key2, item2_2, GetHelper(), str2.length())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 1); - if (sec_cache_is_compressed) { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, - str1.length()); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, - 1008); - } else { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0); - } - - // After this Insert, primary cache contains k3 and secondary cache contains - // k1's item and k2's item. - auto item3_2 = new TestItem(str3.data(), str3.length()); - ASSERT_OK(cache->Insert(key3, item3_2, GetHelper(), str3.length())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 2); - if (sec_cache_is_compressed) { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, - str1.length() + str2.length()); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, - 2027); - } else { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0); - } - - Cache::Handle* handle; - handle = cache->Lookup(key3, GetHelper(), this, Cache::Priority::LOW, - stats.get()); - ASSERT_NE(handle, nullptr); - auto val3 = static_cast(cache->Value(handle)); - ASSERT_NE(val3, nullptr); - ASSERT_EQ(memcmp(val3->Buf(), item3_2->Buf(), item3_2->Size()), 0); - cache->Release(handle); - - // Lookup an non-existent key. - handle = cache->Lookup(key0, GetHelper(), this, Cache::Priority::LOW, - stats.get()); - ASSERT_EQ(handle, nullptr); - - // This Lookup should just insert a dummy handle in the primary cache - // and the k1 is still in the secondary cache. - handle = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW, - stats.get()); - ASSERT_NE(handle, nullptr); - ASSERT_EQ(get_perf_context()->block_cache_standalone_handle_count, 1); - auto val1_1 = static_cast(cache->Value(handle)); - ASSERT_NE(val1_1, nullptr); - ASSERT_EQ(memcmp(val1_1->Buf(), str1.data(), str1.size()), 0); - cache->Release(handle); - - // This Lookup should erase k1 from the secondary cache and insert - // it into primary cache; then k3 is demoted. - // k2 and k3 are in secondary cache. - handle = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW, - stats.get()); - ASSERT_NE(handle, nullptr); - ASSERT_EQ(get_perf_context()->block_cache_standalone_handle_count, 1); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 3); - cache->Release(handle); - - // k2 is still in secondary cache. 
- handle = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW, - stats.get()); - ASSERT_NE(handle, nullptr); - ASSERT_EQ(get_perf_context()->block_cache_standalone_handle_count, 2); - cache->Release(handle); - - // Testing SetCapacity(). - ASSERT_OK(secondary_cache->SetCapacity(0)); - handle = cache->Lookup(key3, GetHelper(), this, Cache::Priority::LOW, - stats.get()); - ASSERT_EQ(handle, nullptr); - - ASSERT_OK(secondary_cache->SetCapacity(7000)); - size_t capacity; - ASSERT_OK(secondary_cache->GetCapacity(capacity)); - ASSERT_EQ(capacity, 7000); - auto item1_3 = new TestItem(str1.data(), str1.length()); - // After this Insert, primary cache contains k1. - ASSERT_OK(cache->Insert(key1, item1_3, GetHelper(), str2.length())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 3); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 4); - - auto item2_3 = new TestItem(str2.data(), str2.length()); - // After this Insert, primary cache contains k2 and secondary cache contains - // k1's dummy item. - ASSERT_OK(cache->Insert(key2, item2_3, GetHelper(), str1.length())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 4); - - auto item1_4 = new TestItem(str1.data(), str1.length()); - // After this Insert, primary cache contains k1 and secondary cache contains - // k1's dummy item and k2's dummy item. - ASSERT_OK(cache->Insert(key1, item1_4, GetHelper(), str2.length())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 5); - - auto item2_4 = new TestItem(str2.data(), str2.length()); - // After this Insert, primary cache contains k2 and secondary cache contains - // k1's real item and k2's dummy item. - ASSERT_OK(cache->Insert(key2, item2_4, GetHelper(), str2.length())); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 5); - // This Lookup should just insert a dummy handle in the primary cache - // and the k1 is still in the secondary cache. 
- handle = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW, - stats.get()); - - ASSERT_NE(handle, nullptr); - cache->Release(handle); - ASSERT_EQ(get_perf_context()->block_cache_standalone_handle_count, 3); - - cache.reset(); - secondary_cache.reset(); - } - - void BasicIntegrationFailTest(bool sec_cache_is_compressed) { - CompressedSecondaryCacheOptions secondary_cache_opts; - - if (sec_cache_is_compressed) { - if (!LZ4_Supported()) { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - } else { - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - - secondary_cache_opts.capacity = 6000; - secondary_cache_opts.num_shard_bits = 0; - std::shared_ptr secondary_cache = - NewCompressedSecondaryCache(secondary_cache_opts); - - std::shared_ptr cache = NewCache( - /*_capacity=*/1300, /*_num_shard_bits=*/0, - /*_strict_capacity_limit=*/false, secondary_cache); - - Random rnd(301); - std::string str1 = rnd.RandomString(1001); - auto item1 = std::make_unique(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(key1, item1.get(), GetHelper(), str1.length())); - item1.release(); // Appease clang-analyze "potential memory leak" - - Cache::Handle* handle; - handle = cache->Lookup(key2, nullptr, this, Cache::Priority::LOW); - ASSERT_EQ(handle, nullptr); - handle = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW); - ASSERT_EQ(handle, nullptr); - - Cache::AsyncLookupHandle ah; - ah.key = key2; - ah.helper = GetHelper(); - ah.create_context = this; - ah.priority = Cache::Priority::LOW; - cache->StartAsyncLookup(ah); - cache->Wait(ah); - ASSERT_EQ(ah.Result(), nullptr); - - cache.reset(); - secondary_cache.reset(); - } - - void IntegrationSaveFailTest(bool sec_cache_is_compressed) { - CompressedSecondaryCacheOptions secondary_cache_opts; - - if (sec_cache_is_compressed) { - if (!LZ4_Supported()) { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - } else { - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - - secondary_cache_opts.capacity = 6000; - secondary_cache_opts.num_shard_bits = 0; - - std::shared_ptr secondary_cache = - NewCompressedSecondaryCache(secondary_cache_opts); - - std::shared_ptr cache = NewCache( - /*_capacity=*/1300, /*_num_shard_bits=*/0, - /*_strict_capacity_limit=*/true, secondary_cache); - - Random rnd(301); - std::string str1 = rnd.RandomString(1001); - auto item1 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(key1, item1, GetHelperFail(), str1.length())); - - std::string str2 = rnd.RandomString(1002); - auto item2 = new TestItem(str2.data(), str2.length()); - // k1 should be demoted to the secondary cache. - ASSERT_OK(cache->Insert(key2, item2, GetHelperFail(), str2.length())); - - Cache::Handle* handle; - handle = cache->Lookup(key2, GetHelperFail(), this, Cache::Priority::LOW); - ASSERT_NE(handle, nullptr); - cache->Release(handle); - // This lookup should fail, since k1 demotion would have failed. - handle = cache->Lookup(key1, GetHelperFail(), this, Cache::Priority::LOW); - ASSERT_EQ(handle, nullptr); - // Since k1 was not promoted, k2 should still be in cache. 
- handle = cache->Lookup(key2, GetHelperFail(), this, Cache::Priority::LOW); - ASSERT_NE(handle, nullptr); - cache->Release(handle); - - cache.reset(); - secondary_cache.reset(); - } - - void IntegrationCreateFailTest(bool sec_cache_is_compressed) { - CompressedSecondaryCacheOptions secondary_cache_opts; - - if (sec_cache_is_compressed) { - if (!LZ4_Supported()) { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - } else { - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - - secondary_cache_opts.capacity = 6000; - secondary_cache_opts.num_shard_bits = 0; - - std::shared_ptr secondary_cache = - NewCompressedSecondaryCache(secondary_cache_opts); - - std::shared_ptr cache = NewCache( - /*_capacity=*/1300, /*_num_shard_bits=*/0, - /*_strict_capacity_limit=*/true, secondary_cache); - - Random rnd(301); - std::string str1 = rnd.RandomString(1001); - auto item1 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(key1, item1, GetHelper(), str1.length())); - - std::string str2 = rnd.RandomString(1002); - auto item2 = new TestItem(str2.data(), str2.length()); - // k1 should be demoted to the secondary cache. - ASSERT_OK(cache->Insert(key2, item2, GetHelper(), str2.length())); - - Cache::Handle* handle; - SetFailCreate(true); - handle = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW); - ASSERT_NE(handle, nullptr); - cache->Release(handle); - // This lookup should fail, since k1 creation would have failed - handle = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW); - ASSERT_EQ(handle, nullptr); - // Since k1 didn't get promoted, k2 should still be in cache - handle = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW); - ASSERT_NE(handle, nullptr); - cache->Release(handle); - - cache.reset(); - secondary_cache.reset(); - } - - void IntegrationFullCapacityTest(bool sec_cache_is_compressed) { - CompressedSecondaryCacheOptions secondary_cache_opts; - - if (sec_cache_is_compressed) { - if (!LZ4_Supported()) { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - } else { - secondary_cache_opts.compression_type = CompressionType::kNoCompression; - } - - secondary_cache_opts.capacity = 6000; - secondary_cache_opts.num_shard_bits = 0; - - std::shared_ptr secondary_cache = - NewCompressedSecondaryCache(secondary_cache_opts); - - std::shared_ptr cache = NewCache( - /*_capacity=*/1300, /*_num_shard_bits=*/0, - /*_strict_capacity_limit=*/false, secondary_cache); - - Random rnd(301); - std::string str1 = rnd.RandomString(1001); - auto item1_1 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(key1, item1_1, GetHelper(), str1.length())); - - std::string str2 = rnd.RandomString(1002); - std::string str2_clone{str2}; - auto item2 = new TestItem(str2.data(), str2.length()); - // After this Insert, primary cache contains k2 and secondary cache contains - // k1's dummy item. - ASSERT_OK(cache->Insert(key2, item2, GetHelper(), str2.length())); - - // After this Insert, primary cache contains k1 and secondary cache contains - // k1's dummy item and k2's dummy item. 
- auto item1_2 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(key1, item1_2, GetHelper(), str1.length())); - - auto item2_2 = new TestItem(str2.data(), str2.length()); - // After this Insert, primary cache contains k2 and secondary cache contains - // k1's item and k2's dummy item. - ASSERT_OK(cache->Insert(key2, item2_2, GetHelper(), str2.length())); - - Cache::Handle* handle2; - handle2 = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW); - ASSERT_NE(handle2, nullptr); - cache->Release(handle2); - - // k1 promotion should fail because cache is at capacity and - // strict_capacity_limit is true, but the lookup should still succeed. - // A k1's dummy item is inserted into primary cache. - Cache::Handle* handle1; - handle1 = cache->Lookup(key1, GetHelper(), this, Cache::Priority::LOW); - ASSERT_NE(handle1, nullptr); - cache->Release(handle1); - - // Since k1 didn't get inserted, k2 should still be in cache - handle2 = cache->Lookup(key2, GetHelper(), this, Cache::Priority::LOW); - ASSERT_NE(handle2, nullptr); - cache->Release(handle2); - - cache.reset(); - secondary_cache.reset(); - } - - void SplitValueIntoChunksTest() { - JemallocAllocatorOptions jopts; - std::shared_ptr allocator; - std::string msg; - if (JemallocNodumpAllocator::IsSupported(&msg)) { - Status s = NewJemallocNodumpAllocator(jopts, &allocator); - if (!s.ok()) { - ROCKSDB_GTEST_BYPASS("JEMALLOC not supported"); - } - } else { - ROCKSDB_GTEST_BYPASS("JEMALLOC not supported"); - } - - using CacheValueChunk = CompressedSecondaryCache::CacheValueChunk; - std::unique_ptr sec_cache = - std::make_unique(1000, 0, true, 0.5, 0.0, - allocator); - Random rnd(301); - // 8500 = 8169 + 233 + 98, so there should be 3 chunks after split. - size_t str_size{8500}; - std::string str = rnd.RandomString(static_cast(str_size)); - size_t charge{0}; - CacheValueChunk* chunks_head = - sec_cache->SplitValueIntoChunks(str, kLZ4Compression, charge); - ASSERT_EQ(charge, str_size + 3 * (sizeof(CacheValueChunk) - 1)); - - CacheValueChunk* current_chunk = chunks_head; - ASSERT_EQ(current_chunk->size, 8192 - sizeof(CacheValueChunk) + 1); - current_chunk = current_chunk->next; - ASSERT_EQ(current_chunk->size, 256 - sizeof(CacheValueChunk) + 1); - current_chunk = current_chunk->next; - ASSERT_EQ(current_chunk->size, 98); - - sec_cache->GetHelper(true)->del_cb(chunks_head, /*alloc*/ nullptr); - } - - void MergeChunksIntoValueTest() { - using CacheValueChunk = CompressedSecondaryCache::CacheValueChunk; - Random rnd(301); - size_t size1{2048}; - std::string str1 = rnd.RandomString(static_cast(size1)); - CacheValueChunk* current_chunk = reinterpret_cast( - new char[sizeof(CacheValueChunk) - 1 + size1]); - CacheValueChunk* chunks_head = current_chunk; - memcpy(current_chunk->data, str1.data(), size1); - current_chunk->size = size1; - - size_t size2{256}; - std::string str2 = rnd.RandomString(static_cast(size2)); - current_chunk->next = reinterpret_cast( - new char[sizeof(CacheValueChunk) - 1 + size2]); - current_chunk = current_chunk->next; - memcpy(current_chunk->data, str2.data(), size2); - current_chunk->size = size2; - - size_t size3{31}; - std::string str3 = rnd.RandomString(static_cast(size3)); - current_chunk->next = reinterpret_cast( - new char[sizeof(CacheValueChunk) - 1 + size3]); - current_chunk = current_chunk->next; - memcpy(current_chunk->data, str3.data(), size3); - current_chunk->size = size3; - current_chunk->next = nullptr; - - std::string str = str1 + str2 + str3; - - std::unique_ptr sec_cache = - 
std::make_unique(1000, 0, true, 0.5, 0.0); - size_t charge{0}; - CacheAllocationPtr value = - sec_cache->MergeChunksIntoValue(chunks_head, charge); - ASSERT_EQ(charge, size1 + size2 + size3); - std::string value_str{value.get(), charge}; - ASSERT_EQ(strcmp(value_str.data(), str.data()), 0); - - while (chunks_head != nullptr) { - CacheValueChunk* tmp_chunk = chunks_head; - chunks_head = chunks_head->next; - tmp_chunk->Free(); - } - } - - void SplictValueAndMergeChunksTest() { - JemallocAllocatorOptions jopts; - std::shared_ptr allocator; - std::string msg; - if (JemallocNodumpAllocator::IsSupported(&msg)) { - Status s = NewJemallocNodumpAllocator(jopts, &allocator); - if (!s.ok()) { - ROCKSDB_GTEST_BYPASS("JEMALLOC not supported"); - } - } else { - ROCKSDB_GTEST_BYPASS("JEMALLOC not supported"); - } - - using CacheValueChunk = CompressedSecondaryCache::CacheValueChunk; - std::unique_ptr sec_cache = - std::make_unique(1000, 0, true, 0.5, 0.0, - allocator); - Random rnd(301); - // 8500 = 8169 + 233 + 98, so there should be 3 chunks after split. - size_t str_size{8500}; - std::string str = rnd.RandomString(static_cast(str_size)); - size_t charge{0}; - CacheValueChunk* chunks_head = - sec_cache->SplitValueIntoChunks(str, kLZ4Compression, charge); - ASSERT_EQ(charge, str_size + 3 * (sizeof(CacheValueChunk) - 1)); - - CacheAllocationPtr value = - sec_cache->MergeChunksIntoValue(chunks_head, charge); - ASSERT_EQ(charge, str_size); - std::string value_str{value.get(), charge}; - ASSERT_EQ(strcmp(value_str.data(), str.data()), 0); - - sec_cache->GetHelper(true)->del_cb(chunks_head, /*alloc*/ nullptr); - } -}; - -class CompressedSecondaryCacheTest - : public CompressedSecondaryCacheTestBase, - public testing::WithParamInterface { - const std::string& Type() override { return GetParam(); } -}; - -INSTANTIATE_TEST_CASE_P(CompressedSecondaryCacheTest, - CompressedSecondaryCacheTest, GetTestingCacheTypes()); - -class CompressedSecCacheTestWithCompressAndAllocatorParam - : public CompressedSecondaryCacheTestBase, - public ::testing::WithParamInterface< - std::tuple> { - public: - CompressedSecCacheTestWithCompressAndAllocatorParam() { - sec_cache_is_compressed_ = std::get<0>(GetParam()); - use_jemalloc_ = std::get<1>(GetParam()); - } - const std::string& Type() override { return std::get<2>(GetParam()); } - bool sec_cache_is_compressed_; - bool use_jemalloc_; -}; - -TEST_P(CompressedSecCacheTestWithCompressAndAllocatorParam, BasicTes) { - BasicTest(sec_cache_is_compressed_, use_jemalloc_); -} - -INSTANTIATE_TEST_CASE_P(CompressedSecCacheTests, - CompressedSecCacheTestWithCompressAndAllocatorParam, - ::testing::Combine(testing::Bool(), testing::Bool(), - GetTestingCacheTypes())); - -class CompressedSecondaryCacheTestWithCompressionParam - : public CompressedSecondaryCacheTestBase, - public ::testing::WithParamInterface> { - public: - CompressedSecondaryCacheTestWithCompressionParam() { - sec_cache_is_compressed_ = std::get<0>(GetParam()); - } - const std::string& Type() override { return std::get<1>(GetParam()); } - bool sec_cache_is_compressed_; -}; - -TEST_P(CompressedSecondaryCacheTestWithCompressionParam, BasicTestFromString) { - std::shared_ptr sec_cache{nullptr}; - std::string sec_cache_uri; - if (sec_cache_is_compressed_) { - if (LZ4_Supported()) { - sec_cache_uri = - "compressed_secondary_cache://" - "capacity=2048;num_shard_bits=0;compression_type=kLZ4Compression;" - "compress_format_version=2"; - } else { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - sec_cache_uri = - 
"compressed_secondary_cache://" - "capacity=2048;num_shard_bits=0;compression_type=kNoCompression"; - sec_cache_is_compressed_ = false; - } - Status s = SecondaryCache::CreateFromString(ConfigOptions(), sec_cache_uri, - &sec_cache); - EXPECT_OK(s); - } else { - sec_cache_uri = - "compressed_secondary_cache://" - "capacity=2048;num_shard_bits=0;compression_type=kNoCompression"; - Status s = SecondaryCache::CreateFromString(ConfigOptions(), sec_cache_uri, - &sec_cache); - EXPECT_OK(s); - } - BasicTestHelper(sec_cache, sec_cache_is_compressed_); -} - -TEST_P(CompressedSecondaryCacheTestWithCompressionParam, - BasicTestFromStringWithSplit) { - std::shared_ptr sec_cache{nullptr}; - std::string sec_cache_uri; - if (sec_cache_is_compressed_) { - if (LZ4_Supported()) { - sec_cache_uri = - "compressed_secondary_cache://" - "capacity=2048;num_shard_bits=0;compression_type=kLZ4Compression;" - "compress_format_version=2;enable_custom_split_merge=true"; - } else { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - sec_cache_uri = - "compressed_secondary_cache://" - "capacity=2048;num_shard_bits=0;compression_type=kNoCompression;" - "enable_custom_split_merge=true"; - sec_cache_is_compressed_ = false; - } - Status s = SecondaryCache::CreateFromString(ConfigOptions(), sec_cache_uri, - &sec_cache); - EXPECT_OK(s); - } else { - sec_cache_uri = - "compressed_secondary_cache://" - "capacity=2048;num_shard_bits=0;compression_type=kNoCompression;" - "enable_custom_split_merge=true"; - Status s = SecondaryCache::CreateFromString(ConfigOptions(), sec_cache_uri, - &sec_cache); - EXPECT_OK(s); - } - BasicTestHelper(sec_cache, sec_cache_is_compressed_); -} - - -TEST_P(CompressedSecondaryCacheTestWithCompressionParam, FailsTest) { - FailsTest(sec_cache_is_compressed_); -} - -TEST_P(CompressedSecondaryCacheTestWithCompressionParam, - BasicIntegrationFailTest) { - BasicIntegrationFailTest(sec_cache_is_compressed_); -} - -TEST_P(CompressedSecondaryCacheTestWithCompressionParam, - IntegrationSaveFailTest) { - IntegrationSaveFailTest(sec_cache_is_compressed_); -} - -TEST_P(CompressedSecondaryCacheTestWithCompressionParam, - IntegrationCreateFailTest) { - IntegrationCreateFailTest(sec_cache_is_compressed_); -} - -TEST_P(CompressedSecondaryCacheTestWithCompressionParam, - IntegrationFullCapacityTest) { - IntegrationFullCapacityTest(sec_cache_is_compressed_); -} - -TEST_P(CompressedSecondaryCacheTestWithCompressionParam, EntryRoles) { - CompressedSecondaryCacheOptions opts; - opts.capacity = 2048; - opts.num_shard_bits = 0; - - if (sec_cache_is_compressed_) { - if (!LZ4_Supported()) { - ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); - return; - } - } else { - opts.compression_type = CompressionType::kNoCompression; - } - - // Select a random subset to include, for fast test - Random& r = *Random::GetTLSInstance(); - CacheEntryRoleSet do_not_compress; - for (uint32_t i = 0; i < kNumCacheEntryRoles; ++i) { - // A few included on average, but decent chance of zero - if (r.OneIn(5)) { - do_not_compress.Add(static_cast(i)); - } - } - opts.do_not_compress_roles = do_not_compress; - - std::shared_ptr sec_cache = NewCompressedSecondaryCache(opts); - - // Fixed seed to ensure consistent compressibility (doesn't compress) - std::string junk(Random(301).RandomString(1000)); - - for (uint32_t i = 0; i < kNumCacheEntryRoles; ++i) { - CacheEntryRole role = static_cast(i); - - // Uniquify `junk` - junk[0] = static_cast(i); - TestItem item{junk.data(), junk.length()}; - Slice ith_key = Slice(junk.data(), 16); - - 
get_perf_context()->Reset(); - ASSERT_OK(sec_cache->Insert(ith_key, &item, GetHelper(role))); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 1U); - - ASSERT_OK(sec_cache->Insert(ith_key, &item, GetHelper(role))); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 1U); - - bool kept_in_sec_cache{true}; - std::unique_ptr handle = - sec_cache->Lookup(ith_key, GetHelper(role), this, true, - /*advise_erase=*/true, kept_in_sec_cache); - ASSERT_NE(handle, nullptr); - - // Lookup returns the right data - std::unique_ptr val = - std::unique_ptr(static_cast(handle->Value())); - ASSERT_NE(val, nullptr); - ASSERT_EQ(memcmp(val->Buf(), item.Buf(), item.Size()), 0); - - bool compressed = - sec_cache_is_compressed_ && !do_not_compress.Contains(role); - if (compressed) { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, - 1000); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, - 1007); - } else { - ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0); - ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0); - } - } -} - -INSTANTIATE_TEST_CASE_P(CompressedSecCacheTests, - CompressedSecondaryCacheTestWithCompressionParam, - testing::Combine(testing::Bool(), - GetTestingCacheTypes())); - -class CompressedSecCacheTestWithCompressAndSplitParam - : public CompressedSecondaryCacheTestBase, - public ::testing::WithParamInterface< - std::tuple> { - public: - CompressedSecCacheTestWithCompressAndSplitParam() { - sec_cache_is_compressed_ = std::get<0>(GetParam()); - enable_custom_split_merge_ = std::get<1>(GetParam()); - } - const std::string& Type() override { return std::get<2>(GetParam()); } - bool sec_cache_is_compressed_; - bool enable_custom_split_merge_; -}; - -TEST_P(CompressedSecCacheTestWithCompressAndSplitParam, BasicIntegrationTest) { - BasicIntegrationTest(sec_cache_is_compressed_, enable_custom_split_merge_); -} - -INSTANTIATE_TEST_CASE_P(CompressedSecCacheTests, - CompressedSecCacheTestWithCompressAndSplitParam, - ::testing::Combine(testing::Bool(), testing::Bool(), - GetTestingCacheTypes())); - -TEST_P(CompressedSecondaryCacheTest, SplitValueIntoChunksTest) { - SplitValueIntoChunksTest(); -} - -TEST_P(CompressedSecondaryCacheTest, MergeChunksIntoValueTest) { - MergeChunksIntoValueTest(); -} - -TEST_P(CompressedSecondaryCacheTest, SplictValueAndMergeChunksTest) { - SplictValueAndMergeChunksTest(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc deleted file mode 100644 index c4f392976..000000000 --- a/cache/lru_cache_test.cc +++ /dev/null @@ -1,2558 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
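The LRUCacheTest cases below exercise the three LRU priority pools (high, low, bottom) against an internal cache shard. At the public API level the pool split is configured roughly as in this sketch (ratios mirror the tests' 0.35/0.35 split but are otherwise illustrative):

#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

std::shared_ptr<Cache> MakePooledLRUCache() {
  LRUCacheOptions opts;
  opts.capacity = 6;        // tiny, to mirror the tests below
  opts.num_shard_bits = 0;
  // Roughly one third of capacity reserved for high-priority entries and one
  // third for low-priority entries; the remainder serves bottom priority.
  opts.high_pri_pool_ratio = 0.35;
  opts.low_pri_pool_ratio = 0.35;
  // Entries are then inserted with Cache::Priority::HIGH / LOW / BOTTOM via
  // the Cache::Insert() priority argument.
  return opts.MakeSharedCache();
}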
- -#include "cache/lru_cache.h" - -#include -#include -#include - -#include "cache/cache_key.h" -#include "cache/clock_cache.h" -#include "cache_helpers.h" -#include "db/db_test_util.h" -#include "file/sst_file_manager_impl.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/cache.h" -#include "rocksdb/io_status.h" -#include "rocksdb/sst_file_manager.h" -#include "rocksdb/utilities/cache_dump_load.h" -#include "test_util/secondary_cache_test_util.h" -#include "test_util/testharness.h" -#include "typed_cache.h" -#include "util/coding.h" -#include "util/random.h" -#include "utilities/cache_dump_load_impl.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { - -class LRUCacheTest : public testing::Test { - public: - LRUCacheTest() {} - ~LRUCacheTest() override { DeleteCache(); } - - void DeleteCache() { - if (cache_ != nullptr) { - cache_->~LRUCacheShard(); - port::cacheline_aligned_free(cache_); - cache_ = nullptr; - } - } - - void NewCache(size_t capacity, double high_pri_pool_ratio = 0.0, - double low_pri_pool_ratio = 1.0, - bool use_adaptive_mutex = kDefaultToAdaptiveMutex) { - DeleteCache(); - cache_ = reinterpret_cast( - port::cacheline_aligned_alloc(sizeof(LRUCacheShard))); - new (cache_) LRUCacheShard(capacity, /*strict_capacity_limit=*/false, - high_pri_pool_ratio, low_pri_pool_ratio, - use_adaptive_mutex, kDontChargeCacheMetadata, - /*max_upper_hash_bits=*/24, - /*allocator*/ nullptr, &eviction_callback_); - } - - void Insert(const std::string& key, - Cache::Priority priority = Cache::Priority::LOW) { - EXPECT_OK(cache_->Insert(key, 0 /*hash*/, nullptr /*value*/, - &kNoopCacheItemHelper, 1 /*charge*/, - nullptr /*handle*/, priority)); - } - - void Insert(char key, Cache::Priority priority = Cache::Priority::LOW) { - Insert(std::string(1, key), priority); - } - - bool Lookup(const std::string& key) { - auto handle = cache_->Lookup(key, 0 /*hash*/, nullptr, nullptr, - Cache::Priority::LOW, nullptr); - if (handle) { - cache_->Release(handle, true /*useful*/, false /*erase*/); - return true; - } - return false; - } - - bool Lookup(char key) { return Lookup(std::string(1, key)); } - - void Erase(const std::string& key) { cache_->Erase(key, 0 /*hash*/); } - - void ValidateLRUList(std::vector keys, - size_t num_high_pri_pool_keys = 0, - size_t num_low_pri_pool_keys = 0, - size_t num_bottom_pri_pool_keys = 0) { - LRUHandle* lru; - LRUHandle* lru_low_pri; - LRUHandle* lru_bottom_pri; - cache_->TEST_GetLRUList(&lru, &lru_low_pri, &lru_bottom_pri); - - LRUHandle* iter = lru; - - bool in_low_pri_pool = false; - bool in_high_pri_pool = false; - - size_t high_pri_pool_keys = 0; - size_t low_pri_pool_keys = 0; - size_t bottom_pri_pool_keys = 0; - - if (iter == lru_bottom_pri) { - in_low_pri_pool = true; - in_high_pri_pool = false; - } - if (iter == lru_low_pri) { - in_low_pri_pool = false; - in_high_pri_pool = true; - } - - for (const auto& key : keys) { - iter = iter->next; - ASSERT_NE(lru, iter); - ASSERT_EQ(key, iter->key().ToString()); - ASSERT_EQ(in_high_pri_pool, iter->InHighPriPool()); - ASSERT_EQ(in_low_pri_pool, iter->InLowPriPool()); - if (in_high_pri_pool) { - ASSERT_FALSE(iter->InLowPriPool()); - high_pri_pool_keys++; - } else if (in_low_pri_pool) { - ASSERT_FALSE(iter->InHighPriPool()); - low_pri_pool_keys++; - } else { - bottom_pri_pool_keys++; - } - if (iter == lru_bottom_pri) { - ASSERT_FALSE(in_low_pri_pool); - ASSERT_FALSE(in_high_pri_pool); - in_low_pri_pool = true; - in_high_pri_pool = false; - } - if (iter == lru_low_pri) { - 
ASSERT_TRUE(in_low_pri_pool); - ASSERT_FALSE(in_high_pri_pool); - in_low_pri_pool = false; - in_high_pri_pool = true; - } - } - ASSERT_EQ(lru, iter->next); - ASSERT_FALSE(in_low_pri_pool); - ASSERT_TRUE(in_high_pri_pool); - ASSERT_EQ(num_high_pri_pool_keys, high_pri_pool_keys); - ASSERT_EQ(num_low_pri_pool_keys, low_pri_pool_keys); - ASSERT_EQ(num_bottom_pri_pool_keys, bottom_pri_pool_keys); - } - - private: - LRUCacheShard* cache_ = nullptr; - Cache::EvictionCallback eviction_callback_; -}; - -TEST_F(LRUCacheTest, BasicLRU) { - NewCache(5); - for (char ch = 'a'; ch <= 'e'; ch++) { - Insert(ch); - } - ValidateLRUList({"a", "b", "c", "d", "e"}, 0, 5); - for (char ch = 'x'; ch <= 'z'; ch++) { - Insert(ch); - } - ValidateLRUList({"d", "e", "x", "y", "z"}, 0, 5); - ASSERT_FALSE(Lookup("b")); - ValidateLRUList({"d", "e", "x", "y", "z"}, 0, 5); - ASSERT_TRUE(Lookup("e")); - ValidateLRUList({"d", "x", "y", "z", "e"}, 0, 5); - ASSERT_TRUE(Lookup("z")); - ValidateLRUList({"d", "x", "y", "e", "z"}, 0, 5); - Erase("x"); - ValidateLRUList({"d", "y", "e", "z"}, 0, 4); - ASSERT_TRUE(Lookup("d")); - ValidateLRUList({"y", "e", "z", "d"}, 0, 4); - Insert("u"); - ValidateLRUList({"y", "e", "z", "d", "u"}, 0, 5); - Insert("v"); - ValidateLRUList({"e", "z", "d", "u", "v"}, 0, 5); -} - -TEST_F(LRUCacheTest, LowPriorityMidpointInsertion) { - // Allocate 2 cache entries to high-pri pool and 3 to low-pri pool. - NewCache(5, /* high_pri_pool_ratio */ 0.40, /* low_pri_pool_ratio */ 0.60); - - Insert("a", Cache::Priority::LOW); - Insert("b", Cache::Priority::LOW); - Insert("c", Cache::Priority::LOW); - Insert("x", Cache::Priority::HIGH); - Insert("y", Cache::Priority::HIGH); - ValidateLRUList({"a", "b", "c", "x", "y"}, 2, 3); - - // Low-pri entries inserted to the tail of low-pri list (the midpoint). - // After lookup, it will move to the tail of the full list. - Insert("d", Cache::Priority::LOW); - ValidateLRUList({"b", "c", "d", "x", "y"}, 2, 3); - ASSERT_TRUE(Lookup("d")); - ValidateLRUList({"b", "c", "x", "y", "d"}, 2, 3); - - // High-pri entries will be inserted to the tail of full list. - Insert("z", Cache::Priority::HIGH); - ValidateLRUList({"c", "x", "y", "d", "z"}, 2, 3); -} - -TEST_F(LRUCacheTest, BottomPriorityMidpointInsertion) { - // Allocate 2 cache entries to high-pri pool and 2 to low-pri pool. - NewCache(6, /* high_pri_pool_ratio */ 0.35, /* low_pri_pool_ratio */ 0.35); - - Insert("a", Cache::Priority::BOTTOM); - Insert("b", Cache::Priority::BOTTOM); - Insert("i", Cache::Priority::LOW); - Insert("j", Cache::Priority::LOW); - Insert("x", Cache::Priority::HIGH); - Insert("y", Cache::Priority::HIGH); - ValidateLRUList({"a", "b", "i", "j", "x", "y"}, 2, 2, 2); - - // Low-pri entries will be inserted to the tail of low-pri list (the - // midpoint). After lookup, 'k' will move to the tail of the full list, and - // 'x' will spill over to the low-pri pool. - Insert("k", Cache::Priority::LOW); - ValidateLRUList({"b", "i", "j", "k", "x", "y"}, 2, 2, 2); - ASSERT_TRUE(Lookup("k")); - ValidateLRUList({"b", "i", "j", "x", "y", "k"}, 2, 2, 2); - - // High-pri entries will be inserted to the tail of full list. Although y was - // inserted with high priority, it got spilled over to the low-pri pool. As - // a result, j also got spilled over to the bottom-pri pool. 
- Insert("z", Cache::Priority::HIGH); - ValidateLRUList({"i", "j", "x", "y", "k", "z"}, 2, 2, 2); - Erase("x"); - ValidateLRUList({"i", "j", "y", "k", "z"}, 2, 1, 2); - Erase("y"); - ValidateLRUList({"i", "j", "k", "z"}, 2, 0, 2); - - // Bottom-pri entries will be inserted to the tail of bottom-pri list. - Insert("c", Cache::Priority::BOTTOM); - ValidateLRUList({"i", "j", "c", "k", "z"}, 2, 0, 3); - Insert("d", Cache::Priority::BOTTOM); - ValidateLRUList({"i", "j", "c", "d", "k", "z"}, 2, 0, 4); - Insert("e", Cache::Priority::BOTTOM); - ValidateLRUList({"j", "c", "d", "e", "k", "z"}, 2, 0, 4); - - // Low-pri entries will be inserted to the tail of low-pri list (the - // midpoint). - Insert("l", Cache::Priority::LOW); - ValidateLRUList({"c", "d", "e", "l", "k", "z"}, 2, 1, 3); - Insert("m", Cache::Priority::LOW); - ValidateLRUList({"d", "e", "l", "m", "k", "z"}, 2, 2, 2); - - Erase("k"); - ValidateLRUList({"d", "e", "l", "m", "z"}, 1, 2, 2); - Erase("z"); - ValidateLRUList({"d", "e", "l", "m"}, 0, 2, 2); - - // Bottom-pri entries will be inserted to the tail of bottom-pri list. - Insert("f", Cache::Priority::BOTTOM); - ValidateLRUList({"d", "e", "f", "l", "m"}, 0, 2, 3); - Insert("g", Cache::Priority::BOTTOM); - ValidateLRUList({"d", "e", "f", "g", "l", "m"}, 0, 2, 4); - - // High-pri entries will be inserted to the tail of full list. - Insert("o", Cache::Priority::HIGH); - ValidateLRUList({"e", "f", "g", "l", "m", "o"}, 1, 2, 3); - Insert("p", Cache::Priority::HIGH); - ValidateLRUList({"f", "g", "l", "m", "o", "p"}, 2, 2, 2); -} - -TEST_F(LRUCacheTest, EntriesWithPriority) { - // Allocate 2 cache entries to high-pri pool and 2 to low-pri pool. - NewCache(6, /* high_pri_pool_ratio */ 0.35, /* low_pri_pool_ratio */ 0.35); - - Insert("a", Cache::Priority::LOW); - Insert("b", Cache::Priority::LOW); - ValidateLRUList({"a", "b"}, 0, 2, 0); - // Low-pri entries can overflow to bottom-pri pool. - Insert("c", Cache::Priority::LOW); - ValidateLRUList({"a", "b", "c"}, 0, 2, 1); - - // Bottom-pri entries can take high-pri pool capacity if available - Insert("t", Cache::Priority::LOW); - Insert("u", Cache::Priority::LOW); - ValidateLRUList({"a", "b", "c", "t", "u"}, 0, 2, 3); - Insert("v", Cache::Priority::LOW); - ValidateLRUList({"a", "b", "c", "t", "u", "v"}, 0, 2, 4); - Insert("w", Cache::Priority::LOW); - ValidateLRUList({"b", "c", "t", "u", "v", "w"}, 0, 2, 4); - - Insert("X", Cache::Priority::HIGH); - Insert("Y", Cache::Priority::HIGH); - ValidateLRUList({"t", "u", "v", "w", "X", "Y"}, 2, 2, 2); - - // After lookup, the high-pri entry 'X' got spilled over to the low-pri pool. - // The low-pri entry 'v' got spilled over to the bottom-pri pool. - Insert("Z", Cache::Priority::HIGH); - ValidateLRUList({"u", "v", "w", "X", "Y", "Z"}, 2, 2, 2); - - // Low-pri entries will be inserted to head of low-pri pool. - Insert("a", Cache::Priority::LOW); - ValidateLRUList({"v", "w", "X", "a", "Y", "Z"}, 2, 2, 2); - - // After lookup, the high-pri entry 'Y' got spilled over to the low-pri pool. - // The low-pri entry 'X' got spilled over to the bottom-pri pool. - ASSERT_TRUE(Lookup("v")); - ValidateLRUList({"w", "X", "a", "Y", "Z", "v"}, 2, 2, 2); - - // After lookup, the high-pri entry 'Z' got spilled over to the low-pri pool. - // The low-pri entry 'a' got spilled over to the bottom-pri pool. - ASSERT_TRUE(Lookup("X")); - ValidateLRUList({"w", "a", "Y", "Z", "v", "X"}, 2, 2, 2); - - // After lookup, the low pri entry 'Z' got promoted back to high-pri pool. 
The - // high-pri entry 'v' got spilled over to the low-pri pool. - ASSERT_TRUE(Lookup("Z")); - ValidateLRUList({"w", "a", "Y", "v", "X", "Z"}, 2, 2, 2); - - Erase("Y"); - ValidateLRUList({"w", "a", "v", "X", "Z"}, 2, 1, 2); - Erase("X"); - ValidateLRUList({"w", "a", "v", "Z"}, 1, 1, 2); - - Insert("d", Cache::Priority::LOW); - Insert("e", Cache::Priority::LOW); - ValidateLRUList({"w", "a", "v", "d", "e", "Z"}, 1, 2, 3); - - Insert("f", Cache::Priority::LOW); - Insert("g", Cache::Priority::LOW); - ValidateLRUList({"v", "d", "e", "f", "g", "Z"}, 1, 2, 3); - ASSERT_TRUE(Lookup("d")); - ValidateLRUList({"v", "e", "f", "g", "Z", "d"}, 2, 2, 2); - - // Erase some entries. - Erase("e"); - Erase("f"); - Erase("Z"); - ValidateLRUList({"v", "g", "d"}, 1, 1, 1); - - // Bottom-pri entries can take low- and high-pri pool capacity if available - Insert("o", Cache::Priority::BOTTOM); - ValidateLRUList({"v", "o", "g", "d"}, 1, 1, 2); - Insert("p", Cache::Priority::BOTTOM); - ValidateLRUList({"v", "o", "p", "g", "d"}, 1, 1, 3); - Insert("q", Cache::Priority::BOTTOM); - ValidateLRUList({"v", "o", "p", "q", "g", "d"}, 1, 1, 4); - - // High-pri entries can overflow to low-pri pool, and bottom-pri entries will - // be evicted. - Insert("x", Cache::Priority::HIGH); - ValidateLRUList({"o", "p", "q", "g", "d", "x"}, 2, 1, 3); - Insert("y", Cache::Priority::HIGH); - ValidateLRUList({"p", "q", "g", "d", "x", "y"}, 2, 2, 2); - Insert("z", Cache::Priority::HIGH); - ValidateLRUList({"q", "g", "d", "x", "y", "z"}, 2, 2, 2); - - // 'g' is bottom-pri before this lookup, it will be inserted to head of - // high-pri pool after lookup. - ASSERT_TRUE(Lookup("g")); - ValidateLRUList({"q", "d", "x", "y", "z", "g"}, 2, 2, 2); - - // High-pri entries will be inserted to head of high-pri pool after lookup. - ASSERT_TRUE(Lookup("z")); - ValidateLRUList({"q", "d", "x", "y", "g", "z"}, 2, 2, 2); - - // Bottom-pri entries will be inserted to head of high-pri pool after lookup. - ASSERT_TRUE(Lookup("d")); - ValidateLRUList({"q", "x", "y", "g", "z", "d"}, 2, 2, 2); - - // Bottom-pri entries will be inserted to the tail of bottom-pri list. - Insert("m", Cache::Priority::BOTTOM); - ValidateLRUList({"x", "m", "y", "g", "z", "d"}, 2, 2, 2); - - // Bottom-pri entries will be inserted to head of high-pri pool after lookup. 
- ASSERT_TRUE(Lookup("m")); - ValidateLRUList({"x", "y", "g", "z", "d", "m"}, 2, 2, 2); -} - -namespace clock_cache { - -class ClockCacheTest : public testing::Test { - public: - using Shard = HyperClockCache::Shard; - using Table = HyperClockTable; - using HandleImpl = Shard::HandleImpl; - - ClockCacheTest() {} - ~ClockCacheTest() override { DeleteShard(); } - - void DeleteShard() { - if (shard_ != nullptr) { - shard_->~ClockCacheShard(); - port::cacheline_aligned_free(shard_); - shard_ = nullptr; - } - } - - void NewShard(size_t capacity, bool strict_capacity_limit = true) { - DeleteShard(); - shard_ = - reinterpret_cast(port::cacheline_aligned_alloc(sizeof(Shard))); - - Table::Opts opts; - opts.estimated_value_size = 1; - new (shard_) - Shard(capacity, strict_capacity_limit, kDontChargeCacheMetadata, - /*allocator*/ nullptr, &eviction_callback_, opts); - } - - Status Insert(const UniqueId64x2& hashed_key, - Cache::Priority priority = Cache::Priority::LOW) { - return shard_->Insert(TestKey(hashed_key), hashed_key, nullptr /*value*/, - &kNoopCacheItemHelper, 1 /*charge*/, - nullptr /*handle*/, priority); - } - - Status Insert(char key, Cache::Priority priority = Cache::Priority::LOW) { - return Insert(TestHashedKey(key), priority); - } - - Status InsertWithLen(char key, size_t len) { - std::string skey(len, key); - return shard_->Insert(skey, TestHashedKey(key), nullptr /*value*/, - &kNoopCacheItemHelper, 1 /*charge*/, - nullptr /*handle*/, Cache::Priority::LOW); - } - - bool Lookup(const Slice& key, const UniqueId64x2& hashed_key, - bool useful = true) { - auto handle = shard_->Lookup(key, hashed_key); - if (handle) { - shard_->Release(handle, useful, /*erase_if_last_ref=*/false); - return true; - } - return false; - } - - bool Lookup(const UniqueId64x2& hashed_key, bool useful = true) { - return Lookup(TestKey(hashed_key), hashed_key, useful); - } - - bool Lookup(char key, bool useful = true) { - return Lookup(TestHashedKey(key), useful); - } - - void Erase(char key) { - UniqueId64x2 hashed_key = TestHashedKey(key); - shard_->Erase(TestKey(hashed_key), hashed_key); - } - - static inline Slice TestKey(const UniqueId64x2& hashed_key) { - return Slice(reinterpret_cast(&hashed_key), 16U); - } - - static inline UniqueId64x2 TestHashedKey(char key) { - // For testing hash near-collision behavior, put the variance in - // hashed_key in bits that are unlikely to be used as hash bits. 
- return {(static_cast(key) << 56) + 1234U, 5678U}; - } - - Shard* shard_ = nullptr; - - private: - Cache::EvictionCallback eviction_callback_; -}; - -TEST_F(ClockCacheTest, Misc) { - NewShard(3); - - // Key size stuff - EXPECT_OK(InsertWithLen('a', 16)); - EXPECT_NOK(InsertWithLen('b', 15)); - EXPECT_OK(InsertWithLen('b', 16)); - EXPECT_NOK(InsertWithLen('c', 17)); - EXPECT_NOK(InsertWithLen('d', 1000)); - EXPECT_NOK(InsertWithLen('e', 11)); - EXPECT_NOK(InsertWithLen('f', 0)); - - // Some of this is motivated by code coverage - std::string wrong_size_key(15, 'x'); - EXPECT_FALSE(Lookup(wrong_size_key, TestHashedKey('x'))); - EXPECT_FALSE(shard_->Ref(nullptr)); - EXPECT_FALSE(shard_->Release(nullptr)); - shard_->Erase(wrong_size_key, TestHashedKey('x')); // no-op -} - -TEST_F(ClockCacheTest, Limits) { - constexpr size_t kCapacity = 3; - NewShard(kCapacity, false /*strict_capacity_limit*/); - for (bool strict_capacity_limit : {false, true, false}) { - SCOPED_TRACE("strict_capacity_limit = " + - std::to_string(strict_capacity_limit)); - - // Also tests switching between strict limit and not - shard_->SetStrictCapacityLimit(strict_capacity_limit); - - UniqueId64x2 hkey = TestHashedKey('x'); - - // Single entry charge beyond capacity - { - Status s = shard_->Insert(TestKey(hkey), hkey, nullptr /*value*/, - &kNoopCacheItemHelper, 5 /*charge*/, - nullptr /*handle*/, Cache::Priority::LOW); - if (strict_capacity_limit) { - EXPECT_TRUE(s.IsMemoryLimit()); - } else { - EXPECT_OK(s); - } - } - - // Single entry fills capacity - { - HandleImpl* h; - ASSERT_OK(shard_->Insert(TestKey(hkey), hkey, nullptr /*value*/, - &kNoopCacheItemHelper, 3 /*charge*/, &h, - Cache::Priority::LOW)); - // Try to insert more - Status s = Insert('a'); - if (strict_capacity_limit) { - EXPECT_TRUE(s.IsMemoryLimit()); - } else { - EXPECT_OK(s); - } - // Release entry filling capacity. - // Cover useful = false case. - shard_->Release(h, false /*useful*/, false /*erase_if_last_ref*/); - } - - // Insert more than table size can handle to exceed occupancy limit. - // (Cleverly using mostly zero-charge entries, but some non-zero to - // verify usage tracking on detached entries.) - { - size_t n = shard_->GetTableAddressCount() + 1; - std::unique_ptr ha { new HandleImpl* [n] {} }; - Status s; - for (size_t i = 0; i < n && s.ok(); ++i) { - hkey[1] = i; - s = shard_->Insert(TestKey(hkey), hkey, nullptr /*value*/, - &kNoopCacheItemHelper, - (i + kCapacity < n) ? 
0 : 1 /*charge*/, &ha[i], - Cache::Priority::LOW); - if (i == 0) { - EXPECT_OK(s); - } - } - if (strict_capacity_limit) { - EXPECT_TRUE(s.IsMemoryLimit()); - } else { - EXPECT_OK(s); - } - // Same result if not keeping a reference - s = Insert('a'); - if (strict_capacity_limit) { - EXPECT_TRUE(s.IsMemoryLimit()); - } else { - EXPECT_OK(s); - } - - // Regardless, we didn't allow table to actually get full - EXPECT_LT(shard_->GetOccupancyCount(), shard_->GetTableAddressCount()); - - // Release handles - for (size_t i = 0; i < n; ++i) { - if (ha[i]) { - shard_->Release(ha[i]); - } - } - } - } -} - -TEST_F(ClockCacheTest, ClockEvictionTest) { - for (bool strict_capacity_limit : {false, true}) { - SCOPED_TRACE("strict_capacity_limit = " + - std::to_string(strict_capacity_limit)); - - NewShard(6, strict_capacity_limit); - EXPECT_OK(Insert('a', Cache::Priority::BOTTOM)); - EXPECT_OK(Insert('b', Cache::Priority::LOW)); - EXPECT_OK(Insert('c', Cache::Priority::HIGH)); - EXPECT_OK(Insert('d', Cache::Priority::BOTTOM)); - EXPECT_OK(Insert('e', Cache::Priority::LOW)); - EXPECT_OK(Insert('f', Cache::Priority::HIGH)); - - EXPECT_TRUE(Lookup('a', /*use*/ false)); - EXPECT_TRUE(Lookup('b', /*use*/ false)); - EXPECT_TRUE(Lookup('c', /*use*/ false)); - EXPECT_TRUE(Lookup('d', /*use*/ false)); - EXPECT_TRUE(Lookup('e', /*use*/ false)); - EXPECT_TRUE(Lookup('f', /*use*/ false)); - - // Ensure bottom are evicted first, even if new entries are low - EXPECT_OK(Insert('g', Cache::Priority::LOW)); - EXPECT_OK(Insert('h', Cache::Priority::LOW)); - - EXPECT_FALSE(Lookup('a', /*use*/ false)); - EXPECT_TRUE(Lookup('b', /*use*/ false)); - EXPECT_TRUE(Lookup('c', /*use*/ false)); - EXPECT_FALSE(Lookup('d', /*use*/ false)); - EXPECT_TRUE(Lookup('e', /*use*/ false)); - EXPECT_TRUE(Lookup('f', /*use*/ false)); - // Mark g & h useful - EXPECT_TRUE(Lookup('g', /*use*/ true)); - EXPECT_TRUE(Lookup('h', /*use*/ true)); - - // Then old LOW entries - EXPECT_OK(Insert('i', Cache::Priority::LOW)); - EXPECT_OK(Insert('j', Cache::Priority::LOW)); - - EXPECT_FALSE(Lookup('b', /*use*/ false)); - EXPECT_TRUE(Lookup('c', /*use*/ false)); - EXPECT_FALSE(Lookup('e', /*use*/ false)); - EXPECT_TRUE(Lookup('f', /*use*/ false)); - // Mark g & h useful once again - EXPECT_TRUE(Lookup('g', /*use*/ true)); - EXPECT_TRUE(Lookup('h', /*use*/ true)); - EXPECT_TRUE(Lookup('i', /*use*/ false)); - EXPECT_TRUE(Lookup('j', /*use*/ false)); - - // Then old HIGH entries - EXPECT_OK(Insert('k', Cache::Priority::LOW)); - EXPECT_OK(Insert('l', Cache::Priority::LOW)); - - EXPECT_FALSE(Lookup('c', /*use*/ false)); - EXPECT_FALSE(Lookup('f', /*use*/ false)); - EXPECT_TRUE(Lookup('g', /*use*/ false)); - EXPECT_TRUE(Lookup('h', /*use*/ false)); - EXPECT_TRUE(Lookup('i', /*use*/ false)); - EXPECT_TRUE(Lookup('j', /*use*/ false)); - EXPECT_TRUE(Lookup('k', /*use*/ false)); - EXPECT_TRUE(Lookup('l', /*use*/ false)); - - // Then the (roughly) least recently useful - EXPECT_OK(Insert('m', Cache::Priority::HIGH)); - EXPECT_OK(Insert('n', Cache::Priority::HIGH)); - - EXPECT_TRUE(Lookup('g', /*use*/ false)); - EXPECT_TRUE(Lookup('h', /*use*/ false)); - EXPECT_FALSE(Lookup('i', /*use*/ false)); - EXPECT_FALSE(Lookup('j', /*use*/ false)); - EXPECT_TRUE(Lookup('k', /*use*/ false)); - EXPECT_TRUE(Lookup('l', /*use*/ false)); - - // Now try changing capacity down - shard_->SetCapacity(4); - // Insert to ensure evictions happen - EXPECT_OK(Insert('o', Cache::Priority::LOW)); - EXPECT_OK(Insert('p', Cache::Priority::LOW)); - - EXPECT_FALSE(Lookup('g', /*use*/ false)); - 
EXPECT_FALSE(Lookup('h', /*use*/ false)); - EXPECT_FALSE(Lookup('k', /*use*/ false)); - EXPECT_FALSE(Lookup('l', /*use*/ false)); - EXPECT_TRUE(Lookup('m', /*use*/ false)); - EXPECT_TRUE(Lookup('n', /*use*/ false)); - EXPECT_TRUE(Lookup('o', /*use*/ false)); - EXPECT_TRUE(Lookup('p', /*use*/ false)); - - // Now try changing capacity up - EXPECT_TRUE(Lookup('m', /*use*/ true)); - EXPECT_TRUE(Lookup('n', /*use*/ true)); - shard_->SetCapacity(6); - EXPECT_OK(Insert('q', Cache::Priority::HIGH)); - EXPECT_OK(Insert('r', Cache::Priority::HIGH)); - EXPECT_OK(Insert('s', Cache::Priority::HIGH)); - EXPECT_OK(Insert('t', Cache::Priority::HIGH)); - - EXPECT_FALSE(Lookup('o', /*use*/ false)); - EXPECT_FALSE(Lookup('p', /*use*/ false)); - EXPECT_TRUE(Lookup('m', /*use*/ false)); - EXPECT_TRUE(Lookup('n', /*use*/ false)); - EXPECT_TRUE(Lookup('q', /*use*/ false)); - EXPECT_TRUE(Lookup('r', /*use*/ false)); - EXPECT_TRUE(Lookup('s', /*use*/ false)); - EXPECT_TRUE(Lookup('t', /*use*/ false)); - } -} - -namespace { -struct DeleteCounter { - int deleted = 0; -}; -const Cache::CacheItemHelper kDeleteCounterHelper{ - CacheEntryRole::kMisc, - [](Cache::ObjectPtr value, MemoryAllocator* /*alloc*/) { - static_cast(value)->deleted += 1; - }}; -} // namespace - -// Testing calls to CorrectNearOverflow in Release -TEST_F(ClockCacheTest, ClockCounterOverflowTest) { - NewShard(6, /*strict_capacity_limit*/ false); - HandleImpl* h; - DeleteCounter val; - UniqueId64x2 hkey = TestHashedKey('x'); - ASSERT_OK(shard_->Insert(TestKey(hkey), hkey, &val, &kDeleteCounterHelper, 1, - &h, Cache::Priority::HIGH)); - - // Some large number outstanding - shard_->TEST_RefN(h, 123456789); - // Simulate many lookup/ref + release, plenty to overflow counters - for (int i = 0; i < 10000; ++i) { - shard_->TEST_RefN(h, 1234567); - shard_->TEST_ReleaseN(h, 1234567); - } - // Mark it invisible (to reach a different CorrectNearOverflow() in Release) - shard_->Erase(TestKey(hkey), hkey); - // Simulate many more lookup/ref + release (one-by-one would be too - // expensive for unit test) - for (int i = 0; i < 10000; ++i) { - shard_->TEST_RefN(h, 1234567); - shard_->TEST_ReleaseN(h, 1234567); - } - // Free all but last 1 - shard_->TEST_ReleaseN(h, 123456789); - // Still alive - ASSERT_EQ(val.deleted, 0); - // Free last ref, which will finalize erasure - shard_->Release(h); - // Deleted - ASSERT_EQ(val.deleted, 1); -} - -// This test is mostly to exercise some corner case logic, by forcing two -// keys to have the same hash, and more -TEST_F(ClockCacheTest, CollidingInsertEraseTest) { - NewShard(6, /*strict_capacity_limit*/ false); - DeleteCounter val; - UniqueId64x2 hkey1 = TestHashedKey('x'); - Slice key1 = TestKey(hkey1); - UniqueId64x2 hkey2 = TestHashedKey('y'); - Slice key2 = TestKey(hkey2); - UniqueId64x2 hkey3 = TestHashedKey('z'); - Slice key3 = TestKey(hkey3); - HandleImpl* h1; - ASSERT_OK(shard_->Insert(key1, hkey1, &val, &kDeleteCounterHelper, 1, &h1, - Cache::Priority::HIGH)); - HandleImpl* h2; - ASSERT_OK(shard_->Insert(key2, hkey2, &val, &kDeleteCounterHelper, 1, &h2, - Cache::Priority::HIGH)); - HandleImpl* h3; - ASSERT_OK(shard_->Insert(key3, hkey3, &val, &kDeleteCounterHelper, 1, &h3, - Cache::Priority::HIGH)); - - // Can repeatedly lookup+release despite the hash collision - HandleImpl* tmp_h; - for (bool erase_if_last_ref : {true, false}) { // but not last ref - tmp_h = shard_->Lookup(key1, hkey1); - ASSERT_EQ(h1, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); - - tmp_h = shard_->Lookup(key2, hkey2); - 
ASSERT_EQ(h2, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); - - tmp_h = shard_->Lookup(key3, hkey3); - ASSERT_EQ(h3, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); - } - - // Make h1 invisible - shard_->Erase(key1, hkey1); - // Redundant erase - shard_->Erase(key1, hkey1); - - // All still alive - ASSERT_EQ(val.deleted, 0); - - // Invisible to Lookup - tmp_h = shard_->Lookup(key1, hkey1); - ASSERT_EQ(nullptr, tmp_h); - - // Can still find h2, h3 - for (bool erase_if_last_ref : {true, false}) { // but not last ref - tmp_h = shard_->Lookup(key2, hkey2); - ASSERT_EQ(h2, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); - - tmp_h = shard_->Lookup(key3, hkey3); - ASSERT_EQ(h3, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); - } - - // Also Insert with invisible entry there - ASSERT_OK(shard_->Insert(key1, hkey1, &val, &kDeleteCounterHelper, 1, nullptr, - Cache::Priority::HIGH)); - tmp_h = shard_->Lookup(key1, hkey1); - // Found but distinct handle - ASSERT_NE(nullptr, tmp_h); - ASSERT_NE(h1, tmp_h); - ASSERT_TRUE(shard_->Release(tmp_h, /*erase_if_last_ref*/ true)); - - // tmp_h deleted - ASSERT_EQ(val.deleted--, 1); - - // Release last ref on h1 (already invisible) - ASSERT_TRUE(shard_->Release(h1, /*erase_if_last_ref*/ false)); - - // h1 deleted - ASSERT_EQ(val.deleted--, 1); - h1 = nullptr; - - // Can still find h2, h3 - for (bool erase_if_last_ref : {true, false}) { // but not last ref - tmp_h = shard_->Lookup(key2, hkey2); - ASSERT_EQ(h2, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); - - tmp_h = shard_->Lookup(key3, hkey3); - ASSERT_EQ(h3, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); - } - - // Release last ref on h2 - ASSERT_FALSE(shard_->Release(h2, /*erase_if_last_ref*/ false)); - - // h2 still not deleted (unreferenced in cache) - ASSERT_EQ(val.deleted, 0); - - // Can still find it - tmp_h = shard_->Lookup(key2, hkey2); - ASSERT_EQ(h2, tmp_h); - - // Release last ref on h2, with erase - ASSERT_TRUE(shard_->Release(h2, /*erase_if_last_ref*/ true)); - - // h2 deleted - ASSERT_EQ(val.deleted--, 1); - tmp_h = shard_->Lookup(key2, hkey2); - ASSERT_EQ(nullptr, tmp_h); - - // Can still find h3 - for (bool erase_if_last_ref : {true, false}) { // but not last ref - tmp_h = shard_->Lookup(key3, hkey3); - ASSERT_EQ(h3, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); - } - - // Release last ref on h3, without erase - ASSERT_FALSE(shard_->Release(h3, /*erase_if_last_ref*/ false)); - - // h3 still not deleted (unreferenced in cache) - ASSERT_EQ(val.deleted, 0); - - // Explicit erase - shard_->Erase(key3, hkey3); - - // h3 deleted - ASSERT_EQ(val.deleted--, 1); - tmp_h = shard_->Lookup(key3, hkey3); - ASSERT_EQ(nullptr, tmp_h); -} - -// This uses the public API to effectively test CalcHashBits etc. 
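Annotation: the TableSizesTest below checks how HyperClockCache translates a capacity and an estimated entry charge into a hash table size. Roughly, the table gets a power-of-two number of slots chosen so that the estimated entry count divided by the load factor fits, which is why the assertions allow a factor-of-two window. The helper below is illustrative only (it is not a RocksDB function); it simply restates the bounds the test asserts.

#include <cstddef>

// Illustrative helper (not part of RocksDB): restates the bounds that
// TableSizesTest asserts for a HyperClockCache shard table.
inline bool TableSizeWithinExpectedBounds(double table_address_count,
                                          size_t capacity, size_t est_val_size,
                                          double load_factor /* kLoadFactor */) {
  // How many entries of the estimated size fit in the configured capacity.
  double est_count = static_cast<double>(capacity) / est_val_size;
  // Power-of-two table sizing: the actual slot count lands within [x, 2x] of
  // the ideal occupancy target est_count / load_factor.
  return table_address_count >= est_count / load_factor &&
         table_address_count <= est_count / load_factor * 2.0;
}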
-TEST_F(ClockCacheTest, TableSizesTest) {
-  for (size_t est_val_size : {1U, 5U, 123U, 2345U, 345678U}) {
-    SCOPED_TRACE("est_val_size = " + std::to_string(est_val_size));
-    for (double est_count : {1.1, 2.2, 511.9, 512.1, 2345.0}) {
-      SCOPED_TRACE("est_count = " + std::to_string(est_count));
-      size_t capacity = static_cast<size_t>(est_val_size * est_count);
-      // kDontChargeCacheMetadata
-      auto cache = HyperClockCacheOptions(
-                       capacity, est_val_size, /*num shard_bits*/ -1,
-                       /*strict_capacity_limit*/ false,
-                       /*memory_allocator*/ nullptr, kDontChargeCacheMetadata)
-                       .MakeSharedCache();
-      // Table sizes are currently only powers of two
-      EXPECT_GE(cache->GetTableAddressCount(), est_count / kLoadFactor);
-      EXPECT_LE(cache->GetTableAddressCount(), est_count / kLoadFactor * 2.0);
-      EXPECT_EQ(cache->GetUsage(), 0);
-
-      // kFullChargeCacheMetadata
-      // Because table sizes are currently only powers of two, sizes get
-      // really weird when metadata is a huge portion of capacity. For example,
-      // doubling the table size could cut by 90% the space available to
-      // values. Therefore, we omit those weird cases for now.
-      if (est_val_size >= 512) {
-        cache = HyperClockCacheOptions(
-                    capacity, est_val_size, /*num shard_bits*/ -1,
-                    /*strict_capacity_limit*/ false,
-                    /*memory_allocator*/ nullptr, kFullChargeCacheMetadata)
-                    .MakeSharedCache();
-        double est_count_after_meta =
-            (capacity - cache->GetUsage()) * 1.0 / est_val_size;
-        EXPECT_GE(cache->GetTableAddressCount(),
-                  est_count_after_meta / kLoadFactor);
-        EXPECT_LE(cache->GetTableAddressCount(),
-                  est_count_after_meta / kLoadFactor * 2.0);
-      }
-    }
-  }
-}
-
-}  // namespace clock_cache
-
-class TestSecondaryCache : public SecondaryCache {
- public:
-  // Specifies what action to take on a lookup for a particular key
-  enum ResultType {
-    SUCCESS,
-    // Fail lookup immediately
-    FAIL,
-    // Defer the result.
It will returned after Wait/WaitAll is called - DEFER, - // Defer the result and eventually return failure - DEFER_AND_FAIL - }; - - using ResultMap = std::unordered_map; - - explicit TestSecondaryCache(size_t capacity) - : cache_(NewLRUCache(capacity, 0, false, 0.5 /* high_pri_pool_ratio */, - nullptr, kDefaultToAdaptiveMutex, - kDontChargeCacheMetadata)), - num_inserts_(0), - num_lookups_(0), - inject_failure_(false) {} - - const char* Name() const override { return "TestSecondaryCache"; } - - void InjectFailure() { inject_failure_ = true; } - - void ResetInjectFailure() { inject_failure_ = false; } - - Status Insert(const Slice& key, Cache::ObjectPtr value, - const Cache::CacheItemHelper* helper) override { - if (inject_failure_) { - return Status::Corruption("Insertion Data Corrupted"); - } - CheckCacheKeyCommonPrefix(key); - size_t size; - char* buf; - Status s; - - num_inserts_++; - size = (*helper->size_cb)(value); - buf = new char[size + sizeof(uint64_t)]; - EncodeFixed64(buf, size); - s = (*helper->saveto_cb)(value, 0, size, buf + sizeof(uint64_t)); - if (!s.ok()) { - delete[] buf; - return s; - } - return cache_.Insert(key, buf, size); - } - - std::unique_ptr Lookup( - const Slice& key, const Cache::CacheItemHelper* helper, - Cache::CreateContext* create_context, bool /*wait*/, - bool /*advise_erase*/, bool& kept_in_sec_cache) override { - std::string key_str = key.ToString(); - TEST_SYNC_POINT_CALLBACK("TestSecondaryCache::Lookup", &key_str); - - std::unique_ptr secondary_handle; - kept_in_sec_cache = false; - ResultType type = ResultType::SUCCESS; - auto iter = result_map_.find(key.ToString()); - if (iter != result_map_.end()) { - type = iter->second; - } - if (type == ResultType::FAIL) { - return secondary_handle; - } - - TypedHandle* handle = cache_.Lookup(key); - num_lookups_++; - if (handle) { - Cache::ObjectPtr value = nullptr; - size_t charge = 0; - Status s; - if (type != ResultType::DEFER_AND_FAIL) { - char* ptr = cache_.Value(handle); - size_t size = DecodeFixed64(ptr); - ptr += sizeof(uint64_t); - s = helper->create_cb(Slice(ptr, size), create_context, - /*alloc*/ nullptr, &value, &charge); - } - if (s.ok()) { - secondary_handle.reset(new TestSecondaryCacheResultHandle( - cache_.get(), handle, value, charge, type)); - kept_in_sec_cache = true; - } else { - cache_.Release(handle); - } - } - return secondary_handle; - } - - bool SupportForceErase() const override { return false; } - - void Erase(const Slice& /*key*/) override {} - - void WaitAll(std::vector handles) override { - for (SecondaryCacheResultHandle* handle : handles) { - TestSecondaryCacheResultHandle* sec_handle = - static_cast(handle); - sec_handle->SetReady(); - } - } - - std::string GetPrintableOptions() const override { return ""; } - - void SetResultMap(ResultMap&& map) { result_map_ = std::move(map); } - - uint32_t num_inserts() { return num_inserts_; } - - uint32_t num_lookups() { return num_lookups_; } - - void CheckCacheKeyCommonPrefix(const Slice& key) { - Slice current_prefix(key.data(), OffsetableCacheKey::kCommonPrefixSize); - if (ckey_prefix_.empty()) { - ckey_prefix_ = current_prefix.ToString(); - } else { - EXPECT_EQ(ckey_prefix_, current_prefix.ToString()); - } - } - - private: - class TestSecondaryCacheResultHandle : public SecondaryCacheResultHandle { - public: - TestSecondaryCacheResultHandle(Cache* cache, Cache::Handle* handle, - Cache::ObjectPtr value, size_t size, - ResultType type) - : cache_(cache), - handle_(handle), - value_(value), - size_(size), - is_ready_(true) { - if (type != 
ResultType::SUCCESS) { - is_ready_ = false; - } - } - - ~TestSecondaryCacheResultHandle() override { cache_->Release(handle_); } - - bool IsReady() override { return is_ready_; } - - void Wait() override {} - - Cache::ObjectPtr Value() override { - assert(is_ready_); - return value_; - } - - size_t Size() override { return Value() ? size_ : 0; } - - void SetReady() { is_ready_ = true; } - - private: - Cache* cache_; - Cache::Handle* handle_; - Cache::ObjectPtr value_; - size_t size_; - bool is_ready_; - }; - - using SharedCache = - BasicTypedSharedCacheInterface; - using TypedHandle = SharedCache::TypedHandle; - SharedCache cache_; - uint32_t num_inserts_; - uint32_t num_lookups_; - bool inject_failure_; - std::string ckey_prefix_; - ResultMap result_map_; -}; - -using secondary_cache_test_util::GetTestingCacheTypes; -using secondary_cache_test_util::WithCacheTypeParam; - -class BasicSecondaryCacheTest : public testing::Test, - public WithCacheTypeParam {}; - -INSTANTIATE_TEST_CASE_P(BasicSecondaryCacheTest, BasicSecondaryCacheTest, - GetTestingCacheTypes()); - -class DBSecondaryCacheTest : public DBTestBase, public WithCacheTypeParam { - public: - DBSecondaryCacheTest() - : DBTestBase("db_secondary_cache_test", /*env_do_fsync=*/true) { - fault_fs_.reset(new FaultInjectionTestFS(env_->GetFileSystem())); - fault_env_.reset(new CompositeEnvWrapper(env_, fault_fs_)); - } - - std::shared_ptr fault_fs_; - std::unique_ptr fault_env_; -}; - -INSTANTIATE_TEST_CASE_P(DBSecondaryCacheTest, DBSecondaryCacheTest, - GetTestingCacheTypes()); - -TEST_P(BasicSecondaryCacheTest, BasicTest) { - std::shared_ptr secondary_cache = - std::make_shared(4096); - std::shared_ptr cache = - NewCache(1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - std::shared_ptr stats = CreateDBStatistics(); - CacheKey k1 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - CacheKey k2 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - CacheKey k3 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - - Random rnd(301); - // Start with warming k3 - std::string str3 = rnd.RandomString(1021); - ASSERT_OK(secondary_cache->InsertSaved(k3.AsSlice(), str3)); - - std::string str1 = rnd.RandomString(1021); - TestItem* item1 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(k1.AsSlice(), item1, GetHelper(), str1.length())); - std::string str2 = rnd.RandomString(1021); - TestItem* item2 = new TestItem(str2.data(), str2.length()); - // k1 should be demoted to NVM - ASSERT_OK(cache->Insert(k2.AsSlice(), item2, GetHelper(), str2.length())); - - get_perf_context()->Reset(); - Cache::Handle* handle; - handle = cache->Lookup(k2.AsSlice(), GetHelper(), - /*context*/ this, Cache::Priority::LOW, stats.get()); - ASSERT_NE(handle, nullptr); - ASSERT_EQ(static_cast(cache->Value(handle))->Size(), str2.size()); - cache->Release(handle); - - // This lookup should promote k1 and demote k2 - handle = cache->Lookup(k1.AsSlice(), GetHelper(), - /*context*/ this, Cache::Priority::LOW, stats.get()); - ASSERT_NE(handle, nullptr); - ASSERT_EQ(static_cast(cache->Value(handle))->Size(), str1.size()); - cache->Release(handle); - - // This lookup should promote k3 and demote k1 - handle = cache->Lookup(k3.AsSlice(), GetHelper(), - /*context*/ this, Cache::Priority::LOW, stats.get()); - ASSERT_NE(handle, nullptr); - ASSERT_EQ(static_cast(cache->Value(handle))->Size(), str3.size()); - cache->Release(handle); - - ASSERT_EQ(secondary_cache->num_inserts(), 3u); - 
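Annotation: BasicTest drives demotion to and promotion from the secondary cache through the NewCache test helper. Outside the test harness, the equivalent wiring is done by handing a SecondaryCache to the primary cache's options and then using that cache as the block cache. A rough sketch under those assumptions follows; my_secondary_cache is a placeholder for whatever SecondaryCache implementation is in use, and the numeric options are arbitrary.

#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/table.h"

using namespace ROCKSDB_NAMESPACE;

// Primary LRU block cache with a secondary tier attached (sketch).
std::shared_ptr<Cache> MakeTieredBlockCache(
    std::shared_ptr<SecondaryCache> my_secondary_cache /* placeholder */) {
  LRUCacheOptions opts(64 << 20 /* capacity */, 6 /* num_shard_bits */,
                       false /* strict_capacity_limit */,
                       0.5 /* high_pri_pool_ratio */);
  opts.secondary_cache = my_secondary_cache;  // evicted blocks are demoted here
  return NewLRUCache(opts);
}

// Use the tiered cache as the block cache of a DB.
void UseAsBlockCache(Options& options, const std::shared_ptr<Cache>& cache) {
  BlockBasedTableOptions table_options;
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  // A DB can still opt out of the secondary tier, as the
  // TestSecondaryCacheOption* tests later in this file do:
  // options.lowest_used_cache_tier = CacheTier::kVolatileTier;
}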
ASSERT_EQ(secondary_cache->num_lookups(), 2u); - ASSERT_EQ(stats->getTickerCount(SECONDARY_CACHE_HITS), - secondary_cache->num_lookups()); - PerfContext perf_ctx = *get_perf_context(); - ASSERT_EQ(perf_ctx.secondary_cache_hit_count, secondary_cache->num_lookups()); - - cache.reset(); - secondary_cache.reset(); -} - -TEST_P(BasicSecondaryCacheTest, StatsTest) { - std::shared_ptr secondary_cache = - std::make_shared(4096); - std::shared_ptr cache = - NewCache(1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - std::shared_ptr stats = CreateDBStatistics(); - CacheKey k1 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - CacheKey k2 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - CacheKey k3 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - - Random rnd(301); - // Start with warming secondary cache - std::string str1 = rnd.RandomString(1020); - std::string str2 = rnd.RandomString(1020); - std::string str3 = rnd.RandomString(1020); - ASSERT_OK(secondary_cache->InsertSaved(k1.AsSlice(), str1)); - ASSERT_OK(secondary_cache->InsertSaved(k2.AsSlice(), str2)); - ASSERT_OK(secondary_cache->InsertSaved(k3.AsSlice(), str3)); - - get_perf_context()->Reset(); - Cache::Handle* handle; - handle = cache->Lookup(k1.AsSlice(), GetHelper(CacheEntryRole::kFilterBlock), - /*context*/ this, Cache::Priority::LOW, stats.get()); - ASSERT_NE(handle, nullptr); - ASSERT_EQ(static_cast(cache->Value(handle))->Size(), str1.size()); - cache->Release(handle); - - handle = cache->Lookup(k2.AsSlice(), GetHelper(CacheEntryRole::kIndexBlock), - /*context*/ this, Cache::Priority::LOW, stats.get()); - ASSERT_NE(handle, nullptr); - ASSERT_EQ(static_cast(cache->Value(handle))->Size(), str2.size()); - cache->Release(handle); - - handle = cache->Lookup(k3.AsSlice(), GetHelper(CacheEntryRole::kDataBlock), - /*context*/ this, Cache::Priority::LOW, stats.get()); - ASSERT_NE(handle, nullptr); - ASSERT_EQ(static_cast(cache->Value(handle))->Size(), str3.size()); - cache->Release(handle); - - ASSERT_EQ(secondary_cache->num_inserts(), 3u); - ASSERT_EQ(secondary_cache->num_lookups(), 3u); - ASSERT_EQ(stats->getTickerCount(SECONDARY_CACHE_HITS), - secondary_cache->num_lookups()); - ASSERT_EQ(stats->getTickerCount(SECONDARY_CACHE_FILTER_HITS), 1); - ASSERT_EQ(stats->getTickerCount(SECONDARY_CACHE_INDEX_HITS), 1); - ASSERT_EQ(stats->getTickerCount(SECONDARY_CACHE_DATA_HITS), 1); - PerfContext perf_ctx = *get_perf_context(); - ASSERT_EQ(perf_ctx.secondary_cache_hit_count, secondary_cache->num_lookups()); - - cache.reset(); - secondary_cache.reset(); -} - -TEST_P(BasicSecondaryCacheTest, BasicFailTest) { - std::shared_ptr secondary_cache = - std::make_shared(2048); - std::shared_ptr cache = - NewCache(1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - CacheKey k1 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - CacheKey k2 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - - Random rnd(301); - std::string str1 = rnd.RandomString(1020); - auto item1 = std::make_unique(str1.data(), str1.length()); - // NOTE: changed to assert helper != nullptr for efficiency / code size - // ASSERT_TRUE(cache->Insert(k1.AsSlice(), item1.get(), nullptr, - // str1.length()).IsInvalidArgument()); - ASSERT_OK( - cache->Insert(k1.AsSlice(), item1.get(), GetHelper(), str1.length())); - item1.release(); // Appease clang-analyze "potential memory leak" - - Cache::Handle* handle; - handle = cache->Lookup(k2.AsSlice(), nullptr, /*context*/ 
this, - Cache::Priority::LOW); - ASSERT_EQ(handle, nullptr); - - handle = cache->Lookup(k2.AsSlice(), GetHelper(), - /*context*/ this, Cache::Priority::LOW); - ASSERT_EQ(handle, nullptr); - - Cache::AsyncLookupHandle async_handle; - async_handle.key = k2.AsSlice(); - async_handle.helper = GetHelper(); - async_handle.create_context = this; - async_handle.priority = Cache::Priority::LOW; - cache->StartAsyncLookup(async_handle); - cache->Wait(async_handle); - handle = async_handle.Result(); - ASSERT_EQ(handle, nullptr); - - cache.reset(); - secondary_cache.reset(); -} - -TEST_P(BasicSecondaryCacheTest, SaveFailTest) { - std::shared_ptr secondary_cache = - std::make_shared(2048); - std::shared_ptr cache = - NewCache(1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - CacheKey k1 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - CacheKey k2 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - - Random rnd(301); - std::string str1 = rnd.RandomString(1020); - TestItem* item1 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(k1.AsSlice(), item1, GetHelperFail(), str1.length())); - std::string str2 = rnd.RandomString(1020); - TestItem* item2 = new TestItem(str2.data(), str2.length()); - // k1 should be demoted to NVM - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_OK(cache->Insert(k2.AsSlice(), item2, GetHelperFail(), str2.length())); - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - - Cache::Handle* handle; - handle = cache->Lookup(k2.AsSlice(), GetHelperFail(), - /*context*/ this, Cache::Priority::LOW); - ASSERT_NE(handle, nullptr); - cache->Release(handle); - // This lookup should fail, since k1 demotion would have failed - handle = cache->Lookup(k1.AsSlice(), GetHelperFail(), - /*context*/ this, Cache::Priority::LOW); - ASSERT_EQ(handle, nullptr); - // Since k1 didn't get promoted, k2 should still be in cache - handle = cache->Lookup(k2.AsSlice(), GetHelperFail(), - /*context*/ this, Cache::Priority::LOW); - ASSERT_NE(handle, nullptr); - cache->Release(handle); - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 1u); - - cache.reset(); - secondary_cache.reset(); -} - -TEST_P(BasicSecondaryCacheTest, CreateFailTest) { - std::shared_ptr secondary_cache = - std::make_shared(2048); - std::shared_ptr cache = - NewCache(1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - CacheKey k1 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - CacheKey k2 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - - Random rnd(301); - std::string str1 = rnd.RandomString(1020); - TestItem* item1 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(k1.AsSlice(), item1, GetHelper(), str1.length())); - std::string str2 = rnd.RandomString(1020); - TestItem* item2 = new TestItem(str2.data(), str2.length()); - // k1 should be demoted to NVM - ASSERT_OK(cache->Insert(k2.AsSlice(), item2, GetHelper(), str2.length())); - - Cache::Handle* handle; - SetFailCreate(true); - handle = cache->Lookup(k2.AsSlice(), GetHelper(), - /*context*/ this, Cache::Priority::LOW); - ASSERT_NE(handle, nullptr); - cache->Release(handle); - // This lookup should fail, since k1 creation would have failed - handle = cache->Lookup(k1.AsSlice(), GetHelper(), - /*context*/ this, Cache::Priority::LOW); - ASSERT_EQ(handle, nullptr); - // Since k1 didn't get promoted, k2 should still be in cache - handle = cache->Lookup(k2.AsSlice(), GetHelper(), - 
/*context*/ this, Cache::Priority::LOW); - ASSERT_NE(handle, nullptr); - cache->Release(handle); - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 1u); - - cache.reset(); - secondary_cache.reset(); -} - -TEST_P(BasicSecondaryCacheTest, FullCapacityTest) { - for (bool strict_capacity_limit : {false, true}) { - std::shared_ptr secondary_cache = - std::make_shared(2048); - std::shared_ptr cache = - NewCache(1024 /* capacity */, 0 /* num_shard_bits */, - strict_capacity_limit, secondary_cache); - CacheKey k1 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - CacheKey k2 = CacheKey::CreateUniqueForCacheLifetime(cache.get()); - - Random rnd(301); - std::string str1 = rnd.RandomString(1020); - TestItem* item1 = new TestItem(str1.data(), str1.length()); - ASSERT_OK(cache->Insert(k1.AsSlice(), item1, GetHelper(), str1.length())); - std::string str2 = rnd.RandomString(1020); - TestItem* item2 = new TestItem(str2.data(), str2.length()); - // k1 should be demoted to NVM - ASSERT_OK(cache->Insert(k2.AsSlice(), item2, GetHelper(), str2.length())); - - Cache::Handle* handle2; - handle2 = cache->Lookup(k2.AsSlice(), GetHelper(), - /*context*/ this, Cache::Priority::LOW); - ASSERT_NE(handle2, nullptr); - // k1 lookup fails without secondary cache support - Cache::Handle* handle1; - handle1 = cache->Lookup( - k1.AsSlice(), - GetHelper(CacheEntryRole::kDataBlock, /*secondary_compatible=*/false), - /*context*/ this, Cache::Priority::LOW); - ASSERT_EQ(handle1, nullptr); - - // k1 promotion can fail with strict_capacit_limit=true, but Lookup still - // succeeds using a standalone handle - handle1 = cache->Lookup(k1.AsSlice(), GetHelper(), - /*context*/ this, Cache::Priority::LOW); - ASSERT_NE(handle1, nullptr); - - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 1u); - - // Releasing k2's handle first, k2 is evicted from primary iff k1 promotion - // was charged to the cache (except HCC doesn't erase in Release() over - // capacity) - // FIXME: Insert to secondary from Release disabled - cache->Release(handle2); - cache->Release(handle1); - handle2 = cache->Lookup( - k2.AsSlice(), - GetHelper(CacheEntryRole::kDataBlock, /*secondary_compatible=*/false), - /*context*/ this, Cache::Priority::LOW); - if (strict_capacity_limit || GetParam() == kHyperClock) { - ASSERT_NE(handle2, nullptr); - cache->Release(handle2); - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - } else { - ASSERT_EQ(handle2, nullptr); - // FIXME: Insert to secondary from Release disabled - // ASSERT_EQ(secondary_cache->num_inserts(), 2u); - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - } - - cache.reset(); - secondary_cache.reset(); - } -} - -// In this test, the block cache size is set to 4096, after insert 6 KV-pairs -// and flush, there are 5 blocks in this SST file, 2 data blocks and 3 meta -// blocks. block_1 size is 4096 and block_2 size is 2056. The total size -// of the meta blocks are about 900 to 1000. Therefore, in any situation, -// if we try to insert block_1 to the block cache, it will always fails. Only -// block_2 will be successfully inserted into the block cache. -// CORRECTION: this is not quite right. block_1 can be inserted into the block -// cache because strict_capacity_limit=false, but it is removed from the cache -// in Release() because of being over-capacity, without demoting to secondary -// cache. HyperClockCache doesn't check capacity on release (for efficiency) -// so can demote the over-capacity item to secondary cache. 
Also, we intend to -// add support for demotion in Release, but that currently causes too much -// unit test churn. -TEST_P(DBSecondaryCacheTest, TestSecondaryCacheCorrectness1) { - if (GetParam() == kHyperClock) { - // See CORRECTION above - ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); - return; - } - std::shared_ptr secondary_cache( - new TestSecondaryCache(2048 * 1024)); - std::shared_ptr cache = - NewCache(4 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - fault_fs_->SetFailGetUniqueId(true); - - // Set the file paranoid check, so after flush, the file will be read - // all the blocks will be accessed. - options.paranoid_file_checks = true; - DestroyAndReopen(options); - Random rnd(301); - const int N = 6; - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1007); - ASSERT_OK(Put(Key(i), p_v)); - } - - ASSERT_OK(Flush()); - // After Flush is successful, RocksDB will do the paranoid check for the new - // SST file. Meta blocks are always cached in the block cache and they - // will not be evicted. When block_2 is cache miss and read out, it is - // inserted to the block cache. Note that, block_1 is never successfully - // inserted to the block cache. Here are 2 lookups in the secondary cache - // for block_1 and block_2 - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - - Compact("a", "z"); - // Compaction will create the iterator to scan the whole file. So all the - // blocks are needed. Meta blocks are always cached. When block_1 is read - // out, block_2 is evicted from block cache and inserted to secondary - // cache. - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 3u); - - std::string v = Get(Key(0)); - ASSERT_EQ(1007, v.size()); - // The first data block is not in the cache, similarly, trigger the block - // cache Lookup and secondary cache lookup for block_1. But block_1 will not - // be inserted successfully due to the size. Currently, cache only has - // the meta blocks. - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 4u); - - v = Get(Key(5)); - ASSERT_EQ(1007, v.size()); - // The second data block is not in the cache, similarly, trigger the block - // cache Lookup and secondary cache lookup for block_2 and block_2 is found - // in the secondary cache. Now block cache has block_2 - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 5u); - - v = Get(Key(5)); - ASSERT_EQ(1007, v.size()); - // block_2 is in the block cache. There is a block cache hit. No need to - // lookup or insert the secondary cache. - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 5u); - - v = Get(Key(0)); - ASSERT_EQ(1007, v.size()); - // Lookup the first data block, not in the block cache, so lookup the - // secondary cache. Also not in the secondary cache. After Get, still - // block_1 is will not be cached. 
- ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 6u); - - v = Get(Key(0)); - ASSERT_EQ(1007, v.size()); - // Lookup the first data block, not in the block cache, so lookup the - // secondary cache. Also not in the secondary cache. After Get, still - // block_1 is will not be cached. - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 7u); - - Destroy(options); -} - -// In this test, the block cache size is set to 6100, after insert 6 KV-pairs -// and flush, there are 5 blocks in this SST file, 2 data blocks and 3 meta -// blocks. block_1 size is 4096 and block_2 size is 2056. The total size -// of the meta blocks are about 900 to 1000. Therefore, we can successfully -// insert and cache block_1 in the block cache (this is the different place -// from TestSecondaryCacheCorrectness1) -TEST_P(DBSecondaryCacheTest, TestSecondaryCacheCorrectness2) { - if (GetParam() == kHyperClock) { - ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); - return; - } - std::shared_ptr secondary_cache( - new TestSecondaryCache(2048 * 1024)); - std::shared_ptr cache = - NewCache(6100 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.paranoid_file_checks = true; - options.env = fault_env_.get(); - fault_fs_->SetFailGetUniqueId(true); - DestroyAndReopen(options); - Random rnd(301); - const int N = 6; - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1007); - ASSERT_OK(Put(Key(i), p_v)); - } - - ASSERT_OK(Flush()); - // After Flush is successful, RocksDB will do the paranoid check for the new - // SST file. Meta blocks are always cached in the block cache and they - // will not be evicted. When block_2 is cache miss and read out, it is - // inserted to the block cache. Thefore, block_1 is evicted from block - // cache and successfully inserted to the secondary cache. Here are 2 - // lookups in the secondary cache for block_1 and block_2. - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - - Compact("a", "z"); - // Compaction will create the iterator to scan the whole file. So all the - // blocks are needed. After Flush, only block_2 is cached in block cache - // and block_1 is in the secondary cache. So when read block_1, it is - // read out from secondary cache and inserted to block cache. At the same - // time, block_2 is inserted to secondary cache. Now, secondary cache has - // both block_1 and block_2. After compaction, block_1 is in the cache. - ASSERT_EQ(secondary_cache->num_inserts(), 2u); - ASSERT_EQ(secondary_cache->num_lookups(), 3u); - - std::string v = Get(Key(0)); - ASSERT_EQ(1007, v.size()); - // This Get needs to access block_1, since block_1 is cached in block cache - // there is no secondary cache lookup. - ASSERT_EQ(secondary_cache->num_inserts(), 2u); - ASSERT_EQ(secondary_cache->num_lookups(), 3u); - - v = Get(Key(5)); - ASSERT_EQ(1007, v.size()); - // This Get needs to access block_2 which is not in the block cache. So - // it will lookup the secondary cache for block_2 and cache it in the - // block_cache. 
- ASSERT_EQ(secondary_cache->num_inserts(), 2u); - ASSERT_EQ(secondary_cache->num_lookups(), 4u); - - v = Get(Key(5)); - ASSERT_EQ(1007, v.size()); - // This Get needs to access block_2 which is already in the block cache. - // No need to lookup secondary cache. - ASSERT_EQ(secondary_cache->num_inserts(), 2u); - ASSERT_EQ(secondary_cache->num_lookups(), 4u); - - v = Get(Key(0)); - ASSERT_EQ(1007, v.size()); - // This Get needs to access block_1, since block_1 is not in block cache - // there is one econdary cache lookup. Then, block_1 is cached in the - // block cache. - ASSERT_EQ(secondary_cache->num_inserts(), 2u); - ASSERT_EQ(secondary_cache->num_lookups(), 5u); - - v = Get(Key(0)); - ASSERT_EQ(1007, v.size()); - // This Get needs to access block_1, since block_1 is cached in block cache - // there is no secondary cache lookup. - ASSERT_EQ(secondary_cache->num_inserts(), 2u); - ASSERT_EQ(secondary_cache->num_lookups(), 5u); - - Destroy(options); -} - -// The block cache size is set to 1024*1024, after insert 6 KV-pairs -// and flush, there are 5 blocks in this SST file, 2 data blocks and 3 meta -// blocks. block_1 size is 4096 and block_2 size is 2056. The total size -// of the meta blocks are about 900 to 1000. Therefore, we can successfully -// cache all the blocks in the block cache and there is not secondary cache -// insertion. 2 lookup is needed for the blocks. -TEST_P(DBSecondaryCacheTest, NoSecondaryCacheInsertion) { - std::shared_ptr secondary_cache( - new TestSecondaryCache(2048 * 1024)); - std::shared_ptr cache = - NewCache(1024 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.paranoid_file_checks = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - fault_fs_->SetFailGetUniqueId(true); - - DestroyAndReopen(options); - Random rnd(301); - const int N = 6; - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1000); - ASSERT_OK(Put(Key(i), p_v)); - } - - ASSERT_OK(Flush()); - // After Flush is successful, RocksDB will do the paranoid check for the new - // SST file. Meta blocks are always cached in the block cache and they - // will not be evicted. Now, block cache is large enough, it cache - // both block_1 and block_2. When first time read block_1 and block_2 - // there are cache misses. So 2 secondary cache lookups are needed for - // the 2 blocks - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - - Compact("a", "z"); - // Compaction will iterate the whole SST file. Since all the data blocks - // are in the block cache. No need to lookup the secondary cache. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - - std::string v = Get(Key(0)); - ASSERT_EQ(1000, v.size()); - // Since the block cache is large enough, all the blocks are cached. we - // do not need to lookup the seondary cache. 
- ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - - Destroy(options); -} - -TEST_P(DBSecondaryCacheTest, SecondaryCacheIntensiveTesting) { - std::shared_ptr secondary_cache( - new TestSecondaryCache(2048 * 1024)); - std::shared_ptr cache = - NewCache(8 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - fault_fs_->SetFailGetUniqueId(true); - DestroyAndReopen(options); - Random rnd(301); - const int N = 256; - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1000); - ASSERT_OK(Put(Key(i), p_v)); - } - ASSERT_OK(Flush()); - Compact("a", "z"); - - Random r_index(47); - std::string v; - for (int i = 0; i < 1000; i++) { - uint32_t key_i = r_index.Next() % N; - v = Get(Key(key_i)); - } - - // We have over 200 data blocks there will be multiple insertion - // and lookups. - ASSERT_GE(secondary_cache->num_inserts(), 1u); - ASSERT_GE(secondary_cache->num_lookups(), 1u); - - Destroy(options); -} - -// In this test, the block cache size is set to 4096, after insert 6 KV-pairs -// and flush, there are 5 blocks in this SST file, 2 data blocks and 3 meta -// blocks. block_1 size is 4096 and block_2 size is 2056. The total size -// of the meta blocks are about 900 to 1000. Therefore, in any situation, -// if we try to insert block_1 to the block cache, it will always fails. Only -// block_2 will be successfully inserted into the block cache. -TEST_P(DBSecondaryCacheTest, SecondaryCacheFailureTest) { - if (GetParam() == kHyperClock) { - ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); - return; - } - std::shared_ptr secondary_cache( - new TestSecondaryCache(2048 * 1024)); - std::shared_ptr cache = - NewCache(4 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.paranoid_file_checks = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - fault_fs_->SetFailGetUniqueId(true); - DestroyAndReopen(options); - Random rnd(301); - const int N = 6; - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1007); - ASSERT_OK(Put(Key(i), p_v)); - } - - ASSERT_OK(Flush()); - // After Flush is successful, RocksDB will do the paranoid check for the new - // SST file. Meta blocks are always cached in the block cache and they - // will not be evicted. When block_2 is cache miss and read out, it is - // inserted to the block cache. Note that, block_1 is never successfully - // inserted to the block cache. Here are 2 lookups in the secondary cache - // for block_1 and block_2 - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - - // Fail the insertion, in LRU cache, the secondary insertion returned status - // is not checked, therefore, the DB will not be influenced. - secondary_cache->InjectFailure(); - Compact("a", "z"); - // Compaction will create the iterator to scan the whole file. So all the - // blocks are needed. 
Meta blocks are always cached. When block_1 is read
-  // out, block_2 is evicted from block cache and its demotion to the
-  // secondary cache is attempted, but the insertion fails because failure
-  // injection is enabled, so num_inserts stays at 0.
-  ASSERT_EQ(secondary_cache->num_inserts(), 0u);
-  ASSERT_EQ(secondary_cache->num_lookups(), 3u);
-
-  std::string v = Get(Key(0));
-  ASSERT_EQ(1007, v.size());
-  // The first data block is not in the cache, similarly, trigger the block
-  // cache Lookup and secondary cache lookup for block_1. But block_1 will not
-  // be inserted successfully due to the size. Currently, cache only has
-  // the meta blocks.
-  ASSERT_EQ(secondary_cache->num_inserts(), 0u);
-  ASSERT_EQ(secondary_cache->num_lookups(), 4u);
-
-  v = Get(Key(5));
-  ASSERT_EQ(1007, v.size());
-  // The second data block is not in the cache, similarly, trigger the block
-  // cache Lookup and secondary cache lookup for block_2. block_2 is not found
-  // in the secondary cache either (its earlier insertion attempt failed).
-  // After the read from the SST file, the block cache has block_2.
-  ASSERT_EQ(secondary_cache->num_inserts(), 0u);
-  ASSERT_EQ(secondary_cache->num_lookups(), 5u);
-
-  v = Get(Key(5));
-  ASSERT_EQ(1007, v.size());
-  // block_2 is in the block cache. There is a block cache hit. No need to
-  // lookup or insert the secondary cache.
-  ASSERT_EQ(secondary_cache->num_inserts(), 0u);
-  ASSERT_EQ(secondary_cache->num_lookups(), 5u);
-
-  v = Get(Key(0));
-  ASSERT_EQ(1007, v.size());
-  // Lookup the first data block, not in the block cache, so lookup the
-  // secondary cache. Also not in the secondary cache. After Get, block_1
-  // still will not be cached.
-  ASSERT_EQ(secondary_cache->num_inserts(), 0u);
-  ASSERT_EQ(secondary_cache->num_lookups(), 6u);
-
-  v = Get(Key(0));
-  ASSERT_EQ(1007, v.size());
-  // Lookup the first data block, not in the block cache, so lookup the
-  // secondary cache. Also not in the secondary cache. After Get, block_1
-  // still will not be cached.
- ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 7u); - secondary_cache->ResetInjectFailure(); - - Destroy(options); -} - -TEST_P(BasicSecondaryCacheTest, BasicWaitAllTest) { - std::shared_ptr secondary_cache = - std::make_shared(32 * 1024); - std::shared_ptr cache = - NewCache(1024 /* capacity */, 2 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - const int num_keys = 32; - OffsetableCacheKey ock{"foo", "bar", 1}; - - Random rnd(301); - std::vector values; - for (int i = 0; i < num_keys; ++i) { - std::string str = rnd.RandomString(1020); - values.emplace_back(str); - TestItem* item = new TestItem(str.data(), str.length()); - ASSERT_OK(cache->Insert(ock.WithOffset(i).AsSlice(), item, GetHelper(), - str.length())); - } - // Force all entries to be evicted to the secondary cache - if (GetParam() == kHyperClock) { - // HCC doesn't respond immediately to SetCapacity - for (int i = 9000; i < 9030; ++i) { - ASSERT_OK(cache->Insert(ock.WithOffset(i).AsSlice(), nullptr, - &kNoopCacheItemHelper, 256)); - } - } else { - cache->SetCapacity(0); - } - ASSERT_EQ(secondary_cache->num_inserts(), 32u); - cache->SetCapacity(32 * 1024); - - secondary_cache->SetResultMap( - {{ock.WithOffset(3).AsSlice().ToString(), - TestSecondaryCache::ResultType::DEFER}, - {ock.WithOffset(4).AsSlice().ToString(), - TestSecondaryCache::ResultType::DEFER_AND_FAIL}, - {ock.WithOffset(5).AsSlice().ToString(), - TestSecondaryCache::ResultType::FAIL}}); - - std::array async_handles; - std::array cache_keys; - for (size_t i = 0; i < async_handles.size(); ++i) { - auto& ah = async_handles[i]; - cache_keys[i] = ock.WithOffset(i); - ah.key = cache_keys[i].AsSlice(); - ah.helper = GetHelper(); - ah.create_context = this; - ah.priority = Cache::Priority::LOW; - cache->StartAsyncLookup(ah); - } - cache->WaitAll(&async_handles[0], async_handles.size()); - for (size_t i = 0; i < async_handles.size(); ++i) { - SCOPED_TRACE("i = " + std::to_string(i)); - Cache::Handle* result = async_handles[i].Result(); - if (i == 4 || i == 5) { - ASSERT_EQ(result, nullptr); - continue; - } else { - ASSERT_NE(result, nullptr); - TestItem* item = static_cast(cache->Value(result)); - ASSERT_EQ(item->ToString(), values[i]); - } - cache->Release(result); - } - - cache.reset(); - secondary_cache.reset(); -} - -// In this test, we have one KV pair per data block. We indirectly determine -// the cache key associated with each data block (and thus each KV) by using -// a sync point callback in TestSecondaryCache::Lookup. We then control the -// lookup result by setting the ResultMap. 
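Annotation: BasicWaitAllTest above exercises the batched asynchronous lookup path: all handles are started, WaitAll resolves the deferred secondary-cache results, and each Result() is then either a usable handle or nullptr (the DEFER_AND_FAIL and FAIL keys). A condensed sketch of that calling pattern follows, using the same AsyncLookupHandle fields the test uses; the header name is an assumption and the helper/context arguments are placeholders. The MultiGet test below drives the same deferral end-to-end through the DB.

#include <array>
#include <cstddef>

#include "rocksdb/advanced_cache.h"  // assumed home of Cache::AsyncLookupHandle

using namespace ROCKSDB_NAMESPACE;

// Start a batch of lookups, wait once for all pending (secondary cache)
// results, then inspect each outcome.
void LookupBatch(Cache& cache, const std::array<Slice, 8>& keys,
                 const Cache::CacheItemHelper* helper,
                 Cache::CreateContext* create_context) {
  std::array<Cache::AsyncLookupHandle, 8> pending;
  for (size_t i = 0; i < keys.size(); ++i) {
    pending[i].key = keys[i];
    pending[i].helper = helper;
    pending[i].create_context = create_context;
    pending[i].priority = Cache::Priority::LOW;
    cache.StartAsyncLookup(pending[i]);
  }
  cache.WaitAll(pending.data(), pending.size());
  for (auto& h : pending) {
    if (Cache::Handle* result = h.Result()) {
      // ... use cache.Value(result) ...
      cache.Release(result);
    }  // else: a miss, or a deferred secondary lookup that ultimately failed
  }
}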
-TEST_P(DBSecondaryCacheTest, TestSecondaryCacheMultiGet) { - if (GetParam() == kHyperClock) { - ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); - return; - } - std::shared_ptr secondary_cache( - new TestSecondaryCache(2048 * 1024)); - std::shared_ptr cache = - NewCache(1 << 20 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - table_options.cache_index_and_filter_blocks = false; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.paranoid_file_checks = true; - DestroyAndReopen(options); - Random rnd(301); - const int N = 8; - std::vector keys; - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(4000); - keys.emplace_back(p_v); - ASSERT_OK(Put(Key(i), p_v)); - } - - ASSERT_OK(Flush()); - // After Flush is successful, RocksDB does the paranoid check for the new - // SST file. This will try to lookup all data blocks in the secondary - // cache. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 8u); - - cache->SetCapacity(0); - ASSERT_EQ(secondary_cache->num_inserts(), 8u); - cache->SetCapacity(1 << 20); - - std::vector cache_keys; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "TestSecondaryCache::Lookup", [&cache_keys](void* key) -> void { - cache_keys.emplace_back(*(static_cast(key))); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - for (int i = 0; i < N; ++i) { - std::string v = Get(Key(i)); - ASSERT_EQ(4000, v.size()); - ASSERT_EQ(v, keys[i]); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_EQ(secondary_cache->num_lookups(), 16u); - cache->SetCapacity(0); - cache->SetCapacity(1 << 20); - - ASSERT_EQ(Get(Key(2)), keys[2]); - ASSERT_EQ(Get(Key(7)), keys[7]); - secondary_cache->SetResultMap( - {{cache_keys[3], TestSecondaryCache::ResultType::DEFER}, - {cache_keys[4], TestSecondaryCache::ResultType::DEFER_AND_FAIL}, - {cache_keys[5], TestSecondaryCache::ResultType::FAIL}}); - - std::vector mget_keys( - {Key(0), Key(1), Key(2), Key(3), Key(4), Key(5), Key(6), Key(7)}); - std::vector values(mget_keys.size()); - std::vector s(keys.size()); - std::vector key_slices; - for (const std::string& key : mget_keys) { - key_slices.emplace_back(key); - } - uint32_t num_lookups = secondary_cache->num_lookups(); - dbfull()->MultiGet(ReadOptions(), dbfull()->DefaultColumnFamily(), - key_slices.size(), key_slices.data(), values.data(), - s.data(), false); - ASSERT_EQ(secondary_cache->num_lookups(), num_lookups + 5); - for (int i = 0; i < N; ++i) { - ASSERT_OK(s[i]); - ASSERT_EQ(values[i].ToString(), keys[i]); - values[i].Reset(); - } - Destroy(options); -} - -class CacheWithStats : public CacheWrapper { - public: - using CacheWrapper::CacheWrapper; - - static const char* kClassName() { return "CacheWithStats"; } - const char* Name() const override { return kClassName(); } - - Status Insert(const Slice& key, Cache::ObjectPtr value, - const CacheItemHelper* helper, size_t charge, - Handle** handle = nullptr, - Priority priority = Priority::LOW) override { - insert_count_++; - return target_->Insert(key, value, helper, charge, handle, priority); - } - Handle* Lookup(const Slice& key, const CacheItemHelper* helper, - CreateContext* create_context, Priority priority, - Statistics* stats = 
nullptr) override { - lookup_count_++; - return target_->Lookup(key, helper, create_context, priority, stats); - } - - uint32_t GetInsertCount() { return insert_count_; } - uint32_t GetLookupcount() { return lookup_count_; } - void ResetCount() { - insert_count_ = 0; - lookup_count_ = 0; - } - - private: - uint32_t insert_count_ = 0; - uint32_t lookup_count_ = 0; -}; - -TEST_P(DBSecondaryCacheTest, LRUCacheDumpLoadBasic) { - std::shared_ptr base_cache = - NewCache(1024 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */); - std::shared_ptr cache = - std::make_shared(base_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - DestroyAndReopen(options); - fault_fs_->SetFailGetUniqueId(true); - - Random rnd(301); - const int N = 256; - std::vector value; - char buf[1000]; - memset(buf, 'a', 1000); - value.resize(N); - for (int i = 0; i < N; i++) { - // std::string p_v = rnd.RandomString(1000); - std::string p_v(buf, 1000); - value[i] = p_v; - ASSERT_OK(Put(Key(i), p_v)); - } - ASSERT_OK(Flush()); - Compact("a", "z"); - - // do th eread for all the key value pairs, so all the blocks should be in - // cache - uint32_t start_insert = cache->GetInsertCount(); - uint32_t start_lookup = cache->GetLookupcount(); - std::string v; - for (int i = 0; i < N; i++) { - v = Get(Key(i)); - ASSERT_EQ(v, value[i]); - } - uint32_t dump_insert = cache->GetInsertCount() - start_insert; - uint32_t dump_lookup = cache->GetLookupcount() - start_lookup; - ASSERT_EQ(63, - static_cast(dump_insert)); // the insert in the block cache - ASSERT_EQ(256, - static_cast(dump_lookup)); // the lookup in the block cache - // We have enough blocks in the block cache - - CacheDumpOptions cd_options; - cd_options.clock = fault_env_->GetSystemClock().get(); - std::string dump_path = db_->GetName() + "/cache_dump"; - std::unique_ptr dump_writer; - Status s = NewToFileCacheDumpWriter(fault_fs_, FileOptions(), dump_path, - &dump_writer); - ASSERT_OK(s); - std::unique_ptr cache_dumper; - s = NewDefaultCacheDumper(cd_options, cache, std::move(dump_writer), - &cache_dumper); - ASSERT_OK(s); - std::vector db_list; - db_list.push_back(db_); - s = cache_dumper->SetDumpFilter(db_list); - ASSERT_OK(s); - s = cache_dumper->DumpCacheEntriesToWriter(); - ASSERT_OK(s); - cache_dumper.reset(); - - // we have a new cache it is empty, then, before we do the Get, we do the - // dumpload - std::shared_ptr secondary_cache = - std::make_shared(2048 * 1024); - // This time with secondary cache - base_cache = NewCache(1024 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - cache = std::make_shared(base_cache); - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - - // start to load the data to new block cache - start_insert = secondary_cache->num_inserts(); - start_lookup = secondary_cache->num_lookups(); - std::unique_ptr dump_reader; - s = NewFromFileCacheDumpReader(fault_fs_, FileOptions(), dump_path, - &dump_reader); - ASSERT_OK(s); - std::unique_ptr cache_loader; - s = NewDefaultCacheDumpedLoader(cd_options, table_options, secondary_cache, - 
std::move(dump_reader), &cache_loader); - ASSERT_OK(s); - s = cache_loader->RestoreCacheEntriesToSecondaryCache(); - ASSERT_OK(s); - uint32_t load_insert = secondary_cache->num_inserts() - start_insert; - uint32_t load_lookup = secondary_cache->num_lookups() - start_lookup; - // check the number we inserted - ASSERT_EQ(64, static_cast(load_insert)); - ASSERT_EQ(0, static_cast(load_lookup)); - ASSERT_OK(s); - - Reopen(options); - - // After load, we do the Get again - start_insert = secondary_cache->num_inserts(); - start_lookup = secondary_cache->num_lookups(); - uint32_t cache_insert = cache->GetInsertCount(); - uint32_t cache_lookup = cache->GetLookupcount(); - for (int i = 0; i < N; i++) { - v = Get(Key(i)); - ASSERT_EQ(v, value[i]); - } - uint32_t final_insert = secondary_cache->num_inserts() - start_insert; - uint32_t final_lookup = secondary_cache->num_lookups() - start_lookup; - // no insert to secondary cache - ASSERT_EQ(0, static_cast(final_insert)); - // lookup the secondary to get all blocks - ASSERT_EQ(64, static_cast(final_lookup)); - uint32_t block_insert = cache->GetInsertCount() - cache_insert; - uint32_t block_lookup = cache->GetLookupcount() - cache_lookup; - // Check the new block cache insert and lookup, should be no insert since all - // blocks are from the secondary cache. - ASSERT_EQ(0, static_cast(block_insert)); - ASSERT_EQ(256, static_cast(block_lookup)); - - fault_fs_->SetFailGetUniqueId(false); - Destroy(options); -} - -TEST_P(DBSecondaryCacheTest, LRUCacheDumpLoadWithFilter) { - std::shared_ptr base_cache = - NewCache(1024 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */); - std::shared_ptr cache = - std::make_shared(base_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - std::string dbname1 = test::PerThreadDBPath("db_1"); - ASSERT_OK(DestroyDB(dbname1, options)); - DB* db1 = nullptr; - ASSERT_OK(DB::Open(options, dbname1, &db1)); - std::string dbname2 = test::PerThreadDBPath("db_2"); - ASSERT_OK(DestroyDB(dbname2, options)); - DB* db2 = nullptr; - ASSERT_OK(DB::Open(options, dbname2, &db2)); - fault_fs_->SetFailGetUniqueId(true); - - // write the KVs to db1 - Random rnd(301); - const int N = 256; - std::vector value1; - WriteOptions wo; - char buf[1000]; - memset(buf, 'a', 1000); - value1.resize(N); - for (int i = 0; i < N; i++) { - std::string p_v(buf, 1000); - value1[i] = p_v; - ASSERT_OK(db1->Put(wo, Key(i), p_v)); - } - ASSERT_OK(db1->Flush(FlushOptions())); - Slice bg("a"); - Slice ed("b"); - ASSERT_OK(db1->CompactRange(CompactRangeOptions(), &bg, &ed)); - - // Write the KVs to DB2 - std::vector value2; - memset(buf, 'b', 1000); - value2.resize(N); - for (int i = 0; i < N; i++) { - std::string p_v(buf, 1000); - value2[i] = p_v; - ASSERT_OK(db2->Put(wo, Key(i), p_v)); - } - ASSERT_OK(db2->Flush(FlushOptions())); - ASSERT_OK(db2->CompactRange(CompactRangeOptions(), &bg, &ed)); - - // do th eread for all the key value pairs, so all the blocks should be in - // cache - uint32_t start_insert = cache->GetInsertCount(); - uint32_t start_lookup = cache->GetLookupcount(); - ReadOptions ro; - std::string v; - for (int i = 0; i < N; i++) { - ASSERT_OK(db1->Get(ro, Key(i), &v)); - ASSERT_EQ(v, value1[i]); - } - for (int i = 0; i < N; i++) { - ASSERT_OK(db2->Get(ro, Key(i), &v)); 
- ASSERT_EQ(v, value2[i]); - } - uint32_t dump_insert = cache->GetInsertCount() - start_insert; - uint32_t dump_lookup = cache->GetLookupcount() - start_lookup; - ASSERT_EQ(128, - static_cast(dump_insert)); // the insert in the block cache - ASSERT_EQ(512, - static_cast(dump_lookup)); // the lookup in the block cache - // We have enough blocks in the block cache - - CacheDumpOptions cd_options; - cd_options.clock = fault_env_->GetSystemClock().get(); - std::string dump_path = db1->GetName() + "/cache_dump"; - std::unique_ptr dump_writer; - Status s = NewToFileCacheDumpWriter(fault_fs_, FileOptions(), dump_path, - &dump_writer); - ASSERT_OK(s); - std::unique_ptr cache_dumper; - s = NewDefaultCacheDumper(cd_options, cache, std::move(dump_writer), - &cache_dumper); - ASSERT_OK(s); - std::vector db_list; - db_list.push_back(db1); - s = cache_dumper->SetDumpFilter(db_list); - ASSERT_OK(s); - s = cache_dumper->DumpCacheEntriesToWriter(); - ASSERT_OK(s); - cache_dumper.reset(); - - // we have a new cache it is empty, then, before we do the Get, we do the - // dumpload - std::shared_ptr secondary_cache = - std::make_shared(2048 * 1024); - // This time with secondary_cache - base_cache = NewCache(1024 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - cache = std::make_shared(base_cache); - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - - // Start the cache loading process - start_insert = secondary_cache->num_inserts(); - start_lookup = secondary_cache->num_lookups(); - std::unique_ptr dump_reader; - s = NewFromFileCacheDumpReader(fault_fs_, FileOptions(), dump_path, - &dump_reader); - ASSERT_OK(s); - std::unique_ptr cache_loader; - s = NewDefaultCacheDumpedLoader(cd_options, table_options, secondary_cache, - std::move(dump_reader), &cache_loader); - ASSERT_OK(s); - s = cache_loader->RestoreCacheEntriesToSecondaryCache(); - ASSERT_OK(s); - uint32_t load_insert = secondary_cache->num_inserts() - start_insert; - uint32_t load_lookup = secondary_cache->num_lookups() - start_lookup; - // check the number we inserted - ASSERT_EQ(64, static_cast(load_insert)); - ASSERT_EQ(0, static_cast(load_lookup)); - ASSERT_OK(s); - - ASSERT_OK(db1->Close()); - delete db1; - ASSERT_OK(DB::Open(options, dbname1, &db1)); - - // After load, we do the Get again. To validate the cache, we do not allow any - // I/O, so we set the file system to false. 
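Annotation: the validation step described here relies on FaultInjectionTestFS. Once the dumped entries have been loaded into the secondary cache, the file system is deactivated so that any read not served from the block cache or the secondary cache surfaces the injected IOError and fails the test. A minimal sketch of that pattern, assuming the fault_injection_fs.h test utility; the DB and keys are placeholders.

#include <cassert>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/io_status.h"
#include "utilities/fault_injection_fs.h"  // FaultInjectionTestFS (test utility)

using namespace ROCKSDB_NAMESPACE;

// With the file system deactivated, a Get() can only succeed if every block it
// needs is served by the block cache or the secondary cache.
void VerifyReadsAreCacheOnly(
    const std::shared_ptr<FaultInjectionTestFS>& fault_fs, DB* db,
    const std::vector<std::string>& keys) {
  IOStatus no_io = IOStatus::IOError("Retryable IO Error");
  fault_fs->SetFilesystemActive(false, no_io);  // every FS call now fails
  std::string value;
  for (const auto& k : keys) {
    Status s = db->Get(ReadOptions(), k, &value);
    assert(s.ok());  // would fail if any block had to be read from the SST
    (void)s;
  }
  fault_fs->SetFilesystemActive(true);  // restore normal I/O
}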
- IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - fault_fs_->SetFilesystemActive(false, error_msg); - start_insert = secondary_cache->num_inserts(); - start_lookup = secondary_cache->num_lookups(); - uint32_t cache_insert = cache->GetInsertCount(); - uint32_t cache_lookup = cache->GetLookupcount(); - for (int i = 0; i < N; i++) { - ASSERT_OK(db1->Get(ro, Key(i), &v)); - ASSERT_EQ(v, value1[i]); - } - uint32_t final_insert = secondary_cache->num_inserts() - start_insert; - uint32_t final_lookup = secondary_cache->num_lookups() - start_lookup; - // no insert to secondary cache - ASSERT_EQ(0, static_cast(final_insert)); - // lookup the secondary to get all blocks - ASSERT_EQ(64, static_cast(final_lookup)); - uint32_t block_insert = cache->GetInsertCount() - cache_insert; - uint32_t block_lookup = cache->GetLookupcount() - cache_lookup; - // Check the new block cache insert and lookup, should be no insert since all - // blocks are from the secondary cache. - ASSERT_EQ(0, static_cast(block_insert)); - ASSERT_EQ(256, static_cast(block_lookup)); - fault_fs_->SetFailGetUniqueId(false); - fault_fs_->SetFilesystemActive(true); - delete db1; - delete db2; - ASSERT_OK(DestroyDB(dbname1, options)); - ASSERT_OK(DestroyDB(dbname2, options)); -} - -// Test the option not to use the secondary cache in a certain DB. -TEST_P(DBSecondaryCacheTest, TestSecondaryCacheOptionBasic) { - std::shared_ptr secondary_cache( - new TestSecondaryCache(2048 * 1024)); - std::shared_ptr cache = - NewCache(4 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - fault_fs_->SetFailGetUniqueId(true); - options.lowest_used_cache_tier = CacheTier::kVolatileTier; - - // Set the file paranoid check, so after flush, the file will be read - // all the blocks will be accessed. - options.paranoid_file_checks = true; - DestroyAndReopen(options); - Random rnd(301); - const int N = 6; - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1007); - ASSERT_OK(Put(Key(i), p_v)); - } - - ASSERT_OK(Flush()); - - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1007); - ASSERT_OK(Put(Key(i + 70), p_v)); - } - - ASSERT_OK(Flush()); - - // Flush will trigger the paranoid check and read blocks. But only block cache - // will be read. No operations for secondary cache. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - Compact("a", "z"); - - // Compaction will also insert and evict blocks, no operations to the block - // cache. No operations for secondary cache. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - std::string v = Get(Key(0)); - ASSERT_EQ(1007, v.size()); - - // Check the data in first block. Cache miss, direclty read from SST file. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - v = Get(Key(5)); - ASSERT_EQ(1007, v.size()); - - // Check the second block. 
- ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - v = Get(Key(5)); - ASSERT_EQ(1007, v.size()); - - // block cache hit - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - v = Get(Key(70)); - ASSERT_EQ(1007, v.size()); - - // Check the first block in the second SST file. Cache miss and trigger SST - // file read. No operations for secondary cache. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - v = Get(Key(75)); - ASSERT_EQ(1007, v.size()); - - // Check the second block in the second SST file. Cache miss and trigger SST - // file read. No operations for secondary cache. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - Destroy(options); -} - -// We disable the secondary cache in DBOptions at first. Close and reopen the DB -// with new options, which set the lowest_used_cache_tier to -// kNonVolatileBlockTier. So secondary cache will be used. -TEST_P(DBSecondaryCacheTest, TestSecondaryCacheOptionChange) { - if (GetParam() == kHyperClock) { - ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); - return; - } - std::shared_ptr secondary_cache( - new TestSecondaryCache(2048 * 1024)); - std::shared_ptr cache = - NewCache(4 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - fault_fs_->SetFailGetUniqueId(true); - options.lowest_used_cache_tier = CacheTier::kVolatileTier; - - // Set the file paranoid check, so after flush, the file will be read - // all the blocks will be accessed. - options.paranoid_file_checks = true; - DestroyAndReopen(options); - Random rnd(301); - const int N = 6; - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1007); - ASSERT_OK(Put(Key(i), p_v)); - } - - ASSERT_OK(Flush()); - - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1007); - ASSERT_OK(Put(Key(i + 70), p_v)); - } - - ASSERT_OK(Flush()); - - // Flush will trigger the paranoid check and read blocks. But only block cache - // will be read. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - Compact("a", "z"); - - // Compaction will also insert and evict blocks, no operations to the block - // cache. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - std::string v = Get(Key(0)); - ASSERT_EQ(1007, v.size()); - - // Check the data in first block. Cache miss, direclty read from SST file. - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - v = Get(Key(5)); - ASSERT_EQ(1007, v.size()); - - // Check the second block. 
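  // Up to this point the DB is still running with kVolatileTier, so the
  // secondary cache counters below remain unchanged. The interesting part
  // follows: after Reopen() with lowest_used_cache_tier set to
  // kNonVolatileBlockTier, block cache misses start probing
  // TestSecondaryCache, which is what the num_lookups() == 1 and == 2
  // assertions further down verify.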
- ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - v = Get(Key(5)); - ASSERT_EQ(1007, v.size()); - - // block cache hit - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - - // Change the option to enable secondary cache after we Reopen the DB - options.lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier; - Reopen(options); - - v = Get(Key(70)); - ASSERT_EQ(1007, v.size()); - - // Enable the secondary cache, trigger lookup of the first block in second SST - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 1u); - - v = Get(Key(75)); - ASSERT_EQ(1007, v.size()); - - // trigger lookup of the second block in second SST - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - Destroy(options); -} - -// Two DB test. We create 2 DBs sharing the same block cache and secondary -// cache. We diable the secondary cache option for DB2. -TEST_P(DBSecondaryCacheTest, TestSecondaryCacheOptionTwoDB) { - if (GetParam() == kHyperClock) { - ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); - return; - } - std::shared_ptr secondary_cache( - new TestSecondaryCache(2048 * 1024)); - std::shared_ptr cache = - NewCache(4 * 1024 /* capacity */, 0 /* num_shard_bits */, - false /* strict_capacity_limit */, secondary_cache); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.block_size = 4 * 1024; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = fault_env_.get(); - options.paranoid_file_checks = true; - std::string dbname1 = test::PerThreadDBPath("db_t_1"); - ASSERT_OK(DestroyDB(dbname1, options)); - DB* db1 = nullptr; - ASSERT_OK(DB::Open(options, dbname1, &db1)); - std::string dbname2 = test::PerThreadDBPath("db_t_2"); - ASSERT_OK(DestroyDB(dbname2, options)); - DB* db2 = nullptr; - Options options2 = options; - options2.lowest_used_cache_tier = CacheTier::kVolatileTier; - ASSERT_OK(DB::Open(options2, dbname2, &db2)); - fault_fs_->SetFailGetUniqueId(true); - - WriteOptions wo; - Random rnd(301); - const int N = 6; - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1007); - ASSERT_OK(db1->Put(wo, Key(i), p_v)); - } - - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 0u); - ASSERT_OK(db1->Flush(FlushOptions())); - - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - - for (int i = 0; i < N; i++) { - std::string p_v = rnd.RandomString(1007); - ASSERT_OK(db2->Put(wo, Key(i), p_v)); - } - - // No change in the secondary cache, since it is disabled in DB2 - ASSERT_EQ(secondary_cache->num_inserts(), 0u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - ASSERT_OK(db2->Flush(FlushOptions())); - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - - Slice bg("a"); - Slice ed("b"); - ASSERT_OK(db1->CompactRange(CompactRangeOptions(), &bg, &ed)); - ASSERT_OK(db2->CompactRange(CompactRangeOptions(), &bg, &ed)); - - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 2u); - - ReadOptions ro; - std::string v; - ASSERT_OK(db1->Get(ro, Key(0), &v)); - ASSERT_EQ(1007, v.size()); - - // DB 1 has lookup block 1 and it is miss in block cache, trigger secondary - // cache lookup - 
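  // Both DBs share the same block cache and the same TestSecondaryCache
  // object, but db2 was opened with lowest_used_cache_tier = kVolatileTier,
  // so only db1's block cache misses show up in num_lookups(). A rough sketch
  // of the per-DB opt-out pattern exercised here (names are illustrative, not
  // part of this test):
  //
  //   Options with_secondary = base_options;     // shares the block cache
  //   Options without_secondary = base_options;
  //   without_secondary.lowest_used_cache_tier = CacheTier::kVolatileTier;
  //   // A DB opened with without_secondary never consults the secondary
  //   // cache, even though the shared block cache has one attached.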
ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 3u); - - ASSERT_OK(db1->Get(ro, Key(5), &v)); - ASSERT_EQ(1007, v.size()); - - // DB 1 lookup the second block and it is miss in block cache, trigger - // secondary cache lookup - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 4u); - - ASSERT_OK(db2->Get(ro, Key(0), &v)); - ASSERT_EQ(1007, v.size()); - - // For db2, it is not enabled with secondary cache, so no search in the - // secondary cache - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 4u); - - ASSERT_OK(db2->Get(ro, Key(5), &v)); - ASSERT_EQ(1007, v.size()); - - // For db2, it is not enabled with secondary cache, so no search in the - // secondary cache - ASSERT_EQ(secondary_cache->num_inserts(), 1u); - ASSERT_EQ(secondary_cache->num_lookups(), 4u); - - fault_fs_->SetFailGetUniqueId(false); - fault_fs_->SetFilesystemActive(true); - delete db1; - delete db2; - ASSERT_OK(DestroyDB(dbname1, options)); - ASSERT_OK(DestroyDB(dbname2, options)); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/coverage/coverage_test.sh b/coverage/coverage_test.sh deleted file mode 100755 index d8d750c93..000000000 --- a/coverage/coverage_test.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -# Exit on error. -set -e - -if [ -n "$USE_CLANG" ]; then - echo "Error: Coverage test is supported only for gcc." - exit 1 -fi - -ROOT=".." -# Fetch right version of gcov -if [ -d /mnt/gvfs/third-party -a -z "$CXX" ]; then - source $ROOT/build_tools/fbcode_config_platform010.sh - GCOV=$GCC_BASE/bin/gcov -else - GCOV=$(which gcov) -fi -echo -e "Using $GCOV" - -COVERAGE_DIR="$PWD/COVERAGE_REPORT" -mkdir -p $COVERAGE_DIR - -# Find all gcno files to generate the coverage report - -PYTHON=${1:-`which python3`} -echo -e "Using $PYTHON" -GCNO_FILES=`find $ROOT -name "*.gcno"` -$GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null | - # Parse the raw gcov report to more human readable form. - $PYTHON $ROOT/coverage/parse_gcov_output.py | - # Write the output to both stdout and report file. - tee $COVERAGE_DIR/coverage_report_all.txt && -echo -e "Generated coverage report for all files: $COVERAGE_DIR/coverage_report_all.txt\n" - -# TODO: we also need to get the files of the latest commits. -# Get the most recently committed files. -LATEST_FILES=` - git show --pretty="format:" --name-only HEAD | - grep -v "^$" | - paste -s -d,` -RECENT_REPORT=$COVERAGE_DIR/coverage_report_recent.txt - -echo -e "Recently updated files: $LATEST_FILES\n" > $RECENT_REPORT -$GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null | - $PYTHON $ROOT/coverage/parse_gcov_output.py -interested-files $LATEST_FILES | - tee -a $RECENT_REPORT && -echo -e "Generated coverage report for recently updated files: $RECENT_REPORT\n" - -# Unless otherwise specified, we'll not generate html report by default -if [ -z "$HTML" ]; then - exit 0 -fi - -# Generate the html report. If we cannot find lcov in this machine, we'll simply -# skip this step. -echo "Generating the html coverage report..." - -LCOV=$(which lcov || true 2>/dev/null) -if [ -z $LCOV ] -then - echo "Skip: Cannot find lcov to generate the html report." 
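# The text reports (coverage_report_all.txt / coverage_report_recent.txt)
# generated earlier in this script are unaffected; only the optional html
# report is skipped when lcov is unavailable.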
- exit 0 -fi - -LCOV_VERSION=$(lcov -v | grep 1.1 || true) -if [ $LCOV_VERSION ] -then - echo "Not supported lcov version. Expect lcov 1.1." - exit 0 -fi - -(cd $ROOT; lcov --no-external \ - --capture \ - --directory $PWD \ - --gcov-tool $GCOV \ - --output-file $COVERAGE_DIR/coverage.info) - -genhtml $COVERAGE_DIR/coverage.info -o $COVERAGE_DIR - -echo "HTML Coverage report is generated in $COVERAGE_DIR" diff --git a/coverage/parse_gcov_output.py b/coverage/parse_gcov_output.py deleted file mode 100644 index b9788ec81..000000000 --- a/coverage/parse_gcov_output.py +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -from __future__ import print_function - -import optparse -import re -import sys - -# the gcov report follows certain pattern. Each file will have two lines -# of report, from which we can extract the file name, total lines and coverage -# percentage. -def parse_gcov_report(gcov_input): - per_file_coverage = {} - total_coverage = None - - for line in sys.stdin: - line = line.strip() - - # --First line of the coverage report (with file name in it)? - match_obj = re.match("^File '(.*)'$", line) - if match_obj: - # fetch the file name from the first line of the report. - current_file = match_obj.group(1) - continue - - # -- Second line of the file report (with coverage percentage) - match_obj = re.match("^Lines executed:(.*)% of (.*)", line) - - if match_obj: - coverage = float(match_obj.group(1)) - lines = int(match_obj.group(2)) - - if current_file is not None: - per_file_coverage[current_file] = (coverage, lines) - current_file = None - else: - # If current_file is not set, we reach the last line of report, - # which contains the summarized coverage percentage. - total_coverage = (coverage, lines) - continue - - # If the line's pattern doesn't fall into the above categories. We - # can simply ignore them since they're either empty line or doesn't - # find executable lines of the given file. - current_file = None - - return per_file_coverage, total_coverage - - -def get_option_parser(): - usage = ( - "Parse the gcov output and generate more human-readable code " - + "coverage report." - ) - parser = optparse.OptionParser(usage) - - parser.add_option( - "--interested-files", - "-i", - dest="filenames", - help="Comma separated files names. if specified, we will display " - + "the coverage report only for interested source files. " - + "Otherwise we will display the coverage report for all " - + "source files.", - ) - return parser - - -def display_file_coverage(per_file_coverage, total_coverage): - # To print out auto-adjustable column, we need to know the longest - # length of file names. - max_file_name_length = max(len(fname) for fname in per_file_coverage.keys()) - - # -- Print header - # size of separator is determined by 3 column sizes: - # file name, coverage percentage and lines. - header_template = "%" + str(max_file_name_length) + "s\t%s\t%s" - separator = "-" * (max_file_name_length + 10 + 20) - print( - header_template % ("Filename", "Coverage", "Lines") - ) # noqa: E999 T25377293 Grandfathered in - print(separator) - - # -- Print body - # template for printing coverage report for each file. 
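    # With max_file_name_length == 40 this expands to "%40s\t%5.2f%%\t%10d":
    # a right-aligned file name, the coverage percentage, and the line count,
    # mirroring the header printed above.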
- record_template = "%" + str(max_file_name_length) + "s\t%5.2f%%\t%10d" - - for fname, coverage_info in per_file_coverage.items(): - coverage, lines = coverage_info - print(record_template % (fname, coverage, lines)) - - # -- Print footer - if total_coverage: - print(separator) - print(record_template % ("Total", total_coverage[0], total_coverage[1])) - - -def report_coverage(): - parser = get_option_parser() - (options, args) = parser.parse_args() - - interested_files = set() - if options.filenames is not None: - interested_files = {f.strip() for f in options.filenames.split(",")} - - # To make things simple, right now we only read gcov report from the input - per_file_coverage, total_coverage = parse_gcov_report(sys.stdin) - - # Check if we need to display coverage info for interested files. - if len(interested_files): - per_file_coverage = dict( - (fname, per_file_coverage[fname]) - for fname in interested_files - if fname in per_file_coverage - ) - # If we only interested in several files, it makes no sense to report - # the total_coverage - total_coverage = None - - if not len(per_file_coverage): - print("Cannot find coverage info for the given files.", file=sys.stderr) - return - display_file_coverage(per_file_coverage, total_coverage) - - -if __name__ == "__main__": - report_coverage() diff --git a/db/column_family_test.cc b/db/column_family_test.cc deleted file mode 100644 index 9c92707d3..000000000 --- a/db/column_family_test.cc +++ /dev/null @@ -1,3382 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include -#include -#include -#include - -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "options/options_parser.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/iterator.h" -#include "rocksdb/utilities/object_registry.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/coding.h" -#include "util/string_util.h" -#include "utilities/fault_injection_env.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -static const int kValueSize = 1000; - -// counts how many operations were performed -class EnvCounter : public SpecialEnv { - public: - explicit EnvCounter(Env* base) - : SpecialEnv(base), num_new_writable_file_(0) {} - int GetNumberOfNewWritableFileCalls() { return num_new_writable_file_; } - Status NewWritableFile(const std::string& f, std::unique_ptr* r, - const EnvOptions& soptions) override { - ++num_new_writable_file_; - return EnvWrapper::NewWritableFile(f, r, soptions); - } - - private: - std::atomic num_new_writable_file_; -}; - -class ColumnFamilyTestBase : public testing::Test { - public: - explicit ColumnFamilyTestBase(uint32_t format) : rnd_(139), format_(format) { - Env* base_env = Env::Default(); - EXPECT_OK( - test::CreateEnvFromSystem(ConfigOptions(), &base_env, &env_guard_)); - EXPECT_NE(nullptr, base_env); - env_ = new EnvCounter(base_env); - env_->skip_fsync_ = true; - dbname_ = test::PerThreadDBPath("column_family_test"); - db_options_.create_if_missing = true; - db_options_.fail_if_options_file_error = true; - db_options_.env = env_; - EXPECT_OK(DestroyDB(dbname_, Options(db_options_, column_family_options_))); - } - - ~ColumnFamilyTestBase() override { - std::vector column_families; - for (auto h : handles_) { - ColumnFamilyDescriptor cfdescriptor; - Status s = h->GetDescriptor(&cfdescriptor); - EXPECT_OK(s); - column_families.push_back(cfdescriptor); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - Destroy(column_families); - delete env_; - } - - BlockBasedTableOptions GetBlockBasedTableOptions() { - BlockBasedTableOptions options; - options.format_version = format_; - return options; - } - - // Return the value to associate with the specified key - Slice Value(int k, std::string* storage) { - if (k == 0) { - // Ugh. Random seed of 0 used to produce no entropy. This code - // preserves the implementation that was in place when all of the - // magic values in this file were picked. 
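      // Keeping Value(k) a pure function of k also lets CheckMissed() below
      // recompute the expected value for any key it reads back, instead of
      // having to remember what was written.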
- *storage = std::string(kValueSize, ' '); - } else { - Random r(k); - *storage = r.RandomString(kValueSize); - } - return Slice(*storage); - } - - void Build(int base, int n, int flush_every = 0) { - std::string key_space, value_space; - WriteBatch batch; - - for (int i = 0; i < n; i++) { - if (flush_every != 0 && i != 0 && i % flush_every == 0) { - DBImpl* dbi = static_cast_with_check(db_); - dbi->TEST_FlushMemTable(); - } - - int keyi = base + i; - Slice key(DBTestBase::Key(keyi)); - - batch.Clear(); - batch.Put(handles_[0], key, Value(keyi, &value_space)); - batch.Put(handles_[1], key, Value(keyi, &value_space)); - batch.Put(handles_[2], key, Value(keyi, &value_space)); - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - } - } - - void CheckMissed() { - uint64_t next_expected = 0; - uint64_t missed = 0; - int bad_keys = 0; - int bad_values = 0; - int correct = 0; - std::string value_space; - for (int cf = 0; cf < 3; cf++) { - next_expected = 0; - Iterator* iter = db_->NewIterator(ReadOptions(false, true), handles_[cf]); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - uint64_t key; - Slice in(iter->key()); - in.remove_prefix(3); - if (!ConsumeDecimalNumber(&in, &key) || !in.empty() || - key < next_expected) { - bad_keys++; - continue; - } - missed += (key - next_expected); - next_expected = key + 1; - if (iter->value() != Value(static_cast(key), &value_space)) { - bad_values++; - } else { - correct++; - } - } - delete iter; - } - - ASSERT_EQ(0, bad_keys); - ASSERT_EQ(0, bad_values); - ASSERT_EQ(0, missed); - (void)correct; - } - - void Close() { - for (auto h : handles_) { - if (h) { - ASSERT_OK(db_->DestroyColumnFamilyHandle(h)); - } - } - handles_.clear(); - names_.clear(); - delete db_; - db_ = nullptr; - } - - Status TryOpen(std::vector cf, - std::vector options = {}) { - std::vector column_families; - names_.clear(); - for (size_t i = 0; i < cf.size(); ++i) { - column_families.emplace_back( - cf[i], options.size() == 0 ? column_family_options_ : options[i]); - names_.push_back(cf[i]); - } - return DB::Open(db_options_, dbname_, column_families, &handles_, &db_); - } - - Status OpenReadOnly(std::vector cf, - std::vector options = {}) { - std::vector column_families; - names_.clear(); - for (size_t i = 0; i < cf.size(); ++i) { - column_families.emplace_back( - cf[i], options.size() == 0 ? 
column_family_options_ : options[i]); - names_.push_back(cf[i]); - } - return DB::OpenForReadOnly(db_options_, dbname_, column_families, &handles_, - &db_); - } - - void AssertOpenReadOnly(std::vector cf, - std::vector options = {}) { - ASSERT_OK(OpenReadOnly(cf, options)); - } - - void Open(std::vector cf, - std::vector options = {}) { - ASSERT_OK(TryOpen(cf, options)); - } - - void Open() { Open({"default"}); } - - DBImpl* dbfull() { return static_cast_with_check(db_); } - - int GetProperty(int cf, std::string property) { - std::string value; - EXPECT_TRUE(dbfull()->GetProperty(handles_[cf], property, &value)); -#ifndef CYGWIN - return std::stoi(value); -#else - return std::strtol(value.c_str(), 0 /* off */, 10 /* base */); -#endif - } - - bool IsDbWriteStopped() { - uint64_t v; - EXPECT_TRUE(dbfull()->GetIntProperty("rocksdb.is-write-stopped", &v)); - return (v == 1); - } - - uint64_t GetDbDelayedWriteRate() { - uint64_t v; - EXPECT_TRUE( - dbfull()->GetIntProperty("rocksdb.actual-delayed-write-rate", &v)); - return v; - } - - void Destroy(const std::vector& column_families = - std::vector()) { - Close(); - ASSERT_OK(DestroyDB(dbname_, Options(db_options_, column_family_options_), - column_families)); - } - - void CreateColumnFamilies( - const std::vector& cfs, - const std::vector options = {}) { - int cfi = static_cast(handles_.size()); - handles_.resize(cfi + cfs.size()); - names_.resize(cfi + cfs.size()); - for (size_t i = 0; i < cfs.size(); ++i) { - const auto& current_cf_opt = - options.size() == 0 ? column_family_options_ : options[i]; - ASSERT_OK( - db_->CreateColumnFamily(current_cf_opt, cfs[i], &handles_[cfi])); - names_[cfi] = cfs[i]; - - // Verify the CF options of the returned CF handle. - ColumnFamilyDescriptor desc; - ASSERT_OK(handles_[cfi]->GetDescriptor(&desc)); - // Need to sanitize the default column family options before comparing - // them. 
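      // SanitizeOptions() fills in derived defaults and clamps out-of-range
      // values much as the DB does when the column family is created, so
      // comparing the handle's descriptor against the raw current_cf_opt
      // could otherwise report spurious mismatches.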
- ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - ConfigOptions(), desc.options, - SanitizeOptions(dbfull()->immutable_db_options(), current_cf_opt))); - cfi++; - } - } - - void Reopen(const std::vector options = {}) { - std::vector names; - for (auto name : names_) { - if (name != "") { - names.push_back(name); - } - } - Close(); - assert(options.size() == 0 || names.size() == options.size()); - Open(names, options); - } - - void CreateColumnFamiliesAndReopen(const std::vector& cfs) { - CreateColumnFamilies(cfs); - Reopen(); - } - - void DropColumnFamilies(const std::vector& cfs) { - for (auto cf : cfs) { - ASSERT_OK(db_->DropColumnFamily(handles_[cf])); - ASSERT_OK(db_->DestroyColumnFamilyHandle(handles_[cf])); - handles_[cf] = nullptr; - names_[cf] = ""; - } - } - - void PutRandomData(int cf, int num, int key_value_size, bool save = false) { - if (cf >= static_cast(keys_.size())) { - keys_.resize(cf + 1); - } - for (int i = 0; i < num; ++i) { - // 10 bytes for key, rest is value - if (!save) { - ASSERT_OK(Put(cf, test::RandomKey(&rnd_, 11), - rnd_.RandomString(key_value_size - 10))); - } else { - std::string key = test::RandomKey(&rnd_, 11); - keys_[cf].insert(key); - ASSERT_OK(Put(cf, key, rnd_.RandomString(key_value_size - 10))); - } - } - ASSERT_OK(db_->FlushWAL(/*sync=*/false)); - } - - void WaitForFlush(int cf) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); - } - - void WaitForCompaction() { ASSERT_OK(dbfull()->TEST_WaitForCompact()); } - - uint64_t MaxTotalInMemoryState() { - return dbfull()->TEST_MaxTotalInMemoryState(); - } - - void AssertMaxTotalInMemoryState(uint64_t value) { - ASSERT_EQ(value, MaxTotalInMemoryState()); - } - - Status Put(int cf, const std::string& key, const std::string& value) { - return db_->Put(WriteOptions(), handles_[cf], Slice(key), Slice(value)); - } - Status Merge(int cf, const std::string& key, const std::string& value) { - return db_->Merge(WriteOptions(), handles_[cf], Slice(key), Slice(value)); - } - Status Flush(int cf) { return db_->Flush(FlushOptions(), handles_[cf]); } - - std::string Get(int cf, const std::string& key) { - ReadOptions options; - options.verify_checksums = true; - std::string result; - Status s = db_->Get(options, handles_[cf], Slice(key), &result); - if (s.IsNotFound()) { - result = "NOT_FOUND"; - } else if (!s.ok()) { - result = s.ToString(); - } - return result; - } - - void CompactAll(int cf) { - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), handles_[cf], nullptr, - nullptr)); - } - - void Compact(int cf, const Slice& start, const Slice& limit) { - ASSERT_OK( - db_->CompactRange(CompactRangeOptions(), handles_[cf], &start, &limit)); - } - - int NumTableFilesAtLevel(int level, int cf) { - return GetProperty(cf, - "rocksdb.num-files-at-level" + std::to_string(level)); - } - - // Return spread of files per level - std::string FilesPerLevel(int cf) { - std::string result; - int last_non_zero_offset = 0; - for (int level = 0; level < dbfull()->NumberLevels(handles_[cf]); level++) { - int f = NumTableFilesAtLevel(level, cf); - char buf[100]; - snprintf(buf, sizeof(buf), "%s%d", (level ? 
"," : ""), f); - result += buf; - if (f > 0) { - last_non_zero_offset = static_cast(result.size()); - } - } - result.resize(last_non_zero_offset); - return result; - } - - void AssertFilesPerLevel(const std::string& value, int cf) { - ASSERT_EQ(value, FilesPerLevel(cf)); - } - - int CountLiveFiles() { - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - return static_cast(metadata.size()); - } - - void AssertCountLiveFiles(int expected_value) { - ASSERT_EQ(expected_value, CountLiveFiles()); - } - - // Do n memtable flushes, each of which produces an sstable - // covering the range [small,large]. - void MakeTables(int cf, int n, const std::string& small, - const std::string& large) { - for (int i = 0; i < n; i++) { - ASSERT_OK(Put(cf, small, "begin")); - ASSERT_OK(Put(cf, large, "end")); - ASSERT_OK(db_->Flush(FlushOptions(), handles_[cf])); - } - } - - int CountLiveLogFiles() { - int micros_wait_for_log_deletion = 20000; - env_->SleepForMicroseconds(micros_wait_for_log_deletion); - int ret = 0; - VectorLogPtr wal_files; - Status s; - // GetSortedWalFiles is a flakey function -- it gets all the wal_dir - // children files and then later checks for their existence. if some of the - // log files doesn't exist anymore, it reports an error. it does all of this - // without DB mutex held, so if a background process deletes the log file - // while the function is being executed, it returns an error. We retry the - // function 10 times to avoid the error failing the test - for (int retries = 0; retries < 10; ++retries) { - wal_files.clear(); - s = db_->GetSortedWalFiles(wal_files); - if (s.ok()) { - break; - } - } - EXPECT_OK(s); - for (const auto& wal : wal_files) { - if (wal->Type() == kAliveLogFile) { - ++ret; - } - } - return ret; - return 0; - } - - void AssertCountLiveLogFiles(int value) { - ASSERT_EQ(value, CountLiveLogFiles()); - } - - void AssertNumberOfImmutableMemtables(std::vector num_per_cf) { - assert(num_per_cf.size() == handles_.size()); - - for (size_t i = 0; i < num_per_cf.size(); ++i) { - ASSERT_EQ(num_per_cf[i], GetProperty(static_cast(i), - "rocksdb.num-immutable-mem-table")); - } - } - - void CopyFile(const std::string& source, const std::string& destination, - uint64_t size = 0) { - const EnvOptions soptions; - std::unique_ptr srcfile; - ASSERT_OK(env_->NewSequentialFile(source, &srcfile, soptions)); - std::unique_ptr destfile; - ASSERT_OK(env_->NewWritableFile(destination, &destfile, soptions)); - - if (size == 0) { - // default argument means copy everything - ASSERT_OK(env_->GetFileSize(source, &size)); - } - - char buffer[4096]; - Slice slice; - while (size > 0) { - uint64_t one = std::min(uint64_t(sizeof(buffer)), size); - ASSERT_OK(srcfile->Read(one, &slice, buffer)); - ASSERT_OK(destfile->Append(slice)); - size -= slice.size(); - } - ASSERT_OK(destfile->Close()); - } - - int GetSstFileCount(std::string path) { - std::vector files; - DBTestBase::GetSstFiles(env_, path, &files); - return static_cast(files.size()); - } - - void RecalculateWriteStallConditions( - ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options) { - // add lock to avoid race condition between - // `RecalculateWriteStallConditions` which writes to CFStats and - // background `DBImpl::DumpStats()` threads which read CFStats - dbfull()->TEST_LockMutex(); - cfd->RecalculateWriteStallConditions(mutable_cf_options); - dbfull()->TEST_UnlockMutex(); - } - - std::vector handles_; - std::vector names_; - std::vector> keys_; - ColumnFamilyOptions column_family_options_; - DBOptions 
db_options_; - std::string dbname_; - DB* db_ = nullptr; - EnvCounter* env_; - std::shared_ptr env_guard_; - Random rnd_; - uint32_t format_; -}; - -class ColumnFamilyTest - : public ColumnFamilyTestBase, - virtual public ::testing::WithParamInterface { - public: - ColumnFamilyTest() : ColumnFamilyTestBase(GetParam()) {} -}; - -INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest, - testing::Values(test::kDefaultFormatVersion)); -INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest, - testing::Values(kLatestFormatVersion)); - -TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) { - for (int iter = 0; iter < 3; ++iter) { - Open(); - CreateColumnFamilies({"one", "two", "three"}); - for (size_t i = 0; i < handles_.size(); ++i) { - auto cfh = static_cast_with_check(handles_[i]); - ASSERT_EQ(i, cfh->GetID()); - } - if (iter == 1) { - Reopen(); - } - DropColumnFamilies({3}); - Reopen(); - if (iter == 2) { - // this tests if max_column_family is correctly persisted with - // WriteSnapshot() - Reopen(); - } - CreateColumnFamilies({"three2"}); - // ID 3 that was used for dropped column family "three" should not be - // reused - auto cfh3 = static_cast_with_check(handles_[3]); - ASSERT_EQ(4U, cfh3->GetID()); - Close(); - Destroy(); - } -} - -TEST_P(ColumnFamilyTest, CreateCFRaceWithGetAggProperty) { - Open(); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::WriteOptionsFile:1", - "ColumnFamilyTest.CreateCFRaceWithGetAggProperty:1"}, - {"ColumnFamilyTest.CreateCFRaceWithGetAggProperty:2", - "DBImpl::WriteOptionsFile:2"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ROCKSDB_NAMESPACE::port::Thread thread( - [&] { CreateColumnFamilies({"one"}); }); - - TEST_SYNC_POINT("ColumnFamilyTest.CreateCFRaceWithGetAggProperty:1"); - uint64_t pv; - db_->GetAggregatedIntProperty(DB::Properties::kEstimateTableReadersMem, &pv); - TEST_SYNC_POINT("ColumnFamilyTest.CreateCFRaceWithGetAggProperty:2"); - - thread.join(); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -class FlushEmptyCFTestWithParam - : public ColumnFamilyTestBase, - virtual public testing::WithParamInterface> { - public: - FlushEmptyCFTestWithParam() - : ColumnFamilyTestBase(std::get<0>(GetParam())), - allow_2pc_(std::get<1>(GetParam())) {} - - // Required if inheriting from testing::WithParamInterface<> - static void SetUpTestCase() {} - static void TearDownTestCase() {} - - bool allow_2pc_; -}; - -TEST_P(FlushEmptyCFTestWithParam, FlushEmptyCFTest) { - std::unique_ptr fault_env( - new FaultInjectionTestEnv(env_)); - db_options_.env = fault_env.get(); - db_options_.allow_2pc = allow_2pc_; - Open(); - CreateColumnFamilies({"one", "two"}); - // Generate log file A. - ASSERT_OK(Put(1, "foo", "v1")); // seqID 1 - - Reopen(); - // Log file A is not dropped after reopening because default column family's - // min log number is 0. - // It flushes to SST file X - ASSERT_OK(Put(1, "foo", "v1")); // seqID 2 - ASSERT_OK(Put(1, "bar", "v2")); // seqID 3 - // Current log file is file B now. While flushing, a new log file C is created - // and is set to current. Boths' min log number is set to file C in memory, so - // after flushing file B is deleted. At the same time, the min log number of - // default CF is not written to manifest. Log file A still remains. - // Flushed to SST file Y. 
- ASSERT_OK(Flush(1)); - ASSERT_OK(Flush(0)); - ASSERT_OK(Put(1, "bar", "v3")); // seqID 4 - ASSERT_OK(Put(1, "foo", "v4")); // seqID 5 - ASSERT_OK(db_->FlushWAL(/*sync=*/false)); - - // Preserve file system state up to here to simulate a crash condition. - fault_env->SetFilesystemActive(false); - std::vector names; - for (auto name : names_) { - if (name != "") { - names.push_back(name); - } - } - - Close(); - fault_env->ResetState(); - - // Before opening, there are four files: - // Log file A contains seqID 1 - // Log file C contains seqID 4, 5 - // SST file X contains seqID 1 - // SST file Y contains seqID 2, 3 - // Min log number: - // default CF: 0 - // CF one, two: C - // When opening the DB, all the seqID should be preserved. - Open(names, {}); - ASSERT_EQ("v4", Get(1, "foo")); - ASSERT_EQ("v3", Get(1, "bar")); - Close(); - - db_options_.env = env_; -} - -TEST_P(FlushEmptyCFTestWithParam, FlushEmptyCFTest2) { - std::unique_ptr fault_env( - new FaultInjectionTestEnv(env_)); - db_options_.env = fault_env.get(); - db_options_.allow_2pc = allow_2pc_; - Open(); - CreateColumnFamilies({"one", "two"}); - // Generate log file A. - ASSERT_OK(Put(1, "foo", "v1")); // seqID 1 - - Reopen(); - // Log file A is not dropped after reopening because default column family's - // min log number is 0. - // It flushes to SST file X - ASSERT_OK(Put(1, "foo", "v1")); // seqID 2 - ASSERT_OK(Put(1, "bar", "v2")); // seqID 3 - // Current log file is file B now. While flushing, a new log file C is created - // and is set to current. Both CFs' min log number is set to file C so after - // flushing file B is deleted. Log file A still remains. - // Flushed to SST file Y. - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(0, "bar", "v2")); // seqID 4 - ASSERT_OK(Put(2, "bar", "v2")); // seqID 5 - ASSERT_OK(Put(1, "bar", "v3")); // seqID 6 - // Flushing all column families. This forces all CFs' min log to current. This - // is written to the manifest file. Log file C is cleared. - ASSERT_OK(Flush(0)); - ASSERT_OK(Flush(1)); - ASSERT_OK(Flush(2)); - // Write to log file D - ASSERT_OK(Put(1, "bar", "v4")); // seqID 7 - ASSERT_OK(Put(1, "bar", "v5")); // seqID 8 - ASSERT_OK(db_->FlushWAL(/*sync=*/false)); - // Preserve file system state up to here to simulate a crash condition. - fault_env->SetFilesystemActive(false); - std::vector names; - for (auto name : names_) { - if (name != "") { - names.push_back(name); - } - } - - Close(); - fault_env->ResetState(); - // Before opening, there are two logfiles: - // Log file A contains seqID 1 - // Log file D contains seqID 7, 8 - // Min log number: - // default CF: D - // CF one, two: D - // When opening the DB, log file D should be replayed using the seqID - // specified in the file. 
- Open(names, {}); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v5", Get(1, "bar")); - Close(); - - db_options_.env = env_; -} - -INSTANTIATE_TEST_CASE_P( - FormatDef, FlushEmptyCFTestWithParam, - testing::Values(std::make_tuple(test::kDefaultFormatVersion, true), - std::make_tuple(test::kDefaultFormatVersion, false))); -INSTANTIATE_TEST_CASE_P( - FormatLatest, FlushEmptyCFTestWithParam, - testing::Values(std::make_tuple(kLatestFormatVersion, true), - std::make_tuple(kLatestFormatVersion, false))); - -TEST_P(ColumnFamilyTest, AddDrop) { - Open(); - CreateColumnFamilies({"one", "two", "three"}); - ASSERT_EQ("NOT_FOUND", Get(1, "fodor")); - ASSERT_EQ("NOT_FOUND", Get(2, "fodor")); - DropColumnFamilies({2}); - ASSERT_EQ("NOT_FOUND", Get(1, "fodor")); - CreateColumnFamilies({"four"}); - ASSERT_EQ("NOT_FOUND", Get(3, "fodor")); - ASSERT_OK(Put(1, "fodor", "mirko")); - ASSERT_EQ("mirko", Get(1, "fodor")); - ASSERT_EQ("NOT_FOUND", Get(3, "fodor")); - Close(); - ASSERT_TRUE(TryOpen({"default"}).IsInvalidArgument()); - Open({"default", "one", "three", "four"}); - DropColumnFamilies({1}); - Reopen(); - Close(); - - std::vector families; - ASSERT_OK(DB::ListColumnFamilies(db_options_, dbname_, &families)); - std::sort(families.begin(), families.end()); - ASSERT_TRUE(families == - std::vector({"default", "four", "three"})); -} - -TEST_P(ColumnFamilyTest, BulkAddDrop) { - constexpr int kNumCF = 1000; - ColumnFamilyOptions cf_options; - WriteOptions write_options; - Open(); - std::vector cf_names; - std::vector cf_handles; - for (int i = 1; i <= kNumCF; i++) { - cf_names.push_back("cf1-" + std::to_string(i)); - } - ASSERT_OK(db_->CreateColumnFamilies(cf_options, cf_names, &cf_handles)); - for (int i = 1; i <= kNumCF; i++) { - ASSERT_OK(db_->Put(write_options, cf_handles[i - 1], "foo", "bar")); - } - ASSERT_OK(db_->DropColumnFamilies(cf_handles)); - std::vector cf_descriptors; - for (auto* handle : cf_handles) { - delete handle; - } - cf_handles.clear(); - for (int i = 1; i <= kNumCF; i++) { - cf_descriptors.emplace_back("cf2-" + std::to_string(i), - ColumnFamilyOptions()); - } - ASSERT_OK(db_->CreateColumnFamilies(cf_descriptors, &cf_handles)); - for (int i = 1; i <= kNumCF; i++) { - ASSERT_OK(db_->Put(write_options, cf_handles[i - 1], "foo", "bar")); - } - ASSERT_OK(db_->DropColumnFamilies(cf_handles)); - for (auto* handle : cf_handles) { - delete handle; - } - Close(); - std::vector families; - ASSERT_OK(DB::ListColumnFamilies(db_options_, dbname_, &families)); - std::sort(families.begin(), families.end()); - ASSERT_TRUE(families == std::vector({"default"})); -} - -TEST_P(ColumnFamilyTest, DropTest) { - // first iteration - don't reopen DB before dropping - // second iteration - reopen DB before dropping - for (int iter = 0; iter < 2; ++iter) { - Open({"default"}); - CreateColumnFamiliesAndReopen({"pikachu"}); - for (int i = 0; i < 100; ++i) { - ASSERT_OK(Put(1, std::to_string(i), "bar" + std::to_string(i))); - } - ASSERT_OK(Flush(1)); - - if (iter == 1) { - Reopen(); - } - ASSERT_EQ("bar1", Get(1, "1")); - - AssertCountLiveFiles(1); - DropColumnFamilies({1}); - // make sure that all files are deleted when we drop the column family - AssertCountLiveFiles(0); - Destroy(); - } -} - -TEST_P(ColumnFamilyTest, WriteBatchFailure) { - Open(); - CreateColumnFamiliesAndReopen({"one", "two"}); - WriteBatch batch; - ASSERT_OK(batch.Put(handles_[0], Slice("existing"), Slice("column-family"))); - ASSERT_OK( - batch.Put(handles_[1], Slice("non-existing"), Slice("column-family"))); - 
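  // This first Write() succeeds because both column families still exist.
  // After DropColumnFamilies({1}) below, writing the same batch only succeeds
  // with ignore_missing_column_families = true (puts to the dropped CF are
  // silently skipped); with default WriteOptions it fails with
  // InvalidArgument.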
ASSERT_OK(db_->Write(WriteOptions(), &batch)); - DropColumnFamilies({1}); - WriteOptions woptions_ignore_missing_cf; - woptions_ignore_missing_cf.ignore_missing_column_families = true; - ASSERT_OK( - batch.Put(handles_[0], Slice("still here"), Slice("column-family"))); - ASSERT_OK(db_->Write(woptions_ignore_missing_cf, &batch)); - ASSERT_EQ("column-family", Get(0, "still here")); - Status s = db_->Write(WriteOptions(), &batch); - ASSERT_TRUE(s.IsInvalidArgument()); - Close(); -} - -TEST_P(ColumnFamilyTest, ReadWrite) { - Open(); - CreateColumnFamiliesAndReopen({"one", "two"}); - ASSERT_OK(Put(0, "foo", "v1")); - ASSERT_OK(Put(0, "bar", "v2")); - ASSERT_OK(Put(1, "mirko", "v3")); - ASSERT_OK(Put(0, "foo", "v2")); - ASSERT_OK(Put(2, "fodor", "v5")); - - for (int iter = 0; iter <= 3; ++iter) { - ASSERT_EQ("v2", Get(0, "foo")); - ASSERT_EQ("v2", Get(0, "bar")); - ASSERT_EQ("v3", Get(1, "mirko")); - ASSERT_EQ("v5", Get(2, "fodor")); - ASSERT_EQ("NOT_FOUND", Get(0, "fodor")); - ASSERT_EQ("NOT_FOUND", Get(1, "fodor")); - ASSERT_EQ("NOT_FOUND", Get(2, "foo")); - if (iter <= 1) { - Reopen(); - } - } - Close(); -} - -TEST_P(ColumnFamilyTest, IgnoreRecoveredLog) { - std::string backup_logs = dbname_ + "/backup_logs"; - - // delete old files in backup_logs directory - ASSERT_OK(env_->CreateDirIfMissing(dbname_)); - ASSERT_OK(env_->CreateDirIfMissing(backup_logs)); - std::vector old_files; - ASSERT_OK(env_->GetChildren(backup_logs, &old_files)); - for (auto& file : old_files) { - ASSERT_OK(env_->DeleteFile(backup_logs + "/" + file)); - } - - column_family_options_.merge_operator = - MergeOperators::CreateUInt64AddOperator(); - db_options_.wal_dir = dbname_ + "/logs"; - Destroy(); - Open(); - CreateColumnFamilies({"cf1", "cf2"}); - - // fill up the DB - std::string one, two, three; - PutFixed64(&one, 1); - PutFixed64(&two, 2); - PutFixed64(&three, 3); - ASSERT_OK(Merge(0, "foo", one)); - ASSERT_OK(Merge(1, "mirko", one)); - ASSERT_OK(Merge(0, "foo", one)); - ASSERT_OK(Merge(2, "bla", one)); - ASSERT_OK(Merge(2, "fodor", one)); - ASSERT_OK(Merge(0, "bar", one)); - ASSERT_OK(Merge(2, "bla", one)); - ASSERT_OK(Merge(1, "mirko", two)); - ASSERT_OK(Merge(1, "franjo", one)); - - // copy the logs to backup - std::vector logs; - ASSERT_OK(env_->GetChildren(db_options_.wal_dir, &logs)); - for (auto& log : logs) { - CopyFile(db_options_.wal_dir + "/" + log, backup_logs + "/" + log); - } - - // recover the DB - Close(); - - // 1. check consistency - // 2. copy the logs from backup back to WAL dir. if the recovery happens - // again on the same log files, this should lead to incorrect results - // due to applying merge operator twice - // 3. 
check consistency - for (int iter = 0; iter < 2; ++iter) { - // assert consistency - Open({"default", "cf1", "cf2"}); - ASSERT_EQ(two, Get(0, "foo")); - ASSERT_EQ(one, Get(0, "bar")); - ASSERT_EQ(three, Get(1, "mirko")); - ASSERT_EQ(one, Get(1, "franjo")); - ASSERT_EQ(one, Get(2, "fodor")); - ASSERT_EQ(two, Get(2, "bla")); - Close(); - - if (iter == 0) { - // copy the logs from backup back to wal dir - for (auto& log : logs) { - CopyFile(backup_logs + "/" + log, db_options_.wal_dir + "/" + log); - } - } - } -} - -TEST_P(ColumnFamilyTest, FlushTest) { - Open(); - CreateColumnFamiliesAndReopen({"one", "two"}); - ASSERT_OK(Put(0, "foo", "v1")); - ASSERT_OK(Put(0, "bar", "v2")); - ASSERT_OK(Put(1, "mirko", "v3")); - ASSERT_OK(Put(0, "foo", "v2")); - ASSERT_OK(Put(2, "fodor", "v5")); - - for (int j = 0; j < 2; j++) { - ReadOptions ro; - std::vector iterators; - // Hold super version. - if (j == 0) { - ASSERT_OK(db_->NewIterators(ro, handles_, &iterators)); - } - - for (int i = 0; i < 3; ++i) { - uint64_t max_total_in_memory_state = MaxTotalInMemoryState(); - ASSERT_OK(Flush(i)); - AssertMaxTotalInMemoryState(max_total_in_memory_state); - } - ASSERT_OK(Put(1, "foofoo", "bar")); - ASSERT_OK(Put(0, "foofoo", "bar")); - - for (auto* it : iterators) { - ASSERT_OK(it->status()); - delete it; - } - } - Reopen(); - - for (int iter = 0; iter <= 2; ++iter) { - ASSERT_EQ("v2", Get(0, "foo")); - ASSERT_EQ("v2", Get(0, "bar")); - ASSERT_EQ("v3", Get(1, "mirko")); - ASSERT_EQ("v5", Get(2, "fodor")); - ASSERT_EQ("NOT_FOUND", Get(0, "fodor")); - ASSERT_EQ("NOT_FOUND", Get(1, "fodor")); - ASSERT_EQ("NOT_FOUND", Get(2, "foo")); - if (iter <= 1) { - Reopen(); - } - } - Close(); -} - -// Makes sure that obsolete log files get deleted -TEST_P(ColumnFamilyTest, LogDeletionTest) { - db_options_.max_total_wal_size = std::numeric_limits::max(); - column_family_options_.arena_block_size = 4 * 1024; - column_family_options_.write_buffer_size = 128000; // 128KB - Open(); - CreateColumnFamilies({"one", "two", "three", "four"}); - // Each bracket is one log file. 
if number is in (), it means - // we don't need it anymore (it's been flushed) - // [] - AssertCountLiveLogFiles(0); - PutRandomData(0, 1, 128); - // [0] - PutRandomData(1, 1, 128); - // [0, 1] - PutRandomData(1, 1000, 128); - WaitForFlush(1); - // [0, (1)] [1] - AssertCountLiveLogFiles(2); - PutRandomData(0, 1, 128); - // [0, (1)] [0, 1] - AssertCountLiveLogFiles(2); - PutRandomData(2, 1, 128); - // [0, (1)] [0, 1, 2] - PutRandomData(2, 1000, 128); - WaitForFlush(2); - // [0, (1)] [0, 1, (2)] [2] - AssertCountLiveLogFiles(3); - PutRandomData(2, 1000, 128); - WaitForFlush(2); - // [0, (1)] [0, 1, (2)] [(2)] [2] - AssertCountLiveLogFiles(4); - PutRandomData(3, 1, 128); - // [0, (1)] [0, 1, (2)] [(2)] [2, 3] - PutRandomData(1, 1, 128); - // [0, (1)] [0, 1, (2)] [(2)] [1, 2, 3] - AssertCountLiveLogFiles(4); - PutRandomData(1, 1000, 128); - WaitForFlush(1); - // [0, (1)] [0, (1), (2)] [(2)] [(1), 2, 3] [1] - AssertCountLiveLogFiles(5); - PutRandomData(0, 1000, 128); - WaitForFlush(0); - // [(0), (1)] [(0), (1), (2)] [(2)] [(1), 2, 3] [1, (0)] [0] - // delete obsolete logs --> - // [(1), 2, 3] [1, (0)] [0] - AssertCountLiveLogFiles(3); - PutRandomData(0, 1000, 128); - WaitForFlush(0); - // [(1), 2, 3] [1, (0)], [(0)] [0] - AssertCountLiveLogFiles(4); - PutRandomData(1, 1000, 128); - WaitForFlush(1); - // [(1), 2, 3] [(1), (0)] [(0)] [0, (1)] [1] - AssertCountLiveLogFiles(5); - PutRandomData(2, 1000, 128); - WaitForFlush(2); - // [(1), (2), 3] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2] - AssertCountLiveLogFiles(6); - PutRandomData(3, 1000, 128); - WaitForFlush(3); - // [(1), (2), (3)] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2, (3)] [3] - // delete obsolete logs --> - // [0, (1)] [1, (2)], [2, (3)] [3] - AssertCountLiveLogFiles(4); - Close(); -} - -TEST_P(ColumnFamilyTest, CrashAfterFlush) { - std::unique_ptr fault_env( - new FaultInjectionTestEnv(env_)); - db_options_.env = fault_env.get(); - Open(); - CreateColumnFamilies({"one"}); - - WriteBatch batch; - ASSERT_OK(batch.Put(handles_[0], Slice("foo"), Slice("bar"))); - ASSERT_OK(batch.Put(handles_[1], Slice("foo"), Slice("bar"))); - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(Flush(0)); - fault_env->SetFilesystemActive(false); - - std::vector names; - for (auto name : names_) { - if (name != "") { - names.push_back(name); - } - } - Close(); - ASSERT_OK(fault_env->DropUnsyncedFileData()); - fault_env->ResetState(); - Open(names, {}); - - // Write batch should be atomic. - ASSERT_EQ(Get(0, "foo"), Get(1, "foo")); - - Close(); - db_options_.env = env_; -} - -TEST_P(ColumnFamilyTest, OpenNonexistentColumnFamily) { - ASSERT_OK(TryOpen({"default"})); - Close(); - ASSERT_TRUE(TryOpen({"default", "dne"}).IsInvalidArgument()); -} - -// Makes sure that obsolete log files get deleted -TEST_P(ColumnFamilyTest, DifferentWriteBufferSizes) { - // disable flushing stale column families - db_options_.max_total_wal_size = std::numeric_limits::max(); - Open(); - CreateColumnFamilies({"one", "two", "three"}); - ColumnFamilyOptions default_cf, one, two, three; - // setup options. 
all column families have max_write_buffer_number setup to 10 - // "default" -> 100KB memtable, start flushing immediately - // "one" -> 200KB memtable, start flushing with two immutable memtables - // "two" -> 1MB memtable, start flushing with three immutable memtables - // "three" -> 90KB memtable, start flushing with four immutable memtables - default_cf.write_buffer_size = 100000; - default_cf.arena_block_size = 4 * 4096; - default_cf.max_write_buffer_number = 10; - default_cf.min_write_buffer_number_to_merge = 1; - default_cf.max_write_buffer_size_to_maintain = 0; - one.write_buffer_size = 200000; - one.arena_block_size = 4 * 4096; - one.max_write_buffer_number = 10; - one.min_write_buffer_number_to_merge = 2; - one.max_write_buffer_size_to_maintain = - static_cast(one.write_buffer_size); - two.write_buffer_size = 1000000; - two.arena_block_size = 4 * 4096; - two.max_write_buffer_number = 10; - two.min_write_buffer_number_to_merge = 3; - two.max_write_buffer_size_to_maintain = - static_cast(two.write_buffer_size); - three.write_buffer_size = 4096 * 22; - three.arena_block_size = 4096; - three.max_write_buffer_number = 10; - three.min_write_buffer_number_to_merge = 4; - three.max_write_buffer_size_to_maintain = - static_cast(three.write_buffer_size); - - Reopen({default_cf, one, two, three}); - - int micros_wait_for_flush = 10000; - PutRandomData(0, 100, 1000); - WaitForFlush(0); - AssertNumberOfImmutableMemtables({0, 0, 0, 0}); - AssertCountLiveLogFiles(1); - PutRandomData(1, 200, 1000); - env_->SleepForMicroseconds(micros_wait_for_flush); - AssertNumberOfImmutableMemtables({0, 1, 0, 0}); - AssertCountLiveLogFiles(2); - PutRandomData(2, 1000, 1000); - env_->SleepForMicroseconds(micros_wait_for_flush); - AssertNumberOfImmutableMemtables({0, 1, 1, 0}); - AssertCountLiveLogFiles(3); - PutRandomData(2, 1000, 1000); - env_->SleepForMicroseconds(micros_wait_for_flush); - AssertNumberOfImmutableMemtables({0, 1, 2, 0}); - AssertCountLiveLogFiles(4); - PutRandomData(3, 93, 990); - env_->SleepForMicroseconds(micros_wait_for_flush); - AssertNumberOfImmutableMemtables({0, 1, 2, 1}); - AssertCountLiveLogFiles(5); - PutRandomData(3, 88, 990); - env_->SleepForMicroseconds(micros_wait_for_flush); - AssertNumberOfImmutableMemtables({0, 1, 2, 2}); - AssertCountLiveLogFiles(6); - PutRandomData(3, 88, 990); - env_->SleepForMicroseconds(micros_wait_for_flush); - AssertNumberOfImmutableMemtables({0, 1, 2, 3}); - AssertCountLiveLogFiles(7); - PutRandomData(0, 100, 1000); - WaitForFlush(0); - AssertNumberOfImmutableMemtables({0, 1, 2, 3}); - AssertCountLiveLogFiles(8); - PutRandomData(2, 100, 10000); - WaitForFlush(2); - AssertNumberOfImmutableMemtables({0, 1, 0, 3}); - AssertCountLiveLogFiles(9); - PutRandomData(3, 88, 990); - WaitForFlush(3); - AssertNumberOfImmutableMemtables({0, 1, 0, 0}); - AssertCountLiveLogFiles(10); - PutRandomData(3, 88, 990); - env_->SleepForMicroseconds(micros_wait_for_flush); - AssertNumberOfImmutableMemtables({0, 1, 0, 1}); - AssertCountLiveLogFiles(11); - PutRandomData(1, 200, 1000); - WaitForFlush(1); - AssertNumberOfImmutableMemtables({0, 0, 0, 1}); - AssertCountLiveLogFiles(5); - PutRandomData(3, 88 * 3, 990); - WaitForFlush(3); - PutRandomData(3, 88 * 4, 990); - WaitForFlush(3); - AssertNumberOfImmutableMemtables({0, 0, 0, 0}); - AssertCountLiveLogFiles(12); - PutRandomData(0, 100, 1000); - WaitForFlush(0); - AssertNumberOfImmutableMemtables({0, 0, 0, 0}); - AssertCountLiveLogFiles(12); - PutRandomData(2, 3 * 1000, 1000); - WaitForFlush(2); - 
AssertNumberOfImmutableMemtables({0, 0, 0, 0}); - AssertCountLiveLogFiles(12); - PutRandomData(1, 2 * 200, 1000); - WaitForFlush(1); - AssertNumberOfImmutableMemtables({0, 0, 0, 0}); - AssertCountLiveLogFiles(7); - Close(); -} - -// The test is commented out because we want to test that snapshot is -// not created for memtables not supported it, but There isn't a memtable -// that doesn't support snapshot right now. If we have one later, we can -// re-enable the test. -// -// TEST_P(ColumnFamilyTest, MemtableNotSupportSnapshot) { -// db_options_.allow_concurrent_memtable_write = false; -// Open(); -// auto* s1 = dbfull()->GetSnapshot(); -// ASSERT_TRUE(s1 != nullptr); -// dbfull()->ReleaseSnapshot(s1); - -// // Add a column family that doesn't support snapshot -// ColumnFamilyOptions first; -// first.memtable_factory.reset(new DummyMemtableNotSupportingSnapshot()); -// CreateColumnFamilies({"first"}, {first}); -// auto* s2 = dbfull()->GetSnapshot(); -// ASSERT_TRUE(s2 == nullptr); - -// // Add a column family that supports snapshot. Snapshot stays not -// supported. ColumnFamilyOptions second; CreateColumnFamilies({"second"}, -// {second}); auto* s3 = dbfull()->GetSnapshot(); ASSERT_TRUE(s3 == nullptr); -// Close(); -// } - -class TestComparator : public Comparator { - int Compare(const ROCKSDB_NAMESPACE::Slice& /*a*/, - const ROCKSDB_NAMESPACE::Slice& /*b*/) const override { - return 0; - } - const char* Name() const override { return "Test"; } - void FindShortestSeparator( - std::string* /*start*/, - const ROCKSDB_NAMESPACE::Slice& /*limit*/) const override {} - void FindShortSuccessor(std::string* /*key*/) const override {} -}; - -static TestComparator third_comparator; -static TestComparator fourth_comparator; - -// Test that we can retrieve the comparator from a created CF -TEST_P(ColumnFamilyTest, GetComparator) { - Open(); - // Add a column family with no comparator specified - CreateColumnFamilies({"first"}); - const Comparator* comp = handles_[0]->GetComparator(); - ASSERT_EQ(comp, BytewiseComparator()); - - // Add three column families - one with no comparator and two - // with comparators specified - ColumnFamilyOptions second, third, fourth; - second.comparator = &third_comparator; - third.comparator = &fourth_comparator; - CreateColumnFamilies({"second", "third", "fourth"}, {second, third, fourth}); - ASSERT_EQ(handles_[1]->GetComparator(), BytewiseComparator()); - ASSERT_EQ(handles_[2]->GetComparator(), &third_comparator); - ASSERT_EQ(handles_[3]->GetComparator(), &fourth_comparator); - Close(); -} - -TEST_P(ColumnFamilyTest, DifferentMergeOperators) { - Open(); - CreateColumnFamilies({"first", "second"}); - ColumnFamilyOptions default_cf, first, second; - first.merge_operator = MergeOperators::CreateUInt64AddOperator(); - second.merge_operator = MergeOperators::CreateStringAppendOperator(); - Reopen({default_cf, first, second}); - - std::string one, two, three; - PutFixed64(&one, 1); - PutFixed64(&two, 2); - PutFixed64(&three, 3); - - ASSERT_OK(Put(0, "foo", two)); - ASSERT_OK(Put(0, "foo", one)); - ASSERT_TRUE(Merge(0, "foo", two).IsNotSupported()); - ASSERT_EQ(Get(0, "foo"), one); - - ASSERT_OK(Put(1, "foo", two)); - ASSERT_OK(Put(1, "foo", one)); - ASSERT_OK(Merge(1, "foo", two)); - ASSERT_EQ(Get(1, "foo"), three); - - ASSERT_OK(Put(2, "foo", two)); - ASSERT_OK(Put(2, "foo", one)); - ASSERT_OK(Merge(2, "foo", two)); - ASSERT_EQ(Get(2, "foo"), one + "," + two); - Close(); -} - -TEST_P(ColumnFamilyTest, DifferentCompactionStyles) { - Open(); - CreateColumnFamilies({"one", 
"two"}); - ColumnFamilyOptions default_cf, one, two; - db_options_.max_open_files = 20; // only 10 files in file cache - - default_cf.compaction_style = kCompactionStyleLevel; - default_cf.num_levels = 3; - default_cf.write_buffer_size = 64 << 10; // 64KB - default_cf.target_file_size_base = 30 << 10; - default_cf.max_compaction_bytes = static_cast(1) << 60; - - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.no_block_cache = true; - default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - one.compaction_style = kCompactionStyleUniversal; - - one.num_levels = 1; - // trigger compaction if there are >= 4 files - one.level0_file_num_compaction_trigger = 4; - one.write_buffer_size = 120000; - - two.compaction_style = kCompactionStyleLevel; - two.num_levels = 4; - two.level0_file_num_compaction_trigger = 3; - two.write_buffer_size = 100000; - - Reopen({default_cf, one, two}); - - // SETUP column family "one" -- universal style - for (int i = 0; i < one.level0_file_num_compaction_trigger - 1; ++i) { - PutRandomData(1, 10, 12000); - PutRandomData(1, 1, 10); - WaitForFlush(1); - AssertFilesPerLevel(std::to_string(i + 1), 1); - } - - // SETUP column family "two" -- level style with 4 levels - for (int i = 0; i < two.level0_file_num_compaction_trigger - 1; ++i) { - PutRandomData(2, 10, 12000); - PutRandomData(2, 1, 10); - WaitForFlush(2); - AssertFilesPerLevel(std::to_string(i + 1), 2); - } - - // TRIGGER compaction "one" - PutRandomData(1, 10, 12000); - PutRandomData(1, 1, 10); - - // TRIGGER compaction "two" - PutRandomData(2, 10, 12000); - PutRandomData(2, 1, 10); - - // WAIT for compactions - WaitForCompaction(); - - // VERIFY compaction "one" - AssertFilesPerLevel("1", 1); - - // VERIFY compaction "two" - AssertFilesPerLevel("0,1", 2); - CompactAll(2); - AssertFilesPerLevel("0,1", 2); - - Close(); -} - -// Sync points not supported in RocksDB Lite - -TEST_P(ColumnFamilyTest, MultipleManualCompactions) { - Open(); - CreateColumnFamilies({"one", "two"}); - ColumnFamilyOptions default_cf, one, two; - db_options_.max_open_files = 20; // only 10 files in file cache - db_options_.max_background_compactions = 3; - - default_cf.compaction_style = kCompactionStyleLevel; - default_cf.num_levels = 3; - default_cf.write_buffer_size = 64 << 10; // 64KB - default_cf.target_file_size_base = 30 << 10; - default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.no_block_cache = true; - default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - one.compaction_style = kCompactionStyleUniversal; - - one.num_levels = 1; - // trigger compaction if there are >= 4 files - one.level0_file_num_compaction_trigger = 4; - one.write_buffer_size = 120000; - - two.compaction_style = kCompactionStyleLevel; - two.num_levels = 4; - two.level0_file_num_compaction_trigger = 3; - two.write_buffer_size = 100000; - - Reopen({default_cf, one, two}); - - // SETUP column family "one" -- universal style - for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel(std::to_string(i + 1), 1); - } - std::atomic_bool cf_1_1{true}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ColumnFamilyTest::MultiManual:4", "ColumnFamilyTest::MultiManual:1"}, - {"ColumnFamilyTest::MultiManual:2", "ColumnFamilyTest::MultiManual:5"}, - 
{"ColumnFamilyTest::MultiManual:2", "ColumnFamilyTest::MultiManual:3"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { - if (cf_1_1.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:4"); - TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:3"); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - std::vector threads; - threads.emplace_back([&] { - CompactRangeOptions compact_options; - compact_options.exclusive_manual_compaction = false; - ASSERT_OK( - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - }); - - // SETUP column family "two" -- level style with 4 levels - for (int i = 0; i < two.level0_file_num_compaction_trigger - 2; ++i) { - PutRandomData(2, 10, 12000); - PutRandomData(2, 1, 10); - WaitForFlush(2); - AssertFilesPerLevel(std::to_string(i + 1), 2); - } - threads.emplace_back([&] { - TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:1"); - CompactRangeOptions compact_options; - compact_options.exclusive_manual_compaction = false; - ASSERT_OK( - db_->CompactRange(compact_options, handles_[2], nullptr, nullptr)); - TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:2"); - }); - - TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:5"); - for (auto& t : threads) { - t.join(); - } - - // VERIFY compaction "one" - AssertFilesPerLevel("1", 1); - - // VERIFY compaction "two" - AssertFilesPerLevel("0,1", 2); - CompactAll(2); - AssertFilesPerLevel("0,1", 2); - // Compare against saved keys - std::set::iterator key_iter = keys_[1].begin(); - while (key_iter != keys_[1].end()) { - ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); - key_iter++; - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - Close(); -} - -TEST_P(ColumnFamilyTest, AutomaticAndManualCompactions) { - Open(); - CreateColumnFamilies({"one", "two"}); - ColumnFamilyOptions default_cf, one, two; - db_options_.max_open_files = 20; // only 10 files in file cache - db_options_.max_background_compactions = 3; - - default_cf.compaction_style = kCompactionStyleLevel; - default_cf.num_levels = 3; - default_cf.write_buffer_size = 64 << 10; // 64KB - default_cf.target_file_size_base = 30 << 10; - default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - ; - table_options.no_block_cache = true; - default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - one.compaction_style = kCompactionStyleUniversal; - - one.num_levels = 1; - // trigger compaction if there are >= 4 files - one.level0_file_num_compaction_trigger = 4; - one.write_buffer_size = 120000; - - two.compaction_style = kCompactionStyleLevel; - two.num_levels = 4; - two.level0_file_num_compaction_trigger = 3; - two.write_buffer_size = 100000; - - Reopen({default_cf, one, two}); - // make sure all background compaction jobs can be scheduled - auto stop_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - - std::atomic_bool cf_1_1{true}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ColumnFamilyTest::AutoManual:4", "ColumnFamilyTest::AutoManual:1"}, - {"ColumnFamilyTest::AutoManual:2", "ColumnFamilyTest::AutoManual:5"}, - {"ColumnFamilyTest::AutoManual:2", "ColumnFamilyTest::AutoManual:3"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", 
[&](void* /*arg*/) { - if (cf_1_1.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:4"); - TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:3"); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - // SETUP column family "one" -- universal style - for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel(std::to_string(i + 1), 1); - } - - TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:1"); - - // SETUP column family "two" -- level style with 4 levels - for (int i = 0; i < two.level0_file_num_compaction_trigger - 2; ++i) { - PutRandomData(2, 10, 12000); - PutRandomData(2, 1, 10); - WaitForFlush(2); - AssertFilesPerLevel(std::to_string(i + 1), 2); - } - ROCKSDB_NAMESPACE::port::Thread threads([&] { - CompactRangeOptions compact_options; - compact_options.exclusive_manual_compaction = false; - ASSERT_OK( - db_->CompactRange(compact_options, handles_[2], nullptr, nullptr)); - TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:2"); - }); - - TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:5"); - threads.join(); - - // WAIT for compactions - WaitForCompaction(); - - // VERIFY compaction "one" - AssertFilesPerLevel("1", 1); - - // VERIFY compaction "two" - AssertFilesPerLevel("0,1", 2); - CompactAll(2); - AssertFilesPerLevel("0,1", 2); - // Compare against saved keys - std::set::iterator key_iter = keys_[1].begin(); - while (key_iter != keys_[1].end()) { - ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); - key_iter++; - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(ColumnFamilyTest, ManualAndAutomaticCompactions) { - Open(); - CreateColumnFamilies({"one", "two"}); - ColumnFamilyOptions default_cf, one, two; - db_options_.max_open_files = 20; // only 10 files in file cache - db_options_.max_background_compactions = 3; - - default_cf.compaction_style = kCompactionStyleLevel; - default_cf.num_levels = 3; - default_cf.write_buffer_size = 64 << 10; // 64KB - default_cf.target_file_size_base = 30 << 10; - default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - ; - table_options.no_block_cache = true; - default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - one.compaction_style = kCompactionStyleUniversal; - - one.num_levels = 1; - // trigger compaction if there are >= 4 files - one.level0_file_num_compaction_trigger = 4; - one.write_buffer_size = 120000; - - two.compaction_style = kCompactionStyleLevel; - two.num_levels = 4; - two.level0_file_num_compaction_trigger = 3; - two.write_buffer_size = 100000; - - Reopen({default_cf, one, two}); - // make sure all background compaction jobs can be scheduled - auto stop_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - - // SETUP column family "one" -- universal style - for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel(std::to_string(i + 1), 1); - } - std::atomic_bool cf_1_1{true}; - std::atomic_bool cf_1_2{true}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:1"}, - {"ColumnFamilyTest::ManualAuto:5", "ColumnFamilyTest::ManualAuto:2"}, - 
{"ColumnFamilyTest::ManualAuto:2", "ColumnFamilyTest::ManualAuto:3"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { - if (cf_1_1.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:4"); - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:3"); - } else if (cf_1_2.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:2"); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ROCKSDB_NAMESPACE::port::Thread threads([&] { - CompactRangeOptions compact_options; - compact_options.exclusive_manual_compaction = false; - ASSERT_OK( - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - }); - - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1"); - - // SETUP column family "two" -- level style with 4 levels - for (int i = 0; i < two.level0_file_num_compaction_trigger; ++i) { - PutRandomData(2, 10, 12000); - PutRandomData(2, 1, 10); - WaitForFlush(2); - AssertFilesPerLevel(std::to_string(i + 1), 2); - } - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5"); - threads.join(); - - // WAIT for compactions - WaitForCompaction(); - - // VERIFY compaction "one" - AssertFilesPerLevel("1", 1); - - // VERIFY compaction "two" - AssertFilesPerLevel("0,1", 2); - CompactAll(2); - AssertFilesPerLevel("0,1", 2); - // Compare against saved keys - std::set::iterator key_iter = keys_[1].begin(); - while (key_iter != keys_[1].end()) { - ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); - key_iter++; - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(ColumnFamilyTest, SameCFManualManualCompactions) { - Open(); - CreateColumnFamilies({"one"}); - ColumnFamilyOptions default_cf, one; - db_options_.max_open_files = 20; // only 10 files in file cache - db_options_.max_background_compactions = 3; - - default_cf.compaction_style = kCompactionStyleLevel; - default_cf.num_levels = 3; - default_cf.write_buffer_size = 64 << 10; // 64KB - default_cf.target_file_size_base = 30 << 10; - default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - ; - table_options.no_block_cache = true; - default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - one.compaction_style = kCompactionStyleUniversal; - - one.num_levels = 1; - // trigger compaction if there are >= 4 files - one.level0_file_num_compaction_trigger = 4; - one.write_buffer_size = 120000; - - Reopen({default_cf, one}); - // make sure all background compaction jobs can be scheduled - auto stop_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - - // SETUP column family "one" -- universal style - for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel(std::to_string(i + 1), 1); - } - std::atomic_bool cf_1_1{true}; - std::atomic_bool cf_1_2{true}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ColumnFamilyTest::ManualManual:4", "ColumnFamilyTest::ManualManual:2"}, - {"ColumnFamilyTest::ManualManual:4", "ColumnFamilyTest::ManualManual:5"}, - {"ColumnFamilyTest::ManualManual:1", "ColumnFamilyTest::ManualManual:2"}, - {"ColumnFamilyTest::ManualManual:1", - "ColumnFamilyTest::ManualManual:3"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - 
"DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { - if (cf_1_1.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:4"); - TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:3"); - } else if (cf_1_2.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:2"); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ROCKSDB_NAMESPACE::port::Thread threads([&] { - CompactRangeOptions compact_options; - compact_options.exclusive_manual_compaction = true; - ASSERT_OK( - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - }); - - TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:5"); - - WaitForFlush(1); - - // Add more L0 files and force another manual compaction - for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel( - std::to_string(one.level0_file_num_compaction_trigger + i), 1); - } - - ROCKSDB_NAMESPACE::port::Thread threads1([&] { - CompactRangeOptions compact_options; - compact_options.exclusive_manual_compaction = false; - ASSERT_OK( - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - }); - - TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:1"); - - threads.join(); - threads1.join(); - WaitForCompaction(); - // VERIFY compaction "one" - ASSERT_LE(NumTableFilesAtLevel(0, 1), 2); - - // Compare against saved keys - std::set::iterator key_iter = keys_[1].begin(); - while (key_iter != keys_[1].end()) { - ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); - key_iter++; - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactions) { - Open(); - CreateColumnFamilies({"one"}); - ColumnFamilyOptions default_cf, one; - db_options_.max_open_files = 20; // only 10 files in file cache - db_options_.max_background_compactions = 3; - - default_cf.compaction_style = kCompactionStyleLevel; - default_cf.num_levels = 3; - default_cf.write_buffer_size = 64 << 10; // 64KB - default_cf.target_file_size_base = 30 << 10; - default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - ; - table_options.no_block_cache = true; - default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - one.compaction_style = kCompactionStyleUniversal; - - one.num_levels = 1; - // trigger compaction if there are >= 4 files - one.level0_file_num_compaction_trigger = 4; - one.write_buffer_size = 120000; - - Reopen({default_cf, one}); - // make sure all background compaction jobs can be scheduled - auto stop_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - - // SETUP column family "one" -- universal style - for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel(std::to_string(i + 1), 1); - } - std::atomic_bool cf_1_1{true}; - std::atomic_bool cf_1_2{true}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:2"}, - {"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:5"}, - {"ColumnFamilyTest::ManualAuto:1", "ColumnFamilyTest::ManualAuto:2"}, - {"ColumnFamilyTest::ManualAuto:1", "ColumnFamilyTest::ManualAuto:3"}}); - 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { - if (cf_1_1.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:4"); - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:3"); - } else if (cf_1_2.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:2"); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ROCKSDB_NAMESPACE::port::Thread threads([&] { - CompactRangeOptions compact_options; - compact_options.exclusive_manual_compaction = false; - ASSERT_OK( - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - }); - - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5"); - - WaitForFlush(1); - - // Add more L0 files and force automatic compaction - for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel( - std::to_string(one.level0_file_num_compaction_trigger + i), 1); - } - - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1"); - - threads.join(); - WaitForCompaction(); - // VERIFY compaction "one" - ASSERT_LE(NumTableFilesAtLevel(0, 1), 2); - - // Compare against saved keys - std::set::iterator key_iter = keys_[1].begin(); - while (key_iter != keys_[1].end()) { - ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); - key_iter++; - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactionsLevel) { - Open(); - CreateColumnFamilies({"one"}); - ColumnFamilyOptions default_cf, one; - db_options_.max_open_files = 20; // only 10 files in file cache - db_options_.max_background_compactions = 3; - - default_cf.compaction_style = kCompactionStyleLevel; - default_cf.num_levels = 3; - default_cf.write_buffer_size = 64 << 10; // 64KB - default_cf.target_file_size_base = 30 << 10; - default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - ; - table_options.no_block_cache = true; - default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - one.compaction_style = kCompactionStyleLevel; - - one.num_levels = 1; - // trigger compaction if there are >= 4 files - one.level0_file_num_compaction_trigger = 3; - one.write_buffer_size = 120000; - - Reopen({default_cf, one}); - // make sure all background compaction jobs can be scheduled - auto stop_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - - // SETUP column family "one" -- level style - for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel(std::to_string(i + 1), 1); - } - std::atomic_bool cf_1_1{true}; - std::atomic_bool cf_1_2{true}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:2"}, - {"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:5"}, - {"ColumnFamilyTest::ManualAuto:3", "ColumnFamilyTest::ManualAuto:2"}, - {"LevelCompactionPicker::PickCompactionBySize:0", - "ColumnFamilyTest::ManualAuto:3"}, - {"ColumnFamilyTest::ManualAuto:1", "ColumnFamilyTest::ManualAuto:3"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { - 
if (cf_1_1.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:4"); - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:3"); - } else if (cf_1_2.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:2"); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ROCKSDB_NAMESPACE::port::Thread threads([&] { - CompactRangeOptions compact_options; - compact_options.exclusive_manual_compaction = false; - ASSERT_OK( - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - }); - - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5"); - - // Add more L0 files and force automatic compaction - for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel( - std::to_string(one.level0_file_num_compaction_trigger + i), 1); - } - - TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1"); - - threads.join(); - WaitForCompaction(); - // VERIFY compaction "one" - AssertFilesPerLevel("0,1", 1); - - // Compare against saved keys - std::set::iterator key_iter = keys_[1].begin(); - while (key_iter != keys_[1].end()) { - ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); - key_iter++; - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -// In this test, we generate enough files to trigger automatic compactions. -// The automatic compaction waits in NonTrivial:AfterRun -// We generate more files and then trigger an automatic compaction -// This will wait because the automatic compaction has files it needs. -// Once the conflict is hit, the automatic compaction starts and ends -// Then the manual will run and end. -TEST_P(ColumnFamilyTest, SameCFAutomaticManualCompactions) { - Open(); - CreateColumnFamilies({"one"}); - ColumnFamilyOptions default_cf, one; - db_options_.max_open_files = 20; // only 10 files in file cache - db_options_.max_background_compactions = 3; - - default_cf.compaction_style = kCompactionStyleLevel; - default_cf.num_levels = 3; - default_cf.write_buffer_size = 64 << 10; // 64KB - default_cf.target_file_size_base = 30 << 10; - default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - ; - table_options.no_block_cache = true; - default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - one.compaction_style = kCompactionStyleUniversal; - - one.num_levels = 1; - // trigger compaction if there are >= 4 files - one.level0_file_num_compaction_trigger = 4; - one.write_buffer_size = 120000; - - Reopen({default_cf, one}); - // make sure all background compaction jobs can be scheduled - auto stop_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - - std::atomic_bool cf_1_1{true}; - std::atomic_bool cf_1_2{true}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ColumnFamilyTest::AutoManual:4", "ColumnFamilyTest::AutoManual:2"}, - {"ColumnFamilyTest::AutoManual:4", "ColumnFamilyTest::AutoManual:5"}, - {"CompactionPicker::CompactRange:Conflict", - "ColumnFamilyTest::AutoManual:3"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { - if (cf_1_1.exchange(false)) { - TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:4"); - TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:3"); - } else if (cf_1_2.exchange(false)) { - 
TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:2"); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // SETUP column family "one" -- universal style - for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - AssertFilesPerLevel(std::to_string(i + 1), 1); - } - - TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:5"); - - // Add another L0 file and force automatic compaction - for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { - PutRandomData(1, 10, 12000, true); - PutRandomData(1, 1, 10, true); - WaitForFlush(1); - } - - CompactRangeOptions compact_options; - compact_options.exclusive_manual_compaction = false; - ASSERT_OK(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - - TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:1"); - - WaitForCompaction(); - // VERIFY compaction "one" - AssertFilesPerLevel("1", 1); - // Compare against saved keys - std::set::iterator key_iter = keys_[1].begin(); - while (key_iter != keys_[1].end()) { - ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); - key_iter++; - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -namespace { -std::string IterStatus(Iterator* iter) { - std::string result; - if (iter->Valid()) { - result = iter->key().ToString() + "->" + iter->value().ToString(); - } else { - EXPECT_OK(iter->status()); - result = "(invalid)"; - } - return result; -} -} // anonymous namespace - -TEST_P(ColumnFamilyTest, NewIteratorsTest) { - // iter == 0 -- no tailing - // iter == 2 -- tailing - for (int iter = 0; iter < 2; ++iter) { - Open(); - CreateColumnFamiliesAndReopen({"one", "two"}); - ASSERT_OK(Put(0, "a", "b")); - ASSERT_OK(Put(1, "b", "a")); - ASSERT_OK(Put(2, "c", "m")); - ASSERT_OK(Put(2, "v", "t")); - std::vector iterators; - ReadOptions options; - options.tailing = (iter == 1); - ASSERT_OK(db_->NewIterators(options, handles_, &iterators)); - - for (auto it : iterators) { - it->SeekToFirst(); - } - ASSERT_EQ(IterStatus(iterators[0]), "a->b"); - ASSERT_EQ(IterStatus(iterators[1]), "b->a"); - ASSERT_EQ(IterStatus(iterators[2]), "c->m"); - - ASSERT_OK(Put(1, "x", "x")); - - for (auto it : iterators) { - it->Next(); - } - - ASSERT_EQ(IterStatus(iterators[0]), "(invalid)"); - if (iter == 0) { - // no tailing - ASSERT_EQ(IterStatus(iterators[1]), "(invalid)"); - } else { - // tailing - ASSERT_EQ(IterStatus(iterators[1]), "x->x"); - } - ASSERT_EQ(IterStatus(iterators[2]), "v->t"); - - for (auto it : iterators) { - delete it; - } - Destroy(); - } -} - -TEST_P(ColumnFamilyTest, ReadOnlyDBTest) { - Open(); - CreateColumnFamiliesAndReopen({"one", "two", "three", "four"}); - ASSERT_OK(Put(0, "a", "b")); - ASSERT_OK(Put(1, "foo", "bla")); - ASSERT_OK(Put(2, "foo", "blabla")); - ASSERT_OK(Put(3, "foo", "blablabla")); - ASSERT_OK(Put(4, "foo", "blablablabla")); - - DropColumnFamilies({2}); - Close(); - // open only a subset of column families - AssertOpenReadOnly({"default", "one", "four"}); - ASSERT_EQ("NOT_FOUND", Get(0, "foo")); - ASSERT_EQ("bla", Get(1, "foo")); - ASSERT_EQ("blablablabla", Get(2, "foo")); - - // test newiterators - { - std::vector iterators; - ASSERT_OK(db_->NewIterators(ReadOptions(), handles_, &iterators)); - for (auto it : iterators) { - it->SeekToFirst(); - } - ASSERT_EQ(IterStatus(iterators[0]), "a->b"); - ASSERT_EQ(IterStatus(iterators[1]), "foo->bla"); - 
ASSERT_EQ(IterStatus(iterators[2]), "foo->blablablabla"); - for (auto it : iterators) { - it->Next(); - } - ASSERT_EQ(IterStatus(iterators[0]), "(invalid)"); - ASSERT_EQ(IterStatus(iterators[1]), "(invalid)"); - ASSERT_EQ(IterStatus(iterators[2]), "(invalid)"); - - for (auto it : iterators) { - delete it; - } - } - - Close(); - // can't open dropped column family - Status s = OpenReadOnly({"default", "one", "two"}); - ASSERT_TRUE(!s.ok()); - - // Can't open without specifying default column family - s = OpenReadOnly({"one", "four"}); - ASSERT_TRUE(!s.ok()); -} - -TEST_P(ColumnFamilyTest, DontRollEmptyLogs) { - Open(); - CreateColumnFamiliesAndReopen({"one", "two", "three", "four"}); - - for (size_t i = 0; i < handles_.size(); ++i) { - PutRandomData(static_cast(i), 10, 100); - } - int num_writable_file_start = env_->GetNumberOfNewWritableFileCalls(); - // this will trigger the flushes - for (int i = 0; i <= 4; ++i) { - ASSERT_OK(Flush(i)); - } - - for (int i = 0; i < 4; ++i) { - WaitForFlush(i); - } - int total_new_writable_files = - env_->GetNumberOfNewWritableFileCalls() - num_writable_file_start; - ASSERT_EQ(static_cast(total_new_writable_files), handles_.size() + 1); - Close(); -} - -TEST_P(ColumnFamilyTest, FlushStaleColumnFamilies) { - Open(); - CreateColumnFamilies({"one", "two"}); - ColumnFamilyOptions default_cf, one, two; - default_cf.write_buffer_size = 100000; // small write buffer size - default_cf.arena_block_size = 4096; - default_cf.disable_auto_compactions = true; - one.disable_auto_compactions = true; - two.disable_auto_compactions = true; - db_options_.max_total_wal_size = 210000; - - Reopen({default_cf, one, two}); - - PutRandomData(2, 1, 10); // 10 bytes - for (int i = 0; i < 2; ++i) { - PutRandomData(0, 100, 1000); // flush - WaitForFlush(0); - - AssertCountLiveFiles(i + 1); - } - // third flush. 
now, CF [two] should be detected as stale and flushed - // column family 1 should not be flushed since it's empty - PutRandomData(0, 100, 1000); // flush - WaitForFlush(0); - WaitForFlush(2); - // at least 3 files for default column families, 1 file for column family - // [two], zero files for column family [one], because it's empty - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_GE(metadata.size(), 4); - bool has_cf1_sst = false; - bool has_cf2_sst = false; - for (const auto& file : metadata) { - if (file.column_family_name == "one") { - has_cf1_sst = true; - } else if (file.column_family_name == "two") { - has_cf2_sst = true; - } - } - ASSERT_FALSE(has_cf1_sst); - ASSERT_TRUE(has_cf2_sst); - - ASSERT_OK(Flush(0)); - ASSERT_EQ(0, dbfull()->TEST_total_log_size()); - Close(); -} - -TEST_P(ColumnFamilyTest, CreateMissingColumnFamilies) { - Status s = TryOpen({"one", "two"}); - ASSERT_TRUE(!s.ok()); - db_options_.create_missing_column_families = true; - s = TryOpen({"default", "one", "two"}); - ASSERT_TRUE(s.ok()); - Close(); -} - -TEST_P(ColumnFamilyTest, SanitizeOptions) { - DBOptions db_options; - for (int s = kCompactionStyleLevel; s <= kCompactionStyleUniversal; ++s) { - for (int l = 0; l <= 2; l++) { - for (int i = 1; i <= 3; i++) { - for (int j = 1; j <= 3; j++) { - for (int k = 1; k <= 3; k++) { - ColumnFamilyOptions original; - original.compaction_style = static_cast(s); - original.num_levels = l; - original.level0_stop_writes_trigger = i; - original.level0_slowdown_writes_trigger = j; - original.level0_file_num_compaction_trigger = k; - original.write_buffer_size = - l * 4 * 1024 * 1024 + i * 1024 * 1024 + j * 1024 + k; - - ColumnFamilyOptions result = - SanitizeOptions(ImmutableDBOptions(db_options), original); - ASSERT_TRUE(result.level0_stop_writes_trigger >= - result.level0_slowdown_writes_trigger); - ASSERT_TRUE(result.level0_slowdown_writes_trigger >= - result.level0_file_num_compaction_trigger); - ASSERT_TRUE(result.level0_file_num_compaction_trigger == - original.level0_file_num_compaction_trigger); - if (s == kCompactionStyleLevel) { - ASSERT_GE(result.num_levels, 2); - } else { - ASSERT_GE(result.num_levels, 1); - if (original.num_levels >= 1) { - ASSERT_EQ(result.num_levels, original.num_levels); - } - } - - // Make sure Sanitize options sets arena_block_size to 1/8 of - // the write_buffer_size, rounded up to a multiple of 4k. 
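// For concreteness (these numbers are derived from the expectation computed
// just below, not from the SanitizeOptions implementation itself): with
// l = 0, i = 1, j = 1, k = 1 the loop sets write_buffer_size to
// 1 MB + 1024 + 1 = 1,049,601 bytes; one eighth of that is roughly 131,200,
// and rounding up to the next 4 KB boundary gives 135,168, which is the
// arena_block_size the test expects. For large write buffers the expected
// value is capped at 1 MB by the std::min below.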
- size_t expected_arena_block_size = - l * 4 * 1024 * 1024 / 8 + i * 1024 * 1024 / 8; - if (j + k != 0) { - // not a multiple of 4k, round up 4k - expected_arena_block_size += 4 * 1024; - } - expected_arena_block_size = - std::min(size_t{1024 * 1024}, expected_arena_block_size); - ASSERT_EQ(expected_arena_block_size, result.arena_block_size); - } - } - } - } - } -} - -TEST_P(ColumnFamilyTest, ReadDroppedColumnFamily) { - // iter 0 -- drop CF, don't reopen - // iter 1 -- delete CF, reopen - for (int iter = 0; iter < 2; ++iter) { - db_options_.create_missing_column_families = true; - db_options_.max_open_files = 20; - // delete obsolete files always - db_options_.delete_obsolete_files_period_micros = 0; - Open({"default", "one", "two"}); - ColumnFamilyOptions options; - options.level0_file_num_compaction_trigger = 100; - options.level0_slowdown_writes_trigger = 200; - options.level0_stop_writes_trigger = 200; - options.write_buffer_size = 100000; // small write buffer size - Reopen({options, options, options}); - - // 1MB should create ~10 files for each CF - int kKeysNum = 10000; - PutRandomData(0, kKeysNum, 100); - PutRandomData(1, kKeysNum, 100); - PutRandomData(2, kKeysNum, 100); - - { - std::unique_ptr iterator( - db_->NewIterator(ReadOptions(), handles_[2])); - iterator->SeekToFirst(); - - if (iter == 0) { - // Drop CF two - ASSERT_OK(db_->DropColumnFamily(handles_[2])); - } else { - // delete CF two - ASSERT_OK(db_->DestroyColumnFamilyHandle(handles_[2])); - handles_[2] = nullptr; - } - // Make sure iterator created can still be used. - int count = 0; - for (; iterator->Valid(); iterator->Next()) { - ASSERT_OK(iterator->status()); - ++count; - } - ASSERT_OK(iterator->status()); - ASSERT_EQ(count, kKeysNum); - } - - // Add bunch more data to other CFs - PutRandomData(0, kKeysNum, 100); - PutRandomData(1, kKeysNum, 100); - - if (iter == 1) { - Reopen(); - } - - // Since we didn't delete CF handle, RocksDB's contract guarantees that - // we're still able to read dropped CF - for (int i = 0; i < 3; ++i) { - std::unique_ptr iterator( - db_->NewIterator(ReadOptions(), handles_[i])); - int count = 0; - for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { - ASSERT_OK(iterator->status()); - ++count; - } - ASSERT_OK(iterator->status()); - ASSERT_EQ(count, kKeysNum * ((i == 2) ? 1 : 2)); - } - - Close(); - Destroy(); - } -} - -TEST_P(ColumnFamilyTest, LiveIteratorWithDroppedColumnFamily) { - db_options_.create_missing_column_families = true; - db_options_.max_open_files = 20; - // delete obsolete files always - db_options_.delete_obsolete_files_period_micros = 0; - Open({"default", "one", "two"}); - ColumnFamilyOptions options; - options.level0_file_num_compaction_trigger = 100; - options.level0_slowdown_writes_trigger = 200; - options.level0_stop_writes_trigger = 200; - options.write_buffer_size = 100000; // small write buffer size - Reopen({options, options, options}); - - // 1MB should create ~10 files for each CF - int kKeysNum = 10000; - PutRandomData(1, kKeysNum, 100); - { - std::unique_ptr iterator( - db_->NewIterator(ReadOptions(), handles_[1])); - iterator->SeekToFirst(); - - DropColumnFamilies({1}); - - // Make sure iterator created can still be used. 
- int count = 0; - for (; iterator->Valid(); iterator->Next()) { - ASSERT_OK(iterator->status()); - ++count; - } - ASSERT_OK(iterator->status()); - ASSERT_EQ(count, kKeysNum); - } - - Reopen(); - Close(); - Destroy(); -} - -TEST_P(ColumnFamilyTest, FlushAndDropRaceCondition) { - db_options_.create_missing_column_families = true; - Open({"default", "one"}); - ColumnFamilyOptions options; - options.level0_file_num_compaction_trigger = 100; - options.level0_slowdown_writes_trigger = 200; - options.level0_stop_writes_trigger = 200; - options.max_write_buffer_number = 20; - options.write_buffer_size = 100000; // small write buffer size - Reopen({options, options}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"VersionSet::LogAndApply::ColumnFamilyDrop:0", - "FlushJob::WriteLevel0Table"}, - {"VersionSet::LogAndApply::ColumnFamilyDrop:1", - "FlushJob::InstallResults"}, - {"FlushJob::InstallResults", - "VersionSet::LogAndApply::ColumnFamilyDrop:2"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - test::SleepingBackgroundTask sleeping_task; - - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, - Env::Priority::HIGH); - // Make sure the task is sleeping. Otherwise, it might start to execute - // after sleeping_task.WaitUntilDone() and cause TSAN warning. - sleeping_task.WaitUntilSleeping(); - - // 1MB should create ~10 files for each CF - int kKeysNum = 10000; - PutRandomData(1, kKeysNum, 100); - - std::vector threads; - threads.emplace_back([&] { ASSERT_OK(db_->DropColumnFamily(handles_[1])); }); - - sleeping_task.WakeUp(); - sleeping_task.WaitUntilDone(); - sleeping_task.Reset(); - // now we sleep again. this is just so we're certain that flush job finished - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, - Env::Priority::HIGH); - // Make sure the task is sleeping. Otherwise, it might start to execute - // after sleeping_task.WaitUntilDone() and cause TSAN warning. 
- sleeping_task.WaitUntilSleeping(); - sleeping_task.WakeUp(); - sleeping_task.WaitUntilDone(); - - { - // Since we didn't delete CF handle, RocksDB's contract guarantees that - // we're still able to read dropped CF - std::unique_ptr iterator( - db_->NewIterator(ReadOptions(), handles_[1])); - int count = 0; - for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { - ASSERT_OK(iterator->status()); - ++count; - } - ASSERT_OK(iterator->status()); - ASSERT_EQ(count, kKeysNum); - } - for (auto& t : threads) { - t.join(); - } - - Close(); - Destroy(); -} - -namespace { -std::atomic test_stage(0); -std::atomic ordered_by_writethread(false); -const int kMainThreadStartPersistingOptionsFile = 1; -const int kChildThreadFinishDroppingColumnFamily = 2; -void DropSingleColumnFamily(ColumnFamilyTest* cf_test, int cf_id, - std::vector* comparators) { - while (test_stage < kMainThreadStartPersistingOptionsFile && - !ordered_by_writethread) { - Env::Default()->SleepForMicroseconds(100); - } - cf_test->DropColumnFamilies({cf_id}); - if ((*comparators)[cf_id]) { - delete (*comparators)[cf_id]; - (*comparators)[cf_id] = nullptr; - } - test_stage = kChildThreadFinishDroppingColumnFamily; -} -} // anonymous namespace - -TEST_P(ColumnFamilyTest, CreateAndDropRace) { - const int kCfCount = 5; - std::vector cf_opts; - std::vector comparators; - for (int i = 0; i < kCfCount; ++i) { - cf_opts.emplace_back(); - comparators.push_back(new test::SimpleSuffixReverseComparator()); - cf_opts.back().comparator = comparators.back(); - } - db_options_.create_if_missing = true; - db_options_.create_missing_column_families = true; - - auto main_thread_id = std::this_thread::get_id(); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "PersistRocksDBOptions:start", [&](void* /*arg*/) { - auto current_thread_id = std::this_thread::get_id(); - // If it's the main thread hitting this sync-point, then it - // will be blocked until some other thread update the test_stage. - if (main_thread_id == current_thread_id) { - test_stage = kMainThreadStartPersistingOptionsFile; - while (test_stage < kChildThreadFinishDroppingColumnFamily && - !ordered_by_writethread) { - Env::Default()->SleepForMicroseconds(100); - } - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::EnterUnbatched:Wait", [&](void* /*arg*/) { - // This means a thread doing DropColumnFamily() is waiting for - // other thread to finish persisting options. - // In such case, we update the test_stage to unblock the main thread. 
- ordered_by_writethread = true; - }); - - // Create a database with four column families - Open({"default", "one", "two", "three"}, - {cf_opts[0], cf_opts[1], cf_opts[2], cf_opts[3]}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Start a thread that will drop the first column family - // and its comparator - ROCKSDB_NAMESPACE::port::Thread drop_cf_thread(DropSingleColumnFamily, this, - 1, &comparators); - - DropColumnFamilies({2}); - - drop_cf_thread.join(); - Close(); - Destroy(); - for (auto* comparator : comparators) { - if (comparator) { - delete comparator; - } - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(ColumnFamilyTest, WriteStallSingleColumnFamily) { - const uint64_t kBaseRate = 800000u; - db_options_.delayed_write_rate = kBaseRate; - db_options_.max_background_compactions = 6; - - Open({"default"}); - ColumnFamilyData* cfd = - static_cast(db_->DefaultColumnFamily())->cfd(); - - VersionStorageInfo* vstorage = cfd->current()->storage_info(); - - MutableCFOptions mutable_cf_options(column_family_options_); - - mutable_cf_options.level0_slowdown_writes_trigger = 20; - mutable_cf_options.level0_stop_writes_trigger = 10000; - mutable_cf_options.soft_pending_compaction_bytes_limit = 200; - mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; - mutable_cf_options.disable_auto_compactions = false; - - vstorage->TEST_set_estimated_compaction_needed_bytes(50); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(201); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(400); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(500); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(450); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(205); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(202); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(201); - RecalculateWriteStallConditions(cfd, 
mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(198); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(399); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(599); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(2001); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(3001); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(390); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(100); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - - vstorage->set_l0_delay_trigger_count(100); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->set_l0_delay_trigger_count(101); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); - - vstorage->set_l0_delay_trigger_count(0); - vstorage->TEST_set_estimated_compaction_needed_bytes(300); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate()); - - vstorage->set_l0_delay_trigger_count(101); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25 / 1.25 / 1.25, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(200); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate()); - - vstorage->set_l0_delay_trigger_count(0); - vstorage->TEST_set_estimated_compaction_needed_bytes(0); - RecalculateWriteStallConditions(cfd, mutable_cf_options); 
- ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - - mutable_cf_options.disable_auto_compactions = true; - dbfull()->TEST_write_controler().set_delayed_write_rate(kBaseRate); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - - vstorage->set_l0_delay_trigger_count(50); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(0, GetDbDelayedWriteRate()); - ASSERT_EQ(kBaseRate, dbfull()->TEST_write_controler().delayed_write_rate()); - - vstorage->set_l0_delay_trigger_count(60); - vstorage->TEST_set_estimated_compaction_needed_bytes(300); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(0, GetDbDelayedWriteRate()); - ASSERT_EQ(kBaseRate, dbfull()->TEST_write_controler().delayed_write_rate()); - - mutable_cf_options.disable_auto_compactions = false; - vstorage->set_l0_delay_trigger_count(70); - vstorage->TEST_set_estimated_compaction_needed_bytes(500); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); - - vstorage->set_l0_delay_trigger_count(71); - vstorage->TEST_set_estimated_compaction_needed_bytes(501); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); -} - -TEST_P(ColumnFamilyTest, CompactionSpeedupSingleColumnFamily) { - db_options_.max_background_compactions = 6; - Open({"default"}); - ColumnFamilyData* cfd = - static_cast(db_->DefaultColumnFamily())->cfd(); - - VersionStorageInfo* vstorage = cfd->current()->storage_info(); - - MutableCFOptions mutable_cf_options(column_family_options_); - - // Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8 - mutable_cf_options.level0_file_num_compaction_trigger = 4; - mutable_cf_options.level0_slowdown_writes_trigger = 36; - mutable_cf_options.level0_stop_writes_trigger = 50; - // Speedup threshold = 200 / 4 = 50 - mutable_cf_options.soft_pending_compaction_bytes_limit = 200; - mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; - - vstorage->TEST_set_estimated_compaction_needed_bytes(40); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(50); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(300); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(45); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->set_l0_delay_trigger_count(7); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->set_l0_delay_trigger_count(9); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(6, 
dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->set_l0_delay_trigger_count(6); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - - // Speed up threshold = min(4 * 2, 4 + (12 - 4)/4) = 6 - mutable_cf_options.level0_file_num_compaction_trigger = 4; - mutable_cf_options.level0_slowdown_writes_trigger = 16; - mutable_cf_options.level0_stop_writes_trigger = 30; - - vstorage->set_l0_delay_trigger_count(5); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->set_l0_delay_trigger_count(7); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->set_l0_delay_trigger_count(3); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); -} - -TEST_P(ColumnFamilyTest, WriteStallTwoColumnFamilies) { - const uint64_t kBaseRate = 810000u; - db_options_.delayed_write_rate = kBaseRate; - Open(); - CreateColumnFamilies({"one"}); - ColumnFamilyData* cfd = - static_cast(db_->DefaultColumnFamily())->cfd(); - VersionStorageInfo* vstorage = cfd->current()->storage_info(); - - ColumnFamilyData* cfd1 = - static_cast(handles_[1])->cfd(); - VersionStorageInfo* vstorage1 = cfd1->current()->storage_info(); - - MutableCFOptions mutable_cf_options(column_family_options_); - mutable_cf_options.level0_slowdown_writes_trigger = 20; - mutable_cf_options.level0_stop_writes_trigger = 10000; - mutable_cf_options.soft_pending_compaction_bytes_limit = 200; - mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; - - MutableCFOptions mutable_cf_options1 = mutable_cf_options; - mutable_cf_options1.soft_pending_compaction_bytes_limit = 500; - - vstorage->TEST_set_estimated_compaction_needed_bytes(50); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - - vstorage1->TEST_set_estimated_compaction_needed_bytes(201); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - - vstorage1->TEST_set_estimated_compaction_needed_bytes(600); - RecalculateWriteStallConditions(cfd1, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(70); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); - - vstorage1->TEST_set_estimated_compaction_needed_bytes(800); - RecalculateWriteStallConditions(cfd1, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(300); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate()); - - vstorage1->TEST_set_estimated_compaction_needed_bytes(700); - RecalculateWriteStallConditions(cfd1, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - 
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(500); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate()); - - vstorage1->TEST_set_estimated_compaction_needed_bytes(600); - RecalculateWriteStallConditions(cfd1, mutable_cf_options); - ASSERT_TRUE(!IsDbWriteStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); -} - -TEST_P(ColumnFamilyTest, CompactionSpeedupTwoColumnFamilies) { - db_options_.max_background_compactions = 6; - column_family_options_.soft_pending_compaction_bytes_limit = 200; - column_family_options_.hard_pending_compaction_bytes_limit = 2000; - Open(); - CreateColumnFamilies({"one"}); - ColumnFamilyData* cfd = - static_cast(db_->DefaultColumnFamily())->cfd(); - VersionStorageInfo* vstorage = cfd->current()->storage_info(); - - ColumnFamilyData* cfd1 = - static_cast(handles_[1])->cfd(); - VersionStorageInfo* vstorage1 = cfd1->current()->storage_info(); - - MutableCFOptions mutable_cf_options(column_family_options_); - // Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8 - mutable_cf_options.level0_file_num_compaction_trigger = 4; - mutable_cf_options.level0_slowdown_writes_trigger = 36; - mutable_cf_options.level0_stop_writes_trigger = 30; - // Speedup threshold = 200 / 4 = 50 - mutable_cf_options.soft_pending_compaction_bytes_limit = 200; - mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; - - MutableCFOptions mutable_cf_options1 = mutable_cf_options; - mutable_cf_options1.level0_slowdown_writes_trigger = 16; - - vstorage->TEST_set_estimated_compaction_needed_bytes(40); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(60); - RecalculateWriteStallConditions(cfd1, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage1->TEST_set_estimated_compaction_needed_bytes(30); - RecalculateWriteStallConditions(cfd1, mutable_cf_options); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage1->TEST_set_estimated_compaction_needed_bytes(70); - RecalculateWriteStallConditions(cfd1, mutable_cf_options); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->TEST_set_estimated_compaction_needed_bytes(20); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage1->TEST_set_estimated_compaction_needed_bytes(3); - RecalculateWriteStallConditions(cfd1, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->set_l0_delay_trigger_count(9); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage1->set_l0_delay_trigger_count(2); - RecalculateWriteStallConditions(cfd1, mutable_cf_options); - ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); - - vstorage->set_l0_delay_trigger_count(0); - RecalculateWriteStallConditions(cfd, mutable_cf_options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); -} - -TEST_P(ColumnFamilyTest, CreateAndDestroyOptions) { - 
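// What this test appears to exercise: the ColumnFamilyOptions object only has
// to outlive the CreateColumnFamily() call itself -- it is reset immediately
// afterwards, yet the returned handle can still be written to, flushed,
// dropped, and destroyed.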
std::unique_ptr cfo(new ColumnFamilyOptions()); - ColumnFamilyHandle* cfh; - Open(); - ASSERT_OK(db_->CreateColumnFamily(*(cfo.get()), "yoyo", &cfh)); - cfo.reset(); - ASSERT_OK(db_->Put(WriteOptions(), cfh, "foo", "bar")); - ASSERT_OK(db_->Flush(FlushOptions(), cfh)); - ASSERT_OK(db_->DropColumnFamily(cfh)); - ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh)); -} - -TEST_P(ColumnFamilyTest, CreateDropAndDestroy) { - ColumnFamilyHandle* cfh; - Open(); - ASSERT_OK(db_->CreateColumnFamily(ColumnFamilyOptions(), "yoyo", &cfh)); - ASSERT_OK(db_->Put(WriteOptions(), cfh, "foo", "bar")); - ASSERT_OK(db_->Flush(FlushOptions(), cfh)); - ASSERT_OK(db_->DropColumnFamily(cfh)); - ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh)); -} - -TEST_P(ColumnFamilyTest, CreateDropAndDestroyWithoutFileDeletion) { - ColumnFamilyHandle* cfh; - Open(); - ASSERT_OK(db_->CreateColumnFamily(ColumnFamilyOptions(), "yoyo", &cfh)); - ASSERT_OK(db_->Put(WriteOptions(), cfh, "foo", "bar")); - ASSERT_OK(db_->Flush(FlushOptions(), cfh)); - ASSERT_OK(db_->DisableFileDeletions()); - ASSERT_OK(db_->DropColumnFamily(cfh)); - ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh)); -} - -TEST_P(ColumnFamilyTest, FlushCloseWALFiles) { - SpecialEnv env(Env::Default()); - db_options_.env = &env; - db_options_.max_background_flushes = 1; - column_family_options_.memtable_factory.reset( - test::NewSpecialSkipListFactory(2)); - Open(); - CreateColumnFamilies({"one"}); - ASSERT_OK(Put(1, "fodor", "mirko")); - ASSERT_OK(Put(0, "fodor", "mirko")); - ASSERT_OK(Put(1, "fodor", "mirko")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::BGWorkFlush:done", "FlushCloseWALFiles:0"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Block flush jobs from running - test::SleepingBackgroundTask sleeping_task; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, - Env::Priority::HIGH); - // Make sure the task is sleeping. Otherwise, it might start to execute - // after sleeping_task.WaitUntilDone() and cause TSAN warning. - sleeping_task.WaitUntilSleeping(); - - WriteOptions wo; - wo.sync = true; - ASSERT_OK(db_->Put(wo, handles_[1], "fodor", "mirko")); - - ASSERT_EQ(2, env.num_open_wal_file_.load()); - - sleeping_task.WakeUp(); - sleeping_task.WaitUntilDone(); - TEST_SYNC_POINT("FlushCloseWALFiles:0"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_EQ(1, env.num_open_wal_file_.load()); - - Reopen(); - ASSERT_EQ("mirko", Get(0, "fodor")); - ASSERT_EQ("mirko", Get(1, "fodor")); - db_options_.env = env_; - Close(); -} - -TEST_P(ColumnFamilyTest, IteratorCloseWALFile1) { - SpecialEnv env(Env::Default()); - db_options_.env = &env; - db_options_.max_background_flushes = 1; - column_family_options_.memtable_factory.reset( - test::NewSpecialSkipListFactory(2)); - Open(); - CreateColumnFamilies({"one"}); - ASSERT_OK(Put(1, "fodor", "mirko")); - // Create an iterator holding the current super version. - Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]); - ASSERT_OK(it->status()); - // A flush will make `it` hold the last reference of its super version. - ASSERT_OK(Flush(1)); - - ASSERT_OK(Put(1, "fodor", "mirko")); - ASSERT_OK(Put(0, "fodor", "mirko")); - ASSERT_OK(Put(1, "fodor", "mirko")); - - // Flush jobs will close previous WAL files after finishing. By - // block flush jobs from running, we trigger a condition where - // the iterator destructor should close the WAL files. 
- test::SleepingBackgroundTask sleeping_task; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, - Env::Priority::HIGH); - // Make sure the task is sleeping. Otherwise, it might start to execute - // after sleeping_task.WaitUntilDone() and cause TSAN warning. - sleeping_task.WaitUntilSleeping(); - - WriteOptions wo; - wo.sync = true; - ASSERT_OK(db_->Put(wo, handles_[1], "fodor", "mirko")); - - ASSERT_EQ(2, env.num_open_wal_file_.load()); - // Deleting the iterator will clear its super version, triggering - // closing all files - delete it; - ASSERT_EQ(1, env.num_open_wal_file_.load()); - - sleeping_task.WakeUp(); - sleeping_task.WaitUntilDone(); - WaitForFlush(1); - - Reopen(); - ASSERT_EQ("mirko", Get(0, "fodor")); - ASSERT_EQ("mirko", Get(1, "fodor")); - db_options_.env = env_; - Close(); -} - -TEST_P(ColumnFamilyTest, IteratorCloseWALFile2) { - SpecialEnv env(Env::Default()); - // Allow both of flush and purge job to schedule. - env.SetBackgroundThreads(2, Env::HIGH); - db_options_.env = &env; - db_options_.max_background_flushes = 1; - column_family_options_.memtable_factory.reset( - test::NewSpecialSkipListFactory(2)); - Open(); - CreateColumnFamilies({"one"}); - ASSERT_OK(Put(1, "fodor", "mirko")); - // Create an iterator holding the current super version. - ReadOptions ro; - ro.background_purge_on_iterator_cleanup = true; - Iterator* it = db_->NewIterator(ro, handles_[1]); - ASSERT_OK(it->status()); - // A flush will make `it` hold the last reference of its super version. - ASSERT_OK(Flush(1)); - - ASSERT_OK(Put(1, "fodor", "mirko")); - ASSERT_OK(Put(0, "fodor", "mirko")); - ASSERT_OK(Put(1, "fodor", "mirko")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"ColumnFamilyTest::IteratorCloseWALFile2:0", - "DBImpl::BGWorkPurge:start"}, - {"ColumnFamilyTest::IteratorCloseWALFile2:2", - "DBImpl::BackgroundCallFlush:start"}, - {"DBImpl::BGWorkPurge:end", "ColumnFamilyTest::IteratorCloseWALFile2:1"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - WriteOptions wo; - wo.sync = true; - ASSERT_OK(db_->Put(wo, handles_[1], "fodor", "mirko")); - - ASSERT_EQ(2, env.num_open_wal_file_.load()); - // Deleting the iterator will clear its super version, triggering - // closing all files - delete it; - ASSERT_EQ(2, env.num_open_wal_file_.load()); - - TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:0"); - TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:1"); - ASSERT_EQ(1, env.num_open_wal_file_.load()); - TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:2"); - WaitForFlush(1); - ASSERT_EQ(1, env.num_open_wal_file_.load()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - Reopen(); - ASSERT_EQ("mirko", Get(0, "fodor")); - ASSERT_EQ("mirko", Get(1, "fodor")); - db_options_.env = env_; - Close(); -} - -TEST_P(ColumnFamilyTest, ForwardIteratorCloseWALFile) { - SpecialEnv env(Env::Default()); - // Allow both of flush and purge job to schedule. - env.SetBackgroundThreads(2, Env::HIGH); - db_options_.env = &env; - db_options_.max_background_flushes = 1; - column_family_options_.memtable_factory.reset( - test::NewSpecialSkipListFactory(3)); - column_family_options_.level0_file_num_compaction_trigger = 2; - Open(); - CreateColumnFamilies({"one"}); - ASSERT_OK(Put(1, "fodor", "mirko")); - ASSERT_OK(Put(1, "fodar2", "mirko")); - ASSERT_OK(Flush(1)); - - // Create an iterator holding the current super version, as well as - // the SST file just flushed. 
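// The two ReadOptions flags set just below are the combination under test: a
// tailing (forward) iterator that keeps seeing new writes, together with
// background_purge_on_iterator_cleanup, so that releasing the iterator's last
// super-version reference hands the obsolete-file purge to the Env::HIGH pool
// (the DBImpl::BGWorkPurge job) instead of running it in the caller's thread;
// that is why this fixture reserved two HIGH-priority background threads above.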
- ReadOptions ro; - ro.tailing = true; - ro.background_purge_on_iterator_cleanup = true; - Iterator* it = db_->NewIterator(ro, handles_[1]); - // A flush will make `it` hold the last reference of its super version. - - ASSERT_OK(Put(1, "fodor", "mirko")); - ASSERT_OK(Put(1, "fodar2", "mirko")); - ASSERT_OK(Flush(1)); - - WaitForCompaction(); - - ASSERT_OK(Put(1, "fodor", "mirko")); - ASSERT_OK(Put(1, "fodor", "mirko")); - ASSERT_OK(Put(0, "fodor", "mirko")); - ASSERT_OK(Put(1, "fodor", "mirko")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"ColumnFamilyTest::IteratorCloseWALFile2:0", - "DBImpl::BGWorkPurge:start"}, - {"ColumnFamilyTest::IteratorCloseWALFile2:2", - "DBImpl::BackgroundCallFlush:start"}, - {"DBImpl::BGWorkPurge:end", "ColumnFamilyTest::IteratorCloseWALFile2:1"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - WriteOptions wo; - wo.sync = true; - ASSERT_OK(db_->Put(wo, handles_[1], "fodor", "mirko")); - - env.delete_count_.store(0); - ASSERT_EQ(2, env.num_open_wal_file_.load()); - // Deleting the iterator will clear its super version, triggering - // closing all files - it->Seek(""); - ASSERT_OK(it->status()); - - ASSERT_EQ(2, env.num_open_wal_file_.load()); - ASSERT_EQ(0, env.delete_count_.load()); - - TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:0"); - TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:1"); - ASSERT_EQ(1, env.num_open_wal_file_.load()); - ASSERT_EQ(1, env.delete_count_.load()); - TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:2"); - WaitForFlush(1); - ASSERT_EQ(1, env.num_open_wal_file_.load()); - ASSERT_EQ(1, env.delete_count_.load()); - - delete it; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - Reopen(); - ASSERT_EQ("mirko", Get(0, "fodor")); - ASSERT_EQ("mirko", Get(1, "fodor")); - db_options_.env = env_; - Close(); -} - -// Disable on windows because SyncWAL requires env->IsSyncThreadSafe() -// to return true which is not so in unbuffered mode. -#ifndef OS_WIN -TEST_P(ColumnFamilyTest, LogSyncConflictFlush) { - Open(); - CreateColumnFamiliesAndReopen({"one", "two"}); - - ASSERT_OK(Put(0, "", "")); - ASSERT_OK(Put(1, "foo", "bar")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::SyncWAL:BeforeMarkLogsSynced:1", - "ColumnFamilyTest::LogSyncConflictFlush:1"}, - {"ColumnFamilyTest::LogSyncConflictFlush:2", - "DBImpl::SyncWAL:BeforeMarkLogsSynced:2"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ROCKSDB_NAMESPACE::port::Thread thread([&] { ASSERT_OK(db_->SyncWAL()); }); - - TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:1"); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Flush(1)); - - TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:2"); - - thread.join(); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - Close(); -} -#endif - -// this test is placed here, because the infrastructure for Column Family -// test is being used to ensure a roll of wal files. 
-// Basic idea is to test that WAL truncation is being detected and not -// ignored -TEST_P(ColumnFamilyTest, DISABLED_LogTruncationTest) { - Open(); - CreateColumnFamiliesAndReopen({"one", "two"}); - - Build(0, 100); - - // Flush the 0th column family to force a roll of the wal log - ASSERT_OK(Flush(0)); - - // Add some more entries - Build(100, 100); - - std::vector filenames; - ASSERT_OK(env_->GetChildren(dbname_, &filenames)); - - // collect wal files - std::vector logfs; - for (size_t i = 0; i < filenames.size(); i++) { - uint64_t number; - FileType type; - if (!(ParseFileName(filenames[i], &number, &type))) continue; - - if (type != kWalFile) continue; - - logfs.push_back(filenames[i]); - } - - std::sort(logfs.begin(), logfs.end()); - ASSERT_GE(logfs.size(), 2); - - // Take the last but one file, and truncate it - std::string fpath = dbname_ + "/" + logfs[logfs.size() - 2]; - std::vector names_save = names_; - - uint64_t fsize; - ASSERT_OK(env_->GetFileSize(fpath, &fsize)); - ASSERT_GT(fsize, 0); - - Close(); - - std::string backup_logs = dbname_ + "/backup_logs"; - std::string t_fpath = backup_logs + "/" + logfs[logfs.size() - 2]; - - ASSERT_OK(env_->CreateDirIfMissing(backup_logs)); - // Not sure how easy it is to make this data driven. - // need to read back the WAL file and truncate last 10 - // entries - CopyFile(fpath, t_fpath, fsize - 9180); - - ASSERT_OK(env_->DeleteFile(fpath)); - ASSERT_OK(env_->RenameFile(t_fpath, fpath)); - - db_options_.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - - OpenReadOnly(names_save); - - CheckMissed(); - - Close(); - - Open(names_save); - - CheckMissed(); - - Close(); - - // cleanup - ASSERT_OK(env_->DeleteDir(backup_logs)); -} - -TEST_P(ColumnFamilyTest, DefaultCfPathsTest) { - Open(); - // Leave cf_paths for one column families to be empty. - // Files should be generated according to db_paths for that - // column family. - ColumnFamilyOptions cf_opt1, cf_opt2; - cf_opt1.cf_paths.emplace_back(dbname_ + "_one_1", - std::numeric_limits::max()); - CreateColumnFamilies({"one", "two"}, {cf_opt1, cf_opt2}); - Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2}); - - // Fill Column family 1. - PutRandomData(1, 100, 100); - ASSERT_OK(Flush(1)); - - ASSERT_EQ(1, GetSstFileCount(cf_opt1.cf_paths[0].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // Fill column family 2 - PutRandomData(2, 100, 100); - ASSERT_OK(Flush(2)); - - // SST from Column family 2 should be generated in - // db_paths which is dbname_ in this case. - ASSERT_EQ(1, GetSstFileCount(dbname_)); -} - -TEST_P(ColumnFamilyTest, MultipleCFPathsTest) { - Open(); - // Configure Column family specific paths. - ColumnFamilyOptions cf_opt1, cf_opt2; - cf_opt1.cf_paths.emplace_back(dbname_ + "_one_1", - std::numeric_limits::max()); - cf_opt2.cf_paths.emplace_back(dbname_ + "_two_1", - std::numeric_limits::max()); - CreateColumnFamilies({"one", "two"}, {cf_opt1, cf_opt2}); - Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2}); - - PutRandomData(1, 100, 100, true /* save */); - ASSERT_OK(Flush(1)); - - // Check that files are generated in appropriate paths. - ASSERT_EQ(1, GetSstFileCount(cf_opt1.cf_paths[0].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - PutRandomData(2, 100, 100, true /* save */); - ASSERT_OK(Flush(2)); - - ASSERT_EQ(1, GetSstFileCount(cf_opt2.cf_paths[0].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // Re-open and verify the keys. 
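// Note on cf_paths: SST placement is resolved per column family as cf_paths if it
// is non-empty, otherwise DBOptions::db_paths, otherwise the DB directory itself
// (that fallback is what DefaultCfPathsTest above relies on). The same cf_paths
// also need to be passed again on reopen, as done just below, so the files in the
// auxiliary directories can be located. Minimal configuration sketch
// (illustrative path name):
//
//   ColumnFamilyOptions cf_opts;
//   cf_opts.cf_paths.emplace_back("/data/cf_one",
//                                 std::numeric_limits<uint64_t>::max());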
- Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2}); - DBImpl* dbi = static_cast_with_check(db_); - for (int cf = 1; cf != 3; ++cf) { - ReadOptions read_options; - read_options.readahead_size = 0; - auto it = dbi->NewIterator(read_options, handles_[cf]); - for (it->SeekToFirst(); it->Valid(); it->Next()) { - ASSERT_OK(it->status()); - Slice key(it->key()); - ASSERT_NE(keys_[cf].end(), keys_[cf].find(key.ToString())); - } - ASSERT_OK(it->status()); - delete it; - - for (const auto& key : keys_[cf]) { - ASSERT_NE("NOT_FOUND", Get(cf, key)); - } - } -} - -TEST(ColumnFamilyTest, ValidateBlobGCCutoff) { - DBOptions db_options; - - ColumnFamilyOptions cf_options; - cf_options.enable_blob_garbage_collection = true; - - cf_options.blob_garbage_collection_age_cutoff = -0.5; - ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) - .IsInvalidArgument()); - - cf_options.blob_garbage_collection_age_cutoff = 0.0; - ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); - - cf_options.blob_garbage_collection_age_cutoff = 0.5; - ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); - - cf_options.blob_garbage_collection_age_cutoff = 1.0; - ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); - - cf_options.blob_garbage_collection_age_cutoff = 1.5; - ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) - .IsInvalidArgument()); -} - -TEST(ColumnFamilyTest, ValidateBlobGCForceThreshold) { - DBOptions db_options; - - ColumnFamilyOptions cf_options; - cf_options.enable_blob_garbage_collection = true; - - cf_options.blob_garbage_collection_force_threshold = -0.5; - ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) - .IsInvalidArgument()); - - cf_options.blob_garbage_collection_force_threshold = 0.0; - ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); - - cf_options.blob_garbage_collection_force_threshold = 0.5; - ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); - - cf_options.blob_garbage_collection_force_threshold = 1.0; - ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); - - cf_options.blob_garbage_collection_force_threshold = 1.5; - ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) - .IsInvalidArgument()); -} - -TEST(ColumnFamilyTest, ValidateMemtableKVChecksumOption) { - DBOptions db_options; - - ColumnFamilyOptions cf_options; - ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); - - cf_options.memtable_protection_bytes_per_key = 5; - ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) - .IsNotSupported()); - - cf_options.memtable_protection_bytes_per_key = 1; - ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); - - cf_options.memtable_protection_bytes_per_key = 16; - ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) - .IsNotSupported()); - - cf_options.memtable_protection_bytes_per_key = 0; - ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/compact_files_test.cc b/db/compact_files_test.cc deleted file mode 100644 index ad94ad340..000000000 --- a/db/compact_files_test.cc +++ /dev/null @@ -1,491 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - - -#include -#include -#include -#include - -#include "db/db_impl/db_impl.h" -#include "port/port.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "util/cast_util.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class CompactFilesTest : public testing::Test { - public: - CompactFilesTest() { - env_ = Env::Default(); - db_name_ = test::PerThreadDBPath("compact_files_test"); - } - - std::string db_name_; - Env* env_; -}; - -// A class which remembers the name of each flushed file. -class FlushedFileCollector : public EventListener { - public: - FlushedFileCollector() {} - ~FlushedFileCollector() override {} - - void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { - std::lock_guard lock(mutex_); - flushed_files_.push_back(info.file_path); - } - - std::vector GetFlushedFiles() { - std::lock_guard lock(mutex_); - std::vector result; - for (auto fname : flushed_files_) { - result.push_back(fname); - } - return result; - } - void ClearFlushedFiles() { - std::lock_guard lock(mutex_); - flushed_files_.clear(); - } - - private: - std::vector flushed_files_; - std::mutex mutex_; -}; - -TEST_F(CompactFilesTest, L0ConflictsFiles) { - Options options; - // to trigger compaction more easily - const int kWriteBufferSize = 10000; - const int kLevel0Trigger = 2; - options.create_if_missing = true; - options.compaction_style = kCompactionStyleLevel; - // Small slowdown and stop trigger for experimental purpose. - options.level0_slowdown_writes_trigger = 20; - options.level0_stop_writes_trigger = 20; - options.level0_stop_writes_trigger = 20; - options.write_buffer_size = kWriteBufferSize; - options.level0_file_num_compaction_trigger = kLevel0Trigger; - options.compression = kNoCompression; - - DB* db = nullptr; - ASSERT_OK(DestroyDB(db_name_, options)); - Status s = DB::Open(options, db_name_, &db); - assert(s.ok()); - assert(db); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"CompactFilesImpl:0", "BackgroundCallCompaction:0"}, - {"BackgroundCallCompaction:1", "CompactFilesImpl:1"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // create couple files - // Background compaction starts and waits in BackgroundCallCompaction:0 - for (int i = 0; i < kLevel0Trigger * 4; ++i) { - ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), "")); - ASSERT_OK(db->Put(WriteOptions(), std::to_string(100 - i), "")); - ASSERT_OK(db->Flush(FlushOptions())); - } - - ROCKSDB_NAMESPACE::ColumnFamilyMetaData meta; - db->GetColumnFamilyMetaData(&meta); - std::string file1; - for (auto& file : meta.levels[0].files) { - ASSERT_EQ(0, meta.levels[0].level); - if (file1 == "") { - file1 = file.db_path + "/" + file.name; - } else { - std::string file2 = file.db_path + "/" + file.name; - // Another thread starts a compact files and creates an L0 compaction - // The background compaction then notices that there is an L0 compaction - // already in progress and doesn't do an L0 compaction - // Once the background compaction finishes, the compact files finishes - ASSERT_OK(db->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), - {file1, file2}, 0)); - break; - } - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - delete db; -} - -TEST_F(CompactFilesTest, 
MultipleLevel) { - Options options; - options.create_if_missing = true; - options.level_compaction_dynamic_level_bytes = true; - options.num_levels = 6; - // Add listener - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - - DB* db = nullptr; - ASSERT_OK(DestroyDB(db_name_, options)); - Status s = DB::Open(options, db_name_, &db); - ASSERT_OK(s); - ASSERT_NE(db, nullptr); - - // create couple files in L0, L3, L4 and L5 - for (int i = 5; i > 2; --i) { - collector->ClearFlushedFiles(); - ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), "")); - ASSERT_OK(db->Flush(FlushOptions())); - // Ensure background work is fully finished including listener callbacks - // before accessing listener state. - ASSERT_OK(static_cast_with_check(db)->TEST_WaitForBackgroundWork()); - auto l0_files = collector->GetFlushedFiles(); - ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, i)); - - std::string prop; - ASSERT_TRUE(db->GetProperty( - "rocksdb.num-files-at-level" + std::to_string(i), &prop)); - ASSERT_EQ("1", prop); - } - ASSERT_OK(db->Put(WriteOptions(), std::to_string(0), "")); - ASSERT_OK(db->Flush(FlushOptions())); - - ColumnFamilyMetaData meta; - db->GetColumnFamilyMetaData(&meta); - // Compact files except the file in L3 - std::vector files; - for (int i = 0; i < 6; ++i) { - if (i == 3) continue; - for (auto& file : meta.levels[i].files) { - files.push_back(file.db_path + "/" + file.name); - } - } - - SyncPoint::GetInstance()->LoadDependency({ - {"CompactionJob::Run():Start", "CompactFilesTest.MultipleLevel:0"}, - {"CompactFilesTest.MultipleLevel:1", "CompactFilesImpl:3"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - - std::thread thread([&] { - TEST_SYNC_POINT("CompactFilesTest.MultipleLevel:0"); - ASSERT_OK(db->Put(WriteOptions(), "bar", "v2")); - ASSERT_OK(db->Put(WriteOptions(), "foo", "v2")); - ASSERT_OK(db->Flush(FlushOptions())); - TEST_SYNC_POINT("CompactFilesTest.MultipleLevel:1"); - }); - - // Compaction cannot move up the data to higher level - // here we have input file from level 5, so the output level has to be >= 5 - for (int invalid_output_level = 0; invalid_output_level < 5; - invalid_output_level++) { - s = db->CompactFiles(CompactionOptions(), files, invalid_output_level); - std::cout << s.ToString() << std::endl; - ASSERT_TRUE(s.IsInvalidArgument()); - } - - ASSERT_OK(db->CompactFiles(CompactionOptions(), files, 5)); - SyncPoint::GetInstance()->DisableProcessing(); - thread.join(); - - delete db; -} - -TEST_F(CompactFilesTest, ObsoleteFiles) { - Options options; - // to trigger compaction more easily - const int kWriteBufferSize = 65536; - options.create_if_missing = true; - // Disable RocksDB background compaction. 
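// kCompactionStyleNone switches automatic background compaction off entirely;
// compactions then run only when submitted explicitly through CompactFiles(), which
// is what this test relies on. A sketch of the alternative knob for the same goal
// (assumption: either is sufficient here), useful when the compaction style itself
// should stay Level or Universal:
//
//   options.disable_auto_compactions = true;  // stop scheduling automatic jobs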
- options.compaction_style = kCompactionStyleNone; - options.level0_slowdown_writes_trigger = (1 << 30); - options.level0_stop_writes_trigger = (1 << 30); - options.write_buffer_size = kWriteBufferSize; - options.max_write_buffer_number = 2; - options.compression = kNoCompression; - - // Add listener - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - - DB* db = nullptr; - ASSERT_OK(DestroyDB(db_name_, options)); - Status s = DB::Open(options, db_name_, &db); - ASSERT_OK(s); - ASSERT_NE(db, nullptr); - - // create couple files - for (int i = 1000; i < 2000; ++i) { - ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), - std::string(kWriteBufferSize / 10, 'a' + (i % 26)))); - } - - auto l0_files = collector->GetFlushedFiles(); - ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); - ASSERT_OK(static_cast_with_check(db)->TEST_WaitForCompact()); - - // verify all compaction input files are deleted - for (auto fname : l0_files) { - ASSERT_EQ(Status::NotFound(), env_->FileExists(fname)); - } - delete db; -} - -TEST_F(CompactFilesTest, NotCutOutputOnLevel0) { - Options options; - options.create_if_missing = true; - // Disable RocksDB background compaction. - options.compaction_style = kCompactionStyleNone; - options.level0_slowdown_writes_trigger = 1000; - options.level0_stop_writes_trigger = 1000; - options.write_buffer_size = 65536; - options.max_write_buffer_number = 2; - options.compression = kNoCompression; - options.max_compaction_bytes = 5000; - - // Add listener - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - - DB* db = nullptr; - ASSERT_OK(DestroyDB(db_name_, options)); - Status s = DB::Open(options, db_name_, &db); - assert(s.ok()); - assert(db); - - // create couple files - for (int i = 0; i < 500; ++i) { - ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), - std::string(1000, 'a' + (i % 26)))); - } - ASSERT_OK(static_cast_with_check(db)->TEST_WaitForFlushMemTable()); - auto l0_files_1 = collector->GetFlushedFiles(); - collector->ClearFlushedFiles(); - for (int i = 0; i < 500; ++i) { - ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), - std::string(1000, 'a' + (i % 26)))); - } - ASSERT_OK(static_cast_with_check(db)->TEST_WaitForFlushMemTable()); - auto l0_files_2 = collector->GetFlushedFiles(); - ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files_1, 0)); - ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files_2, 0)); - // no assertion failure - delete db; -} - -TEST_F(CompactFilesTest, CapturingPendingFiles) { - Options options; - options.create_if_missing = true; - // Disable RocksDB background compaction. - options.compaction_style = kCompactionStyleNone; - // Always do full scans for obsolete files (needed to reproduce the issue). - options.delete_obsolete_files_period_micros = 0; - - // Add listener. - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - - DB* db = nullptr; - ASSERT_OK(DestroyDB(db_name_, options)); - Status s = DB::Open(options, db_name_, &db); - ASSERT_OK(s); - assert(db); - - // Create 5 files. - for (int i = 0; i < 5; ++i) { - ASSERT_OK(db->Put(WriteOptions(), "key" + std::to_string(i), "value")); - ASSERT_OK(db->Flush(FlushOptions())); - } - - // Ensure background work is fully finished including listener callbacks - // before accessing listener state. 
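// How the sync-point choreography below works: each {predecessor, successor} pair
// passed to SyncPoint::LoadDependency() makes execution block at
// TEST_SYNC_POINT(successor) until some thread has passed
// TEST_SYNC_POINT(predecessor). A minimal sketch with made-up marker names:
//
//   SyncPoint::GetInstance()->LoadDependency(
//       {{"Writer:AfterWrite", "Reader:BeforeRead"}});
//   SyncPoint::GetInstance()->EnableProcessing();
//   // Thread A: ... TEST_SYNC_POINT("Writer:AfterWrite"); ...
//   // Thread B: TEST_SYNC_POINT("Reader:BeforeRead");  // waits for thread A
//   SyncPoint::GetInstance()->DisableProcessing();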
- ASSERT_OK(static_cast_with_check(db)->TEST_WaitForBackgroundWork()); - auto l0_files = collector->GetFlushedFiles(); - EXPECT_EQ(5, l0_files.size()); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"CompactFilesImpl:2", "CompactFilesTest.CapturingPendingFiles:0"}, - {"CompactFilesTest.CapturingPendingFiles:1", "CompactFilesImpl:3"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Start compacting files. - ROCKSDB_NAMESPACE::port::Thread compaction_thread( - [&] { EXPECT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); }); - - // In the meantime flush another file. - TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:0"); - ASSERT_OK(db->Put(WriteOptions(), "key5", "value")); - ASSERT_OK(db->Flush(FlushOptions())); - TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:1"); - - compaction_thread.join(); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - delete db; - - // Make sure we can reopen the DB. - s = DB::Open(options, db_name_, &db); - ASSERT_OK(s); - assert(db); - delete db; -} - -TEST_F(CompactFilesTest, CompactionFilterWithGetSv) { - class FilterWithGet : public CompactionFilter { - public: - bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - if (db_ == nullptr) { - return true; - } - std::string res; - db_->Get(ReadOptions(), "", &res); - return true; - } - - void SetDB(DB* db) { db_ = db; } - - const char* Name() const override { return "FilterWithGet"; } - - private: - DB* db_; - }; - - std::shared_ptr cf(new FilterWithGet()); - - Options options; - options.create_if_missing = true; - options.compaction_filter = cf.get(); - - DB* db = nullptr; - ASSERT_OK(DestroyDB(db_name_, options)); - Status s = DB::Open(options, db_name_, &db); - ASSERT_OK(s); - - cf->SetDB(db); - - // Write one L0 file - ASSERT_OK(db->Put(WriteOptions(), "K1", "V1")); - ASSERT_OK(db->Flush(FlushOptions())); - - // Compact all L0 files using CompactFiles - ROCKSDB_NAMESPACE::ColumnFamilyMetaData meta; - db->GetColumnFamilyMetaData(&meta); - for (auto& file : meta.levels[0].files) { - std::string fname = file.db_path + "/" + file.name; - ASSERT_OK( - db->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), {fname}, 0)); - } - - delete db; -} - -TEST_F(CompactFilesTest, SentinelCompressionType) { - if (!Zlib_Supported()) { - fprintf(stderr, "zlib compression not supported, skip this test\n"); - return; - } - if (!Snappy_Supported()) { - fprintf(stderr, "snappy compression not supported, skip this test\n"); - return; - } - // Check that passing `CompressionType::kDisableCompressionOption` to - // `CompactFiles` causes it to use the column family compression options. 
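// Worked expectation for the loop below: the column family is configured with
// compression_per_level = {Snappy, Zlib, Snappy}, so an L0 -> L1 CompactFiles call
// whose CompactionOptions::compression is left at kDisableCompressionOption should
// fall back to the level-1 setting and write Zlib-compressed output, which is then
// checked through TableProperties::compression_name. Sketch:
//
//   CompactionOptions copt;
//   copt.compression = kDisableCompressionOption;  // "use the CF's configured type"
//   ASSERT_OK(db->CompactFiles(copt, l0_files, 1 /* output_level */));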
- for (auto compaction_style : {CompactionStyle::kCompactionStyleLevel, - CompactionStyle::kCompactionStyleUniversal, - CompactionStyle::kCompactionStyleNone}) { - ASSERT_OK(DestroyDB(db_name_, Options())); - Options options; - options.compaction_style = compaction_style; - // L0: Snappy, L1: ZSTD, L2: Snappy - options.compression_per_level = {CompressionType::kSnappyCompression, - CompressionType::kZlibCompression, - CompressionType::kSnappyCompression}; - options.create_if_missing = true; - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - DB* db = nullptr; - ASSERT_OK(DB::Open(options, db_name_, &db)); - - ASSERT_OK(db->Put(WriteOptions(), "key", "val")); - ASSERT_OK(db->Flush(FlushOptions())); - - // Ensure background work is fully finished including listener callbacks - // before accessing listener state. - ASSERT_OK(static_cast_with_check(db)->TEST_WaitForBackgroundWork()); - auto l0_files = collector->GetFlushedFiles(); - ASSERT_EQ(1, l0_files.size()); - - // L0->L1 compaction, so output should be ZSTD-compressed - CompactionOptions compaction_opts; - compaction_opts.compression = CompressionType::kDisableCompressionOption; - ASSERT_OK(db->CompactFiles(compaction_opts, l0_files, 1)); - - ROCKSDB_NAMESPACE::TablePropertiesCollection all_tables_props; - ASSERT_OK(db->GetPropertiesOfAllTables(&all_tables_props)); - for (const auto& name_and_table_props : all_tables_props) { - ASSERT_EQ(CompressionTypeToString(CompressionType::kZlibCompression), - name_and_table_props.second->compression_name); - } - delete db; - } -} - -TEST_F(CompactFilesTest, GetCompactionJobInfo) { - Options options; - options.create_if_missing = true; - // Disable RocksDB background compaction. - options.compaction_style = kCompactionStyleNone; - options.level0_slowdown_writes_trigger = 1000; - options.level0_stop_writes_trigger = 1000; - options.write_buffer_size = 65536; - options.max_write_buffer_number = 2; - options.compression = kNoCompression; - options.max_compaction_bytes = 5000; - - // Add listener - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - - DB* db = nullptr; - ASSERT_OK(DestroyDB(db_name_, options)); - Status s = DB::Open(options, db_name_, &db); - ASSERT_OK(s); - assert(db); - - // create couple files - for (int i = 0; i < 500; ++i) { - ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), - std::string(1000, 'a' + (i % 26)))); - } - ASSERT_OK(static_cast_with_check(db)->TEST_WaitForFlushMemTable()); - auto l0_files_1 = collector->GetFlushedFiles(); - CompactionOptions co; - co.compression = CompressionType::kLZ4Compression; - CompactionJobInfo compaction_job_info{}; - ASSERT_OK( - db->CompactFiles(co, l0_files_1, 0, -1, nullptr, &compaction_job_info)); - ASSERT_EQ(compaction_job_info.base_input_level, 0); - ASSERT_EQ(compaction_job_info.cf_id, db->DefaultColumnFamily()->GetID()); - ASSERT_EQ(compaction_job_info.cf_name, db->DefaultColumnFamily()->GetName()); - ASSERT_EQ(compaction_job_info.compaction_reason, - CompactionReason::kManualCompaction); - ASSERT_EQ(compaction_job_info.compression, CompressionType::kLZ4Compression); - ASSERT_EQ(compaction_job_info.output_level, 0); - ASSERT_OK(compaction_job_info.status); - // no assertion failure - delete db; -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git 
a/db/comparator_db_test.cc b/db/comparator_db_test.cc deleted file mode 100644 index e5e3493b3..000000000 --- a/db/comparator_db_test.cc +++ /dev/null @@ -1,678 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -#include -#include -#include - -#include "memtable/stl_wrappers.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/hash.h" -#include "util/kv_map.h" -#include "util/random.h" -#include "util/string_util.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { -namespace { - -static const Comparator* kTestComparator = nullptr; - -class KVIter : public Iterator { - public: - explicit KVIter(const stl_wrappers::KVMap* map) - : map_(map), iter_(map_->end()) {} - bool Valid() const override { return iter_ != map_->end(); } - void SeekToFirst() override { iter_ = map_->begin(); } - void SeekToLast() override { - if (map_->empty()) { - iter_ = map_->end(); - } else { - iter_ = map_->find(map_->rbegin()->first); - } - } - void Seek(const Slice& k) override { - iter_ = map_->lower_bound(k.ToString()); - } - void SeekForPrev(const Slice& k) override { - iter_ = map_->upper_bound(k.ToString()); - Prev(); - } - void Next() override { ++iter_; } - void Prev() override { - if (iter_ == map_->begin()) { - iter_ = map_->end(); - return; - } - --iter_; - } - - Slice key() const override { return iter_->first; } - Slice value() const override { return iter_->second; } - Status status() const override { return Status::OK(); } - - private: - const stl_wrappers::KVMap* const map_; - stl_wrappers::KVMap::const_iterator iter_; -}; - -void AssertItersEqual(Iterator* iter1, Iterator* iter2) { - ASSERT_EQ(iter1->Valid(), iter2->Valid()); - if (iter1->Valid()) { - ASSERT_EQ(iter1->key().ToString(), iter2->key().ToString()); - ASSERT_EQ(iter1->value().ToString(), iter2->value().ToString()); - } -} - -// Measuring operations on DB (expect to be empty). 
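// The harness below is a small model-based test: every Put/Delete is mirrored into
// an in-memory KVMap ordered by the same comparator, and the DB iterator is then
// driven in lockstep with a KVIter over that map (AssertItersEqual above), e.g.:
//
//   map[key] = key;                                // reference model
//   ASSERT_OK(db->Put(WriteOptions(), key, key));  // system under test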
-// source_strings are candidate keys -void DoRandomIteraratorTest(DB* db, std::vector source_strings, - Random* rnd, int num_writes, int num_iter_ops, - int num_trigger_flush) { - stl_wrappers::KVMap map((stl_wrappers::LessOfComparator(kTestComparator))); - - for (int i = 0; i < num_writes; i++) { - if (num_trigger_flush > 0 && i != 0 && i % num_trigger_flush == 0) { - db->Flush(FlushOptions()); - } - - int type = rnd->Uniform(2); - int index = rnd->Uniform(static_cast(source_strings.size())); - auto& key = source_strings[index]; - switch (type) { - case 0: - // put - map[key] = key; - ASSERT_OK(db->Put(WriteOptions(), key, key)); - break; - case 1: - // delete - if (map.find(key) != map.end()) { - map.erase(key); - } - ASSERT_OK(db->Delete(WriteOptions(), key)); - break; - default: - assert(false); - } - } - - std::unique_ptr iter(db->NewIterator(ReadOptions())); - std::unique_ptr result_iter(new KVIter(&map)); - - bool is_valid = false; - for (int i = 0; i < num_iter_ops; i++) { - // Random walk and make sure iter and result_iter returns the - // same key and value - int type = rnd->Uniform(6); - ASSERT_OK(iter->status()); - switch (type) { - case 0: - // Seek to First - iter->SeekToFirst(); - result_iter->SeekToFirst(); - break; - case 1: - // Seek to last - iter->SeekToLast(); - result_iter->SeekToLast(); - break; - case 2: { - // Seek to random key - auto key_idx = rnd->Uniform(static_cast(source_strings.size())); - auto key = source_strings[key_idx]; - iter->Seek(key); - result_iter->Seek(key); - break; - } - case 3: - // Next - if (is_valid) { - iter->Next(); - result_iter->Next(); - } else { - continue; - } - break; - case 4: - // Prev - if (is_valid) { - iter->Prev(); - result_iter->Prev(); - } else { - continue; - } - break; - default: { - assert(type == 5); - auto key_idx = rnd->Uniform(static_cast(source_strings.size())); - auto key = source_strings[key_idx]; - std::string result; - auto status = db->Get(ReadOptions(), key, &result); - if (map.find(key) == map.end()) { - ASSERT_TRUE(status.IsNotFound()); - } else { - ASSERT_EQ(map[key], result); - } - break; - } - } - AssertItersEqual(iter.get(), result_iter.get()); - is_valid = iter->Valid(); - } -} - -class DoubleComparator : public Comparator { - public: - DoubleComparator() {} - - const char* Name() const override { return "DoubleComparator"; } - - int Compare(const Slice& a, const Slice& b) const override { -#ifndef CYGWIN - double da = std::stod(a.ToString()); - double db = std::stod(b.ToString()); -#else - double da = std::strtod(a.ToString().c_str(), 0 /* endptr */); - double db = std::strtod(a.ToString().c_str(), 0 /* endptr */); -#endif - if (da == db) { - return a.compare(b); - } else if (da > db) { - return 1; - } else { - return -1; - } - } - void FindShortestSeparator(std::string* /*start*/, - const Slice& /*limit*/) const override {} - - void FindShortSuccessor(std::string* /*key*/) const override {} -}; - -class HashComparator : public Comparator { - public: - HashComparator() {} - - const char* Name() const override { return "HashComparator"; } - - int Compare(const Slice& a, const Slice& b) const override { - uint32_t ha = Hash(a.data(), a.size(), 66); - uint32_t hb = Hash(b.data(), b.size(), 66); - if (ha == hb) { - return a.compare(b); - } else if (ha > hb) { - return 1; - } else { - return -1; - } - } - void FindShortestSeparator(std::string* /*start*/, - const Slice& /*limit*/) const override {} - - void FindShortSuccessor(std::string* /*key*/) const override {} -}; - -class TwoStrComparator : public 
Comparator { - public: - TwoStrComparator() {} - - const char* Name() const override { return "TwoStrComparator"; } - - int Compare(const Slice& a, const Slice& b) const override { - assert(a.size() >= 2); - assert(b.size() >= 2); - size_t size_a1 = static_cast(a[0]); - size_t size_b1 = static_cast(b[0]); - size_t size_a2 = static_cast(a[1]); - size_t size_b2 = static_cast(b[1]); - assert(size_a1 + size_a2 + 2 == a.size()); - assert(size_b1 + size_b2 + 2 == b.size()); - - Slice a1 = Slice(a.data() + 2, size_a1); - Slice b1 = Slice(b.data() + 2, size_b1); - Slice a2 = Slice(a.data() + 2 + size_a1, size_a2); - Slice b2 = Slice(b.data() + 2 + size_b1, size_b2); - - if (a1 != b1) { - return a1.compare(b1); - } - return a2.compare(b2); - } - void FindShortestSeparator(std::string* /*start*/, - const Slice& /*limit*/) const override {} - - void FindShortSuccessor(std::string* /*key*/) const override {} -}; -} // anonymous namespace - -class ComparatorDBTest - : public testing::Test, - virtual public ::testing::WithParamInterface { - private: - std::string dbname_; - Env* env_; - DB* db_; - Options last_options_; - std::unique_ptr comparator_guard; - - public: - ComparatorDBTest() : env_(Env::Default()), db_(nullptr) { - kTestComparator = BytewiseComparator(); - dbname_ = test::PerThreadDBPath("comparator_db_test"); - BlockBasedTableOptions toptions; - toptions.format_version = GetParam(); - last_options_.table_factory.reset( - ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(toptions)); - EXPECT_OK(DestroyDB(dbname_, last_options_)); - } - - ~ComparatorDBTest() override { - delete db_; - EXPECT_OK(DestroyDB(dbname_, last_options_)); - kTestComparator = BytewiseComparator(); - } - - DB* GetDB() { return db_; } - - void SetOwnedComparator(const Comparator* cmp, bool owner = true) { - if (owner) { - comparator_guard.reset(cmp); - } else { - comparator_guard.reset(); - } - kTestComparator = cmp; - last_options_.comparator = cmp; - } - - // Return the current option configuration. 
- Options* GetOptions() { return &last_options_; } - - void DestroyAndReopen() { - // Destroy using last options - Destroy(); - ASSERT_OK(TryReopen()); - } - - void Destroy() { - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, last_options_)); - } - - Status TryReopen() { - delete db_; - db_ = nullptr; - last_options_.create_if_missing = true; - - return DB::Open(last_options_, dbname_, &db_); - } -}; - -INSTANTIATE_TEST_CASE_P(FormatDef, ComparatorDBTest, - testing::Values(test::kDefaultFormatVersion)); -INSTANTIATE_TEST_CASE_P(FormatLatest, ComparatorDBTest, - testing::Values(kLatestFormatVersion)); - -TEST_P(ComparatorDBTest, Bytewise) { - for (int rand_seed = 301; rand_seed < 306; rand_seed++) { - DestroyAndReopen(); - Random rnd(rand_seed); - DoRandomIteraratorTest(GetDB(), - {"a", "b", "c", "d", "e", "f", "g", "h", "i"}, &rnd, - 8, 100, 3); - } -} - -TEST_P(ComparatorDBTest, SimpleSuffixReverseComparator) { - SetOwnedComparator(new test::SimpleSuffixReverseComparator()); - - for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { - Options* opt = GetOptions(); - opt->comparator = kTestComparator; - DestroyAndReopen(); - Random rnd(rnd_seed); - - std::vector source_strings; - std::vector source_prefixes; - // Randomly generate 5 prefixes - for (int i = 0; i < 5; i++) { - source_prefixes.push_back(rnd.HumanReadableString(8)); - } - for (int j = 0; j < 20; j++) { - int prefix_index = rnd.Uniform(static_cast(source_prefixes.size())); - std::string key = source_prefixes[prefix_index] + - rnd.HumanReadableString(rnd.Uniform(8)); - source_strings.push_back(key); - } - - DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 30, 600, 66); - } -} - -TEST_P(ComparatorDBTest, Uint64Comparator) { - SetOwnedComparator(test::Uint64Comparator(), false /* owner */); - - for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { - Options* opt = GetOptions(); - opt->comparator = kTestComparator; - DestroyAndReopen(); - Random rnd(rnd_seed); - Random64 rnd64(rnd_seed); - - std::vector source_strings; - // Randomly generate source keys - for (int i = 0; i < 100; i++) { - uint64_t r = rnd64.Next(); - std::string str; - str.resize(8); - memcpy(&str[0], static_cast(&r), 8); - source_strings.push_back(str); - } - - DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 200, 1000, 66); - } -} - -TEST_P(ComparatorDBTest, DoubleComparator) { - SetOwnedComparator(new DoubleComparator()); - - for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { - Options* opt = GetOptions(); - opt->comparator = kTestComparator; - DestroyAndReopen(); - Random rnd(rnd_seed); - - std::vector source_strings; - // Randomly generate source keys - for (int i = 0; i < 100; i++) { - uint32_t r = rnd.Next(); - uint32_t divide_order = rnd.Uniform(8); - double to_divide = 1.0; - for (uint32_t j = 0; j < divide_order; j++) { - to_divide *= 10.0; - } - source_strings.push_back(std::to_string(r / to_divide)); - } - - DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 200, 1000, 66); - } -} - -TEST_P(ComparatorDBTest, HashComparator) { - SetOwnedComparator(new HashComparator()); - - for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { - Options* opt = GetOptions(); - opt->comparator = kTestComparator; - DestroyAndReopen(); - Random rnd(rnd_seed); - - std::vector source_strings; - // Randomly generate source keys - for (int i = 0; i < 100; i++) { - source_strings.push_back(test::RandomKey(&rnd, 8)); - } - - DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 200, 1000, 66); - } -} - -TEST_P(ComparatorDBTest, TwoStrComparator) { - 
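// Key layout assumed by TwoStrComparator (see Compare() above): byte 0 holds the
// length of the first string, byte 1 the length of the second, followed by the two
// strings back to back, so keys order first by str1 and then by str2. A hypothetical
// helper that builds such keys (assumes both lengths stay below 128):
//
//   std::string MakeTwoStrKey(const std::string& a, const std::string& b) {
//     std::string k;
//     k.push_back(static_cast<char>(a.size()));
//     k.push_back(static_cast<char>(b.size()));
//     k += a;
//     k += b;
//     return k;   // MakeTwoStrKey("ab", "xyz") == "\x02\x03" "abxyz"
//   }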
SetOwnedComparator(new TwoStrComparator()); - - for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { - Options* opt = GetOptions(); - opt->comparator = kTestComparator; - DestroyAndReopen(); - Random rnd(rnd_seed); - - std::vector source_strings; - // Randomly generate source keys - for (int i = 0; i < 100; i++) { - std::string str; - uint32_t size1 = rnd.Uniform(8); - uint32_t size2 = rnd.Uniform(8); - str.append(1, static_cast(size1)); - str.append(1, static_cast(size2)); - str.append(test::RandomKey(&rnd, size1)); - str.append(test::RandomKey(&rnd, size2)); - source_strings.push_back(str); - } - - DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 200, 1000, 66); - } -} - -namespace { -void VerifyNotSuccessor(const Slice& s, const Slice& t) { - auto bc = BytewiseComparator(); - auto rbc = ReverseBytewiseComparator(); - ASSERT_FALSE(bc->IsSameLengthImmediateSuccessor(s, t)); - ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(s, t)); - ASSERT_FALSE(bc->IsSameLengthImmediateSuccessor(t, s)); - ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(t, s)); -} - -void VerifySuccessor(const Slice& s, const Slice& t) { - auto bc = BytewiseComparator(); - auto rbc = ReverseBytewiseComparator(); - ASSERT_TRUE(bc->IsSameLengthImmediateSuccessor(s, t)); - ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(s, t)); - ASSERT_FALSE(bc->IsSameLengthImmediateSuccessor(t, s)); - // Should be true but that increases exposure to a design bug in - // auto_prefix_mode, so currently set to FALSE - ASSERT_FALSE(rbc->IsSameLengthImmediateSuccessor(t, s)); -} - -} // anonymous namespace - -TEST_P(ComparatorDBTest, IsSameLengthImmediateSuccessor) { - { - // different length - Slice s("abcxy"); - Slice t("abcxyz"); - VerifyNotSuccessor(s, t); - } - { - Slice s("abcxyz"); - Slice t("abcxy"); - VerifyNotSuccessor(s, t); - } - { - // not last byte different - Slice s("abc1xyz"); - Slice t("abc2xyz"); - VerifyNotSuccessor(s, t); - } - { - // same string - Slice s("abcxyz"); - Slice t("abcxyz"); - VerifyNotSuccessor(s, t); - } - { - Slice s("abcxy"); - Slice t("abcxz"); - VerifySuccessor(s, t); - } - { - const char s_array[] = "\x50\x8a\xac"; - const char t_array[] = "\x50\x8a\xad"; - Slice s(s_array); - Slice t(t_array); - VerifySuccessor(s, t); - } - { - const char s_array[] = "\x50\x8a\xff"; - const char t_array[] = "\x50\x8b\x00"; - Slice s(s_array, 3); - Slice t(t_array, 3); - VerifySuccessor(s, t); - } - { - const char s_array[] = "\x50\x8a\xff\xff"; - const char t_array[] = "\x50\x8b\x00\x00"; - Slice s(s_array, 4); - Slice t(t_array, 4); - VerifySuccessor(s, t); - } - { - const char s_array[] = "\x50\x8a\xff\xff"; - const char t_array[] = "\x50\x8b\x00\x01"; - Slice s(s_array, 4); - Slice t(t_array, 4); - VerifyNotSuccessor(s, t); - } -} - -TEST_P(ComparatorDBTest, FindShortestSeparator) { - std::string s1 = "abc1xyz"; - std::string s2 = "abc3xy"; - - BytewiseComparator()->FindShortestSeparator(&s1, s2); - ASSERT_EQ("abc2", s1); - - s1 = "abc5xyztt"; - - ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); - ASSERT_EQ("abc5", s1); - - s1 = "abc3"; - s2 = "abc2xy"; - ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); - ASSERT_EQ("abc3", s1); - - s1 = "abc3xyz"; - s2 = "abc2xy"; - ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); - ASSERT_EQ("abc3", s1); - - s1 = "abc3xyz"; - s2 = "abc2"; - ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); - ASSERT_EQ("abc3", s1); - - std::string old_s1 = s1 = "abc2xy"; - s2 = "abc2"; - ReverseBytewiseComparator()->FindShortestSeparator(&s1, 
s2); - ASSERT_TRUE(old_s1 >= s1); - ASSERT_TRUE(s1 > s2); -} - -TEST_P(ComparatorDBTest, SeparatorSuccessorRandomizeTest) { - // Char list for boundary cases. - std::array char_list{{0, 1, 2, 253, 254, 255}}; - Random rnd(301); - - for (int attempts = 0; attempts < 1000; attempts++) { - uint32_t size1 = rnd.Skewed(4); - uint32_t size2; - - if (rnd.OneIn(2)) { - // size2 to be random size - size2 = rnd.Skewed(4); - } else { - // size1 is within [-2, +2] of size1 - int diff = static_cast(rnd.Uniform(5)) - 2; - int tmp_size2 = static_cast(size1) + diff; - if (tmp_size2 < 0) { - tmp_size2 = 0; - } - size2 = static_cast(tmp_size2); - } - - std::string s1; - std::string s2; - for (uint32_t i = 0; i < size1; i++) { - if (rnd.OneIn(2)) { - // Use random byte - s1 += static_cast(rnd.Uniform(256)); - } else { - // Use one byte in char_list - char c = static_cast(char_list[rnd.Uniform(sizeof(char_list))]); - s1 += c; - } - } - - // First set s2 to be the same as s1, and then modify s2. - s2 = s1; - s2.resize(size2); - // We start from the back of the string - if (size2 > 0) { - uint32_t pos = size2 - 1; - do { - if (pos >= size1 || rnd.OneIn(4)) { - // For 1/4 chance, use random byte - s2[pos] = static_cast(rnd.Uniform(256)); - } else if (rnd.OneIn(4)) { - // In 1/4 chance, stop here. - break; - } else { - // Create a char within [-2, +2] of the matching char of s1. - int diff = static_cast(rnd.Uniform(5)) - 2; - // char may be signed or unsigned based on platform. - int s1_char = static_cast(static_cast(s1[pos])); - int s2_char = s1_char + diff; - if (s2_char < 0) { - s2_char = 0; - } - if (s2_char > 255) { - s2_char = 255; - } - s2[pos] = static_cast(s2_char); - } - } while (pos-- != 0); - } - - // Test separators - for (int rev = 0; rev < 2; rev++) { - if (rev == 1) { - // switch s1 and s2 - std::string t = s1; - s1 = s2; - s2 = t; - } - std::string separator = s1; - BytewiseComparator()->FindShortestSeparator(&separator, s2); - std::string rev_separator = s1; - ReverseBytewiseComparator()->FindShortestSeparator(&rev_separator, s2); - - if (s1 == s2) { - ASSERT_EQ(s1, separator); - ASSERT_EQ(s2, rev_separator); - } else if (s1 < s2) { - ASSERT_TRUE(s1 <= separator); - ASSERT_TRUE(s2 > separator); - ASSERT_LE(separator.size(), std::max(s1.size(), s2.size())); - ASSERT_EQ(s1, rev_separator); - } else { - ASSERT_TRUE(s1 >= rev_separator); - ASSERT_TRUE(s2 < rev_separator); - ASSERT_LE(rev_separator.size(), std::max(s1.size(), s2.size())); - ASSERT_EQ(s1, separator); - } - } - - // Test successors - std::string succ = s1; - BytewiseComparator()->FindShortSuccessor(&succ); - ASSERT_TRUE(succ >= s1); - - succ = s1; - ReverseBytewiseComparator()->FindShortSuccessor(&succ); - ASSERT_TRUE(succ <= s1); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/corruption_test.cc b/db/corruption_test.cc deleted file mode 100644 index ab506cdb7..000000000 --- a/db/corruption_test.cc +++ /dev/null @@ -1,1669 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
See the AUTHORS file for names of contributors. - -#include "rocksdb/options.h" - -#include -#include -#include - -#include - -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "db/log_format.h" -#include "db/version_set.h" -#include "file/filename.h" -#include "port/stack_trace.h" -#include "rocksdb/cache.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/table.h" -#include "rocksdb/utilities/transaction_db.h" -#include "rocksdb/write_batch.h" -#include "table/block_based/block_based_table_builder.h" -#include "table/meta_blocks.h" -#include "table/mock_table.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/cast_util.h" -#include "util/random.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -static constexpr int kValueSize = 1000; -namespace { -// A wrapper that allows injection of errors. -class ErrorFS : public FileSystemWrapper { - public: - bool writable_file_error_; - int num_writable_file_errors_; - - explicit ErrorFS(const std::shared_ptr& _target) - : FileSystemWrapper(_target), - writable_file_error_(false), - num_writable_file_errors_(0) {} - const char* Name() const override { return "ErrorEnv"; } - - virtual IOStatus NewWritableFile(const std::string& fname, - const FileOptions& opts, - std::unique_ptr* result, - IODebugContext* dbg) override { - result->reset(); - if (writable_file_error_) { - ++num_writable_file_errors_; - return IOStatus::IOError(fname, "fake error"); - } - return target()->NewWritableFile(fname, opts, result, dbg); - } -}; -} // anonymous namespace -class CorruptionTest : public testing::Test { - public: - std::shared_ptr env_guard_; - std::shared_ptr fs_; - std::unique_ptr env_; - Env* base_env_; - std::string dbname_; - std::shared_ptr tiny_cache_; - Options options_; - DB* db_; - - CorruptionTest() { - // If LRU cache shard bit is smaller than 2 (or -1 which will automatically - // set it to 0), test SequenceNumberRecovery will fail, likely because of a - // bug in recovery code. Keep it 4 for now to make the test passes. - tiny_cache_ = NewLRUCache(100, 4); - base_env_ = Env::Default(); - EXPECT_OK( - test::CreateEnvFromSystem(ConfigOptions(), &base_env_, &env_guard_)); - EXPECT_NE(base_env_, nullptr); - fs_.reset(new ErrorFS(base_env_->GetFileSystem())); - env_ = NewCompositeEnv(fs_); - options_.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; - options_.env = env_.get(); - dbname_ = test::PerThreadDBPath(env_.get(), "corruption_test"); - Status s = DestroyDB(dbname_, options_); - EXPECT_OK(s); - - db_ = nullptr; - options_.create_if_missing = true; - BlockBasedTableOptions table_options; - table_options.block_size_deviation = 0; // make unit test pass for now - options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(); - options_.create_if_missing = false; - } - - ~CorruptionTest() override { - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({}); - SyncPoint::GetInstance()->ClearAllCallBacks(); - delete db_; - db_ = nullptr; - if (getenv("KEEP_DB")) { - fprintf(stdout, "db is still at %s\n", dbname_.c_str()); - } else { - Options opts; - opts.env = base_env_; - EXPECT_OK(DestroyDB(dbname_, opts)); - } - } - - void CloseDb() { - delete db_; - db_ = nullptr; - } - - Status TryReopen(Options* options = nullptr) { - delete db_; - db_ = nullptr; - Options opt = (options ? 
*options : options_); - if (opt.env == Options().env) { - // If env is not overridden, replace it with ErrorEnv. - // Otherwise, the test already uses a non-default Env. - opt.env = env_.get(); - } - opt.arena_block_size = 4096; - BlockBasedTableOptions table_options; - table_options.block_cache = tiny_cache_; - table_options.block_size_deviation = 0; - opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); - return DB::Open(opt, dbname_, &db_); - } - - void Reopen(Options* options = nullptr) { ASSERT_OK(TryReopen(options)); } - - void RepairDB() { - delete db_; - db_ = nullptr; - ASSERT_OK(::ROCKSDB_NAMESPACE::RepairDB(dbname_, options_)); - } - - void Build(int n, int start, int flush_every) { - std::string key_space, value_space; - WriteBatch batch; - for (int i = 0; i < n; i++) { - if (flush_every != 0 && i != 0 && i % flush_every == 0) { - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - } - // if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n); - Slice key = Key(i + start, &key_space); - batch.Clear(); - ASSERT_OK(batch.Put(key, Value(i + start, &value_space))); - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - } - } - - void Build(int n, int flush_every = 0) { Build(n, 0, flush_every); } - - void Check(int min_expected, int max_expected) { - Check(min_expected, max_expected, ReadOptions(false, true)); - } - - void Check(int min_expected, int max_expected, ReadOptions read_options) { - uint64_t next_expected = 0; - uint64_t missed = 0; - int bad_keys = 0; - int bad_values = 0; - int correct = 0; - std::string value_space; - // Do not verify checksums. If we verify checksums then the - // db itself will raise errors because data is corrupted. - // Instead, we want the reads to be successful and this test - // will detect whether the appropriate corruptions have - // occurred. - Iterator* iter = db_->NewIterator(read_options); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - uint64_t key; - Slice in(iter->key()); - if (!ConsumeDecimalNumber(&in, &key) || !in.empty() || - key < next_expected) { - bad_keys++; - continue; - } - missed += (key - next_expected); - next_expected = key + 1; - if (iter->value() != Value(static_cast(key), &value_space)) { - bad_values++; - } else { - correct++; - } - } - iter->status().PermitUncheckedError(); - delete iter; - - fprintf( - stderr, - "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%llu\n", - min_expected, max_expected, correct, bad_keys, bad_values, - static_cast(missed)); - ASSERT_LE(min_expected, correct); - ASSERT_GE(max_expected, correct); - } - - void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) { - // Pick file to corrupt - std::vector filenames; - ASSERT_OK(env_->GetChildren(dbname_, &filenames)); - uint64_t number; - FileType type; - std::string fname; - int picked_number = -1; - for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type) && type == filetype && - static_cast(number) > picked_number) { // Pick latest file - fname = dbname_ + "/" + filenames[i]; - picked_number = static_cast(number); - } - } - ASSERT_TRUE(!fname.empty()) << filetype; - - ASSERT_OK(test::CorruptFile(env_.get(), fname, offset, bytes_to_corrupt, - /*verify_checksum*/ filetype == kTableFile)); - } - - // corrupts exactly one file at level `level`. 
if no file found at level, - // asserts - void CorruptTableFileAtLevel(int level, int offset, int bytes_to_corrupt) { - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - for (const auto& m : metadata) { - if (m.level == level) { - ASSERT_OK(test::CorruptFile(env_.get(), dbname_ + "/" + m.name, offset, - bytes_to_corrupt)); - return; - } - } - FAIL() << "no file found at level"; - } - - int Property(const std::string& name) { - std::string property; - int result; - if (db_->GetProperty(name, &property) && - sscanf(property.c_str(), "%d", &result) == 1) { - return result; - } else { - return -1; - } - } - - // Return the ith key - Slice Key(int i, std::string* storage) { - char buf[100]; - snprintf(buf, sizeof(buf), "%016d", i); - storage->assign(buf, strlen(buf)); - return Slice(*storage); - } - - // Return the value to associate with the specified key - Slice Value(int k, std::string* storage) { - if (k == 0) { - // Ugh. Random seed of 0 used to produce no entropy. This code - // preserves the implementation that was in place when all of the - // magic values in this file were picked. - *storage = std::string(kValueSize, ' '); - } else { - Random r(k); - *storage = r.RandomString(kValueSize); - } - return Slice(*storage); - } - - void GetSortedWalFiles(std::vector& file_nums) { - std::vector tmp_files; - ASSERT_OK(env_->GetChildren(dbname_, &tmp_files)); - FileType type = kWalFile; - for (const auto& file : tmp_files) { - uint64_t number = 0; - if (ParseFileName(file, &number, &type) && type == kWalFile) { - file_nums.push_back(number); - } - } - std::sort(file_nums.begin(), file_nums.end()); - } - - void CorruptFileWithTruncation(FileType file, uint64_t number, - uint64_t bytes_to_truncate = 0) { - std::string path; - switch (file) { - case FileType::kWalFile: - path = LogFileName(dbname_, number); - break; - // TODO: Add other file types as this method is being used for those file - // types. - default: - return; - } - uint64_t old_size = 0; - ASSERT_OK(env_->GetFileSize(path, &old_size)); - assert(old_size > bytes_to_truncate); - uint64_t new_size = old_size - bytes_to_truncate; - // If bytes_to_truncate == 0, it will do full truncation. - if (bytes_to_truncate == 0) { - new_size = 0; - } - ASSERT_OK(test::TruncateFile(env_.get(), path, new_size)); - } -}; - -TEST_F(CorruptionTest, Recovery) { - Build(100); - Check(100, 100); -#ifdef OS_WIN - // On Wndows OS Disk cache does not behave properly - // We do not call FlushBuffers on every Flush. If we do not close - // the log file prior to the corruption we end up with the first - // block not corrupted but only the second. However, under the debugger - // things work just fine but never pass when running normally - // For that reason people may want to run with unbuffered I/O. That option - // is not available for WAL though. - CloseDb(); -#endif - Corrupt(kWalFile, 19, 1); // WriteBatch tag for first record - Corrupt(kWalFile, log::kBlockSize + 1000, 1); // Somewhere in second block - ASSERT_TRUE(!TryReopen().ok()); - options_.paranoid_checks = false; - Reopen(&options_); - - // The 64 records in the first two log blocks are completely lost. - Check(36, 36); -} - -TEST_F(CorruptionTest, PostPITRCorruptionWALsRetained) { - // Repro for bug where WALs following the point-in-time recovery were not - // retained leading to the next recovery failing. 
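// Recovery-mode background for this repro: with kPointInTimeRecovery (set below),
// WAL replay stops at the first corrupted record and the DB opens with a consistent
// prefix of the writes; records after the corruption in that WAL are dropped. The
// other modes are kTolerateCorruptedTailRecords, kAbsoluteConsistency and
// kSkipAnyCorruptedRecords. Sketch of the setting this test exercises:
//
//   DBOptions recovery_opts;
//   recovery_opts.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;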
- CloseDb(); - - options_.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - - const std::string test_cf_name = "test_cf"; - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions()); - cf_descs.emplace_back(test_cf_name, ColumnFamilyOptions()); - - uint64_t log_num; - { - options_.create_missing_column_families = true; - std::vector cfhs; - ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cfhs, &db_)); - assert(db_ != nullptr); // suppress false clang-analyze report - - ASSERT_OK(db_->Put(WriteOptions(), cfhs[0], "k", "v")); - ASSERT_OK(db_->Put(WriteOptions(), cfhs[1], "k", "v")); - ASSERT_OK(db_->Put(WriteOptions(), cfhs[0], "k2", "v2")); - std::vector file_nums; - GetSortedWalFiles(file_nums); - log_num = file_nums.back(); - for (auto* cfh : cfhs) { - delete cfh; - } - CloseDb(); - } - - CorruptFileWithTruncation(FileType::kWalFile, log_num, - /*bytes_to_truncate=*/1); - - { - // Recover "k" -> "v" for both CFs. "k2" -> "v2" is lost due to truncation. - options_.avoid_flush_during_recovery = true; - std::vector cfhs; - ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cfhs, &db_)); - assert(db_ != nullptr); // suppress false clang-analyze report - - // Flush one but not both CFs and write some data so there's a seqno gap - // between the PITR corruption and the next DB session's first WAL. - ASSERT_OK(db_->Put(WriteOptions(), cfhs[1], "k2", "v2")); - ASSERT_OK(db_->Flush(FlushOptions(), cfhs[1])); - - for (auto* cfh : cfhs) { - delete cfh; - } - CloseDb(); - } - - // With the bug, this DB open would remove the WALs following the PITR - // corruption. Then, the next recovery would fail. - for (int i = 0; i < 2; ++i) { - std::vector cfhs; - ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cfhs, &db_)); - assert(db_ != nullptr); // suppress false clang-analyze report - - for (auto* cfh : cfhs) { - delete cfh; - } - CloseDb(); - } -} - -TEST_F(CorruptionTest, RecoverWriteError) { - fs_->writable_file_error_ = true; - Status s = TryReopen(); - ASSERT_TRUE(!s.ok()); -} - -TEST_F(CorruptionTest, NewFileErrorDuringWrite) { - // Do enough writing to force minor compaction - fs_->writable_file_error_ = true; - const int num = - static_cast(3 + (Options().write_buffer_size / kValueSize)); - std::string value_storage; - Status s; - bool failed = false; - for (int i = 0; i < num; i++) { - WriteBatch batch; - ASSERT_OK(batch.Put("a", Value(100, &value_storage))); - s = db_->Write(WriteOptions(), &batch); - if (!s.ok()) { - failed = true; - } - ASSERT_TRUE(!failed || !s.ok()); - } - ASSERT_TRUE(!s.ok()); - ASSERT_GE(fs_->num_writable_file_errors_, 1); - fs_->writable_file_error_ = false; - Reopen(); -} - -TEST_F(CorruptionTest, TableFile) { - Build(100); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr)); - ASSERT_OK(dbi->TEST_CompactRange(1, nullptr, nullptr)); - - Corrupt(kTableFile, 100, 1); - Check(99, 99); - ASSERT_NOK(dbi->VerifyChecksum()); -} - -TEST_F(CorruptionTest, VerifyChecksumReadahead) { - Options options; - SpecialEnv senv(base_env_); - options.env = &senv; - // Disable block cache as we are going to check checksum for - // the same file twice and measure number of reads. 
- BlockBasedTableOptions table_options_no_bc; - table_options_no_bc.no_block_cache = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options_no_bc)); - - Reopen(&options); - - Build(10000); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr)); - ASSERT_OK(dbi->TEST_CompactRange(1, nullptr, nullptr)); - - senv.count_random_reads_ = true; - senv.random_read_counter_.Reset(); - ASSERT_OK(dbi->VerifyChecksum()); - - // Make sure the counter is enabled. - ASSERT_GT(senv.random_read_counter_.Read(), 0); - - // The SST file is about 10MB. Default readahead size is 256KB. - // Give a conservative 20 reads for metadata blocks, The number - // of random reads should be within 10 MB / 256KB + 20 = 60. - ASSERT_LT(senv.random_read_counter_.Read(), 60); - - senv.random_read_bytes_counter_ = 0; - ReadOptions ro; - ro.readahead_size = size_t{32 * 1024}; - ASSERT_OK(dbi->VerifyChecksum(ro)); - // The SST file is about 10MB. We set readahead size to 32KB. - // Give 0 to 20 reads for metadata blocks, and allow real read - // to range from 24KB to 48KB. The lower bound would be: - // 10MB / 48KB + 0 = 213 - // The higher bound is - // 10MB / 24KB + 20 = 447. - ASSERT_GE(senv.random_read_counter_.Read(), 213); - ASSERT_LE(senv.random_read_counter_.Read(), 447); - - // Test readahead shouldn't break mmap mode (where it should be - // disabled). - options.allow_mmap_reads = true; - Reopen(&options); - dbi = static_cast(db_); - ASSERT_OK(dbi->VerifyChecksum(ro)); - - CloseDb(); -} - -TEST_F(CorruptionTest, TableFileIndexData) { - Options options; - // very big, we'll trigger flushes manually - options.write_buffer_size = 100 * 1024 * 1024; - Reopen(&options); - // build 2 tables, flush at 5000 - Build(10000, 5000); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - - // corrupt an index block of an entire file - Corrupt(kTableFile, -2000, 500); - options.paranoid_checks = false; - Reopen(&options); - dbi = static_cast_with_check(db_); - // one full file may be readable, since only one was corrupted - // the other file should be fully non-readable, since index was corrupted - Check(0, 5000, ReadOptions(true, true)); - ASSERT_NOK(dbi->VerifyChecksum()); - - // In paranoid mode, the db cannot be opened due to the corrupted file. - ASSERT_TRUE(TryReopen().IsCorruption()); -} - -TEST_F(CorruptionTest, TableFileFooterMagic) { - Build(100); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - Check(100, 100); - // Corrupt the whole footer - Corrupt(kTableFile, -100, 100); - Status s = TryReopen(); - ASSERT_TRUE(s.IsCorruption()); - // Contains useful message, and magic number should be the first thing - // reported as corrupt. 
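Aside: the difference between this test and the next one comes down to footer layout. In the block-based table format the 8-byte magic number sits at the very end of the file, so Corrupt(kTableFile, -100, 100) wipes it while Corrupt(kTableFile, -100, 92) stops exactly 8 bytes short and leaves it readable (negative offsets to Corrupt() are taken relative to end-of-file, as in the index-block test above). A small editorial arithmetic check of that claim:

    // Editorial sketch: why "-100, 92" leaves the footer magic intact.
    constexpr int kCorruptSpan = 100;  // both tests start 100 bytes before EOF
    constexpr int kMagicBytes = 8;     // block-based table magic is the last 8 bytes
    static_assert(kCorruptSpan - 92 == kMagicBytes,
                  "corrupting 92 of the last 100 bytes stops right at the magic number");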
- ASSERT_TRUE(s.ToString().find("magic number") != std::string::npos); - // with file name - ASSERT_TRUE(s.ToString().find(".sst") != std::string::npos); -} - -TEST_F(CorruptionTest, TableFileFooterNotMagic) { - Build(100); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - Check(100, 100); - // Corrupt footer except magic number - Corrupt(kTableFile, -100, 92); - Status s = TryReopen(); - ASSERT_TRUE(s.IsCorruption()); - // The next thing checked after magic number is format_version - ASSERT_TRUE(s.ToString().find("format_version") != std::string::npos); - // with file name - ASSERT_TRUE(s.ToString().find(".sst") != std::string::npos); -} - -TEST_F(CorruptionTest, TableFileWrongSize) { - Build(100); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - Check(100, 100); - - // ******************************************** - // Make the file bigger by appending to it - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(1U, metadata.size()); - std::string filename = dbname_ + metadata[0].name; - const auto& fs = options_.env->GetFileSystem(); - { - std::unique_ptr f; - ASSERT_OK(fs->ReopenWritableFile(filename, FileOptions(), &f, nullptr)); - ASSERT_OK(f->Append("blahblah", IOOptions(), nullptr)); - ASSERT_OK(f->Close(IOOptions(), nullptr)); - } - - // DB actually accepts this without paranoid checks, relying on size - // recorded in manifest to locate the SST footer. - options_.paranoid_checks = false; - options_.skip_checking_sst_file_sizes_on_db_open = false; - Reopen(); - Check(100, 100); - - // But reports the issue with paranoid checks - options_.paranoid_checks = true; - Status s = TryReopen(); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("file size mismatch") != std::string::npos); - - // ******************************************** - // Make the file smaller with truncation. - // First leaving a partial footer, and then completely removing footer. - for (size_t bytes_lost : {8, 100}) { - ASSERT_OK(test::TruncateFile(env_.get(), filename, - metadata[0].size - bytes_lost)); - - // Reported well with paranoid checks - options_.paranoid_checks = true; - s = TryReopen(); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("file size mismatch") != std::string::npos); - - // Without paranoid checks, not reported until read - options_.paranoid_checks = false; - Reopen(); - Check(0, 0); // Missing data - } -} - -TEST_F(CorruptionTest, MissingDescriptor) { - Build(1000); - RepairDB(); - Reopen(); - Check(1000, 1000); -} - -TEST_F(CorruptionTest, SequenceNumberRecovery) { - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v3")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v4")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v5")); - RepairDB(); - Reopen(); - std::string v; - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ("v5", v); - // Write something. If sequence number was not recovered properly, - // it will be hidden by an earlier write. 
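Aside: the "hidden by an earlier write" failure mode exists because reads resolve a user key to the version with the highest sequence number visible to the read. A worked editorial example with made-up sequence numbers:

    // Editorial illustration only; sequence numbers are hypothetical.
    //   Correct repair:  last_sequence restored to 5, so Put("foo", "v6") gets seqno 6
    //                    and shadows ("foo", seq 5) == "v5".
    //   Broken repair:   last_sequence restored too low (say 2), Put("foo", "v6") gets
    //                    seqno 3, and Get("foo") keeps returning "v5" because seq 5 > 3.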
- ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6")); - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ("v6", v); - Reopen(); - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ("v6", v); -} - -TEST_F(CorruptionTest, CorruptedDescriptor) { - ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello")); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK( - dbi->CompactRange(cro, dbi->DefaultColumnFamily(), nullptr, nullptr)); - - Corrupt(kDescriptorFile, 0, 1000); - Status s = TryReopen(); - ASSERT_TRUE(!s.ok()); - - RepairDB(); - Reopen(); - std::string v; - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ("hello", v); -} - -TEST_F(CorruptionTest, CompactionInputError) { - Options options; - options.env = env_.get(); - Reopen(&options); - Build(10); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr)); - ASSERT_OK(dbi->TEST_CompactRange(1, nullptr, nullptr)); - ASSERT_EQ(1, Property("rocksdb.num-files-at-level2")); - - Corrupt(kTableFile, 100, 1); - Check(9, 9); - ASSERT_NOK(dbi->VerifyChecksum()); - - // Force compactions by writing lots of values - Build(10000); - Check(10000, 10000); - ASSERT_NOK(dbi->VerifyChecksum()); -} - -TEST_F(CorruptionTest, CompactionInputErrorParanoid) { - Options options; - options.env = env_.get(); - options.paranoid_checks = true; - options.write_buffer_size = 131072; - options.max_write_buffer_number = 2; - Reopen(&options); - DBImpl* dbi = static_cast_with_check(db_); - - // Fill levels >= 1 - for (int level = 1; level < dbi->NumberLevels(); level++) { - ASSERT_OK(dbi->Put(WriteOptions(), "", "begin")); - ASSERT_OK(dbi->Put(WriteOptions(), "~", "end")); - ASSERT_OK(dbi->TEST_FlushMemTable()); - for (int comp_level = 0; comp_level < dbi->NumberLevels() - level; - ++comp_level) { - ASSERT_OK(dbi->TEST_CompactRange(comp_level, nullptr, nullptr)); - } - } - - Reopen(&options); - - dbi = static_cast_with_check(db_); - Build(10); - ASSERT_OK(dbi->TEST_FlushMemTable()); - ASSERT_OK(dbi->TEST_WaitForCompact()); - ASSERT_EQ(1, Property("rocksdb.num-files-at-level0")); - - CorruptTableFileAtLevel(0, 100, 1); - Check(9, 9); - ASSERT_NOK(dbi->VerifyChecksum()); - - // Write must eventually fail because of corrupted table - Status s; - std::string tmp1, tmp2; - bool failed = false; - for (int i = 0; i < 10000; i++) { - s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2)); - if (!s.ok()) { - failed = true; - } - // if one write failed, every subsequent write must fail, too - ASSERT_TRUE(!failed || !s.ok()) << "write did not fail in a corrupted db"; - } - ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db"; -} - -TEST_F(CorruptionTest, UnrelatedKeys) { - Build(10); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - Corrupt(kTableFile, 100, 1); - ASSERT_NOK(dbi->VerifyChecksum()); - - std::string tmp1, tmp2; - ASSERT_OK(db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2))); - std::string v; - ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); - ASSERT_EQ(Value(1000, &tmp2).ToString(), v); - ASSERT_OK(dbi->TEST_FlushMemTable()); - ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); - ASSERT_EQ(Value(1000, &tmp2).ToString(), v); -} - -TEST_F(CorruptionTest, RangeDeletionCorrupted) { - ASSERT_OK( - db_->DeleteRange(WriteOptions(), 
db_->DefaultColumnFamily(), "a", "b")); - ASSERT_OK(db_->Flush(FlushOptions())); - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(static_cast(1), metadata.size()); - std::string filename = dbname_ + metadata[0].name; - - FileOptions file_opts; - const auto& fs = options_.env->GetFileSystem(); - std::unique_ptr file_reader; - ASSERT_OK(RandomAccessFileReader::Create(fs, filename, file_opts, - &file_reader, nullptr)); - - uint64_t file_size; - ASSERT_OK( - fs->GetFileSize(filename, file_opts.io_options, &file_size, nullptr)); - - BlockHandle range_del_handle; - ASSERT_OK(FindMetaBlockInFile( - file_reader.get(), file_size, kBlockBasedTableMagicNumber, - ImmutableOptions(options_), kRangeDelBlockName, &range_del_handle)); - - ASSERT_OK(TryReopen()); - ASSERT_OK(test::CorruptFile(env_.get(), filename, - static_cast(range_del_handle.offset()), 1)); - ASSERT_TRUE(TryReopen().IsCorruption()); -} - -TEST_F(CorruptionTest, FileSystemStateCorrupted) { - for (int iter = 0; iter < 2; ++iter) { - Options options; - options.env = env_.get(); - options.paranoid_checks = true; - options.create_if_missing = true; - Reopen(&options); - Build(10); - ASSERT_OK(db_->Flush(FlushOptions())); - DBImpl* dbi = static_cast_with_check(db_); - std::vector metadata; - dbi->GetLiveFilesMetaData(&metadata); - ASSERT_GT(metadata.size(), 0); - std::string filename = dbname_ + metadata[0].name; - - delete db_; - db_ = nullptr; - - if (iter == 0) { // corrupt file size - std::unique_ptr file; - ASSERT_OK(env_->NewWritableFile(filename, &file, EnvOptions())); - ASSERT_OK(file->Append(Slice("corrupted sst"))); - file.reset(); - Status x = TryReopen(&options); - ASSERT_TRUE(x.IsCorruption()); - } else { // delete the file - ASSERT_OK(env_->DeleteFile(filename)); - Status x = TryReopen(&options); - ASSERT_TRUE(x.IsCorruption()); - } - - ASSERT_OK(DestroyDB(dbname_, options_)); - } -} - -static const auto& corruption_modes = { - mock::MockTableFactory::kCorruptNone, mock::MockTableFactory::kCorruptKey, - mock::MockTableFactory::kCorruptValue, - mock::MockTableFactory::kCorruptReorderKey}; - -TEST_F(CorruptionTest, ParanoidFileChecksOnFlush) { - Options options; - options.env = env_.get(); - options.check_flush_compaction_key_order = false; - options.paranoid_file_checks = true; - options.create_if_missing = true; - Status s; - for (const auto& mode : corruption_modes) { - delete db_; - db_ = nullptr; - s = DestroyDB(dbname_, options); - ASSERT_OK(s); - std::shared_ptr mock = - std::make_shared(); - options.table_factory = mock; - mock->SetCorruptionMode(mode); - ASSERT_OK(DB::Open(options, dbname_, &db_)); - assert(db_ != nullptr); // suppress false clang-analyze report - Build(10); - s = db_->Flush(FlushOptions()); - if (mode == mock::MockTableFactory::kCorruptNone) { - ASSERT_OK(s); - } else { - ASSERT_NOK(s); - } - } -} - -TEST_F(CorruptionTest, ParanoidFileChecksOnCompact) { - Options options; - options.env = env_.get(); - options.paranoid_file_checks = true; - options.create_if_missing = true; - options.check_flush_compaction_key_order = false; - Status s; - for (const auto& mode : corruption_modes) { - delete db_; - db_ = nullptr; - s = DestroyDB(dbname_, options); - ASSERT_OK(s); - std::shared_ptr mock = - std::make_shared(); - options.table_factory = mock; - ASSERT_OK(DB::Open(options, dbname_, &db_)); - assert(db_ != nullptr); // suppress false clang-analyze report - Build(100, 2); - // ASSERT_OK(db_->Flush(FlushOptions())); - DBImpl* dbi = static_cast_with_check(db_); - 
ASSERT_OK(dbi->TEST_FlushMemTable()); - mock->SetCorruptionMode(mode); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - s = dbi->CompactRange(cro, dbi->DefaultColumnFamily(), nullptr, nullptr); - if (mode == mock::MockTableFactory::kCorruptNone) { - ASSERT_OK(s); - } else { - ASSERT_NOK(s); - } - } -} - -TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeFirst) { - Options options; - options.env = env_.get(); - options.check_flush_compaction_key_order = false; - options.paranoid_file_checks = true; - options.create_if_missing = true; - for (bool do_flush : {true, false}) { - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, options)); - ASSERT_OK(DB::Open(options, dbname_, &db_)); - std::string start, end; - assert(db_ != nullptr); // suppress false clang-analyze report - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(3, &start), Key(7, &end))); - auto snap = db_->GetSnapshot(); - ASSERT_NE(snap, nullptr); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(8, &start), Key(9, &end))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(2, &start), Key(5, &end))); - Build(10); - if (do_flush) { - ASSERT_OK(db_->Flush(FlushOptions())); - } else { - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK( - dbi->CompactRange(cro, dbi->DefaultColumnFamily(), nullptr, nullptr)); - } - db_->ReleaseSnapshot(snap); - } -} - -TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRange) { - Options options; - options.env = env_.get(); - options.check_flush_compaction_key_order = false; - options.paranoid_file_checks = true; - options.create_if_missing = true; - for (bool do_flush : {true, false}) { - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, options)); - ASSERT_OK(DB::Open(options, dbname_, &db_)); - assert(db_ != nullptr); // suppress false clang-analyze report - Build(10, 0, 0); - std::string start, end; - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(5, &start), Key(15, &end))); - auto snap = db_->GetSnapshot(); - ASSERT_NE(snap, nullptr); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(8, &start), Key(9, &end))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(12, &start), Key(17, &end))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(2, &start), Key(4, &end))); - Build(10, 10, 0); - if (do_flush) { - ASSERT_OK(db_->Flush(FlushOptions())); - } else { - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK( - dbi->CompactRange(cro, dbi->DefaultColumnFamily(), nullptr, nullptr)); - } - db_->ReleaseSnapshot(snap); - } -} - -TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeLast) { - Options options; - options.env = env_.get(); - options.check_flush_compaction_key_order = false; - options.paranoid_file_checks = true; - options.create_if_missing = true; - for (bool do_flush : {true, false}) { - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, options)); - ASSERT_OK(DB::Open(options, dbname_, &db_)); - assert(db_ != nullptr); // suppress false clang-analyze report - std::string start, end; - Build(10); - 
ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(3, &start), Key(7, &end))); - auto snap = db_->GetSnapshot(); - ASSERT_NE(snap, nullptr); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(6, &start), Key(8, &end))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(2, &start), Key(5, &end))); - if (do_flush) { - ASSERT_OK(db_->Flush(FlushOptions())); - } else { - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK( - dbi->CompactRange(cro, dbi->DefaultColumnFamily(), nullptr, nullptr)); - } - db_->ReleaseSnapshot(snap); - } -} - -TEST_F(CorruptionTest, LogCorruptionErrorsInCompactionIterator) { - Options options; - options.env = env_.get(); - options.create_if_missing = true; - options.allow_data_in_errors = true; - auto mode = mock::MockTableFactory::kCorruptKey; - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, options)); - - std::shared_ptr mock = - std::make_shared(); - mock->SetCorruptionMode(mode); - options.table_factory = mock; - - ASSERT_OK(DB::Open(options, dbname_, &db_)); - assert(db_ != nullptr); // suppress false clang-analyze report - Build(100, 2); - - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - Status s = - dbi->CompactRange(cro, dbi->DefaultColumnFamily(), nullptr, nullptr); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsCorruption()); -} - -TEST_F(CorruptionTest, CompactionKeyOrderCheck) { - Options options; - options.env = env_.get(); - options.paranoid_file_checks = false; - options.create_if_missing = true; - options.check_flush_compaction_key_order = false; - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, options)); - std::shared_ptr mock = - std::make_shared(); - options.table_factory = mock; - ASSERT_OK(DB::Open(options, dbname_, &db_)); - assert(db_ != nullptr); // suppress false clang-analyze report - mock->SetCorruptionMode(mock::MockTableFactory::kCorruptReorderKey); - Build(100, 2); - DBImpl* dbi = static_cast_with_check(db_); - ASSERT_OK(dbi->TEST_FlushMemTable()); - - mock->SetCorruptionMode(mock::MockTableFactory::kCorruptNone); - ASSERT_OK(db_->SetOptions({{"check_flush_compaction_key_order", "true"}})); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_NOK( - dbi->CompactRange(cro, dbi->DefaultColumnFamily(), nullptr, nullptr)); -} - -TEST_F(CorruptionTest, FlushKeyOrderCheck) { - Options options; - options.env = env_.get(); - options.paranoid_file_checks = false; - options.create_if_missing = true; - ASSERT_OK(db_->SetOptions({{"check_flush_compaction_key_order", "true"}})); - - ASSERT_OK(db_->Put(WriteOptions(), "foo1", "v1")); - ASSERT_OK(db_->Put(WriteOptions(), "foo2", "v1")); - ASSERT_OK(db_->Put(WriteOptions(), "foo3", "v1")); - ASSERT_OK(db_->Put(WriteOptions(), "foo4", "v1")); - - int cnt = 0; - // Generate some out of order keys from the memtable - SyncPoint::GetInstance()->SetCallBack( - "MemTableIterator::Next:0", [&](void* arg) { - MemTableRep::Iterator* mem_iter = - static_cast(arg); - if (++cnt == 3) { - mem_iter->Prev(); - mem_iter->Prev(); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - Status s = static_cast_with_check(db_)->TEST_FlushMemTable(); - ASSERT_NOK(s); - 
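Aside: the flush/compaction key-order tests here all follow the same SyncPoint recipe; pulled out on its own it looks like the sketch below (the sync-point name is a placeholder, not a real hook):

    // Editorial sketch of the pattern used above; "Some::Hook" is a made-up name.
    SyncPoint::GetInstance()->SetCallBack("Some::Hook", [&](void* arg) {
      // Inspect or mutate `arg` here to steer the code path under test
      // (e.g. rewind an iterator, or overwrite a Status* with an injected error).
    });
    SyncPoint::GetInstance()->EnableProcessing();
    // ... exercise the code path that fires the sync point ...
    SyncPoint::GetInstance()->DisableProcessing();
    SyncPoint::GetInstance()->ClearAllCallBacks();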
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(CorruptionTest, DisableKeyOrderCheck) { - ASSERT_OK(db_->SetOptions({{"check_flush_compaction_key_order", "false"}})); - DBImpl* dbi = static_cast_with_check(db_); - - SyncPoint::GetInstance()->SetCallBack( - "OutputValidator::Add:order_check", - [&](void* /*arg*/) { ASSERT_TRUE(false); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(db_->Put(WriteOptions(), "foo1", "v1")); - ASSERT_OK(db_->Put(WriteOptions(), "foo3", "v1")); - ASSERT_OK(dbi->TEST_FlushMemTable()); - ASSERT_OK(db_->Put(WriteOptions(), "foo2", "v1")); - ASSERT_OK(db_->Put(WriteOptions(), "foo4", "v1")); - ASSERT_OK(dbi->TEST_FlushMemTable()); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK( - dbi->CompactRange(cro, dbi->DefaultColumnFamily(), nullptr, nullptr)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(CorruptionTest, VerifyWholeTableChecksum) { - CloseDb(); - Options options; - options.env = env_.get(); - ASSERT_OK(DestroyDB(dbname_, options)); - options.create_if_missing = true; - options.file_checksum_gen_factory = - ROCKSDB_NAMESPACE::GetFileChecksumGenCrc32cFactory(); - Reopen(&options); - - Build(10, 5); - - ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); - CloseDb(); - - // Corrupt the first byte of each table file, this must be data block. - Corrupt(kTableFile, 0, 1); - - ASSERT_OK(TryReopen(&options)); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - int count{0}; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::VerifyFullFileChecksum:mismatch", [&](void* arg) { - auto* s = reinterpret_cast(arg); - ASSERT_NE(s, nullptr); - ++count; - ASSERT_NOK(*s); - }); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsCorruption()); - ASSERT_EQ(1, count); -} - -class CrashDuringRecoveryWithCorruptionTest - : public CorruptionTest, - public testing::WithParamInterface> { - public: - explicit CrashDuringRecoveryWithCorruptionTest() - : CorruptionTest(), - avoid_flush_during_recovery_(std::get<0>(GetParam())), - track_and_verify_wals_in_manifest_(std::get<1>(GetParam())) {} - - protected: - const bool avoid_flush_during_recovery_; - const bool track_and_verify_wals_in_manifest_; -}; - -INSTANTIATE_TEST_CASE_P(CorruptionTest, CrashDuringRecoveryWithCorruptionTest, - ::testing::Values(std::make_tuple(true, false), - std::make_tuple(false, false), - std::make_tuple(true, true), - std::make_tuple(false, true))); - -// In case of non-TransactionDB with avoid_flush_during_recovery = true, RocksDB -// won't flush the data from WAL to L0 for all column families if possible. As a -// result, not all column families can increase their log_numbers, and -// min_log_number_to_keep won't change. -// It may prematurely persist a new MANIFEST even before we can declare the DB -// is in consistent state after recovery (this is when the new WAL is synced) -// and advances log_numbers for some column families. 
-// -// If there is power failure before we sync the new WAL, we will end up in -// a situation in which after persisting the MANIFEST, RocksDB will see some -// column families' log_numbers larger than the corrupted wal, and -// "Column family inconsistency: SST file contains data beyond the point of -// corruption" error will be hit, causing recovery to fail. -// -// After adding the fix, only after new WAL is synced, RocksDB persist a new -// MANIFEST with column families to ensure RocksDB is in consistent state. -// RocksDB writes an empty WriteBatch as a sentinel to the new WAL which is -// synced immediately afterwards. The sequence number of the sentinel -// WriteBatch will be the next sequence number immediately after the largest -// sequence number recovered from previous WALs and MANIFEST because of which DB -// will be in consistent state. -// If a future recovery starts from the new MANIFEST, then it means the new WAL -// is successfully synced. Due to the sentinel empty write batch at the -// beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point. -// If future recovery starts from the old MANIFEST, it means the writing the new -// MANIFEST failed. It won't have the "SST ahead of WAL" error. -// -// The combination of corrupting a WAL and injecting an error during subsequent -// re-open exposes the bug of prematurely persisting a new MANIFEST with -// advanced ColumnFamilyData::log_number. -TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecovery) { - CloseDb(); - Options options; - options.track_and_verify_wals_in_manifest = - track_and_verify_wals_in_manifest_; - options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - options.avoid_flush_during_recovery = false; - options.env = env_.get(); - ASSERT_OK(DestroyDB(dbname_, options)); - options.create_if_missing = true; - options.max_write_buffer_number = 8; - - Reopen(&options); - Status s; - const std::string test_cf_name = "test_cf"; - ColumnFamilyHandle* cfh = nullptr; - s = db_->CreateColumnFamily(options, test_cf_name, &cfh); - ASSERT_OK(s); - delete cfh; - CloseDb(); - - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, options); - cf_descs.emplace_back(test_cf_name, options); - std::vector handles; - - // 1. Open and populate the DB. Write and flush default_cf several times to - // advance wal number so that some column families have advanced log_number - // while other don't. - { - ASSERT_OK(DB::Open(options, dbname_, cf_descs, &handles, &db_)); - auto* dbimpl = static_cast_with_check(db_); - assert(dbimpl); - - // Write one key to test_cf. - ASSERT_OK(db_->Put(WriteOptions(), handles[1], "old_key", "dontcare")); - ASSERT_OK(db_->Flush(FlushOptions(), handles[1])); - - // Write to default_cf and flush this cf several times to advance wal - // number. TEST_SwitchMemtable makes sure WALs are not synced and test can - // corrupt un-sync WAL. - for (int i = 0; i < 2; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), "key" + std::to_string(i), - "value" + std::to_string(i))); - ASSERT_OK(dbimpl->TEST_SwitchMemtable()); - } - - for (auto* h : handles) { - delete h; - } - handles.clear(); - CloseDb(); - } - - // 2. Corrupt second last un-syned wal file to emulate power reset which - // caused the DB to lose the un-synced WAL. 
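Aside: the long comment above reduces to an ordering guarantee during recovery. A commented editorial summary (a paraphrase of the comment, not actual RocksDB internals):

    // Editorial summary of the fixed recovery ordering described above:
    //   1. Replay the existing WALs (point-in-time, up to the corruption).
    //   2. Create the new WAL and write an empty sentinel WriteBatch whose sequence
    //      number is one past the largest sequence recovered from WALs + MANIFEST.
    //   3. Sync the new WAL.
    //   4. Only then persist the new MANIFEST with advanced log_numbers.
    // A crash before step 3 leaves the old MANIFEST current, so no column family can
    // claim a log_number beyond the corrupted WAL, and the "SST file contains data
    // beyond the point of corruption" error cannot occur on the next open.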
- { - std::vector file_nums; - GetSortedWalFiles(file_nums); - size_t size = file_nums.size(); - assert(size >= 2); - uint64_t log_num = file_nums[size - 2]; - CorruptFileWithTruncation(FileType::kWalFile, log_num, - /*bytes_to_truncate=*/8); - } - - // 3. After first crash reopen the DB which contains corrupted WAL. Default - // family has higher log number than corrupted wal number. - // - // Case1: If avoid_flush_during_recovery = true, RocksDB won't flush the data - // from WAL to L0 for all column families (test_cf_name in this case). As a - // result, not all column families can increase their log_numbers, and - // min_log_number_to_keep won't change. - // - // Case2: If avoid_flush_during_recovery = false, all column families have - // flushed their data from WAL to L0 during recovery, and none of them will - // ever need to read the WALs again. - - // 4. Fault is injected to fail the recovery. - { - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::GetLogSizeAndMaybeTruncate:0", [&](void* arg) { - auto* tmp_s = reinterpret_cast(arg); - assert(tmp_s); - *tmp_s = Status::IOError("Injected"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - handles.clear(); - options.avoid_flush_during_recovery = true; - s = DB::Open(options, dbname_, cf_descs, &handles, &db_); - ASSERT_TRUE(s.IsIOError()); - ASSERT_EQ("IO error: Injected", s.ToString()); - for (auto* h : handles) { - delete h; - } - CloseDb(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - } - - // 5. After second crash reopen the db with second corruption. Default family - // has higher log number than corrupted wal number. - // - // Case1: If avoid_flush_during_recovery = true, we persist a new - // MANIFEST with advanced log_numbers for some column families only after - // syncing the WAL. So during second crash, RocksDB will skip the corrupted - // WAL files as they have been moved to different folder. Since newly synced - // WAL file's sequence number (sentinel WriteBatch) will be the next - // sequence number immediately after the largest sequence number recovered - // from previous WALs and MANIFEST, db will be in consistent state and opens - // successfully. - // - // Case2: If avoid_flush_during_recovery = false, the corrupted WAL is below - // this number. So during a second crash after persisting the new MANIFEST, - // RocksDB will skip the corrupted WAL(s) because they are all below this - // bound. Therefore, we won't hit the "column family inconsistency" error - // message. - { - options.avoid_flush_during_recovery = avoid_flush_during_recovery_; - ASSERT_OK(DB::Open(options, dbname_, cf_descs, &handles, &db_)); - - // Verify that data is not lost. - { - std::string v; - ASSERT_OK(db_->Get(ReadOptions(), handles[1], "old_key", &v)); - ASSERT_EQ("dontcare", v); - - v.clear(); - ASSERT_OK(db_->Get(ReadOptions(), "key" + std::to_string(0), &v)); - ASSERT_EQ("value" + std::to_string(0), v); - - // Since it's corrupting second last wal, below key is not found. - v.clear(); - ASSERT_EQ(db_->Get(ReadOptions(), "key" + std::to_string(1), &v), - Status::NotFound()); - } - - for (auto* h : handles) { - delete h; - } - handles.clear(); - CloseDb(); - } -} - -// In case of TransactionDB, it enables two-phase-commit. The prepare section of -// an uncommitted transaction always need to be kept. 
Even if we perform flush -// during recovery, we may still need to hold an old WAL. The -// min_log_number_to_keep won't change, and "Column family inconsistency: SST -// file contains data beyond the point of corruption" error will be hit, causing -// recovery to fail. -// -// After adding the fix, only after new WAL is synced, RocksDB persist a new -// MANIFEST with column families to ensure RocksDB is in consistent state. -// RocksDB writes an empty WriteBatch as a sentinel to the new WAL which is -// synced immediately afterwards. The sequence number of the sentinel -// WriteBatch will be the next sequence number immediately after the largest -// sequence number recovered from previous WALs and MANIFEST because of which DB -// will be in consistent state. -// If a future recovery starts from the new MANIFEST, then it means the new WAL -// is successfully synced. Due to the sentinel empty write batch at the -// beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point. -// If future recovery starts from the old MANIFEST, it means the writing the new -// MANIFEST failed. It won't have the "SST ahead of WAL" error. -// -// The combination of corrupting a WAL and injecting an error during subsequent -// re-open exposes the bug of prematurely persisting a new MANIFEST with -// advanced ColumnFamilyData::log_number. -TEST_P(CrashDuringRecoveryWithCorruptionTest, TxnDbCrashDuringRecovery) { - CloseDb(); - Options options; - options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - options.track_and_verify_wals_in_manifest = - track_and_verify_wals_in_manifest_; - options.avoid_flush_during_recovery = false; - options.env = env_.get(); - ASSERT_OK(DestroyDB(dbname_, options)); - options.create_if_missing = true; - options.max_write_buffer_number = 3; - Reopen(&options); - - // Create cf test_cf_name. - ColumnFamilyHandle* cfh = nullptr; - const std::string test_cf_name = "test_cf"; - Status s = db_->CreateColumnFamily(options, test_cf_name, &cfh); - ASSERT_OK(s); - delete cfh; - CloseDb(); - - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, options); - cf_descs.emplace_back(test_cf_name, options); - std::vector handles; - - TransactionDB* txn_db = nullptr; - TransactionDBOptions txn_db_opts; - - // 1. Open and populate the DB. Write and flush default_cf several times to - // advance wal number so that some column families have advanced log_number - // while other don't. - { - ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs, - &handles, &txn_db)); - - auto* txn = txn_db->BeginTransaction(WriteOptions(), TransactionOptions()); - // Put cf1 - ASSERT_OK(txn->Put(handles[1], "foo", "value")); - ASSERT_OK(txn->SetName("txn0")); - ASSERT_OK(txn->Prepare()); - ASSERT_OK(txn_db->Flush(FlushOptions())); - - delete txn; - txn = nullptr; - - auto* dbimpl = static_cast_with_check(txn_db->GetRootDB()); - assert(dbimpl); - - // Put and flush cf0 - for (int i = 0; i < 2; ++i) { - ASSERT_OK(txn_db->Put(WriteOptions(), "key" + std::to_string(i), - "value" + std::to_string(i))); - ASSERT_OK(dbimpl->TEST_SwitchMemtable()); - } - - // Put cf1 - txn = txn_db->BeginTransaction(WriteOptions(), TransactionOptions()); - ASSERT_OK(txn->Put(handles[1], "foo1", "value1")); - ASSERT_OK(txn->Commit()); - - delete txn; - txn = nullptr; - - for (auto* h : handles) { - delete h; - } - handles.clear(); - delete txn_db; - } - - // 2. Corrupt second last wal to emulate power reset which caused the DB to - // lose the un-synced WAL. 
- { - std::vector file_nums; - GetSortedWalFiles(file_nums); - size_t size = file_nums.size(); - assert(size >= 2); - uint64_t log_num = file_nums[size - 2]; - CorruptFileWithTruncation(FileType::kWalFile, log_num, - /*bytes_to_truncate=*/8); - } - - // 3. After first crash reopen the DB which contains corrupted WAL. Default - // family has higher log number than corrupted wal number. There may be old - // WAL files that it must not delete because they can contain data of - // uncommitted transactions. As a result, min_log_number_to_keep won't change. - - { - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::Open::BeforeSyncWAL", [&](void* arg) { - auto* tmp_s = reinterpret_cast(arg); - assert(tmp_s); - *tmp_s = Status::IOError("Injected"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - handles.clear(); - s = TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs, &handles, - &txn_db); - ASSERT_TRUE(s.IsIOError()); - ASSERT_EQ("IO error: Injected", s.ToString()); - for (auto* h : handles) { - delete h; - } - CloseDb(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - } - - // 4. Corrupt max_wal_num. - { - std::vector file_nums; - GetSortedWalFiles(file_nums); - size_t size = file_nums.size(); - uint64_t log_num = file_nums[size - 1]; - CorruptFileWithTruncation(FileType::kWalFile, log_num); - } - - // 5. After second crash reopen the db with second corruption. Default family - // has higher log number than corrupted wal number. - // We persist a new MANIFEST with advanced log_numbers for some column - // families only after syncing the WAL. So during second crash, RocksDB will - // skip the corrupted WAL files as they have been moved to different folder. - // Since newly synced WAL file's sequence number (sentinel WriteBatch) will be - // the next sequence number immediately after the largest sequence number - // recovered from previous WALs and MANIFEST, db will be in consistent state - // and opens successfully. - { - ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs, - &handles, &txn_db)); - - // Verify that data is not lost. - { - std::string v; - // Key not visible since it's not committed. - ASSERT_EQ(txn_db->Get(ReadOptions(), handles[1], "foo", &v), - Status::NotFound()); - - v.clear(); - ASSERT_OK(txn_db->Get(ReadOptions(), "key" + std::to_string(0), &v)); - ASSERT_EQ("value" + std::to_string(0), v); - - // Last WAL is corrupted which contains two keys below. - v.clear(); - ASSERT_EQ(txn_db->Get(ReadOptions(), "key" + std::to_string(1), &v), - Status::NotFound()); - v.clear(); - ASSERT_EQ(txn_db->Get(ReadOptions(), handles[1], "foo1", &v), - Status::NotFound()); - } - - for (auto* h : handles) { - delete h; - } - delete txn_db; - } -} - -// This test is similar to -// CrashDuringRecoveryWithCorruptionTest.CrashDuringRecovery except it calls -// flush and corrupts Last WAL. It calls flush to sync some of the WALs and -// remaining are unsyned one of which is then corrupted to simulate crash. -// -// In case of non-TransactionDB with avoid_flush_during_recovery = true, RocksDB -// won't flush the data from WAL to L0 for all column families if possible. As a -// result, not all column families can increase their log_numbers, and -// min_log_number_to_keep won't change. 
-// It may prematurely persist a new MANIFEST even before we can declare the DB -// is in consistent state after recovery (this is when the new WAL is synced) -// and advances log_numbers for some column families. -// -// If there is power failure before we sync the new WAL, we will end up in -// a situation in which after persisting the MANIFEST, RocksDB will see some -// column families' log_numbers larger than the corrupted wal, and -// "Column family inconsistency: SST file contains data beyond the point of -// corruption" error will be hit, causing recovery to fail. -// -// After adding the fix, only after new WAL is synced, RocksDB persist a new -// MANIFEST with column families to ensure RocksDB is in consistent state. -// RocksDB writes an empty WriteBatch as a sentinel to the new WAL which is -// synced immediately afterwards. The sequence number of the sentinel -// WriteBatch will be the next sequence number immediately after the largest -// sequence number recovered from previous WALs and MANIFEST because of which DB -// will be in consistent state. -// If a future recovery starts from the new MANIFEST, then it means the new WAL -// is successfully synced. Due to the sentinel empty write batch at the -// beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point. -// If future recovery starts from the old MANIFEST, it means the writing the new -// MANIFEST failed. It won't have the "SST ahead of WAL" error. - -// The combination of corrupting a WAL and injecting an error during subsequent -// re-open exposes the bug of prematurely persisting a new MANIFEST with -// advanced ColumnFamilyData::log_number. -TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecoveryWithFlush) { - CloseDb(); - Options options; - options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - options.avoid_flush_during_recovery = false; - options.env = env_.get(); - options.create_if_missing = true; - - ASSERT_OK(DestroyDB(dbname_, options)); - Reopen(&options); - - ColumnFamilyHandle* cfh = nullptr; - const std::string test_cf_name = "test_cf"; - Status s = db_->CreateColumnFamily(options, test_cf_name, &cfh); - ASSERT_OK(s); - delete cfh; - - CloseDb(); - - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, options); - cf_descs.emplace_back(test_cf_name, options); - std::vector handles; - - { - ASSERT_OK(DB::Open(options, dbname_, cf_descs, &handles, &db_)); - - // Write one key to test_cf. - ASSERT_OK(db_->Put(WriteOptions(), handles[1], "old_key", "dontcare")); - - // Write to default_cf and flush this cf several times to advance wal - // number. - for (int i = 0; i < 2; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), "key" + std::to_string(i), - "value" + std::to_string(i))); - ASSERT_OK(db_->Flush(FlushOptions())); - } - - ASSERT_OK(db_->Put(WriteOptions(), handles[1], "dontcare", "dontcare")); - for (auto* h : handles) { - delete h; - } - handles.clear(); - CloseDb(); - } - - // Corrupt second last un-syned wal file to emulate power reset which - // caused the DB to lose the un-synced WAL. - { - std::vector file_nums; - GetSortedWalFiles(file_nums); - size_t size = file_nums.size(); - uint64_t log_num = file_nums[size - 1]; - CorruptFileWithTruncation(FileType::kWalFile, log_num, - /*bytes_to_truncate=*/8); - } - - // Fault is injected to fail the recovery. 
- { - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::GetLogSizeAndMaybeTruncate:0", [&](void* arg) { - auto* tmp_s = reinterpret_cast(arg); - assert(tmp_s); - *tmp_s = Status::IOError("Injected"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - handles.clear(); - options.avoid_flush_during_recovery = true; - s = DB::Open(options, dbname_, cf_descs, &handles, &db_); - ASSERT_TRUE(s.IsIOError()); - ASSERT_EQ("IO error: Injected", s.ToString()); - for (auto* h : handles) { - delete h; - } - CloseDb(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - } - - // Reopen db again - { - options.avoid_flush_during_recovery = avoid_flush_during_recovery_; - ASSERT_OK(DB::Open(options, dbname_, cf_descs, &handles, &db_)); - - // Verify that data is not lost. - { - std::string v; - ASSERT_OK(db_->Get(ReadOptions(), handles[1], "old_key", &v)); - ASSERT_EQ("dontcare", v); - - for (int i = 0; i < 2; ++i) { - v.clear(); - ASSERT_OK(db_->Get(ReadOptions(), "key" + std::to_string(i), &v)); - ASSERT_EQ("value" + std::to_string(i), v); - } - - // Since it's corrupting last wal after Flush, below key is not found. - v.clear(); - ASSERT_EQ(db_->Get(ReadOptions(), handles[1], "dontcare", &v), - Status::NotFound()); - } - - for (auto* h : handles) { - delete h; - } - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/db/cuckoo_table_db_test.cc b/db/cuckoo_table_db_test.cc deleted file mode 100644 index 7bd4dfda4..000000000 --- a/db/cuckoo_table_db_test.cc +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - - -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "table/cuckoo/cuckoo_table_factory.h" -#include "table/cuckoo/cuckoo_table_reader.h" -#include "table/meta_blocks.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/cast_util.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class CuckooTableDBTest : public testing::Test { - private: - std::string dbname_; - Env* env_; - DB* db_; - - public: - CuckooTableDBTest() : env_(Env::Default()) { - dbname_ = test::PerThreadDBPath("cuckoo_table_db_test"); - EXPECT_OK(DestroyDB(dbname_, Options())); - db_ = nullptr; - Reopen(); - } - - ~CuckooTableDBTest() override { - delete db_; - EXPECT_OK(DestroyDB(dbname_, Options())); - } - - Options CurrentOptions() { - Options options; - options.table_factory.reset(NewCuckooTableFactory()); - options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true)); - options.allow_mmap_reads = true; - options.create_if_missing = true; - options.allow_concurrent_memtable_write = false; - return options; - } - - DBImpl* dbfull() { return static_cast_with_check(db_); } - - // The following util methods are copied from plain_table_db_test. 
- void Reopen(Options* options = nullptr) { - delete db_; - db_ = nullptr; - Options opts; - if (options != nullptr) { - opts = *options; - } else { - opts = CurrentOptions(); - opts.create_if_missing = true; - } - ASSERT_OK(DB::Open(opts, dbname_, &db_)); - } - - void DestroyAndReopen(Options* options) { - assert(options); - ASSERT_OK(db_->Close()); - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, *options)); - Reopen(options); - } - - Status Put(const Slice& k, const Slice& v) { - return db_->Put(WriteOptions(), k, v); - } - - Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); } - - std::string Get(const std::string& k) { - ReadOptions options; - std::string result; - Status s = db_->Get(options, k, &result); - if (s.IsNotFound()) { - result = "NOT_FOUND"; - } else if (!s.ok()) { - result = s.ToString(); - } - return result; - } - - int NumTableFilesAtLevel(int level) { - std::string property; - EXPECT_TRUE(db_->GetProperty( - "rocksdb.num-files-at-level" + std::to_string(level), &property)); - return atoi(property.c_str()); - } - - // Return spread of files per level - std::string FilesPerLevel() { - std::string result; - size_t last_non_zero_offset = 0; - for (int level = 0; level < db_->NumberLevels(); level++) { - int f = NumTableFilesAtLevel(level); - char buf[100]; - snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f); - result += buf; - if (f > 0) { - last_non_zero_offset = result.size(); - } - } - result.resize(last_non_zero_offset); - return result; - } -}; - -TEST_F(CuckooTableDBTest, Flush) { - // Try with empty DB first. - ASSERT_TRUE(dbfull() != nullptr); - ASSERT_EQ("NOT_FOUND", Get("key2")); - - // Add some values to db. - Options options = CurrentOptions(); - Reopen(&options); - - ASSERT_OK(Put("key1", "v1")); - ASSERT_OK(Put("key2", "v2")); - ASSERT_OK(Put("key3", "v3")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - TablePropertiesCollection ptc; - ASSERT_OK(reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc)); - VerifySstUniqueIds(ptc); - ASSERT_EQ(1U, ptc.size()); - ASSERT_EQ(3U, ptc.begin()->second->num_entries); - ASSERT_EQ("1", FilesPerLevel()); - - ASSERT_EQ("v1", Get("key1")); - ASSERT_EQ("v2", Get("key2")); - ASSERT_EQ("v3", Get("key3")); - ASSERT_EQ("NOT_FOUND", Get("key4")); - - // Now add more keys and flush. 
- ASSERT_OK(Put("key4", "v4")); - ASSERT_OK(Put("key5", "v5")); - ASSERT_OK(Put("key6", "v6")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - ASSERT_OK(reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc)); - VerifySstUniqueIds(ptc); - ASSERT_EQ(2U, ptc.size()); - auto row = ptc.begin(); - ASSERT_EQ(3U, row->second->num_entries); - ASSERT_EQ(3U, (++row)->second->num_entries); - ASSERT_EQ("2", FilesPerLevel()); - ASSERT_EQ("v1", Get("key1")); - ASSERT_EQ("v2", Get("key2")); - ASSERT_EQ("v3", Get("key3")); - ASSERT_EQ("v4", Get("key4")); - ASSERT_EQ("v5", Get("key5")); - ASSERT_EQ("v6", Get("key6")); - - ASSERT_OK(Delete("key6")); - ASSERT_OK(Delete("key5")); - ASSERT_OK(Delete("key4")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_OK(reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc)); - VerifySstUniqueIds(ptc); - ASSERT_EQ(3U, ptc.size()); - row = ptc.begin(); - ASSERT_EQ(3U, row->second->num_entries); - ASSERT_EQ(3U, (++row)->second->num_entries); - ASSERT_EQ(3U, (++row)->second->num_entries); - ASSERT_EQ("3", FilesPerLevel()); - ASSERT_EQ("v1", Get("key1")); - ASSERT_EQ("v2", Get("key2")); - ASSERT_EQ("v3", Get("key3")); - ASSERT_EQ("NOT_FOUND", Get("key4")); - ASSERT_EQ("NOT_FOUND", Get("key5")); - ASSERT_EQ("NOT_FOUND", Get("key6")); -} - -TEST_F(CuckooTableDBTest, FlushWithDuplicateKeys) { - Options options = CurrentOptions(); - Reopen(&options); - ASSERT_OK(Put("key1", "v1")); - ASSERT_OK(Put("key2", "v2")); - ASSERT_OK(Put("key1", "v3")); // Duplicate - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - TablePropertiesCollection ptc; - ASSERT_OK(reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc)); - VerifySstUniqueIds(ptc); - ASSERT_EQ(1U, ptc.size()); - ASSERT_EQ(2U, ptc.begin()->second->num_entries); - ASSERT_EQ("1", FilesPerLevel()); - ASSERT_EQ("v3", Get("key1")); - ASSERT_EQ("v2", Get("key2")); -} - -namespace { -static std::string Key(int i) { - char buf[100]; - snprintf(buf, sizeof(buf), "key_______%06d", i); - return std::string(buf); -} -static std::string Uint64Key(uint64_t i) { - std::string str; - str.resize(8); - memcpy(&str[0], static_cast(&i), 8); - return str; -} -} // namespace. - -TEST_F(CuckooTableDBTest, Uint64Comparator) { - Options options = CurrentOptions(); - options.comparator = test::Uint64Comparator(); - DestroyAndReopen(&options); - - ASSERT_OK(Put(Uint64Key(1), "v1")); - ASSERT_OK(Put(Uint64Key(2), "v2")); - ASSERT_OK(Put(Uint64Key(3), "v3")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - ASSERT_EQ("v1", Get(Uint64Key(1))); - ASSERT_EQ("v2", Get(Uint64Key(2))); - ASSERT_EQ("v3", Get(Uint64Key(3))); - ASSERT_EQ("NOT_FOUND", Get(Uint64Key(4))); - - // Add more keys. - ASSERT_OK(Delete(Uint64Key(2))); // Delete. - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_OK(Put(Uint64Key(3), "v0")); // Update. - ASSERT_OK(Put(Uint64Key(4), "v4")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_EQ("v1", Get(Uint64Key(1))); - ASSERT_EQ("NOT_FOUND", Get(Uint64Key(2))); - ASSERT_EQ("v0", Get(Uint64Key(3))); - ASSERT_EQ("v4", Get(Uint64Key(4))); -} - -TEST_F(CuckooTableDBTest, CompactionIntoMultipleFiles) { - // Create a big L0 file and check it compacts into multiple files in L1. - Options options = CurrentOptions(); - options.write_buffer_size = 270 << 10; - // Two SST files should be created, each containing 14 keys. - // Number of buckets will be 16. Total size ~156 KB. 
- options.target_file_size_base = 160 << 10; - Reopen(&options); - - // Write 28 values, each 10016 B ~ 10KB - for (int idx = 0; idx < 28; ++idx) { - ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + char(idx)))); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ("1", FilesPerLevel()); - - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow trivial move */)); - ASSERT_EQ("0,2", FilesPerLevel()); - for (int idx = 0; idx < 28; ++idx) { - ASSERT_EQ(std::string(10000, 'a' + char(idx)), Get(Key(idx))); - } -} - -TEST_F(CuckooTableDBTest, SameKeyInsertedInTwoDifferentFilesAndCompacted) { - // Insert same key twice so that they go to different SST files. Then wait for - // compaction and check if the latest value is stored and old value removed. - Options options = CurrentOptions(); - options.write_buffer_size = 100 << 10; // 100KB - options.level0_file_num_compaction_trigger = 2; - Reopen(&options); - - // Write 11 values, each 10016 B - for (int idx = 0; idx < 11; ++idx) { - ASSERT_OK(Put(Key(idx), std::string(10000, 'a'))); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ("1", FilesPerLevel()); - - // Generate one more file in level-0, and should trigger level-0 compaction - for (int idx = 0; idx < 11; ++idx) { - ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + char(idx)))); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); - - ASSERT_EQ("0,1", FilesPerLevel()); - for (int idx = 0; idx < 11; ++idx) { - ASSERT_EQ(std::string(10000, 'a' + char(idx)), Get(Key(idx))); - } -} - -TEST_F(CuckooTableDBTest, AdaptiveTable) { - Options options = CurrentOptions(); - - // Ensure options compatible with PlainTable - options.prefix_extractor.reset(NewCappedPrefixTransform(8)); - - // Write some keys using cuckoo table. - options.table_factory.reset(NewCuckooTableFactory()); - Reopen(&options); - - ASSERT_OK(Put("key1", "v1")); - ASSERT_OK(Put("key2", "v2")); - ASSERT_OK(Put("key3", "v3")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - // Write some keys using plain table. - std::shared_ptr block_based_factory( - NewBlockBasedTableFactory()); - std::shared_ptr plain_table_factory(NewPlainTableFactory()); - std::shared_ptr cuckoo_table_factory(NewCuckooTableFactory()); - options.create_if_missing = false; - options.table_factory.reset( - NewAdaptiveTableFactory(plain_table_factory, block_based_factory, - plain_table_factory, cuckoo_table_factory)); - Reopen(&options); - ASSERT_OK(Put("key4", "v4")); - ASSERT_OK(Put("key1", "v5")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - // Write some keys using block based table. 
- options.table_factory.reset( - NewAdaptiveTableFactory(block_based_factory, block_based_factory, - plain_table_factory, cuckoo_table_factory)); - Reopen(&options); - ASSERT_OK(Put("key5", "v6")); - ASSERT_OK(Put("key2", "v7")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - ASSERT_EQ("v5", Get("key1")); - ASSERT_EQ("v7", Get("key2")); - ASSERT_EQ("v3", Get("key3")); - ASSERT_EQ("v4", Get("key4")); - ASSERT_EQ("v6", Get("key5")); -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - if (ROCKSDB_NAMESPACE::port::kLittleEndian) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); - } else { - fprintf(stderr, "SKIPPED as Cuckoo table doesn't support Big Endian\n"); - return 0; - } -} - diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc deleted file mode 100644 index 063b99839..000000000 --- a/db/db_basic_test.cc +++ /dev/null @@ -1,4777 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include - -#include "db/db_test_util.h" -#include "options/options_helper.h" -#include "port/stack_trace.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/flush_block_policy.h" -#include "rocksdb/merge_operator.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/table.h" -#include "rocksdb/utilities/debug.h" -#include "table/block_based/block_based_table_reader.h" -#include "table/block_based/block_builder.h" -#include "test_util/sync_point.h" -#include "util/file_checksum_helper.h" -#include "util/random.h" -#include "utilities/counted_fs.h" -#include "utilities/fault_injection_env.h" -#include "utilities/merge_operators.h" -#include "utilities/merge_operators/string_append/stringappend.h" - -namespace ROCKSDB_NAMESPACE { - -static bool enable_io_uring = true; -extern "C" bool RocksDbIOUringEnable() { return enable_io_uring; } - -class DBBasicTest : public DBTestBase { - public: - DBBasicTest() : DBTestBase("db_basic_test", /*env_do_fsync=*/false) {} -}; - -TEST_F(DBBasicTest, OpenWhenOpen) { - Options options = CurrentOptions(); - options.env = env_; - DB* db2 = nullptr; - Status s = DB::Open(options, dbname_, &db2); - ASSERT_NOK(s) << [db2]() { - delete db2; - return "db2 open: ok"; - }(); - ASSERT_EQ(Status::Code::kIOError, s.code()); - ASSERT_EQ(Status::SubCode::kNone, s.subcode()); - ASSERT_TRUE(strstr(s.getState(), "lock ") != nullptr); - - delete db2; -} - -TEST_F(DBBasicTest, EnableDirectIOWithZeroBuf) { - if (!IsDirectIOSupported()) { - ROCKSDB_GTEST_BYPASS("Direct IO not supported"); - return; - } - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.use_direct_io_for_flush_and_compaction = true; - options.writable_file_max_buffer_size = 0; - ASSERT_TRUE(TryReopen(options).IsInvalidArgument()); - - options.writable_file_max_buffer_size = 1024; - Reopen(options); - const std::unordered_map new_db_opts = { - {"writable_file_max_buffer_size", "0"}}; - ASSERT_TRUE(db_->SetDBOptions(new_db_opts).IsInvalidArgument()); -} - -TEST_F(DBBasicTest, UniqueSession) { - Options options = CurrentOptions(); - std::string sid1, sid2, 
sid3, sid4; - - ASSERT_OK(db_->GetDbSessionId(sid1)); - Reopen(options); - ASSERT_OK(db_->GetDbSessionId(sid2)); - ASSERT_OK(Put("foo", "v1")); - ASSERT_OK(db_->GetDbSessionId(sid4)); - Reopen(options); - ASSERT_OK(db_->GetDbSessionId(sid3)); - - ASSERT_NE(sid1, sid2); - ASSERT_NE(sid1, sid3); - ASSERT_NE(sid2, sid3); - - ASSERT_EQ(sid2, sid4); - - // Expected compact format for session ids (see notes in implementation) - TestRegex expected("[0-9A-Z]{20}"); - EXPECT_MATCHES_REGEX(sid1, expected); - EXPECT_MATCHES_REGEX(sid2, expected); - EXPECT_MATCHES_REGEX(sid3, expected); - - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - ASSERT_OK(db_->GetDbSessionId(sid1)); - // Test uniqueness between readonly open (sid1) and regular open (sid3) - ASSERT_NE(sid1, sid3); - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - ASSERT_OK(db_->GetDbSessionId(sid2)); - ASSERT_EQ("v1", Get("foo")); - ASSERT_OK(db_->GetDbSessionId(sid3)); - - ASSERT_NE(sid1, sid2); - - ASSERT_EQ(sid2, sid3); - - CreateAndReopenWithCF({"goku"}, options); - ASSERT_OK(db_->GetDbSessionId(sid1)); - ASSERT_OK(Put("bar", "e1")); - ASSERT_OK(db_->GetDbSessionId(sid2)); - ASSERT_EQ("e1", Get("bar")); - ASSERT_OK(db_->GetDbSessionId(sid3)); - ReopenWithColumnFamilies({"default", "goku"}, options); - ASSERT_OK(db_->GetDbSessionId(sid4)); - - ASSERT_EQ(sid1, sid2); - ASSERT_EQ(sid2, sid3); - - ASSERT_NE(sid1, sid4); -} - -TEST_F(DBBasicTest, ReadOnlyDB) { - ASSERT_OK(Put("foo", "v1")); - ASSERT_OK(Put("bar", "v2")); - ASSERT_OK(Put("foo", "v3")); - Close(); - - auto verify_one_iter = [&](Iterator* iter) { - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - ++count; - } - // Always expect two keys: "foo" and "bar" - ASSERT_EQ(count, 2); - }; - - auto verify_all_iters = [&]() { - Iterator* iter = db_->NewIterator(ReadOptions()); - verify_one_iter(iter); - delete iter; - - std::vector iters; - ASSERT_OK(db_->NewIterators(ReadOptions(), - {dbfull()->DefaultColumnFamily()}, &iters)); - ASSERT_EQ(static_cast(1), iters.size()); - verify_one_iter(iters[0]); - delete iters[0]; - }; - - auto options = CurrentOptions(); - assert(options.env == env_); - ASSERT_OK(ReadOnlyReopen(options)); - ASSERT_EQ("v3", Get("foo")); - ASSERT_EQ("v2", Get("bar")); - verify_all_iters(); - Close(); - - // Reopen and flush memtable. - Reopen(options); - ASSERT_OK(Flush()); - Close(); - // Now check keys in read only mode. - ASSERT_OK(ReadOnlyReopen(options)); - ASSERT_EQ("v3", Get("foo")); - ASSERT_EQ("v2", Get("bar")); - verify_all_iters(); - ASSERT_TRUE(db_->SyncWAL().IsNotSupported()); -} - -// TODO akanksha: Update the test to check that combination -// does not actually write to FS (use open read-only with -// CompositeEnvWrapper+ReadOnlyFileSystem). -TEST_F(DBBasicTest, DISABLED_ReadOnlyDBWithWriteDBIdToManifestSet) { - ASSERT_OK(Put("foo", "v1")); - ASSERT_OK(Put("bar", "v2")); - ASSERT_OK(Put("foo", "v3")); - Close(); - - auto options = CurrentOptions(); - options.write_dbid_to_manifest = true; - assert(options.env == env_); - ASSERT_OK(ReadOnlyReopen(options)); - std::string db_id1; - ASSERT_OK(db_->GetDbIdentity(db_id1)); - ASSERT_EQ("v3", Get("foo")); - ASSERT_EQ("v2", Get("bar")); - Iterator* iter = db_->NewIterator(ReadOptions()); - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - ++count; - } - ASSERT_EQ(count, 2); - delete iter; - Close(); - - // Reopen and flush memtable. 
- Reopen(options); - ASSERT_OK(Flush()); - Close(); - // Now check keys in read only mode. - ASSERT_OK(ReadOnlyReopen(options)); - ASSERT_EQ("v3", Get("foo")); - ASSERT_EQ("v2", Get("bar")); - ASSERT_TRUE(db_->SyncWAL().IsNotSupported()); - std::string db_id2; - ASSERT_OK(db_->GetDbIdentity(db_id2)); - ASSERT_EQ(db_id1, db_id2); -} - -TEST_F(DBBasicTest, CompactedDB) { - const uint64_t kFileSize = 1 << 20; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.write_buffer_size = kFileSize; - options.target_file_size_base = kFileSize; - options.max_bytes_for_level_base = 1 << 30; - options.compression = kNoCompression; - Reopen(options); - // 1 L0 file, use CompactedDB if max_open_files = -1 - ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, '1'))); - ASSERT_OK(Flush()); - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - Status s = Put("new", "value"); - ASSERT_EQ(s.ToString(), - "Not implemented: Not supported operation in read only mode."); - ASSERT_EQ(DummyString(kFileSize / 2, '1'), Get("aaa")); - Close(); - options.max_open_files = -1; - ASSERT_OK(ReadOnlyReopen(options)); - s = Put("new", "value"); - ASSERT_EQ(s.ToString(), - "Not implemented: Not supported in compacted db mode."); - ASSERT_EQ(DummyString(kFileSize / 2, '1'), Get("aaa")); - Close(); - Reopen(options); - // Add more L0 files - ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, '2'))); - ASSERT_OK(Flush()); - ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, 'a'))); - ASSERT_OK(Flush()); - ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, 'b'))); - ASSERT_OK(Put("eee", DummyString(kFileSize / 2, 'e'))); - ASSERT_OK(Flush()); - ASSERT_OK(Put("something_not_flushed", "x")); - Close(); - - ASSERT_OK(ReadOnlyReopen(options)); - // Fallback to read-only DB - s = Put("new", "value"); - ASSERT_EQ(s.ToString(), - "Not implemented: Not supported operation in read only mode."); - - // TODO: validate that other write ops return NotImplemented - // (DBImplReadOnly is missing some overrides) - - // Ensure no deadlock on flush triggered by another API function - // (Old deadlock bug depends on something_not_flushed above.) 
- std::vector files; - uint64_t manifest_file_size; - ASSERT_OK(db_->GetLiveFiles(files, &manifest_file_size, /*flush*/ true)); - LiveFilesStorageInfoOptions lfsi_opts; - lfsi_opts.wal_size_for_flush = 0; // always - std::vector files2; - ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsi_opts, &files2)); - - Close(); - - // Full compaction - Reopen(options); - // Add more keys - ASSERT_OK(Put("fff", DummyString(kFileSize / 2, 'f'))); - ASSERT_OK(Put("hhh", DummyString(kFileSize / 2, 'h'))); - ASSERT_OK(Put("iii", DummyString(kFileSize / 2, 'i'))); - ASSERT_OK(Put("jjj", DummyString(kFileSize / 2, 'j'))); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(3, NumTableFilesAtLevel(1)); - Close(); - - // CompactedDB - ASSERT_OK(ReadOnlyReopen(options)); - s = Put("new", "value"); - ASSERT_EQ(s.ToString(), - "Not implemented: Not supported in compacted db mode."); - ASSERT_EQ("NOT_FOUND", Get("abc")); - ASSERT_EQ(DummyString(kFileSize / 2, 'a'), Get("aaa")); - ASSERT_EQ(DummyString(kFileSize / 2, 'b'), Get("bbb")); - ASSERT_EQ("NOT_FOUND", Get("ccc")); - ASSERT_EQ(DummyString(kFileSize / 2, 'e'), Get("eee")); - ASSERT_EQ(DummyString(kFileSize / 2, 'f'), Get("fff")); - ASSERT_EQ("NOT_FOUND", Get("ggg")); - ASSERT_EQ(DummyString(kFileSize / 2, 'h'), Get("hhh")); - ASSERT_EQ(DummyString(kFileSize / 2, 'i'), Get("iii")); - ASSERT_EQ(DummyString(kFileSize / 2, 'j'), Get("jjj")); - ASSERT_EQ("NOT_FOUND", Get("kkk")); - - // TODO: validate that other write ops return NotImplemented - // (CompactedDB is missing some overrides) - - // Ensure no deadlock on flush triggered by another API function - ASSERT_OK(db_->GetLiveFiles(files, &manifest_file_size, /*flush*/ true)); - ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsi_opts, &files2)); - - // MultiGet - std::vector values; - std::vector status_list = dbfull()->MultiGet( - ReadOptions(), - std::vector({Slice("aaa"), Slice("ccc"), Slice("eee"), - Slice("ggg"), Slice("iii"), Slice("kkk")}), - &values); - ASSERT_EQ(status_list.size(), static_cast(6)); - ASSERT_EQ(values.size(), static_cast(6)); - ASSERT_OK(status_list[0]); - ASSERT_EQ(DummyString(kFileSize / 2, 'a'), values[0]); - ASSERT_TRUE(status_list[1].IsNotFound()); - ASSERT_OK(status_list[2]); - ASSERT_EQ(DummyString(kFileSize / 2, 'e'), values[2]); - ASSERT_TRUE(status_list[3].IsNotFound()); - ASSERT_OK(status_list[4]); - ASSERT_EQ(DummyString(kFileSize / 2, 'i'), values[4]); - ASSERT_TRUE(status_list[5].IsNotFound()); - - Reopen(options); - // Add a key - ASSERT_OK(Put("fff", DummyString(kFileSize / 2, 'f'))); - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - s = Put("new", "value"); - ASSERT_EQ(s.ToString(), - "Not implemented: Not supported operation in read only mode."); -} - -TEST_F(DBBasicTest, LevelLimitReopen) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - - const std::string value(1024 * 1024, ' '); - int i = 0; - while (NumTableFilesAtLevel(2, 1) == 0) { - ASSERT_OK(Put(1, Key(i++), value)); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - - options.num_levels = 1; - options.max_bytes_for_level_multiplier_additional.resize(1, 1); - Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ(s.IsInvalidArgument(), true); - ASSERT_EQ(s.ToString(), - "Invalid argument: db has more levels than options.num_levels"); - - options.num_levels = 10; - options.max_bytes_for_level_multiplier_additional.resize(10, 1); - 
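// Note: the ReadOnlyReopen()/CompactedDB checks above go through the DBTestBase
// harness. Outside the harness, the same read-only behaviour can be reproduced
// with DB::OpenForReadOnly; a minimal sketch, assuming an existing DB at the
// placeholder path `db_path` and eliding most error handling:
#include <cassert>
#include <string>
#include "rocksdb/db.h"

void ReadOnlyLookupSketch(const std::string& db_path) {
  rocksdb::Options options;
  // With all data compacted into a single level, max_open_files = -1 lets the
  // read-only open take the CompactedDB fast path exercised by the test above.
  options.max_open_files = -1;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::OpenForReadOnly(options, db_path, &db);
  assert(s.ok());

  std::string value;
  assert(db->Get(rocksdb::ReadOptions(), "aaa", &value).ok());  // reads work
  // Writes are rejected; the status prints as "Not implemented: ..." as
  // asserted in the test above.
  assert(db->Put(rocksdb::WriteOptions(), "new", "v").IsNotSupported());
  delete db;
}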
ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); -} - -TEST_F(DBBasicTest, PutDeleteGet) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_OK(Put(1, "foo", "v2")); - ASSERT_EQ("v2", Get(1, "foo")); - ASSERT_OK(Delete(1, "foo")); - ASSERT_EQ("NOT_FOUND", Get(1, "foo")); - } while (ChangeOptions()); -} - -TEST_F(DBBasicTest, PutSingleDeleteGet) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_OK(Put(1, "foo2", "v2")); - ASSERT_EQ("v2", Get(1, "foo2")); - ASSERT_OK(SingleDelete(1, "foo")); - ASSERT_EQ("NOT_FOUND", Get(1, "foo")); - // Ski FIFO and universal compaction because they do not apply to the test - // case. Skip MergePut because single delete does not get removed when it - // encounters a merge. - } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | - kSkipMergePut)); -} - -TEST_F(DBBasicTest, EmptyFlush) { - // It is possible to produce empty flushes when using single deletes. Tests - // whether empty flushes cause issues. - do { - Random rnd(301); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "a", Slice())); - ASSERT_OK(SingleDelete(1, "a")); - ASSERT_OK(Flush(1)); - - ASSERT_EQ("[ ]", AllEntriesFor("a", 1)); - // Skip FIFO and universal compaction as they do not apply to the test - // case. Skip MergePut because merges cannot be combined with single - // deletions. - } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | - kSkipMergePut)); -} - -TEST_F(DBBasicTest, GetFromVersions) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Flush(1)); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("NOT_FOUND", Get(0, "foo")); - } while (ChangeOptions()); -} - -TEST_F(DBBasicTest, GetSnapshot) { - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); - // Try with both a short key and a long key - for (int i = 0; i < 2; i++) { - std::string key = (i == 0) ? 
std::string("foo") : std::string(200, 'x'); - ASSERT_OK(Put(1, key, "v1")); - const Snapshot* s1 = db_->GetSnapshot(); - ASSERT_OK(Put(1, key, "v2")); - ASSERT_EQ("v2", Get(1, key)); - ASSERT_EQ("v1", Get(1, key, s1)); - ASSERT_OK(Flush(1)); - ASSERT_EQ("v2", Get(1, key)); - ASSERT_EQ("v1", Get(1, key, s1)); - db_->ReleaseSnapshot(s1); - } - } while (ChangeOptions()); -} - -TEST_F(DBBasicTest, CheckLock) { - do { - DB* localdb = nullptr; - Options options = CurrentOptions(); - ASSERT_OK(TryReopen(options)); - - // second open should fail - Status s = DB::Open(options, dbname_, &localdb); - ASSERT_NOK(s) << [localdb]() { - delete localdb; - return "localdb open: ok"; - }(); -#ifdef OS_LINUX - ASSERT_TRUE(s.ToString().find("lock ") != std::string::npos); -#endif // OS_LINUX - } while (ChangeCompactOptions()); -} - -TEST_F(DBBasicTest, FlushMultipleMemtable) { - do { - Options options = CurrentOptions(); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - options.max_write_buffer_size_to_maintain = -1; - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); - - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v1", Get(1, "bar")); - ASSERT_OK(Flush(1)); - } while (ChangeCompactOptions()); -} - -TEST_F(DBBasicTest, FlushEmptyColumnFamily) { - // Block flush thread and disable compaction thread - env_->SetBackgroundThreads(1, Env::HIGH); - env_->SetBackgroundThreads(1, Env::LOW); - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - test::SleepingBackgroundTask sleeping_task_high; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_task_high, Env::Priority::HIGH); - - Options options = CurrentOptions(); - // disable compaction - options.disable_auto_compactions = true; - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - options.max_write_buffer_number = 2; - options.min_write_buffer_number_to_merge = 1; - options.max_write_buffer_size_to_maintain = - static_cast(options.write_buffer_size); - CreateAndReopenWithCF({"pikachu"}, options); - - // Compaction can still go through even if no thread can flush the - // mem table. - ASSERT_OK(Flush(0)); - ASSERT_OK(Flush(1)); - - // Insert can go through - ASSERT_OK(dbfull()->Put(writeOpt, handles_[0], "foo", "v1")); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); - - ASSERT_EQ("v1", Get(0, "foo")); - ASSERT_EQ("v1", Get(1, "bar")); - - sleeping_task_high.WakeUp(); - sleeping_task_high.WaitUntilDone(); - - // Flush can still go through. 
- ASSERT_OK(Flush(0)); - ASSERT_OK(Flush(1)); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); -} - -TEST_F(DBBasicTest, Flush) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - SetPerfLevel(kEnableTime); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); - // this will now also flush the last 2 writes - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); - - get_perf_context()->Reset(); - Get(1, "foo"); - ASSERT_TRUE((int)get_perf_context()->get_from_output_files_time > 0); - ASSERT_EQ(2, (int)get_perf_context()->get_read_bytes); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v1", Get(1, "bar")); - - writeOpt.disableWAL = true; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2")); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2")); - ASSERT_OK(Flush(1)); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("v2", Get(1, "bar")); - get_perf_context()->Reset(); - ASSERT_EQ("v2", Get(1, "foo")); - ASSERT_TRUE((int)get_perf_context()->get_from_output_files_time > 0); - - writeOpt.disableWAL = false; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3")); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3")); - ASSERT_OK(Flush(1)); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - // 'foo' should be there because its put - // has WAL enabled. - ASSERT_EQ("v3", Get(1, "foo")); - ASSERT_EQ("v3", Get(1, "bar")); - - SetPerfLevel(kDisable); - } while (ChangeCompactOptions()); -} - -TEST_F(DBBasicTest, ManifestRollOver) { - do { - Options options; - options.max_manifest_file_size = 10; // 10 bytes - options = CurrentOptions(options); - CreateAndReopenWithCF({"pikachu"}, options); - { - ASSERT_OK(Put(1, "manifest_key1", std::string(1000, '1'))); - ASSERT_OK(Put(1, "manifest_key2", std::string(1000, '2'))); - ASSERT_OK(Put(1, "manifest_key3", std::string(1000, '3'))); - uint64_t manifest_before_flush = dbfull()->TEST_Current_Manifest_FileNo(); - ASSERT_OK(Flush(1)); // This should trigger LogAndApply. - uint64_t manifest_after_flush = dbfull()->TEST_Current_Manifest_FileNo(); - ASSERT_GT(manifest_after_flush, manifest_before_flush); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_GT(dbfull()->TEST_Current_Manifest_FileNo(), manifest_after_flush); - // check if a new manifest file got inserted or not. 
- ASSERT_EQ(std::string(1000, '1'), Get(1, "manifest_key1")); - ASSERT_EQ(std::string(1000, '2'), Get(1, "manifest_key2")); - ASSERT_EQ(std::string(1000, '3'), Get(1, "manifest_key3")); - } - } while (ChangeCompactOptions()); -} - -TEST_F(DBBasicTest, IdentityAcrossRestarts) { - constexpr size_t kMinIdSize = 10; - do { - for (bool with_manifest : {false, true}) { - std::string idfilename = IdentityFileName(dbname_); - std::string id1, tmp; - ASSERT_OK(db_->GetDbIdentity(id1)); - ASSERT_GE(id1.size(), kMinIdSize); - - Options options = CurrentOptions(); - options.write_dbid_to_manifest = with_manifest; - Reopen(options); - std::string id2; - ASSERT_OK(db_->GetDbIdentity(id2)); - // id2 should match id1 because identity was not regenerated - ASSERT_EQ(id1, id2); - ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); - ASSERT_EQ(tmp, id2); - - // Recover from deleted/missing IDENTITY - ASSERT_OK(env_->DeleteFile(idfilename)); - Reopen(options); - std::string id3; - ASSERT_OK(db_->GetDbIdentity(id3)); - if (with_manifest) { - // id3 should match id1 because identity was restored from manifest - ASSERT_EQ(id1, id3); - } else { - // id3 should NOT match id1 because identity was regenerated - ASSERT_NE(id1, id3); - ASSERT_GE(id3.size(), kMinIdSize); - } - ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); - ASSERT_EQ(tmp, id3); - - // Recover from truncated IDENTITY - { - std::unique_ptr w; - ASSERT_OK(env_->NewWritableFile(idfilename, &w, EnvOptions())); - ASSERT_OK(w->Close()); - } - Reopen(options); - std::string id4; - ASSERT_OK(db_->GetDbIdentity(id4)); - if (with_manifest) { - // id4 should match id1 because identity was restored from manifest - ASSERT_EQ(id1, id4); - } else { - // id4 should NOT match id1 because identity was regenerated - ASSERT_NE(id1, id4); - ASSERT_GE(id4.size(), kMinIdSize); - } - ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); - ASSERT_EQ(tmp, id4); - - // Recover from overwritten IDENTITY - std::string silly_id = "asdf123456789"; - { - std::unique_ptr w; - ASSERT_OK(env_->NewWritableFile(idfilename, &w, EnvOptions())); - ASSERT_OK(w->Append(silly_id)); - ASSERT_OK(w->Close()); - } - Reopen(options); - std::string id5; - ASSERT_OK(db_->GetDbIdentity(id5)); - if (with_manifest) { - // id4 should match id1 because identity was restored from manifest - ASSERT_EQ(id1, id5); - } else { - ASSERT_EQ(id5, silly_id); - } - ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); - ASSERT_EQ(tmp, id5); - } - } while (ChangeCompactOptions()); -} - -TEST_F(DBBasicTest, LockFileRecovery) { - Options options = CurrentOptions(); - // Regardless of best_efforts_recovery - for (bool ber : {false, true}) { - options.best_efforts_recovery = ber; - DestroyAndReopen(options); - std::string id1, id2; - ASSERT_OK(db_->GetDbIdentity(id1)); - Close(); - - // Should be OK to re-open DB after lock file deleted - std::string lockfilename = LockFileName(dbname_); - ASSERT_OK(env_->DeleteFile(lockfilename)); - Reopen(options); - - // Should be same DB as before - ASSERT_OK(db_->GetDbIdentity(id2)); - ASSERT_EQ(id1, id2); - } -} - -TEST_F(DBBasicTest, Snapshot) { - env_->SetMockSleep(); - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); - ASSERT_OK(Put(0, "foo", "0v1")); - ASSERT_OK(Put(1, "foo", "1v1")); - - const Snapshot* s1 = db_->GetSnapshot(); - ASSERT_EQ(1U, GetNumSnapshots()); - uint64_t time_snap1 = GetTimeOldestSnapshots(); - ASSERT_GT(time_snap1, 0U); - 
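// Note: the Get(cf, key, snapshot) helper used throughout this file appears to
// be a DBTestBase convenience wrapper around the public ReadOptions::snapshot
// field. A minimal stand-alone sketch of the same pattern against an already
// opened DB (assumptions: default column family only, error handling elided):
#include <cassert>
#include <string>
#include "rocksdb/db.h"

void SnapshotReadSketch(rocksdb::DB* db) {
  assert(db->Put(rocksdb::WriteOptions(), "foo", "v1").ok());
  const rocksdb::Snapshot* snap = db->GetSnapshot();  // pin the current sequence number
  assert(db->Put(rocksdb::WriteOptions(), "foo", "v2").ok());

  std::string value;
  rocksdb::ReadOptions ro;
  assert(db->Get(ro, "foo", &value).ok() && value == "v2");  // latest value

  ro.snapshot = snap;
  assert(db->Get(ro, "foo", &value).ok() && value == "v1");  // value as of the snapshot

  db->ReleaseSnapshot(snap);  // every GetSnapshot() needs a matching release
}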
ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - ASSERT_OK(Put(0, "foo", "0v2")); - ASSERT_OK(Put(1, "foo", "1v2")); - - env_->MockSleepForSeconds(1); - - const Snapshot* s2 = db_->GetSnapshot(); - ASSERT_EQ(2U, GetNumSnapshots()); - ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - ASSERT_OK(Put(0, "foo", "0v3")); - ASSERT_OK(Put(1, "foo", "1v3")); - - { - ManagedSnapshot s3(db_); - ASSERT_EQ(3U, GetNumSnapshots()); - ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - - ASSERT_OK(Put(0, "foo", "0v4")); - ASSERT_OK(Put(1, "foo", "1v4")); - ASSERT_EQ("0v1", Get(0, "foo", s1)); - ASSERT_EQ("1v1", Get(1, "foo", s1)); - ASSERT_EQ("0v2", Get(0, "foo", s2)); - ASSERT_EQ("1v2", Get(1, "foo", s2)); - ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot())); - ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot())); - ASSERT_EQ("0v4", Get(0, "foo")); - ASSERT_EQ("1v4", Get(1, "foo")); - } - - ASSERT_EQ(2U, GetNumSnapshots()); - ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - ASSERT_EQ("0v1", Get(0, "foo", s1)); - ASSERT_EQ("1v1", Get(1, "foo", s1)); - ASSERT_EQ("0v2", Get(0, "foo", s2)); - ASSERT_EQ("1v2", Get(1, "foo", s2)); - ASSERT_EQ("0v4", Get(0, "foo")); - ASSERT_EQ("1v4", Get(1, "foo")); - - db_->ReleaseSnapshot(s1); - ASSERT_EQ("0v2", Get(0, "foo", s2)); - ASSERT_EQ("1v2", Get(1, "foo", s2)); - ASSERT_EQ("0v4", Get(0, "foo")); - ASSERT_EQ("1v4", Get(1, "foo")); - ASSERT_EQ(1U, GetNumSnapshots()); - ASSERT_LT(time_snap1, GetTimeOldestSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), s2->GetSequenceNumber()); - - db_->ReleaseSnapshot(s2); - ASSERT_EQ(0U, GetNumSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), 0); - ASSERT_EQ("0v4", Get(0, "foo")); - ASSERT_EQ("1v4", Get(1, "foo")); - } while (ChangeOptions()); -} - - -class DBBasicMultiConfigs : public DBBasicTest, - public ::testing::WithParamInterface { - public: - DBBasicMultiConfigs() { option_config_ = GetParam(); } - - static std::vector GenerateOptionConfigs() { - std::vector option_configs; - for (int option_config = kDefault; option_config < kEnd; ++option_config) { - if (!ShouldSkipOptions(option_config, kSkipFIFOCompaction)) { - option_configs.push_back(option_config); - } - } - return option_configs; - } -}; - -TEST_P(DBBasicMultiConfigs, CompactBetweenSnapshots) { - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - Options options = CurrentOptions(options_override); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - Random rnd(301); - FillLevels("a", "z", 1); - - ASSERT_OK(Put(1, "foo", "first")); - const Snapshot* snapshot1 = db_->GetSnapshot(); - ASSERT_OK(Put(1, "foo", "second")); - ASSERT_OK(Put(1, "foo", "third")); - ASSERT_OK(Put(1, "foo", "fourth")); - const Snapshot* snapshot2 = db_->GetSnapshot(); - ASSERT_OK(Put(1, "foo", "fifth")); - ASSERT_OK(Put(1, "foo", "sixth")); - - // All entries (including duplicates) exist - // before any compaction or flush is triggered. 
- ASSERT_EQ(AllEntriesFor("foo", 1), - "[ sixth, fifth, fourth, third, second, first ]"); - ASSERT_EQ("sixth", Get(1, "foo")); - ASSERT_EQ("fourth", Get(1, "foo", snapshot2)); - ASSERT_EQ("first", Get(1, "foo", snapshot1)); - - // After a flush, "second", "third" and "fifth" should - // be removed - ASSERT_OK(Flush(1)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth, first ]"); - - // after we release the snapshot1, only two values left - db_->ReleaseSnapshot(snapshot1); - FillLevels("a", "z", 1); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr)); - - // We have only one valid snapshot snapshot2. Since snapshot1 is - // not valid anymore, "first" should be removed by a compaction. - ASSERT_EQ("sixth", Get(1, "foo")); - ASSERT_EQ("fourth", Get(1, "foo", snapshot2)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth ]"); - - // after we release the snapshot2, only one value should be left - db_->ReleaseSnapshot(snapshot2); - FillLevels("a", "z", 1); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr)); - ASSERT_EQ("sixth", Get(1, "foo")); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth ]"); -} - -INSTANTIATE_TEST_CASE_P( - DBBasicMultiConfigs, DBBasicMultiConfigs, - ::testing::ValuesIn(DBBasicMultiConfigs::GenerateOptionConfigs())); - -TEST_F(DBBasicTest, DBOpen_Options) { - Options options = CurrentOptions(); - Close(); - Destroy(options); - - // Does not exist, and create_if_missing == false: error - DB* db = nullptr; - options.create_if_missing = false; - Status s = DB::Open(options, dbname_, &db); - ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr); - ASSERT_TRUE(db == nullptr); - - // Does not exist, and create_if_missing == true: OK - options.create_if_missing = true; - s = DB::Open(options, dbname_, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != nullptr); - - delete db; - db = nullptr; - - // Does exist, and error_if_exists == true: error - options.create_if_missing = false; - options.error_if_exists = true; - s = DB::Open(options, dbname_, &db); - ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr); - ASSERT_TRUE(db == nullptr); - - // Does exist, and error_if_exists == false: OK - options.create_if_missing = true; - options.error_if_exists = false; - s = DB::Open(options, dbname_, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != nullptr); - - delete db; - db = nullptr; -} - -TEST_F(DBBasicTest, CompactOnFlush) { - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - do { - Options options = CurrentOptions(options_override); - options.disable_auto_compactions = true; - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Flush(1)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v1 ]"); - - // Write two new keys - ASSERT_OK(Put(1, "a", "begin")); - ASSERT_OK(Put(1, "z", "end")); - ASSERT_OK(Flush(1)); - - // Case1: Delete followed by a put - ASSERT_OK(Delete(1, "foo")); - ASSERT_OK(Put(1, "foo", "v2")); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]"); - - // After the current memtable is flushed, the DEL should - // have been removed - ASSERT_OK(Flush(1)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]"); - - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]"); - - // Case 2: Delete followed by another delete - ASSERT_OK(Delete(1, "foo")); - ASSERT_OK(Delete(1, "foo")); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ 
DEL, DEL, v2 ]"); - ASSERT_OK(Flush(1)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v2 ]"); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); - - // Case 3: Put followed by a delete - ASSERT_OK(Put(1, "foo", "v3")); - ASSERT_OK(Delete(1, "foo")); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v3 ]"); - ASSERT_OK(Flush(1)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL ]"); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); - - // Case 4: Put followed by another Put - ASSERT_OK(Put(1, "foo", "v4")); - ASSERT_OK(Put(1, "foo", "v5")); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5, v4 ]"); - ASSERT_OK(Flush(1)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]"); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]"); - - // clear database - ASSERT_OK(Delete(1, "foo")); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); - - // Case 5: Put followed by snapshot followed by another Put - // Both puts should remain. - ASSERT_OK(Put(1, "foo", "v6")); - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(Put(1, "foo", "v7")); - ASSERT_OK(Flush(1)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v7, v6 ]"); - db_->ReleaseSnapshot(snapshot); - - // clear database - ASSERT_OK(Delete(1, "foo")); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); - - // Case 5: snapshot followed by a put followed by another Put - // Only the last put should remain. 
- const Snapshot* snapshot1 = db_->GetSnapshot(); - ASSERT_OK(Put(1, "foo", "v8")); - ASSERT_OK(Put(1, "foo", "v9")); - ASSERT_OK(Flush(1)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v9 ]"); - db_->ReleaseSnapshot(snapshot1); - } while (ChangeCompactOptions()); -} - -TEST_F(DBBasicTest, FlushOneColumnFamily) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich", - "alyosha", "popovich"}, - options); - - ASSERT_OK(Put(0, "Default", "Default")); - ASSERT_OK(Put(1, "pikachu", "pikachu")); - ASSERT_OK(Put(2, "ilya", "ilya")); - ASSERT_OK(Put(3, "muromec", "muromec")); - ASSERT_OK(Put(4, "dobrynia", "dobrynia")); - ASSERT_OK(Put(5, "nikitich", "nikitich")); - ASSERT_OK(Put(6, "alyosha", "alyosha")); - ASSERT_OK(Put(7, "popovich", "popovich")); - - for (int i = 0; i < 8; ++i) { - ASSERT_OK(Flush(i)); - auto tables = ListTableFiles(env_, dbname_); - ASSERT_EQ(tables.size(), i + 1U); - } -} - -TEST_F(DBBasicTest, MultiGetSimple) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - SetPerfLevel(kEnableCount); - ASSERT_OK(Put(1, "k1", "v1")); - ASSERT_OK(Put(1, "k2", "v2")); - ASSERT_OK(Put(1, "k3", "v3")); - ASSERT_OK(Put(1, "k4", "v4")); - ASSERT_OK(Delete(1, "k4")); - ASSERT_OK(Put(1, "k5", "v5")); - ASSERT_OK(Delete(1, "no_key")); - - std::vector keys({"k1", "k2", "k3", "k4", "k5", "no_key"}); - - std::vector values(20, "Temporary data to be overwritten"); - std::vector cfs(keys.size(), handles_[1]); - - get_perf_context()->Reset(); - std::vector s = db_->MultiGet(ReadOptions(), cfs, keys, &values); - ASSERT_EQ(values.size(), keys.size()); - ASSERT_EQ(values[0], "v1"); - ASSERT_EQ(values[1], "v2"); - ASSERT_EQ(values[2], "v3"); - ASSERT_EQ(values[4], "v5"); - // four kv pairs * two bytes per value - ASSERT_EQ(8, (int)get_perf_context()->multiget_read_bytes); - - ASSERT_OK(s[0]); - ASSERT_OK(s[1]); - ASSERT_OK(s[2]); - ASSERT_TRUE(s[3].IsNotFound()); - ASSERT_OK(s[4]); - ASSERT_TRUE(s[5].IsNotFound()); - SetPerfLevel(kDisable); - } while (ChangeCompactOptions()); -} - -TEST_F(DBBasicTest, MultiGetEmpty) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - // Empty Key Set - std::vector keys; - std::vector values; - std::vector cfs; - std::vector s = db_->MultiGet(ReadOptions(), cfs, keys, &values); - ASSERT_EQ(s.size(), 0U); - - // Empty Database, Empty Key Set - Options options = CurrentOptions(); - options.create_if_missing = true; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - s = db_->MultiGet(ReadOptions(), cfs, keys, &values); - ASSERT_EQ(s.size(), 0U); - - // Empty Database, Search for Keys - keys.resize(2); - keys[0] = "a"; - keys[1] = "b"; - cfs.push_back(handles_[0]); - cfs.push_back(handles_[1]); - s = db_->MultiGet(ReadOptions(), cfs, keys, &values); - ASSERT_EQ(static_cast(s.size()), 2); - ASSERT_TRUE(s[0].IsNotFound() && s[1].IsNotFound()); - } while (ChangeCompactOptions()); -} - -class DBBlockChecksumTest : public DBBasicTest, - public testing::WithParamInterface {}; - -INSTANTIATE_TEST_CASE_P(FormatVersions, DBBlockChecksumTest, - testing::ValuesIn(test::kFooterFormatVersionsToTest)); - -TEST_P(DBBlockChecksumTest, BlockChecksumTest) { - BlockBasedTableOptions table_options; - table_options.format_version = GetParam(); - Options options = CurrentOptions(); - const int kNumPerFile = 2; - - const auto algs = GetSupportedChecksums(); - const int algs_size = static_cast(algs.size()); - - // generate one table with each type of checksum - for (int i = 0; i < 
algs_size; ++i) { - table_options.checksum = algs[i]; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - for (int j = 0; j < kNumPerFile; ++j) { - ASSERT_OK(Put(Key(i * kNumPerFile + j), Key(i * kNumPerFile + j))); - } - ASSERT_OK(Flush()); - } - - // with each valid checksum type setting... - for (int i = 0; i < algs_size; ++i) { - table_options.checksum = algs[i]; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - // verify every type of checksum (should be regardless of that setting) - for (int j = 0; j < algs_size * kNumPerFile; ++j) { - ASSERT_EQ(Key(j), Get(Key(j))); - } - } - - // Now test invalid checksum type - table_options.checksum = static_cast(123); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - ASSERT_TRUE(TryReopen(options).IsInvalidArgument()); -} - -// On Windows you can have either memory mapped file or a file -// with unbuffered access. So this asserts and does not make -// sense to run -#ifndef OS_WIN -TEST_F(DBBasicTest, MmapAndBufferOptions) { - if (!IsMemoryMappedAccessSupported()) { - return; - } - Options options = CurrentOptions(); - - options.use_direct_reads = true; - options.allow_mmap_reads = true; - ASSERT_NOK(TryReopen(options)); - - // All other combinations are acceptable - options.use_direct_reads = false; - ASSERT_OK(TryReopen(options)); - - if (IsDirectIOSupported()) { - options.use_direct_reads = true; - options.allow_mmap_reads = false; - ASSERT_OK(TryReopen(options)); - } - - options.use_direct_reads = false; - ASSERT_OK(TryReopen(options)); -} -#endif - -class TestEnv : public EnvWrapper { - public: - explicit TestEnv(Env* base_env) : EnvWrapper(base_env), close_count(0) {} - static const char* kClassName() { return "TestEnv"; } - const char* Name() const override { return kClassName(); } - - class TestLogger : public Logger { - public: - using Logger::Logv; - explicit TestLogger(TestEnv* env_ptr) : Logger() { env = env_ptr; } - ~TestLogger() override { - if (!closed_) { - CloseHelper().PermitUncheckedError(); - } - } - void Logv(const char* /*format*/, va_list /*ap*/) override {} - - protected: - Status CloseImpl() override { return CloseHelper(); } - - private: - Status CloseHelper() { - env->CloseCountInc(); - ; - return Status::IOError(); - } - TestEnv* env; - }; - - void CloseCountInc() { close_count++; } - - int GetCloseCount() { return close_count; } - - Status NewLogger(const std::string& /*fname*/, - std::shared_ptr* result) override { - result->reset(new TestLogger(this)); - return Status::OK(); - } - - private: - int close_count; -}; - -TEST_F(DBBasicTest, DBClose) { - Options options = GetDefaultOptions(); - std::string dbname = test::PerThreadDBPath("db_close_test"); - ASSERT_OK(DestroyDB(dbname, options)); - - DB* db = nullptr; - TestEnv* env = new TestEnv(env_); - std::unique_ptr local_env_guard(env); - options.create_if_missing = true; - options.env = env; - Status s = DB::Open(options, dbname, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != nullptr); - - s = db->Close(); - ASSERT_EQ(env->GetCloseCount(), 1); - ASSERT_EQ(s, Status::IOError()); - - delete db; - ASSERT_EQ(env->GetCloseCount(), 1); - - // Do not call DB::Close() and ensure our logger Close() still gets called - s = DB::Open(options, dbname, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != nullptr); - delete db; - ASSERT_EQ(env->GetCloseCount(), 2); - - // Provide our own logger and ensure DB::Close() does not close it - options.info_log.reset(new 
TestEnv::TestLogger(env)); - options.create_if_missing = false; - s = DB::Open(options, dbname, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != nullptr); - - s = db->Close(); - ASSERT_EQ(s, Status::OK()); - delete db; - ASSERT_EQ(env->GetCloseCount(), 2); - options.info_log.reset(); - ASSERT_EQ(env->GetCloseCount(), 3); -} - -TEST_F(DBBasicTest, DBCloseAllDirectoryFDs) { - Options options = GetDefaultOptions(); - std::string dbname = test::PerThreadDBPath("db_close_all_dir_fds_test"); - // Configure a specific WAL directory - options.wal_dir = dbname + "_wal_dir"; - // Configure 3 different data directories - options.db_paths.emplace_back(dbname + "_1", 512 * 1024); - options.db_paths.emplace_back(dbname + "_2", 4 * 1024 * 1024); - options.db_paths.emplace_back(dbname + "_3", 1024 * 1024 * 1024); - - ASSERT_OK(DestroyDB(dbname, options)); - - DB* db = nullptr; - std::unique_ptr env = NewCompositeEnv( - std::make_shared(FileSystem::Default())); - options.create_if_missing = true; - options.env = env.get(); - Status s = DB::Open(options, dbname, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != nullptr); - - // Explicitly close the database to ensure the open and close counter for - // directories are equivalent - s = db->Close(); - auto* counted_fs = - options.env->GetFileSystem()->CheckedCast(); - ASSERT_TRUE(counted_fs != nullptr); - ASSERT_EQ(counted_fs->counters()->dir_opens, - counted_fs->counters()->dir_closes); - ASSERT_OK(s); - delete db; -} - -TEST_F(DBBasicTest, DBCloseFlushError) { - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.manual_wal_flush = true; - options.write_buffer_size = 100; - options.env = fault_injection_env.get(); - - Reopen(options); - ASSERT_OK(Put("key1", "value1")); - ASSERT_OK(Put("key2", "value2")); - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - ASSERT_OK(Put("key3", "value3")); - fault_injection_env->SetFilesystemActive(false); - Status s = dbfull()->Close(); - ASSERT_NE(s, Status::OK()); - // retry should return the same error - s = dbfull()->Close(); - ASSERT_NE(s, Status::OK()); - fault_injection_env->SetFilesystemActive(true); - // retry close() is no-op even the system is back. 
Could be improved if - // Close() is retry-able: #9029 - s = dbfull()->Close(); - ASSERT_NE(s, Status::OK()); - Destroy(options); -} - -class DBMultiGetTestWithParam - : public DBBasicTest, - public testing::WithParamInterface> {}; - -TEST_P(DBMultiGetTestWithParam, MultiGetMultiCF) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich", - "alyosha", "popovich"}, - options); - // tuples - std::vector> cf_kv_vec; - static const int num_keys = 24; - cf_kv_vec.reserve(num_keys); - - for (int i = 0; i < num_keys; ++i) { - int cf = i / 3; - int cf_key = 1 % 3; - cf_kv_vec.emplace_back(std::make_tuple( - cf, "cf" + std::to_string(cf) + "_key_" + std::to_string(cf_key), - "cf" + std::to_string(cf) + "_val_" + std::to_string(cf_key))); - ASSERT_OK(Put(std::get<0>(cf_kv_vec[i]), std::get<1>(cf_kv_vec[i]), - std::get<2>(cf_kv_vec[i]))); - } - - int get_sv_count = 0; - ROCKSDB_NAMESPACE::DBImpl* db = static_cast_with_check(db_); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::MultiGet::AfterRefSV", [&](void* /*arg*/) { - if (++get_sv_count == 2) { - // After MultiGet refs a couple of CFs, flush all CFs so MultiGet - // is forced to repeat the process - for (int i = 0; i < num_keys; ++i) { - int cf = i / 3; - int cf_key = i % 8; - if (cf_key == 0) { - ASSERT_OK(Flush(cf)); - } - ASSERT_OK(Put(std::get<0>(cf_kv_vec[i]), std::get<1>(cf_kv_vec[i]), - std::get<2>(cf_kv_vec[i]) + "_2")); - } - } - if (get_sv_count == 11) { - for (int i = 0; i < 8; ++i) { - auto* cfd = static_cast_with_check( - db->GetColumnFamilyHandle(i)) - ->cfd(); - ASSERT_EQ(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - std::vector cfs; - std::vector keys; - std::vector values; - - for (int i = 0; i < num_keys; ++i) { - cfs.push_back(std::get<0>(cf_kv_vec[i])); - keys.push_back(std::get<1>(cf_kv_vec[i])); - } - - values = MultiGet(cfs, keys, nullptr, std::get<0>(GetParam()), - std::get<1>(GetParam())); - ASSERT_EQ(values.size(), num_keys); - for (unsigned int j = 0; j < values.size(); ++j) { - ASSERT_EQ(values[j], std::get<2>(cf_kv_vec[j]) + "_2"); - } - - keys.clear(); - cfs.clear(); - cfs.push_back(std::get<0>(cf_kv_vec[0])); - keys.push_back(std::get<1>(cf_kv_vec[0])); - cfs.push_back(std::get<0>(cf_kv_vec[3])); - keys.push_back(std::get<1>(cf_kv_vec[3])); - cfs.push_back(std::get<0>(cf_kv_vec[4])); - keys.push_back(std::get<1>(cf_kv_vec[4])); - values = MultiGet(cfs, keys, nullptr, std::get<0>(GetParam()), - std::get<1>(GetParam())); - ASSERT_EQ(values[0], std::get<2>(cf_kv_vec[0]) + "_2"); - ASSERT_EQ(values[1], std::get<2>(cf_kv_vec[3]) + "_2"); - ASSERT_EQ(values[2], std::get<2>(cf_kv_vec[4]) + "_2"); - - keys.clear(); - cfs.clear(); - cfs.push_back(std::get<0>(cf_kv_vec[7])); - keys.push_back(std::get<1>(cf_kv_vec[7])); - cfs.push_back(std::get<0>(cf_kv_vec[6])); - keys.push_back(std::get<1>(cf_kv_vec[6])); - cfs.push_back(std::get<0>(cf_kv_vec[1])); - keys.push_back(std::get<1>(cf_kv_vec[1])); - values = MultiGet(cfs, keys, nullptr, std::get<0>(GetParam()), - std::get<1>(GetParam())); - ASSERT_EQ(values[0], std::get<2>(cf_kv_vec[7]) + "_2"); - ASSERT_EQ(values[1], std::get<2>(cf_kv_vec[6]) + "_2"); - ASSERT_EQ(values[2], std::get<2>(cf_kv_vec[1]) + "_2"); - - for (int cf = 0; cf < 8; ++cf) { - 
auto* cfd = - static_cast_with_check( - static_cast_with_check(db_)->GetColumnFamilyHandle(cf)) - ->cfd(); - ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); - ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVObsolete); - } -} - -TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFMutex) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich", - "alyosha", "popovich"}, - options); - - for (int i = 0; i < 8; ++i) { - ASSERT_OK(Put(i, "cf" + std::to_string(i) + "_key", - "cf" + std::to_string(i) + "_val")); - } - - int get_sv_count = 0; - int retries = 0; - bool last_try = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::MultiGet::LastTry", [&](void* /*arg*/) { - last_try = true; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::MultiGet::AfterRefSV", [&](void* /*arg*/) { - if (last_try) { - return; - } - if (++get_sv_count == 2) { - ++retries; - get_sv_count = 0; - for (int i = 0; i < 8; ++i) { - ASSERT_OK(Flush(i)); - ASSERT_OK(Put( - i, "cf" + std::to_string(i) + "_key", - "cf" + std::to_string(i) + "_val" + std::to_string(retries))); - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - std::vector cfs; - std::vector keys; - std::vector values; - - for (int i = 0; i < 8; ++i) { - cfs.push_back(i); - keys.push_back("cf" + std::to_string(i) + "_key"); - } - - values = MultiGet(cfs, keys, nullptr, std::get<0>(GetParam()), - std::get<1>(GetParam())); - ASSERT_TRUE(last_try); - ASSERT_EQ(values.size(), 8); - for (unsigned int j = 0; j < values.size(); ++j) { - ASSERT_EQ(values[j], - "cf" + std::to_string(j) + "_val" + std::to_string(retries)); - } - for (int i = 0; i < 8; ++i) { - auto* cfd = - static_cast_with_check( - static_cast_with_check(db_)->GetColumnFamilyHandle(i)) - ->cfd(); - ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); - } -} - -TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFSnapshot) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich", - "alyosha", "popovich"}, - options); - - for (int i = 0; i < 8; ++i) { - ASSERT_OK(Put(i, "cf" + std::to_string(i) + "_key", - "cf" + std::to_string(i) + "_val")); - } - - int get_sv_count = 0; - ROCKSDB_NAMESPACE::DBImpl* db = static_cast_with_check(db_); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::MultiGet::AfterRefSV", [&](void* /*arg*/) { - if (++get_sv_count == 2) { - for (int i = 0; i < 8; ++i) { - ASSERT_OK(Flush(i)); - ASSERT_OK(Put(i, "cf" + std::to_string(i) + "_key", - "cf" + std::to_string(i) + "_val2")); - } - } - if (get_sv_count == 8) { - for (int i = 0; i < 8; ++i) { - auto* cfd = static_cast_with_check( - db->GetColumnFamilyHandle(i)) - ->cfd(); - ASSERT_TRUE( - (cfd->TEST_GetLocalSV()->Get() == SuperVersion::kSVInUse) || - (cfd->TEST_GetLocalSV()->Get() == SuperVersion::kSVObsolete)); - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - std::vector cfs; - std::vector keys; - std::vector values; - - for (int i = 0; i < 8; ++i) { - 
cfs.push_back(i); - keys.push_back("cf" + std::to_string(i) + "_key"); - } - - const Snapshot* snapshot = db_->GetSnapshot(); - values = MultiGet(cfs, keys, snapshot, std::get<0>(GetParam()), - std::get<1>(GetParam())); - db_->ReleaseSnapshot(snapshot); - ASSERT_EQ(values.size(), 8); - for (unsigned int j = 0; j < values.size(); ++j) { - ASSERT_EQ(values[j], "cf" + std::to_string(j) + "_val"); - } - for (int i = 0; i < 8; ++i) { - auto* cfd = - static_cast_with_check( - static_cast_with_check(db_)->GetColumnFamilyHandle(i)) - ->cfd(); - ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); - } -} - -TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFUnsorted) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - Options options = CurrentOptions(); - CreateAndReopenWithCF({"one", "two"}, options); - - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Put(2, "baz", "xyz")); - ASSERT_OK(Put(1, "abc", "def")); - - // Note: keys for the same CF do not form a consecutive range - std::vector cfs{1, 2, 1}; - std::vector keys{"foo", "baz", "abc"}; - std::vector values; - - values = MultiGet(cfs, keys, /* snapshot */ nullptr, - /* batched */ std::get<0>(GetParam()), - /* async */ std::get<1>(GetParam())); - - ASSERT_EQ(values.size(), 3); - ASSERT_EQ(values[0], "bar"); - ASSERT_EQ(values[1], "xyz"); - ASSERT_EQ(values[2], "def"); -} - -TEST_P(DBMultiGetTestWithParam, MultiGetBatchedSimpleUnsorted) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - // Skip for unbatched MultiGet - if (!std::get<0>(GetParam())) { - ROCKSDB_GTEST_BYPASS("This test is only for batched MultiGet"); - return; - } - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - SetPerfLevel(kEnableCount); - ASSERT_OK(Put(1, "k1", "v1")); - ASSERT_OK(Put(1, "k2", "v2")); - ASSERT_OK(Put(1, "k3", "v3")); - ASSERT_OK(Put(1, "k4", "v4")); - ASSERT_OK(Delete(1, "k4")); - ASSERT_OK(Put(1, "k5", "v5")); - ASSERT_OK(Delete(1, "no_key")); - - get_perf_context()->Reset(); - - std::vector keys({"no_key", "k5", "k4", "k3", "k2", "k1"}); - std::vector values(keys.size()); - std::vector cfs(keys.size(), handles_[1]); - std::vector s(keys.size()); - - ReadOptions ro; - ro.async_io = std::get<1>(GetParam()); - db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(), - s.data(), false); - - ASSERT_EQ(values.size(), keys.size()); - ASSERT_EQ(std::string(values[5].data(), values[5].size()), "v1"); - ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v2"); - ASSERT_EQ(std::string(values[3].data(), values[3].size()), "v3"); - ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v5"); - // four kv pairs * two bytes per value - ASSERT_EQ(8, (int)get_perf_context()->multiget_read_bytes); - - ASSERT_TRUE(s[0].IsNotFound()); - ASSERT_OK(s[1]); - ASSERT_TRUE(s[2].IsNotFound()); - ASSERT_OK(s[3]); - ASSERT_OK(s[4]); - ASSERT_OK(s[5]); - - SetPerfLevel(kDisable); - } while (ChangeCompactOptions()); -} - -TEST_P(DBMultiGetTestWithParam, MultiGetBatchedSortedMultiFile) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - // Skip for unbatched MultiGet - if (!std::get<0>(GetParam())) { - ROCKSDB_GTEST_BYPASS("This test is only for batched MultiGet"); - return; - } - do { - 
CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - SetPerfLevel(kEnableCount); - // To expand the power of this test, generate > 1 table file and - // mix with memtable - ASSERT_OK(Put(1, "k1", "v1")); - ASSERT_OK(Put(1, "k2", "v2")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "k3", "v3")); - ASSERT_OK(Put(1, "k4", "v4")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Delete(1, "k4")); - ASSERT_OK(Put(1, "k5", "v5")); - ASSERT_OK(Delete(1, "no_key")); - - get_perf_context()->Reset(); - - std::vector keys({"k1", "k2", "k3", "k4", "k5", "no_key"}); - std::vector values(keys.size()); - std::vector cfs(keys.size(), handles_[1]); - std::vector s(keys.size()); - - ReadOptions ro; - ro.async_io = std::get<1>(GetParam()); - db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(), - s.data(), true); - - ASSERT_EQ(values.size(), keys.size()); - ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v1"); - ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v2"); - ASSERT_EQ(std::string(values[2].data(), values[2].size()), "v3"); - ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v5"); - // four kv pairs * two bytes per value - ASSERT_EQ(8, (int)get_perf_context()->multiget_read_bytes); - - ASSERT_OK(s[0]); - ASSERT_OK(s[1]); - ASSERT_OK(s[2]); - ASSERT_TRUE(s[3].IsNotFound()); - ASSERT_OK(s[4]); - ASSERT_TRUE(s[5].IsNotFound()); - - SetPerfLevel(kDisable); - } while (ChangeOptions()); -} - -TEST_P(DBMultiGetTestWithParam, MultiGetBatchedDuplicateKeys) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - // Skip for unbatched MultiGet - if (!std::get<0>(GetParam())) { - ROCKSDB_GTEST_BYPASS("This test is only for batched MultiGet"); - return; - } - Options opts = CurrentOptions(); - opts.merge_operator = MergeOperators::CreateStringAppendOperator(); - CreateAndReopenWithCF({"pikachu"}, opts); - SetPerfLevel(kEnableCount); - // To expand the power of this test, generate > 1 table file and - // mix with memtable - ASSERT_OK(Merge(1, "k1", "v1")); - ASSERT_OK(Merge(1, "k2", "v2")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(2, 1); - ASSERT_OK(Merge(1, "k3", "v3")); - ASSERT_OK(Merge(1, "k4", "v4")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(2, 1); - ASSERT_OK(Merge(1, "k4", "v4_2")); - ASSERT_OK(Merge(1, "k6", "v6")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(2, 1); - ASSERT_OK(Merge(1, "k7", "v7")); - ASSERT_OK(Merge(1, "k8", "v8")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(2, 1); - - get_perf_context()->Reset(); - - std::vector keys({"k8", "k8", "k8", "k4", "k4", "k1", "k3"}); - std::vector values(keys.size()); - std::vector cfs(keys.size(), handles_[1]); - std::vector s(keys.size()); - - ReadOptions ro; - ro.async_io = std::get<1>(GetParam()); - db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(), - s.data(), false); - - ASSERT_EQ(values.size(), keys.size()); - ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v8"); - ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v8"); - ASSERT_EQ(std::string(values[2].data(), values[2].size()), "v8"); - ASSERT_EQ(std::string(values[3].data(), values[3].size()), "v4,v4_2"); - ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v4,v4_2"); - ASSERT_EQ(std::string(values[5].data(), values[5].size()), "v1"); - ASSERT_EQ(std::string(values[6].data(), values[6].size()), "v3"); - ASSERT_EQ(24, (int)get_perf_context()->multiget_read_bytes); - - for (Status& status : s) { - 
ASSERT_OK(status); - } - - SetPerfLevel(kDisable); -} - -TEST_P(DBMultiGetTestWithParam, MultiGetBatchedMultiLevel) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - // Skip for unbatched MultiGet - if (!std::get<0>(GetParam())) { - ROCKSDB_GTEST_BYPASS("This test is only for batched MultiGet"); - return; - } - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - Reopen(options); - int num_keys = 0; - - for (int i = 0; i < 128; ++i) { - ASSERT_OK(Put("key_" + std::to_string(i), "val_l2_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - ASSERT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - ASSERT_OK(Flush()); - num_keys = 0; - } - MoveFilesToLevel(2); - - for (int i = 0; i < 128; i += 3) { - ASSERT_OK(Put("key_" + std::to_string(i), "val_l1_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - ASSERT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - ASSERT_OK(Flush()); - num_keys = 0; - } - MoveFilesToLevel(1); - - for (int i = 0; i < 128; i += 5) { - ASSERT_OK(Put("key_" + std::to_string(i), "val_l0_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - ASSERT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - ASSERT_OK(Flush()); - num_keys = 0; - } - ASSERT_EQ(0, num_keys); - - for (int i = 0; i < 128; i += 9) { - ASSERT_OK(Put("key_" + std::to_string(i), "val_mem_" + std::to_string(i))); - } - - std::vector keys; - std::vector values; - - for (int i = 64; i < 80; ++i) { - keys.push_back("key_" + std::to_string(i)); - } - - values = MultiGet(keys, nullptr, std::get<1>(GetParam())); - ASSERT_EQ(values.size(), 16); - for (unsigned int j = 0; j < values.size(); ++j) { - int key = j + 64; - if (key % 9 == 0) { - ASSERT_EQ(values[j], "val_mem_" + std::to_string(key)); - } else if (key % 5 == 0) { - ASSERT_EQ(values[j], "val_l0_" + std::to_string(key)); - } else if (key % 3 == 0) { - ASSERT_EQ(values[j], "val_l1_" + std::to_string(key)); - } else { - ASSERT_EQ(values[j], "val_l2_" + std::to_string(key)); - } - } -} - -TEST_P(DBMultiGetTestWithParam, MultiGetBatchedMultiLevelMerge) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - // Skip for unbatched MultiGet - if (!std::get<0>(GetParam())) { - ROCKSDB_GTEST_BYPASS("This test is only for batched MultiGet"); - return; - } - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - int num_keys = 0; - - for (int i = 0; i < 128; ++i) { - ASSERT_OK(Put("key_" + std::to_string(i), "val_l2_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - ASSERT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - ASSERT_OK(Flush()); - num_keys = 0; - } - MoveFilesToLevel(2); - - for (int i = 0; i < 128; i += 3) { - ASSERT_OK(Merge("key_" + std::to_string(i), "val_l1_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - ASSERT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - ASSERT_OK(Flush()); - num_keys = 0; - } - MoveFilesToLevel(1); - - for (int i = 0; i < 128; i += 5) { - ASSERT_OK(Merge("key_" + std::to_string(i), "val_l0_" + 
std::to_string(i))); - num_keys++; - if (num_keys == 8) { - ASSERT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - ASSERT_OK(Flush()); - num_keys = 0; - } - ASSERT_EQ(0, num_keys); - - for (int i = 0; i < 128; i += 9) { - ASSERT_OK( - Merge("key_" + std::to_string(i), "val_mem_" + std::to_string(i))); - } - - std::vector keys; - std::vector values; - - for (int i = 32; i < 80; ++i) { - keys.push_back("key_" + std::to_string(i)); - } - - values = MultiGet(keys, nullptr, std::get<1>(GetParam())); - ASSERT_EQ(values.size(), keys.size()); - for (unsigned int j = 0; j < 48; ++j) { - int key = j + 32; - std::string value; - value.append("val_l2_" + std::to_string(key)); - if (key % 3 == 0) { - value.append(","); - value.append("val_l1_" + std::to_string(key)); - } - if (key % 5 == 0) { - value.append(","); - value.append("val_l0_" + std::to_string(key)); - } - if (key % 9 == 0) { - value.append(","); - value.append("val_mem_" + std::to_string(key)); - } - ASSERT_EQ(values[j], value); - } -} - -TEST_P(DBMultiGetTestWithParam, MultiGetBatchedValueSizeInMemory) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - // Skip for unbatched MultiGet - if (!std::get<0>(GetParam())) { - ROCKSDB_GTEST_BYPASS("This test is only for batched MultiGet"); - return; - } - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - SetPerfLevel(kEnableCount); - ASSERT_OK(Put(1, "k1", "v_1")); - ASSERT_OK(Put(1, "k2", "v_2")); - ASSERT_OK(Put(1, "k3", "v_3")); - ASSERT_OK(Put(1, "k4", "v_4")); - ASSERT_OK(Put(1, "k5", "v_5")); - ASSERT_OK(Put(1, "k6", "v_6")); - std::vector keys = {"k1", "k2", "k3", "k4", "k5", "k6"}; - std::vector values(keys.size()); - std::vector s(keys.size()); - std::vector cfs(keys.size(), handles_[1]); - - get_perf_context()->Reset(); - ReadOptions ro; - ro.value_size_soft_limit = 11; - ro.async_io = std::get<1>(GetParam()); - db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(), - s.data(), false); - - ASSERT_EQ(values.size(), keys.size()); - for (unsigned int i = 0; i < 4; i++) { - ASSERT_EQ(std::string(values[i].data(), values[i].size()), - "v_" + std::to_string(i + 1)); - } - - for (unsigned int i = 4; i < 6; i++) { - ASSERT_TRUE(s[i].IsAborted()); - } - - ASSERT_EQ(12, (int)get_perf_context()->multiget_read_bytes); - SetPerfLevel(kDisable); -} - -TEST_P(DBMultiGetTestWithParam, MultiGetBatchedValueSize) { -#ifndef USE_COROUTINES - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - // Skip for unbatched MultiGet - if (!std::get<0>(GetParam())) { - return; - } - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - SetPerfLevel(kEnableCount); - - ASSERT_OK(Put(1, "k6", "v6")); - ASSERT_OK(Put(1, "k7", "v7_")); - ASSERT_OK(Put(1, "k3", "v3_")); - ASSERT_OK(Put(1, "k4", "v4")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Delete(1, "k4")); - ASSERT_OK(Put(1, "k11", "v11")); - ASSERT_OK(Delete(1, "no_key")); - ASSERT_OK(Put(1, "k8", "v8_")); - ASSERT_OK(Put(1, "k13", "v13")); - ASSERT_OK(Put(1, "k14", "v14")); - ASSERT_OK(Put(1, "k15", "v15")); - ASSERT_OK(Put(1, "k16", "v16")); - ASSERT_OK(Put(1, "k17", "v17")); - ASSERT_OK(Flush(1)); - - ASSERT_OK(Put(1, "k1", "v1_")); - ASSERT_OK(Put(1, "k2", "v2_")); - ASSERT_OK(Put(1, "k5", "v5_")); - ASSERT_OK(Put(1, "k9", "v9_")); - ASSERT_OK(Put(1, "k10", "v10")); - ASSERT_OK(Delete(1, "k2")); - ASSERT_OK(Delete(1, "k6")); - - 
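// Note on the array-based MultiGet overload used here and in the surrounding
// tests: keys, values and statuses are parallel arrays and the trailing bool
// tells RocksDB whether the keys are already sorted. A minimal sketch of the
// same call against the default column family (the PinnableSlice value type is
// an assumption based on the public DB interface; error handling elided):
#include <cassert>
#include <cstddef>
#include "rocksdb/db.h"

void BatchedMultiGetSketch(rocksdb::DB* db) {
  constexpr size_t kNumKeys = 3;
  rocksdb::Slice keys[kNumKeys] = {"k1", "k2", "no_such_key"};
  rocksdb::PinnableSlice values[kNumKeys];
  rocksdb::Status statuses[kNumKeys];

  db->MultiGet(rocksdb::ReadOptions(), db->DefaultColumnFamily(), kNumKeys,
               keys, values, statuses, /*sorted_input=*/false);

  for (size_t i = 0; i < kNumKeys; ++i) {
    // Each key gets its own status; keys that are absent report IsNotFound().
    assert(statuses[i].ok() || statuses[i].IsNotFound());
  }
}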
get_perf_context()->Reset(); - - std::vector<Slice> keys({"k1", "k10", "k11", "k12", "k13", "k14", "k15", - "k16", "k17", "k2", "k3", "k4", "k5", "k6", "k7", - "k8", "k9", "no_key"}); - std::vector<PinnableSlice> values(keys.size()); - std::vector<ColumnFamilyHandle*> cfs(keys.size(), handles_[1]); - std::vector<Status> s(keys.size()); - - ReadOptions ro; - ro.value_size_soft_limit = 20; - ro.async_io = std::get<1>(GetParam()); - db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(), - s.data(), false); - - ASSERT_EQ(values.size(), keys.size()); - - // In memory keys - ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v1_"); - ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v10"); - ASSERT_TRUE(s[9].IsNotFound()); // k2 - ASSERT_EQ(std::string(values[12].data(), values[12].size()), "v5_"); - ASSERT_TRUE(s[13].IsNotFound()); // k6 - ASSERT_EQ(std::string(values[16].data(), values[16].size()), "v9_"); - - // In sst files - ASSERT_EQ(std::string(values[2].data(), values[2].size()), "v11"); - ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v13"); - ASSERT_EQ(std::string(values[5].data(), values[5].size()), "v14"); - - // Remaining keys are aborted once value_size_soft_limit is exceeded. - ASSERT_TRUE(s[3].IsAborted()); - ASSERT_TRUE(s[6].IsAborted()); - ASSERT_TRUE(s[7].IsAborted()); - ASSERT_TRUE(s[8].IsAborted()); - ASSERT_TRUE(s[10].IsAborted()); - ASSERT_TRUE(s[11].IsAborted()); - ASSERT_TRUE(s[14].IsAborted()); - ASSERT_TRUE(s[15].IsAborted()); - ASSERT_TRUE(s[17].IsAborted()); - - // 7 kv pairs * 3 bytes per value (i.e. 21) - ASSERT_EQ(21, (int)get_perf_context()->multiget_read_bytes); - SetPerfLevel(kDisable); - } while (ChangeCompactOptions()); -} - -TEST_P(DBMultiGetTestWithParam, MultiGetBatchedValueSizeMultiLevelMerge) { - if (std::get<1>(GetParam())) { - ROCKSDB_GTEST_BYPASS("This test needs to be fixed for async IO"); - return; - } - // Skip for unbatched MultiGet - if (!std::get<0>(GetParam())) { - ROCKSDB_GTEST_BYPASS("This test is only for batched MultiGet"); - return; - } - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - int num_keys = 0; - - for (int i = 0; i < 64; ++i) { - ASSERT_OK(Put("key_" + std::to_string(i), "val_l2_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - ASSERT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - ASSERT_OK(Flush()); - num_keys = 0; - } - MoveFilesToLevel(2); - - for (int i = 0; i < 64; i += 3) { - ASSERT_OK(Merge("key_" + std::to_string(i), "val_l1_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - ASSERT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - ASSERT_OK(Flush()); - num_keys = 0; - } - MoveFilesToLevel(1); - - for (int i = 0; i < 64; i += 5) { - ASSERT_OK(Merge("key_" + std::to_string(i), "val_l0_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - ASSERT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - ASSERT_OK(Flush()); - num_keys = 0; - } - ASSERT_EQ(0, num_keys); - - for (int i = 0; i < 64; i += 9) { - ASSERT_OK( - Merge("key_" + std::to_string(i), "val_mem_" + std::to_string(i))); - } - - std::vector<std::string> keys_str; - for (int i = 10; i < 50; ++i) { - keys_str.push_back("key_" + std::to_string(i)); - } - - std::vector<Slice> keys(keys_str.size()); - for (int i = 0; i < 40; i++) { - keys[i] = Slice(keys_str[i]); - } - 
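Before the lookups below, a compressed illustration (a sketch, not part of the test) of why the expected values are comma-joined: with the StringAppend merge operator configured above, the base Put is the oldest operand and each newer Merge found in L1/L0/memtable is appended with a ',' when the key is read back. Assumes an open DB* db configured with the same merge_operator.

  db->Put(WriteOptions(), "key_9", "val_l2_9");
  db->Merge(WriteOptions(), "key_9", "val_l1_9");
  db->Merge(WriteOptions(), "key_9", "val_mem_9");
  std::string result;
  db->Get(ReadOptions(), "key_9", &result);
  // result == "val_l2_9,val_l1_9,val_mem_9"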
- std::vector values(keys_str.size()); - std::vector statuses(keys_str.size()); - ReadOptions read_options; - read_options.verify_checksums = true; - read_options.value_size_soft_limit = 380; - read_options.async_io = std::get<1>(GetParam()); - db_->MultiGet(read_options, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - - ASSERT_EQ(values.size(), keys.size()); - - for (unsigned int j = 0; j < 26; ++j) { - int key = j + 10; - std::string value; - value.append("val_l2_" + std::to_string(key)); - if (key % 3 == 0) { - value.append(","); - value.append("val_l1_" + std::to_string(key)); - } - if (key % 5 == 0) { - value.append(","); - value.append("val_l0_" + std::to_string(key)); - } - if (key % 9 == 0) { - value.append(","); - value.append("val_mem_" + std::to_string(key)); - } - ASSERT_EQ(values[j], value); - ASSERT_OK(statuses[j]); - } - - // All remaning keys status is set Status::Abort - for (unsigned int j = 26; j < 40; j++) { - ASSERT_TRUE(statuses[j].IsAborted()); - } -} - -INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam, - testing::Combine(testing::Bool(), testing::Bool())); - -#if USE_COROUTINES -class DBMultiGetAsyncIOTest : public DBBasicTest, - public ::testing::WithParamInterface { - public: - DBMultiGetAsyncIOTest() - : DBBasicTest(), statistics_(ROCKSDB_NAMESPACE::CreateDBStatistics()) { - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10)); - options_ = CurrentOptions(); - options_.disable_auto_compactions = true; - options_.statistics = statistics_; - options_.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options_.env = Env::Default(); - Reopen(options_); - int num_keys = 0; - - // Put all keys in the bottommost level, and overwrite some keys - // in L0 and L1 - for (int i = 0; i < 256; ++i) { - EXPECT_OK(Put(Key(i), "val_l2_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - EXPECT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - EXPECT_OK(Flush()); - num_keys = 0; - } - MoveFilesToLevel(2); - - for (int i = 0; i < 128; i += 3) { - EXPECT_OK(Put(Key(i), "val_l1_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - EXPECT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - EXPECT_OK(Flush()); - num_keys = 0; - } - // Put some range deletes in L1 - for (int i = 128; i < 256; i += 32) { - std::string range_begin = Key(i); - std::string range_end = Key(i + 16); - EXPECT_OK(dbfull()->DeleteRange(WriteOptions(), - dbfull()->DefaultColumnFamily(), - range_begin, range_end)); - // Also do some Puts to force creation of bloom filter - for (int j = i + 16; j < i + 32; ++j) { - if (j % 3 == 0) { - EXPECT_OK(Put(Key(j), "val_l1_" + std::to_string(j))); - } - } - EXPECT_OK(Flush()); - } - MoveFilesToLevel(1); - - for (int i = 0; i < 128; i += 5) { - EXPECT_OK(Put(Key(i), "val_l0_" + std::to_string(i))); - num_keys++; - if (num_keys == 8) { - EXPECT_OK(Flush()); - num_keys = 0; - } - } - if (num_keys > 0) { - EXPECT_OK(Flush()); - num_keys = 0; - } - EXPECT_EQ(0, num_keys); - } - - const std::shared_ptr& statistics() { return statistics_; } - - protected: - void PrepareDBForTest() { -#ifdef ROCKSDB_IOURING_PRESENT - Reopen(options_); -#else // ROCKSDB_IOURING_PRESENT - // Warm up the block cache so we don't need to use the IO uring - Iterator* iter = dbfull()->NewIterator(ReadOptions()); - for (iter->SeekToFirst(); iter->Valid() && iter->status().ok(); - iter->Next()) - ; - EXPECT_OK(iter->status()); - delete iter; -#endif // 
ROCKSDB_IOURING_PRESENT - } - - void ReopenDB() { Reopen(options_); } - - private: - std::shared_ptr statistics_; - Options options_; -}; - -TEST_P(DBMultiGetAsyncIOTest, GetFromL0) { - // All 3 keys in L0. The L0 files should be read serially. - std::vector key_strs{Key(0), Key(40), Key(80)}; - std::vector keys{key_strs[0], key_strs[1], key_strs[2]}; - std::vector values(key_strs.size()); - std::vector statuses(key_strs.size()); - - PrepareDBForTest(); - - ReadOptions ro; - ro.async_io = true; - ro.optimize_multiget_for_io = GetParam(); - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - ASSERT_EQ(values.size(), 3); - ASSERT_OK(statuses[0]); - ASSERT_OK(statuses[1]); - ASSERT_OK(statuses[2]); - ASSERT_EQ(values[0], "val_l0_" + std::to_string(0)); - ASSERT_EQ(values[1], "val_l0_" + std::to_string(40)); - ASSERT_EQ(values[2], "val_l0_" + std::to_string(80)); - - HistogramData multiget_io_batch_size; - - statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size); - - // With async IO, lookups will happen in parallel for each key -#ifdef ROCKSDB_IOURING_PRESENT - if (GetParam()) { - ASSERT_EQ(multiget_io_batch_size.count, 1); - ASSERT_EQ(multiget_io_batch_size.max, 3); - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3); - } else { - // Without Async IO, MultiGet will call MultiRead 3 times, once for each - // L0 file - ASSERT_EQ(multiget_io_batch_size.count, 3); - } -#else // ROCKSDB_IOURING_PRESENT - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0); -#endif // ROCKSDB_IOURING_PRESENT -} - -TEST_P(DBMultiGetAsyncIOTest, GetFromL1) { - std::vector key_strs; - std::vector keys; - std::vector values; - std::vector statuses; - - key_strs.push_back(Key(33)); - key_strs.push_back(Key(54)); - key_strs.push_back(Key(102)); - keys.push_back(key_strs[0]); - keys.push_back(key_strs[1]); - keys.push_back(key_strs[2]); - values.resize(keys.size()); - statuses.resize(keys.size()); - - PrepareDBForTest(); - - ReadOptions ro; - ro.async_io = true; - ro.optimize_multiget_for_io = GetParam(); - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - ASSERT_EQ(values.size(), 3); - ASSERT_EQ(statuses[0], Status::OK()); - ASSERT_EQ(statuses[1], Status::OK()); - ASSERT_EQ(statuses[2], Status::OK()); - ASSERT_EQ(values[0], "val_l1_" + std::to_string(33)); - ASSERT_EQ(values[1], "val_l1_" + std::to_string(54)); - ASSERT_EQ(values[2], "val_l1_" + std::to_string(102)); - - HistogramData multiget_io_batch_size; - - statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size); - -#ifdef ROCKSDB_IOURING_PRESENT - // A batch of 3 async IOs is expected, one for each overlapping file in L1 - ASSERT_EQ(multiget_io_batch_size.count, 1); - ASSERT_EQ(multiget_io_batch_size.max, 3); - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3); -#else // ROCKSDB_IOURING_PRESENT - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0); -#endif // ROCKSDB_IOURING_PRESENT -} - -#ifdef ROCKSDB_IOURING_PRESENT -TEST_P(DBMultiGetAsyncIOTest, GetFromL1Error) { - std::vector key_strs; - std::vector keys; - std::vector values; - std::vector statuses; - - key_strs.push_back(Key(33)); - key_strs.push_back(Key(54)); - key_strs.push_back(Key(102)); - keys.push_back(key_strs[0]); - keys.push_back(key_strs[1]); - keys.push_back(key_strs[2]); - values.resize(keys.size()); - statuses.resize(keys.size()); - - int count = 
0; - SyncPoint::GetInstance()->SetCallBack( - "TableCache::GetTableReader:BeforeOpenFile", [&](void* status) { - count++; - // Fail the last table reader open, which is the 6th SST file - // since 3 overlapping L0 files + 3 L1 files containing the keys - if (count == 6) { - Status* s = static_cast(status); - *s = Status::IOError(); - } - }); - // DB open will create table readers unless we reduce the table cache - // capacity. - // SanitizeOptions will set max_open_files to minimum of 20. Table cache - // is allocated with max_open_files - 10 as capacity. So override - // max_open_files to 11 so table cache capacity will become 1. This will - // prevent file open during DB open and force the file to be opened - // during MultiGet - SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = (int*)arg; - *max_open_files = 11; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - PrepareDBForTest(); - - ReadOptions ro; - ro.async_io = true; - ro.optimize_multiget_for_io = GetParam(); - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_EQ(values.size(), 3); - ASSERT_EQ(statuses[0], Status::OK()); - ASSERT_EQ(statuses[1], Status::OK()); - ASSERT_EQ(statuses[2], Status::IOError()); - - HistogramData multiget_io_batch_size; - - statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size); - - // A batch of 3 async IOs is expected, one for each overlapping file in L1 - ASSERT_EQ(multiget_io_batch_size.count, 1); - ASSERT_EQ(multiget_io_batch_size.max, 2); - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 2); -} -#endif // ROCKSDB_IOURING_PRESENT - -TEST_P(DBMultiGetAsyncIOTest, LastKeyInFile) { - std::vector key_strs; - std::vector keys; - std::vector values; - std::vector statuses; - - // 21 is the last key in the first L1 file - key_strs.push_back(Key(21)); - key_strs.push_back(Key(54)); - key_strs.push_back(Key(102)); - keys.push_back(key_strs[0]); - keys.push_back(key_strs[1]); - keys.push_back(key_strs[2]); - values.resize(keys.size()); - statuses.resize(keys.size()); - - PrepareDBForTest(); - - ReadOptions ro; - ro.async_io = true; - ro.optimize_multiget_for_io = GetParam(); - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - ASSERT_EQ(values.size(), 3); - ASSERT_EQ(statuses[0], Status::OK()); - ASSERT_EQ(statuses[1], Status::OK()); - ASSERT_EQ(statuses[2], Status::OK()); - ASSERT_EQ(values[0], "val_l1_" + std::to_string(21)); - ASSERT_EQ(values[1], "val_l1_" + std::to_string(54)); - ASSERT_EQ(values[2], "val_l1_" + std::to_string(102)); - -#ifdef ROCKSDB_IOURING_PRESENT - HistogramData multiget_io_batch_size; - - statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size); - - // Since the first MultiGet key is the last key in a file, the MultiGet is - // expected to lookup in that file first, before moving on to other files. 
- // So the first file lookup will issue one async read, and the next lookup - // will lookup 2 files in parallel and issue 2 async reads - ASSERT_EQ(multiget_io_batch_size.count, 2); - ASSERT_EQ(multiget_io_batch_size.max, 2); -#endif // ROCKSDB_IOURING_PRESENT -} - -TEST_P(DBMultiGetAsyncIOTest, GetFromL1AndL2) { - std::vector key_strs; - std::vector keys; - std::vector values; - std::vector statuses; - - // 33 and 102 are in L1, and 56 is in L2 - key_strs.push_back(Key(33)); - key_strs.push_back(Key(56)); - key_strs.push_back(Key(102)); - keys.push_back(key_strs[0]); - keys.push_back(key_strs[1]); - keys.push_back(key_strs[2]); - values.resize(keys.size()); - statuses.resize(keys.size()); - - PrepareDBForTest(); - - ReadOptions ro; - ro.async_io = true; - ro.optimize_multiget_for_io = GetParam(); - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - ASSERT_EQ(values.size(), 3); - ASSERT_EQ(statuses[0], Status::OK()); - ASSERT_EQ(statuses[1], Status::OK()); - ASSERT_EQ(statuses[2], Status::OK()); - ASSERT_EQ(values[0], "val_l1_" + std::to_string(33)); - ASSERT_EQ(values[1], "val_l2_" + std::to_string(56)); - ASSERT_EQ(values[2], "val_l1_" + std::to_string(102)); - -#ifdef ROCKSDB_IOURING_PRESENT - HistogramData multiget_io_batch_size; - - statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size); - - // There are 2 keys in L1 in twp separate files, and 1 in L2. With - // optimize_multiget_for_io, all three lookups will happen in parallel. - // Otherwise, the L2 lookup will happen after L1. - ASSERT_EQ(multiget_io_batch_size.count, GetParam() ? 1 : 2); - ASSERT_EQ(multiget_io_batch_size.max, GetParam() ? 3 : 2); -#endif // ROCKSDB_IOURING_PRESENT -} - -TEST_P(DBMultiGetAsyncIOTest, GetFromL2WithRangeOverlapL0L1) { - std::vector key_strs; - std::vector keys; - std::vector values; - std::vector statuses; - - // 19 and 26 are in L2, but overlap with L0 and L1 file ranges - key_strs.push_back(Key(19)); - key_strs.push_back(Key(26)); - keys.push_back(key_strs[0]); - keys.push_back(key_strs[1]); - values.resize(keys.size()); - statuses.resize(keys.size()); - - PrepareDBForTest(); - - ReadOptions ro; - ro.async_io = true; - ro.optimize_multiget_for_io = GetParam(); - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - ASSERT_EQ(values.size(), 2); - ASSERT_EQ(statuses[0], Status::OK()); - ASSERT_EQ(statuses[1], Status::OK()); - ASSERT_EQ(values[0], "val_l2_" + std::to_string(19)); - ASSERT_EQ(values[1], "val_l2_" + std::to_string(26)); - -#ifdef ROCKSDB_IOURING_PRESENT - // Bloom filters in L0/L1 will avoid the coroutine calls in those levels - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 2); -#else // ROCKSDB_IOURING_PRESENT - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0); -#endif // ROCKSDB_IOURING_PRESENT -} - -#ifdef ROCKSDB_IOURING_PRESENT -TEST_P(DBMultiGetAsyncIOTest, GetFromL2WithRangeDelInL1) { - std::vector key_strs; - std::vector keys; - std::vector values; - std::vector statuses; - - // 139 and 163 are in L2, but overlap with a range deletes in L1 - key_strs.push_back(Key(139)); - key_strs.push_back(Key(163)); - keys.push_back(key_strs[0]); - keys.push_back(key_strs[1]); - values.resize(keys.size()); - statuses.resize(keys.size()); - - PrepareDBForTest(); - - ReadOptions ro; - ro.async_io = true; - ro.optimize_multiget_for_io = GetParam(); - dbfull()->MultiGet(ro, 
dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - ASSERT_EQ(values.size(), 2); - ASSERT_EQ(statuses[0], Status::NotFound()); - ASSERT_EQ(statuses[1], Status::NotFound()); - - // Bloom filters in L0/L1 will avoid the coroutine calls in those levels - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 2); -} - -TEST_P(DBMultiGetAsyncIOTest, GetFromL1AndL2WithRangeDelInL1) { - std::vector key_strs; - std::vector keys; - std::vector values; - std::vector statuses; - - // 139 and 163 are in L2, but overlap with a range deletes in L1 - key_strs.push_back(Key(139)); - key_strs.push_back(Key(144)); - key_strs.push_back(Key(163)); - keys.push_back(key_strs[0]); - keys.push_back(key_strs[1]); - keys.push_back(key_strs[2]); - values.resize(keys.size()); - statuses.resize(keys.size()); - - PrepareDBForTest(); - - ReadOptions ro; - ro.async_io = true; - ro.optimize_multiget_for_io = GetParam(); - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - ASSERT_EQ(values.size(), keys.size()); - ASSERT_EQ(statuses[0], Status::NotFound()); - ASSERT_EQ(statuses[1], Status::OK()); - ASSERT_EQ(values[1], "val_l1_" + std::to_string(144)); - ASSERT_EQ(statuses[2], Status::NotFound()); - - // Bloom filters in L0/L1 will avoid the coroutine calls in those levels - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3); -} -#endif // ROCKSDB_IOURING_PRESENT - -TEST_P(DBMultiGetAsyncIOTest, GetNoIOUring) { - std::vector key_strs; - std::vector keys; - std::vector values; - std::vector statuses; - - key_strs.push_back(Key(33)); - key_strs.push_back(Key(54)); - key_strs.push_back(Key(102)); - keys.push_back(key_strs[0]); - keys.push_back(key_strs[1]); - keys.push_back(key_strs[2]); - values.resize(keys.size()); - statuses.resize(keys.size()); - - enable_io_uring = false; - ReopenDB(); - - ReadOptions ro; - ro.async_io = true; - ro.optimize_multiget_for_io = GetParam(); - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data()); - ASSERT_EQ(values.size(), 3); - ASSERT_EQ(statuses[0], Status::OK()); - ASSERT_EQ(statuses[1], Status::OK()); - ASSERT_EQ(statuses[2], Status::OK()); - - HistogramData async_read_bytes; - - statistics()->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - - // A batch of 3 async IOs is expected, one for each overlapping file in L1 - ASSERT_EQ(async_read_bytes.count, 0); - ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0); -} - -INSTANTIATE_TEST_CASE_P(DBMultiGetAsyncIOTest, DBMultiGetAsyncIOTest, - testing::Bool()); -#endif // USE_COROUTINES - -TEST_F(DBBasicTest, MultiGetStats) { - Options options; - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.env = env_; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.block_size = 1; - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - table_options.partition_filters = true; - table_options.no_block_cache = true; - table_options.cache_index_and_filter_blocks = false; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - CreateAndReopenWithCF({"pikachu"}, options); - - int total_keys = 2000; - std::vector keys_str(total_keys); - std::vector keys(total_keys); - static size_t kMultiGetBatchSize = 100; - 
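As orientation for the histogram checks later in this test: a sketch of how the per-MultiGet level statistics can be read back through the Statistics object configured in options (the same calls the assertions below use; the surrounding setup is assumed).

  HistogramData levels_per_multiget;
  HistogramData sst_per_level;
  options.statistics->histogramData(NUM_LEVEL_READ_PER_MULTIGET, &levels_per_multiget);
  options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &sst_per_level);
  // levels_per_multiget.max: most levels consulted by any single MultiGet batch.
  // sst_per_level.max: most SST files read within one level for a batch.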
std::vector<PinnableSlice> values(kMultiGetBatchSize); - std::vector<Status> s(kMultiGetBatchSize); - ReadOptions read_opts; - - Random rnd(309); - // Create Multiple SST files at multiple levels. - for (int i = 0; i < 500; ++i) { - keys_str[i] = "k" + std::to_string(i); - keys[i] = Slice(keys_str[i]); - ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000))); - if (i % 100 == 0) { - ASSERT_OK(Flush(1)); - } - } - ASSERT_OK(Flush(1)); - MoveFilesToLevel(2, 1); - - for (int i = 501; i < 1000; ++i) { - keys_str[i] = "k" + std::to_string(i); - keys[i] = Slice(keys_str[i]); - ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000))); - if (i % 100 == 0) { - ASSERT_OK(Flush(1)); - } - } - - ASSERT_OK(Flush(1)); - MoveFilesToLevel(2, 1); - - for (int i = 1001; i < total_keys; ++i) { - keys_str[i] = "k" + std::to_string(i); - keys[i] = Slice(keys_str[i]); - ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000))); - if (i % 100 == 0) { - ASSERT_OK(Flush(1)); - } - } - ASSERT_OK(Flush(1)); - MoveFilesToLevel(1, 1); - Close(); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_OK(options.statistics->Reset()); - - db_->MultiGet(read_opts, handles_[1], kMultiGetBatchSize, &keys[1250], - values.data(), s.data(), false); - - ASSERT_EQ(values.size(), kMultiGetBatchSize); - HistogramData hist_level; - HistogramData hist_index_and_filter_blocks; - HistogramData hist_sst; - - options.statistics->histogramData(NUM_LEVEL_READ_PER_MULTIGET, &hist_level); - options.statistics->histogramData(NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, - &hist_index_and_filter_blocks); - options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &hist_sst); - - // Maximum number of levels read in a single MultiGet. - ASSERT_EQ(hist_level.max, 1); - ASSERT_GT(hist_index_and_filter_blocks.max, 0); - // Maximum number of sst files read from file system in a level. - ASSERT_EQ(hist_sst.max, 2); - - // Minimum number of levels read in a single MultiGet. - ASSERT_EQ(hist_level.min, 1); - ASSERT_GT(hist_index_and_filter_blocks.min, 0); - // Minimum number of sst files read in a level. 
- ASSERT_EQ(hist_sst.min, 1); - - for (PinnableSlice& value : values) { - value.Reset(); - } - for (Status& status : s) { - status = Status::OK(); - } - db_->MultiGet(read_opts, handles_[1], kMultiGetBatchSize, &keys[950], - values.data(), s.data(), false); - options.statistics->histogramData(NUM_LEVEL_READ_PER_MULTIGET, &hist_level); - ASSERT_EQ(hist_level.max, 2); -} - -// Test class for batched MultiGet with prefix extractor -// Param bool - If true, use partitioned filters -// If false, use full filter block -class MultiGetPrefixExtractorTest : public DBBasicTest, - public ::testing::WithParamInterface { -}; - -TEST_P(MultiGetPrefixExtractorTest, Batched) { - Options options = CurrentOptions(); - options.prefix_extractor.reset(NewFixedPrefixTransform(2)); - options.memtable_prefix_bloom_size_ratio = 10; - BlockBasedTableOptions bbto; - if (GetParam()) { - bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - bbto.partition_filters = true; - } - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.whole_key_filtering = false; - bbto.cache_index_and_filter_blocks = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - - SetPerfLevel(kEnableCount); - get_perf_context()->Reset(); - - ASSERT_OK(Put("k", "v0")); - ASSERT_OK(Put("kk1", "v1")); - ASSERT_OK(Put("kk2", "v2")); - ASSERT_OK(Put("kk3", "v3")); - ASSERT_OK(Put("kk4", "v4")); - std::vector keys( - {"k", "kk1", "kk2", "kk3", "kk4", "rofl", "lmho"}); - std::vector expected( - {"v0", "v1", "v2", "v3", "v4", "NOT_FOUND", "NOT_FOUND"}); - std::vector values; - values = MultiGet(keys, nullptr); - ASSERT_EQ(values, expected); - // One key ("k") is not queried against the filter because it is outside - // the prefix_extractor domain, leaving 6 keys with queried prefixes. 
- ASSERT_EQ(get_perf_context()->bloom_memtable_miss_count, 2); - ASSERT_EQ(get_perf_context()->bloom_memtable_hit_count, 4); - ASSERT_OK(Flush()); - - get_perf_context()->Reset(); - values = MultiGet(keys, nullptr); - ASSERT_EQ(values, expected); - ASSERT_EQ(get_perf_context()->bloom_sst_miss_count, 2); - ASSERT_EQ(get_perf_context()->bloom_sst_hit_count, 4); - - // Also check Get stat - get_perf_context()->Reset(); - for (size_t i = 0; i < keys.size(); ++i) { - values[i] = Get(keys[i]); - } - ASSERT_EQ(values, expected); - ASSERT_EQ(get_perf_context()->bloom_sst_miss_count, 2); - ASSERT_EQ(get_perf_context()->bloom_sst_hit_count, 4); -} - -INSTANTIATE_TEST_CASE_P(MultiGetPrefix, MultiGetPrefixExtractorTest, - ::testing::Bool()); - -class DBMultiGetRowCacheTest : public DBBasicTest, - public ::testing::WithParamInterface {}; - -TEST_P(DBMultiGetRowCacheTest, MultiGetBatched) { - do { - option_config_ = kRowCache; - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - CreateAndReopenWithCF({"pikachu"}, options); - SetPerfLevel(kEnableCount); - ASSERT_OK(Put(1, "k1", "v1")); - ASSERT_OK(Put(1, "k2", "v2")); - ASSERT_OK(Put(1, "k3", "v3")); - ASSERT_OK(Put(1, "k4", "v4")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "k5", "v5")); - const Snapshot* snap1 = dbfull()->GetSnapshot(); - ASSERT_OK(Delete(1, "k4")); - ASSERT_OK(Flush(1)); - const Snapshot* snap2 = dbfull()->GetSnapshot(); - - get_perf_context()->Reset(); - - std::vector keys({"no_key", "k5", "k4", "k3", "k1"}); - std::vector values(keys.size()); - std::vector cfs(keys.size(), handles_[1]); - std::vector s(keys.size()); - - ReadOptions ro; - bool use_snapshots = GetParam(); - if (use_snapshots) { - ro.snapshot = snap2; - } - db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(), - s.data(), false); - - ASSERT_EQ(values.size(), keys.size()); - ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v1"); - ASSERT_EQ(std::string(values[3].data(), values[3].size()), "v3"); - ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v5"); - // four kv pairs * two bytes per value - ASSERT_EQ(6, (int)get_perf_context()->multiget_read_bytes); - - ASSERT_TRUE(s[0].IsNotFound()); - ASSERT_OK(s[1]); - ASSERT_TRUE(s[2].IsNotFound()); - ASSERT_OK(s[3]); - ASSERT_OK(s[4]); - - // Call MultiGet() again with some intersection with the previous set of - // keys. Those should already be in the row cache. 
- keys.assign({"no_key", "k5", "k3", "k2"}); - for (size_t i = 0; i < keys.size(); ++i) { - values[i].Reset(); - s[i] = Status::OK(); - } - get_perf_context()->Reset(); - - if (use_snapshots) { - ro.snapshot = snap1; - } - db_->MultiGet(ReadOptions(), handles_[1], keys.size(), keys.data(), - values.data(), s.data(), false); - - ASSERT_EQ(std::string(values[3].data(), values[3].size()), "v2"); - ASSERT_EQ(std::string(values[2].data(), values[2].size()), "v3"); - ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v5"); - // four kv pairs * two bytes per value - ASSERT_EQ(6, (int)get_perf_context()->multiget_read_bytes); - - ASSERT_TRUE(s[0].IsNotFound()); - ASSERT_OK(s[1]); - ASSERT_OK(s[2]); - ASSERT_OK(s[3]); - if (use_snapshots) { - // Only reads from the first SST file would have been cached, since - // snapshot seq no is > fd.largest_seqno - ASSERT_EQ(1, TestGetTickerCount(options, ROW_CACHE_HIT)); - } else { - ASSERT_EQ(2, TestGetTickerCount(options, ROW_CACHE_HIT)); - } - - SetPerfLevel(kDisable); - dbfull()->ReleaseSnapshot(snap1); - dbfull()->ReleaseSnapshot(snap2); - } while (ChangeCompactOptions()); -} - -INSTANTIATE_TEST_CASE_P(DBMultiGetRowCacheTest, DBMultiGetRowCacheTest, - testing::Values(true, false)); - -TEST_F(DBBasicTest, GetAllKeyVersions) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.disable_auto_compactions = true; - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_EQ(2, handles_.size()); - const size_t kNumInserts = 4; - const size_t kNumDeletes = 4; - const size_t kNumUpdates = 4; - - // Check default column family - for (size_t i = 0; i != kNumInserts; ++i) { - ASSERT_OK(Put(std::to_string(i), "value")); - } - for (size_t i = 0; i != kNumUpdates; ++i) { - ASSERT_OK(Put(std::to_string(i), "value1")); - } - for (size_t i = 0; i != kNumDeletes; ++i) { - ASSERT_OK(Delete(std::to_string(i))); - } - std::vector key_versions; - ASSERT_OK(GetAllKeyVersions(db_, Slice(), Slice(), - std::numeric_limits::max(), - &key_versions)); - ASSERT_EQ(kNumInserts + kNumDeletes + kNumUpdates, key_versions.size()); - for (size_t i = 0; i < kNumInserts + kNumDeletes + kNumUpdates; i++) { - if (i % 3 == 0) { - ASSERT_EQ(key_versions[i].GetTypeName(), "TypeDeletion"); - } else { - ASSERT_EQ(key_versions[i].GetTypeName(), "TypeValue"); - } - } - ASSERT_OK(GetAllKeyVersions(db_, handles_[0], Slice(), Slice(), - std::numeric_limits::max(), - &key_versions)); - ASSERT_EQ(kNumInserts + kNumDeletes + kNumUpdates, key_versions.size()); - - // Check non-default column family - for (size_t i = 0; i + 1 != kNumInserts; ++i) { - ASSERT_OK(Put(1, std::to_string(i), "value")); - } - for (size_t i = 0; i + 1 != kNumUpdates; ++i) { - ASSERT_OK(Put(1, std::to_string(i), "value1")); - } - for (size_t i = 0; i + 1 != kNumDeletes; ++i) { - ASSERT_OK(Delete(1, std::to_string(i))); - } - ASSERT_OK(GetAllKeyVersions(db_, handles_[1], Slice(), Slice(), - std::numeric_limits::max(), - &key_versions)); - ASSERT_EQ(kNumInserts + kNumDeletes + kNumUpdates - 3, key_versions.size()); -} - -TEST_F(DBBasicTest, ValueTypeString) { - KeyVersion key_version; - // when adding new type, please also update `value_type_string_map` - for (unsigned char i = ValueType::kTypeDeletion; i < ValueType::kTypeMaxValid; - i++) { - key_version.type = i; - ASSERT_TRUE(key_version.GetTypeName() != "Invalid"); - } -} - -TEST_F(DBBasicTest, MultiGetIOBufferOverrun) { - Options options = CurrentOptions(); - Random rnd(301); - BlockBasedTableOptions table_options; - 
table_options.pin_l0_filter_and_index_blocks_in_cache = true; - table_options.block_size = 16 * 1024; - ASSERT_TRUE(table_options.block_size > - BlockBasedTable::kMultiGetReadStackBufSize); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - - std::string zero_str(128, '\0'); - for (int i = 0; i < 100; ++i) { - // Make the value compressible. A purely random string doesn't compress - // and the resultant data block will not be compressed - std::string value(rnd.RandomString(128) + zero_str); - assert(Put(Key(i), value) == Status::OK()); - } - ASSERT_OK(Flush()); - - std::vector key_data(10); - std::vector keys; - // We cannot resize a PinnableSlice vector, so just set initial size to - // largest we think we will need - std::vector values(10); - std::vector statuses; - ReadOptions ro; - - // Warm up the cache first - key_data.emplace_back(Key(0)); - keys.emplace_back(Slice(key_data.back())); - key_data.emplace_back(Key(50)); - keys.emplace_back(Slice(key_data.back())); - statuses.resize(keys.size()); - - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); -} - -TEST_F(DBBasicTest, IncrementalRecoveryNoCorrupt) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(3, num_cfs); - WriteOptions write_opts; - write_opts.disableWAL = true; - for (size_t cf = 0; cf != num_cfs; ++cf) { - for (size_t i = 0; i != 10000; ++i) { - std::string key_str = Key(static_cast(i)); - std::string value_str = std::to_string(cf) + "_" + std::to_string(i); - - ASSERT_OK(Put(static_cast(cf), key_str, value_str)); - if (0 == (i % 1000)) { - ASSERT_OK(Flush(static_cast(cf))); - } - } - } - for (size_t cf = 0; cf != num_cfs; ++cf) { - ASSERT_OK(Flush(static_cast(cf))); - } - Close(); - options.best_efforts_recovery = true; - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, - options); - num_cfs = handles_.size(); - ASSERT_EQ(3, num_cfs); - for (size_t cf = 0; cf != num_cfs; ++cf) { - for (int i = 0; i != 10000; ++i) { - std::string key_str = Key(static_cast(i)); - std::string expected_value_str = - std::to_string(cf) + "_" + std::to_string(i); - ASSERT_EQ(expected_value_str, Get(static_cast(cf), key_str)); - } - } -} - -TEST_F(DBBasicTest, BestEffortsRecoveryWithVersionBuildingFailure) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Flush()); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) { - ASSERT_NE(nullptr, arg); - *(reinterpret_cast(arg)) = - Status::Corruption("Inject corruption"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - options.best_efforts_recovery = true; - Status s = TryReopen(options); - ASSERT_TRUE(s.IsCorruption()); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -namespace { -class TableFileListener : public EventListener { - public: - void OnTableFileCreated(const TableFileCreationInfo& info) override { - InstrumentedMutexLock lock(&mutex_); - cf_to_paths_[info.cf_name].push_back(info.file_path); - } - std::vector& GetFiles(const std::string& cf_name) { - InstrumentedMutexLock lock(&mutex_); - return cf_to_paths_[cf_name]; - } - - private: - InstrumentedMutex 
mutex_; - std::unordered_map> cf_to_paths_; -}; -} // anonymous namespace - -TEST_F(DBBasicTest, LastSstFileNotInManifest) { - // If the last sst file is not tracked in MANIFEST, - // or the VersionEdit for the last sst file is not synced, - // on recovery, the last sst file should be deleted, - // and new sst files shouldn't reuse its file number. - Options options = CurrentOptions(); - DestroyAndReopen(options); - Close(); - - // Manually add a sst file. - constexpr uint64_t kSstFileNumber = 100; - const std::string kSstFile = MakeTableFileName(dbname_, kSstFileNumber); - ASSERT_OK(WriteStringToFile(env_, /* data = */ "bad sst file content", - /* fname = */ kSstFile, - /* should_sync = */ true)); - ASSERT_OK(env_->FileExists(kSstFile)); - - TableFileListener* listener = new TableFileListener(); - options.listeners.emplace_back(listener); - Reopen(options); - // kSstFile should already be deleted. - ASSERT_TRUE(env_->FileExists(kSstFile).IsNotFound()); - - ASSERT_OK(Put("k", "v")); - ASSERT_OK(Flush()); - // New sst file should have file number > kSstFileNumber. - std::vector& files = - listener->GetFiles(kDefaultColumnFamilyName); - ASSERT_EQ(files.size(), 1); - const std::string fname = files[0].erase(0, (dbname_ + "/").size()); - uint64_t number = 0; - FileType type = kTableFile; - ASSERT_TRUE(ParseFileName(fname, &number, &type)); - ASSERT_EQ(type, kTableFile); - ASSERT_GT(number, kSstFileNumber); -} - -TEST_F(DBBasicTest, RecoverWithMissingFiles) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - TableFileListener* listener = new TableFileListener(); - // Disable auto compaction to simplify SST file name tracking. - options.disable_auto_compactions = true; - options.listeners.emplace_back(listener); - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - std::vector all_cf_names = {kDefaultColumnFamilyName, "pikachu", - "eevee"}; - size_t num_cfs = handles_.size(); - ASSERT_EQ(3, num_cfs); - for (size_t cf = 0; cf != num_cfs; ++cf) { - ASSERT_OK(Put(static_cast(cf), "a", "0_value")); - ASSERT_OK(Flush(static_cast(cf))); - ASSERT_OK(Put(static_cast(cf), "b", "0_value")); - ASSERT_OK(Flush(static_cast(cf))); - ASSERT_OK(Put(static_cast(cf), "c", "0_value")); - ASSERT_OK(Flush(static_cast(cf))); - } - - // Delete and corrupt files - for (size_t i = 0; i < all_cf_names.size(); ++i) { - std::vector& files = listener->GetFiles(all_cf_names[i]); - ASSERT_EQ(3, files.size()); - std::string corrupted_data; - ASSERT_OK(ReadFileToString(env_, files[files.size() - 1], &corrupted_data)); - ASSERT_OK(WriteStringToFile( - env_, corrupted_data.substr(0, corrupted_data.size() - 2), - files[files.size() - 1], /*should_sync=*/true)); - for (int j = static_cast(files.size() - 2); j >= static_cast(i); - --j) { - ASSERT_OK(env_->DeleteFile(files[j])); - } - } - options.best_efforts_recovery = true; - ReopenWithColumnFamilies(all_cf_names, options); - // Verify data - ReadOptions read_opts; - read_opts.total_order_seek = true; - { - std::unique_ptr iter(db_->NewIterator(read_opts, handles_[0])); - iter->SeekToFirst(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - iter.reset(db_->NewIterator(read_opts, handles_[1])); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("a", iter->key()); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - iter.reset(db_->NewIterator(read_opts, handles_[2])); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("a", iter->key()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - 
ASSERT_EQ("b", iter->key()); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - } -} - -TEST_F(DBBasicTest, BestEffortsRecoveryTryMultipleManifests) { - Options options = CurrentOptions(); - options.env = env_; - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "value0")); - ASSERT_OK(Flush()); - Close(); - { - // Hack by adding a new MANIFEST with high file number - std::string garbage(10, '\0'); - ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/MANIFEST-001000", - /*should_sync=*/true)); - } - { - // Hack by adding a corrupted SST not referenced by any MANIFEST - std::string garbage(10, '\0'); - ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/001001.sst", - /*should_sync=*/true)); - } - - options.best_efforts_recovery = true; - - Reopen(options); - ASSERT_OK(Put("bar", "value")); -} - -TEST_F(DBBasicTest, RecoverWithNoCurrentFile) { - Options options = CurrentOptions(); - options.env = env_; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - options.best_efforts_recovery = true; - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); - ASSERT_EQ(2, handles_.size()); - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Put(1, "bar", "value")); - ASSERT_OK(Flush()); - ASSERT_OK(Flush(1)); - Close(); - ASSERT_OK(env_->DeleteFile(CurrentFileName(dbname_))); - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); - std::vector cf_names; - ASSERT_OK(DB::ListColumnFamilies(DBOptions(options), dbname_, &cf_names)); - ASSERT_EQ(2, cf_names.size()); - for (const auto& name : cf_names) { - ASSERT_TRUE(name == kDefaultColumnFamilyName || name == "pikachu"); - } -} - -TEST_F(DBBasicTest, RecoverWithNoManifest) { - Options options = CurrentOptions(); - options.env = env_; - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Flush()); - Close(); - { - // Delete all MANIFEST. - std::vector files; - ASSERT_OK(env_->GetChildren(dbname_, &files)); - for (const auto& file : files) { - uint64_t number = 0; - FileType type = kWalFile; - if (ParseFileName(file, &number, &type) && type == kDescriptorFile) { - ASSERT_OK(env_->DeleteFile(dbname_ + "/" + file)); - } - } - } - options.best_efforts_recovery = true; - options.create_if_missing = false; - Status s = TryReopen(options); - ASSERT_TRUE(s.IsInvalidArgument()); - options.create_if_missing = true; - Reopen(options); - // Since no MANIFEST exists, best-efforts recovery creates a new, empty db. 
- ASSERT_EQ("NOT_FOUND", Get("foo")); -} - -TEST_F(DBBasicTest, SkipWALIfMissingTableFiles) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - TableFileListener* listener = new TableFileListener(); - options.listeners.emplace_back(listener); - CreateAndReopenWithCF({"pikachu"}, options); - std::vector kAllCfNames = {kDefaultColumnFamilyName, "pikachu"}; - size_t num_cfs = handles_.size(); - ASSERT_EQ(2, num_cfs); - for (int cf = 0; cf < static_cast(kAllCfNames.size()); ++cf) { - ASSERT_OK(Put(cf, "a", "0_value")); - ASSERT_OK(Flush(cf)); - ASSERT_OK(Put(cf, "b", "0_value")); - } - // Delete files - for (size_t i = 0; i < kAllCfNames.size(); ++i) { - std::vector& files = listener->GetFiles(kAllCfNames[i]); - ASSERT_EQ(1, files.size()); - for (int j = static_cast(files.size() - 1); j >= static_cast(i); - --j) { - ASSERT_OK(env_->DeleteFile(files[j])); - } - } - options.best_efforts_recovery = true; - ReopenWithColumnFamilies(kAllCfNames, options); - // Verify WAL is not applied - ReadOptions read_opts; - read_opts.total_order_seek = true; - std::unique_ptr iter(db_->NewIterator(read_opts, handles_[0])); - iter->SeekToFirst(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - iter.reset(db_->NewIterator(read_opts, handles_[1])); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("a", iter->key()); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); -} - -TEST_F(DBBasicTest, DisableTrackWal) { - // If WAL tracking was enabled, and then disabled during reopen, - // the previously tracked WALs should be removed from MANIFEST. - - Options options = CurrentOptions(); - options.track_and_verify_wals_in_manifest = true; - // extremely small write buffer size, - // so that new WALs are created more frequently. - options.write_buffer_size = 100; - options.env = env_; - DestroyAndReopen(options); - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put("foo" + std::to_string(i), "value" + std::to_string(i))); - } - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - ASSERT_OK(db_->SyncWAL()); - // Some WALs are tracked. - ASSERT_FALSE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); - Close(); - - // Disable WAL tracking. - options.track_and_verify_wals_in_manifest = false; - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - // Previously tracked WALs are cleared. - ASSERT_TRUE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); - Close(); - - // Re-enable WAL tracking again. 
- options.track_and_verify_wals_in_manifest = true; - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - ASSERT_TRUE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); - Close(); -} - -TEST_F(DBBasicTest, ManifestChecksumMismatch) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - ASSERT_OK(Put("bar", "value")); - ASSERT_OK(Flush()); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "LogWriter::EmitPhysicalRecord:BeforeEncodeChecksum", [&](void* arg) { - auto* crc = reinterpret_cast(arg); - *crc = *crc + 1; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - WriteOptions write_opts; - write_opts.disableWAL = true; - Status s = db_->Put(write_opts, "foo", "value"); - ASSERT_OK(s); - ASSERT_OK(Flush()); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - ASSERT_OK(Put("foo", "value1")); - ASSERT_OK(Flush()); - s = TryReopen(options); - ASSERT_TRUE(s.IsCorruption()); -} - -TEST_F(DBBasicTest, ConcurrentlyCloseDB) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - std::vector workers; - for (int i = 0; i < 10; i++) { - workers.push_back(std::thread([&]() { - auto s = db_->Close(); - ASSERT_OK(s); - })); - } - for (auto& w : workers) { - w.join(); - } -} - -class DBBasicTestTrackWal : public DBTestBase, - public testing::WithParamInterface { - public: - DBBasicTestTrackWal() - : DBTestBase("db_basic_test_track_wal", /*env_do_fsync=*/false) {} - - int CountWalFiles() { - VectorLogPtr log_files; - EXPECT_OK(dbfull()->GetSortedWalFiles(log_files)); - return static_cast(log_files.size()); - }; -}; - -TEST_P(DBBasicTestTrackWal, DoNotTrackObsoleteWal) { - // If a WAL becomes obsolete after flushing, but is not deleted from disk yet, - // then if SyncWAL is called afterwards, the obsolete WAL should not be - // tracked in MANIFEST. - - Options options = CurrentOptions(); - options.create_if_missing = true; - options.track_and_verify_wals_in_manifest = true; - options.atomic_flush = GetParam(); - - DestroyAndReopen(options); - CreateAndReopenWithCF({"cf"}, options); - ASSERT_EQ(handles_.size(), 2); // default, cf - // Do not delete WALs. - ASSERT_OK(db_->DisableFileDeletions()); - constexpr int n = 10; - std::vector> wals(n); - for (size_t i = 0; i < n; i++) { - // Generate a new WAL for each key-value. - const int cf = i % 2; - ASSERT_OK(db_->GetCurrentWalFile(&wals[i])); - ASSERT_OK(Put(cf, "k" + std::to_string(i), "v" + std::to_string(i))); - ASSERT_OK(Flush({0, 1})); - } - ASSERT_EQ(CountWalFiles(), n); - // Since all WALs are obsolete, no WAL should be tracked in MANIFEST. - ASSERT_OK(db_->SyncWAL()); - - // Manually delete all WALs. - Close(); - for (const auto& wal : wals) { - ASSERT_OK(env_->DeleteFile(LogFileName(dbname_, wal->LogNumber()))); - } - - // If SyncWAL tracks the obsolete WALs in MANIFEST, - // reopen will fail because the WALs are missing from disk. 
- ASSERT_OK(TryReopenWithColumnFamilies({"default", "cf"}, options)); - Destroy(options); -} - -INSTANTIATE_TEST_CASE_P(DBBasicTestTrackWal, DBBasicTestTrackWal, - testing::Bool()); - -class DBBasicTestMultiGet : public DBTestBase { - public: - DBBasicTestMultiGet(std::string test_dir, int num_cfs, - bool uncompressed_cache, bool _compression_enabled, - bool _fill_cache, uint32_t compression_parallel_threads) - : DBTestBase(test_dir, /*env_do_fsync=*/false) { - compression_enabled_ = _compression_enabled; - fill_cache_ = _fill_cache; - - if (uncompressed_cache) { - std::shared_ptr cache = NewLRUCache(1048576); - uncompressed_cache_ = std::make_shared(cache); - } - - env_->count_random_reads_ = true; - - Options options = CurrentOptions(); - Random rnd(301); - BlockBasedTableOptions table_options; - - if (compression_enabled_) { - std::vector compression_types; - compression_types = GetSupportedCompressions(); - // Not every platform may have compression libraries available, so - // dynamically pick based on what's available - CompressionType tmp_type = kNoCompression; - for (auto c_type : compression_types) { - if (c_type != kNoCompression) { - tmp_type = c_type; - break; - } - } - if (tmp_type != kNoCompression) { - options.compression = tmp_type; - } else { - compression_enabled_ = false; - } - } - - table_options.block_cache = uncompressed_cache_; - if (table_options.block_cache == nullptr) { - table_options.no_block_cache = true; - } else { - table_options.pin_l0_filter_and_index_blocks_in_cache = true; - } - table_options.flush_block_policy_factory.reset( - new MyFlushBlockPolicyFactory()); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - if (!compression_enabled_) { - options.compression = kNoCompression; - } else { - options.compression_opts.parallel_threads = compression_parallel_threads; - } - options_ = options; - Reopen(options); - - if (num_cfs > 1) { - for (int cf = 0; cf < num_cfs; ++cf) { - cf_names_.emplace_back("cf" + std::to_string(cf)); - } - CreateColumnFamilies(cf_names_, options); - cf_names_.emplace_back("default"); - } - - std::string zero_str(128, '\0'); - for (int cf = 0; cf < num_cfs; ++cf) { - for (int i = 0; i < 100; ++i) { - // Make the value compressible. A purely random string doesn't compress - // and the resultant data block will not be compressed - values_.emplace_back(rnd.RandomString(128) + zero_str); - assert(((num_cfs == 1) ? Put(Key(i), values_[i]) - : Put(cf, Key(i), values_[i])) == Status::OK()); - } - if (num_cfs == 1) { - EXPECT_OK(Flush()); - } else { - EXPECT_OK(dbfull()->Flush(FlushOptions(), handles_[cf])); - } - - for (int i = 0; i < 100; ++i) { - // block cannot gain space by compression - uncompressable_values_.emplace_back(rnd.RandomString(256) + '\0'); - std::string tmp_key = "a" + Key(i); - assert(((num_cfs == 1) ? 
Put(tmp_key, uncompressable_values_[i]) - : Put(cf, tmp_key, uncompressable_values_[i])) == - Status::OK()); - } - if (num_cfs == 1) { - EXPECT_OK(Flush()); - } else { - EXPECT_OK(dbfull()->Flush(FlushOptions(), handles_[cf])); - } - } - // Clear compressed cache, which is always pre-populated - if (compressed_cache_) { - compressed_cache_->SetCapacity(0); - compressed_cache_->SetCapacity(1048576); - } - } - - bool CheckValue(int i, const std::string& value) { - if (values_[i].compare(value) == 0) { - return true; - } - return false; - } - - bool CheckUncompressableValue(int i, const std::string& value) { - if (uncompressable_values_[i].compare(value) == 0) { - return true; - } - return false; - } - - const std::vector& GetCFNames() const { return cf_names_; } - - int num_lookups() { return uncompressed_cache_->num_lookups(); } - int num_found() { return uncompressed_cache_->num_found(); } - int num_inserts() { return uncompressed_cache_->num_inserts(); } - - int num_lookups_compressed() { return compressed_cache_->num_lookups(); } - int num_found_compressed() { return compressed_cache_->num_found(); } - int num_inserts_compressed() { return compressed_cache_->num_inserts(); } - - bool fill_cache() { return fill_cache_; } - bool compression_enabled() { return compression_enabled_; } - bool has_compressed_cache() { return compressed_cache_ != nullptr; } - bool has_uncompressed_cache() { return uncompressed_cache_ != nullptr; } - Options get_options() { return options_; } - - static void SetUpTestCase() {} - static void TearDownTestCase() {} - - protected: - class MyFlushBlockPolicyFactory : public FlushBlockPolicyFactory { - public: - MyFlushBlockPolicyFactory() {} - - virtual const char* Name() const override { - return "MyFlushBlockPolicyFactory"; - } - - virtual FlushBlockPolicy* NewFlushBlockPolicy( - const BlockBasedTableOptions& /*table_options*/, - const BlockBuilder& data_block_builder) const override { - return new MyFlushBlockPolicy(data_block_builder); - } - }; - - class MyFlushBlockPolicy : public FlushBlockPolicy { - public: - explicit MyFlushBlockPolicy(const BlockBuilder& data_block_builder) - : num_keys_(0), data_block_builder_(data_block_builder) {} - - bool Update(const Slice& /*key*/, const Slice& /*value*/) override { - if (data_block_builder_.empty()) { - // First key in this block - num_keys_ = 1; - return false; - } - // Flush every 10 keys - if (num_keys_ == 10) { - num_keys_ = 1; - return true; - } - num_keys_++; - return false; - } - - private: - int num_keys_; - const BlockBuilder& data_block_builder_; - }; - - class MyBlockCache : public CacheWrapper { - public: - explicit MyBlockCache(std::shared_ptr target) - : CacheWrapper(target), - num_lookups_(0), - num_found_(0), - num_inserts_(0) {} - - const char* Name() const override { return "MyBlockCache"; } - - Status Insert(const Slice& key, Cache::ObjectPtr value, - const CacheItemHelper* helper, size_t charge, - Handle** handle = nullptr, - Priority priority = Priority::LOW) override { - num_inserts_++; - return target_->Insert(key, value, helper, charge, handle, priority); - } - - Handle* Lookup(const Slice& key, const CacheItemHelper* helper, - CreateContext* create_context, - Priority priority = Priority::LOW, - Statistics* stats = nullptr) override { - num_lookups_++; - Handle* handle = - target_->Lookup(key, helper, create_context, priority, stats); - if (handle != nullptr) { - num_found_++; - } - return handle; - } - - int num_lookups() { return num_lookups_; } - - int num_found() { return num_found_; } - - 
int num_inserts() { return num_inserts_; } - - private: - int num_lookups_; - int num_found_; - int num_inserts_; - }; - - std::shared_ptr compressed_cache_; - std::shared_ptr uncompressed_cache_; - Options options_; - bool compression_enabled_; - std::vector values_; - std::vector uncompressable_values_; - bool fill_cache_; - std::vector cf_names_; -}; - -class DBBasicTestWithParallelIO : public DBBasicTestMultiGet, - public testing::WithParamInterface< - std::tuple> { - public: - DBBasicTestWithParallelIO() - : DBBasicTestMultiGet("/db_basic_test_with_parallel_io", 1, - std::get<0>(GetParam()), std::get<1>(GetParam()), - std::get<2>(GetParam()), std::get<3>(GetParam())) {} -}; - -TEST_P(DBBasicTestWithParallelIO, MultiGet) { - std::vector key_data(10); - std::vector keys; - // We cannot resize a PinnableSlice vector, so just set initial size to - // largest we think we will need - std::vector values(10); - std::vector statuses; - ReadOptions ro; - ro.fill_cache = fill_cache(); - - // Warm up the cache first - key_data.emplace_back(Key(0)); - keys.emplace_back(Slice(key_data.back())); - key_data.emplace_back(Key(50)); - keys.emplace_back(Slice(key_data.back())); - statuses.resize(keys.size()); - - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); - ASSERT_TRUE(CheckValue(0, values[0].ToString())); - ASSERT_TRUE(CheckValue(50, values[1].ToString())); - - int random_reads = env_->random_read_counter_.Read(); - key_data[0] = Key(1); - key_data[1] = Key(51); - keys[0] = Slice(key_data[0]); - keys[1] = Slice(key_data[1]); - values[0].Reset(); - values[1].Reset(); - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); - ASSERT_TRUE(CheckValue(1, values[0].ToString())); - ASSERT_TRUE(CheckValue(51, values[1].ToString())); - - bool read_from_cache = false; - if (fill_cache()) { - if (has_uncompressed_cache()) { - read_from_cache = true; - } else if (has_compressed_cache() && compression_enabled()) { - read_from_cache = true; - } - } - - int expected_reads = random_reads + (read_from_cache ? 0 : 2); - ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); - - keys.resize(10); - statuses.resize(10); - std::vector key_ints{1, 2, 15, 16, 55, 81, 82, 83, 84, 85}; - for (size_t i = 0; i < key_ints.size(); ++i) { - key_data[i] = Key(key_ints[i]); - keys[i] = Slice(key_data[i]); - statuses[i] = Status::OK(); - values[i].Reset(); - } - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); - for (size_t i = 0; i < key_ints.size(); ++i) { - ASSERT_OK(statuses[i]); - ASSERT_TRUE(CheckValue(key_ints[i], values[i].ToString())); - } - if (compression_enabled() && !has_compressed_cache()) { - expected_reads += (read_from_cache ? 2 : 3); - } else { - expected_reads += (read_from_cache ? 
2 : 4); - } - ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); - - keys.resize(10); - statuses.resize(10); - std::vector key_uncmp{1, 2, 15, 16, 55, 81, 82, 83, 84, 85}; - for (size_t i = 0; i < key_uncmp.size(); ++i) { - key_data[i] = "a" + Key(key_uncmp[i]); - keys[i] = Slice(key_data[i]); - statuses[i] = Status::OK(); - values[i].Reset(); - } - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); - for (size_t i = 0; i < key_uncmp.size(); ++i) { - ASSERT_OK(statuses[i]); - ASSERT_TRUE(CheckUncompressableValue(key_uncmp[i], values[i].ToString())); - } - if (compression_enabled() && !has_compressed_cache()) { - expected_reads += (read_from_cache ? 3 : 3); - } else { - expected_reads += (read_from_cache ? 4 : 4); - } - ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); - - keys.resize(5); - statuses.resize(5); - std::vector key_tr{1, 2, 15, 16, 55}; - for (size_t i = 0; i < key_tr.size(); ++i) { - key_data[i] = "a" + Key(key_tr[i]); - keys[i] = Slice(key_data[i]); - statuses[i] = Status::OK(); - values[i].Reset(); - } - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); - for (size_t i = 0; i < key_tr.size(); ++i) { - ASSERT_OK(statuses[i]); - ASSERT_TRUE(CheckUncompressableValue(key_tr[i], values[i].ToString())); - } - if (compression_enabled() && !has_compressed_cache()) { - expected_reads += (read_from_cache ? 0 : 2); - ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); - } else { - if (has_uncompressed_cache()) { - expected_reads += (read_from_cache ? 0 : 3); - ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); - } else { - // A rare case, even we enable the block compression but some of data - // blocks are not compressed due to content. If user only enable the - // compressed cache, the uncompressed blocks will not tbe cached, and - // block reads will be triggered. The number of reads is related to - // the compression algorithm. 
- ASSERT_TRUE(env_->random_read_counter_.Read() >= expected_reads); - } - } -} - -TEST_P(DBBasicTestWithParallelIO, MultiGetDirectIO) { - class FakeDirectIOEnv : public EnvWrapper { - class FakeDirectIOSequentialFile; - class FakeDirectIORandomAccessFile; - - public: - FakeDirectIOEnv(Env* env) : EnvWrapper(env) {} - static const char* kClassName() { return "FakeDirectIOEnv"; } - const char* Name() const override { return kClassName(); } - - Status NewRandomAccessFile(const std::string& fname, - std::unique_ptr* result, - const EnvOptions& options) override { - std::unique_ptr file; - assert(options.use_direct_reads); - EnvOptions opts = options; - opts.use_direct_reads = false; - Status s = target()->NewRandomAccessFile(fname, &file, opts); - if (!s.ok()) { - return s; - } - result->reset(new FakeDirectIORandomAccessFile(std::move(file))); - return s; - } - - private: - class FakeDirectIOSequentialFile : public SequentialFileWrapper { - public: - FakeDirectIOSequentialFile(std::unique_ptr&& file) - : SequentialFileWrapper(file.get()), file_(std::move(file)) {} - ~FakeDirectIOSequentialFile() {} - - bool use_direct_io() const override { return true; } - size_t GetRequiredBufferAlignment() const override { return 1; } - - private: - std::unique_ptr file_; - }; - - class FakeDirectIORandomAccessFile : public RandomAccessFileWrapper { - public: - FakeDirectIORandomAccessFile(std::unique_ptr&& file) - : RandomAccessFileWrapper(file.get()), file_(std::move(file)) {} - ~FakeDirectIORandomAccessFile() {} - - bool use_direct_io() const override { return true; } - size_t GetRequiredBufferAlignment() const override { return 1; } - - private: - std::unique_ptr file_; - }; - }; - - std::unique_ptr env(new FakeDirectIOEnv(env_)); - Options opts = get_options(); - opts.env = env.get(); - opts.use_direct_reads = true; - Reopen(opts); - - std::vector key_data(10); - std::vector keys; - // We cannot resize a PinnableSlice vector, so just set initial size to - // largest we think we will need - std::vector values(10); - std::vector statuses; - ReadOptions ro; - ro.fill_cache = fill_cache(); - - // Warm up the cache first - key_data.emplace_back(Key(0)); - keys.emplace_back(Slice(key_data.back())); - key_data.emplace_back(Key(50)); - keys.emplace_back(Slice(key_data.back())); - statuses.resize(keys.size()); - - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); - ASSERT_TRUE(CheckValue(0, values[0].ToString())); - ASSERT_TRUE(CheckValue(50, values[1].ToString())); - - int random_reads = env_->random_read_counter_.Read(); - key_data[0] = Key(1); - key_data[1] = Key(51); - keys[0] = Slice(key_data[0]); - keys[1] = Slice(key_data[1]); - values[0].Reset(); - values[1].Reset(); - if (uncompressed_cache_) { - uncompressed_cache_->SetCapacity(0); - uncompressed_cache_->SetCapacity(1048576); - } - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); - ASSERT_TRUE(CheckValue(1, values[0].ToString())); - ASSERT_TRUE(CheckValue(51, values[1].ToString())); - - bool read_from_cache = false; - if (fill_cache()) { - if (has_uncompressed_cache()) { - read_from_cache = true; - } else if (has_compressed_cache() && compression_enabled()) { - read_from_cache = true; - } - } - - int expected_reads = random_reads; - if (!compression_enabled() || !has_compressed_cache()) { - expected_reads += 2; - } else { - expected_reads += (read_from_cache ? 
0 : 2); - } - if (env_->random_read_counter_.Read() != expected_reads) { - ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); - } - Close(); -} - -TEST_P(DBBasicTestWithParallelIO, MultiGetWithChecksumMismatch) { - std::vector key_data(10); - std::vector keys; - // We cannot resize a PinnableSlice vector, so just set initial size to - // largest we think we will need - std::vector values(10); - std::vector statuses; - int read_count = 0; - ReadOptions ro; - ro.fill_cache = fill_cache(); - - SyncPoint::GetInstance()->SetCallBack( - "RetrieveMultipleBlocks:VerifyChecksum", [&](void* status) { - Status* s = static_cast(status); - read_count++; - if (read_count == 2) { - *s = Status::Corruption(); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - - // Warm up the cache first - key_data.emplace_back(Key(0)); - keys.emplace_back(Slice(key_data.back())); - key_data.emplace_back(Key(50)); - keys.emplace_back(Slice(key_data.back())); - statuses.resize(keys.size()); - - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); - ASSERT_TRUE(CheckValue(0, values[0].ToString())); - // ASSERT_TRUE(CheckValue(50, values[1].ToString())); - ASSERT_EQ(statuses[0], Status::OK()); - ASSERT_EQ(statuses[1], Status::Corruption()); - - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DBBasicTestWithParallelIO, MultiGetWithMissingFile) { - std::vector key_data(10); - std::vector keys; - // We cannot resize a PinnableSlice vector, so just set initial size to - // largest we think we will need - std::vector values(10); - std::vector statuses; - ReadOptions ro; - ro.fill_cache = fill_cache(); - - SyncPoint::GetInstance()->SetCallBack( - "TableCache::MultiGet:FindTable", [&](void* status) { - Status* s = static_cast(status); - *s = Status::IOError(); - }); - // DB open will create table readers unless we reduce the table cache - // capacity. - // SanitizeOptions will set max_open_files to minimum of 20. Table cache - // is allocated with max_open_files - 10 as capacity. So override - // max_open_files to 11 so table cache capacity will become 1. 
This will - // prevent file open during DB open and force the file to be opened - // during MultiGet - SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = (int*)arg; - *max_open_files = 11; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - Reopen(CurrentOptions()); - - // Warm up the cache first - key_data.emplace_back(Key(0)); - keys.emplace_back(Slice(key_data.back())); - key_data.emplace_back(Key(50)); - keys.emplace_back(Slice(key_data.back())); - statuses.resize(keys.size()); - - dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), - keys.data(), values.data(), statuses.data(), true); - ASSERT_EQ(statuses[0], Status::IOError()); - ASSERT_EQ(statuses[1], Status::IOError()); - - SyncPoint::GetInstance()->DisableProcessing(); -} - -INSTANTIATE_TEST_CASE_P(ParallelIO, DBBasicTestWithParallelIO, - // Params are as follows - - // Param 0 - Uncompressed cache enabled - // Param 1 - Data compression enabled - // Param 2 - ReadOptions::fill_cache - // Param 3 - CompressionOptions::parallel_threads - ::testing::Combine(::testing::Bool(), ::testing::Bool(), - ::testing::Bool(), - ::testing::Values(1, 4))); - -// Forward declaration -class DeadlineFS; - -class DeadlineRandomAccessFile : public FSRandomAccessFileOwnerWrapper { - public: - DeadlineRandomAccessFile(DeadlineFS& fs, - std::unique_ptr& file) - : FSRandomAccessFileOwnerWrapper(std::move(file)), fs_(fs) {} - - IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts, - Slice* result, char* scratch, - IODebugContext* dbg) const override; - - IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, - const IOOptions& options, IODebugContext* dbg) override; - - IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts, - std::function cb, - void* cb_arg, void** io_handle, IOHandleDeleter* del_fn, - IODebugContext* dbg) override; - - private: - DeadlineFS& fs_; - std::unique_ptr file_; -}; - -class DeadlineFS : public FileSystemWrapper { - public: - // The error_on_delay parameter specifies whether a IOStatus::TimedOut() - // status should be returned after delaying the IO to exceed the timeout, - // or to simply delay but return success anyway. 
The latter mimics the - // behavior of PosixFileSystem, which does not enforce any timeout - explicit DeadlineFS(SpecialEnv* env, bool error_on_delay) - : FileSystemWrapper(env->GetFileSystem()), - deadline_(std::chrono::microseconds::zero()), - io_timeout_(std::chrono::microseconds::zero()), - env_(env), - timedout_(false), - ignore_deadline_(false), - error_on_delay_(error_on_delay) {} - - static const char* kClassName() { return "DeadlineFileSystem"; } - const char* Name() const override { return kClassName(); } - - IOStatus NewRandomAccessFile(const std::string& fname, - const FileOptions& opts, - std::unique_ptr* result, - IODebugContext* dbg) override { - std::unique_ptr file; - IOStatus s = target()->NewRandomAccessFile(fname, opts, &file, dbg); - EXPECT_OK(s); - result->reset(new DeadlineRandomAccessFile(*this, file)); - - const std::chrono::microseconds deadline = GetDeadline(); - const std::chrono::microseconds io_timeout = GetIOTimeout(); - if (deadline.count() || io_timeout.count()) { - AssertDeadline(deadline, io_timeout, opts.io_options); - } - return ShouldDelay(opts.io_options); - } - - // Set a vector of {IO counter, delay in microseconds, return status} tuples - // that control when to inject a delay and duration of the delay - void SetDelayTrigger(const std::chrono::microseconds deadline, - const std::chrono::microseconds io_timeout, - const int trigger) { - delay_trigger_ = trigger; - io_count_ = 0; - deadline_ = deadline; - io_timeout_ = io_timeout; - timedout_ = false; - } - - // Increment the IO counter and return a delay in microseconds - IOStatus ShouldDelay(const IOOptions& opts) { - if (timedout_) { - return IOStatus::TimedOut(); - } else if (!deadline_.count() && !io_timeout_.count()) { - return IOStatus::OK(); - } - if (!ignore_deadline_ && delay_trigger_ == io_count_++) { - env_->SleepForMicroseconds(static_cast(opts.timeout.count() + 1)); - timedout_ = true; - if (error_on_delay_) { - return IOStatus::TimedOut(); - } - } - return IOStatus::OK(); - } - - const std::chrono::microseconds GetDeadline() { - return ignore_deadline_ ? std::chrono::microseconds::zero() : deadline_; - } - - const std::chrono::microseconds GetIOTimeout() { - return ignore_deadline_ ? 
std::chrono::microseconds::zero() : io_timeout_; - } - - bool TimedOut() { return timedout_; } - - void IgnoreDeadline(bool ignore) { ignore_deadline_ = ignore; } - - void AssertDeadline(const std::chrono::microseconds deadline, - const std::chrono::microseconds io_timeout, - const IOOptions& opts) const { - // Give a leeway of +- 10us as it can take some time for the Get/ - // MultiGet call to reach here, in order to avoid false alarms - std::chrono::microseconds now = - std::chrono::microseconds(env_->NowMicros()); - std::chrono::microseconds timeout; - if (deadline.count()) { - timeout = deadline - now; - if (io_timeout.count()) { - timeout = std::min(timeout, io_timeout); - } - } else { - timeout = io_timeout; - } - if (opts.timeout != timeout) { - ASSERT_EQ(timeout, opts.timeout); - } - } - - private: - // The number of IOs to trigger the delay after - int delay_trigger_; - // Current IO count - int io_count_; - // ReadOptions deadline for the Get/MultiGet/Iterator - std::chrono::microseconds deadline_; - // ReadOptions io_timeout for the Get/MultiGet/Iterator - std::chrono::microseconds io_timeout_; - SpecialEnv* env_; - // Flag to indicate whether we injected a delay - bool timedout_; - // Temporarily ignore deadlines/timeouts - bool ignore_deadline_; - // Return IOStatus::TimedOut() or IOStatus::OK() - bool error_on_delay_; -}; - -IOStatus DeadlineRandomAccessFile::Read(uint64_t offset, size_t len, - const IOOptions& opts, Slice* result, - char* scratch, - IODebugContext* dbg) const { - const std::chrono::microseconds deadline = fs_.GetDeadline(); - const std::chrono::microseconds io_timeout = fs_.GetIOTimeout(); - IOStatus s; - if (deadline.count() || io_timeout.count()) { - fs_.AssertDeadline(deadline, io_timeout, opts); - } - if (s.ok()) { - s = FSRandomAccessFileWrapper::Read(offset, len, opts, result, scratch, - dbg); - } - if (s.ok()) { - s = fs_.ShouldDelay(opts); - } - return s; -} - -IOStatus DeadlineRandomAccessFile::ReadAsync( - FSReadRequest& req, const IOOptions& opts, - std::function cb, void* cb_arg, - void** io_handle, IOHandleDeleter* del_fn, IODebugContext* dbg) { - const std::chrono::microseconds deadline = fs_.GetDeadline(); - const std::chrono::microseconds io_timeout = fs_.GetIOTimeout(); - IOStatus s; - if (deadline.count() || io_timeout.count()) { - fs_.AssertDeadline(deadline, io_timeout, opts); - } - if (s.ok()) { - s = FSRandomAccessFileWrapper::ReadAsync(req, opts, cb, cb_arg, io_handle, - del_fn, dbg); - } - if (s.ok()) { - s = fs_.ShouldDelay(opts); - } - return s; -} - -IOStatus DeadlineRandomAccessFile::MultiRead(FSReadRequest* reqs, - size_t num_reqs, - const IOOptions& options, - IODebugContext* dbg) { - const std::chrono::microseconds deadline = fs_.GetDeadline(); - const std::chrono::microseconds io_timeout = fs_.GetIOTimeout(); - IOStatus s; - if (deadline.count() || io_timeout.count()) { - fs_.AssertDeadline(deadline, io_timeout, options); - } - if (s.ok()) { - s = FSRandomAccessFileWrapper::MultiRead(reqs, num_reqs, options, dbg); - } - if (s.ok()) { - s = fs_.ShouldDelay(options); - } - return s; -} - -// A test class for intercepting random reads and injecting artificial -// delays. 
Used for testing the MultiGet deadline feature -class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet, - public testing::WithParamInterface { - public: - DBBasicTestMultiGetDeadline() - : DBBasicTestMultiGet( - "db_basic_test_multiget_deadline" /*Test dir*/, - 10 /*# of column families*/, true /*uncompressed cache enabled*/, - true /*compression enabled*/, true /*ReadOptions.fill_cache*/, - 1 /*# of parallel compression threads*/) {} - - inline void CheckStatus(std::vector& statuses, size_t num_ok) { - for (size_t i = 0; i < statuses.size(); ++i) { - if (i < num_ok) { - EXPECT_OK(statuses[i]); - } else { - if (statuses[i] != Status::TimedOut()) { - EXPECT_EQ(statuses[i], Status::TimedOut()); - } - } - } - } -}; - -TEST_P(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) { -#ifndef USE_COROUTINES - if (GetParam()) { - ROCKSDB_GTEST_SKIP("This test requires coroutine support"); - return; - } -#endif // USE_COROUTINES - std::shared_ptr fs = std::make_shared(env_, false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - Options options = CurrentOptions(); - - std::shared_ptr cache = NewLRUCache(1048576); - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.env = env.get(); - SetTimeElapseOnlySleepOnReopen(&options); - ReopenWithColumnFamilies(GetCFNames(), options); - - // Test the non-batched version of MultiGet with multiple column - // families - std::vector key_str; - size_t i; - for (i = 0; i < 5; ++i) { - key_str.emplace_back(Key(static_cast(i))); - } - std::vector cfs(key_str.size()); - ; - std::vector keys(key_str.size()); - std::vector values(key_str.size()); - for (i = 0; i < key_str.size(); ++i) { - cfs[i] = handles_[i]; - keys[i] = Slice(key_str[i].data(), key_str[i].size()); - } - - ReadOptions ro; - ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; - ro.async_io = GetParam(); - // Delay the first IO - fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 0); - - std::vector statuses = dbfull()->MultiGet(ro, cfs, keys, &values); - // The first key is successful because we check after the lookup, but - // subsequent keys fail due to deadline exceeded - CheckStatus(statuses, 1); - - // Clear the cache - cache->SetCapacity(0); - cache->SetCapacity(1048576); - // Test non-batched Multiget with multiple column families and - // introducing an IO delay in one of the middle CFs - key_str.clear(); - for (i = 0; i < 10; ++i) { - key_str.emplace_back(Key(static_cast(i))); - } - cfs.resize(key_str.size()); - keys.resize(key_str.size()); - values.resize(key_str.size()); - for (i = 0; i < key_str.size(); ++i) { - // 2 keys per CF - cfs[i] = handles_[i / 2]; - keys[i] = Slice(key_str[i].data(), key_str[i].size()); - } - ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; - fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 1); - statuses = dbfull()->MultiGet(ro, cfs, keys, &values); - CheckStatus(statuses, 3); - - // Test batched MultiGet with an IO delay in the first data block read. - // Both keys in the first CF should succeed as they're in the same data - // block and would form one batch, and we check for deadline between - // batches. 
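At the caller level, the deadline plumbing exercised here reduces to two ReadOptions fields. A rough sketch, assuming an open rocksdb::DB* and a hypothetical key:

#include <chrono>
#include <string>
#include "rocksdb/db.h"

void DeadlineReadSketch(rocksdb::DB* db) {
  rocksdb::ReadOptions ro;
  // Absolute deadline, measured against Env::NowMicros().
  ro.deadline = std::chrono::microseconds{db->GetEnv()->NowMicros() + 10000};
  // Per-request IO budget; effectively the tighter of the two limits applies.
  ro.io_timeout = std::chrono::microseconds{5000};
  std::string value;
  rocksdb::Status s = db->Get(ro, "k50", &value);
  if (s.IsTimedOut()) {
    // The read ran past its deadline or io_timeout; retry or surface the error.
  }
}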
- std::vector pin_values(keys.size()); - cache->SetCapacity(0); - cache->SetCapacity(1048576); - statuses.clear(); - statuses.resize(keys.size()); - ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; - fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 0); - dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(), - pin_values.data(), statuses.data()); - CheckStatus(statuses, 2); - - // Similar to the previous one, but an IO delay in the third CF data block - // read - for (PinnableSlice& value : pin_values) { - value.Reset(); - } - cache->SetCapacity(0); - cache->SetCapacity(1048576); - statuses.clear(); - statuses.resize(keys.size()); - ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; - fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 2); - dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(), - pin_values.data(), statuses.data()); - CheckStatus(statuses, 6); - - // Similar to the previous one, but an IO delay in the last but one CF - for (PinnableSlice& value : pin_values) { - value.Reset(); - } - cache->SetCapacity(0); - cache->SetCapacity(1048576); - statuses.clear(); - statuses.resize(keys.size()); - ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; - fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 3); - dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(), - pin_values.data(), statuses.data()); - CheckStatus(statuses, 8); - - // Test batched MultiGet with single CF and lots of keys. Inject delay - // into the second batch of keys. As each batch is 32, the first 64 keys, - // i.e first two batches, should succeed and the rest should time out - for (PinnableSlice& value : pin_values) { - value.Reset(); - } - cache->SetCapacity(0); - cache->SetCapacity(1048576); - key_str.clear(); - for (i = 0; i < 100; ++i) { - key_str.emplace_back(Key(static_cast(i))); - } - keys.resize(key_str.size()); - pin_values.clear(); - pin_values.resize(key_str.size()); - for (i = 0; i < key_str.size(); ++i) { - keys[i] = Slice(key_str[i].data(), key_str[i].size()); - } - statuses.clear(); - statuses.resize(keys.size()); - ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; - fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 1); - dbfull()->MultiGet(ro, handles_[0], keys.size(), keys.data(), - pin_values.data(), statuses.data()); - CheckStatus(statuses, 64); - Close(); -} - -INSTANTIATE_TEST_CASE_P(DeadlineIO, DBBasicTestMultiGetDeadline, - ::testing::Bool()); - -TEST_F(DBBasicTest, ManifestWriteFailure) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.env = env_; - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Flush()); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::ProcessManifestWrites:AfterSyncManifest", [&](void* arg) { - ASSERT_NE(nullptr, arg); - auto* s = reinterpret_cast(arg); - ASSERT_OK(*s); - // Manually overwrite return status - *s = Status::IOError(); - }); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put("key", "value")); - ASSERT_NOK(Flush()); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->EnableProcessing(); - Reopen(options); -} - -TEST_F(DBBasicTest, DestroyDefaultCfHandle) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - DestroyAndReopen(options); - 
CreateAndReopenWithCF({"pikachu"}, options); - for (const auto* h : handles_) { - ASSERT_NE(db_->DefaultColumnFamily(), h); - } - - // We have two handles to the default column family. The two handles point to - // different ColumnFamilyHandle objects. - assert(db_->DefaultColumnFamily()); - ASSERT_EQ(0U, db_->DefaultColumnFamily()->GetID()); - assert(handles_[0]); - ASSERT_EQ(0U, handles_[0]->GetID()); - - // You can destroy handles_[...]. - for (auto* h : handles_) { - ASSERT_OK(db_->DestroyColumnFamilyHandle(h)); - } - handles_.clear(); - - // But you should not destroy db_->DefaultColumnFamily(), since it's going to - // be deleted in `DBImpl::CloseHelper()`. Before that, it may be used - // elsewhere internally too. - ColumnFamilyHandle* default_cf = db_->DefaultColumnFamily(); - ASSERT_TRUE(db_->DestroyColumnFamilyHandle(default_cf).IsInvalidArgument()); -} - -TEST_F(DBBasicTest, FailOpenIfLoggerCreationFail) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "rocksdb::CreateLoggerFromOptions:AfterGetPath", [&](void* arg) { - auto* s = reinterpret_cast(arg); - assert(s); - *s = Status::IOError("Injected"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - Status s = TryReopen(options); - ASSERT_EQ(nullptr, options.info_log); - ASSERT_TRUE(s.IsIOError()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(DBBasicTest, VerifyFileChecksums) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.env = env_; - DestroyAndReopen(options); - ASSERT_OK(Put("a", "value")); - ASSERT_OK(Flush()); - ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsInvalidArgument()); - - options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); - Reopen(options); - ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); - - // Write an L0 with checksum computed. - ASSERT_OK(Put("b", "value")); - ASSERT_OK(Flush()); - - ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); - - // Does the right thing but with the wrong name -- using it should lead to an - // error. 
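Before that deliberately misnamed variant, the baseline verification flow these assertions exercise is roughly the following sketch; opening the DB and writing data are elided, and the factory has to be configured before the files are written.

#include "rocksdb/db.h"
#include "rocksdb/file_checksum.h"

void VerifyChecksumsSketch(rocksdb::Options* options, rocksdb::DB* db) {
  // Checksums are only recorded for SST files written while a generator
  // factory is configured; verification recomputes and compares them.
  options->file_checksum_gen_factory =
      rocksdb::GetFileChecksumGenCrc32cFactory();
  // ... reopen with *options, Put() and Flush() some data ...
  rocksdb::Status s = db->VerifyFileChecksums(rocksdb::ReadOptions());
  if (!s.ok()) {
    // Missing checksums or a generator name mismatch shows up here.
  }
}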
- class MisnamedFileChecksumGenerator : public FileChecksumGenCrc32c { - public: - MisnamedFileChecksumGenerator(const FileChecksumGenContext& context) - : FileChecksumGenCrc32c(context) {} - - const char* Name() const override { return "sha1"; } - }; - - class MisnamedFileChecksumGenFactory : public FileChecksumGenCrc32cFactory { - public: - std::unique_ptr CreateFileChecksumGenerator( - const FileChecksumGenContext& context) override { - return std::unique_ptr( - new MisnamedFileChecksumGenerator(context)); - } - }; - - options.file_checksum_gen_factory.reset(new MisnamedFileChecksumGenFactory()); - Reopen(options); - ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsInvalidArgument()); -} - -TEST_F(DBBasicTest, VerifyFileChecksumsReadahead) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.env = env_; - options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); - DestroyAndReopen(options); - - Random rnd(301); - int alignment = 256 * 1024; - for (int i = 0; i < 16; ++i) { - ASSERT_OK(Put("key" + std::to_string(i), rnd.RandomString(alignment))); - } - ASSERT_OK(Flush()); - - std::vector filenames; - int sst_cnt = 0; - std::string sst_name; - uint64_t sst_size; - uint64_t number; - FileType type; - ASSERT_OK(env_->GetChildren(dbname_, &filenames)); - for (auto name : filenames) { - if (ParseFileName(name, &number, &type)) { - if (type == kTableFile) { - sst_cnt++; - sst_name = name; - } - } - } - ASSERT_EQ(sst_cnt, 1); - ASSERT_OK(env_->GetFileSize(dbname_ + '/' + sst_name, &sst_size)); - - bool last_read = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "GenerateOneFileChecksum::Chunk:0", [&](void* /*arg*/) { - if (env_->random_read_bytes_counter_.load() == sst_size) { - EXPECT_FALSE(last_read); - last_read = true; - } else { - ASSERT_EQ(env_->random_read_bytes_counter_.load() & (alignment - 1), - 0); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - env_->count_random_reads_ = true; - env_->random_read_bytes_counter_ = 0; - env_->random_read_counter_.Reset(); - - ReadOptions ro; - ro.readahead_size = alignment; - ASSERT_OK(db_->VerifyFileChecksums(ro)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_TRUE(last_read); - ASSERT_EQ(env_->random_read_counter_.Read(), - (sst_size + alignment - 1) / (alignment)); -} - -// TODO: re-enable after we provide finer-grained control for WAL tracking to -// meet the needs of different use cases, durability levels and recovery modes. -TEST_F(DBBasicTest, DISABLED_ManualWalSync) { - Options options = CurrentOptions(); - options.track_and_verify_wals_in_manifest = true; - options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; - DestroyAndReopen(options); - - ASSERT_OK(Put("x", "y")); - // This does not create a new WAL. - ASSERT_OK(db_->SyncWAL()); - EXPECT_FALSE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); - - std::unique_ptr wal; - Status s = db_->GetCurrentWalFile(&wal); - ASSERT_OK(s); - Close(); - - EXPECT_OK(env_->DeleteFile(LogFileName(dbname_, wal->LogNumber()))); - - ASSERT_TRUE(TryReopen(options).IsCorruption()); -} - -// A test class for intercepting random reads and injecting artificial -// delays. 
Used for testing the deadline/timeout feature -class DBBasicTestDeadline - : public DBBasicTest, - public testing::WithParamInterface> {}; - -TEST_P(DBBasicTestDeadline, PointLookupDeadline) { - std::shared_ptr fs = std::make_shared(env_, true); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - bool set_deadline = std::get<0>(GetParam()); - bool set_timeout = std::get<1>(GetParam()); - - for (int option_config = kDefault; option_config < kEnd; ++option_config) { - if (ShouldSkipOptions(option_config, kSkipPlainTable | kSkipMmapReads)) { - continue; - } - option_config_ = option_config; - Options options = CurrentOptions(); - if (options.use_direct_reads) { - continue; - } - options.env = env.get(); - options.disable_auto_compactions = true; - Cache* block_cache = nullptr; - // Fileter block reads currently don't cause the request to get - // aborted on a read timeout, so its possible those block reads - // may get issued even if the deadline is past - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTable::Get:BeforeFilterMatch", - [&](void* /*arg*/) { fs->IgnoreDeadline(true); }); - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTable::Get:AfterFilterMatch", - [&](void* /*arg*/) { fs->IgnoreDeadline(false); }); - // DB open will create table readers unless we reduce the table cache - // capacity. - // SanitizeOptions will set max_open_files to minimum of 20. Table cache - // is allocated with max_open_files - 10 as capacity. So override - // max_open_files to 11 so table cache capacity will become 1. This will - // prevent file open during DB open and force the file to be opened - // during MultiGet - SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = (int*)arg; - *max_open_files = 11; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - SetTimeElapseOnlySleepOnReopen(&options); - Reopen(options); - - if (options.table_factory) { - block_cache = options.table_factory->GetOptions( - TableFactory::kBlockCacheOpts()); - } - - Random rnd(301); - for (int i = 0; i < 400; ++i) { - std::string key = "k" + std::to_string(i); - ASSERT_OK(Put(key, rnd.RandomString(100))); - } - ASSERT_OK(Flush()); - - bool timedout = true; - // A timeout will be forced when the IO counter reaches this value - int io_deadline_trigger = 0; - // Keep incrementing io_deadline_trigger and call Get() until there is an - // iteration that doesn't cause a timeout. This ensures that we cover - // all file reads in the point lookup path that can potentially timeout - // and cause the Get() to fail. 
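The retry loop below depends on the SyncPoint hook installed above. The general pattern of that test-only facility (declared in test_util/sync_point.h) is sketched here with the same callback the test uses.

#include "test_util/sync_point.h"

void MaxOpenFilesOverrideSketch() {
  using ROCKSDB_NAMESPACE::SyncPoint;
  SyncPoint::GetInstance()->SetCallBack(
      "SanitizeOptions::AfterChangeMaxOpenFiles", [](void* arg) {
        // Shrink the sanitized max_open_files so the table cache holds a
        // single file and table readers are opened lazily on the read path.
        *static_cast<int*>(arg) = 11;
      });
  SyncPoint::GetInstance()->EnableProcessing();
  // ... reopen the DB and drive the reads under test ...
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}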
- while (timedout) { - ReadOptions ro; - if (set_deadline) { - ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; - } - if (set_timeout) { - ro.io_timeout = std::chrono::microseconds{5000}; - } - fs->SetDelayTrigger(ro.deadline, ro.io_timeout, io_deadline_trigger); - - block_cache->SetCapacity(0); - block_cache->SetCapacity(1048576); - - std::string value; - Status s = dbfull()->Get(ro, "k50", &value); - if (fs->TimedOut()) { - ASSERT_EQ(s, Status::TimedOut()); - } else { - timedout = false; - ASSERT_OK(s); - } - io_deadline_trigger++; - } - // Reset the delay sequence in order to avoid false alarms during Reopen - fs->SetDelayTrigger(std::chrono::microseconds::zero(), - std::chrono::microseconds::zero(), 0); - } - Close(); -} - -TEST_P(DBBasicTestDeadline, IteratorDeadline) { - std::shared_ptr fs = std::make_shared(env_, true); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - bool set_deadline = std::get<0>(GetParam()); - bool set_timeout = std::get<1>(GetParam()); - - for (int option_config = kDefault; option_config < kEnd; ++option_config) { - if (ShouldSkipOptions(option_config, kSkipPlainTable | kSkipMmapReads)) { - continue; - } - Options options = CurrentOptions(); - if (options.use_direct_reads) { - continue; - } - options.env = env.get(); - options.disable_auto_compactions = true; - Cache* block_cache = nullptr; - // DB open will create table readers unless we reduce the table cache - // capacity. - // SanitizeOptions will set max_open_files to minimum of 20. Table cache - // is allocated with max_open_files - 10 as capacity. So override - // max_open_files to 11 so table cache capacity will become 1. This will - // prevent file open during DB open and force the file to be opened - // during MultiGet - SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = (int*)arg; - *max_open_files = 11; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - SetTimeElapseOnlySleepOnReopen(&options); - Reopen(options); - - if (options.table_factory) { - block_cache = options.table_factory->GetOptions( - TableFactory::kBlockCacheOpts()); - } - - Random rnd(301); - for (int i = 0; i < 400; ++i) { - std::string key = "k" + std::to_string(i); - ASSERT_OK(Put(key, rnd.RandomString(100))); - } - ASSERT_OK(Flush()); - - bool timedout = true; - // A timeout will be forced when the IO counter reaches this value - int io_deadline_trigger = 0; - // Keep incrementing io_deadline_trigger and call Get() until there is an - // iteration that doesn't cause a timeout. 
This ensures that we cover - // all file reads in the point lookup path that can potentially timeout - while (timedout) { - ReadOptions ro; - if (set_deadline) { - ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; - } - if (set_timeout) { - ro.io_timeout = std::chrono::microseconds{5000}; - } - fs->SetDelayTrigger(ro.deadline, ro.io_timeout, io_deadline_trigger); - - block_cache->SetCapacity(0); - block_cache->SetCapacity(1048576); - - Iterator* iter = dbfull()->NewIterator(ro); - int count = 0; - iter->Seek("k50"); - while (iter->Valid() && count++ < 100) { - iter->Next(); - } - if (fs->TimedOut()) { - ASSERT_FALSE(iter->Valid()); - ASSERT_EQ(iter->status(), Status::TimedOut()); - } else { - timedout = false; - ASSERT_OK(iter->status()); - } - delete iter; - io_deadline_trigger++; - } - // Reset the delay sequence in order to avoid false alarms during Reopen - fs->SetDelayTrigger(std::chrono::microseconds::zero(), - std::chrono::microseconds::zero(), 0); - } - Close(); -} - -// Param 0: If true, set read_options.deadline -// Param 1: If true, set read_options.io_timeout -INSTANTIATE_TEST_CASE_P(DBBasicTestDeadline, DBBasicTestDeadline, - ::testing::Values(std::make_tuple(true, false), - std::make_tuple(false, true), - std::make_tuple(true, true))); -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc deleted file mode 100644 index 1a1366353..000000000 --- a/db/db_block_cache_test.cc +++ /dev/null @@ -1,1969 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#include -#include -#include -#include - -#include "cache/cache_entry_roles.h" -#include "cache/cache_key.h" -#include "cache/lru_cache.h" -#include "cache/typed_cache.h" -#include "db/column_family.h" -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "env/unique_id_gen.h" -#include "port/stack_trace.h" -#include "rocksdb/persistent_cache.h" -#include "rocksdb/statistics.h" -#include "rocksdb/table.h" -#include "rocksdb/table_properties.h" -#include "table/block_based/block_based_table_reader.h" -#include "table/unique_id_impl.h" -#include "util/compression.h" -#include "util/defer.h" -#include "util/hash.h" -#include "util/math.h" -#include "util/random.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { - -class DBBlockCacheTest : public DBTestBase { - private: - size_t miss_count_ = 0; - size_t hit_count_ = 0; - size_t insert_count_ = 0; - size_t failure_count_ = 0; - size_t compression_dict_miss_count_ = 0; - size_t compression_dict_hit_count_ = 0; - size_t compression_dict_insert_count_ = 0; - - public: - const size_t kNumBlocks = 10; - const size_t kValueSize = 100; - - DBBlockCacheTest() - : DBTestBase("db_block_cache_test", /*env_do_fsync=*/true) {} - - BlockBasedTableOptions GetTableOptions() { - BlockBasedTableOptions table_options; - // Set a small enough block size so that each key-value get its own block. - table_options.block_size = 1; - return table_options; - } - - Options GetOptions(const BlockBasedTableOptions& table_options) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.avoid_flush_during_recovery = false; - // options.compression = kNoCompression; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - return options; - } - - void InitTable(const Options& /*options*/) { - std::string value(kValueSize, 'a'); - for (size_t i = 0; i < kNumBlocks; i++) { - ASSERT_OK(Put(std::to_string(i), value.c_str())); - } - } - - void RecordCacheCounters(const Options& options) { - miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_MISS); - hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_HIT); - insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD); - failure_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES); - } - - void RecordCacheCountersForCompressionDict(const Options& options) { - compression_dict_miss_count_ = - TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS); - compression_dict_hit_count_ = - TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT); - compression_dict_insert_count_ = - TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD); - } - - void CheckCacheCounters(const Options& options, size_t expected_misses, - size_t expected_hits, size_t expected_inserts, - size_t expected_failures) { - size_t new_miss_count = TestGetTickerCount(options, BLOCK_CACHE_MISS); - size_t new_hit_count = TestGetTickerCount(options, BLOCK_CACHE_HIT); - size_t new_insert_count = TestGetTickerCount(options, BLOCK_CACHE_ADD); - size_t new_failure_count = - TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES); - ASSERT_EQ(miss_count_ + expected_misses, new_miss_count); - ASSERT_EQ(hit_count_ + expected_hits, new_hit_count); - ASSERT_EQ(insert_count_ + expected_inserts, new_insert_count); - ASSERT_EQ(failure_count_ + expected_failures, new_failure_count); - miss_count_ = new_miss_count; - hit_count_ = new_hit_count; - insert_count_ = new_insert_count; - failure_count_ = 
new_failure_count; - } - - void CheckCacheCountersForCompressionDict( - const Options& options, size_t expected_compression_dict_misses, - size_t expected_compression_dict_hits, - size_t expected_compression_dict_inserts) { - size_t new_compression_dict_miss_count = - TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS); - size_t new_compression_dict_hit_count = - TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT); - size_t new_compression_dict_insert_count = - TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD); - ASSERT_EQ(compression_dict_miss_count_ + expected_compression_dict_misses, - new_compression_dict_miss_count); - ASSERT_EQ(compression_dict_hit_count_ + expected_compression_dict_hits, - new_compression_dict_hit_count); - ASSERT_EQ( - compression_dict_insert_count_ + expected_compression_dict_inserts, - new_compression_dict_insert_count); - compression_dict_miss_count_ = new_compression_dict_miss_count; - compression_dict_hit_count_ = new_compression_dict_hit_count; - compression_dict_insert_count_ = new_compression_dict_insert_count; - } - - const std::array GetCacheEntryRoleCountsBg() { - // Verify in cache entry role stats - std::array cache_entry_role_counts; - std::map values; - EXPECT_TRUE(db_->GetMapProperty(DB::Properties::kFastBlockCacheEntryStats, - &values)); - for (size_t i = 0; i < kNumCacheEntryRoles; ++i) { - auto role = static_cast(i); - cache_entry_role_counts[i] = - ParseSizeT(values[BlockCacheEntryStatsMapKeys::EntryCount(role)]); - } - return cache_entry_role_counts; - } -}; - -TEST_F(DBBlockCacheTest, IteratorBlockCacheUsage) { - ReadOptions read_options; - read_options.fill_cache = false; - auto table_options = GetTableOptions(); - auto options = GetOptions(table_options); - InitTable(options); - - LRUCacheOptions co; - co.capacity = 0; - co.num_shard_bits = 0; - co.strict_capacity_limit = false; - // Needed not to count entry stats collector - co.metadata_charge_policy = kDontChargeCacheMetadata; - std::shared_ptr cache = NewLRUCache(co); - table_options.block_cache = cache; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - RecordCacheCounters(options); - - std::vector> iterators(kNumBlocks - 1); - Iterator* iter = nullptr; - - ASSERT_EQ(0, cache->GetUsage()); - iter = db_->NewIterator(read_options); - iter->Seek(std::to_string(0)); - ASSERT_LT(0, cache->GetUsage()); - delete iter; - iter = nullptr; - ASSERT_EQ(0, cache->GetUsage()); -} - -TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) { - ReadOptions read_options; - auto table_options = GetTableOptions(); - auto options = GetOptions(table_options); - InitTable(options); - - LRUCacheOptions co; - co.capacity = 0; - co.num_shard_bits = 0; - co.strict_capacity_limit = false; - // Needed not to count entry stats collector - co.metadata_charge_policy = kDontChargeCacheMetadata; - std::shared_ptr cache = NewLRUCache(co); - table_options.block_cache = cache; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - RecordCacheCounters(options); - - std::vector> iterators(kNumBlocks - 1); - Iterator* iter = nullptr; - - // Load blocks into cache. 
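The loop below loads those blocks through fresh iterators. The cache wiring it depends on, repeated throughout this file, is sketched standalone here; the capacity is illustrative.

#include <memory>
#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

rocksdb::Options BlockCacheOptionsSketch() {
  rocksdb::LRUCacheOptions co;
  co.capacity = 8 << 20;  // 8 MiB, illustrative
  co.num_shard_bits = 0;  // a single shard keeps usage accounting deterministic
  co.metadata_charge_policy = rocksdb::kDontChargeCacheMetadata;
  std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(co);

  rocksdb::BlockBasedTableOptions table_options;
  table_options.block_cache = cache;
  table_options.block_size = 1;  // tiny blocks: one key-value pair per data block
  rocksdb::Options options;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  return options;
}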
- for (size_t i = 0; i + 1 < kNumBlocks; i++) { - iter = db_->NewIterator(read_options); - iter->Seek(std::to_string(i)); - ASSERT_OK(iter->status()); - CheckCacheCounters(options, 1, 0, 1, 0); - iterators[i].reset(iter); - } - size_t usage = cache->GetUsage(); - ASSERT_LT(0, usage); - cache->SetCapacity(usage); - ASSERT_EQ(usage, cache->GetPinnedUsage()); - - // Test with strict capacity limit. - cache->SetStrictCapacityLimit(true); - iter = db_->NewIterator(read_options); - iter->Seek(std::to_string(kNumBlocks - 1)); - ASSERT_TRUE(iter->status().IsMemoryLimit()); - CheckCacheCounters(options, 1, 0, 0, 1); - delete iter; - iter = nullptr; - - // Release iterators and access cache again. - for (size_t i = 0; i + 1 < kNumBlocks; i++) { - iterators[i].reset(); - CheckCacheCounters(options, 0, 0, 0, 0); - } - ASSERT_EQ(0, cache->GetPinnedUsage()); - for (size_t i = 0; i + 1 < kNumBlocks; i++) { - iter = db_->NewIterator(read_options); - iter->Seek(std::to_string(i)); - ASSERT_OK(iter->status()); - CheckCacheCounters(options, 0, 1, 0, 0); - iterators[i].reset(iter); - } -} - -#ifdef SNAPPY - -namespace { -class PersistentCacheFromCache : public PersistentCache { - public: - PersistentCacheFromCache(std::shared_ptr cache, bool read_only) - : cache_(cache), read_only_(read_only) {} - - Status Insert(const Slice& key, const char* data, - const size_t size) override { - if (read_only_) { - return Status::NotSupported(); - } - std::unique_ptr copy{new char[size]}; - std::copy_n(data, size, copy.get()); - Status s = cache_.Insert(key, copy.get(), size); - if (s.ok()) { - copy.release(); - } - return s; - } - - Status Lookup(const Slice& key, std::unique_ptr* data, - size_t* size) override { - auto handle = cache_.Lookup(key); - if (handle) { - char* ptr = cache_.Value(handle); - *size = cache_.get()->GetCharge(handle); - data->reset(new char[*size]); - std::copy_n(ptr, *size, data->get()); - cache_.Release(handle); - return Status::OK(); - } else { - return Status::NotFound(); - } - } - - bool IsCompressed() override { return false; } - - StatsType Stats() override { return StatsType(); } - - std::string GetPrintableOptions() const override { return ""; } - - uint64_t NewId() override { return cache_.get()->NewId(); } - - private: - BasicTypedSharedCacheInterface cache_; - bool read_only_; -}; - -class ReadOnlyCacheWrapper : public CacheWrapper { - public: - using CacheWrapper::CacheWrapper; - - const char* Name() const override { return "ReadOnlyCacheWrapper"; } - - Status Insert(const Slice& /*key*/, Cache::ObjectPtr /*value*/, - const CacheItemHelper* /*helper*/, size_t /*charge*/, - Handle** /*handle*/, Priority /*priority*/) override { - return Status::NotSupported(); - } -}; - -} // anonymous namespace -#endif // SNAPPY - - -// Make sure that when options.block_cache is set, after a new table is -// created its index/filter blocks are added to block cache. -TEST_F(DBBlockCacheTest, IndexAndFilterBlocksOfNewTableAddedToCache) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(20)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "key", "val")); - // Create a new table. - ASSERT_OK(Flush(1)); - - // index/filter blocks added to block cache right after table creation. 
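The assertions that follow check this through block-cache tickers. Read in isolation, that ticker plumbing looks like the sketch below; it assumes options.statistics was set to CreateDBStatistics() before the DB was opened.

#include <cstdint>
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

void ReadCacheTickersSketch(const rocksdb::Options& options) {
  uint64_t index_miss =
      options.statistics->getTickerCount(rocksdb::BLOCK_CACHE_INDEX_MISS);
  uint64_t filter_miss =
      options.statistics->getTickerCount(rocksdb::BLOCK_CACHE_FILTER_MISS);
  uint64_t adds = options.statistics->getTickerCount(rocksdb::BLOCK_CACHE_ADD);
  // A fresh table contributes one index miss, one filter miss and two adds
  // when cache_index_and_filter_blocks is enabled, which is what the test
  // asserts.
  (void)index_miss;
  (void)filter_miss;
  (void)adds;
}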
- ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(2, /* only index/filter were added */ - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); - uint64_t int_num; - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_EQ(int_num, 0U); - - // Make sure filter block is in cache. - std::string value; - ReadOptions ropt; - db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value); - - // Miss count should remain the same. - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - - db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - - // Make sure index block is in cache. - auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT); - value = Get(1, "key"); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); - ASSERT_EQ(index_block_hit + 1, - TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); - - value = Get(1, "key"); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); - ASSERT_EQ(index_block_hit + 2, - TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); -} - -// With fill_cache = false, fills up the cache, then iterates over the entire -// db, verify dummy entries inserted in `BlockBasedTable::NewDataBlockIterator` -// does not cause heap-use-after-free errors in COMPILE_WITH_ASAN=1 runs -TEST_F(DBBlockCacheTest, FillCacheAndIterateDB) { - ReadOptions read_options; - read_options.fill_cache = false; - auto table_options = GetTableOptions(); - auto options = GetOptions(table_options); - InitTable(options); - - std::shared_ptr cache = NewLRUCache(10, 0, true); - table_options.block_cache = cache; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_OK(Put("key1", "val1")); - ASSERT_OK(Put("key2", "val2")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("key3", "val3")); - ASSERT_OK(Put("key4", "val4")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("key5", "val5")); - ASSERT_OK(Put("key6", "val6")); - ASSERT_OK(Flush()); - - Iterator* iter = nullptr; - - iter = db_->NewIterator(read_options); - iter->Seek(std::to_string(0)); - while (iter->Valid()) { - iter->Next(); - } - delete iter; - iter = nullptr; -} - -TEST_F(DBBlockCacheTest, IndexAndFilterBlocksStats) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - LRUCacheOptions co; - // 500 bytes are enough to hold the first two blocks - co.capacity = 500; - co.num_shard_bits = 0; - co.strict_capacity_limit = false; - co.metadata_charge_policy = kDontChargeCacheMetadata; - std::shared_ptr cache = NewLRUCache(co); - table_options.block_cache = cache; - table_options.filter_policy.reset(NewBloomFilterPolicy(20, true)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "longer_key", "val")); - // Create a new table - ASSERT_OK(Flush(1)); - size_t index_bytes_insert = - TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT); - size_t filter_bytes_insert = - 
TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT); - ASSERT_GT(index_bytes_insert, 0); - ASSERT_GT(filter_bytes_insert, 0); - ASSERT_EQ(cache->GetUsage(), index_bytes_insert + filter_bytes_insert); - // set the cache capacity to the current usage - cache->SetCapacity(index_bytes_insert + filter_bytes_insert); - // Note that the second key needs to be no longer than the first one. - // Otherwise the second index block may not fit in cache. - ASSERT_OK(Put(1, "key", "val")); - // Create a new table - ASSERT_OK(Flush(1)); - // cache evicted old index and block entries - ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT), - index_bytes_insert); - ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT), - filter_bytes_insert); -} - -#if (defined OS_LINUX || defined OS_WIN) -TEST_F(DBBlockCacheTest, WarmCacheWithDataBlocksDuringFlush) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - - BlockBasedTableOptions table_options; - table_options.block_cache = NewLRUCache(1 << 25, 0, false); - table_options.cache_index_and_filter_blocks = false; - table_options.prepopulate_block_cache = - BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - std::string value(kValueSize, 'a'); - for (size_t i = 1; i <= kNumBlocks; i++) { - ASSERT_OK(Put(std::to_string(i), value)); - ASSERT_OK(Flush()); - ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); - ASSERT_EQ(value, Get(std::to_string(i))); - ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS)); - ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT)); - } - // Verify compaction not counted - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr, - /*end=*/nullptr)); - EXPECT_EQ(kNumBlocks, - options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); -} - -// This test cache data, index and filter blocks during flush. 
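Both that test and the parameterized variant below get their warming behavior from a single table option. A configuration sketch, with an illustrative cache size:

#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

rocksdb::Options FlushWarmingOptionsSketch() {
  rocksdb::BlockBasedTableOptions table_options;
  table_options.block_cache = rocksdb::NewLRUCache(32 << 20);
  // Blocks written by a memtable flush are inserted into the block cache, so
  // the first Get() after Flush() hits the cache instead of the file.
  table_options.prepopulate_block_cache =
      rocksdb::BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  rocksdb::Options options;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  return options;
}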
-class DBBlockCacheTest1 : public DBTestBase, - public ::testing::WithParamInterface { - public: - const size_t kNumBlocks = 10; - const size_t kValueSize = 100; - DBBlockCacheTest1() : DBTestBase("db_block_cache_test1", true) {} -}; - -INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1, - ::testing::Values(1, 2)); - -TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - - BlockBasedTableOptions table_options; - table_options.block_cache = NewLRUCache(1 << 25, 0, false); - - uint32_t filter_type = GetParam(); - switch (filter_type) { - case 1: // partition_filter - table_options.partition_filters = true; - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - break; - case 2: // full filter - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - break; - default: - assert(false); - } - - table_options.cache_index_and_filter_blocks = true; - table_options.prepopulate_block_cache = - BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - std::string value(kValueSize, 'a'); - for (size_t i = 1; i <= kNumBlocks; i++) { - ASSERT_OK(Put(std::to_string(i), value)); - ASSERT_OK(Flush()); - ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); - if (filter_type == 1) { - ASSERT_EQ(2 * i, - options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); - ASSERT_EQ(2 * i, - options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); - } else { - ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); - ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); - } - ASSERT_EQ(value, Get(std::to_string(i))); - - ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS)); - ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS)); - ASSERT_EQ(i * 3, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT)); - if (filter_type == 1) { - ASSERT_EQ(i * 3, - options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT)); - } else { - ASSERT_EQ(i * 2, - options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT)); - } - ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS)); - } - - // Verify compaction not counted - CompactRangeOptions cro; - // Ensure files are rewritten, not just trivially moved. - cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(db_->CompactRange(cro, /*begin=*/nullptr, /*end=*/nullptr)); - EXPECT_EQ(kNumBlocks, - options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); - // Index and filter blocks are automatically warmed when the new table file - // is automatically opened at the end of compaction. This is not easily - // disabled so results in the new index and filter blocks being warmed. 
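The compaction referenced above is forced to rewrite files rather than trivially move them; in isolation that call is:

#include "rocksdb/db.h"
#include "rocksdb/options.h"

void ForceRewriteCompactionSketch(rocksdb::DB* db) {
  rocksdb::CompactRangeOptions cro;
  // Rewrite bottommost-level files too, instead of skipping or moving them.
  cro.bottommost_level_compaction =
      rocksdb::BottommostLevelCompaction::kForceOptimized;
  rocksdb::Status s = db->CompactRange(cro, /*begin=*/nullptr, /*end=*/nullptr);
  if (!s.ok()) {
    // Handle or report the compaction failure.
  }
}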
- if (filter_type == 1) { - EXPECT_EQ(2 * (1 + kNumBlocks), - options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); - EXPECT_EQ(2 * (1 + kNumBlocks), - options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); - } else { - EXPECT_EQ(1 + kNumBlocks, - options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); - EXPECT_EQ(1 + kNumBlocks, - options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); - } -} - -TEST_F(DBBlockCacheTest, DynamicallyWarmCacheDuringFlush) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - - BlockBasedTableOptions table_options; - table_options.block_cache = NewLRUCache(1 << 25, 0, false); - table_options.cache_index_and_filter_blocks = false; - table_options.prepopulate_block_cache = - BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly; - - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - std::string value(kValueSize, 'a'); - - for (size_t i = 1; i <= 5; i++) { - ASSERT_OK(Put(std::to_string(i), value)); - ASSERT_OK(Flush()); - ASSERT_EQ(1, - options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD)); - - ASSERT_EQ(value, Get(std::to_string(i))); - ASSERT_EQ(0, - options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD)); - ASSERT_EQ( - 0, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS)); - ASSERT_EQ(1, - options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT)); - } - - ASSERT_OK(dbfull()->SetOptions( - {{"block_based_table_factory", "{prepopulate_block_cache=kDisable;}"}})); - - for (size_t i = 6; i <= kNumBlocks; i++) { - ASSERT_OK(Put(std::to_string(i), value)); - ASSERT_OK(Flush()); - ASSERT_EQ(0, - options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD)); - - ASSERT_EQ(value, Get(std::to_string(i))); - ASSERT_EQ(1, - options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD)); - ASSERT_EQ( - 1, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS)); - ASSERT_EQ(0, - options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT)); - } -} -#endif - -namespace { - -// A mock cache wraps LRUCache, and record how many entries have been -// inserted for each priority. 
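The mid-test toggle in DynamicallyWarmCacheDuringFlush above goes through DB::SetOptions with a serialized table-factory string; on its own it is roughly:

#include <string>
#include <unordered_map>
#include "rocksdb/db.h"

void DisableFlushWarmingSketch(rocksdb::DB* db) {
  // Mutates the block-based table factory in place; no reopen is required.
  rocksdb::Status s = db->SetOptions(
      {{"block_based_table_factory", "{prepopulate_block_cache=kDisable;}"}});
  if (!s.ok()) {
    // The option string was rejected.
  }
}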
-class MockCache : public LRUCache { - public: - static uint32_t high_pri_insert_count; - static uint32_t low_pri_insert_count; - - MockCache() - : LRUCache((size_t)1 << 25 /*capacity*/, 0 /*num_shard_bits*/, - false /*strict_capacity_limit*/, 0.0 /*high_pri_pool_ratio*/, - 0.0 /*low_pri_pool_ratio*/) {} - - using ShardedCache::Insert; - - Status Insert(const Slice& key, Cache::ObjectPtr value, - const Cache::CacheItemHelper* helper, size_t charge, - Handle** handle, Priority priority) override { - if (priority == Priority::LOW) { - low_pri_insert_count++; - } else { - high_pri_insert_count++; - } - return LRUCache::Insert(key, value, helper, charge, handle, priority); - } -}; - -uint32_t MockCache::high_pri_insert_count = 0; -uint32_t MockCache::low_pri_insert_count = 0; - -} // anonymous namespace - -TEST_F(DBBlockCacheTest, IndexAndFilterBlocksCachePriority) { - for (auto priority : {Cache::Priority::LOW, Cache::Priority::HIGH}) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.block_cache.reset(new MockCache()); - table_options.filter_policy.reset(NewBloomFilterPolicy(20)); - table_options.cache_index_and_filter_blocks_with_high_priority = - priority == Cache::Priority::HIGH ? true : false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - MockCache::high_pri_insert_count = 0; - MockCache::low_pri_insert_count = 0; - - // Create a new table. - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Put("bar", "value")); - ASSERT_OK(Flush()); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - // index/filter blocks added to block cache right after table creation. - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(2, /* only index/filter were added */ - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); - if (priority == Cache::Priority::LOW) { - ASSERT_EQ(0u, MockCache::high_pri_insert_count); - ASSERT_EQ(2u, MockCache::low_pri_insert_count); - } else { - ASSERT_EQ(2u, MockCache::high_pri_insert_count); - ASSERT_EQ(0u, MockCache::low_pri_insert_count); - } - - // Access data block. - ASSERT_EQ("value", Get("foo")); - - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(3, /*adding data block*/ - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); - - // Data block should be inserted with low priority. - if (priority == Cache::Priority::LOW) { - ASSERT_EQ(0u, MockCache::high_pri_insert_count); - ASSERT_EQ(3u, MockCache::low_pri_insert_count); - } else { - ASSERT_EQ(2u, MockCache::high_pri_insert_count); - ASSERT_EQ(1u, MockCache::low_pri_insert_count); - } - } -} - -namespace { - -// An LRUCache wrapper that can falsely report "not found" on Lookup. -// This allows us to manipulate BlockBasedTableReader into thinking -// another thread inserted the data in between Lookup and Insert, -// while mostly preserving the LRUCache interface/behavior. 
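The per-priority insert counts that MockCache records correspond, in a real configuration, to an LRU high-priority pool plus one table option. A sketch, with an illustrative pool ratio:

#include "rocksdb/cache.h"
#include "rocksdb/table.h"

rocksdb::BlockBasedTableOptions HighPriIndexFilterSketch() {
  rocksdb::LRUCacheOptions co;
  co.capacity = 32 << 20;
  co.high_pri_pool_ratio = 0.5;  // reserve half the capacity for high-priority entries
  rocksdb::BlockBasedTableOptions table_options;
  table_options.block_cache = rocksdb::NewLRUCache(co);
  table_options.cache_index_and_filter_blocks = true;
  table_options.cache_index_and_filter_blocks_with_high_priority = true;
  return table_options;
}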
-class LookupLiarCache : public CacheWrapper { - int nth_lookup_not_found_ = 0; - - public: - explicit LookupLiarCache(std::shared_ptr target) - : CacheWrapper(std::move(target)) {} - - const char* Name() const override { return "LookupLiarCache"; } - - Handle* Lookup(const Slice& key, const CacheItemHelper* helper = nullptr, - CreateContext* create_context = nullptr, - Priority priority = Priority::LOW, - Statistics* stats = nullptr) override { - if (nth_lookup_not_found_ == 1) { - nth_lookup_not_found_ = 0; - return nullptr; - } - if (nth_lookup_not_found_ > 1) { - --nth_lookup_not_found_; - } - return CacheWrapper::Lookup(key, helper, create_context, priority, stats); - } - - // 1 == next lookup, 2 == after next, etc. - void SetNthLookupNotFound(int n) { nth_lookup_not_found_ = n; } -}; - -} // anonymous namespace - -TEST_F(DBBlockCacheTest, AddRedundantStats) { - const size_t capacity = size_t{1} << 25; - const int num_shard_bits = 0; // 1 shard - int iterations_tested = 0; - for (std::shared_ptr base_cache : - {NewLRUCache(capacity, num_shard_bits), - HyperClockCacheOptions( - capacity, - BlockBasedTableOptions().block_size /*estimated_value_size*/, - num_shard_bits) - .MakeSharedCache()}) { - if (!base_cache) { - // Skip clock cache when not supported - continue; - } - ++iterations_tested; - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - - std::shared_ptr cache = - std::make_shared(base_cache); - - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.block_cache = cache; - table_options.filter_policy.reset(NewBloomFilterPolicy(50)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - // Create a new table. - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Put("bar", "value")); - ASSERT_OK(Flush()); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - // Normal access filter+index+data. 
- ASSERT_EQ("value", Get("foo")); - - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); - // -------- - ASSERT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); - // -------- - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); - - // Againt access filter+index+data, but force redundant load+insert on index - cache->SetNthLookupNotFound(2); - ASSERT_EQ("value", Get("bar")); - - ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); - // -------- - ASSERT_EQ(4, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); - // -------- - ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); - - // Access just filter (with high probability), and force redundant - // load+insert - cache->SetNthLookupNotFound(1); - ASSERT_EQ("NOT_FOUND", Get("this key was not added")); - - EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); - EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); - EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); - // -------- - EXPECT_EQ(5, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); - EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); - EXPECT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); - // -------- - EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); - - // Access just data, forcing redundant load+insert - ReadOptions read_options; - std::unique_ptr iter{db_->NewIterator(read_options)}; - cache->SetNthLookupNotFound(1); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), "bar"); - - EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); - EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); - EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); - // -------- - EXPECT_EQ(6, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); - EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); - EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); - // -------- - EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); - } - EXPECT_GE(iterations_tested, 1); -} - -TEST_F(DBBlockCacheTest, ParanoidFileChecks) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.level0_file_num_compaction_trigger = 2; - options.paranoid_file_checks = true; - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = false; - table_options.filter_policy.reset(NewBloomFilterPolicy(20)); - 
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "1_key", "val")); - ASSERT_OK(Put(1, "9_key", "val")); - // Create a new table. - ASSERT_OK(Flush(1)); - ASSERT_EQ(1, /* read and cache data block */ - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Put(1, "1_key2", "val2")); - ASSERT_OK(Put(1, "9_key2", "val2")); - // Create a new SST file. This will further trigger a compaction - // and generate another file. - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(3, /* Totally 3 files created up to now */ - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - // After disabling options.paranoid_file_checks. NO further block - // is added after generating a new file. - ASSERT_OK( - dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}})); - - ASSERT_OK(Put(1, "1_key3", "val3")); - ASSERT_OK(Put(1, "9_key3", "val3")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "1_key4", "val4")); - ASSERT_OK(Put(1, "9_key4", "val4")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(3, /* Totally 3 files created up to now */ - TestGetTickerCount(options, BLOCK_CACHE_ADD)); -} - -TEST_F(DBBlockCacheTest, CacheCompressionDict) { - const int kNumFiles = 4; - const int kNumEntriesPerFile = 128; - const int kNumBytesPerEntry = 1024; - - // Try all the available libraries that support dictionary compression - std::vector compression_types; - if (Zlib_Supported()) { - compression_types.push_back(kZlibCompression); - } - if (LZ4_Supported()) { - compression_types.push_back(kLZ4Compression); - compression_types.push_back(kLZ4HCCompression); - } - if (ZSTD_Supported()) { - compression_types.push_back(kZSTD); - } else if (ZSTDNotFinal_Supported()) { - compression_types.push_back(kZSTDNotFinalCompression); - } - Random rnd(301); - for (auto compression_type : compression_types) { - Options options = CurrentOptions(); - options.bottommost_compression = compression_type; - options.bottommost_compression_opts.max_dict_bytes = 4096; - options.bottommost_compression_opts.enabled = true; - options.create_if_missing = true; - options.num_levels = 2; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.target_file_size_base = kNumEntriesPerFile * kNumBytesPerEntry; - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.block_cache.reset(new MockCache()); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - RecordCacheCountersForCompressionDict(options); - - for (int i = 0; i < kNumFiles; ++i) { - ASSERT_EQ(i, NumTableFilesAtLevel(0, 0)); - for (int j = 0; j < kNumEntriesPerFile; ++j) { - std::string value = rnd.RandomString(kNumBytesPerEntry); - ASSERT_OK(Put(Key(j * kNumFiles + i), value.c_str())); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(1)); - - // Compression dictionary blocks are preloaded. - CheckCacheCountersForCompressionDict( - options, kNumFiles /* expected_compression_dict_misses */, - 0 /* expected_compression_dict_hits */, - kNumFiles /* expected_compression_dict_inserts */); - - // Seek to a key in a file. It should cause the SST's dictionary meta-block - // to be read. 
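The CacheCompressionDict setup above depends on dictionary compression being enabled for the bottommost level; the dictionary (meta)blocks then go through the block cache like any other block, which is what the compression-dict cache counters observe. A hedged sketch of the application-side options, with illustrative sizes (the test body continues below):

#include "rocksdb/advanced_options.h"
#include "rocksdb/options.h"

// Sketch: train and use a compression dictionary for bottommost-level files.
void EnableBottommostDictionaryCompression(rocksdb::Options* options) {
  options->bottommost_compression = rocksdb::kZSTD;  // any dict-capable type
  options->bottommost_compression_opts.max_dict_bytes = 16 << 10;  // 16KB
  options->bottommost_compression_opts.enabled = true;
}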
- RecordCacheCounters(options); - RecordCacheCountersForCompressionDict(options); - ReadOptions read_options; - ASSERT_NE("NOT_FOUND", Get(Key(kNumFiles * kNumEntriesPerFile - 1))); - // Two block hits: index and dictionary since they are prefetched - // One block missed/added: data block - CheckCacheCounters(options, 1 /* expected_misses */, 2 /* expected_hits */, - 1 /* expected_inserts */, 0 /* expected_failures */); - CheckCacheCountersForCompressionDict( - options, 0 /* expected_compression_dict_misses */, - 1 /* expected_compression_dict_hits */, - 0 /* expected_compression_dict_inserts */); - } -} - -static void ClearCache(Cache* cache) { - std::deque keys; - Cache::ApplyToAllEntriesOptions opts; - auto callback = [&](const Slice& key, Cache::ObjectPtr, size_t /*charge*/, - const Cache::CacheItemHelper* helper) { - if (helper && helper->role == CacheEntryRole::kMisc) { - // Keep the stats collector - return; - } - keys.push_back(key.ToString()); - }; - cache->ApplyToAllEntries(callback, opts); - for (auto& k : keys) { - cache->Erase(k); - } -} - -TEST_F(DBBlockCacheTest, CacheEntryRoleStats) { - const size_t capacity = size_t{1} << 25; - int iterations_tested = 0; - for (bool partition : {false, true}) { - for (std::shared_ptr cache : - {NewLRUCache(capacity), - HyperClockCacheOptions( - capacity, - BlockBasedTableOptions().block_size /*estimated_value_size*/) - .MakeSharedCache()}) { - ++iterations_tested; - - Options options = CurrentOptions(); - SetTimeElapseOnlySleepOnReopen(&options); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.max_open_files = 13; - options.table_cache_numshardbits = 0; - // If this wakes up, it could interfere with test - options.stats_dump_period_sec = 0; - - BlockBasedTableOptions table_options; - table_options.block_cache = cache; - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(50)); - if (partition) { - table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; - table_options.partition_filters = true; - } - table_options.metadata_cache_options.top_level_index_pinning = - PinningTier::kNone; - table_options.metadata_cache_options.partition_pinning = - PinningTier::kNone; - table_options.metadata_cache_options.unpartitioned_pinning = - PinningTier::kNone; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - // Create a new table. 
- ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Put("bar", "value")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("zfoo", "value")); - ASSERT_OK(Put("zbar", "value")); - ASSERT_OK(Flush()); - - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - - // Fresh cache - ClearCache(cache.get()); - - std::array expected{}; - // For CacheEntryStatsCollector - expected[static_cast(CacheEntryRole::kMisc)] = 1; - EXPECT_EQ(expected, GetCacheEntryRoleCountsBg()); - - std::array prev_expected = expected; - - // First access only filters - ASSERT_EQ("NOT_FOUND", Get("different from any key added")); - expected[static_cast(CacheEntryRole::kFilterBlock)] += 2; - if (partition) { - expected[static_cast(CacheEntryRole::kFilterMetaBlock)] += 2; - } - // Within some time window, we will get cached entry stats - EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg()); - // Not enough to force a miss - env_->MockSleepForSeconds(45); - EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg()); - // Enough to force a miss - env_->MockSleepForSeconds(601); - EXPECT_EQ(expected, GetCacheEntryRoleCountsBg()); - - // Now access index and data block - ASSERT_EQ("value", Get("foo")); - expected[static_cast(CacheEntryRole::kIndexBlock)]++; - if (partition) { - // top-level - expected[static_cast(CacheEntryRole::kIndexBlock)]++; - } - expected[static_cast(CacheEntryRole::kDataBlock)]++; - // Enough to force a miss - env_->MockSleepForSeconds(601); - // But inject a simulated long scan so that we need a longer - // interval to force a miss next time. - SyncPoint::GetInstance()->SetCallBack( - "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries", - [this](void*) { - // To spend no more than 0.2% of time scanning, we would need - // interval of at least 10000s - env_->MockSleepForSeconds(20); - }); - SyncPoint::GetInstance()->EnableProcessing(); - EXPECT_EQ(expected, GetCacheEntryRoleCountsBg()); - prev_expected = expected; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - // The same for other file - ASSERT_EQ("value", Get("zfoo")); - expected[static_cast(CacheEntryRole::kIndexBlock)]++; - if (partition) { - // top-level - expected[static_cast(CacheEntryRole::kIndexBlock)]++; - } - expected[static_cast(CacheEntryRole::kDataBlock)]++; - // Because of the simulated long scan, this is not enough to force - // a miss - env_->MockSleepForSeconds(601); - EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg()); - // But this is enough - env_->MockSleepForSeconds(10000); - EXPECT_EQ(expected, GetCacheEntryRoleCountsBg()); - prev_expected = expected; - - // Also check the GetProperty interface - std::map values; - ASSERT_TRUE( - db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values)); - - for (size_t i = 0; i < kNumCacheEntryRoles; ++i) { - auto role = static_cast(i); - EXPECT_EQ(std::to_string(expected[i]), - values[BlockCacheEntryStatsMapKeys::EntryCount(role)]); - } - - // Add one for kWriteBuffer - { - WriteBufferManager wbm(size_t{1} << 20, cache); - wbm.ReserveMem(1024); - expected[static_cast(CacheEntryRole::kWriteBuffer)]++; - // Now we check that the GetProperty interface is more agressive about - // re-scanning stats, but not totally aggressive. 
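Outside of tests, the same per-role entry counts are read through the DB property map that the assertions above use. A minimal sketch assuming an already-open rocksdb::DB* named db (the assertions on the property map continue below):

#include <cstdio>
#include <map>
#include <string>
#include "rocksdb/cache.h"
#include "rocksdb/db.h"

// Print how many filter blocks are currently in the block cache, using the
// kBlockCacheEntryStats property map.
void PrintFilterBlockCount(rocksdb::DB* db) {
  std::map<std::string, std::string> stats;
  if (db->GetMapProperty(rocksdb::DB::Properties::kBlockCacheEntryStats,
                         &stats)) {
    std::printf("filter blocks cached: %s\n",
                stats[rocksdb::BlockCacheEntryStatsMapKeys::EntryCount(
                          rocksdb::CacheEntryRole::kFilterBlock)]
                    .c_str());
  }
}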
- // Within some time window, we will get cached entry stats - env_->MockSleepForSeconds(1); - EXPECT_EQ(std::to_string(prev_expected[static_cast( - CacheEntryRole::kWriteBuffer)]), - values[BlockCacheEntryStatsMapKeys::EntryCount( - CacheEntryRole::kWriteBuffer)]); - // Not enough for a "background" miss but enough for a "foreground" miss - env_->MockSleepForSeconds(45); - - ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, - &values)); - EXPECT_EQ( - std::to_string( - expected[static_cast(CacheEntryRole::kWriteBuffer)]), - values[BlockCacheEntryStatsMapKeys::EntryCount( - CacheEntryRole::kWriteBuffer)]); - } - prev_expected = expected; - - // With collector pinned in cache, we should be able to hit - // even if the cache is full - ClearCache(cache.get()); - Cache::Handle* h = nullptr; - if (strcmp(cache->Name(), "LRUCache") == 0) { - ASSERT_OK(cache->Insert("Fill-it-up", nullptr, &kNoopCacheItemHelper, - capacity + 1, &h, Cache::Priority::HIGH)); - } else { - // For ClockCache we use a 16-byte key. - ASSERT_OK(cache->Insert("Fill-it-up-xxxxx", nullptr, - &kNoopCacheItemHelper, capacity + 1, &h, - Cache::Priority::HIGH)); - } - ASSERT_GT(cache->GetUsage(), cache->GetCapacity()); - expected = {}; - // For CacheEntryStatsCollector - expected[static_cast(CacheEntryRole::kMisc)] = 1; - // For Fill-it-up - expected[static_cast(CacheEntryRole::kMisc)]++; - // Still able to hit on saved stats - EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg()); - // Enough to force a miss - env_->MockSleepForSeconds(1000); - EXPECT_EQ(expected, GetCacheEntryRoleCountsBg()); - - cache->Release(h); - - // Now we test that the DB mutex is not held during scans, for the ways - // we know how to (possibly) trigger them. Without a better good way to - // check this, we simply inject an acquire & release of the DB mutex - // deep in the stat collection code. If we were already holding the - // mutex, that is UB that would at least be found by TSAN. - int scan_count = 0; - SyncPoint::GetInstance()->SetCallBack( - "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries", - [this, &scan_count](void*) { - dbfull()->TEST_LockMutex(); - dbfull()->TEST_UnlockMutex(); - ++scan_count; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - // Different things that might trigger a scan, with mock sleeps to - // force a miss. - env_->MockSleepForSeconds(10000); - dbfull()->DumpStats(); - ASSERT_EQ(scan_count, 1); - - env_->MockSleepForSeconds(60); - ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kFastBlockCacheEntryStats, - &values)); - ASSERT_EQ(scan_count, 1); - ASSERT_TRUE( - db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values)); - ASSERT_EQ(scan_count, 2); - - env_->MockSleepForSeconds(10000); - ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kFastBlockCacheEntryStats, - &values)); - ASSERT_EQ(scan_count, 3); - - env_->MockSleepForSeconds(60); - std::string value_str; - ASSERT_TRUE(db_->GetProperty(DB::Properties::kFastBlockCacheEntryStats, - &value_str)); - ASSERT_EQ(scan_count, 3); - ASSERT_TRUE( - db_->GetProperty(DB::Properties::kBlockCacheEntryStats, &value_str)); - ASSERT_EQ(scan_count, 4); - - env_->MockSleepForSeconds(10000); - ASSERT_TRUE(db_->GetProperty(DB::Properties::kFastBlockCacheEntryStats, - &value_str)); - ASSERT_EQ(scan_count, 5); - - ASSERT_TRUE(db_->GetProperty(DB::Properties::kCFStats, &value_str)); - // To match historical speed, querying this property no longer triggers - // a scan, even if results are old. 
But periodic dump stats should keep - // things reasonably updated. - ASSERT_EQ(scan_count, /*unchanged*/ 5); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - } - EXPECT_GE(iterations_tested, 1); - } -} - -namespace { - -void DummyFillCache(Cache& cache, size_t entry_size, - std::vector>& handles) { - // fprintf(stderr, "Entry size: %zu\n", entry_size); - handles.clear(); - cache.EraseUnRefEntries(); - void* fake_value = &cache; - size_t capacity = cache.GetCapacity(); - OffsetableCacheKey ck{"abc", "abc", 42}; - for (size_t my_usage = 0; my_usage < capacity;) { - size_t charge = std::min(entry_size, capacity - my_usage); - Cache::Handle* handle; - Status st = cache.Insert(ck.WithOffset(my_usage).AsSlice(), fake_value, - &kNoopCacheItemHelper, charge, &handle); - ASSERT_OK(st); - handles.emplace_back(&cache, handle); - my_usage += charge; - } -} - -class CountingLogger : public Logger { - public: - ~CountingLogger() override {} - using Logger::Logv; - void Logv(const InfoLogLevel log_level, const char* format, - va_list /*ap*/) override { - if (std::strstr(format, "HyperClockCache") == nullptr) { - // Not a match - return; - } - // static StderrLogger debug; - // debug.Logv(log_level, format, ap); - if (log_level == InfoLogLevel::INFO_LEVEL) { - ++info_count_; - } else if (log_level == InfoLogLevel::WARN_LEVEL) { - ++warn_count_; - } else if (log_level == InfoLogLevel::ERROR_LEVEL) { - ++error_count_; - } - } - - std::array PopCounts() { - std::array rv{{info_count_, warn_count_, error_count_}}; - info_count_ = warn_count_ = error_count_ = 0; - return rv; - } - - private: - int info_count_{}; - int warn_count_{}; - int error_count_{}; -}; - -} // namespace - -TEST_F(DBBlockCacheTest, HyperClockCacheReportProblems) { - size_t capacity = 1024 * 1024; - size_t value_size_est = 8 * 1024; - HyperClockCacheOptions hcc_opts{capacity, value_size_est}; - hcc_opts.num_shard_bits = 2; // 4 shards - hcc_opts.metadata_charge_policy = kDontChargeCacheMetadata; - std::shared_ptr cache = hcc_opts.MakeSharedCache(); - std::shared_ptr logger = std::make_shared(); - - auto table_options = GetTableOptions(); - auto options = GetOptions(table_options); - table_options.block_cache = cache; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.info_log = logger; - // Going to sample more directly - options.stats_dump_period_sec = 0; - Reopen(options); - - std::vector> handles; - - // Clear anything from DB startup - logger->PopCounts(); - - // Fill cache based on expected size and check that when we - // don't report anything relevant in periodic stats dump - DummyFillCache(*cache, value_size_est, handles); - dbfull()->DumpStats(); - EXPECT_EQ(logger->PopCounts(), (std::array{{0, 0, 0}})); - - // Same, within reasonable bounds - DummyFillCache(*cache, value_size_est - value_size_est / 4, handles); - dbfull()->DumpStats(); - EXPECT_EQ(logger->PopCounts(), (std::array{{0, 0, 0}})); - - DummyFillCache(*cache, value_size_est + value_size_est / 3, handles); - dbfull()->DumpStats(); - EXPECT_EQ(logger->PopCounts(), (std::array{{0, 0, 0}})); - - // Estimate too high (value size too low) eventually reports ERROR - DummyFillCache(*cache, value_size_est / 2, handles); - dbfull()->DumpStats(); - EXPECT_EQ(logger->PopCounts(), (std::array{{0, 1, 0}})); - - DummyFillCache(*cache, value_size_est / 3, handles); - dbfull()->DumpStats(); - EXPECT_EQ(logger->PopCounts(), (std::array{{0, 0, 1}})); - - // Estimate too low (value size too high) starts 
with INFO - // and is only WARNING in the worst case - DummyFillCache(*cache, value_size_est * 2, handles); - dbfull()->DumpStats(); - EXPECT_EQ(logger->PopCounts(), (std::array{{1, 0, 0}})); - - DummyFillCache(*cache, value_size_est * 3, handles); - dbfull()->DumpStats(); - EXPECT_EQ(logger->PopCounts(), (std::array{{0, 1, 0}})); - - DummyFillCache(*cache, value_size_est * 20, handles); - dbfull()->DumpStats(); - EXPECT_EQ(logger->PopCounts(), (std::array{{0, 1, 0}})); -} - - -class DBBlockCacheKeyTest - : public DBTestBase, - public testing::WithParamInterface> { - public: - DBBlockCacheKeyTest() - : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {} - - void SetUp() override { - use_compressed_cache_ = std::get<0>(GetParam()); - exclude_file_numbers_ = std::get<1>(GetParam()); - } - - bool use_compressed_cache_; - bool exclude_file_numbers_; -}; - -// Disable LinkFile so that we can physically copy a DB using Checkpoint. -// Disable file GetUniqueId to enable stable cache keys. -class StableCacheKeyTestFS : public FaultInjectionTestFS { - public: - explicit StableCacheKeyTestFS(const std::shared_ptr& base) - : FaultInjectionTestFS(base) { - SetFailGetUniqueId(true); - } - - virtual ~StableCacheKeyTestFS() override {} - - IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&, - IODebugContext*) override { - return IOStatus::NotSupported("Disabled"); - } -}; - -TEST_P(DBBlockCacheKeyTest, StableCacheKeys) { - std::shared_ptr test_fs{ - new StableCacheKeyTestFS(env_->GetFileSystem())}; - std::unique_ptr test_env{ - new CompositeEnvWrapper(env_, test_fs)}; - - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.env = test_env.get(); - - // Corrupting the table properties corrupts the unique id. - // Ignore the unique id recorded in the manifest. - options.verify_sst_unique_id_in_manifest = false; - - BlockBasedTableOptions table_options; - - int key_count = 0; - uint64_t expected_stat = 0; - - std::function verify_stats; - table_options.cache_index_and_filter_blocks = true; - table_options.block_cache = NewLRUCache(1 << 25, 0, false); - verify_stats = [&options, &expected_stat] { - ASSERT_EQ(expected_stat, - options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); - ASSERT_EQ(expected_stat, - options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); - ASSERT_EQ(expected_stat, - options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); - }; - - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - CreateAndReopenWithCF({"koko"}, options); - - if (exclude_file_numbers_) { - // Simulate something like old behavior without file numbers in properties. - // This is a "control" side of the test that also ensures safely degraded - // behavior on old files. 
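Stepping back to HyperClockCacheReportProblems above: that test relies on the fact that a HyperClockCache is sized with both a capacity and an estimate of the typical entry charge (roughly the uncompressed block size), and that a bad estimate is reported in the periodic stats dump. A sketch of the application-side setup, with purely illustrative numbers (the StableCacheKeys test continues below):

#include "rocksdb/cache.h"
#include "rocksdb/table.h"

// Sketch: size a HyperClockCache with capacity plus an estimated value size.
rocksdb::BlockBasedTableOptions HyperClockTableOptions() {
  rocksdb::HyperClockCacheOptions hcc_opts(
      1 << 30 /* capacity: 1GB */,
      16 << 10 /* estimated value size, roughly the block size */);
  rocksdb::BlockBasedTableOptions table_options;
  table_options.block_cache = hcc_opts.MakeSharedCache();
  return table_options;
}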
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTableBuilder::BlockBasedTableBuilder:PreSetupBaseCacheKey", - [&](void* arg) { - TableProperties* props = reinterpret_cast(arg); - props->orig_file_number = 0; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - } - - std::function perform_gets = [&key_count, &expected_stat, this]() { - if (exclude_file_numbers_) { - // No cache key reuse should happen, because we can't rely on current - // file number being stable - expected_stat += key_count; - } else { - // Cache keys should be stable - expected_stat = key_count; - } - for (int i = 0; i < key_count; ++i) { - ASSERT_EQ(Get(1, Key(i)), "abc"); - } - }; - - // Ordinary SST files with same session id - const std::string something_compressible(500U, 'x'); - for (int i = 0; i < 2; ++i) { - ASSERT_OK(Put(1, Key(key_count), "abc")); - ASSERT_OK(Put(1, Key(key_count) + "a", something_compressible)); - ASSERT_OK(Flush(1)); - ++key_count; - } - - // Save an export of those ordinary SST files for later - std::string export_files_dir = dbname_ + "/exported"; - ExportImportFilesMetaData* metadata_ptr_ = nullptr; - Checkpoint* checkpoint; - ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); - ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir, - &metadata_ptr_)); - ASSERT_NE(metadata_ptr_, nullptr); - delete checkpoint; - checkpoint = nullptr; - - // External SST files with same session id - SstFileWriter sst_file_writer(EnvOptions(), options); - std::vector external; - for (int i = 0; i < 2; ++i) { - std::string f = dbname_ + "/external" + std::to_string(i) + ".sst"; - external.push_back(f); - ASSERT_OK(sst_file_writer.Open(f)); - ASSERT_OK(sst_file_writer.Put(Key(key_count), "abc")); - ASSERT_OK( - sst_file_writer.Put(Key(key_count) + "a", something_compressible)); - ++key_count; - ExternalSstFileInfo external_info; - ASSERT_OK(sst_file_writer.Finish(&external_info)); - IngestExternalFileOptions ingest_opts; - ASSERT_OK(db_->IngestExternalFile(handles_[1], {f}, ingest_opts)); - } - - perform_gets(); - verify_stats(); - - // Make sure we can cache hit after re-open - ReopenWithColumnFamilies({"default", "koko"}, options); - - perform_gets(); - verify_stats(); - - // Make sure we can cache hit even on a full copy of the DB. Using - // StableCacheKeyTestFS, Checkpoint will resort to full copy not hard link. - // (Checkpoint not available in LITE mode to test this.) 
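The expectation in this test is that cache hits survive re-opens, physical copies, and ingestion because cache keys are derived from properties that travel with the SST file (db session id and original file number, recorded in the table properties) plus the block offset, not from the file's path or inode. A simplified, hypothetical model of that idea, not RocksDB's actual encoding (the real scheme is OffsetableCacheKey in cache/cache_key.cc):

#include <cstdint>
#include <string>

// Hypothetical sketch only: a per-block key built from properties persisted in
// the SST file itself, so a byte-identical copy of the file maps to the same
// key. RocksDB's real encoding is fixed-width and collision-aware.
std::string StableBlockKeySketch(const std::string& db_session_id,
                                 uint64_t orig_file_number, uint64_t offset) {
  return db_session_id + "#" + std::to_string(orig_file_number) + "#" +
         std::to_string(offset);
}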
- auto db_copy_name = dbname_ + "-copy"; - ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); - ASSERT_OK(checkpoint->CreateCheckpoint(db_copy_name)); - delete checkpoint; - - Close(); - Destroy(options); - - // Switch to the DB copy - SaveAndRestore save_dbname(&dbname_, db_copy_name); - ReopenWithColumnFamilies({"default", "koko"}, options); - - perform_gets(); - verify_stats(); - - // And ensure that re-importing + ingesting the same files into a - // different DB uses same cache keys - DestroyAndReopen(options); - - ColumnFamilyHandle* cfh = nullptr; - ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", - ImportColumnFamilyOptions(), - *metadata_ptr_, &cfh)); - ASSERT_NE(cfh, nullptr); - delete cfh; - cfh = nullptr; - delete metadata_ptr_; - metadata_ptr_ = nullptr; - - ASSERT_OK(DestroyDB(export_files_dir, options)); - - ReopenWithColumnFamilies({"default", "yoyo"}, options); - - IngestExternalFileOptions ingest_opts; - ASSERT_OK(db_->IngestExternalFile(handles_[1], {external}, ingest_opts)); - - perform_gets(); - verify_stats(); - - Close(); - Destroy(options); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -class CacheKeyTest : public testing::Test { - public: - CacheKey GetBaseCacheKey() { - CacheKey rv = GetOffsetableCacheKey(0, /*min file_number*/ 1).WithOffset(0); - // Correct for file_number_ == 1 - *reinterpret_cast(&rv) ^= ReverseBits(uint64_t{1}); - return rv; - } - CacheKey GetCacheKey(uint64_t session_counter, uint64_t file_number, - uint64_t offset) { - OffsetableCacheKey offsetable = - GetOffsetableCacheKey(session_counter, file_number); - // * 4 to counteract optimization that strips lower 2 bits in encoding - // the offset in BlockBasedTable::GetCacheKey (which we prefer to include - // in unit tests to maximize functional coverage). - EXPECT_GE(offset * 4, offset); // no overflow - return BlockBasedTable::GetCacheKey(offsetable, - BlockHandle(offset * 4, /*size*/ 5)); - } - - protected: - OffsetableCacheKey GetOffsetableCacheKey(uint64_t session_counter, - uint64_t file_number) { - // Like SemiStructuredUniqueIdGen::GenerateNext - tp_.db_session_id = EncodeSessionId(base_session_upper_, - base_session_lower_ ^ session_counter); - tp_.db_id = std::to_string(db_id_); - tp_.orig_file_number = file_number; - bool is_stable; - std::string cur_session_id = ""; // ignored - uint64_t cur_file_number = 42; // ignored - OffsetableCacheKey rv; - BlockBasedTable::SetupBaseCacheKey(&tp_, cur_session_id, cur_file_number, - &rv, &is_stable); - EXPECT_TRUE(is_stable); - EXPECT_TRUE(!rv.IsEmpty()); - // BEGIN some assertions in relation to SST unique IDs - std::string external_unique_id_str; - EXPECT_OK(GetUniqueIdFromTableProperties(tp_, &external_unique_id_str)); - UniqueId64x2 sst_unique_id = {}; - EXPECT_OK(DecodeUniqueIdBytes(external_unique_id_str, &sst_unique_id)); - ExternalUniqueIdToInternal(&sst_unique_id); - OffsetableCacheKey ock = - OffsetableCacheKey::FromInternalUniqueId(&sst_unique_id); - EXPECT_EQ(rv.WithOffset(0).AsSlice(), ock.WithOffset(0).AsSlice()); - EXPECT_EQ(ock.ToInternalUniqueId(), sst_unique_id); - // END some assertions in relation to SST unique IDs - return rv; - } - - TableProperties tp_; - uint64_t base_session_upper_ = 0; - uint64_t base_session_lower_ = 0; - uint64_t db_id_ = 0; -}; - -TEST_F(CacheKeyTest, DBImplSessionIdStructure) { - // We have to generate our own session IDs for simulation purposes in other - // tests. 
Here we verify that the DBImpl implementation seems to match - // our construction here, by using lowest XORed-in bits for "session - // counter." - std::string session_id1 = DBImpl::GenerateDbSessionId(/*env*/ nullptr); - std::string session_id2 = DBImpl::GenerateDbSessionId(/*env*/ nullptr); - uint64_t upper1, upper2, lower1, lower2; - ASSERT_OK(DecodeSessionId(session_id1, &upper1, &lower1)); - ASSERT_OK(DecodeSessionId(session_id2, &upper2, &lower2)); - // Because generated in same process - ASSERT_EQ(upper1, upper2); - // Unless we generate > 4 billion session IDs in this process... - ASSERT_EQ(Upper32of64(lower1), Upper32of64(lower2)); - // But they must be different somewhere - ASSERT_NE(Lower32of64(lower1), Lower32of64(lower2)); -} - -namespace { -// Deconstruct cache key, based on knowledge of implementation details. -void DeconstructNonemptyCacheKey(const CacheKey& key, uint64_t* file_num_etc64, - uint64_t* offset_etc64) { - *file_num_etc64 = *reinterpret_cast(key.AsSlice().data()); - *offset_etc64 = *reinterpret_cast(key.AsSlice().data() + 8); - assert(*file_num_etc64 != 0); - if (*offset_etc64 == 0) { - std::swap(*file_num_etc64, *offset_etc64); - } - assert(*offset_etc64 != 0); -} - -// Make a bit mask of 0 to 64 bits -uint64_t MakeMask64(int bits) { - if (bits >= 64) { - return uint64_t{0} - 1; - } else { - return (uint64_t{1} << bits) - 1; - } -} - -// See CacheKeyTest::Encodings -struct CacheKeyDecoder { - // Inputs - uint64_t base_file_num_etc64, base_offset_etc64; - int session_counter_bits, file_number_bits, offset_bits; - - // Derived - uint64_t session_counter_mask, file_number_mask, offset_mask; - - // Outputs - uint64_t decoded_session_counter, decoded_file_num, decoded_offset; - - void SetBaseCacheKey(const CacheKey& base) { - DeconstructNonemptyCacheKey(base, &base_file_num_etc64, &base_offset_etc64); - } - - void SetRanges(int _session_counter_bits, int _file_number_bits, - int _offset_bits) { - session_counter_bits = _session_counter_bits; - session_counter_mask = MakeMask64(session_counter_bits); - file_number_bits = _file_number_bits; - file_number_mask = MakeMask64(file_number_bits); - offset_bits = _offset_bits; - offset_mask = MakeMask64(offset_bits); - } - - void Decode(const CacheKey& key) { - uint64_t file_num_etc64, offset_etc64; - DeconstructNonemptyCacheKey(key, &file_num_etc64, &offset_etc64); - - // First decode session counter - if (offset_bits + session_counter_bits <= 64) { - // fully recoverable from offset_etc64 - decoded_session_counter = - ReverseBits((offset_etc64 ^ base_offset_etc64)) & - session_counter_mask; - } else if (file_number_bits + session_counter_bits <= 64) { - // fully recoverable from file_num_etc64 - decoded_session_counter = DownwardInvolution( - (file_num_etc64 ^ base_file_num_etc64) & session_counter_mask); - } else { - // Need to combine parts from each word. - // Piece1 will contain some correct prefix of the bottom bits of - // session counter. 
- uint64_t piece1 = - ReverseBits((offset_etc64 ^ base_offset_etc64) & ~offset_mask); - int piece1_bits = 64 - offset_bits; - // Piece2 will contain involuded bits that we can combine with piece1 - // to infer rest of session counter - int piece2_bits = std::min(64 - file_number_bits, 64 - piece1_bits); - ASSERT_LT(piece2_bits, 64); - uint64_t piece2_mask = MakeMask64(piece2_bits); - uint64_t piece2 = (file_num_etc64 ^ base_file_num_etc64) & piece2_mask; - - // Cancel out the part of piece2 that we can infer from piece1 - // (DownwardInvolution distributes over xor) - piece2 ^= DownwardInvolution(piece1) & piece2_mask; - - // Now we need to solve for the unknown original bits in higher - // positions than piece1 provides. We use Gaussian elimination - // because we know that a piece2_bits X piece2_bits submatrix of - // the matrix underlying DownwardInvolution times the vector of - // unknown original bits equals piece2. - // - // Build an augmented row matrix for that submatrix, built column by - // column. - std::array aug_rows{}; - for (int i = 0; i < piece2_bits; ++i) { // over columns - uint64_t col_i = DownwardInvolution(uint64_t{1} << piece1_bits << i); - ASSERT_NE(col_i & 1U, 0); - for (int j = 0; j < piece2_bits; ++j) { // over rows - aug_rows[j] |= (col_i & 1U) << i; - col_i >>= 1; - } - } - // Augment with right hand side - for (int j = 0; j < piece2_bits; ++j) { // over rows - aug_rows[j] |= (piece2 & 1U) << piece2_bits; - piece2 >>= 1; - } - // Run Gaussian elimination - for (int i = 0; i < piece2_bits; ++i) { // over columns - // Find a row that can be used to cancel others - uint64_t canceller = 0; - // Note: Rows 0 through i-1 contain 1s in columns already eliminated - for (int j = i; j < piece2_bits; ++j) { // over rows - if (aug_rows[j] & (uint64_t{1} << i)) { - // Swap into appropriate row - std::swap(aug_rows[i], aug_rows[j]); - // Keep a handy copy for row reductions - canceller = aug_rows[i]; - break; - } - } - ASSERT_NE(canceller, 0); - for (int j = 0; j < piece2_bits; ++j) { // over rows - if (i != j && ((aug_rows[j] >> i) & 1) != 0) { - // Row reduction - aug_rows[j] ^= canceller; - } - } - } - // Extract result - decoded_session_counter = piece1; - for (int j = 0; j < piece2_bits; ++j) { // over rows - ASSERT_EQ(aug_rows[j] & piece2_mask, uint64_t{1} << j); - decoded_session_counter |= aug_rows[j] >> piece2_bits << piece1_bits - << j; - } - } - - decoded_offset = - offset_etc64 ^ base_offset_etc64 ^ ReverseBits(decoded_session_counter); - - decoded_file_num = ReverseBits(file_num_etc64 ^ base_file_num_etc64 ^ - DownwardInvolution(decoded_session_counter)); - } -}; -} // anonymous namespace - -TEST_F(CacheKeyTest, Encodings) { - // This test primarily verifies this claim from cache_key.cc: - // // In fact, if DB ids were not involved, we would be guaranteed unique - // // cache keys for files generated in a single process until total bits for - // // biggest session_id_counter, orig_file_number, and offset_in_file - // // reach 128 bits. - // - // To demonstrate this, CacheKeyDecoder can reconstruct the structured inputs - // to the cache key when provided an output cache key, the unstructured - // inputs, and bounds on the structured inputs. - // - // See OffsetableCacheKey comments in cache_key.cc. 
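The loop in the test body below exercises exactly the precondition in the quoted claim: the decoder can recover the structured inputs only while their combined widths fit in the 128-bit cache key. Stated as a tiny helper, equivalent to the bound checked before each decode:

#include <cstdint>

// Uniqueness (ignoring DB id) is only claimed while the widths of the session
// counter, original file number, and offset together fit in the 128-bit key.
bool FitsInCacheKeyBudget(int session_counter_bits, int file_number_bits,
                          int offset_bits) {
  return session_counter_bits + file_number_bits + offset_bits <= 128;
}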
- - // We are going to randomly initialize some values that *should* not affect - // result - Random64 r{std::random_device{}()}; - - CacheKeyDecoder decoder; - db_id_ = r.Next(); - base_session_upper_ = r.Next(); - base_session_lower_ = r.Next(); - if (base_session_lower_ == 0) { - base_session_lower_ = 1; - } - - decoder.SetBaseCacheKey(GetBaseCacheKey()); - - // Loop over configurations and test those - for (int session_counter_bits = 0; session_counter_bits <= 64; - ++session_counter_bits) { - for (int file_number_bits = 1; file_number_bits <= 64; ++file_number_bits) { - // 62 bits max because unoptimized offset will be 64 bits in that case - for (int offset_bits = 0; offset_bits <= 62; ++offset_bits) { - if (session_counter_bits + file_number_bits + offset_bits > 128) { - break; - } - - decoder.SetRanges(session_counter_bits, file_number_bits, offset_bits); - - uint64_t session_counter = r.Next() & decoder.session_counter_mask; - uint64_t file_number = r.Next() & decoder.file_number_mask; - if (file_number == 0) { - // Minimum - file_number = 1; - } - uint64_t offset = r.Next() & decoder.offset_mask; - decoder.Decode(GetCacheKey(session_counter, file_number, offset)); - - EXPECT_EQ(decoder.decoded_session_counter, session_counter); - EXPECT_EQ(decoder.decoded_file_num, file_number); - EXPECT_EQ(decoder.decoded_offset, offset); - } - } - } -} - -INSTANTIATE_TEST_CASE_P(DBBlockCacheKeyTest, DBBlockCacheKeyTest, - ::testing::Combine(::testing::Bool(), - ::testing::Bool())); - -class DBBlockCachePinningTest - : public DBTestBase, - public testing::WithParamInterface< - std::tuple> { - public: - DBBlockCachePinningTest() - : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {} - - void SetUp() override { - partition_index_and_filters_ = std::get<0>(GetParam()); - top_level_index_pinning_ = std::get<1>(GetParam()); - partition_pinning_ = std::get<2>(GetParam()); - unpartitioned_pinning_ = std::get<3>(GetParam()); - } - - bool partition_index_and_filters_; - PinningTier top_level_index_pinning_; - PinningTier partition_pinning_; - PinningTier unpartitioned_pinning_; -}; - -TEST_P(DBBlockCachePinningTest, TwoLevelDB) { - // Creates one file in L0 and one file in L1. Both files have enough data that - // their index and filter blocks are partitioned. The L1 file will also have - // a compression dictionary (those are trained only during compaction), which - // must be unpartitioned. - const int kKeySize = 32; - const int kBlockSize = 128; - const int kNumBlocksPerFile = 128; - const int kNumKeysPerFile = kBlockSize * kNumBlocksPerFile / kKeySize; - - Options options = CurrentOptions(); - // `kNoCompression` makes the unit test more portable. But it relies on the - // current behavior of persisting/accessing dictionary even when there's no - // (de)compression happening, which seems fairly likely to change over time. 
- options.compression = kNoCompression; - options.compression_opts.max_dict_bytes = 4 << 10; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.block_cache = NewLRUCache(1 << 20 /* capacity */); - table_options.block_size = kBlockSize; - table_options.metadata_block_size = kBlockSize; - table_options.cache_index_and_filter_blocks = true; - table_options.metadata_cache_options.top_level_index_pinning = - top_level_index_pinning_; - table_options.metadata_cache_options.partition_pinning = partition_pinning_; - table_options.metadata_cache_options.unpartitioned_pinning = - unpartitioned_pinning_; - table_options.filter_policy.reset( - NewBloomFilterPolicy(10 /* bits_per_key */)); - if (partition_index_and_filters_) { - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - table_options.partition_filters = true; - } - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - - Random rnd(301); - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK(Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kKeySize))); - } - ASSERT_OK(Flush()); - if (i == 0) { - // Prevent trivial move so file will be rewritten with dictionary and - // reopened with L1's pinning settings. - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - } - } - - // Clear all unpinned blocks so unpinned blocks will show up as cache misses - // when reading a key from a file. - table_options.block_cache->EraseUnRefEntries(); - - // Get base cache values - uint64_t filter_misses = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - uint64_t index_misses = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS); - uint64_t compression_dict_misses = - TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS); - - // Read a key from the L0 file - Get(Key(kNumKeysPerFile)); - uint64_t expected_filter_misses = filter_misses; - uint64_t expected_index_misses = index_misses; - uint64_t expected_compression_dict_misses = compression_dict_misses; - if (partition_index_and_filters_) { - if (top_level_index_pinning_ == PinningTier::kNone) { - ++expected_filter_misses; - ++expected_index_misses; - } - if (partition_pinning_ == PinningTier::kNone) { - ++expected_filter_misses; - ++expected_index_misses; - } - } else { - if (unpartitioned_pinning_ == PinningTier::kNone) { - ++expected_filter_misses; - ++expected_index_misses; - } - } - if (unpartitioned_pinning_ == PinningTier::kNone) { - ++expected_compression_dict_misses; - } - ASSERT_EQ(expected_filter_misses, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(expected_index_misses, - TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); - ASSERT_EQ(expected_compression_dict_misses, - TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS)); - - // Clear all unpinned blocks so unpinned blocks will show up as cache misses - // when reading a key from a file. 
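For reference, the three PinningTier knobs that the TwoLevelDB test drives through MetadataCacheOptions are set the same way in application code; a sketch in which the tier choices are only an example (the test resumes below by clearing unpinned blocks):

#include "rocksdb/table.h"

// Sketch: partitioned index/filter with explicit pinning tiers per block kind.
rocksdb::BlockBasedTableOptions PinningTableOptions() {
  rocksdb::BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  table_options.index_type =
      rocksdb::BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
  table_options.partition_filters = true;
  // Pin top-level index blocks for all files, pin partitions only for flushed
  // (and similar) files, and do not pin unpartitioned metadata such as
  // compression dictionaries.
  table_options.metadata_cache_options.top_level_index_pinning =
      rocksdb::PinningTier::kAll;
  table_options.metadata_cache_options.partition_pinning =
      rocksdb::PinningTier::kFlushedAndSimilar;
  table_options.metadata_cache_options.unpartitioned_pinning =
      rocksdb::PinningTier::kNone;
  return table_options;
}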
- table_options.block_cache->EraseUnRefEntries(); - - // Read a key from the L1 file - Get(Key(0)); - if (partition_index_and_filters_) { - if (top_level_index_pinning_ == PinningTier::kNone || - top_level_index_pinning_ == PinningTier::kFlushedAndSimilar) { - ++expected_filter_misses; - ++expected_index_misses; - } - if (partition_pinning_ == PinningTier::kNone || - partition_pinning_ == PinningTier::kFlushedAndSimilar) { - ++expected_filter_misses; - ++expected_index_misses; - } - } else { - if (unpartitioned_pinning_ == PinningTier::kNone || - unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) { - ++expected_filter_misses; - ++expected_index_misses; - } - } - if (unpartitioned_pinning_ == PinningTier::kNone || - unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) { - ++expected_compression_dict_misses; - } - ASSERT_EQ(expected_filter_misses, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(expected_index_misses, - TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); - ASSERT_EQ(expected_compression_dict_misses, - TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS)); -} - -INSTANTIATE_TEST_CASE_P( - DBBlockCachePinningTest, DBBlockCachePinningTest, - ::testing::Combine( - ::testing::Bool(), - ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar, - PinningTier::kAll), - ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar, - PinningTier::kAll), - ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar, - PinningTier::kAll))); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc deleted file mode 100644 index 352343c16..000000000 --- a/db/db_bloom_filter_test.cc +++ /dev/null @@ -1,3473 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include -#include -#include -#include - -#include "cache/cache_entry_roles.h" -#include "cache/cache_reservation_manager.h" -#include "db/db_test_util.h" -#include "options/options_helper.h" -#include "port/stack_trace.h" -#include "rocksdb/advanced_options.h" -#include "rocksdb/convenience.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/statistics.h" -#include "rocksdb/table.h" -#include "table/block_based/block_based_table_reader.h" -#include "table/block_based/filter_policy_internal.h" -#include "table/format.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -std::shared_ptr Create(double bits_per_key, - const std::string& name) { - return BloomLikeFilterPolicy::Create(name, bits_per_key); -} -const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName(); -const std::string kFastLocalBloom = - test::FastLocalBloomFilterPolicy::kClassName(); -const std::string kStandard128Ribbon = - test::Standard128RibbonFilterPolicy::kClassName(); -const std::string kAutoBloom = BloomFilterPolicy::kClassName(); -const std::string kAutoRibbon = RibbonFilterPolicy::kClassName(); -} // anonymous namespace - -// DB tests related to bloom filter. - -class DBBloomFilterTest : public DBTestBase { - public: - DBBloomFilterTest() - : DBTestBase("db_bloom_filter_test", /*env_do_fsync=*/true) {} -}; - -class DBBloomFilterTestWithParam - : public DBTestBase, - public testing::WithParamInterface< - std::tuple> { - // public testing::WithParamInterface { - protected: - std::string bfp_impl_; - bool partition_filters_; - uint32_t format_version_; - - public: - DBBloomFilterTestWithParam() - : DBTestBase("db_bloom_filter_tests", /*env_do_fsync=*/true) {} - - ~DBBloomFilterTestWithParam() override {} - - void SetUp() override { - bfp_impl_ = std::get<0>(GetParam()); - partition_filters_ = std::get<1>(GetParam()); - format_version_ = std::get<2>(GetParam()); - } -}; - -class DBBloomFilterTestDefFormatVersion : public DBBloomFilterTestWithParam {}; - -class SliceTransformLimitedDomainGeneric : public SliceTransform { - const char* Name() const override { - return "SliceTransformLimitedDomainGeneric"; - } - - Slice Transform(const Slice& src) const override { - return Slice(src.data(), 5); - } - - bool InDomain(const Slice& src) const override { - // prefix will be x???? - return src.size() >= 5; - } - - bool InRange(const Slice& dst) const override { - // prefix will be x???? - return dst.size() == 5; - } -}; - -// KeyMayExist can lead to a few false positives, but not false negatives. 
-// To make test deterministic, use a much larger number of bits per key-20 than -// bits in the key, so that false positives are eliminated -TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) { - do { - ReadOptions ropts; - std::string value; - anon::OptionsOverride options_override; - options_override.filter_policy = Create(20, bfp_impl_); - options_override.partition_filters = partition_filters_; - options_override.metadata_block_size = 32; - options_override.full_block_cache = true; - Options options = CurrentOptions(options_override); - if (partition_filters_) { - auto* table_options = - options.table_factory->GetOptions(); - if (table_options != nullptr && - table_options->index_type != - BlockBasedTableOptions::kTwoLevelIndexSearch) { - // In the current implementation partitioned filters depend on - // partitioned indexes - continue; - } - } - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); - - ASSERT_OK(Put(1, "a", "b")); - bool value_found = false; - ASSERT_TRUE( - db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found)); - ASSERT_TRUE(value_found); - ASSERT_EQ("b", value); - - ASSERT_OK(Flush(1)); - value.clear(); - - uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS); - uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE( - db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found)); - ASSERT_TRUE(!value_found); - // assert that no new files were opened and no new blocks were - // read into block cache. - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Delete(1, "a")); - - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1], - true /* disallow trivial move */)); - - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - ASSERT_OK(Delete(1, "c")); - - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value)); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - - // KeyMayExist function only checks data in block caches, which is not used - // by plain table format. 
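As the comments in this test state, KeyMayExist may return false positives but never false negatives, and it answers from what is already in memory (memtable and block cache) rather than doing a full read. A typical application-side call, assuming an open rocksdb::DB* named db (the do/while options loop closes below):

// Sketch: a false return means the key is definitely absent, so the caller can
// skip the full Get().
std::string value;
bool value_found = false;
if (!db->KeyMayExist(rocksdb::ReadOptions(), "some_key", &value, &value_found)) {
  // Key is guaranteed not to exist.
} else if (value_found) {
  // Value was already available in memory and has been set.
} else {
  // Key may exist; a Get() is needed to confirm and fetch the value.
}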
- } while ( - ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction)); -} - -TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) { - for (bool partition_filters : {true, false}) { - Options options = last_options_; - options.prefix_extractor = - std::make_shared(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->EnablePerLevelPerfContext(); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10)); - if (partition_filters) { - bbto.partition_filters = true; - bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - WriteOptions wo; - ReadOptions ro; - FlushOptions fo; - fo.wait = true; - std::string value; - - ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo")); - ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2")); - ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar")); - - ASSERT_OK(dbfull()->Flush(fo)); - - ASSERT_EQ("foo", Get("barbarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ( - 0, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - ASSERT_EQ("foo2", Get("barbarbar2")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ( - 0, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ( - 0, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ( - 1, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); - ASSERT_EQ( - 2, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - ro.total_order_seek = true; - // NOTE: total_order_seek no longer affects Get() - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - // No bloom on extractor changed - ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}})); - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - // No bloom on extractor changed, after re-open - options.prefix_extractor.reset(NewCappedPrefixTransform(10)); - Reopen(options); - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - get_perf_context()->Reset(); - } -} - -TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { - for (bool partition_filters : {true, false}) { - Options options = last_options_; - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->EnablePerLevelPerfContext(); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10)); - if (partition_filters) { - bbto.partition_filters = true; - bbto.index_type = 
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - WriteOptions wo; - ReadOptions ro; - FlushOptions fo; - fo.wait = true; - std::string value; - - ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo")); - ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2")); - ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar")); - - ASSERT_OK(dbfull()->Flush(fo)); - - ASSERT_EQ("foo", Get("barbarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ("foo2", Get("barbarbar2")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - - ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); - - ro.total_order_seek = true; - // NOTE: total_order_seek no longer affects Get() - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - // No bloom on extractor changed - ASSERT_OK(db_->SetOptions({{"prefix_extractor", "capped:10"}})); - ASSERT_EQ("NOT_FOUND", Get("foobarbar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); - ASSERT_EQ( - 3, - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - - get_perf_context()->Reset(); - } -} - -TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { - for (bool partition_filters : {true, false}) { - Options options = last_options_; - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->EnablePerLevelPerfContext(); - - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10)); - bbto.whole_key_filtering = false; - if (partition_filters) { - bbto.partition_filters = true; - bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - WriteOptions wo; - ReadOptions ro; - FlushOptions fo; - fo.wait = true; - std::string value; - - ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); - // Needs insert some keys to make sure files are not filtered out by key - // ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - ASSERT_OK(dbfull()->Flush(fo)); - - Reopen(options); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - - // Reopen with whole key filtering enabled and prefix extractor - // NULL. Bloom filter should be off for both of whole key and - // prefix bloom. 
- bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.prefix_extractor.reset(); - Reopen(options); - - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - // Write DB with only full key filtering. - ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); - // Needs insert some keys to make sure files are not filtered out by key - // ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // Reopen with both of whole key off and prefix extractor enabled. - // Still no bloom filter should be used. - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - - // Try to create a DB with mixed files: - ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); - // Needs insert some keys to make sure files are not filtered out by key - // ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - options.prefix_extractor.reset(); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - - // Try to create a DB with mixed files. - ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar")); - // In this case needs insert some keys to make sure files are - // not filtered out by key ranges. - ASSERT_OK(dbfull()->Put(wo, "aaa", "")); - ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - ASSERT_OK(Flush()); - - // Now we have two files: - // File 1: An older file with prefix bloom. - // File 2: A newer file with whole bloom filter. 
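// NOTE: BLOOM_FILTER_USEFUL counts, per lookup, every SST file whose filter
// ruled the key out. With the two files above, a single missing key can
// therefore bump the ticker by 0, 1, or 2, depending on which of the two
// files carries a filter that applies to that lookup; this is why some of
// the expected counts below advance by two per Get().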
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
- ASSERT_EQ("NOT_FOUND", Get("foo"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
- ASSERT_EQ("NOT_FOUND", Get("bar"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
- ASSERT_EQ("foo", Get("foobar"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
- ASSERT_EQ("bar", Get("barfoo"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
-
- // Reopen with the same setting: only whole key is used
- Reopen(options);
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
- ASSERT_EQ("NOT_FOUND", Get("foo"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5);
- ASSERT_EQ("NOT_FOUND", Get("bar"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6);
- ASSERT_EQ("foo", Get("foobar"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
- ASSERT_EQ("bar", Get("barfoo"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
-
- // Restart with both filters allowed
- options.prefix_extractor.reset(NewFixedPrefixTransform(3));
- bbto.whole_key_filtering = true;
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
- Reopen(options);
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
- // File 1 will have it filtered out.
- // File 2 will not, as prefix `foo` exists in the file.
- ASSERT_EQ("NOT_FOUND", Get("foo"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8);
- ASSERT_EQ("NOT_FOUND", Get("bar"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10);
- ASSERT_EQ("foo", Get("foobar"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
- ASSERT_EQ("bar", Get("barfoo"));
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
-
- // Restart with only prefix bloom allowed.
- options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - ASSERT_EQ("NOT_FOUND", Get("foo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); - ASSERT_EQ("NOT_FOUND", Get("bar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); - ASSERT_EQ("foo", Get("foobar")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); - ASSERT_EQ("bar", Get("barfoo")); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); - uint64_t bloom_filter_useful_all_levels = 0; - for (auto& kv : (*(get_perf_context()->level_to_perf_context))) { - if (kv.second.bloom_filter_useful > 0) { - bloom_filter_useful_all_levels += kv.second.bloom_filter_useful; - } - } - ASSERT_EQ(12, bloom_filter_useful_all_levels); - get_perf_context()->Reset(); - } -} - -TEST_P(DBBloomFilterTestWithParam, BloomFilter) { - do { - Options options = CurrentOptions(); - env_->count_random_reads_ = true; - options.env = env_; - // ChangeCompactOptions() only changes compaction style, which does not - // trigger reset of table_factory - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - table_options.filter_policy = Create(10, bfp_impl_); - table_options.partition_filters = partition_filters_; - if (partition_filters_) { - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - table_options.format_version = format_version_; - if (format_version_ >= 4) { - // value delta encoding challenged more with index interval > 1 - table_options.index_block_restart_interval = 8; - } - table_options.metadata_block_size = 32; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - CreateAndReopenWithCF({"pikachu"}, options); - - // Populate multiple layers - const int N = 10000; - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - Compact(1, "a", "z"); - for (int i = 0; i < N; i += 100) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - ASSERT_OK(Flush(1)); - - // Prevent auto compactions triggered by seeks - env_->delay_sstable_sync_.store(true, std::memory_order_release); - - // Lookup present keys. Should rarely read from small sstable. - env_->random_read_counter_.Reset(); - for (int i = 0; i < N; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - int reads = env_->random_read_counter_.Read(); - fprintf(stderr, "%d present => %d reads\n", N, reads); - ASSERT_GE(reads, N); - if (partition_filters_) { - // Without block cache, we read an extra partition filter per each - // level*read and a partition index per each read - ASSERT_LE(reads, 4 * N + 2 * N / 100); - } else { - ASSERT_LE(reads, N + 2 * N / 100); - } - - // Lookup present keys. Should rarely read from either sstable. - env_->random_read_counter_.Reset(); - for (int i = 0; i < N; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing")); - } - reads = env_->random_read_counter_.Read(); - fprintf(stderr, "%d missing => %d reads\n", N, reads); - if (partition_filters_) { - // With partitioned filter we read one extra filter per level per each - // missed read. 
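// For example, with N = 10000 the bounds work out as follows: present-key
// pass, at most 10000 + 200 = 10200 reads without partitioned filters versus
// 4 * 10000 + 200 = 40200 with them; missing-key pass below, at most
// 2 * 10000 + 300 = 20300 reads with partitioned filters versus only
// 3 * 10000 / 100 = 300 once the full filters screen out the misses.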
- ASSERT_LE(reads, 2 * N + 3 * N / 100); - } else { - ASSERT_LE(reads, 3 * N / 100); - } - - // Sanity check some table properties - std::map props; - ASSERT_TRUE(db_->GetMapProperty( - handles_[1], DB::Properties::kAggregatedTableProperties, &props)); - uint64_t nkeys = N + N / 100; - uint64_t filter_size = ParseUint64(props["filter_size"]); - EXPECT_LE(filter_size, - (partition_filters_ ? 12 : 11) * nkeys / /*bits / byte*/ 8); - if (bfp_impl_ == kAutoRibbon) { - // Sometimes using Ribbon filter which is more space-efficient - EXPECT_GE(filter_size, 7 * nkeys / /*bits / byte*/ 8); - } else { - // Always Bloom - EXPECT_GE(filter_size, 10 * nkeys / /*bits / byte*/ 8); - } - - uint64_t num_filter_entries = ParseUint64(props["num_filter_entries"]); - EXPECT_EQ(num_filter_entries, nkeys); - - env_->delay_sstable_sync_.store(false, std::memory_order_release); - Close(); - } while (ChangeCompactOptions()); -} - -namespace { - -class AlwaysTrueBitsBuilder : public FilterBitsBuilder { - public: - void AddKey(const Slice&) override {} - size_t EstimateEntriesAdded() override { return 0U; } - Slice Finish(std::unique_ptr* /* buf */) override { - // Interpreted as "always true" filter (0 probes over 1 byte of - // payload, 5 bytes metadata) - return Slice("\0\0\0\0\0\0", 6); - } - using FilterBitsBuilder::Finish; - size_t ApproximateNumEntries(size_t) override { return SIZE_MAX; } -}; - -class AlwaysTrueFilterPolicy : public ReadOnlyBuiltinFilterPolicy { - public: - explicit AlwaysTrueFilterPolicy(bool skip) : skip_(skip) {} - - FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext&) const override { - if (skip_) { - return nullptr; - } else { - return new AlwaysTrueBitsBuilder(); - } - } - - private: - bool skip_; -}; - -} // anonymous namespace - -TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) { - constexpr int maxKey = 10; - auto PutFn = [&]() { - int i; - // Put - for (i = 0; i < maxKey; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - Flush(); - }; - auto GetFn = [&]() { - int i; - // Get OK - for (i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(Key(i))); - } - // Get NotFound - for (; i < maxKey * 2; i++) { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - }; - auto PutAndGetFn = [&]() { - PutFn(); - GetFn(); - }; - std::map props; - const auto& kAggTableProps = DB::Properties::kAggregatedTableProperties; - - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.partition_filters = partition_filters_; - if (partition_filters_) { - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - table_options.format_version = format_version_; - - // Test 1: bits per key < 0.5 means skip filters -> no filter - // constructed or read. - table_options.filter_policy = Create(0.4, bfp_impl_); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - PutAndGetFn(); - - // Verify no filter access nor contruction - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0); - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0); - - props.clear(); - ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); - EXPECT_EQ(props["filter_size"], "0"); - - // Test 2: use custom API to skip filters -> no filter constructed - // or read. 
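// (Test 1 above and Test 2 below exercise the same mechanism: when
// FilterPolicy::GetBuilderWithContext() returns nullptr, no filter block is
// built for the table at all, e.g.
//   FilterBitsBuilder* GetBuilderWithContext(
//       const FilterBuildingContext&) const override {
//     return nullptr;  // skip the filter for this table
//   }
// exactly as AlwaysTrueFilterPolicy does when constructed with skip == true.
// That is why "filter_size" reads "0" and the BLOOM_FILTER_FULL_* tickers
// stay at zero in those cases.)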
- table_options.filter_policy.reset( - new AlwaysTrueFilterPolicy(/* skip */ true)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - PutAndGetFn(); - - // Verify no filter access nor construction - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0); - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0); - - props.clear(); - ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); - EXPECT_EQ(props["filter_size"], "0"); - - // Control test: using an actual filter with 100% FP rate -> the filter - // is constructed and checked on read. - table_options.filter_policy.reset( - new AlwaysTrueFilterPolicy(/* skip */ false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - PutAndGetFn(); - - // Verify filter is accessed (and constructed) - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - maxKey * 2); - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - maxKey); - props.clear(); - ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); - EXPECT_NE(props["filter_size"], "0"); - - // Test 3 (options test): Able to read existing filters with longstanding - // generated options file entry `filter_policy=rocksdb.BuiltinBloomFilter` - ASSERT_OK(FilterPolicy::CreateFromString(ConfigOptions(), - "rocksdb.BuiltinBloomFilter", - &table_options.filter_policy)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - GetFn(); - - // Verify filter is accessed - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - maxKey * 2); - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - maxKey); - - // But new filters are not generated (configuration details unknown) - DestroyAndReopen(options); - PutAndGetFn(); - - // Verify no filter access nor construction - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0); - EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0); - - props.clear(); - ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); - EXPECT_EQ(props["filter_size"], "0"); -} - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -INSTANTIATE_TEST_CASE_P( - FormatDef, DBBloomFilterTestDefFormatVersion, - ::testing::Values( - std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), - std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion), - std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion))); - -INSTANTIATE_TEST_CASE_P( - FormatDef, DBBloomFilterTestWithParam, - ::testing::Values( - std::make_tuple(kAutoBloom, true, test::kDefaultFormatVersion), - std::make_tuple(kAutoBloom, false, test::kDefaultFormatVersion), - std::make_tuple(kAutoRibbon, false, test::kDefaultFormatVersion))); - -INSTANTIATE_TEST_CASE_P( - FormatLatest, DBBloomFilterTestWithParam, - ::testing::Values(std::make_tuple(kAutoBloom, true, kLatestFormatVersion), - std::make_tuple(kAutoBloom, false, kLatestFormatVersion), - std::make_tuple(kAutoRibbon, false, - kLatestFormatVersion))); -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_F(DBBloomFilterTest, BloomFilterRate) { - while (ChangeFilterOptions()) { - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->EnablePerLevelPerfContext(); - CreateAndReopenWithCF({"pikachu"}, options); - - 
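// NOTE: the assertions in this test require the filters to rule out at least
// 98% of the 10000 missing-key lookups (maxKey * 0.98), i.e. an observed
// false-positive rate below 2%, for every filter configuration that
// ChangeFilterOptions() cycles through; and since all keys land in a single
// flushed file, the same count must show up in the level-0 perf context.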
const int maxKey = 10000; - for (int i = 0; i < maxKey; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - // Add a large key to make the file contain wide range - ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - Flush(1); - - // Check if they can be found - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - - // Check if filter is useful - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); - } - ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); - ASSERT_GE( - (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, - maxKey * 0.98); - get_perf_context()->Reset(); - } -} - -namespace { -struct CompatibilityConfig { - std::shared_ptr policy; - bool partitioned; - uint32_t format_version; - - void SetInTableOptions(BlockBasedTableOptions* table_options) { - table_options->filter_policy = policy; - table_options->partition_filters = partitioned; - if (partitioned) { - table_options->index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } else { - table_options->index_type = - BlockBasedTableOptions::IndexType::kBinarySearch; - } - table_options->format_version = format_version; - } -}; -// High bits per key -> almost no FPs -std::shared_ptr kCompatibilityBloomPolicy{ - NewBloomFilterPolicy(20)}; -// bloom_before_level=-1 -> always use Ribbon -std::shared_ptr kCompatibilityRibbonPolicy{ - NewRibbonFilterPolicy(20, -1)}; - -std::vector kCompatibilityConfigs = { - {kCompatibilityBloomPolicy, false, BlockBasedTableOptions().format_version}, - {kCompatibilityBloomPolicy, true, BlockBasedTableOptions().format_version}, - {kCompatibilityBloomPolicy, false, /* legacy Bloom */ 4U}, - {kCompatibilityRibbonPolicy, false, - BlockBasedTableOptions().format_version}, - {kCompatibilityRibbonPolicy, true, BlockBasedTableOptions().format_version}, -}; -} // anonymous namespace - -TEST_F(DBBloomFilterTest, BloomFilterCompatibility) { - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.level0_file_num_compaction_trigger = - static_cast(kCompatibilityConfigs.size()) + 1; - options.max_open_files = -1; - - Close(); - - // Create one file for each kind of filter. Each file covers a distinct key - // range. 
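// With the five configurations above this creates five L0 files (the
// compaction trigger is set to size() + 1 = 6 so none of them compact away),
// each holding the two keys "<i>_A" and "<i>_Z". The read loop further down
// then reopens the DB once per configuration and probes all five key ranges,
// i.e. 5 x 5 = 25 reader/writer combinations, expecting two filter positives
// and one filter negative per combination.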
- for (size_t i = 0; i < kCompatibilityConfigs.size(); ++i) { - BlockBasedTableOptions table_options; - kCompatibilityConfigs[i].SetInTableOptions(&table_options); - ASSERT_TRUE(table_options.filter_policy != nullptr); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - - std::string prefix = std::to_string(i) + "_"; - ASSERT_OK(Put(prefix + "A", "val")); - ASSERT_OK(Put(prefix + "Z", "val")); - ASSERT_OK(Flush()); - } - - // Test filter is used between each pair of {reader,writer} configurations, - // because any built-in FilterPolicy should be able to read filters from any - // other built-in FilterPolicy - for (size_t i = 0; i < kCompatibilityConfigs.size(); ++i) { - BlockBasedTableOptions table_options; - kCompatibilityConfigs[i].SetInTableOptions(&table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - for (size_t j = 0; j < kCompatibilityConfigs.size(); ++j) { - std::string prefix = std::to_string(j) + "_"; - ASSERT_EQ("val", Get(prefix + "A")); // Filter positive - ASSERT_EQ("val", Get(prefix + "Z")); // Filter positive - // Filter negative, with high probability - ASSERT_EQ("NOT_FOUND", Get(prefix + "Q")); - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), - 2); - EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), 1); - } - } -} - -// To align with the type of hash entry being reserved in implementation. -using FilterConstructionReserveMemoryHash = uint64_t; - -class ChargeFilterConstructionTestWithParam - : public DBTestBase, - public testing::WithParamInterface> { - public: - ChargeFilterConstructionTestWithParam() - : DBTestBase("db_bloom_filter_tests", - /*env_do_fsync=*/true), - num_key_(0), - charge_filter_construction_(std::get<0>(GetParam())), - policy_(std::get<1>(GetParam())), - partition_filters_(std::get<2>(GetParam())), - detect_filter_construct_corruption_(std::get<3>(GetParam())) { - if (charge_filter_construction_ == - CacheEntryRoleOptions::Decision::kDisabled || - policy_ == kLegacyBloom) { - // For these cases, we only interested in whether filter construction - // cache charging happens instead of its accuracy. Therefore we don't - // need many keys. - num_key_ = 5; - } else if (partition_filters_) { - // For PartitionFilter case, since we set - // table_options.metadata_block_size big enough such that each partition - // trigger at least 1 dummy entry reservation each for hash entries and - // final filter, we need a large number of keys to ensure we have at least - // two partitions. - num_key_ = 18 * - CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() / - sizeof(FilterConstructionReserveMemoryHash); - } else if (policy_ == kFastLocalBloom) { - // For Bloom Filter + FullFilter case, since we design the num_key_ to - // make hash entry cache charging be a multiple of dummy entries, the - // correct behavior of charging final filter on top of it will trigger at - // least another dummy entry insertion. Therefore we can assert that - // behavior and we don't need a large number of keys to verify we - // indeed charge the final filter for in cache, even though final - // filter is a lot smaller than hash entries. 
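// NOTE: assuming the cache reservation manager's default dummy entry size of
// 256 KiB, the assignment below comes out to 256 * 1024 / 8 = 32768 keys, so
// the hash-entry reservation (32768 * 8 bytes) is exactly one dummy entry and
// charging the roughly 40 KiB final filter (10 bits per key) on top of it
// must cross into a second dummy entry, which is what the peak assertion
// relies on.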
- num_key_ = 1 * - CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() / - sizeof(FilterConstructionReserveMemoryHash); - } else { - // For Ribbon Filter + FullFilter case, we need a large enough number of - // keys so that charging final filter after releasing the hash entries - // reservation will trigger at least another dummy entry (or equivalently - // to saying, causing another peak in cache charging) as banding - // reservation might not be a multiple of dummy entry. - num_key_ = 12 * - CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize() / - sizeof(FilterConstructionReserveMemoryHash); - } - } - - BlockBasedTableOptions GetBlockBasedTableOptions() { - BlockBasedTableOptions table_options; - - // We set cache capacity big enough to prevent cache full for convenience in - // calculation. - constexpr std::size_t kCacheCapacity = 100 * 1024 * 1024; - - table_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kFilterConstruction, - {/*.charged = */ charge_filter_construction_}}); - table_options.filter_policy = Create(10, policy_); - table_options.partition_filters = partition_filters_; - if (table_options.partition_filters) { - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - // We set table_options.metadata_block_size big enough so that each - // partition trigger at least 1 dummy entry insertion each for hash - // entries and final filter. - table_options.metadata_block_size = 409000; - } - table_options.detect_filter_construct_corruption = - detect_filter_construct_corruption_; - - LRUCacheOptions lo; - lo.capacity = kCacheCapacity; - lo.num_shard_bits = 0; // 2^0 shard - lo.strict_capacity_limit = true; - cache_ = std::make_shared< - TargetCacheChargeTrackingCache>( - (NewLRUCache(lo))); - table_options.block_cache = cache_; - - return table_options; - } - - std::size_t GetNumKey() { return num_key_; } - - CacheEntryRoleOptions::Decision ChargeFilterConstructMemory() { - return charge_filter_construction_; - } - - std::string GetFilterPolicy() { return policy_; } - - bool PartitionFilters() { return partition_filters_; } - - std::shared_ptr< - TargetCacheChargeTrackingCache> - GetCache() { - return cache_; - } - - private: - std::size_t num_key_; - CacheEntryRoleOptions::Decision charge_filter_construction_; - std::string policy_; - bool partition_filters_; - std::shared_ptr< - TargetCacheChargeTrackingCache> - cache_; - bool detect_filter_construct_corruption_; -}; - -INSTANTIATE_TEST_CASE_P( - ChargeFilterConstructionTestWithParam, - ChargeFilterConstructionTestWithParam, - ::testing::Values( - std::make_tuple(CacheEntryRoleOptions::Decision::kDisabled, - kFastLocalBloom, false, false), - - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kFastLocalBloom, false, false), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kFastLocalBloom, false, true), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kFastLocalBloom, true, false), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kFastLocalBloom, true, true), - - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kStandard128Ribbon, false, false), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kStandard128Ribbon, false, true), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kStandard128Ribbon, true, false), - std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, - kStandard128Ribbon, true, true), - - 
std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, kLegacyBloom, - false, false))); - -// TODO: Speed up this test, and reduce disk space usage (~700MB) -// The current test inserts many keys (on the scale of dummy entry size) -// in order to make small memory user (e.g, final filter, partitioned hash -// entries/filter/banding) , which is proportional to the number of -// keys, big enough so that its cache charging triggers dummy entry insertion -// and becomes observable in the test. -// -// However, inserting that many keys slows down this test and leaves future -// developers an opportunity to speed it up. -// -// Possible approaches & challenges: -// 1. Use sync point during cache charging of filter construction -// -// Benefit: It does not rely on triggering dummy entry insertion -// but the sync point to verify small memory user is charged correctly. -// -// Challenge: this approach is intrusive. -// -// 2. Make dummy entry size configurable and set it small in the test -// -// Benefit: It increases the precision of cache charging and therefore -// small memory usage can still trigger insertion of dummy entry. -// -// Challenge: change CacheReservationManager related APIs and a hack -// might be needed to control the size of dummmy entry of -// CacheReservationManager used in filter construction for testing -// since CacheReservationManager is not exposed at the high level. -// -TEST_P(ChargeFilterConstructionTestWithParam, Basic) { - Options options = CurrentOptions(); - // We set write_buffer_size big enough so that in the case where there is - // filter construction cache charging, flush won't be triggered before we - // manually trigger it for clean testing - options.write_buffer_size = 640 << 20; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - std::shared_ptr< - TargetCacheChargeTrackingCache> - cache = GetCache(); - options.create_if_missing = true; - // Disable auto compaction to prevent its unexpected side effect - // to the number of keys per partition designed by us in the test - options.disable_auto_compactions = true; - DestroyAndReopen(options); - int num_key = static_cast(GetNumKey()); - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - - ASSERT_EQ(cache->GetChargedCacheIncrementSum(), 0) - << "Flush was triggered too early in the test case with filter " - "construction cache charging - please make sure no flush triggered " - "during the key insertions above"; - - ASSERT_OK(Flush()); - - bool charge_filter_construction = (ChargeFilterConstructMemory() == - CacheEntryRoleOptions::Decision::kEnabled); - std::string policy = GetFilterPolicy(); - bool partition_filters = PartitionFilters(); - bool detect_filter_construct_corruption = - table_options.detect_filter_construct_corruption; - - std::deque filter_construction_cache_res_peaks = - cache->GetChargedCachePeaks(); - std::size_t filter_construction_cache_res_increments_sum = - cache->GetChargedCacheIncrementSum(); - - if (!charge_filter_construction) { - EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0); - return; - } - - if (policy == kLegacyBloom) { - EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0) - << "There shouldn't be filter construction cache charging as this " - "feature does not support kLegacyBloom"; - return; - } - - const std::size_t kDummyEntrySize = CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize(); - - const std::size_t 
predicted_hash_entries_cache_res = - num_key * sizeof(FilterConstructionReserveMemoryHash); - ASSERT_EQ(predicted_hash_entries_cache_res % kDummyEntrySize, 0) - << "It's by this test's design that predicted_hash_entries_cache_res is " - "a multipe of dummy entry"; - - const std::size_t predicted_hash_entries_cache_res_dummy_entry_num = - predicted_hash_entries_cache_res / kDummyEntrySize; - const std::size_t predicted_final_filter_cache_res = - static_cast( - std::ceil(1.0 * predicted_hash_entries_cache_res_dummy_entry_num / 6 * - (policy == kStandard128Ribbon ? 0.7 : 1))) * - kDummyEntrySize; - const std::size_t predicted_banding_cache_res = - static_cast( - std::ceil(predicted_hash_entries_cache_res_dummy_entry_num * 2.5)) * - kDummyEntrySize; - - if (policy == kFastLocalBloom) { - /* kFastLocalBloom + FullFilter - * p0 - * / \ - * b / \ - * / \ - * / \ - * 0/ \ - * hash entries = b - 0, final filter = p0 - b - * p0 = hash entries + final filter - * - * The test is designed in a way such that the reservation for b is a - * multiple of dummy entries so that reservation for (p0 - b) - * will trigger at least another dummy entry insertion. - * - * kFastLocalBloom + FullFilter + - * detect_filter_construct_corruption - * The peak p0 stays the same as - * (kFastLocalBloom + FullFilter) but just lasts - * longer since we release hash entries reservation later. - * - * kFastLocalBloom + PartitionedFilter - * p1 - * / \ - * p0 b'/ \ - * / \ / \ - * b / \ / \ - * / \ / \ - * / a \ - * 0/ \ - * partitioned hash entries1 = b - 0, partitioned hash entries1 = b' - a - * parittioned final filter1 = p0 - b, parittioned final filter2 = p1 - b' - * - * (increment p0 - 0) + (increment p1 - a) - * = partitioned hash entries1 + partitioned hash entries2 - * + parittioned final filter1 + parittioned final filter2 - * = hash entries + final filter - * - * kFastLocalBloom + PartitionedFilter + - * detect_filter_construct_corruption - * The peak p0, p1 stay the same as - * (kFastLocalBloom + PartitionedFilter) but just - * last longer since we release hash entries reservation later. 
- * - */ - if (!partition_filters) { - EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1) - << "Filter construction cache charging should have only 1 peak in " - "case: kFastLocalBloom + FullFilter"; - std::size_t filter_construction_cache_res_peak = - filter_construction_cache_res_peaks[0]; - EXPECT_GT(filter_construction_cache_res_peak, - predicted_hash_entries_cache_res) - << "The testing number of hash entries is designed to make hash " - "entries cache charging be multiples of dummy entries" - " so the correct behavior of charging final filter on top of it" - " should've triggered at least another dummy entry insertion"; - - std::size_t predicted_filter_construction_cache_res_peak = - predicted_hash_entries_cache_res + predicted_final_filter_cache_res; - EXPECT_GE(filter_construction_cache_res_peak, - predicted_filter_construction_cache_res_peak * 0.9); - EXPECT_LE(filter_construction_cache_res_peak, - predicted_filter_construction_cache_res_peak * 1.1); - return; - } else { - EXPECT_GE(filter_construction_cache_res_peaks.size(), 2) - << "Filter construction cache charging should have multiple peaks " - "in case: kFastLocalBloom + " - "PartitionedFilter"; - std::size_t predicted_filter_construction_cache_res_increments_sum = - predicted_hash_entries_cache_res + predicted_final_filter_cache_res; - EXPECT_GE(filter_construction_cache_res_increments_sum, - predicted_filter_construction_cache_res_increments_sum * 0.9); - EXPECT_LE(filter_construction_cache_res_increments_sum, - predicted_filter_construction_cache_res_increments_sum * 1.1); - return; - } - } - - if (policy == kStandard128Ribbon) { - /* kStandard128Ribbon + FullFilter - * p0 - * / \ p1 - * / \/\ - * b / b' \ - * / \ - * 0/ \ - * hash entries = b - 0, banding = p0 - b, final filter = p1 - b' - * p0 = hash entries + banding - * - * The test is designed in a way such that the reservation for (p1 - b') - * will trigger at least another dummy entry insertion - * (or equivalently to saying, creating another peak). 
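 * (Worked numbers for the non-partitioned Ribbon case, using the formulas
 * computed above: the hash entries amount to 12 dummy entries by design, the
 * banding to ceil(12 * 2.5) = 30 and the final filter to
 * ceil(12 / 6 * 0.7) = 2, so the first peak p0 is roughly 42 dummy entries
 * and the later final-filter charge produces the second, much smaller peak;
 * with detect_filter_construct_corruption the two merge into a single peak
 * of roughly 44 dummy entries, as asserted further below.)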
- * - * kStandard128Ribbon + FullFilter + - * detect_filter_construct_corruption - * - * new p0 - * / \ - * / \ - * pre p0 \ - * / \ - * / \ - * b / \ - * / \ - * 0/ \ - * hash entries = b - 0, banding = pre p0 - b, - * final filter = new p0 - pre p0 - * new p0 = hash entries + banding + final filter - * - * The previous p0 will no longer be a peak since under - * detect_filter_construct_corruption == true, we do not release hash - * entries reserveration (like p0 - b' previously) until after final filter - * creation and post-verification - * - * kStandard128Ribbon + PartitionedFilter - * p3 - * p0 /\ p4 - * / \ p1 / \ /\ - * / \/\ b''/ a' \ - * b / b' \ / \ - * / \ / \ - * 0/ a \ - * partitioned hash entries1 = b - 0, partitioned hash entries2 = b'' - a - * partitioned banding1 = p0 - b, partitioned banding2 = p3 - b'' - * parittioned final filter1 = p1 - b',parittioned final filter2 = p4 - a' - * - * (increment p0 - 0) + (increment p1 - b') - * + (increment p3 - a) + (increment p4 - a') - * = partitioned hash entries1 + partitioned hash entries2 - * + parittioned banding1 + parittioned banding2 - * + parittioned final filter1 + parittioned final filter2 - * = hash entries + banding + final filter - * - * kStandard128Ribbon + PartitionedFilter + - * detect_filter_construct_corruption - * - * new p3 - * / \ - * pre p3 \ - * new p0 / \ - * / \ / \ - * pre p0 \ / \ - * / \ b'/ \ - * / \ / \ - * b / \ / \ - * / \a \ - * 0/ \ - * partitioned hash entries1 = b - 0, partitioned hash entries2 = b' - a - * partitioned banding1 = pre p0 - b, partitioned banding2 = pre p3 - b' - * parittioned final filter1 = new p0 - pre p0, - * parittioned final filter2 = new p3 - pre p3 - * - * The previous p0 and p3 will no longer be a peak since under - * detect_filter_construct_corruption == true, we do not release hash - * entries reserveration (like p0 - b', p3 - a' previously) until after - * parittioned final filter creation and post-verification - * - * However, increments sum stay the same as shown below: - * (increment new p0 - 0) + (increment new p3 - a) - * = partitioned hash entries1 + partitioned hash entries2 - * + parittioned banding1 + parittioned banding2 - * + parittioned final filter1 + parittioned final filter2 - * = hash entries + banding + final filter - * - */ - if (!partition_filters) { - ASSERT_GE( - std::floor( - 1.0 * predicted_final_filter_cache_res / - CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()), - 1) - << "Final filter cache charging too small for this test - please " - "increase the number of keys"; - if (!detect_filter_construct_corruption) { - EXPECT_EQ(filter_construction_cache_res_peaks.size(), 2) - << "Filter construction cache charging should have 2 peaks in " - "case: kStandard128Ribbon + " - "FullFilter. 
" - "The second peak is resulted from charging the final filter " - "after " - "decreasing the hash entry reservation since the testing final " - "filter reservation is designed to be at least 1 dummy entry " - "size"; - - std::size_t filter_construction_cache_res_peak = - filter_construction_cache_res_peaks[0]; - std::size_t predicted_filter_construction_cache_res_peak = - predicted_hash_entries_cache_res + predicted_banding_cache_res; - EXPECT_GE(filter_construction_cache_res_peak, - predicted_filter_construction_cache_res_peak * 0.9); - EXPECT_LE(filter_construction_cache_res_peak, - predicted_filter_construction_cache_res_peak * 1.1); - } else { - EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1) - << "Filter construction cache charging should have 1 peaks in " - "case: kStandard128Ribbon + FullFilter " - "+ detect_filter_construct_corruption. " - "The previous second peak now disappears since we don't " - "decrease the hash entry reservation" - "until after final filter reservation and post-verification"; - - std::size_t filter_construction_cache_res_peak = - filter_construction_cache_res_peaks[0]; - std::size_t predicted_filter_construction_cache_res_peak = - predicted_hash_entries_cache_res + predicted_banding_cache_res + - predicted_final_filter_cache_res; - EXPECT_GE(filter_construction_cache_res_peak, - predicted_filter_construction_cache_res_peak * 0.9); - EXPECT_LE(filter_construction_cache_res_peak, - predicted_filter_construction_cache_res_peak * 1.1); - } - return; - } else { - if (!detect_filter_construct_corruption) { - EXPECT_GE(filter_construction_cache_res_peaks.size(), 3) - << "Filter construction cache charging should have more than 3 " - "peaks " - "in case: kStandard128Ribbon + " - "PartitionedFilter"; - } else { - EXPECT_GE(filter_construction_cache_res_peaks.size(), 2) - << "Filter construction cache charging should have more than 2 " - "peaks " - "in case: kStandard128Ribbon + " - "PartitionedFilter + detect_filter_construct_corruption"; - } - std::size_t predicted_filter_construction_cache_res_increments_sum = - predicted_hash_entries_cache_res + predicted_banding_cache_res + - predicted_final_filter_cache_res; - EXPECT_GE(filter_construction_cache_res_increments_sum, - predicted_filter_construction_cache_res_increments_sum * 0.9); - EXPECT_LE(filter_construction_cache_res_increments_sum, - predicted_filter_construction_cache_res_increments_sum * 1.1); - return; - } - } -} - -class DBFilterConstructionCorruptionTestWithParam - : public DBTestBase, - public testing::WithParamInterface< - std::tuple> { - public: - DBFilterConstructionCorruptionTestWithParam() - : DBTestBase("db_bloom_filter_tests", - /*env_do_fsync=*/true) {} - - BlockBasedTableOptions GetBlockBasedTableOptions() { - BlockBasedTableOptions table_options; - table_options.detect_filter_construct_corruption = std::get<0>(GetParam()); - table_options.filter_policy = Create(10, std::get<1>(GetParam())); - table_options.partition_filters = std::get<2>(GetParam()); - if (table_options.partition_filters) { - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - // We set table_options.metadata_block_size small enough so we can - // trigger filter partitioning with GetNumKey() amount of keys - table_options.metadata_block_size = 10; - } - - return table_options; - } - - // Return an appropriate amount of keys for testing - // to generate a long filter (i.e, size >= 8 + kMetadataLen) - std::size_t GetNumKey() { return 5000; } -}; - -INSTANTIATE_TEST_CASE_P( - 
DBFilterConstructionCorruptionTestWithParam, - DBFilterConstructionCorruptionTestWithParam, - ::testing::Values(std::make_tuple(false, kFastLocalBloom, false), - std::make_tuple(true, kFastLocalBloom, false), - std::make_tuple(true, kFastLocalBloom, true), - std::make_tuple(true, kStandard128Ribbon, false), - std::make_tuple(true, kStandard128Ribbon, true))); - -TEST_P(DBFilterConstructionCorruptionTestWithParam, DetectCorruption) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.create_if_missing = true; - options.disable_auto_compactions = true; - - DestroyAndReopen(options); - int num_key = static_cast(GetNumKey()); - Status s; - - // Case 1: No corruption in filter construction - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - s = Flush(); - EXPECT_TRUE(s.ok()); - - // Case 2: Corruption of hash entries in filter construction - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - - SyncPoint::GetInstance()->SetCallBack( - "XXPH3FilterBitsBuilder::Finish::TamperHashEntries", [&](void* arg) { - std::deque* hash_entries_to_corrupt = - (std::deque*)arg; - assert(!hash_entries_to_corrupt->empty()); - *(hash_entries_to_corrupt->begin()) = - *(hash_entries_to_corrupt->begin()) ^ uint64_t { 1 }; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - s = Flush(); - - if (table_options.detect_filter_construct_corruption) { - EXPECT_TRUE(s.IsCorruption()); - EXPECT_TRUE( - s.ToString().find("Filter's hash entries checksum mismatched") != - std::string::npos); - } else { - EXPECT_TRUE(s.ok()); - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "XXPH3FilterBitsBuilder::Finish::" - "TamperHashEntries"); - - // Case 3: Corruption of filter content in filter construction - DestroyAndReopen(options); - - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - - SyncPoint::GetInstance()->SetCallBack( - "XXPH3FilterBitsBuilder::Finish::TamperFilter", [&](void* arg) { - std::pair*, std::size_t>* TEST_arg_pair = - (std::pair*, std::size_t>*)arg; - std::size_t filter_size = TEST_arg_pair->second; - // 5 is the kMetadataLen and - assert(filter_size >= 8 + 5); - std::unique_ptr* filter_content_to_corrupt = - TEST_arg_pair->first; - std::memset(filter_content_to_corrupt->get(), '\0', 8); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - s = Flush(); - - if (table_options.detect_filter_construct_corruption) { - EXPECT_TRUE(s.IsCorruption()); - EXPECT_TRUE(s.ToString().find("Corrupted filter content") != - std::string::npos); - } else { - EXPECT_TRUE(s.ok()); - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "XXPH3FilterBitsBuilder::Finish::" - "TamperFilter"); -} - -// RocksDB lite does not support dynamic options -TEST_P(DBFilterConstructionCorruptionTestWithParam, - DynamicallyTurnOnAndOffDetectConstructCorruption) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - // We intend to turn on - // table_options.detect_filter_construct_corruption dynamically - // therefore we override this test parmater's value - table_options.detect_filter_construct_corruption = false; - - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.create_if_missing = true; - - int num_key = static_cast(GetNumKey()); - Status s; - 
- DestroyAndReopen(options); - - // Case 1: !table_options.detect_filter_construct_corruption - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - - SyncPoint::GetInstance()->SetCallBack( - "XXPH3FilterBitsBuilder::Finish::TamperHashEntries", [&](void* arg) { - std::deque* hash_entries_to_corrupt = - (std::deque*)arg; - assert(!hash_entries_to_corrupt->empty()); - *(hash_entries_to_corrupt->begin()) = - *(hash_entries_to_corrupt->begin()) ^ uint64_t { 1 }; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - s = Flush(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "XXPH3FilterBitsBuilder::Finish::" - "TamperHashEntries"); - - ASSERT_FALSE(table_options.detect_filter_construct_corruption); - EXPECT_TRUE(s.ok()); - - // Case 2: dynamically turn on - // table_options.detect_filter_construct_corruption - ASSERT_OK(db_->SetOptions({{"block_based_table_factory", - "{detect_filter_construct_corruption=true;}"}})); - - for (int i = 0; i < num_key; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - - SyncPoint::GetInstance()->SetCallBack( - "XXPH3FilterBitsBuilder::Finish::TamperHashEntries", [&](void* arg) { - std::deque* hash_entries_to_corrupt = - (std::deque*)arg; - assert(!hash_entries_to_corrupt->empty()); - *(hash_entries_to_corrupt->begin()) = - *(hash_entries_to_corrupt->begin()) ^ uint64_t { 1 }; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - s = Flush(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "XXPH3FilterBitsBuilder::Finish::" - "TamperHashEntries"); - - auto updated_table_options = - db_->GetOptions().table_factory->GetOptions(); - EXPECT_TRUE(updated_table_options->detect_filter_construct_corruption); - EXPECT_TRUE(s.IsCorruption()); - EXPECT_TRUE(s.ToString().find("Filter's hash entries checksum mismatched") != - std::string::npos); - - // Case 3: dynamically turn off - // table_options.detect_filter_construct_corruption - ASSERT_OK(db_->SetOptions({{"block_based_table_factory", - "{detect_filter_construct_corruption=false;}"}})); - updated_table_options = - db_->GetOptions().table_factory->GetOptions(); - EXPECT_FALSE(updated_table_options->detect_filter_construct_corruption); -} - -namespace { -// NOTE: This class is referenced by HISTORY.md as a model for a wrapper -// FilterPolicy selecting among configurations based on context. -class LevelAndStyleCustomFilterPolicy : public FilterPolicy { - public: - explicit LevelAndStyleCustomFilterPolicy(int bpk_fifo, int bpk_l0_other, - int bpk_otherwise) - : policy_fifo_(NewBloomFilterPolicy(bpk_fifo)), - policy_l0_other_(NewBloomFilterPolicy(bpk_l0_other)), - policy_otherwise_(NewBloomFilterPolicy(bpk_otherwise)) {} - - const char* Name() const override { - return "LevelAndStyleCustomFilterPolicy"; - } - - // OK to use built-in policy name because we are deferring to a - // built-in builder. We aren't changing the serialized format. 
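// Illustrative usage, mirroring the ContextCustomFilterPolicy test below:
//   table_options.filter_policy =
//       std::make_shared<TestingContextCustomFilterPolicy>(15, 8, 5);
//   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
// RocksDB then calls GetBuilderWithContext() once per table file being
// built, so the wrapper can pick a different bits-per-key setting for FIFO
// files, L0 files and everything else. The CompatibilityName() override
// below is what keeps the resulting filter blocks readable by the plain
// built-in policies.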
- const char* CompatibilityName() const override { - return policy_fifo_->CompatibilityName(); - } - - FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext& context) const override { - if (context.compaction_style == kCompactionStyleFIFO) { - return policy_fifo_->GetBuilderWithContext(context); - } else if (context.level_at_creation == 0) { - return policy_l0_other_->GetBuilderWithContext(context); - } else { - return policy_otherwise_->GetBuilderWithContext(context); - } - } - - FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override { - // OK to defer to any of them; they all can parse built-in filters - // from any settings. - return policy_fifo_->GetFilterBitsReader(contents); - } - - private: - const std::unique_ptr policy_fifo_; - const std::unique_ptr policy_l0_other_; - const std::unique_ptr policy_otherwise_; -}; - -static std::map - table_file_creation_reason_to_string{ - {TableFileCreationReason::kCompaction, "kCompaction"}, - {TableFileCreationReason::kFlush, "kFlush"}, - {TableFileCreationReason::kMisc, "kMisc"}, - {TableFileCreationReason::kRecovery, "kRecovery"}, - }; - -class TestingContextCustomFilterPolicy - : public LevelAndStyleCustomFilterPolicy { - public: - explicit TestingContextCustomFilterPolicy(int bpk_fifo, int bpk_l0_other, - int bpk_otherwise) - : LevelAndStyleCustomFilterPolicy(bpk_fifo, bpk_l0_other, bpk_otherwise) { - } - - FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext& context) const override { - test_report_ += "cf="; - test_report_ += context.column_family_name; - test_report_ += ",s="; - test_report_ += - OptionsHelper::compaction_style_to_string[context.compaction_style]; - test_report_ += ",n="; - test_report_ += std::to_string(context.num_levels); - test_report_ += ",l="; - test_report_ += std::to_string(context.level_at_creation); - test_report_ += ",b="; - test_report_ += std::to_string(int{context.is_bottommost}); - test_report_ += ",r="; - test_report_ += table_file_creation_reason_to_string[context.reason]; - test_report_ += "\n"; - - return LevelAndStyleCustomFilterPolicy::GetBuilderWithContext(context); - } - - std::string DumpTestReport() { - std::string rv; - std::swap(rv, test_report_); - return rv; - } - - private: - mutable std::string test_report_; -}; -} // anonymous namespace - -TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) { - auto policy = std::make_shared(15, 8, 5); - Options options; - for (bool fifo : {true, false}) { - options = CurrentOptions(); - options.max_open_files = fifo ? -1 : options.max_open_files; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.compaction_style = - fifo ? kCompactionStyleFIFO : kCompactionStyleLevel; - - BlockBasedTableOptions table_options; - table_options.filter_policy = policy; - table_options.format_version = 5; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - TryReopen(options); - CreateAndReopenWithCF({fifo ? "abe" : "bob"}, options); - - const int maxKey = 10000; - for (int i = 0; i < maxKey / 2; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - // Add a large key to make the file contain wide range - ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - Flush(1); - EXPECT_EQ(policy->DumpTestReport(), - fifo ? 
"cf=abe,s=kCompactionStyleFIFO,n=7,l=0,b=0,r=kFlush\n" - : "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n"); - - for (int i = maxKey / 2; i < maxKey; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - Flush(1); - EXPECT_EQ(policy->DumpTestReport(), - fifo ? "cf=abe,s=kCompactionStyleFIFO,n=7,l=0,b=0,r=kFlush\n" - : "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n"); - - // Check that they can be found - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - // Since we have two tables / two filters, we might have Bloom checks on - // our queries, but no more than one "useful" per query on a found key. - EXPECT_LE(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey); - - // Check that we have two filters, each about - // fifo: 0.12% FP rate (15 bits per key) - // level: 2.3% FP rate (8 bits per key) - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); - } - { - auto useful_count = - TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); - EXPECT_GE(useful_count, maxKey * 2 * (fifo ? 0.9980 : 0.975)); - EXPECT_LE(useful_count, maxKey * 2 * (fifo ? 0.9995 : 0.98)); - } - - if (!fifo) { // FIFO doesn't fully support CompactRange - // Full compaction - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr)); - EXPECT_EQ(policy->DumpTestReport(), - "cf=bob,s=kCompactionStyleLevel,n=7,l=1,b=1,r=kCompaction\n"); - - // Check that we now have one filter, about 9.2% FP rate (5 bits per key) - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); - } - { - auto useful_count = - TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); - EXPECT_GE(useful_count, maxKey * 0.90); - EXPECT_LE(useful_count, maxKey * 0.91); - } - } else { - // Also try external SST file - { - std::string file_path = dbname_ + "/external.sst"; - SstFileWriter sst_file_writer(EnvOptions(), options, handles_[1]); - ASSERT_OK(sst_file_writer.Open(file_path)); - ASSERT_OK(sst_file_writer.Put("key", "value")); - ASSERT_OK(sst_file_writer.Finish()); - } - // Note: kCompactionStyleLevel is default, ignored if num_levels == -1 - EXPECT_EQ(policy->DumpTestReport(), - "cf=abe,s=kCompactionStyleLevel,n=-1,l=-1,b=0,r=kMisc\n"); - } - - // Destroy - ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1])); - handles_[1] = nullptr; - } -} - -class SliceTransformLimitedDomain : public SliceTransform { - const char* Name() const override { return "SliceTransformLimitedDomain"; } - - Slice Transform(const Slice& src) const override { - return Slice(src.data(), 5); - } - - bool InDomain(const Slice& src) const override { - // prefix will be x???? - return src.size() >= 5 && src[0] == 'x'; - } - - bool InRange(const Slice& dst) const override { - // prefix will be x???? 
- return dst.size() == 5 && dst[0] == 'x';
- }
-};
-
-TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter1) {
- BlockBasedTableOptions bbto;
- bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
- bbto.whole_key_filtering = false;
-
- Options options = CurrentOptions();
- options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
- DestroyAndReopen(options);
-
- ASSERT_OK(Put("x1111_AAAA", "val1"));
- ASSERT_OK(Put("x1112_AAAA", "val2"));
- ASSERT_OK(Put("x1113_AAAA", "val3"));
- ASSERT_OK(Put("x1114_AAAA", "val4"));
- // Not in domain, won't be added to filter
- ASSERT_OK(Put("zzzzz_AAAA", "val5"));
-
- ASSERT_OK(Flush());
-
- ASSERT_EQ(Get("x1111_AAAA"), "val1");
- ASSERT_EQ(Get("x1112_AAAA"), "val2");
- ASSERT_EQ(Get("x1113_AAAA"), "val3");
- ASSERT_EQ(Get("x1114_AAAA"), "val4");
- // Was not added to filter but RocksDB will try to read it from the filter
- ASSERT_EQ(Get("zzzzz_AAAA"), "val5");
-}
-
-TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter2) {
- BlockBasedTableOptions bbto;
- bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
-
- Options options = CurrentOptions();
- options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>();
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-
- DestroyAndReopen(options);
-
- ASSERT_OK(Put("x1113_AAAA", "val3"));
- ASSERT_OK(Put("x1114_AAAA", "val4"));
- // Not in domain, won't be added to filter
- ASSERT_OK(Put("zzzzz_AAAA", "val1"));
- ASSERT_OK(Put("zzzzz_AAAB", "val2"));
- ASSERT_OK(Put("zzzzz_AAAC", "val3"));
- ASSERT_OK(Put("zzzzz_AAAD", "val4"));
-
- ASSERT_OK(Flush());
-
- std::vector<std::string> iter_res;
- auto iter = db_->NewIterator(ReadOptions());
- // Seek to a key that was not in Domain
- for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) {
- iter_res.emplace_back(iter->value().ToString());
- }
-
- std::vector<std::string> expected_res = {"val1", "val2", "val3", "val4"};
- ASSERT_EQ(iter_res, expected_res);
- delete iter;
-}
-
-TEST_F(DBBloomFilterTest, MemtableWholeKeyBloomFilter) {
- // regression test for #2743. 
the range delete tombstones in memtable should - // be added even when Get() skips searching due to its prefix bloom filter - const int kMemtableSize = 1 << 20; // 1MB - const int kMemtablePrefixFilterSize = 1 << 13; // 8KB - const int kPrefixLen = 4; - Options options = CurrentOptions(); - options.memtable_prefix_bloom_size_ratio = - static_cast(kMemtablePrefixFilterSize) / kMemtableSize; - options.prefix_extractor.reset( - ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen)); - options.write_buffer_size = kMemtableSize; - options.memtable_whole_key_filtering = false; - Reopen(options); - std::string key1("AAAABBBB"); - std::string key2("AAAACCCC"); // not in DB - std::string key3("AAAADDDD"); - std::string key4("AAAAEEEE"); - std::string value1("Value1"); - std::string value3("Value3"); - std::string value4("Value4"); - - ASSERT_OK(Put(key1, value1, WriteOptions())); - - // check memtable bloom stats - ASSERT_EQ("NOT_FOUND", Get(key2)); - ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); - // same prefix, bloom filter false positive - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); - - // enable whole key bloom filter - options.memtable_whole_key_filtering = true; - Reopen(options); - // check memtable bloom stats - ASSERT_OK(Put(key3, value3, WriteOptions())); - ASSERT_EQ("NOT_FOUND", Get(key2)); - // whole key bloom filter kicks in and determines it's a miss - ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); - - // verify whole key filtering does not depend on prefix_extractor - options.prefix_extractor.reset(); - Reopen(options); - // check memtable bloom stats - ASSERT_OK(Put(key4, value4, WriteOptions())); - ASSERT_EQ("NOT_FOUND", Get(key2)); - // whole key bloom filter kicks in and determines it's a miss - ASSERT_EQ(2, get_perf_context()->bloom_memtable_miss_count); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); -} - -TEST_F(DBBloomFilterTest, MemtableWholeKeyBloomFilterMultiGet) { - Options options = CurrentOptions(); - options.memtable_prefix_bloom_size_ratio = 0.015; - options.memtable_whole_key_filtering = true; - Reopen(options); - std::string key1("AA"); - std::string key2("BB"); - std::string key3("CC"); - std::string key4("DD"); - std::string key_not("EE"); - std::string value1("Value1"); - std::string value2("Value2"); - std::string value3("Value3"); - std::string value4("Value4"); - - ASSERT_OK(Put(key1, value1, WriteOptions())); - ASSERT_OK(Put(key2, value2, WriteOptions())); - ASSERT_OK(Flush()); - ASSERT_OK(Put(key3, value3, WriteOptions())); - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(Put(key4, value4, WriteOptions())); - - // Delete key2 and key3 - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "BA", "CZ")); - - // Read without snapshot - auto results = MultiGet({key_not, key1, key2, key3, key4}); - ASSERT_EQ(results[0], "NOT_FOUND"); - ASSERT_EQ(results[1], value1); - ASSERT_EQ(results[2], "NOT_FOUND"); - ASSERT_EQ(results[3], "NOT_FOUND"); - ASSERT_EQ(results[4], value4); - - // Also check Get - ASSERT_EQ(Get(key1), value1); - ASSERT_EQ(Get(key2), "NOT_FOUND"); - ASSERT_EQ(Get(key3), "NOT_FOUND"); - ASSERT_EQ(Get(key4), value4); - - // Read with snapshot - results = MultiGet({key_not, key1, key2, key3, key4}, snapshot); - ASSERT_EQ(results[0], "NOT_FOUND"); - ASSERT_EQ(results[1], value1); - ASSERT_EQ(results[2], value2); - ASSERT_EQ(results[3], value3); - ASSERT_EQ(results[4], "NOT_FOUND"); - - // Also check 
Get - ASSERT_EQ(Get(key1, snapshot), value1); - ASSERT_EQ(Get(key2, snapshot), value2); - ASSERT_EQ(Get(key3, snapshot), value3); - ASSERT_EQ(Get(key4, snapshot), "NOT_FOUND"); - - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBBloomFilterTest, MemtablePrefixBloomOutOfDomain) { - constexpr size_t kPrefixSize = 8; - const std::string kKey = "key"; - assert(kKey.size() < kPrefixSize); - Options options = CurrentOptions(); - options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixSize)); - options.memtable_prefix_bloom_size_ratio = 0.25; - Reopen(options); - ASSERT_OK(Put(kKey, "v")); - ASSERT_EQ("v", Get(kKey)); - std::unique_ptr iter(dbfull()->NewIterator(ReadOptions())); - iter->Seek(kKey); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(kKey, iter->key()); - iter->SeekForPrev(kKey); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(kKey, iter->key()); -} - -class DBBloomFilterTestVaryPrefixAndFormatVer - : public DBTestBase, - public testing::WithParamInterface> { - protected: - bool use_prefix_; - uint32_t format_version_; - - public: - DBBloomFilterTestVaryPrefixAndFormatVer() - : DBTestBase("db_bloom_filter_tests", /*env_do_fsync=*/true) {} - - ~DBBloomFilterTestVaryPrefixAndFormatVer() override {} - - void SetUp() override { - use_prefix_ = std::get<0>(GetParam()); - format_version_ = std::get<1>(GetParam()); - } - - static std::string UKey(uint32_t i) { return Key(static_cast(i)); } -}; - -TEST_P(DBBloomFilterTestVaryPrefixAndFormatVer, PartitionedMultiGet) { - Options options = CurrentOptions(); - if (use_prefix_) { - // Entire key from UKey() - options.prefix_extractor.reset(NewCappedPrefixTransform(9)); - } - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(20)); - bbto.partition_filters = true; - bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - bbto.whole_key_filtering = !use_prefix_; - if (use_prefix_) { // (not related to prefix, just alternating between) - // Make sure code appropriately deals with metadata block size setting - // that is "too small" (smaller than minimum size for filter builder) - bbto.metadata_block_size = 63; - } else { - // Make sure the test will work even on platforms with large minimum - // filter size, due to large cache line size. - // (Largest cache line size + 10+% overhead.) - bbto.metadata_block_size = 290; - } - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - ReadOptions ropts; - - constexpr uint32_t N = 12000; - // Add N/2 evens - for (uint32_t i = 0; i < N; i += 2) { - ASSERT_OK(Put(UKey(i), UKey(i))); - } - ASSERT_OK(Flush()); - ASSERT_EQ(TotalTableFiles(), 1); - - constexpr uint32_t Q = 29; - // MultiGet In - std::array keys; - std::array key_slices; - std::array column_families; - // MultiGet Out - std::array statuses; - std::array values; - - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL); - TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); - TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED); - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE); - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE); - - // Check that initial clump of keys only loads one partition filter from - // block cache. - // And that spread out keys load many partition filters. - // In both cases, mix present vs. 
not present keys. - for (uint32_t stride : {uint32_t{1}, (N / Q) | 1}) { - for (uint32_t i = 0; i < Q; ++i) { - keys[i] = UKey(i * stride); - key_slices[i] = Slice(keys[i]); - column_families[i] = db_->DefaultColumnFamily(); - statuses[i] = Status(); - values[i] = PinnableSlice(); - } - - db_->MultiGet(ropts, Q, &column_families[0], &key_slices[0], &values[0], - /*timestamps=*/nullptr, &statuses[0], true); - - // Confirm correct status results - uint32_t number_not_found = 0; - for (uint32_t i = 0; i < Q; ++i) { - if ((i * stride % 2) == 0) { - ASSERT_OK(statuses[i]); - } else { - ASSERT_TRUE(statuses[i].IsNotFound()); - ++number_not_found; - } - } - - // Confirm correct Bloom stats (no FPs) - uint64_t filter_useful = TestGetAndResetTickerCount( - options, - use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL); - uint64_t filter_checked = - TestGetAndResetTickerCount(options, use_prefix_ - ? BLOOM_FILTER_PREFIX_CHECKED - : BLOOM_FILTER_FULL_POSITIVE) + - (use_prefix_ ? 0 : filter_useful); - EXPECT_EQ(filter_useful, number_not_found); - EXPECT_EQ(filter_checked, Q); - if (!use_prefix_) { - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - Q - number_not_found); - } - - // Confirm no duplicate loading same filter partition - uint64_t filter_accesses = - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) + - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - if (stride == 1) { - EXPECT_EQ(filter_accesses, 1); - } else { - // for large stride - EXPECT_GE(filter_accesses, Q / 2 + 1); - } - } - - // Check that a clump of keys (present and not) works when spanning - // two partitions - int found_spanning = 0; - for (uint32_t start = 0; start < N / 2;) { - for (uint32_t i = 0; i < Q; ++i) { - keys[i] = UKey(start + i); - key_slices[i] = Slice(keys[i]); - column_families[i] = db_->DefaultColumnFamily(); - statuses[i] = Status(); - values[i] = PinnableSlice(); - } - - db_->MultiGet(ropts, Q, &column_families[0], &key_slices[0], &values[0], - /*timestamps=*/nullptr, &statuses[0], true); - - // Confirm correct status results - uint32_t number_not_found = 0; - for (uint32_t i = 0; i < Q; ++i) { - if (((start + i) % 2) == 0) { - ASSERT_OK(statuses[i]); - } else { - ASSERT_TRUE(statuses[i].IsNotFound()); - ++number_not_found; - } - } - - // Confirm correct Bloom stats (might see some FPs) - uint64_t filter_useful = TestGetAndResetTickerCount( - options, - use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL); - uint64_t filter_checked = - TestGetAndResetTickerCount(options, use_prefix_ - ? BLOOM_FILTER_PREFIX_CHECKED - : BLOOM_FILTER_FULL_POSITIVE) + - (use_prefix_ ? 0 : filter_useful); - EXPECT_GE(filter_useful, number_not_found - 2); // possible FP - EXPECT_EQ(filter_checked, Q); - if (!use_prefix_) { - EXPECT_EQ( - TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), - Q - number_not_found); - } - - // Confirm no duplicate loading of same filter partition - uint64_t filter_accesses = - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) + - TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - if (filter_accesses == 2) { - // Spanned across partitions. - ++found_spanning; - if (found_spanning >= 2) { - break; - } else { - // Ensure that at least once we have at least one present and - // one non-present key on both sides of partition boundary. 
- start += 2; - } - } else { - EXPECT_EQ(filter_accesses, 1); - // See explanation at "start += 2" - start += Q - 4; - } - } - EXPECT_TRUE(found_spanning >= 2); -} - -INSTANTIATE_TEST_CASE_P(DBBloomFilterTestVaryPrefixAndFormatVer, - DBBloomFilterTestVaryPrefixAndFormatVer, - ::testing::Values( - // (use_prefix, format_version) - std::make_tuple(false, 2), - std::make_tuple(false, 3), - std::make_tuple(false, 4), - std::make_tuple(false, 5), std::make_tuple(true, 2), - std::make_tuple(true, 3), std::make_tuple(true, 4), - std::make_tuple(true, 5))); - -namespace { -static const std::string kPlainTable = "test_PlainTableBloom"; -} // anonymous namespace - -class BloomStatsTestWithParam - : public DBBloomFilterTest, - public testing::WithParamInterface> { - public: - BloomStatsTestWithParam() { - bfp_impl_ = std::get<0>(GetParam()); - partition_filters_ = std::get<1>(GetParam()); - - options_.create_if_missing = true; - options_.prefix_extractor.reset( - ROCKSDB_NAMESPACE::NewFixedPrefixTransform(4)); - options_.memtable_prefix_bloom_size_ratio = - 8.0 * 1024.0 / static_cast(options_.write_buffer_size); - if (bfp_impl_ == kPlainTable) { - assert(!partition_filters_); // not supported in plain table - PlainTableOptions table_options; - options_.table_factory.reset(NewPlainTableFactory(table_options)); - } else { - BlockBasedTableOptions table_options; - if (partition_filters_) { - table_options.partition_filters = partition_filters_; - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } - table_options.filter_policy = Create(10, bfp_impl_); - options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); - } - options_.env = env_; - - get_perf_context()->Reset(); - DestroyAndReopen(options_); - } - - ~BloomStatsTestWithParam() override { - get_perf_context()->Reset(); - Destroy(options_); - } - - // Required if inheriting from testing::WithParamInterface<> - static void SetUpTestCase() {} - static void TearDownTestCase() {} - - std::string bfp_impl_; - bool partition_filters_; - Options options_; -}; - -// 1 Insert 2 K-V pairs into DB -// 2 Call Get() for both keys - expext memtable bloom hit stat to be 2 -// 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1 -// 4 Call Flush() to create SST -// 5 Call Get() for both keys - expext SST bloom hit stat to be 2 -// 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1 -// Test both: block and plain SST -TEST_P(BloomStatsTestWithParam, BloomStatsTest) { - std::string key1("AAAA"); - std::string key2("RXDB"); // not in DB - std::string key3("ZBRA"); - std::string value1("Value1"); - std::string value3("Value3"); - - ASSERT_OK(Put(key1, value1, WriteOptions())); - ASSERT_OK(Put(key3, value3, WriteOptions())); - - // check memtable bloom stats - ASSERT_EQ(value1, Get(key1)); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); - ASSERT_EQ(value3, Get(key3)); - ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); - - ASSERT_EQ("NOT_FOUND", Get(key2)); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); - ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); - - // sanity checks - ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_sst_miss_count); - - Flush(); - - // sanity checks - ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_sst_miss_count); - - // check SST bloom stats - 
ASSERT_EQ(value1, Get(key1)); - ASSERT_EQ(1, get_perf_context()->bloom_sst_hit_count); - ASSERT_EQ(value3, Get(key3)); - ASSERT_EQ(2, get_perf_context()->bloom_sst_hit_count); - - ASSERT_EQ("NOT_FOUND", Get(key2)); - ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count); -} - -// Same scenario as in BloomStatsTest but using an iterator -TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { - std::string key1("AAAA"); - std::string key2("RXDB"); // not in DB - std::string key3("ZBRA"); - std::string value1("Value1"); - std::string value3("Value3"); - - ASSERT_OK(Put(key1, value1, WriteOptions())); - ASSERT_OK(Put(key3, value3, WriteOptions())); - - std::unique_ptr iter(dbfull()->NewIterator(ReadOptions())); - - // check memtable bloom stats - iter->Seek(key1); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(value1, iter->value().ToString()); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); - - iter->Seek(key3); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(value3, iter->value().ToString()); - ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); - ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); - - iter->Seek(key2); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); - ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); - - Flush(); - - iter.reset(dbfull()->NewIterator(ReadOptions())); - - // Check SST bloom stats - iter->Seek(key1); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(value1, iter->value().ToString()); - ASSERT_EQ(1, get_perf_context()->bloom_sst_hit_count); - - iter->Seek(key3); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(value3, iter->value().ToString()); - uint64_t expected_hits = 2; - ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); - - iter->Seek(key2); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count); - ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); -} - -INSTANTIATE_TEST_CASE_P( - BloomStatsTestWithParam, BloomStatsTestWithParam, - ::testing::Values(std::make_tuple(kLegacyBloom, false), - std::make_tuple(kLegacyBloom, true), - std::make_tuple(kFastLocalBloom, false), - std::make_tuple(kFastLocalBloom, true), - std::make_tuple(kPlainTable, false))); - -namespace { -void PrefixScanInit(DBBloomFilterTest* dbtest) { - char buf[100]; - std::string keystr; - const int small_range_sstfiles = 5; - const int big_range_sstfiles = 5; - - // Generate 11 sst files with the following prefix ranges. - // GROUP 0: [0,10] (level 1) - // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6] (level 0) - // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10] (level 0) - // - // A seek with the previous API would do 11 random I/Os (to all the - // files). With the new API and a prefix filter enabled, we should - // only do 2 random I/O, to the 2 files containing the key. 
- - // GROUP 0 - snprintf(buf, sizeof(buf), "%02d______:start", 0); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - snprintf(buf, sizeof(buf), "%02d______:end", 10); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - ASSERT_OK(dbtest->Flush()); - ASSERT_OK(dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr, - nullptr)); // move to level 1 - - // GROUP 1 - for (int i = 1; i <= small_range_sstfiles; i++) { - snprintf(buf, sizeof(buf), "%02d______:start", i); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - snprintf(buf, sizeof(buf), "%02d______:end", i + 1); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); - } - - // GROUP 2 - for (int i = 1; i <= big_range_sstfiles; i++) { - snprintf(buf, sizeof(buf), "%02d______:start", 0); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - snprintf(buf, sizeof(buf), "%02d______:end", small_range_sstfiles + i + 1); - keystr = std::string(buf); - ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); - } -} -} // anonymous namespace - -TEST_F(DBBloomFilterTest, PrefixScan) { - while (ChangeFilterOptions()) { - int count; - Slice prefix; - Slice key; - char buf[100]; - Iterator* iter; - snprintf(buf, sizeof(buf), "03______:"); - prefix = Slice(buf, 8); - key = Slice(buf, 9); - ASSERT_EQ(key.difference_offset(prefix), 8); - ASSERT_EQ(prefix.difference_offset(key), 8); - // db configs - env_->count_random_reads_ = true; - Options options = CurrentOptions(); - options.env = env_; - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - options.disable_auto_compactions = true; - options.max_background_compactions = 2; - options.create_if_missing = true; - options.memtable_factory.reset(NewHashSkipListRepFactory(16)); - assert(!options.unordered_write); - // It is incompatible with allow_concurrent_memtable_write=false - options.allow_concurrent_memtable_write = false; - - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - table_options.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - // 11 RAND I/Os - DestroyAndReopen(options); - PrefixScanInit(this); - count = 0; - env_->random_read_counter_.Reset(); - iter = db_->NewIterator(ReadOptions()); - for (iter->Seek(prefix); iter->Valid(); iter->Next()) { - if (!iter->key().starts_with(prefix)) { - break; - } - count++; - } - ASSERT_OK(iter->status()); - delete iter; - ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 2); - Close(); - } // end of while -} - -TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { - Options options = CurrentOptions(); - options.write_buffer_size = 64 * 1024; - options.arena_block_size = 4 * 1024; - options.target_file_size_base = 64 * 1024; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 4; - options.max_bytes_for_level_base = 256 * 1024; - options.max_write_buffer_number = 2; - options.max_background_compactions = 8; - options.max_background_flushes = 8; - options.compression = kNoCompression; - options.compaction_style = kCompactionStyleLevel; - options.level_compaction_dynamic_level_bytes = true; - BlockBasedTableOptions bbto; - bbto.cache_index_and_filter_blocks = true; - bbto.filter_policy.reset(NewBloomFilterPolicy(10)); - bbto.whole_key_filtering = true; - 
options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.optimize_filters_for_hits = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - get_perf_context()->Reset(); - get_perf_context()->EnablePerLevelPerfContext(); - CreateAndReopenWithCF({"mypikachu"}, options); - - int numkeys = 200000; - - // Generate randomly shuffled keys, so the updates are almost - // random. - std::vector keys; - keys.reserve(numkeys); - for (int i = 0; i < numkeys; i += 2) { - keys.push_back(i); - } - RandomShuffle(std::begin(keys), std::end(keys), /*seed*/ 42); - int num_inserted = 0; - for (int key : keys) { - ASSERT_OK(Put(1, Key(key), "val")); - if (++num_inserted % 1000 == 0) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - } - ASSERT_OK(Put(1, Key(0), "val")); - ASSERT_OK(Put(1, Key(numkeys), "val")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - if (NumTableFilesAtLevel(0, 1) == 0) { - // No Level 0 file. Create one. - ASSERT_OK(Put(1, Key(0), "val")); - ASSERT_OK(Put(1, Key(numkeys), "val")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - - for (int i = 1; i < numkeys; i += 2) { - ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND"); - } - - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0)); - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1)); - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); - - // Now we have three sorted run, L0, L5 and L6 with most files in L6 have - // no bloom filter. Most keys be checked bloom filters twice. - ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2); - ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2); - uint64_t bloom_filter_useful_all_levels = 0; - for (auto& kv : (*(get_perf_context()->level_to_perf_context))) { - if (kv.second.bloom_filter_useful > 0) { - bloom_filter_useful_all_levels += kv.second.bloom_filter_useful; - } - } - ASSERT_GT(bloom_filter_useful_all_levels, 65000 * 2); - ASSERT_LT(bloom_filter_useful_all_levels, 120000 * 2); - - for (int i = 0; i < numkeys; i += 2) { - ASSERT_EQ(Get(1, Key(i)), "val"); - } - - // Part 2 (read path): rewrite last level with blooms, then verify they get - // cached only if !optimize_filters_for_hits - options.disable_auto_compactions = true; - options.num_levels = 9; - options.optimize_filters_for_hits = false; - options.statistics = CreateDBStatistics(); - bbto.block_cache.reset(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - MoveFilesToLevel(7 /* level */, 1 /* column family index */); - - std::string value = Get(1, Key(0)); - uint64_t prev_cache_filter_hits = - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - value = Get(1, Key(0)); - ASSERT_EQ(prev_cache_filter_hits + 1, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - - // Now that we know the filter blocks exist in the last level files, see if - // filter caching is skipped for this optimization - options.optimize_filters_for_hits = true; - options.statistics = CreateDBStatistics(); - bbto.block_cache.reset(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - value = Get(1, Key(0)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - ASSERT_EQ(2 /* index and data block */, - TestGetTickerCount(options, 
BLOCK_CACHE_ADD)); - - // Check filter block ignored for files preloaded during DB::Open() - options.max_open_files = -1; - options.statistics = CreateDBStatistics(); - bbto.block_cache.reset(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - uint64_t prev_cache_filter_misses = - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - Get(1, Key(0)); - ASSERT_EQ(prev_cache_filter_misses, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(prev_cache_filter_hits, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - - // Check filter block ignored for file trivially-moved to bottom level - bbto.block_cache.reset(); - options.max_open_files = 100; // setting > -1 makes it not preload all files - options.statistics = CreateDBStatistics(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - ASSERT_OK(Put(1, Key(numkeys + 1), "val")); - ASSERT_OK(Flush(1)); - - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - CompactRangeOptions compact_options; - compact_options.bottommost_level_compaction = - BottommostLevelCompaction::kSkip; - compact_options.change_level = true; - compact_options.target_level = 7; - ASSERT_OK(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - - ASSERT_EQ(trivial_move, 1); - ASSERT_EQ(non_trivial_move, 0); - - prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); - prev_cache_filter_misses = - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); - value = Get(1, Key(numkeys + 1)); - ASSERT_EQ(prev_cache_filter_hits, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - ASSERT_EQ(prev_cache_filter_misses, - TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - - // Check filter block not cached for iterator - bbto.block_cache.reset(); - options.statistics = CreateDBStatistics(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - ReopenWithColumnFamilies({"default", "mypikachu"}, options); - - std::unique_ptr iter(db_->NewIterator(ReadOptions(), handles_[1])); - iter->SeekToFirst(); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); - ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); - ASSERT_EQ(2 /* index and data block */, - TestGetTickerCount(options, BLOCK_CACHE_ADD)); - get_perf_context()->Reset(); -} - -int CountIter(std::unique_ptr& iter, const Slice& key) { - int count = 0; - for (iter->Seek(key); iter->Valid(); iter->Next()) { - count++; - } - EXPECT_OK(iter->status()); - return count; -} - -// use iterate_upper_bound to hint compatiability of existing bloom filters. -// The BF is considered compatible if 1) upper bound and seek key transform -// into the same string, or 2) the transformed seek key is of the same length -// as the upper bound and two keys are adjacent according to the comparator. 
-TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) { - for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { - Options options; - options.create_if_missing = true; - options.env = CurrentOptions().env; - options.prefix_extractor.reset(NewCappedPrefixTransform(4)); - options.disable_auto_compactions = true; - options.statistics = CreateDBStatistics(); - // Enable prefix bloom for SST files - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy = Create(10, bfp_impl); - table_options.index_shortening = BlockBasedTableOptions:: - IndexShorteningMode::kShortenSeparatorsAndSuccessor; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - ASSERT_OK(Put("abcdxxx0", "val1")); - ASSERT_OK(Put("abcdxxx1", "val2")); - ASSERT_OK(Put("abcdxxx2", "val3")); - ASSERT_OK(Put("abcdxxx3", "val4")); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - { - // prefix_extractor has not changed, BF will always be read - Slice upper_bound("abce"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcd0000"), 4); - } - { - Slice upper_bound("abcdzzzz"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcd0000"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:5"}})); - ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), - "rocksdb.FixedPrefix.5"); - { - // BF changed, [abcdxx00, abce) is a valid bound, will trigger BF read - Slice upper_bound("abce"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcdxx00"), 4); - // should check bloom filter since upper bound meets requirement - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - { - // [abcdxx01, abcey) is not valid bound since upper bound is too long for - // the BF in SST (capped:4) - Slice upper_bound("abcey"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcdxx01"), 4); - // should skip bloom filter since upper bound is too long - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - { - // [abcdxx02, abcdy) is a valid bound since the prefix is the same - Slice upper_bound("abcdy"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abcdxx02"), 4); - // should check bloom filter since upper bound matches transformed seek - // key - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); - ASSERT_EQ(TestGetTickerCount(options, 
BLOOM_FILTER_PREFIX_USEFUL), 0); - } - { - // [aaaaaaaa, abce) is not a valid bound since 1) they don't share the - // same prefix, 2) the prefixes are not consecutive - Slice upper_bound("abce"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0); - // should skip bloom filter since mismatch is found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}})); - { - // [abc, abd) is not a valid bound since the upper bound is too short - // for BF (capped:4) - Slice upper_bound("abd"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abc"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - // Same with re-open - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - Reopen(options); - { - Slice upper_bound("abd"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abc"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - // Set back to capped:4 and verify BF is always read - options.prefix_extractor.reset(NewCappedPrefixTransform(4)); - Reopen(options); - { - Slice upper_bound("abd"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); - } - // Same if there's a problem initally loading prefix transform - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTable::Open::ForceNullTablePrefixExtractor", - [&](void* arg) { *static_cast(arg) = true; }); - SyncPoint::GetInstance()->EnableProcessing(); - Reopen(options); - { - Slice upper_bound("abd"); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - read_options.iterate_upper_bound = &upper_bound; - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); - } - SyncPoint::GetInstance()->DisableProcessing(); - } -} - -// Create multiple SST files each with a different prefix_extractor config, -// verify iterators can read all SST files using the latest config. 
-TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
-  for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) {
-    Options options;
-    options.env = CurrentOptions().env;
-    options.create_if_missing = true;
-    options.prefix_extractor.reset(NewFixedPrefixTransform(1));
-    options.disable_auto_compactions = true;
-    options.statistics = CreateDBStatistics();
-    // Enable prefix bloom for SST files
-    BlockBasedTableOptions table_options;
-    table_options.filter_policy = Create(10, bfp_impl);
-    table_options.cache_index_and_filter_blocks = true;
-    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-    DestroyAndReopen(options);
-
-    Slice upper_bound("foz90000");
-    ReadOptions read_options;
-    read_options.prefix_same_as_start = true;
-
-    // first SST with fixed:1 BF
-    ASSERT_OK(Put("foo2", "bar2"));
-    ASSERT_OK(Put("foo", "bar"));
-    ASSERT_OK(Put("foq1", "bar1"));
-    ASSERT_OK(Put("fpa", "0"));
-    dbfull()->Flush(FlushOptions());
-    std::unique_ptr<Iterator> iter_old(db_->NewIterator(read_options));
-    ASSERT_EQ(CountIter(iter_old, "foo"), 4);
-    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 1);
-
-    ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
-    ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(),
-              "rocksdb.CappedPrefix.3");
-    read_options.iterate_upper_bound = &upper_bound;
-    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
-    ASSERT_EQ(CountIter(iter, "foo"), 2);
-    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2);
-    ASSERT_EQ(CountIter(iter, "gpk"), 0);
-    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2);
-    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
-
-    // second SST with capped:3 BF
-    ASSERT_OK(Put("foo3", "bar3"));
-    ASSERT_OK(Put("foo4", "bar4"));
-    ASSERT_OK(Put("foq5", "bar5"));
-    ASSERT_OK(Put("fpb", "1"));
-    ASSERT_OK(dbfull()->Flush(FlushOptions()));
-    {
-      // BF is capped:3 now
-      std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
-      ASSERT_EQ(CountIter(iter_tmp, "foo"), 4);
-      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 4);
-      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
-      ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
-      // both counters are incremented because BF is "not changed" for 1 of the
-      // 2 SST files, so filter is checked once and found no match.
- ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 5); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); - } - - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}})); - ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), - "rocksdb.FixedPrefix.2"); - // third SST with fixed:2 BF - ASSERT_OK(Put("foo6", "bar6")); - ASSERT_OK(Put("foo7", "bar7")); - ASSERT_OK(Put("foq8", "bar8")); - ASSERT_OK(Put("fpc", "2")); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - { - // BF is fixed:2 now - std::unique_ptr iter_tmp(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_tmp, "foo"), 9); - // the first and last BF are checked - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 7); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1); - ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0); - // only last BF is checked and not found - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 8); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); - } - - // iter_old can only see the first SST, so checked plus 1 - ASSERT_EQ(CountIter(iter_old, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9); - // iter was created after the first setoptions call so only full filter - // will check the filter - ASSERT_EQ(CountIter(iter, "foo"), 2); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 10); - - { - // keys in all three SSTs are visible to iterator - // The range of [foo, foz90000] is compatible with (fixed:1) and (fixed:2) - // so +2 for checked counter - std::unique_ptr iter_all(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_all, "foo"), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2); - ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 13); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); - } - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); - ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), - "rocksdb.CappedPrefix.3"); - { - std::unique_ptr iter_all(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_all, "foo"), 6); - // all three SST are checked because the current options has the same as - // the remaining SST (capped:3) - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 16); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); - ASSERT_EQ(CountIter(iter_all, "gpk"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 17); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4); - } - // TODO(Zhongyi): Maybe also need to add Get calls to test point look up? 
- } -} - -// Create a new column family in a running DB, change prefix_extractor -// dynamically, verify the iterator created on the new column family behaves -// as expected -TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) { - int iteration = 0; - for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.disable_auto_compactions = true; - options.statistics = CreateDBStatistics(); - // Enable prefix bloom for SST files - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy = Create(10, bfp_impl); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options); - ReadOptions read_options; - read_options.prefix_same_as_start = true; - // create a new CF and set prefix_extractor dynamically - options.prefix_extractor.reset(NewCappedPrefixTransform(3)); - CreateColumnFamilies({"ramen_dojo_" + std::to_string(iteration)}, options); - ASSERT_EQ(dbfull()->GetOptions(handles_[2]).prefix_extractor->AsString(), - "rocksdb.CappedPrefix.3"); - ASSERT_OK(Put(2, "foo3", "bar3")); - ASSERT_OK(Put(2, "foo4", "bar4")); - ASSERT_OK(Put(2, "foo5", "bar5")); - ASSERT_OK(Put(2, "foq6", "bar6")); - ASSERT_OK(Put(2, "fpq7", "bar7")); - dbfull()->Flush(FlushOptions()); - { - std::unique_ptr iter( - db_->NewIterator(read_options, handles_[2])); - ASSERT_EQ(CountIter(iter, "foo"), 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - ASSERT_OK( - dbfull()->SetOptions(handles_[2], {{"prefix_extractor", "fixed:2"}})); - ASSERT_EQ(dbfull()->GetOptions(handles_[2]).prefix_extractor->AsString(), - "rocksdb.FixedPrefix.2"); - { - std::unique_ptr iter( - db_->NewIterator(read_options, handles_[2])); - ASSERT_EQ(CountIter(iter, "foo"), 4); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - ASSERT_OK(dbfull()->DropColumnFamily(handles_[2])); - ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[2])); - handles_[2] = nullptr; - ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1])); - handles_[1] = nullptr; - iteration++; - } -} - -// Verify it's possible to change prefix_extractor at runtime and iterators -// behaves as expected -TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { - for (const auto& bfp_impl : BloomLikeFilterPolicy::GetAllFixedImpls()) { - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.disable_auto_compactions = true; - options.statistics = CreateDBStatistics(); - // Enable prefix bloom for SST files - BlockBasedTableOptions table_options; - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy = Create(10, bfp_impl); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - ASSERT_OK(Put("foo2", "bar2")); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("fpa", "0")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put("foo3", "bar3")); - ASSERT_OK(Put("foo4", "bar4")); - ASSERT_OK(Put("foo5", "bar5")); 
- ASSERT_OK(Put("fpb", "1")); - dbfull()->Flush(FlushOptions()); - ASSERT_OK(Put("foo6", "bar6")); - ASSERT_OK(Put("foo7", "bar7")); - ASSERT_OK(Put("foo8", "bar8")); - ASSERT_OK(Put("fpc", "2")); - dbfull()->Flush(FlushOptions()); - - ReadOptions read_options; - read_options.prefix_same_as_start = true; - { - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter, "foo"), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - std::unique_ptr iter_old(db_->NewIterator(read_options)); - ASSERT_EQ(CountIter(iter_old, "foo"), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); - ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), - "rocksdb.CappedPrefix.3"); - { - std::unique_ptr iter(db_->NewIterator(read_options)); - // "fp*" should be skipped - ASSERT_EQ(CountIter(iter, "foo"), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - } - - // iterator created before should not be affected and see all keys - ASSERT_EQ(CountIter(iter_old, "foo"), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); - ASSERT_EQ(CountIter(iter_old, "abc"), 0); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12); - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); - } -} - -TEST_F(DBBloomFilterTest, SeekForPrevWithPartitionedFilters) { - Options options = CurrentOptions(); - constexpr size_t kNumKeys = 10000; - static_assert(kNumKeys <= 10000, "kNumKeys have to be <= 10000"); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeys + 10)); - options.create_if_missing = true; - constexpr size_t kPrefixLength = 4; - options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixLength)); - options.compression = kNoCompression; - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(50)); - bbto.index_shortening = - BlockBasedTableOptions::IndexShorteningMode::kNoShortening; - bbto.block_size = 128; - bbto.metadata_block_size = 128; - bbto.partition_filters = true; - bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - const std::string value(64, '\0'); - - WriteOptions write_opts; - write_opts.disableWAL = true; - for (size_t i = 0; i < kNumKeys; ++i) { - std::ostringstream oss; - oss << std::setfill('0') << std::setw(4) << std::fixed << i; - ASSERT_OK(db_->Put(write_opts, oss.str(), value)); - } - ASSERT_OK(Flush()); - - ReadOptions read_opts; - // Use legacy, implicit prefix seek - read_opts.total_order_seek = false; - read_opts.auto_prefix_mode = false; - std::unique_ptr it(db_->NewIterator(read_opts)); - for (size_t i = 0; i < kNumKeys; ++i) { - // Seek with a key after each one added but with same prefix. One will - // surely cross a partition boundary. 
- std::ostringstream oss; - oss << std::setfill('0') << std::setw(4) << std::fixed << i << "a"; - it->SeekForPrev(oss.str()); - ASSERT_OK(it->status()); - ASSERT_TRUE(it->Valid()); - } - it.reset(); -} - -namespace { -class BackwardBytewiseComparator : public Comparator { - public: - const char* Name() const override { return "BackwardBytewiseComparator"; } - - int Compare(const Slice& a, const Slice& b) const override { - int min_size_neg = -static_cast(std::min(a.size(), b.size())); - const char* a_end = a.data() + a.size(); - const char* b_end = b.data() + b.size(); - for (int i = -1; i >= min_size_neg; --i) { - if (a_end[i] != b_end[i]) { - if (static_cast(a_end[i]) < - static_cast(b_end[i])) { - return -1; - } else { - return 1; - } - } - } - return static_cast(a.size()) - static_cast(b.size()); - } - - void FindShortestSeparator(std::string* /*start*/, - const Slice& /*limit*/) const override {} - - void FindShortSuccessor(std::string* /*key*/) const override {} -}; - -const BackwardBytewiseComparator kBackwardBytewiseComparator{}; - -class FixedSuffix4Transform : public SliceTransform { - const char* Name() const override { return "FixedSuffixTransform"; } - - Slice Transform(const Slice& src) const override { - return Slice(src.data() + src.size() - 4, 4); - } - - bool InDomain(const Slice& src) const override { return src.size() >= 4; } -}; - -std::pair GetBloomStat(const Options& options, bool sst) { - if (sst) { - return { - options.statistics->getAndResetTickerCount(BLOOM_FILTER_PREFIX_CHECKED), - options.statistics->getAndResetTickerCount(BLOOM_FILTER_PREFIX_USEFUL)}; - } else { - auto hit = std::exchange(get_perf_context()->bloom_memtable_hit_count, 0); - auto miss = std::exchange(get_perf_context()->bloom_memtable_miss_count, 0); - return {hit + miss, miss}; - } -} - -std::pair CheckedAndUseful(uint64_t checked, - uint64_t useful) { - return {checked, useful}; -} -} // anonymous namespace - -// This uses a prefix_extractor + comparator combination that violates -// one of the old obsolete, unnecessary axioms of prefix extraction: -// * key.starts_with(prefix(key)) -// This axiom is not really needed, and we validate that here. -TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter1) { - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); - bbto.whole_key_filtering = false; - - Options options = CurrentOptions(); - options.comparator = &kBackwardBytewiseComparator; - options.prefix_extractor = std::make_shared(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.memtable_prefix_bloom_size_ratio = 0.1; - options.statistics = CreateDBStatistics(); - - DestroyAndReopen(options); - - ASSERT_OK(Put("321aaaa", "val1")); - ASSERT_OK(Put("112aaaa", "val2")); - ASSERT_OK(Put("009aaaa", "val3")); - ASSERT_OK(Put("baa", "val4")); // out of domain - ASSERT_OK(Put("321abaa", "val5")); - ASSERT_OK(Put("zzz", "val6")); // out of domain - - for (auto flushed : {false, true}) { - SCOPED_TRACE("flushed=" + std::to_string(flushed)); - if (flushed) { - ASSERT_OK(Flush()); - } - ReadOptions read_options; - if (flushed) { // TODO: support auto_prefix_mode in memtable? 
- read_options.auto_prefix_mode = true; - } - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); - { - Slice ub("999aaaa"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaaa"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - { - Slice ub("999abaa"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "abaa"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - { - Slice ub("999acaa"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "acaa"), 0); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 1)); - } - { - Slice ub("zzzz"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "baa"), 3); - if (flushed) { // TODO: fix memtable case - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); - } - } - } -} - -// This uses a prefix_extractor + comparator combination that violates -// one of the old obsolete, unnecessary axioms of prefix extraction: -// * Compare(prefix(key), key) <= 0 -// This axiom is not really needed, and we validate that here. -TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter2) { - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); - bbto.whole_key_filtering = false; - - Options options = CurrentOptions(); - options.comparator = ReverseBytewiseComparator(); - options.prefix_extractor.reset(NewFixedPrefixTransform(4)); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.memtable_prefix_bloom_size_ratio = 0.1; - options.statistics = CreateDBStatistics(); - - DestroyAndReopen(options); - - ASSERT_OK(Put("aaaa123", "val1")); - ASSERT_OK(Put("aaaa211", "val2")); - ASSERT_OK(Put("aaaa900", "val3")); - ASSERT_OK(Put("aab", "val4")); // out of domain - ASSERT_OK(Put("aaba123", "val5")); - ASSERT_OK(Put("qqqq123", "val7")); - ASSERT_OK(Put("qqqq", "val8")); - ASSERT_OK(Put("zzz", "val8")); // out of domain - - for (auto flushed : {false, true}) { - SCOPED_TRACE("flushed=" + std::to_string(flushed)); - if (flushed) { - ASSERT_OK(Flush()); - } - ReadOptions read_options; - if (flushed) { // TODO: support auto_prefix_mode in memtable? - read_options.auto_prefix_mode = true; - } else { - // TODO: why needed? 
- get_perf_context()->bloom_memtable_hit_count = 0; - get_perf_context()->bloom_memtable_miss_count = 0; - } - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); - { - Slice ub("aaaa000"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaaa999"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - { - // Note: prefix does work as upper bound - Slice ub("aaaa"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaaa999"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - { - // Note: prefix does not work here as seek key - Slice ub("aaaa500"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaaa"), 0); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - { - Slice ub("aaba000"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaba999"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - { - Slice ub("aaca000"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaca999"), 0); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 1)); - } - { - Slice ub("aaaz"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "zzz"), 5); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); - } - { - // Note: prefix does work here as seek key, but only finds key equal - // to prefix (others with same prefix are less) - read_options.auto_prefix_mode = false; - read_options.iterate_upper_bound = nullptr; - read_options.prefix_same_as_start = true; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "qqqq"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - } -} - -namespace { -// A weird comparator that in combination with NonIdempotentFixed4Transform -// breaks an old axiom of prefix filtering. 
-class WeirdComparator : public Comparator {
- public:
-  const char* Name() const override { return "WeirdComparator"; }
-
-  int Compare(const Slice& a, const Slice& b) const override {
-    bool a_in = a.size() >= 5;
-    bool b_in = b.size() >= 5;
-    if (a_in != b_in) {
-      // Order keys after prefixes
-      return a_in - b_in;
-    }
-    if (a_in) {
-      return BytewiseComparator()->Compare(a, b);
-    } else {
-      // Different ordering on the prefixes
-      return ReverseBytewiseComparator()->Compare(a, b);
-    }
-  }
-
-  void FindShortestSeparator(std::string* /*start*/,
-                             const Slice& /*limit*/) const override {}
-
-  void FindShortSuccessor(std::string* /*key*/) const override {}
-};
-const WeirdComparator kWeirdComparator{};
-
-// Non-idempotent because prefix is always 4 bytes, but this is
-// out-of-domain for keys to be assigned prefixes (>= 5 bytes)
-class NonIdempotentFixed4Transform : public SliceTransform {
-  const char* Name() const override { return "NonIdempotentFixed4Transform"; }
-
-  Slice Transform(const Slice& src) const override {
-    return Slice(src.data(), 4);
-  }
-
-  bool InDomain(const Slice& src) const override { return src.size() >= 5; }
-};
-}  // anonymous namespace
-
-// This uses a prefix_extractor + comparator combination that violates
-// two of the old obsolete, unnecessary axioms of prefix extraction:
-// * prefix(prefix(key)) == prefix(key)
-// * If Compare(k1, k2) <= 0, then Compare(prefix(k1), prefix(k2)) <= 0
-// These axioms are not really needed, and we validate that here.
-TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter3) {
-  BlockBasedTableOptions bbto;
-  bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
-  bbto.whole_key_filtering = false;
-
-  Options options = CurrentOptions();
-  options.prefix_extractor = std::make_shared<NonIdempotentFixed4Transform>();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  options.memtable_prefix_bloom_size_ratio = 0.1;
-  options.statistics = CreateDBStatistics();
-
-  for (auto weird_comparator : {false, true}) {
-    if (weird_comparator) {
-      options.comparator = &kWeirdComparator;
-    }
-    DestroyAndReopen(options);
-
-    ASSERT_OK(Put("aaaa123", "val1"));
-    ASSERT_OK(Put("aaaa211", "val2"));
-    ASSERT_OK(Put("aaaa900", "val3"));
-    ASSERT_OK(Put("aab", "val4"));  // out of domain
-    ASSERT_OK(Put("aaba123", "val5"));
-    ASSERT_OK(Put("qqqq123", "val7"));
-    ASSERT_OK(Put("qqqq", "val8"));  // out of domain
-    ASSERT_OK(Put("zzzz", "val8"));  // out of domain
-
-    for (auto flushed : {false, true}) {
-      SCOPED_TRACE("flushed=" + std::to_string(flushed));
-      if (flushed) {
-        ASSERT_OK(Flush());
-      }
-      ReadOptions read_options;
-      if (flushed) {  // TODO: support auto_prefix_mode in memtable?
-        read_options.auto_prefix_mode = true;
-      } else {
-        // TODO: why needed?
- get_perf_context()->bloom_memtable_hit_count = 0; - get_perf_context()->bloom_memtable_miss_count = 0; - } - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); - { - Slice ub("aaaa999"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaaa000"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - { - // Note: prefix as seek key is not bloom-optimized - // Note: the count works with weird_comparator because "aaaa" is - // ordered as the last of the prefixes - Slice ub("aaaa999"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaaa"), 3); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); - } - { - Slice ub("aaba9"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaba0"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - { - Slice ub("aaca9"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aaca0"), 0); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 1)); - } - { - Slice ub("qqqq9"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "qqqq0"), 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(1, 0)); - } - { - // Note: prefix as seek key is not bloom-optimized - Slice ub("qqqq9"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "qqqq"), weird_comparator ? 7 : 2); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); - } - { - // Note: prefix as seek key is not bloom-optimized - Slice ub("zzzz9"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "zzzz"), weird_comparator ? 8 : 1); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); - } - { - Slice ub("zzzz9"); - read_options.iterate_upper_bound = &ub; - std::unique_ptr iter(db_->NewIterator(read_options)); - EXPECT_EQ(CountIter(iter, "aab"), weird_comparator ? 6 : 5); - EXPECT_EQ(GetBloomStat(options, flushed), CheckedAndUseful(0, 0)); - } - } - } -} - - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_compaction_filter_test.cc b/db/db_compaction_filter_test.cc deleted file mode 100644 index 0b3f3dedc..000000000 --- a/db/db_compaction_filter_test.cc +++ /dev/null @@ -1,1030 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include "db/db_test_util.h" -#include "port/stack_trace.h" - -namespace ROCKSDB_NAMESPACE { - -static int cfilter_count = 0; -static int cfilter_skips = 0; - -// This is a static filter used for filtering -// kvs during the compaction process. -static std::string NEW_VALUE = "NewValue"; - -class DBTestCompactionFilter : public DBTestBase { - public: - DBTestCompactionFilter() - : DBTestBase("db_compaction_filter_test", /*env_do_fsync=*/true) {} -}; - -// Param variant of DBTestBase::ChangeCompactOptions -class DBTestCompactionFilterWithCompactParam - : public DBTestCompactionFilter, - public ::testing::WithParamInterface { - public: - DBTestCompactionFilterWithCompactParam() : DBTestCompactionFilter() { - option_config_ = GetParam(); - Destroy(last_options_); - auto options = CurrentOptions(); - if (option_config_ == kDefault || option_config_ == kUniversalCompaction || - option_config_ == kUniversalCompactionMultiLevel) { - options.create_if_missing = true; - } - if (option_config_ == kLevelSubcompactions || - option_config_ == kUniversalSubcompactions) { - assert(options.max_subcompactions > 1); - } - Reopen(options); - } -}; - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -INSTANTIATE_TEST_CASE_P( - CompactionFilterWithOption, DBTestCompactionFilterWithCompactParam, - ::testing::Values(DBTestBase::OptionConfig::kDefault, - DBTestBase::OptionConfig::kUniversalCompaction, - DBTestBase::OptionConfig::kUniversalCompactionMultiLevel, - DBTestBase::OptionConfig::kLevelSubcompactions, - DBTestBase::OptionConfig::kUniversalSubcompactions)); -#else -// Run fewer cases in non-full valgrind to save time. -INSTANTIATE_TEST_CASE_P(CompactionFilterWithOption, - DBTestCompactionFilterWithCompactParam, - ::testing::Values(DBTestBase::OptionConfig::kDefault)); -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -class KeepFilter : public CompactionFilter { - public: - bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - cfilter_count++; - return false; - } - - const char* Name() const override { return "KeepFilter"; } -}; - -class DeleteFilter : public CompactionFilter { - public: - bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - cfilter_count++; - return true; - } - - bool FilterMergeOperand(int /*level*/, const Slice& /*key*/, - const Slice& /*operand*/) const override { - return true; - } - - const char* Name() const override { return "DeleteFilter"; } -}; - -class DeleteISFilter : public CompactionFilter { - public: - bool Filter(int /*level*/, const Slice& key, const Slice& /*value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - cfilter_count++; - int i = std::stoi(key.ToString()); - if (i > 5 && i <= 105) { - return true; - } - return false; - } - - bool IgnoreSnapshots() const override { return true; } - - const char* Name() const override { return "DeleteFilter"; } -}; - -// Skip x if floor(x/10) is even, use range skips. Requires that keys are -// zero-padded to length 10. 
-class SkipEvenFilter : public CompactionFilter {
- public:
-  Decision FilterV2(int /*level*/, const Slice& key, ValueType /*value_type*/,
-                    const Slice& /*existing_value*/, std::string* /*new_value*/,
-                    std::string* skip_until) const override {
-    cfilter_count++;
-    int i = std::stoi(key.ToString());
-    if (i / 10 % 2 == 0) {
-      char key_str[100];
-      snprintf(key_str, sizeof(key_str), "%010d", i / 10 * 10 + 10);
-      *skip_until = key_str;
-      ++cfilter_skips;
-      return Decision::kRemoveAndSkipUntil;
-    }
-    return Decision::kKeep;
-  }
-
-  bool IgnoreSnapshots() const override { return true; }
-
-  const char* Name() const override { return "DeleteFilter"; }
-};
-
-class ConditionalFilter : public CompactionFilter {
- public:
-  explicit ConditionalFilter(const std::string* filtered_value)
-      : filtered_value_(filtered_value) {}
-  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& value,
-              std::string* /*new_value*/,
-              bool* /*value_changed*/) const override {
-    return value.ToString() == *filtered_value_;
-  }
-
-  const char* Name() const override { return "ConditionalFilter"; }
-
- private:
-  const std::string* filtered_value_;
-};
-
-class ChangeFilter : public CompactionFilter {
- public:
-  explicit ChangeFilter() {}
-
-  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
-              std::string* new_value, bool* value_changed) const override {
-    assert(new_value != nullptr);
-    *new_value = NEW_VALUE;
-    *value_changed = true;
-    return false;
-  }
-
-  const char* Name() const override { return "ChangeFilter"; }
-};
-
-class KeepFilterFactory : public CompactionFilterFactory {
- public:
-  explicit KeepFilterFactory(bool check_context = false,
-                             bool check_context_cf_id = false)
-      : check_context_(check_context),
-        check_context_cf_id_(check_context_cf_id),
-        compaction_filter_created_(false) {}
-
-  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
-      const CompactionFilter::Context& context) override {
-    if (check_context_) {
-      EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
-      EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
-    }
-    if (check_context_cf_id_) {
-      EXPECT_EQ(expect_cf_id_.load(), context.column_family_id);
-    }
-    compaction_filter_created_ = true;
-    return std::unique_ptr<CompactionFilter>(new KeepFilter());
-  }
-
-  bool compaction_filter_created() const { return compaction_filter_created_; }
-
-  const char* Name() const override { return "KeepFilterFactory"; }
-  bool check_context_;
-  bool check_context_cf_id_;
-  std::atomic_bool expect_full_compaction_;
-  std::atomic_bool expect_manual_compaction_;
-  std::atomic<uint32_t> expect_cf_id_;
-  bool compaction_filter_created_;
-};
-
-// This filter factory is configured with a `TableFileCreationReason`. Only
-// table files created for that reason will undergo filtering. This
-// configurability makes it useful in tests for filtering non-compaction table
-// files, such as "CompactionFilterFlush" and "CompactionFilterRecovery".
-class DeleteFilterFactory : public CompactionFilterFactory {
- public:
-  explicit DeleteFilterFactory(TableFileCreationReason reason)
-      : reason_(reason) {}
-
-  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
-      const CompactionFilter::Context& context) override {
-    EXPECT_EQ(reason_, context.reason);
-    if (context.reason == TableFileCreationReason::kCompaction &&
-        !context.is_manual_compaction) {
-      // Table files created by automatic compaction do not undergo filtering.
-      // Presumably some tests rely on this.
- return std::unique_ptr(nullptr); - } - return std::unique_ptr(new DeleteFilter()); - } - - bool ShouldFilterTableFileCreation( - TableFileCreationReason reason) const override { - return reason_ == reason; - } - - const char* Name() const override { return "DeleteFilterFactory"; } - - private: - const TableFileCreationReason reason_; -}; - -// Delete Filter Factory which ignores snapshots -class DeleteISFilterFactory : public CompactionFilterFactory { - public: - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& context) override { - if (context.is_manual_compaction) { - return std::unique_ptr(new DeleteISFilter()); - } else { - return std::unique_ptr(nullptr); - } - } - - const char* Name() const override { return "DeleteFilterFactory"; } -}; - -class SkipEvenFilterFactory : public CompactionFilterFactory { - public: - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& context) override { - if (context.is_manual_compaction) { - return std::unique_ptr(new SkipEvenFilter()); - } else { - return std::unique_ptr(nullptr); - } - } - - const char* Name() const override { return "SkipEvenFilterFactory"; } -}; - -class ConditionalFilterFactory : public CompactionFilterFactory { - public: - explicit ConditionalFilterFactory(const Slice& filtered_value) - : filtered_value_(filtered_value.ToString()) {} - - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& /*context*/) override { - return std::unique_ptr( - new ConditionalFilter(&filtered_value_)); - } - - const char* Name() const override { return "ConditionalFilterFactory"; } - - private: - std::string filtered_value_; -}; - -class ChangeFilterFactory : public CompactionFilterFactory { - public: - explicit ChangeFilterFactory() {} - - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& /*context*/) override { - return std::unique_ptr(new ChangeFilter()); - } - - const char* Name() const override { return "ChangeFilterFactory"; } -}; - -TEST_F(DBTestCompactionFilter, CompactionFilter) { - Options options = CurrentOptions(); - options.max_open_files = -1; - options.num_levels = 3; - options.compaction_filter_factory = std::make_shared(); - options = CurrentOptions(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Write 100K keys, these are written to a few files in L0. - const std::string value(10, 'x'); - for (int i = 0; i < 100000; i++) { - char key[100]; - snprintf(key, sizeof(key), "B%010d", i); - ASSERT_OK(Put(1, key, value)); - } - ASSERT_OK(Flush(1)); - - // Push all files to the highest level L2. Verify that - // the compaction is each level invokes the filter for - // all the keys in that level. - cfilter_count = 0; - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); - ASSERT_EQ(cfilter_count, 100000); - cfilter_count = 0; - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); - ASSERT_EQ(cfilter_count, 100000); - - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0); - ASSERT_NE(NumTableFilesAtLevel(2, 1), 0); - cfilter_count = 0; - - // All the files are in the lowest level. - // Verify that all but the 100001st record - // has sequence number zero. The 100001st record - // is at the tip of this snapshot and cannot - // be zeroed out. 
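  // (Compaction into the bottommost level rewrites sequence numbers to zero
  // for entries that no snapshot can still reference; the loop below counts
  // any entries whose sequence number was not zeroed.)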
- int count = 0; - int total = 0; - Arena arena; - { - InternalKeyComparator icmp(options.comparator); - ReadOptions read_options; - ScopedArenaIterator iter(dbfull()->NewInternalIterator( - read_options, &arena, kMaxSequenceNumber, handles_[1])); - iter->SeekToFirst(); - ASSERT_OK(iter->status()); - while (iter->Valid()) { - ParsedInternalKey ikey(Slice(), 0, kTypeValue); - ASSERT_OK(ParseInternalKey(iter->key(), &ikey, true /* log_err_key */)); - total++; - if (ikey.sequence != 0) { - count++; - } - iter->Next(); - } - ASSERT_OK(iter->status()); - } - ASSERT_EQ(total, 100000); - ASSERT_EQ(count, 0); - - // overwrite all the 100K keys once again. - for (int i = 0; i < 100000; i++) { - char key[100]; - snprintf(key, sizeof(key), "B%010d", i); - ASSERT_OK(Put(1, key, value)); - } - ASSERT_OK(Flush(1)); - - // push all files to the highest level L2. This - // means that all keys should pass at least once - // via the compaction filter - cfilter_count = 0; - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); - ASSERT_EQ(cfilter_count, 100000); - cfilter_count = 0; - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); - ASSERT_EQ(cfilter_count, 100000); - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0); - ASSERT_NE(NumTableFilesAtLevel(2, 1), 0); - - // create a new database with the compaction - // filter in such a way that it deletes all keys - options.compaction_filter_factory = std::make_shared( - TableFileCreationReason::kCompaction); - options.create_if_missing = true; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // write all the keys once again. - for (int i = 0; i < 100000; i++) { - char key[100]; - snprintf(key, sizeof(key), "B%010d", i); - ASSERT_OK(Put(1, key, value)); - } - ASSERT_OK(Flush(1)); - ASSERT_NE(NumTableFilesAtLevel(0, 1), 0); - ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0); - ASSERT_EQ(NumTableFilesAtLevel(2, 1), 0); - - // Push all files to the highest level L2. This - // triggers the compaction filter to delete all keys, - // verify that at the end of the compaction process, - // nothing is left. 
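  // The L0->L1 pass below invokes DeleteFilter on all 100000 keys and drops
  // them, so the following L1->L2 pass finds nothing to compact and the
  // filter is not invoked at all.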
- cfilter_count = 0; - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); - ASSERT_EQ(cfilter_count, 100000); - cfilter_count = 0; - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); - ASSERT_EQ(cfilter_count, 0); - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0); - - { - // Scan the entire database to ensure that nothing is left - std::unique_ptr iter( - db_->NewIterator(ReadOptions(), handles_[1])); - iter->SeekToFirst(); - count = 0; - while (iter->Valid()) { - count++; - iter->Next(); - } - ASSERT_OK(iter->status()); - ASSERT_EQ(count, 0); - } - - // The sequence number of the remaining record - // is not zeroed out even though it is at the - // level Lmax because this record is at the tip - count = 0; - { - InternalKeyComparator icmp(options.comparator); - ReadOptions read_options; - ScopedArenaIterator iter(dbfull()->NewInternalIterator( - read_options, &arena, kMaxSequenceNumber, handles_[1])); - iter->SeekToFirst(); - ASSERT_OK(iter->status()); - while (iter->Valid()) { - ParsedInternalKey ikey(Slice(), 0, kTypeValue); - ASSERT_OK(ParseInternalKey(iter->key(), &ikey, true /* log_err_key */)); - ASSERT_NE(ikey.sequence, (unsigned)0); - count++; - iter->Next(); - } - ASSERT_EQ(count, 0); - } -} - -// Tests the edge case where compaction does not produce any output -- all -// entries are deleted. The compaction should create bunch of 'DeleteFile' -// entries in VersionEdit, but none of the 'AddFile's. -TEST_F(DBTestCompactionFilter, CompactionFilterDeletesAll) { - Options options = CurrentOptions(); - options.compaction_filter_factory = std::make_shared( - TableFileCreationReason::kCompaction); - options.disable_auto_compactions = true; - options.create_if_missing = true; - DestroyAndReopen(options); - - // put some data - for (int table = 0; table < 4; ++table) { - for (int i = 0; i < 10 + table; ++i) { - ASSERT_OK(Put(std::to_string(table * 100 + i), "val")); - } - ASSERT_OK(Flush()); - } - - // this will produce empty file (delete compaction filter) - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(0U, CountLiveFiles()); - - Reopen(options); - - Iterator* itr = db_->NewIterator(ReadOptions()); - itr->SeekToFirst(); - ASSERT_OK(itr->status()); - // empty db - ASSERT_TRUE(!itr->Valid()); - - delete itr; -} - -TEST_F(DBTestCompactionFilter, CompactionFilterFlush) { - // Tests a `CompactionFilterFactory` that filters when table file is created - // by flush. - Options options = CurrentOptions(); - options.compaction_filter_factory = - std::make_shared(TableFileCreationReason::kFlush); - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - Reopen(options); - - // Puts and Merges are purged in flush. - ASSERT_OK(Put("a", "v")); - ASSERT_OK(Merge("b", "v")); - ASSERT_OK(Flush()); - ASSERT_EQ("NOT_FOUND", Get("a")); - ASSERT_EQ("NOT_FOUND", Get("b")); - - // However, Puts and Merges are preserved by recovery. - ASSERT_OK(Put("a", "v")); - ASSERT_OK(Merge("b", "v")); - Reopen(options); - ASSERT_EQ("v", Get("a")); - ASSERT_EQ("v", Get("b")); - - // Likewise, compaction does not apply filtering. - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("v", Get("a")); - ASSERT_EQ("v", Get("b")); -} - -TEST_F(DBTestCompactionFilter, CompactionFilterRecovery) { - // Tests a `CompactionFilterFactory` that filters when table file is created - // by recovery. 
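  // ("Recovery" table files are the L0 files written when data replayed from
  // the WAL is flushed as part of a subsequent DB::Open / Reopen.)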
- Options options = CurrentOptions(); - options.compaction_filter_factory = - std::make_shared(TableFileCreationReason::kRecovery); - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - Reopen(options); - - // Puts and Merges are purged in recovery. - ASSERT_OK(Put("a", "v")); - ASSERT_OK(Merge("b", "v")); - Reopen(options); - ASSERT_EQ("NOT_FOUND", Get("a")); - ASSERT_EQ("NOT_FOUND", Get("b")); - - // However, Puts and Merges are preserved by flush. - ASSERT_OK(Put("a", "v")); - ASSERT_OK(Merge("b", "v")); - ASSERT_OK(Flush()); - ASSERT_EQ("v", Get("a")); - ASSERT_EQ("v", Get("b")); - - // Likewise, compaction does not apply filtering. - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("v", Get("a")); - ASSERT_EQ("v", Get("b")); -} - -TEST_P(DBTestCompactionFilterWithCompactParam, - CompactionFilterWithValueChange) { - Options options = CurrentOptions(); - options.num_levels = 3; - options.compaction_filter_factory = std::make_shared(); - CreateAndReopenWithCF({"pikachu"}, options); - - // Write 100K+1 keys, these are written to a few files - // in L0. We do this so that the current snapshot points - // to the 100001 key.The compaction filter is not invoked - // on keys that are visible via a snapshot because we - // anyways cannot delete it. - const std::string value(10, 'x'); - for (int i = 0; i < 100001; i++) { - char key[100]; - snprintf(key, sizeof(key), "B%010d", i); - ASSERT_OK(Put(1, key, value)); - } - - // push all files to lower levels - ASSERT_OK(Flush(1)); - if (option_config_ != kUniversalCompactionMultiLevel && - option_config_ != kUniversalSubcompactions) { - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); - } else { - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - } - - // re-write all data again - for (int i = 0; i < 100001; i++) { - char key[100]; - snprintf(key, sizeof(key), "B%010d", i); - ASSERT_OK(Put(1, key, value)); - } - - // push all files to lower levels. This should - // invoke the compaction filter for all 100000 keys. - ASSERT_OK(Flush(1)); - if (option_config_ != kUniversalCompactionMultiLevel && - option_config_ != kUniversalSubcompactions) { - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); - } else { - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - } - - // verify that all keys now have the new value that - // was set by the compaction process. - for (int i = 0; i < 100001; i++) { - char key[100]; - snprintf(key, sizeof(key), "B%010d", i); - std::string newvalue = Get(1, key); - ASSERT_EQ(newvalue.compare(NEW_VALUE), 0); - } -} - -TEST_F(DBTestCompactionFilter, CompactionFilterWithMergeOperator) { - std::string one, two, three, four; - PutFixed64(&one, 1); - PutFixed64(&two, 2); - PutFixed64(&three, 3); - PutFixed64(&four, 4); - - Options options = CurrentOptions(); - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreateUInt64AddOperator(); - options.num_levels = 3; - // Filter out keys with value is 2. - options.compaction_filter_factory = - std::make_shared(two); - DestroyAndReopen(options); - - // In the same compaction, a value type needs to be deleted based on - // compaction filter, and there is a merge type for the key. compaction - // filter result is ignored. 
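  // For "foo" below: the Put with value 2 matches the filter, but the Merge
  // operand stacked on top of it needs that Put as its merge base, so the
  // filter's decision on the Put is ignored and Get("foo") still returns the
  // merged value 3 (== two + one) after CompactRange.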
- ASSERT_OK(db_->Put(WriteOptions(), "foo", two)); - ASSERT_OK(Flush()); - ASSERT_OK(db_->Merge(WriteOptions(), "foo", one)); - ASSERT_OK(Flush()); - std::string newvalue = Get("foo"); - ASSERT_EQ(newvalue, three); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - newvalue = Get("foo"); - ASSERT_EQ(newvalue, three); - - // value key can be deleted based on compaction filter, leaving only - // merge keys. - ASSERT_OK(db_->Put(WriteOptions(), "bar", two)); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - newvalue = Get("bar"); - ASSERT_EQ("NOT_FOUND", newvalue); - ASSERT_OK(db_->Merge(WriteOptions(), "bar", two)); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - newvalue = Get("bar"); - ASSERT_EQ(two, two); - - // Compaction filter never applies to merge keys. - ASSERT_OK(db_->Put(WriteOptions(), "foobar", one)); - ASSERT_OK(Flush()); - ASSERT_OK(db_->Merge(WriteOptions(), "foobar", two)); - ASSERT_OK(Flush()); - newvalue = Get("foobar"); - ASSERT_EQ(newvalue, three); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - newvalue = Get("foobar"); - ASSERT_EQ(newvalue, three); - - // In the same compaction, both of value type and merge type keys need to be - // deleted based on compaction filter, and there is a merge type for the key. - // For both keys, compaction filter results are ignored. - ASSERT_OK(db_->Put(WriteOptions(), "barfoo", two)); - ASSERT_OK(Flush()); - ASSERT_OK(db_->Merge(WriteOptions(), "barfoo", two)); - ASSERT_OK(Flush()); - newvalue = Get("barfoo"); - ASSERT_EQ(newvalue, four); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - newvalue = Get("barfoo"); - ASSERT_EQ(newvalue, four); -} - -TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) { - KeepFilterFactory* filter = new KeepFilterFactory(true, true); - - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.compaction_filter_factory.reset(filter); - options.compression = kNoCompression; - options.level0_file_num_compaction_trigger = 8; - Reopen(options); - int num_keys_per_file = 400; - for (int j = 0; j < 3; j++) { - // Write several keys. - const std::string value(10, 'x'); - for (int i = 0; i < num_keys_per_file; i++) { - char key[100]; - snprintf(key, sizeof(key), "B%08d%02d", i, j); - ASSERT_OK(Put(key, value)); - } - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - // Make sure next file is much smaller so automatic compaction will not - // be triggered. - num_keys_per_file /= 2; - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // Force a manual compaction - cfilter_count = 0; - filter->expect_manual_compaction_.store(true); - filter->expect_full_compaction_.store(true); - filter->expect_cf_id_.store(0); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(cfilter_count, 700); - ASSERT_EQ(NumSortedRuns(0), 1); - ASSERT_TRUE(filter->compaction_filter_created()); - - // Verify total number of keys is correct after manual compaction. 
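  // (The three flushed files hold 400, 200 and 100 keys respectively, hence
  // the expected total of 700 entries and 700 filter invocations above.)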
- { - int count = 0; - int total = 0; - Arena arena; - InternalKeyComparator icmp(options.comparator); - ReadOptions read_options; - ScopedArenaIterator iter(dbfull()->NewInternalIterator(read_options, &arena, - kMaxSequenceNumber)); - iter->SeekToFirst(); - ASSERT_OK(iter->status()); - while (iter->Valid()) { - ParsedInternalKey ikey(Slice(), 0, kTypeValue); - ASSERT_OK(ParseInternalKey(iter->key(), &ikey, true /* log_err_key */)); - total++; - if (ikey.sequence != 0) { - count++; - } - iter->Next(); - } - ASSERT_EQ(total, 700); - ASSERT_EQ(count, 0); - } -} - -TEST_F(DBTestCompactionFilter, CompactionFilterContextCfId) { - KeepFilterFactory* filter = new KeepFilterFactory(false, true); - filter->expect_cf_id_.store(1); - - Options options = CurrentOptions(); - options.compaction_filter_factory.reset(filter); - options.compression = kNoCompression; - options.level0_file_num_compaction_trigger = 2; - CreateAndReopenWithCF({"pikachu"}, options); - - int num_keys_per_file = 400; - for (int j = 0; j < 3; j++) { - // Write several keys. - const std::string value(10, 'x'); - for (int i = 0; i < num_keys_per_file; i++) { - char key[100]; - snprintf(key, sizeof(key), "B%08d%02d", i, j); - ASSERT_OK(Put(1, key, value)); - } - ASSERT_OK(Flush(1)); - // Make sure next file is much smaller so automatic compaction will not - // be triggered. - num_keys_per_file /= 2; - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_TRUE(filter->compaction_filter_created()); -} - -// Compaction filters aplies to all records, regardless snapshots. -TEST_F(DBTestCompactionFilter, CompactionFilterIgnoreSnapshot) { - std::string five = std::to_string(5); - Options options = CurrentOptions(); - options.compaction_filter_factory = std::make_shared(); - options.disable_auto_compactions = true; - options.create_if_missing = true; - DestroyAndReopen(options); - - // Put some data. - const Snapshot* snapshot = nullptr; - for (int table = 0; table < 4; ++table) { - for (int i = 0; i < 10; ++i) { - ASSERT_OK(Put(std::to_string(table * 100 + i), "val")); - } - ASSERT_OK(Flush()); - - if (table == 0) { - snapshot = db_->GetSnapshot(); - } - } - assert(snapshot != nullptr); - - cfilter_count = 0; - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - // The filter should delete 40 records. - ASSERT_EQ(40, cfilter_count); - - { - // Scan the entire database as of the snapshot to ensure - // that nothing is left - ReadOptions read_options; - read_options.snapshot = snapshot; - std::unique_ptr iter(db_->NewIterator(read_options)); - iter->SeekToFirst(); - ASSERT_OK(iter->status()); - int count = 0; - while (iter->Valid()) { - count++; - iter->Next(); - } - ASSERT_EQ(count, 6); - read_options.snapshot = nullptr; - std::unique_ptr iter1(db_->NewIterator(read_options)); - ASSERT_OK(iter1->status()); - iter1->SeekToFirst(); - count = 0; - while (iter1->Valid()) { - count++; - iter1->Next(); - } - // We have deleted 10 keys from 40 using the compaction filter - // Keys 6-9 before the snapshot and 100-105 after the snapshot - ASSERT_EQ(count, 30); - } - - // Release the snapshot and compact again -> now all records should be - // removed. - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBTestCompactionFilter, SkipUntil) { - Options options = CurrentOptions(); - options.compaction_filter_factory = std::make_shared(); - options.disable_auto_compactions = true; - options.create_if_missing = true; - DestroyAndReopen(options); - - // Write 100K keys, these are written to a few files in L0. 
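  // (Each "skip" counted below is one kRemoveAndSkipUntil decision; the
  // filter makes one such decision per even decade, i.e. keys k with
  // (k / 10) % 2 == 0, inside each table's key range, giving per-table counts
  // of 2, 3, 3, 3 and a total of 11.)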
- for (int table = 0; table < 4; ++table) { - // Key ranges in tables are [0, 38], [106, 149], [212, 260], [318, 371]. - for (int i = table * 6; i < 39 + table * 11; ++i) { - char key[100]; - snprintf(key, sizeof(key), "%010d", table * 100 + i); - ASSERT_OK(Put(key, std::to_string(table * 1000 + i))); - } - ASSERT_OK(Flush()); - } - - cfilter_skips = 0; - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - // Number of skips in tables: 2, 3, 3, 3. - ASSERT_EQ(11, cfilter_skips); - - for (int table = 0; table < 4; ++table) { - for (int i = table * 6; i < 39 + table * 11; ++i) { - int k = table * 100 + i; - char key[100]; - snprintf(key, sizeof(key), "%010d", table * 100 + i); - auto expected = std::to_string(table * 1000 + i); - std::string val; - Status s = db_->Get(ReadOptions(), key, &val); - if (k / 10 % 2 == 0) { - ASSERT_TRUE(s.IsNotFound()); - } else { - ASSERT_OK(s); - ASSERT_EQ(expected, val); - } - } - } -} - -TEST_F(DBTestCompactionFilter, SkipUntilWithBloomFilter) { - BlockBasedTableOptions table_options; - table_options.whole_key_filtering = false; - table_options.filter_policy.reset(NewBloomFilterPolicy(100, false)); - - Options options = CurrentOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(NewCappedPrefixTransform(9)); - options.compaction_filter_factory = std::make_shared(); - options.disable_auto_compactions = true; - options.create_if_missing = true; - DestroyAndReopen(options); - - ASSERT_OK(Put("0000000010", "v10")); - ASSERT_OK(Put("0000000020", "v20")); // skipped - ASSERT_OK(Put("0000000050", "v50")); - ASSERT_OK(Flush()); - - cfilter_skips = 0; - EXPECT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - EXPECT_EQ(1, cfilter_skips); - - Status s; - std::string val; - - s = db_->Get(ReadOptions(), "0000000010", &val); - ASSERT_OK(s); - EXPECT_EQ("v10", val); - - s = db_->Get(ReadOptions(), "0000000020", &val); - EXPECT_TRUE(s.IsNotFound()); - - s = db_->Get(ReadOptions(), "0000000050", &val); - ASSERT_OK(s); - EXPECT_EQ("v50", val); -} - -class TestNotSupportedFilter : public CompactionFilter { - public: - bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - return true; - } - - const char* Name() const override { return "NotSupported"; } - bool IgnoreSnapshots() const override { return false; } -}; - -TEST_F(DBTestCompactionFilter, IgnoreSnapshotsFalse) { - Options options = CurrentOptions(); - options.compaction_filter = new TestNotSupportedFilter(); - DestroyAndReopen(options); - - ASSERT_OK(Put("a", "v10")); - ASSERT_OK(Put("z", "v20")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("a", "v10")); - ASSERT_OK(Put("z", "v20")); - ASSERT_OK(Flush()); - - // Comapction should fail because IgnoreSnapshots() = false - EXPECT_TRUE(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr) - .IsNotSupported()); - - delete options.compaction_filter; -} - -class TestNotSupportedFilterFactory : public CompactionFilterFactory { - public: - explicit TestNotSupportedFilterFactory(TableFileCreationReason reason) - : reason_(reason) {} - - bool ShouldFilterTableFileCreation( - TableFileCreationReason reason) const override { - return reason_ == reason; - } - - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& /* context */) override { - return std::unique_ptr(new TestNotSupportedFilter()); - } - - const char* Name() const override { return 
"TestNotSupportedFilterFactory"; } - - private: - const TableFileCreationReason reason_; -}; - -TEST_F(DBTestCompactionFilter, IgnoreSnapshotsFalseDuringFlush) { - Options options = CurrentOptions(); - options.compaction_filter_factory = - std::make_shared( - TableFileCreationReason::kFlush); - Reopen(options); - - ASSERT_OK(Put("a", "v10")); - ASSERT_TRUE(Flush().IsNotSupported()); -} - -TEST_F(DBTestCompactionFilter, IgnoreSnapshotsFalseRecovery) { - Options options = CurrentOptions(); - options.compaction_filter_factory = - std::make_shared( - TableFileCreationReason::kRecovery); - Reopen(options); - - ASSERT_OK(Put("a", "v10")); - ASSERT_TRUE(TryReopen(options).IsNotSupported()); -} - -TEST_F(DBTestCompactionFilter, DropKeyWithSingleDelete) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - - Reopen(options); - - ASSERT_OK(Put("a", "v0")); - ASSERT_OK(Put("b", "v0")); - const Snapshot* snapshot = db_->GetSnapshot(); - - ASSERT_OK(SingleDelete("b")); - ASSERT_OK(Flush()); - - { - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = options.num_levels - 1; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - } - - db_->ReleaseSnapshot(snapshot); - Close(); - - class DeleteFilterV2 : public CompactionFilter { - public: - Decision FilterV2(int /*level*/, const Slice& key, ValueType /*value_type*/, - const Slice& /*existing_value*/, - std::string* /*new_value*/, - std::string* /*skip_until*/) const override { - if (key.starts_with("b")) { - return Decision::kPurge; - } - return Decision::kRemove; - } - - const char* Name() const override { return "DeleteFilterV2"; } - } delete_filter_v2; - - options.compaction_filter = &delete_filter_v2; - options.level0_file_num_compaction_trigger = 2; - Reopen(options); - - ASSERT_OK(Put("b", "v1")); - ASSERT_OK(Put("x", "v1")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("r", "v1")); - ASSERT_OK(Put("z", "v1")); - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - Close(); - - options.compaction_filter = nullptr; - Reopen(options); - ASSERT_OK(SingleDelete("b")); - ASSERT_OK(Flush()); - { - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc deleted file mode 100644 index 55852aacd..000000000 --- a/db/db_compaction_test.cc +++ /dev/null @@ -1,9118 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include - -#include "compaction/compaction_picker_universal.h" -#include "db/blob/blob_index.h" -#include "db/db_test_util.h" -#include "db/dbformat.h" -#include "env/mock_env.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/concurrent_task_limiter.h" -#include "rocksdb/experimental.h" -#include "rocksdb/sst_file_writer.h" -#include "rocksdb/utilities/convenience.h" -#include "test_util/sync_point.h" -#include "test_util/testutil.h" -#include "util/concurrent_task_limiter_impl.h" -#include "util/random.h" -#include "utilities/fault_injection_env.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { - -// SYNC_POINT is not supported in released Windows mode. - -class CompactionStatsCollector : public EventListener { - public: - CompactionStatsCollector() - : compaction_completed_( - static_cast(CompactionReason::kNumOfReasons)) { - for (auto& v : compaction_completed_) { - v.store(0); - } - } - - ~CompactionStatsCollector() override {} - - void OnCompactionCompleted(DB* /* db */, - const CompactionJobInfo& info) override { - int k = static_cast(info.compaction_reason); - int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); - assert(k >= 0 && k < num_of_reasons); - compaction_completed_[k]++; - } - - void OnExternalFileIngested( - DB* /* db */, const ExternalFileIngestionInfo& /* info */) override { - int k = static_cast(CompactionReason::kExternalSstIngestion); - compaction_completed_[k]++; - } - - void OnFlushCompleted(DB* /* db */, const FlushJobInfo& /* info */) override { - int k = static_cast(CompactionReason::kFlush); - compaction_completed_[k]++; - } - - int NumberOfCompactions(CompactionReason reason) const { - int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); - int k = static_cast(reason); - assert(k >= 0 && k < num_of_reasons); - return compaction_completed_.at(k).load(); - } - - private: - std::vector> compaction_completed_; -}; - -class DBCompactionTest : public DBTestBase { - public: - DBCompactionTest() - : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) {} - - protected: - /* - * Verifies compaction stats of cfd are valid. - * - * For each level of cfd, its compaction stats are valid if - * 1) sum(stat.counts) == stat.count, and - * 2) stat.counts[i] == collector.NumberOfCompactions(i) - */ - void VerifyCompactionStats(ColumnFamilyData& cfd, - const CompactionStatsCollector& collector) { -#ifndef NDEBUG - InternalStats* internal_stats_ptr = cfd.internal_stats(); - ASSERT_NE(internal_stats_ptr, nullptr); - const std::vector& comp_stats = - internal_stats_ptr->TEST_GetCompactionStats(); - const int num_of_reasons = - static_cast(CompactionReason::kNumOfReasons); - std::vector counts(num_of_reasons, 0); - // Count the number of compactions caused by each CompactionReason across - // all levels. - for (const auto& stat : comp_stats) { - int sum = 0; - for (int i = 0; i < num_of_reasons; i++) { - counts[i] += stat.counts[i]; - sum += stat.counts[i]; - } - ASSERT_EQ(sum, stat.count); - } - // Verify InternalStats bookkeeping matches that of - // CompactionStatsCollector, assuming that all compactions complete. 
- for (int i = 0; i < num_of_reasons; i++) { - ASSERT_EQ(collector.NumberOfCompactions(static_cast(i)), - counts[i]); - } -#endif /* NDEBUG */ - } -}; - -class DBCompactionTestWithParam - : public DBTestBase, - public testing::WithParamInterface> { - public: - DBCompactionTestWithParam() - : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { - max_subcompactions_ = std::get<0>(GetParam()); - exclusive_manual_compaction_ = std::get<1>(GetParam()); - } - - // Required if inheriting from testing::WithParamInterface<> - static void SetUpTestCase() {} - static void TearDownTestCase() {} - - uint32_t max_subcompactions_; - bool exclusive_manual_compaction_; -}; - -class DBCompactionTestWithBottommostParam - : public DBTestBase, - public testing::WithParamInterface { - public: - DBCompactionTestWithBottommostParam() - : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { - bottommost_level_compaction_ = GetParam(); - } - - BottommostLevelCompaction bottommost_level_compaction_; -}; - -class DBCompactionDirectIOTest : public DBCompactionTest, - public ::testing::WithParamInterface { - public: - DBCompactionDirectIOTest() : DBCompactionTest() {} -}; - -// Param = true : target level is non-empty -// Param = false: level between target level and source level -// is not empty. -class ChangeLevelConflictsWithAuto - : public DBCompactionTest, - public ::testing::WithParamInterface { - public: - ChangeLevelConflictsWithAuto() : DBCompactionTest() {} -}; - -// Param = true: grab the compaction pressure token (enable -// parallel compactions) -// Param = false: Not grab the token (no parallel compactions) -class RoundRobinSubcompactionsAgainstPressureToken - : public DBCompactionTest, - public ::testing::WithParamInterface { - public: - RoundRobinSubcompactionsAgainstPressureToken() { - grab_pressure_token_ = GetParam(); - } - bool grab_pressure_token_; -}; - -class RoundRobinSubcompactionsAgainstResources - : public DBCompactionTest, - public ::testing::WithParamInterface> { - public: - RoundRobinSubcompactionsAgainstResources() { - total_low_pri_threads_ = std::get<0>(GetParam()); - max_compaction_limits_ = std::get<1>(GetParam()); - } - int total_low_pri_threads_; - int max_compaction_limits_; -}; - -namespace { -class FlushedFileCollector : public EventListener { - public: - FlushedFileCollector() {} - ~FlushedFileCollector() override {} - - void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { - std::lock_guard lock(mutex_); - flushed_files_.push_back(info.file_path); - } - - std::vector GetFlushedFiles() { - std::lock_guard lock(mutex_); - std::vector result; - for (auto fname : flushed_files_) { - result.push_back(fname); - } - return result; - } - - void ClearFlushedFiles() { flushed_files_.clear(); } - - private: - std::vector flushed_files_; - std::mutex mutex_; -}; - -class SstStatsCollector : public EventListener { - public: - SstStatsCollector() : num_ssts_creation_started_(0) {} - - void OnTableFileCreationStarted( - const TableFileCreationBriefInfo& /* info */) override { - ++num_ssts_creation_started_; - } - - int num_ssts_creation_started() { return num_ssts_creation_started_; } - - private: - std::atomic num_ssts_creation_started_; -}; - -static const int kCDTValueSize = 1000; -static const int kCDTKeysPerBuffer = 4; -static const int kCDTNumLevels = 8; -Options DeletionTriggerOptions(Options options) { - options.compression = kNoCompression; - options.write_buffer_size = kCDTKeysPerBuffer * (kCDTValueSize + 24); - options.min_write_buffer_number_to_merge = 
1; - options.max_write_buffer_size_to_maintain = 0; - options.num_levels = kCDTNumLevels; - options.level0_file_num_compaction_trigger = 1; - options.target_file_size_base = options.write_buffer_size * 2; - options.target_file_size_multiplier = 2; - options.max_bytes_for_level_base = - options.target_file_size_base * options.target_file_size_multiplier; - options.max_bytes_for_level_multiplier = 2; - options.disable_auto_compactions = false; - options.compaction_options_universal.max_size_amplification_percent = 100; - return options; -} - -bool HaveOverlappingKeyRanges(const Comparator* c, const SstFileMetaData& a, - const SstFileMetaData& b) { - if (c->CompareWithoutTimestamp(a.smallestkey, b.smallestkey) >= 0) { - if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) { - // b.smallestkey <= a.smallestkey <= b.largestkey - return true; - } - } else if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) { - // a.smallestkey < b.smallestkey <= a.largestkey - return true; - } - if (c->CompareWithoutTimestamp(a.largestkey, b.largestkey) <= 0) { - if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) { - // b.smallestkey <= a.largestkey <= b.largestkey - return true; - } - } else if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) { - // a.smallestkey <= b.largestkey < a.largestkey - return true; - } - return false; -} - -// Identifies all files between level "min_level" and "max_level" -// which has overlapping key range with "input_file_meta". -void GetOverlappingFileNumbersForLevelCompaction( - const ColumnFamilyMetaData& cf_meta, const Comparator* comparator, - int min_level, int max_level, const SstFileMetaData* input_file_meta, - std::set* overlapping_file_names) { - std::set overlapping_files; - overlapping_files.insert(input_file_meta); - for (int m = min_level; m <= max_level; ++m) { - for (auto& file : cf_meta.levels[m].files) { - for (auto* included_file : overlapping_files) { - if (HaveOverlappingKeyRanges(comparator, *included_file, file)) { - overlapping_files.insert(&file); - overlapping_file_names->insert(file.name); - break; - } - } - } - } -} - -void VerifyCompactionResult( - const ColumnFamilyMetaData& cf_meta, - const std::set& overlapping_file_numbers) { -#ifndef NDEBUG - for (auto& level : cf_meta.levels) { - for (auto& file : level.files) { - assert(overlapping_file_numbers.find(file.name) == - overlapping_file_numbers.end()); - } - } -#endif -} - -const SstFileMetaData* PickFileRandomly(const ColumnFamilyMetaData& cf_meta, - Random* rand, int* level = nullptr) { - auto file_id = rand->Uniform(static_cast(cf_meta.file_count)) + 1; - for (auto& level_meta : cf_meta.levels) { - if (file_id <= level_meta.files.size()) { - if (level != nullptr) { - *level = level_meta.level; - } - auto result = rand->Uniform(file_id); - return &(level_meta.files[result]); - } - file_id -= static_cast(level_meta.files.size()); - } - assert(false); - return nullptr; -} -} // anonymous namespace - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -// All the TEST_P tests run once with sub_compactions disabled (i.e. 
-// options.max_subcompactions = 1) and once with it enabled -TEST_P(DBCompactionTestWithParam, CompactionDeletionTrigger) { - for (int tid = 0; tid < 3; ++tid) { - uint64_t db_size[2]; - Options options = DeletionTriggerOptions(CurrentOptions()); - options.max_subcompactions = max_subcompactions_; - - if (tid == 1) { - // the following only disable stats update in DB::Open() - // and should not affect the result of this test. - options.skip_stats_update_on_db_open = true; - } else if (tid == 2) { - // third pass with universal compaction - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 1; - } - - DestroyAndReopen(options); - Random rnd(301); - - const int kTestSize = kCDTKeysPerBuffer * 1024; - std::vector values; - for (int k = 0; k < kTestSize; ++k) { - values.push_back(rnd.RandomString(kCDTValueSize)); - ASSERT_OK(Put(Key(k), values[k])); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0])); - - for (int k = 0; k < kTestSize; ++k) { - ASSERT_OK(Delete(Key(k))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1])); - - if (options.compaction_style == kCompactionStyleUniversal) { - // Claim: in universal compaction none of the original data will remain - // once compactions settle. - // - // Proof: The compensated size of the file containing the most tombstones - // is enough on its own to trigger size amp compaction. Size amp - // compaction is a full compaction, so all tombstones meet the obsolete - // keys they cover. - ASSERT_EQ(0, db_size[1]); - } else { - // Claim: in level compaction at most `db_size[0] / 2` of the original - // data will remain once compactions settle. - // - // Proof: Assume the original data is all in the bottom level. If it were - // not, it would meet its tombstone sooner. The original data size is - // large enough to require fanout to bottom level to be greater than - // `max_bytes_for_level_multiplier == 2`. In the level just above, - // tombstones must cover less than `db_size[0] / 4` bytes since fanout >= - // 2 and file size is compensated by doubling the size of values we expect - // are covered (`kDeletionWeightOnCompaction == 2`). The tombstones in - // levels above must cover less than `db_size[0] / 8` bytes of original - // data, `db_size[0] / 16`, and so on. - ASSERT_GT(db_size[0] / 2, db_size[1]); - } - } -} -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_F(DBCompactionTest, SkipStatsUpdateTest) { - // This test verify UpdateAccumulatedStats is not on - // if options.skip_stats_update_on_db_open = true - // The test will need to be updated if the internal behavior changes. 
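  // (UpdateAccumulatedStats loads table properties from existing files so
  // that deletion entries can be compensated in compaction scoring; with
  // skip_stats_update_on_db_open == true that work is skipped during
  // DB::Open, which the sync-point counter below detects.)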
- - Options options = DeletionTriggerOptions(CurrentOptions()); - options.disable_auto_compactions = true; - options.env = env_; - DestroyAndReopen(options); - Random rnd(301); - - const int kTestSize = kCDTKeysPerBuffer * 512; - std::vector values; - for (int k = 0; k < kTestSize; ++k) { - values.push_back(rnd.RandomString(kCDTValueSize)); - ASSERT_OK(Put(Key(k), values[k])); - } - - ASSERT_OK(Flush()); - - Close(); - - int update_acc_stats_called = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionStorageInfo::UpdateAccumulatedStats", - [&](void* /* arg */) { ++update_acc_stats_called; }); - SyncPoint::GetInstance()->EnableProcessing(); - - // Reopen the DB with stats-update disabled - options.skip_stats_update_on_db_open = true; - options.max_open_files = 20; - Reopen(options); - - ASSERT_EQ(update_acc_stats_called, 0); - - // Repeat the reopen process, but this time we enable - // stats-update. - options.skip_stats_update_on_db_open = false; - Reopen(options); - - ASSERT_GT(update_acc_stats_called, 0); - - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBCompactionTest, TestTableReaderForCompaction) { - Options options = CurrentOptions(); - options.env = env_; - options.max_open_files = 20; - options.level0_file_num_compaction_trigger = 3; - // Avoid many shards with small max_open_files, where as little as - // two table insertions could lead to an LRU eviction, depending on - // hash values. - options.table_cache_numshardbits = 2; - DestroyAndReopen(options); - Random rnd(301); - - int num_table_cache_lookup = 0; - int num_new_table_reader = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "TableCache::FindTable:0", [&](void* arg) { - assert(arg != nullptr); - bool no_io = *(reinterpret_cast(arg)); - if (!no_io) { - // filter out cases for table properties queries. - num_table_cache_lookup++; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "TableCache::GetTableReader:0", - [&](void* /*arg*/) { num_new_table_reader++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - for (int k = 0; k < options.level0_file_num_compaction_trigger; ++k) { - ASSERT_OK(Put(Key(k), Key(k))); - ASSERT_OK(Put(Key(10 - k), "bar")); - if (k < options.level0_file_num_compaction_trigger - 1) { - num_table_cache_lookup = 0; - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // preloading iterator issues one table cache lookup and create - // a new table reader, if not preloaded. - int old_num_table_cache_lookup = num_table_cache_lookup; - ASSERT_GE(num_table_cache_lookup, 1); - ASSERT_EQ(num_new_table_reader, 1); - - num_table_cache_lookup = 0; - num_new_table_reader = 0; - ASSERT_EQ(Key(k), Get(Key(k))); - // lookup iterator from table cache and no need to create a new one. - ASSERT_EQ(old_num_table_cache_lookup + num_table_cache_lookup, 2); - ASSERT_EQ(num_new_table_reader, 0); - } - } - - num_table_cache_lookup = 0; - num_new_table_reader = 0; - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Preloading iterator issues one table cache lookup and creates - // a new table reader. One file is created for flush and one for compaction. - // Compaction inputs make no table cache look-up for data/range deletion - // iterators - // May preload table cache too. 
- ASSERT_GE(num_table_cache_lookup, 2); - int old_num_table_cache_lookup2 = num_table_cache_lookup; - - // Create new iterator for: - // (1) 1 for verifying flush results - // (2) 1 for verifying compaction results. - // (3) New TableReaders will not be created for compaction inputs - ASSERT_EQ(num_new_table_reader, 2); - - num_table_cache_lookup = 0; - num_new_table_reader = 0; - ASSERT_EQ(Key(1), Get(Key(1))); - ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 5); - ASSERT_EQ(num_new_table_reader, 0); - - num_table_cache_lookup = 0; - num_new_table_reader = 0; - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 2; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - // Only verifying compaction outputs issues one table cache lookup - // for both data block and range deletion block). - // May preload table cache too. - ASSERT_GE(num_table_cache_lookup, 1); - old_num_table_cache_lookup2 = num_table_cache_lookup; - // One for verifying compaction results. - // No new iterator created for compaction. - ASSERT_EQ(num_new_table_reader, 1); - - num_table_cache_lookup = 0; - num_new_table_reader = 0; - ASSERT_EQ(Key(1), Get(Key(1))); - ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 3); - ASSERT_EQ(num_new_table_reader, 0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(DBCompactionTestWithParam, CompactionDeletionTriggerReopen) { - for (int tid = 0; tid < 2; ++tid) { - uint64_t db_size[3]; - Options options = DeletionTriggerOptions(CurrentOptions()); - options.max_subcompactions = max_subcompactions_; - - if (tid == 1) { - // second pass with universal compaction - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 1; - } - - DestroyAndReopen(options); - Random rnd(301); - - // round 1 --- insert key/value pairs. - const int kTestSize = kCDTKeysPerBuffer * 512; - std::vector values; - for (int k = 0; k < kTestSize; ++k) { - values.push_back(rnd.RandomString(kCDTValueSize)); - ASSERT_OK(Put(Key(k), values[k])); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0])); - Close(); - - // round 2 --- disable auto-compactions and issue deletions. - options.create_if_missing = false; - options.disable_auto_compactions = true; - Reopen(options); - - for (int k = 0; k < kTestSize; ++k) { - ASSERT_OK(Delete(Key(k))); - } - ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1])); - Close(); - // as auto_compaction is off, we shouldn't see any reduction in db size. - ASSERT_LE(db_size[0], db_size[1]); - - // round 3 --- reopen db with auto_compaction on and see if - // deletion compensation still work. - options.disable_auto_compactions = false; - Reopen(options); - // insert relatively small amount of data to trigger auto compaction. - for (int k = 0; k < kTestSize / 10; ++k) { - ASSERT_OK(Put(Key(k), values[k])); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[2])); - // this time we're expecting significant drop in size. - // - // See "CompactionDeletionTrigger" test for proof that at most - // `db_size[0] / 2` of the original data remains. In addition to that, this - // test inserts `db_size[0] / 10` to push the tombstones into SST files and - // then through automatic compactions. 
So in total `3 * db_size[0] / 5` of - // the original data may remain. - ASSERT_GT(3 * db_size[0] / 5, db_size[2]); - } -} - -TEST_F(DBCompactionTest, CompactRangeBottomPri) { - ASSERT_OK(Put(Key(50), "")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(100), "")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(200), "")); - ASSERT_OK(Flush()); - - { - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 2; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - } - ASSERT_EQ("0,0,3", FilesPerLevel(0)); - - ASSERT_OK(Put(Key(1), "")); - ASSERT_OK(Put(Key(199), "")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(2), "")); - ASSERT_OK(Put(Key(199), "")); - ASSERT_OK(Flush()); - ASSERT_EQ("2,0,3", FilesPerLevel(0)); - - // Now we have 2 L0 files, and 3 L2 files, and a manual compaction will - // be triggered. - // Two compaction jobs will run. One compacts 2 L0 files in Low Pri Pool - // and one compact to L2 in bottom pri pool. - int low_pri_count = 0; - int bottom_pri_count = 0; - SyncPoint::GetInstance()->SetCallBack( - "ThreadPoolImpl::Impl::BGThread:BeforeRun", [&](void* arg) { - Env::Priority* pri = reinterpret_cast(arg); - // First time is low pri pool in the test case. - if (low_pri_count == 0 && bottom_pri_count == 0) { - ASSERT_EQ(Env::Priority::LOW, *pri); - } - if (*pri == Env::Priority::LOW) { - low_pri_count++; - } else { - bottom_pri_count++; - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - env_->SetBackgroundThreads(1, Env::Priority::BOTTOM); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(1, low_pri_count); - ASSERT_EQ(1, bottom_pri_count); - ASSERT_EQ("0,0,2", FilesPerLevel(0)); - - // Recompact bottom most level uses bottom pool - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - ASSERT_EQ(1, low_pri_count); - ASSERT_EQ(2, bottom_pri_count); - - env_->SetBackgroundThreads(0, Env::Priority::BOTTOM); - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - // Low pri pool is used if bottom pool has size 0. - ASSERT_EQ(2, low_pri_count); - ASSERT_EQ(2, bottom_pri_count); - - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBCompactionTest, DisableStatsUpdateReopen) { - uint64_t db_size[3]; - for (int test = 0; test < 2; ++test) { - Options options = DeletionTriggerOptions(CurrentOptions()); - options.skip_stats_update_on_db_open = (test == 0); - - env_->random_read_counter_.Reset(); - DestroyAndReopen(options); - Random rnd(301); - - // round 1 --- insert key/value pairs. - const int kTestSize = kCDTKeysPerBuffer * 512; - std::vector values; - for (int k = 0; k < kTestSize; ++k) { - values.push_back(rnd.RandomString(kCDTValueSize)); - ASSERT_OK(Put(Key(k), values[k])); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // L1 and L2 can fit deletions iff size compensation does not take effect, - // i.e., when `skip_stats_update_on_db_open == true`. Move any remaining - // files at or above L2 down to L3 to ensure obsolete data does not - // accidentally meet its tombstone above L3. This makes the final size more - // deterministic and easy to see whether size compensation for deletions - // took effect. - MoveFilesToLevel(3 /* level */); - ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0])); - Close(); - - // round 2 --- disable auto-compactions and issue deletions. 
- options.create_if_missing = false; - options.disable_auto_compactions = true; - - env_->random_read_counter_.Reset(); - Reopen(options); - - for (int k = 0; k < kTestSize; ++k) { - ASSERT_OK(Delete(Key(k))); - } - ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1])); - Close(); - // as auto_compaction is off, we shouldn't see any reduction in db size. - ASSERT_LE(db_size[0], db_size[1]); - - // round 3 --- reopen db with auto_compaction on and see if - // deletion compensation still work. - options.disable_auto_compactions = false; - Reopen(options); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[2])); - - if (options.skip_stats_update_on_db_open) { - // If update stats on DB::Open is disable, we don't expect - // deletion entries taking effect. - // - // The deletions are small enough to fit in L1 and L2, and obsolete keys - // were moved to L3+, so none of the original data should have been - // dropped. - ASSERT_LE(db_size[0], db_size[2]); - } else { - // Otherwise, we should see a significant drop in db size. - // - // See "CompactionDeletionTrigger" test for proof that at most - // `db_size[0] / 2` of the original data remains. - ASSERT_GT(db_size[0] / 2, db_size[2]); - } - } -} - -TEST_P(DBCompactionTestWithParam, CompactionTrigger) { - const int kNumKeysPerFile = 100; - - Options options = CurrentOptions(); - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 << 10; - options.num_levels = 3; - options.level0_file_num_compaction_trigger = 3; - options.max_subcompactions = max_subcompactions_; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - CreateAndReopenWithCF({"pikachu"}, options); - - Random rnd(301); - - for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; - num++) { - std::vector values; - // Write 100KB (100 values, each 1K) - for (int i = 0; i < kNumKeysPerFile; i++) { - values.push_back(rnd.RandomString(990)); - ASSERT_OK(Put(1, Key(i), values[i])); - } - // put extra key to trigger flush - ASSERT_OK(Put(1, "", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - ASSERT_EQ(NumTableFilesAtLevel(0, 1), num + 1); - } - - // generate one more file in level-0, and should trigger level-0 compaction - std::vector values; - for (int i = 0; i < kNumKeysPerFile; i++) { - values.push_back(rnd.RandomString(990)); - ASSERT_OK(Put(1, Key(i), values[i])); - } - // put extra key to trigger flush - ASSERT_OK(Put(1, "", "")); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1); -} - -TEST_F(DBCompactionTest, BGCompactionsAllowed) { - // Create several column families. Make compaction triggers in all of them - // and see number of compactions scheduled to be less than allowed. - const int kNumKeysPerFile = 100; - - Options options = CurrentOptions(); - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 << 10; - options.num_levels = 3; - // Should speed up compaction when there are 4 files. - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 20; - options.soft_pending_compaction_bytes_limit = 1 << 30; // Infinitely large - options.max_background_compactions = 3; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - - // Block all threads in thread pool. 
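  // (With every LOW-priority thread occupied by a sleeping task, scheduled
  // compactions stay queued, so GetThreadPoolQueueLen() gives a deterministic
  // count of how many compactions were scheduled.)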
- const size_t kTotalTasks = 4; - env_->SetBackgroundThreads(4, Env::LOW); - test::SleepingBackgroundTask sleeping_tasks[kTotalTasks]; - for (size_t i = 0; i < kTotalTasks; i++) { - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_tasks[i], Env::Priority::LOW); - sleeping_tasks[i].WaitUntilSleeping(); - } - - CreateAndReopenWithCF({"one", "two", "three"}, options); - - Random rnd(301); - for (int cf = 0; cf < 4; cf++) { - for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { - for (int i = 0; i < kNumKeysPerFile; i++) { - ASSERT_OK(Put(cf, Key(i), "")); - } - // put extra key to trigger flush - ASSERT_OK(Put(cf, "", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); - ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1); - } - } - - // Now all column families qualify compaction but only one should be - // scheduled, because no column family hits speed up condition. - ASSERT_EQ(1u, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - - // Create two more files for one column family, which triggers speed up - // condition, three compactions will be scheduled. - for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { - for (int i = 0; i < kNumKeysPerFile; i++) { - ASSERT_OK(Put(2, Key(i), "")); - } - // put extra key to trigger flush - ASSERT_OK(Put(2, "", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2])); - ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1, - NumTableFilesAtLevel(0, 2)); - } - ASSERT_EQ(3U, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - - // Unblock all threads to unblock all compactions. - for (size_t i = 0; i < kTotalTasks; i++) { - sleeping_tasks[i].WakeUp(); - sleeping_tasks[i].WaitUntilDone(); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // Verify number of compactions allowed will come back to 1. - - for (size_t i = 0; i < kTotalTasks; i++) { - sleeping_tasks[i].Reset(); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_tasks[i], Env::Priority::LOW); - sleeping_tasks[i].WaitUntilSleeping(); - } - for (int cf = 0; cf < 4; cf++) { - for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { - for (int i = 0; i < kNumKeysPerFile; i++) { - ASSERT_OK(Put(cf, Key(i), "")); - } - // put extra key to trigger flush - ASSERT_OK(Put(cf, "", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); - ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1); - } - } - - // Now all column families qualify compaction but only one should be - // scheduled, because no column family hits speed up condition. 
- ASSERT_EQ(1U, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - - for (size_t i = 0; i < kTotalTasks; i++) { - sleeping_tasks[i].WakeUp(); - sleeping_tasks[i].WaitUntilDone(); - } -} - -TEST_P(DBCompactionTestWithParam, CompactionsGenerateMultipleFiles) { - Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large write buffer - options.max_subcompactions = max_subcompactions_; - CreateAndReopenWithCF({"pikachu"}, options); - - Random rnd(301); - - // Write 8MB (80 values, each 100K) - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - std::vector values; - for (int i = 0; i < 80; i++) { - values.push_back(rnd.RandomString(100000)); - ASSERT_OK(Put(1, Key(i), values[i])); - } - - // Reopening moves updates to level-0 - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1], - true /* disallow trivial move */)); - - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - ASSERT_GT(NumTableFilesAtLevel(1, 1), 1); - for (int i = 0; i < 80; i++) { - ASSERT_EQ(Get(1, Key(i)), values[i]); - } -} - -TEST_F(DBCompactionTest, MinorCompactionsHappen) { - do { - Options options = CurrentOptions(); - options.write_buffer_size = 10000; - CreateAndReopenWithCF({"pikachu"}, options); - - const int N = 500; - - int starting_num_tables = TotalTableFiles(1); - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(1, Key(i), Key(i) + std::string(1000, 'v'))); - } - int ending_num_tables = TotalTableFiles(1); - ASSERT_GT(ending_num_tables, starting_num_tables); - - for (int i = 0; i < N; i++) { - ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i))); - } - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - for (int i = 0; i < N; i++) { - ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i))); - } - } while (ChangeCompactOptions()); -} - -TEST_F(DBCompactionTest, UserKeyCrossFile1) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleLevel; - options.level0_file_num_compaction_trigger = 3; - - DestroyAndReopen(options); - - // create first file and flush to l0 - ASSERT_OK(Put("4", "A")); - ASSERT_OK(Put("3", "A")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - ASSERT_OK(Put("2", "A")); - ASSERT_OK(Delete("3")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ("NOT_FOUND", Get("3")); - - // move both files down to l1 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("NOT_FOUND", Get("3")); - - for (int i = 0; i < 3; i++) { - ASSERT_OK(Put("2", "B")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ("NOT_FOUND", Get("3")); -} - -TEST_F(DBCompactionTest, UserKeyCrossFile2) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleLevel; - options.level0_file_num_compaction_trigger = 3; - - DestroyAndReopen(options); - - // create first file and flush to l0 - ASSERT_OK(Put("4", "A")); - ASSERT_OK(Put("3", "A")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - ASSERT_OK(Put("2", "A")); - ASSERT_OK(SingleDelete("3")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ("NOT_FOUND", Get("3")); - - // move both files down to l1 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("NOT_FOUND", Get("3")); - - for (int i = 0; i < 3; i++) { - ASSERT_OK(Put("2", "B")); 
- ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ("NOT_FOUND", Get("3")); -} - -TEST_F(DBCompactionTest, CompactionSstPartitioner) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleLevel; - options.level0_file_num_compaction_trigger = 3; - std::shared_ptr factory( - NewSstPartitionerFixedPrefixFactory(4)); - options.sst_partitioner_factory = factory; - - DestroyAndReopen(options); - - // create first file and flush to l0 - ASSERT_OK(Put("aaaa1", "A")); - ASSERT_OK(Put("bbbb1", "B")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - ASSERT_OK(Put("aaaa1", "A2")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - // move both files down to l1 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - std::vector files; - dbfull()->GetLiveFilesMetaData(&files); - ASSERT_EQ(2, files.size()); - ASSERT_EQ("A2", Get("aaaa1")); - ASSERT_EQ("B", Get("bbbb1")); -} - -TEST_F(DBCompactionTest, CompactionSstPartitionWithManualCompaction) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleLevel; - options.level0_file_num_compaction_trigger = 3; - - DestroyAndReopen(options); - - // create first file and flush to l0 - ASSERT_OK(Put("000015", "A")); - ASSERT_OK(Put("000025", "B")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - // create second file and flush to l0 - ASSERT_OK(Put("000015", "A2")); - ASSERT_OK(Put("000025", "B2")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - // CONTROL 1: compact without partitioner - CompactRangeOptions compact_options; - compact_options.bottommost_level_compaction = - BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // Check (compacted but no partitioning yet) - std::vector files; - dbfull()->GetLiveFilesMetaData(&files); - ASSERT_EQ(1, files.size()); - - // Install partitioner - std::shared_ptr factory( - NewSstPartitionerFixedPrefixFactory(5)); - options.sst_partitioner_factory = factory; - Reopen(options); - - // CONTROL 2: request compaction on range with no partition boundary and no - // overlap with actual entries - Slice from("000017"); - Slice to("000019"); - ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to)); - - // Check (no partitioning yet) - files.clear(); - dbfull()->GetLiveFilesMetaData(&files); - ASSERT_EQ(1, files.size()); - ASSERT_EQ("A2", Get("000015")); - ASSERT_EQ("B2", Get("000025")); - - // TEST: request compaction overlapping with partition boundary but no - // actual entries - // NOTE: `to` is INCLUSIVE - from = Slice("000019"); - to = Slice("000020"); - ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to)); - - // Check (must be partitioned) - files.clear(); - dbfull()->GetLiveFilesMetaData(&files); - ASSERT_EQ(2, files.size()); - ASSERT_EQ("A2", Get("000015")); - ASSERT_EQ("B2", Get("000025")); -} - -TEST_F(DBCompactionTest, CompactionSstPartitionerNonTrivial) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleLevel; - options.level0_file_num_compaction_trigger = 1; - std::shared_ptr factory( - NewSstPartitionerFixedPrefixFactory(4)); - options.sst_partitioner_factory = factory; - - DestroyAndReopen(options); - - // create first file and flush to l0 - ASSERT_OK(Put("aaaa1", "A")); - ASSERT_OK(Put("bbbb1", "B")); - 
ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); - - std::vector files; - dbfull()->GetLiveFilesMetaData(&files); - ASSERT_EQ(2, files.size()); - ASSERT_EQ("A", Get("aaaa1")); - ASSERT_EQ("B", Get("bbbb1")); -} - -TEST_F(DBCompactionTest, ZeroSeqIdCompaction) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleLevel; - options.level0_file_num_compaction_trigger = 3; - - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - - // compaction options - CompactionOptions compact_opt; - compact_opt.compression = kNoCompression; - compact_opt.output_file_size_limit = 4096; - const size_t key_len = - static_cast(compact_opt.output_file_size_limit) / 5; - - DestroyAndReopen(options); - - std::vector snaps; - - // create first file and flush to l0 - for (auto& key : {"1", "2", "3", "3", "3", "3"}) { - ASSERT_OK(Put(key, std::string(key_len, 'A'))); - snaps.push_back(dbfull()->GetSnapshot()); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - // create second file and flush to l0 - for (auto& key : {"3", "4", "5", "6", "7", "8"}) { - ASSERT_OK(Put(key, std::string(key_len, 'A'))); - snaps.push_back(dbfull()->GetSnapshot()); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - // move both files down to l1 - ASSERT_OK( - dbfull()->CompactFiles(compact_opt, collector->GetFlushedFiles(), 1)); - - // release snap so that first instance of key(3) can have seqId=0 - for (auto snap : snaps) { - dbfull()->ReleaseSnapshot(snap); - } - - // create 3 files in l0 so to trigger compaction - for (int i = 0; i < options.level0_file_num_compaction_trigger; i++) { - ASSERT_OK(Put("2", std::string(1, 'A'))); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_OK(Put("", "")); -} - -TEST_F(DBCompactionTest, ManualCompactionUnknownOutputSize) { - // github issue #2249 - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleLevel; - options.level0_file_num_compaction_trigger = 3; - DestroyAndReopen(options); - - // create two files in l1 that we can compact - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < options.level0_file_num_compaction_trigger; j++) { - ASSERT_OK(Put(std::to_string(2 * i), std::string(1, 'A'))); - ASSERT_OK(Put(std::to_string(2 * i + 1), std::string(1, 'A'))); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_OK( - dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"}})); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); - ASSERT_EQ(NumTableFilesAtLevel(1, 0), 2); - ASSERT_OK( - dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "3"}})); - - ColumnFamilyMetaData cf_meta; - dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta); - ASSERT_EQ(2, cf_meta.levels[1].files.size()); - std::vector input_filenames; - for (const auto& sst_file : cf_meta.levels[1].files) { - input_filenames.push_back(sst_file.name); - } - - // note CompactionOptions::output_file_size_limit is unset. 
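// Aside (a minimal sketch reusing only APIs already exercised in this file,
// not part of the deleted test): the test below deliberately leaves
// output_file_size_limit unset; an explicit cap, as ZeroSeqIdCompaction sets
// above, would instead look like:
//
//   CompactionOptions capped_opt;
//   capped_opt.compression = kNoCompression;
//   capped_opt.output_file_size_limit = 4096;  // bytes, as in the earlier test
//   ASSERT_OK(dbfull()->CompactFiles(capped_opt, input_filenames, 1));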
- CompactionOptions compact_opt; - compact_opt.compression = kNoCompression; - ASSERT_OK(dbfull()->CompactFiles(compact_opt, input_filenames, 1)); -} - -// Check that writes done during a memtable compaction are recovered -// if the database is shutdown during the memtable compaction. -TEST_F(DBCompactionTest, RecoverDuringMemtableCompaction) { - do { - Options options = CurrentOptions(); - options.env = env_; - CreateAndReopenWithCF({"pikachu"}, options); - - // Trigger a long memtable compaction and reopen the database during it - ASSERT_OK(Put(1, "foo", "v1")); // Goes to 1st log file - ASSERT_OK(Put(1, "big1", std::string(10000000, 'x'))); // Fills memtable - ASSERT_OK(Put(1, "big2", std::string(1000, 'y'))); // Triggers compaction - ASSERT_OK(Put(1, "bar", "v2")); // Goes to new log file - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v2", Get(1, "bar")); - ASSERT_EQ(std::string(10000000, 'x'), Get(1, "big1")); - ASSERT_EQ(std::string(1000, 'y'), Get(1, "big2")); - } while (ChangeOptions()); -} - -TEST_P(DBCompactionTestWithParam, TrivialMoveOneFile) { - int32_t trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.write_buffer_size = 100000000; - options.max_subcompactions = max_subcompactions_; - DestroyAndReopen(options); - - int32_t num_keys = 80; - int32_t value_size = 100 * 1024; // 100 KB - - Random rnd(301); - std::vector values; - for (int i = 0; i < num_keys; i++) { - values.push_back(rnd.RandomString(value_size)); - ASSERT_OK(Put(Key(i), values[i])); - } - - // Reopening moves updates to L0 - Reopen(options); - ASSERT_EQ(NumTableFilesAtLevel(0, 0), 1); // 1 file in L0 - ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // 0 files in L1 - - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(metadata.size(), 1U); - LiveFileMetaData level0_file = metadata[0]; // L0 file meta - - CompactRangeOptions cro; - cro.exclusive_manual_compaction = exclusive_manual_compaction_; - - // Compaction will initiate a trivial move from L0 to L1 - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - - // File moved From L0 to L1 - ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); // 0 files in L0 - ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1); // 1 file in L1 - - metadata.clear(); - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(metadata.size(), 1U); - ASSERT_EQ(metadata[0].name /* level1_file.name */, level0_file.name); - ASSERT_EQ(metadata[0].size /* level1_file.size */, level0_file.size); - - for (int i = 0; i < num_keys; i++) { - ASSERT_EQ(Get(Key(i)), values[i]); - } - - ASSERT_EQ(trivial_move, 1); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DBCompactionTestWithParam, TrivialMoveNonOverlappingFiles) { - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.write_buffer_size = 10 * 1024 * 1024; - 
options.max_subcompactions = max_subcompactions_; - - DestroyAndReopen(options); - // non overlapping ranges - std::vector> ranges = { - {100, 199}, {300, 399}, {0, 99}, {200, 299}, - {600, 699}, {400, 499}, {500, 550}, {551, 599}, - }; - int32_t value_size = 10 * 1024; // 10 KB - - Random rnd(301); - std::map values; - for (size_t i = 0; i < ranges.size(); i++) { - for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { - values[j] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(j), values[j])); - } - ASSERT_OK(Flush()); - } - - int32_t level0_files = NumTableFilesAtLevel(0, 0); - ASSERT_EQ(level0_files, ranges.size()); // Multiple files in L0 - ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // No files in L1 - - CompactRangeOptions cro; - cro.exclusive_manual_compaction = exclusive_manual_compaction_; - - // Since data is non-overlapping we expect compaction to initiate - // a trivial move - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - // We expect that all the files were trivially moved from L0 to L1 - ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); - ASSERT_EQ(NumTableFilesAtLevel(1, 0) /* level1_files */, level0_files); - - for (size_t i = 0; i < ranges.size(); i++) { - for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { - ASSERT_EQ(Get(Key(j)), values[j]); - } - } - - ASSERT_EQ(trivial_move, 1); - ASSERT_EQ(non_trivial_move, 0); - - trivial_move = 0; - non_trivial_move = 0; - values.clear(); - DestroyAndReopen(options); - // Same ranges as above but overlapping - ranges = { - {100, 199}, - {300, 399}, - {0, 99}, - {200, 299}, - {600, 699}, - {400, 499}, - {500, 560}, // this range overlap with the next - // one - {551, 599}, - }; - for (size_t i = 0; i < ranges.size(); i++) { - for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { - values[j] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(j), values[j])); - } - ASSERT_OK(Flush()); - } - - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - for (size_t i = 0; i < ranges.size(); i++) { - for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { - ASSERT_EQ(Get(Key(j)), values[j]); - } - } - ASSERT_EQ(trivial_move, 0); - ASSERT_EQ(non_trivial_move, 1); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DBCompactionTestWithParam, TrivialMoveTargetLevel) { - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.write_buffer_size = 10 * 1024 * 1024; - options.num_levels = 7; - options.max_subcompactions = max_subcompactions_; - - DestroyAndReopen(options); - int32_t value_size = 10 * 1024; // 10 KB - - // Add 2 non-overlapping files - Random rnd(301); - std::map values; - - // file 1 [0 => 300] - for (int32_t i = 0; i <= 300; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // file 2 [600 => 700] - for (int32_t i = 600; i <= 700; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // 2 files in L0 - ASSERT_EQ("2", FilesPerLevel(0)); - CompactRangeOptions compact_options; 
-  compact_options.change_level = true;
-  compact_options.target_level = 6;
-  compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
-  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
-  // 2 files in L6
-  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel(0));
-
-  ASSERT_EQ(trivial_move, 1);
-  ASSERT_EQ(non_trivial_move, 0);
-
-  for (int32_t i = 0; i <= 300; i++) {
-    ASSERT_EQ(Get(Key(i)), values[i]);
-  }
-  for (int32_t i = 600; i <= 700; i++) {
-    ASSERT_EQ(Get(Key(i)), values[i]);
-  }
-}
-
-TEST_P(DBCompactionTestWithParam, PartialOverlappingL0) {
-  class SubCompactionEventListener : public EventListener {
-   public:
-    void OnSubcompactionCompleted(const SubcompactionJobInfo&) override {
-      sub_compaction_finished_++;
-    }
-    std::atomic<int> sub_compaction_finished_{0};
-  };
-
-  Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
-  options.write_buffer_size = 10 * 1024 * 1024;
-  options.max_subcompactions = max_subcompactions_;
-  SubCompactionEventListener* listener = new SubCompactionEventListener();
-  options.listeners.emplace_back(listener);
-
-  DestroyAndReopen(options);
-
-  // For subcompaction to trigger, output level needs to be non-empty.
-  ASSERT_OK(Put("key", ""));
-  ASSERT_OK(Put("kez", ""));
-  ASSERT_OK(Flush());
-  ASSERT_OK(Put("key", ""));
-  ASSERT_OK(Put("kez", ""));
-  ASSERT_OK(Flush());
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-
-  // Ranges that are only briefly overlapping so that they won't be trivially
-  // moved but subcompaction ranges would only contain a subset of files.
-  std::vector<std::pair<int32_t, int32_t>> ranges = {
-      {100, 199}, {198, 399}, {397, 600}, {598, 800}, {799, 900}, {895, 999},
-  };
-  int32_t value_size = 10 * 1024;  // 10 KB
-
-  Random rnd(301);
-  std::map<int32_t, std::string> values;
-  for (size_t i = 0; i < ranges.size(); i++) {
-    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
-      values[j] = rnd.RandomString(value_size);
-      ASSERT_OK(Put(Key(j), values[j]));
-    }
-    ASSERT_OK(Flush());
-  }
-
-  int32_t level0_files = NumTableFilesAtLevel(0, 0);
-  ASSERT_EQ(level0_files, ranges.size());    // Multiple files in L0
-  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1);  // One file in L1
-
-  listener->sub_compaction_finished_ = 0;
-  ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()}));
-  ASSERT_OK(dbfull()->TEST_WaitForCompact());
-  if (max_subcompactions_ > 3) {
-    // RocksDB might not generate the exact number of sub compactions.
-    // Here we validate that at least one subcompaction happened.
- ASSERT_GT(listener->sub_compaction_finished_.load(), 2); - } - - // We expect that all the files were compacted to L1 - ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); - ASSERT_GT(NumTableFilesAtLevel(1, 0), 1); - - for (size_t i = 0; i < ranges.size(); i++) { - for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { - ASSERT_EQ(Get(Key(j)), values[j]); - } - } -} - -TEST_P(DBCompactionTestWithParam, ManualCompactionPartial) { - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial_move++; }); - bool first = true; - // Purpose of dependencies: - // 4 -> 1: ensure the order of two non-trivial compactions - // 5 -> 2 and 5 -> 3: ensure we do a check before two non-trivial compactions - // are installed - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBCompaction::ManualPartial:4", "DBCompaction::ManualPartial:1"}, - {"DBCompaction::ManualPartial:5", "DBCompaction::ManualPartial:2"}, - {"DBCompaction::ManualPartial:5", "DBCompaction::ManualPartial:3"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { - if (first) { - first = false; - TEST_SYNC_POINT("DBCompaction::ManualPartial:4"); - TEST_SYNC_POINT("DBCompaction::ManualPartial:3"); - } else { // second non-trivial compaction - TEST_SYNC_POINT("DBCompaction::ManualPartial:2"); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.write_buffer_size = 10 * 1024 * 1024; - options.num_levels = 7; - options.max_subcompactions = max_subcompactions_; - options.level0_file_num_compaction_trigger = 3; - options.max_background_compactions = 3; - options.target_file_size_base = 1 << 23; // 8 MB - - DestroyAndReopen(options); - int32_t value_size = 10 * 1024; // 10 KB - - // Add 2 non-overlapping files - Random rnd(301); - std::map values; - - // file 1 [0 => 100] - for (int32_t i = 0; i < 100; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // file 2 [100 => 300] - for (int32_t i = 100; i < 300; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // 2 files in L0 - ASSERT_EQ("2", FilesPerLevel(0)); - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 6; - compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - // Trivial move the two non-overlapping files to level 6 - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - // 2 files in L6 - ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel(0)); - - ASSERT_EQ(trivial_move, 1); - ASSERT_EQ(non_trivial_move, 0); - - // file 3 [ 0 => 200] - for (int32_t i = 0; i < 200; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // 1 files in L0 - ASSERT_EQ("1,0,0,0,0,0,2", FilesPerLevel(0)); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, false)); - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr, false)); - ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr, nullptr, false)); - 
ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr, false)); - ASSERT_OK(dbfull()->TEST_CompactRange(4, nullptr, nullptr, nullptr, false)); - // 2 files in L6, 1 file in L5 - ASSERT_EQ("0,0,0,0,0,1,2", FilesPerLevel(0)); - - ASSERT_EQ(trivial_move, 6); - ASSERT_EQ(non_trivial_move, 0); - - ROCKSDB_NAMESPACE::port::Thread threads([&] { - compact_options.change_level = false; - compact_options.exclusive_manual_compaction = false; - std::string begin_string = Key(0); - std::string end_string = Key(199); - Slice begin(begin_string); - Slice end(end_string); - // First non-trivial compaction is triggered - ASSERT_OK(db_->CompactRange(compact_options, &begin, &end)); - }); - - TEST_SYNC_POINT("DBCompaction::ManualPartial:1"); - // file 4 [300 => 400) - for (int32_t i = 300; i <= 400; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // file 5 [400 => 500) - for (int32_t i = 400; i <= 500; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // file 6 [500 => 600) - for (int32_t i = 500; i <= 600; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - // Second non-trivial compaction is triggered - ASSERT_OK(Flush()); - - // Before two non-trivial compactions are installed, there are 3 files in L0 - ASSERT_EQ("3,0,0,0,0,1,2", FilesPerLevel(0)); - TEST_SYNC_POINT("DBCompaction::ManualPartial:5"); - - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // After two non-trivial compactions are installed, there is 1 file in L6, and - // 1 file in L1 - ASSERT_EQ("0,1,0,0,0,0,1", FilesPerLevel(0)); - threads.join(); - - for (int32_t i = 0; i < 600; i++) { - ASSERT_EQ(Get(Key(i)), values[i]); - } -} - -// Disable as the test is flaky. 
-TEST_F(DBCompactionTest, DISABLED_ManualPartialFill) { - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial_move++; }); - bool first = true; - bool second = true; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBCompaction::PartialFill:4", "DBCompaction::PartialFill:1"}, - {"DBCompaction::PartialFill:2", "DBCompaction::PartialFill:3"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { - if (first) { - TEST_SYNC_POINT("DBCompaction::PartialFill:4"); - first = false; - TEST_SYNC_POINT("DBCompaction::PartialFill:3"); - } else if (second) { - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.write_buffer_size = 10 * 1024 * 1024; - options.max_bytes_for_level_multiplier = 2; - options.num_levels = 4; - options.level0_file_num_compaction_trigger = 3; - options.max_background_compactions = 3; - - DestroyAndReopen(options); - // make sure all background compaction jobs can be scheduled - auto stop_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - int32_t value_size = 10 * 1024; // 10 KB - - // Add 2 non-overlapping files - Random rnd(301); - std::map values; - - // file 1 [0 => 100] - for (int32_t i = 0; i < 100; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // file 2 [100 => 300] - for (int32_t i = 100; i < 300; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // 2 files in L0 - ASSERT_EQ("2", FilesPerLevel(0)); - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - // 2 files in L2 - ASSERT_EQ("0,0,2", FilesPerLevel(0)); - - ASSERT_EQ(trivial_move, 1); - ASSERT_EQ(non_trivial_move, 0); - - // file 3 [ 0 => 200] - for (int32_t i = 0; i < 200; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // 2 files in L2, 1 in L0 - ASSERT_EQ("1,0,2", FilesPerLevel(0)); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, false)); - // 2 files in L2, 1 in L1 - ASSERT_EQ("0,1,2", FilesPerLevel(0)); - - ASSERT_EQ(trivial_move, 2); - ASSERT_EQ(non_trivial_move, 0); - - ROCKSDB_NAMESPACE::port::Thread threads([&] { - compact_options.change_level = false; - compact_options.exclusive_manual_compaction = false; - std::string begin_string = Key(0); - std::string end_string = Key(199); - Slice begin(begin_string); - Slice end(end_string); - ASSERT_OK(db_->CompactRange(compact_options, &begin, &end)); - }); - - TEST_SYNC_POINT("DBCompaction::PartialFill:1"); - // Many files 4 [300 => 4300) - for (int32_t i = 0; i <= 5; i++) { - for (int32_t j = 300; j < 4300; j++) { - if (j == 2300) { - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - values[j] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(j), values[j])); - } - } - - // Verify level sizes - uint64_t target_size = 4 * options.max_bytes_for_level_base; - for (int32_t i = 1; i < 
options.num_levels; i++) { - ASSERT_LE(SizeAtLevel(i), target_size); - target_size = static_cast(target_size * - options.max_bytes_for_level_multiplier); - } - - TEST_SYNC_POINT("DBCompaction::PartialFill:2"); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - threads.join(); - - for (int32_t i = 0; i < 4300; i++) { - ASSERT_EQ(Get(Key(i)), values[i]); - } -} - -TEST_F(DBCompactionTest, ManualCompactionWithUnorderedWrite) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL", - "DBCompactionTest::ManualCompactionWithUnorderedWrite:WaitWriteWAL"}, - {"DBImpl::WaitForPendingWrites:BeforeBlock", - "DBImpl::WriteImpl:BeforeUnorderedWriteMemtable"}}); - - Options options = CurrentOptions(); - options.unordered_write = true; - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "v1")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("bar", "v1")); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer([&]() { ASSERT_OK(Put("foo", "v2")); }); - - TEST_SYNC_POINT( - "DBCompactionTest::ManualCompactionWithUnorderedWrite:WaitWriteWAL"); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - writer.join(); - ASSERT_EQ(Get("foo"), "v2"); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - Reopen(options); - ASSERT_EQ(Get("foo"), "v2"); -} - -TEST_F(DBCompactionTest, DeleteFileRange) { - Options options = CurrentOptions(); - options.write_buffer_size = 10 * 1024 * 1024; - options.max_bytes_for_level_multiplier = 2; - options.num_levels = 4; - options.level0_file_num_compaction_trigger = 3; - options.max_background_compactions = 3; - - DestroyAndReopen(options); - int32_t value_size = 10 * 1024; // 10 KB - - // Add 2 non-overlapping files - Random rnd(301); - std::map values; - - // file 1 [0 => 100] - for (int32_t i = 0; i < 100; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // file 2 [100 => 300] - for (int32_t i = 100; i < 300; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // 2 files in L0 - ASSERT_EQ("2", FilesPerLevel(0)); - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - // 2 files in L2 - ASSERT_EQ("0,0,2", FilesPerLevel(0)); - - // file 3 [ 0 => 200] - for (int32_t i = 0; i < 200; i++) { - values[i] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - // Many files 4 [300 => 4300) - for (int32_t i = 0; i <= 5; i++) { - for (int32_t j = 300; j < 4300; j++) { - if (j == 2300) { - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - values[j] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(j), values[j])); - } - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // Verify level sizes - uint64_t target_size = 4 * options.max_bytes_for_level_base; - for (int32_t i = 1; i < options.num_levels; i++) { - ASSERT_LE(SizeAtLevel(i), target_size); - target_size = static_cast(target_size * - options.max_bytes_for_level_multiplier); - } - - const size_t old_num_files = CountFiles(); - std::string begin_string = Key(1000); - std::string end_string = Key(2000); - Slice 
begin(begin_string); - Slice end(end_string); - ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end)); - - int32_t deleted_count = 0; - for (int32_t i = 0; i < 4300; i++) { - if (i < 1000 || i > 2000) { - ASSERT_EQ(Get(Key(i)), values[i]); - } else { - ReadOptions roptions; - std::string result; - Status s = db_->Get(roptions, Key(i), &result); - ASSERT_TRUE(s.IsNotFound() || s.ok()); - if (s.IsNotFound()) { - deleted_count++; - } - } - } - ASSERT_GT(deleted_count, 0); - begin_string = Key(5000); - end_string = Key(6000); - Slice begin1(begin_string); - Slice end1(end_string); - // Try deleting files in range which contain no keys - ASSERT_OK( - DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin1, &end1)); - - // Push data from level 0 to level 1 to force all data to be deleted - // Note that we don't delete level 0 files - compact_options.change_level = true; - compact_options.target_level = 1; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_OK( - DeleteFilesInRange(db_, db_->DefaultColumnFamily(), nullptr, nullptr)); - - int32_t deleted_count2 = 0; - for (int32_t i = 0; i < 4300; i++) { - ReadOptions roptions; - std::string result; - ASSERT_TRUE(db_->Get(roptions, Key(i), &result).IsNotFound()); - deleted_count2++; - } - ASSERT_GT(deleted_count2, deleted_count); - const size_t new_num_files = CountFiles(); - ASSERT_GT(old_num_files, new_num_files); -} - -TEST_F(DBCompactionTest, DeleteFilesInRanges) { - Options options = CurrentOptions(); - options.write_buffer_size = 10 * 1024 * 1024; - options.max_bytes_for_level_multiplier = 2; - options.num_levels = 4; - options.max_background_compactions = 3; - options.disable_auto_compactions = true; - - DestroyAndReopen(options); - int32_t value_size = 10 * 1024; // 10 KB - - Random rnd(301); - std::map values; - - // file [0 => 100), [100 => 200), ... [900, 1000) - for (auto i = 0; i < 10; i++) { - for (auto j = 0; j < 100; j++) { - auto k = i * 100 + j; - values[k] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(k), values[k])); - } - ASSERT_OK(Flush()); - } - ASSERT_EQ("10", FilesPerLevel(0)); - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - ASSERT_EQ("0,0,10", FilesPerLevel(0)); - - // file [0 => 100), [200 => 300), ... [800, 900) - for (auto i = 0; i < 10; i += 2) { - for (auto j = 0; j < 100; j++) { - auto k = i * 100 + j; - ASSERT_OK(Put(Key(k), values[k])); - } - ASSERT_OK(Flush()); - } - ASSERT_EQ("5,0,10", FilesPerLevel(0)); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); - ASSERT_EQ("0,5,10", FilesPerLevel(0)); - - // Delete files in range [0, 299] (inclusive) - { - auto begin_str1 = Key(0), end_str1 = Key(100); - auto begin_str2 = Key(100), end_str2 = Key(200); - auto begin_str3 = Key(200), end_str3 = Key(299); - Slice begin1(begin_str1), end1(end_str1); - Slice begin2(begin_str2), end2(end_str2); - Slice begin3(begin_str3), end3(end_str3); - std::vector ranges; - ranges.push_back(RangePtr(&begin1, &end1)); - ranges.push_back(RangePtr(&begin2, &end2)); - ranges.push_back(RangePtr(&begin3, &end3)); - ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), - ranges.data(), ranges.size())); - ASSERT_EQ("0,3,7", FilesPerLevel(0)); - - // Keys [0, 300) should not exist. 
- for (auto i = 0; i < 300; i++) { - ReadOptions ropts; - std::string result; - auto s = db_->Get(ropts, Key(i), &result); - ASSERT_TRUE(s.IsNotFound()); - } - for (auto i = 300; i < 1000; i++) { - ASSERT_EQ(Get(Key(i)), values[i]); - } - } - - // Delete files in range [600, 999) (exclusive) - { - auto begin_str1 = Key(600), end_str1 = Key(800); - auto begin_str2 = Key(700), end_str2 = Key(900); - auto begin_str3 = Key(800), end_str3 = Key(999); - Slice begin1(begin_str1), end1(end_str1); - Slice begin2(begin_str2), end2(end_str2); - Slice begin3(begin_str3), end3(end_str3); - std::vector ranges; - ranges.push_back(RangePtr(&begin1, &end1)); - ranges.push_back(RangePtr(&begin2, &end2)); - ranges.push_back(RangePtr(&begin3, &end3)); - ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), - ranges.data(), ranges.size(), false)); - ASSERT_EQ("0,1,4", FilesPerLevel(0)); - - // Keys [600, 900) should not exist. - for (auto i = 600; i < 900; i++) { - ReadOptions ropts; - std::string result; - auto s = db_->Get(ropts, Key(i), &result); - ASSERT_TRUE(s.IsNotFound()); - } - for (auto i = 300; i < 600; i++) { - ASSERT_EQ(Get(Key(i)), values[i]); - } - for (auto i = 900; i < 1000; i++) { - ASSERT_EQ(Get(Key(i)), values[i]); - } - } - - // Delete all files. - { - RangePtr range; - ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), &range, 1)); - ASSERT_EQ("", FilesPerLevel(0)); - - for (auto i = 0; i < 1000; i++) { - ReadOptions ropts; - std::string result; - auto s = db_->Get(ropts, Key(i), &result); - ASSERT_TRUE(s.IsNotFound()); - } - } -} - -TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) { - // regression test for #2833: groups of files whose user-keys overlap at the - // endpoints could be split by `DeleteFilesInRange`. This caused old data to - // reappear, either because a new version of the key was removed, or a range - // deletion was partially dropped. It could also cause non-overlapping - // invariant to be violated if the files dropped by DeleteFilesInRange were - // a subset of files that a range deletion spans. - const int kNumL0Files = 2; - const int kValSize = 8 << 10; // 8KB - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - options.target_file_size_base = 1 << 10; // 1KB - DestroyAndReopen(options); - - // The snapshot prevents key 1 from having its old version dropped. The low - // `target_file_size_base` ensures two keys will be in each output file. - const Snapshot* snapshot = nullptr; - Random rnd(301); - // The value indicates which flush the key belonged to, which is enough - // for us to determine the keys' relative ages. After L0 flushes finish, - // files look like: - // - // File 0: 0 -> vals[0], 1 -> vals[0] - // File 1: 1 -> vals[1], 2 -> vals[1] - // - // Then L0->L1 compaction happens, which outputs keys as follows: - // - // File 0: 0 -> vals[0], 1 -> vals[1] - // File 1: 1 -> vals[0], 2 -> vals[1] - // - // DeleteFilesInRange shouldn't be allowed to drop just file 0, as that - // would cause `1 -> vals[0]` (an older key) to reappear. - std::string vals[kNumL0Files]; - for (int i = 0; i < kNumL0Files; ++i) { - vals[i] = rnd.RandomString(kValSize); - ASSERT_OK(Put(Key(i), vals[i])); - ASSERT_OK(Put(Key(i + 1), vals[i])); - ASSERT_OK(Flush()); - if (i == 0) { - snapshot = db_->GetSnapshot(); - } - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // Verify `DeleteFilesInRange` can't drop only file 0 which would cause - // "1 -> vals[0]" to reappear. 
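// Aside (a minimal sketch, only restating the convenience API already
// exercised above, not part of the deleted test): the end key is inclusive by
// default, and the exclusive-end behaviour used in the [600, 999) case above
// comes from the trailing include_end argument:
//
//   // inclusive end (default): files fully inside [begin, end] are dropped
//   DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end);
//   // exclusive end: pass include_end = false, as DeleteFilesInRanges did
//   DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end, false);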
- std::string begin_str = Key(0), end_str = Key(1); - Slice begin = begin_str, end = end_str; - ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end)); - ASSERT_EQ(vals[1], Get(Key(1))); - - db_->ReleaseSnapshot(snapshot); -} - -TEST_P(DBCompactionTestWithParam, TrivialMoveToLastLevelWithFiles) { - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.write_buffer_size = 100000000; - options.max_subcompactions = max_subcompactions_; - DestroyAndReopen(options); - - int32_t value_size = 10 * 1024; // 10 KB - - Random rnd(301); - std::vector values; - // File with keys [ 0 => 99 ] - for (int i = 0; i < 100; i++) { - values.push_back(rnd.RandomString(value_size)); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - ASSERT_EQ("1", FilesPerLevel(0)); - // Compaction will do L0=>L1 (trivial move) then move L1 files to L3 - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 3; - compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); - ASSERT_EQ(trivial_move, 1); - ASSERT_EQ(non_trivial_move, 0); - - // File with keys [ 100 => 199 ] - for (int i = 100; i < 200; i++) { - values.push_back(rnd.RandomString(value_size)); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Flush()); - - ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); - CompactRangeOptions cro; - cro.exclusive_manual_compaction = exclusive_manual_compaction_; - // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_EQ("0,0,0,2", FilesPerLevel(0)); - ASSERT_EQ(trivial_move, 4); - ASSERT_EQ(non_trivial_move, 0); - - for (int i = 0; i < 200; i++) { - ASSERT_EQ(Get(Key(i)), values[i]); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DBCompactionTestWithParam, LevelCompactionThirdPath) { - Options options = CurrentOptions(); - options.db_paths.emplace_back(dbname_, 500 * 1024); - options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); - options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - options.compaction_style = kCompactionStyleLevel; - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 4; - options.max_bytes_for_level_base = 400 * 1024; - options.max_subcompactions = max_subcompactions_; - - DestroyAndReopen(options); - - Random rnd(301); - int key_idx = 0; - - // First three 110KB files are not going to second path. 
- // After that, (100K, 200K) - for (int num = 0; num < 3; num++) { - GenerateNewFile(&rnd, &key_idx); - } - - // Another 110KB triggers a compaction to 400K file to fill up first path - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(3, GetSstFileCount(options.db_paths[1].path)); - - // (1, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4", FilesPerLevel(0)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 4, 1) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,1", FilesPerLevel(0)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 4, 2) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,2", FilesPerLevel(0)); - ASSERT_EQ(2, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 4, 3) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,3", FilesPerLevel(0)); - ASSERT_EQ(3, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 4, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,4", FilesPerLevel(0)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 4, 5) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,5", FilesPerLevel(0)); - ASSERT_EQ(5, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 4, 6) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,6", FilesPerLevel(0)); - ASSERT_EQ(6, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 4, 7) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,7", FilesPerLevel(0)); - ASSERT_EQ(7, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 4, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,8", FilesPerLevel(0)); - ASSERT_EQ(8, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - Reopen(options); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - Destroy(options); -} - -TEST_P(DBCompactionTestWithParam, LevelCompactionPathUse) { - Options options = CurrentOptions(); - options.db_paths.emplace_back(dbname_, 500 * 1024); - options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); - options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - options.compaction_style = kCompactionStyleLevel; - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 4; - options.max_bytes_for_level_base = 400 * 1024; - options.max_subcompactions = max_subcompactions_; - - 
DestroyAndReopen(options); - - Random rnd(301); - int key_idx = 0; - - // Always gets compacted into 1 Level1 file, - // 0/1 Level 0 file - for (int num = 0; num < 3; num++) { - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - } - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,1", FilesPerLevel(0)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("0,1", FilesPerLevel(0)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,1", FilesPerLevel(0)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("0,1", FilesPerLevel(0)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,1", FilesPerLevel(0)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("0,1", FilesPerLevel(0)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,1", FilesPerLevel(0)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("0,1", FilesPerLevel(0)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,1", FilesPerLevel(0)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - Reopen(options); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - Destroy(options); -} - -TEST_P(DBCompactionTestWithParam, LevelCompactionCFPathUse) { - Options options = CurrentOptions(); - options.db_paths.emplace_back(dbname_, 500 * 1024); - options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); - options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - options.compaction_style = kCompactionStyleLevel; - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 4; - options.max_bytes_for_level_base = 400 * 1024; - 
options.max_subcompactions = max_subcompactions_; - - std::vector option_vector; - option_vector.emplace_back(options); - ColumnFamilyOptions cf_opt1(options), cf_opt2(options); - // Configure CF1 specific paths. - cf_opt1.cf_paths.emplace_back(dbname_ + "cf1", 500 * 1024); - cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_2", 4 * 1024 * 1024); - cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_3", 1024 * 1024 * 1024); - option_vector.emplace_back(DBOptions(options), cf_opt1); - CreateColumnFamilies({"one"}, option_vector[1]); - - // Configure CF2 specific paths. - cf_opt2.cf_paths.emplace_back(dbname_ + "cf2", 500 * 1024); - cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_2", 4 * 1024 * 1024); - cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_3", 1024 * 1024 * 1024); - option_vector.emplace_back(DBOptions(options), cf_opt2); - CreateColumnFamilies({"two"}, option_vector[2]); - - ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); - - Random rnd(301); - int key_idx = 0; - int key_idx1 = 0; - int key_idx2 = 0; - - auto generate_file = [&]() { - GenerateNewFile(0, &rnd, &key_idx); - GenerateNewFile(1, &rnd, &key_idx1); - GenerateNewFile(2, &rnd, &key_idx2); - }; - - auto check_sstfilecount = [&](int path_id, int expected) { - ASSERT_EQ(expected, GetSstFileCount(options.db_paths[path_id].path)); - ASSERT_EQ(expected, GetSstFileCount(cf_opt1.cf_paths[path_id].path)); - ASSERT_EQ(expected, GetSstFileCount(cf_opt2.cf_paths[path_id].path)); - }; - - auto check_filesperlevel = [&](const std::string& expected) { - ASSERT_EQ(expected, FilesPerLevel(0)); - ASSERT_EQ(expected, FilesPerLevel(1)); - ASSERT_EQ(expected, FilesPerLevel(2)); - }; - - auto check_getvalues = [&]() { - for (int i = 0; i < key_idx; i++) { - auto v = Get(0, Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - for (int i = 0; i < key_idx1; i++) { - auto v = Get(1, Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - for (int i = 0; i < key_idx2; i++) { - auto v = Get(2, Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - }; - - // Check that default column family uses db_paths. - // And Column family "one" uses cf_paths. - - // The compaction in level0 outputs the sst files in level1. - // The first path cannot hold level1's data(400KB+400KB > 500KB), - // so every compaction move a sst file to second path. Please - // refer to LevelCompactionBuilder::GetPathId. 
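// Aside (a minimal sketch using only GetLiveFilesMetaData, already called
// elsewhere in this file, not part of the deleted test): the directory each
// SST landed in can also be checked directly via the db_path field, which
// reflects db_paths for the default column family and cf_paths for column
// families that define them:
//
//   std::vector<LiveFileMetaData> meta;
//   db_->GetLiveFilesMetaData(&meta);
//   for (const auto& f : meta) {
//     // f.db_path is one of the configured db_paths / cf_paths directories
//   }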
- for (int num = 0; num < 3; num++) { - generate_file(); - } - check_sstfilecount(0, 1); - check_sstfilecount(1, 2); - - generate_file(); - check_sstfilecount(1, 3); - - // (1, 4) - generate_file(); - check_filesperlevel("1,4"); - check_sstfilecount(1, 4); - check_sstfilecount(0, 1); - - // (1, 4, 1) - generate_file(); - check_filesperlevel("1,4,1"); - check_sstfilecount(2, 1); - check_sstfilecount(1, 4); - check_sstfilecount(0, 1); - - // (1, 4, 2) - generate_file(); - check_filesperlevel("1,4,2"); - check_sstfilecount(2, 2); - check_sstfilecount(1, 4); - check_sstfilecount(0, 1); - - check_getvalues(); - - { // Also verify GetLiveFilesStorageInfo with db_paths / cf_paths - std::vector new_infos; - LiveFilesStorageInfoOptions lfsio; - lfsio.wal_size_for_flush = UINT64_MAX; // no flush - ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsio, &new_infos)); - std::unordered_map live_sst_by_dir; - for (auto& info : new_infos) { - if (info.file_type == kTableFile) { - live_sst_by_dir[info.directory]++; - // Verify file on disk (no directory confusion) - uint64_t size; - ASSERT_OK(env_->GetFileSize( - info.directory + "/" + info.relative_filename, &size)); - ASSERT_EQ(info.size, size); - } - } - ASSERT_EQ(3U * 3U, live_sst_by_dir.size()); - for (auto& paths : {options.db_paths, cf_opt1.cf_paths, cf_opt2.cf_paths}) { - ASSERT_EQ(1, live_sst_by_dir[paths[0].path]); - ASSERT_EQ(4, live_sst_by_dir[paths[1].path]); - ASSERT_EQ(2, live_sst_by_dir[paths[2].path]); - } - } - - ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); - - check_getvalues(); - - Destroy(options, true); -} - -TEST_P(DBCompactionTestWithParam, ConvertCompactionStyle) { - Random rnd(301); - int max_key_level_insert = 200; - int max_key_universal_insert = 600; - - // Stage 1: generate a db with level compaction - Options options = CurrentOptions(); - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 << 10; - options.num_levels = 4; - options.level0_file_num_compaction_trigger = 3; - options.max_bytes_for_level_base = 500 << 10; // 500KB - options.max_bytes_for_level_multiplier = 1; - options.target_file_size_base = 200 << 10; // 200KB - options.target_file_size_multiplier = 1; - options.max_subcompactions = max_subcompactions_; - CreateAndReopenWithCF({"pikachu"}, options); - - for (int i = 0; i <= max_key_level_insert; i++) { - // each value is 10K - ASSERT_OK(Put(1, Key(i), rnd.RandomString(10000))); - } - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_GT(TotalTableFiles(1, 4), 1); - int non_level0_num_files = 0; - for (int i = 1; i < options.num_levels; i++) { - non_level0_num_files += NumTableFilesAtLevel(i, 1); - } - ASSERT_GT(non_level0_num_files, 0); - - // Stage 2: reopen with universal compaction - should fail - options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 1; - options = CurrentOptions(options); - Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_TRUE(s.IsInvalidArgument()); - - // Stage 3: compact into a single file and move the file to level 0 - options = CurrentOptions(); - options.disable_auto_compactions = true; - options.target_file_size_base = INT_MAX; - options.target_file_size_multiplier = 1; - options.max_bytes_for_level_base = INT_MAX; - options.max_bytes_for_level_multiplier = 1; - options.num_levels = 4; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - CompactRangeOptions compact_options; - 
compact_options.change_level = true; - compact_options.target_level = 0; - // cannot use kForceOptimized here because the compaction here is expected - // to generate one output file - compact_options.bottommost_level_compaction = - BottommostLevelCompaction::kForce; - compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK( - dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - - // Only 1 file in L0 - ASSERT_EQ("1", FilesPerLevel(1)); - - // Stage 4: re-open in universal compaction style and do some db operations - options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 4; - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 3; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - options.num_levels = 1; - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - for (int i = max_key_level_insert / 2; i <= max_key_universal_insert; i++) { - ASSERT_OK(Put(1, Key(i), rnd.RandomString(10000))); - } - ASSERT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - for (int i = 1; i < options.num_levels; i++) { - ASSERT_EQ(NumTableFilesAtLevel(i, 1), 0); - } - - // verify keys inserted in both level compaction style and universal - // compaction style - std::string keys_in_db; - Iterator* iter = dbfull()->NewIterator(ReadOptions(), handles_[1]); - ASSERT_OK(iter->status()); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - keys_in_db.append(iter->key().ToString()); - keys_in_db.push_back(','); - } - delete iter; - - std::string expected_keys; - for (int i = 0; i <= max_key_universal_insert; i++) { - expected_keys.append(Key(i)); - expected_keys.push_back(','); - } - - ASSERT_EQ(keys_in_db, expected_keys); -} - -TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_a) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "b", "v")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Delete(1, "b")); - ASSERT_OK(Delete(1, "a")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Delete(1, "a")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "a", "v")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("(a->v)", Contents(1)); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish - ASSERT_EQ("(a->v)", Contents(1)); - } while (ChangeCompactOptions()); -} - -TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_b) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "", "")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Delete(1, "e")); - ASSERT_OK(Put(1, "", "")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "c", "cv")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "", "")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "", "")); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "d", "dv")); - ReopenWithColumnFamilies({"default", "pikachu"}, 
CurrentOptions());
-    ASSERT_OK(Put(1, "", ""));
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    ASSERT_OK(Delete(1, "d"));
-    ASSERT_OK(Delete(1, "b"));
-    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
-    ASSERT_EQ("(->)(c->cv)", Contents(1));
-    env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
-    ASSERT_EQ("(->)(c->cv)", Contents(1));
-  } while (ChangeCompactOptions());
-}
-
-TEST_F(DBCompactionTest, ManualAutoRace) {
-  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
-      {{"DBImpl::BGWorkCompaction", "DBCompactionTest::ManualAutoRace:1"},
-       {"DBImpl::RunManualCompaction:WaitScheduled",
-        "BackgroundCallCompaction:0"}});
-
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
-
-  ASSERT_OK(Put(1, "foo", ""));
-  ASSERT_OK(Put(1, "bar", ""));
-  ASSERT_OK(Flush(1));
-  ASSERT_OK(Put(1, "foo", ""));
-  ASSERT_OK(Put(1, "bar", ""));
-  // Generate four files in CF 0, which should trigger an auto compaction
-  ASSERT_OK(Put("foo", ""));
-  ASSERT_OK(Put("bar", ""));
-  ASSERT_OK(Flush());
-  ASSERT_OK(Put("foo", ""));
-  ASSERT_OK(Put("bar", ""));
-  ASSERT_OK(Flush());
-  ASSERT_OK(Put("foo", ""));
-  ASSERT_OK(Put("bar", ""));
-  ASSERT_OK(Flush());
-  ASSERT_OK(Put("foo", ""));
-  ASSERT_OK(Put("bar", ""));
-  ASSERT_OK(Flush());
-
-  // The auto compaction is scheduled but waits until here
-  TEST_SYNC_POINT("DBCompactionTest::ManualAutoRace:1");
-  // The auto compaction will wait until the manual compaction is registered
-  // before processing so that it will be cancelled.
-  CompactRangeOptions cro;
-  cro.exclusive_manual_compaction = true;
-  ASSERT_OK(dbfull()->CompactRange(cro, handles_[1], nullptr, nullptr));
-  ASSERT_EQ("0,1", FilesPerLevel(1));
-
-  // Eventually the cancelled compaction will be rescheduled and executed.
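// Aside (a minimal sketch, no new APIs assumed, not part of the deleted
// test): exclusive_manual_compaction = true is what forces the
// already-scheduled automatic compaction to be cancelled here; with the
// non-exclusive form the two could proceed side by side:
//
//   CompactRangeOptions non_exclusive;
//   non_exclusive.exclusive_manual_compaction = false;
//   ASSERT_OK(dbfull()->CompactRange(non_exclusive, handles_[1], nullptr,
//                                    nullptr));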
- ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,1", FilesPerLevel(0)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DBCompactionTestWithParam, ManualCompaction) { - Options options = CurrentOptions(); - options.max_subcompactions = max_subcompactions_; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - CreateAndReopenWithCF({"pikachu"}, options); - - // iter - 0 with 7 levels - // iter - 1 with 3 levels - for (int iter = 0; iter < 2; ++iter) { - MakeTables(3, "p", "q", 1); - ASSERT_EQ("1,1,1", FilesPerLevel(1)); - - // Compaction range falls before files - Compact(1, "", "c"); - ASSERT_EQ("1,1,1", FilesPerLevel(1)); - - // Compaction range falls after files - Compact(1, "r", "z"); - ASSERT_EQ("1,1,1", FilesPerLevel(1)); - - // Compaction range overlaps files - Compact(1, "p", "q"); - ASSERT_EQ("0,0,1", FilesPerLevel(1)); - - // Populate a different range - MakeTables(3, "c", "e", 1); - ASSERT_EQ("1,1,2", FilesPerLevel(1)); - - // Compact just the new range - Compact(1, "b", "f"); - ASSERT_EQ("0,0,2", FilesPerLevel(1)); - - // Compact all - MakeTables(1, "a", "z", 1); - ASSERT_EQ("1,0,2", FilesPerLevel(1)); - - uint64_t prev_block_cache_add = - options.statistics->getTickerCount(BLOCK_CACHE_ADD); - CompactRangeOptions cro; - cro.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK(db_->CompactRange(cro, handles_[1], nullptr, nullptr)); - // Verify manual compaction doesn't fill block cache - ASSERT_EQ(prev_block_cache_add, - options.statistics->getTickerCount(BLOCK_CACHE_ADD)); - - ASSERT_EQ("0,0,1", FilesPerLevel(1)); - - if (iter == 0) { - options = CurrentOptions(); - options.num_levels = 3; - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - } - } -} - -TEST_P(DBCompactionTestWithParam, ManualLevelCompactionOutputPathId) { - Options options = CurrentOptions(); - options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760); - options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760); - options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760); - options.max_subcompactions = max_subcompactions_; - CreateAndReopenWithCF({"pikachu"}, options); - - // iter - 0 with 7 levels - // iter - 1 with 3 levels - for (int iter = 0; iter < 2; ++iter) { - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put(1, "p", "begin")); - ASSERT_OK(Put(1, "q", "end")); - ASSERT_OK(Flush(1)); - } - ASSERT_EQ("3", FilesPerLevel(1)); - ASSERT_EQ(3, GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // Compaction range falls before files - Compact(1, "", "c"); - ASSERT_EQ("3", FilesPerLevel(1)); - - // Compaction range falls after files - Compact(1, "r", "z"); - ASSERT_EQ("3", FilesPerLevel(1)); - - // Compaction range overlaps files - Compact(1, "p", "q", 1); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,1", FilesPerLevel(1)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // Populate a different range - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put(1, "c", "begin")); - ASSERT_OK(Put(1, "e", "end")); - ASSERT_OK(Flush(1)); - } - ASSERT_EQ("3,1", FilesPerLevel(1)); - - // Compact just the new range - Compact(1, "b", "f", 1); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,2", FilesPerLevel(1)); - ASSERT_EQ(2, 
GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // Compact all - ASSERT_OK(Put(1, "a", "begin")); - ASSERT_OK(Put(1, "z", "end")); - ASSERT_OK(Flush(1)); - ASSERT_EQ("1,2", FilesPerLevel(1)); - ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path)); - CompactRangeOptions compact_options; - compact_options.target_path_id = 1; - compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK( - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ("0,1", FilesPerLevel(1)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - if (iter == 0) { - DestroyAndReopen(options); - options = CurrentOptions(); - options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760); - options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760); - options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760); - options.max_background_flushes = 1; - options.num_levels = 3; - options.create_if_missing = true; - CreateAndReopenWithCF({"pikachu"}, options); - } - } -} - -TEST_F(DBCompactionTest, FilesDeletedAfterCompaction) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v2")); - Compact(1, "a", "z"); - const size_t num_files = CountLiveFiles(); - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(1, "foo", "v2")); - Compact(1, "a", "z"); - } - ASSERT_EQ(CountLiveFiles(), num_files); - } while (ChangeCompactOptions()); -} - -// Check level comapction with compact files -TEST_P(DBCompactionTestWithParam, DISABLED_CompactFilesOnLevelCompaction) { - const int kTestKeySize = 16; - const int kTestValueSize = 984; - const int kEntrySize = kTestKeySize + kTestValueSize; - const int kEntriesPerBuffer = 100; - Options options; - options.create_if_missing = true; - options.write_buffer_size = kEntrySize * kEntriesPerBuffer; - options.compaction_style = kCompactionStyleLevel; - options.target_file_size_base = options.write_buffer_size; - options.max_bytes_for_level_base = options.target_file_size_base * 2; - options.level0_stop_writes_trigger = 2; - options.max_bytes_for_level_multiplier = 2; - options.compression = kNoCompression; - options.max_subcompactions = max_subcompactions_; - options = CurrentOptions(options); - CreateAndReopenWithCF({"pikachu"}, options); - - Random rnd(301); - for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) { - ASSERT_OK(Put(1, std::to_string(key), rnd.RandomString(kTestValueSize))); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ColumnFamilyMetaData cf_meta; - dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); - int output_level = static_cast(cf_meta.levels.size()) - 1; - for (int file_picked = 5; file_picked > 0; --file_picked) { - std::set overlapping_file_names; - std::vector compaction_input_file_names; - for (int f = 0; f < file_picked; ++f) { - int level = 0; - auto file_meta = PickFileRandomly(cf_meta, &rnd, &level); - compaction_input_file_names.push_back(file_meta->name); - GetOverlappingFileNumbersForLevelCompaction( - cf_meta, options.comparator, level, output_level, file_meta, - &overlapping_file_names); - } - - ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), handles_[1], - 
compaction_input_file_names, - output_level)); - - // Make sure all overlapping files do not exist after compaction - dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); - VerifyCompactionResult(cf_meta, overlapping_file_names); - } - - // make sure all key-values are still there. - for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) { - ASSERT_NE(Get(1, std::to_string(key)), "NOT_FOUND"); - } -} - -TEST_P(DBCompactionTestWithParam, PartialCompactionFailure) { - Options options; - const int kKeySize = 16; - const int kKvSize = 1000; - const int kKeysPerBuffer = 100; - const int kNumL1Files = 5; - options.create_if_missing = true; - options.write_buffer_size = kKeysPerBuffer * kKvSize; - options.max_write_buffer_number = 2; - options.target_file_size_base = - options.write_buffer_size * (options.max_write_buffer_number - 1); - options.level0_file_num_compaction_trigger = kNumL1Files; - options.max_bytes_for_level_base = - options.level0_file_num_compaction_trigger * - options.target_file_size_base; - options.max_bytes_for_level_multiplier = 2; - options.compression = kNoCompression; - options.max_subcompactions = max_subcompactions_; - - env_->SetBackgroundThreads(1, Env::HIGH); - env_->SetBackgroundThreads(1, Env::LOW); - // stop the compaction thread until we simulate the file creation failure. - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - options.env = env_; - - DestroyAndReopen(options); - - const int kNumInsertedKeys = options.level0_file_num_compaction_trigger * - (options.max_write_buffer_number - 1) * - kKeysPerBuffer; - - Random rnd(301); - std::vector keys; - std::vector values; - for (int k = 0; k < kNumInsertedKeys; ++k) { - keys.emplace_back(rnd.RandomString(kKeySize)); - values.emplace_back(rnd.RandomString(kKvSize - kKeySize)); - ASSERT_OK(Put(Slice(keys[k]), Slice(values[k]))); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - - ASSERT_OK(dbfull()->TEST_FlushMemTable(true)); - // Make sure the number of L0 files can trigger compaction. - ASSERT_GE(NumTableFilesAtLevel(0), - options.level0_file_num_compaction_trigger); - - auto previous_num_level0_files = NumTableFilesAtLevel(0); - - // Fail the first file creation. - env_->non_writable_count_ = 1; - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - - // Expect compaction to fail here as one file will fail its - // creation. - ASSERT_TRUE(!dbfull()->TEST_WaitForCompact().ok()); - - // Verify L0 -> L1 compaction does fail. - ASSERT_EQ(NumTableFilesAtLevel(1), 0); - - // Verify all L0 files are still there. - ASSERT_EQ(NumTableFilesAtLevel(0), previous_num_level0_files); - - // All key-values must exist after compaction fails. - for (int k = 0; k < kNumInsertedKeys; ++k) { - ASSERT_EQ(values[k], Get(keys[k])); - } - - env_->non_writable_count_ = 0; - - // Make sure RocksDB will not get into corrupted state. - Reopen(options); - - // Verify again after reopen. 
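PartialCompactionFailure above relies on a test-harness hook (env_->non_writable_count_) to make the next file creation fail. Outside that harness, a similar fault injection can be sketched with a plain EnvWrapper; the class and member names below are made up for illustration only:

    #include <atomic>
    #include <memory>
    #include "rocksdb/env.h"

    // Hypothetical wrapper: fail the next N writable-file creations.
    class FailNextFilesEnv : public ROCKSDB_NAMESPACE::EnvWrapper {
     public:
      explicit FailNextFilesEnv(ROCKSDB_NAMESPACE::Env* base) : EnvWrapper(base) {}

      ROCKSDB_NAMESPACE::Status NewWritableFile(
          const std::string& fname,
          std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>* result,
          const ROCKSDB_NAMESPACE::EnvOptions& options) override {
        if (fail_count_ > 0) {
          --fail_count_;
          // Simulates the file-creation failure the test injects.
          return ROCKSDB_NAMESPACE::Status::IOError("injected failure: " + fname);
        }
        return EnvWrapper::NewWritableFile(fname, result, options);
      }

      std::atomic<int> fail_count_{0};
    };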
- for (int k = 0; k < kNumInsertedKeys; ++k) { - ASSERT_EQ(values[k], Get(keys[k])); - } -} - -TEST_P(DBCompactionTestWithParam, DeleteMovedFileAfterCompaction) { - // iter 1 -- delete_obsolete_files_period_micros == 0 - for (int iter = 0; iter < 2; ++iter) { - // This test triggers move compaction and verifies that the file is not - // deleted when it's part of move compaction - Options options = CurrentOptions(); - options.env = env_; - if (iter == 1) { - options.delete_obsolete_files_period_micros = 0; - } - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = - 2; // trigger compaction when we have 2 files - OnFileDeletionListener* listener = new OnFileDeletionListener(); - options.listeners.emplace_back(listener); - options.max_subcompactions = max_subcompactions_; - DestroyAndReopen(options); - - Random rnd(301); - // Create two 1MB sst files - for (int i = 0; i < 2; ++i) { - // Create 1MB sst file - for (int j = 0; j < 100; ++j) { - ASSERT_OK(Put(Key(i * 50 + j), rnd.RandomString(10 * 1024))); - } - ASSERT_OK(Flush()); - } - // this should execute L0->L1 - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - // block compactions - test::SleepingBackgroundTask sleeping_task; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, - Env::Priority::LOW); - - options.max_bytes_for_level_base = 1024 * 1024; // 1 MB - Reopen(options); - std::unique_ptr iterator(db_->NewIterator(ReadOptions())); - ASSERT_EQ("0,1", FilesPerLevel(0)); - // let compactions go - sleeping_task.WakeUp(); - sleeping_task.WaitUntilDone(); - - // this should execute L1->L2 (move) - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ("0,0,1", FilesPerLevel(0)); - - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(metadata.size(), 1U); - auto moved_file_name = metadata[0].name; - - // Create two more 1MB sst files - for (int i = 0; i < 2; ++i) { - // Create 1MB sst file - for (int j = 0; j < 100; ++j) { - ASSERT_OK(Put(Key(i * 50 + j + 100), rnd.RandomString(10 * 1024))); - } - ASSERT_OK(Flush()); - } - // this should execute both L0->L1 and L1->L2 (merge with previous file) - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ("0,0,2", FilesPerLevel(0)); - - // iterator is holding the file - ASSERT_OK(env_->FileExists(dbname_ + moved_file_name)); - - listener->SetExpectedFileName(dbname_ + moved_file_name); - ASSERT_OK(iterator->status()); - iterator.reset(); - - // this file should have been compacted away - ASSERT_NOK(env_->FileExists(dbname_ + moved_file_name)); - listener->VerifyMatchedCount(1); - } -} - -TEST_P(DBCompactionTestWithParam, CompressLevelCompaction) { - if (!Zlib_Supported()) { - return; - } - Options options = CurrentOptions(); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - options.compaction_style = kCompactionStyleLevel; - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 4; - options.max_bytes_for_level_base = 400 * 1024; - options.max_subcompactions = max_subcompactions_; - // First two levels have no compression, so that a trivial move between - // them will be allowed. 
Level 2 has Zlib compression so that a trivial - // move to level 3 will not be allowed - options.compression_per_level = {kNoCompression, kNoCompression, - kZlibCompression}; - int matches = 0, didnt_match = 0, trivial_move = 0, non_trivial = 0; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "Compaction::InputCompressionMatchesOutput:Matches", - [&](void* /*arg*/) { matches++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "Compaction::InputCompressionMatchesOutput:DidntMatch", - [&](void* /*arg*/) { didnt_match++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Reopen(options); - - Random rnd(301); - int key_idx = 0; - - // First three 110KB files are going to level 0 - // After that, (100K, 200K) - for (int num = 0; num < 3; num++) { - GenerateNewFile(&rnd, &key_idx); - } - - // Another 110KB triggers a compaction to 400K file to fill up level 0 - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(4, GetSstFileCount(dbname_)); - - // (1, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4", FilesPerLevel(0)); - - // (1, 4, 1) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,1", FilesPerLevel(0)); - - // (1, 4, 2) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,2", FilesPerLevel(0)); - - // (1, 4, 3) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,3", FilesPerLevel(0)); - - // (1, 4, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,4", FilesPerLevel(0)); - - // (1, 4, 5) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,5", FilesPerLevel(0)); - - // (1, 4, 6) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,6", FilesPerLevel(0)); - - // (1, 4, 7) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,7", FilesPerLevel(0)); - - // (1, 4, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,4,8", FilesPerLevel(0)); - - ASSERT_EQ(matches, 12); - // Currently, the test relies on the number of calls to - // InputCompressionMatchesOutput() per compaction. 
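CompressLevelCompaction exercises per-level compression. For reference, the option is a vector indexed by level; a short configuration sketch (the values mirror the test, the helper name is arbitrary):

    #include "rocksdb/options.h"

    ROCKSDB_NAMESPACE::Options PerLevelCompressionSketch() {
      ROCKSDB_NAMESPACE::Options options;
      options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleLevel;
      options.num_levels = 4;
      // Entry i applies to level i; levels beyond the vector reuse the last
      // entry. Trivial moves are only possible when input and output levels
      // use the same compression, which is what the test counts through the
      // InputCompressionMatchesOutput sync points.
      options.compression_per_level = {ROCKSDB_NAMESPACE::kNoCompression,
                                       ROCKSDB_NAMESPACE::kNoCompression,
                                       ROCKSDB_NAMESPACE::kZlibCompression};
      return options;
    }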
- const int kCallsToInputCompressionMatch = 2; - ASSERT_EQ(didnt_match, 8 * kCallsToInputCompressionMatch); - ASSERT_EQ(trivial_move, 12); - ASSERT_EQ(non_trivial, 8); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - Reopen(options); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - Destroy(options); -} - -TEST_F(DBCompactionTest, SanitizeCompactionOptionsTest) { - Options options = CurrentOptions(); - options.max_background_compactions = 5; - options.soft_pending_compaction_bytes_limit = 0; - options.hard_pending_compaction_bytes_limit = 100; - options.create_if_missing = true; - DestroyAndReopen(options); - ASSERT_EQ(100, db_->GetOptions().soft_pending_compaction_bytes_limit); - - options.max_background_compactions = 3; - options.soft_pending_compaction_bytes_limit = 200; - options.hard_pending_compaction_bytes_limit = 150; - DestroyAndReopen(options); - ASSERT_EQ(150, db_->GetOptions().soft_pending_compaction_bytes_limit); -} - -// This tests for a bug that could cause two level0 compactions running -// concurrently -// TODO(aekmekji): Make sure that the reason this fails when run with -// max_subcompactions > 1 is not a correctness issue but just inherent to -// running parallel L0-L1 compactions -TEST_F(DBCompactionTest, SuggestCompactRangeNoTwoLevel0Compactions) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleLevel; - options.write_buffer_size = 110 << 10; - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 4; - options.num_levels = 4; - options.compression = kNoCompression; - options.max_bytes_for_level_base = 450 << 10; - options.target_file_size_base = 98 << 10; - options.max_write_buffer_number = 2; - options.max_background_compactions = 2; - - DestroyAndReopen(options); - - // fill up the DB - Random rnd(301); - for (int num = 0; num < 10; num++) { - GenerateNewRandomFile(&rnd); - } - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"CompactionJob::Run():Start", - "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1"}, - {"DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2", - "CompactionJob::Run():End"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // trigger L0 compaction - for (int num = 0; num < options.level0_file_num_compaction_trigger + 1; - num++) { - GenerateNewRandomFile(&rnd, /* nowait */ true); - ASSERT_OK(Flush()); - } - - TEST_SYNC_POINT( - "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1"); - - GenerateNewRandomFile(&rnd, /* nowait */ true); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr)); - for (int num = 0; num < options.level0_file_num_compaction_trigger + 1; - num++) { - GenerateNewRandomFile(&rnd, /* nowait */ true); - ASSERT_OK(Flush()); - } - - TEST_SYNC_POINT( - "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2"); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); -} - -static std::string ShortKey(int i) { - assert(i < 10000); - char buf[100]; - snprintf(buf, sizeof(buf), "key%04d", i); - return std::string(buf); -} - -TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { - 
int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // The key size is guaranteed to be <= 8 - class ShortKeyComparator : public Comparator { - int Compare(const ROCKSDB_NAMESPACE::Slice& a, - const ROCKSDB_NAMESPACE::Slice& b) const override { - assert(a.size() <= 8); - assert(b.size() <= 8); - return BytewiseComparator()->Compare(a, b); - } - const char* Name() const override { return "ShortKeyComparator"; } - void FindShortestSeparator( - std::string* start, - const ROCKSDB_NAMESPACE::Slice& limit) const override { - return BytewiseComparator()->FindShortestSeparator(start, limit); - } - void FindShortSuccessor(std::string* key) const override { - return BytewiseComparator()->FindShortSuccessor(key); - } - } short_key_cmp; - Options options = CurrentOptions(); - options.target_file_size_base = 100000000; - options.write_buffer_size = 100000000; - options.max_subcompactions = max_subcompactions_; - options.comparator = &short_key_cmp; - DestroyAndReopen(options); - - int32_t value_size = 10 * 1024; // 10 KB - - Random rnd(301); - std::vector values; - // File with keys [ 0 => 99 ] - for (int i = 0; i < 100; i++) { - values.push_back(rnd.RandomString(value_size)); - ASSERT_OK(Put(ShortKey(i), values[i])); - } - ASSERT_OK(Flush()); - - ASSERT_EQ("1", FilesPerLevel(0)); - // Compaction will do L0=>L1 (trivial move) then move L1 files to L3 - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 3; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); - ASSERT_EQ(trivial_move, 1); - ASSERT_EQ(non_trivial_move, 0); - - // File with keys [ 100 => 199 ] - for (int i = 100; i < 200; i++) { - values.push_back(rnd.RandomString(value_size)); - ASSERT_OK(Put(ShortKey(i), values[i])); - } - ASSERT_OK(Flush()); - - ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); - // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) - // then compacte the bottommost level L3=>L3 (non trivial move) - compact_options = CompactRangeOptions(); - compact_options.bottommost_level_compaction = - BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); - ASSERT_EQ(trivial_move, 4); - ASSERT_EQ(non_trivial_move, 1); - - // File with keys [ 200 => 299 ] - for (int i = 200; i < 300; i++) { - values.push_back(rnd.RandomString(value_size)); - ASSERT_OK(Put(ShortKey(i), values[i])); - } - ASSERT_OK(Flush()); - - ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); - trivial_move = 0; - non_trivial_move = 0; - compact_options = CompactRangeOptions(); - compact_options.bottommost_level_compaction = - BottommostLevelCompaction::kSkip; - // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) - // and will skip bottommost level compaction - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - ASSERT_EQ("0,0,0,2", FilesPerLevel(0)); - ASSERT_EQ(trivial_move, 3); - ASSERT_EQ(non_trivial_move, 0); - - for (int i = 0; i < 300; i++) { - ASSERT_EQ(Get(ShortKey(i)), values[i]); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - 
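ForceBottommostLevelCompaction exercises kForceOptimized and kSkip, and an earlier test used kForce; these are the CompactRangeOptions::bottommost_level_compaction modes. A minimal usage sketch (function name is illustrative):

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"

    ROCKSDB_NAMESPACE::Status CompactAllTheWayDown(ROCKSDB_NAMESPACE::DB* db) {
      ROCKSDB_NAMESPACE::CompactRangeOptions cro;
      // kSkip                   -> never rewrite bottommost-level files
      // kIfHaveCompactionFilter -> default; rewrite only if a compaction
      //                            filter is configured
      // kForce                  -> always rewrite bottommost-level files
      // kForceOptimized         -> like kForce, but avoids recompacting files
      //                            just produced by the same manual compaction,
      //                            so it may leave more than one output file
      cro.bottommost_level_compaction =
          ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForceOptimized;
      return db->CompactRange(cro, nullptr, nullptr);
    }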
-TEST_P(DBCompactionTestWithParam, IntraL0Compaction) { - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.level0_file_num_compaction_trigger = 5; - options.max_background_compactions = 2; - options.max_subcompactions = max_subcompactions_; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.write_buffer_size = 2 << 20; // 2MB - - BlockBasedTableOptions table_options; - table_options.block_cache = NewLRUCache(64 << 20); // 64MB - table_options.cache_index_and_filter_blocks = true; - table_options.pin_l0_filter_and_index_blocks_in_cache = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - - const size_t kValueSize = 1 << 20; - Random rnd(301); - std::string value(rnd.RandomString(kValueSize)); - - // The L0->L1 must be picked before we begin flushing files to trigger - // intra-L0 compaction, and must not finish until after an intra-L0 - // compaction has been picked. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"LevelCompactionPicker::PickCompaction:Return", - "DBCompactionTest::IntraL0Compaction:L0ToL1Ready"}, - {"LevelCompactionPicker::PickCompactionBySize:0", - "CompactionJob::Run():Start"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // index: 0 1 2 3 4 5 6 7 8 9 - // size: 1MB 1MB 1MB 1MB 1MB 2MB 1MB 1MB 1MB 1MB - // score: 1.5 1.3 1.5 2.0 inf - // - // Files 0-4 will be included in an L0->L1 compaction. - // - // L0->L0 will be triggered since the sync points guarantee compaction to base - // level is still blocked when files 5-9 trigger another compaction. - // - // Files 6-9 are the longest span of available files for which - // work-per-deleted-file decreases (see "score" row above). - for (int i = 0; i < 10; ++i) { - ASSERT_OK(Put(Key(0), "")); // prevents trivial move - if (i == 5) { - TEST_SYNC_POINT("DBCompactionTest::IntraL0Compaction:L0ToL1Ready"); - ASSERT_OK(Put(Key(i + 1), value + value)); - } else { - ASSERT_OK(Put(Key(i + 1), value)); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - std::vector> level_to_files; - dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), - &level_to_files); - ASSERT_GE(level_to_files.size(), 2); // at least L0 and L1 - // L0 has the 2MB file (not compacted) and 4MB file (output of L0->L0) - ASSERT_EQ(2, level_to_files[0].size()); - ASSERT_GT(level_to_files[1].size(), 0); - for (int i = 0; i < 2; ++i) { - ASSERT_GE(level_to_files[0][i].fd.file_size, 1 << 21); - } - - // The index/filter in the file produced by intra-L0 should not be pinned. - // That means clearing unref'd entries in block cache and re-accessing the - // file produced by intra-L0 should bump the index block miss count. - uint64_t prev_index_misses = - TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS); - table_options.block_cache->EraseUnRefEntries(); - ASSERT_EQ("", Get(Key(0))); - ASSERT_EQ(prev_index_misses + 1, - TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); -} - -TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) { - // regression test for issue #2722: L0->L0 compaction can resurrect deleted - // keys from older L0 files if L1+ files' key-ranges do not include the key. 
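The IntraL0Compaction test above also verifies that pin_l0_filter_and_index_blocks_in_cache does not apply to the output file of an L0->L0 compaction. For context, the relevant table options look like this (cache size is arbitrary):

    #include "rocksdb/cache.h"
    #include "rocksdb/options.h"
    #include "rocksdb/table.h"

    void ConfigurePinnedL0Blocks(ROCKSDB_NAMESPACE::Options* options) {
      ROCKSDB_NAMESPACE::BlockBasedTableOptions table_options;
      table_options.block_cache = ROCKSDB_NAMESPACE::NewLRUCache(64 << 20);
      // Keep index and filter blocks in the block cache instead of heap memory.
      table_options.cache_index_and_filter_blocks = true;
      // Pin index/filter blocks of L0 files so lookups against a write-heavy
      // L0 do not repeatedly miss the cache.
      table_options.pin_l0_filter_and_index_blocks_in_cache = true;
      options->table_factory.reset(
          ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(table_options));
    }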
- Options options = CurrentOptions(); - options.compression = kNoCompression; - options.level0_file_num_compaction_trigger = 5; - options.max_background_compactions = 2; - options.max_subcompactions = max_subcompactions_; - DestroyAndReopen(options); - - const size_t kValueSize = 1 << 20; - Random rnd(301); - std::string value(rnd.RandomString(kValueSize)); - - // The L0->L1 must be picked before we begin flushing files to trigger - // intra-L0 compaction, and must not finish until after an intra-L0 - // compaction has been picked. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"LevelCompactionPicker::PickCompaction:Return", - "DBCompactionTest::IntraL0CompactionDoesNotObsoleteDeletions:" - "L0ToL1Ready"}, - {"LevelCompactionPicker::PickCompactionBySize:0", - "CompactionJob::Run():Start"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // index: 0 1 2 3 4 5 6 7 8 9 - // size: 1MB 1MB 1MB 1MB 1MB 1MB 1MB 1MB 1MB 1MB - // score: 1.25 1.33 1.5 2.0 inf - // - // Files 0-4 will be included in an L0->L1 compaction. - // - // L0->L0 will be triggered since the sync points guarantee compaction to base - // level is still blocked when files 5-9 trigger another compaction. All files - // 5-9 are included in the L0->L0 due to work-per-deleted file decreasing. - // - // Put a key-value in files 0-4. Delete that key in files 5-9. Verify the - // L0->L0 preserves the deletion such that the key remains deleted. - for (int i = 0; i < 10; ++i) { - // key 0 serves both to prevent trivial move and as the key we want to - // verify is not resurrected by L0->L0 compaction. - if (i < 5) { - ASSERT_OK(Put(Key(0), "")); - } else { - ASSERT_OK(Delete(Key(0))); - } - if (i == 5) { - TEST_SYNC_POINT( - "DBCompactionTest::IntraL0CompactionDoesNotObsoleteDeletions:" - "L0ToL1Ready"); - } - ASSERT_OK(Put(Key(i + 1), value)); - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - std::vector> level_to_files; - dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), - &level_to_files); - ASSERT_GE(level_to_files.size(), 2); // at least L0 and L1 - // L0 has a single output file from L0->L0 - ASSERT_EQ(1, level_to_files[0].size()); - ASSERT_GT(level_to_files[1].size(), 0); - ASSERT_GE(level_to_files[0][0].fd.file_size, 1 << 22); - - ReadOptions roptions; - std::string result; - ASSERT_TRUE(db_->Get(roptions, Key(0), &result).IsNotFound()); -} - -TEST_P(DBCompactionTestWithParam, FullCompactionInBottomPriThreadPool) { - const int kNumFilesTrigger = 3; - Env::Default()->SetBackgroundThreads(1, Env::Priority::BOTTOM); - for (bool use_universal_compaction : {false, true}) { - Options options = CurrentOptions(); - if (use_universal_compaction) { - options.compaction_style = kCompactionStyleUniversal; - } else { - options.compaction_style = kCompactionStyleLevel; - options.level_compaction_dynamic_level_bytes = true; - } - options.num_levels = 4; - options.write_buffer_size = 100 << 10; // 100KB - options.target_file_size_base = 32 << 10; // 32KB - options.level0_file_num_compaction_trigger = kNumFilesTrigger; - // Trigger compaction if size amplification exceeds 110% - options.compaction_options_universal.max_size_amplification_percent = 110; - DestroyAndReopen(options); - - int num_bottom_pri_compactions = 0; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BGWorkBottomCompaction", - [&](void* /*arg*/) { ++num_bottom_pri_compactions; }); - 
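FullCompactionInBottomPriThreadPool counts compactions dispatched through DBImpl::BGWorkBottomCompaction. The bottom-priority pool is configured on the Env; a minimal sketch of enabling it:

    #include "rocksdb/env.h"

    void EnableBottomPriPool() {
      // With at least one BOTTOM thread, RocksDB schedules full (universal)
      // compactions and compactions into the bottommost level on this
      // low-importance pool so they do not starve L0 compactions.
      ROCKSDB_NAMESPACE::Env::Default()->SetBackgroundThreads(
          1, ROCKSDB_NAMESPACE::Env::Priority::BOTTOM);
    }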
SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int num = 0; num < kNumFilesTrigger; num++) { - ASSERT_EQ(NumSortedRuns(), num); - int key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(1, num_bottom_pri_compactions); - - // Verify that size amplification did occur - ASSERT_EQ(NumSortedRuns(), 1); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } - Env::Default()->SetBackgroundThreads(0, Env::Priority::BOTTOM); -} - -TEST_F(DBCompactionTest, CancelCompactionWaitingOnConflict) { - // This test verifies cancellation of a compaction waiting to be scheduled due - // to conflict with a running compaction. - // - // A `CompactRange()` in universal compacts all files, waiting for files to - // become available if they are locked for another compaction. This test - // triggers an automatic compaction that blocks a `CompactRange()`, and - // verifies that `DisableManualCompaction()` can successfully cancel the - // `CompactRange()` without waiting for the automatic compaction to finish. - const int kNumSortedRuns = 4; - - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.level0_file_num_compaction_trigger = kNumSortedRuns; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - Reopen(options); - - test::SleepingBackgroundTask auto_compaction_sleeping_task; - // Block automatic compaction when it runs in the callback - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::Run():Start", - [&](void* /*arg*/) { auto_compaction_sleeping_task.DoSleep(); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Fill overlapping files in L0 to trigger an automatic compaction - Random rnd(301); - for (int i = 0; i < kNumSortedRuns; ++i) { - int key_idx = 0; - GenerateNewFile(&rnd, &key_idx, true /* nowait */); - } - auto_compaction_sleeping_task.WaitUntilSleeping(); - - // Make sure the manual compaction has seen the conflict before being canceled - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ColumnFamilyData::CompactRange:Return", - "DBCompactionTest::CancelCompactionWaitingOnConflict:" - "PreDisableManualCompaction"}}); - auto manual_compaction_thread = port::Thread([this]() { - ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr) - .IsIncomplete()); - }); - - // Cancel it. Thread should be joinable, i.e., manual compaction was unblocked - // despite finding a conflict with an automatic compaction that is still - // running - TEST_SYNC_POINT( - "DBCompactionTest::CancelCompactionWaitingOnConflict:" - "PreDisableManualCompaction"); - db_->DisableManualCompaction(); - manual_compaction_thread.join(); -} - -TEST_F(DBCompactionTest, OptimizedDeletionObsoleting) { - // Deletions can be dropped when compacted to non-last level if they fall - // outside the lower-level files' key-ranges. - const int kNumL0Files = 4; - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - - // put key 1 and 3 in separate L1, L2 files. - // So key 0, 2, and 4+ fall outside these levels' key-ranges. 
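CancelCompactionWaitingOnConflict shows that a CompactRange() blocked behind an automatic compaction returns Status::Incomplete() once manual compaction is disabled. The cancellation API itself is small; a sketch of the calling pattern (thread handling simplified, function name illustrative):

    #include <cassert>
    #include <thread>
    #include "rocksdb/db.h"

    void CancelBlockedManualCompaction(ROCKSDB_NAMESPACE::DB* db) {
      std::thread manual([db]() {
        // May block waiting for files locked by an automatic compaction;
        // returns Incomplete() if manual compaction is disabled meanwhile.
        ROCKSDB_NAMESPACE::Status s = db->CompactRange(
            ROCKSDB_NAMESPACE::CompactRangeOptions(), nullptr, nullptr);
        assert(s.ok() || s.IsIncomplete());
      });
      db->DisableManualCompaction();  // unblocks and cancels the CompactRange
      manual.join();
      db->EnableManualCompaction();   // re-allow future manual compactions
    }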
- for (int level = 2; level >= 1; --level) { - for (int i = 0; i < 2; ++i) { - ASSERT_OK(Put(Key(2 * i + 1), "val")); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(level); - ASSERT_EQ(2, NumTableFilesAtLevel(level)); - } - - // Delete keys in range [1, 4]. These L0 files will be compacted with L1: - // - Tombstones for keys 2 and 4 can be dropped early. - // - Tombstones for keys 1 and 3 must be kept due to L2 files' key-ranges. - for (int i = 0; i < kNumL0Files; ++i) { - ASSERT_OK(Put(Key(0), "val")); // sentinel to prevent trivial move - ASSERT_OK(Delete(Key(i + 1))); - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - for (int i = 0; i < kNumL0Files; ++i) { - std::string value; - ASSERT_TRUE(db_->Get(ReadOptions(), Key(i + 1), &value).IsNotFound()); - } - ASSERT_EQ(2, options.statistics->getTickerCount( - COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE)); - ASSERT_EQ(2, - options.statistics->getTickerCount(COMPACTION_KEY_DROP_OBSOLETE)); -} - -TEST_F(DBCompactionTest, CompactFilesPendingL0Bug) { - // https://www.facebook.com/groups/rocksdb.dev/permalink/1389452781153232/ - // CompactFiles() had a bug where it failed to pick a compaction when an L0 - // compaction existed, but marked it as scheduled anyways. It'd never be - // unmarked as scheduled, so future compactions or DB close could hang. - const int kNumL0Files = 5; - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files - 1; - options.max_background_compactions = 2; - DestroyAndReopen(options); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"LevelCompactionPicker::PickCompaction:Return", - "DBCompactionTest::CompactFilesPendingL0Bug:Picked"}, - {"DBCompactionTest::CompactFilesPendingL0Bug:ManualCompacted", - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - auto schedule_multi_compaction_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - - // Files 0-3 will be included in an L0->L1 compaction. - // - // File 4 will be included in a call to CompactFiles() while the first - // compaction is running. - for (int i = 0; i < kNumL0Files - 1; ++i) { - ASSERT_OK(Put(Key(0), "val")); // sentinel to prevent trivial move - ASSERT_OK(Put(Key(i + 1), "val")); - ASSERT_OK(Flush()); - } - TEST_SYNC_POINT("DBCompactionTest::CompactFilesPendingL0Bug:Picked"); - // file 4 flushed after 0-3 picked - ASSERT_OK(Put(Key(kNumL0Files), "val")); - ASSERT_OK(Flush()); - - // previously DB close would hang forever as this situation caused scheduled - // compactions count to never decrement to zero. - ColumnFamilyMetaData cf_meta; - dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta); - ASSERT_EQ(kNumL0Files, cf_meta.levels[0].files.size()); - std::vector input_filenames; - input_filenames.push_back(cf_meta.levels[0].files.front().name); - ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), input_filenames, - 0 /* output_level */)); - TEST_SYNC_POINT("DBCompactionTest::CompactFilesPendingL0Bug:ManualCompacted"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBCompactionTest, CompactFilesOverlapInL0Bug) { - // Regression test for bug of not pulling in L0 files that overlap the user- - // specified input files in time- and key-ranges. 
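CompactFilesPendingL0Bug drives CompactFiles() directly from column family metadata. The general calling pattern, sketched (helper name is arbitrary):

    #include <string>
    #include <vector>
    #include "rocksdb/db.h"
    #include "rocksdb/metadata.h"

    ROCKSDB_NAMESPACE::Status CompactOneL0File(ROCKSDB_NAMESPACE::DB* db) {
      ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta;
      db->GetColumnFamilyMetaData(&cf_meta);  // default column family
      if (cf_meta.levels.empty() || cf_meta.levels[0].files.empty()) {
        return ROCKSDB_NAMESPACE::Status::NotFound("no L0 files");
      }
      // CompactFiles() takes SST file names as listed in the metadata plus an
      // output level; RocksDB pulls in any additional L0 files needed to keep
      // key- and time-ranges consistent (the overlap bug fixed by the test
      // below this one).
      std::vector<std::string> inputs = {cf_meta.levels[0].files.front().name};
      return db->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), inputs,
                              1 /* output_level */);
    }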
- ASSERT_OK(Put(Key(0), "old_val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(0), "new_val")); - ASSERT_OK(Flush()); - - ColumnFamilyMetaData cf_meta; - dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta); - ASSERT_GE(cf_meta.levels.size(), 2); - ASSERT_EQ(2, cf_meta.levels[0].files.size()); - - // Compacting {new L0 file, L1 file} should pull in the old L0 file since it - // overlaps in key-range and time-range. - std::vector input_filenames; - input_filenames.push_back(cf_meta.levels[0].files.front().name); - ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), input_filenames, - 1 /* output_level */)); - ASSERT_EQ("new_val", Get(Key(0))); -} - -TEST_F(DBCompactionTest, DeleteFilesInRangeConflictWithCompaction) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - const Snapshot* snapshot = nullptr; - const int kMaxKey = 10; - - for (int i = 0; i < kMaxKey; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - ASSERT_OK(Delete(Key(i))); - if (!snapshot) { - snapshot = db_->GetSnapshot(); - } - } - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - ASSERT_OK(Put(Key(kMaxKey), Key(kMaxKey))); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // test DeleteFilesInRange() deletes the files already picked for compaction - SyncPoint::GetInstance()->LoadDependency( - {{"VersionSet::LogAndApply:WriteManifestStart", - "BackgroundCallCompaction:0"}, - {"DBImpl::BackgroundCompaction:Finish", - "VersionSet::LogAndApply:WriteManifestDone"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - // release snapshot which mark bottommost file for compaction - db_->ReleaseSnapshot(snapshot); - std::string begin_string = Key(0); - std::string end_string = Key(kMaxKey + 1); - Slice begin(begin_string); - Slice end(end_string); - ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end)); - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBCompactionTest, CompactBottomLevelFilesWithDeletions) { - // bottom-level files may contain deletions due to snapshots protecting the - // deleted keys. Once the snapshot is released, we should see files with many - // such deletions undergo single-file compactions. - const int kNumKeysPerFile = 1024; - const int kNumLevelFiles = 4; - const int kValueSize = 128; - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.level0_file_num_compaction_trigger = kNumLevelFiles; - // inflate it a bit to account for key/metadata overhead - options.target_file_size_base = 120 * kNumKeysPerFile * kValueSize / 100; - CreateAndReopenWithCF({"one"}, options); - - Random rnd(301); - const Snapshot* snapshot = nullptr; - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - if (i == kNumLevelFiles - 1) { - snapshot = db_->GetSnapshot(); - // delete every other key after grabbing a snapshot, so these deletions - // and the keys they cover can't be dropped until after the snapshot is - // released. 
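DeleteFilesInRangeConflictWithCompaction exercises DeleteFilesInRange() racing with an already-picked compaction. The API lives in rocksdb/convenience.h; a minimal sketch:

    #include <string>
    #include "rocksdb/convenience.h"
    #include "rocksdb/db.h"

    ROCKSDB_NAMESPACE::Status DropObsoleteRange(ROCKSDB_NAMESPACE::DB* db,
                                                const std::string& begin_key,
                                                const std::string& end_key) {
      ROCKSDB_NAMESPACE::Slice begin(begin_key);
      ROCKSDB_NAMESPACE::Slice end(end_key);
      // Deletes whole SST files fully contained in the range; files straddling
      // a boundary are left alone, so some keys in the range may survive.
      // Intended for bulk-dropping data already known to be obsolete.
      return ROCKSDB_NAMESPACE::DeleteFilesInRange(db, db->DefaultColumnFamily(),
                                                   &begin, &end);
    }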
- for (int j = 0; j < kNumLevelFiles * kNumKeysPerFile; j += 2) { - ASSERT_OK(Delete(Key(j))); - } - } - ASSERT_OK(Flush()); - if (i < kNumLevelFiles - 1) { - ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); - } - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(kNumLevelFiles, NumTableFilesAtLevel(1)); - - std::vector pre_release_metadata, post_release_metadata; - db_->GetLiveFilesMetaData(&pre_release_metadata); - // just need to bump seqnum so ReleaseSnapshot knows the newest key in the SST - // files does not need to be preserved in case of a future snapshot. - ASSERT_OK(Put(Key(0), "val")); - ASSERT_NE(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_); - // release snapshot and wait for compactions to finish. Single-file - // compactions should be triggered, which reduce the size of each bottom-level - // file without changing file count. - db_->ReleaseSnapshot(snapshot); - ASSERT_EQ(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - ASSERT_TRUE(compaction->compaction_reason() == - CompactionReason::kBottommostFiles); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - db_->GetLiveFilesMetaData(&post_release_metadata); - ASSERT_EQ(pre_release_metadata.size(), post_release_metadata.size()); - - for (size_t i = 0; i < pre_release_metadata.size(); ++i) { - const auto& pre_file = pre_release_metadata[i]; - const auto& post_file = post_release_metadata[i]; - ASSERT_EQ(1, pre_file.level); - ASSERT_EQ(1, post_file.level); - // each file is smaller than it was before as it was rewritten without - // deletion markers/deleted keys. - ASSERT_LT(post_file.size, pre_file.size); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBCompactionTest, NoCompactBottomLevelFilesWithDeletions) { - // bottom-level files may contain deletions due to snapshots protecting the - // deleted keys. Once the snapshot is released, we should see files with many - // such deletions undergo single-file compactions. But when disabling auto - // compactions, it shouldn't be triggered which may causing too many - // background jobs. - const int kNumKeysPerFile = 1024; - const int kNumLevelFiles = 4; - const int kValueSize = 128; - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.level0_file_num_compaction_trigger = kNumLevelFiles; - // inflate it a bit to account for key/metadata overhead - options.target_file_size_base = 120 * kNumKeysPerFile * kValueSize / 100; - Reopen(options); - - Random rnd(301); - const Snapshot* snapshot = nullptr; - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - if (i == kNumLevelFiles - 1) { - snapshot = db_->GetSnapshot(); - // delete every other key after grabbing a snapshot, so these deletions - // and the keys they cover can't be dropped until after the snapshot is - // released. 
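Both bottommost-deletion tests hinge on a snapshot keeping tombstones alive until it is released. The lifecycle, sketched:

    #include "rocksdb/db.h"

    void SnapshotKeepsTombstones(ROCKSDB_NAMESPACE::DB* db) {
      const ROCKSDB_NAMESPACE::Snapshot* snapshot = db->GetSnapshot();
      // Tombstones written after this point cannot be collapsed with the older
      // values they cover while the snapshot is live, because a reader pinned
      // to `snapshot` must still observe the pre-delete state.
      ROCKSDB_NAMESPACE::ReadOptions ro;
      ro.snapshot = snapshot;
      // ... reads through `ro` see the old state ...
      db->ReleaseSnapshot(snapshot);
      // Only now can compactions drop the covered tombstones; the test above
      // checks that bottommost files get marked for compaction at this point.
    }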
- for (int j = 0; j < kNumLevelFiles * kNumKeysPerFile; j += 2) { - ASSERT_OK(Delete(Key(j))); - } - } - ASSERT_OK(Flush()); - if (i < kNumLevelFiles - 1) { - ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); - } - } - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr)); - ASSERT_EQ(kNumLevelFiles, NumTableFilesAtLevel(1)); - - std::vector pre_release_metadata, post_release_metadata; - db_->GetLiveFilesMetaData(&pre_release_metadata); - // just need to bump seqnum so ReleaseSnapshot knows the newest key in the SST - // files does not need to be preserved in case of a future snapshot. - ASSERT_OK(Put(Key(0), "val")); - - // release snapshot and no compaction should be triggered. - std::atomic num_compactions{0}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:Start", - [&](void* /*arg*/) { num_compactions.fetch_add(1); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - db_->ReleaseSnapshot(snapshot); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(0, num_compactions); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - db_->GetLiveFilesMetaData(&post_release_metadata); - ASSERT_EQ(pre_release_metadata.size(), post_release_metadata.size()); - for (size_t i = 0; i < pre_release_metadata.size(); ++i) { - const auto& pre_file = pre_release_metadata[i]; - const auto& post_file = post_release_metadata[i]; - ASSERT_EQ(1, pre_file.level); - ASSERT_EQ(1, post_file.level); - // each file is same as before with deletion markers/deleted keys. - ASSERT_EQ(post_file.size, pre_file.size); - } -} - -TEST_F(DBCompactionTest, RoundRobinTtlCompactionNormal) { - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.level0_file_num_compaction_trigger = 20; - options.ttl = 24 * 60 * 60; // 24 hours - options.compaction_pri = kRoundRobin; - env_->now_cpu_count_.store(0); - env_->SetMockSleep(); - options.env = env_; - - // add a small second for each wait time, to make sure the file is expired - int small_seconds = 1; - - std::atomic_int ttl_compactions{0}; - std::atomic_int round_robin_ttl_compactions{0}; - std::atomic_int other_compactions{0}; - - SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - auto compaction_reason = compaction->compaction_reason(); - if (compaction_reason == CompactionReason::kTtl) { - ttl_compactions++; - } else if (compaction_reason == CompactionReason::kRoundRobinTtl) { - round_robin_ttl_compactions++; - } else { - other_compactions++; - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndReopen(options); - - // Setup the files from lower level to up level, each file is 1 hour's older - // than the next one. 
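The round-robin TTL tests combine options.ttl with compaction_pri = kRoundRobin. For reference, the knobs involved (the 24-hour value mirrors the test, the helper name is arbitrary):

    #include "rocksdb/options.h"

    ROCKSDB_NAMESPACE::Options TtlRoundRobinOptionsSketch() {
      ROCKSDB_NAMESPACE::Options options;
      // Files whose data is older than `ttl` become eligible for compaction,
      // so stale data keeps flowing toward the bottom level.
      options.ttl = 24 * 60 * 60;  // seconds
      // kRoundRobin picks compaction inputs by cycling a per-level cursor
      // instead of by overlap/size heuristics.
      options.compaction_pri = ROCKSDB_NAMESPACE::kRoundRobin;
      options.level0_file_num_compaction_trigger = 20;
      options.compression = ROCKSDB_NAMESPACE::kNoCompression;
      return options;
    }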
- // create 10 files on the last level (L6) - for (int i = 0; i < 10; i++) { - for (int j = 0; j < 100; j++) { - ASSERT_OK(Put(Key(i * 100 + j), "value" + std::to_string(i * 100 + j))); - } - ASSERT_OK(Flush()); - env_->MockSleepForSeconds(60 * 60); // generate 1 file per hour - } - MoveFilesToLevel(6); - - // create 5 files on L5 - for (int i = 0; i < 5; i++) { - for (int j = 0; j < 200; j++) { - ASSERT_OK(Put(Key(i * 200 + j), "value" + std::to_string(i * 200 + j))); - } - ASSERT_OK(Flush()); - env_->MockSleepForSeconds(60 * 60); - } - MoveFilesToLevel(5); - - // create 3 files on L4 - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 300; j++) { - ASSERT_OK(Put(Key(i * 300 + j), "value" + std::to_string(i * 300 + j))); - } - ASSERT_OK(Flush()); - env_->MockSleepForSeconds(60 * 60); - } - MoveFilesToLevel(4); - - // The LSM tree should be like: - // L4: [0, 299], [300, 599], [600, 899] - // L5: [0, 199] [200, 399]...............[800, 999] - // L6: [0,99][100,199][200,299][300,399]...............[800,899][900,999] - ASSERT_EQ("0,0,0,0,3,5,10", FilesPerLevel()); - - // make sure the first L5 file is expired - env_->MockSleepForSeconds(16 * 60 * 60 + small_seconds++); - - // trigger TTL compaction - ASSERT_OK(Put(Key(4), "value" + std::to_string(1))); - ASSERT_OK(Put(Key(5), "value" + std::to_string(1))); - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // verify there's a RoundRobin TTL compaction - ASSERT_EQ(1, round_robin_ttl_compactions); - round_robin_ttl_compactions = 0; - - // expire 2 more files - env_->MockSleepForSeconds(2 * 60 * 60 + small_seconds++); - // trigger TTL compaction - ASSERT_OK(Put(Key(4), "value" + std::to_string(2))); - ASSERT_OK(Put(Key(5), "value" + std::to_string(2))); - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(2, round_robin_ttl_compactions); - round_robin_ttl_compactions = 0; - - // expire 4 more files, 2 out of 3 files on L4 are expired - env_->MockSleepForSeconds(4 * 60 * 60 + small_seconds++); - // trigger TTL compaction - ASSERT_OK(Put(Key(6), "value" + std::to_string(3))); - ASSERT_OK(Put(Key(7), "value" + std::to_string(3))); - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(1, NumTableFilesAtLevel(4)); - ASSERT_EQ(0, NumTableFilesAtLevel(5)); - - ASSERT_GT(round_robin_ttl_compactions, 0); - round_robin_ttl_compactions = 0; - - // make the first L0 file expired, which triggers a normal TTL compaction - // instead of roundrobin TTL compaction, it will also include an extra file - // from L0 because of overlap - ASSERT_EQ(0, ttl_compactions); - env_->MockSleepForSeconds(19 * 60 * 60 + small_seconds++); - - // trigger TTL compaction - ASSERT_OK(Put(Key(6), "value" + std::to_string(4))); - ASSERT_OK(Put(Key(7), "value" + std::to_string(4))); - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // L0 -> L1 compaction is normal TTL compaction, L1 -> next levels compactions - // are RoundRobin TTL compaction. 
- ASSERT_GT(ttl_compactions, 0); - ttl_compactions = 0; - ASSERT_GT(round_robin_ttl_compactions, 0); - round_robin_ttl_compactions = 0; - - // All files are expired, so only the last level has data - env_->MockSleepForSeconds(24 * 60 * 60); - // trigger TTL compaction - ASSERT_OK(Put(Key(6), "value" + std::to_string(4))); - ASSERT_OK(Put(Key(7), "value" + std::to_string(4))); - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel()); - - ASSERT_GT(ttl_compactions, 0); - ttl_compactions = 0; - ASSERT_GT(round_robin_ttl_compactions, 0); - round_robin_ttl_compactions = 0; - - ASSERT_EQ(0, other_compactions); -} - -TEST_F(DBCompactionTest, RoundRobinTtlCompactionUnsortedTime) { - // This is to test the case that the RoundRobin compaction cursor not pointing - // to the oldest file, RoundRobin compaction should still compact the file - // after cursor until all expired files are compacted. - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.level0_file_num_compaction_trigger = 20; - options.ttl = 24 * 60 * 60; // 24 hours - options.compaction_pri = kRoundRobin; - env_->now_cpu_count_.store(0); - env_->SetMockSleep(); - options.env = env_; - - std::atomic_int ttl_compactions{0}; - std::atomic_int round_robin_ttl_compactions{0}; - std::atomic_int other_compactions{0}; - - SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - auto compaction_reason = compaction->compaction_reason(); - if (compaction_reason == CompactionReason::kTtl) { - ttl_compactions++; - } else if (compaction_reason == CompactionReason::kRoundRobinTtl) { - round_robin_ttl_compactions++; - } else { - other_compactions++; - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndReopen(options); - - // create 10 files on the last level (L6) - for (int i = 0; i < 10; i++) { - for (int j = 0; j < 100; j++) { - ASSERT_OK(Put(Key(i * 100 + j), "value" + std::to_string(i * 100 + j))); - } - ASSERT_OK(Flush()); - env_->MockSleepForSeconds(60 * 60); // generate 1 file per hour - } - MoveFilesToLevel(6); - - // create 5 files on L5 - for (int i = 0; i < 5; i++) { - for (int j = 0; j < 200; j++) { - ASSERT_OK(Put(Key(i * 200 + j), "value" + std::to_string(i * 200 + j))); - } - ASSERT_OK(Flush()); - env_->MockSleepForSeconds(60 * 60); // 1 hour - } - MoveFilesToLevel(5); - - // The LSM tree should be like: - // L5: [0, 199] [200, 399] [400,599] [600,799] [800, 999] - // L6: [0,99][100,199][200,299][300,399]....................[800,899][900,999] - ASSERT_EQ("0,0,0,0,0,5,10", FilesPerLevel()); - - // point the compaction cursor to the 4th file on L5 - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - VersionStorageInfo* storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - const InternalKey split_cursor = InternalKey(Key(600), 100000, kTypeValue); - storage_info->AddCursorForOneLevel(5, split_cursor); - - // make the first file on L5 expired, there should be 3 TTL compactions: - // 4th one, 5th one, then 1st one. 
- env_->MockSleepForSeconds(19 * 60 * 60 + 1); - // trigger TTL compaction - ASSERT_OK(Put(Key(6), "value" + std::to_string(4))); - ASSERT_OK(Put(Key(7), "value" + std::to_string(4))); - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(2, NumTableFilesAtLevel(5)); - - ASSERT_EQ(3, round_robin_ttl_compactions); - ASSERT_EQ(0, ttl_compactions); - ASSERT_EQ(0, other_compactions); -} - -TEST_F(DBCompactionTest, LevelCompactExpiredTtlFiles) { - const int kNumKeysPerFile = 32; - const int kNumLevelFiles = 2; - const int kValueSize = 1024; - - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.ttl = 24 * 60 * 60; // 24 hours - options.max_open_files = -1; - env_->SetMockSleep(); - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - MoveFilesToLevel(3); - ASSERT_EQ("0,0,0,2", FilesPerLevel()); - - // Delete previously written keys. - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("2,0,0,2", FilesPerLevel()); - MoveFilesToLevel(1); - ASSERT_EQ("0,2,0,2", FilesPerLevel()); - - env_->MockSleepForSeconds(36 * 60 * 60); // 36 hours - ASSERT_EQ("0,2,0,2", FilesPerLevel()); - - // Just do a simple write + flush so that the Ttl expired files get - // compacted. - ASSERT_OK(Put("a", "1")); - ASSERT_OK(Flush()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kTtl); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // All non-L0 files are deleted, as they contained only deleted data. - ASSERT_EQ("1", FilesPerLevel()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - // Test dynamically changing ttl. - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - DestroyAndReopen(options); - - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - MoveFilesToLevel(3); - ASSERT_EQ("0,0,0,2", FilesPerLevel()); - - // Delete previously written keys. - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("2,0,0,2", FilesPerLevel()); - MoveFilesToLevel(1); - ASSERT_EQ("0,2,0,2", FilesPerLevel()); - - // Move time forward by 12 hours, and make sure that compaction still doesn't - // trigger as ttl is set to 24 hours. 
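LevelCompactExpiredTtlFiles also covers changing ttl at runtime; mutable column-family options go through DB::SetOptions() as string key/value pairs. A sketch of the call the test makes next (helper name is illustrative):

    #include "rocksdb/db.h"

    ROCKSDB_NAMESPACE::Status ShrinkTtlToTenHours(ROCKSDB_NAMESPACE::DB* db) {
      // Mutable CF options are applied without reopening the DB. Lowering ttl
      // below the age of existing files makes them immediately eligible for
      // TTL compaction.
      return db->SetOptions({{"ttl", "36000"}});  // 10 hours, in seconds
    }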
- env_->MockSleepForSeconds(12 * 60 * 60); - ASSERT_OK(Put("a", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("1,2,0,2", FilesPerLevel()); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kTtl); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Dynamically change ttl to 10 hours. - // This should trigger a ttl compaction, as 12 hours have already passed. - ASSERT_OK(dbfull()->SetOptions({{"ttl", "36000"}})); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // All non-L0 files are deleted, as they contained only deleted data. - ASSERT_EQ("1", FilesPerLevel()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBCompactionTest, LevelTtlCompactionOutputCuttingIteractingWithOther) { - // This test is for a bug fix in CompactionOutputs::ShouldStopBefore() where - // TTL states were not being updated for keys that ShouldStopBefore() would - // return true for reasons other than TTL. - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.ttl = 24 * 60 * 60; // 24 hours - options.max_open_files = -1; - options.compaction_pri = kMinOverlappingRatio; - env_->SetMockSleep(); - options.env = env_; - options.target_file_size_base = 4 << 10; - options.disable_auto_compactions = true; - options.level_compaction_dynamic_file_size = false; - - DestroyAndReopen(options); - Random rnd(301); - - // This makes sure the manual compaction below - // is not a bottommost compaction as TTL is only - // for non-bottommost compactions. - ASSERT_OK(Put(Key(3), rnd.RandomString(1 << 10))); - ASSERT_OK(Put(Key(0), rnd.RandomString(1 << 10))); - ASSERT_OK(Flush()); - MoveFilesToLevel(6); - - // L2: - ASSERT_OK(Put(Key(2), rnd.RandomString(4 << 10))); - ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10))); - ASSERT_OK(Flush()); - MoveFilesToLevel(2); - - // L1, overlaps in range with the file in L2 so - // that they compact together. - ASSERT_OK(Put(Key(0), rnd.RandomString(4 << 10))); - ASSERT_OK(Put(Key(1), rnd.RandomString(4 << 10))); - ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10))); - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - - ASSERT_EQ("0,1,1,0,0,0,1", FilesPerLevel()); - // 36 hours so that the file in L2 is eligible for TTL - env_->MockSleepForSeconds(36 * 60 * 60); - - CompactRangeOptions compact_range_opts; - - ASSERT_OK(dbfull()->RunManualCompaction( - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(), - 1 /* input_level */, 2 /* output_level */, compact_range_opts, - nullptr /* begin */, nullptr /* end */, true /* exclusive */, - true /* disallow_trivial_move */, - std::numeric_limits::max() /*max_file_num_to_ignore*/, - "" /*trim_ts*/)); - - // L2 should have 2 files: - // file 1: Key(0), Key(1) - // ShouldStopBefore(Key(2)) return true due to TTL or output file size - // file 2: Key(2), Key(3) - // - // Before the fix in this PR, L2 would have 3 files: - // file 1: Key(0), Key(1) - // CompactionOutputs::ShouldStopBefore(Key(2)) returns true due to output file - // size. - // file 2: Key(2) - // CompactionOutput::ShouldStopBefore(Key(3)) returns true - // due to TTL cutting and that TTL states were not updated - // for Key(2). 
- // file 3: Key(3) - ASSERT_EQ("0,0,2,0,0,0,1", FilesPerLevel()); -} - -TEST_F(DBCompactionTest, LevelTtlCascadingCompactions) { - env_->SetMockSleep(); - const int kValueSize = 100; - - for (bool if_restart : {false, true}) { - for (bool if_open_all_files : {false, true}) { - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.ttl = 24 * 60 * 60; // 24 hours - if (if_open_all_files) { - options.max_open_files = -1; - } else { - options.max_open_files = 20; - } - // RocksDB sanitize max open files to at least 20. Modify it back. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = static_cast(arg); - *max_open_files = 2; - }); - // In the case where all files are opened and doing DB restart - // forcing the oldest ancester time in manifest file to be 0 to - // simulate the case of reading from an old version. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionEdit::EncodeTo:VarintOldestAncesterTime", [&](void* arg) { - if (if_restart && if_open_all_files) { - std::string* encoded_fieled = static_cast(arg); - *encoded_fieled = ""; - PutVarint64(encoded_fieled, 0); - } - }); - - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - DestroyAndReopen(options); - - int ttl_compactions = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - auto compaction_reason = compaction->compaction_reason(); - if (compaction_reason == CompactionReason::kTtl) { - ttl_compactions++; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Add two L6 files with key ranges: [1 .. 100], [101 .. 200]. - Random rnd(301); - for (int i = 1; i <= 100; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - // Get the first file's creation time. This will be the oldest file in the - // DB. Compactions inolving this file's descendents should keep getting - // this time. - std::vector> level_to_files; - dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), - &level_to_files); - uint64_t oldest_time = level_to_files[0][0].oldest_ancester_time; - // Add 1 hour and do another flush. - env_->MockSleepForSeconds(1 * 60 * 60); - for (int i = 101; i <= 200; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - MoveFilesToLevel(6); - ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel()); - - env_->MockSleepForSeconds(1 * 60 * 60); - // Add two L4 files with key ranges: [1 .. 50], [51 .. 150]. - for (int i = 1; i <= 50; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - env_->MockSleepForSeconds(1 * 60 * 60); - for (int i = 51; i <= 150; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - MoveFilesToLevel(4); - ASSERT_EQ("0,0,0,0,2,0,2", FilesPerLevel()); - - env_->MockSleepForSeconds(1 * 60 * 60); - // Add one L1 file with key range: [26, 75]. - for (int i = 26; i <= 75; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - MoveFilesToLevel(1); - ASSERT_EQ("0,1,0,0,2,0,2", FilesPerLevel()); - - // LSM tree: - // L1: [26 .. 75] - // L4: [1 .. 50][51 ..... 150] - // L6: [1 ........ 100][101 .... 
200] - // - // On TTL expiry, TTL compaction should be initiated on L1 file, and the - // compactions should keep going on until the key range hits bottom level. - // In other words: the compaction on this data range "cascasdes" until - // reaching the bottom level. - // - // Order of events on TTL expiry: - // 1. L1 file falls to L3 via 2 trivial moves which are initiated by the - // ttl - // compaction. - // 2. A TTL compaction happens between L3 and L4 files. Output file in L4. - // 3. The new output file from L4 falls to L5 via 1 trival move initiated - // by the ttl compaction. - // 4. A TTL compaction happens between L5 and L6 files. Ouptut in L6. - - // Add 25 hours and do a write - env_->MockSleepForSeconds(25 * 60 * 60); - - ASSERT_OK(Put(Key(1), "1")); - if (if_restart) { - Reopen(options); - } else { - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("1,0,0,0,0,0,1", FilesPerLevel()); - ASSERT_EQ(5, ttl_compactions); - - dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), - &level_to_files); - ASSERT_EQ(oldest_time, level_to_files[6][0].oldest_ancester_time); - - env_->MockSleepForSeconds(25 * 60 * 60); - ASSERT_OK(Put(Key(2), "1")); - if (if_restart) { - Reopen(options); - } else { - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("1,0,0,0,0,0,1", FilesPerLevel()); - ASSERT_GE(ttl_compactions, 6); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } - } -} - -TEST_F(DBCompactionTest, LevelPeriodicCompaction) { - env_->SetMockSleep(); - const int kNumKeysPerFile = 32; - const int kNumLevelFiles = 2; - const int kValueSize = 100; - - for (bool if_restart : {false, true}) { - for (bool if_open_all_files : {false, true}) { - Options options = CurrentOptions(); - options.periodic_compaction_seconds = 48 * 60 * 60; // 2 days - if (if_open_all_files) { - options.max_open_files = -1; // needed for ttl compaction - } else { - options.max_open_files = 20; - } - // RocksDB sanitize max open files to at least 20. Modify it back. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = static_cast(arg); - *max_open_files = 0; - }); - // In the case where all files are opened and doing DB restart - // forcing the file creation time in manifest file to be 0 to - // simulate the case of reading from an old version. 
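For context, the option under test here is periodic_compaction_seconds, which forces files older than the configured age to be rewritten regardless of the usual size-based triggers. A minimal configuration sketch follows; the helper name, path, and value are illustrative.

#include <string>
#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Sketch: open a DB whose files are rewritten by a periodic compaction
// once they are older than two days.
rocksdb::Status OpenWithPeriodicCompaction(const std::string& path,
                                           rocksdb::DB** db) {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.periodic_compaction_seconds = 48 * 60 * 60;  // 2 days
  return rocksdb::DB::Open(options, path, db);
}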
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionEdit::EncodeTo:VarintFileCreationTime", [&](void* arg) { - if (if_restart && if_open_all_files) { - std::string* encoded_fieled = static_cast(arg); - *encoded_fieled = ""; - PutVarint64(encoded_fieled, 0); - } - }); - - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - DestroyAndReopen(options); - - int periodic_compactions = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - auto compaction_reason = compaction->compaction_reason(); - if (compaction_reason == CompactionReason::kPeriodicCompaction) { - periodic_compactions++; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ("2", FilesPerLevel()); - ASSERT_EQ(0, periodic_compactions); - - // Add 50 hours and do a write - env_->MockSleepForSeconds(50 * 60 * 60); - ASSERT_OK(Put("a", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Assert that the files stay in the same level - ASSERT_EQ("3", FilesPerLevel()); - // The two old files go through the periodic compaction process - ASSERT_EQ(2, periodic_compactions); - - MoveFilesToLevel(1); - ASSERT_EQ("0,3", FilesPerLevel()); - - // Add another 50 hours and do another write - env_->MockSleepForSeconds(50 * 60 * 60); - ASSERT_OK(Put("b", "2")); - if (if_restart) { - Reopen(options); - } else { - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("1,3", FilesPerLevel()); - // The three old files now go through the periodic compaction process. 2 - // + 3. - ASSERT_EQ(5, periodic_compactions); - - // Add another 50 hours and do another write - env_->MockSleepForSeconds(50 * 60 * 60); - ASSERT_OK(Put("c", "3")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("2,3", FilesPerLevel()); - // The four old files now go through the periodic compaction process. 5 - // + 4. - ASSERT_EQ(9, periodic_compactions); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } - } -} - -TEST_F(DBCompactionTest, LevelPeriodicCompactionWithOldDB) { - // This test makes sure that periodic compactions are working with a DB - // where file_creation_time of some files is 0. 
- // After compactions the new files are created with a valid file_creation_time - - const int kNumKeysPerFile = 32; - const int kNumFiles = 4; - const int kValueSize = 100; - - Options options = CurrentOptions(); - env_->SetMockSleep(); - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - DestroyAndReopen(options); - - int periodic_compactions = 0; - bool set_file_creation_time_to_zero = true; - bool set_creation_time_to_zero = true; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - auto compaction_reason = compaction->compaction_reason(); - if (compaction_reason == CompactionReason::kPeriodicCompaction) { - periodic_compactions++; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) { - TableProperties* props = reinterpret_cast(arg); - if (set_file_creation_time_to_zero) { - props->file_creation_time = 0; - } - if (set_creation_time_to_zero) { - props->creation_time = 0; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int i = 0; i < kNumFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - // Move the first two files to L2. - if (i == 1) { - MoveFilesToLevel(2); - set_creation_time_to_zero = false; - } - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ("2,0,2", FilesPerLevel()); - ASSERT_EQ(0, periodic_compactions); - - Close(); - - set_file_creation_time_to_zero = false; - // Forward the clock by 2 days. - env_->MockSleepForSeconds(2 * 24 * 60 * 60); - options.periodic_compaction_seconds = 1 * 24 * 60 * 60; // 1 day - - Reopen(options); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("2,0,2", FilesPerLevel()); - // Make sure that all files go through periodic compaction. 
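The scenario in this test hinges on files whose creation_time / file_creation_time table properties are 0, as written by very old RocksDB versions. One way to observe these timestamps from application code is sketched below; it assumes the GetPropertiesOfAllTables API and is not part of the deleted test.

#include <cstdio>
#include "rocksdb/db.h"
#include "rocksdb/table_properties.h"

// Sketch: dump the per-file creation timestamps that periodic compaction
// keys off. A value of 0 means the file predates these properties.
void DumpFileCreationTimes(rocksdb::DB* db) {
  rocksdb::TablePropertiesCollection props;
  rocksdb::Status s = db->GetPropertiesOfAllTables(&props);
  if (!s.ok()) {
    return;
  }
  for (const auto& entry : props) {
    std::printf("%s creation_time=%llu file_creation_time=%llu\n",
                entry.first.c_str(),
                static_cast<unsigned long long>(entry.second->creation_time),
                static_cast<unsigned long long>(
                    entry.second->file_creation_time));
  }
}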
- ASSERT_EQ(kNumFiles, periodic_compactions); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBCompactionTest, LevelPeriodicAndTtlCompaction) { - const int kNumKeysPerFile = 32; - const int kNumLevelFiles = 2; - const int kValueSize = 100; - - Options options = CurrentOptions(); - options.ttl = 10 * 60 * 60; // 10 hours - options.periodic_compaction_seconds = 48 * 60 * 60; // 2 days - options.max_open_files = -1; // needed for both periodic and ttl compactions - env_->SetMockSleep(); - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - DestroyAndReopen(options); - - int periodic_compactions = 0; - int ttl_compactions = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - auto compaction_reason = compaction->compaction_reason(); - if (compaction_reason == CompactionReason::kPeriodicCompaction) { - periodic_compactions++; - } else if (compaction_reason == CompactionReason::kTtl) { - ttl_compactions++; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - MoveFilesToLevel(3); - - ASSERT_EQ("0,0,0,2", FilesPerLevel()); - ASSERT_EQ(0, periodic_compactions); - ASSERT_EQ(0, ttl_compactions); - - // Add some time greater than periodic_compaction_time. - env_->MockSleepForSeconds(50 * 60 * 60); - ASSERT_OK(Put("a", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Files in the bottom level go through periodic compactions. - ASSERT_EQ("1,0,0,2", FilesPerLevel()); - ASSERT_EQ(2, periodic_compactions); - ASSERT_EQ(0, ttl_compactions); - - // Add a little more time than ttl - env_->MockSleepForSeconds(11 * 60 * 60); - ASSERT_OK(Put("b", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Notice that the previous file in level 1 falls down to the bottom level - // due to ttl compactions, one level at a time. - // And bottom level files don't get picked up for ttl compactions. - ASSERT_EQ("1,0,0,3", FilesPerLevel()); - ASSERT_EQ(2, periodic_compactions); - ASSERT_EQ(3, ttl_compactions); - - // Add some time greater than periodic_compaction_time. - env_->MockSleepForSeconds(50 * 60 * 60); - ASSERT_OK(Put("c", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Previous L0 file falls one level at a time to bottom level due to ttl. - // And all 4 bottom files go through periodic compactions. 
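This test combines both age-based triggers. As its comments note, TTL compaction only rewrites data above the bottom level, while periodic compaction also picks up bottom-level files. A combined configuration, sketched under the same assumptions the test makes (the values are illustrative):

#include "rocksdb/options.h"

// Sketch: enable TTL and periodic compaction together.
rocksdb::Options TtlPlusPeriodicOptions() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.ttl = 10 * 60 * 60;                          // 10 hours
  options.periodic_compaction_seconds = 48 * 60 * 60;  // 2 days
  options.max_open_files = -1;  // the test keeps all files open for both
                                // age-based compactions
  return options;
}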
- ASSERT_EQ("1,0,0,4", FilesPerLevel()); - ASSERT_EQ(6, periodic_compactions); - ASSERT_EQ(6, ttl_compactions); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBCompactionTest, LevelTtlBooster) { - const int kNumKeysPerFile = 32; - const int kNumLevelFiles = 3; - const int kValueSize = 1000; - - Options options = CurrentOptions(); - options.ttl = 10 * 60 * 60; // 10 hours - options.periodic_compaction_seconds = 480 * 60 * 60; // very long - options.level0_file_num_compaction_trigger = 2; - options.max_bytes_for_level_base = 5 * uint64_t{kNumKeysPerFile * kValueSize}; - options.max_open_files = -1; // needed for both periodic and ttl compactions - options.compaction_pri = CompactionPri::kMinOverlappingRatio; - env_->SetMockSleep(); - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - MoveFilesToLevel(2); - - ASSERT_EQ("0,0,3", FilesPerLevel()); - - // Create some files for L1 - for (int i = 0; i < 2; i++) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK(Put(Key(2 * j + i), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - - ASSERT_EQ("0,1,3", FilesPerLevel()); - - // Make the new L0 files qualify TTL boosting and generate one more to trigger - // L1 -> L2 compaction. Old files will be picked even if their priority is - // lower without boosting. - env_->MockSleepForSeconds(8 * 60 * 60); - for (int i = 0; i < 2; i++) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK(Put(Key(kNumKeysPerFile * 2 + 2 * j + i), - rnd.RandomString(kValueSize * 2))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - // Force files to be compacted to L1 - ASSERT_OK( - dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "1"}})); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,1,2", FilesPerLevel()); - ASSERT_OK( - dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"}})); - - ASSERT_GT(SizeAtLevel(1), kNumKeysPerFile * 4 * kValueSize); -} - -TEST_F(DBCompactionTest, LevelPeriodicCompactionWithCompactionFilters) { - class TestCompactionFilter : public CompactionFilter { - const char* Name() const override { return "TestCompactionFilter"; } - }; - class TestCompactionFilterFactory : public CompactionFilterFactory { - const char* Name() const override { return "TestCompactionFilterFactory"; } - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& /*context*/) override { - return std::unique_ptr(new TestCompactionFilter()); - } - }; - - const int kNumKeysPerFile = 32; - const int kNumLevelFiles = 2; - const int kValueSize = 100; - - Random rnd(301); - - Options options = CurrentOptions(); - TestCompactionFilter test_compaction_filter; - env_->SetMockSleep(); - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - enum CompactionFilterType { - kUseCompactionFilter, - kUseCompactionFilterFactory - }; - - for (CompactionFilterType comp_filter_type : - {kUseCompactionFilter, kUseCompactionFilterFactory}) { - // Assert that periodic compactions are not enabled. 
- ASSERT_EQ(std::numeric_limits::max() - 1, - options.periodic_compaction_seconds); - - if (comp_filter_type == kUseCompactionFilter) { - options.compaction_filter = &test_compaction_filter; - options.compaction_filter_factory.reset(); - } else if (comp_filter_type == kUseCompactionFilterFactory) { - options.compaction_filter = nullptr; - options.compaction_filter_factory.reset( - new TestCompactionFilterFactory()); - } - DestroyAndReopen(options); - - // periodic_compaction_seconds should be set to the sanitized value when - // a compaction filter or a compaction filter factory is used. - ASSERT_EQ(30 * 24 * 60 * 60, - dbfull()->GetOptions().periodic_compaction_seconds); - - int periodic_compactions = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - auto compaction_reason = compaction->compaction_reason(); - if (compaction_reason == CompactionReason::kPeriodicCompaction) { - periodic_compactions++; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ("2", FilesPerLevel()); - ASSERT_EQ(0, periodic_compactions); - - // Add 31 days and do a write - env_->MockSleepForSeconds(31 * 24 * 60 * 60); - ASSERT_OK(Put("a", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Assert that the files stay in the same level - ASSERT_EQ("3", FilesPerLevel()); - // The two old files go through the periodic compaction process - ASSERT_EQ(2, periodic_compactions); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(DBCompactionTest, CompactRangeDelayedByL0FileCount) { - // Verify that, when `CompactRangeOptions::allow_write_stall == false`, manual - // compaction only triggers flush after it's sure stall won't be triggered for - // L0 file count going too high. - const int kNumL0FilesTrigger = 4; - const int kNumL0FilesLimit = 8; - // i == 0: verifies normal case where stall is avoided by delay - // i == 1: verifies no delay in edge case where stall trigger is same as - // compaction trigger, so stall can't be avoided - for (int i = 0; i < 2; ++i) { - Options options = CurrentOptions(); - options.level0_slowdown_writes_trigger = kNumL0FilesLimit; - if (i == 0) { - options.level0_file_num_compaction_trigger = kNumL0FilesTrigger; - } else { - options.level0_file_num_compaction_trigger = kNumL0FilesLimit; - } - Reopen(options); - - if (i == 0) { - // ensure the auto compaction doesn't finish until manual compaction has - // had a chance to be delayed. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait", - "CompactionJob::Run():End"}}); - } else { - // ensure the auto-compaction doesn't finish until manual compaction has - // continued without delay. 
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:StallWaitDone", - "CompactionJob::Run():End"}}); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int j = 0; j < kNumL0FilesLimit - 1; ++j) { - for (int k = 0; k < 2; ++k) { - ASSERT_OK(Put(Key(k), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - auto manual_compaction_thread = port::Thread([this]() { - CompactRangeOptions cro; - cro.allow_write_stall = false; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - }); - - manual_compaction_thread.join(); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_GT(NumTableFilesAtLevel(1), 0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(DBCompactionTest, CompactRangeDelayedByImmMemTableCount) { - // Verify that, when `CompactRangeOptions::allow_write_stall == false`, manual - // compaction only triggers flush after it's sure stall won't be triggered for - // immutable memtable count going too high. - const int kNumImmMemTableLimit = 8; - // i == 0: verifies normal case where stall is avoided by delay - // i == 1: verifies no delay in edge case where stall trigger is same as flush - // trigger, so stall can't be avoided - for (int i = 0; i < 2; ++i) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - // the delay limit is one less than the stop limit. This test focuses on - // avoiding delay limit, but this option sets stop limit, so add one. - options.max_write_buffer_number = kNumImmMemTableLimit + 1; - if (i == 1) { - options.min_write_buffer_number_to_merge = kNumImmMemTableLimit; - } - Reopen(options); - - if (i == 0) { - // ensure the flush doesn't finish until manual compaction has had a - // chance to be delayed. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait", - "FlushJob::WriteLevel0Table"}}); - } else { - // ensure the flush doesn't finish until manual compaction has continued - // without delay. 
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:StallWaitDone", - "FlushJob::WriteLevel0Table"}}); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int j = 0; j < kNumImmMemTableLimit - 1; ++j) { - ASSERT_OK(Put(Key(0), rnd.RandomString(1024))); - FlushOptions flush_opts; - flush_opts.wait = false; - flush_opts.allow_write_stall = true; - ASSERT_OK(dbfull()->Flush(flush_opts)); - } - - auto manual_compaction_thread = port::Thread([this]() { - CompactRangeOptions cro; - cro.allow_write_stall = false; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - }); - - manual_compaction_thread.join(); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_GT(NumTableFilesAtLevel(1), 0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(DBCompactionTest, CompactRangeShutdownWhileDelayed) { - // Verify that, when `CompactRangeOptions::allow_write_stall == false`, delay - // does not hang if CF is dropped or DB is closed - const int kNumL0FilesTrigger = 4; - const int kNumL0FilesLimit = 8; - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0FilesTrigger; - options.level0_slowdown_writes_trigger = kNumL0FilesLimit; - // i == 0: DB::DropColumnFamily() on CompactRange's target CF unblocks it - // i == 1: DB::CancelAllBackgroundWork() unblocks CompactRange. This is to - // simulate what happens during Close as we can't call Close (it - // blocks on the auto-compaction, making a cycle). - for (int i = 0; i < 2; ++i) { - CreateAndReopenWithCF({"one"}, options); - // The calls to close CF/DB wait until the manual compaction stalls. - // The auto-compaction waits until the manual compaction finishes to ensure - // the signal comes from closing CF/DB, not from compaction making progress. 
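The two cases below verify that a CompactRange() call stalled on allow_write_stall == false is unblocked, with a descriptive error, when the column family is dropped or background work is cancelled. From the caller's side that looks roughly like the following sketch; the helper name is illustrative, and the cancellation mentioned in the comment refers to rocksdb::CancelAllBackgroundWork from rocksdb/convenience.h.

#include <cassert>
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"

// Sketch: a stalled manual compaction fails cleanly instead of hanging
// when the target CF is dropped or background work is cancelled.
void CompactOrBailOut(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* cf) {
  rocksdb::CompactRangeOptions cro;
  cro.allow_write_stall = false;  // wait until a flush would not stall writes
  rocksdb::Status s = db->CompactRange(cro, cf, nullptr, nullptr);
  if (s.IsColumnFamilyDropped() || s.IsShutdownInProgress()) {
    // Another thread dropped the CF or called
    // rocksdb::CancelAllBackgroundWork(db, /*wait=*/false).
    return;
  }
  assert(s.ok());
}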
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait", - "DBCompactionTest::CompactRangeShutdownWhileDelayed:PreShutdown"}, - {"DBCompactionTest::CompactRangeShutdownWhileDelayed:PostManual", - "CompactionJob::Run():End"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int j = 0; j < kNumL0FilesLimit - 1; ++j) { - for (int k = 0; k < 2; ++k) { - ASSERT_OK(Put(1, Key(k), rnd.RandomString(1024))); - } - ASSERT_OK(Flush(1)); - } - auto manual_compaction_thread = port::Thread([this, i]() { - CompactRangeOptions cro; - cro.allow_write_stall = false; - if (i == 0) { - ASSERT_TRUE(db_->CompactRange(cro, handles_[1], nullptr, nullptr) - .IsColumnFamilyDropped()); - } else { - ASSERT_TRUE(db_->CompactRange(cro, handles_[1], nullptr, nullptr) - .IsShutdownInProgress()); - } - }); - - TEST_SYNC_POINT( - "DBCompactionTest::CompactRangeShutdownWhileDelayed:PreShutdown"); - if (i == 0) { - ASSERT_OK(db_->DropColumnFamily(handles_[1])); - } else { - dbfull()->CancelAllBackgroundWork(false /* wait */); - } - manual_compaction_thread.join(); - TEST_SYNC_POINT( - "DBCompactionTest::CompactRangeShutdownWhileDelayed:PostManual"); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(DBCompactionTest, CompactRangeSkipFlushAfterDelay) { - // Verify that, when `CompactRangeOptions::allow_write_stall == false`, - // CompactRange skips its flush if the delay is long enough that the memtables - // existing at the beginning of the call have already been flushed. - const int kNumL0FilesTrigger = 4; - const int kNumL0FilesLimit = 8; - Options options = CurrentOptions(); - options.level0_slowdown_writes_trigger = kNumL0FilesLimit; - options.level0_file_num_compaction_trigger = kNumL0FilesTrigger; - Reopen(options); - - Random rnd(301); - // The manual flush includes the memtable that was active when CompactRange - // began. So it unblocks CompactRange and precludes its flush. Throughout the - // test, stall conditions are upheld via high L0 file count. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait", - "DBCompactionTest::CompactRangeSkipFlushAfterDelay:PreFlush"}, - {"DBCompactionTest::CompactRangeSkipFlushAfterDelay:PostFlush", - "DBImpl::FlushMemTable:StallWaitDone"}, - {"DBImpl::FlushMemTable:StallWaitDone", "CompactionJob::Run():End"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // used for the delayable flushes - FlushOptions flush_opts; - flush_opts.allow_write_stall = true; - for (int i = 0; i < kNumL0FilesLimit - 1; ++i) { - for (int j = 0; j < 2; ++j) { - ASSERT_OK(Put(Key(j), rnd.RandomString(1024))); - } - ASSERT_OK(dbfull()->Flush(flush_opts)); - } - auto manual_compaction_thread = port::Thread([this]() { - CompactRangeOptions cro; - cro.allow_write_stall = false; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - }); - - TEST_SYNC_POINT("DBCompactionTest::CompactRangeSkipFlushAfterDelay:PreFlush"); - ASSERT_OK(Put(std::to_string(0), rnd.RandomString(1024))); - ASSERT_OK(dbfull()->Flush(flush_opts)); - ASSERT_OK(Put(std::to_string(0), rnd.RandomString(1024))); - TEST_SYNC_POINT( - "DBCompactionTest::CompactRangeSkipFlushAfterDelay:PostFlush"); - manual_compaction_thread.join(); - - // If CompactRange's flush was skipped, the final Put above will still be - // in the active memtable. 
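The delayable flushes in this test use FlushOptions with wait == false and allow_write_stall == true, and the final check reads the active-memtable entry count property. A stand-alone sketch of that pattern (not part of the test):

#include <string>
#include "rocksdb/db.h"

// Sketch: issue a non-blocking flush that may stall writes, then read how
// many entries remain in the active memtable.
void FlushAndReport(rocksdb::DB* db) {
  rocksdb::FlushOptions fo;
  fo.wait = false;              // do not block the caller
  fo.allow_write_stall = true;  // do not wait for stall conditions to clear
  rocksdb::Status s = db->Flush(fo);
  if (!s.ok()) {
    return;
  }
  std::string num_entries;
  if (db->GetProperty("rocksdb.num-entries-active-mem-table", &num_entries)) {
    // num_entries now holds the count as a decimal string.
  }
}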
- std::string num_keys_in_memtable; - ASSERT_TRUE(db_->GetProperty(DB::Properties::kNumEntriesActiveMemTable, - &num_keys_in_memtable)); - ASSERT_EQ(std::to_string(1), num_keys_in_memtable); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBCompactionTest, CompactRangeFlushOverlappingMemtable) { - // Verify memtable only gets flushed if it contains data overlapping the range - // provided to `CompactRange`. Tests all kinds of overlap/non-overlap. - const int kNumEndpointKeys = 5; - std::string keys[kNumEndpointKeys] = {"a", "b", "c", "d", "e"}; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - Reopen(options); - - // One extra iteration for nullptr, which means left side of interval is - // unbounded. - for (int i = 0; i <= kNumEndpointKeys; ++i) { - Slice begin; - Slice* begin_ptr; - if (i == 0) { - begin_ptr = nullptr; - } else { - begin = keys[i - 1]; - begin_ptr = &begin; - } - // Start at `i` so right endpoint comes after left endpoint. One extra - // iteration for nullptr, which means right side of interval is unbounded. - for (int j = std::max(0, i - 1); j <= kNumEndpointKeys; ++j) { - Slice end; - Slice* end_ptr; - if (j == kNumEndpointKeys) { - end_ptr = nullptr; - } else { - end = keys[j]; - end_ptr = &end; - } - ASSERT_OK(Put("b", "val")); - ASSERT_OK(Put("d", "val")); - CompactRangeOptions compact_range_opts; - ASSERT_OK(db_->CompactRange(compact_range_opts, begin_ptr, end_ptr)); - - uint64_t get_prop_tmp, num_memtable_entries = 0; - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesImmMemTables, - &get_prop_tmp)); - num_memtable_entries += get_prop_tmp; - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable, - &get_prop_tmp)); - num_memtable_entries += get_prop_tmp; - if (begin_ptr == nullptr || end_ptr == nullptr || - (i <= 4 && j >= 1 && (begin != "c" || end != "c"))) { - // In this case `CompactRange`'s range overlapped in some way with the - // memtable's range, so flush should've happened. Then "b" and "d" won't - // be in the memtable. 
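The property being asserted is whether CompactRange() flushed the memtable at all, which per this test happens only when the memtable's keys overlap the requested range (or one of the bounds is null, i.e. unbounded). A bounded manual compaction from application code is simply:

#include "rocksdb/db.h"

// Sketch: compact only the key range from "c" to "d". If the memtable
// holds nothing in that range, CompactRange() can skip the flush.
rocksdb::Status CompactSubrange(rocksdb::DB* db) {
  rocksdb::Slice begin("c");
  rocksdb::Slice end("d");
  rocksdb::CompactRangeOptions cro;
  return db->CompactRange(cro, &begin, &end);
}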
- ASSERT_EQ(0, num_memtable_entries); - } else { - ASSERT_EQ(2, num_memtable_entries); - // flush anyways to prepare for next iteration - ASSERT_OK(db_->Flush(FlushOptions())); - } - } - } -} - -TEST_F(DBCompactionTest, CompactionStatsTest) { - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 2; - CompactionStatsCollector* collector = new CompactionStatsCollector(); - options.listeners.emplace_back(collector); - DestroyAndReopen(options); - - for (int i = 0; i < 32; i++) { - for (int j = 0; j < 5000; j++) { - ASSERT_OK(Put(std::to_string(j), std::string(1, 'A'))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ColumnFamilyHandleImpl* cfh = - static_cast(dbfull()->DefaultColumnFamily()); - ColumnFamilyData* cfd = cfh->cfd(); - - VerifyCompactionStats(*cfd, *collector); -} - -TEST_F(DBCompactionTest, SubcompactionEvent) { - class SubCompactionEventListener : public EventListener { - public: - void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& ci) override { - InstrumentedMutexLock l(&mutex_); - ASSERT_EQ(running_compactions_.find(ci.job_id), - running_compactions_.end()); - running_compactions_.emplace(ci.job_id, std::unordered_set()); - } - - void OnCompactionCompleted(DB* /*db*/, - const CompactionJobInfo& ci) override { - InstrumentedMutexLock l(&mutex_); - auto it = running_compactions_.find(ci.job_id); - ASSERT_NE(it, running_compactions_.end()); - ASSERT_EQ(it->second.size(), 0); - running_compactions_.erase(it); - } - - void OnSubcompactionBegin(const SubcompactionJobInfo& si) override { - InstrumentedMutexLock l(&mutex_); - auto it = running_compactions_.find(si.job_id); - ASSERT_NE(it, running_compactions_.end()); - auto r = it->second.insert(si.subcompaction_job_id); - ASSERT_TRUE(r.second); // each subcompaction_job_id should be different - total_subcompaction_cnt_++; - } - - void OnSubcompactionCompleted(const SubcompactionJobInfo& si) override { - InstrumentedMutexLock l(&mutex_); - auto it = running_compactions_.find(si.job_id); - ASSERT_NE(it, running_compactions_.end()); - auto r = it->second.erase(si.subcompaction_job_id); - ASSERT_EQ(r, 1); - } - - size_t GetRunningCompactionCount() { - InstrumentedMutexLock l(&mutex_); - return running_compactions_.size(); - } - - size_t GetTotalSubcompactionCount() { - InstrumentedMutexLock l(&mutex_); - return total_subcompaction_cnt_; - } - - private: - InstrumentedMutex mutex_; - std::unordered_map> running_compactions_; - size_t total_subcompaction_cnt_ = 0; - }; - - Options options = CurrentOptions(); - options.target_file_size_base = 1024; - options.level0_file_num_compaction_trigger = 10; - auto* listener = new SubCompactionEventListener(); - options.listeners.emplace_back(listener); - - DestroyAndReopen(options); - - // generate 4 files @ L2 - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 10; j++) { - int key_id = i * 10 + j; - ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id))); - } - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - - // generate 2 files @ L1 which overlaps with L2 files - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 10; j++) { - int key_id = i * 20 + j * 2; - ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id))); - } - ASSERT_OK(Flush()); - } - MoveFilesToLevel(1); - ASSERT_EQ(FilesPerLevel(), "0,2,4"); - - CompactRangeOptions comp_opts; - comp_opts.max_subcompactions = 4; - Status s = dbfull()->CompactRange(comp_opts, nullptr, nullptr); - ASSERT_OK(s); - 
ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // make sure there's no running compaction - ASSERT_EQ(listener->GetRunningCompactionCount(), 0); - // and sub compaction is triggered - ASSERT_GT(listener->GetTotalSubcompactionCount(), 0); -} - -TEST_F(DBCompactionTest, CompactFilesOutputRangeConflict) { - // LSM setup: - // L1: [ba bz] - // L2: [a b] [c d] - // L3: [a b] [c d] - // - // Thread 1: Thread 2: - // Begin compacting all L2->L3 - // Compact [ba bz] L1->L3 - // End compacting all L2->L3 - // - // The compaction operation in thread 2 should be disallowed because the range - // overlaps with the compaction in thread 1, which also covers that range in - // L3. - Options options = CurrentOptions(); - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - Reopen(options); - - for (int level = 3; level >= 2; --level) { - ASSERT_OK(Put("a", "val")); - ASSERT_OK(Put("b", "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("c", "val")); - ASSERT_OK(Put("d", "val")); - ASSERT_OK(Flush()); - MoveFilesToLevel(level); - } - ASSERT_OK(Put("ba", "val")); - ASSERT_OK(Put("bz", "val")); - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - - SyncPoint::GetInstance()->LoadDependency({ - {"CompactFilesImpl:0", - "DBCompactionTest::CompactFilesOutputRangeConflict:Thread2Begin"}, - {"DBCompactionTest::CompactFilesOutputRangeConflict:Thread2End", - "CompactFilesImpl:1"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - - auto bg_thread = port::Thread([&]() { - // Thread 1 - std::vector filenames = collector->GetFlushedFiles(); - filenames.pop_back(); - ASSERT_OK(db_->CompactFiles(CompactionOptions(), filenames, - 3 /* output_level */)); - }); - - // Thread 2 - TEST_SYNC_POINT( - "DBCompactionTest::CompactFilesOutputRangeConflict:Thread2Begin"); - std::string filename = collector->GetFlushedFiles().back(); - ASSERT_FALSE( - db_->CompactFiles(CompactionOptions(), {filename}, 3 /* output_level */) - .ok()); - TEST_SYNC_POINT( - "DBCompactionTest::CompactFilesOutputRangeConflict:Thread2End"); - - bg_thread.join(); -} - -TEST_F(DBCompactionTest, CompactionHasEmptyOutput) { - Options options = CurrentOptions(); - SstStatsCollector* collector = new SstStatsCollector(); - options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(collector); - Reopen(options); - - // Make sure the L0 files overlap to prevent trivial move. - ASSERT_OK(Put("a", "val")); - ASSERT_OK(Put("b", "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Delete("a")); - ASSERT_OK(Delete("b")); - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_EQ(NumTableFilesAtLevel(1), 0); - - // Expect one file creation to start for each flush, and zero for compaction - // since no keys are written. 
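The listener in this test tracks compaction and subcompaction lifecycles through the OnSubcompactionBegin / OnSubcompactionCompleted callbacks. A pared-down listener along the same lines, assuming only the callbacks the test itself uses (register an instance through options.listeners before opening the DB, and request parallelism via CompactRangeOptions::max_subcompactions):

#include <atomic>
#include "rocksdb/listener.h"

// Sketch: count subcompaction events.
class SubcompactionCounter : public rocksdb::EventListener {
 public:
  void OnSubcompactionBegin(
      const rocksdb::SubcompactionJobInfo& /*si*/) override {
    started_.fetch_add(1, std::memory_order_relaxed);
  }
  void OnSubcompactionCompleted(
      const rocksdb::SubcompactionJobInfo& /*si*/) override {
    completed_.fetch_add(1, std::memory_order_relaxed);
  }
  int started() const { return started_.load(); }
  int completed() const { return completed_.load(); }

 private:
  std::atomic<int> started_{0};
  std::atomic<int> completed_{0};
};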
- ASSERT_EQ(2, collector->num_ssts_creation_started()); -} - -TEST_F(DBCompactionTest, CompactionLimiter) { - const int kNumKeysPerFile = 10; - const int kMaxBackgroundThreads = 64; - - struct CompactionLimiter { - std::string name; - int limit_tasks; - int max_tasks; - int tasks; - std::shared_ptr limiter; - }; - - std::vector limiter_settings; - limiter_settings.push_back({"limiter_1", 1, 0, 0, nullptr}); - limiter_settings.push_back({"limiter_2", 2, 0, 0, nullptr}); - limiter_settings.push_back({"limiter_3", 3, 0, 0, nullptr}); - - for (auto& ls : limiter_settings) { - ls.limiter.reset(NewConcurrentTaskLimiter(ls.name, ls.limit_tasks)); - } - - std::shared_ptr unique_limiter( - NewConcurrentTaskLimiter("unique_limiter", -1)); - - const char* cf_names[] = {"default", "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", "a", "b", "c", "d", "e", "f"}; - const unsigned int cf_count = sizeof cf_names / sizeof cf_names[0]; - - std::unordered_map cf_to_limiter; - - Options options = CurrentOptions(); - options.write_buffer_size = 110 * 1024; // 110KB - options.arena_block_size = 4096; - options.num_levels = 3; - options.level0_file_num_compaction_trigger = 4; - options.level0_slowdown_writes_trigger = 64; - options.level0_stop_writes_trigger = 64; - options.max_background_jobs = kMaxBackgroundThreads; // Enough threads - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - options.max_write_buffer_number = 10; // Enough memtables - DestroyAndReopen(options); - - std::vector option_vector; - option_vector.reserve(cf_count); - - for (unsigned int cf = 0; cf < cf_count; cf++) { - ColumnFamilyOptions cf_opt(options); - if (cf == 0) { - // "Default" CF does't use compaction limiter - cf_opt.compaction_thread_limiter = nullptr; - } else if (cf == 1) { - // "1" CF uses bypass compaction limiter - unique_limiter->SetMaxOutstandingTask(-1); - cf_opt.compaction_thread_limiter = unique_limiter; - } else { - // Assign limiter by mod - auto& ls = limiter_settings[cf % 3]; - cf_opt.compaction_thread_limiter = ls.limiter; - cf_to_limiter[cf_names[cf]] = &ls; - } - option_vector.emplace_back(DBOptions(options), cf_opt); - } - - for (unsigned int cf = 1; cf < cf_count; cf++) { - CreateColumnFamilies({cf_names[cf]}, option_vector[cf]); - } - - ReopenWithColumnFamilies( - std::vector(cf_names, cf_names + cf_count), option_vector); - - port::Mutex mutex; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:BeforeCompaction", [&](void* arg) { - const auto& cf_name = static_cast(arg)->GetName(); - auto iter = cf_to_limiter.find(cf_name); - if (iter != cf_to_limiter.end()) { - MutexLock l(&mutex); - ASSERT_GE(iter->second->limit_tasks, ++iter->second->tasks); - iter->second->max_tasks = - std::max(iter->second->max_tasks, iter->second->limit_tasks); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:AfterCompaction", [&](void* arg) { - const auto& cf_name = static_cast(arg)->GetName(); - auto iter = cf_to_limiter.find(cf_name); - if (iter != cf_to_limiter.end()) { - MutexLock l(&mutex); - ASSERT_GE(--iter->second->tasks, 0); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Block all compact threads in thread pool. 
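The limiter machinery set up above comes from NewConcurrentTaskLimiter() and the per-CF compaction_thread_limiter option. Outside the test, sharing one limiter across several column families looks roughly like this sketch; the limiter name and the limit of 2 are illustrative.

#include <memory>
#include <utility>
#include "rocksdb/concurrent_task_limiter.h"
#include "rocksdb/options.h"

// Sketch: cap concurrent compactions across all CFs sharing this limiter.
std::shared_ptr<rocksdb::ConcurrentTaskLimiter> MakeSharedLimiter() {
  return std::shared_ptr<rocksdb::ConcurrentTaskLimiter>(
      rocksdb::NewConcurrentTaskLimiter("shared_limiter", 2));
}

// Sketch: attach the shared limiter to a column family's options.
rocksdb::ColumnFamilyOptions WithLimiter(
    const rocksdb::ColumnFamilyOptions& base,
    std::shared_ptr<rocksdb::ConcurrentTaskLimiter> limiter) {
  rocksdb::ColumnFamilyOptions cf_opts(base);
  cf_opts.compaction_thread_limiter = std::move(limiter);
  return cf_opts;
}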
- const size_t kTotalFlushTasks = kMaxBackgroundThreads / 4; - const size_t kTotalCompactTasks = kMaxBackgroundThreads - kTotalFlushTasks; - env_->SetBackgroundThreads((int)kTotalFlushTasks, Env::HIGH); - env_->SetBackgroundThreads((int)kTotalCompactTasks, Env::LOW); - - test::SleepingBackgroundTask sleeping_compact_tasks[kTotalCompactTasks]; - - // Block all compaction threads in thread pool. - for (size_t i = 0; i < kTotalCompactTasks; i++) { - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_compact_tasks[i], Env::LOW); - sleeping_compact_tasks[i].WaitUntilSleeping(); - } - - int keyIndex = 0; - - for (int n = 0; n < options.level0_file_num_compaction_trigger; n++) { - for (unsigned int cf = 0; cf < cf_count; cf++) { - for (int i = 0; i < kNumKeysPerFile; i++) { - ASSERT_OK(Put(cf, Key(keyIndex++), "")); - } - // put extra key to trigger flush - ASSERT_OK(Put(cf, "", "")); - } - - for (unsigned int cf = 0; cf < cf_count; cf++) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); - } - } - - // Enough L0 files to trigger compaction - for (unsigned int cf = 0; cf < cf_count; cf++) { - ASSERT_EQ(NumTableFilesAtLevel(0, cf), - options.level0_file_num_compaction_trigger); - } - - // Create more files for one column family, which triggers speed up - // condition, all compactions will be scheduled. - for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { - for (int i = 0; i < kNumKeysPerFile; i++) { - ASSERT_OK(Put(0, Key(i), "")); - } - // put extra key to trigger flush - ASSERT_OK(Put(0, "", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0])); - ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1, - NumTableFilesAtLevel(0, 0)); - } - - // All CFs are pending compaction - ASSERT_EQ(cf_count, env_->GetThreadPoolQueueLen(Env::LOW)); - - // Unblock all compaction threads - for (size_t i = 0; i < kTotalCompactTasks; i++) { - sleeping_compact_tasks[i].WakeUp(); - sleeping_compact_tasks[i].WaitUntilDone(); - } - - for (unsigned int cf = 0; cf < cf_count; cf++) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); - } - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // Max outstanding compact tasks reached limit - for (auto& ls : limiter_settings) { - ASSERT_EQ(ls.limit_tasks, ls.max_tasks); - ASSERT_EQ(0, ls.limiter->GetOutstandingTask()); - } - - // test manual compaction under a fully throttled limiter - int cf_test = 1; - unique_limiter->SetMaxOutstandingTask(0); - - // flush one more file to cf 1 - for (int i = 0; i < kNumKeysPerFile; i++) { - ASSERT_OK(Put(cf_test, Key(keyIndex++), "")); - } - // put extra key to trigger flush - ASSERT_OK(Put(cf_test, "", "")); - - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf_test])); - ASSERT_EQ(1, NumTableFilesAtLevel(0, cf_test)); - - Compact(cf_test, Key(0), Key(keyIndex)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); -} - -INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam, - ::testing::Values(std::make_tuple(1, true), - std::make_tuple(1, false), - std::make_tuple(4, true), - std::make_tuple(4, false))); - -TEST_P(DBCompactionDirectIOTest, DirectIO) { - Options options = CurrentOptions(); - Destroy(options); - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.use_direct_io_for_flush_and_compaction = GetParam(); - options.env = MockEnv::Create(Env::Default()); - Reopen(options); - bool readahead = false; - SyncPoint::GetInstance()->SetCallBack( - 
"CompactionJob::OpenCompactionOutputFile", [&](void* arg) { - bool* use_direct_writes = static_cast(arg); - ASSERT_EQ(*use_direct_writes, - options.use_direct_io_for_flush_and_compaction); - }); - if (options.use_direct_io_for_flush_and_compaction) { - SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions:direct_io", [&](void* /*arg*/) { readahead = true; }); - } - SyncPoint::GetInstance()->EnableProcessing(); - CreateAndReopenWithCF({"pikachu"}, options); - MakeTables(3, "p", "q", 1); - ASSERT_EQ("1,1,1", FilesPerLevel(1)); - Compact(1, "p", "q"); - ASSERT_EQ(readahead, options.use_direct_reads); - ASSERT_EQ("0,0,1", FilesPerLevel(1)); - Destroy(options); - delete options.env; -} - -INSTANTIATE_TEST_CASE_P(DBCompactionDirectIOTest, DBCompactionDirectIOTest, - testing::Bool()); - -class CompactionPriTest : public DBTestBase, - public testing::WithParamInterface { - public: - CompactionPriTest() - : DBTestBase("compaction_pri_test", /*env_do_fsync=*/true) { - compaction_pri_ = GetParam(); - } - - // Required if inheriting from testing::WithParamInterface<> - static void SetUpTestCase() {} - static void TearDownTestCase() {} - - uint32_t compaction_pri_; -}; - -TEST_P(CompactionPriTest, Test) { - Options options = CurrentOptions(); - options.write_buffer_size = 16 * 1024; - options.compaction_pri = static_cast(compaction_pri_); - options.hard_pending_compaction_bytes_limit = 256 * 1024; - options.max_bytes_for_level_base = 64 * 1024; - options.max_bytes_for_level_multiplier = 4; - options.compression = kNoCompression; - - DestroyAndReopen(options); - - Random rnd(301); - const int kNKeys = 5000; - int keys[kNKeys]; - for (int i = 0; i < kNKeys; i++) { - keys[i] = i; - } - RandomShuffle(std::begin(keys), std::end(keys), rnd.Next()); - - for (int i = 0; i < kNKeys; i++) { - ASSERT_OK(Put(Key(keys[i]), rnd.RandomString(102))); - } - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - for (int i = 0; i < kNKeys; i++) { - ASSERT_NE("NOT_FOUND", Get(Key(i))); - } -} - -INSTANTIATE_TEST_CASE_P( - CompactionPriTest, CompactionPriTest, - ::testing::Values(CompactionPri::kByCompensatedSize, - CompactionPri::kOldestLargestSeqFirst, - CompactionPri::kOldestSmallestSeqFirst, - CompactionPri::kMinOverlappingRatio, - CompactionPri::kRoundRobin)); - -TEST_F(DBCompactionTest, PersistRoundRobinCompactCursor) { - Options options = CurrentOptions(); - options.write_buffer_size = 16 * 1024; - options.max_bytes_for_level_base = 128 * 1024; - options.target_file_size_base = 64 * 1024; - options.level0_file_num_compaction_trigger = 4; - options.compaction_pri = CompactionPri::kRoundRobin; - options.max_bytes_for_level_multiplier = 4; - options.num_levels = 3; - options.compression = kNoCompression; - - DestroyAndReopen(options); - - Random rnd(301); - - // 30 Files in L0 to trigger compactions between L1 and L2 - for (int i = 0; i < 30; i++) { - for (int j = 0; j < 16; j++) { - ASSERT_OK(Put(rnd.RandomString(24), rnd.RandomString(1000))); - } - ASSERT_OK(Flush()); - } - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - - const VersionStorageInfo* const storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - - const std::vector compact_cursors = - storage_info->GetCompactCursors(); - - Reopen(options); - - VersionSet* const 
reopened_versions = dbfull()->GetVersionSet(); - assert(reopened_versions); - - ColumnFamilyData* const reopened_cfd = - reopened_versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(reopened_cfd, nullptr); - - Version* const reopened_current = reopened_cfd->current(); - ASSERT_NE(reopened_current, nullptr); - - const VersionStorageInfo* const reopened_storage_info = - reopened_current->storage_info(); - ASSERT_NE(reopened_storage_info, nullptr); - - const std::vector reopened_compact_cursors = - reopened_storage_info->GetCompactCursors(); - const auto icmp = reopened_storage_info->InternalComparator(); - ASSERT_EQ(compact_cursors.size(), reopened_compact_cursors.size()); - for (size_t i = 0; i < compact_cursors.size(); i++) { - if (compact_cursors[i].Valid()) { - ASSERT_EQ(0, - icmp->Compare(compact_cursors[i], reopened_compact_cursors[i])); - } else { - ASSERT_TRUE(!reopened_compact_cursors[i].Valid()); - } - } -} - -TEST_P(RoundRobinSubcompactionsAgainstPressureToken, PressureTokenTest) { - const int kKeysPerBuffer = 100; - Options options = CurrentOptions(); - options.num_levels = 4; - options.max_bytes_for_level_multiplier = 2; - options.level0_file_num_compaction_trigger = 4; - options.target_file_size_base = kKeysPerBuffer * 1024; - options.compaction_pri = CompactionPri::kRoundRobin; - options.max_bytes_for_level_base = 8 * kKeysPerBuffer * 1024; - options.disable_auto_compactions = true; - // Setup 7 threads but limited subcompactions so that - // RoundRobin requires extra compactions from reserved threads - options.max_subcompactions = 1; - options.max_background_compactions = 7; - options.max_compaction_bytes = 100000000; - DestroyAndReopen(options); - env_->SetBackgroundThreads(7, Env::LOW); - - Random rnd(301); - const std::vector files_per_level = {0, 15, 25}; - for (int lvl = 2; lvl > 0; lvl--) { - for (int i = 0; i < files_per_level[lvl]; i++) { - for (int j = 0; j < kKeysPerBuffer; j++) { - // Add (lvl-1) to ensure nearly equivallent number of files - // in L2 are overlapped with fils selected to compact from - // L1 - ASSERT_OK(Put(Key(2 * i * kKeysPerBuffer + 2 * j + (lvl - 1)), - rnd.RandomString(1010))); - } - ASSERT_OK(Flush()); - } - MoveFilesToLevel(lvl); - ASSERT_EQ(files_per_level[lvl], NumTableFilesAtLevel(lvl, 0)); - } - // 15 files in L1; 25 files in L2 - - // This is a variable for making sure the following callback is called - // and the assertions in it are indeed excuted. - bool num_planned_subcompactions_verified = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::GenSubcompactionBoundaries:0", [&](void* arg) { - uint64_t num_planned_subcompactions = *(static_cast(arg)); - if (grab_pressure_token_) { - // 7 files are selected for round-robin under auto - // compaction. The number of planned subcompaction is restricted by - // the limited number of max_background_compactions - ASSERT_EQ(num_planned_subcompactions, 7); - } else { - ASSERT_EQ(num_planned_subcompactions, 1); - } - num_planned_subcompactions_verified = true; - }); - - // The following 3 dependencies have to be added to ensure the auto - // compaction and the pressure token is correctly enabled. 
Same for - // RoundRobinSubcompactionsUsingResources and - // DBCompactionTest.RoundRobinSubcompactionsShrinkResources - SyncPoint::GetInstance()->LoadDependency( - {{"RoundRobinSubcompactionsAgainstPressureToken:0", - "BackgroundCallCompaction:0"}, - {"CompactionJob::AcquireSubcompactionResources:0", - "RoundRobinSubcompactionsAgainstPressureToken:1"}, - {"RoundRobinSubcompactionsAgainstPressureToken:2", - "CompactionJob::AcquireSubcompactionResources:1"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(dbfull()->EnableAutoCompaction({dbfull()->DefaultColumnFamily()})); - TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstPressureToken:0"); - TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstPressureToken:1"); - std::unique_ptr pressure_token; - if (grab_pressure_token_) { - pressure_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - } - TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstPressureToken:2"); - - ASSERT_OK(dbfull()->WaitForCompact()); - ASSERT_TRUE(num_planned_subcompactions_verified); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -INSTANTIATE_TEST_CASE_P(RoundRobinSubcompactionsAgainstPressureToken, - RoundRobinSubcompactionsAgainstPressureToken, - testing::Bool()); - -TEST_P(RoundRobinSubcompactionsAgainstResources, SubcompactionsUsingResources) { - const int kKeysPerBuffer = 200; - Options options = CurrentOptions(); - options.num_levels = 4; - options.level0_file_num_compaction_trigger = 3; - options.target_file_size_base = kKeysPerBuffer * 1024; - options.compaction_pri = CompactionPri::kRoundRobin; - options.max_bytes_for_level_base = 30 * kKeysPerBuffer * 1024; - options.disable_auto_compactions = true; - options.max_subcompactions = 1; - options.max_background_compactions = max_compaction_limits_; - // Set a large number for max_compaction_bytes so that one round-robin - // compaction is enough to make post-compaction L1 size less than - // the maximum size (this test assumes only one round-robin compaction - // is triggered by kLevelMaxLevelSize) - options.max_compaction_bytes = 100000000; - - DestroyAndReopen(options); - env_->SetBackgroundThreads(total_low_pri_threads_, Env::LOW); - - Random rnd(301); - const std::vector files_per_level = {0, 40, 100}; - for (int lvl = 2; lvl > 0; lvl--) { - for (int i = 0; i < files_per_level[lvl]; i++) { - for (int j = 0; j < kKeysPerBuffer; j++) { - // Add (lvl-1) to ensure nearly equivallent number of files - // in L2 are overlapped with fils selected to compact from - // L1 - ASSERT_OK(Put(Key(2 * i * kKeysPerBuffer + 2 * j + (lvl - 1)), - rnd.RandomString(1010))); - } - ASSERT_OK(Flush()); - } - MoveFilesToLevel(lvl); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(files_per_level[lvl], NumTableFilesAtLevel(lvl, 0)); - } - - // 40 files in L1; 100 files in L2 - // This is a variable for making sure the following callback is called - // and the assertions in it are indeed excuted. - bool num_planned_subcompactions_verified = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::GenSubcompactionBoundaries:0", [&](void* arg) { - uint64_t num_planned_subcompactions = *(static_cast(arg)); - // More than 10 files are selected for round-robin under auto - // compaction. 
The number of planned subcompaction is restricted by - // the minimum number between available threads and compaction limits - ASSERT_EQ(num_planned_subcompactions - options.max_subcompactions, - std::min(total_low_pri_threads_, max_compaction_limits_) - 1); - num_planned_subcompactions_verified = true; - }); - SyncPoint::GetInstance()->LoadDependency( - {{"RoundRobinSubcompactionsAgainstResources:0", - "BackgroundCallCompaction:0"}, - {"CompactionJob::AcquireSubcompactionResources:0", - "RoundRobinSubcompactionsAgainstResources:1"}, - {"RoundRobinSubcompactionsAgainstResources:2", - "CompactionJob::AcquireSubcompactionResources:1"}, - {"CompactionJob::ReleaseSubcompactionResources:0", - "RoundRobinSubcompactionsAgainstResources:3"}, - {"RoundRobinSubcompactionsAgainstResources:4", - "CompactionJob::ReleaseSubcompactionResources:1"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(dbfull()->WaitForCompact()); - ASSERT_OK(dbfull()->EnableAutoCompaction({dbfull()->DefaultColumnFamily()})); - TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:0"); - TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:1"); - auto pressure_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - - TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:2"); - TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:3"); - // We can reserve more threads now except one is being used - ASSERT_EQ(total_low_pri_threads_ - 1, - env_->ReserveThreads(total_low_pri_threads_, Env::Priority::LOW)); - ASSERT_EQ( - total_low_pri_threads_ - 1, - env_->ReleaseThreads(total_low_pri_threads_ - 1, Env::Priority::LOW)); - TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:4"); - ASSERT_OK(dbfull()->WaitForCompact()); - ASSERT_TRUE(num_planned_subcompactions_verified); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -INSTANTIATE_TEST_CASE_P(RoundRobinSubcompactionsAgainstResources, - RoundRobinSubcompactionsAgainstResources, - ::testing::Values(std::make_tuple(1, 5), - std::make_tuple(5, 1), - std::make_tuple(10, 5), - std::make_tuple(5, 10), - std::make_tuple(10, 10))); - -TEST_P(DBCompactionTestWithParam, RoundRobinWithoutAdditionalResources) { - const int kKeysPerBuffer = 200; - Options options = CurrentOptions(); - options.num_levels = 4; - options.level0_file_num_compaction_trigger = 3; - options.target_file_size_base = kKeysPerBuffer * 1024; - options.compaction_pri = CompactionPri::kRoundRobin; - options.max_bytes_for_level_base = 30 * kKeysPerBuffer * 1024; - options.disable_auto_compactions = true; - options.max_subcompactions = max_subcompactions_; - options.max_background_compactions = 1; - options.max_compaction_bytes = 100000000; - // Similar experiment setting as above except the max_subcompactions - // is given by max_subcompactions_ (1 or 4), and we fix the - // additional resources as (1, 1) and thus no more extra resources - // can be used - DestroyAndReopen(options); - env_->SetBackgroundThreads(1, Env::LOW); - - Random rnd(301); - const std::vector files_per_level = {0, 33, 100}; - for (int lvl = 2; lvl > 0; lvl--) { - for (int i = 0; i < files_per_level[lvl]; i++) { - for (int j = 0; j < kKeysPerBuffer; j++) { - // Add (lvl-1) to ensure nearly equivallent number of files - // in L2 are overlapped with fils selected to compact from - // L1 - ASSERT_OK(Put(Key(2 * i * kKeysPerBuffer + 2 * j + (lvl - 1)), - rnd.RandomString(1010))); - } - ASSERT_OK(Flush()); - } - MoveFilesToLevel(lvl); - 
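The round-robin tests in this block share the same basic setup: compaction_pri = kRoundRobin plus limits on subcompactions and background compactions, with any extra parallelism coming from threads the compaction can reserve from the LOW-priority pool. A plain (non-test) configuration sketch with illustrative values:

#include "rocksdb/options.h"

// Sketch: round-robin file picking with bounded subcompaction parallelism.
rocksdb::Options RoundRobinOptions() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.compaction_pri = rocksdb::CompactionPri::kRoundRobin;
  options.max_subcompactions = 4;          // per-compaction parallelism cap
  options.max_background_compactions = 4;  // concurrent compaction jobs
  return options;
}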
ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(files_per_level[lvl], NumTableFilesAtLevel(lvl, 0)); - } - - // 33 files in L1; 100 files in L2 - // This is a variable for making sure the following callback is called - // and the assertions in it are indeed excuted. - bool num_planned_subcompactions_verified = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::GenSubcompactionBoundaries:0", [&](void* arg) { - uint64_t num_planned_subcompactions = *(static_cast(arg)); - // At most 4 files are selected for round-robin under auto - // compaction. The number of planned subcompaction is restricted by - // the max_subcompactions since no extra resources can be used - ASSERT_EQ(num_planned_subcompactions, options.max_subcompactions); - num_planned_subcompactions_verified = true; - }); - // No need to setup dependency for pressure token since - // AcquireSubcompactionResources may not be called and it anyway cannot - // reserve any additional resources - SyncPoint::GetInstance()->LoadDependency( - {{"DBCompactionTest::RoundRobinWithoutAdditionalResources:0", - "BackgroundCallCompaction:0"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(dbfull()->WaitForCompact()); - ASSERT_OK(dbfull()->EnableAutoCompaction({dbfull()->DefaultColumnFamily()})); - TEST_SYNC_POINT("DBCompactionTest::RoundRobinWithoutAdditionalResources:0"); - - ASSERT_OK(dbfull()->WaitForCompact()); - ASSERT_TRUE(num_planned_subcompactions_verified); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(DBCompactionTest, RoundRobinCutOutputAtCompactCursor) { - Options options = CurrentOptions(); - options.num_levels = 3; - options.compression = kNoCompression; - options.write_buffer_size = 4 * 1024; - options.max_bytes_for_level_base = 64 * 1024; - options.max_bytes_for_level_multiplier = 4; - options.level0_file_num_compaction_trigger = 4; - options.compaction_pri = CompactionPri::kRoundRobin; - - DestroyAndReopen(options); - - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - - VersionStorageInfo* storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - - const InternalKey split_cursor = InternalKey(Key(600), 100, kTypeValue); - storage_info->AddCursorForOneLevel(2, split_cursor); - - Random rnd(301); - - for (int i = 0; i < 50; i++) { - for (int j = 0; j < 50; j++) { - ASSERT_OK(Put(Key(j * 2 + i * 100), rnd.RandomString(102))); - } - } - // Add more overlapping files (avoid trivial move) to trigger compaction that - // output files in L2. Note that trivial move does not trigger compaction and - // in that case the cursor is not necessarily the boundary of file. 
- for (int i = 0; i < 50; i++) { - for (int j = 0; j < 50; j++) { - ASSERT_OK(Put(Key(j * 2 + 1 + i * 100), rnd.RandomString(1014))); - } - } - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - std::vector> level_to_files; - dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), - &level_to_files); - const auto icmp = cfd->current()->storage_info()->InternalComparator(); - // Files in level 2 should be split by the cursor - for (const auto& file : level_to_files[2]) { - ASSERT_TRUE( - icmp->Compare(file.smallest.Encode(), split_cursor.Encode()) >= 0 || - icmp->Compare(file.largest.Encode(), split_cursor.Encode()) < 0); - } -} - -class NoopMergeOperator : public MergeOperator { - public: - NoopMergeOperator() {} - - bool FullMergeV2(const MergeOperationInput& /*merge_in*/, - MergeOperationOutput* merge_out) const override { - std::string val("bar"); - merge_out->new_value = val; - return true; - } - - const char* Name() const override { return "Noop"; } -}; - -TEST_F(DBCompactionTest, PartialManualCompaction) { - Options opts = CurrentOptions(); - opts.num_levels = 3; - opts.level0_file_num_compaction_trigger = 10; - opts.compression = kNoCompression; - opts.merge_operator.reset(new NoopMergeOperator()); - opts.target_file_size_base = 10240; - DestroyAndReopen(opts); - - Random rnd(301); - for (auto i = 0; i < 8; ++i) { - for (auto j = 0; j < 10; ++j) { - ASSERT_OK(Merge("foo", rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - - MoveFilesToLevel(2); - - std::string prop; - EXPECT_TRUE(dbfull()->GetProperty(DB::Properties::kLiveSstFilesSize, &prop)); - uint64_t max_compaction_bytes = atoi(prop.c_str()) / 2; - ASSERT_OK(dbfull()->SetOptions( - {{"max_compaction_bytes", std::to_string(max_compaction_bytes)}})); - - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); -} - -TEST_F(DBCompactionTest, ManualCompactionFailsInReadOnlyMode) { - // Regression test for bug where manual compaction hangs forever when the DB - // is in read-only mode. Verify it now at least returns, despite failing. - const int kNumL0Files = 4; - std::unique_ptr mock_env( - new FaultInjectionTestEnv(env_)); - Options opts = CurrentOptions(); - opts.disable_auto_compactions = true; - opts.env = mock_env.get(); - DestroyAndReopen(opts); - - Random rnd(301); - for (int i = 0; i < kNumL0Files; ++i) { - // Make sure files are overlapping in key-range to prevent trivial move. - ASSERT_OK(Put("key1", rnd.RandomString(1024))); - ASSERT_OK(Put("key2", rnd.RandomString(1024))); - ASSERT_OK(Flush()); - } - ASSERT_EQ(kNumL0Files, NumTableFilesAtLevel(0)); - - // Enter read-only mode by failing a write. - mock_env->SetFilesystemActive(false); - // Make sure this is outside `CompactRange`'s range so that it doesn't fail - // early trying to flush memtable. - ASSERT_NOK(Put("key3", rnd.RandomString(1024))); - - // In the bug scenario, the first manual compaction would fail and forget to - // unregister itself, causing the second one to hang forever due to conflict - // with a non-running compaction. - CompactRangeOptions cro; - cro.exclusive_manual_compaction = false; - Slice begin_key("key1"); - Slice end_key("key2"); - ASSERT_NOK(dbfull()->CompactRange(cro, &begin_key, &end_key)); - ASSERT_NOK(dbfull()->CompactRange(cro, &begin_key, &end_key)); - - // Close before mock_env destruct. 
- Close(); -} - -// ManualCompactionBottomLevelOptimization tests the bottom level manual -// compaction optimization to skip recompacting files created by Ln-1 to Ln -// compaction -TEST_F(DBCompactionTest, ManualCompactionBottomLevelOptimized) { - Options opts = CurrentOptions(); - opts.num_levels = 3; - opts.level0_file_num_compaction_trigger = 5; - opts.compression = kNoCompression; - opts.merge_operator.reset(new NoopMergeOperator()); - opts.target_file_size_base = 1024; - opts.max_bytes_for_level_multiplier = 2; - opts.disable_auto_compactions = true; - DestroyAndReopen(opts); - ColumnFamilyHandleImpl* cfh = - static_cast(dbfull()->DefaultColumnFamily()); - ColumnFamilyData* cfd = cfh->cfd(); - InternalStats* internal_stats_ptr = cfd->internal_stats(); - ASSERT_NE(internal_stats_ptr, nullptr); - - Random rnd(301); - for (auto i = 0; i < 8; ++i) { - for (auto j = 0; j < 10; ++j) { - ASSERT_OK( - Put("foo" + std::to_string(i * 10 + j), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - - MoveFilesToLevel(2); - - for (auto i = 0; i < 8; ++i) { - for (auto j = 0; j < 10; ++j) { - ASSERT_OK( - Put("bar" + std::to_string(i * 10 + j), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - const std::vector& comp_stats = - internal_stats_ptr->TEST_GetCompactionStats(); - int num = comp_stats[2].num_input_files_in_output_level; - ASSERT_EQ(num, 0); - - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - - const std::vector& comp_stats2 = - internal_stats_ptr->TEST_GetCompactionStats(); - num = comp_stats2[2].num_input_files_in_output_level; - ASSERT_EQ(num, 0); -} - -TEST_F(DBCompactionTest, ManualCompactionMax) { - uint64_t l1_avg_size = 0, l2_avg_size = 0; - auto generate_sst_func = [&]() { - Random rnd(301); - for (auto i = 0; i < 100; i++) { - for (auto j = 0; j < 10; j++) { - ASSERT_OK(Put(Key(i * 10 + j), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - - for (auto i = 0; i < 10; i++) { - for (auto j = 0; j < 10; j++) { - ASSERT_OK(Put(Key(i * 100 + j * 10), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - MoveFilesToLevel(1); - - std::vector> level_to_files; - dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), - &level_to_files); - - uint64_t total = 0; - for (const auto& file : level_to_files[1]) { - total += file.compensated_file_size; - } - l1_avg_size = total / level_to_files[1].size(); - - total = 0; - for (const auto& file : level_to_files[2]) { - total += file.compensated_file_size; - } - l2_avg_size = total / level_to_files[2].size(); - }; - - std::atomic_int num_compactions(0); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BGWorkCompaction", [&](void* /*arg*/) { ++num_compactions; }); - SyncPoint::GetInstance()->EnableProcessing(); - - Options opts = CurrentOptions(); - opts.disable_auto_compactions = true; - - // with default setting (1.6G by default), it should cover all files in 1 - // compaction - DestroyAndReopen(opts); - generate_sst_func(); - num_compactions.store(0); - CompactRangeOptions cro; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_TRUE(num_compactions.load() == 1); - - // split the compaction to 5 - int num_split = 5; - DestroyAndReopen(opts); - generate_sst_func(); - uint64_t total_size = (l1_avg_size * 10) + (l2_avg_size * 100); - opts.max_compaction_bytes = total_size / num_split; - opts.target_file_size_base = total_size / num_split; - Reopen(opts); - 
num_compactions.store(0); - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_TRUE(num_compactions.load() == num_split); - - // very small max_compaction_bytes, it should still move forward - opts.max_compaction_bytes = l1_avg_size / 2; - opts.target_file_size_base = l1_avg_size / 2; - DestroyAndReopen(opts); - generate_sst_func(); - num_compactions.store(0); - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_TRUE(num_compactions.load() > 10); - - // dynamically set the option - num_split = 2; - opts.max_compaction_bytes = 0; - DestroyAndReopen(opts); - generate_sst_func(); - total_size = (l1_avg_size * 10) + (l2_avg_size * 100); - Status s = db_->SetOptions( - {{"max_compaction_bytes", std::to_string(total_size / num_split)}, - {"target_file_size_base", std::to_string(total_size / num_split)}}); - ASSERT_OK(s); - - num_compactions.store(0); - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_TRUE(num_compactions.load() == num_split); -} - -TEST_F(DBCompactionTest, CompactionDuringShutdown) { - Options opts = CurrentOptions(); - opts.level0_file_num_compaction_trigger = 2; - opts.disable_auto_compactions = true; - DestroyAndReopen(opts); - ColumnFamilyHandleImpl* cfh = - static_cast(dbfull()->DefaultColumnFamily()); - ColumnFamilyData* cfd = cfh->cfd(); - InternalStats* internal_stats_ptr = cfd->internal_stats(); - ASSERT_NE(internal_stats_ptr, nullptr); - - Random rnd(301); - for (auto i = 0; i < 2; ++i) { - for (auto j = 0; j < 10; ++j) { - ASSERT_OK( - Put("foo" + std::to_string(i * 10 + j), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:BeforeRun", - [&](void* /*arg*/) { dbfull()->shutting_down_.store(true); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - Status s = dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); - ASSERT_TRUE(s.ok() || s.IsShutdownInProgress()); - ASSERT_OK(dbfull()->error_handler_.GetBGError()); -} - -// FixFileIngestionCompactionDeadlock tests and verifies that compaction and -// file ingestion do not cause deadlock in the event of write stall triggered -// by number of L0 files reaching level0_stop_writes_trigger. -TEST_P(DBCompactionTestWithParam, FixFileIngestionCompactionDeadlock) { - const int kNumKeysPerFile = 100; - // Generate SST files. - Options options = CurrentOptions(); - - // Generate an external SST file containing a single key, i.e. 
99 - std::string sst_files_dir = dbname_ + "/sst_files/"; - ASSERT_OK(DestroyDir(env_, sst_files_dir)); - ASSERT_OK(env_->CreateDir(sst_files_dir)); - SstFileWriter sst_writer(EnvOptions(), options); - const std::string sst_file_path = sst_files_dir + "test.sst"; - ASSERT_OK(sst_writer.Open(sst_file_path)); - ASSERT_OK(sst_writer.Put(Key(kNumKeysPerFile - 1), "value")); - ASSERT_OK(sst_writer.Finish()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::IngestExternalFile:AfterIncIngestFileCounter", - "BackgroundCallCompaction:0"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - - options.write_buffer_size = 110 << 10; // 110KB - options.level0_file_num_compaction_trigger = - options.level0_stop_writes_trigger; - options.max_subcompactions = max_subcompactions_; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - Random rnd(301); - - // Generate level0_stop_writes_trigger L0 files to trigger write stop - for (int i = 0; i != options.level0_file_num_compaction_trigger; ++i) { - for (int j = 0; j != kNumKeysPerFile; ++j) { - ASSERT_OK(Put(Key(j), rnd.RandomString(990))); - } - if (i > 0) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(NumTableFilesAtLevel(0 /*level*/, 0 /*cf*/), i); - } - } - // When we reach this point, there will be level0_stop_writes_trigger L0 - // files and one extra key (99) in memory, which overlaps with the external - // SST file. Write stall triggers, and can be cleared only after compaction - // reduces the number of L0 files. - - // Compaction will also be triggered since we have reached the threshold for - // auto compaction. Note that compaction may begin after the following file - // ingestion thread and waits for ingestion to finish. - - // Thread to ingest file with overlapping key range with the current - // memtable. Consequently ingestion will trigger a flush. The flush MUST - // proceed without waiting for the write stall condition to clear, otherwise - // deadlock can happen. 
- port::Thread ingestion_thr([&]() { - IngestExternalFileOptions ifo; - Status s = db_->IngestExternalFile({sst_file_path}, ifo); - ASSERT_OK(s); - }); - - // More write to trigger write stop - ingestion_thr.join(); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - Close(); -} - -class DBCompactionTestWithOngoingFileIngestionParam - : public DBCompactionTest, - public testing::WithParamInterface { - public: - DBCompactionTestWithOngoingFileIngestionParam() : DBCompactionTest() { - compaction_path_to_test_ = GetParam(); - } - void SetupOptions() { - options_ = CurrentOptions(); - options_.create_if_missing = true; - - if (compaction_path_to_test_ == "RefitLevelCompactRange") { - options_.num_levels = 7; - } else { - options_.num_levels = 3; - } - options_.compaction_style = CompactionStyle::kCompactionStyleLevel; - if (compaction_path_to_test_ == "AutoCompaction") { - options_.disable_auto_compactions = false; - options_.level0_file_num_compaction_trigger = 1; - } else { - options_.disable_auto_compactions = true; - } - } - - void PauseCompactionThread() { - sleeping_task_.reset(new test::SleepingBackgroundTask()); - env_->SetBackgroundThreads(1, Env::LOW); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - sleeping_task_.get(), Env::Priority::LOW); - sleeping_task_->WaitUntilSleeping(); - } - - void ResumeCompactionThread() { - if (sleeping_task_) { - sleeping_task_->WakeUp(); - sleeping_task_->WaitUntilDone(); - } - } - - void SetupFilesToForceFutureFilesIngestedToCertainLevel() { - SstFileWriter sst_file_writer(EnvOptions(), options_); - std::string dummy = dbname_ + "/dummy.sst"; - ASSERT_OK(sst_file_writer.Open(dummy)); - ASSERT_OK(sst_file_writer.Put("k2", "dummy")); - ASSERT_OK(sst_file_writer.Finish()); - ASSERT_OK(db_->IngestExternalFile({dummy}, IngestExternalFileOptions())); - // L2 is made to contain a file overlapped with files to be ingested in - // later steps on key "k2". This will force future files ingested to L1 or - // above. 
- ASSERT_EQ("0,0,1", FilesPerLevel(0)); - } - - void SetupSyncPoints() { - if (compaction_path_to_test_ == "AutoCompaction") { - SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::Run", [&](void*) { - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BackgroundCompaction():AfterPickCompaction", - "VersionSet::LogAndApply:WriteManifest"}}); - }); - } else if (compaction_path_to_test_ == "NonRefitLevelCompactRange") { - SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::Run", [&](void*) { - SyncPoint::GetInstance()->LoadDependency( - {{"ColumnFamilyData::CompactRange:Return", - "VersionSet::LogAndApply:WriteManifest"}}); - }); - } else if (compaction_path_to_test_ == "RefitLevelCompactRange") { - SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::Run", [&](void*) { - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::CompactRange:PostRefitLevel", - "VersionSet::LogAndApply:WriteManifest"}}); - }); - } else if (compaction_path_to_test_ == "CompactFiles") { - SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::Run", [&](void*) { - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::CompactFilesImpl::PostSanitizeCompactionInputFiles", - "VersionSet::LogAndApply:WriteManifest"}}); - }); - } else { - assert(false); - } - SyncPoint::GetInstance()->LoadDependency( - {{"ExternalSstFileIngestionJob::Run", "PreCompaction"}}); - SyncPoint::GetInstance()->EnableProcessing(); - } - - void RunCompactionOverlappedWithFileIngestion() { - if (compaction_path_to_test_ == "AutoCompaction") { - TEST_SYNC_POINT("PreCompaction"); - ResumeCompactionThread(); - // Without proper range conflict check, - // this would have been `Status::Corruption` about overlapping ranges - Status s = dbfull()->TEST_WaitForCompact(); - EXPECT_OK(s); - } else if (compaction_path_to_test_ == "NonRefitLevelCompactRange") { - CompactRangeOptions cro; - cro.change_level = false; - std::string start_key = "k1"; - Slice start(start_key); - std::string end_key = "k4"; - Slice end(end_key); - TEST_SYNC_POINT("PreCompaction"); - // Without proper range conflict check, - // this would have been `Status::Corruption` about overlapping ranges - Status s = dbfull()->CompactRange(cro, &start, &end); - EXPECT_OK(s); - } else if (compaction_path_to_test_ == "RefitLevelCompactRange") { - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 5; - std::string start_key = "k1"; - Slice start(start_key); - std::string end_key = "k4"; - Slice end(end_key); - TEST_SYNC_POINT("PreCompaction"); - Status s = dbfull()->CompactRange(cro, &start, &end); - // Without proper range conflict check, - // this would have been `Status::Corruption` about overlapping ranges - // To see this, remove the fix AND replace - // `DBImpl::CompactRange:PostRefitLevel` in sync point dependency with - // `DBImpl::ReFitLevel:PostRegisterCompaction` - EXPECT_TRUE(s.IsNotSupported()); - EXPECT_TRUE(s.ToString().find("some ongoing compaction's output") != - std::string::npos); - } else if (compaction_path_to_test_ == "CompactFiles") { - ColumnFamilyMetaData cf_meta_data; - db_->GetColumnFamilyMetaData(&cf_meta_data); - ASSERT_EQ(cf_meta_data.levels[0].files.size(), 1); - std::vector input_files; - for (const auto& file : cf_meta_data.levels[0].files) { - input_files.push_back(file.name); - } - TEST_SYNC_POINT("PreCompaction"); - Status s = db_->CompactFiles(CompactionOptions(), input_files, 1); - // Without proper range conflict check, - // this would have been 
`Status::Corruption` about overlapping ranges - EXPECT_TRUE(s.IsAborted()); - EXPECT_TRUE( - s.ToString().find( - "A running compaction is writing to the same output level") != - std::string::npos); - } else { - assert(false); - } - } - - void DisableSyncPoints() { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } - - protected: - std::string compaction_path_to_test_; - Options options_; - std::shared_ptr sleeping_task_; -}; - -INSTANTIATE_TEST_CASE_P(DBCompactionTestWithOngoingFileIngestionParam, - DBCompactionTestWithOngoingFileIngestionParam, - ::testing::Values("AutoCompaction", - "NonRefitLevelCompactRange", - "RefitLevelCompactRange", - "CompactFiles")); - -TEST_P(DBCompactionTestWithOngoingFileIngestionParam, RangeConflictCheck) { - SetupOptions(); - DestroyAndReopen(options_); - - if (compaction_path_to_test_ == "AutoCompaction") { - PauseCompactionThread(); - } - - if (compaction_path_to_test_ != "RefitLevelCompactRange") { - SetupFilesToForceFutureFilesIngestedToCertainLevel(); - } - - // Create s1 - ASSERT_OK(Put("k1", "v")); - ASSERT_OK(Put("k4", "v")); - ASSERT_OK(Flush()); - if (compaction_path_to_test_ == "RefitLevelCompactRange") { - MoveFilesToLevel(6 /* level */); - ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel(0)); - } else { - ASSERT_EQ("1,0,1", FilesPerLevel(0)); - } - - // To coerce following sequence of events - // Timeline Thread 1 (Ingest s2) Thread 2 (Compact s1) - // t0 | Decide to output to Lk - // t1 | Release lock in LogAndApply() - // t2 | Acquire lock - // t3 | Decides to compact to Lk - // | Expected to fail due to range - // | conflict check with file - // | ingestion - // t4 | Release lock in LogAndApply() - // t5 | Acquire lock again and finish - // t6 | Acquire lock again and finish - SetupSyncPoints(); - - // Ingest s2 - port::Thread thread1([&] { - SstFileWriter sst_file_writer(EnvOptions(), options_); - std::string s2 = dbname_ + "/ingested_s2.sst"; - ASSERT_OK(sst_file_writer.Open(s2)); - ASSERT_OK(sst_file_writer.Put("k2", "v2")); - ASSERT_OK(sst_file_writer.Put("k3", "v2")); - ASSERT_OK(sst_file_writer.Finish()); - ASSERT_OK(db_->IngestExternalFile({s2}, IngestExternalFileOptions())); - }); - - // Compact s1. Without proper range conflict check, - // this will encounter overlapping file corruption. 
- port::Thread thread2([&] { RunCompactionOverlappedWithFileIngestion(); });
-
- thread1.join();
- thread2.join();
- DisableSyncPoints();
-}
-
-TEST_F(DBCompactionTest, ConsistencyFailTest) {
- Options options = CurrentOptions();
- options.force_consistency_checks = true;
- DestroyAndReopen(options);
-
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "VersionBuilder::CheckConsistency0", [&](void* arg) {
- auto p =
- reinterpret_cast<std::pair<FileMetaData**, FileMetaData**>*>(arg);
- // Just swap the two FileMetaData entries so that we hit the error
- // in the CheckConsistency function
- FileMetaData* temp = *(p->first);
- *(p->first) = *(p->second);
- *(p->second) = temp;
- });
-
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
-
- for (int k = 0; k < 2; ++k) {
- ASSERT_OK(Put("foo", "bar"));
- Status s = Flush();
- if (k < 1) {
- ASSERT_OK(s);
- } else {
- ASSERT_TRUE(s.IsCorruption());
- }
- }
-
- ASSERT_NOK(Put("foo", "bar"));
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
-}
-
-TEST_F(DBCompactionTest, ConsistencyFailTest2) {
- Options options = CurrentOptions();
- options.force_consistency_checks = true;
- options.target_file_size_base = 1000;
- options.level0_file_num_compaction_trigger = 2;
- BlockBasedTableOptions bbto;
- bbto.block_size = 400; // small block size
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
- DestroyAndReopen(options);
-
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "VersionBuilder::CheckConsistency1", [&](void* arg) {
- auto p =
- reinterpret_cast<std::pair<FileMetaData**, FileMetaData**>*>(arg);
- // Just swap the two FileMetaData entries so that we hit the error
- // in the CheckConsistency function
- FileMetaData* temp = *(p->first);
- *(p->first) = *(p->second);
- *(p->second) = temp;
- });
-
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
-
- Random rnd(301);
- std::string value = rnd.RandomString(1000);
-
- ASSERT_OK(Put("foo1", value));
- ASSERT_OK(Put("z", ""));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("foo2", value));
- ASSERT_OK(Put("z", ""));
- Status s = Flush();
- ASSERT_TRUE(s.ok() || s.IsCorruption());
-
- // This probably returns non-OK, but we rely on the next Put()
- // to determine whether the DB is frozen.
- ASSERT_NOK(dbfull()->TEST_WaitForCompact()); - ASSERT_NOK(Put("foo", "bar")); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -void IngestOneKeyValue(DBImpl* db, const std::string& key, - const std::string& value, const Options& options) { - ExternalSstFileInfo info; - std::string f = test::PerThreadDBPath("sst_file" + key); - EnvOptions env; - ROCKSDB_NAMESPACE::SstFileWriter writer(env, options); - auto s = writer.Open(f); - ASSERT_OK(s); - // ASSERT_OK(writer.Put(Key(), "")); - ASSERT_OK(writer.Put(key, value)); - - ASSERT_OK(writer.Finish(&info)); - IngestExternalFileOptions ingest_opt; - - ASSERT_OK(db->IngestExternalFile({info.file_path}, ingest_opt)); -} - -class DBCompactionTestL0FilesMisorderCorruption : public DBCompactionTest { - public: - DBCompactionTestL0FilesMisorderCorruption() : DBCompactionTest() {} - void SetupOptions(const CompactionStyle compaciton_style, - const std::string& compaction_path_to_test = "") { - options_ = CurrentOptions(); - options_.create_if_missing = true; - options_.compression = kNoCompression; - - options_.force_consistency_checks = true; - options_.compaction_style = compaciton_style; - - if (compaciton_style == CompactionStyle::kCompactionStyleLevel) { - options_.num_levels = 7; - // Level compaction's PickIntraL0Compaction() impl detail requires - // `options.level0_file_num_compaction_trigger` to be - // at least 2 files less than the actual number of level 0 files - // (i.e, 7 by design in this test) - options_.level0_file_num_compaction_trigger = 5; - options_.max_background_compactions = 2; - options_.write_buffer_size = 2 << 20; - options_.max_write_buffer_number = 6; - } else if (compaciton_style == CompactionStyle::kCompactionStyleUniversal) { - // TODO: expand test coverage to num_lvels > 1 for universal compacion, - // which requires careful unit test design to compact to level 0 despite - // num_levels > 1 - options_.num_levels = 1; - options_.level0_file_num_compaction_trigger = 5; - - CompactionOptionsUniversal universal_options; - if (compaction_path_to_test == "PickCompactionToReduceSizeAmp") { - universal_options.max_size_amplification_percent = 50; - } else if (compaction_path_to_test == - "PickCompactionToReduceSortedRuns") { - universal_options.max_size_amplification_percent = 400; - } else if (compaction_path_to_test == "PickDeleteTriggeredCompaction") { - universal_options.max_size_amplification_percent = 400; - universal_options.min_merge_width = 6; - } - options_.compaction_options_universal = universal_options; - } else if (compaciton_style == CompactionStyle::kCompactionStyleFIFO) { - options_.max_open_files = -1; - options_.num_levels = 1; - options_.level0_file_num_compaction_trigger = 3; - - CompactionOptionsFIFO fifo_options; - if (compaction_path_to_test == "FindIntraL0Compaction" || - compaction_path_to_test == "CompactRange") { - fifo_options.allow_compaction = true; - fifo_options.age_for_warm = 0; - } else if (compaction_path_to_test == "CompactFile") { - fifo_options.allow_compaction = false; - fifo_options.age_for_warm = 0; - } - options_.compaction_options_fifo = fifo_options; - } - - if (compaction_path_to_test == "CompactFile" || - compaction_path_to_test == "CompactRange") { - options_.disable_auto_compactions = true; - } else { - options_.disable_auto_compactions = false; - } - } - - void Destroy(const Options& options) { - if (snapshot_) { - assert(db_); - db_->ReleaseSnapshot(snapshot_); - snapshot_ = nullptr; - } - 
DBTestBase::Destroy(options); - } - - void Reopen(const Options& options) { - DBTestBase::Reopen(options); - if (options.compaction_style != CompactionStyle::kCompactionStyleLevel) { - // To force assigning the global seqno to ingested file - // for our test purpose. - assert(snapshot_ == nullptr); - snapshot_ = db_->GetSnapshot(); - } - } - - void DestroyAndReopen(Options& options) { - Destroy(options); - Reopen(options); - } - - void PauseCompactionThread() { - sleeping_task_.reset(new test::SleepingBackgroundTask()); - env_->SetBackgroundThreads(1, Env::LOW); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - sleeping_task_.get(), Env::Priority::LOW); - sleeping_task_->WaitUntilSleeping(); - } - - void ResumeCompactionThread() { - if (sleeping_task_) { - sleeping_task_->WakeUp(); - sleeping_task_->WaitUntilDone(); - } - } - - void AddFilesMarkedForPeriodicCompaction(const size_t num_files) { - assert(options_.compaction_style == - CompactionStyle::kCompactionStyleUniversal); - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - assert(cfd); - Version* const current = cfd->current(); - assert(current); - - VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - - const std::vector level0_files = storage_info->LevelFiles(0); - assert(level0_files.size() == num_files); - - for (FileMetaData* f : level0_files) { - storage_info->TEST_AddFileMarkedForPeriodicCompaction(0, f); - } - } - - void AddFilesMarkedForCompaction(const size_t num_files) { - assert(options_.compaction_style == - CompactionStyle::kCompactionStyleUniversal); - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - assert(cfd); - Version* const current = cfd->current(); - assert(current); - - VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - - const std::vector level0_files = storage_info->LevelFiles(0); - assert(level0_files.size() == num_files); - - for (FileMetaData* f : level0_files) { - storage_info->TEST_AddFileMarkedForCompaction(0, f); - } - } - - void SetupSyncPoints(const std::string& compaction_path_to_test) { - compaction_path_sync_point_called_.store(false); - if (compaction_path_to_test == "FindIntraL0Compaction" && - options_.compaction_style == CompactionStyle::kCompactionStyleLevel) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "PostPickFileToCompact", [&](void* arg) { - bool* picked_file_to_compact = (bool*)arg; - // To trigger intra-L0 compaction specifically, - // we mock PickFileToCompact()'s result to be false - *picked_file_to_compact = false; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "FindIntraL0Compaction", [&](void* /*arg*/) { - compaction_path_sync_point_called_.store(true); - }); - - } else if (compaction_path_to_test == "PickPeriodicCompaction") { - assert(options_.compaction_style == - CompactionStyle::kCompactionStyleUniversal); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "PostPickPeriodicCompaction", [&](void* compaction_arg) { - Compaction* compaction = (Compaction*)compaction_arg; - if (compaction != nullptr) { - compaction_path_sync_point_called_.store(true); - } - }); - } else if (compaction_path_to_test == "PickCompactionToReduceSizeAmp") { - assert(options_.compaction_style == - CompactionStyle::kCompactionStyleUniversal); - 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "PickCompactionToReduceSizeAmpReturnNonnullptr", [&](void* /*arg*/) { - compaction_path_sync_point_called_.store(true); - }); - } else if (compaction_path_to_test == "PickCompactionToReduceSortedRuns") { - assert(options_.compaction_style == - CompactionStyle::kCompactionStyleUniversal); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "PickCompactionToReduceSortedRunsReturnNonnullptr", - [&](void* /*arg*/) { - compaction_path_sync_point_called_.store(true); - }); - } else if (compaction_path_to_test == "PickDeleteTriggeredCompaction") { - assert(options_.compaction_style == - CompactionStyle::kCompactionStyleUniversal); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "PickDeleteTriggeredCompactionReturnNonnullptr", [&](void* /*arg*/) { - compaction_path_sync_point_called_.store(true); - }); - } else if ((compaction_path_to_test == "FindIntraL0Compaction" || - compaction_path_to_test == "CompactRange") && - options_.compaction_style == - CompactionStyle::kCompactionStyleFIFO) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "FindIntraL0Compaction", [&](void* /*arg*/) { - compaction_path_sync_point_called_.store(true); - }); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - } - - bool SyncPointsCalled() { return compaction_path_sync_point_called_.load(); } - - void DisableSyncPoints() { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } - - // Return the largest seqno of the latest L0 file based on file number - SequenceNumber GetLatestL0FileLargestSeqnoHelper() { - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - assert(cfd); - Version* const current = cfd->current(); - assert(current); - VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - const std::vector level0_files = storage_info->LevelFiles(0); - assert(level0_files.size() >= 1); - - uint64_t latest_file_num = 0; - uint64_t latest_file_largest_seqno = 0; - for (FileMetaData* f : level0_files) { - if (f->fd.GetNumber() > latest_file_num) { - latest_file_num = f->fd.GetNumber(); - latest_file_largest_seqno = f->fd.largest_seqno; - } - } - - return latest_file_largest_seqno; - } - - protected: - Options options_; - - private: - const Snapshot* snapshot_ = nullptr; - std::atomic compaction_path_sync_point_called_; - std::shared_ptr sleeping_task_; -}; - -TEST_F(DBCompactionTestL0FilesMisorderCorruption, - FlushAfterIntraL0LevelCompactionWithIngestedFile) { - SetupOptions(CompactionStyle::kCompactionStyleLevel, ""); - DestroyAndReopen(options_); - // Prevents trivial move - for (int i = 0; i < 10; ++i) { - ASSERT_OK(Put(Key(i), "")); // Prevents trivial move - } - ASSERT_OK(Flush()); - Compact("", Key(99)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - - // To get accurate NumTableFilesAtLevel(0) when the number reaches - // options_.level0_file_num_compaction_trigger - PauseCompactionThread(); - - // To create below LSM tree - // (key:value@n indicates key-value pair has seqno "n", L0 is sorted): - // - // memtable: m1[ 5:new@12 .. 1:new@8, 0:new@7] - // L0: s6[6:new@13], s5[5:old@6] ... 
s1[1:old@2],s0[0:old@1] - // - // (1) Make 6 L0 sst (i.e, s0 - s5) - for (int i = 0; i < 6; ++i) { - if (i % 2 == 0) { - IngestOneKeyValue(dbfull(), Key(i), "old", options_); - } else { - ASSERT_OK(Put(Key(i), "old")); - ASSERT_OK(Flush()); - } - } - ASSERT_EQ(6, NumTableFilesAtLevel(0)); - - // (2) Create m1 - for (int i = 0; i < 6; ++i) { - ASSERT_OK(Put(Key(i), "new")); - } - ASSERT_EQ(6, NumTableFilesAtLevel(0)); - - // (3) Ingest file (i.e, s6) to trigger IntraL0Compaction() - for (int i = 6; i < 7; ++i) { - ASSERT_EQ(i, NumTableFilesAtLevel(0)); - IngestOneKeyValue(dbfull(), Key(i), "new", options_); - } - - SetupSyncPoints("FindIntraL0Compaction"); - ResumeCompactionThread(); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_TRUE(SyncPointsCalled()); - DisableSyncPoints(); - - // After compaction, we have LSM tree: - // - // memtable: m1[ 5:new@12 .. 1:new@8, 0:new@7] - // L0: s7[6:new@13, 5:old@6 .. 0:old@1] - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - SequenceNumber compact_output_file_largest_seqno = - GetLatestL0FileLargestSeqnoHelper(); - - ASSERT_OK(Flush()); - // After flush, we have LSM tree: - // - // L0: s8[5:new@12 .. 0:new@7],s7[6:new@13, 5:old@5 .. 0:old@1] - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - SequenceNumber flushed_file_largest_seqno = - GetLatestL0FileLargestSeqnoHelper(); - - // To verify there isn't any file misorder leading to returning a old value - // of Key(0) - Key(5) , which is caused by flushed table s8 has a - // smaller largest seqno than the compaction output file s7's largest seqno - // while the flushed table has the newer version of the values than the - // compaction output file's. - ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno); - for (int i = 0; i < 6; ++i) { - ASSERT_EQ("new", Get(Key(i))); - } - for (int i = 6; i < 7; ++i) { - ASSERT_EQ("new", Get(Key(i))); - } -} - -TEST_F(DBCompactionTestL0FilesMisorderCorruption, - FlushAfterIntraL0UniversalCompactionWithIngestedFile) { - for (const std::string compaction_path_to_test : - {"PickPeriodicCompaction", "PickCompactionToReduceSizeAmp", - "PickCompactionToReduceSortedRuns", "PickDeleteTriggeredCompaction"}) { - SetupOptions(CompactionStyle::kCompactionStyleUniversal, - compaction_path_to_test); - DestroyAndReopen(options_); - - // To get accurate NumTableFilesAtLevel(0) when the number reaches - // options_.level0_file_num_compaction_trigger - PauseCompactionThread(); - - // To create below LSM tree - // (key:value@n indicates key-value pair has seqno "n", L0 is sorted): - // - // memtable: m1 [ k2:new@8, k1:new@7] - // L0: s4[k9:dummy@10], s3[k8:dummy@9], - // s2[k7:old@6, k6:old@5].. s0[k3:old@2, k1:old@1] - // - // (1) Create 3 existing SST file (i.e, s0 - s2) - ASSERT_OK(Put("k1", "old")); - ASSERT_OK(Put("k3", "old")); - ASSERT_OK(Flush()); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - ASSERT_OK(Put("k4", "old")); - ASSERT_OK(Put("k5", "old")); - ASSERT_OK(Flush()); - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - ASSERT_OK(Put("k6", "old")); - ASSERT_OK(Put("k7", "old")); - ASSERT_OK(Flush()); - ASSERT_EQ(3, NumTableFilesAtLevel(0)); - - // (2) Create m1. 
Note that it contains an overlapped key with s0
- ASSERT_OK(Put("k1", "new")); // overlapped key
- ASSERT_OK(Put("k2", "new"));
-
- // (3) Ingest two SST files s3, s4
- IngestOneKeyValue(dbfull(), "k8", "dummy", options_);
- IngestOneKeyValue(dbfull(), "k9", "dummy", options_);
- // Up to now, L0 contains s0 - s4
- ASSERT_EQ(5, NumTableFilesAtLevel(0));
-
- if (compaction_path_to_test == "PickPeriodicCompaction") {
- AddFilesMarkedForPeriodicCompaction(5);
- } else if (compaction_path_to_test == "PickDeleteTriggeredCompaction") {
- AddFilesMarkedForCompaction(5);
- }
-
- SetupSyncPoints(compaction_path_to_test);
- ResumeCompactionThread();
-
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
-
- ASSERT_TRUE(SyncPointsCalled())
- << "failed for compaction path to test: " << compaction_path_to_test;
- DisableSyncPoints();
-
- // After compaction, we have LSM tree:
- //
- // memtable: m1[ k2:new@8, k1:new@7]
- // L0: s5[k9:dummy@10, k8:dummy@9, k7:old@6 .. k3:old@2, k1:old@1]
- ASSERT_EQ(1, NumTableFilesAtLevel(0))
- << "failed for compaction path to test: " << compaction_path_to_test;
- SequenceNumber compact_output_file_largest_seqno =
- GetLatestL0FileLargestSeqnoHelper();
-
- ASSERT_OK(Flush()) << "failed for compaction path to test: "
- << compaction_path_to_test;
- // After flush, we have LSM tree:
- //
- // L0: s6[k2:new@8, k1:new@7],
- // s5[k9:dummy@10, k8:dummy@9, k7:old@6 .. k3:old@2, k1:old@1]
- ASSERT_EQ(2, NumTableFilesAtLevel(0))
- << "failed for compaction path to test: " << compaction_path_to_test;
- SequenceNumber flushed_file_largest_seqno =
- GetLatestL0FileLargestSeqnoHelper();
-
- // To verify there isn't any file misorder leading to returning an old
- // value of "k1", which would be caused by the flushed table s6 having a
- // smaller largest seqno than the compaction output file s5's largest seqno
- // while the flushed table has the newer version of the value
- // than the compaction output file's.
- ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno)
- << "failed for compaction path to test: " << compaction_path_to_test;
- EXPECT_EQ(Get("k1"), "new")
- << "failed for compaction path to test: " << compaction_path_to_test;
- }
-
- Destroy(options_);
-}
-
-TEST_F(DBCompactionTestL0FilesMisorderCorruption,
- FlushAfterIntraL0FIFOCompactionWithIngestedFile) {
- for (const std::string compaction_path_to_test : {"FindIntraL0Compaction"}) {
- SetupOptions(CompactionStyle::kCompactionStyleFIFO,
- compaction_path_to_test);
- DestroyAndReopen(options_);
-
- // To create below LSM tree
- // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
- //
- // memtable: m1 [ k2:new@4, k1:new@3]
- // L0: s2[k5:dummy@6], s1[k4:dummy@5], s0[k3:old@2, k1:old@1]
- //
- // (1) Create an existing SST file s0
- ASSERT_OK(Put("k1", "old"));
- ASSERT_OK(Put("k3", "old"));
- ASSERT_OK(Flush());
- ASSERT_EQ(1, NumTableFilesAtLevel(0));
-
- // (2) Create memtable m1.
Note that it contains an overlapped key with s0
- ASSERT_OK(Put("k1", "new")); // overlapped key
- ASSERT_OK(Put("k2", "new"));
-
- // To get accurate NumTableFilesAtLevel(0) when the number reaches
- // options_.level0_file_num_compaction_trigger
- PauseCompactionThread();
-
- // (3) Ingest two SST files s1, s2
- IngestOneKeyValue(dbfull(), "k4", "dummy", options_);
- IngestOneKeyValue(dbfull(), "k5", "dummy", options_);
- // Up to now, L0 contains s0, s1, s2
- ASSERT_EQ(3, NumTableFilesAtLevel(0));
-
- SetupSyncPoints(compaction_path_to_test);
- ResumeCompactionThread();
-
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
-
- ASSERT_TRUE(SyncPointsCalled())
- << "failed for compaction path to test: " << compaction_path_to_test;
- DisableSyncPoints();
- // After compaction, we have LSM tree:
- //
- // memtable: m1 [ k2:new@4, k1:new@3]
- // L0: s3[k5:dummy@6, k4:dummy@5, k3:old@2, k1:old@1]
- ASSERT_EQ(1, NumTableFilesAtLevel(0))
- << "failed for compaction path to test: " << compaction_path_to_test;
- SequenceNumber compact_output_file_largest_seqno =
- GetLatestL0FileLargestSeqnoHelper();
-
- ASSERT_OK(Flush()) << "failed for compaction path to test: "
- << compaction_path_to_test;
- // After flush, we have LSM tree:
- //
- // L0: s4[k2:new@4, k1:new@3], s3[k5:dummy@6, k4:dummy@5, k3:old@2,
- // k1:old@1]
- ASSERT_EQ(2, NumTableFilesAtLevel(0))
- << "failed for compaction path to test: " << compaction_path_to_test;
- SequenceNumber flushed_file_largest_seqno =
- GetLatestL0FileLargestSeqnoHelper();
-
- // To verify there isn't any file misorder leading to returning an old
- // value of "k1", which would be caused by the flushed table s4 having a
- // smaller largest seqno than the compaction output file s3's largest seqno
- // while the flushed table has the newer version of the value
- // than the compaction output file's.
- ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno)
- << "failed for compaction path to test: " << compaction_path_to_test;
- EXPECT_EQ(Get("k1"), "new")
- << "failed for compaction path to test: " << compaction_path_to_test;
- }
-
- Destroy(options_);
-}
-
-class DBCompactionTestL0FilesMisorderCorruptionWithParam
- : public DBCompactionTestL0FilesMisorderCorruption,
- public testing::WithParamInterface<CompactionStyle> {
- public:
- DBCompactionTestL0FilesMisorderCorruptionWithParam()
- : DBCompactionTestL0FilesMisorderCorruption() {}
-};
-
-// TODO: add `CompactionStyle::kCompactionStyleLevel` to testing parameter,
-// which requires careful unit test
-// design for ingesting file to L0 and CompactRange()/CompactFile() to L0
-INSTANTIATE_TEST_CASE_P(
- DBCompactionTestL0FilesMisorderCorruptionWithParam,
- DBCompactionTestL0FilesMisorderCorruptionWithParam,
- ::testing::Values(CompactionStyle::kCompactionStyleUniversal,
- CompactionStyle::kCompactionStyleFIFO));
-
-TEST_P(DBCompactionTestL0FilesMisorderCorruptionWithParam,
- FlushAfterIntraL0CompactFileWithIngestedFile) {
- SetupOptions(GetParam(), "CompactFile");
- DestroyAndReopen(options_);
-
- // To create below LSM tree
- // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
- //
- // memtable: m1 [ k2:new@4, k1:new@3]
- // L0: s2[k5:dummy@6], s1[k4:dummy@5], s0[k3:old@2, k1:old@1]
- //
- // (1) Create an existing SST file s0
- ASSERT_OK(Put("k1", "old"));
- ASSERT_OK(Put("k3", "old"));
- ASSERT_OK(Flush());
- ASSERT_EQ(1, NumTableFilesAtLevel(0));
-
- // (2) Create memtable m1.
Note that it contains an overlapped key with s0
- ASSERT_OK(Put("k1", "new")); // overlapped key
- ASSERT_OK(Put("k2", "new"));
-
- // (3) Ingest two SST files s1, s2
- IngestOneKeyValue(dbfull(), "k4", "dummy", options_);
- IngestOneKeyValue(dbfull(), "k5", "dummy", options_);
- // Up to now, L0 contains s0, s1, s2
- ASSERT_EQ(3, NumTableFilesAtLevel(0));
-
- ColumnFamilyMetaData cf_meta_data;
- db_->GetColumnFamilyMetaData(&cf_meta_data);
- ASSERT_EQ(cf_meta_data.levels[0].files.size(), 3);
- std::vector<std::string> input_files;
- for (const auto& file : cf_meta_data.levels[0].files) {
- input_files.push_back(file.name);
- }
- ASSERT_EQ(input_files.size(), 3);
-
- Status s = db_->CompactFiles(CompactionOptions(), input_files, 0);
- // After compaction, we have LSM tree:
- //
- // memtable: m1 [ k2:new@4, k1:new@3]
- // L0: s3[k5:dummy@6, k4:dummy@5, k3:old@2, k1:old@1]
- ASSERT_OK(s);
- ASSERT_EQ(1, NumTableFilesAtLevel(0));
- SequenceNumber compact_output_file_largest_seqno =
- GetLatestL0FileLargestSeqnoHelper();
-
- ASSERT_OK(Flush());
- // After flush, we have LSM tree:
- //
- // L0: s4[k2:new@4, k1:new@3], s3[k5:dummy@6, k4:dummy@5, k3:old@2,
- // k1:old@1]
- ASSERT_EQ(2, NumTableFilesAtLevel(0));
- SequenceNumber flushed_file_largest_seqno =
- GetLatestL0FileLargestSeqnoHelper();
-
- // To verify there isn't any file misorder leading to returning an old value
- // of "k1", which would be caused by the flushed table s4 having a smaller
- // largest seqno than the compaction output file s3's largest seqno while the
- // flushed table has the newer version of the value than the
- // compaction output file's.
- ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno);
- EXPECT_EQ(Get("k1"), "new");
-
- Destroy(options_);
-}
-
-TEST_P(DBCompactionTestL0FilesMisorderCorruptionWithParam,
- FlushAfterIntraL0CompactRangeWithIngestedFile) {
- SetupOptions(GetParam(), "CompactRange");
- DestroyAndReopen(options_);
-
- // To create below LSM tree
- // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
- //
- // memtable: m1 [ k2:new@4, k1:new@3]
- // L0: s2[k5:dummy@6], s1[k4:dummy@5], s0[k3:old@2, k1:old@1]
- //
- // (1) Create an existing SST file s0
- ASSERT_OK(Put("k1", "old"));
- ASSERT_OK(Put("k3", "old"));
- ASSERT_OK(Flush());
- ASSERT_EQ(1, NumTableFilesAtLevel(0));
-
- // (2) Create memtable m1.
Note that it contains an overlapped key with s0
- ASSERT_OK(Put("k1", "new")); // overlapped key
- ASSERT_OK(Put("k2", "new"));
-
- // (3) Ingest two SST files s1, s2
- IngestOneKeyValue(dbfull(), "k4", "dummy", options_);
- IngestOneKeyValue(dbfull(), "k5", "dummy", options_);
- // Up to now, L0 contains s0, s1, s2
- ASSERT_EQ(3, NumTableFilesAtLevel(0));
-
- if (options_.compaction_style == CompactionStyle::kCompactionStyleFIFO) {
- SetupSyncPoints("CompactRange");
- }
- // `start` and `end` are carefully chosen so that the compact range:
- // (1) doesn't overlap with the memtable, therefore the memtable won't be flushed
- // (2) targets compacting s0 with s1 and s2
- Slice start("k3"), end("k5");
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end));
- // After compaction, we have LSM tree:
- //
- // memtable: m1 [ k2:new@4, k1:new@3]
- // L0: s3[k5:dummy@6, k4:dummy@5, k3:old@2, k1:old@1]
- if (options_.compaction_style == CompactionStyle::kCompactionStyleFIFO) {
- ASSERT_TRUE(SyncPointsCalled());
- DisableSyncPoints();
- }
- ASSERT_EQ(1, NumTableFilesAtLevel(0));
- SequenceNumber compact_output_file_largest_seqno =
- GetLatestL0FileLargestSeqnoHelper();
-
- ASSERT_OK(Flush());
- // After flush, we have LSM tree:
- //
- // L0: s4[k2:new@4, k1:new@3], s3[k5:dummy@6, k4:dummy@5, k3:old@2,
- // k1:old@1]
- ASSERT_EQ(2, NumTableFilesAtLevel(0));
- SequenceNumber flushed_file_largest_seqno =
- GetLatestL0FileLargestSeqnoHelper();
-
- // To verify there isn't any file misorder leading to returning an old value
- // of "k1", which would be caused by the flushed table s4 having a smaller
- // largest seqno than the compaction output file s3's largest seqno while the
- // flushed table has the newer version of the value than the
- // compaction output file's.
- ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno); - EXPECT_EQ(Get("k1"), "new"); - - Destroy(options_); -} - -TEST_P(DBCompactionTestWithBottommostParam, SequenceKeysManualCompaction) { - constexpr int kSstNum = 10; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - // Generate some sst files on level 0 with sequence keys (no overlap) - for (int i = 0; i < kSstNum; i++) { - for (int j = 1; j < UCHAR_MAX; j++) { - auto key = std::string(kSstNum, '\0'); - key[kSstNum - i] += static_cast(j); - ASSERT_OK(Put(key, std::string(i % 1000, 'A'))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - ASSERT_EQ(std::to_string(kSstNum), FilesPerLevel(0)); - - auto cro = CompactRangeOptions(); - cro.bottommost_level_compaction = bottommost_level_compaction_; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - if (bottommost_level_compaction_ == BottommostLevelCompaction::kForce || - bottommost_level_compaction_ == - BottommostLevelCompaction::kForceOptimized) { - // Real compaction to compact all sst files from level 0 to 1 file on level - // 1 - ASSERT_EQ("0,1", FilesPerLevel(0)); - } else { - // Just trivial move from level 0 -> 1 - ASSERT_EQ("0," + std::to_string(kSstNum), FilesPerLevel(0)); - } -} - -INSTANTIATE_TEST_CASE_P( - DBCompactionTestWithBottommostParam, DBCompactionTestWithBottommostParam, - ::testing::Values(BottommostLevelCompaction::kSkip, - BottommostLevelCompaction::kIfHaveCompactionFilter, - BottommostLevelCompaction::kForce, - BottommostLevelCompaction::kForceOptimized)); - -TEST_F(DBCompactionTest, UpdateLevelSubCompactionTest) { - Options options = CurrentOptions(); - options.max_subcompactions = 10; - options.target_file_size_base = 1 << 10; // 1KB - DestroyAndReopen(options); - - bool has_compaction = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - ASSERT_TRUE(compaction->max_subcompactions() == 10); - has_compaction = true; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_TRUE(dbfull()->GetDBOptions().max_subcompactions == 10); - // Trigger compaction - for (int i = 0; i < 32; i++) { - for (int j = 0; j < 5000; j++) { - ASSERT_OK(Put(std::to_string(j), std::string(1, 'A'))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(has_compaction); - - has_compaction = false; - ASSERT_OK(dbfull()->SetDBOptions({{"max_subcompactions", "2"}})); - ASSERT_TRUE(dbfull()->GetDBOptions().max_subcompactions == 2); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - ASSERT_TRUE(compaction->max_subcompactions() == 2); - has_compaction = true; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Trigger compaction - for (int i = 0; i < 32; i++) { - for (int j = 0; j < 5000; j++) { - ASSERT_OK(Put(std::to_string(j), std::string(1, 'A'))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(has_compaction); -} - -TEST_F(DBCompactionTest, UpdateUniversalSubCompactionTest) { - Options options = CurrentOptions(); - options.max_subcompactions = 10; - 
options.compaction_style = kCompactionStyleUniversal;
- options.target_file_size_base = 1 << 10; // 1KB
- DestroyAndReopen(options);
-
- bool has_compaction = false;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) {
- Compaction* compaction = reinterpret_cast<Compaction*>(arg);
- ASSERT_TRUE(compaction->max_subcompactions() == 10);
- has_compaction = true;
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
-
- // Trigger compaction
- for (int i = 0; i < 32; i++) {
- for (int j = 0; j < 5000; j++) {
- ASSERT_OK(Put(std::to_string(j), std::string(1, 'A')));
- }
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
- }
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_TRUE(has_compaction);
- has_compaction = false;
-
- ASSERT_OK(dbfull()->SetDBOptions({{"max_subcompactions", "2"}}));
- ASSERT_TRUE(dbfull()->GetDBOptions().max_subcompactions == 2);
-
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) {
- Compaction* compaction = reinterpret_cast<Compaction*>(arg);
- ASSERT_TRUE(compaction->max_subcompactions() == 2);
- has_compaction = true;
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
-
- // Trigger compaction
- for (int i = 0; i < 32; i++) {
- for (int j = 0; j < 5000; j++) {
- ASSERT_OK(Put(std::to_string(j), std::string(1, 'A')));
- }
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
- }
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_TRUE(has_compaction);
-}
-
-TEST_P(ChangeLevelConflictsWithAuto, TestConflict) {
- // A `CompactRange()` may race with an automatic compaction; we need
- // to make sure it doesn't corrupt the data.
- Options options = CurrentOptions();
- options.level0_file_num_compaction_trigger = 2;
- Reopen(options);
-
- ASSERT_OK(Put("foo", "v1"));
- ASSERT_OK(Put("bar", "v1"));
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
-
- {
- CompactRangeOptions cro;
- cro.change_level = true;
- cro.target_level = 2;
- ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
- }
- ASSERT_EQ("0,0,1", FilesPerLevel(0));
-
- // Run a refit to level 1 while another thread is writing to
- // the same level.
- SyncPoint::GetInstance()->LoadDependency({
- // The first two dependencies ensure the foreground creates an L0 file
- // between the background compaction's L0->L1 and its L1->L2.
- {
- "DBImpl::CompactRange:BeforeRefit:1",
- "AutoCompactionFinished1",
- },
- {
- "AutoCompactionFinished2",
- "DBImpl::CompactRange:BeforeRefit:2",
- },
- });
- SyncPoint::GetInstance()->EnableProcessing();
-
- std::thread auto_comp([&] {
- TEST_SYNC_POINT("AutoCompactionFinished1");
- ASSERT_OK(Put("bar", "v2"));
- ASSERT_OK(Put("foo", "v2"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("bar", "v3"));
- ASSERT_OK(Put("foo", "v3"));
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- TEST_SYNC_POINT("AutoCompactionFinished2");
- });
-
- {
- CompactRangeOptions cro;
- cro.change_level = true;
- cro.target_level = GetParam() ? 1 : 0;
- // This should return non-OK, but it's more important for the test to
- // make sure that the DB is not corrupted.
- ASSERT_NOK(dbfull()->CompactRange(cro, nullptr, nullptr));
- }
- auto_comp.join();
- // Refitting didn't happen.
- SyncPoint::GetInstance()->DisableProcessing(); - - // Write something to DB just make sure that consistency check didn't - // fail and make the DB readable. -} - -INSTANTIATE_TEST_CASE_P(ChangeLevelConflictsWithAuto, - ChangeLevelConflictsWithAuto, testing::Bool()); - -TEST_F(DBCompactionTest, ChangeLevelCompactRangeConflictsWithManual) { - // A `CompactRange()` with `change_level == true` needs to execute its final - // step, `ReFitLevel()`, in isolation. Previously there was a bug where - // refitting could target the same level as an ongoing manual compaction, - // leading to overlapping files in that level. - // - // This test ensures that case is not possible by verifying any manual - // compaction issued during the `ReFitLevel()` phase fails with - // `Status::Incomplete`. - Options options = CurrentOptions(); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 3; - Reopen(options); - - // Setup an LSM with three levels populated. - Random rnd(301); - int key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - { - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 2; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - } - ASSERT_EQ("0,0,2", FilesPerLevel(0)); - - GenerateNewFile(&rnd, &key_idx); - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1,1,2", FilesPerLevel(0)); - - // The background thread will refit L2->L1 while the - // foreground thread will try to simultaneously compact L0->L1. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - // The first two dependencies ensure the foreground creates an L0 file - // between the background compaction's L0->L1 and its L1->L2. - { - "DBImpl::RunManualCompaction()::1", - "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" - "PutFG", - }, - { - "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" - "FlushedFG", - "DBImpl::RunManualCompaction()::2", - }, - // The next two dependencies ensure the foreground invokes - // `CompactRange()` while the background is refitting. The - // foreground's `CompactRange()` is guaranteed to attempt an L0->L1 - // as we set it up with an empty memtable and a new L0 file. - { - "DBImpl::CompactRange:PreRefitLevel", - "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" - "CompactFG", - }, - { - "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" - "CompactedFG", - "DBImpl::CompactRange:PostRefitLevel", - }, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ROCKSDB_NAMESPACE::port::Thread refit_level_thread([&] { - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 1; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - }); - - TEST_SYNC_POINT( - "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:PutFG"); - // Make sure we have something new to compact in the foreground. - // Note key 1 is carefully chosen as it ensures the file we create here - // overlaps with one of the files being refitted L2->L1 in the background. - // If we chose key 0, the file created here would not overlap. 
- ASSERT_OK(Put(Key(1), "val")); - ASSERT_OK(Flush()); - TEST_SYNC_POINT( - "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:FlushedFG"); - - TEST_SYNC_POINT( - "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:CompactFG"); - ASSERT_TRUE(dbfull() - ->CompactRange(CompactRangeOptions(), nullptr, nullptr) - .IsIncomplete()); - TEST_SYNC_POINT( - "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" - "CompactedFG"); - refit_level_thread.join(); -} - -TEST_F(DBCompactionTest, ChangeLevelErrorPathTest) { - // This test is added to ensure that RefitLevel() error paths are clearing - // internal flags and to test that subsequent valid RefitLevel() calls - // succeeds - Options options = CurrentOptions(); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 3; - Reopen(options); - - ASSERT_EQ("", FilesPerLevel(0)); - - // Setup an LSM with three levels populated. - Random rnd(301); - int key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ("1", FilesPerLevel(0)); - { - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 2; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - } - ASSERT_EQ("0,0,2", FilesPerLevel(0)); - - auto start_idx = key_idx; - GenerateNewFile(&rnd, &key_idx); - GenerateNewFile(&rnd, &key_idx); - auto end_idx = key_idx - 1; - ASSERT_EQ("1,1,2", FilesPerLevel(0)); - - // Next two CompactRange() calls are used to test exercise error paths within - // RefitLevel() before triggering a valid RefitLevel() call - - // Trigger a refit to L1 first - { - std::string begin_string = Key(start_idx); - std::string end_string = Key(end_idx); - Slice begin(begin_string); - Slice end(end_string); - - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 1; - ASSERT_OK(dbfull()->CompactRange(cro, &begin, &end)); - } - ASSERT_EQ("0,3,2", FilesPerLevel(0)); - - // Try a refit from L2->L1 - this should fail and exercise error paths in - // RefitLevel() - { - // Select key range that matches the bottom most level (L2) - std::string begin_string = Key(0); - std::string end_string = Key(start_idx - 1); - Slice begin(begin_string); - Slice end(end_string); - - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 1; - ASSERT_NOK(dbfull()->CompactRange(cro, &begin, &end)); - } - ASSERT_EQ("0,3,2", FilesPerLevel(0)); - - // Try a valid Refit request to ensure, the path is still working - { - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 1; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - } - ASSERT_EQ("0,5", FilesPerLevel(0)); -} - -TEST_F(DBCompactionTest, CompactionWithBlob) { - Options options; - options.env = env_; - options.disable_auto_compactions = true; - - Reopen(options); - - constexpr char first_key[] = "first_key"; - constexpr char second_key[] = "second_key"; - constexpr char first_value[] = "first_value"; - constexpr char second_value[] = "second_value"; - constexpr char third_value[] = "third_value"; - - ASSERT_OK(Put(first_key, first_value)); - ASSERT_OK(Put(second_key, first_value)); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(first_key, second_value)); - ASSERT_OK(Put(second_key, second_value)); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(first_key, third_value)); - ASSERT_OK(Put(second_key, third_value)); - ASSERT_OK(Flush()); - - options.enable_blob_files = true; - - Reopen(options); - - constexpr Slice* begin = nullptr; - 
constexpr Slice* end = nullptr; - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end)); - - ASSERT_EQ(Get(first_key), third_value); - ASSERT_EQ(Get(second_key), third_value); - - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - - const VersionStorageInfo* const storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - - const auto& l1_files = storage_info->LevelFiles(1); - ASSERT_EQ(l1_files.size(), 1); - - const FileMetaData* const table_file = l1_files[0]; - ASSERT_NE(table_file, nullptr); - - const auto& blob_files = storage_info->GetBlobFiles(); - ASSERT_EQ(blob_files.size(), 1); - - const auto& blob_file = blob_files.front(); - ASSERT_NE(blob_file, nullptr); - - ASSERT_EQ(table_file->smallest.user_key(), first_key); - ASSERT_EQ(table_file->largest.user_key(), second_key); - ASSERT_EQ(table_file->oldest_blob_file_number, - blob_file->GetBlobFileNumber()); - - ASSERT_EQ(blob_file->GetTotalBlobCount(), 2); - - const InternalStats* const internal_stats = cfd->internal_stats(); - ASSERT_NE(internal_stats, nullptr); - - const auto& compaction_stats = internal_stats->TEST_GetCompactionStats(); - ASSERT_GE(compaction_stats.size(), 2); - ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0); - ASSERT_EQ(compaction_stats[1].bytes_written, table_file->fd.GetFileSize()); - ASSERT_EQ(compaction_stats[1].bytes_written_blob, - blob_file->GetTotalBlobBytes()); - ASSERT_EQ(compaction_stats[1].num_output_files, 1); - ASSERT_EQ(compaction_stats[1].num_output_files_blob, 1); -} - -class DBCompactionTestBlobError - : public DBCompactionTest, - public testing::WithParamInterface { - public: - DBCompactionTestBlobError() : sync_point_(GetParam()) {} - - std::string sync_point_; -}; - -INSTANTIATE_TEST_CASE_P(DBCompactionTestBlobError, DBCompactionTestBlobError, - ::testing::ValuesIn(std::vector{ - "BlobFileBuilder::WriteBlobToFile:AddRecord", - "BlobFileBuilder::WriteBlobToFile:AppendFooter"})); - -TEST_P(DBCompactionTestBlobError, CompactionError) { - Options options; - options.disable_auto_compactions = true; - options.env = env_; - - Reopen(options); - - constexpr char first_key[] = "first_key"; - constexpr char second_key[] = "second_key"; - constexpr char first_value[] = "first_value"; - constexpr char second_value[] = "second_value"; - constexpr char third_value[] = "third_value"; - - ASSERT_OK(Put(first_key, first_value)); - ASSERT_OK(Put(second_key, first_value)); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(first_key, second_value)); - ASSERT_OK(Put(second_key, second_value)); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(first_key, third_value)); - ASSERT_OK(Put(second_key, third_value)); - ASSERT_OK(Flush()); - - options.enable_blob_files = true; - - Reopen(options); - - SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* arg) { - Status* const s = static_cast(arg); - assert(s); - - (*s) = Status::IOError(sync_point_); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - constexpr Slice* begin = nullptr; - constexpr Slice* end = nullptr; - - ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), begin, end).IsIOError()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - - ColumnFamilyData* const cfd = 
versions->GetColumnFamilySet()->GetDefault();
-  ASSERT_NE(cfd, nullptr);
-
-  Version* const current = cfd->current();
-  ASSERT_NE(current, nullptr);
-
-  const VersionStorageInfo* const storage_info = current->storage_info();
-  ASSERT_NE(storage_info, nullptr);
-
-  const auto& l1_files = storage_info->LevelFiles(1);
-  ASSERT_TRUE(l1_files.empty());
-
-  const auto& blob_files = storage_info->GetBlobFiles();
-  ASSERT_TRUE(blob_files.empty());
-
-  const InternalStats* const internal_stats = cfd->internal_stats();
-  ASSERT_NE(internal_stats, nullptr);
-
-  const auto& compaction_stats = internal_stats->TEST_GetCompactionStats();
-  ASSERT_GE(compaction_stats.size(), 2);
-
-  if (sync_point_ == "BlobFileBuilder::WriteBlobToFile:AddRecord") {
-    ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0);
-    ASSERT_EQ(compaction_stats[1].bytes_written, 0);
-    ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0);
-    ASSERT_EQ(compaction_stats[1].num_output_files, 0);
-    ASSERT_EQ(compaction_stats[1].num_output_files_blob, 0);
-  } else {
-    // SST file writing succeeded; blob file writing failed (during Finish)
-    ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0);
-    ASSERT_GT(compaction_stats[1].bytes_written, 0);
-    ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0);
-    ASSERT_EQ(compaction_stats[1].num_output_files, 1);
-    ASSERT_EQ(compaction_stats[1].num_output_files_blob, 0);
-  }
-}
-
-class DBCompactionTestBlobGC
-    : public DBCompactionTest,
-      public testing::WithParamInterface<std::tuple<double, bool>> {
- public:
-  DBCompactionTestBlobGC()
-      : blob_gc_age_cutoff_(std::get<0>(GetParam())),
-        updated_enable_blob_files_(std::get<1>(GetParam())) {}
-
-  double blob_gc_age_cutoff_;
-  bool updated_enable_blob_files_;
-};
-
-INSTANTIATE_TEST_CASE_P(DBCompactionTestBlobGC, DBCompactionTestBlobGC,
-                        ::testing::Combine(::testing::Values(0.0, 0.5, 1.0),
-                                           ::testing::Bool()));
-
-TEST_P(DBCompactionTestBlobGC, CompactionWithBlobGCOverrides) {
-  Options options = CurrentOptions();
-  options.disable_auto_compactions = true;
-  options.enable_blob_files = true;
-  options.blob_file_size = 32;  // one blob per file
-  options.enable_blob_garbage_collection = true;
-  options.blob_garbage_collection_age_cutoff = 0;
-
-  DestroyAndReopen(options);
-
-  for (int i = 0; i < 128; i += 2) {
-    ASSERT_OK(Put("key" + std::to_string(i), "value" + std::to_string(i)));
-    ASSERT_OK(
-        Put("key" + std::to_string(i + 1), "value" + std::to_string(i + 1)));
-    ASSERT_OK(Flush());
-  }
-
-  std::vector<uint64_t> original_blob_files = GetBlobFileNumbers();
-  ASSERT_EQ(original_blob_files.size(), 128);
-
-  // Note: turning off enable_blob_files before the compaction results in
-  // garbage collected values getting inlined.
- ASSERT_OK(db_->SetOptions({{"enable_blob_files", "false"}})); - - CompactRangeOptions cro; - cro.blob_garbage_collection_policy = BlobGarbageCollectionPolicy::kForce; - cro.blob_garbage_collection_age_cutoff = blob_gc_age_cutoff_; - - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - // Check that the GC stats are correct - { - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - assert(versions->GetColumnFamilySet()); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - assert(cfd); - - const InternalStats* const internal_stats = cfd->internal_stats(); - assert(internal_stats); - - const auto& compaction_stats = internal_stats->TEST_GetCompactionStats(); - ASSERT_GE(compaction_stats.size(), 2); - - ASSERT_GE(compaction_stats[1].bytes_read_blob, 0); - ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0); - } - - const size_t cutoff_index = static_cast( - cro.blob_garbage_collection_age_cutoff * original_blob_files.size()); - const size_t expected_num_files = original_blob_files.size() - cutoff_index; - - const std::vector new_blob_files = GetBlobFileNumbers(); - - ASSERT_EQ(new_blob_files.size(), expected_num_files); - - // Original blob files below the cutoff should be gone, original blob files - // at or above the cutoff should be still there - for (size_t i = cutoff_index; i < original_blob_files.size(); ++i) { - ASSERT_EQ(new_blob_files[i - cutoff_index], original_blob_files[i]); - } - - for (size_t i = 0; i < 128; ++i) { - ASSERT_EQ(Get("key" + std::to_string(i)), "value" + std::to_string(i)); - } -} - -TEST_P(DBCompactionTestBlobGC, CompactionWithBlobGC) { - Options options; - options.env = env_; - options.disable_auto_compactions = true; - options.enable_blob_files = true; - options.blob_file_size = 32; // one blob per file - options.enable_blob_garbage_collection = true; - options.blob_garbage_collection_age_cutoff = blob_gc_age_cutoff_; - - Reopen(options); - - constexpr char first_key[] = "first_key"; - constexpr char first_value[] = "first_value"; - constexpr char second_key[] = "second_key"; - constexpr char second_value[] = "second_value"; - - ASSERT_OK(Put(first_key, first_value)); - ASSERT_OK(Put(second_key, second_value)); - ASSERT_OK(Flush()); - - constexpr char third_key[] = "third_key"; - constexpr char third_value[] = "third_value"; - constexpr char fourth_key[] = "fourth_key"; - constexpr char fourth_value[] = "fourth_value"; - - ASSERT_OK(Put(third_key, third_value)); - ASSERT_OK(Put(fourth_key, fourth_value)); - ASSERT_OK(Flush()); - - const std::vector original_blob_files = GetBlobFileNumbers(); - - ASSERT_EQ(original_blob_files.size(), 4); - - const size_t cutoff_index = static_cast( - options.blob_garbage_collection_age_cutoff * original_blob_files.size()); - - // Note: turning off enable_blob_files before the compaction results in - // garbage collected values getting inlined. 
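Both GC tests here share the same cutoff bookkeeping: with the original blob files ordered oldest-first, an age cutoff c marks the oldest floor(c * N) files for collection, and only the original files at or above that index are expected to survive. A small self-contained sketch of that arithmetic (plain C++, no RocksDB dependency; the 128-file count and the 0.5 cutoff mirror one parameterization of this test):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
      // Pretend these are the blob file numbers returned by the test helper
      // GetBlobFileNumbers(), ordered oldest-first.
      std::vector<uint64_t> original(128);
      for (size_t i = 0; i < original.size(); ++i) {
        original[i] = 10 + i;  // arbitrary, monotonically increasing numbers
      }

      const double age_cutoff = 0.5;  // one of the parameterized values
      const size_t cutoff_index =
          static_cast<size_t>(age_cutoff * original.size());

      // Files below the cutoff are eligible for GC; files at or above it
      // are expected to survive unchanged.
      std::vector<uint64_t> expected_survivors(original.begin() + cutoff_index,
                                               original.end());
      std::cout << "collected: " << cutoff_index
                << ", surviving: " << expected_survivors.size() << "\n";
      return 0;
    }

This matches the loops that check new_blob_files[i - cutoff_index] == original_blob_files[i] for i >= cutoff_index. When enable_blob_files is switched off before the compaction, as noted above, relocated values are inlined into the SSTs and the surviving count is also the total count; with it left on, GC writes new blob files for the relocated values instead.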
- size_t expected_number_of_files = original_blob_files.size(); - - if (!updated_enable_blob_files_) { - ASSERT_OK(db_->SetOptions({{"enable_blob_files", "false"}})); - - expected_number_of_files -= cutoff_index; - } - - constexpr Slice* begin = nullptr; - constexpr Slice* end = nullptr; - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end)); - - ASSERT_EQ(Get(first_key), first_value); - ASSERT_EQ(Get(second_key), second_value); - ASSERT_EQ(Get(third_key), third_value); - ASSERT_EQ(Get(fourth_key), fourth_value); - - const std::vector new_blob_files = GetBlobFileNumbers(); - - ASSERT_EQ(new_blob_files.size(), expected_number_of_files); - - // Original blob files below the cutoff should be gone, original blob files at - // or above the cutoff should be still there - for (size_t i = cutoff_index; i < original_blob_files.size(); ++i) { - ASSERT_EQ(new_blob_files[i - cutoff_index], original_blob_files[i]); - } - - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - assert(versions->GetColumnFamilySet()); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - assert(cfd); - - const InternalStats* const internal_stats = cfd->internal_stats(); - assert(internal_stats); - - const auto& compaction_stats = internal_stats->TEST_GetCompactionStats(); - ASSERT_GE(compaction_stats.size(), 2); - - if (blob_gc_age_cutoff_ > 0.0) { - ASSERT_GT(compaction_stats[1].bytes_read_blob, 0); - - if (updated_enable_blob_files_) { - // GC relocated some blobs to new blob files - ASSERT_GT(compaction_stats[1].bytes_written_blob, 0); - ASSERT_EQ(compaction_stats[1].bytes_read_blob, - compaction_stats[1].bytes_written_blob); - } else { - // GC moved some blobs back to the LSM, no new blob files - ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0); - } - } else { - ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0); - ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0); - } -} - -TEST_F(DBCompactionTest, CompactionWithBlobGCError_CorruptIndex) { - Options options; - options.env = env_; - options.disable_auto_compactions = true; - options.enable_blob_files = true; - options.enable_blob_garbage_collection = true; - options.blob_garbage_collection_age_cutoff = 1.0; - - Reopen(options); - - constexpr char first_key[] = "first_key"; - constexpr char first_value[] = "first_value"; - ASSERT_OK(Put(first_key, first_value)); - - constexpr char second_key[] = "second_key"; - constexpr char second_value[] = "second_value"; - ASSERT_OK(Put(second_key, second_value)); - - ASSERT_OK(Flush()); - - constexpr char third_key[] = "third_key"; - constexpr char third_value[] = "third_value"; - ASSERT_OK(Put(third_key, third_value)); - - constexpr char fourth_key[] = "fourth_key"; - constexpr char fourth_value[] = "fourth_value"; - ASSERT_OK(Put(fourth_key, fourth_value)); - - ASSERT_OK(Flush()); - - SyncPoint::GetInstance()->SetCallBack( - "CompactionIterator::GarbageCollectBlobIfNeeded::TamperWithBlobIndex", - [](void* arg) { - Slice* const blob_index = static_cast(arg); - assert(blob_index); - assert(!blob_index->empty()); - blob_index->remove_prefix(1); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - constexpr Slice* begin = nullptr; - constexpr Slice* end = nullptr; - - ASSERT_TRUE( - db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(DBCompactionTest, CompactionWithBlobGCError_InlinedTTLIndex) { - constexpr uint64_t min_blob_size 
= 10;
-
-  Options options;
-  options.env = env_;
-  options.disable_auto_compactions = true;
-  options.enable_blob_files = true;
-  options.min_blob_size = min_blob_size;
-  options.enable_blob_garbage_collection = true;
-  options.blob_garbage_collection_age_cutoff = 1.0;
-
-  Reopen(options);
-
-  constexpr char first_key[] = "first_key";
-  constexpr char first_value[] = "first_value";
-  ASSERT_OK(Put(first_key, first_value));
-
-  constexpr char second_key[] = "second_key";
-  constexpr char second_value[] = "second_value";
-  ASSERT_OK(Put(second_key, second_value));
-
-  ASSERT_OK(Flush());
-
-  constexpr char third_key[] = "third_key";
-  constexpr char third_value[] = "third_value";
-  ASSERT_OK(Put(third_key, third_value));
-
-  constexpr char fourth_key[] = "fourth_key";
-  constexpr char blob[] = "short";
-  static_assert(sizeof(blob) - 1 < min_blob_size,
-                "Blob too long to be inlined");
-
-  // Fake an inlined TTL blob index.
-  std::string blob_index;
-
-  constexpr uint64_t expiration = 1234567890;
-
-  BlobIndex::EncodeInlinedTTL(&blob_index, expiration, blob);
-
-  WriteBatch batch;
-  ASSERT_OK(
-      WriteBatchInternal::PutBlobIndex(&batch, 0, fourth_key, blob_index));
-  ASSERT_OK(db_->Write(WriteOptions(), &batch));
-
-  ASSERT_OK(Flush());
-
-  constexpr Slice* begin = nullptr;
-  constexpr Slice* end = nullptr;
-
-  ASSERT_TRUE(
-      db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption());
-}
-
-TEST_F(DBCompactionTest, CompactionWithBlobGCError_IndexWithInvalidFileNumber) {
-  Options options;
-  options.env = env_;
-  options.disable_auto_compactions = true;
-  options.enable_blob_files = true;
-  options.enable_blob_garbage_collection = true;
-  options.blob_garbage_collection_age_cutoff = 1.0;
-
-  Reopen(options);
-
-  constexpr char first_key[] = "first_key";
-  constexpr char first_value[] = "first_value";
-  ASSERT_OK(Put(first_key, first_value));
-
-  constexpr char second_key[] = "second_key";
-  constexpr char second_value[] = "second_value";
-  ASSERT_OK(Put(second_key, second_value));
-
-  ASSERT_OK(Flush());
-
-  constexpr char third_key[] = "third_key";
-  constexpr char third_value[] = "third_value";
-  ASSERT_OK(Put(third_key, third_value));
-
-  constexpr char fourth_key[] = "fourth_key";
-
-  // Fake a blob index referencing a non-existent blob file.
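These blob-index corruption tests exercise the integrated BlobDB configuration set up at the top of each test. For reference, a minimal sketch of the same knobs through the public API; the DB path and values are illustrative, and min_blob_size = 10 matches the inlined-TTL test above.

    #include <cassert>
    #include <string>

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"

    int main() {
      ROCKSDB_NAMESPACE::Options options;
      options.create_if_missing = true;
      options.enable_blob_files = true;  // store large values in blob files
      options.min_blob_size = 10;        // shorter values stay inline in the SST
      options.enable_blob_garbage_collection = true;
      options.blob_garbage_collection_age_cutoff = 1.0;  // consider all blob files

      ROCKSDB_NAMESPACE::DB* db = nullptr;
      ROCKSDB_NAMESPACE::Status s =
          ROCKSDB_NAMESPACE::DB::Open(options, "/tmp/blobdb_example", &db);
      assert(s.ok());

      // "short" (5 bytes) is inlined; the 100-byte value goes to a blob file.
      s = db->Put(ROCKSDB_NAMESPACE::WriteOptions(), "k1", "short");
      assert(s.ok());
      s = db->Put(ROCKSDB_NAMESPACE::WriteOptions(), "k2", std::string(100, 'x'));
      assert(s.ok());

      delete db;
      return 0;
    }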
- std::string blob_index; - - constexpr uint64_t blob_file_number = 1000; - constexpr uint64_t offset = 1234; - constexpr uint64_t size = 5678; - - BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size, - kNoCompression); - - WriteBatch batch; - ASSERT_OK( - WriteBatchInternal::PutBlobIndex(&batch, 0, fourth_key, blob_index)); - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - ASSERT_OK(Flush()); - - constexpr Slice* begin = nullptr; - constexpr Slice* end = nullptr; - - ASSERT_TRUE( - db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption()); -} - -TEST_F(DBCompactionTest, CompactionWithChecksumHandoff1) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 3; - options.env = fault_fs_env.get(); - options.create_if_missing = true; - options.checksum_handoff_file_types.Add(FileType::kTableFile); - Status s; - Reopen(options); - - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s, Status::OK()); - Destroy(options); - Reopen(options); - - // The hash does not match, compaction write fails - // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - // Since the file system returns IOStatus::Corruption, it is an - // unrecoverable error. - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), - ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError); - SyncPoint::GetInstance()->DisableProcessing(); - Destroy(options); - Reopen(options); - - // The file system does not support checksum handoff. The check - // will be ignored. - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum); - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s, Status::OK()); - - // Each write will be similated as corrupted. - // Since the file system returns IOStatus::Corruption, it is an - // unrecoverable error. 
- fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", - [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), - ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError); - SyncPoint::GetInstance()->DisableProcessing(); - - Destroy(options); -} - -TEST_F(DBCompactionTest, CompactionWithChecksumHandoff2) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 3; - options.env = fault_fs_env.get(); - options.create_if_missing = true; - Status s; - Reopen(options); - - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s, Status::OK()); - Destroy(options); - Reopen(options); - - // options is not set, the checksum handoff will not be triggered - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s, Status::OK()); - SyncPoint::GetInstance()->DisableProcessing(); - Destroy(options); - Reopen(options); - - // The file system does not support checksum handoff. The check - // will be ignored. 
- fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum); - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s, Status::OK()); - - // options is not set, the checksum handoff will not be triggered - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", - [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s, Status::OK()); - - Destroy(options); -} - -TEST_F(DBCompactionTest, CompactionWithChecksumHandoffManifest1) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 3; - options.env = fault_fs_env.get(); - options.create_if_missing = true; - options.checksum_handoff_file_types.Add(FileType::kDescriptorFile); - Status s; - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - Reopen(options); - - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s, Status::OK()); - Destroy(options); - Reopen(options); - - // The hash does not match, compaction write fails - // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - // Since the file system returns IOStatus::Corruption, it is mapped to - // kFatalError error. 
-  ASSERT_OK(Put(Key(0), "value1"));
-  ASSERT_OK(Put(Key(2), "value2"));
-  s = Flush();
-  ASSERT_EQ(s, Status::OK());
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
-      {{"DBImpl::FlushMemTable:FlushMemTableFinished",
-        "BackgroundCallCompaction:0"}});
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
-      "BackgroundCallCompaction:0", [&](void*) {
-        fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
-      });
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
-  ASSERT_OK(Put(Key(1), "value3"));
-  s = Flush();
-  ASSERT_EQ(s, Status::OK());
-  s = dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError);
-  SyncPoint::GetInstance()->DisableProcessing();
-  Destroy(options);
-}
-
-TEST_F(DBCompactionTest, CompactionWithChecksumHandoffManifest2) {
-  if (mem_env_ || encrypted_env_) {
-    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
-    return;
-  }
-  std::shared_ptr<FaultInjectionTestFS> fault_fs(
-      new FaultInjectionTestFS(FileSystem::Default()));
-  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
-  Options options = CurrentOptions();
-  options.level0_file_num_compaction_trigger = 2;
-  options.num_levels = 3;
-  options.env = fault_fs_env.get();
-  options.create_if_missing = true;
-  options.checksum_handoff_file_types.Add(FileType::kDescriptorFile);
-  Status s;
-  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum);
-  Reopen(options);
-
-  // The file system does not support checksum handoff. The check
-  // will be ignored.
-  ASSERT_OK(Put(Key(0), "value1"));
-  ASSERT_OK(Put(Key(2), "value2"));
-  s = Flush();
-  ASSERT_EQ(s, Status::OK());
-  ASSERT_OK(Put(Key(1), "value3"));
-  s = Flush();
-  ASSERT_EQ(s, Status::OK());
-  s = dbfull()->TEST_WaitForCompact();
-  ASSERT_EQ(s, Status::OK());
-
-  // Each write will be simulated as corrupted.
-  // Since the file system returns IOStatus::Corruption, it is mapped to
-  // kFatalError error.
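Across these checksum-handoff tests the feature is opt-in per file type via DBOptions::checksum_handoff_file_types; file systems that do not support verification simply ignore the handed-off checksums, which is why the kNoChecksum cases still pass. A minimal configuration sketch against the public API (the path is illustrative, and only the two file types used in these tests are shown):

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"
    #include "rocksdb/types.h"

    int main() {
      ROCKSDB_NAMESPACE::Options options;
      options.create_if_missing = true;
      // Hand off per-write verification information to the file system for
      // these file types; unsupported file systems ignore it.
      options.checksum_handoff_file_types.Add(
          ROCKSDB_NAMESPACE::FileType::kTableFile);
      options.checksum_handoff_file_types.Add(
          ROCKSDB_NAMESPACE::FileType::kDescriptorFile);

      ROCKSDB_NAMESPACE::DB* db = nullptr;
      ROCKSDB_NAMESPACE::Status s =
          ROCKSDB_NAMESPACE::DB::Open(options, "/tmp/handoff_example", &db);
      if (!s.ok()) {
        return 1;
      }
      delete db;
      return 0;
    }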
- fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - ASSERT_OK(Put(Key(0), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", - [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put(Key(1), "value3")); - s = Flush(); - ASSERT_EQ(s, Status::OK()); - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError); - SyncPoint::GetInstance()->DisableProcessing(); - - Destroy(options); -} - -TEST_F(DBCompactionTest, FIFOWarm) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleFIFO; - options.num_levels = 1; - options.max_open_files = -1; - options.level0_file_num_compaction_trigger = 2; - options.create_if_missing = true; - CompactionOptionsFIFO fifo_options; - fifo_options.age_for_warm = 1000; - fifo_options.max_table_files_size = 100000000; - options.compaction_options_fifo = fifo_options; - env_->SetMockSleep(); - Reopen(options); - - int total_warm = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "NewWritableFile::FileOptions.temperature", [&](void* arg) { - Temperature temperature = *(static_cast(arg)); - if (temperature == Temperature::kWarm) { - total_warm++; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // The file system does not support checksum handoff. The check - // will be ignored. - ASSERT_OK(Put(Key(0), "value1")); - env_->MockSleepForSeconds(800); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(Key(0), "value1")); - env_->MockSleepForSeconds(800); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(Key(0), "value1")); - env_->MockSleepForSeconds(800); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_OK(Put(Key(0), "value1")); - env_->MockSleepForSeconds(800); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - ColumnFamilyMetaData metadata; - db_->GetColumnFamilyMetaData(&metadata); - ASSERT_EQ(4, metadata.file_count); - ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); - ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[1].temperature); - ASSERT_EQ(Temperature::kWarm, metadata.levels[0].files[2].temperature); - ASSERT_EQ(Temperature::kWarm, metadata.levels[0].files[3].temperature); - ASSERT_EQ(2, total_warm); - - Destroy(options); -} - -TEST_F(DBCompactionTest, DisableMultiManualCompaction) { - const int kNumL0Files = 10; - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - Reopen(options); - - // Generate 2 levels of file to make sure the manual compaction is not skipped - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(Key(i), "value")); - if (i % 2) { - ASSERT_OK(Flush()); - } - } - MoveFilesToLevel(2); - - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(Key(i), "value")); - if (i % 2) { - ASSERT_OK(Flush()); - } - } - MoveFilesToLevel(1); - - // Block compaction queue - test::SleepingBackgroundTask sleeping_task_low; - 
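The cancellation tests that follow keep compactions queued by parking a sleeping task on the single LOW-priority background thread. The pattern, condensed from the code below with explanatory comments (test::SleepingBackgroundTask is the internal helper from test_util/testutil.h used throughout these tests):

    // Occupy the only LOW-priority background thread so that any scheduled
    // compaction stays queued instead of running.
    test::SleepingBackgroundTask sleeping_task_low;
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                   Env::Priority::LOW);

    // ... issue CompactRange() / DisableManualCompaction() while blocked ...

    // Release the thread pool and wait for the sleeping task to finish.
    sleeping_task_low.WakeUp();
    sleeping_task_low.WaitUntilDone();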
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - port::Thread compact_thread1([&]() { - CompactRangeOptions cro; - cro.exclusive_manual_compaction = false; - std::string begin_str = Key(0); - std::string end_str = Key(3); - Slice b = begin_str; - Slice e = end_str; - auto s = db_->CompactRange(cro, &b, &e); - ASSERT_TRUE(s.IsIncomplete()); - }); - - port::Thread compact_thread2([&]() { - CompactRangeOptions cro; - cro.exclusive_manual_compaction = false; - std::string begin_str = Key(4); - std::string end_str = Key(7); - Slice b = begin_str; - Slice e = end_str; - auto s = db_->CompactRange(cro, &b, &e); - ASSERT_TRUE(s.IsIncomplete()); - }); - - // Disable manual compaction should cancel both manual compactions and both - // compaction should return incomplete. - db_->DisableManualCompaction(); - - compact_thread1.join(); - compact_thread2.join(); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); -} - -TEST_F(DBCompactionTest, DisableJustStartedManualCompaction) { - const int kNumL0Files = 4; - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - Reopen(options); - - // generate files, but avoid trigger auto compaction - for (int i = 0; i < kNumL0Files / 2; i++) { - ASSERT_OK(Put(Key(1), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - } - - // make sure the manual compaction background is started but not yet set the - // status to in_progress, then cancel the manual compaction, which should not - // result in segfault - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BGWorkCompaction", - "DBCompactionTest::DisableJustStartedManualCompaction:" - "PreDisableManualCompaction"}, - {"DBImpl::RunManualCompaction:Unscheduled", - "BackgroundCallCompaction:0"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - port::Thread compact_thread([&]() { - CompactRangeOptions cro; - cro.exclusive_manual_compaction = true; - auto s = db_->CompactRange(cro, nullptr, nullptr); - ASSERT_TRUE(s.IsIncomplete()); - }); - TEST_SYNC_POINT( - "DBCompactionTest::DisableJustStartedManualCompaction:" - "PreDisableManualCompaction"); - db_->DisableManualCompaction(); - - compact_thread.join(); -} - -TEST_F(DBCompactionTest, DisableInProgressManualCompaction) { - const int kNumL0Files = 4; - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - Reopen(options); - - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BackgroundCompaction:InProgress", - "DBCompactionTest::DisableInProgressManualCompaction:" - "PreDisableManualCompaction"}, - {"DBImpl::RunManualCompaction:Unscheduled", - "CompactionJob::Run():Start"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - // generate files, but avoid trigger auto compaction - for (int i = 0; i < kNumL0Files / 2; i++) { - ASSERT_OK(Put(Key(1), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - } - - port::Thread compact_thread([&]() { - CompactRangeOptions cro; - cro.exclusive_manual_compaction = true; - auto s = db_->CompactRange(cro, nullptr, nullptr); - ASSERT_TRUE(s.IsIncomplete()); - }); - - TEST_SYNC_POINT( - "DBCompactionTest::DisableInProgressManualCompaction:" - "PreDisableManualCompaction"); - db_->DisableManualCompaction(); - - compact_thread.join(); -} - -TEST_F(DBCompactionTest, DisableManualCompactionThreadQueueFull) { - const int kNumL0Files = 4; - - 
SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::RunManualCompaction:Scheduled", - "DBCompactionTest::DisableManualCompactionThreadQueueFull:" - "PreDisableManualCompaction"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - Reopen(options); - - // Block compaction queue - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - // generate files, but avoid trigger auto compaction - for (int i = 0; i < kNumL0Files / 2; i++) { - ASSERT_OK(Put(Key(1), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - } - - port::Thread compact_thread([&]() { - CompactRangeOptions cro; - cro.exclusive_manual_compaction = true; - auto s = db_->CompactRange(cro, nullptr, nullptr); - ASSERT_TRUE(s.IsIncomplete()); - }); - - TEST_SYNC_POINT( - "DBCompactionTest::DisableManualCompactionThreadQueueFull:" - "PreDisableManualCompaction"); - - // Generate more files to trigger auto compaction which is scheduled after - // manual compaction. Has to generate 4 more files because existing files are - // pending compaction - for (int i = 0; i < kNumL0Files; i++) { - ASSERT_OK(Put(Key(1), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - } - ASSERT_EQ(std::to_string(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0)); - - db_->DisableManualCompaction(); - - // CompactRange should return before the compaction has the chance to run - compact_thread.join(); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); - ASSERT_EQ("0,1", FilesPerLevel(0)); -} - -TEST_F(DBCompactionTest, DisableManualCompactionThreadQueueFullDBClose) { - const int kNumL0Files = 4; - - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::RunManualCompaction:Scheduled", - "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:" - "PreDisableManualCompaction"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - Reopen(options); - - // Block compaction queue - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - // generate files, but avoid trigger auto compaction - for (int i = 0; i < kNumL0Files / 2; i++) { - ASSERT_OK(Put(Key(1), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - } - - port::Thread compact_thread([&]() { - CompactRangeOptions cro; - cro.exclusive_manual_compaction = true; - auto s = db_->CompactRange(cro, nullptr, nullptr); - ASSERT_TRUE(s.IsIncomplete()); - }); - - TEST_SYNC_POINT( - "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:" - "PreDisableManualCompaction"); - - // Generate more files to trigger auto compaction which is scheduled after - // manual compaction. Has to generate 4 more files because existing files are - // pending compaction - for (int i = 0; i < kNumL0Files; i++) { - ASSERT_OK(Put(Key(1), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - } - ASSERT_EQ(std::to_string(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0)); - - db_->DisableManualCompaction(); - - // CompactRange should return before the compaction has the chance to run - compact_thread.join(); - - // Try close DB while manual compaction is canceled but still in the queue. 
- // And an auto-triggered compaction is also in the queue. - auto s = db_->Close(); - ASSERT_OK(s); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); -} - -TEST_F(DBCompactionTest, DBCloseWithManualCompaction) { - const int kNumL0Files = 4; - - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::RunManualCompaction:Scheduled", - "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:" - "PreDisableManualCompaction"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - Reopen(options); - - // Block compaction queue - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - // generate files, but avoid trigger auto compaction - for (int i = 0; i < kNumL0Files / 2; i++) { - ASSERT_OK(Put(Key(1), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - } - - port::Thread compact_thread([&]() { - CompactRangeOptions cro; - cro.exclusive_manual_compaction = true; - auto s = db_->CompactRange(cro, nullptr, nullptr); - ASSERT_TRUE(s.IsIncomplete()); - }); - - TEST_SYNC_POINT( - "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:" - "PreDisableManualCompaction"); - - // Generate more files to trigger auto compaction which is scheduled after - // manual compaction. Has to generate 4 more files because existing files are - // pending compaction - for (int i = 0; i < kNumL0Files; i++) { - ASSERT_OK(Put(Key(1), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - } - ASSERT_EQ(std::to_string(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0)); - - // Close DB with manual compaction and auto triggered compaction in the queue. - auto s = db_->Close(); - ASSERT_OK(s); - - // manual compaction thread should return with Incomplete(). - compact_thread.join(); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); -} - -TEST_F(DBCompactionTest, - DisableManualCompactionDoesNotWaitForDrainingAutomaticCompaction) { - // When `CompactRangeOptions::exclusive_manual_compaction == true`, we wait - // for automatic compactions to drain before starting the manual compaction. - // This test verifies `DisableManualCompaction()` can cancel such a compaction - // without waiting for the drain to complete. - const int kNumL0Files = 4; - - // Enforces manual compaction enters wait loop due to pending automatic - // compaction. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BGWorkCompaction", "DBImpl::RunManualCompaction:NotScheduled"}, - {"DBImpl::RunManualCompaction:WaitScheduled", - "BackgroundCallCompaction:0"}}); - // The automatic compaction will cancel the waiting manual compaction. - // Completing this implies the cancellation did not wait on automatic - // compactions to finish. 
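The behavior verified here is available to applications through the public API: a pending or running CompactRange() can be aborted from another thread with DB::DisableManualCompaction(), after which the call returns Status::Incomplete(), and EnableManualCompaction() re-arms manual compactions. A minimal sketch under those assumptions (path and thread structure are illustrative):

    #include <cassert>
    #include <thread>

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"

    int main() {
      ROCKSDB_NAMESPACE::Options options;
      options.create_if_missing = true;
      ROCKSDB_NAMESPACE::DB* db = nullptr;
      ROCKSDB_NAMESPACE::Status open_s = ROCKSDB_NAMESPACE::DB::Open(
          options, "/tmp/cancel_compaction_example", &db);
      assert(open_s.ok());

      std::thread manual([db] {
        ROCKSDB_NAMESPACE::CompactRangeOptions cro;
        cro.exclusive_manual_compaction = false;
        // If the compaction is canceled while pending or running, this returns
        // Status::Incomplete() instead of Status::OK().
        ROCKSDB_NAMESPACE::Status s = db->CompactRange(cro, nullptr, nullptr);
        (void)s;
      });

      // Cancel any scheduled or running manual compactions.
      db->DisableManualCompaction();
      manual.join();

      // Allow future manual compactions again.
      db->EnableManualCompaction();

      delete db;
      return 0;
    }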
- bool callback_completed = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void* /*arg*/) { - db_->DisableManualCompaction(); - callback_completed = true; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - Reopen(options); - - for (int i = 0; i < kNumL0Files; ++i) { - ASSERT_OK(Put(Key(1), "value1")); - ASSERT_OK(Put(Key(2), "value2")); - ASSERT_OK(Flush()); - } - - CompactRangeOptions cro; - cro.exclusive_manual_compaction = true; - ASSERT_TRUE(db_->CompactRange(cro, nullptr, nullptr).IsIncomplete()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(callback_completed); -} - -TEST_F(DBCompactionTest, ChangeLevelConflictsWithManual) { - Options options = CurrentOptions(); - options.num_levels = 3; - Reopen(options); - - // Setup an LSM with L2 populated. - Random rnd(301); - ASSERT_OK(Put(Key(0), rnd.RandomString(990))); - ASSERT_OK(Put(Key(1), rnd.RandomString(990))); - { - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 2; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - } - ASSERT_EQ("0,0,1", FilesPerLevel(0)); - - // The background thread will refit L2->L1 while the foreground thread will - // attempt to run a compaction on new data. The following dependencies - // ensure the background manual compaction's refitting phase disables manual - // compaction immediately before the foreground manual compaction can register - // itself. Manual compaction is kept disabled until the foreground manual - // checks for the failure once. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - // Only do Put()s for foreground CompactRange() once the background - // CompactRange() has reached the refitting phase. - { - "DBImpl::CompactRange:BeforeRefit:1", - "DBCompactionTest::ChangeLevelConflictsWithManual:" - "PreForegroundCompactRange", - }, - // Right before we register the manual compaction, proceed with - // the refitting phase so manual compactions are disabled. Stay in - // the refitting phase with manual compactions disabled until it is - // noticed. - { - "DBImpl::RunManualCompaction:0", - "DBImpl::CompactRange:BeforeRefit:2", - }, - { - "DBImpl::CompactRange:PreRefitLevel", - "DBImpl::RunManualCompaction:1", - }, - { - "DBImpl::RunManualCompaction:PausedAtStart", - "DBImpl::CompactRange:PostRefitLevel", - }, - // If compaction somehow were scheduled, let's let it run after reenabling - // manual compactions. This dependency is not expected to be hit but is - // here for speculatively coercing future bugs. 
- { - "DBImpl::CompactRange:PostRefitLevel:ManualCompactionEnabled", - "BackgroundCallCompaction:0", - }, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ROCKSDB_NAMESPACE::port::Thread refit_level_thread([&] { - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 1; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - }); - - TEST_SYNC_POINT( - "DBCompactionTest::ChangeLevelConflictsWithManual:" - "PreForegroundCompactRange"); - ASSERT_OK(Put(Key(0), rnd.RandomString(990))); - ASSERT_OK(Put(Key(1), rnd.RandomString(990))); - ASSERT_TRUE(dbfull() - ->CompactRange(CompactRangeOptions(), nullptr, nullptr) - .IsIncomplete()); - - refit_level_thread.join(); -} - -TEST_F(DBCompactionTest, BottomPriCompactionCountsTowardConcurrencyLimit) { - // Flushes several files to trigger compaction while lock is released during - // a bottom-pri compaction. Verifies it does not get scheduled to thread pool - // because per-DB limit for compaction parallelism is one (default). - const int kNumL0Files = 4; - const int kNumLevels = 3; - - env_->SetBackgroundThreads(1, Env::Priority::BOTTOM); - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - options.num_levels = kNumLevels; - DestroyAndReopen(options); - - // Setup last level to be non-empty since it's a bit unclear whether - // compaction to an empty level would be considered "bottommost". - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - MoveFilesToLevel(kNumLevels - 1); - - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BGWorkBottomCompaction", - "DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:" - "PreTriggerCompaction"}, - {"DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:" - "PostTriggerCompaction", - "BackgroundCallCompaction:0"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - port::Thread compact_range_thread([&] { - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - cro.exclusive_manual_compaction = false; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - }); - - // Sleep in the low-pri thread so any newly scheduled compaction will be - // queued. Otherwise it might finish before we check its existence. - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - TEST_SYNC_POINT( - "DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:" - "PreTriggerCompaction"); - for (int i = 0; i < kNumL0Files; ++i) { - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - } - ASSERT_EQ(0u, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - TEST_SYNC_POINT( - "DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:" - "PostTriggerCompaction"); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - compact_range_thread.join(); -} - -TEST_F(DBCompactionTest, BottommostFileCompactionAllowIngestBehind) { - // allow_ingest_behind prevents seqnum zeroing, and could cause - // compaction loop with reason kBottommostFiles. 
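allow_ingest_behind exists so that externally built SST files can be ingested underneath all current data (IngestExternalFileOptions::ingest_behind); the cost, as the comment above notes, is that the bottommost level can no longer zero out sequence numbers. A minimal sketch of the ingestion side using the public SstFileWriter API (paths and keys are illustrative):

    #include <cassert>
    #include <string>

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"
    #include "rocksdb/sst_file_writer.h"

    int main() {
      ROCKSDB_NAMESPACE::Options options;
      options.create_if_missing = true;
      options.allow_ingest_behind = true;  // must be set when the DB is opened

      ROCKSDB_NAMESPACE::DB* db = nullptr;
      ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DB::Open(
          options, "/tmp/ingest_behind_example", &db);
      assert(s.ok());

      // Build an external SST file with keys in ascending order.
      ROCKSDB_NAMESPACE::SstFileWriter writer(ROCKSDB_NAMESPACE::EnvOptions(),
                                              options);
      const std::string sst_path = "/tmp/ingest_behind_example_file.sst";
      assert(writer.Open(sst_path).ok());
      assert(writer.Put("aaa", "old_value").ok());
      assert(writer.Put("bbb", "old_value").ok());
      assert(writer.Finish().ok());

      // Ingest the file *behind* all existing data: it lands in the last level
      // and never overrides newer versions of the same keys.
      ROCKSDB_NAMESPACE::IngestExternalFileOptions ifo;
      ifo.ingest_behind = true;
      s = db->IngestExternalFile({sst_path}, ifo);
      assert(s.ok());

      delete db;
      return 0;
    }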
- Options options = CurrentOptions(); - options.env = env_; - options.compaction_style = kCompactionStyleLevel; - options.allow_ingest_behind = true; - options.comparator = BytewiseComparator(); - DestroyAndReopen(options); - - WriteOptions write_opts; - ASSERT_OK(db_->Put(write_opts, "infinite", "compaction loop")); - ASSERT_OK(db_->Put(write_opts, "infinite", "loop")); - - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - ASSERT_OK(db_->Put(write_opts, "bumpseqnum", "")); - ASSERT_OK(Flush()); - auto snapshot = db_->GetSnapshot(); - // Bump up oldest_snapshot_seqnum_ in VersionStorageInfo. - db_->ReleaseSnapshot(snapshot); - bool compacted = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* /* arg */) { - // There should not be a compaction. - compacted = true; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - // Wait for compaction to be scheduled. - env_->SleepForMicroseconds(2000000); - ASSERT_FALSE(compacted); - // The following assert can be used to check for compaction loop: - // it used to wait forever before the fix. - // ASSERT_OK(dbfull()->TEST_WaitForCompact(true /* wait_unscheduled */)); -} - - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_dynamic_level_test.cc b/db/db_dynamic_level_test.cc deleted file mode 100644 index a1c2fa943..000000000 --- a/db/db_dynamic_level_test.cc +++ /dev/null @@ -1,499 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -// Introduction of SyncPoint effectively disabled building and running this test -// in Release build. -// which is a pity, it is a good test - -#include "db/db_test_util.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/env.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { -class DBTestDynamicLevel : public DBTestBase { - public: - DBTestDynamicLevel() - : DBTestBase("db_dynamic_level_test", /*env_do_fsync=*/true) {} -}; - -TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBase) { - if (!Snappy_Supported() || !LZ4_Supported()) { - return; - } - // Use InMemoryEnv, or it would be too slow. 
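"InMemoryEnv" above refers to wrapping a base Env with the memory-backed Env returned by the public NewMemEnv() factory, so file I/O for this heavy test never touches disk. A minimal sketch (the DB path is just a name inside the in-memory file system):

    #include <cassert>
    #include <memory>

    #include "rocksdb/db.h"
    #include "rocksdb/env.h"
    #include "rocksdb/options.h"

    int main() {
      // All "files" created through this Env live in memory; non-file
      // operations are delegated to the wrapped base Env.
      std::unique_ptr<ROCKSDB_NAMESPACE::Env> mem_env(
          ROCKSDB_NAMESPACE::NewMemEnv(ROCKSDB_NAMESPACE::Env::Default()));

      ROCKSDB_NAMESPACE::Options options;
      options.create_if_missing = true;
      options.env = mem_env.get();  // the Env must outlive the DB

      ROCKSDB_NAMESPACE::DB* db = nullptr;
      ROCKSDB_NAMESPACE::Status s =
          ROCKSDB_NAMESPACE::DB::Open(options, "/in_mem_db", &db);
      assert(s.ok());
      assert(db->Put(ROCKSDB_NAMESPACE::WriteOptions(), "k", "v").ok());
      delete db;  // close before the Env is destroyed
      return 0;
    }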
- std::unique_ptr env(NewMemEnv(env_)); - - const int kNKeys = 1000; - int keys[kNKeys]; - - auto verify_func = [&]() { - for (int i = 0; i < kNKeys; i++) { - ASSERT_NE("NOT_FOUND", Get(Key(i))); - ASSERT_NE("NOT_FOUND", Get(Key(kNKeys * 2 + i))); - if (i < kNKeys / 10) { - ASSERT_EQ("NOT_FOUND", Get(Key(kNKeys + keys[i]))); - } else { - ASSERT_NE("NOT_FOUND", Get(Key(kNKeys + keys[i]))); - } - } - }; - - Random rnd(301); - for (int ordered_insert = 0; ordered_insert <= 1; ordered_insert++) { - for (int i = 0; i < kNKeys; i++) { - keys[i] = i; - } - if (ordered_insert == 0) { - RandomShuffle(std::begin(keys), std::end(keys), rnd.Next()); - } - for (int max_background_compactions = 1; max_background_compactions < 4; - max_background_compactions += 2) { - Options options; - options.env = env.get(); - options.create_if_missing = true; - options.write_buffer_size = 2048; - options.max_write_buffer_number = 2; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 2; - options.target_file_size_base = 2048; - options.level_compaction_dynamic_level_bytes = true; - options.max_bytes_for_level_base = 10240; - options.max_bytes_for_level_multiplier = 4; - options.max_background_compactions = max_background_compactions; - options.num_levels = 5; - - options.compression_per_level.resize(3); - options.compression_per_level[0] = kNoCompression; - options.compression_per_level[1] = kLZ4Compression; - options.compression_per_level[2] = kSnappyCompression; - options.env = env_; - - DestroyAndReopen(options); - - for (int i = 0; i < kNKeys; i++) { - int key = keys[i]; - ASSERT_OK(Put(Key(kNKeys + key), rnd.RandomString(102))); - ASSERT_OK(Put(Key(key), rnd.RandomString(102))); - ASSERT_OK(Put(Key(kNKeys * 2 + key), rnd.RandomString(102))); - ASSERT_OK(Delete(Key(kNKeys + keys[i / 10]))); - env_->SleepForMicroseconds(5000); - } - - uint64_t int_prop; - ASSERT_TRUE(db_->GetIntProperty("rocksdb.background-errors", &int_prop)); - ASSERT_EQ(0U, int_prop); - - // Verify DB - for (int j = 0; j < 2; j++) { - verify_func(); - if (j == 0) { - Reopen(options); - } - } - - // Test compact range works - ASSERT_OK( - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - // All data should be in the last level. 
- ColumnFamilyMetaData cf_meta; - db_->GetColumnFamilyMetaData(&cf_meta); - ASSERT_EQ(5U, cf_meta.levels.size()); - for (int i = 0; i < 4; i++) { - ASSERT_EQ(0U, cf_meta.levels[i].files.size()); - } - ASSERT_GT(cf_meta.levels[4U].files.size(), 0U); - verify_func(); - - Close(); - } - } - - env_->SetBackgroundThreads(1, Env::LOW); - env_->SetBackgroundThreads(1, Env::HIGH); -} - -// Test specific cases in dynamic max bytes -TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBase2) { - Random rnd(301); - int kMaxKey = 1000000; - - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.create_if_missing = true; - options.write_buffer_size = 20480; - options.max_write_buffer_number = 2; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 9999; - options.level0_stop_writes_trigger = 9999; - options.target_file_size_base = 9102; - options.level_compaction_dynamic_level_bytes = true; - options.max_bytes_for_level_base = 40960; - options.max_bytes_for_level_multiplier = 4; - options.max_background_compactions = 2; - options.num_levels = 5; - options.max_compaction_bytes = 0; // Force not expanding in compactions - options.db_host_id = ""; // Setting this messes up the file size calculation - BlockBasedTableOptions table_options; - table_options.block_size = 1024; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "true"}, - })); - - uint64_t int_prop; - std::string str_prop; - - // Initial base level is the last level - ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); - ASSERT_EQ(4U, int_prop); - - // Put about 28K to L0 - for (int i = 0; i < 70; i++) { - ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), - rnd.RandomString(380))); - } - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "false"}, - })); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); - ASSERT_EQ(4U, int_prop); - - // Insert extra about 28K to L0. After they are compacted to L4, the base - // level should be changed to L3. - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "true"}, - })); - for (int i = 0; i < 70; i++) { - ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), - rnd.RandomString(380))); - } - - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "false"}, - })); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); - ASSERT_EQ(3U, int_prop); - ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level1", &str_prop)); - ASSERT_EQ("0", str_prop); - ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level2", &str_prop)); - ASSERT_EQ("0", str_prop); - - // Write even more data while leaving the base level at L3. - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "true"}, - })); - // Write about 40K more - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), - rnd.RandomString(380))); - } - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "false"}, - })); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); - ASSERT_EQ(3U, int_prop); - - // Fill up L0, and then run an (auto) L0->Lmax compaction to raise the base - // level to 2. 
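The base-level assertions above follow from how dynamic level sizing is usually described: the last level's target is its actual size, each level above it targets 1/max_bytes_for_level_multiplier of the level below, and levels whose target would fall below max_bytes_for_level_base / max_bytes_for_level_multiplier stay empty. Under that assumption (a sketch of the sizing rule, not a restatement of the implementation), max_bytes_for_level_base = 40960 with multiplier 4 gives a 10240-byte threshold: ~28 KB of data implies an L3 target of ~7 KB, so the base level stays at L4, while ~56 KB implies ~14 KB, so the base level moves up to L3, exactly as asserted. A tiny self-contained check of that arithmetic:

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint64_t max_bytes_for_level_base = 40960;
      const double multiplier = 4.0;
      const int num_levels = 5;
      const double min_level_target = max_bytes_for_level_base / multiplier;  // 10240

      // Walk upward from the last level; stop before the first level whose
      // implied target would drop below the minimum. That level is the base.
      auto base_level_for = [&](double last_level_bytes) {
        int level = num_levels - 1;  // start at L4
        double target = last_level_bytes;
        while (level > 1 && target / multiplier >= min_level_target) {
          target /= multiplier;
          --level;
        }
        return level;
      };

      std::cout << "28KB -> base L" << base_level_for(28 * 1024.0) << "\n";  // L4
      std::cout << "56KB -> base L" << base_level_for(56 * 1024.0) << "\n";  // L3
      return 0;
    }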
- ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "true"}, - })); - // Write about 650K more. - // Each file is about 11KB, with 9KB of data. - for (int i = 0; i < 1300; i++) { - ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), - rnd.RandomString(380))); - } - - // Make sure that the compaction starts before the last bit of data is - // flushed, so that the base level isn't raised to L1. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"CompactionJob::Run():Start", "DynamicLevelMaxBytesBase2:0"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "false"}, - })); - - TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:0"); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); - ASSERT_EQ(2U, int_prop); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - - // Write more data until the base level changes to L1. There will be - // a manual compaction going on at the same time. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"CompactionJob::Run():Start", "DynamicLevelMaxBytesBase2:1"}, - {"DynamicLevelMaxBytesBase2:2", "CompactionJob::Run():End"}, - {"DynamicLevelMaxBytesBase2:compact_range_finish", - "FlushJob::WriteLevel0Table"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ROCKSDB_NAMESPACE::port::Thread thread([this] { - TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:compact_range_start"); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:compact_range_finish"); - }); - - TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:1"); - for (int i = 0; i < 2; i++) { - ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), - rnd.RandomString(380))); - } - TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:2"); - - ASSERT_OK(Flush()); - - thread.join(); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - - ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); - ASSERT_EQ(1U, int_prop); -} - -// Test specific cases in dynamic max bytes -TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesCompactRange) { - Random rnd(301); - int kMaxKey = 1000000; - - Options options = CurrentOptions(); - options.create_if_missing = true; - options.write_buffer_size = 2048; - options.max_write_buffer_number = 2; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 9999; - options.level0_stop_writes_trigger = 9999; - options.target_file_size_base = 2; - options.level_compaction_dynamic_level_bytes = true; - options.max_bytes_for_level_base = 10240; - options.max_bytes_for_level_multiplier = 4; - options.max_background_compactions = 1; - const int kNumLevels = 5; - options.num_levels = kNumLevels; - options.max_compaction_bytes = 1; // Force not expanding in compactions - BlockBasedTableOptions table_options; - table_options.block_size = 1024; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - - // Compact against empty DB - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - uint64_t int_prop; - std::string str_prop; - - // Initial base level is the last level - ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", 
&int_prop)); - ASSERT_EQ(4U, int_prop); - - // Put about 7K to L0 - for (int i = 0; i < 140; i++) { - ASSERT_OK( - Put(Key(static_cast(rnd.Uniform(kMaxKey))), rnd.RandomString(80))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - if (NumTableFilesAtLevel(0) == 0) { - // Make sure level 0 is not empty - ASSERT_OK( - Put(Key(static_cast(rnd.Uniform(kMaxKey))), rnd.RandomString(80))); - ASSERT_OK(Flush()); - } - - ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); - ASSERT_EQ(3U, int_prop); - ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level1", &str_prop)); - ASSERT_EQ("0", str_prop); - ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level2", &str_prop)); - ASSERT_EQ("0", str_prop); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - - std::set output_levels; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionPicker::CompactRange:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - output_levels.insert(compaction->output_level()); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(output_levels.size(), 2); - ASSERT_TRUE(output_levels.find(3) != output_levels.end()); - ASSERT_TRUE(output_levels.find(4) != output_levels.end()); - ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &str_prop)); - ASSERT_EQ("0", str_prop); - ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level3", &str_prop)); - ASSERT_EQ("0", str_prop); - // Base level is still level 3. - ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); - ASSERT_EQ(3U, int_prop); -} - -TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBaseInc) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.write_buffer_size = 2048; - options.max_write_buffer_number = 2; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 2; - options.target_file_size_base = 2048; - options.level_compaction_dynamic_level_bytes = true; - options.max_bytes_for_level_base = 10240; - options.max_bytes_for_level_multiplier = 4; - options.max_background_compactions = 2; - options.num_levels = 5; - options.max_compaction_bytes = 100000000; - - DestroyAndReopen(options); - - int non_trivial = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", - [&](void* /*arg*/) { non_trivial++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - const int total_keys = 3000; - const int random_part_size = 100; - for (int i = 0; i < total_keys; i++) { - std::string value = rnd.RandomString(random_part_size); - PutFixed32(&value, static_cast(i)); - ASSERT_OK(Put(Key(i), value)); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - ASSERT_EQ(non_trivial, 0); - - for (int i = 0; i < total_keys; i++) { - std::string value = Get(Key(i)); - ASSERT_EQ(DecodeFixed32(value.c_str() + random_part_size), - static_cast(i)); - } - - env_->SetBackgroundThreads(1, Env::LOW); - env_->SetBackgroundThreads(1, Env::HIGH); -} - -TEST_F(DBTestDynamicLevel, DISABLED_MigrateToDynamicLevelMaxBytesBase) { - Random rnd(301); - const int kMaxKey = 2000; - - Options options; - options.create_if_missing = 
true; - options.write_buffer_size = 2048; - options.max_write_buffer_number = 8; - options.level0_file_num_compaction_trigger = 4; - options.level0_slowdown_writes_trigger = 4; - options.level0_stop_writes_trigger = 8; - options.target_file_size_base = 2048; - options.level_compaction_dynamic_level_bytes = false; - options.max_bytes_for_level_base = 10240; - options.max_bytes_for_level_multiplier = 4; - options.num_levels = 8; - - DestroyAndReopen(options); - - auto verify_func = [&](int num_keys, bool if_sleep) { - for (int i = 0; i < num_keys; i++) { - ASSERT_NE("NOT_FOUND", Get(Key(kMaxKey + i))); - if (i < num_keys / 10) { - ASSERT_EQ("NOT_FOUND", Get(Key(i))); - } else { - ASSERT_NE("NOT_FOUND", Get(Key(i))); - } - if (if_sleep && i % 1000 == 0) { - // Without it, valgrind may choose not to give another - // thread a chance to run before finishing the function, - // causing the test to be extremely slow. - env_->SleepForMicroseconds(1); - } - } - }; - - int total_keys = 1000; - for (int i = 0; i < total_keys; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(102))); - ASSERT_OK(Put(Key(kMaxKey + i), rnd.RandomString(102))); - ASSERT_OK(Delete(Key(i / 10))); - } - verify_func(total_keys, false); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - options.level_compaction_dynamic_level_bytes = true; - options.disable_auto_compactions = true; - Reopen(options); - verify_func(total_keys, false); - - std::atomic_bool compaction_finished; - compaction_finished = false; - // Issue manual compaction in one thread and still verify DB state - // in main thread. - ROCKSDB_NAMESPACE::port::Thread t([&]() { - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = options.num_levels - 1; - ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr)); - compaction_finished.store(true); - }); - do { - verify_func(total_keys, true); - } while (!compaction_finished.load()); - t.join(); - - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "false"}, - })); - - int total_keys2 = 2000; - for (int i = total_keys; i < total_keys2; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(102))); - ASSERT_OK(Put(Key(kMaxKey + i), rnd.RandomString(102))); - ASSERT_OK(Delete(Key(i / 10))); - } - - verify_func(total_keys2, false); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - verify_func(total_keys2, false); - - // Base level is not level 1 - ASSERT_EQ(NumTableFilesAtLevel(1), 0); - ASSERT_EQ(NumTableFilesAtLevel(2), 0); -} -} // namespace ROCKSDB_NAMESPACE - - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_encryption_test.cc b/db/db_encryption_test.cc deleted file mode 100644 index fc8be5b69..000000000 --- a/db/db_encryption_test.cc +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
-// -#include "db/db_test_util.h" -#include "port/stack_trace.h" -#include "rocksdb/perf_context.h" -#include "test_util/sync_point.h" -#include -#include - -namespace ROCKSDB_NAMESPACE { - -class DBEncryptionTest : public DBTestBase { - public: - DBEncryptionTest() - : DBTestBase("db_encryption_test", /*env_do_fsync=*/true) {} - Env* GetTargetEnv() { - if (encrypted_env_ != nullptr) { - return (static_cast(encrypted_env_))->target(); - } else { - return env_; - } - } -}; - - -TEST_F(DBEncryptionTest, CheckEncrypted) { - ASSERT_OK(Put("foo567", "v1.fetdq")); - ASSERT_OK(Put("bar123", "v2.dfgkjdfghsd")); - Close(); - - // Open all files and look for the values we've put in there. - // They should not be found if encrypted, otherwise - // they should be found. - std::vector fileNames; - auto status = env_->GetChildren(dbname_, &fileNames); - ASSERT_OK(status); - - Env* target = GetTargetEnv(); - int hits = 0; - for (auto it = fileNames.begin(); it != fileNames.end(); ++it) { - if (*it == "LOCK") { - continue; - } - auto filePath = dbname_ + "/" + *it; - std::unique_ptr seqFile; - auto envOptions = EnvOptions(CurrentOptions()); - status = target->NewSequentialFile(filePath, &seqFile, envOptions); - ASSERT_OK(status); - - uint64_t fileSize; - status = target->GetFileSize(filePath, &fileSize); - ASSERT_OK(status); - - std::string scratch; - scratch.reserve(fileSize); - Slice data; - status = seqFile->Read(fileSize, &data, (char*)scratch.data()); - ASSERT_OK(status); - - if (data.ToString().find("foo567") != std::string::npos) { - hits++; - // std::cout << "Hit in " << filePath << "\n"; - } - if (data.ToString().find("v1.fetdq") != std::string::npos) { - hits++; - // std::cout << "Hit in " << filePath << "\n"; - } - if (data.ToString().find("bar123") != std::string::npos) { - hits++; - // std::cout << "Hit in " << filePath << "\n"; - } - if (data.ToString().find("v2.dfgkjdfghsd") != std::string::npos) { - hits++; - // std::cout << "Hit in " << filePath << "\n"; - } - if (data.ToString().find("dfgk") != std::string::npos) { - hits++; - // std::cout << "Hit in " << filePath << "\n"; - } - } - if (encrypted_env_) { - ASSERT_EQ(hits, 0); - } else { - ASSERT_GE(hits, 4); - } -} - -TEST_F(DBEncryptionTest, ReadEmptyFile) { - auto defaultEnv = GetTargetEnv(); - - // create empty file for reading it back in later - auto envOptions = EnvOptions(CurrentOptions()); - auto filePath = dbname_ + "/empty.empty"; - - Status status; - { - std::unique_ptr writableFile; - status = defaultEnv->NewWritableFile(filePath, &writableFile, envOptions); - ASSERT_OK(status); - } - - std::unique_ptr seqFile; - status = defaultEnv->NewSequentialFile(filePath, &seqFile, envOptions); - ASSERT_OK(status); - - std::string scratch; - Slice data; - // reading back 16 bytes from the empty file shouldn't trigger an assertion. - // it should just work and return an empty string - status = seqFile->Read(16, &data, (char*)scratch.data()); - ASSERT_OK(status); - - ASSERT_TRUE(data.empty()); -} - - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_flush_test.cc b/db/db_flush_test.cc deleted file mode 100644 index 0b2e7abb1..000000000 --- a/db/db_flush_test.cc +++ /dev/null @@ -1,3202 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include - -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "env/mock_env.h" -#include "file/filename.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/utilities/transaction_db.h" -#include "test_util/sync_point.h" -#include "test_util/testutil.h" -#include "util/cast_util.h" -#include "util/mutexlock.h" -#include "utilities/fault_injection_env.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { - -// This is a static filter used for filtering -// kvs during the compaction process. -static std::string NEW_VALUE = "NewValue"; - -class DBFlushTest : public DBTestBase { - public: - DBFlushTest() : DBTestBase("db_flush_test", /*env_do_fsync=*/true) {} -}; - -class DBFlushDirectIOTest : public DBFlushTest, - public ::testing::WithParamInterface { - public: - DBFlushDirectIOTest() : DBFlushTest() {} -}; - -class DBAtomicFlushTest : public DBFlushTest, - public ::testing::WithParamInterface { - public: - DBAtomicFlushTest() : DBFlushTest() {} -}; - -// We had issue when two background threads trying to flush at the same time, -// only one of them get committed. The test verifies the issue is fixed. -TEST_F(DBFlushTest, FlushWhileWritingManifest) { - Options options; - options.disable_auto_compactions = true; - options.max_background_flushes = 2; - options.env = env_; - Reopen(options); - FlushOptions no_wait; - no_wait.wait = false; - no_wait.allow_write_stall = true; - - SyncPoint::GetInstance()->LoadDependency( - {{"VersionSet::LogAndApply:WriteManifest", - "DBFlushTest::FlushWhileWritingManifest:1"}, - {"MemTableList::TryInstallMemtableFlushResults:InProgress", - "VersionSet::LogAndApply:WriteManifestDone"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put("foo", "v")); - ASSERT_OK(dbfull()->Flush(no_wait)); - TEST_SYNC_POINT("DBFlushTest::FlushWhileWritingManifest:1"); - ASSERT_OK(Put("bar", "v")); - ASSERT_OK(dbfull()->Flush(no_wait)); - // If the issue is hit we will wait here forever. - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(2, TotalTableFiles()); -} - -// Disable this test temporarily on Travis as it fails intermittently. -// Github issue: #4151 -TEST_F(DBFlushTest, SyncFail) { - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - Options options; - options.disable_auto_compactions = true; - options.env = fault_injection_env.get(); - - SyncPoint::GetInstance()->LoadDependency( - {{"DBFlushTest::SyncFail:1", "DBImpl::SyncClosedLogs:Start"}, - {"DBImpl::SyncClosedLogs:Failed", "DBFlushTest::SyncFail:2"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put("key", "value")); - FlushOptions flush_options; - flush_options.wait = false; - ASSERT_OK(dbfull()->Flush(flush_options)); - // Flush installs a new super-version. Get the ref count after that. 
- fault_injection_env->SetFilesystemActive(false); - TEST_SYNC_POINT("DBFlushTest::SyncFail:1"); - TEST_SYNC_POINT("DBFlushTest::SyncFail:2"); - fault_injection_env->SetFilesystemActive(true); - // Now the background job will do the flush; wait for it. - // Returns the IO error happend during flush. - ASSERT_NOK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ("", FilesPerLevel()); // flush failed. - Destroy(options); -} - -TEST_F(DBFlushTest, SyncSkip) { - Options options = CurrentOptions(); - - SyncPoint::GetInstance()->LoadDependency( - {{"DBFlushTest::SyncSkip:1", "DBImpl::SyncClosedLogs:Skip"}, - {"DBImpl::SyncClosedLogs:Skip", "DBFlushTest::SyncSkip:2"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - Reopen(options); - ASSERT_OK(Put("key", "value")); - - FlushOptions flush_options; - flush_options.wait = false; - ASSERT_OK(dbfull()->Flush(flush_options)); - - TEST_SYNC_POINT("DBFlushTest::SyncSkip:1"); - TEST_SYNC_POINT("DBFlushTest::SyncSkip:2"); - - // Now the background job will do the flush; wait for it. - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - Destroy(options); -} - -TEST_F(DBFlushTest, FlushInLowPriThreadPool) { - // Verify setting an empty high-pri (flush) thread pool causes flushes to be - // scheduled in the low-pri (compaction) thread pool. - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 4; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); - Reopen(options); - env_->SetBackgroundThreads(0, Env::HIGH); - - std::thread::id tid; - int num_flushes = 0, num_compactions = 0; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BGWorkFlush", [&](void* /*arg*/) { - if (tid == std::thread::id()) { - tid = std::this_thread::get_id(); - } else { - ASSERT_EQ(tid, std::this_thread::get_id()); - } - ++num_flushes; - }); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BGWorkCompaction", [&](void* /*arg*/) { - ASSERT_EQ(tid, std::this_thread::get_id()); - ++num_compactions; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put("key", "val")); - for (int i = 0; i < 4; ++i) { - ASSERT_OK(Put("key", "val")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(4, num_flushes); - ASSERT_EQ(1, num_compactions); -} - -// Test when flush job is submitted to low priority thread pool and when DB is -// closed in the meanwhile, CloseHelper doesn't hang. 
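FlushInLowPriThreadPool above and CloseDBWhenFlushInLowPri below both rely on the same setup: draining the high-priority (flush) thread pool so flush jobs fall back to the low-priority (compaction) pool. A minimal standalone sketch of that configuration outside the test harness, with an illustrative scratch path, might look like this:

#include <cassert>
#include "rocksdb/db.h"
#include "rocksdb/env.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.env = rocksdb::Env::Default();
  // With zero HIGH threads, flush jobs are scheduled on the LOW (compaction) pool.
  options.env->SetBackgroundThreads(0, rocksdb::Env::Priority::HIGH);
  options.env->SetBackgroundThreads(1, rocksdb::Env::Priority::LOW);

  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/rocksdb_lowpri_flush_demo", &db).ok());
  assert(db->Put(rocksdb::WriteOptions(), "key", "value").ok());
  assert(db->Flush(rocksdb::FlushOptions()).ok());  // runs on the LOW pool
  delete db;
  return 0;
}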
-TEST_F(DBFlushTest, CloseDBWhenFlushInLowPri) { - Options options = CurrentOptions(); - options.max_background_flushes = 1; - options.max_total_wal_size = 8192; - - DestroyAndReopen(options); - CreateColumnFamilies({"cf1", "cf2"}, options); - - env_->SetBackgroundThreads(0, Env::HIGH); - env_->SetBackgroundThreads(1, Env::LOW); - test::SleepingBackgroundTask sleeping_task_low; - int num_flushes = 0; - - SyncPoint::GetInstance()->SetCallBack("DBImpl::BGWorkFlush", - [&](void* /*arg*/) { ++num_flushes; }); - - int num_low_flush_unscheduled = 0; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::UnscheduleLowFlushCallback", [&](void* /*arg*/) { - num_low_flush_unscheduled++; - // There should be one flush job in low pool that needs to be - // unscheduled - ASSERT_EQ(num_low_flush_unscheduled, 1); - }); - - int num_high_flush_unscheduled = 0; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::UnscheduleHighFlushCallback", [&](void* /*arg*/) { - num_high_flush_unscheduled++; - // There should be no flush job in high pool - ASSERT_EQ(num_high_flush_unscheduled, 0); - }); - - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(0, "key1", DummyString(8192))); - // Block thread so that flush cannot be run and can be removed from the queue - // when called Unschedule. - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - sleeping_task_low.WaitUntilSleeping(); - - // Trigger flush and flush job will be scheduled to LOW priority thread. - ASSERT_OK(Put(0, "key2", DummyString(8192))); - - // Close DB and flush job in low priority queue will be removed without - // running. - Close(); - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - ASSERT_EQ(0, num_flushes); - - TryReopenWithColumnFamilies({"default", "cf1", "cf2"}, options); - ASSERT_OK(Put(0, "key3", DummyString(8192))); - ASSERT_OK(Flush(0)); - ASSERT_EQ(1, num_flushes); -} - -TEST_F(DBFlushTest, ManualFlushWithMinWriteBufferNumberToMerge) { - Options options = CurrentOptions(); - options.write_buffer_size = 100; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - Reopen(options); - - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BGWorkFlush", - "DBFlushTest::ManualFlushWithMinWriteBufferNumberToMerge:1"}, - {"DBFlushTest::ManualFlushWithMinWriteBufferNumberToMerge:2", - "FlushJob::WriteLevel0Table"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put("key1", "value1")); - - port::Thread t([&]() { - // The call wait for flush to finish, i.e. with flush_options.wait = true. - ASSERT_OK(Flush()); - }); - - // Wait for flush start. - TEST_SYNC_POINT("DBFlushTest::ManualFlushWithMinWriteBufferNumberToMerge:1"); - // Insert a second memtable before the manual flush finish. - // At the end of the manual flush job, it will check if further flush - // is needed, but it will not trigger flush of the second memtable because - // min_write_buffer_number_to_merge is not reached. - ASSERT_OK(Put("key2", "value2")); - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - TEST_SYNC_POINT("DBFlushTest::ManualFlushWithMinWriteBufferNumberToMerge:2"); - - // Manual flush should return, without waiting for flush indefinitely. 
- t.join(); -} - -TEST_F(DBFlushTest, ScheduleOnlyOneBgThread) { - Options options = CurrentOptions(); - Reopen(options); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - int called = 0; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::MaybeScheduleFlushOrCompaction:AfterSchedule:0", [&](void* arg) { - ASSERT_NE(nullptr, arg); - auto unscheduled_flushes = *reinterpret_cast(arg); - ASSERT_EQ(0, unscheduled_flushes); - ++called; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put("a", "foo")); - FlushOptions flush_opts; - ASSERT_OK(dbfull()->Flush(flush_opts)); - ASSERT_EQ(1, called); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -// The following 3 tests are designed for testing garbage statistics at flush -// time. -// -// ======= General Information ======= (from GitHub Wiki). -// There are three scenarios where memtable flush can be triggered: -// -// 1 - Memtable size exceeds ColumnFamilyOptions::write_buffer_size -// after a write. -// 2 - Total memtable size across all column families exceeds -// DBOptions::db_write_buffer_size, -// or DBOptions::write_buffer_manager signals a flush. In this scenario -// the largest memtable will be flushed. -// 3 - Total WAL file size exceeds DBOptions::max_total_wal_size. -// In this scenario the memtable with the oldest data will be flushed, -// in order to allow the WAL file with data from this memtable to be -// purged. -// -// As a result, a memtable can be flushed before it is full. This is one -// reason the generated SST file can be smaller than the corresponding -// memtable. Compression is another factor to make SST file smaller than -// corresponding memtable, since data in memtable is uncompressed. - -TEST_F(DBFlushTest, StatisticsGarbageBasic) { - Options options = CurrentOptions(); - - // The following options are used to enforce several values that - // may already exist as default values to make this test resilient - // to default value updates in the future. - options.statistics = CreateDBStatistics(); - - // Record all statistics. - options.statistics->set_stats_level(StatsLevel::kAll); - - // create the DB if it's not already present - options.create_if_missing = true; - - // Useful for now as we are trying to compare uncompressed data savings on - // flush(). - options.compression = kNoCompression; - - // Prevent memtable in place updates. Should already be disabled - // (from Wiki: - // In place updates can be enabled by toggling on the bool - // inplace_update_support flag. However, this flag is by default set to - // false - // because this thread-safe in-place update support is not compatible - // with concurrent memtable writes. Note that the bool - // allow_concurrent_memtable_write is set to true by default ) - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - - // Enforce size of a single MemTable to 64MB (64MB = 67108864 bytes). - options.write_buffer_size = 64 << 20; - - ASSERT_OK(TryReopen(options)); - - // Put multiple times the same key-values. - // The encoded length of a db entry in the memtable is - // defined in db/memtable.cc (MemTable::Add) as the variable: - // encoded_len= VarintLength(internal_key_size) --> = - // log_256(internal_key). - // Min # of bytes - // necessary to - // store - // internal_key_size. 
- // + internal_key_size --> = actual key string, - // (size key_size: w/o term null char) - // + 8 bytes for - // fixed uint64 "seq - // number - // + - // insertion type" - // + VarintLength(val_size) --> = min # of bytes to - // store val_size - // + val_size --> = actual value - // string - // For example, in our situation, "key1" : size 4, "value1" : size 6 - // (the terminating null characters are not copied over to the memtable). - // And therefore encoded_len = 1 + (4+8) + 1 + 6 = 20 bytes per entry. - // However in terms of raw data contained in the memtable, and written - // over to the SSTable, we only count internal_key_size and val_size, - // because this is the only raw chunk of bytes that contains everything - // necessary to reconstruct a user entry: sequence number, insertion type, - // key, and value. - - // To test the relevance of our Memtable garbage statistics, - // namely MEMTABLE_PAYLOAD_BYTES_AT_FLUSH and MEMTABLE_GARBAGE_BYTES_AT_FLUSH, - // we insert K-V pairs with 3 distinct keys (of length 4), - // and random values of arbitrary length RAND_VALUES_LENGTH, - // and we repeat this step NUM_REPEAT times total. - // At the end, we insert 3 final K-V pairs with the same 3 keys - // and known values (these will be the final values, of length 6). - // I chose NUM_REPEAT=2,000 such that no automatic flush is - // triggered (the number of bytes in the memtable is therefore - // well below any meaningful heuristic for a memtable of size 64MB). - // As a result, since each K-V pair is inserted as a payload - // of N meaningful bytes (sequence number, insertion type, - // key, and value = 8 + 4 + RAND_VALUE_LENGTH), - // MEMTABLE_GARBAGE_BYTES_AT_FLUSH should be equal to 2,000 * N bytes - // and MEMTABLE_PAYLAOD_BYTES_AT_FLUSH = MEMTABLE_GARBAGE_BYTES_AT_FLUSH + - // (3*(8 + 4 + 6)) bytes. For RAND_VALUE_LENGTH = 172 (arbitrary value), we - // expect: - // N = 8 + 4 + 172 = 184 bytes - // MEMTABLE_GARBAGE_BYTES_AT_FLUSH = 2,000 * 184 = 368,000 bytes. - // MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = 368,000 + 3*18 = 368,054 bytes. - - const size_t NUM_REPEAT = 2000; - const size_t RAND_VALUES_LENGTH = 172; - const std::string KEY1 = "key1"; - const std::string KEY2 = "key2"; - const std::string KEY3 = "key3"; - const std::string VALUE1 = "value1"; - const std::string VALUE2 = "value2"; - const std::string VALUE3 = "value3"; - uint64_t EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = 0; - uint64_t EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH = 0; - - Random rnd(301); - // Insertion of of K-V pairs, multiple times. - for (size_t i = 0; i < NUM_REPEAT; i++) { - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - std::string p_v1 = rnd.RandomString(RAND_VALUES_LENGTH); - std::string p_v2 = rnd.RandomString(RAND_VALUES_LENGTH); - std::string p_v3 = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEY1, p_v1)); - ASSERT_OK(Put(KEY2, p_v2)); - ASSERT_OK(Put(KEY3, p_v3)); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY1.size() + p_v1.size() + sizeof(uint64_t); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY2.size() + p_v2.size() + sizeof(uint64_t); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY3.size() + p_v3.size() + sizeof(uint64_t); - } - - // The memtable data bytes includes the "garbage" - // bytes along with the useful payload. 
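As a quick cross-check of the accounting described above, the test charges each Put with key size + value size + 8 bytes for the packed sequence number and value type. A tiny standalone sketch of that arithmetic (the constants mirror the test; nothing here is a RocksDB API):

#include <cstdint>
#include <iostream>

int main() {
  const uint64_t kSeqnoAndTypeBytes = 8;  // fixed uint64 "seq number + insertion type"
  const uint64_t kKeySize = 4;            // e.g. "key1"
  const uint64_t kRandomValueSize = 172;  // RAND_VALUES_LENGTH
  const uint64_t kFinalValueSize = 6;     // e.g. "value1"

  // Bytes counted for each overwritten (garbage) entry: 4 + 172 + 8 = 184.
  const uint64_t garbage_per_put = kKeySize + kRandomValueSize + kSeqnoAndTypeBytes;
  // Bytes counted for each surviving final entry: 4 + 6 + 8 = 18.
  const uint64_t payload_per_final_put = kKeySize + kFinalValueSize + kSeqnoAndTypeBytes;

  std::cout << garbage_per_put << " bytes per overwritten entry, "
            << payload_per_final_put << " bytes per final entry\n";
  return 0;
}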
- EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH; - - ASSERT_OK(Put(KEY1, VALUE1)); - ASSERT_OK(Put(KEY2, VALUE2)); - ASSERT_OK(Put(KEY3, VALUE3)); - - // Add useful payload to the memtable data bytes: - EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH += - KEY1.size() + VALUE1.size() + KEY2.size() + VALUE2.size() + KEY3.size() + - VALUE3.size() + 3 * sizeof(uint64_t); - - // We assert that the last K-V pairs have been successfully inserted, - // and that the valid values are VALUE1, VALUE2, VALUE3. - PinnableSlice value; - ASSERT_OK(Get(KEY1, &value)); - ASSERT_EQ(value.ToString(), VALUE1); - ASSERT_OK(Get(KEY2, &value)); - ASSERT_EQ(value.ToString(), VALUE2); - ASSERT_OK(Get(KEY3, &value)); - ASSERT_EQ(value.ToString(), VALUE3); - - // Force flush to SST. Increments the statistics counter. - ASSERT_OK(Flush()); - - // Collect statistics. - uint64_t mem_data_bytes = - TestGetTickerCount(options, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); - uint64_t mem_garbage_bytes = - TestGetTickerCount(options, MEMTABLE_GARBAGE_BYTES_AT_FLUSH); - - EXPECT_EQ(mem_data_bytes, EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); - EXPECT_EQ(mem_garbage_bytes, EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH); - - Close(); -} - -TEST_F(DBFlushTest, StatisticsGarbageInsertAndDeletes) { - Options options = CurrentOptions(); - options.statistics = CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kAll); - options.create_if_missing = true; - options.compression = kNoCompression; - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - options.write_buffer_size = 67108864; - - ASSERT_OK(TryReopen(options)); - - const size_t NUM_REPEAT = 2000; - const size_t RAND_VALUES_LENGTH = 37; - const std::string KEY1 = "key1"; - const std::string KEY2 = "key2"; - const std::string KEY3 = "key3"; - const std::string KEY4 = "key4"; - const std::string KEY5 = "key5"; - const std::string KEY6 = "key6"; - - uint64_t EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = 0; - uint64_t EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH = 0; - - WriteBatch batch; - - Random rnd(301); - // Insertion of of K-V pairs, multiple times. - for (size_t i = 0; i < NUM_REPEAT; i++) { - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - std::string p_v1 = rnd.RandomString(RAND_VALUES_LENGTH); - std::string p_v2 = rnd.RandomString(RAND_VALUES_LENGTH); - std::string p_v3 = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEY1, p_v1)); - ASSERT_OK(Put(KEY2, p_v2)); - ASSERT_OK(Put(KEY3, p_v3)); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY1.size() + p_v1.size() + sizeof(uint64_t); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY2.size() + p_v2.size() + sizeof(uint64_t); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY3.size() + p_v3.size() + sizeof(uint64_t); - ASSERT_OK(Delete(KEY1)); - ASSERT_OK(Delete(KEY2)); - ASSERT_OK(Delete(KEY3)); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY1.size() + KEY2.size() + KEY3.size() + 3 * sizeof(uint64_t); - } - - // The memtable data bytes includes the "garbage" - // bytes along with the useful payload. - EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH; - - // Note : one set of delete for KEY1, KEY2, KEY3 is written to - // SSTable to propagate the delete operations to K-V pairs - // that could have been inserted into the database during past Flush - // opeartions. 
- EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH -= - KEY1.size() + KEY2.size() + KEY3.size() + 3 * sizeof(uint64_t); - - // Additional useful paylaod. - ASSERT_OK(Delete(KEY4)); - ASSERT_OK(Delete(KEY5)); - ASSERT_OK(Delete(KEY6)); - - // // Add useful payload to the memtable data bytes: - EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH += - KEY4.size() + KEY5.size() + KEY6.size() + 3 * sizeof(uint64_t); - - // We assert that the K-V pairs have been successfully deleted. - PinnableSlice value; - ASSERT_NOK(Get(KEY1, &value)); - ASSERT_NOK(Get(KEY2, &value)); - ASSERT_NOK(Get(KEY3, &value)); - - // Force flush to SST. Increments the statistics counter. - ASSERT_OK(Flush()); - - // Collect statistics. - uint64_t mem_data_bytes = - TestGetTickerCount(options, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); - uint64_t mem_garbage_bytes = - TestGetTickerCount(options, MEMTABLE_GARBAGE_BYTES_AT_FLUSH); - - EXPECT_EQ(mem_data_bytes, EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); - EXPECT_EQ(mem_garbage_bytes, EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH); - - Close(); -} - -TEST_F(DBFlushTest, StatisticsGarbageRangeDeletes) { - Options options = CurrentOptions(); - options.statistics = CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kAll); - options.create_if_missing = true; - options.compression = kNoCompression; - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - options.write_buffer_size = 67108864; - - ASSERT_OK(TryReopen(options)); - - const size_t NUM_REPEAT = 1000; - const size_t RAND_VALUES_LENGTH = 42; - const std::string KEY1 = "key1"; - const std::string KEY2 = "key2"; - const std::string KEY3 = "key3"; - const std::string KEY4 = "key4"; - const std::string KEY5 = "key5"; - const std::string KEY6 = "key6"; - const std::string VALUE3 = "value3"; - - uint64_t EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = 0; - uint64_t EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH = 0; - - Random rnd(301); - // Insertion of of K-V pairs, multiple times. - // Also insert DeleteRange - for (size_t i = 0; i < NUM_REPEAT; i++) { - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - std::string p_v1 = rnd.RandomString(RAND_VALUES_LENGTH); - std::string p_v2 = rnd.RandomString(RAND_VALUES_LENGTH); - std::string p_v3 = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEY1, p_v1)); - ASSERT_OK(Put(KEY2, p_v2)); - ASSERT_OK(Put(KEY3, p_v3)); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY1.size() + p_v1.size() + sizeof(uint64_t); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY2.size() + p_v2.size() + sizeof(uint64_t); - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - KEY3.size() + p_v3.size() + sizeof(uint64_t); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY1, - KEY2)); - // Note: DeleteRange have an exclusive upper bound, e.g. here: [KEY2,KEY3) - // is deleted. - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY2, - KEY3)); - // Delete ranges are stored as a regular K-V pair, with key=STARTKEY, - // value=ENDKEY. - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += - (KEY1.size() + KEY2.size() + sizeof(uint64_t)) + - (KEY2.size() + KEY3.size() + sizeof(uint64_t)); - } - - // The memtable data bytes includes the "garbage" - // bytes along with the useful payload. 
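The exclusive upper bound noted above is the key property of DeleteRange: keys in [begin, end) are removed, and the end key itself survives. A minimal standalone sketch (the scratch path is illustrative):

#include <cassert>
#include <string>
#include "rocksdb/db.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/rocksdb_deleterange_demo", &db).ok());

  assert(db->Put(rocksdb::WriteOptions(), "key1", "v1").ok());
  assert(db->Put(rocksdb::WriteOptions(), "key2", "v2").ok());
  assert(db->Put(rocksdb::WriteOptions(), "key3", "v3").ok());

  // Removes keys in [key1, key3): key1 and key2 are gone, key3 remains.
  assert(db->DeleteRange(rocksdb::WriteOptions(), db->DefaultColumnFamily(),
                         "key1", "key3").ok());

  std::string value;
  assert(db->Get(rocksdb::ReadOptions(), "key1", &value).IsNotFound());
  assert(db->Get(rocksdb::ReadOptions(), "key2", &value).IsNotFound());
  assert(db->Get(rocksdb::ReadOptions(), "key3", &value).ok() && value == "v3");
  delete db;
  return 0;
}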
- EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH; - - // Note : one set of deleteRange for (KEY1, KEY2) and (KEY2, KEY3) is written - // to SSTable to propagate the deleteRange operations to K-V pairs that could - // have been inserted into the database during past Flush opeartions. - EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH -= - (KEY1.size() + KEY2.size() + sizeof(uint64_t)) + - (KEY2.size() + KEY3.size() + sizeof(uint64_t)); - - // Overwrite KEY3 with known value (VALUE3) - // Note that during the whole time KEY3 has never been deleted - // by the RangeDeletes. - ASSERT_OK(Put(KEY3, VALUE3)); - EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH += - KEY3.size() + VALUE3.size() + sizeof(uint64_t); - - // Additional useful paylaod. - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY4, KEY5)); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY5, KEY6)); - - // Add useful payload to the memtable data bytes: - EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH += - (KEY4.size() + KEY5.size() + sizeof(uint64_t)) + - (KEY5.size() + KEY6.size() + sizeof(uint64_t)); - - // We assert that the K-V pairs have been successfully deleted. - PinnableSlice value; - ASSERT_NOK(Get(KEY1, &value)); - ASSERT_NOK(Get(KEY2, &value)); - // And that KEY3's value is correct. - ASSERT_OK(Get(KEY3, &value)); - ASSERT_EQ(value, VALUE3); - - // Force flush to SST. Increments the statistics counter. - ASSERT_OK(Flush()); - - // Collect statistics. - uint64_t mem_data_bytes = - TestGetTickerCount(options, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); - uint64_t mem_garbage_bytes = - TestGetTickerCount(options, MEMTABLE_GARBAGE_BYTES_AT_FLUSH); - - EXPECT_EQ(mem_data_bytes, EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); - EXPECT_EQ(mem_garbage_bytes, EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH); - - Close(); -} - -// This simple Listener can only handle one flush at a time. -class TestFlushListener : public EventListener { - public: - TestFlushListener(Env* env, DBFlushTest* test) - : slowdown_count(0), stop_count(0), db_closed(), env_(env), test_(test) { - db_closed = false; - } - - ~TestFlushListener() override { - prev_fc_info_.status.PermitUncheckedError(); // Ignore the status - } - - void OnTableFileCreated(const TableFileCreationInfo& info) override { - // remember the info for later checking the FlushJobInfo. - prev_fc_info_ = info; - ASSERT_GT(info.db_name.size(), 0U); - ASSERT_GT(info.cf_name.size(), 0U); - ASSERT_GT(info.file_path.size(), 0U); - ASSERT_GT(info.job_id, 0); - ASSERT_GT(info.table_properties.data_size, 0U); - ASSERT_GT(info.table_properties.raw_key_size, 0U); - ASSERT_GT(info.table_properties.raw_value_size, 0U); - ASSERT_GT(info.table_properties.num_data_blocks, 0U); - ASSERT_GT(info.table_properties.num_entries, 0U); - ASSERT_EQ(info.file_checksum, kUnknownFileChecksum); - ASSERT_EQ(info.file_checksum_func_name, kUnknownFileChecksumFuncName); - } - - void OnFlushCompleted(DB* db, const FlushJobInfo& info) override { - flushed_dbs_.push_back(db); - flushed_column_family_names_.push_back(info.cf_name); - if (info.triggered_writes_slowdown) { - slowdown_count++; - } - if (info.triggered_writes_stop) { - stop_count++; - } - // verify whether the previously created file matches the flushed file. 
- ASSERT_EQ(prev_fc_info_.db_name, db->GetName()); - ASSERT_EQ(prev_fc_info_.cf_name, info.cf_name); - ASSERT_EQ(prev_fc_info_.job_id, info.job_id); - ASSERT_EQ(prev_fc_info_.file_path, info.file_path); - ASSERT_EQ(TableFileNameToNumber(info.file_path), info.file_number); - - // Note: the following chunk relies on the notification pertaining to the - // database pointed to by DBTestBase::db_, and is thus bypassed when - // that assumption does not hold (see the test case MultiDBMultiListeners - // below). - ASSERT_TRUE(test_); - if (db == test_->db_) { - std::vector> files_by_level; - test_->dbfull()->TEST_GetFilesMetaData(db->DefaultColumnFamily(), - &files_by_level); - - ASSERT_FALSE(files_by_level.empty()); - auto it = std::find_if(files_by_level[0].begin(), files_by_level[0].end(), - [&](const FileMetaData& meta) { - return meta.fd.GetNumber() == info.file_number; - }); - ASSERT_NE(it, files_by_level[0].end()); - ASSERT_EQ(info.oldest_blob_file_number, it->oldest_blob_file_number); - } - - ASSERT_EQ(db->GetEnv()->GetThreadID(), info.thread_id); - ASSERT_GT(info.thread_id, 0U); - } - - std::vector flushed_column_family_names_; - std::vector flushed_dbs_; - int slowdown_count; - int stop_count; - bool db_closing; - std::atomic_bool db_closed; - TableFileCreationInfo prev_fc_info_; - - protected: - Env* env_; - DBFlushTest* test_; -}; - -TEST_F( - DBFlushTest, - FixUnrecoverableWriteDuringAtomicFlushWaitUntilFlushWouldNotStallWrites) { - Options options = CurrentOptions(); - options.atomic_flush = true; - - // To simulate a real-life crash where we can't flush during db's shutdown - options.avoid_flush_during_shutdown = true; - - // Set 3 low thresholds (while `disable_auto_compactions=false`) here so flush - // adding one more L0 file during `GetLiveFiles()` will have to wait till such - // flush will not stall writes - options.level0_stop_writes_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - // Disable level-0 compaction triggered by number of files to avoid - // stalling check being skipped (resulting in the flush mentioned above didn't - // wait) - options.level0_file_num_compaction_trigger = -1; - - CreateAndReopenWithCF({"cf1"}, options); - - // Manually pause compaction thread to ensure enough L0 files as - // `disable_auto_compactions=false`is needed, in order to meet the 3 low - // thresholds above - std::unique_ptr sleeping_task_; - sleeping_task_.reset(new test::SleepingBackgroundTask()); - env_->SetBackgroundThreads(1, Env::LOW); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - sleeping_task_.get(), Env::Priority::LOW); - sleeping_task_->WaitUntilSleeping(); - - // Create some initial file to help meet the 3 low thresholds above - ASSERT_OK(Put(1, "dontcare", "dontcare")); - ASSERT_OK(Flush(1)); - - // Insert some initial data so we have something to atomic-flush later - // triggered by `GetLiveFiles()` - WriteOptions write_opts; - write_opts.disableWAL = true; - ASSERT_OK(Put(1, "k1", "v1", write_opts)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({{ - "DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait", - "DBFlushTest::" - "UnrecoverableWriteInAtomicFlushWaitUntilFlushWouldNotStallWrites::Write", - }}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Write to db when atomic flush releases the lock to wait on write stall - // condition to be gone in `WaitUntilFlushWouldNotStallWrites()` - port::Thread write_thread([&] { - TEST_SYNC_POINT( - "DBFlushTest::" - 
"UnrecoverableWriteInAtomicFlushWaitUntilFlushWouldNotStallWrites::" - "Write"); - // Before the fix, the empty default CF would've been prematurely excluded - // from this atomic flush. The following two writes together make default CF - // later contain data that should've been included in the atomic flush. - ASSERT_OK(Put(0, "k2", "v2", write_opts)); - // The following write increases the max seqno of this atomic flush to be 3, - // which is greater than the seqno of default CF's data. This then violates - // the invariant that all entries of seqno less than the max seqno - // of this atomic flush should've been flushed by the time of this atomic - // flush finishes. - ASSERT_OK(Put(1, "k3", "v3", write_opts)); - - // Resume compaction threads and reduce L0 files so `GetLiveFiles()` can - // resume from the wait - sleeping_task_->WakeUp(); - sleeping_task_->WaitUntilDone(); - MoveFilesToLevel(1, 1); - }); - - // Trigger an atomic flush by `GetLiveFiles()` - std::vector files; - uint64_t manifest_file_size; - ASSERT_OK(db_->GetLiveFiles(files, &manifest_file_size, /*flush*/ true)); - - write_thread.join(); - - ReopenWithColumnFamilies({"default", "cf1"}, options); - - ASSERT_EQ(Get(1, "k3"), "v3"); - // Prior to the fix, `Get()` will return `NotFound as "k2" entry in default CF - // can't be recovered from a crash right after the atomic flush finishes, - // resulting in a "recovery hole" as "k3" can be recovered. It's due to the - // invariant violation described above. - ASSERT_EQ(Get(0, "k2"), "v2"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBFlushTest, FixFlushReasonRaceFromConcurrentFlushes) { - Options options = CurrentOptions(); - options.atomic_flush = true; - options.disable_auto_compactions = true; - CreateAndReopenWithCF({"cf1"}, options); - - for (int idx = 0; idx < 1; ++idx) { - ASSERT_OK(Put(0, Key(idx), std::string(1, 'v'))); - ASSERT_OK(Put(1, Key(idx), std::string(1, 'v'))); - } - - // To coerce a manual flush happenning in the middle of GetLiveFiles's flush, - // we need to pause background flush thread and enable it later. - std::shared_ptr sleeping_task = - std::make_shared(); - env_->SetBackgroundThreads(1, Env::HIGH); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - sleeping_task.get(), Env::Priority::HIGH); - sleeping_task->WaitUntilSleeping(); - - // Coerce a manual flush happenning in the middle of GetLiveFiles's flush - bool get_live_files_paused_at_sync_point = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::AtomicFlushMemTables:AfterScheduleFlush", [&](void* /* arg */) { - if (get_live_files_paused_at_sync_point) { - // To prevent non-GetLiveFiles() flush from pausing at this sync point - return; - } - get_live_files_paused_at_sync_point = true; - - FlushOptions fo; - fo.wait = false; - fo.allow_write_stall = true; - ASSERT_OK(dbfull()->Flush(fo)); - - // Resume background flush thread so GetLiveFiles() can finish - sleeping_task->WakeUp(); - sleeping_task->WaitUntilDone(); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - std::vector files; - uint64_t manifest_file_size; - // Before the fix, a race condition on default cf's flush reason due to - // concurrent GetLiveFiles's flush and manual flush will fail - // an internal assertion. - // After the fix, such race condition is fixed and there is no assertion - // failure. 
- ASSERT_OK(db_->GetLiveFiles(files, &manifest_file_size, /*flush*/ true)); - ASSERT_TRUE(get_live_files_paused_at_sync_point); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBFlushTest, MemPurgeBasic) { - Options options = CurrentOptions(); - - // The following options are used to enforce several values that - // may already exist as default values to make this test resilient - // to default value updates in the future. - options.statistics = CreateDBStatistics(); - - // Record all statistics. - options.statistics->set_stats_level(StatsLevel::kAll); - - // create the DB if it's not already present - options.create_if_missing = true; - - // Useful for now as we are trying to compare uncompressed data savings on - // flush(). - options.compression = kNoCompression; - - // Prevent memtable in place updates. Should already be disabled - // (from Wiki: - // In place updates can be enabled by toggling on the bool - // inplace_update_support flag. However, this flag is by default set to - // false - // because this thread-safe in-place update support is not compatible - // with concurrent memtable writes. Note that the bool - // allow_concurrent_memtable_write is set to true by default ) - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - - // Enforce size of a single MemTable to 64MB (64MB = 67108864 bytes). - options.write_buffer_size = 1 << 20; - // Initially deactivate the MemPurge prototype. - options.experimental_mempurge_threshold = 0.0; - TestFlushListener* listener = new TestFlushListener(options.env, this); - options.listeners.emplace_back(listener); - ASSERT_OK(TryReopen(options)); - - // RocksDB lite does not support dynamic options - // Dynamically activate the MemPurge prototype without restarting the DB. - ColumnFamilyHandle* cfh = db_->DefaultColumnFamily(); - ASSERT_OK(db_->SetOptions(cfh, {{"experimental_mempurge_threshold", "1.0"}})); - - std::atomic mempurge_count{0}; - std::atomic sst_count{0}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:MemPurgeSuccessful", - [&](void* /*arg*/) { mempurge_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - std::string KEY1 = "IamKey1"; - std::string KEY2 = "IamKey2"; - std::string KEY3 = "IamKey3"; - std::string KEY4 = "IamKey4"; - std::string KEY5 = "IamKey5"; - std::string KEY6 = "IamKey6"; - std::string KEY7 = "IamKey7"; - std::string KEY8 = "IamKey8"; - std::string KEY9 = "IamKey9"; - std::string RNDKEY1, RNDKEY2, RNDKEY3; - const std::string NOT_FOUND = "NOT_FOUND"; - - // Heavy overwrite workload, - // more than would fit in maximum allowed memtables. - Random rnd(719); - const size_t NUM_REPEAT = 100; - const size_t RAND_KEYS_LENGTH = 57; - const size_t RAND_VALUES_LENGTH = 10240; - std::string p_v1, p_v2, p_v3, p_v4, p_v5, p_v6, p_v7, p_v8, p_v9, p_rv1, - p_rv2, p_rv3; - - // Insert a very first set of keys that will be - // mempurged at least once. 
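The SetOptions() call above is the general mechanism for changing any mutable column-family option without reopening the DB; values are passed as strings and parsed by the options framework. A small standalone sketch (path name is illustrative):

#include <cassert>
#include <string>
#include <unordered_map>
#include "rocksdb/db.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/rocksdb_setoptions_demo", &db).ok());

  rocksdb::ColumnFamilyHandle* cfh = db->DefaultColumnFamily();
  // Turn the MemPurge prototype on for the default column family at runtime.
  assert(db->SetOptions(cfh, {{"experimental_mempurge_threshold", "1.0"}}).ok());
  // Any other mutable CF option works the same way.
  assert(db->SetOptions(cfh, {{"write_buffer_size", "1048576"}}).ok());
  delete db;
  return 0;
}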
- p_v1 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v2 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v3 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v4 = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEY1, p_v1)); - ASSERT_OK(Put(KEY2, p_v2)); - ASSERT_OK(Put(KEY3, p_v3)); - ASSERT_OK(Put(KEY4, p_v4)); - ASSERT_EQ(Get(KEY1), p_v1); - ASSERT_EQ(Get(KEY2), p_v2); - ASSERT_EQ(Get(KEY3), p_v3); - ASSERT_EQ(Get(KEY4), p_v4); - - // Insertion of of K-V pairs, multiple times (overwrites). - for (size_t i = 0; i < NUM_REPEAT; i++) { - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - p_v5 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v6 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v7 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v8 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v9 = rnd.RandomString(RAND_VALUES_LENGTH); - - ASSERT_OK(Put(KEY5, p_v5)); - ASSERT_OK(Put(KEY6, p_v6)); - ASSERT_OK(Put(KEY7, p_v7)); - ASSERT_OK(Put(KEY8, p_v8)); - ASSERT_OK(Put(KEY9, p_v9)); - - ASSERT_EQ(Get(KEY1), p_v1); - ASSERT_EQ(Get(KEY2), p_v2); - ASSERT_EQ(Get(KEY3), p_v3); - ASSERT_EQ(Get(KEY4), p_v4); - ASSERT_EQ(Get(KEY5), p_v5); - ASSERT_EQ(Get(KEY6), p_v6); - ASSERT_EQ(Get(KEY7), p_v7); - ASSERT_EQ(Get(KEY8), p_v8); - ASSERT_EQ(Get(KEY9), p_v9); - } - - // Check that there was at least one mempurge - const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1; - // Check that there was no SST files created during flush. - const uint32_t EXPECTED_SST_COUNT = 0; - - EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT); - EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT); - - // Insertion of of K-V pairs, no overwrites. - for (size_t i = 0; i < NUM_REPEAT; i++) { - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - RNDKEY1 = rnd.RandomString(RAND_KEYS_LENGTH); - RNDKEY2 = rnd.RandomString(RAND_KEYS_LENGTH); - RNDKEY3 = rnd.RandomString(RAND_KEYS_LENGTH); - p_rv1 = rnd.RandomString(RAND_VALUES_LENGTH); - p_rv2 = rnd.RandomString(RAND_VALUES_LENGTH); - p_rv3 = rnd.RandomString(RAND_VALUES_LENGTH); - - ASSERT_OK(Put(RNDKEY1, p_rv1)); - ASSERT_OK(Put(RNDKEY2, p_rv2)); - ASSERT_OK(Put(RNDKEY3, p_rv3)); - - ASSERT_EQ(Get(KEY1), p_v1); - ASSERT_EQ(Get(KEY2), p_v2); - ASSERT_EQ(Get(KEY3), p_v3); - ASSERT_EQ(Get(KEY4), p_v4); - ASSERT_EQ(Get(KEY5), p_v5); - ASSERT_EQ(Get(KEY6), p_v6); - ASSERT_EQ(Get(KEY7), p_v7); - ASSERT_EQ(Get(KEY8), p_v8); - ASSERT_EQ(Get(KEY9), p_v9); - ASSERT_EQ(Get(RNDKEY1), p_rv1); - ASSERT_EQ(Get(RNDKEY2), p_rv2); - ASSERT_EQ(Get(RNDKEY3), p_rv3); - } - - // Assert that at least one flush to storage has been performed - EXPECT_GT(sst_count.exchange(0), EXPECTED_SST_COUNT); - // (which will consequently increase the number of mempurges recorded too). - EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT); - - // Assert that there is no data corruption, even with - // a flush to storage. 
- ASSERT_EQ(Get(KEY1), p_v1); - ASSERT_EQ(Get(KEY2), p_v2); - ASSERT_EQ(Get(KEY3), p_v3); - ASSERT_EQ(Get(KEY4), p_v4); - ASSERT_EQ(Get(KEY5), p_v5); - ASSERT_EQ(Get(KEY6), p_v6); - ASSERT_EQ(Get(KEY7), p_v7); - ASSERT_EQ(Get(KEY8), p_v8); - ASSERT_EQ(Get(KEY9), p_v9); - ASSERT_EQ(Get(RNDKEY1), p_rv1); - ASSERT_EQ(Get(RNDKEY2), p_rv2); - ASSERT_EQ(Get(RNDKEY3), p_rv3); - - Close(); -} - -// RocksDB lite does not support dynamic options -TEST_F(DBFlushTest, MemPurgeBasicToggle) { - Options options = CurrentOptions(); - - // The following options are used to enforce several values that - // may already exist as default values to make this test resilient - // to default value updates in the future. - options.statistics = CreateDBStatistics(); - - // Record all statistics. - options.statistics->set_stats_level(StatsLevel::kAll); - - // create the DB if it's not already present - options.create_if_missing = true; - - // Useful for now as we are trying to compare uncompressed data savings on - // flush(). - options.compression = kNoCompression; - - // Prevent memtable in place updates. Should already be disabled - // (from Wiki: - // In place updates can be enabled by toggling on the bool - // inplace_update_support flag. However, this flag is by default set to - // false - // because this thread-safe in-place update support is not compatible - // with concurrent memtable writes. Note that the bool - // allow_concurrent_memtable_write is set to true by default ) - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - - // Enforce size of a single MemTable to 64MB (64MB = 67108864 bytes). - options.write_buffer_size = 1 << 20; - // Initially deactivate the MemPurge prototype. - // (negative values are equivalent to 0.0). - options.experimental_mempurge_threshold = -25.3; - TestFlushListener* listener = new TestFlushListener(options.env, this); - options.listeners.emplace_back(listener); - - ASSERT_OK(TryReopen(options)); - // Dynamically activate the MemPurge prototype without restarting the DB. - ColumnFamilyHandle* cfh = db_->DefaultColumnFamily(); - // Values greater than 1.0 are equivalent to 1.0 - ASSERT_OK( - db_->SetOptions(cfh, {{"experimental_mempurge_threshold", "3.7898"}})); - std::atomic mempurge_count{0}; - std::atomic sst_count{0}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:MemPurgeSuccessful", - [&](void* /*arg*/) { mempurge_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - const size_t KVSIZE = 3; - std::vector KEYS(KVSIZE); - for (size_t k = 0; k < KVSIZE; k++) { - KEYS[k] = "IamKey" + std::to_string(k); - } - - std::vector RNDVALS(KVSIZE); - const std::string NOT_FOUND = "NOT_FOUND"; - - // Heavy overwrite workload, - // more than would fit in maximum allowed memtables. - Random rnd(719); - const size_t NUM_REPEAT = 100; - const size_t RAND_VALUES_LENGTH = 10240; - - // Insertion of of K-V pairs, multiple times (overwrites). 
- for (size_t i = 0; i < NUM_REPEAT; i++) { - for (size_t j = 0; j < KEYS.size(); j++) { - RNDVALS[j] = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEYS[j], RNDVALS[j])); - ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]); - } - for (size_t j = 0; j < KEYS.size(); j++) { - ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]); - } - } - - // Check that there was at least one mempurge - const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1; - // Check that there was no SST files created during flush. - const uint32_t EXPECTED_SST_COUNT = 0; - - EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT); - EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT); - - // Dynamically deactivate MemPurge. - ASSERT_OK( - db_->SetOptions(cfh, {{"experimental_mempurge_threshold", "-1023.0"}})); - - // Insertion of of K-V pairs, multiple times (overwrites). - for (size_t i = 0; i < NUM_REPEAT; i++) { - for (size_t j = 0; j < KEYS.size(); j++) { - RNDVALS[j] = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEYS[j], RNDVALS[j])); - ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]); - } - for (size_t j = 0; j < KEYS.size(); j++) { - ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]); - } - } - - // Check that there was at least one mempurge - const uint32_t ZERO = 0; - // Assert that at least one flush to storage has been performed - EXPECT_GT(sst_count.exchange(0), EXPECTED_SST_COUNT); - // The mempurge count is expected to be set to 0 when the options are updated. - // We expect no mempurge at all. - EXPECT_EQ(mempurge_count.exchange(0), ZERO); - - Close(); -} -// End of MemPurgeBasicToggle, which is not -// supported with RocksDB LITE because it -// relies on dynamically changing the option -// flag experimental_mempurge_threshold. - -// At the moment, MemPurge feature is deactivated -// when atomic_flush is enabled. This is because the level -// of garbage between Column Families is not guaranteed to -// be consistent, therefore a CF could hypothetically -// trigger a MemPurge while another CF would trigger -// a regular Flush. -TEST_F(DBFlushTest, MemPurgeWithAtomicFlush) { - Options options = CurrentOptions(); - - // The following options are used to enforce several values that - // may already exist as default values to make this test resilient - // to default value updates in the future. - options.statistics = CreateDBStatistics(); - - // Record all statistics. - options.statistics->set_stats_level(StatsLevel::kAll); - - // create the DB if it's not already present - options.create_if_missing = true; - - // Useful for now as we are trying to compare uncompressed data savings on - // flush(). - options.compression = kNoCompression; - - // Prevent memtable in place updates. Should already be disabled - // (from Wiki: - // In place updates can be enabled by toggling on the bool - // inplace_update_support flag. However, this flag is by default set to - // false - // because this thread-safe in-place update support is not compatible - // with concurrent memtable writes. Note that the bool - // allow_concurrent_memtable_write is set to true by default ) - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - - // Enforce size of a single MemTable to 64KB (64KB = 65,536 bytes). - options.write_buffer_size = 1 << 20; - // Activate the MemPurge prototype. - options.experimental_mempurge_threshold = 153.245; - // Activate atomic_flush. 
- options.atomic_flush = true; - - const std::vector new_cf_names = {"pikachu", "eevie"}; - CreateColumnFamilies(new_cf_names, options); - - Close(); - - // 3 CFs: default will be filled with overwrites (would normally trigger - // mempurge) - // new_cf_names[1] will be filled with random values (would trigger - // flush) new_cf_names[2] not filled with anything. - ReopenWithColumnFamilies( - {kDefaultColumnFamilyName, new_cf_names[0], new_cf_names[1]}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(3, num_cfs); - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Put(2, "bar", "baz")); - - std::atomic mempurge_count{0}; - std::atomic sst_count{0}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:MemPurgeSuccessful", - [&](void* /*arg*/) { mempurge_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - const size_t KVSIZE = 3; - std::vector KEYS(KVSIZE); - for (size_t k = 0; k < KVSIZE; k++) { - KEYS[k] = "IamKey" + std::to_string(k); - } - - std::string RNDKEY; - std::vector RNDVALS(KVSIZE); - const std::string NOT_FOUND = "NOT_FOUND"; - - // Heavy overwrite workload, - // more than would fit in maximum allowed memtables. - Random rnd(106); - const size_t NUM_REPEAT = 100; - const size_t RAND_KEY_LENGTH = 128; - const size_t RAND_VALUES_LENGTH = 10240; - - // Insertion of of K-V pairs, multiple times (overwrites). - for (size_t i = 0; i < NUM_REPEAT; i++) { - for (size_t j = 0; j < KEYS.size(); j++) { - RNDKEY = rnd.RandomString(RAND_KEY_LENGTH); - RNDVALS[j] = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEYS[j], RNDVALS[j])); - ASSERT_OK(Put(1, RNDKEY, RNDVALS[j])); - ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]); - ASSERT_EQ(Get(1, RNDKEY), RNDVALS[j]); - } - } - - // Check that there was no mempurge because atomic_flush option is true. - const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 0; - // Check that there was at least one SST files created during flush. - const uint32_t EXPECTED_SST_COUNT = 1; - - EXPECT_EQ(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT); - EXPECT_GE(sst_count.exchange(0), EXPECTED_SST_COUNT); - - Close(); -} - -TEST_F(DBFlushTest, MemPurgeDeleteAndDeleteRange) { - Options options = CurrentOptions(); - - options.statistics = CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kAll); - options.create_if_missing = true; - options.compression = kNoCompression; - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - TestFlushListener* listener = new TestFlushListener(options.env, this); - options.listeners.emplace_back(listener); - // Enforce size of a single MemTable to 64MB (64MB = 67108864 bytes). - options.write_buffer_size = 1 << 20; - // Activate the MemPurge prototype. 
- options.experimental_mempurge_threshold = 15.0; - - ASSERT_OK(TryReopen(options)); - - std::atomic mempurge_count{0}; - std::atomic sst_count{0}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:MemPurgeSuccessful", - [&](void* /*arg*/) { mempurge_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - std::string KEY1 = "ThisIsKey1"; - std::string KEY2 = "ThisIsKey2"; - std::string KEY3 = "ThisIsKey3"; - std::string KEY4 = "ThisIsKey4"; - std::string KEY5 = "ThisIsKey5"; - const std::string NOT_FOUND = "NOT_FOUND"; - - Random rnd(117); - const size_t NUM_REPEAT = 100; - const size_t RAND_VALUES_LENGTH = 10240; - - std::string key, value, p_v1, p_v2, p_v3, p_v3b, p_v4, p_v5; - int count = 0; - const int EXPECTED_COUNT_FORLOOP = 3; - const int EXPECTED_COUNT_END = 4; - - ReadOptions ropt; - ropt.pin_data = true; - ropt.total_order_seek = true; - Iterator* iter = nullptr; - - // Insertion of of K-V pairs, multiple times. - // Also insert DeleteRange - for (size_t i = 0; i < NUM_REPEAT; i++) { - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - p_v1 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v2 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v3 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v3b = rnd.RandomString(RAND_VALUES_LENGTH); - p_v4 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v5 = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEY1, p_v1)); - ASSERT_OK(Put(KEY2, p_v2)); - ASSERT_OK(Put(KEY3, p_v3)); - ASSERT_OK(Put(KEY4, p_v4)); - ASSERT_OK(Put(KEY5, p_v5)); - ASSERT_OK(Delete(KEY2)); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY2, - KEY4)); - ASSERT_OK(Put(KEY3, p_v3b)); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY1, - KEY3)); - ASSERT_OK(Delete(KEY1)); - - ASSERT_EQ(Get(KEY1), NOT_FOUND); - ASSERT_EQ(Get(KEY2), NOT_FOUND); - ASSERT_EQ(Get(KEY3), p_v3b); - ASSERT_EQ(Get(KEY4), p_v4); - ASSERT_EQ(Get(KEY5), p_v5); - - iter = db_->NewIterator(ropt); - iter->SeekToFirst(); - count = 0; - for (; iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - key = (iter->key()).ToString(false); - value = (iter->value()).ToString(false); - if (key.compare(KEY3) == 0) - ASSERT_EQ(value, p_v3b); - else if (key.compare(KEY4) == 0) - ASSERT_EQ(value, p_v4); - else if (key.compare(KEY5) == 0) - ASSERT_EQ(value, p_v5); - else - ASSERT_EQ(value, NOT_FOUND); - count++; - } - - // Expected count here is 3: KEY3, KEY4, KEY5. - ASSERT_EQ(count, EXPECTED_COUNT_FORLOOP); - if (iter) { - delete iter; - } - } - - // Check that there was at least one mempurge - const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1; - // Check that there was no SST files created during flush. - const uint32_t EXPECTED_SST_COUNT = 0; - - EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT); - EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT); - - // Additional test for the iterator+memPurge. 
- ASSERT_OK(Put(KEY2, p_v2)); - iter = db_->NewIterator(ropt); - iter->SeekToFirst(); - ASSERT_OK(Put(KEY4, p_v4)); - count = 0; - for (; iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - key = (iter->key()).ToString(false); - value = (iter->value()).ToString(false); - if (key.compare(KEY2) == 0) - ASSERT_EQ(value, p_v2); - else if (key.compare(KEY3) == 0) - ASSERT_EQ(value, p_v3b); - else if (key.compare(KEY4) == 0) - ASSERT_EQ(value, p_v4); - else if (key.compare(KEY5) == 0) - ASSERT_EQ(value, p_v5); - else - ASSERT_EQ(value, NOT_FOUND); - count++; - } - - // Expected count here is 4: KEY2, KEY3, KEY4, KEY5. - ASSERT_EQ(count, EXPECTED_COUNT_END); - if (iter) delete iter; - - Close(); -} - -// Create a Compaction Filter that will be invoked -// at flush time and will update the value of a KV pair -// if the key string is "lower" than the filtered_key_ string. -class ConditionalUpdateFilter : public CompactionFilter { - public: - explicit ConditionalUpdateFilter(const std::string* filtered_key) - : filtered_key_(filtered_key) {} - bool Filter(int /*level*/, const Slice& key, const Slice& /*value*/, - std::string* new_value, bool* value_changed) const override { - // If key < *filtered_key_, update the value of the KV pair. - if (key.compare(*filtered_key_) < 0) { - assert(new_value != nullptr); - *new_value = NEW_VALUE; - *value_changed = true; - } - // Do not remove the KV pair. - return false; - } - - const char* Name() const override { return "ConditionalUpdateFilter"; } - - private: - const std::string* filtered_key_ = nullptr; -}; - -class ConditionalUpdateFilterFactory : public CompactionFilterFactory { - public: - explicit ConditionalUpdateFilterFactory(const Slice& filtered_key) - : filtered_key_(filtered_key.ToString()) {} - - std::unique_ptr<CompactionFilter> CreateCompactionFilter( - const CompactionFilter::Context& /*context*/) override { - return std::unique_ptr<CompactionFilter>( - new ConditionalUpdateFilter(&filtered_key_)); - } - - const char* Name() const override { return "ConditionalUpdateFilterFactory"; } - - bool ShouldFilterTableFileCreation( - TableFileCreationReason reason) const override { - // This compaction filter will be invoked - // at flush time (and therefore at MemPurge time). - return (reason == TableFileCreationReason::kFlush); - } - - private: - std::string filtered_key_; -}; - -TEST_F(DBFlushTest, MemPurgeAndCompactionFilter) { - Options options = CurrentOptions(); - - std::string KEY1 = "ThisIsKey1"; - std::string KEY2 = "ThisIsKey2"; - std::string KEY3 = "ThisIsKey3"; - std::string KEY4 = "ThisIsKey4"; - std::string KEY5 = "ThisIsKey5"; - std::string KEY6 = "ThisIsKey6"; - std::string KEY7 = "ThisIsKey7"; - std::string KEY8 = "ThisIsKey8"; - std::string KEY9 = "ThisIsKey9"; - const std::string NOT_FOUND = "NOT_FOUND"; - - options.statistics = CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kAll); - options.create_if_missing = true; - options.compression = kNoCompression; - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - TestFlushListener* listener = new TestFlushListener(options.env, this); - options.listeners.emplace_back(listener); - // Create a ConditionalUpdate compaction filter - // that will update all the values of the KV pairs - // where the keys are "lower" than KEY4. - options.compaction_filter_factory = - std::make_shared<ConditionalUpdateFilterFactory>(KEY4); - - // Enforce size of a single MemTable to 1MB (1MB = 1048576 bytes). - options.write_buffer_size = 1 << 20; - // Activate the MemPurge prototype.
- options.experimental_mempurge_threshold = 26.55; - - ASSERT_OK(TryReopen(options)); - - std::atomic mempurge_count{0}; - std::atomic sst_count{0}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:MemPurgeSuccessful", - [&](void* /*arg*/) { mempurge_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(53); - const size_t NUM_REPEAT = 1000; - const size_t RAND_VALUES_LENGTH = 10240; - std::string p_v1, p_v2, p_v3, p_v4, p_v5, p_v6, p_v7, p_v8, p_v9; - - p_v1 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v2 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v3 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v4 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v5 = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEY1, p_v1)); - ASSERT_OK(Put(KEY2, p_v2)); - ASSERT_OK(Put(KEY3, p_v3)); - ASSERT_OK(Put(KEY4, p_v4)); - ASSERT_OK(Put(KEY5, p_v5)); - ASSERT_OK(Delete(KEY1)); - - // Insertion of of K-V pairs, multiple times. - for (size_t i = 0; i < NUM_REPEAT; i++) { - // Create value strings of arbitrary - // length RAND_VALUES_LENGTH bytes. - p_v6 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v7 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v8 = rnd.RandomString(RAND_VALUES_LENGTH); - p_v9 = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEY6, p_v6)); - ASSERT_OK(Put(KEY7, p_v7)); - ASSERT_OK(Put(KEY8, p_v8)); - ASSERT_OK(Put(KEY9, p_v9)); - - ASSERT_OK(Delete(KEY7)); - } - - // Check that there was at least one mempurge - const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1; - // Check that there was no SST files created during flush. - const uint32_t EXPECTED_SST_COUNT = 0; - - EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT); - EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT); - - // Verify that the ConditionalUpdateCompactionFilter - // updated the values of KEY2 and KEY3, and not KEY4 and KEY5. - ASSERT_EQ(Get(KEY1), NOT_FOUND); - ASSERT_EQ(Get(KEY2), NEW_VALUE); - ASSERT_EQ(Get(KEY3), NEW_VALUE); - ASSERT_EQ(Get(KEY4), p_v4); - ASSERT_EQ(Get(KEY5), p_v5); -} - -TEST_F(DBFlushTest, DISABLED_MemPurgeWALSupport) { - Options options = CurrentOptions(); - - options.statistics = CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kAll); - options.create_if_missing = true; - options.compression = kNoCompression; - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - - // Enforce size of a single MemTable to 128KB. - options.write_buffer_size = 128 << 10; - // Activate the MemPurge prototype - // (values >1.0 are equivalent to 1.0). 
- options.experimental_mempurge_threshold = 2.5; - - ASSERT_OK(TryReopen(options)); - - const size_t KVSIZE = 10; - - do { - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Put(1, "baz", "v5")); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("v1", Get(1, "foo")); - - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v5", Get(1, "baz")); - ASSERT_OK(Put(0, "bar", "v2")); - ASSERT_OK(Put(1, "bar", "v2")); - ASSERT_OK(Put(1, "foo", "v3")); - std::atomic mempurge_count{0}; - std::atomic sst_count{0}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:MemPurgeSuccessful", - [&](void* /*arg*/) { mempurge_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - std::vector keys; - for (size_t k = 0; k < KVSIZE; k++) { - keys.push_back("IamKey" + std::to_string(k)); - } - - std::string RNDKEY, RNDVALUE; - const std::string NOT_FOUND = "NOT_FOUND"; - - // Heavy overwrite workload, - // more than would fit in maximum allowed memtables. - Random rnd(719); - const size_t NUM_REPEAT = 100; - const size_t RAND_KEY_LENGTH = 4096; - const size_t RAND_VALUES_LENGTH = 1024; - std::vector values_default(KVSIZE), values_pikachu(KVSIZE); - - // Insert a very first set of keys that will be - // mempurged at least once. - for (size_t k = 0; k < KVSIZE / 2; k++) { - values_default[k] = rnd.RandomString(RAND_VALUES_LENGTH); - values_pikachu[k] = rnd.RandomString(RAND_VALUES_LENGTH); - } - - // Insert keys[0:KVSIZE/2] to - // both 'default' and 'pikachu' CFs. - for (size_t k = 0; k < KVSIZE / 2; k++) { - ASSERT_OK(Put(0, keys[k], values_default[k])); - ASSERT_OK(Put(1, keys[k], values_pikachu[k])); - } - - // Check that the insertion was seamless. - for (size_t k = 0; k < KVSIZE / 2; k++) { - ASSERT_EQ(Get(0, keys[k]), values_default[k]); - ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]); - } - - // Insertion of of K-V pairs, multiple times (overwrites) - // into 'default' CF. Will trigger mempurge. - for (size_t j = 0; j < NUM_REPEAT; j++) { - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - for (size_t k = KVSIZE / 2; k < KVSIZE; k++) { - values_default[k] = rnd.RandomString(RAND_VALUES_LENGTH); - } - - // Insert K-V into default CF. - for (size_t k = KVSIZE / 2; k < KVSIZE; k++) { - ASSERT_OK(Put(0, keys[k], values_default[k])); - } - - // Check key validity, for all keys, both in - // default and pikachu CFs. - for (size_t k = 0; k < KVSIZE; k++) { - ASSERT_EQ(Get(0, keys[k]), values_default[k]); - } - // Note that at this point, only keys[0:KVSIZE/2] - // have been inserted into Pikachu. - for (size_t k = 0; k < KVSIZE / 2; k++) { - ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]); - } - } - - // Insertion of of K-V pairs, multiple times (overwrites) - // into 'pikachu' CF. Will trigger mempurge. - // Check that we keep the older logs for 'default' imm(). - for (size_t j = 0; j < NUM_REPEAT; j++) { - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - for (size_t k = KVSIZE / 2; k < KVSIZE; k++) { - values_pikachu[k] = rnd.RandomString(RAND_VALUES_LENGTH); - } - - // Insert K-V into pikachu CF. - for (size_t k = KVSIZE / 2; k < KVSIZE; k++) { - ASSERT_OK(Put(1, keys[k], values_pikachu[k])); - } - - // Check key validity, for all keys, - // both in default and pikachu. 
- for (size_t k = 0; k < KVSIZE; k++) { - ASSERT_EQ(Get(0, keys[k]), values_default[k]); - ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]); - } - } - - // Check that there was at least one mempurge - const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1; - // Check that there was no SST files created during flush. - const uint32_t EXPECTED_SST_COUNT = 0; - - EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT); - if (options.experimental_mempurge_threshold == - std::numeric_limits::max()) { - EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT); - } - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - // Check that there was no data corruption anywhere, - // not in 'default' nor in 'Pikachu' CFs. - ASSERT_EQ("v3", Get(1, "foo")); - ASSERT_OK(Put(1, "foo", "v4")); - ASSERT_EQ("v4", Get(1, "foo")); - ASSERT_EQ("v2", Get(1, "bar")); - ASSERT_EQ("v5", Get(1, "baz")); - // Check keys in 'Default' and 'Pikachu'. - // keys[0:KVSIZE/2] were for sure contained - // in the imm() at Reopen/recovery time. - for (size_t k = 0; k < KVSIZE; k++) { - ASSERT_EQ(Get(0, keys[k]), values_default[k]); - ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]); - } - // Insertion of random K-V pairs to trigger - // a flush in the Pikachu CF. - for (size_t j = 0; j < NUM_REPEAT; j++) { - RNDKEY = rnd.RandomString(RAND_KEY_LENGTH); - RNDVALUE = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(1, RNDKEY, RNDVALUE)); - } - // ASsert than there was at least one flush to storage. - EXPECT_GT(sst_count.exchange(0), EXPECTED_SST_COUNT); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("v4", Get(1, "foo")); - ASSERT_EQ("v2", Get(1, "bar")); - ASSERT_EQ("v5", Get(1, "baz")); - // Since values in default are held in mutable mem() - // and imm(), check if the flush in pikachu didn't - // affect these values. - for (size_t k = 0; k < KVSIZE; k++) { - ASSERT_EQ(Get(0, keys[k]), values_default[k]); - ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]); - } - ASSERT_EQ(Get(1, RNDKEY), RNDVALUE); - } while (ChangeWalOptions()); -} - -TEST_F(DBFlushTest, MemPurgeCorrectLogNumberAndSSTFileCreation) { - // Before our bug fix, we noticed that when 2 memtables were - // being flushed (with one memtable being the output of a - // previous MemPurge and one memtable being a newly-sealed memtable), - // the SST file created was not properly added to the DB version - // (via the VersionEdit obj), leading to data loss (the SST file - // was later being purged as an obsolete file). - // Therefore, we reproduce this scenario to test our fix. - Options options = CurrentOptions(); - - options.create_if_missing = true; - options.compression = kNoCompression; - options.inplace_update_support = false; - options.allow_concurrent_memtable_write = true; - - // Enforce size of a single MemTable to 1MB (64MB = 1048576 bytes). - options.write_buffer_size = 1 << 20; - // Activate the MemPurge prototype. - options.experimental_mempurge_threshold = 1.0; - - // Force to have more than one memtable to trigger a flush. - // For some reason this option does not seem to be enforced, - // so the following test is designed to make sure that we - // are testing the correct test case. 
- options.min_write_buffer_number_to_merge = 3; - options.max_write_buffer_number = 5; - options.max_write_buffer_size_to_maintain = 2 * (options.write_buffer_size); - options.disable_auto_compactions = true; - ASSERT_OK(TryReopen(options)); - - std::atomic mempurge_count{0}; - std::atomic sst_count{0}; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:MemPurgeSuccessful", - [&](void* /*arg*/) { mempurge_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Dummy variable used for the following callback function. - uint64_t ZERO = 0; - // We will first execute mempurge operations exclusively. - // Therefore, when the first flush is triggered, we want to make - // sure there is at least 2 memtables being flushed: one output - // from a previous mempurge, and one newly sealed memtable. - // This is when we observed in the past that some SST files created - // were not properly added to the DB version (via the VersionEdit obj). - std::atomic num_memtable_at_first_flush(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "FlushJob::WriteLevel0Table:num_memtables", [&](void* arg) { - uint64_t* mems_size = reinterpret_cast(arg); - // atomic_compare_exchange_strong sometimes updates the value - // of ZERO (the "expected" object), so we make sure ZERO is indeed... - // zero. - ZERO = 0; - std::atomic_compare_exchange_strong(&num_memtable_at_first_flush, &ZERO, - *mems_size); - }); - - const std::vector KEYS = { - "ThisIsKey1", "ThisIsKey2", "ThisIsKey3", "ThisIsKey4", "ThisIsKey5", - "ThisIsKey6", "ThisIsKey7", "ThisIsKey8", "ThisIsKey9"}; - const std::string NOT_FOUND = "NOT_FOUND"; - - Random rnd(117); - const uint64_t NUM_REPEAT_OVERWRITES = 100; - const uint64_t NUM_RAND_INSERTS = 500; - const uint64_t RAND_VALUES_LENGTH = 10240; - - std::string key, value; - std::vector values(9, ""); - - // Keys used to check that no SST file disappeared. - for (uint64_t k = 0; k < 5; k++) { - values[k] = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEYS[k], values[k])); - } - - // Insertion of of K-V pairs, multiple times. - // Trigger at least one mempurge and no SST file creation. - for (size_t i = 0; i < NUM_REPEAT_OVERWRITES; i++) { - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - for (uint64_t k = 5; k < values.size(); k++) { - values[k] = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(KEYS[k], values[k])); - } - // Check database consistency. - for (uint64_t k = 0; k < values.size(); k++) { - ASSERT_EQ(Get(KEYS[k]), values[k]); - } - } - - // Check that there was at least one mempurge - uint32_t expected_min_mempurge_count = 1; - // Check that there was no SST files created during flush. - uint32_t expected_sst_count = 0; - EXPECT_GE(mempurge_count.load(), expected_min_mempurge_count); - EXPECT_EQ(sst_count.load(), expected_sst_count); - - // Trigger an SST file creation and no mempurge. - for (size_t i = 0; i < NUM_RAND_INSERTS; i++) { - key = rnd.RandomString(RAND_VALUES_LENGTH); - // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. - value = rnd.RandomString(RAND_VALUES_LENGTH); - ASSERT_OK(Put(key, value)); - // Check database consistency. 
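
The callback registered above receives, through its void* argument, the number of memtables picked for the flush. A sketch of that probe with the pointer and atomic types spelled out (helper name invented; include paths follow the RocksDB source layout; the original uses the free function std::atomic_compare_exchange_strong, which the member call below is equivalent to):

#include <atomic>
#include <cstdint>
#include "test_util/sync_point.h"

std::atomic<uint64_t> num_memtable_at_first_flush{0};

void RegisterFirstFlushProbe() {
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "FlushJob::WriteLevel0Table:num_memtables", [](void* arg) {
        const uint64_t* mems_size = static_cast<const uint64_t*>(arg);
        uint64_t expected_zero = 0;
        // Record only the first reported value, i.e. the first flush of the run.
        num_memtable_at_first_flush.compare_exchange_strong(expected_zero,
                                                            *mems_size);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
}

The consistency loop that follows re-reads every tracked key after each batch of writes, so any lost update would surface immediately.
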
- for (uint64_t k = 0; k < values.size(); k++) { - ASSERT_EQ(Get(KEYS[k]), values[k]); - } - ASSERT_EQ(Get(key), value); - } - - // Check that there was at least one SST files created during flush. - expected_sst_count = 1; - EXPECT_GE(sst_count.load(), expected_sst_count); - - // Oddly enough, num_memtable_at_first_flush is not enforced to be - // equal to min_write_buffer_number_to_merge. So by asserting that - // the first SST file creation comes from one output memtable - // from a previous mempurge, and one newly sealed memtable. This - // is the scenario where we observed that some SST files created - // were not properly added to the DB version before our bug fix. - ASSERT_GE(num_memtable_at_first_flush.load(), 2); - - // Check that no data was lost after SST file creation. - for (uint64_t k = 0; k < values.size(); k++) { - ASSERT_EQ(Get(KEYS[k]), values[k]); - } - // Extra check of database consistency. - ASSERT_EQ(Get(key), value); - - Close(); -} - -TEST_P(DBFlushDirectIOTest, DirectIO) { - Options options; - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.max_background_flushes = 2; - options.use_direct_io_for_flush_and_compaction = GetParam(); - options.env = MockEnv::Create(Env::Default()); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:create_file", [&](void* arg) { - bool* use_direct_writes = static_cast(arg); - ASSERT_EQ(*use_direct_writes, - options.use_direct_io_for_flush_and_compaction); - }); - - SyncPoint::GetInstance()->EnableProcessing(); - Reopen(options); - ASSERT_OK(Put("foo", "v")); - FlushOptions flush_options; - flush_options.wait = true; - ASSERT_OK(dbfull()->Flush(flush_options)); - Destroy(options); - delete options.env; -} - -TEST_F(DBFlushTest, FlushError) { - Options options; - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - options.write_buffer_size = 100; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - options.disable_auto_compactions = true; - options.env = fault_injection_env.get(); - Reopen(options); - - ASSERT_OK(Put("key1", "value1")); - ASSERT_OK(Put("key2", "value2")); - fault_injection_env->SetFilesystemActive(false); - Status s = dbfull()->TEST_SwitchMemtable(); - fault_injection_env->SetFilesystemActive(true); - Destroy(options); - ASSERT_NE(s, Status::OK()); -} - -TEST_F(DBFlushTest, ManualFlushFailsInReadOnlyMode) { - // Regression test for bug where manual flush hangs forever when the DB - // is in read-only mode. Verify it now at least returns, despite failing. - Options options; - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - options.env = fault_injection_env.get(); - options.max_write_buffer_number = 2; - Reopen(options); - - // Trigger a first flush but don't let it run - ASSERT_OK(db_->PauseBackgroundWork()); - ASSERT_OK(Put("key1", "value1")); - FlushOptions flush_opts; - flush_opts.wait = false; - ASSERT_OK(db_->Flush(flush_opts)); - - // Write a key to the second memtable so we have something to flush later - // after the DB is in read-only mode. - ASSERT_OK(Put("key2", "value2")); - - // Let the first flush continue, hit an error, and put the DB in read-only - // mode. - fault_injection_env->SetFilesystemActive(false); - ASSERT_OK(db_->ContinueBackgroundWork()); - // We ingested the error to env, so the returned status is not OK. 
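
FlushError above is the simplest instance of the fault-injection pattern used throughout the rest of this file. A minimal sketch of that pattern against the public API (assumptions: the helper name is invented, the test-only header utilities/fault_injection_env.h is available, and the DB was opened with options.env pointing at the wrapped environment and has unflushed data):

#include <cassert>
#include "rocksdb/db.h"
#include "utilities/fault_injection_env.h"

void FailNextFlush(ROCKSDB_NAMESPACE::FaultInjectionTestEnv* fault_env,
                   ROCKSDB_NAMESPACE::DB* db) {
  // Turn the wrapped filesystem off: subsequent writes return I/O errors,
  // so the flush below must come back with a non-OK status.
  fault_env->SetFilesystemActive(false);
  ROCKSDB_NAMESPACE::Status s = db->Flush(ROCKSDB_NAMESPACE::FlushOptions());
  assert(!s.ok());
  // Reactivate so the test can destroy the DB cleanly afterwards.
  fault_env->SetFilesystemActive(true);
}

In the read-only-mode test above the same switch is flipped while a paused background flush is allowed to resume, which is what drives the database into its background-error state before the ASSERT_NOK checks that follow.
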
- ASSERT_NOK(dbfull()->TEST_WaitForFlushMemTable()); - uint64_t num_bg_errors; - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBackgroundErrors, &num_bg_errors)); - ASSERT_GT(num_bg_errors, 0); - - // In the bug scenario, triggering another flush would cause the second flush - // to hang forever. After the fix we expect it to return an error. - ASSERT_NOK(db_->Flush(FlushOptions())); - - Close(); -} - -TEST_F(DBFlushTest, CFDropRaceWithWaitForFlushMemTables) { - Options options = CurrentOptions(); - options.create_if_missing = true; - CreateAndReopenWithCF({"pikachu"}, options); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:AfterScheduleFlush", - "DBFlushTest::CFDropRaceWithWaitForFlushMemTables:BeforeDrop"}, - {"DBFlushTest::CFDropRaceWithWaitForFlushMemTables:AfterFree", - "DBImpl::BackgroundCallFlush:start"}, - {"DBImpl::BackgroundCallFlush:start", - "DBImpl::FlushMemTable:BeforeWaitForBgFlush"}}); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_EQ(2, handles_.size()); - ASSERT_OK(Put(1, "key", "value")); - auto* cfd = static_cast(handles_[1])->cfd(); - port::Thread drop_cf_thr([&]() { - TEST_SYNC_POINT( - "DBFlushTest::CFDropRaceWithWaitForFlushMemTables:BeforeDrop"); - ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1])); - handles_.resize(1); - TEST_SYNC_POINT( - "DBFlushTest::CFDropRaceWithWaitForFlushMemTables:AfterFree"); - }); - FlushOptions flush_opts; - flush_opts.allow_write_stall = true; - ASSERT_NOK(dbfull()->TEST_FlushMemTable(cfd, flush_opts)); - drop_cf_thr.join(); - Close(); - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBFlushTest, FireOnFlushCompletedAfterCommittedResult) { - class TestListener : public EventListener { - public: - void OnFlushCompleted(DB* db, const FlushJobInfo& info) override { - // There's only one key in each flush. - ASSERT_EQ(info.smallest_seqno, info.largest_seqno); - ASSERT_NE(0, info.smallest_seqno); - if (info.smallest_seqno == seq1) { - // First flush completed - ASSERT_FALSE(completed1); - completed1 = true; - CheckFlushResultCommitted(db, seq1); - } else { - // Second flush completed - ASSERT_FALSE(completed2); - completed2 = true; - ASSERT_EQ(info.smallest_seqno, seq2); - CheckFlushResultCommitted(db, seq2); - } - } - - void CheckFlushResultCommitted(DB* db, SequenceNumber seq) { - DBImpl* db_impl = static_cast_with_check(db); - InstrumentedMutex* mutex = db_impl->mutex(); - mutex->Lock(); - auto* cfd = static_cast_with_check( - db->DefaultColumnFamily()) - ->cfd(); - ASSERT_LT(seq, cfd->imm()->current()->GetEarliestSequenceNumber()); - mutex->Unlock(); - } - - std::atomic seq1{0}; - std::atomic seq2{0}; - std::atomic completed1{false}; - std::atomic completed2{false}; - }; - std::shared_ptr listener = std::make_shared(); - - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTableToOutputFile:AfterPickMemtables", - "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:WaitFirst"}, - {"DBImpl::FlushMemTableToOutputFile:Finish", - "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:WaitSecond"}}); - SyncPoint::GetInstance()->SetCallBack( - "FlushJob::WriteLevel0Table", [&listener](void* arg) { - // Wait for the second flush finished, out of mutex. 
- auto* mems = reinterpret_cast*>(arg); - if (mems->front()->GetEarliestSequenceNumber() == listener->seq1 - 1) { - TEST_SYNC_POINT( - "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:" - "WaitSecond"); - } - }); - - Options options = CurrentOptions(); - options.create_if_missing = true; - options.listeners.push_back(listener); - // Setting max_flush_jobs = max_background_jobs / 4 = 2. - options.max_background_jobs = 8; - // Allow 2 immutable memtables. - options.max_write_buffer_number = 3; - Reopen(options); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put("foo", "v")); - listener->seq1 = db_->GetLatestSequenceNumber(); - // t1 will wait for the second flush complete before committing flush result. - auto t1 = port::Thread([&]() { - // flush_opts.wait = true - ASSERT_OK(db_->Flush(FlushOptions())); - }); - // Wait for first flush started. - TEST_SYNC_POINT( - "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:WaitFirst"); - // The second flush will exit early without commit its result. The work - // is delegated to the first flush. - ASSERT_OK(Put("bar", "v")); - listener->seq2 = db_->GetLatestSequenceNumber(); - FlushOptions flush_opts; - flush_opts.wait = false; - ASSERT_OK(db_->Flush(flush_opts)); - t1.join(); - // Ensure background work is fully finished including listener callbacks - // before accessing listener state. - ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork()); - ASSERT_TRUE(listener->completed1); - ASSERT_TRUE(listener->completed2); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(DBFlushTest, FlushWithBlob) { - constexpr uint64_t min_blob_size = 10; - - Options options; - options.enable_blob_files = true; - options.min_blob_size = min_blob_size; - options.disable_auto_compactions = true; - options.env = env_; - - Reopen(options); - - constexpr char short_value[] = "short"; - static_assert(sizeof(short_value) - 1 < min_blob_size, - "short_value too long"); - - constexpr char long_value[] = "long_value"; - static_assert(sizeof(long_value) - 1 >= min_blob_size, - "long_value too short"); - - ASSERT_OK(Put("key1", short_value)); - ASSERT_OK(Put("key2", long_value)); - - ASSERT_OK(Flush()); - - ASSERT_EQ(Get("key1"), short_value); - ASSERT_EQ(Get("key2"), long_value); - - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - assert(cfd); - - Version* const current = cfd->current(); - assert(current); - - const VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - - const auto& l0_files = storage_info->LevelFiles(0); - ASSERT_EQ(l0_files.size(), 1); - - const FileMetaData* const table_file = l0_files[0]; - assert(table_file); - - const auto& blob_files = storage_info->GetBlobFiles(); - ASSERT_EQ(blob_files.size(), 1); - - const auto& blob_file = blob_files.front(); - assert(blob_file); - - ASSERT_EQ(table_file->smallest.user_key(), "key1"); - ASSERT_EQ(table_file->largest.user_key(), "key2"); - ASSERT_EQ(table_file->fd.smallest_seqno, 1); - ASSERT_EQ(table_file->fd.largest_seqno, 2); - ASSERT_EQ(table_file->oldest_blob_file_number, - blob_file->GetBlobFileNumber()); - - ASSERT_EQ(blob_file->GetTotalBlobCount(), 1); - - const InternalStats* const internal_stats = cfd->internal_stats(); - assert(internal_stats); - - const auto& compaction_stats = internal_stats->TEST_GetCompactionStats(); - ASSERT_FALSE(compaction_stats.empty()); - 
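
FlushWithBlob exercises the integrated blob-file path; the relevant configuration is small enough to restate as a sketch (helper name invented, values mirror the test: a value at least min_blob_size bytes long goes to a blob file during flush, shorter values stay inline in the SST):

#include "rocksdb/options.h"

ROCKSDB_NAMESPACE::Options MakeBlobFlushOptions() {
  ROCKSDB_NAMESPACE::Options options;
  options.create_if_missing = true;
  options.enable_blob_files = true;  // write large values to blob files at flush
  options.min_blob_size = 10;        // bytes; "short" (5 chars) stays in the SST
  options.disable_auto_compactions = true;  // keep the single L0 file observable
  return options;
}

The assertions that follow pin down exactly that split: one table file, one blob file, and flush statistics whose bytes_written and bytes_written_blob add up to the column family's BYTES_FLUSHED counter.
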
ASSERT_EQ(compaction_stats[0].bytes_written, table_file->fd.GetFileSize()); - ASSERT_EQ(compaction_stats[0].bytes_written_blob, - blob_file->GetTotalBlobBytes()); - ASSERT_EQ(compaction_stats[0].num_output_files, 1); - ASSERT_EQ(compaction_stats[0].num_output_files_blob, 1); - - const uint64_t* const cf_stats_value = internal_stats->TEST_GetCFStatsValue(); - ASSERT_EQ(cf_stats_value[InternalStats::BYTES_FLUSHED], - compaction_stats[0].bytes_written + - compaction_stats[0].bytes_written_blob); -} - -TEST_F(DBFlushTest, FlushWithChecksumHandoff1) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - Options options = CurrentOptions(); - options.write_buffer_size = 100; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - options.disable_auto_compactions = true; - options.env = fault_fs_env.get(); - options.checksum_handoff_file_types.Add(FileType::kTableFile); - Reopen(options); - - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - ASSERT_OK(Put("key1", "value1")); - ASSERT_OK(Put("key2", "value2")); - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - - // The hash does not match, write fails - // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - // Since the file system returns IOStatus::Corruption, it is an - // unrecoverable error. - SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - }); - ASSERT_OK(Put("key3", "value3")); - ASSERT_OK(Put("key4", "value4")); - SyncPoint::GetInstance()->EnableProcessing(); - Status s = Flush(); - ASSERT_EQ(s.severity(), - ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError); - SyncPoint::GetInstance()->DisableProcessing(); - Destroy(options); - Reopen(options); - - // The file system does not support checksum handoff. The check - // will be ignored. - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum); - ASSERT_OK(Put("key5", "value5")); - ASSERT_OK(Put("key6", "value6")); - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - - // Each write will be similated as corrupted. - // Since the file system returns IOStatus::Corruption, it is an - // unrecoverable error. 
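
The checksum-handoff tests in this group differ mainly in which file types are covered; a sketch of the knob itself (helper name invented, include paths assumed; the FaultInjectionTestFS used above both verifies the handed-off checksum and, on request, corrupts the data before it is written):

#include "rocksdb/options.h"
#include "rocksdb/types.h"

ROCKSDB_NAMESPACE::Options MakeChecksumHandoffOptions() {
  ROCKSDB_NAMESPACE::Options options;
  // Hand the pre-computed checksum of SST writes to the FileSystem layer.
  options.checksum_handoff_file_types.Add(ROCKSDB_NAMESPACE::FileType::kTableFile);
  // Cover MANIFEST (descriptor) writes as well, as in the Manifest variants
  // further down.
  options.checksum_handoff_file_types.Add(ROCKSDB_NAMESPACE::FileType::kDescriptorFile);
  return options;
}

If checksum_handoff_file_types is left unset, as in FlushWithChecksumHandoff2 below, the handoff is simply not triggered and the injected mismatch passes through by design.
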
- fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { - fault_fs->IngestDataCorruptionBeforeWrite(); - }); - ASSERT_OK(Put("key7", "value7")); - ASSERT_OK(Put("key8", "value8")); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), - ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError); - SyncPoint::GetInstance()->DisableProcessing(); - - Destroy(options); -} - -TEST_F(DBFlushTest, FlushWithChecksumHandoff2) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - Options options = CurrentOptions(); - options.write_buffer_size = 100; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - options.disable_auto_compactions = true; - options.env = fault_fs_env.get(); - Reopen(options); - - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - ASSERT_OK(Put("key1", "value1")); - ASSERT_OK(Put("key2", "value2")); - ASSERT_OK(Flush()); - - // options is not set, the checksum handoff will not be triggered - SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - }); - ASSERT_OK(Put("key3", "value3")); - ASSERT_OK(Put("key4", "value4")); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Flush()); - SyncPoint::GetInstance()->DisableProcessing(); - Destroy(options); - Reopen(options); - - // The file system does not support checksum handoff. The check - // will be ignored. - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum); - ASSERT_OK(Put("key5", "value5")); - ASSERT_OK(Put("key6", "value6")); - ASSERT_OK(Flush()); - - // options is not set, the checksum handoff will not be triggered - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { - fault_fs->IngestDataCorruptionBeforeWrite(); - }); - ASSERT_OK(Put("key7", "value7")); - ASSERT_OK(Put("key8", "value8")); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Flush()); - SyncPoint::GetInstance()->DisableProcessing(); - - Destroy(options); -} - -TEST_F(DBFlushTest, FlushWithChecksumHandoffManifest1) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - Options options = CurrentOptions(); - options.write_buffer_size = 100; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - options.disable_auto_compactions = true; - options.env = fault_fs_env.get(); - options.checksum_handoff_file_types.Add(FileType::kDescriptorFile); - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - Reopen(options); - - ASSERT_OK(Put("key1", "value1")); - ASSERT_OK(Put("key2", "value2")); - ASSERT_OK(Flush()); - - // The hash does not match, write fails - // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - // Since the file system returns IOStatus::Corruption, it is mapped to - // kFatalError error. 
- ASSERT_OK(Put("key3", "value3")); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void*) { - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - }); - ASSERT_OK(Put("key3", "value3")); - ASSERT_OK(Put("key4", "value4")); - SyncPoint::GetInstance()->EnableProcessing(); - Status s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError); - SyncPoint::GetInstance()->DisableProcessing(); - Destroy(options); -} - -TEST_F(DBFlushTest, FlushWithChecksumHandoffManifest2) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - Options options = CurrentOptions(); - options.write_buffer_size = 100; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - options.disable_auto_compactions = true; - options.env = fault_fs_env.get(); - options.checksum_handoff_file_types.Add(FileType::kDescriptorFile); - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum); - Reopen(options); - // The file system does not support checksum handoff. The check - // will be ignored. - ASSERT_OK(Put("key5", "value5")); - ASSERT_OK(Put("key6", "value6")); - ASSERT_OK(Flush()); - - // Each write will be similated as corrupted. - // Since the file system returns IOStatus::Corruption, it is mapped to - // kFatalError error. - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", - [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); }); - ASSERT_OK(Put("key7", "value7")); - ASSERT_OK(Put("key8", "value8")); - SyncPoint::GetInstance()->EnableProcessing(); - Status s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError); - SyncPoint::GetInstance()->DisableProcessing(); - - Destroy(options); -} - -TEST_F(DBFlushTest, PickRightMemtables) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - options.create_if_missing = true; - - const std::string test_cf_name = "test_cf"; - options.max_write_buffer_number = 128; - CreateColumnFamilies({test_cf_name}, options); - - Close(); - - ReopenWithColumnFamilies({kDefaultColumnFamilyName, test_cf_name}, options); - - ASSERT_OK(db_->Put(WriteOptions(), "key", "value")); - - ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "key", "value")); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::SyncClosedLogs:BeforeReLock", [&](void* /*arg*/) { - ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "what", "v")); - auto* cfhi = - static_cast_with_check(handles_[1]); - assert(cfhi); - ASSERT_OK(dbfull()->TEST_SwitchMemtable(cfhi->cfd())); - }); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushMemTableToOutputFile:AfterPickMemtables", [&](void* arg) { - auto* job = reinterpret_cast(arg); - assert(job); - const auto& mems = job->GetMemTables(); - assert(mems.size() == 1); - assert(mems[0]); - ASSERT_EQ(1, mems[0]->GetID()); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(db_->Flush(FlushOptions(), handles_[1])); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -class DBFlushTestBlobError : public DBFlushTest, - public testing::WithParamInterface { - 
public: - DBFlushTestBlobError() : sync_point_(GetParam()) {} - - std::string sync_point_; -}; - -INSTANTIATE_TEST_CASE_P(DBFlushTestBlobError, DBFlushTestBlobError, - ::testing::ValuesIn(std::vector{ - "BlobFileBuilder::WriteBlobToFile:AddRecord", - "BlobFileBuilder::WriteBlobToFile:AppendFooter"})); - -TEST_P(DBFlushTestBlobError, FlushError) { - Options options; - options.enable_blob_files = true; - options.disable_auto_compactions = true; - options.env = env_; - - Reopen(options); - - ASSERT_OK(Put("key", "blob")); - - SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* arg) { - Status* const s = static_cast(arg); - assert(s); - - (*s) = Status::IOError(sync_point_); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_NOK(Flush()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - assert(cfd); - - Version* const current = cfd->current(); - assert(current); - - const VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - - const auto& l0_files = storage_info->LevelFiles(0); - ASSERT_TRUE(l0_files.empty()); - - const auto& blob_files = storage_info->GetBlobFiles(); - ASSERT_TRUE(blob_files.empty()); - - // Make sure the files generated by the failed job have been deleted - std::vector files; - ASSERT_OK(env_->GetChildren(dbname_, &files)); - for (const auto& file : files) { - uint64_t number = 0; - FileType type = kTableFile; - - if (!ParseFileName(file, &number, &type)) { - continue; - } - - ASSERT_NE(type, kTableFile); - ASSERT_NE(type, kBlobFile); - } - - const InternalStats* const internal_stats = cfd->internal_stats(); - assert(internal_stats); - - const auto& compaction_stats = internal_stats->TEST_GetCompactionStats(); - ASSERT_FALSE(compaction_stats.empty()); - - if (sync_point_ == "BlobFileBuilder::WriteBlobToFile:AddRecord") { - ASSERT_EQ(compaction_stats[0].bytes_written, 0); - ASSERT_EQ(compaction_stats[0].bytes_written_blob, 0); - ASSERT_EQ(compaction_stats[0].num_output_files, 0); - ASSERT_EQ(compaction_stats[0].num_output_files_blob, 0); - } else { - // SST file writing succeeded; blob file writing failed (during Finish) - ASSERT_GT(compaction_stats[0].bytes_written, 0); - ASSERT_EQ(compaction_stats[0].bytes_written_blob, 0); - ASSERT_EQ(compaction_stats[0].num_output_files, 1); - ASSERT_EQ(compaction_stats[0].num_output_files_blob, 0); - } - - const uint64_t* const cf_stats_value = internal_stats->TEST_GetCFStatsValue(); - ASSERT_EQ(cf_stats_value[InternalStats::BYTES_FLUSHED], - compaction_stats[0].bytes_written + - compaction_stats[0].bytes_written_blob); -} - -TEST_F(DBFlushTest, TombstoneVisibleInSnapshot) { - class SimpleTestFlushListener : public EventListener { - public: - explicit SimpleTestFlushListener(DBFlushTest* _test) : test_(_test) {} - ~SimpleTestFlushListener() override {} - - void OnFlushBegin(DB* db, const FlushJobInfo& info) override { - ASSERT_EQ(static_cast(0), info.cf_id); - - ASSERT_OK(db->Delete(WriteOptions(), "foo")); - snapshot_ = db->GetSnapshot(); - ASSERT_OK(db->Put(WriteOptions(), "foo", "value")); - - auto* dbimpl = static_cast_with_check(db); - assert(dbimpl); - - ColumnFamilyHandle* cfh = db->DefaultColumnFamily(); - auto* cfhi = static_cast_with_check(cfh); - assert(cfhi); - ASSERT_OK(dbimpl->TEST_SwitchMemtable(cfhi->cfd())); - } - - DBFlushTest* test_ = nullptr; - 
const Snapshot* snapshot_ = nullptr; - }; - - Options options = CurrentOptions(); - options.create_if_missing = true; - auto* listener = new SimpleTestFlushListener(this); - options.listeners.emplace_back(listener); - DestroyAndReopen(options); - - ASSERT_OK(db_->Put(WriteOptions(), "foo", "value0")); - - ManagedSnapshot snapshot_guard(db_); - - ColumnFamilyHandle* default_cf = db_->DefaultColumnFamily(); - ASSERT_OK(db_->Flush(FlushOptions(), default_cf)); - - const Snapshot* snapshot = listener->snapshot_; - assert(snapshot); - - ReadOptions read_opts; - read_opts.snapshot = snapshot; - - // Using snapshot should not see "foo". - { - std::string value; - Status s = db_->Get(read_opts, "foo", &value); - ASSERT_TRUE(s.IsNotFound()); - } - - db_->ReleaseSnapshot(snapshot); -} - -TEST_P(DBAtomicFlushTest, ManualFlushUnder2PC) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.allow_2pc = true; - options.atomic_flush = GetParam(); - // 64MB so that memtable flush won't be trigger by the small writes. - options.write_buffer_size = (static_cast(64) << 20); - auto flush_listener = std::make_shared(); - flush_listener->expected_flush_reason = FlushReason::kManualFlush; - options.listeners.push_back(flush_listener); - // Destroy the DB to recreate as a TransactionDB. - Close(); - Destroy(options, true); - - // Create a TransactionDB. - TransactionDB* txn_db = nullptr; - TransactionDBOptions txn_db_opts; - txn_db_opts.write_policy = TxnDBWritePolicy::WRITE_COMMITTED; - ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, &txn_db)); - ASSERT_NE(txn_db, nullptr); - db_ = txn_db; - - // Create two more columns other than default CF. - std::vector cfs = {"puppy", "kitty"}; - CreateColumnFamilies(cfs, options); - ASSERT_EQ(handles_.size(), 2); - ASSERT_EQ(handles_[0]->GetName(), cfs[0]); - ASSERT_EQ(handles_[1]->GetName(), cfs[1]); - const size_t kNumCfToFlush = options.atomic_flush ? 2 : 1; - - WriteOptions wopts; - TransactionOptions txn_opts; - // txn1 only prepare, but does not commit. - // The WAL containing the prepared but uncommitted data must be kept. - Transaction* txn1 = txn_db->BeginTransaction(wopts, txn_opts, nullptr); - // txn2 not only prepare, but also commit. - Transaction* txn2 = txn_db->BeginTransaction(wopts, txn_opts, nullptr); - ASSERT_NE(txn1, nullptr); - ASSERT_NE(txn2, nullptr); - for (size_t i = 0; i < kNumCfToFlush; i++) { - ASSERT_OK(txn1->Put(handles_[i], "k1", "v1")); - ASSERT_OK(txn2->Put(handles_[i], "k2", "v2")); - } - // A txn must be named before prepare. - ASSERT_OK(txn1->SetName("txn1")); - ASSERT_OK(txn2->SetName("txn2")); - // Prepare writes to WAL, but not to memtable. (WriteCommitted) - ASSERT_OK(txn1->Prepare()); - ASSERT_OK(txn2->Prepare()); - // Commit writes to memtable. - ASSERT_OK(txn2->Commit()); - delete txn1; - delete txn2; - - // There are still data in memtable not flushed. - // But since data is small enough to reside in the active memtable, - // there are no immutable memtable. - for (size_t i = 0; i < kNumCfToFlush; i++) { - auto cfh = static_cast(handles_[i]); - ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed()); - ASSERT_FALSE(cfh->cfd()->mem()->IsEmpty()); - } - - // Atomic flush memtables, - // the min log with prepared data should be written to MANIFEST. - std::vector cfs_to_flush(kNumCfToFlush); - for (size_t i = 0; i < kNumCfToFlush; i++) { - cfs_to_flush[i] = handles_[i]; - } - ASSERT_OK(txn_db->Flush(FlushOptions(), cfs_to_flush)); - - // There are no remaining data in memtable after flush. 
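
ManualFlushUnder2PC needs a TransactionDB because only two-phase commit leaves prepared-but-uncommitted data sitting in a WAL that must not be truncated. A sketch of just the opening step, with error handling elided (helper name invented; the options and open call are the same ones used above; the path is whatever directory the caller owns):

#include <cassert>
#include <string>
#include "rocksdb/utilities/transaction_db.h"

ROCKSDB_NAMESPACE::TransactionDB* OpenWriteCommitted2PCDB(
    ROCKSDB_NAMESPACE::Options options, const std::string& dbname) {
  options.create_if_missing = true;
  options.allow_2pc = true;  // track the minimum WAL still holding prepared data
  ROCKSDB_NAMESPACE::TransactionDBOptions txn_db_opts;
  txn_db_opts.write_policy = ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED;
  ROCKSDB_NAMESPACE::TransactionDB* txn_db = nullptr;
  ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::TransactionDB::Open(
      options, txn_db_opts, dbname, &txn_db);
  assert(s.ok());
  return txn_db;
}

The checks that follow then verify that the flush emptied the memtables and that the min-log-number recorded in the MANIFEST is non-zero, i.e. the WAL containing txn1's prepared write is still being kept.
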
- for (size_t i = 0; i < kNumCfToFlush; i++) { - auto cfh = static_cast(handles_[i]); - ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed()); - ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty()); - } - - // The recovered min log number with prepared data should be non-zero. - // In 2pc mode, MinLogNumberToKeep returns the - // VersionSet::min_log_number_to_keep recovered from MANIFEST, if it's 0, - // it means atomic flush didn't write the min_log_number_to_keep to MANIFEST. - cfs.push_back(kDefaultColumnFamilyName); - ASSERT_OK(TryReopenWithColumnFamilies(cfs, options)); - DBImpl* db_impl = reinterpret_cast(db_); - ASSERT_TRUE(db_impl->allow_2pc()); - ASSERT_NE(db_impl->MinLogNumberToKeep(), 0); -} - -TEST_P(DBAtomicFlushTest, ManualAtomicFlush) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.atomic_flush = GetParam(); - options.write_buffer_size = (static_cast(64) << 20); - auto flush_listener = std::make_shared(); - flush_listener->expected_flush_reason = FlushReason::kManualFlush; - options.listeners.push_back(flush_listener); - - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(3, num_cfs); - WriteOptions wopts; - wopts.disableWAL = true; - for (size_t i = 0; i != num_cfs; ++i) { - ASSERT_OK(Put(static_cast(i) /*cf*/, "key", "value", wopts)); - } - - for (size_t i = 0; i != num_cfs; ++i) { - auto cfh = static_cast(handles_[i]); - ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed()); - ASSERT_FALSE(cfh->cfd()->mem()->IsEmpty()); - } - - std::vector cf_ids; - for (size_t i = 0; i != num_cfs; ++i) { - cf_ids.emplace_back(static_cast(i)); - } - ASSERT_OK(Flush(cf_ids)); - - for (size_t i = 0; i != num_cfs; ++i) { - auto cfh = static_cast(handles_[i]); - ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed()); - ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty()); - } -} - -TEST_P(DBAtomicFlushTest, PrecomputeMinLogNumberToKeepNon2PC) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.atomic_flush = GetParam(); - options.write_buffer_size = (static_cast(64) << 20); - CreateAndReopenWithCF({"pikachu"}, options); - - const size_t num_cfs = handles_.size(); - ASSERT_EQ(num_cfs, 2); - WriteOptions wopts; - for (size_t i = 0; i != num_cfs; ++i) { - ASSERT_OK(Put(static_cast(i) /*cf*/, "key", "value", wopts)); - } - - { - // Flush the default CF only. - std::vector cf_ids{0}; - ASSERT_OK(Flush(cf_ids)); - - autovector flushed_cfds; - autovector> flush_edits; - auto flushed_cfh = static_cast(handles_[0]); - flushed_cfds.push_back(flushed_cfh->cfd()); - flush_edits.push_back({}); - auto unflushed_cfh = static_cast(handles_[1]); - - ASSERT_EQ(PrecomputeMinLogNumberToKeepNon2PC(dbfull()->GetVersionSet(), - flushed_cfds, flush_edits), - unflushed_cfh->cfd()->GetLogNumber()); - } - - { - // Flush all CFs. 
- std::vector cf_ids; - for (size_t i = 0; i != num_cfs; ++i) { - cf_ids.emplace_back(static_cast(i)); - } - ASSERT_OK(Flush(cf_ids)); - uint64_t log_num_after_flush = dbfull()->TEST_GetCurrentLogNumber(); - - uint64_t min_log_number_to_keep = std::numeric_limits::max(); - autovector flushed_cfds; - autovector> flush_edits; - for (size_t i = 0; i != num_cfs; ++i) { - auto cfh = static_cast(handles_[i]); - flushed_cfds.push_back(cfh->cfd()); - flush_edits.push_back({}); - min_log_number_to_keep = - std::min(min_log_number_to_keep, cfh->cfd()->GetLogNumber()); - } - ASSERT_EQ(min_log_number_to_keep, log_num_after_flush); - ASSERT_EQ(PrecomputeMinLogNumberToKeepNon2PC(dbfull()->GetVersionSet(), - flushed_cfds, flush_edits), - min_log_number_to_keep); - } -} - -TEST_P(DBAtomicFlushTest, AtomicFlushTriggeredByMemTableFull) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.atomic_flush = GetParam(); - // 4KB so that we can easily trigger auto flush. - options.write_buffer_size = 4096; - - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BackgroundCallFlush:FlushFinish:0", - "DBAtomicFlushTest::AtomicFlushTriggeredByMemTableFull:BeforeCheck"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(3, num_cfs); - WriteOptions wopts; - wopts.disableWAL = true; - for (size_t i = 0; i != num_cfs; ++i) { - ASSERT_OK(Put(static_cast(i) /*cf*/, "key", "value", wopts)); - } - // Keep writing to one of them column families to trigger auto flush. - for (int i = 0; i != 4000; ++i) { - ASSERT_OK(Put(static_cast(num_cfs) - 1 /*cf*/, - "key" + std::to_string(i), "value" + std::to_string(i), - wopts)); - } - - TEST_SYNC_POINT( - "DBAtomicFlushTest::AtomicFlushTriggeredByMemTableFull:BeforeCheck"); - if (options.atomic_flush) { - for (size_t i = 0; i + 1 != num_cfs; ++i) { - auto cfh = static_cast(handles_[i]); - ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed()); - ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty()); - } - } else { - for (size_t i = 0; i + 1 != num_cfs; ++i) { - auto cfh = static_cast(handles_[i]); - ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed()); - ASSERT_FALSE(cfh->cfd()->mem()->IsEmpty()); - } - } - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DBAtomicFlushTest, AtomicFlushRollbackSomeJobs) { - bool atomic_flush = GetParam(); - if (!atomic_flush) { - return; - } - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - Options options = CurrentOptions(); - options.create_if_missing = true; - options.atomic_flush = atomic_flush; - options.env = fault_injection_env.get(); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:1", - "DBAtomicFlushTest::AtomicFlushRollbackSomeJobs:1"}, - {"DBAtomicFlushTest::AtomicFlushRollbackSomeJobs:2", - "DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:2"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(3, num_cfs); - WriteOptions wopts; - wopts.disableWAL = true; - for (size_t i = 0; i != num_cfs; ++i) { - int cf_id = static_cast(i); - ASSERT_OK(Put(cf_id, "key", "value", wopts)); - } - FlushOptions flush_opts; - flush_opts.wait = false; - ASSERT_OK(dbfull()->Flush(flush_opts, handles_)); - TEST_SYNC_POINT("DBAtomicFlushTest::AtomicFlushRollbackSomeJobs:1"); - 
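
The sync-point dependency list at the top of this test is what makes the interleaving deterministic, and the same mechanism drives most of the race tests in this file. A minimal sketch of how one dependency pair works (the marker names here are invented for illustration, unlike the DBImpl markers used above; include path assumed):

#include "test_util/sync_point.h"

void OrderTwoSteps() {
  using ROCKSDB_NAMESPACE::SyncPoint;
  // Whoever reaches "Example:StepTwo" blocks until some thread has passed
  // "Example:StepOne".
  SyncPoint::GetInstance()->LoadDependency(
      {{"Example:StepOne", "Example:StepTwo"}});
  SyncPoint::GetInstance()->EnableProcessing();
  // Thread A: ...work...; TEST_SYNC_POINT("Example:StepOne");
  // Thread B: TEST_SYNC_POINT("Example:StepTwo"); ...continues only now...
}

Here the pair guarantees the filesystem is switched off only after some flush jobs have already completed, so the remaining jobs fail and every column family has to roll back to a consistent state, which the loop that follows asserts.
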
fault_injection_env->SetFilesystemActive(false); - TEST_SYNC_POINT("DBAtomicFlushTest::AtomicFlushRollbackSomeJobs:2"); - for (auto* cfh : handles_) { - // Returns the IO error happend during flush. - ASSERT_NOK(dbfull()->TEST_WaitForFlushMemTable(cfh)); - } - for (size_t i = 0; i != num_cfs; ++i) { - auto cfh = static_cast(handles_[i]); - ASSERT_EQ(1, cfh->cfd()->imm()->NumNotFlushed()); - ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty()); - } - fault_injection_env->SetFilesystemActive(true); - Destroy(options); -} - -TEST_P(DBAtomicFlushTest, FlushMultipleCFs_DropSomeBeforeRequestFlush) { - bool atomic_flush = GetParam(); - if (!atomic_flush) { - return; - } - Options options = CurrentOptions(); - options.create_if_missing = true; - options.atomic_flush = atomic_flush; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->EnableProcessing(); - - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(3, num_cfs); - WriteOptions wopts; - wopts.disableWAL = true; - std::vector cf_ids; - for (size_t i = 0; i != num_cfs; ++i) { - int cf_id = static_cast(i); - ASSERT_OK(Put(cf_id, "key", "value", wopts)); - cf_ids.push_back(cf_id); - } - ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - ASSERT_TRUE(Flush(cf_ids).IsColumnFamilyDropped()); - Destroy(options); -} - -TEST_P(DBAtomicFlushTest, - FlushMultipleCFs_DropSomeAfterScheduleFlushBeforeFlushJobRun) { - bool atomic_flush = GetParam(); - if (!atomic_flush) { - return; - } - Options options = CurrentOptions(); - options.create_if_missing = true; - options.atomic_flush = atomic_flush; - - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::AtomicFlushMemTables:AfterScheduleFlush", - "DBAtomicFlushTest::BeforeDropCF"}, - {"DBAtomicFlushTest::AfterDropCF", - "DBImpl::BackgroundCallFlush:start"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - size_t num_cfs = handles_.size(); - ASSERT_EQ(3, num_cfs); - WriteOptions wopts; - wopts.disableWAL = true; - for (size_t i = 0; i != num_cfs; ++i) { - int cf_id = static_cast(i); - ASSERT_OK(Put(cf_id, "key", "value", wopts)); - } - port::Thread user_thread([&]() { - TEST_SYNC_POINT("DBAtomicFlushTest::BeforeDropCF"); - ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - TEST_SYNC_POINT("DBAtomicFlushTest::AfterDropCF"); - }); - FlushOptions flush_opts; - flush_opts.wait = true; - ASSERT_OK(dbfull()->Flush(flush_opts, handles_)); - user_thread.join(); - for (size_t i = 0; i != num_cfs; ++i) { - int cf_id = static_cast(i); - ASSERT_EQ("value", Get(cf_id, "key")); - } - - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "eevee"}, options); - num_cfs = handles_.size(); - ASSERT_EQ(2, num_cfs); - for (size_t i = 0; i != num_cfs; ++i) { - int cf_id = static_cast(i); - ASSERT_EQ("value", Get(cf_id, "key")); - } - Destroy(options); -} - -TEST_P(DBAtomicFlushTest, TriggerFlushAndClose) { - bool atomic_flush = GetParam(); - if (!atomic_flush) { - return; - } - const int kNumKeysTriggerFlush = 4; - Options options = CurrentOptions(); - options.create_if_missing = true; - options.atomic_flush = atomic_flush; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysTriggerFlush)); - CreateAndReopenWithCF({"pikachu"}, options); - - for (int i = 0; i != kNumKeysTriggerFlush; ++i) { - ASSERT_OK(Put(0, "key" + 
std::to_string(i), "value" + std::to_string(i))); - } - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put(0, "key", "value")); - Close(); - - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); - ASSERT_EQ("value", Get(0, "key")); -} - -TEST_P(DBAtomicFlushTest, PickMemtablesRaceWithBackgroundFlush) { - bool atomic_flush = GetParam(); - Options options = CurrentOptions(); - options.create_if_missing = true; - options.atomic_flush = atomic_flush; - options.max_write_buffer_number = 4; - // Set min_write_buffer_number_to_merge to be greater than 1, so that - // a column family with one memtable in the imm will not cause IsFlushPending - // to return true when flush_requested_ is false. - options.min_write_buffer_number_to_merge = 2; - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_EQ(2, handles_.size()); - ASSERT_OK(dbfull()->PauseBackgroundWork()); - ASSERT_OK(Put(0, "key00", "value00")); - ASSERT_OK(Put(1, "key10", "value10")); - FlushOptions flush_opts; - flush_opts.wait = false; - ASSERT_OK(dbfull()->Flush(flush_opts, handles_)); - ASSERT_OK(Put(0, "key01", "value01")); - // Since max_write_buffer_number is 4, the following flush won't cause write - // stall. - ASSERT_OK(dbfull()->Flush(flush_opts)); - ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1])); - handles_[1] = nullptr; - ASSERT_OK(dbfull()->ContinueBackgroundWork()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0])); - delete handles_[0]; - handles_.clear(); -} - -TEST_P(DBAtomicFlushTest, CFDropRaceWithWaitForFlushMemTables) { - bool atomic_flush = GetParam(); - if (!atomic_flush) { - return; - } - Options options = CurrentOptions(); - options.create_if_missing = true; - options.atomic_flush = atomic_flush; - CreateAndReopenWithCF({"pikachu"}, options); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::AtomicFlushMemTables:AfterScheduleFlush", - "DBAtomicFlushTest::CFDropRaceWithWaitForFlushMemTables:BeforeDrop"}, - {"DBAtomicFlushTest::CFDropRaceWithWaitForFlushMemTables:AfterFree", - "DBImpl::BackgroundCallFlush:start"}, - {"DBImpl::BackgroundCallFlush:start", - "DBImpl::AtomicFlushMemTables:BeforeWaitForBgFlush"}}); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_EQ(2, handles_.size()); - ASSERT_OK(Put(0, "key", "value")); - ASSERT_OK(Put(1, "key", "value")); - auto* cfd_default = - static_cast(dbfull()->DefaultColumnFamily()) - ->cfd(); - auto* cfd_pikachu = static_cast(handles_[1])->cfd(); - port::Thread drop_cf_thr([&]() { - TEST_SYNC_POINT( - "DBAtomicFlushTest::CFDropRaceWithWaitForFlushMemTables:BeforeDrop"); - ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - delete handles_[1]; - handles_.resize(1); - TEST_SYNC_POINT( - "DBAtomicFlushTest::CFDropRaceWithWaitForFlushMemTables:AfterFree"); - }); - FlushOptions flush_opts; - flush_opts.allow_write_stall = true; - ASSERT_OK(dbfull()->TEST_AtomicFlushMemTables({cfd_default, cfd_pikachu}, - flush_opts)); - drop_cf_thr.join(); - Close(); - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DBAtomicFlushTest, RollbackAfterFailToInstallResults) { - bool atomic_flush = GetParam(); - if (!atomic_flush) { - return; - } - auto fault_injection_env = std::make_shared(env_); - Options options = CurrentOptions(); - options.env = fault_injection_env.get(); - options.create_if_missing = true; - 

options.atomic_flush = atomic_flush; - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_EQ(2, handles_.size()); - for (size_t cf = 0; cf < handles_.size(); ++cf) { - ASSERT_OK(Put(static_cast(cf), "a", "value")); - } - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0", - [&](void* /*arg*/) { fault_injection_env->SetFilesystemActive(false); }); - SyncPoint::GetInstance()->EnableProcessing(); - FlushOptions flush_opts; - Status s = db_->Flush(flush_opts, handles_); - ASSERT_NOK(s); - fault_injection_env->SetFilesystemActive(true); - Close(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -// In atomic flush, concurrent bg flush threads commit to the MANIFEST in -// serial, in the order of their picked memtables for each column family. -// Only when a bg flush thread finds out that its memtables are the earliest -// unflushed ones for all the included column families will this bg flush -// thread continue to commit to MANIFEST. -// This unit test uses sync point to coordinate the execution of two bg threads -// executing the same sequence of functions. The interleaving are as follows. -// time bg1 bg2 -// | pick memtables to flush -// | flush memtables cf1_m1, cf2_m1 -// | join MANIFEST write queue -// | pick memtabls to flush -// | flush memtables cf1_(m1+1) -// | join MANIFEST write queue -// | wait to write MANIFEST -// | write MANIFEST -// | IO error -// | detect IO error and stop waiting -// V -TEST_P(DBAtomicFlushTest, BgThreadNoWaitAfterManifestError) { - bool atomic_flush = GetParam(); - if (!atomic_flush) { - return; - } - auto fault_injection_env = std::make_shared(env_); - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.atomic_flush = true; - options.env = fault_injection_env.get(); - // Set a larger value than default so that RocksDB can schedule concurrent - // background flush threads. - options.max_background_jobs = 8; - options.max_write_buffer_number = 8; - CreateAndReopenWithCF({"pikachu"}, options); - - assert(2 == handles_.size()); - - WriteOptions write_opts; - write_opts.disableWAL = true; - - ASSERT_OK(Put(0, "a", "v_0_a", write_opts)); - ASSERT_OK(Put(1, "a", "v_1_a", write_opts)); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - SyncPoint::GetInstance()->LoadDependency({ - {"BgFlushThr2:WaitToCommit", "BgFlushThr1:BeforeWriteManifest"}, - }); - - std::thread::id bg_flush_thr1, bg_flush_thr2; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCallFlush:start", [&](void*) { - if (bg_flush_thr1 == std::thread::id()) { - bg_flush_thr1 = std::this_thread::get_id(); - } else if (bg_flush_thr2 == std::thread::id()) { - bg_flush_thr2 = std::this_thread::get_id(); - } - }); - - int called = 0; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::AtomicFlushMemTablesToOutputFiles:WaitToCommit", [&](void* arg) { - if (std::this_thread::get_id() == bg_flush_thr2) { - const auto* ptr = reinterpret_cast*>(arg); - assert(ptr); - if (0 == called) { - // When bg flush thread 2 reaches here for the first time. - ASSERT_OK(ptr->first); - ASSERT_TRUE(ptr->second); - } else if (1 == called) { - // When bg flush thread 2 reaches here for the second time. 
- ASSERT_TRUE(ptr->first.IsIOError()); - ASSERT_FALSE(ptr->second); - } - ++called; - TEST_SYNC_POINT("BgFlushThr2:WaitToCommit"); - } - }); - - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0", - [&](void*) { - if (std::this_thread::get_id() == bg_flush_thr1) { - TEST_SYNC_POINT("BgFlushThr1:BeforeWriteManifest"); - } - }); - - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void*) { - if (std::this_thread::get_id() != bg_flush_thr1) { - return; - } - ASSERT_OK(db_->Put(write_opts, "b", "v_1_b")); - - FlushOptions flush_opts; - flush_opts.wait = false; - std::vector cfhs(1, db_->DefaultColumnFamily()); - ASSERT_OK(dbfull()->Flush(flush_opts, cfhs)); - }); - - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::ProcessManifestWrites:AfterSyncManifest", [&](void* arg) { - auto* ptr = reinterpret_cast(arg); - assert(ptr); - *ptr = IOStatus::IOError("Injected failure"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_TRUE(dbfull()->Flush(FlushOptions(), handles_).IsIOError()); - - Close(); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(DBAtomicFlushTest, NoWaitWhenWritesStopped) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.atomic_flush = GetParam(); - options.max_write_buffer_number = 2; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); - - Reopen(options); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::DelayWrite:Start", - "DBAtomicFlushTest::NoWaitWhenWritesStopped:0"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(dbfull()->PauseBackgroundWork()); - for (int i = 0; i < options.max_write_buffer_number; ++i) { - ASSERT_OK(Put("k" + std::to_string(i), "v" + std::to_string(i))); - } - std::thread stalled_writer([&]() { ASSERT_OK(Put("k", "v")); }); - - TEST_SYNC_POINT("DBAtomicFlushTest::NoWaitWhenWritesStopped:0"); - - { - FlushOptions flush_opts; - flush_opts.wait = false; - flush_opts.allow_write_stall = true; - ASSERT_TRUE(db_->Flush(flush_opts).IsTryAgain()); - } - - ASSERT_OK(dbfull()->ContinueBackgroundWork()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - stalled_writer.join(); - - SyncPoint::GetInstance()->DisableProcessing(); -} - -INSTANTIATE_TEST_CASE_P(DBFlushDirectIOTest, DBFlushDirectIOTest, - testing::Bool()); - -INSTANTIATE_TEST_CASE_P(DBAtomicFlushTest, DBAtomicFlushTest, testing::Bool()); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_inplace_update_test.cc b/db/db_inplace_update_test.cc deleted file mode 100644 index 3921a3b00..000000000 --- a/db/db_inplace_update_test.cc +++ /dev/null @@ -1,262 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
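The tests below exercise RocksDB's memtable in-place update path. A minimal sketch of the configuration they rely on (option names as used in the tests that follow; the buffer size is arbitrary):

// Illustrative setup only: inplace_update_support rewrites an existing
// memtable value in place when the new value is no larger than the old one,
// and it requires concurrent memtable writes to be disabled. Snapshots are
// not available in this mode (GetSnapshot() returns nullptr in a test below).
Options options = CurrentOptions();
options.create_if_missing = true;
options.inplace_update_support = true;
options.allow_concurrent_memtable_write = false;
options.write_buffer_size = 100000;
Reopen(options);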
-#include "db/db_test_util.h" -#include "port/stack_trace.h" - -namespace ROCKSDB_NAMESPACE { - -class DBTestInPlaceUpdate : public DBTestBase { - public: - DBTestInPlaceUpdate() - : DBTestBase("db_inplace_update_test", /*env_do_fsync=*/true) {} -}; - -TEST_F(DBTestInPlaceUpdate, InPlaceUpdate) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = true; - options.env = env_; - options.write_buffer_size = 100000; - options.allow_concurrent_memtable_write = false; - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Update key with values of smaller size - int numValues = 10; - for (int i = numValues; i > 0; i--) { - std::string value = DummyString(i, 'a'); - ASSERT_OK(Put(1, "key", value)); - ASSERT_EQ(value, Get(1, "key")); - } - - // Only 1 instance for that key. - validateNumberOfEntries(1, 1); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestInPlaceUpdate, InPlaceUpdateLargeNewValue) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = true; - options.env = env_; - options.write_buffer_size = 100000; - options.allow_concurrent_memtable_write = false; - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Update key with values of larger size - int numValues = 10; - for (int i = 0; i < numValues; i++) { - std::string value = DummyString(i, 'a'); - ASSERT_OK(Put(1, "key", value)); - ASSERT_EQ(value, Get(1, "key")); - } - - // All 10 updates exist in the internal iterator - validateNumberOfEntries(numValues, 1); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestInPlaceUpdate, InPlaceUpdateEntitySmallerNewValue) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = true; - options.env = env_; - options.allow_concurrent_memtable_write = false; - - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Update key with values of smaller size - constexpr int num_values = 10; - for (int i = num_values; i > 0; --i) { - constexpr char key[] = "key"; - const std::string value = DummyString(i, 'a'); - WideColumns wide_columns{{"attr", value}}; - - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[1], key, wide_columns)); - // TODO: use Get to check entity once it's supported - } - - // Only 1 instance for that key. 
- validateNumberOfEntries(1, 1); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestInPlaceUpdate, InPlaceUpdateEntityLargerNewValue) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = true; - options.env = env_; - options.allow_concurrent_memtable_write = false; - - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Update key with values of larger size - constexpr int num_values = 10; - for (int i = 0; i < num_values; ++i) { - constexpr char key[] = "key"; - const std::string value = DummyString(i, 'a'); - WideColumns wide_columns{{"attr", value}}; - - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[1], key, wide_columns)); - // TODO: use Get to check entity once it's supported - } - - // All 10 updates exist in the internal iterator - validateNumberOfEntries(num_values, 1); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackSmallerSize) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = true; - - options.env = env_; - options.write_buffer_size = 100000; - options.inplace_callback = - ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceSmallerSize; - options.allow_concurrent_memtable_write = false; - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Update key with values of smaller size - int numValues = 10; - ASSERT_OK(Put(1, "key", DummyString(numValues, 'a'))); - ASSERT_EQ(DummyString(numValues, 'c'), Get(1, "key")); - - for (int i = numValues; i > 0; i--) { - ASSERT_OK(Put(1, "key", DummyString(i, 'a'))); - ASSERT_EQ(DummyString(i - 1, 'b'), Get(1, "key")); - } - - // Only 1 instance for that key. - validateNumberOfEntries(1, 1); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackSmallerVarintSize) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = true; - - options.env = env_; - options.write_buffer_size = 100000; - options.inplace_callback = - ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceSmallerVarintSize; - options.allow_concurrent_memtable_write = false; - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Update key with values of smaller varint size - int numValues = 265; - ASSERT_OK(Put(1, "key", DummyString(numValues, 'a'))); - ASSERT_EQ(DummyString(numValues, 'c'), Get(1, "key")); - - for (int i = numValues; i > 0; i--) { - ASSERT_OK(Put(1, "key", DummyString(i, 'a'))); - ASSERT_EQ(DummyString(1, 'b'), Get(1, "key")); - } - - // Only 1 instance for that key. - validateNumberOfEntries(1, 1); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackLargeNewValue) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = true; - - options.env = env_; - options.write_buffer_size = 100000; - options.inplace_callback = - ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceLargerSize; - options.allow_concurrent_memtable_write = false; - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Update key with values of larger size - int numValues = 10; - for (int i = 0; i < numValues; i++) { - ASSERT_OK(Put(1, "key", DummyString(i, 'a'))); - ASSERT_EQ(DummyString(i, 'c'), Get(1, "key")); - } - - // No inplace updates. 
All updates are puts with new seq number - // All 10 updates exist in the internal iterator - validateNumberOfEntries(numValues, 1); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackNoAction) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = true; - - options.env = env_; - options.write_buffer_size = 100000; - options.inplace_callback = - ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceNoAction; - options.allow_concurrent_memtable_write = false; - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Callback function requests no actions from db - ASSERT_OK(Put(1, "key", DummyString(1, 'a'))); - ASSERT_EQ(Get(1, "key"), "NOT_FOUND"); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestInPlaceUpdate, InPlaceUpdateAndSnapshot) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = true; - options.env = env_; - options.write_buffer_size = 100000; - options.allow_concurrent_memtable_write = false; - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Update key with values of smaller size, and - // run GetSnapshot and ReleaseSnapshot - int numValues = 2; - for (int i = numValues; i > 0; i--) { - const Snapshot* s = db_->GetSnapshot(); - ASSERT_EQ(nullptr, s); - std::string value = DummyString(i, 'a'); - ASSERT_OK(Put(1, "key", value)); - ASSERT_EQ(value, Get(1, "key")); - // release s (nullptr) - db_->ReleaseSnapshot(s); - } - - // Only 1 instance for that key. - validateNumberOfEntries(1, 1); - } while (ChangeCompactOptions()); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_io_failure_test.cc b/db/db_io_failure_test.cc deleted file mode 100644 index e79272ea7..000000000 --- a/db/db_io_failure_test.cc +++ /dev/null @@ -1,589 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
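The tests in this file inject I/O failures (dropped writes, failed Sync/RangeSync/Close calls, MANIFEST write errors) and check how the DB reports and recovers from them. A recurring pattern, sketched here against an assumed `db` handle, is reading the accumulated background error count through a DB property:

// Sketch only: "rocksdb.background-errors" returns, as a decimal string, the
// number of background errors (failed flushes/compactions) seen so far.
std::string num_bg_errors;
if (db->GetProperty("rocksdb.background-errors", &num_bg_errors)) {
  // e.g. "0" before any injected failure, "1" after a failed flush.
}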
- -#include "db/db_test_util.h" -#include "port/stack_trace.h" -#include "test_util/testutil.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -class DBIOFailureTest : public DBTestBase { - public: - DBIOFailureTest() : DBTestBase("db_io_failure_test", /*env_do_fsync=*/true) {} -}; - -// Check that number of files does not grow when writes are dropped -TEST_F(DBIOFailureTest, DropWrites) { - do { - Options options = CurrentOptions(); - options.env = env_; - options.paranoid_checks = false; - Reopen(options); - - ASSERT_OK(Put("foo", "v1")); - ASSERT_EQ("v1", Get("foo")); - Compact("a", "z"); - const size_t num_files = CountFiles(); - // Force out-of-space errors - env_->drop_writes_.store(true, std::memory_order_release); - env_->sleep_counter_.Reset(); - env_->SetMockSleep(); - for (int i = 0; i < 5; i++) { - if (option_config_ != kUniversalCompactionMultiLevel && - option_config_ != kUniversalSubcompactions) { - for (int level = 0; level < dbfull()->NumberLevels(); level++) { - if (level > 0 && level == dbfull()->NumberLevels() - 1) { - break; - } - Status s = - dbfull()->TEST_CompactRange(level, nullptr, nullptr, nullptr, - true /* disallow trivial move */); - ASSERT_TRUE(s.ok() || s.IsCorruption()); - } - } else { - Status s = - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); - ASSERT_TRUE(s.ok() || s.IsCorruption()); - } - } - - std::string property_value; - ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value)); - ASSERT_EQ("5", property_value); - - env_->drop_writes_.store(false, std::memory_order_release); - const size_t count = CountFiles(); - ASSERT_LT(count, num_files + 3); - - // Check that compaction attempts slept after errors - // TODO @krad: Figure out why ASSERT_EQ 5 keeps failing in certain compiler - // versions - ASSERT_GE(env_->sleep_counter_.Read(), 4); - } while (ChangeCompactOptions()); -} - -// Check background error counter bumped on flush failures. -TEST_F(DBIOFailureTest, DropWritesFlush) { - do { - Options options = CurrentOptions(); - options.env = env_; - options.max_background_flushes = 1; - Reopen(options); - - ASSERT_OK(Put("foo", "v1")); - // Force out-of-space errors - env_->drop_writes_.store(true, std::memory_order_release); - - std::string property_value; - // Background error count is 0 now. 
- ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value)); - ASSERT_EQ("0", property_value); - - // ASSERT file is too short - ASSERT_TRUE(dbfull()->TEST_FlushMemTable(true).IsCorruption()); - - ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value)); - ASSERT_EQ("1", property_value); - - env_->drop_writes_.store(false, std::memory_order_release); - } while (ChangeCompactOptions()); -} - -// Check that CompactRange() returns failure if there is not enough space left -// on device -TEST_F(DBIOFailureTest, NoSpaceCompactRange) { - do { - Options options = CurrentOptions(); - options.env = env_; - options.disable_auto_compactions = true; - Reopen(options); - - // generate 5 tables - for (int i = 0; i < 5; ++i) { - ASSERT_OK(Put(Key(i), Key(i) + "v")); - ASSERT_OK(Flush()); - } - - // Force out-of-space errors - env_->no_space_.store(true, std::memory_order_release); - - Status s = dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow trivial move */); - ASSERT_TRUE(s.IsIOError()); - ASSERT_TRUE(s.IsNoSpace()); - - env_->no_space_.store(false, std::memory_order_release); - } while (ChangeCompactOptions()); -} - -TEST_F(DBIOFailureTest, NonWritableFileSystem) { - do { - Options options = CurrentOptions(); - options.write_buffer_size = 4096; - options.arena_block_size = 4096; - options.env = env_; - Reopen(options); - ASSERT_OK(Put("foo", "v1")); - env_->non_writeable_rate_.store(100); - std::string big(100000, 'x'); - int errors = 0; - for (int i = 0; i < 20; i++) { - if (!Put("foo", big).ok()) { - errors++; - env_->SleepForMicroseconds(100000); - } - } - ASSERT_GT(errors, 0); - env_->non_writeable_rate_.store(0); - } while (ChangeCompactOptions()); -} - -TEST_F(DBIOFailureTest, ManifestWriteError) { - // Test for the following problem: - // (a) Compaction produces file F - // (b) Log record containing F is written to MANIFEST file, but Sync() fails - // (c) GC deletes F - // (d) After reopening DB, reads fail since deleted F is named in log record - - // We iterate twice. In the second iteration, everything is the - // same except the log record never makes it to the MANIFEST file. - for (int iter = 0; iter < 2; iter++) { - std::atomic* error_type = (iter == 0) ? 
&env_->manifest_sync_error_ - : &env_->manifest_write_error_; - - // Insert foo=>bar mapping - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.error_if_exists = false; - options.paranoid_checks = true; - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "bar")); - ASSERT_EQ("bar", Get("foo")); - - // Memtable compaction (will succeed) - ASSERT_OK(Flush()); - ASSERT_EQ("bar", Get("foo")); - const int last = 2; - MoveFilesToLevel(2); - ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level - - // Merging compaction (will fail) - error_type->store(true, std::memory_order_release); - ASSERT_NOK( - dbfull()->TEST_CompactRange(last, nullptr, nullptr)); // Should fail - ASSERT_EQ("bar", Get("foo")); - - error_type->store(false, std::memory_order_release); - - // Since paranoid_checks=true, writes should fail - ASSERT_NOK(Put("foo2", "bar2")); - - // Recovery: should not lose data - ASSERT_EQ("bar", Get("foo")); - - // Try again with paranoid_checks=false - Close(); - options.paranoid_checks = false; - Reopen(options); - - // Merging compaction (will fail) - error_type->store(true, std::memory_order_release); - Status s = - dbfull()->TEST_CompactRange(last, nullptr, nullptr); // Should fail - if (iter == 0) { - ASSERT_OK(s); - } else { - ASSERT_TRUE(s.IsIOError()); - } - ASSERT_EQ("bar", Get("foo")); - - // Recovery: should not lose data - error_type->store(false, std::memory_order_release); - Reopen(options); - ASSERT_EQ("bar", Get("foo")); - - // Since paranoid_checks=false, writes should succeed - ASSERT_OK(Put("foo2", "bar2")); - ASSERT_EQ("bar", Get("foo")); - ASSERT_EQ("bar2", Get("foo2")); - } -} - -TEST_F(DBIOFailureTest, PutFailsParanoid) { - // Test the following: - // (a) A random put fails in paranoid mode (simulate by sync fail) - // (b) All other puts have to fail, even if writes would succeed - // (c) All of that should happen ONLY if paranoid_checks = true - - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.error_if_exists = false; - options.paranoid_checks = true; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Put(1, "foo1", "bar1")); - // simulate error - env_->log_write_error_.store(true, std::memory_order_release); - ASSERT_NOK(Put(1, "foo2", "bar2")); - env_->log_write_error_.store(false, std::memory_order_release); - // the next put should fail, too - ASSERT_NOK(Put(1, "foo3", "bar3")); - // but we're still able to read - ASSERT_EQ("bar", Get(1, "foo")); - - // do the same thing with paranoid checks off - options.paranoid_checks = false; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Put(1, "foo1", "bar1")); - // simulate error - env_->log_write_error_.store(true, std::memory_order_release); - ASSERT_NOK(Put(1, "foo2", "bar2")); - env_->log_write_error_.store(false, std::memory_order_release); - // the next put should NOT fail - ASSERT_OK(Put(1, "foo3", "bar3")); -} -#if !(defined NDEBUG) || !defined(OS_WIN) -TEST_F(DBIOFailureTest, FlushSstRangeSyncError) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.error_if_exists = false; - options.paranoid_checks = true; - options.write_buffer_size = 256 * 1024 * 1024; - options.writable_file_max_buffer_size = 128 * 1024; - options.bytes_per_sync = 128 * 1024; - 
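  // With bytes_per_sync set to 128KB, the SST writer issues RangeSync() calls
  // periodically while the flush output is written; the sync point callback
  // below fails the first such call to simulate an I/O error mid-flush.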
options.level0_file_num_compaction_trigger = 4; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(10)); - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - const char* io_error_msg = "range sync dummy error"; - std::atomic<int> range_sync_called(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SpecialEnv::SStableFile::RangeSync", [&](void* arg) { - if (range_sync_called.fetch_add(1) == 0) { - Status* st = static_cast<Status*>(arg); - *st = Status::IOError(io_error_msg); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - std::string rnd_str = - rnd.RandomString(static_cast<int>(options.bytes_per_sync / 2)); - std::string rnd_str_512kb = rnd.RandomString(512 * 1024); - - ASSERT_OK(Put(1, "foo", "bar")); - // First 1MB doesn't get range synced - ASSERT_OK(Put(1, "foo0_0", rnd_str_512kb)); - ASSERT_OK(Put(1, "foo0_1", rnd_str_512kb)); - ASSERT_OK(Put(1, "foo1_1", rnd_str)); - ASSERT_OK(Put(1, "foo1_2", rnd_str)); - ASSERT_OK(Put(1, "foo1_3", rnd_str)); - ASSERT_OK(Put(1, "foo2", "bar")); - ASSERT_OK(Put(1, "foo3_1", rnd_str)); - ASSERT_OK(Put(1, "foo3_2", rnd_str)); - ASSERT_OK(Put(1, "foo3_3", rnd_str)); - ASSERT_OK(Put(1, "foo4", "bar")); - Status s = dbfull()->TEST_WaitForFlushMemTable(handles_[1]); - ASSERT_TRUE(s.IsIOError()); - ASSERT_STREQ(s.getState(), io_error_msg); - - // Following writes should fail as flush failed. - ASSERT_NOK(Put(1, "foo2", "bar3")); - ASSERT_EQ("bar", Get(1, "foo")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_GE(1, range_sync_called.load()); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("bar", Get(1, "foo")); -} - -TEST_F(DBIOFailureTest, CompactSstRangeSyncError) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.error_if_exists = false; - options.paranoid_checks = true; - options.write_buffer_size = 256 * 1024 * 1024; - options.writable_file_max_buffer_size = 128 * 1024; - options.bytes_per_sync = 128 * 1024; - options.level0_file_num_compaction_trigger = 2; - options.target_file_size_base = 256 * 1024 * 1024; - options.disable_auto_compactions = true; - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - Random rnd(301); - std::string rnd_str = - rnd.RandomString(static_cast<int>(options.bytes_per_sync / 2)); - std::string rnd_str_512kb = rnd.RandomString(512 * 1024); - - ASSERT_OK(Put(1, "foo", "bar")); - // First 1MB doesn't get range synced - ASSERT_OK(Put(1, "foo0_0", rnd_str_512kb)); - ASSERT_OK(Put(1, "foo0_1", rnd_str_512kb)); - ASSERT_OK(Put(1, "foo1_1", rnd_str)); - ASSERT_OK(Put(1, "foo1_2", rnd_str)); - ASSERT_OK(Put(1, "foo1_3", rnd_str)); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Put(1, "foo3_1", rnd_str)); - ASSERT_OK(Put(1, "foo3_2", rnd_str)); - ASSERT_OK(Put(1, "foo3_3", rnd_str)); - ASSERT_OK(Put(1, "foo4", "bar")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - - const char* io_error_msg = "range sync dummy error"; - std::atomic<int> range_sync_called(0); -
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SpecialEnv::SStableFile::RangeSync", [&](void* arg) { - if (range_sync_called.fetch_add(1) == 0) { - Status* st = static_cast(arg); - *st = Status::IOError(io_error_msg); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(dbfull()->SetOptions(handles_[1], - { - {"disable_auto_compactions", "false"}, - })); - Status s = dbfull()->TEST_WaitForCompact(); - ASSERT_TRUE(s.IsIOError()); - ASSERT_STREQ(s.getState(), io_error_msg); - - // Following writes should fail as flush failed. - ASSERT_NOK(Put(1, "foo2", "bar3")); - ASSERT_EQ("bar", Get(1, "foo")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_GE(1, range_sync_called.load()); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("bar", Get(1, "foo")); -} - -TEST_F(DBIOFailureTest, FlushSstCloseError) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.error_if_exists = false; - options.paranoid_checks = true; - options.level0_file_num_compaction_trigger = 4; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(2)); - - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - const char* io_error_msg = "close dummy error"; - std::atomic close_called(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SpecialEnv::SStableFile::Close", [&](void* arg) { - if (close_called.fetch_add(1) == 0) { - Status* st = static_cast(arg); - *st = Status::IOError(io_error_msg); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Put(1, "foo1", "bar1")); - ASSERT_OK(Put(1, "foo", "bar2")); - Status s = dbfull()->TEST_WaitForFlushMemTable(handles_[1]); - ASSERT_TRUE(s.IsIOError()); - ASSERT_STREQ(s.getState(), io_error_msg); - - // Following writes should fail as flush failed. 
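  // With paranoid_checks enabled, the failed flush leaves the DB in a
  // background-error state, so subsequent writes are rejected until the error
  // is cleared (here, by reopening the DB below).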
- ASSERT_NOK(Put(1, "foo2", "bar3")); - ASSERT_EQ("bar2", Get(1, "foo")); - ASSERT_EQ("bar1", Get(1, "foo1")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("bar2", Get(1, "foo")); - ASSERT_EQ("bar1", Get(1, "foo1")); -} - -TEST_F(DBIOFailureTest, CompactionSstCloseError) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.error_if_exists = false; - options.paranoid_checks = true; - options.level0_file_num_compaction_trigger = 2; - options.disable_auto_compactions = true; - - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Put(1, "foo2", "bar")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "foo", "bar2")); - ASSERT_OK(Put(1, "foo2", "bar")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "foo", "bar3")); - ASSERT_OK(Put(1, "foo2", "bar")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - const char* io_error_msg = "close dummy error"; - std::atomic close_called(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SpecialEnv::SStableFile::Close", [&](void* arg) { - if (close_called.fetch_add(1) == 0) { - Status* st = static_cast(arg); - *st = Status::IOError(io_error_msg); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(dbfull()->SetOptions(handles_[1], - { - {"disable_auto_compactions", "false"}, - })); - Status s = dbfull()->TEST_WaitForCompact(); - ASSERT_TRUE(s.IsIOError()); - ASSERT_STREQ(s.getState(), io_error_msg); - - // Following writes should fail as compaction failed. - ASSERT_NOK(Put(1, "foo2", "bar3")); - ASSERT_EQ("bar3", Get(1, "foo")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("bar3", Get(1, "foo")); -} - -TEST_F(DBIOFailureTest, FlushSstSyncError) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.error_if_exists = false; - options.paranoid_checks = true; - options.use_fsync = false; - options.level0_file_num_compaction_trigger = 4; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(2)); - - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - const char* io_error_msg = "sync dummy error"; - std::atomic sync_called(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SpecialEnv::SStableFile::Sync", [&](void* arg) { - if (sync_called.fetch_add(1) == 0) { - Status* st = static_cast(arg); - *st = Status::IOError(io_error_msg); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Put(1, "foo1", "bar1")); - ASSERT_OK(Put(1, "foo", "bar2")); - Status s = dbfull()->TEST_WaitForFlushMemTable(handles_[1]); - ASSERT_TRUE(s.IsIOError()); - ASSERT_STREQ(s.getState(), io_error_msg); - - // Following writes should fail as flush failed. 
- ASSERT_NOK(Put(1, "foo2", "bar3")); - ASSERT_EQ("bar2", Get(1, "foo")); - ASSERT_EQ("bar1", Get(1, "foo1")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("bar2", Get(1, "foo")); - ASSERT_EQ("bar1", Get(1, "foo1")); -} - -TEST_F(DBIOFailureTest, CompactionSstSyncError) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.error_if_exists = false; - options.paranoid_checks = true; - options.level0_file_num_compaction_trigger = 2; - options.disable_auto_compactions = true; - options.use_fsync = false; - - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Put(1, "foo2", "bar")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "foo", "bar2")); - ASSERT_OK(Put(1, "foo2", "bar")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "foo", "bar3")); - ASSERT_OK(Put(1, "foo2", "bar")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - const char* io_error_msg = "sync dummy error"; - std::atomic sync_called(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SpecialEnv::SStableFile::Sync", [&](void* arg) { - if (sync_called.fetch_add(1) == 0) { - Status* st = static_cast(arg); - *st = Status::IOError(io_error_msg); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(dbfull()->SetOptions(handles_[1], - { - {"disable_auto_compactions", "false"}, - })); - Status s = dbfull()->TEST_WaitForCompact(); - ASSERT_TRUE(s.IsIOError()); - ASSERT_STREQ(s.getState(), io_error_msg); - - // Following writes should fail as compaction failed. - ASSERT_NOK(Put(1, "foo2", "bar3")); - ASSERT_EQ("bar3", Get(1, "foo")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("bar3", Get(1, "foo")); -} -#endif // !(defined NDEBUG) || !defined(OS_WIN) -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_iter_stress_test.cc b/db/db_iter_stress_test.cc deleted file mode 100644 index 872f7e6bd..000000000 --- a/db/db_iter_stress_test.cc +++ /dev/null @@ -1,658 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/db_iter.h" -#include "db/dbformat.h" -#include "rocksdb/comparator.h" -#include "rocksdb/options.h" -#include "rocksdb/slice.h" -#include "test_util/testharness.h" -#include "util/random.h" -#include "util/string_util.h" -#include "utilities/merge_operators.h" - -#ifdef GFLAGS - -#include "util/gflags_compat.h" - -using GFLAGS_NAMESPACE::ParseCommandLineFlags; - -DEFINE_bool(verbose, false, - "Print huge, detailed trace. 
Intended for debugging failures."); - -#else - -void ParseCommandLineFlags(int*, char***, bool) {} -bool FLAGS_verbose = false; - -#endif - -namespace ROCKSDB_NAMESPACE { - -class DBIteratorStressTest : public testing::Test { - public: - Env* env_; - - DBIteratorStressTest() : env_(Env::Default()) {} -}; - -namespace { - -struct Entry { - std::string key; - ValueType type; // kTypeValue, kTypeDeletion, kTypeMerge - uint64_t sequence; - std::string ikey; // internal key, made from `key`, `sequence` and `type` - std::string value; - // If false, we'll pretend that this entry doesn't exist. - bool visible = true; - - bool operator<(const Entry& e) const { - if (key != e.key) return key < e.key; - return std::tie(sequence, type) > std::tie(e.sequence, e.type); - } -}; - -struct Data { - std::vector<Entry> entries; - - // Indices in `entries` with `visible` = false. - std::vector<size_t> hidden; - // Keys of entries whose `visible` changed since the last seek of iterators. - std::set<std::string> recently_touched_keys; -}; - -struct StressTestIterator : public InternalIterator { - Data* data; - Random64* rnd; - InternalKeyComparator cmp; - - // Each operation will return error with this probability... - double error_probability = 0; - // ... and add/remove entries with this probability. - double mutation_probability = 0; - // The probability of adding vs removing entries will be chosen so that the - // amount of removed entries stays somewhat close to this number. - double target_hidden_fraction = 0; - // If true, print all mutations to stdout for debugging. - bool trace = false; - - int iter = -1; - Status status_; - - StressTestIterator(Data* _data, Random64* _rnd, const Comparator* _cmp) - : data(_data), rnd(_rnd), cmp(_cmp) {} - - bool Valid() const override { - if (iter >= 0 && iter < (int)data->entries.size()) { - assert(status_.ok()); - return true; - } - return false; - } - - Status status() const override { return status_; } - - bool MaybeFail() { - if (rnd->Next() >= - static_cast<double>(std::numeric_limits<uint64_t>::max()) * - error_probability) { - return false; - } - if (rnd->Next() % 2) { - status_ = Status::Incomplete("test"); - } else { - status_ = Status::IOError("test"); - } - if (trace) { - std::cout << "injecting " << status_.ToString() << std::endl; - } - iter = -1; - return true; - } - - void MaybeMutate() { - if (rnd->Next() >= - static_cast<double>(std::numeric_limits<uint64_t>::max()) * - mutation_probability) { - return; - } - do { - // If too many entries are hidden, hide less, otherwise hide more. - double hide_probability = - data->hidden.size() > data->entries.size() * target_hidden_fraction - ? 1. / 3 - : 2. / 3; - if (data->hidden.empty()) { - hide_probability = 1; - } - bool do_hide = rnd->Next() < - static_cast<double>(std::numeric_limits<uint64_t>::max()) * - hide_probability; - if (do_hide) { - // Hide a random entry. - size_t idx = rnd->Next() % data->entries.size(); - Entry& e = data->entries[idx]; - if (e.visible) { - if (trace) { - std::cout << "hiding idx " << idx << std::endl; - } - e.visible = false; - data->hidden.push_back(idx); - data->recently_touched_keys.insert(e.key); - } else { - // Already hidden. Let's go unhide something instead, just because - // it's easy and it doesn't really matter what we do. - do_hide = false; - } - } - if (!do_hide) { - // Unhide a random entry.
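      // Hiding and unhiding entries simulates the underlying data mutating
      // while a DBIter is live; recently_touched_keys records which user keys
      // changed since the last seek so the reference iterator can avoid
      // checking them exactly.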
- size_t hi = rnd->Next() % data->hidden.size(); - size_t idx = data->hidden[hi]; - if (trace) { - std::cout << "unhiding idx " << idx << std::endl; - } - Entry& e = data->entries[idx]; - assert(!e.visible); - e.visible = true; - data->hidden[hi] = data->hidden.back(); - data->hidden.pop_back(); - data->recently_touched_keys.insert(e.key); - } - } while (rnd->Next() % 3 != 0); // do 3 mutations on average - } - - void SkipForward() { - while (iter < (int)data->entries.size() && !data->entries[iter].visible) { - ++iter; - } - } - void SkipBackward() { - while (iter >= 0 && !data->entries[iter].visible) { - --iter; - } - } - - void SeekToFirst() override { - if (MaybeFail()) return; - MaybeMutate(); - - status_ = Status::OK(); - iter = 0; - SkipForward(); - } - void SeekToLast() override { - if (MaybeFail()) return; - MaybeMutate(); - - status_ = Status::OK(); - iter = (int)data->entries.size() - 1; - SkipBackward(); - } - - void Seek(const Slice& target) override { - if (MaybeFail()) return; - MaybeMutate(); - - status_ = Status::OK(); - // Binary search. - auto it = std::partition_point( - data->entries.begin(), data->entries.end(), - [&](const Entry& e) { return cmp.Compare(e.ikey, target) < 0; }); - iter = (int)(it - data->entries.begin()); - SkipForward(); - } - void SeekForPrev(const Slice& target) override { - if (MaybeFail()) return; - MaybeMutate(); - - status_ = Status::OK(); - // Binary search. - auto it = std::partition_point( - data->entries.begin(), data->entries.end(), - [&](const Entry& e) { return cmp.Compare(e.ikey, target) <= 0; }); - iter = (int)(it - data->entries.begin()); - --iter; - SkipBackward(); - } - - void Next() override { - assert(Valid()); - if (MaybeFail()) return; - MaybeMutate(); - ++iter; - SkipForward(); - } - void Prev() override { - assert(Valid()); - if (MaybeFail()) return; - MaybeMutate(); - --iter; - SkipBackward(); - } - - Slice key() const override { - assert(Valid()); - return data->entries[iter].ikey; - } - Slice value() const override { - assert(Valid()); - return data->entries[iter].value; - } - - bool IsKeyPinned() const override { return true; } - bool IsValuePinned() const override { return true; } -}; - -// A small reimplementation of DBIter, supporting only some of the features, -// and doing everything in O(log n). -// Skips all keys that are in recently_touched_keys. -struct ReferenceIterator { - Data* data; - uint64_t sequence; // ignore entries with sequence number below this - - bool valid = false; - std::string key; - std::string value; - - ReferenceIterator(Data* _data, uint64_t _sequence) - : data(_data), sequence(_sequence) {} - - bool Valid() const { return valid; } - - // Finds the first entry with key - // greater/less/greater-or-equal/less-or-equal than `key`, depending on - // arguments: if `skip`, inequality is strict; if `forward`, it's - // greater/greater-or-equal, otherwise less/less-or-equal. - // Sets `key` to the result. - // If no such key exists, returns false. Doesn't check `visible`. 
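  // In terms of the partition_point predicate below, the four cases are:
  //   forward && !skip -> first entry with key >= `key`
  //   forward &&  skip -> first entry with key >  `key`
  //  !forward && !skip -> last entry with key <= `key`
  //  !forward &&  skip -> last entry with key <  `key`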
- bool FindNextKey(bool skip, bool forward) { - valid = false; - auto it = std::partition_point(data->entries.begin(), data->entries.end(), - [&](const Entry& e) { - if (forward != skip) { - return e.key < key; - } else { - return e.key <= key; - } - }); - if (forward) { - if (it != data->entries.end()) { - key = it->key; - return true; - } - } else { - if (it != data->entries.begin()) { - --it; - key = it->key; - return true; - } - } - return false; - } - - bool FindValueForCurrentKey() { - if (data->recently_touched_keys.count(key)) { - return false; - } - - // Find the first entry for the key. The caller promises that it exists. - auto it = std::partition_point(data->entries.begin(), data->entries.end(), - [&](const Entry& e) { - if (e.key != key) { - return e.key < key; - } - return e.sequence > sequence; - }); - - // Find the first visible entry. - for (;; ++it) { - if (it == data->entries.end()) { - return false; - } - Entry& e = *it; - if (e.key != key) { - return false; - } - assert(e.sequence <= sequence); - if (!e.visible) continue; - if (e.type == kTypeDeletion) { - return false; - } - if (e.type == kTypeValue) { - value = e.value; - valid = true; - return true; - } - assert(e.type == kTypeMerge); - break; - } - - // Collect merge operands. - std::vector operands; - for (; it != data->entries.end(); ++it) { - Entry& e = *it; - if (e.key != key) { - break; - } - assert(e.sequence <= sequence); - if (!e.visible) continue; - if (e.type == kTypeDeletion) { - break; - } - operands.push_back(e.value); - if (e.type == kTypeValue) { - break; - } - } - - // Do a merge. - value = operands.back().ToString(); - for (int i = (int)operands.size() - 2; i >= 0; --i) { - value.append(","); - value.append(operands[i].data(), operands[i].size()); - } - - valid = true; - return true; - } - - // Start at `key` and move until we encounter a valid value. - // `forward` defines the direction of movement. - // If `skip` is true, we're looking for key not equal to `key`. - void DoTheThing(bool skip, bool forward) { - while (FindNextKey(skip, forward) && !FindValueForCurrentKey()) { - skip = true; - } - } - - void Seek(const Slice& target) { - key = target.ToString(); - DoTheThing(false, true); - } - void SeekForPrev(const Slice& target) { - key = target.ToString(); - DoTheThing(false, false); - } - void SeekToFirst() { Seek(""); } - void SeekToLast() { - key = data->entries.back().key; - DoTheThing(false, false); - } - void Next() { - assert(Valid()); - DoTheThing(true, true); - } - void Prev() { - assert(Valid()); - DoTheThing(true, false); - } -}; - -} // anonymous namespace - -// Use an internal iterator that sometimes returns errors and sometimes -// adds/removes entries on the fly. Do random operations on a DBIter and -// check results. -// TODO: can be improved for more coverage: -// * Override IsKeyPinned() and IsValuePinned() to actually use -// PinnedIteratorManager and check that there's no use-after free. -// * Try different combinations of prefix_extractor, total_order_seek, -// prefix_same_as_start, iterate_lower_bound, iterate_upper_bound. -TEST_F(DBIteratorStressTest, StressTest) { - // We use a deterministic RNG, and everything happens in a single thread. 
- Random64 rnd(826909345792864532ll); - - auto gen_key = [&](int max_key) { - assert(max_key > 0); - int len = 0; - int a = max_key; - while (a) { - a /= 10; - ++len; - } - std::string s = std::to_string(rnd.Next() % static_cast(max_key)); - s.insert(0, len - (int)s.size(), '0'); - return s; - }; - - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - ReadOptions ropt; - - size_t num_matching = 0; - size_t num_at_end = 0; - size_t num_not_ok = 0; - size_t num_recently_removed = 0; - - // Number of iterations for each combination of parameters - // (there are ~250 of those). - // Tweak this to change the test run time. - // As of the time of writing, the test takes ~4 seconds for value of 5000. - const int num_iterations = 5000; - // Enable this to print all the operations for debugging. - bool trace = FLAGS_verbose; - - for (int num_entries : {5, 10, 100}) { - for (double key_space : {0.1, 1.0, 3.0}) { - for (ValueType prevalent_entry_type : - {kTypeValue, kTypeDeletion, kTypeMerge}) { - for (double error_probability : {0.01, 0.1}) { - for (double mutation_probability : {0.01, 0.5}) { - for (double target_hidden_fraction : {0.1, 0.5}) { - std::string trace_str = - "entries: " + std::to_string(num_entries) + - ", key_space: " + std::to_string(key_space) + - ", error_probability: " + std::to_string(error_probability) + - ", mutation_probability: " + - std::to_string(mutation_probability) + - ", target_hidden_fraction: " + - std::to_string(target_hidden_fraction); - SCOPED_TRACE(trace_str); - if (trace) { - std::cout << trace_str << std::endl; - } - - // Generate data. - Data data; - int max_key = (int)(num_entries * key_space) + 1; - for (int i = 0; i < num_entries; ++i) { - Entry e; - e.key = gen_key(max_key); - if (rnd.Next() % 10 != 0) { - e.type = prevalent_entry_type; - } else { - const ValueType types[] = {kTypeValue, kTypeDeletion, - kTypeMerge}; - e.type = - types[rnd.Next() % (sizeof(types) / sizeof(types[0]))]; - } - e.sequence = i; - e.value = "v" + std::to_string(i); - ParsedInternalKey internal_key(e.key, e.sequence, e.type); - AppendInternalKey(&e.ikey, internal_key); - - data.entries.push_back(e); - } - std::sort(data.entries.begin(), data.entries.end()); - if (trace) { - std::cout << "entries:"; - for (size_t i = 0; i < data.entries.size(); ++i) { - Entry& e = data.entries[i]; - std::cout << "\n idx " << i << ": \"" << e.key << "\": \"" - << e.value << "\" seq: " << e.sequence << " type: " - << (e.type == kTypeValue ? "val" - : e.type == kTypeDeletion ? "del" - : "merge"); - } - std::cout << std::endl; - } - - std::unique_ptr db_iter; - std::unique_ptr ref_iter; - for (int iteration = 0; iteration < num_iterations; ++iteration) { - SCOPED_TRACE(iteration); - // Create a new iterator every ~30 operations. 
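          // The snapshot sequence is drawn from [0, num_entries + 1], so
          // different iterators see anywhere from just the oldest entries up
          // to every entry; the DBIter under test and the reference iterator
          // are built over the same sequence.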
- if (db_iter == nullptr || rnd.Next() % 30 == 0) { - uint64_t sequence = rnd.Next() % (data.entries.size() + 2); - ref_iter.reset(new ReferenceIterator(&data, sequence)); - if (trace) { - std::cout << "new iterator, seq: " << sequence << std::endl; - } - - auto internal_iter = - new StressTestIterator(&data, &rnd, BytewiseComparator()); - internal_iter->error_probability = error_probability; - internal_iter->mutation_probability = mutation_probability; - internal_iter->target_hidden_fraction = - target_hidden_fraction; - internal_iter->trace = trace; - db_iter.reset(NewDBIterator( - env_, ropt, ImmutableOptions(options), - MutableCFOptions(options), BytewiseComparator(), - internal_iter, nullptr /* version */, sequence, - options.max_sequential_skip_in_iterations, - nullptr /*read_callback*/)); - } - - // Do a random operation. It's important to do it on ref_it - // later than on db_iter to make sure ref_it sees the correct - // recently_touched_keys. - std::string old_key; - bool forward = rnd.Next() % 2 > 0; - // Do Next()/Prev() ~90% of the time. - bool seek = !ref_iter->Valid() || rnd.Next() % 10 == 0; - if (trace) { - std::cout << iteration << ": "; - } - - if (!seek) { - assert(db_iter->Valid()); - old_key = ref_iter->key; - if (trace) { - std::cout << (forward ? "Next" : "Prev") << std::endl; - } - - if (forward) { - db_iter->Next(); - ref_iter->Next(); - } else { - db_iter->Prev(); - ref_iter->Prev(); - } - } else { - data.recently_touched_keys.clear(); - // Do SeekToFirst less often than Seek. - if (rnd.Next() % 4 == 0) { - if (trace) { - std::cout << (forward ? "SeekToFirst" : "SeekToLast") - << std::endl; - } - - if (forward) { - old_key = ""; - db_iter->SeekToFirst(); - ref_iter->SeekToFirst(); - } else { - old_key = data.entries.back().key; - db_iter->SeekToLast(); - ref_iter->SeekToLast(); - } - } else { - old_key = gen_key(max_key); - if (trace) { - std::cout << (forward ? "Seek" : "SeekForPrev") << " \"" - << old_key << '"' << std::endl; - } - if (forward) { - db_iter->Seek(old_key); - ref_iter->Seek(old_key); - } else { - db_iter->SeekForPrev(old_key); - ref_iter->SeekForPrev(old_key); - } - } - } - - // Check the result. - if (db_iter->Valid()) { - ASSERT_TRUE(db_iter->status().ok()); - if (data.recently_touched_keys.count( - db_iter->key().ToString())) { - // Ended on a key that may have been mutated during the - // operation. Reference iterator skips such keys, so we - // can't check the exact result. - - // Check that the key moved in the right direction. - if (forward) { - if (seek) - ASSERT_GE(db_iter->key().ToString(), old_key); - else - ASSERT_GT(db_iter->key().ToString(), old_key); - } else { - if (seek) - ASSERT_LE(db_iter->key().ToString(), old_key); - else - ASSERT_LT(db_iter->key().ToString(), old_key); - } - - if (ref_iter->Valid()) { - // Check that DBIter didn't miss any non-mutated key. - if (forward) { - ASSERT_LT(db_iter->key().ToString(), ref_iter->key); - } else { - ASSERT_GT(db_iter->key().ToString(), ref_iter->key); - } - } - // Tell the next iteration of the loop to reseek the - // iterators. - ref_iter->valid = false; - - ++num_recently_removed; - } else { - ASSERT_TRUE(ref_iter->Valid()); - ASSERT_EQ(ref_iter->key, db_iter->key().ToString()); - ASSERT_EQ(ref_iter->value, db_iter->value()); - ++num_matching; - } - } else if (db_iter->status().ok()) { - ASSERT_FALSE(ref_iter->Valid()); - ++num_at_end; - } else { - // Non-ok status. Nothing to check here. - // Tell the next iteration of the loop to reseek the - // iterators. 
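            // Invalidating the reference iterator forces the next loop
            // iteration down the seek path (Next()/Prev() are only issued
            // while ref_iter is valid), which also clears
            // recently_touched_keys before comparisons resume.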
- ref_iter->valid = false; - ++num_not_ok; - } - } - } - } - } - } - } - } - - // Check that all cases were hit many times. - EXPECT_GT(num_matching, 10000); - EXPECT_GT(num_at_end, 10000); - EXPECT_GT(num_not_ok, 10000); - EXPECT_GT(num_recently_removed, 10000); - - std::cout << "stats:\n exact matches: " << num_matching - << "\n end reached: " << num_at_end - << "\n non-ok status: " << num_not_ok - << "\n mutated on the fly: " << num_recently_removed << std::endl; -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - ParseCommandLineFlags(&argc, &argv, true); - return RUN_ALL_TESTS(); -} diff --git a/db/db_iter_test.cc b/db/db_iter_test.cc deleted file mode 100644 index 65290bfad..000000000 --- a/db/db_iter_test.cc +++ /dev/null @@ -1,3195 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/db_iter.h" - -#include -#include -#include -#include - -#include "db/dbformat.h" -#include "rocksdb/comparator.h" -#include "rocksdb/options.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/slice.h" -#include "rocksdb/statistics.h" -#include "table/iterator_wrapper.h" -#include "table/merging_iterator.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "util/string_util.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -static uint64_t TestGetTickerCount(const Options& options, - Tickers ticker_type) { - return options.statistics->getTickerCount(ticker_type); -} - -class TestIterator : public InternalIterator { - public: - explicit TestIterator(const Comparator* comparator) - : initialized_(false), - valid_(false), - sequence_number_(0), - iter_(0), - cmp(comparator) { - data_.reserve(16); - } - - void AddPut(std::string argkey, std::string argvalue) { - Add(argkey, kTypeValue, argvalue); - } - - void AddDeletion(std::string argkey) { - Add(argkey, kTypeDeletion, std::string()); - } - - void AddSingleDeletion(std::string argkey) { - Add(argkey, kTypeSingleDeletion, std::string()); - } - - void AddMerge(std::string argkey, std::string argvalue) { - Add(argkey, kTypeMerge, argvalue); - } - - void Add(std::string argkey, ValueType type, std::string argvalue) { - Add(argkey, type, argvalue, sequence_number_++); - } - - void Add(std::string argkey, ValueType type, std::string argvalue, - size_t seq_num, bool update_iter = false) { - valid_ = true; - ParsedInternalKey internal_key(argkey, seq_num, type); - data_.push_back( - std::pair(std::string(), argvalue)); - AppendInternalKey(&data_.back().first, internal_key); - if (update_iter && valid_ && cmp.Compare(data_.back().first, key()) < 0) { - // insert a key smaller than current key - Finish(); - // data_[iter_] is not anymore the current element of the iterator. - // Increment it to reposition it to the right position. - iter_++; - } - } - - // should be called before operations with iterator - void Finish() { - initialized_ = true; - std::sort(data_.begin(), data_.end(), - [this](std::pair a, - std::pair b) { - return (cmp.Compare(a.first, b.first) < 0); - }); - } - - // Removes the key from the set of keys over which this iterator iterates. - // Not to be confused with AddDeletion(). 
- // If the iterator is currently positioned on this key, the deletion will - // apply next time the iterator moves. - // Used for simulating ForwardIterator updating to a new version that doesn't - // have some of the keys (e.g. after compaction with a filter). - void Vanish(std::string _key) { - if (valid_ && data_[iter_].first == _key) { - delete_current_ = true; - return; - } - for (auto it = data_.begin(); it != data_.end(); ++it) { - ParsedInternalKey ikey; - Status pik_status = - ParseInternalKey(it->first, &ikey, true /* log_err_key */); - pik_status.PermitUncheckedError(); - assert(pik_status.ok()); - if (!pik_status.ok() || ikey.user_key != _key) { - continue; - } - if (valid_ && data_.begin() + iter_ > it) { - --iter_; - } - data_.erase(it); - return; - } - assert(false); - } - - // Number of operations done on this iterator since construction. - size_t steps() const { return steps_; } - - bool Valid() const override { - assert(initialized_); - return valid_; - } - - void SeekToFirst() override { - assert(initialized_); - ++steps_; - DeleteCurrentIfNeeded(); - valid_ = (data_.size() > 0); - iter_ = 0; - } - - void SeekToLast() override { - assert(initialized_); - ++steps_; - DeleteCurrentIfNeeded(); - valid_ = (data_.size() > 0); - iter_ = data_.size() - 1; - } - - void Seek(const Slice& target) override { - assert(initialized_); - SeekToFirst(); - ++steps_; - if (!valid_) { - return; - } - while (iter_ < data_.size() && - (cmp.Compare(data_[iter_].first, target) < 0)) { - ++iter_; - } - - if (iter_ == data_.size()) { - valid_ = false; - } - } - - void SeekForPrev(const Slice& target) override { - assert(initialized_); - DeleteCurrentIfNeeded(); - SeekForPrevImpl(target, &cmp); - } - - void Next() override { - assert(initialized_); - assert(valid_); - assert(iter_ < data_.size()); - - ++steps_; - if (delete_current_) { - DeleteCurrentIfNeeded(); - } else { - ++iter_; - } - valid_ = iter_ < data_.size(); - } - - void Prev() override { - assert(initialized_); - assert(valid_); - assert(iter_ < data_.size()); - - ++steps_; - DeleteCurrentIfNeeded(); - if (iter_ == 0) { - valid_ = false; - } else { - --iter_; - } - } - - Slice key() const override { - assert(initialized_); - return data_[iter_].first; - } - - Slice value() const override { - assert(initialized_); - return data_[iter_].second; - } - - Status status() const override { - assert(initialized_); - return Status::OK(); - } - - bool IsKeyPinned() const override { return true; } - bool IsValuePinned() const override { return true; } - - private: - bool initialized_; - bool valid_; - size_t sequence_number_; - size_t iter_; - size_t steps_ = 0; - - InternalKeyComparator cmp; - std::vector> data_; - bool delete_current_ = false; - - void DeleteCurrentIfNeeded() { - if (!delete_current_) { - return; - } - data_.erase(data_.begin() + iter_); - delete_current_ = false; - } -}; - -class DBIteratorTest : public testing::Test { - public: - Env* env_; - - DBIteratorTest() : env_(Env::Default()) {} -}; - -TEST_F(DBIteratorTest, DBIteratorPrevNext) { - Options options; - ImmutableOptions ioptions = ImmutableOptions(options); - MutableCFOptions mutable_cf_options = MutableCFOptions(options); - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddDeletion("a"); - internal_iter->AddDeletion("a"); - internal_iter->AddDeletion("a"); - internal_iter->AddDeletion("a"); - internal_iter->AddPut("a", "val_a"); - - internal_iter->AddPut("b", "val_b"); - internal_iter->Finish(); - - ReadOptions ro; - 
std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "val_b"); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "val_b"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - } - // Test to check the SeekToLast() with iterate_upper_bound not set - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->Finish(); - - ReadOptions ro; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - } - - // Test to check the SeekToLast() with iterate_upper_bound set - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddPut("d", "val_d"); - internal_iter->AddPut("e", "val_e"); - internal_iter->AddPut("f", "val_f"); - internal_iter->Finish(); - - Slice prefix("d"); - - ReadOptions ro; - ro.iterate_upper_bound = &prefix; - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - } - // Test to check the SeekToLast() iterate_upper_bound set to a key that - // is not Put yet - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddPut("d", "val_d"); - internal_iter->Finish(); - - Slice prefix("z"); - - ReadOptions ro; - ro.iterate_upper_bound = &prefix; - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "d"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "d"); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - } - // Test to 
check the SeekToLast() with iterate_upper_bound set to the - // first key - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddPut("b", "val_b"); - internal_iter->Finish(); - - Slice prefix("a"); - - ReadOptions ro; - ro.iterate_upper_bound = &prefix; - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - // Test case to check SeekToLast with iterate_upper_bound set - // (same key put may times - SeekToLast should start with the - // maximum sequence id of the upper bound) - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddPut("c", "val_c"); - internal_iter->Finish(); - - Slice prefix("c"); - - ReadOptions ro; - ro.iterate_upper_bound = &prefix; - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 7 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - SetPerfLevel(kEnableCount); - ASSERT_TRUE(GetPerfLevel() == kEnableCount); - - get_perf_context()->Reset(); - db_iter->SeekToLast(); - - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(static_cast(get_perf_context()->internal_key_skipped_count), - 1); - ASSERT_EQ(db_iter->key().ToString(), "b"); - - SetPerfLevel(kDisable); - } - // Test to check the SeekToLast() with the iterate_upper_bound set - // (Checking the value of the key which has sequence ids greater than - // and less that the iterator's sequence id) - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - - internal_iter->AddPut("a", "val_a1"); - internal_iter->AddPut("a", "val_a2"); - internal_iter->AddPut("b", "val_b1"); - internal_iter->AddPut("c", "val_c1"); - internal_iter->AddPut("c", "val_c2"); - internal_iter->AddPut("c", "val_c3"); - internal_iter->AddPut("b", "val_b2"); - internal_iter->AddPut("d", "val_d1"); - internal_iter->Finish(); - - Slice prefix("c"); - - ReadOptions ro; - ro.iterate_upper_bound = &prefix; - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 4 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "val_b1"); - } - - // Test to check the SeekToLast() with the iterate_upper_bound set to the - // key that is deleted - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddDeletion("a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->Finish(); - - Slice prefix("a"); - - 
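These SeekToLast() cases all hinge on ReadOptions::iterate_upper_bound being an exclusive bound that reverse iteration must respect, including the requirement that the bound Slice outlive the iterator. A small stand-alone sketch of the same behaviour against a real DB (path and keys are illustrative assumptions):

// Illustrative sketch; not part of the deleted test file.
#include <cassert>
#include <memory>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/db_iter_upper_bound_demo", &db).ok());

  for (const char* k : {"a", "b", "c", "d", "e", "f"}) {
    assert(db->Put(rocksdb::WriteOptions(), k, std::string("val_") + k).ok());
  }

  // The bound Slice must stay alive for as long as the iterator uses it.
  rocksdb::Slice upper("d");
  rocksdb::ReadOptions ro;
  ro.iterate_upper_bound = &upper;  // exclusive upper bound

  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(ro));
  it->SeekToLast();  // lands on the largest key strictly below "d"
  assert(it->Valid() && it->key().ToString() == "c");
  it->Next();
  assert(!it->Valid());  // cannot step past the bound

  it.reset();
  delete db;
  return 0;
}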
ReadOptions ro; - ro.iterate_upper_bound = &prefix; - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - // Test to check the SeekToLast() with the iterate_upper_bound set - // (Deletion cases) - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddDeletion("b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->Finish(); - - Slice prefix("c"); - - ReadOptions ro; - ro.iterate_upper_bound = &prefix; - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - } - // Test to check the SeekToLast() with iterate_upper_bound set - // (Deletion cases - Lot of internal keys after the upper_bound - // is deleted) - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddDeletion("c"); - internal_iter->AddDeletion("d"); - internal_iter->AddDeletion("e"); - internal_iter->AddDeletion("f"); - internal_iter->AddDeletion("g"); - internal_iter->AddDeletion("h"); - internal_iter->Finish(); - - Slice prefix("c"); - - ReadOptions ro; - ro.iterate_upper_bound = &prefix; - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 7 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - SetPerfLevel(kEnableCount); - ASSERT_TRUE(GetPerfLevel() == kEnableCount); - - get_perf_context()->Reset(); - db_iter->SeekToLast(); - - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ( - static_cast(get_perf_context()->internal_delete_skipped_count), 0); - ASSERT_EQ(db_iter->key().ToString(), "b"); - - SetPerfLevel(kDisable); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddDeletion("a"); - internal_iter->AddDeletion("a"); - internal_iter->AddDeletion("a"); - internal_iter->AddDeletion("a"); - internal_iter->AddPut("a", "val_a"); - - internal_iter->AddPut("b", "val_b"); - internal_iter->Finish(); - - ReadOptions ro; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "val_b"); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - 
ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->Finish(); - - ReadOptions ro; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 2 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "val_b"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "val_b"); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("a", "val_a"); - - internal_iter->AddPut("b", "val_b"); - - internal_iter->AddPut("c", "val_c"); - internal_iter->Finish(); - - ReadOptions ro; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "val_b"); - - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - } -} - -TEST_F(DBIteratorTest, DBIteratorEmpty) { - Options options; - ImmutableOptions ioptions = ImmutableOptions(options); - MutableCFOptions mutable_cf_options = MutableCFOptions(options); - ReadOptions ro; - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 0 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 0 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToFirst(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } -} - -TEST_F(DBIteratorTest, DBIteratorUseSkipCountSkips) { - ReadOptions ro; - 
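The test that follows drives Prev() across keys that carry hundreds of newer versions and checks NUMBER_OF_RESEEKS_IN_ITERATION, the ticker that counts how often the iterator stopped stepping over stale entries and issued a fresh seek instead. A hedged sketch of reading the same ticker through the public Statistics object (scratch path and workload are illustrative, and the exact count depends on compaction state and tuning):

// Illustrative sketch; not part of the deleted test file.
#include <cassert>
#include <cstdint>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();
  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/db_iter_reseek_demo", &db).ok());

  // Repeated overwrites leave stale versions that a reverse scan may have
  // to step over until it gives up and reseeks.
  for (int i = 0; i < 200; ++i) {
    assert(db->Put(rocksdb::WriteOptions(), "a", "a").ok());
    assert(db->Put(rocksdb::WriteOptions(), "b", "b").ok());
    assert(db->Put(rocksdb::WriteOptions(), "c", "c").ok());
  }

  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(rocksdb::ReadOptions()));
  for (it->SeekToLast(); it->Valid(); it->Prev()) {
  }
  assert(it->status().ok());

  // The same ticker the test inspects via TestGetTickerCount().
  uint64_t reseeks =
      options.statistics->getTickerCount(rocksdb::NUMBER_OF_RESEEKS_IN_ITERATION);
  (void)reseeks;

  it.reset();
  delete db;
  return 0;
}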
Options options; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - for (size_t i = 0; i < 200; ++i) { - internal_iter->AddPut("a", "a"); - internal_iter->AddPut("b", "b"); - internal_iter->AddPut("c", "c"); - } - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 2 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "c"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1u); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "b"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2u); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "a"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3u); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3u); -} - -TEST_F(DBIteratorTest, DBIteratorUseSkip) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - ImmutableOptions ioptions = ImmutableOptions(options); - MutableCFOptions mutable_cf_options = MutableCFOptions(options); - - { - for (size_t i = 0; i < 200; ++i) { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("b", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - for (size_t k = 0; k < 200; ++k) { - internal_iter->AddPut("c", std::to_string(k)); - } - internal_iter->Finish(); - - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, i + 2 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), std::to_string(i)); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_2"); - db_iter->Prev(); - - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - } - - { - for (size_t i = 0; i < 200; ++i) { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("b", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - for (size_t k = 0; k < 200; ++k) { - internal_iter->AddDeletion("c"); - } - internal_iter->AddPut("c", "200"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, i + 2 /* sequence */, - options.max_sequential_skip_in_iterations, 
- nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_2"); - db_iter->Prev(); - - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("b", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - for (size_t i = 0; i < 200; ++i) { - internal_iter->AddDeletion("c"); - } - internal_iter->AddPut("c", "200"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 202 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "200"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_2"); - db_iter->Prev(); - - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - } - - { - for (size_t i = 0; i < 200; ++i) { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - for (size_t k = 0; k < 200; ++k) { - internal_iter->AddDeletion("c"); - } - internal_iter->AddPut("c", "200"); - internal_iter->Finish(); - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, i /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - - db_iter->SeekToFirst(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - for (size_t i = 0; i < 200; ++i) { - internal_iter->AddDeletion("c"); - } - internal_iter->AddPut("c", "200"); - internal_iter->Finish(); - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 200 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "200"); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "200"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - - { - for (size_t i = 0; i < 200; ++i) { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("b", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - for (size_t k = 0; k < 200; ++k) { - internal_iter->AddPut("d", std::to_string(k)); - } - - for (size_t k = 0; k < 200; ++k) { - internal_iter->AddPut("c", std::to_string(k)); - } - 
internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, i + 2 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "d"); - ASSERT_EQ(db_iter->value().ToString(), std::to_string(i)); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_2"); - db_iter->Prev(); - - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - } - - { - for (size_t i = 0; i < 200; ++i) { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("b", "b"); - internal_iter->AddMerge("a", "a"); - for (size_t k = 0; k < 200; ++k) { - internal_iter->AddMerge("c", std::to_string(k)); - } - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, i + 2 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "c"); - std::string merge_result = "0"; - for (size_t j = 1; j <= i; ++j) { - merge_result += "," + std::to_string(j); - } - ASSERT_EQ(db_iter->value().ToString(), merge_result); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "b"); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "a"); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - } -} - -TEST_F(DBIteratorTest, DBIteratorSkipInternalKeys) { - Options options; - ImmutableOptions ioptions = ImmutableOptions(options); - MutableCFOptions mutable_cf_options = MutableCFOptions(options); - ReadOptions ro; - - // Basic test case ... Make sure explicityly passing the default value works. - // Skipping internal keys is disabled by default, when the value is 0. 
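From the application side, ReadOptions::max_skippable_internal_keys bounds how many hidden entries a single Next() or Prev() may step over before the iterator gives up and reports Status::Incomplete(), exactly what the cases below exercise. A minimal sketch under assumed data (path, keys, and the retry comment are illustrative, not from the deleted test):

// Illustrative sketch; not part of the deleted test file.
#include <cassert>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/db_iter_skip_demo", &db).ok());

  assert(db->Put(rocksdb::WriteOptions(), "a", "val_a").ok());
  // Tombstoned keys between "a" and "e"; while they are still in the
  // memtable the iterator has to step over them as hidden internal keys.
  for (const char* k : {"b", "c", "d"}) {
    assert(db->Put(rocksdb::WriteOptions(), k, "tmp").ok());
    assert(db->Delete(rocksdb::WriteOptions(), k).ok());
  }
  assert(db->Put(rocksdb::WriteOptions(), "e", "val_e").ok());

  rocksdb::ReadOptions ro;
  ro.max_skippable_internal_keys = 2;  // 0 (the default) disables the limit

  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(ro));
  it->SeekToFirst();
  assert(it->Valid() && it->key().ToString() == "a");
  it->Next();
  if (!it->Valid() && it->status().IsIncomplete()) {
    // Too many hidden entries before the next visible key; the caller
    // decides whether to retry with a larger budget or bail out.
  }

  it.reset();
  delete db;
  return 0;
}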
- { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddDeletion("b"); - internal_iter->AddDeletion("b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddDeletion("c"); - internal_iter->AddPut("d", "val_d"); - internal_iter->Finish(); - - ro.max_skippable_internal_keys = 0; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "d"); - ASSERT_EQ(db_iter->value().ToString(), "val_d"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().ok()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "d"); - ASSERT_EQ(db_iter->value().ToString(), "val_d"); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } - - // Test to make sure that the request will *not* fail as incomplete if - // num_internal_keys_skipped is *equal* to max_skippable_internal_keys - // threshold. (It will fail as incomplete only when the threshold is - // exceeded.) - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddDeletion("b"); - internal_iter->AddDeletion("b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->Finish(); - - ro.max_skippable_internal_keys = 2; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().ok()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - - db_iter->Prev(); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().ok()); - } - - // Fail the request as incomplete when num_internal_keys_skipped > - // max_skippable_internal_keys - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddDeletion("b"); - internal_iter->AddDeletion("b"); - internal_iter->AddDeletion("b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->Finish(); - - ro.max_skippable_internal_keys = 2; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version 
*/, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - } - - // Test that the num_internal_keys_skipped counter resets after a successful - // read. - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddDeletion("b"); - internal_iter->AddDeletion("b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddDeletion("d"); - internal_iter->AddDeletion("d"); - internal_iter->AddDeletion("d"); - internal_iter->AddPut("e", "val_e"); - internal_iter->Finish(); - - ro.max_skippable_internal_keys = 2; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - - db_iter->Next(); // num_internal_keys_skipped counter resets here. - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - } - - // Test that the num_internal_keys_skipped counter resets after a successful - // read. - // Reverse direction - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddDeletion("b"); - internal_iter->AddDeletion("b"); - internal_iter->AddDeletion("b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddDeletion("d"); - internal_iter->AddDeletion("d"); - internal_iter->AddPut("e", "val_e"); - internal_iter->Finish(); - - ro.max_skippable_internal_keys = 2; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "e"); - ASSERT_EQ(db_iter->value().ToString(), "val_e"); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - - db_iter->Prev(); // num_internal_keys_skipped counter resets here. 
- ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - } - - // Test that skipping separate keys is handled - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddDeletion("b"); - internal_iter->AddDeletion("c"); - internal_iter->AddDeletion("d"); - internal_iter->AddPut("e", "val_e"); - internal_iter->Finish(); - - ro.max_skippable_internal_keys = 2; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "e"); - ASSERT_EQ(db_iter->value().ToString(), "val_e"); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - } - - // Test if alternating puts and deletes of the same key are handled correctly. - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddPut("b", "val_b"); - internal_iter->AddDeletion("b"); - internal_iter->AddPut("c", "val_c"); - internal_iter->AddDeletion("c"); - internal_iter->AddPut("d", "val_d"); - internal_iter->AddDeletion("d"); - internal_iter->AddPut("e", "val_e"); - internal_iter->Finish(); - - ro.max_skippable_internal_keys = 2; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "e"); - ASSERT_EQ(db_iter->value().ToString(), "val_e"); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - } - - // Test for large number of skippable internal keys with *default* - // max_sequential_skip_in_iterations. 
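The expected outcome in the loop below flips depending on which limit is reached first: per the test's own condition, budgets of options.max_sequential_skip_in_iterations + 1 or less still trip Incomplete, while larger budgets survive because the iterator reseeks straight past the pile of hidden "b" entries once the sequential-skip threshold is exceeded. The threshold itself is an ordinary column-family option; a minimal sketch (the function name is illustrative):

// Illustrative sketch; not part of the deleted test file.
#include "rocksdb/options.h"

rocksdb::Options MakeIteratorTunedOptions() {
  rocksdb::Options options;
  // After this many consecutive hidden or stale internal keys, the iterator
  // stops stepping and issues a fresh Seek instead (the default is 8).
  options.max_sequential_skip_in_iterations = 1000;
  // Per-scan limits such as ReadOptions::max_skippable_internal_keys are set
  // on the ReadOptions passed to NewIterator(), not here.
  return options;
}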
- { - for (size_t i = 1; i <= 200; ++i) { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - for (size_t j = 1; j <= i; ++j) { - internal_iter->AddPut("b", "val_b"); - internal_iter->AddDeletion("b"); - } - internal_iter->AddPut("c", "val_c"); - internal_iter->Finish(); - - ro.max_skippable_internal_keys = i; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 2 * i + 1 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - if ((options.max_sequential_skip_in_iterations + 1) >= - ro.max_skippable_internal_keys) { - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - } else { - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - } - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - - db_iter->Prev(); - if ((options.max_sequential_skip_in_iterations + 1) >= - ro.max_skippable_internal_keys) { - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - } else { - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - } - } - } - - // Test for large number of skippable internal keys with a *non-default* - // max_sequential_skip_in_iterations. - { - for (size_t i = 1; i <= 200; ++i) { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - for (size_t j = 1; j <= i; ++j) { - internal_iter->AddPut("b", "val_b"); - internal_iter->AddDeletion("b"); - } - internal_iter->AddPut("c", "val_c"); - internal_iter->Finish(); - - options.max_sequential_skip_in_iterations = 1000; - ro.max_skippable_internal_keys = i; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 2 * i + 1 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "val_a"); - - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "val_c"); - - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - ASSERT_TRUE(db_iter->status().IsIncomplete()); - } - } -} - -TEST_F(DBIteratorTest, DBIterator1) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "0"); - internal_iter->AddPut("b", "0"); - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("a", "1"); - internal_iter->AddMerge("b", "2"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version 
*/, - 1 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "0"); - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - db_iter->Next(); - ASSERT_FALSE(db_iter->Valid()); -} - -TEST_F(DBIteratorTest, DBIterator2) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "0"); - internal_iter->AddPut("b", "0"); - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("a", "1"); - internal_iter->AddMerge("b", "2"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 0 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "0"); - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); -} - -TEST_F(DBIteratorTest, DBIterator3) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "0"); - internal_iter->AddPut("b", "0"); - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("a", "1"); - internal_iter->AddMerge("b", "2"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 2 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "0"); - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); -} - -TEST_F(DBIteratorTest, DBIterator4) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "0"); - internal_iter->AddPut("b", "0"); - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("a", "1"); - internal_iter->AddMerge("b", "2"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 4 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "0,1"); - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "2"); - db_iter->Next(); - ASSERT_TRUE(!db_iter->Valid()); -} - -TEST_F(DBIteratorTest, DBIterator5) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - ImmutableOptions ioptions = ImmutableOptions(options); - MutableCFOptions mutable_cf_options = 
MutableCFOptions(options); - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddPut("a", "put_1"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 0 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddPut("a", "put_1"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 1 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddPut("a", "put_1"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 2 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2,merge_3"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddPut("a", "put_1"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 3 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "put_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new 
TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddPut("a", "put_1"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 4 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "put_1,merge_4"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddPut("a", "put_1"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 5 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "put_1,merge_4,merge_5"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddPut("a", "put_1"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 6 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "put_1,merge_4,merge_5,merge_6"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - // put, singledelete, merge - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "val_a"); - internal_iter->AddSingleDeletion("a"); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddPut("b", "val_b"); - internal_iter->Finish(); - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 10 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->Seek("b"); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - } -} - -TEST_F(DBIteratorTest, DBIterator6) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - ImmutableOptions ioptions = 
ImmutableOptions(options); - MutableCFOptions mutable_cf_options = MutableCFOptions(options); - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddDeletion("a"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 0 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddDeletion("a"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 1 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddDeletion("a"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 2 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2,merge_3"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddDeletion("a"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 3 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - 
internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddDeletion("a"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 4 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_4"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddDeletion("a"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 5 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("a", "merge_3"); - internal_iter->AddDeletion("a"); - internal_iter->AddMerge("a", "merge_4"); - internal_iter->AddMerge("a", "merge_5"); - internal_iter->AddMerge("a", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 6 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5,merge_6"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } -} - -TEST_F(DBIteratorTest, DBIterator7) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - ImmutableOptions ioptions = ImmutableOptions(options); - MutableCFOptions mutable_cf_options = MutableCFOptions(options); - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddPut("b", "val"); - internal_iter->AddMerge("b", "merge_2"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_3"); - - internal_iter->AddMerge("c", "merge_4"); - internal_iter->AddMerge("c", "merge_5"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_6"); - internal_iter->AddMerge("b", "merge_7"); - internal_iter->AddMerge("b", "merge_8"); - internal_iter->AddMerge("b", "merge_9"); - internal_iter->AddMerge("b", "merge_10"); - internal_iter->AddMerge("b", "merge_11"); - - internal_iter->AddDeletion("c"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - 
internal_iter, nullptr /* version */, 0 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddPut("b", "val"); - internal_iter->AddMerge("b", "merge_2"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_3"); - - internal_iter->AddMerge("c", "merge_4"); - internal_iter->AddMerge("c", "merge_5"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_6"); - internal_iter->AddMerge("b", "merge_7"); - internal_iter->AddMerge("b", "merge_8"); - internal_iter->AddMerge("b", "merge_9"); - internal_iter->AddMerge("b", "merge_10"); - internal_iter->AddMerge("b", "merge_11"); - - internal_iter->AddDeletion("c"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 2 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "val,merge_2"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddPut("b", "val"); - internal_iter->AddMerge("b", "merge_2"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_3"); - - internal_iter->AddMerge("c", "merge_4"); - internal_iter->AddMerge("c", "merge_5"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_6"); - internal_iter->AddMerge("b", "merge_7"); - internal_iter->AddMerge("b", "merge_8"); - internal_iter->AddMerge("b", "merge_9"); - internal_iter->AddMerge("b", "merge_10"); - internal_iter->AddMerge("b", "merge_11"); - - internal_iter->AddDeletion("c"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 4 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_3"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddPut("b", "val"); - internal_iter->AddMerge("b", "merge_2"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_3"); - - internal_iter->AddMerge("c", "merge_4"); - internal_iter->AddMerge("c", "merge_5"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_6"); - internal_iter->AddMerge("b", "merge_7"); - internal_iter->AddMerge("b", 
"merge_8"); - internal_iter->AddMerge("b", "merge_9"); - internal_iter->AddMerge("b", "merge_10"); - internal_iter->AddMerge("b", "merge_11"); - - internal_iter->AddDeletion("c"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 5 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "merge_4"); - db_iter->Prev(); - - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_3"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddPut("b", "val"); - internal_iter->AddMerge("b", "merge_2"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_3"); - - internal_iter->AddMerge("c", "merge_4"); - internal_iter->AddMerge("c", "merge_5"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_6"); - internal_iter->AddMerge("b", "merge_7"); - internal_iter->AddMerge("b", "merge_8"); - internal_iter->AddMerge("b", "merge_9"); - internal_iter->AddMerge("b", "merge_10"); - internal_iter->AddMerge("b", "merge_11"); - - internal_iter->AddDeletion("c"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 6 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_3"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddPut("b", "val"); - internal_iter->AddMerge("b", "merge_2"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_3"); - - internal_iter->AddMerge("c", "merge_4"); - internal_iter->AddMerge("c", "merge_5"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_6"); - internal_iter->AddMerge("b", "merge_7"); - internal_iter->AddMerge("b", "merge_8"); - internal_iter->AddMerge("b", "merge_9"); - internal_iter->AddMerge("b", "merge_10"); - internal_iter->AddMerge("b", "merge_11"); - - internal_iter->AddDeletion("c"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 7 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - 
ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddPut("b", "val"); - internal_iter->AddMerge("b", "merge_2"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_3"); - - internal_iter->AddMerge("c", "merge_4"); - internal_iter->AddMerge("c", "merge_5"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_6"); - internal_iter->AddMerge("b", "merge_7"); - internal_iter->AddMerge("b", "merge_8"); - internal_iter->AddMerge("b", "merge_9"); - internal_iter->AddMerge("b", "merge_10"); - internal_iter->AddMerge("b", "merge_11"); - - internal_iter->AddDeletion("c"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 9 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_6,merge_7"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddPut("b", "val"); - internal_iter->AddMerge("b", "merge_2"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_3"); - - internal_iter->AddMerge("c", "merge_4"); - internal_iter->AddMerge("c", "merge_5"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_6"); - internal_iter->AddMerge("b", "merge_7"); - internal_iter->AddMerge("b", "merge_8"); - internal_iter->AddMerge("b", "merge_9"); - internal_iter->AddMerge("b", "merge_10"); - internal_iter->AddMerge("b", "merge_11"); - - internal_iter->AddDeletion("c"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 13 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), - "merge_6,merge_7,merge_8,merge_9,merge_10,merge_11"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } - - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - 
internal_iter->AddPut("b", "val"); - internal_iter->AddMerge("b", "merge_2"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_3"); - - internal_iter->AddMerge("c", "merge_4"); - internal_iter->AddMerge("c", "merge_5"); - - internal_iter->AddDeletion("b"); - internal_iter->AddMerge("b", "merge_6"); - internal_iter->AddMerge("b", "merge_7"); - internal_iter->AddMerge("b", "merge_8"); - internal_iter->AddMerge("b", "merge_9"); - internal_iter->AddMerge("b", "merge_10"); - internal_iter->AddMerge("b", "merge_11"); - - internal_iter->AddDeletion("c"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ioptions, mutable_cf_options, BytewiseComparator(), - internal_iter, nullptr /* version */, 14 /* sequence */, - options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), - "merge_6,merge_7,merge_8,merge_9,merge_10,merge_11"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1"); - db_iter->Prev(); - ASSERT_TRUE(!db_iter->Valid()); - } -} - -TEST_F(DBIteratorTest, DBIterator8) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddDeletion("a"); - internal_iter->AddPut("a", "0"); - internal_iter->AddPut("b", "0"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "0"); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "0"); -} - -// TODO(3.13): fix the issue of Seek() then Prev() which might not necessary -// return the biggest element smaller than the seek key. 
-TEST_F(DBIteratorTest, DBIterator9) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - { - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddMerge("a", "merge_1"); - internal_iter->AddMerge("a", "merge_2"); - internal_iter->AddMerge("b", "merge_3"); - internal_iter->AddMerge("b", "merge_4"); - internal_iter->AddMerge("d", "merge_5"); - internal_iter->AddMerge("d", "merge_6"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4"); - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "d"); - ASSERT_EQ(db_iter->value().ToString(), "merge_5,merge_6"); - - db_iter->Seek("b"); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2"); - - db_iter->SeekForPrev("b"); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4"); - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "d"); - ASSERT_EQ(db_iter->value().ToString(), "merge_5,merge_6"); - - db_iter->Seek("c"); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "d"); - ASSERT_EQ(db_iter->value().ToString(), "merge_5,merge_6"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4"); - - db_iter->SeekForPrev("c"); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4"); - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "d"); - ASSERT_EQ(db_iter->value().ToString(), "merge_5,merge_6"); - } -} - -// TODO(3.13): fix the issue of Seek() then Prev() which might not necessary -// return the biggest element smaller than the seek key. 
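DBIterator9 above mixes Seek()/SeekForPrev() with direction changes over merged keys, and the TODO notes that Seek() followed by Prev() may not always land on the largest key smaller than the seek target. For reference, the intended user-facing semantics look like this through the public API; a minimal sketch, with the DB path and keys purely illustrative.

#include <cassert>
#include <memory>
#include "rocksdb/db.h"

int main() {
  auto ok = [](const rocksdb::Status& s) { assert(s.ok()); (void)s; };
  rocksdb::Options options;
  options.create_if_missing = true;
  ok(rocksdb::DestroyDB("/tmp/seek_then_prev_demo", options));
  rocksdb::DB* db = nullptr;
  ok(rocksdb::DB::Open(options, "/tmp/seek_then_prev_demo", &db));
  for (const char* k : {"a", "b", "d"}) {
    ok(db->Put(rocksdb::WriteOptions(), k, "v"));
  }

  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(rocksdb::ReadOptions()));

  it->Seek("c");         // positions on the first key >= "c"
  assert(it->Valid() && it->key().ToString() == "d");
  it->Prev();            // direction change: largest key < "d"
  assert(it->Valid() && it->key().ToString() == "b");

  it->SeekForPrev("c");  // positions on the last key <= "c"
  assert(it->Valid() && it->key().ToString() == "b");
  it->Next();            // direction change: smallest key > "b"
  assert(it->Valid() && it->key().ToString() == "d");

  delete db;
  return 0;
}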
-TEST_F(DBIteratorTest, DBIterator10) { - ReadOptions ro; - Options options; - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "1"); - internal_iter->AddPut("b", "2"); - internal_iter->AddPut("c", "3"); - internal_iter->AddPut("d", "4"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->Seek("c"); - ASSERT_TRUE(db_iter->Valid()); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "2"); - - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "3"); - - db_iter->SeekForPrev("c"); - ASSERT_TRUE(db_iter->Valid()); - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "d"); - ASSERT_EQ(db_iter->value().ToString(), "4"); - - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "3"); -} - -TEST_F(DBIteratorTest, SeekToLastOccurrenceSeq0) { - ReadOptions ro; - Options options; - options.merge_operator = nullptr; - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "1"); - internal_iter->AddPut("b", "2"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, 0 /* force seek */, nullptr /* read_callback */)); - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "1"); - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "2"); - db_iter->Next(); - ASSERT_FALSE(db_iter->Valid()); -} - -TEST_F(DBIteratorTest, DBIterator11) { - ReadOptions ro; - Options options; - options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "0"); - internal_iter->AddPut("b", "0"); - internal_iter->AddSingleDeletion("b"); - internal_iter->AddMerge("a", "1"); - internal_iter->AddMerge("b", "2"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 1 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - db_iter->SeekToFirst(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "0"); - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - db_iter->Next(); - ASSERT_FALSE(db_iter->Valid()); -} - -TEST_F(DBIteratorTest, DBIterator12) { - ReadOptions ro; - Options options; - options.merge_operator = nullptr; - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "1"); - internal_iter->AddPut("b", "2"); - internal_iter->AddPut("c", "3"); - internal_iter->AddSingleDeletion("b"); - 
internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, 0 /* force seek */, nullptr /* read_callback */)); - db_iter->SeekToLast(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "c"); - ASSERT_EQ(db_iter->value().ToString(), "3"); - db_iter->Prev(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "1"); - db_iter->Prev(); - ASSERT_FALSE(db_iter->Valid()); -} - -TEST_F(DBIteratorTest, DBIterator13) { - ReadOptions ro; - Options options; - options.merge_operator = nullptr; - - std::string key; - key.resize(9); - key.assign(9, static_cast(0)); - key[0] = 'b'; - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut(key, "0"); - internal_iter->AddPut(key, "1"); - internal_iter->AddPut(key, "2"); - internal_iter->AddPut(key, "3"); - internal_iter->AddPut(key, "4"); - internal_iter->AddPut(key, "5"); - internal_iter->AddPut(key, "6"); - internal_iter->AddPut(key, "7"); - internal_iter->AddPut(key, "8"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 2 /* sequence */, 3 /* max_sequential_skip_in_iterations */, - nullptr /* read_callback */)); - db_iter->Seek("b"); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), key); - ASSERT_EQ(db_iter->value().ToString(), "2"); -} - -TEST_F(DBIteratorTest, DBIterator14) { - ReadOptions ro; - Options options; - options.merge_operator = nullptr; - - std::string key("b"); - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("b", "0"); - internal_iter->AddPut("b", "1"); - internal_iter->AddPut("b", "2"); - internal_iter->AddPut("b", "3"); - internal_iter->AddPut("a", "4"); - internal_iter->AddPut("a", "5"); - internal_iter->AddPut("a", "6"); - internal_iter->AddPut("c", "7"); - internal_iter->AddPut("c", "8"); - internal_iter->AddPut("c", "9"); - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 4 /* sequence */, 1 /* max_sequential_skip_in_iterations */, - nullptr /* read_callback */)); - db_iter->Seek("b"); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(db_iter->key().ToString(), "b"); - ASSERT_EQ(db_iter->value().ToString(), "3"); - db_iter->SeekToFirst(); - ASSERT_EQ(db_iter->key().ToString(), "a"); - ASSERT_EQ(db_iter->value().ToString(), "4"); -} - -class DBIterWithMergeIterTest : public testing::Test { - public: - DBIterWithMergeIterTest() - : env_(Env::Default()), icomp_(BytewiseComparator()) { - options_.merge_operator = nullptr; - - internal_iter1_ = new TestIterator(BytewiseComparator()); - internal_iter1_->Add("a", kTypeValue, "1", 3u); - internal_iter1_->Add("f", kTypeValue, "2", 5u); - internal_iter1_->Add("g", kTypeValue, "3", 7u); - internal_iter1_->Finish(); - - internal_iter2_ = new TestIterator(BytewiseComparator()); - internal_iter2_->Add("a", kTypeValue, "4", 6u); - internal_iter2_->Add("b", kTypeValue, "5", 1u); - internal_iter2_->Add("c", kTypeValue, "6", 2u); - internal_iter2_->Add("d", kTypeValue, "7", 3u); - internal_iter2_->Finish(); - - std::vector child_iters; - 
child_iters.push_back(internal_iter1_); - child_iters.push_back(internal_iter2_); - InternalKeyComparator icomp(BytewiseComparator()); - InternalIterator* merge_iter = - NewMergingIterator(&icomp_, &child_iters[0], 2u); - - db_iter_.reset(NewDBIterator( - env_, ro_, ImmutableOptions(options_), MutableCFOptions(options_), - BytewiseComparator(), merge_iter, nullptr /* version */, - 8 /* read data earlier than seqId 8 */, - 3 /* max iterators before reseek */, nullptr /* read_callback */)); - } - - Env* env_; - ReadOptions ro_; - Options options_; - TestIterator* internal_iter1_; - TestIterator* internal_iter2_; - InternalKeyComparator icomp_; - Iterator* merge_iter_; - std::unique_ptr db_iter_; -}; - -TEST_F(DBIterWithMergeIterTest, InnerMergeIterator1) { - db_iter_->SeekToFirst(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "a"); - ASSERT_EQ(db_iter_->value().ToString(), "4"); - db_iter_->Next(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "b"); - ASSERT_EQ(db_iter_->value().ToString(), "5"); - db_iter_->Next(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "c"); - ASSERT_EQ(db_iter_->value().ToString(), "6"); - db_iter_->Next(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - db_iter_->Next(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - db_iter_->Next(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "g"); - ASSERT_EQ(db_iter_->value().ToString(), "3"); - db_iter_->Next(); - ASSERT_FALSE(db_iter_->Valid()); -} - -TEST_F(DBIterWithMergeIterTest, InnerMergeIterator2) { - // Test Prev() when one child iterator is at its end. - db_iter_->SeekForPrev("g"); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "g"); - ASSERT_EQ(db_iter_->value().ToString(), "3"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "c"); - ASSERT_EQ(db_iter_->value().ToString(), "6"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "b"); - ASSERT_EQ(db_iter_->value().ToString(), "5"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "a"); - ASSERT_EQ(db_iter_->value().ToString(), "4"); -} - -TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace1) { - // Test Prev() when one child iterator is at its end but more rows - // are added. - db_iter_->Seek("f"); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - - // Test call back inserts a key in the end of the mem table after - // MergeIterator::Prev() realized the mem table iterator is at its end - // and before an SeekToLast() is called. 
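DBIterWithMergeIterTest stitches two TestIterators together with NewMergingIterator, so the DBIter above sees one ordered stream in which the newer entry for a duplicate user key (for example "a" at sequence 6 versus sequence 3) shadows the older one. Below is a conceptual, dependency-free sketch of that shadowing rule, not RocksDB's actual MergingIterator implementation.

#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Entry {
  std::string key;
  std::string value;
  uint64_t seq;  // newer writes carry larger sequence numbers
};

// Collapse several sorted runs into the view a reader observes: one value per
// user key, taken from the entry with the largest sequence number.
std::map<std::string, std::string> MergeRuns(
    const std::vector<std::vector<Entry>>& runs) {
  std::map<std::string, std::pair<uint64_t, std::string>> best;
  for (const auto& run : runs) {
    for (const auto& e : run) {
      auto it = best.find(e.key);
      if (it == best.end() || e.seq > it->second.first) {
        best[e.key] = {e.seq, e.value};
      }
    }
  }
  std::map<std::string, std::string> view;
  for (const auto& kv : best) {
    view[kv.first] = kv.second.second;
  }
  return view;
}

int main() {
  // Mirrors the fixture: iter1 holds a@3, f@5, g@7; iter2 holds a@6, b@1, c@2, d@3.
  std::vector<Entry> iter1 = {{"a", "1", 3}, {"f", "2", 5}, {"g", "3", 7}};
  std::vector<Entry> iter2 = {{"a", "4", 6}, {"b", "5", 1}, {"c", "6", 2}, {"d", "7", 3}};
  auto view = MergeRuns({iter1, iter2});
  assert(view["a"] == "4");  // sequence 6 shadows sequence 3
  assert(view.size() == 6);  // a, b, c, d, f, g
  return 0;
}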
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "MergeIterator::Prev:BeforePrev", - [&](void* /*arg*/) { internal_iter2_->Add("z", kTypeValue, "7", 12u); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "c"); - ASSERT_EQ(db_iter_->value().ToString(), "6"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "b"); - ASSERT_EQ(db_iter_->value().ToString(), "5"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "a"); - ASSERT_EQ(db_iter_->value().ToString(), "4"); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace2) { - // Test Prev() when one child iterator is at its end but more rows - // are added. - db_iter_->Seek("f"); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - - // Test call back inserts entries for update a key in the end of the - // mem table after MergeIterator::Prev() realized the mem tableiterator is at - // its end and before an SeekToLast() is called. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "MergeIterator::Prev:BeforePrev", [&](void* /*arg*/) { - internal_iter2_->Add("z", kTypeValue, "7", 12u); - internal_iter2_->Add("z", kTypeValue, "7", 11u); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "c"); - ASSERT_EQ(db_iter_->value().ToString(), "6"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "b"); - ASSERT_EQ(db_iter_->value().ToString(), "5"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "a"); - ASSERT_EQ(db_iter_->value().ToString(), "4"); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace3) { - // Test Prev() when one child iterator is at its end but more rows - // are added and max_skipped is triggered. - db_iter_->Seek("f"); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - - // Test call back inserts entries for update a key in the end of the - // mem table after MergeIterator::Prev() realized the mem table iterator is at - // its end and before an SeekToLast() is called. 
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "MergeIterator::Prev:BeforePrev", [&](void* /*arg*/) { - internal_iter2_->Add("z", kTypeValue, "7", 16u, true); - internal_iter2_->Add("z", kTypeValue, "7", 15u, true); - internal_iter2_->Add("z", kTypeValue, "7", 14u, true); - internal_iter2_->Add("z", kTypeValue, "7", 13u, true); - internal_iter2_->Add("z", kTypeValue, "7", 12u, true); - internal_iter2_->Add("z", kTypeValue, "7", 11u, true); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "c"); - ASSERT_EQ(db_iter_->value().ToString(), "6"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "b"); - ASSERT_EQ(db_iter_->value().ToString(), "5"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "a"); - ASSERT_EQ(db_iter_->value().ToString(), "4"); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace4) { - // Test Prev() when one child iterator has more rows inserted - // between Seek() and Prev() when changing directions. - internal_iter2_->Add("z", kTypeValue, "9", 4u); - - db_iter_->Seek("g"); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "g"); - ASSERT_EQ(db_iter_->value().ToString(), "3"); - - // Test call back inserts entries for update a key before "z" in - // mem table after MergeIterator::Prev() calls mem table iterator's - // Seek() and before calling Prev() - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "MergeIterator::Prev:BeforePrev", [&](void* arg) { - IteratorWrapper* it = reinterpret_cast(arg); - if (it->key().starts_with("z")) { - internal_iter2_->Add("x", kTypeValue, "7", 16u, true); - internal_iter2_->Add("x", kTypeValue, "7", 15u, true); - internal_iter2_->Add("x", kTypeValue, "7", 14u, true); - internal_iter2_->Add("x", kTypeValue, "7", 13u, true); - internal_iter2_->Add("x", kTypeValue, "7", 12u, true); - internal_iter2_->Add("x", kTypeValue, "7", 11u, true); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "c"); - ASSERT_EQ(db_iter_->value().ToString(), "6"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "b"); - ASSERT_EQ(db_iter_->value().ToString(), "5"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "a"); - ASSERT_EQ(db_iter_->value().ToString(), "4"); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace5) { - internal_iter2_->Add("z", kTypeValue, "9", 4u); - - // Test Prev() when one child iterator has more rows inserted - // between Seek() and Prev() when changing directions. 
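These DataRace cases all follow the same pattern: register a SyncPoint callback on "MergeIterator::Prev:BeforePrev" that mutates a child iterator mid-operation, then enable processing around the reads. A bare skeleton of that pattern is sketched below. SyncPoint is an internal RocksDB test utility, not public API; the header path is an assumption, the callback only fires in builds with sync points compiled in, and the body that would drive the iterator is elided.

#include <atomic>
#include <iostream>
#include "test_util/sync_point.h"  // assumed location of the internal utility

int main() {
  std::atomic<int> hits{0};
  auto* sp = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();

  // 1. Register a callback keyed by a marker name compiled into RocksDB.
  sp->SetCallBack("MergeIterator::Prev:BeforePrev",
                  [&](void* /*arg*/) { hits.fetch_add(1); });

  // 2. Callbacks only run between EnableProcessing() and DisableProcessing().
  sp->EnableProcessing();

  // ... drive an iterator that switches direction here, as the tests above do;
  // each time execution reaches the marker, the callback runs synchronously
  // and may mutate shared state (e.g. append keys to a memtable) ...

  sp->DisableProcessing();
  sp->ClearAllCallBacks();
  std::cout << "callback fired " << hits.load() << " times\n";
  return 0;
}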
- db_iter_->Seek("g"); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "g"); - ASSERT_EQ(db_iter_->value().ToString(), "3"); - - // Test call back inserts entries for update a key before "z" in - // mem table after MergeIterator::Prev() calls mem table iterator's - // Seek() and before calling Prev() - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "MergeIterator::Prev:BeforePrev", [&](void* arg) { - IteratorWrapper* it = reinterpret_cast(arg); - if (it->key().starts_with("z")) { - internal_iter2_->Add("x", kTypeValue, "7", 16u, true); - internal_iter2_->Add("x", kTypeValue, "7", 15u, true); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "c"); - ASSERT_EQ(db_iter_->value().ToString(), "6"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "b"); - ASSERT_EQ(db_iter_->value().ToString(), "5"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "a"); - ASSERT_EQ(db_iter_->value().ToString(), "4"); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace6) { - internal_iter2_->Add("z", kTypeValue, "9", 4u); - - // Test Prev() when one child iterator has more rows inserted - // between Seek() and Prev() when changing directions. - db_iter_->Seek("g"); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "g"); - ASSERT_EQ(db_iter_->value().ToString(), "3"); - - // Test call back inserts an entry for update a key before "z" in - // mem table after MergeIterator::Prev() calls mem table iterator's - // Seek() and before calling Prev() - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "MergeIterator::Prev:BeforePrev", [&](void* arg) { - IteratorWrapper* it = reinterpret_cast(arg); - if (it->key().starts_with("z")) { - internal_iter2_->Add("x", kTypeValue, "7", 16u, true); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "c"); - ASSERT_EQ(db_iter_->value().ToString(), "6"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "b"); - ASSERT_EQ(db_iter_->value().ToString(), "5"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "a"); - ASSERT_EQ(db_iter_->value().ToString(), "4"); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace7) { - internal_iter1_->Add("u", kTypeValue, "10", 4u); - internal_iter1_->Add("v", kTypeValue, "11", 4u); - internal_iter1_->Add("w", kTypeValue, "12", 4u); - internal_iter2_->Add("z", kTypeValue, "9", 4u); - - // Test Prev() when one child iterator 
has more rows inserted - // between Seek() and Prev() when changing directions. - db_iter_->Seek("g"); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "g"); - ASSERT_EQ(db_iter_->value().ToString(), "3"); - - // Test call back inserts entries for update a key before "z" in - // mem table after MergeIterator::Prev() calls mem table iterator's - // Seek() and before calling Prev() - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "MergeIterator::Prev:BeforePrev", [&](void* arg) { - IteratorWrapper* it = reinterpret_cast(arg); - if (it->key().starts_with("z")) { - internal_iter2_->Add("x", kTypeValue, "7", 16u, true); - internal_iter2_->Add("x", kTypeValue, "7", 15u, true); - internal_iter2_->Add("x", kTypeValue, "7", 14u, true); - internal_iter2_->Add("x", kTypeValue, "7", 13u, true); - internal_iter2_->Add("x", kTypeValue, "7", 12u, true); - internal_iter2_->Add("x", kTypeValue, "7", 11u, true); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "c"); - ASSERT_EQ(db_iter_->value().ToString(), "6"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "b"); - ASSERT_EQ(db_iter_->value().ToString(), "5"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "a"); - ASSERT_EQ(db_iter_->value().ToString(), "4"); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace8) { - // internal_iter1_: a, f, g - // internal_iter2_: a, b, c, d, adding (z) - internal_iter2_->Add("z", kTypeValue, "9", 4u); - - // Test Prev() when one child iterator has more rows inserted - // between Seek() and Prev() when changing directions. 
- db_iter_->Seek("g"); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "g"); - ASSERT_EQ(db_iter_->value().ToString(), "3"); - - // Test call back inserts two keys before "z" in mem table after - // MergeIterator::Prev() calls mem table iterator's Seek() and - // before calling Prev() - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "MergeIterator::Prev:BeforePrev", [&](void* arg) { - IteratorWrapper* it = reinterpret_cast(arg); - if (it->key().starts_with("z")) { - internal_iter2_->Add("x", kTypeValue, "7", 16u, true); - internal_iter2_->Add("y", kTypeValue, "7", 17u, true); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "f"); - ASSERT_EQ(db_iter_->value().ToString(), "2"); - db_iter_->Prev(); - ASSERT_TRUE(db_iter_->Valid()); - ASSERT_EQ(db_iter_->key().ToString(), "d"); - ASSERT_EQ(db_iter_->value().ToString(), "7"); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBIteratorTest, SeekPrefixTombstones) { - ReadOptions ro; - Options options; - options.prefix_extractor.reset(NewNoopTransform()); - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddDeletion("b"); - internal_iter->AddDeletion("c"); - internal_iter->AddDeletion("d"); - internal_iter->AddDeletion("e"); - internal_iter->AddDeletion("f"); - internal_iter->AddDeletion("g"); - internal_iter->Finish(); - - ro.prefix_same_as_start = true; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - int skipped_keys = 0; - - get_perf_context()->Reset(); - db_iter->SeekForPrev("z"); - skipped_keys = - static_cast(get_perf_context()->internal_key_skipped_count); - ASSERT_EQ(skipped_keys, 0); - - get_perf_context()->Reset(); - db_iter->Seek("a"); - skipped_keys = - static_cast(get_perf_context()->internal_key_skipped_count); - ASSERT_EQ(skipped_keys, 0); -} - -TEST_F(DBIteratorTest, SeekToFirstLowerBound) { - const int kNumKeys = 3; - for (int i = 0; i < kNumKeys + 2; ++i) { - // + 2 for two special cases: lower bound before and lower bound after the - // internal iterator's keys - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - for (int j = 1; j <= kNumKeys; ++j) { - internal_iter->AddPut(std::to_string(j), "val"); - } - internal_iter->Finish(); - - ReadOptions ro; - auto lower_bound_str = std::to_string(i); - Slice lower_bound(lower_bound_str); - ro.iterate_lower_bound = &lower_bound; - Options options; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToFirst(); - if (i == kNumKeys + 1) { - // lower bound was beyond the last key - ASSERT_FALSE(db_iter->Valid()); - ASSERT_OK(db_iter->status()); - } else { - ASSERT_TRUE(db_iter->Valid()); - int expected; - if (i == 0) { - // lower bound was before the first key - expected = 1; - } else { - // lower bound was at the ith key - expected = i; - } - ASSERT_EQ(std::to_string(expected), db_iter->key().ToString()); - } - } -} - -TEST_F(DBIteratorTest, PrevLowerBound) { - const int kNumKeys = 3; - const 
int kLowerBound = 2; - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - for (int j = 1; j <= kNumKeys; ++j) { - internal_iter->AddPut(std::to_string(j), "val"); - } - internal_iter->Finish(); - - ReadOptions ro; - auto lower_bound_str = std::to_string(kLowerBound); - Slice lower_bound(lower_bound_str); - ro.iterate_lower_bound = &lower_bound; - Options options; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekToLast(); - for (int i = kNumKeys; i >= kLowerBound; --i) { - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(std::to_string(i), db_iter->key().ToString()); - db_iter->Prev(); - } - ASSERT_FALSE(db_iter->Valid()); -} - -TEST_F(DBIteratorTest, SeekLessLowerBound) { - const int kNumKeys = 3; - const int kLowerBound = 2; - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - for (int j = 1; j <= kNumKeys; ++j) { - internal_iter->AddPut(std::to_string(j), "val"); - } - internal_iter->Finish(); - - ReadOptions ro; - auto lower_bound_str = std::to_string(kLowerBound); - Slice lower_bound(lower_bound_str); - ro.iterate_lower_bound = &lower_bound; - Options options; - std::unique_ptr db_iter(NewDBIterator( - env_, ro, ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - auto before_lower_bound_str = std::to_string(kLowerBound - 1); - Slice before_lower_bound(lower_bound_str); - - db_iter->Seek(before_lower_bound); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_EQ(lower_bound_str, db_iter->key().ToString()); -} - -TEST_F(DBIteratorTest, ReverseToForwardWithDisappearingKeys) { - Options options; - options.prefix_extractor.reset(NewCappedPrefixTransform(0)); - - TestIterator* internal_iter = new TestIterator(BytewiseComparator()); - internal_iter->AddPut("a", "A"); - internal_iter->AddPut("b", "B"); - for (int i = 0; i < 100; ++i) { - internal_iter->AddPut("c" + std::to_string(i), ""); - } - internal_iter->Finish(); - - std::unique_ptr db_iter(NewDBIterator( - env_, ReadOptions(), ImmutableOptions(options), MutableCFOptions(options), - BytewiseComparator(), internal_iter, nullptr /* version */, - 10 /* sequence */, options.max_sequential_skip_in_iterations, - nullptr /* read_callback */)); - - db_iter->SeekForPrev("a"); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_OK(db_iter->status()); - ASSERT_EQ("a", db_iter->key().ToString()); - - internal_iter->Vanish("a"); - db_iter->Next(); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_OK(db_iter->status()); - ASSERT_EQ("b", db_iter->key().ToString()); - - // A (sort of) bug used to cause DBIter to pointlessly drag the internal - // iterator all the way to the end. But this doesn't really matter at the time - // of writing because the only iterator that can see disappearing keys is - // ForwardIterator, which doesn't support SeekForPrev(). 
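The three lower-bound tests above (SeekToFirstLowerBound, PrevLowerBound, SeekLessLowerBound) pin ReadOptions::iterate_lower_bound and check that SeekToFirst, reverse scans, and Seek targets below the bound are all clamped to it. A minimal public-API sketch of the same behaviour follows; the DB path and keys are illustrative.

#include <cassert>
#include <memory>
#include <string>
#include "rocksdb/db.h"

int main() {
  auto ok = [](const rocksdb::Status& s) { assert(s.ok()); (void)s; };
  rocksdb::Options options;
  options.create_if_missing = true;
  ok(rocksdb::DestroyDB("/tmp/lower_bound_demo", options));
  rocksdb::DB* db = nullptr;
  ok(rocksdb::DB::Open(options, "/tmp/lower_bound_demo", &db));
  for (int i = 1; i <= 3; ++i) {
    ok(db->Put(rocksdb::WriteOptions(), std::to_string(i), "val"));
  }

  rocksdb::ReadOptions ro;
  std::string lb = "2";
  rocksdb::Slice lower_bound(lb);
  ro.iterate_lower_bound = &lower_bound;  // keys below "2" are invisible

  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(ro));

  it->SeekToFirst();  // clamped up to the bound
  assert(it->Valid() && it->key().ToString() == "2");

  it->SeekToLast();   // reverse scan stops at the bound
  assert(it->Valid() && it->key().ToString() == "3");
  it->Prev();
  assert(it->Valid() && it->key().ToString() == "2");
  it->Prev();
  assert(!it->Valid() && it->status().ok());

  it->Seek("1");      // a target below the bound is clamped to it
  assert(it->Valid() && it->key().ToString() == "2");

  delete db;
  return 0;
}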
- EXPECT_LT(internal_iter->steps(), 20); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc deleted file mode 100644 index f9e026a8c..000000000 --- a/db/db_iterator_test.cc +++ /dev/null @@ -1,3253 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include - -#include "db/arena_wrapped_db_iter.h" -#include "db/db_iter.h" -#include "db/db_test_util.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/iostats_context.h" -#include "rocksdb/perf_context.h" -#include "table/block_based/flush_block_policy.h" -#include "util/random.h" -#include "utilities/merge_operators/string_append/stringappend2.h" - -namespace ROCKSDB_NAMESPACE { - -// A dumb ReadCallback which saying every key is committed. -class DummyReadCallback : public ReadCallback { - public: - DummyReadCallback() : ReadCallback(kMaxSequenceNumber) {} - bool IsVisibleFullCheck(SequenceNumber /*seq*/) override { return true; } - void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; } -}; - -// Test param: -// bool: whether to pass read_callback to NewIterator(). -class DBIteratorTest : public DBTestBase, - public testing::WithParamInterface { - public: - DBIteratorTest() : DBTestBase("db_iterator_test", /*env_do_fsync=*/true) {} - - Iterator* NewIterator(const ReadOptions& read_options, - ColumnFamilyHandle* column_family = nullptr) { - if (column_family == nullptr) { - column_family = db_->DefaultColumnFamily(); - } - auto* cfd = - static_cast_with_check(column_family)->cfd(); - SequenceNumber seq = read_options.snapshot != nullptr - ? read_options.snapshot->GetSequenceNumber() - : db_->GetLatestSequenceNumber(); - bool use_read_callback = GetParam(); - DummyReadCallback* read_callback = nullptr; - if (use_read_callback) { - read_callback = new DummyReadCallback(); - read_callback->SetSnapshot(seq); - InstrumentedMutexLock lock(&mutex_); - read_callbacks_.push_back( - std::unique_ptr(read_callback)); - } - return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback); - } - - private: - InstrumentedMutex mutex_; - std::vector> read_callbacks_; -}; - -TEST_P(DBIteratorTest, IteratorProperty) { - // The test needs to be changed if kPersistedTier is supported in iterator. 
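The IteratorProperty test beginning above drives Iterator::GetProperty() with the "rocksdb.iterator.is-key-pinned" and "rocksdb.iterator.internal-key" properties. For reference, a small stand-alone sketch of that API against an ordinary DB; the path is illustrative and the expected values mirror the pin_data == false case exercised here.

#include <cassert>
#include <memory>
#include <string>
#include "rocksdb/db.h"

int main() {
  auto ok = [](const rocksdb::Status& s) { assert(s.ok()); (void)s; };
  rocksdb::Options options;
  options.create_if_missing = true;
  ok(rocksdb::DestroyDB("/tmp/iter_property_demo", options));
  rocksdb::DB* db = nullptr;
  ok(rocksdb::DB::Open(options, "/tmp/iter_property_demo", &db));
  ok(db->Put(rocksdb::WriteOptions(), "1", "2"));

  rocksdb::ReadOptions ro;
  ro.pin_data = false;
  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(ro));
  it->SeekToFirst();
  assert(it->Valid());

  std::string prop;
  // Unknown property names are rejected.
  assert(!it->GetProperty("non_existing.value", &prop).ok());
  // With pin_data == false the key is copied, so it is reported as not pinned.
  assert(it->GetProperty("rocksdb.iterator.is-key-pinned", &prop).ok());
  assert(prop == "0");
  // The user key of the internal entry the iterator is positioned on.
  assert(it->GetProperty("rocksdb.iterator.internal-key", &prop).ok());
  assert(prop == "1");

  delete db;
  return 0;
}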
- Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put(1, "1", "2")); - ASSERT_OK(Delete(1, "2")); - ReadOptions ropt; - ropt.pin_data = false; - { - std::unique_ptr iter(NewIterator(ropt, handles_[1])); - iter->SeekToFirst(); - std::string prop_value; - ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value)); - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("0", prop_value); - ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value)); - ASSERT_EQ("1", prop_value); - iter->Next(); - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("Iterator is not valid.", prop_value); - - // Get internal key at which the iteration stopped (tombstone in this case). - ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value)); - ASSERT_EQ("2", prop_value); - } - Close(); -} - -TEST_P(DBIteratorTest, PersistedTierOnIterator) { - // The test needs to be changed if kPersistedTier is supported in iterator. - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - ReadOptions ropt; - ropt.read_tier = kPersistedTier; - - auto* iter = db_->NewIterator(ropt, handles_[1]); - ASSERT_TRUE(iter->status().IsNotSupported()); - delete iter; - - std::vector iters; - ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported()); - Close(); -} - -TEST_P(DBIteratorTest, NonBlockingIteration) { - do { - ReadOptions non_blocking_opts, regular_opts; - anon::OptionsOverride options_override; - options_override.full_block_cache = true; - Options options = CurrentOptions(options_override); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - non_blocking_opts.read_tier = kBlockCacheTier; - - CreateAndReopenWithCF({"pikachu"}, options); - // write one kv to the database. - ASSERT_OK(Put(1, "a", "b")); - - // scan using non-blocking iterator. We should find it because - // it is in memtable. - Iterator* iter = NewIterator(non_blocking_opts, handles_[1]); - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 1); - delete iter; - - // flush memtable to storage. Now, the key should not be in the - // memtable neither in the block cache. - ASSERT_OK(Flush(1)); - - // verify that a non-blocking iterator does not find any - // kvs. Neither does it do any IOs to storage. 
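NonBlockingIteration relies on ReadOptions::read_tier = kBlockCacheTier: reads must be served from the memtable or block cache and otherwise return an Incomplete status rather than touching storage. A compact sketch of that contract through the public API; the path and key are illustrative, and it assumes, as the test does, that the freshly flushed data block is not yet in the block cache.

#include <cassert>
#include <memory>
#include "rocksdb/db.h"

int main() {
  auto ok = [](const rocksdb::Status& s) { assert(s.ok()); (void)s; };
  rocksdb::Options options;
  options.create_if_missing = true;
  ok(rocksdb::DestroyDB("/tmp/non_blocking_demo", options));
  rocksdb::DB* db = nullptr;
  ok(rocksdb::DB::Open(options, "/tmp/non_blocking_demo", &db));
  ok(db->Put(rocksdb::WriteOptions(), "a", "b"));

  rocksdb::ReadOptions non_blocking;
  non_blocking.read_tier = rocksdb::kBlockCacheTier;  // never go to storage

  // While the key is still in the memtable, a non-blocking scan sees it.
  {
    std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(non_blocking));
    it->SeekToFirst();
    assert(it->Valid() && it->key().ToString() == "a");
  }

  // After a flush the data lives only in an un-cached SST file, so the
  // non-blocking iterator reports Incomplete instead of doing I/O.
  ok(db->Flush(rocksdb::FlushOptions()));
  {
    std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(non_blocking));
    it->SeekToFirst();
    assert(!it->Valid());
    assert(it->status().IsIncomplete());
  }

  delete db;
  return 0;
}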
- uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS); - uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - iter = NewIterator(non_blocking_opts, handles_[1]); - count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - count++; - } - ASSERT_EQ(count, 0); - ASSERT_TRUE(iter->status().IsIncomplete()); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - delete iter; - - // read in the specified block via a regular get - ASSERT_EQ(Get(1, "a"), "b"); - - // verify that we can find it via a non-blocking scan - numopen = TestGetTickerCount(options, NO_FILE_OPENS); - cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); - iter = NewIterator(non_blocking_opts, handles_[1]); - count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 1); - ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); - ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); - delete iter; - - // This test verifies block cache behaviors, which is not used by plain - // table format. - } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads)); -} - -TEST_P(DBIteratorTest, IterSeekBeforePrev) { - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("0", "f")); - ASSERT_OK(Put("1", "h")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("2", "j")); - auto iter = NewIterator(ReadOptions()); - iter->Seek(Slice("c")); - iter->Prev(); - iter->Seek(Slice("a")); - iter->Prev(); - delete iter; -} - -TEST_P(DBIteratorTest, IterReseekNewUpperBound) { - Random rnd(301); - Options options = CurrentOptions(); - BlockBasedTableOptions table_options; - table_options.block_size = 1024; - table_options.block_size_deviation = 50; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.compression = kNoCompression; - Reopen(options); - - ASSERT_OK(Put("a", rnd.RandomString(400))); - ASSERT_OK(Put("aabb", rnd.RandomString(400))); - ASSERT_OK(Put("aaef", rnd.RandomString(400))); - ASSERT_OK(Put("b", rnd.RandomString(400))); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ReadOptions opts; - Slice ub = Slice("aa"); - opts.iterate_upper_bound = &ub; - auto iter = NewIterator(opts); - iter->Seek(Slice("a")); - ub = Slice("b"); - iter->Seek(Slice("aabc")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), "aaef"); - delete iter; -} - -TEST_P(DBIteratorTest, IterSeekForPrevBeforeNext) { - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("0", "f")); - ASSERT_OK(Put("1", "h")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("2", "j")); - auto iter = NewIterator(ReadOptions()); - iter->SeekForPrev(Slice("0")); - iter->Next(); - iter->SeekForPrev(Slice("1")); - iter->Next(); - delete iter; -} - -namespace { -std::string MakeLongKey(size_t length, char c) { - return std::string(length, c); -} -} // anonymous namespace - -TEST_P(DBIteratorTest, IterLongKeys) { - ASSERT_OK(Put(MakeLongKey(20, 0), "0")); - ASSERT_OK(Put(MakeLongKey(32, 2), "2")); - ASSERT_OK(Put("a", "b")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put(MakeLongKey(50, 1), "1")); - ASSERT_OK(Put(MakeLongKey(127, 3), "3")); - ASSERT_OK(Put(MakeLongKey(64, 4), "4")); - auto iter = NewIterator(ReadOptions()); - - // Create a key 
that needs to be skipped for Seq too new - iter->Seek(MakeLongKey(20, 0)); - ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4"); - - iter->SeekForPrev(MakeLongKey(127, 3)); - ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); - delete iter; - - iter = NewIterator(ReadOptions()); - iter->Seek(MakeLongKey(50, 1)); - ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); - delete iter; -} - -TEST_P(DBIteratorTest, IterNextWithNewerSeq) { - ASSERT_OK(Put("0", "0")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - ASSERT_OK(Put("d", "e")); - auto iter = NewIterator(ReadOptions()); - - // Create a key that needs to be skipped for Seq too new - for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; - i++) { - ASSERT_OK(Put("b", "f")); - } - - iter->Seek(Slice("a")); - ASSERT_EQ(IterStatus(iter), "a->b"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->d"); - iter->SeekForPrev(Slice("b")); - ASSERT_EQ(IterStatus(iter), "a->b"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->d"); - - delete iter; -} - -TEST_P(DBIteratorTest, IterPrevWithNewerSeq) { - ASSERT_OK(Put("0", "0")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - ASSERT_OK(Put("d", "e")); - auto iter = NewIterator(ReadOptions()); - - // Create a key that needs to be skipped for Seq too new - for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; - i++) { - ASSERT_OK(Put("b", "f")); - } - - iter->Seek(Slice("d")); - ASSERT_EQ(IterStatus(iter), "d->e"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "c->d"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->b"); - iter->Prev(); - iter->SeekForPrev(Slice("d")); - ASSERT_EQ(IterStatus(iter), "d->e"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "c->d"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->b"); - iter->Prev(); - delete iter; -} - -TEST_P(DBIteratorTest, IterPrevWithNewerSeq2) { - ASSERT_OK(Put("0", "0")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - ASSERT_OK(Put("e", "f")); - auto iter = NewIterator(ReadOptions()); - auto iter2 = NewIterator(ReadOptions()); - iter->Seek(Slice("c")); - iter2->SeekForPrev(Slice("d")); - ASSERT_EQ(IterStatus(iter), "c->d"); - ASSERT_EQ(IterStatus(iter2), "c->d"); - - // Create a key that needs to be skipped for Seq too new - for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; - i++) { - ASSERT_OK(Put("b", "f")); - } - - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->b"); - iter->Prev(); - iter2->Prev(); - ASSERT_EQ(IterStatus(iter2), "a->b"); - iter2->Prev(); - delete iter; - delete iter2; -} - -TEST_P(DBIteratorTest, IterEmpty) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - Iterator* iter = NewIterator(ReadOptions(), handles_[1]); - - iter->SeekToFirst(); - 
ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek("foo"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekForPrev("foo"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - ASSERT_OK(iter->status()); - - delete iter; - } while (ChangeCompactOptions()); -} - -TEST_P(DBIteratorTest, IterSingle) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "a", "va")); - Iterator* iter = NewIterator(ReadOptions(), handles_[1]); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek(""); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekForPrev(""); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek("a"); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekForPrev("a"); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek("b"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekForPrev("b"); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; - } while (ChangeCompactOptions()); -} - -TEST_P(DBIteratorTest, IterMulti) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "a", "va")); - ASSERT_OK(Put(1, "b", "vb")); - ASSERT_OK(Put(1, "c", "vc")); - Iterator* iter = NewIterator(ReadOptions(), handles_[1]); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek(""); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Seek("a"); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Seek("ax"); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->SeekForPrev("d"); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->SeekForPrev("c"); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->SeekForPrev("bx"); - ASSERT_EQ(IterStatus(iter), "b->vb"); - - iter->Seek("b"); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Seek("z"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekForPrev("b"); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->SeekForPrev(""); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - // Switch from reverse to forward - iter->SeekToLast(); - iter->Prev(); - iter->Prev(); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - - // Switch from forward to reverse - iter->SeekToFirst(); - iter->Next(); - 
iter->Next(); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - - // Make sure iter stays at snapshot - ASSERT_OK(Put(1, "a", "va2")); - ASSERT_OK(Put(1, "a2", "va3")); - ASSERT_OK(Put(1, "b", "vb2")); - ASSERT_OK(Put(1, "c", "vc2")); - ASSERT_OK(Delete(1, "b")); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; - } while (ChangeCompactOptions()); -} - -// Check that we can skip over a run of user keys -// by using reseek rather than sequential scan -TEST_P(DBIteratorTest, IterReseek) { - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - Options options = CurrentOptions(options_override); - options.max_sequential_skip_in_iterations = 3; - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // insert three keys with same userkey and verify that - // reseek is not invoked. For each of these test cases, - // verify that we can find the next key "b". - ASSERT_OK(Put(1, "a", "zero")); - ASSERT_OK(Put(1, "a", "one")); - ASSERT_OK(Put(1, "a", "two")); - ASSERT_OK(Put(1, "b", "bone")); - Iterator* iter = NewIterator(ReadOptions(), handles_[1]); - iter->SeekToFirst(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - ASSERT_EQ(IterStatus(iter), "a->two"); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - ASSERT_EQ(IterStatus(iter), "b->bone"); - delete iter; - - // insert a total of three keys with same userkey and verify - // that reseek is still not invoked. - ASSERT_OK(Put(1, "a", "three")); - iter = NewIterator(ReadOptions(), handles_[1]); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->three"); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - ASSERT_EQ(IterStatus(iter), "b->bone"); - delete iter; - - // insert a total of four keys with same userkey and verify - // that reseek is invoked. - ASSERT_OK(Put(1, "a", "four")); - iter = NewIterator(ReadOptions(), handles_[1]); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->four"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1); - ASSERT_EQ(IterStatus(iter), "b->bone"); - delete iter; - - // Testing reverse iterator - // At this point, we have three versions of "a" and one version of "b". - // The reseek statistics is already at 1. 
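IterReseek above checks that once Next() or Prev() has to step over more than max_sequential_skip_in_iterations obsolete versions of a user key, DBIter falls back to a targeted reseek, visible through the NUMBER_OF_RESEEKS_IN_ITERATION ticker. A self-contained sketch of the forward-scan case; the path, key names, and version count are illustrative.

#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include "rocksdb/db.h"
#include "rocksdb/statistics.h"

int main() {
  auto ok = [](const rocksdb::Status& s) { assert(s.ok()); (void)s; };
  rocksdb::Options options;
  options.create_if_missing = true;
  options.max_sequential_skip_in_iterations = 3;  // reseek after 3 skipped versions
  options.statistics = rocksdb::CreateDBStatistics();
  ok(rocksdb::DestroyDB("/tmp/reseek_demo", options));
  rocksdb::DB* db = nullptr;
  ok(rocksdb::DB::Open(options, "/tmp/reseek_demo", &db));

  // One key with many overwritten versions, followed by a second key.
  for (int i = 0; i < 10; ++i) {
    ok(db->Put(rocksdb::WriteOptions(), "a", "v" + std::to_string(i)));
  }
  ok(db->Put(rocksdb::WriteOptions(), "b", "bone"));

  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(rocksdb::ReadOptions()));
  it->SeekToFirst();
  assert(it->Valid() && it->key().ToString() == "a");
  it->Next();  // must get past the stale versions of "a"; triggers a reseek
  assert(it->Valid() && it->key().ToString() == "b");

  uint64_t reseeks = options.statistics->getTickerCount(
      rocksdb::NUMBER_OF_RESEEKS_IN_ITERATION);
  assert(reseeks >= 1);

  delete db;
  return 0;
}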
- int num_reseeks = static_cast( - TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION)); - - // Insert another version of b and assert that reseek is not invoked - ASSERT_OK(Put(1, "b", "btwo")); - iter = NewIterator(ReadOptions(), handles_[1]); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "b->btwo"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), - num_reseeks); - iter->Prev(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), - num_reseeks + 1); - ASSERT_EQ(IterStatus(iter), "a->four"); - delete iter; - - // insert two more versions of b. This makes a total of 4 versions - // of b and 4 versions of a. - ASSERT_OK(Put(1, "b", "bthree")); - ASSERT_OK(Put(1, "b", "bfour")); - iter = NewIterator(ReadOptions(), handles_[1]); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "b->bfour"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), - num_reseeks + 2); - iter->Prev(); - - // the previous Prev call should have invoked reseek - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), - num_reseeks + 3); - ASSERT_EQ(IterStatus(iter), "a->four"); - delete iter; -} - -TEST_F(DBIteratorTest, ReseekUponDirectionChange) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.merge_operator.reset( - new StringAppendTESTOperator(/*delim_char=*/' ')); - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Put("bar", "value")); - { - std::unique_ptr it(db_->NewIterator(ReadOptions())); - it->SeekToLast(); - it->Prev(); - it->Next(); - } - ASSERT_EQ(1, - options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); - - const std::string merge_key("good"); - ASSERT_OK(Put(merge_key, "orig")); - ASSERT_OK(Merge(merge_key, "suffix")); - { - std::unique_ptr it(db_->NewIterator(ReadOptions())); - it->Seek(merge_key); - ASSERT_TRUE(it->Valid()); - const uint64_t prev_reseek_count = - options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION); - it->Prev(); - ASSERT_EQ(prev_reseek_count + 1, options.statistics->getTickerCount( - NUMBER_OF_RESEEKS_IN_ITERATION)); - } -} - -TEST_P(DBIteratorTest, IterSmallAndLargeMix) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "a", "va")); - ASSERT_OK(Put(1, "b", std::string(100000, 'b'))); - ASSERT_OK(Put(1, "c", "vc")); - ASSERT_OK(Put(1, "d", std::string(100000, 'd'))); - ASSERT_OK(Put(1, "e", std::string(100000, 'e'))); - - Iterator* iter = NewIterator(ReadOptions(), handles_[1]); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; - } while 
(ChangeCompactOptions()); -} - -TEST_P(DBIteratorTest, IterMultiWithDelete) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "ka", "va")); - ASSERT_OK(Put(1, "kb", "vb")); - ASSERT_OK(Put(1, "kc", "vc")); - ASSERT_OK(Delete(1, "kb")); - ASSERT_EQ("NOT_FOUND", Get(1, "kb")); - - Iterator* iter = NewIterator(ReadOptions(), handles_[1]); - iter->Seek("kc"); - ASSERT_EQ(IterStatus(iter), "kc->vc"); - if (!CurrentOptions().merge_operator) { - // TODO: merge operator does not support backward iteration yet - if (kPlainTableAllBytesPrefix != option_config_ && - kBlockBasedTableWithWholeKeyHashIndex != option_config_ && - kHashLinkList != option_config_ && - kHashSkipList != option_config_) { // doesn't support SeekToLast - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "ka->va"); - } - } - delete iter; - } while (ChangeOptions()); -} - -TEST_P(DBIteratorTest, IterPrevMaxSkip) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - for (int i = 0; i < 2; i++) { - ASSERT_OK(Put(1, "key1", "v1")); - ASSERT_OK(Put(1, "key2", "v2")); - ASSERT_OK(Put(1, "key3", "v3")); - ASSERT_OK(Put(1, "key4", "v4")); - ASSERT_OK(Put(1, "key5", "v5")); - } - - VerifyIterLast("key5->v5", 1); - - ASSERT_OK(Delete(1, "key5")); - VerifyIterLast("key4->v4", 1); - - ASSERT_OK(Delete(1, "key4")); - VerifyIterLast("key3->v3", 1); - - ASSERT_OK(Delete(1, "key3")); - VerifyIterLast("key2->v2", 1); - - ASSERT_OK(Delete(1, "key2")); - VerifyIterLast("key1->v1", 1); - - ASSERT_OK(Delete(1, "key1")); - VerifyIterLast("(invalid)", 1); - } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast)); -} - -TEST_P(DBIteratorTest, IterWithSnapshot) { - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); - ASSERT_OK(Put(1, "key1", "val1")); - ASSERT_OK(Put(1, "key2", "val2")); - ASSERT_OK(Put(1, "key3", "val3")); - ASSERT_OK(Put(1, "key4", "val4")); - ASSERT_OK(Put(1, "key5", "val5")); - - const Snapshot* snapshot = db_->GetSnapshot(); - ReadOptions options; - options.snapshot = snapshot; - Iterator* iter = NewIterator(options, handles_[1]); - - ASSERT_OK(Put(1, "key0", "val0")); - // Put more values after the snapshot - ASSERT_OK(Put(1, "key100", "val100")); - ASSERT_OK(Put(1, "key101", "val101")); - - iter->Seek("key5"); - ASSERT_EQ(IterStatus(iter), "key5->val5"); - if (!CurrentOptions().merge_operator) { - // TODO: merge operator does not support backward iteration yet - if (kPlainTableAllBytesPrefix != option_config_ && - kBlockBasedTableWithWholeKeyHashIndex != option_config_ && - kHashLinkList != option_config_ && kHashSkipList != option_config_) { - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "key4->val4"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "key3->val3"); - - iter->Next(); - ASSERT_EQ(IterStatus(iter), "key4->val4"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "key5->val5"); - } - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - } - - if (!CurrentOptions().merge_operator) { - // TODO(gzh): merge operator does not support backward iteration yet - if (kPlainTableAllBytesPrefix != option_config_ && - kBlockBasedTableWithWholeKeyHashIndex != option_config_ && - kHashLinkList != option_config_ && kHashSkipList != option_config_) { - iter->SeekForPrev("key1"); - ASSERT_EQ(IterStatus(iter), "key1->val1"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "key2->val2"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "key3->val3"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), 
"key2->val2"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "key1->val1"); - iter->Prev(); - ASSERT_TRUE(!iter->Valid()); - } - } - db_->ReleaseSnapshot(snapshot); - delete iter; - } while (ChangeOptions()); -} - -TEST_P(DBIteratorTest, IteratorPinsRef) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "hello")); - - // Get iterator that will yield the current contents of the DB. - Iterator* iter = NewIterator(ReadOptions(), handles_[1]); - - // Write to force compactions - ASSERT_OK(Put(1, "foo", "newvalue1")); - for (int i = 0; i < 100; i++) { - // 100K values - ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v'))); - } - ASSERT_OK(Put(1, "foo", "newvalue2")); - - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - ASSERT_EQ("hello", iter->value().ToString()); - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - delete iter; - } while (ChangeCompactOptions()); -} - -TEST_P(DBIteratorTest, IteratorDeleteAfterCfDelete) { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - - ASSERT_OK(Put(1, "foo", "delete-cf-then-delete-iter")); - ASSERT_OK(Put(1, "hello", "value2")); - - ColumnFamilyHandle* cf = handles_[1]; - ReadOptions ro; - - auto* iter = db_->NewIterator(ro, cf); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "foo->delete-cf-then-delete-iter"); - - // delete CF handle - EXPECT_OK(db_->DestroyColumnFamilyHandle(cf)); - handles_.erase(std::begin(handles_) + 1); - - // delete Iterator after CF handle is deleted - iter->Next(); - ASSERT_EQ(IterStatus(iter), "hello->value2"); - delete iter; -} - -TEST_P(DBIteratorTest, IteratorDeleteAfterCfDrop) { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - - ASSERT_OK(Put(1, "foo", "drop-cf-then-delete-iter")); - - ReadOptions ro; - ColumnFamilyHandle* cf = handles_[1]; - - auto* iter = db_->NewIterator(ro, cf); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "foo->drop-cf-then-delete-iter"); - - // drop and delete CF - EXPECT_OK(db_->DropColumnFamily(cf)); - EXPECT_OK(db_->DestroyColumnFamilyHandle(cf)); - handles_.erase(std::begin(handles_) + 1); - - // delete Iterator after CF handle is dropped - delete iter; -} - -// SetOptions not defined in ROCKSDB LITE -TEST_P(DBIteratorTest, DBIteratorBoundTest) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - - options.prefix_extractor = nullptr; - DestroyAndReopen(options); - ASSERT_OK(Put("a", "0")); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("g1", "0")); - - // testing basic case with no iterate_upper_bound and no prefix_extractor - { - ReadOptions ro; - ro.iterate_upper_bound = nullptr; - - std::unique_ptr iter(NewIterator(ro)); - - iter->Seek("foo"); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("g1")), 0); - - iter->SeekForPrev("g1"); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("g1")), 0); - - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); - - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo")), 0); - } - - // testing iterate_upper_bound and forward iterator - // to make sure it stops at bound - { - ReadOptions ro; - // iterate_upper_bound points beyond the last 
expected entry - Slice prefix("foo2"); - ro.iterate_upper_bound = &prefix; - - std::unique_ptr iter(NewIterator(ro)); - - iter->Seek("foo"); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(("foo1")), 0); - - iter->Next(); - // should stop here... - ASSERT_TRUE(!iter->Valid()); - } - // Testing SeekToLast with iterate_upper_bound set - { - ReadOptions ro; - - Slice prefix("foo"); - ro.iterate_upper_bound = &prefix; - - std::unique_ptr iter(NewIterator(ro)); - - iter->SeekToLast(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("a")), 0); - } - - // prefix is the first letter of the key - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}})); - ASSERT_OK(Put("a", "0")); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("g1", "0")); - - // testing with iterate_upper_bound and prefix_extractor - // Seek target and iterate_upper_bound are not is same prefix - // This should be an error - { - ReadOptions ro; - Slice upper_bound("g"); - ro.iterate_upper_bound = &upper_bound; - - std::unique_ptr iter(NewIterator(ro)); - - iter->Seek("foo"); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo1", iter->key().ToString()); - - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - } - - // testing that iterate_upper_bound prevents iterating over deleted items - // if the bound has already reached - { - options.prefix_extractor = nullptr; - DestroyAndReopen(options); - ASSERT_OK(Put("a", "0")); - ASSERT_OK(Put("b", "0")); - ASSERT_OK(Put("b1", "0")); - ASSERT_OK(Put("c", "0")); - ASSERT_OK(Put("d", "0")); - ASSERT_OK(Put("e", "0")); - ASSERT_OK(Delete("c")); - ASSERT_OK(Delete("d")); - - // base case with no bound - ReadOptions ro; - ro.iterate_upper_bound = nullptr; - - std::unique_ptr iter(NewIterator(ro)); - - iter->Seek("b"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("b")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(("b1")), 0); - - get_perf_context()->Reset(); - iter->Next(); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ( - static_cast(get_perf_context()->internal_delete_skipped_count), 2); - - // now testing with iterate_bound - Slice prefix("c"); - ro.iterate_upper_bound = &prefix; - - iter.reset(NewIterator(ro)); - - get_perf_context()->Reset(); - - iter->Seek("b"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("b")), 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(("b1")), 0); - - iter->Next(); - // the iteration should stop as soon as the bound key is reached - // even though the key is deleted - // hence internal_delete_skipped_count should be 0 - ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ( - static_cast(get_perf_context()->internal_delete_skipped_count), 0); - } -} - -TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.prefix_extractor = nullptr; - DestroyAndReopen(options); - ASSERT_OK(Put("a", "0")); - ASSERT_OK(Put("z", "0")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("foo2", "bar2")); - ASSERT_OK(Put("foo3", "bar3")); - ASSERT_OK(Put("foo4", "bar4")); - - { - std::string up_str = "foo5"; - Slice up(up_str); - 
ReadOptions ro; - ro.iterate_upper_bound = &up; - std::unique_ptr iter(NewIterator(ro)); - - iter->Seek("foo1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); - - uint64_t prev_block_cache_hit = - TestGetTickerCount(options, BLOCK_CACHE_HIT); - uint64_t prev_block_cache_miss = - TestGetTickerCount(options, BLOCK_CACHE_MISS); - - ASSERT_GT(prev_block_cache_hit + prev_block_cache_miss, 0); - - iter->Seek("foo4"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo4")), 0); - ASSERT_EQ(prev_block_cache_hit, - TestGetTickerCount(options, BLOCK_CACHE_HIT)); - ASSERT_EQ(prev_block_cache_miss, - TestGetTickerCount(options, BLOCK_CACHE_MISS)); - - iter->Seek("foo2"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo2")), 0); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo3")), 0); - ASSERT_EQ(prev_block_cache_hit, - TestGetTickerCount(options, BLOCK_CACHE_HIT)); - ASSERT_EQ(prev_block_cache_miss, - TestGetTickerCount(options, BLOCK_CACHE_MISS)); - } -} - -TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) { - for (auto format_version : {2, 3, 4}) { - int upper_bound_hits = 0; - Options options = CurrentOptions(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTableIterator:out_of_bound", - [&upper_bound_hits](void*) { upper_bound_hits++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - options.env = env_; - options.create_if_missing = true; - options.prefix_extractor = nullptr; - BlockBasedTableOptions table_options; - table_options.format_version = format_version; - table_options.flush_block_policy_factory = - std::make_shared(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("foo2", "bar2")); - ASSERT_OK(Put("foo4", "bar4")); - ASSERT_OK(Flush()); - - Slice ub("foo3"); - ReadOptions ro; - ro.iterate_upper_bound = &ub; - - std::unique_ptr iter(NewIterator(ro)); - - iter->Seek("foo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); - ASSERT_EQ(upper_bound_hits, 0); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("foo2")), 0); - ASSERT_EQ(upper_bound_hits, 0); - - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_EQ(upper_bound_hits, 1); - } -} - -// Enable kBinarySearchWithFirstKey, do some iterator operations and check that -// they don't do unnecessary block reads. 
-TEST_P(DBIteratorTest, IndexWithFirstKey) { - for (int tailing = 0; tailing < 2; ++tailing) { - SCOPED_TRACE("tailing = " + std::to_string(tailing)); - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.prefix_extractor = nullptr; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - Statistics* stats = options.statistics.get(); - BlockBasedTableOptions table_options; - table_options.index_type = - BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey; - table_options.index_shortening = - BlockBasedTableOptions::IndexShorteningMode::kNoShortening; - table_options.flush_block_policy_factory = - std::make_shared(); - table_options.block_cache = - NewLRUCache(8000); // fits all blocks and their cache metadata overhead - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - ASSERT_OK(Merge("a1", "x1")); - ASSERT_OK(Merge("b1", "y1")); - ASSERT_OK(Merge("c0", "z1")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("a2", "x2")); - ASSERT_OK(Merge("b2", "y2")); - ASSERT_OK(Merge("c0", "z2")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("a3", "x3")); - ASSERT_OK(Merge("b3", "y3")); - ASSERT_OK(Merge("c3", "z3")); - ASSERT_OK(Flush()); - - // Block cache is not important for this test. - // We use BLOCK_CACHE_DATA_* counters just because they're the most readily - // available way of counting block accesses. - - ReadOptions ropt; - ropt.tailing = tailing; - std::unique_ptr iter(NewIterator(ropt)); - - ropt.read_tier = ReadTier::kBlockCacheTier; - std::unique_ptr nonblocking_iter(NewIterator(ropt)); - - iter->Seek("b10"); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ("b2", iter->key().ToString()); - EXPECT_EQ("y2", iter->value().ToString()); - EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - - // The cache-only iterator should succeed too, using the blocks pulled into - // the cache by the previous iterator. - nonblocking_iter->Seek("b10"); - ASSERT_TRUE(nonblocking_iter->Valid()); - EXPECT_EQ("b2", nonblocking_iter->key().ToString()); - EXPECT_EQ("y2", nonblocking_iter->value().ToString()); - EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // ... but it shouldn't be able to step forward since the next block is - // not in cache yet. - nonblocking_iter->Next(); - ASSERT_FALSE(nonblocking_iter->Valid()); - ASSERT_TRUE(nonblocking_iter->status().IsIncomplete()); - - // ... nor should a seek to the next key succeed. - nonblocking_iter->Seek("b20"); - ASSERT_FALSE(nonblocking_iter->Valid()); - ASSERT_TRUE(nonblocking_iter->status().IsIncomplete()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ("b3", iter->key().ToString()); - EXPECT_EQ("y3", iter->value().ToString()); - EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // After the blocking iterator loaded the next block, the nonblocking - // iterator's seek should succeed. 
- nonblocking_iter->Seek("b20"); - ASSERT_TRUE(nonblocking_iter->Valid()); - EXPECT_EQ("b3", nonblocking_iter->key().ToString()); - EXPECT_EQ("y3", nonblocking_iter->value().ToString()); - EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - iter->Seek("c0"); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ("c0", iter->key().ToString()); - EXPECT_EQ("z1,z2", iter->value().ToString()); - EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - EXPECT_EQ(6, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ("c3", iter->key().ToString()); - EXPECT_EQ("z3", iter->value().ToString()); - EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - - iter.reset(); - - // Enable iterate_upper_bound and check that iterator is not trying to read - // blocks that are fully above upper bound. - std::string ub = "b3"; - Slice ub_slice(ub); - ropt.iterate_upper_bound = &ub_slice; - iter.reset(NewIterator(ropt)); - - iter->Seek("b2"); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ("b2", iter->key().ToString()); - EXPECT_EQ("y2", iter->value().ToString()); - EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - - iter->Next(); - ASSERT_FALSE(iter->Valid()); - EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - } -} - -TEST_P(DBIteratorTest, IndexWithFirstKeyGet) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.prefix_extractor = nullptr; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - Statistics* stats = options.statistics.get(); - BlockBasedTableOptions table_options; - table_options.index_type = - BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey; - table_options.index_shortening = - BlockBasedTableOptions::IndexShorteningMode::kNoShortening; - table_options.flush_block_policy_factory = - std::make_shared(); - table_options.block_cache = NewLRUCache(1000); // fits all blocks - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - ASSERT_OK(Merge("a", "x1")); - ASSERT_OK(Merge("c", "y1")); - ASSERT_OK(Merge("e", "z1")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("c", "y2")); - ASSERT_OK(Merge("e", "z2")); - ASSERT_OK(Flush()); - - // Get() between blocks shouldn't read any blocks. - ASSERT_EQ("NOT_FOUND", Get("b")); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Get() of an existing key shouldn't read any unnecessary blocks when there's - // only one key per block. - - ASSERT_EQ("y1,y2", Get("c")); - EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - ASSERT_EQ("x1", Get("a")); - EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - EXPECT_EQ(std::vector({"NOT_FOUND", "z1,z2"}), - MultiGet({"b", "e"})); -} - -// TODO(3.13): fix the issue of Seek() + Prev() which might not necessary -// return the biggest key which is smaller than the seek key. 
-TEST_P(DBIteratorTest, PrevAfterAndNextAfterMerge) { - Options options; - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreatePutOperator(); - options.env = env_; - DestroyAndReopen(options); - - // write three entries with different keys using Merge() - WriteOptions wopts; - ASSERT_OK(db_->Merge(wopts, "1", "data1")); - ASSERT_OK(db_->Merge(wopts, "2", "data2")); - ASSERT_OK(db_->Merge(wopts, "3", "data3")); - - std::unique_ptr it(NewIterator(ReadOptions())); - - it->Seek("2"); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("2", it->key().ToString()); - - it->Prev(); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("1", it->key().ToString()); - - it->SeekForPrev("1"); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("1", it->key().ToString()); - - it->Next(); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("2", it->key().ToString()); -} - -class DBIteratorTestForPinnedData : public DBIteratorTest { - public: - enum TestConfig { - NORMAL, - CLOSE_AND_OPEN, - COMPACT_BEFORE_READ, - FLUSH_EVERY_1000, - MAX - }; - DBIteratorTestForPinnedData() : DBIteratorTest() {} - void PinnedDataIteratorRandomized(TestConfig run_config) { - // Generate Random data - Random rnd(301); - - int puts = 100000; - int key_pool = static_cast(puts * 0.7); - int key_size = 100; - int val_size = 1000; - int seeks_percentage = 20; // 20% of keys will be used to test seek() - int delete_percentage = 20; // 20% of keys will be deleted - int merge_percentage = 20; // 20% of keys will be added using Merge() - - Options options = CurrentOptions(); - BlockBasedTableOptions table_options; - table_options.use_delta_encoding = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.merge_operator = MergeOperators::CreatePutOperator(); - DestroyAndReopen(options); - - std::vector generated_keys(key_pool); - for (int i = 0; i < key_pool; i++) { - generated_keys[i] = rnd.RandomString(key_size); - } - - std::map true_data; - std::vector random_keys; - std::vector deleted_keys; - for (int i = 0; i < puts; i++) { - auto& k = generated_keys[rnd.Next() % key_pool]; - auto v = rnd.RandomString(val_size); - - // Insert data to true_data map and to DB - true_data[k] = v; - if (rnd.PercentTrue(merge_percentage)) { - ASSERT_OK(db_->Merge(WriteOptions(), k, v)); - } else { - ASSERT_OK(Put(k, v)); - } - - // Pick random keys to be used to test Seek() - if (rnd.PercentTrue(seeks_percentage)) { - random_keys.push_back(k); - } - - // Delete some random keys - if (rnd.PercentTrue(delete_percentage)) { - deleted_keys.push_back(k); - true_data.erase(k); - ASSERT_OK(Delete(k)); - } - - if (run_config == TestConfig::FLUSH_EVERY_1000) { - if (i && i % 1000 == 0) { - ASSERT_OK(Flush()); - } - } - } - - if (run_config == TestConfig::CLOSE_AND_OPEN) { - Close(); - Reopen(options); - } else if (run_config == TestConfig::COMPACT_BEFORE_READ) { - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - - ReadOptions ro; - ro.pin_data = true; - auto iter = NewIterator(ro); - - { - // Test Seek to random keys - std::vector keys_slices; - std::vector true_keys; - for (auto& k : random_keys) { - iter->Seek(k); - if (!iter->Valid()) { - ASSERT_EQ(true_data.lower_bound(k), true_data.end()); - continue; - } - std::string prop_value; - ASSERT_OK( - iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("1", prop_value); - keys_slices.push_back(iter->key()); - true_keys.push_back(true_data.lower_bound(k)->first); - } - - for (size_t i = 0; i < keys_slices.size(); i++) { - 
ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]); - } - } - - { - // Test SeekForPrev to random keys - std::vector keys_slices; - std::vector true_keys; - for (auto& k : random_keys) { - iter->SeekForPrev(k); - if (!iter->Valid()) { - ASSERT_EQ(true_data.upper_bound(k), true_data.begin()); - continue; - } - std::string prop_value; - ASSERT_OK( - iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("1", prop_value); - keys_slices.push_back(iter->key()); - true_keys.push_back((--true_data.upper_bound(k))->first); - } - - for (size_t i = 0; i < keys_slices.size(); i++) { - ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]); - } - } - - { - // Test iterating all data forward - std::vector all_keys; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - std::string prop_value; - ASSERT_OK( - iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("1", prop_value); - all_keys.push_back(iter->key()); - } - ASSERT_EQ(all_keys.size(), true_data.size()); - - // Verify that all keys slices are valid - auto data_iter = true_data.begin(); - for (size_t i = 0; i < all_keys.size(); i++) { - ASSERT_EQ(all_keys[i].ToString(), data_iter->first); - data_iter++; - } - } - - { - // Test iterating all data backward - std::vector all_keys; - for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { - std::string prop_value; - ASSERT_OK( - iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("1", prop_value); - all_keys.push_back(iter->key()); - } - ASSERT_EQ(all_keys.size(), true_data.size()); - - // Verify that all keys slices are valid (backward) - auto data_iter = true_data.rbegin(); - for (size_t i = 0; i < all_keys.size(); i++) { - ASSERT_EQ(all_keys[i].ToString(), data_iter->first); - data_iter++; - } - } - - delete iter; - } -}; - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedNormal) { - PinnedDataIteratorRandomized(TestConfig::NORMAL); -} -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCLoseAndOpen) { - PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN); -} - -TEST_P(DBIteratorTestForPinnedData, - PinnedDataIteratorRandomizedCompactBeforeRead) { - PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ); -} - -TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedFlush) { - PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000); -} - -INSTANTIATE_TEST_CASE_P(DBIteratorTestForPinnedDataInstance, - DBIteratorTestForPinnedData, - testing::Values(true, false)); - -TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options; - table_options.use_delta_encoding = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.disable_auto_compactions = true; - options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb - DestroyAndReopen(options); - - std::map true_data; - - // Generate 4 sst files in L2 - Random rnd(301); - for (int i = 1; i <= 1000; i++) { - std::string k = Key(i * 3); - std::string v = rnd.RandomString(100); - ASSERT_OK(Put(k, v)); - true_data[k] = v; - if (i % 250 == 0) { - ASSERT_OK(Flush()); - } - } - ASSERT_EQ(FilesPerLevel(0), "4"); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(FilesPerLevel(0), "0,4"); - - // Generate 4 sst files in L0 - for (int i = 1; i <= 
1000; i++) { - std::string k = Key(i * 2); - std::string v = rnd.RandomString(100); - ASSERT_OK(Put(k, v)); - true_data[k] = v; - if (i % 250 == 0) { - ASSERT_OK(Flush()); - } - } - ASSERT_EQ(FilesPerLevel(0), "4,4"); - - // Add some keys/values in memtables - for (int i = 1; i <= 1000; i++) { - std::string k = Key(i); - std::string v = rnd.RandomString(100); - ASSERT_OK(Put(k, v)); - true_data[k] = v; - } - ASSERT_EQ(FilesPerLevel(0), "4,4"); - - ReadOptions ro; - ro.pin_data = true; - auto iter = NewIterator(ro); - - std::vector> results; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - std::string prop_value; - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("1", prop_value); - results.emplace_back(iter->key(), iter->value().ToString()); - } - - ASSERT_EQ(results.size(), true_data.size()); - auto data_iter = true_data.begin(); - for (size_t i = 0; i < results.size(); i++, data_iter++) { - auto& kv = results[i]; - ASSERT_EQ(kv.first, data_iter->first); - ASSERT_EQ(kv.second, data_iter->second); - } - - delete iter; -} - -TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options; - table_options.use_delta_encoding = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.merge_operator = MergeOperators::CreateUInt64AddOperator(); - DestroyAndReopen(options); - - std::string numbers[7]; - for (int val = 0; val <= 6; val++) { - PutFixed64(numbers + val, val); - } - - // +1 all keys in range [ 0 => 999] - for (int i = 0; i < 1000; i++) { - WriteOptions wo; - ASSERT_OK(db_->Merge(wo, Key(i), numbers[1])); - } - - // +2 all keys divisible by 2 in range [ 0 => 999] - for (int i = 0; i < 1000; i += 2) { - WriteOptions wo; - ASSERT_OK(db_->Merge(wo, Key(i), numbers[2])); - } - - // +3 all keys divisible by 5 in range [ 0 => 999] - for (int i = 0; i < 1000; i += 5) { - WriteOptions wo; - ASSERT_OK(db_->Merge(wo, Key(i), numbers[3])); - } - - ReadOptions ro; - ro.pin_data = true; - auto iter = NewIterator(ro); - - std::vector> results; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - std::string prop_value; - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("1", prop_value); - results.emplace_back(iter->key(), iter->value().ToString()); - } - - ASSERT_EQ(results.size(), 1000); - for (size_t i = 0; i < results.size(); i++) { - auto& kv = results[i]; - ASSERT_EQ(kv.first, Key(static_cast(i))); - int expected_val = 1; - if (i % 2 == 0) { - expected_val += 2; - } - if (i % 5 == 0) { - expected_val += 3; - } - ASSERT_EQ(kv.second, numbers[expected_val]); - } - - delete iter; -} - -TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options; - table_options.use_delta_encoding = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.write_buffer_size = 100000; - DestroyAndReopen(options); - - Random rnd(301); - - std::map true_data; - for (int i = 0; i < 1000; i++) { - std::string k = rnd.RandomString(10); - std::string v = rnd.RandomString(1000); - ASSERT_OK(Put(k, v)); - true_data[k] = v; - } - - ReadOptions ro; - ro.pin_data = true; - auto iter = NewIterator(ro); - - // Delete 50% of the keys and update the other 50% - for (auto& kv : true_data) { - if (rnd.OneIn(2)) { - ASSERT_OK(Delete(kv.first)); - } else { - std::string new_val = rnd.RandomString(1000); - 
ASSERT_OK(Put(kv.first, new_val)); - } - } - - std::vector> results; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - std::string prop_value; - ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); - ASSERT_EQ("1", prop_value); - results.emplace_back(iter->key(), iter->value().ToString()); - } - - auto data_iter = true_data.begin(); - for (size_t i = 0; i < results.size(); i++, data_iter++) { - auto& kv = results[i]; - ASSERT_EQ(kv.first, data_iter->first); - ASSERT_EQ(kv.second, data_iter->second); - } - - delete iter; -} - -class SliceTransformLimitedDomainGeneric : public SliceTransform { - const char* Name() const override { - return "SliceTransformLimitedDomainGeneric"; - } - - Slice Transform(const Slice& src) const override { - return Slice(src.data(), 1); - } - - bool InDomain(const Slice& src) const override { - // prefix will be x???? - return src.size() >= 1; - } - - bool InRange(const Slice& dst) const override { - // prefix will be x???? - return dst.size() == 1; - } -}; - -TEST_P(DBIteratorTest, IterSeekForPrevCrossingFiles) { - Options options = CurrentOptions(); - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.disable_auto_compactions = true; - // Enable prefix bloom for SST files - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - ASSERT_OK(Put("a1", "va1")); - ASSERT_OK(Put("a2", "va2")); - ASSERT_OK(Put("a3", "va3")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("b1", "vb1")); - ASSERT_OK(Put("b2", "vb2")); - ASSERT_OK(Put("b3", "vb3")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("b4", "vb4")); - ASSERT_OK(Put("d1", "vd1")); - ASSERT_OK(Put("d2", "vd2")); - ASSERT_OK(Put("d4", "vd4")); - ASSERT_OK(Flush()); - - MoveFilesToLevel(1); - { - ReadOptions ro; - Iterator* iter = NewIterator(ro); - - iter->SeekForPrev("a4"); - ASSERT_EQ(iter->key().ToString(), "a3"); - ASSERT_EQ(iter->value().ToString(), "va3"); - - iter->SeekForPrev("c2"); - ASSERT_EQ(iter->key().ToString(), "b3"); - iter->SeekForPrev("d3"); - ASSERT_EQ(iter->key().ToString(), "d2"); - iter->SeekForPrev("b5"); - ASSERT_EQ(iter->key().ToString(), "b4"); - delete iter; - } - - { - ReadOptions ro; - ro.prefix_same_as_start = true; - Iterator* iter = NewIterator(ro); - iter->SeekForPrev("c2"); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - delete iter; - } -} - -TEST_P(DBIteratorTest, IterSeekForPrevCrossingFilesCustomPrefixExtractor) { - Options options = CurrentOptions(); - options.prefix_extractor = - std::make_shared(); - options.disable_auto_compactions = true; - // Enable prefix bloom for SST files - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - ASSERT_OK(Put("a1", "va1")); - ASSERT_OK(Put("a2", "va2")); - ASSERT_OK(Put("a3", "va3")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("b1", "vb1")); - ASSERT_OK(Put("b2", "vb2")); - ASSERT_OK(Put("b3", "vb3")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("b4", "vb4")); - ASSERT_OK(Put("d1", "vd1")); - ASSERT_OK(Put("d2", "vd2")); - ASSERT_OK(Put("d4", "vd4")); - ASSERT_OK(Flush()); - - MoveFilesToLevel(1); - { - ReadOptions ro; - Iterator* iter = NewIterator(ro); - - iter->SeekForPrev("a4"); - ASSERT_EQ(iter->key().ToString(), "a3"); - ASSERT_EQ(iter->value().ToString(), "va3"); - - 
iter->SeekForPrev("c2"); - ASSERT_EQ(iter->key().ToString(), "b3"); - iter->SeekForPrev("d3"); - ASSERT_EQ(iter->key().ToString(), "d2"); - iter->SeekForPrev("b5"); - ASSERT_EQ(iter->key().ToString(), "b4"); - delete iter; - } - - { - ReadOptions ro; - ro.prefix_same_as_start = true; - Iterator* iter = NewIterator(ro); - iter->SeekForPrev("c2"); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - delete iter; - } -} - -TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocks) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options; - table_options.block_size = 1; // every block will contain one entry - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.merge_operator = MergeOperators::CreateStringAppendTESTOperator(); - options.disable_auto_compactions = true; - options.max_sequential_skip_in_iterations = 8; - - DestroyAndReopen(options); - - // Putting such deletes will force DBIter::Prev() to fallback to a Seek - for (int file_num = 0; file_num < 10; file_num++) { - ASSERT_OK(Delete("key4")); - ASSERT_OK(Flush()); - } - - // First File containing 5 blocks of puts - ASSERT_OK(Put("key1", "val1.0")); - ASSERT_OK(Put("key2", "val2.0")); - ASSERT_OK(Put("key3", "val3.0")); - ASSERT_OK(Put("key4", "val4.0")); - ASSERT_OK(Put("key5", "val5.0")); - ASSERT_OK(Flush()); - - // Second file containing 9 blocks of merge operands - ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1")); - ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2")); - - ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1")); - ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2")); - ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3")); - - ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1")); - ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2")); - ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3")); - ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4")); - ASSERT_OK(Flush()); - - { - ReadOptions ro; - ro.fill_cache = false; - Iterator* iter = NewIterator(ro); - - iter->SeekToLast(); - ASSERT_EQ(iter->key().ToString(), "key5"); - ASSERT_EQ(iter->value().ToString(), "val5.0"); - - iter->Prev(); - ASSERT_EQ(iter->key().ToString(), "key4"); - ASSERT_EQ(iter->value().ToString(), "val4.0"); - - iter->Prev(); - ASSERT_EQ(iter->key().ToString(), "key3"); - ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4"); - - iter->Prev(); - ASSERT_EQ(iter->key().ToString(), "key2"); - ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3"); - - iter->Prev(); - ASSERT_EQ(iter->key().ToString(), "key1"); - ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2"); - - delete iter; - } -} - -TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) { - Options options = CurrentOptions(); - options.merge_operator = MergeOperators::CreateStringAppendTESTOperator(); - options.disable_auto_compactions = true; - options.level0_slowdown_writes_trigger = (1 << 30); - options.level0_stop_writes_trigger = (1 << 30); - options.max_sequential_skip_in_iterations = 8; - DestroyAndReopen(options); - - const int kNumKeys = 500; - // Small number of merge operands to make sure that DBIter::Prev() don't - // fall back to Seek() - const int kNumMergeOperands = 3; - // Use value size that will make sure that every block contain 1 key - const int kValSize = - static_cast(BlockBasedTableOptions().block_size) * 4; - // Percentage of keys that wont get merge operations - const int kNoMergeOpPercentage = 20; - // Percentage of keys that will be deleted - 
  const int kDeletePercentage = 10;
-
-  // For half of the key range we will write multiple deletes first to
-  // force DBIter::Prev() to fall back to Seek()
-  for (int file_num = 0; file_num < 10; file_num++) {
-    for (int i = 0; i < kNumKeys; i += 2) {
-      ASSERT_OK(Delete(Key(i)));
-    }
-    ASSERT_OK(Flush());
-  }
-
-  Random rnd(301);
-  std::map<std::string, std::string> true_data;
-  std::string gen_key;
-  std::string gen_val;
-
-  for (int i = 0; i < kNumKeys; i++) {
-    gen_key = Key(i);
-    gen_val = rnd.RandomString(kValSize);
-
-    ASSERT_OK(Put(gen_key, gen_val));
-    true_data[gen_key] = gen_val;
-  }
-  ASSERT_OK(Flush());
-
-  // Separate values and merge operands into different files so that we
-  // make sure that we don't merge them while flushing but actually
-  // merge them in the read path
-  for (int i = 0; i < kNumKeys; i++) {
-    if (rnd.PercentTrue(kNoMergeOpPercentage)) {
-      // Don't give merge operations for some keys
-      continue;
-    }
-
-    for (int j = 0; j < kNumMergeOperands; j++) {
-      gen_key = Key(i);
-      gen_val = rnd.RandomString(kValSize);
-
-      ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
-      true_data[gen_key] += "," + gen_val;
-    }
-  }
-  ASSERT_OK(Flush());
-
-  for (int i = 0; i < kNumKeys; i++) {
-    if (rnd.PercentTrue(kDeletePercentage)) {
-      gen_key = Key(i);
-
-      ASSERT_OK(Delete(gen_key));
-      true_data.erase(gen_key);
-    }
-  }
-  ASSERT_OK(Flush());
-
-  {
-    ReadOptions ro;
-    ro.fill_cache = false;
-    Iterator* iter = NewIterator(ro);
-    auto data_iter = true_data.rbegin();
-
-    for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
-      ASSERT_EQ(iter->key().ToString(), data_iter->first);
-      ASSERT_EQ(iter->value().ToString(), data_iter->second);
-      data_iter++;
-    }
-    ASSERT_EQ(data_iter, true_data.rend());
-
-    delete iter;
-  }
-
-  {
-    ReadOptions ro;
-    ro.fill_cache = false;
-    Iterator* iter = NewIterator(ro);
-    auto data_iter = true_data.rbegin();
-
-    int entries_right = 0;
-    std::string seek_key;
-    for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
-      // Verify key/value of current position
-      ASSERT_EQ(iter->key().ToString(), data_iter->first);
-      ASSERT_EQ(iter->value().ToString(), data_iter->second);
-
-      bool restore_position_with_seek = rnd.Uniform(2);
-      if (restore_position_with_seek) {
-        seek_key = iter->key().ToString();
-      }
-
-      // Do some Next() operations to restore the iterator to its original
-      // position
-      int next_count =
-          entries_right > 0 ?
rnd.Uniform(std::min(entries_right, 10)) : 0; - for (int i = 0; i < next_count; i++) { - iter->Next(); - data_iter--; - - ASSERT_EQ(iter->key().ToString(), data_iter->first); - ASSERT_EQ(iter->value().ToString(), data_iter->second); - } - - if (restore_position_with_seek) { - // Restore orignal position using Seek() - iter->Seek(seek_key); - for (int i = 0; i < next_count; i++) { - data_iter++; - } - - ASSERT_EQ(iter->key().ToString(), data_iter->first); - ASSERT_EQ(iter->value().ToString(), data_iter->second); - } else { - // Restore original position using Prev() - for (int i = 0; i < next_count; i++) { - iter->Prev(); - data_iter++; - - ASSERT_EQ(iter->key().ToString(), data_iter->first); - ASSERT_EQ(iter->value().ToString(), data_iter->second); - } - } - - entries_right++; - data_iter++; - } - ASSERT_EQ(data_iter, true_data.rend()); - - delete iter; - } -} - -TEST_P(DBIteratorTest, IteratorWithLocalStatistics) { - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 1000; i++) { - // Key 10 bytes / Value 10 bytes - ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10))); - } - - std::atomic total_next(0); - std::atomic total_next_found(0); - std::atomic total_prev(0); - std::atomic total_prev_found(0); - std::atomic total_bytes(0); - - std::vector threads; - std::function reader_func_next = [&]() { - SetPerfLevel(kEnableCount); - get_perf_context()->Reset(); - Iterator* iter = NewIterator(ReadOptions()); - - iter->SeekToFirst(); - // Seek will bump ITER_BYTES_READ - uint64_t bytes = 0; - bytes += iter->key().size(); - bytes += iter->value().size(); - while (true) { - iter->Next(); - total_next++; - - if (!iter->Valid()) { - break; - } - total_next_found++; - bytes += iter->key().size(); - bytes += iter->value().size(); - } - - delete iter; - ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes); - SetPerfLevel(kDisable); - total_bytes += bytes; - }; - - std::function reader_func_prev = [&]() { - SetPerfLevel(kEnableCount); - Iterator* iter = NewIterator(ReadOptions()); - - iter->SeekToLast(); - // Seek will bump ITER_BYTES_READ - uint64_t bytes = 0; - bytes += iter->key().size(); - bytes += iter->value().size(); - while (true) { - iter->Prev(); - total_prev++; - - if (!iter->Valid()) { - break; - } - total_prev_found++; - bytes += iter->key().size(); - bytes += iter->value().size(); - } - - delete iter; - ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes); - SetPerfLevel(kDisable); - total_bytes += bytes; - }; - - for (int i = 0; i < 10; i++) { - threads.emplace_back(reader_func_next); - } - for (int i = 0; i < 15; i++) { - threads.emplace_back(reader_func_prev); - } - - for (auto& t : threads) { - t.join(); - } - - ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), (uint64_t)total_next); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND), - (uint64_t)total_next_found); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), (uint64_t)total_prev); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND), - (uint64_t)total_prev_found); - ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), - (uint64_t)total_bytes); -} - -TEST_P(DBIteratorTest, ReadAhead) { - Options options; - env_->count_random_reads_ = true; - options.env = env_; - options.disable_auto_compactions = true; - options.write_buffer_size = 4 << 20; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions table_options; - table_options.block_size 
= 1024; - table_options.no_block_cache = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - - std::string value(1024, 'a'); - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(i), value)); - } - ASSERT_OK(Flush()); - MoveFilesToLevel(2); - - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(i), value)); - } - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(i), value)); - } - ASSERT_OK(Flush()); - ASSERT_EQ("1,1,1", FilesPerLevel()); - - env_->random_read_bytes_counter_ = 0; - options.statistics->setTickerCount(NO_FILE_OPENS, 0); - ReadOptions read_options; - auto* iter = NewIterator(read_options); - iter->SeekToFirst(); - int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS); - size_t bytes_read = env_->random_read_bytes_counter_; - delete iter; - - env_->random_read_bytes_counter_ = 0; - options.statistics->setTickerCount(NO_FILE_OPENS, 0); - read_options.readahead_size = 1024 * 10; - iter = NewIterator(read_options); - iter->SeekToFirst(); - int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS); - size_t bytes_read_readahead = env_->random_read_bytes_counter_; - delete iter; - ASSERT_EQ(num_file_opens, num_file_opens_readahead); - ASSERT_GT(bytes_read_readahead, bytes_read); - ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3); - - // Verify correctness. - iter = NewIterator(read_options); - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_EQ(value, iter->value()); - count++; - } - ASSERT_EQ(100, count); - for (int i = 0; i < 100; i++) { - iter->Seek(Key(i)); - ASSERT_EQ(value, iter->value()); - } - delete iter; -} - -// Insert a key, create a snapshot iterator, overwrite key lots of times, -// seek to a smaller key. Expect DBIter to fall back to a seek instead of -// going through all the overwrites linearly. -TEST_P(DBIteratorTest, DBIteratorSkipRecentDuplicatesTest) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.max_sequential_skip_in_iterations = 3; - options.prefix_extractor = nullptr; - options.write_buffer_size = 1 << 27; // big enough to avoid flush - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - - // Insert. - ASSERT_OK(Put("b", "0")); - - // Create iterator. - ReadOptions ro; - std::unique_ptr iter(NewIterator(ro)); - - // Insert a lot. - for (int i = 0; i < 100; ++i) { - ASSERT_OK(Put("b", std::to_string(i + 1).c_str())); - } - - // Check that memtable wasn't flushed. - std::string val; - ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &val)); - EXPECT_EQ("0", val); - - // Seek iterator to a smaller key. - get_perf_context()->Reset(); - iter->Seek("a"); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ("b", iter->key().ToString()); - EXPECT_EQ("0", iter->value().ToString()); - - // Check that the seek didn't do too much work. - // Checks are not tight, just make sure that everything is well below 100. - EXPECT_LT(get_perf_context()->internal_key_skipped_count, 4); - EXPECT_LT(get_perf_context()->internal_recent_skipped_count, 8); - EXPECT_LT(get_perf_context()->seek_on_memtable_count, 10); - EXPECT_LT(get_perf_context()->next_on_memtable_count, 10); - EXPECT_LT(get_perf_context()->prev_on_memtable_count, 10); - - // Check that iterator did something like what we expect. 
- EXPECT_EQ(get_perf_context()->internal_delete_skipped_count, 0); - EXPECT_EQ(get_perf_context()->internal_merge_count, 0); - EXPECT_GE(get_perf_context()->internal_recent_skipped_count, 2); - EXPECT_GE(get_perf_context()->seek_on_memtable_count, 2); - EXPECT_EQ(1, - options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); -} - -TEST_P(DBIteratorTest, Refresh) { - ASSERT_OK(Put("x", "y")); - - std::unique_ptr iter(NewIterator(ReadOptions())); - ASSERT_OK(iter->status()); - iter->Seek(Slice("a")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("x")), 0); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - ASSERT_OK(Put("c", "d")); - - iter->Seek(Slice("a")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("x")), 0); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - ASSERT_OK(iter->status()); - ASSERT_OK(iter->Refresh()); - - iter->Seek(Slice("a")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("c")), 0); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("x")), 0); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - EXPECT_OK(dbfull()->Flush(FlushOptions())); - - ASSERT_OK(Put("m", "n")); - - iter->Seek(Slice("a")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("c")), 0); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("x")), 0); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - ASSERT_OK(iter->status()); - ASSERT_OK(iter->Refresh()); - - iter->Seek(Slice("a")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("c")), 0); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("m")), 0); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("x")), 0); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - iter.reset(); -} - -TEST_P(DBIteratorTest, RefreshWithSnapshot) { - ASSERT_OK(Put("x", "y")); - const Snapshot* snapshot = db_->GetSnapshot(); - ReadOptions options; - options.snapshot = snapshot; - Iterator* iter = NewIterator(options); - ASSERT_OK(iter->status()); - - iter->Seek(Slice("a")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("x")), 0); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - ASSERT_OK(Put("c", "d")); - - iter->Seek(Slice("a")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("x")), 0); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - ASSERT_OK(iter->status()); - Status s = iter->Refresh(); - ASSERT_TRUE(s.IsNotSupported()); - db_->ReleaseSnapshot(snapshot); - delete iter; -} - -TEST_P(DBIteratorTest, CreationFailure) { - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::NewInternalIterator:StatusCallback", [](void* arg) { - *(reinterpret_cast(arg)) = Status::Corruption("test status"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - Iterator* iter = NewIterator(ReadOptions()); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsCorruption()); - delete iter; -} - -TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) { - Options options = CurrentOptions(); - options.max_sequential_skip_in_iterations = 3; - DestroyAndReopen(options); - - // write a bunch of kvs to the database. 
- ASSERT_OK(Put("a", "1")); - ASSERT_OK(Put("y", "1")); - ASSERT_OK(Put("y1", "1")); - ASSERT_OK(Put("y2", "1")); - ASSERT_OK(Put("y3", "1")); - ASSERT_OK(Put("z", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("a", "1")); - ASSERT_OK(Put("z", "1")); - ASSERT_OK(Put("bar", "1")); - ASSERT_OK(Put("foo", "1")); - - std::string upper_bound = "x"; - Slice ub_slice(upper_bound); - ReadOptions ro; - ro.iterate_upper_bound = &ub_slice; - ro.max_skippable_internal_keys = 1000; - - Iterator* iter = NewIterator(ro); - iter->Seek("foo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("bar", iter->key().ToString()); - - delete iter; -} - -TEST_P(DBIteratorTest, TableFilter) { - ASSERT_OK(Put("a", "1")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("b", "2")); - ASSERT_OK(Put("c", "3")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("d", "4")); - ASSERT_OK(Put("e", "5")); - ASSERT_OK(Put("f", "6")); - EXPECT_OK(dbfull()->Flush(FlushOptions())); - - // Ensure the table_filter callback is called once for each table. - { - std::set unseen{1, 2, 3}; - ReadOptions opts; - opts.table_filter = [&](const TableProperties& props) { - auto it = unseen.find(props.num_entries); - if (it == unseen.end()) { - ADD_FAILURE() << "saw table properties with an unexpected " - << props.num_entries << " entries"; - } else { - unseen.erase(it); - } - return true; - }; - auto iter = NewIterator(opts); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->1"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->2"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->3"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "d->4"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "e->5"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "f->6"); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(unseen.empty()); - delete iter; - } - - // Ensure returning false in the table_filter hides the keys from that table - // during iteration. - { - ReadOptions opts; - opts.table_filter = [](const TableProperties& props) { - return props.num_entries != 2; - }; - auto iter = NewIterator(opts); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->1"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "d->4"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "e->5"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "f->6"); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - delete iter; - } -} - -TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) { - Options options = CurrentOptions(); - options.max_sequential_skip_in_iterations = 3; - DestroyAndReopen(options); - - // write a bunch of kvs to the database. 
- ASSERT_OK(Put("a", "1")); - ASSERT_OK(Put("y", "1")); - ASSERT_OK(Put("z", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("a", "1")); - ASSERT_OK(Put("z", "1")); - ASSERT_OK(Put("bar", "1")); - ASSERT_OK(Put("foo", "1")); - ASSERT_OK(Put("foo", "2")); - - ASSERT_OK(Put("foo", "3")); - ASSERT_OK(Put("foo", "4")); - ASSERT_OK(Put("foo", "5")); - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(Put("foo", "6")); - - std::string upper_bound = "x"; - Slice ub_slice(upper_bound); - ReadOptions ro; - ro.snapshot = snapshot; - ro.iterate_upper_bound = &ub_slice; - - Iterator* iter = NewIterator(ro); - iter->SeekForPrev("goo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - iter->Prev(); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bar", iter->key().ToString()); - - delete iter; - db_->ReleaseSnapshot(snapshot); -} - -TEST_P(DBIteratorTest, SkipStatistics) { - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - - int skip_count = 0; - - // write a bunch of kvs to the database. - ASSERT_OK(Put("a", "1")); - ASSERT_OK(Put("b", "1")); - ASSERT_OK(Put("c", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("d", "1")); - ASSERT_OK(Put("e", "1")); - ASSERT_OK(Put("f", "1")); - ASSERT_OK(Put("a", "2")); - ASSERT_OK(Put("b", "2")); - ASSERT_OK(Flush()); - ASSERT_OK(Delete("d")); - ASSERT_OK(Delete("e")); - ASSERT_OK(Delete("f")); - - Iterator* iter = NewIterator(ReadOptions()); - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 3); - delete iter; - skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence - ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); - - iter = NewIterator(ReadOptions()); - count = 0; - for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 3); - delete iter; - skip_count += 8; // Same as above, but in reverse order - ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); - - ASSERT_OK(Put("aa", "1")); - ASSERT_OK(Put("ab", "1")); - ASSERT_OK(Put("ac", "1")); - ASSERT_OK(Put("ad", "1")); - ASSERT_OK(Flush()); - ASSERT_OK(Delete("ab")); - ASSERT_OK(Delete("ac")); - ASSERT_OK(Delete("ad")); - - ReadOptions ro; - Slice prefix("b"); - ro.iterate_upper_bound = &prefix; - - iter = NewIterator(ro); - count = 0; - for (iter->Seek("aa"); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 1); - delete iter; - skip_count += 6; // 3 deletes + 3 original keys - ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); - - iter = NewIterator(ro); - count = 0; - for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { - ASSERT_OK(iter->status()); - count++; - } - ASSERT_EQ(count, 2); - delete iter; - // 3 deletes + 3 original keys + lower sequence of "a" - skip_count += 7; - ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP)); -} - -TEST_P(DBIteratorTest, SeekAfterHittingManyInternalKeys) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - ReadOptions ropts; - ropts.max_skippable_internal_keys = 2; - - ASSERT_OK(Put("1", "val_1")); - // Add more tombstones than max_skippable_internal_keys so that Next() fails. 
- ASSERT_OK(Delete("2")); - ASSERT_OK(Delete("3")); - ASSERT_OK(Delete("4")); - ASSERT_OK(Delete("5")); - ASSERT_OK(Put("6", "val_6")); - - std::unique_ptr iter(NewIterator(ropts)); - iter->SeekToFirst(); - - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), "1"); - ASSERT_EQ(iter->value().ToString(), "val_1"); - - // This should fail as incomplete due to too many non-visible internal keys on - // the way to the next valid user key. - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - ASSERT_TRUE(iter->status().IsIncomplete()); - - // Get the internal key at which Next() failed. - std::string prop_value; - ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value)); - ASSERT_EQ("4", prop_value); - - // Create a new iterator to seek to the internal key. - std::unique_ptr iter2(NewIterator(ropts)); - iter2->Seek(prop_value); - ASSERT_TRUE(iter2->Valid()); - ASSERT_OK(iter2->status()); - - ASSERT_EQ(iter2->key().ToString(), "6"); - ASSERT_EQ(iter2->value().ToString(), "val_6"); -} - -// Reproduces a former bug where iterator would skip some records when DBIter -// re-seeks subiterator with Incomplete status. -TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options; - // Make sure the sst file has more than one block. - table_options.flush_block_policy_factory = - std::make_shared(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - // Two records in sst file, each in its own block. - ASSERT_OK(Put("b", "")); - ASSERT_OK(Put("d", "")); - ASSERT_OK(Flush()); - - // Create a nonblocking iterator before writing to memtable. - ReadOptions ropt; - ropt.read_tier = kBlockCacheTier; - std::unique_ptr iter(NewIterator(ropt)); - - // Overwrite a key in memtable many times to hit - // max_sequential_skip_in_iterations (which is 8 by default). - for (int i = 0; i < 20; ++i) { - ASSERT_OK(Put("c", "")); - } - - // Load the second block in sst file into the block cache. - { - std::unique_ptr iter2(NewIterator(ReadOptions())); - iter2->Seek("d"); - } - - // Finally seek the nonblocking iterator. - iter->Seek("a"); - // With the bug, the status used to be OK, and the iterator used to point to - // "d". 
- EXPECT_TRUE(iter->status().IsIncomplete()); -} - -TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) { - ASSERT_OK(Put("a", "")); - ASSERT_OK(Put("b", "")); - ASSERT_OK(Flush()); - - ReadOptions ropt; - Slice ub = "b"; - ropt.iterate_upper_bound = &ub; - - std::unique_ptr it(dbfull()->NewIterator(ropt)); - it->SeekForPrev("a"); - ASSERT_TRUE(it->Valid()); - ASSERT_OK(it->status()); - ASSERT_EQ("a", it->key().ToString()); - it->Next(); - ASSERT_FALSE(it->Valid()); - ASSERT_OK(it->status()); - it->SeekForPrev("a"); - ASSERT_OK(it->status()); - - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("a", it->key().ToString()); -} - -TEST_P(DBIteratorTest, AvoidReseekLevelIterator) { - Options options = CurrentOptions(); - options.compression = CompressionType::kNoCompression; - BlockBasedTableOptions table_options; - table_options.block_size = 800; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - - Random rnd(301); - std::string random_str = rnd.RandomString(180); - - ASSERT_OK(Put("1", random_str)); - ASSERT_OK(Put("2", random_str)); - ASSERT_OK(Put("3", random_str)); - ASSERT_OK(Put("4", random_str)); - // A new block - ASSERT_OK(Put("5", random_str)); - ASSERT_OK(Put("6", random_str)); - ASSERT_OK(Put("7", random_str)); - ASSERT_OK(Flush()); - ASSERT_OK(Put("8", random_str)); - ASSERT_OK(Put("9", random_str)); - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - int num_find_file_in_level = 0; - int num_idx_blk_seek = 0; - SyncPoint::GetInstance()->SetCallBack( - "LevelIterator::Seek:BeforeFindFile", - [&](void* /*arg*/) { num_find_file_in_level++; }); - SyncPoint::GetInstance()->SetCallBack( - "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - { - std::unique_ptr iter(NewIterator(ReadOptions())); - iter->Seek("1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(1, num_find_file_in_level); - ASSERT_EQ(1, num_idx_blk_seek); - - iter->Seek("2"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(1, num_find_file_in_level); - ASSERT_EQ(1, num_idx_blk_seek); - - iter->Seek("3"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(1, num_find_file_in_level); - ASSERT_EQ(1, num_idx_blk_seek); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(1, num_find_file_in_level); - ASSERT_EQ(1, num_idx_blk_seek); - - iter->Seek("5"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(1, num_find_file_in_level); - ASSERT_EQ(2, num_idx_blk_seek); - - iter->Seek("6"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(1, num_find_file_in_level); - ASSERT_EQ(2, num_idx_blk_seek); - - iter->Seek("7"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(1, num_find_file_in_level); - ASSERT_EQ(3, num_idx_blk_seek); - - iter->Seek("8"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(2, num_find_file_in_level); - // Still re-seek because "8" is the boundary key, which has - // the same user key as the seek key. - ASSERT_EQ(4, num_idx_blk_seek); - - iter->Seek("5"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(3, num_find_file_in_level); - ASSERT_EQ(5, num_idx_blk_seek); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(3, num_find_file_in_level); - ASSERT_EQ(5, num_idx_blk_seek); - - // Seek backward never triggers the index block seek to be skipped - iter->Seek("5"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(3, num_find_file_in_level); - ASSERT_EQ(6, num_idx_blk_seek); - } - - SyncPoint::GetInstance()->DisableProcessing(); -} - -// MyRocks may change iterate bounds before seek. 
Simply test to make sure such -// usage doesn't break iterator. -TEST_P(DBIteratorTest, IterateBoundChangedBeforeSeek) { - Options options = CurrentOptions(); - options.compression = CompressionType::kNoCompression; - BlockBasedTableOptions table_options; - table_options.block_size = 100; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - std::string value(50, 'v'); - Reopen(options); - ASSERT_OK(Put("aaa", value)); - ASSERT_OK(Flush()); - ASSERT_OK(Put("bbb", "v")); - ASSERT_OK(Put("ccc", "v")); - ASSERT_OK(Put("ddd", "v")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("eee", "v")); - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - std::string ub1 = "e"; - std::string ub2 = "c"; - Slice ub(ub1); - ReadOptions read_opts1; - read_opts1.iterate_upper_bound = &ub; - Iterator* iter = NewIterator(read_opts1); - // Seek and iterate accross block boundary. - iter->Seek("b"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("bbb", iter->key()); - ub = Slice(ub2); - iter->Seek("b"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("bbb", iter->key()); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - delete iter; - - std::string lb1 = "a"; - std::string lb2 = "c"; - Slice lb(lb1); - ReadOptions read_opts2; - read_opts2.iterate_lower_bound = &lb; - iter = NewIterator(read_opts2); - iter->SeekForPrev("d"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("ccc", iter->key()); - lb = Slice(lb2); - iter->SeekForPrev("d"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("ccc", iter->key()); - iter->Prev(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - delete iter; -} - -TEST_P(DBIteratorTest, IterateWithLowerBoundAcrossFileBoundary) { - ASSERT_OK(Put("aaa", "v")); - ASSERT_OK(Put("bbb", "v")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("ccc", "v")); - ASSERT_OK(Put("ddd", "v")); - ASSERT_OK(Flush()); - // Move both files to bottom level. - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - Slice lower_bound("b"); - ReadOptions read_opts; - read_opts.iterate_lower_bound = &lower_bound; - std::unique_ptr iter(NewIterator(read_opts)); - iter->SeekForPrev("d"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("ccc", iter->key()); - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("bbb", iter->key()); - iter->Prev(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); -} - -TEST_P(DBIteratorTest, Blob) { - Options options = CurrentOptions(); - options.enable_blob_files = true; - options.max_sequential_skip_in_iterations = 2; - options.statistics = CreateDBStatistics(); - - Reopen(options); - - // Note: we have 4 KVs (3 of which are hidden) for key "b" and - // max_sequential_skip_in_iterations is set to 2. Thus, we need to do a reseek - // anytime we move from "b" to "c" or vice versa. 
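[Editor's aside, not part of the deleted file: the reseek counting in the Blob test below relies on the NUMBER_OF_RESEEKS_IN_ITERATION ticker. A minimal sketch, assuming statistics were installed via CreateDBStatistics() before the DB was opened and iterated, of reading the same counter through the public Statistics API; the TestGetTickerCount() helper used in these tests is essentially a wrapper around this call.]

uint64_t CountIteratorReseeks(const Options& opts) {
  // Assumes opts.statistics was set, e.g. opts.statistics = CreateDBStatistics(),
  // before the DB was opened and the iterator was used.
  return opts.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION);
}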
- ASSERT_OK(Put("a", "va")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("b", "vb0")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("b", "vb1")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("b", "vb2")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("b", "vb3")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("c", "vc")); - ASSERT_OK(Flush()); - - std::unique_ptr iter_guard(NewIterator(ReadOptions())); - Iterator* const iter = iter_guard.get(); - - iter->SeekToFirst(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); - ASSERT_EQ(IterStatus(iter), "b->vb3"); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToFirst(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "b->vb3"); - iter->Prev(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek(""); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Seek("a"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Seek("ax"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "b->vb3"); - - iter->SeekForPrev("d"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->SeekForPrev("c"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->SeekForPrev("bx"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3); - ASSERT_EQ(IterStatus(iter), "b->vb3"); - - iter->Seek("b"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3); - ASSERT_EQ(IterStatus(iter), "b->vb3"); - iter->Seek("z"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekForPrev("b"); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 4); - ASSERT_EQ(IterStatus(iter), "b->vb3"); - iter->SeekForPrev(""); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 4); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - // Switch from reverse to forward - iter->SeekToLast(); - 
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 4); - iter->Prev(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 5); - iter->Prev(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 5); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 6); - ASSERT_EQ(IterStatus(iter), "b->vb3"); - - // Switch from forward to reverse - iter->SeekToFirst(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 6); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 6); - iter->Next(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 7); - iter->Prev(); - ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 8); - ASSERT_EQ(IterStatus(iter), "b->vb3"); -} - -INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest, - testing::Values(true, false)); - -// Tests how DBIter work with ReadCallback -class DBIteratorWithReadCallbackTest : public DBIteratorTest {}; - -TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) { - class TestReadCallback : public ReadCallback { - public: - explicit TestReadCallback(SequenceNumber _max_visible_seq) - : ReadCallback(_max_visible_seq) {} - - bool IsVisibleFullCheck(SequenceNumber seq) override { - return seq <= max_visible_seq_; - } - }; - - ASSERT_OK(Put("foo", "v1")); - ASSERT_OK(Put("foo", "v2")); - ASSERT_OK(Put("foo", "v3")); - ASSERT_OK(Put("a", "va")); - ASSERT_OK(Put("z", "vz")); - SequenceNumber seq1 = db_->GetLatestSequenceNumber(); - TestReadCallback callback1(seq1); - ASSERT_OK(Put("foo", "v4")); - ASSERT_OK(Put("foo", "v5")); - ASSERT_OK(Put("bar", "v7")); - - SequenceNumber seq2 = db_->GetLatestSequenceNumber(); - auto* cfd = - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(); - // The iterator are suppose to see data before seq1. - Iterator* iter = - dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1); - - // Seek - // The latest value of "foo" before seq1 is "v3" - iter->Seek("foo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("foo", iter->key()); - ASSERT_EQ("v3", iter->value()); - // "bar" is not visible to the iterator. It will move on to the next key - // "foo". - iter->Seek("bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("foo", iter->key()); - ASSERT_EQ("v3", iter->value()); - - // Next - // Seek to "a" - iter->Seek("a"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("va", iter->value()); - // "bar" is not visible to the iterator. It will move on to the next key - // "foo". - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("foo", iter->key()); - ASSERT_EQ("v3", iter->value()); - - // Prev - // Seek to "z" - iter->Seek("z"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("vz", iter->value()); - // The previous key is "foo", which is visible to the iterator. - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("foo", iter->key()); - ASSERT_EQ("v3", iter->value()); - // "bar" is not visible to the iterator. It will move on to the next key "a". - iter->Prev(); // skipping "bar" - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("a", iter->key()); - ASSERT_EQ("va", iter->value()); - - // SeekForPrev - // The previous key is "foo", which is visible to the iterator. 
- iter->SeekForPrev("y"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("foo", iter->key()); - ASSERT_EQ("v3", iter->value()); - // "bar" is not visible to the iterator. It will move on to the next key "a". - iter->SeekForPrev("bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("a", iter->key()); - ASSERT_EQ("va", iter->value()); - - delete iter; - - // Prev beyond max_sequential_skip_in_iterations - uint64_t num_versions = - CurrentOptions().max_sequential_skip_in_iterations + 10; - for (uint64_t i = 0; i < num_versions; i++) { - ASSERT_OK(Put("bar", std::to_string(i))); - } - SequenceNumber seq3 = db_->GetLatestSequenceNumber(); - TestReadCallback callback2(seq3); - ASSERT_OK(Put("bar", "v8")); - SequenceNumber seq4 = db_->GetLatestSequenceNumber(); - - // The iterator is suppose to see data before seq3. - iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2); - // Seek to "z", which is visible. - iter->Seek("z"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("vz", iter->value()); - // Previous key is "foo" and the last value "v5" is visible. - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("foo", iter->key()); - ASSERT_EQ("v5", iter->value()); - // Since the number of values of "bar" is more than - // max_sequential_skip_in_iterations, Prev() will ultimately fallback to - // seek in forward direction. Here we test the fallback seek is correct. - // The last visible value should be (num_versions - 1), as "v8" is not - // visible. - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_EQ("bar", iter->key()); - ASSERT_EQ(std::to_string(num_versions - 1), iter->value()); - - delete iter; -} - -TEST_F(DBIteratorTest, BackwardIterationOnInplaceUpdateMemtable) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.inplace_update_support = false; - options.env = env_; - DestroyAndReopen(options); - constexpr int kNumKeys = 10; - - // Write kNumKeys to WAL. - for (int i = 0; i < kNumKeys; ++i) { - ASSERT_OK(Put(Key(i), "val")); - } - ReadOptions read_opts; - read_opts.total_order_seek = true; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - int count = 0; - for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { - ++count; - } - ASSERT_EQ(kNumKeys, count); - } - - // Reopen and rebuild the memtable from WAL. - options.create_if_missing = false; - options.avoid_flush_during_recovery = true; - options.inplace_update_support = true; - options.allow_concurrent_memtable_write = false; - Reopen(options); - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - iter->SeekToLast(); - // Backward iteration not supported due to inplace_update_support = true. - ASSERT_TRUE(iter->status().IsNotSupported()); - ASSERT_FALSE(iter->Valid()); - } -} - -TEST_F(DBIteratorTest, IteratorRefreshReturnSV) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - std::unique_ptr iter{db_->NewIterator(ReadOptions())}; - SyncPoint::GetInstance()->SetCallBack( - "ArenaWrappedDBIter::Refresh:SV", [&](void*) { - ASSERT_OK(db_->Put(WriteOptions(), "dummy", "new SV")); - // This makes the local SV obselete. 
- ASSERT_OK(Flush());
- SyncPoint::GetInstance()->DisableProcessing();
- });
- SyncPoint::GetInstance()->EnableProcessing();
- ASSERT_OK(iter->Refresh());
- iter.reset();
- // iter used to not clean up the SV, so the Close() below would hit an
- // assertion error.
- Close();
-}
-
-} // namespace ROCKSDB_NAMESPACE
-
-int main(int argc, char** argv) {
-  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
-  ::testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/db/db_kv_checksum_test.cc b/db/db_kv_checksum_test.cc
deleted file mode 100644
index 614399243..000000000
--- a/db/db_kv_checksum_test.cc
+++ /dev/null
@@ -1,885 +0,0 @@
-// Copyright (c) 2020-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-
-#include "db/blob/blob_index.h"
-#include "db/db_test_util.h"
-#include "rocksdb/rocksdb_namespace.h"
-
-namespace ROCKSDB_NAMESPACE {
-
-enum class WriteBatchOpType {
-  kPut = 0,
-  kDelete,
-  kSingleDelete,
-  kMerge,
-  kPutEntity,
-  kDeleteRange,
-  kNum,
-};
-
-// Integer addition is needed for `::testing::Range()` to take the enum type.
-WriteBatchOpType operator+(WriteBatchOpType lhs, const int rhs) {
-  using T = std::underlying_type<WriteBatchOpType>::type;
-  return static_cast<WriteBatchOpType>(static_cast<T>(lhs) + rhs);
-}
-
-enum class WriteMode {
-  // `Write()` a `WriteBatch` constructed with `protection_bytes_per_key = 0`
-  // and `WriteOptions::protection_bytes_per_key = 0`
-  kWriteUnprotectedBatch = 0,
-  // `Write()` a `WriteBatch` constructed with `protection_bytes_per_key > 0`.
-  kWriteProtectedBatch,
-  // `Write()` a `WriteBatch` constructed with `protection_bytes_per_key == 0`.
-  // Protection is enabled via `WriteOptions::protection_bytes_per_key > 0`.
-  kWriteOptionProtectedBatch,
-  // TODO(ajkr): add a mode that uses `Write()` wrappers, e.g., `Put()`.
-  kNum,
-};
-
-// Integer addition is needed for `::testing::Range()` to take the enum type.
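[Editor's aside: gtest's ::testing::Range() advances its current value by repeatedly adding its step (1 by default), which is why these enum classes need an addition operator. A minimal, illustrative equivalent of what the parameter generator does, not part of the original file:]

for (WriteBatchOpType op = WriteBatchOpType::kPut;
     op != WriteBatchOpType::kNum; op = op + 1) {
  // ::testing::Range(WriteBatchOpType::kPut, WriteBatchOpType::kNum) yields
  // one test parameter per value visited by this loop.
}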
-WriteMode operator+(WriteMode lhs, const int rhs) { - using T = std::underlying_type::type; - return static_cast(static_cast(lhs) + rhs); -} - -std::pair GetWriteBatch(ColumnFamilyHandle* cf_handle, - size_t protection_bytes_per_key, - WriteBatchOpType op_type) { - Status s; - WriteBatch wb(0 /* reserved_bytes */, 0 /* max_bytes */, - protection_bytes_per_key, 0 /* default_cf_ts_sz */); - switch (op_type) { - case WriteBatchOpType::kPut: - s = wb.Put(cf_handle, "key", "val"); - break; - case WriteBatchOpType::kDelete: - s = wb.Delete(cf_handle, "key"); - break; - case WriteBatchOpType::kSingleDelete: - s = wb.SingleDelete(cf_handle, "key"); - break; - case WriteBatchOpType::kDeleteRange: - s = wb.DeleteRange(cf_handle, "begin", "end"); - break; - case WriteBatchOpType::kMerge: - s = wb.Merge(cf_handle, "key", "val"); - break; - case WriteBatchOpType::kPutEntity: - s = wb.PutEntity(cf_handle, "key", - {{"attr_name1", "foo"}, {"attr_name2", "bar"}}); - break; - case WriteBatchOpType::kNum: - assert(false); - } - return {std::move(wb), std::move(s)}; -} - -class DbKvChecksumTestBase : public DBTestBase { - public: - DbKvChecksumTestBase(const std::string& path, bool env_do_fsync) - : DBTestBase(path, env_do_fsync) {} - - ColumnFamilyHandle* GetCFHandleToUse(ColumnFamilyHandle* column_family, - WriteBatchOpType op_type) const { - // Note: PutEntity cannot be called without column family - if (op_type == WriteBatchOpType::kPutEntity && !column_family) { - return db_->DefaultColumnFamily(); - } - - return column_family; - } -}; - -class DbKvChecksumTest - : public DbKvChecksumTestBase, - public ::testing::WithParamInterface< - std::tuple> { - public: - DbKvChecksumTest() - : DbKvChecksumTestBase("db_kv_checksum_test", /*env_do_fsync=*/false) { - op_type_ = std::get<0>(GetParam()); - corrupt_byte_addend_ = std::get<1>(GetParam()); - write_mode_ = std::get<2>(GetParam()); - memtable_protection_bytes_per_key_ = std::get<3>(GetParam()); - } - - Status ExecuteWrite(ColumnFamilyHandle* cf_handle) { - switch (write_mode_) { - case WriteMode::kWriteUnprotectedBatch: { - auto batch_and_status = - GetWriteBatch(GetCFHandleToUse(cf_handle, op_type_), - 0 /* protection_bytes_per_key */, op_type_); - assert(batch_and_status.second.ok()); - // Default write option has protection_bytes_per_key = 0 - return db_->Write(WriteOptions(), &batch_and_status.first); - } - case WriteMode::kWriteProtectedBatch: { - auto batch_and_status = - GetWriteBatch(GetCFHandleToUse(cf_handle, op_type_), - 8 /* protection_bytes_per_key */, op_type_); - assert(batch_and_status.second.ok()); - return db_->Write(WriteOptions(), &batch_and_status.first); - } - case WriteMode::kWriteOptionProtectedBatch: { - auto batch_and_status = - GetWriteBatch(GetCFHandleToUse(cf_handle, op_type_), - 0 /* protection_bytes_per_key */, op_type_); - assert(batch_and_status.second.ok()); - WriteOptions write_opts; - write_opts.protection_bytes_per_key = 8; - return db_->Write(write_opts, &batch_and_status.first); - } - case WriteMode::kNum: - assert(false); - } - return Status::NotSupported("WriteMode " + - std::to_string(static_cast(write_mode_))); - } - - void CorruptNextByteCallBack(void* arg) { - Slice encoded = *static_cast(arg); - if (entry_len_ == std::numeric_limits::max()) { - // We learn the entry size on the first attempt - entry_len_ = encoded.size(); - } - char* buf = const_cast(encoded.data()); - buf[corrupt_byte_offset_] += corrupt_byte_addend_; - ++corrupt_byte_offset_; - } - - bool MoreBytesToCorrupt() { return corrupt_byte_offset_ < 
entry_len_; } - - protected: - WriteBatchOpType op_type_; - char corrupt_byte_addend_; - WriteMode write_mode_; - uint32_t memtable_protection_bytes_per_key_; - size_t corrupt_byte_offset_ = 0; - size_t entry_len_ = std::numeric_limits::max(); -}; - -std::string GetOpTypeString(const WriteBatchOpType& op_type) { - switch (op_type) { - case WriteBatchOpType::kPut: - return "Put"; - case WriteBatchOpType::kDelete: - return "Delete"; - case WriteBatchOpType::kSingleDelete: - return "SingleDelete"; - case WriteBatchOpType::kDeleteRange: - return "DeleteRange"; - case WriteBatchOpType::kMerge: - return "Merge"; - case WriteBatchOpType::kPutEntity: - return "PutEntity"; - case WriteBatchOpType::kNum: - assert(false); - } - assert(false); - return ""; -} - -std::string GetWriteModeString(const WriteMode& mode) { - switch (mode) { - case WriteMode::kWriteUnprotectedBatch: - return "WriteUnprotectedBatch"; - case WriteMode::kWriteProtectedBatch: - return "WriteProtectedBatch"; - case WriteMode::kWriteOptionProtectedBatch: - return "kWriteOptionProtectedBatch"; - case WriteMode::kNum: - assert(false); - } - return ""; -} - -INSTANTIATE_TEST_CASE_P( - DbKvChecksumTest, DbKvChecksumTest, - ::testing::Combine(::testing::Range(static_cast(0), - WriteBatchOpType::kNum), - ::testing::Values(2, 103, 251), - ::testing::Range(WriteMode::kWriteProtectedBatch, - WriteMode::kNum), - ::testing::Values(0)), - [](const testing::TestParamInfo< - std::tuple>& args) { - std::ostringstream oss; - oss << GetOpTypeString(std::get<0>(args.param)) << "Add" - << static_cast( - static_cast(std::get<1>(args.param))) - << GetWriteModeString(std::get<2>(args.param)) - << static_cast(std::get<3>(args.param)); - return oss.str(); - }); - -// TODO(ajkr): add a test that corrupts the `WriteBatch` contents. Such -// corruptions should only be detectable in `WriteMode::kWriteProtectedBatch`. - -TEST_P(DbKvChecksumTest, MemTableAddCorrupted) { - // This test repeatedly attempts to write `WriteBatch`es containing a single - // entry of type `op_type_`. Each attempt has one byte corrupted in its - // memtable entry by adding `corrupt_byte_addend_` to its original value. The - // test repeats until an attempt has been made on each byte in the encoded - // memtable entry. All attempts are expected to fail with `Status::Corruption` - SyncPoint::GetInstance()->SetCallBack( - "MemTable::Add:Encoded", - std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this, - std::placeholders::_1)); - - while (MoreBytesToCorrupt()) { - // Failed memtable insert always leads to read-only mode, so we have to - // reopen for every attempt. - Options options = CurrentOptions(); - if (op_type_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - Reopen(options); - - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_TRUE(ExecuteWrite(nullptr /* cf_handle */).IsCorruption()); - SyncPoint::GetInstance()->DisableProcessing(); - - // In case the above callback is not invoked, this test will run - // numeric_limits::max() times until it reports an error (or will - // exhaust disk space). Added this assert to report error early. - ASSERT_TRUE(entry_len_ < std::numeric_limits::max()); - } -} - -TEST_P(DbKvChecksumTest, MemTableAddWithColumnFamilyCorrupted) { - // This test repeatedly attempts to write `WriteBatch`es containing a single - // entry of type `op_type_` to a non-default column family. 
Each attempt has - // one byte corrupted in its memtable entry by adding `corrupt_byte_addend_` - // to its original value. The test repeats until an attempt has been made on - // each byte in the encoded memtable entry. All attempts are expected to fail - // with `Status::Corruption`. - Options options = CurrentOptions(); - if (op_type_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - CreateAndReopenWithCF({"pikachu"}, options); - SyncPoint::GetInstance()->SetCallBack( - "MemTable::Add:Encoded", - std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this, - std::placeholders::_1)); - - while (MoreBytesToCorrupt()) { - // Failed memtable insert always leads to read-only mode, so we have to - // reopen for every attempt. - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); - - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_TRUE(ExecuteWrite(handles_[1]).IsCorruption()); - SyncPoint::GetInstance()->DisableProcessing(); - - // In case the above callback is not invoked, this test will run - // numeric_limits::max() times until it reports an error (or will - // exhaust disk space). Added this assert to report error early. - ASSERT_TRUE(entry_len_ < std::numeric_limits::max()); - } -} - -TEST_P(DbKvChecksumTest, NoCorruptionCase) { - // If this test fails, we may have found a piece of malfunctioned hardware - auto batch_and_status = - GetWriteBatch(GetCFHandleToUse(nullptr, op_type_), - 8 /* protection_bytes_per_key */, op_type_); - ASSERT_OK(batch_and_status.second); - ASSERT_OK(batch_and_status.first.VerifyChecksum()); -} - -TEST_P(DbKvChecksumTest, WriteToWALCorrupted) { - // This test repeatedly attempts to write `WriteBatch`es containing a single - // entry of type `op_type_`. Each attempt has one byte corrupted by adding - // `corrupt_byte_addend_` to its original value. The test repeats until an - // attempt has been made on each byte in the encoded write batch. All attempts - // are expected to fail with `Status::Corruption` - Options options = CurrentOptions(); - if (op_type_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::WriteToWAL:log_entry", - std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this, - std::placeholders::_1)); - // First 8 bytes are for sequence number which is not protected in write batch - corrupt_byte_offset_ = 8; - - while (MoreBytesToCorrupt()) { - // Corrupted write batch leads to read-only mode, so we have to - // reopen for every attempt. - Reopen(options); - auto log_size_pre_write = dbfull()->TEST_total_log_size(); - - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_TRUE(ExecuteWrite(nullptr /* cf_handle */).IsCorruption()); - // Confirm that nothing was written to WAL - ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size()); - ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption()); - SyncPoint::GetInstance()->DisableProcessing(); - - // In case the above callback is not invoked, this test will run - // numeric_limits::max() times until it reports an error (or will - // exhaust disk space). Added this assert to report error early. - ASSERT_TRUE(entry_len_ < std::numeric_limits::max()); - } -} - -TEST_P(DbKvChecksumTest, WriteToWALWithColumnFamilyCorrupted) { - // This test repeatedly attempts to write `WriteBatch`es containing a single - // entry of type `op_type_`. 
- // Each attempt has one byte corrupted by adding
- // `corrupt_byte_addend_` to its original value. The test repeats until an
- // attempt has been made on each byte in the encoded write batch. All attempts
- // are expected to fail with `Status::Corruption`.
-  Options options = CurrentOptions();
-  if (op_type_ == WriteBatchOpType::kMerge) {
-    options.merge_operator = MergeOperators::CreateStringAppendOperator();
-  }
-  CreateAndReopenWithCF({"pikachu"}, options);
-  SyncPoint::GetInstance()->SetCallBack(
-      "DBImpl::WriteToWAL:log_entry",
-      std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this,
-                std::placeholders::_1));
-  // First 8 bytes are for sequence number which is not protected in write batch
-  corrupt_byte_offset_ = 8;
-
-  while (MoreBytesToCorrupt()) {
-    // Corrupted write batch leads to read-only mode, so we have to
-    // reopen for every attempt.
-    ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options);
-    auto log_size_pre_write = dbfull()->TEST_total_log_size();
-
-    SyncPoint::GetInstance()->EnableProcessing();
-    ASSERT_TRUE(ExecuteWrite(nullptr /* cf_handle */).IsCorruption());
-    // Confirm that nothing was written to WAL
-    ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size());
-    ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption());
-    SyncPoint::GetInstance()->DisableProcessing();
-
-    // In case the above callback is not invoked, this test will run
-    // numeric_limits<size_t>::max() times until it reports an error (or will
-    // exhaust disk space). Added this assert to report error early.
-    ASSERT_TRUE(entry_len_ < std::numeric_limits<size_t>::max());
-  }
-}
-
-class DbKvChecksumTestMergedBatch
-    : public DbKvChecksumTestBase,
-      public ::testing::WithParamInterface<
-          std::tuple<WriteBatchOpType, WriteBatchOpType, char>> {
- public:
-  DbKvChecksumTestMergedBatch()
-      : DbKvChecksumTestBase("db_kv_checksum_test", /*env_do_fsync=*/false) {
-    op_type1_ = std::get<0>(GetParam());
-    op_type2_ = std::get<1>(GetParam());
-    corrupt_byte_addend_ = std::get<2>(GetParam());
-  }
-
- protected:
-  WriteBatchOpType op_type1_;
-  WriteBatchOpType op_type2_;
-  char corrupt_byte_addend_;
-};
-
-void CorruptWriteBatch(Slice* content, size_t offset,
-                       char corrupt_byte_addend) {
-  ASSERT_TRUE(offset < content->size());
-  char* buf = const_cast<char*>(content->data());
-  buf[offset] += corrupt_byte_addend;
-}
-
-TEST_P(DbKvChecksumTestMergedBatch, NoCorruptionCase) {
-  // Verify write batch checksum after write batch append
-  auto batch1 = GetWriteBatch(GetCFHandleToUse(nullptr, op_type1_),
-                              8 /* protection_bytes_per_key */, op_type1_);
-  ASSERT_OK(batch1.second);
-  auto batch2 = GetWriteBatch(GetCFHandleToUse(nullptr, op_type2_),
-                              8 /* protection_bytes_per_key */, op_type2_);
-  ASSERT_OK(batch2.second);
-  ASSERT_OK(WriteBatchInternal::Append(&batch1.first, &batch2.first));
-  ASSERT_OK(batch1.first.VerifyChecksum());
-}
-
-TEST_P(DbKvChecksumTestMergedBatch, WriteToWALCorrupted) {
-  // This test has two writers repeatedly attempt to write `WriteBatch`es
-  // containing a single entry of type op_type1_ and op_type2_ respectively. The
-  // leader of the write group writes the batch containing the entry of type
-  // op_type1_. One byte of the pre-merged write batches is corrupted by adding
-  // `corrupt_byte_addend_` to the batch's original value during each attempt.
-  // The test repeats until an attempt has been made on each byte in both
-  // pre-merged write batches. All attempts are expected to fail with
-  // `Status::Corruption`.
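[Editor's aside before the test body: a minimal sketch of the two protected write paths that the WriteMode values earlier in this file distinguish. Illustrative only; db_ stands for an open DB handle as elsewhere in these tests.]

// kWriteProtectedBatch: protection is attached when the batch is constructed.
WriteBatch protected_batch(0 /* reserved_bytes */, 0 /* max_bytes */,
                           8 /* protection_bytes_per_key */,
                           0 /* default_cf_ts_sz */);
ASSERT_OK(protected_batch.Put("key", "val"));
ASSERT_OK(db_->Write(WriteOptions(), &protected_batch));

// kWriteOptionProtectedBatch: the batch itself is unprotected; protection is
// requested at Write() time via WriteOptions instead.
WriteBatch plain_batch;
ASSERT_OK(plain_batch.Put("key", "val"));
WriteOptions write_opts;
write_opts.protection_bytes_per_key = 8;
ASSERT_OK(db_->Write(write_opts, &plain_batch));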
- Options options = CurrentOptions(); - if (op_type1_ == WriteBatchOpType::kMerge || - op_type2_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - - auto leader_batch_and_status = - GetWriteBatch(GetCFHandleToUse(nullptr, op_type1_), - 8 /* protection_bytes_per_key */, op_type1_); - ASSERT_OK(leader_batch_and_status.second); - auto follower_batch_and_status = - GetWriteBatch(GetCFHandleToUse(nullptr, op_type2_), - 8 /* protection_bytes_per_key */, op_type2_); - size_t leader_batch_size = leader_batch_and_status.first.GetDataSize(); - size_t total_bytes = - leader_batch_size + follower_batch_and_status.first.GetDataSize(); - // First 8 bytes are for sequence number which is not protected in write batch - size_t corrupt_byte_offset = 8; - - std::atomic follower_joined{false}; - std::atomic leader_count{0}; - port::Thread follower_thread; - // This callback should only be called by the leader thread - SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Wait2", [&](void* arg_leader) { - auto* leader = reinterpret_cast(arg_leader); - ASSERT_EQ(leader->state, WriteThread::STATE_GROUP_LEADER); - - // This callback should only be called by the follower thread - SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Wait", [&](void* arg_follower) { - auto* follower = - reinterpret_cast(arg_follower); - // The leader thread will wait on this bool and hence wait until - // this writer joins the write group - ASSERT_NE(follower->state, WriteThread::STATE_GROUP_LEADER); - if (corrupt_byte_offset >= leader_batch_size) { - Slice batch_content = follower->batch->Data(); - CorruptWriteBatch(&batch_content, - corrupt_byte_offset - leader_batch_size, - corrupt_byte_addend_); - } - // Leader busy waits on this flag - follower_joined = true; - // So the follower does not enter the outer callback at - // WriteThread::JoinBatchGroup:Wait2 - SyncPoint::GetInstance()->DisableProcessing(); - }); - - // Start the other writer thread which will join the write group as - // follower - follower_thread = port::Thread([&]() { - follower_batch_and_status = - GetWriteBatch(GetCFHandleToUse(nullptr, op_type2_), - 8 /* protection_bytes_per_key */, op_type2_); - ASSERT_OK(follower_batch_and_status.second); - ASSERT_TRUE( - db_->Write(WriteOptions(), &follower_batch_and_status.first) - .IsCorruption()); - }); - - ASSERT_EQ(leader->batch->GetDataSize(), leader_batch_size); - if (corrupt_byte_offset < leader_batch_size) { - Slice batch_content = leader->batch->Data(); - CorruptWriteBatch(&batch_content, corrupt_byte_offset, - corrupt_byte_addend_); - } - leader_count++; - while (!follower_joined) { - // busy waiting - } - }); - while (corrupt_byte_offset < total_bytes) { - // Reopen DB since it failed WAL write which lead to read-only mode - Reopen(options); - SyncPoint::GetInstance()->EnableProcessing(); - auto log_size_pre_write = dbfull()->TEST_total_log_size(); - leader_batch_and_status = - GetWriteBatch(GetCFHandleToUse(nullptr, op_type1_), - 8 /* protection_bytes_per_key */, op_type1_); - ASSERT_OK(leader_batch_and_status.second); - ASSERT_TRUE(db_->Write(WriteOptions(), &leader_batch_and_status.first) - .IsCorruption()); - follower_thread.join(); - // Prevent leader thread from entering this callback - SyncPoint::GetInstance()->ClearCallBack("WriteThread::JoinBatchGroup:Wait"); - ASSERT_EQ(1, leader_count); - // Nothing should have been written to WAL - ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size()); - 
ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption()); - - corrupt_byte_offset++; - if (corrupt_byte_offset == leader_batch_size) { - // skip over the sequence number part of follower's write batch - corrupt_byte_offset += 8; - } - follower_joined = false; - leader_count = 0; - } - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DbKvChecksumTestMergedBatch, WriteToWALWithColumnFamilyCorrupted) { - // This test has two writers repeatedly attempt to write `WriteBatch`es - // containing a single entry of type op_type1_ and op_type2_ respectively. The - // leader of the write group writes the batch containinng the entry of type - // op_type1_. One byte of the pre-merged write batches is corrupted by adding - // `corrupt_byte_addend_` to the batch's original value during each attempt. - // The test repeats until an attempt has been made on each byte in both - // pre-merged write batches. All attempts are expected to fail with - // `Status::Corruption`. - Options options = CurrentOptions(); - if (op_type1_ == WriteBatchOpType::kMerge || - op_type2_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - CreateAndReopenWithCF({"ramen"}, options); - - auto leader_batch_and_status = - GetWriteBatch(GetCFHandleToUse(handles_[1], op_type1_), - 8 /* protection_bytes_per_key */, op_type1_); - ASSERT_OK(leader_batch_and_status.second); - auto follower_batch_and_status = - GetWriteBatch(GetCFHandleToUse(handles_[1], op_type2_), - 8 /* protection_bytes_per_key */, op_type2_); - size_t leader_batch_size = leader_batch_and_status.first.GetDataSize(); - size_t total_bytes = - leader_batch_size + follower_batch_and_status.first.GetDataSize(); - // First 8 bytes are for sequence number which is not protected in write batch - size_t corrupt_byte_offset = 8; - - std::atomic follower_joined{false}; - std::atomic leader_count{0}; - port::Thread follower_thread; - // This callback should only be called by the leader thread - SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Wait2", [&](void* arg_leader) { - auto* leader = reinterpret_cast(arg_leader); - ASSERT_EQ(leader->state, WriteThread::STATE_GROUP_LEADER); - - // This callback should only be called by the follower thread - SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Wait", [&](void* arg_follower) { - auto* follower = - reinterpret_cast(arg_follower); - // The leader thread will wait on this bool and hence wait until - // this writer joins the write group - ASSERT_NE(follower->state, WriteThread::STATE_GROUP_LEADER); - if (corrupt_byte_offset >= leader_batch_size) { - Slice batch_content = - WriteBatchInternal::Contents(follower->batch); - CorruptWriteBatch(&batch_content, - corrupt_byte_offset - leader_batch_size, - corrupt_byte_addend_); - } - follower_joined = true; - // So the follower does not enter the outer callback at - // WriteThread::JoinBatchGroup:Wait2 - SyncPoint::GetInstance()->DisableProcessing(); - }); - - // Start the other writer thread which will join the write group as - // follower - follower_thread = port::Thread([&]() { - follower_batch_and_status = - GetWriteBatch(GetCFHandleToUse(handles_[1], op_type2_), - 8 /* protection_bytes_per_key */, op_type2_); - ASSERT_OK(follower_batch_and_status.second); - ASSERT_TRUE( - db_->Write(WriteOptions(), &follower_batch_and_status.first) - .IsCorruption()); - }); - - ASSERT_EQ(leader->batch->GetDataSize(), leader_batch_size); - if (corrupt_byte_offset < leader_batch_size) { - Slice 
batch_content = WriteBatchInternal::Contents(leader->batch); - CorruptWriteBatch(&batch_content, corrupt_byte_offset, - corrupt_byte_addend_); - } - leader_count++; - while (!follower_joined) { - // busy waiting - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - while (corrupt_byte_offset < total_bytes) { - // Reopen DB since it failed WAL write which lead to read-only mode - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "ramen"}, options); - SyncPoint::GetInstance()->EnableProcessing(); - auto log_size_pre_write = dbfull()->TEST_total_log_size(); - leader_batch_and_status = - GetWriteBatch(GetCFHandleToUse(handles_[1], op_type1_), - 8 /* protection_bytes_per_key */, op_type1_); - ASSERT_OK(leader_batch_and_status.second); - ASSERT_TRUE(db_->Write(WriteOptions(), &leader_batch_and_status.first) - .IsCorruption()); - follower_thread.join(); - // Prevent leader thread from entering this callback - SyncPoint::GetInstance()->ClearCallBack("WriteThread::JoinBatchGroup:Wait"); - - ASSERT_EQ(1, leader_count); - // Nothing should have been written to WAL - ASSERT_EQ(log_size_pre_write, dbfull()->TEST_total_log_size()); - ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption()); - - corrupt_byte_offset++; - if (corrupt_byte_offset == leader_batch_size) { - // skip over the sequence number part of follower's write batch - corrupt_byte_offset += 8; - } - follower_joined = false; - leader_count = 0; - } - SyncPoint::GetInstance()->DisableProcessing(); -} - -INSTANTIATE_TEST_CASE_P( - DbKvChecksumTestMergedBatch, DbKvChecksumTestMergedBatch, - ::testing::Combine(::testing::Range(static_cast(0), - WriteBatchOpType::kNum), - ::testing::Range(static_cast(0), - WriteBatchOpType::kNum), - ::testing::Values(2, 103, 251)), - [](const testing::TestParamInfo< - std::tuple>& args) { - std::ostringstream oss; - oss << GetOpTypeString(std::get<0>(args.param)) - << GetOpTypeString(std::get<1>(args.param)) << "Add" - << static_cast( - static_cast(std::get<2>(args.param))); - return oss.str(); - }); - -// TODO: add test for transactions -// TODO: add test for corrupted write batch with WAL disabled - -class DbKVChecksumWALToWriteBatchTest : public DBTestBase { - public: - DbKVChecksumWALToWriteBatchTest() - : DBTestBase("db_kv_checksum_test", /*env_do_fsync=*/false) {} -}; - -TEST_F(DbKVChecksumWALToWriteBatchTest, WriteBatchChecksumHandoff) { - Options options = CurrentOptions(); - Reopen(options); - ASSERT_OK(db_->Put(WriteOptions(), "key", "val")); - std::string content = ""; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:batch", - [&](void* batch_ptr) { - WriteBatch* batch = reinterpret_cast(batch_ptr); - content.assign(batch->Data().data(), batch->GetDataSize()); - Slice batch_content = batch->Data(); - // Corrupt first bit - CorruptWriteBatch(&batch_content, 0, 1); - }); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:checksum", - [&](void* checksum_ptr) { - // Verify that checksum is produced on the batch content - uint64_t checksum = *reinterpret_cast(checksum_ptr); - ASSERT_EQ(checksum, XXH3_64bits(content.data(), content.size())); - }); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_TRUE(TryReopen(options).IsCorruption()); - SyncPoint::GetInstance()->DisableProcessing(); -}; - -// TODO (cbi): add DeleteRange coverage once it is implemented -class DbMemtableKVChecksumTest : public DbKvChecksumTest { - public: - DbMemtableKVChecksumTest() : DbKvChecksumTest() {} - - protected: - // Indices in 
the memtable entry that we will not corrupt. - // For memtable entry format, see comments in MemTable::Add(). - // We do not corrupt key length and value length fields in this test - // case since it causes segfault and ASAN will complain. - // For this test case, key and value are all of length 3, so - // key length field is at index 0 and value length field is at index 12. - const std::set index_not_to_corrupt{0, 12}; - - void SkipNotToCorruptEntry() { - if (index_not_to_corrupt.find(corrupt_byte_offset_) != - index_not_to_corrupt.end()) { - corrupt_byte_offset_++; - } - } -}; - -INSTANTIATE_TEST_CASE_P( - DbMemtableKVChecksumTest, DbMemtableKVChecksumTest, - ::testing::Combine(::testing::Range(static_cast(0), - WriteBatchOpType::kDeleteRange), - ::testing::Values(2, 103, 251), - ::testing::Range(static_cast(0), - WriteMode::kWriteOptionProtectedBatch), - // skip 1 byte checksum as it makes test flaky - ::testing::Values(2, 4, 8)), - [](const testing::TestParamInfo< - std::tuple>& args) { - std::ostringstream oss; - oss << GetOpTypeString(std::get<0>(args.param)) << "Add" - << static_cast( - static_cast(std::get<1>(args.param))) - << GetWriteModeString(std::get<2>(args.param)) - << static_cast(std::get<3>(args.param)); - return oss.str(); - }); - -TEST_P(DbMemtableKVChecksumTest, GetWithCorruptAfterMemtableInsert) { - // Record memtable entry size. - // Not corrupting memtable entry here since it will segfault - // or fail some asserts inside memtablerep implementation - // e.g., when key_len is corrupted. - SyncPoint::GetInstance()->SetCallBack( - "MemTable::Add:BeforeReturn:Encoded", [&](void* arg) { - Slice encoded = *static_cast(arg); - entry_len_ = encoded.size(); - }); - - SyncPoint::GetInstance()->SetCallBack( - "Memtable::SaveValue:Begin:entry", [&](void* entry) { - char* buf = *static_cast(entry); - buf[corrupt_byte_offset_] += corrupt_byte_addend_; - ++corrupt_byte_offset_; - }); - SyncPoint::GetInstance()->EnableProcessing(); - Options options = CurrentOptions(); - options.memtable_protection_bytes_per_key = - memtable_protection_bytes_per_key_; - if (op_type_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - - SkipNotToCorruptEntry(); - while (MoreBytesToCorrupt()) { - Reopen(options); - ASSERT_OK(ExecuteWrite(nullptr)); - std::string val; - ASSERT_TRUE(db_->Get(ReadOptions(), "key", &val).IsCorruption()); - Destroy(options); - SkipNotToCorruptEntry(); - } -} - -TEST_P(DbMemtableKVChecksumTest, - GetWithColumnFamilyCorruptAfterMemtableInsert) { - // Record memtable entry size. - // Not corrupting memtable entry here since it will segfault - // or fail some asserts inside memtablerep implementation - // e.g., when key_len is corrupted. 
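[Editor's note on index_not_to_corrupt above: a worked sketch of the memtable entry layout it refers to, assuming the encoding described in MemTable::Add() and the 3-byte keys and values used in these tests.]

// For key = "key" (3 bytes) and value = "val" (3 bytes):
//   offset 0       varint32 internal_key_size = 11  (3-byte user key + 8-byte
//                                                    sequence number/type tag)
//   offsets 1-11   internal key bytes
//   offset 12      varint32 value_size = 3
//   offsets 13-15  value bytes
//   (followed by the per-key protection bytes when memtable protection is on)
// Hence offsets 0 and 12 are the length fields skipped by
// SkipNotToCorruptEntry() in the loops below.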
- SyncPoint::GetInstance()->SetCallBack( - "MemTable::Add:BeforeReturn:Encoded", [&](void* arg) { - Slice encoded = *static_cast(arg); - entry_len_ = encoded.size(); - }); - - SyncPoint::GetInstance()->SetCallBack( - "Memtable::SaveValue:Begin:entry", [&](void* entry) { - char* buf = *static_cast(entry); - buf[corrupt_byte_offset_] += corrupt_byte_addend_; - ++corrupt_byte_offset_; - }); - SyncPoint::GetInstance()->EnableProcessing(); - Options options = CurrentOptions(); - options.memtable_protection_bytes_per_key = - memtable_protection_bytes_per_key_; - if (op_type_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - - SkipNotToCorruptEntry(); - while (MoreBytesToCorrupt()) { - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(ExecuteWrite(handles_[1])); - std::string val; - ASSERT_TRUE( - db_->Get(ReadOptions(), handles_[1], "key", &val).IsCorruption()); - Destroy(options); - SkipNotToCorruptEntry(); - } -} - -TEST_P(DbMemtableKVChecksumTest, IteratorWithCorruptAfterMemtableInsert) { - SyncPoint::GetInstance()->SetCallBack( - "MemTable::Add:BeforeReturn:Encoded", - std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this, - std::placeholders::_1)); - SyncPoint::GetInstance()->EnableProcessing(); - Options options = CurrentOptions(); - options.memtable_protection_bytes_per_key = - memtable_protection_bytes_per_key_; - if (op_type_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - - SkipNotToCorruptEntry(); - while (MoreBytesToCorrupt()) { - Reopen(options); - ASSERT_OK(ExecuteWrite(nullptr)); - Iterator* it = db_->NewIterator(ReadOptions()); - it->SeekToFirst(); - ASSERT_FALSE(it->Valid()); - ASSERT_TRUE(it->status().IsCorruption()); - delete it; - Destroy(options); - SkipNotToCorruptEntry(); - } -} - -TEST_P(DbMemtableKVChecksumTest, - IteratorWithColumnFamilyCorruptAfterMemtableInsert) { - SyncPoint::GetInstance()->SetCallBack( - "MemTable::Add:BeforeReturn:Encoded", - std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this, - std::placeholders::_1)); - SyncPoint::GetInstance()->EnableProcessing(); - Options options = CurrentOptions(); - options.memtable_protection_bytes_per_key = - memtable_protection_bytes_per_key_; - if (op_type_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - - SkipNotToCorruptEntry(); - while (MoreBytesToCorrupt()) { - Reopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(ExecuteWrite(handles_[1])); - Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]); - it->SeekToFirst(); - ASSERT_FALSE(it->Valid()); - ASSERT_TRUE(it->status().IsCorruption()); - delete it; - Destroy(options); - SkipNotToCorruptEntry(); - } -} - -TEST_P(DbMemtableKVChecksumTest, FlushWithCorruptAfterMemtableInsert) { - SyncPoint::GetInstance()->SetCallBack( - "MemTable::Add:BeforeReturn:Encoded", - std::bind(&DbKvChecksumTest::CorruptNextByteCallBack, this, - std::placeholders::_1)); - SyncPoint::GetInstance()->EnableProcessing(); - Options options = CurrentOptions(); - options.memtable_protection_bytes_per_key = - memtable_protection_bytes_per_key_; - if (op_type_ == WriteBatchOpType::kMerge) { - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - } - - SkipNotToCorruptEntry(); - // Not corruping each byte like other tests since Flush() is relatively slow. 
- Reopen(options); - ASSERT_OK(ExecuteWrite(nullptr)); - ASSERT_TRUE(Flush().IsCorruption()); - // DB enters read-only state when flush reads corrupted data - ASSERT_TRUE(dbfull()->TEST_GetBGError().IsCorruption()); - Destroy(options); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_log_iter_test.cc b/db/db_log_iter_test.cc deleted file mode 100644 index 4c9434586..000000000 --- a/db/db_log_iter_test.cc +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -// Introduction of SyncPoint effectively disabled building and running this test -// in Release build. -// which is a pity, it is a good test - -#include "db/db_test_util.h" -#include "env/mock_env.h" -#include "port/stack_trace.h" - -namespace ROCKSDB_NAMESPACE { - -class DBTestXactLogIterator : public DBTestBase { - public: - DBTestXactLogIterator() - : DBTestBase("db_log_iter_test", /*env_do_fsync=*/true) {} - - std::unique_ptr OpenTransactionLogIter( - const SequenceNumber seq) { - std::unique_ptr iter; - Status status = dbfull()->GetUpdatesSince(seq, &iter); - EXPECT_OK(status); - EXPECT_TRUE(iter->Valid()); - return iter; - } -}; - -namespace { -SequenceNumber ReadRecords(std::unique_ptr& iter, - int& count, bool expect_ok = true) { - count = 0; - SequenceNumber lastSequence = 0; - BatchResult res; - while (iter->Valid()) { - res = iter->GetBatch(); - EXPECT_TRUE(res.sequence > lastSequence); - ++count; - lastSequence = res.sequence; - EXPECT_OK(iter->status()); - iter->Next(); - } - if (expect_ok) { - EXPECT_OK(iter->status()); - } else { - EXPECT_NOK(iter->status()); - } - return res.sequence; -} - -void ExpectRecords(const int expected_no_records, - std::unique_ptr& iter) { - int num_records; - ReadRecords(iter, num_records); - ASSERT_EQ(num_records, expected_no_records); -} -} // anonymous namespace - -TEST_F(DBTestXactLogIterator, TransactionLogIterator) { - do { - Options options = OptionsForLogIterTest(); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put(0, "key1", DummyString(1024))); - ASSERT_OK(Put(1, "key2", DummyString(1024))); - ASSERT_OK(Put(1, "key2", DummyString(1024))); - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3U); - { - auto iter = OpenTransactionLogIter(0); - ExpectRecords(3, iter); - } - ReopenWithColumnFamilies({"default", "pikachu"}, options); - env_->SleepForMicroseconds(2 * 1000 * 1000); - { - ASSERT_OK(Put(0, "key4", DummyString(1024))); - ASSERT_OK(Put(1, "key5", DummyString(1024))); - ASSERT_OK(Put(0, "key6", DummyString(1024))); - } - { - auto iter = OpenTransactionLogIter(0); - ExpectRecords(6, iter); - } - } while (ChangeCompactOptions()); -} - -#ifndef NDEBUG // sync point is not included with DNDEBUG build -TEST_F(DBTestXactLogIterator, TransactionLogIteratorRace) { - static const int LOG_ITERATOR_RACE_TEST_COUNT = 2; - static const char* sync_points[LOG_ITERATOR_RACE_TEST_COUNT][4] = { - {"WalManager::GetSortedWalFiles:1", 
"WalManager::PurgeObsoleteFiles:1", - "WalManager::PurgeObsoleteFiles:2", "WalManager::GetSortedWalFiles:2"}, - {"WalManager::GetSortedWalsOfType:1", "WalManager::PurgeObsoleteFiles:1", - "WalManager::PurgeObsoleteFiles:2", - "WalManager::GetSortedWalsOfType:2"}}; - for (int test = 0; test < LOG_ITERATOR_RACE_TEST_COUNT; ++test) { - // Setup sync point dependency to reproduce the race condition of - // a log file moved to archived dir, in the middle of GetSortedWalFiles - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {sync_points[test][0], sync_points[test][1]}, - {sync_points[test][2], sync_points[test][3]}, - }); - - do { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - Options options = OptionsForLogIterTest(); - DestroyAndReopen(options); - ASSERT_OK(Put("key1", DummyString(1024))); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("key2", DummyString(1024))); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("key3", DummyString(1024))); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(Put("key4", DummyString(1024))); - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4U); - ASSERT_OK(dbfull()->FlushWAL(false)); - - { - auto iter = OpenTransactionLogIter(0); - ExpectRecords(4, iter); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - // trigger async flush, and log move. Well, log move will - // wait until the GetSortedWalFiles:1 to reproduce the race - // condition - FlushOptions flush_options; - flush_options.wait = false; - ASSERT_OK(dbfull()->Flush(flush_options)); - - // "key5" would be written in a new memtable and log - ASSERT_OK(Put("key5", DummyString(1024))); - ASSERT_OK(dbfull()->FlushWAL(false)); - { - // this iter would miss "key4" if not fixed - auto iter = OpenTransactionLogIter(0); - ExpectRecords(5, iter); - } - } while (ChangeCompactOptions()); - } -} -#endif - -TEST_F(DBTestXactLogIterator, TransactionLogIteratorStallAtLastRecord) { - do { - Options options = OptionsForLogIterTest(); - DestroyAndReopen(options); - ASSERT_OK(Put("key1", DummyString(1024))); - auto iter = OpenTransactionLogIter(0); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_OK(Put("key2", DummyString(1024))); - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestXactLogIterator, TransactionLogIteratorCheckAfterRestart) { - do { - Options options = OptionsForLogIterTest(); - DestroyAndReopen(options); - ASSERT_OK(Put("key1", DummyString(1024))); - ASSERT_OK(Put("key2", DummyString(1023))); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - Reopen(options); - auto iter = OpenTransactionLogIter(0); - ExpectRecords(2, iter); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestXactLogIterator, TransactionLogIteratorCorruptedLog) { - do { - Options options = OptionsForLogIterTest(); - DestroyAndReopen(options); - - for (int i = 0; i < 1024; i++) { - ASSERT_OK(Put("key" + std::to_string(i), DummyString(10))); - } - - ASSERT_OK(Flush()); - ASSERT_OK(db_->FlushWAL(false)); - - // Corrupt this log to create a gap - ASSERT_OK(db_->DisableFileDeletions()); - - VectorLogPtr wal_files; - ASSERT_OK(db_->GetSortedWalFiles(wal_files)); - ASSERT_FALSE(wal_files.empty()); - - const auto logfile_path = dbname_ + "/" + wal_files.front()->PathName(); - ASSERT_OK(test::TruncateFile(env_, 
logfile_path, - wal_files.front()->SizeFileBytes() / 2)); - - ASSERT_OK(db_->EnableFileDeletions()); - - // Insert a new entry to a new log file - ASSERT_OK(Put("key1025", DummyString(10))); - ASSERT_OK(db_->FlushWAL(false)); - - // Try to read from the beginning. Should stop before the gap and read less - // than 1025 entries - auto iter = OpenTransactionLogIter(0); - int count = 0; - SequenceNumber last_sequence_read = ReadRecords(iter, count, false); - ASSERT_LT(last_sequence_read, 1025U); - - // Try to read past the gap, should be able to seek to key1025 - auto iter2 = OpenTransactionLogIter(last_sequence_read + 1); - ExpectRecords(1, iter2); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestXactLogIterator, TransactionLogIteratorBatchOperations) { - do { - Options options = OptionsForLogIterTest(); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - WriteBatch batch; - ASSERT_OK(batch.Put(handles_[1], "key1", DummyString(1024))); - ASSERT_OK(batch.Put(handles_[0], "key2", DummyString(1024))); - ASSERT_OK(batch.Put(handles_[1], "key3", DummyString(1024))); - ASSERT_OK(batch.Delete(handles_[0], "key2")); - ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); - ASSERT_OK(Flush(1)); - ASSERT_OK(Flush(0)); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_OK(Put(1, "key4", DummyString(1024))); - auto iter = OpenTransactionLogIter(3); - ExpectRecords(2, iter); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTestXactLogIterator, TransactionLogIteratorBlobs) { - Options options = OptionsForLogIterTest(); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - { - WriteBatch batch; - ASSERT_OK(batch.Put(handles_[1], "key1", DummyString(1024))); - ASSERT_OK(batch.Put(handles_[0], "key2", DummyString(1024))); - ASSERT_OK(batch.PutLogData(Slice("blob1"))); - ASSERT_OK(batch.Put(handles_[1], "key3", DummyString(1024))); - ASSERT_OK(batch.PutLogData(Slice("blob2"))); - ASSERT_OK(batch.Delete(handles_[0], "key2")); - ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - } - - auto res = OpenTransactionLogIter(0)->GetBatch(); - struct Handler : public WriteBatch::Handler { - std::string seen; - Status PutCF(uint32_t cf, const Slice& key, const Slice& value) override { - seen += "Put(" + std::to_string(cf) + ", " + key.ToString() + ", " + - std::to_string(value.size()) + ")"; - return Status::OK(); - } - Status MergeCF(uint32_t cf, const Slice& key, const Slice& value) override { - seen += "Merge(" + std::to_string(cf) + ", " + key.ToString() + ", " + - std::to_string(value.size()) + ")"; - return Status::OK(); - } - void LogData(const Slice& blob) override { - seen += "LogData(" + blob.ToString() + ")"; - } - Status DeleteCF(uint32_t cf, const Slice& key) override { - seen += "Delete(" + std::to_string(cf) + ", " + key.ToString() + ")"; - return Status::OK(); - } - } handler; - ASSERT_OK(res.writeBatchPtr->Iterate(&handler)); - ASSERT_EQ( - "Put(1, key1, 1024)" - "Put(0, key2, 1024)" - "LogData(blob1)" - "Put(1, key3, 1024)" - "LogData(blob2)" - "Delete(0, key2)", - handler.seen); -} -} // namespace ROCKSDB_NAMESPACE - - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_logical_block_size_cache_test.cc b/db/db_logical_block_size_cache_test.cc deleted file mode 100644 index ff56d56e3..000000000 --- a/db/db_logical_block_size_cache_test.cc 
+++ /dev/null @@ -1,505 +0,0 @@ -// Copyright (c) 2020-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "test_util/testharness.h" - -#ifdef OS_LINUX -#include "env/io_posix.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" - -namespace ROCKSDB_NAMESPACE { -class EnvWithCustomLogicalBlockSizeCache : public EnvWrapper { - public: - EnvWithCustomLogicalBlockSizeCache(Env* env, LogicalBlockSizeCache* cache) - : EnvWrapper(env), cache_(cache) {} - - Status RegisterDbPaths(const std::vector& paths) override { - return cache_->RefAndCacheLogicalBlockSize(paths); - } - - Status UnregisterDbPaths(const std::vector& paths) override { - cache_->UnrefAndTryRemoveCachedLogicalBlockSize(paths); - return Status::OK(); - } - - private: - LogicalBlockSizeCache* cache_; -}; - -class DBLogicalBlockSizeCacheTest : public testing::Test { - public: - DBLogicalBlockSizeCacheTest() - : dbname_(test::PerThreadDBPath("logical_block_size_cache_test")), - data_path_0_(dbname_ + "/data_path_0"), - data_path_1_(dbname_ + "/data_path_1"), - cf_path_0_(dbname_ + "/cf_path_0"), - cf_path_1_(dbname_ + "/cf_path_1") { - auto get_fd_block_size = [&](int fd) { return fd; }; - auto get_dir_block_size = [&](const std::string& /*dir*/, size_t* size) { - *size = 1024; - return Status::OK(); - }; - cache_.reset( - new LogicalBlockSizeCache(get_fd_block_size, get_dir_block_size)); - env_.reset( - new EnvWithCustomLogicalBlockSizeCache(Env::Default(), cache_.get())); - } - - protected: - std::string dbname_; - std::string data_path_0_; - std::string data_path_1_; - std::string cf_path_0_; - std::string cf_path_1_; - std::unique_ptr cache_; - std::unique_ptr env_; -}; - -TEST_F(DBLogicalBlockSizeCacheTest, OpenClose) { - // Tests that Open will cache the logical block size for data paths, - // and Close will remove the cached sizes. - Options options; - options.create_if_missing = true; - options.env = env_.get(); - options.db_paths = {{data_path_0_, 2048}, {data_path_1_, 2048}}; - - for (int i = 0; i < 2; i++) { - DB* db; - if (!i) { - printf("Open\n"); - ASSERT_OK(DB::Open(options, dbname_, &db)); - } else { - printf("OpenForReadOnly\n"); - ASSERT_OK(DB::OpenForReadOnly(options, dbname_, &db)); - } - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); - ASSERT_TRUE(cache_->Contains(data_path_1_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_1_)); - ASSERT_OK(db->Close()); - ASSERT_EQ(0, cache_->Size()); - delete db; - } - ASSERT_OK(DestroyDB(dbname_, options, {})); -} - -TEST_F(DBLogicalBlockSizeCacheTest, OpenDelete) { - // Tests that Open will cache the logical block size for data paths, - // and delete the db pointer will remove the cached sizes. 
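A minimal sketch (not part of the deleted file) of the Env hook the tests above rely on: RocksDB calls RegisterDbPaths/UnregisterDbPaths on the Env as databases and column families open and close, which is how the test's custom EnvWrapper feeds a LogicalBlockSizeCache. The class name here is hypothetical.

#include <string>
#include <vector>
#include "rocksdb/env.h"

// Counts registered paths instead of caching block sizes; same override points
// as the EnvWithCustomLogicalBlockSizeCache used by the deleted test.
class PathCountingEnv : public ROCKSDB_NAMESPACE::EnvWrapper {
 public:
  explicit PathCountingEnv(ROCKSDB_NAMESPACE::Env* base) : EnvWrapper(base) {}

  ROCKSDB_NAMESPACE::Status RegisterDbPaths(
      const std::vector<std::string>& paths) override {
    registered_paths_ += paths.size();  // called during DB/CF open
    return ROCKSDB_NAMESPACE::Status::OK();
  }

  ROCKSDB_NAMESPACE::Status UnregisterDbPaths(
      const std::vector<std::string>& paths) override {
    registered_paths_ -= paths.size();  // called during DB/CF close
    return ROCKSDB_NAMESPACE::Status::OK();
  }

  size_t registered_paths_ = 0;
};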
- Options options; - options.create_if_missing = true; - options.env = env_.get(); - - for (int i = 0; i < 2; i++) { - DB* db; - if (!i) { - printf("Open\n"); - ASSERT_OK(DB::Open(options, dbname_, &db)); - } else { - printf("OpenForReadOnly\n"); - ASSERT_OK(DB::OpenForReadOnly(options, dbname_, &db)); - } - ASSERT_EQ(1, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - delete db; - ASSERT_EQ(0, cache_->Size()); - } - ASSERT_OK(DestroyDB(dbname_, options, {})); -} - -TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamily) { - // Tests that CreateColumnFamily will cache the cf_paths, - // drop the column family handle won't drop the cache, - // drop and then delete the column family handle will drop the cache. - Options options; - options.create_if_missing = true; - options.env = env_.get(); - ColumnFamilyOptions cf_options; - cf_options.cf_paths = {{cf_path_0_, 1024}, {cf_path_1_, 2048}}; - - DB* db; - ASSERT_OK(DB::Open(options, dbname_, &db)); - ASSERT_EQ(1, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - - ColumnFamilyHandle* cf = nullptr; - ASSERT_OK(db->CreateColumnFamily(cf_options, "cf", &cf)); - ASSERT_EQ(3, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - ASSERT_TRUE(cache_->Contains(cf_path_1_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_)); - - // Drop column family does not drop cache. - ASSERT_OK(db->DropColumnFamily(cf)); - ASSERT_EQ(3, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - ASSERT_TRUE(cache_->Contains(cf_path_1_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_)); - - // Delete handle will drop cache. 
- ASSERT_OK(db->DestroyColumnFamilyHandle(cf)); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - - delete db; - ASSERT_EQ(0, cache_->Size()); - ASSERT_OK(DestroyDB(dbname_, options, {{"cf", cf_options}})); -} - -TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) { - // To test: - // (1) CreateColumnFamilies will cache the cf_paths in - // DBLogicalBlockSizeCache - // (2) Dropping column family handles associated with - // that cf_paths won't drop the cached cf_paths - // (3) Deleting all the column family handles associated - // with that cf_paths will drop the cached cf_paths - - Options options; - options.create_if_missing = true; - options.env = env_.get(); - ColumnFamilyOptions cf_options; - cf_options.cf_paths = {{cf_path_0_, 1024}}; - - DB* db; - ASSERT_OK(DB::Open(options, dbname_, &db)); - ASSERT_EQ(1, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - - std::vector cfs; - ASSERT_OK(db->CreateColumnFamilies(cf_options, {"cf1", "cf2"}, &cfs)); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); - - // Drop column family does not drop cf_path_0_'s entry from cache - for (ColumnFamilyHandle* cf : cfs) { - ASSERT_OK(db->DropColumnFamily(cf)); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); - } - - // Delete one cf handle will not drop cf_path_0_'s entry from cache because - // another handle is still referencing cf_path_0_. - ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0])); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - - // Delete all cf handles and ensure the ref count of cf_path_0_ in cache_ - // can be properly decreased by releasing any background reference to the - // ColumnFamilyData during db deletion - ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1])); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - delete db; - - // Now cf_path_0_ in cache_ has been properly decreased and cf_path_0_'s entry - // is dropped from cache - ASSERT_EQ(0, cache_->Size()); - ASSERT_OK( - DestroyDB(dbname_, options, {{"cf1", cf_options}, {"cf2", cf_options}})); -} - -TEST_F(DBLogicalBlockSizeCacheTest, OpenWithColumnFamilies) { - // Tests that Open two column families with the same cf_path will cache the - // cf_path and have 2 references to the cached size, - // drop the column family handle won't drop the cache, - // drop and then delete the column family handle will drop the cache. 
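A sketch of the multi-column-family open pattern the next test uses; `options`, `cf_options`, and `dbname` are assumed to be set up as in the fixture. Every existing column family, including "default", must appear in the descriptor list, and Open returns one handle per descriptor in the same order.

std::vector<ROCKSDB_NAMESPACE::ColumnFamilyDescriptor> descriptors = {
    {"cf1", cf_options},
    {"cf2", cf_options},
    {ROCKSDB_NAMESPACE::kDefaultColumnFamilyName,
     ROCKSDB_NAMESPACE::ColumnFamilyOptions()}};
std::vector<ROCKSDB_NAMESPACE::ColumnFamilyHandle*> handles;
ROCKSDB_NAMESPACE::DB* db = nullptr;
// handles[0], handles[1], handles[2] correspond to cf1, cf2, default.
ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, dbname, descriptors, &handles, &db));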
- Options options; - options.create_if_missing = true; - options.env = env_.get(); - - ColumnFamilyOptions cf_options; - cf_options.cf_paths = {{cf_path_0_, 1024}}; - - for (int i = 0; i < 2; i++) { - DB* db; - ColumnFamilyHandle* cf1 = nullptr; - ColumnFamilyHandle* cf2 = nullptr; - ASSERT_OK(DB::Open(options, dbname_, &db)); - ASSERT_OK(db->CreateColumnFamily(cf_options, "cf1", &cf1)); - ASSERT_OK(db->CreateColumnFamily(cf_options, "cf2", &cf2)); - ASSERT_OK(db->DestroyColumnFamilyHandle(cf1)); - ASSERT_OK(db->DestroyColumnFamilyHandle(cf2)); - delete db; - ASSERT_EQ(0, cache_->Size()); - - std::vector cfs; - if (!i) { - printf("Open\n"); - ASSERT_OK(DB::Open(options, dbname_, - {{"cf1", cf_options}, - {"cf2", cf_options}, - {"default", ColumnFamilyOptions()}}, - &cfs, &db)); - } else { - printf("OpenForReadOnly\n"); - ASSERT_OK(DB::OpenForReadOnly(options, dbname_, - {{"cf1", cf_options}, - {"cf2", cf_options}, - {"default", ColumnFamilyOptions()}}, - &cfs, &db)); - } - - // Logical block sizes of dbname_ and cf_path_0_ are cached during Open. - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); - - // Drop handles won't drop the cache. - ASSERT_OK(db->DropColumnFamily(cfs[0])); - ASSERT_OK(db->DropColumnFamily(cfs[1])); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); - - // Delete 1st handle won't drop the cache for cf_path_0_. - ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0])); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - - // Delete 2nd handle will drop the cache for cf_path_0_. - ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1])); - ASSERT_EQ(1, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - - // Delete the default handle won't affect the cache because db still refers - // to the default CF. - ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[2])); - ASSERT_EQ(1, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - - delete db; - ASSERT_EQ(0, cache_->Size()); - } - ASSERT_OK( - DestroyDB(dbname_, options, {{"cf1", cf_options}, {"cf2", cf_options}})); -} - -TEST_F(DBLogicalBlockSizeCacheTest, DestroyColumnFamilyHandle) { - // Tests that destroy column family without dropping won't drop the cache, - // because compaction and flush might still need to get logical block size - // when opening new files. - Options options; - options.create_if_missing = true; - options.env = env_.get(); - ColumnFamilyOptions cf_options; - cf_options.cf_paths = {{cf_path_0_, 1024}}; - - DB* db; - ASSERT_OK(DB::Open(options, dbname_, &db)); - ASSERT_EQ(1, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ColumnFamilyHandle* cf = nullptr; - ASSERT_OK(db->CreateColumnFamily(cf_options, "cf", &cf)); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - - // Delete handle won't drop cache. 
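A brief aside on the two operations these cache tests keep contrasting, with `db` and `cf` as in the surrounding test: DropColumnFamily() logically deletes the column family's data, while DestroyColumnFamilyHandle() only releases the caller's handle. Per the comments in the deleted tests, the cached logical block size for a cf_path stays until the last handle (and any background reference) is gone, because flush and compaction may still open files under that path.

ASSERT_OK(db->DropColumnFamily(cf));           // data marked dropped; cache entry kept
ASSERT_OK(db->DestroyColumnFamilyHandle(cf));  // handle released; last release lets the cache entry go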
- ASSERT_OK(db->DestroyColumnFamilyHandle(cf)); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - - delete db; - ASSERT_EQ(0, cache_->Size()); - - // Open with column families. - std::vector cfs; - for (int i = 0; i < 2; i++) { - if (!i) { - printf("Open\n"); - ASSERT_OK(DB::Open( - options, dbname_, - {{"cf", cf_options}, {"default", ColumnFamilyOptions()}}, &cfs, &db)); - } else { - printf("OpenForReadOnly\n"); - ASSERT_OK(DB::OpenForReadOnly( - options, dbname_, - {{"cf", cf_options}, {"default", ColumnFamilyOptions()}}, &cfs, &db)); - } - // cf_path_0_ and dbname_ are cached. - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - - // Deleting handle won't drop cache. - ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0])); - ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1])); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(dbname_)); - ASSERT_EQ(1, cache_->GetRefCount(dbname_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - - delete db; - ASSERT_EQ(0, cache_->Size()); - } - ASSERT_OK(DestroyDB(dbname_, options, {{"cf", cf_options}})); -} - -TEST_F(DBLogicalBlockSizeCacheTest, MultiDBWithDifferentPaths) { - // Tests the cache behavior when there are multiple DBs sharing the same env - // with different db_paths and cf_paths. - Options options; - options.create_if_missing = true; - options.env = env_.get(); - - ASSERT_OK(env_->CreateDirIfMissing(dbname_)); - - DB* db0; - ASSERT_OK(DB::Open(options, data_path_0_, &db0)); - ASSERT_EQ(1, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - - ColumnFamilyOptions cf_options0; - cf_options0.cf_paths = {{cf_path_0_, 1024}}; - ColumnFamilyHandle* cf0; - ASSERT_OK(db0->CreateColumnFamily(cf_options0, "cf", &cf0)); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - - DB* db1; - ASSERT_OK(DB::Open(options, data_path_1_, &db1)); - ASSERT_EQ(3, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - ASSERT_TRUE(cache_->Contains(data_path_1_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_1_)); - - ColumnFamilyOptions cf_options1; - cf_options1.cf_paths = {{cf_path_1_, 1024}}; - ColumnFamilyHandle* cf1; - ASSERT_OK(db1->CreateColumnFamily(cf_options1, "cf", &cf1)); - ASSERT_EQ(4, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - ASSERT_TRUE(cache_->Contains(data_path_1_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_1_)); - ASSERT_TRUE(cache_->Contains(cf_path_1_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_)); - - ASSERT_OK(db0->DestroyColumnFamilyHandle(cf0)); - delete db0; - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_1_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_1_)); - ASSERT_TRUE(cache_->Contains(cf_path_1_)); - ASSERT_EQ(1, 
cache_->GetRefCount(cf_path_1_)); - ASSERT_OK(DestroyDB(data_path_0_, options, {{"cf", cf_options0}})); - - ASSERT_OK(db1->DestroyColumnFamilyHandle(cf1)); - delete db1; - ASSERT_EQ(0, cache_->Size()); - ASSERT_OK(DestroyDB(data_path_1_, options, {{"cf", cf_options1}})); -} - -TEST_F(DBLogicalBlockSizeCacheTest, MultiDBWithSamePaths) { - // Tests the cache behavior when there are multiple DBs sharing the same env - // with the same db_paths and cf_paths. - Options options; - options.create_if_missing = true; - options.env = env_.get(); - options.db_paths = {{data_path_0_, 1024}}; - ColumnFamilyOptions cf_options; - cf_options.cf_paths = {{cf_path_0_, 1024}}; - - ASSERT_OK(env_->CreateDirIfMissing(dbname_)); - - DB* db0; - ASSERT_OK(DB::Open(options, dbname_ + "/db0", &db0)); - ASSERT_EQ(1, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); - - ColumnFamilyHandle* cf0; - ASSERT_OK(db0->CreateColumnFamily(cf_options, "cf", &cf0)); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - - DB* db1; - ASSERT_OK(DB::Open(options, dbname_ + "/db1", &db1)); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - ASSERT_EQ(2, cache_->GetRefCount(data_path_0_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - - ColumnFamilyHandle* cf1; - ASSERT_OK(db1->CreateColumnFamily(cf_options, "cf", &cf1)); - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - ASSERT_EQ(2, cache_->GetRefCount(data_path_0_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); - - ASSERT_OK(db0->DestroyColumnFamilyHandle(cf0)); - delete db0; - ASSERT_EQ(2, cache_->Size()); - ASSERT_TRUE(cache_->Contains(data_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); - ASSERT_TRUE(cache_->Contains(cf_path_0_)); - ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); - ASSERT_OK(DestroyDB(dbname_ + "/db0", options, {{"cf", cf_options}})); - - ASSERT_OK(db1->DestroyColumnFamilyHandle(cf1)); - delete db1; - ASSERT_EQ(0, cache_->Size()); - ASSERT_OK(DestroyDB(dbname_ + "/db1", options, {{"cf", cf_options}})); -} - -} // namespace ROCKSDB_NAMESPACE -#endif // OS_LINUX - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_memtable_test.cc b/db/db_memtable_test.cc deleted file mode 100644 index cae592db3..000000000 --- a/db/db_memtable_test.cc +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
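A minimal sketch, assuming NewFixedPrefixTransform(4) as a stand-in prefix extractor, of the option db/db_memtable_test.cc exercises below: when memtable_insert_with_hint_prefix_extractor is set (and concurrent memtable writes are disabled, as in these tests), inserts that share a prefix reuse a per-prefix hint so the skiplist insert can resume near the previous position instead of searching from the top.

#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"

ROCKSDB_NAMESPACE::Options MakeHintedOptions() {
  ROCKSDB_NAMESPACE::Options opts;
  opts.create_if_missing = true;
  // Insert hints are only used on the non-concurrent write path.
  opts.allow_concurrent_memtable_write = false;
  opts.memtable_insert_with_hint_prefix_extractor.reset(
      ROCKSDB_NAMESPACE::NewFixedPrefixTransform(4));  // hypothetical 4-byte prefixes
  return opts;
}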
- -#include -#include - -#include "db/db_test_util.h" -#include "db/memtable.h" -#include "db/range_del_aggregator.h" -#include "port/stack_trace.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/slice_transform.h" - -namespace ROCKSDB_NAMESPACE { - -class DBMemTableTest : public DBTestBase { - public: - DBMemTableTest() : DBTestBase("db_memtable_test", /*env_do_fsync=*/true) {} -}; - -class MockMemTableRep : public MemTableRep { - public: - explicit MockMemTableRep(Allocator* allocator, MemTableRep* rep) - : MemTableRep(allocator), rep_(rep), num_insert_with_hint_(0) {} - - KeyHandle Allocate(const size_t len, char** buf) override { - return rep_->Allocate(len, buf); - } - - void Insert(KeyHandle handle) override { rep_->Insert(handle); } - - void InsertWithHint(KeyHandle handle, void** hint) override { - num_insert_with_hint_++; - EXPECT_NE(nullptr, hint); - last_hint_in_ = *hint; - rep_->InsertWithHint(handle, hint); - last_hint_out_ = *hint; - } - - bool Contains(const char* key) const override { return rep_->Contains(key); } - - void Get(const LookupKey& k, void* callback_args, - bool (*callback_func)(void* arg, const char* entry)) override { - rep_->Get(k, callback_args, callback_func); - } - - size_t ApproximateMemoryUsage() override { - return rep_->ApproximateMemoryUsage(); - } - - Iterator* GetIterator(Arena* arena) override { - return rep_->GetIterator(arena); - } - - void* last_hint_in() { return last_hint_in_; } - void* last_hint_out() { return last_hint_out_; } - int num_insert_with_hint() { return num_insert_with_hint_; } - - private: - std::unique_ptr rep_; - void* last_hint_in_; - void* last_hint_out_; - int num_insert_with_hint_; -}; - -class MockMemTableRepFactory : public MemTableRepFactory { - public: - MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& cmp, - Allocator* allocator, - const SliceTransform* transform, - Logger* logger) override { - SkipListFactory factory; - MemTableRep* skiplist_rep = - factory.CreateMemTableRep(cmp, allocator, transform, logger); - mock_rep_ = new MockMemTableRep(allocator, skiplist_rep); - return mock_rep_; - } - - MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& cmp, - Allocator* allocator, - const SliceTransform* transform, - Logger* logger, - uint32_t column_family_id) override { - last_column_family_id_ = column_family_id; - return CreateMemTableRep(cmp, allocator, transform, logger); - } - - const char* Name() const override { return "MockMemTableRepFactory"; } - - MockMemTableRep* rep() { return mock_rep_; } - - bool IsInsertConcurrentlySupported() const override { return false; } - - uint32_t GetLastColumnFamilyId() { return last_column_family_id_; } - - private: - MockMemTableRep* mock_rep_; - // workaround since there's no std::numeric_limits::max() yet. 
- uint32_t last_column_family_id_ = static_cast(-1); -}; - -class TestPrefixExtractor : public SliceTransform { - public: - const char* Name() const override { return "TestPrefixExtractor"; } - - Slice Transform(const Slice& key) const override { - const char* p = separator(key); - if (p == nullptr) { - return Slice(); - } - return Slice(key.data(), p - key.data() + 1); - } - - bool InDomain(const Slice& key) const override { - return separator(key) != nullptr; - } - - bool InRange(const Slice& /*key*/) const override { return false; } - - private: - const char* separator(const Slice& key) const { - return reinterpret_cast(memchr(key.data(), '_', key.size())); - } -}; - -// Test that ::Add properly returns false when inserting duplicate keys -TEST_F(DBMemTableTest, DuplicateSeq) { - SequenceNumber seq = 123; - std::string value; - MergeContext merge_context; - Options options; - InternalKeyComparator ikey_cmp(options.comparator); - ReadRangeDelAggregator range_del_agg(&ikey_cmp, - kMaxSequenceNumber /* upper_bound */); - - // Create a MemTable - InternalKeyComparator cmp(BytewiseComparator()); - auto factory = std::make_shared(); - options.memtable_factory = factory; - ImmutableOptions ioptions(options); - WriteBufferManager wb(options.db_write_buffer_size); - MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, - kMaxSequenceNumber, 0 /* column_family_id */); - - // Write some keys and make sure it returns false on duplicates - ASSERT_OK( - mem->Add(seq, kTypeValue, "key", "value2", nullptr /* kv_prot_info */)); - ASSERT_TRUE( - mem->Add(seq, kTypeValue, "key", "value2", nullptr /* kv_prot_info */) - .IsTryAgain()); - // Changing the type should still cause the duplicatae key - ASSERT_TRUE( - mem->Add(seq, kTypeMerge, "key", "value2", nullptr /* kv_prot_info */) - .IsTryAgain()); - // Changing the seq number will make the key fresh - ASSERT_OK(mem->Add(seq + 1, kTypeMerge, "key", "value2", - nullptr /* kv_prot_info */)); - // Test with different types for duplicate keys - ASSERT_TRUE( - mem->Add(seq, kTypeDeletion, "key", "", nullptr /* kv_prot_info */) - .IsTryAgain()); - ASSERT_TRUE( - mem->Add(seq, kTypeSingleDeletion, "key", "", nullptr /* kv_prot_info */) - .IsTryAgain()); - - // Test the duplicate keys under stress - for (int i = 0; i < 10000; i++) { - bool insert_dup = i % 10 == 1; - if (!insert_dup) { - seq++; - } - Status s = mem->Add(seq, kTypeValue, "foo", "value" + std::to_string(seq), - nullptr /* kv_prot_info */); - if (insert_dup) { - ASSERT_TRUE(s.IsTryAgain()); - } else { - ASSERT_OK(s); - } - } - delete mem; - - // Test with InsertWithHint - options.memtable_insert_with_hint_prefix_extractor.reset( - new TestPrefixExtractor()); // which uses _ to extract the prefix - ioptions = ImmutableOptions(options); - mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, - kMaxSequenceNumber, 0 /* column_family_id */); - // Insert a duplicate key with _ in it - ASSERT_OK( - mem->Add(seq, kTypeValue, "key_1", "value", nullptr /* kv_prot_info */)); - ASSERT_TRUE( - mem->Add(seq, kTypeValue, "key_1", "value", nullptr /* kv_prot_info */) - .IsTryAgain()); - delete mem; - - // Test when InsertConcurrently will be invoked - options.allow_concurrent_memtable_write = true; - ioptions = ImmutableOptions(options); - mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, - kMaxSequenceNumber, 0 /* column_family_id */); - MemTablePostProcessInfo post_process_info; - ASSERT_OK(mem->Add(seq, kTypeValue, "key", "value", - nullptr /* kv_prot_info */, 
true, &post_process_info)); - ASSERT_TRUE(mem->Add(seq, kTypeValue, "key", "value", - nullptr /* kv_prot_info */, true, &post_process_info) - .IsTryAgain()); - delete mem; -} - -// A simple test to verify that the concurrent merge writes is functional -TEST_F(DBMemTableTest, ConcurrentMergeWrite) { - int num_ops = 1000; - std::string value; - MergeContext merge_context; - Options options; - // A merge operator that is not sensitive to concurrent writes since in this - // test we don't order the writes. - options.merge_operator = MergeOperators::CreateUInt64AddOperator(); - - // Create a MemTable - InternalKeyComparator cmp(BytewiseComparator()); - auto factory = std::make_shared(); - options.memtable_factory = factory; - options.allow_concurrent_memtable_write = true; - ImmutableOptions ioptions(options); - WriteBufferManager wb(options.db_write_buffer_size); - MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, - kMaxSequenceNumber, 0 /* column_family_id */); - - // Put 0 as the base - PutFixed64(&value, static_cast(0)); - ASSERT_OK(mem->Add(0, kTypeValue, "key", value, nullptr /* kv_prot_info */)); - value.clear(); - - // Write Merge concurrently - ROCKSDB_NAMESPACE::port::Thread write_thread1([&]() { - MemTablePostProcessInfo post_process_info1; - std::string v1; - for (int seq = 1; seq < num_ops / 2; seq++) { - PutFixed64(&v1, seq); - ASSERT_OK(mem->Add(seq, kTypeMerge, "key", v1, nullptr /* kv_prot_info */, - true, &post_process_info1)); - v1.clear(); - } - }); - ROCKSDB_NAMESPACE::port::Thread write_thread2([&]() { - MemTablePostProcessInfo post_process_info2; - std::string v2; - for (int seq = num_ops / 2; seq < num_ops; seq++) { - PutFixed64(&v2, seq); - ASSERT_OK(mem->Add(seq, kTypeMerge, "key", v2, nullptr /* kv_prot_info */, - true, &post_process_info2)); - v2.clear(); - } - }); - write_thread1.join(); - write_thread2.join(); - - Status status; - ReadOptions roptions; - SequenceNumber max_covering_tombstone_seq = 0; - LookupKey lkey("key", kMaxSequenceNumber); - bool res = mem->Get(lkey, &value, /*columns=*/nullptr, /*timestamp=*/nullptr, - &status, &merge_context, &max_covering_tombstone_seq, - roptions, false /* immutable_memtable */); - ASSERT_OK(status); - ASSERT_TRUE(res); - uint64_t ivalue = DecodeFixed64(Slice(value).data()); - uint64_t sum = 0; - for (int seq = 0; seq < num_ops; seq++) { - sum += seq; - } - ASSERT_EQ(ivalue, sum); - - delete mem; -} - -TEST_F(DBMemTableTest, InsertWithHint) { - Options options; - options.allow_concurrent_memtable_write = false; - options.create_if_missing = true; - options.memtable_factory.reset(new MockMemTableRepFactory()); - options.memtable_insert_with_hint_prefix_extractor.reset( - new TestPrefixExtractor()); - options.env = env_; - Reopen(options); - MockMemTableRep* rep = - reinterpret_cast(options.memtable_factory.get()) - ->rep(); - ASSERT_OK(Put("foo_k1", "foo_v1")); - ASSERT_EQ(nullptr, rep->last_hint_in()); - void* hint_foo = rep->last_hint_out(); - ASSERT_OK(Put("foo_k2", "foo_v2")); - ASSERT_EQ(hint_foo, rep->last_hint_in()); - ASSERT_EQ(hint_foo, rep->last_hint_out()); - ASSERT_OK(Put("foo_k3", "foo_v3")); - ASSERT_EQ(hint_foo, rep->last_hint_in()); - ASSERT_EQ(hint_foo, rep->last_hint_out()); - ASSERT_OK(Put("bar_k1", "bar_v1")); - ASSERT_EQ(nullptr, rep->last_hint_in()); - void* hint_bar = rep->last_hint_out(); - ASSERT_NE(hint_foo, hint_bar); - ASSERT_OK(Put("bar_k2", "bar_v2")); - ASSERT_EQ(hint_bar, rep->last_hint_in()); - ASSERT_EQ(hint_bar, rep->last_hint_out()); - ASSERT_EQ(5, 
rep->num_insert_with_hint()); - ASSERT_OK(Put("NotInPrefixDomain", "vvv")); - ASSERT_EQ(5, rep->num_insert_with_hint()); - ASSERT_EQ("foo_v1", Get("foo_k1")); - ASSERT_EQ("foo_v2", Get("foo_k2")); - ASSERT_EQ("foo_v3", Get("foo_k3")); - ASSERT_EQ("bar_v1", Get("bar_k1")); - ASSERT_EQ("bar_v2", Get("bar_k2")); - ASSERT_EQ("vvv", Get("NotInPrefixDomain")); -} - -TEST_F(DBMemTableTest, ColumnFamilyId) { - // Verifies MemTableRepFactory is told the right column family id. - Options options; - options.env = CurrentOptions().env; - options.allow_concurrent_memtable_write = false; - options.create_if_missing = true; - options.memtable_factory.reset(new MockMemTableRepFactory()); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - for (uint32_t cf = 0; cf < 2; ++cf) { - ASSERT_OK(Put(cf, "key", "val")); - ASSERT_OK(Flush(cf)); - ASSERT_EQ( - cf, static_cast(options.memtable_factory.get()) - ->GetLastColumnFamilyId()); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_merge_operand_test.cc b/db/db_merge_operand_test.cc deleted file mode 100644 index 774ae4a96..000000000 --- a/db/db_merge_operand_test.cc +++ /dev/null @@ -1,488 +0,0 @@ -// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/db_test_util.h" -#include "port/stack_trace.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/utilities/debug.h" -#include "table/block_based/block_builder.h" -#include "test_util/sync_point.h" -#include "rocksdb/merge_operator.h" -#include "utilities/fault_injection_env.h" -#include "utilities/merge_operators.h" -#include "utilities/merge_operators/sortlist.h" -#include "utilities/merge_operators/string_append/stringappend2.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -class LimitedStringAppendMergeOp : public StringAppendTESTOperator { - public: - LimitedStringAppendMergeOp(int limit, char delim) - : StringAppendTESTOperator(delim), limit_(limit) {} - - const char* Name() const override { - return "DBMergeOperatorTest::LimitedStringAppendMergeOp"; - } - - bool ShouldMerge(const std::vector& operands) const override { - if (operands.size() > 0 && limit_ > 0 && operands.size() >= limit_) { - return true; - } - return false; - } - - private: - size_t limit_ = 0; -}; -} // anonymous namespace - -class DBMergeOperandTest : public DBTestBase { - public: - DBMergeOperandTest() - : DBTestBase("db_merge_operand_test", /*env_do_fsync=*/true) {} -}; - -TEST_F(DBMergeOperandTest, CacheEvictedMergeOperandReadAfterFreeBug) { - // There was a bug of reading merge operands after they are mistakely freed - // in DB::GetMergeOperands, which is surfaced by cache full. - // See PR#9507 for more. 
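A call-pattern sketch for DB::GetMergeOperands(), which the tests in this file rely on; `db` is assumed to be open with a merge operator and to have received several Merge() writes for "k1". The API returns the individual operands without invoking the merge operator.

std::vector<ROCKSDB_NAMESPACE::PinnableSlice> operands(4);
ROCKSDB_NAMESPACE::GetMergeOperandsOptions info;
info.expected_max_number_of_operands = 4;
int count = 0;
ROCKSDB_NAMESPACE::Status s = db->GetMergeOperands(
    ROCKSDB_NAMESPACE::ReadOptions(), db->DefaultColumnFamily(), "k1",
    operands.data(), &info, &count);
// On success, operands[0..count) hold the unmerged operands in order; if the
// key has more operands than expected_max_number_of_operands, the status is
// Incomplete, as the tests below assert.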
- Options options; - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - options.env = env_; - BlockBasedTableOptions table_options; - - // Small cache to simulate cache full - table_options.block_cache = NewLRUCache(1); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Reopen(options); - int num_records = 4; - int number_of_operands = 0; - std::vector values(num_records); - GetMergeOperandsOptions merge_operands_info; - merge_operands_info.expected_max_number_of_operands = num_records; - - ASSERT_OK(Merge("k1", "v1")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k1", "v2")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k1", "v3")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k1", "v4")); - - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k1", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(number_of_operands, 4); - ASSERT_EQ(values[0].ToString(), "v1"); - ASSERT_EQ(values[1].ToString(), "v2"); - ASSERT_EQ(values[2].ToString(), "v3"); - ASSERT_EQ(values[3].ToString(), "v4"); -} - -TEST_F(DBMergeOperandTest, FlushedMergeOperandReadAfterFreeBug) { - // Repro for a bug where a memtable containing a merge operand could be - // deleted before the merge operand was saved to the result. - auto options = CurrentOptions(); - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - Reopen(options); - - ASSERT_OK(Merge("key", "value")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::GetImpl:PostMemTableGet:0", - "DBMergeOperandTest::FlushedMergeOperandReadAfterFreeBug:PreFlush"}, - {"DBMergeOperandTest::FlushedMergeOperandReadAfterFreeBug:PostFlush", - "DBImpl::GetImpl:PostMemTableGet:1"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - auto flush_thread = port::Thread([&]() { - TEST_SYNC_POINT( - "DBMergeOperandTest::FlushedMergeOperandReadAfterFreeBug:PreFlush"); - ASSERT_OK(Flush()); - TEST_SYNC_POINT( - "DBMergeOperandTest::FlushedMergeOperandReadAfterFreeBug:PostFlush"); - }); - - PinnableSlice value; - GetMergeOperandsOptions merge_operands_info; - merge_operands_info.expected_max_number_of_operands = 1; - int number_of_operands; - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "key", &value, &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(1, number_of_operands); - - flush_thread.join(); -} - -TEST_F(DBMergeOperandTest, GetMergeOperandsBasic) { - Options options; - options.create_if_missing = true; - // Use only the latest two merge operands. - options.merge_operator = std::make_shared(2, ','); - options.env = env_; - Reopen(options); - int num_records = 4; - int number_of_operands = 0; - std::vector values(num_records); - GetMergeOperandsOptions merge_operands_info; - merge_operands_info.expected_max_number_of_operands = num_records; - - // k0 value in memtable - ASSERT_OK(Put("k0", "PutARock")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k0", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "PutARock"); - - // k0.1 value in SST - ASSERT_OK(Put("k0.1", "RockInSST")); - ASSERT_OK(Flush()); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k0.1", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "RockInSST"); - - // All k1 values are in memtable. 
- ASSERT_OK(Merge("k1", "a")); - ASSERT_OK(Put("k1", "x")); - ASSERT_OK(Merge("k1", "b")); - ASSERT_OK(Merge("k1", "c")); - ASSERT_OK(Merge("k1", "d")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k1", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "x"); - ASSERT_EQ(values[1], "b"); - ASSERT_EQ(values[2], "c"); - ASSERT_EQ(values[3], "d"); - - // expected_max_number_of_operands is less than number of merge operands so - // status should be Incomplete. - merge_operands_info.expected_max_number_of_operands = num_records - 1; - Status status = db_->GetMergeOperands( - ReadOptions(), db_->DefaultColumnFamily(), "k1", values.data(), - &merge_operands_info, &number_of_operands); - ASSERT_EQ(status.IsIncomplete(), true); - merge_operands_info.expected_max_number_of_operands = num_records; - - // All k1.1 values are in memtable. - ASSERT_OK(Merge("k1.1", "r")); - ASSERT_OK(Delete("k1.1")); - ASSERT_OK(Merge("k1.1", "c")); - ASSERT_OK(Merge("k1.1", "k")); - ASSERT_OK(Merge("k1.1", "s")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k1.1", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "c"); - ASSERT_EQ(values[1], "k"); - ASSERT_EQ(values[2], "s"); - - // All k2 values are flushed to L0 into a single file. - ASSERT_OK(Merge("k2", "q")); - ASSERT_OK(Merge("k2", "w")); - ASSERT_OK(Merge("k2", "e")); - ASSERT_OK(Merge("k2", "r")); - ASSERT_OK(Flush()); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k2", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "q"); - ASSERT_EQ(values[1], "w"); - ASSERT_EQ(values[2], "e"); - ASSERT_EQ(values[3], "r"); - - // All k2.1 values are flushed to L0 into a single file. - ASSERT_OK(Merge("k2.1", "m")); - ASSERT_OK(Put("k2.1", "l")); - ASSERT_OK(Merge("k2.1", "n")); - ASSERT_OK(Merge("k2.1", "o")); - ASSERT_OK(Flush()); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k2.1", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "l,n,o"); - - // All k2.2 values are flushed to L0 into a single file. - ASSERT_OK(Merge("k2.2", "g")); - ASSERT_OK(Delete("k2.2")); - ASSERT_OK(Merge("k2.2", "o")); - ASSERT_OK(Merge("k2.2", "t")); - ASSERT_OK(Flush()); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k2.2", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "o,t"); - - // Do some compaction that will make the following tests more predictable - // Slice start("PutARock"); - // Slice end("t"); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // All k3 values are flushed and are in different files. - ASSERT_OK(Merge("k3", "ab")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3", "bc")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3", "cd")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3", "de")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k3", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "ab"); - ASSERT_EQ(values[1], "bc"); - ASSERT_EQ(values[2], "cd"); - ASSERT_EQ(values[3], "de"); - - // All k3.1 values are flushed and are in different files. 
- ASSERT_OK(Merge("k3.1", "ab")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("k3.1", "bc")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3.1", "cd")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3.1", "de")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k3.1", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "bc"); - ASSERT_EQ(values[1], "cd"); - ASSERT_EQ(values[2], "de"); - - // All k3.2 values are flushed and are in different files. - ASSERT_OK(Merge("k3.2", "ab")); - ASSERT_OK(Flush()); - ASSERT_OK(Delete("k3.2")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3.2", "cd")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3.2", "de")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k3.2", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "cd"); - ASSERT_EQ(values[1], "de"); - - // All K4 values are in different levels - ASSERT_OK(Merge("k4", "ba")); - ASSERT_OK(Flush()); - MoveFilesToLevel(4); - ASSERT_OK(Merge("k4", "cb")); - ASSERT_OK(Flush()); - MoveFilesToLevel(3); - ASSERT_OK(Merge("k4", "dc")); - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - ASSERT_OK(Merge("k4", "ed")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k4", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "ba"); - ASSERT_EQ(values[1], "cb"); - ASSERT_EQ(values[2], "dc"); - ASSERT_EQ(values[3], "ed"); - - // First 3 k5 values are in SST and next 4 k5 values are in Immutable - // Memtable - ASSERT_OK(Merge("k5", "who")); - ASSERT_OK(Merge("k5", "am")); - ASSERT_OK(Merge("k5", "i")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("k5", "remember")); - ASSERT_OK(Merge("k5", "i")); - ASSERT_OK(Merge("k5", "am")); - ASSERT_OK(Merge("k5", "rocks")); - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k5", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "remember"); - ASSERT_EQ(values[1], "i"); - ASSERT_EQ(values[2], "am"); -} - -TEST_F(DBMergeOperandTest, BlobDBGetMergeOperandsBasic) { - Options options; - options.create_if_missing = true; - options.enable_blob_files = true; - options.min_blob_size = 0; - // Use only the latest two merge operands. - options.merge_operator = std::make_shared(2, ','); - options.env = env_; - Reopen(options); - int num_records = 4; - int number_of_operands = 0; - std::vector values(num_records); - GetMergeOperandsOptions merge_operands_info; - merge_operands_info.expected_max_number_of_operands = num_records; - - // All k1 values are in memtable. - ASSERT_OK(Put("k1", "x")); - ASSERT_OK(Merge("k1", "b")); - ASSERT_OK(Merge("k1", "c")); - ASSERT_OK(Merge("k1", "d")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k1", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "x"); - ASSERT_EQ(values[1], "b"); - ASSERT_EQ(values[2], "c"); - ASSERT_EQ(values[3], "d"); - - // expected_max_number_of_operands is less than number of merge operands so - // status should be Incomplete. 
- merge_operands_info.expected_max_number_of_operands = num_records - 1; - Status status = db_->GetMergeOperands( - ReadOptions(), db_->DefaultColumnFamily(), "k1", values.data(), - &merge_operands_info, &number_of_operands); - ASSERT_EQ(status.IsIncomplete(), true); - merge_operands_info.expected_max_number_of_operands = num_records; - - // All k2 values are flushed to L0 into a single file. - ASSERT_OK(Put("k2", "q")); - ASSERT_OK(Merge("k2", "w")); - ASSERT_OK(Merge("k2", "e")); - ASSERT_OK(Merge("k2", "r")); - ASSERT_OK(Flush()); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k2", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "q,w,e,r"); - - // Do some compaction that will make the following tests more predictable - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // All k3 values are flushed and are in different files. - ASSERT_OK(Put("k3", "ab")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3", "bc")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3", "cd")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3", "de")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k3", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "ab"); - ASSERT_EQ(values[1], "bc"); - ASSERT_EQ(values[2], "cd"); - ASSERT_EQ(values[3], "de"); - - // All K4 values are in different levels - ASSERT_OK(Put("k4", "ba")); - ASSERT_OK(Flush()); - MoveFilesToLevel(4); - ASSERT_OK(Merge("k4", "cb")); - ASSERT_OK(Flush()); - MoveFilesToLevel(3); - ASSERT_OK(Merge("k4", "dc")); - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - ASSERT_OK(Merge("k4", "ed")); - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k4", values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(values[0], "ba"); - ASSERT_EQ(values[1], "cb"); - ASSERT_EQ(values[2], "dc"); - ASSERT_EQ(values[3], "ed"); -} - -TEST_F(DBMergeOperandTest, GetMergeOperandsLargeResultOptimization) { - // These constants are chosen to trigger the large result optimization - // (pinning a bundle of `DBImpl` resources). - const int kNumOperands = 1024; - const int kOperandLen = 1024; - - Options options; - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - DestroyAndReopen(options); - - Random rnd(301); - std::vector expected_merge_operands; - expected_merge_operands.reserve(kNumOperands); - for (int i = 0; i < kNumOperands; ++i) { - expected_merge_operands.emplace_back(rnd.RandomString(kOperandLen)); - ASSERT_OK(Merge("key", expected_merge_operands.back())); - } - - std::vector merge_operands(kNumOperands); - GetMergeOperandsOptions merge_operands_info; - merge_operands_info.expected_max_number_of_operands = kNumOperands; - int num_merge_operands = 0; - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "key", merge_operands.data(), - &merge_operands_info, &num_merge_operands)); - ASSERT_EQ(num_merge_operands, kNumOperands); - - // Ensures the large result optimization was used. - for (int i = 0; i < kNumOperands; ++i) { - ASSERT_TRUE(merge_operands[i].IsPinned()); - } - - // Add a Flush() to change the `SuperVersion` to challenge the resource - // pinning. 
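An aside on the pinning asserted above: a PinnableSlice either pins the underlying block or memtable buffer (zero copy) or owns a private copy of the bytes, and IsPinned() distinguishes the two. A small usage sketch, with `db` assumed open:

ROCKSDB_NAMESPACE::PinnableSlice v;
ASSERT_OK(db->Get(ROCKSDB_NAMESPACE::ReadOptions(), db->DefaultColumnFamily(),
                  "key", &v));
if (v.IsPinned()) {
  // v points directly into pinned storage owned by the DB
} else {
  // the value was copied into v's internal buffer
}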
- ASSERT_OK(Flush()); - - for (int i = 0; i < kNumOperands; ++i) { - ASSERT_EQ(expected_merge_operands[i], merge_operands[i]); - } -} - -TEST_F(DBMergeOperandTest, GetMergeOperandsBaseDeletionInImmMem) { - // In this test, "k1" has a MERGE in a mutable memtable on top of a base - // DELETE in an immutable memtable. - Options opts = CurrentOptions(); - opts.max_write_buffer_number = 10; - opts.min_write_buffer_number_to_merge = 10; - opts.merge_operator = MergeOperators::CreateDeprecatedPutOperator(); - Reopen(opts); - - ASSERT_OK(Put("k1", "val")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("k0", "val")); - ASSERT_OK(Delete("k1")); - ASSERT_OK(Put("k2", "val")); - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - ASSERT_OK(Merge("k1", "val")); - - { - std::vector values(2); - - GetMergeOperandsOptions merge_operands_info; - merge_operands_info.expected_max_number_of_operands = - static_cast(values.size()); - - std::string key = "k1", from_db; - int number_of_operands = 0; - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - key, values.data(), &merge_operands_info, - &number_of_operands)); - ASSERT_EQ(1, number_of_operands); - from_db = values[0].ToString(); - ASSERT_EQ("val", from_db); - } - - { - std::string val; - ASSERT_OK(db_->Get(ReadOptions(), "k1", &val)); - ASSERT_EQ("val", val); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_merge_operator_test.cc b/db/db_merge_operator_test.cc deleted file mode 100644 index 19c7bd1e8..000000000 --- a/db/db_merge_operator_test.cc +++ /dev/null @@ -1,824 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -#include -#include - -#include "db/db_test_util.h" -#include "db/forward_iterator.h" -#include "port/stack_trace.h" -#include "rocksdb/merge_operator.h" -#include "util/random.h" -#include "utilities/merge_operators.h" -#include "utilities/merge_operators/string_append/stringappend2.h" - -namespace ROCKSDB_NAMESPACE { - -class TestReadCallback : public ReadCallback { - public: - TestReadCallback(SnapshotChecker* snapshot_checker, - SequenceNumber snapshot_seq) - : ReadCallback(snapshot_seq), - snapshot_checker_(snapshot_checker), - snapshot_seq_(snapshot_seq) {} - - bool IsVisibleFullCheck(SequenceNumber seq) override { - return snapshot_checker_->CheckInSnapshot(seq, snapshot_seq_) == - SnapshotCheckerResult::kInSnapshot; - } - - private: - SnapshotChecker* snapshot_checker_; - SequenceNumber snapshot_seq_; -}; - -// Test merge operator functionality. -class DBMergeOperatorTest : public DBTestBase { - public: - DBMergeOperatorTest() - : DBTestBase("db_merge_operator_test", /*env_do_fsync=*/false) {} - - std::string GetWithReadCallback(SnapshotChecker* snapshot_checker, - const Slice& key, - const Snapshot* snapshot = nullptr) { - SequenceNumber seq = snapshot == nullptr ? 
db_->GetLatestSequenceNumber() - : snapshot->GetSequenceNumber(); - TestReadCallback read_callback(snapshot_checker, seq); - ReadOptions read_opt; - read_opt.snapshot = snapshot; - PinnableSlice value; - DBImpl::GetImplOptions get_impl_options; - get_impl_options.column_family = db_->DefaultColumnFamily(); - get_impl_options.value = &value; - get_impl_options.callback = &read_callback; - Status s = dbfull()->GetImpl(read_opt, key, get_impl_options); - if (!s.ok()) { - return s.ToString(); - } - return value.ToString(); - } -}; - -TEST_F(DBMergeOperatorTest, LimitMergeOperands) { - class LimitedStringAppendMergeOp : public StringAppendTESTOperator { - public: - LimitedStringAppendMergeOp(int limit, char delim) - : StringAppendTESTOperator(delim), limit_(limit) {} - - const char* Name() const override { - return "DBMergeOperatorTest::LimitedStringAppendMergeOp"; - } - - bool ShouldMerge(const std::vector& operands) const override { - if (operands.size() > 0 && limit_ > 0 && operands.size() >= limit_) { - return true; - } - return false; - } - - private: - size_t limit_ = 0; - }; - - Options options; - options.create_if_missing = true; - // Use only the latest two merge operands. - options.merge_operator = std::make_shared(2, ','); - options.env = env_; - Reopen(options); - // All K1 values are in memtable. - ASSERT_OK(Merge("k1", "a")); - ASSERT_OK(Merge("k1", "b")); - ASSERT_OK(Merge("k1", "c")); - ASSERT_OK(Merge("k1", "d")); - std::string value; - ASSERT_OK(db_->Get(ReadOptions(), "k1", &value)); - // Make sure that only the latest two merge operands are used. If this was - // not the case the value would be "a,b,c,d". - ASSERT_EQ(value, "c,d"); - - // All K2 values are flushed to L0 into a single file. - ASSERT_OK(Merge("k2", "a")); - ASSERT_OK(Merge("k2", "b")); - ASSERT_OK(Merge("k2", "c")); - ASSERT_OK(Merge("k2", "d")); - ASSERT_OK(Flush()); - ASSERT_OK(db_->Get(ReadOptions(), "k2", &value)); - ASSERT_EQ(value, "c,d"); - - // All K3 values are flushed and are in different files. - ASSERT_OK(Merge("k3", "ab")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3", "bc")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3", "cd")); - ASSERT_OK(Flush()); - ASSERT_OK(Merge("k3", "de")); - ASSERT_OK(db_->Get(ReadOptions(), "k3", &value)); - ASSERT_EQ(value, "cd,de"); - - // All K4 values are in different levels - ASSERT_OK(Merge("k4", "ab")); - ASSERT_OK(Flush()); - MoveFilesToLevel(4); - ASSERT_OK(Merge("k4", "bc")); - ASSERT_OK(Flush()); - MoveFilesToLevel(3); - ASSERT_OK(Merge("k4", "cd")); - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - ASSERT_OK(Merge("k4", "de")); - ASSERT_OK(db_->Get(ReadOptions(), "k4", &value)); - ASSERT_EQ(value, "cd,de"); -} - -TEST_F(DBMergeOperatorTest, MergeErrorOnRead) { - Options options; - options.create_if_missing = true; - options.merge_operator.reset(new TestPutOperator()); - options.env = env_; - Reopen(options); - ASSERT_OK(Merge("k1", "v1")); - ASSERT_OK(Merge("k1", "corrupted")); - std::string value; - ASSERT_TRUE(db_->Get(ReadOptions(), "k1", &value).IsCorruption()); - VerifyDBInternal({{"k1", "corrupted"}, {"k1", "v1"}}); -} - -TEST_F(DBMergeOperatorTest, MergeErrorOnWrite) { - Options options; - options.create_if_missing = true; - options.merge_operator.reset(new TestPutOperator()); - options.max_successive_merges = 3; - options.env = env_; - Reopen(options); - ASSERT_OK(Merge("k1", "v1")); - ASSERT_OK(Merge("k1", "v2")); - // Will trigger a merge when hitting max_successive_merges and the merge - // will fail. The delta will be inserted nevertheless. 
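An aside on the option driving this test: max_successive_merges caps how many consecutive Merge() entries for one key the write path accumulates before it tries to fold them with the merge operator at write time. A rough setup sketch, assuming the stock string-append operator:

ROCKSDB_NAMESPACE::Options opts;
opts.create_if_missing = true;
opts.merge_operator =
    ROCKSDB_NAMESPACE::MergeOperators::CreateStringAppendOperator();
opts.max_successive_merges = 3;  // the third successive merge triggers an eager merge attempt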
- ASSERT_OK(Merge("k1", "corrupted")); - // Data should stay unmerged after the error. - VerifyDBInternal({{"k1", "corrupted"}, {"k1", "v2"}, {"k1", "v1"}}); -} - -TEST_F(DBMergeOperatorTest, MergeErrorOnIteration) { - Options options; - options.create_if_missing = true; - options.merge_operator.reset(new TestPutOperator()); - options.env = env_; - - DestroyAndReopen(options); - ASSERT_OK(Merge("k1", "v1")); - ASSERT_OK(Merge("k1", "corrupted")); - ASSERT_OK(Put("k2", "v2")); - auto* iter = db_->NewIterator(ReadOptions()); - iter->Seek("k1"); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsCorruption()); - delete iter; - iter = db_->NewIterator(ReadOptions()); - iter->Seek("k2"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - iter->Prev(); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsCorruption()); - delete iter; - VerifyDBInternal({{"k1", "corrupted"}, {"k1", "v1"}, {"k2", "v2"}}); - - DestroyAndReopen(options); - ASSERT_OK(Merge("k1", "v1")); - ASSERT_OK(Put("k2", "v2")); - ASSERT_OK(Merge("k2", "corrupted")); - iter = db_->NewIterator(ReadOptions()); - iter->Seek("k1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsCorruption()); - delete iter; - VerifyDBInternal({{"k1", "v1"}, {"k2", "corrupted"}, {"k2", "v2"}}); -} - - -TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) { - // This is like a mini-stress test dedicated to `OpFailureScope::kMustMerge`. - // Some or most of it might be deleted upon adding that option to the actual - // stress test. - // - // "k0" and "k2" are stable (uncorrupted) keys before and after a corrupted - // key ("k1"). The outer loop (`i`) varies which write (`j`) to "k1" triggers - // the corruption. Inside that loop there are three cases: - // - // - Case 1: pure `Merge()`s - // - Case 2: `Merge()`s on top of a `Put()` - // - Case 3: `Merge()`s on top of a `Delete()` - // - // For each case we test query results before flush, after flush, and after - // compaction, as well as cleanup after deletion+compaction. The queries - // expect "k0" and "k2" to always be readable. "k1" is expected to be readable - // only by APIs that do not require merging, such as `GetMergeOperands()`. 
- const int kNumOperands = 3; - Options options; - options.merge_operator.reset(new TestPutOperator()); - options.env = env_; - Reopen(options); - - for (int i = 0; i < kNumOperands; ++i) { - auto check_query = [&]() { - { - std::string value; - ASSERT_OK(db_->Get(ReadOptions(), "k0", &value)); - Status s = db_->Get(ReadOptions(), "k1", &value); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_EQ(Status::SubCode::kMergeOperatorFailed, s.subcode()); - ASSERT_OK(db_->Get(ReadOptions(), "k2", &value)); - } - - { - std::unique_ptr iter; - iter.reset(db_->NewIterator(ReadOptions())); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("k0", iter->key()); - iter->Next(); - ASSERT_TRUE(iter->status().IsCorruption()); - ASSERT_EQ(Status::SubCode::kMergeOperatorFailed, - iter->status().subcode()); - - iter->SeekToLast(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("k2", iter->key()); - iter->Prev(); - ASSERT_TRUE(iter->status().IsCorruption()); - - iter->Seek("k2"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("k2", iter->key()); - } - - std::vector values(kNumOperands); - GetMergeOperandsOptions merge_operands_info; - merge_operands_info.expected_max_number_of_operands = kNumOperands; - int num_operands_found = 0; - ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - "k1", values.data(), &merge_operands_info, - &num_operands_found)); - ASSERT_EQ(kNumOperands, num_operands_found); - for (int j = 0; j < num_operands_found; ++j) { - if (i == j) { - ASSERT_EQ(values[j], "corrupted_must_merge"); - } else { - ASSERT_EQ(values[j], "ok"); - } - } - }; - - ASSERT_OK(Put("k0", "val")); - ASSERT_OK(Put("k2", "val")); - - // Case 1 - for (int j = 0; j < kNumOperands; ++j) { - if (j == i) { - ASSERT_OK(Merge("k1", "corrupted_must_merge")); - } else { - ASSERT_OK(Merge("k1", "ok")); - } - } - check_query(); - ASSERT_OK(Flush()); - check_query(); - { - CompactRangeOptions cro; - cro.bottommost_level_compaction = - BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - } - check_query(); - - // Case 2 - for (int j = 0; j < kNumOperands; ++j) { - Slice val; - if (j == i) { - val = "corrupted_must_merge"; - } else { - val = "ok"; - } - if (j == 0) { - ASSERT_OK(Put("k1", val)); - } else { - ASSERT_OK(Merge("k1", val)); - } - } - check_query(); - ASSERT_OK(Flush()); - check_query(); - { - CompactRangeOptions cro; - cro.bottommost_level_compaction = - BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - } - check_query(); - - // Case 3 - ASSERT_OK(Delete("k1")); - for (int j = 0; j < kNumOperands; ++j) { - if (i == j) { - ASSERT_OK(Merge("k1", "corrupted_must_merge")); - } else { - ASSERT_OK(Merge("k1", "ok")); - } - } - check_query(); - ASSERT_OK(Flush()); - check_query(); - { - CompactRangeOptions cro; - cro.bottommost_level_compaction = - BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - } - check_query(); - - // Verify obsolete data removal still happens - ASSERT_OK(Delete("k0")); - ASSERT_OK(Delete("k1")); - ASSERT_OK(Delete("k2")); - ASSERT_EQ("NOT_FOUND", Get("k0")); - ASSERT_EQ("NOT_FOUND", Get("k1")); - ASSERT_EQ("NOT_FOUND", Get("k2")); - CompactRangeOptions cro; - cro.bottommost_level_compaction = - BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_EQ("", FilesPerLevel()); - } -} - - -class MergeOperatorPinningTest : public DBMergeOperatorTest, - public testing::WithParamInterface 
{ - public: - MergeOperatorPinningTest() { disable_block_cache_ = GetParam(); } - - bool disable_block_cache_; -}; - -INSTANTIATE_TEST_CASE_P(MergeOperatorPinningTest, MergeOperatorPinningTest, - ::testing::Bool()); - -TEST_P(MergeOperatorPinningTest, OperandsMultiBlocks) { - Options options = CurrentOptions(); - BlockBasedTableOptions table_options; - table_options.block_size = 1; // every block will contain one entry - table_options.no_block_cache = disable_block_cache_; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.merge_operator = MergeOperators::CreateStringAppendTESTOperator(); - options.level0_slowdown_writes_trigger = (1 << 30); - options.level0_stop_writes_trigger = (1 << 30); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - const int kKeysPerFile = 10; - const int kOperandsPerKeyPerFile = 7; - const int kOperandSize = 100; - // Filse to write in L0 before compacting to lower level - const int kFilesPerLevel = 3; - - Random rnd(301); - std::map true_data; - int batch_num = 1; - int lvl_to_fill = 4; - int key_id = 0; - while (true) { - for (int j = 0; j < kKeysPerFile; j++) { - std::string key = Key(key_id % 35); - key_id++; - for (int k = 0; k < kOperandsPerKeyPerFile; k++) { - std::string val = rnd.RandomString(kOperandSize); - ASSERT_OK(db_->Merge(WriteOptions(), key, val)); - if (true_data[key].size() == 0) { - true_data[key] = val; - } else { - true_data[key] += "," + val; - } - } - } - - if (lvl_to_fill == -1) { - // Keep last batch in memtable and stop - break; - } - - ASSERT_OK(Flush()); - if (batch_num % kFilesPerLevel == 0) { - if (lvl_to_fill != 0) { - MoveFilesToLevel(lvl_to_fill); - } - lvl_to_fill--; - } - batch_num++; - } - - // 3 L0 files - // 1 L1 file - // 3 L2 files - // 1 L3 file - // 3 L4 Files - ASSERT_EQ(FilesPerLevel(), "3,1,3,1,3"); - - VerifyDBFromMap(true_data); -} - -class MergeOperatorHook : public MergeOperator { - public: - explicit MergeOperatorHook(std::shared_ptr _merge_op) - : merge_op_(_merge_op) {} - - bool FullMergeV2(const MergeOperationInput& merge_in, - MergeOperationOutput* merge_out) const override { - before_merge_(); - bool res = merge_op_->FullMergeV2(merge_in, merge_out); - after_merge_(); - return res; - } - - const char* Name() const override { return merge_op_->Name(); } - - std::shared_ptr merge_op_; - std::function before_merge_ = []() {}; - std::function after_merge_ = []() {}; -}; - -TEST_P(MergeOperatorPinningTest, EvictCacheBeforeMerge) { - Options options = CurrentOptions(); - - auto merge_hook = - std::make_shared(MergeOperators::CreateMaxOperator()); - options.merge_operator = merge_hook; - options.disable_auto_compactions = true; - options.level0_slowdown_writes_trigger = (1 << 30); - options.level0_stop_writes_trigger = (1 << 30); - options.max_open_files = 20; - BlockBasedTableOptions bbto; - bbto.no_block_cache = disable_block_cache_; - if (bbto.no_block_cache == false) { - bbto.block_cache = NewLRUCache(64 * 1024 * 1024); - } else { - bbto.block_cache = nullptr; - } - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - const int kNumOperands = 30; - const int kNumKeys = 1000; - const int kOperandSize = 100; - Random rnd(301); - - // 1000 keys every key have 30 operands, every operand is in a different file - std::map true_data; - for (int i = 0; i < kNumOperands; i++) { - for (int j = 0; j < kNumKeys; j++) { - std::string k = Key(j); - std::string v = rnd.RandomString(kOperandSize); - 
ASSERT_OK(db_->Merge(WriteOptions(), k, v)); - - true_data[k] = std::max(true_data[k], v); - } - ASSERT_OK(Flush()); - } - - std::vector file_numbers = ListTableFiles(env_, dbname_); - ASSERT_EQ(file_numbers.size(), kNumOperands); - int merge_cnt = 0; - - // Code executed before merge operation - merge_hook->before_merge_ = [&]() { - // Evict all tables from cache before every merge operation - auto* table_cache = dbfull()->TEST_table_cache(); - for (uint64_t num : file_numbers) { - TableCache::Evict(table_cache, num); - } - // Decrease cache capacity to force all unrefed blocks to be evicted - if (bbto.block_cache) { - bbto.block_cache->SetCapacity(1); - } - merge_cnt++; - }; - - // Code executed after merge operation - merge_hook->after_merge_ = [&]() { - // Increase capacity again after doing the merge - if (bbto.block_cache) { - bbto.block_cache->SetCapacity(64 * 1024 * 1024); - } - }; - - size_t total_reads; - VerifyDBFromMap(true_data, &total_reads); - ASSERT_EQ(merge_cnt, total_reads); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - VerifyDBFromMap(true_data, &total_reads); -} - -TEST_P(MergeOperatorPinningTest, TailingIterator) { - Options options = CurrentOptions(); - options.merge_operator = MergeOperators::CreateMaxOperator(); - BlockBasedTableOptions bbto; - bbto.no_block_cache = disable_block_cache_; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - const int kNumOperands = 100; - const int kNumWrites = 100000; - - std::function writer_func = [&]() { - int k = 0; - for (int i = 0; i < kNumWrites; i++) { - ASSERT_OK(db_->Merge(WriteOptions(), Key(k), Key(k))); - - if (i && i % kNumOperands == 0) { - k++; - } - if (i && i % 127 == 0) { - ASSERT_OK(Flush()); - } - if (i && i % 317 == 0) { - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - } - }; - - std::function reader_func = [&]() { - ReadOptions ro; - ro.tailing = true; - Iterator* iter = db_->NewIterator(ro); - ASSERT_OK(iter->status()); - iter->SeekToFirst(); - for (int i = 0; i < (kNumWrites / kNumOperands); i++) { - while (!iter->Valid()) { - // wait for the key to be written - env_->SleepForMicroseconds(100); - iter->Seek(Key(i)); - } - ASSERT_EQ(iter->key(), Key(i)); - ASSERT_EQ(iter->value(), Key(i)); - - iter->Next(); - } - ASSERT_OK(iter->status()); - - delete iter; - }; - - ROCKSDB_NAMESPACE::port::Thread writer_thread(writer_func); - ROCKSDB_NAMESPACE::port::Thread reader_thread(reader_func); - - writer_thread.join(); - reader_thread.join(); -} - -TEST_F(DBMergeOperatorTest, TailingIteratorMemtableUnrefedBySomeoneElse) { - Options options = CurrentOptions(); - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - DestroyAndReopen(options); - - // Overview of the test: - // * There are two merge operands for the same key: one in an sst file, - // another in a memtable. - // * Seek a tailing iterator to this key. - // * As part of the seek, the iterator will: - // (a) first visit the operand in the memtable and tell ForwardIterator - // to pin this operand, then - // (b) move on to the operand in the sst file, then pass both operands - // to merge operator. - // * The memtable may get flushed and unreferenced by another thread between - // (a) and (b). The test simulates it by flushing the memtable inside a - // SyncPoint callback located between (a) and (b). - // * In this case it's ForwardIterator's responsibility to keep the memtable - // pinned until (b) is complete. 
There used to be a bug causing - // ForwardIterator to not pin it in some circumstances. This test - // reproduces it. - - ASSERT_OK(db_->Merge(WriteOptions(), "key", "sst")); - ASSERT_OK(db_->Flush(FlushOptions())); // Switch to SuperVersion A - ASSERT_OK(db_->Merge(WriteOptions(), "key", "memtable")); - - // Pin SuperVersion A - std::unique_ptr someone_else(db_->NewIterator(ReadOptions())); - ASSERT_OK(someone_else->status()); - - bool pushed_first_operand = false; - bool stepped_to_next_operand = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBIter::MergeValuesNewToOld:PushedFirstOperand", [&](void*) { - EXPECT_FALSE(pushed_first_operand); - pushed_first_operand = true; - EXPECT_OK(db_->Flush(FlushOptions())); // Switch to SuperVersion B - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBIter::MergeValuesNewToOld:SteppedToNextOperand", [&](void*) { - EXPECT_FALSE(stepped_to_next_operand); - stepped_to_next_operand = true; - someone_else.reset(); // Unpin SuperVersion A - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ReadOptions ro; - ro.tailing = true; - std::unique_ptr iter(db_->NewIterator(ro)); - iter->Seek("key"); - - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(std::string("sst,memtable"), iter->value().ToString()); - EXPECT_TRUE(pushed_first_operand); - EXPECT_TRUE(stepped_to_next_operand); -} - -TEST_F(DBMergeOperatorTest, SnapshotCheckerAndReadCallback) { - Options options = CurrentOptions(); - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - DestroyAndReopen(options); - - class TestSnapshotChecker : public SnapshotChecker { - public: - SnapshotCheckerResult CheckInSnapshot( - SequenceNumber seq, SequenceNumber snapshot_seq) const override { - return IsInSnapshot(seq, snapshot_seq) - ? SnapshotCheckerResult::kInSnapshot - : SnapshotCheckerResult::kNotInSnapshot; - } - - bool IsInSnapshot(SequenceNumber seq, SequenceNumber snapshot_seq) const { - switch (snapshot_seq) { - case 0: - return seq == 0; - case 1: - return seq <= 1; - case 2: - // seq = 2 not visible to snapshot with seq = 2 - return seq <= 1; - case 3: - return seq <= 3; - case 4: - // seq = 4 not visible to snpahost with seq = 4 - return seq <= 3; - default: - // seq >=4 is uncommitted - return seq <= 4; - }; - } - }; - TestSnapshotChecker* snapshot_checker = new TestSnapshotChecker(); - dbfull()->SetSnapshotChecker(snapshot_checker); - - std::string value; - ASSERT_OK(Merge("foo", "v1")); - ASSERT_EQ(1, db_->GetLatestSequenceNumber()); - ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo")); - ASSERT_OK(Merge("foo", "v2")); - ASSERT_EQ(2, db_->GetLatestSequenceNumber()); - // v2 is not visible to latest snapshot, which has seq = 2. - ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo")); - // Take a snapshot with seq = 2. - const Snapshot* snapshot1 = db_->GetSnapshot(); - ASSERT_EQ(2, snapshot1->GetSequenceNumber()); - // v2 is not visible to snapshot1, which has seq = 2 - ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1)); - - // Verify flush doesn't alter the result. 
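// (Flushing only moves memtable contents into an SST; sequence numbers are
// preserved, so the snapshot checker's visibility decisions, and hence the
// merge results seen through each snapshot, should not change.)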
- ASSERT_OK(Flush()); - ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1)); - ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo")); - - ASSERT_OK(Merge("foo", "v3")); - ASSERT_EQ(3, db_->GetLatestSequenceNumber()); - ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo")); - ASSERT_OK(Merge("foo", "v4")); - ASSERT_EQ(4, db_->GetLatestSequenceNumber()); - // v4 is not visible to latest snapshot, which has seq = 4. - ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo")); - const Snapshot* snapshot2 = db_->GetSnapshot(); - ASSERT_EQ(4, snapshot2->GetSequenceNumber()); - // v4 is not visible to snapshot2, which has seq = 4. - ASSERT_EQ("v1,v2,v3", - GetWithReadCallback(snapshot_checker, "foo", snapshot2)); - - // Verify flush doesn't alter the result. - ASSERT_OK(Flush()); - ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1)); - ASSERT_EQ("v1,v2,v3", - GetWithReadCallback(snapshot_checker, "foo", snapshot2)); - ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo")); - - ASSERT_OK(Merge("foo", "v5")); - ASSERT_EQ(5, db_->GetLatestSequenceNumber()); - // v5 is uncommitted - ASSERT_EQ("v1,v2,v3,v4", GetWithReadCallback(snapshot_checker, "foo")); - - // full manual compaction. - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // Verify compaction doesn't alter the result. - ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1)); - ASSERT_EQ("v1,v2,v3", - GetWithReadCallback(snapshot_checker, "foo", snapshot2)); - ASSERT_EQ("v1,v2,v3,v4", GetWithReadCallback(snapshot_checker, "foo")); - - db_->ReleaseSnapshot(snapshot1); - db_->ReleaseSnapshot(snapshot2); -} - -class PerConfigMergeOperatorPinningTest - : public DBMergeOperatorTest, - public testing::WithParamInterface> { - public: - PerConfigMergeOperatorPinningTest() { - std::tie(disable_block_cache_, option_config_) = GetParam(); - } - - bool disable_block_cache_; -}; - -INSTANTIATE_TEST_CASE_P( - MergeOperatorPinningTest, PerConfigMergeOperatorPinningTest, - ::testing::Combine(::testing::Bool(), - ::testing::Range(static_cast(DBTestBase::kDefault), - static_cast(DBTestBase::kEnd)))); - -TEST_P(PerConfigMergeOperatorPinningTest, Randomized) { - if (ShouldSkipOptions(option_config_, kSkipMergePut)) { - return; - } - - Options options = CurrentOptions(); - options.merge_operator = MergeOperators::CreateMaxOperator(); - BlockBasedTableOptions table_options; - table_options.no_block_cache = disable_block_cache_; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - - Random rnd(301); - std::map true_data; - - const int kTotalMerges = 5000; - // Every key gets ~10 operands - const int kKeyRange = kTotalMerges / 10; - const int kOperandSize = 20; - const int kNumPutBefore = kKeyRange / 10; // 10% value - const int kNumPutAfter = kKeyRange / 10; // 10% overwrite - const int kNumDelete = kKeyRange / 10; // 10% delete - - // kNumPutBefore keys will have base values - for (int i = 0; i < kNumPutBefore; i++) { - std::string key = Key(rnd.Next() % kKeyRange); - std::string value = rnd.RandomString(kOperandSize); - ASSERT_OK(db_->Put(WriteOptions(), key, value)); - - true_data[key] = value; - } - - // Do kTotalMerges merges - for (int i = 0; i < kTotalMerges; i++) { - std::string key = Key(rnd.Next() % kKeyRange); - std::string value = rnd.RandomString(kOperandSize); - ASSERT_OK(db_->Merge(WriteOptions(), key, value)); - - if (true_data[key] < value) { - true_data[key] = 
value; - } - } - - // Overwrite random kNumPutAfter keys - for (int i = 0; i < kNumPutAfter; i++) { - std::string key = Key(rnd.Next() % kKeyRange); - std::string value = rnd.RandomString(kOperandSize); - ASSERT_OK(db_->Put(WriteOptions(), key, value)); - - true_data[key] = value; - } - - // Delete random kNumDelete keys - for (int i = 0; i < kNumDelete; i++) { - std::string key = Key(rnd.Next() % kKeyRange); - ASSERT_OK(db_->Delete(WriteOptions(), key)); - - true_data.erase(key); - } - - VerifyDBFromMap(true_data); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_options_test.cc b/db/db_options_test.cc deleted file mode 100644 index 3304c6339..000000000 --- a/db/db_options_test.cc +++ /dev/null @@ -1,1215 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -#include -#include -#include - -#include "db/column_family.h" -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "options/options_helper.h" -#include "port/stack_trace.h" -#include "rocksdb/cache.h" -#include "rocksdb/convenience.h" -#include "rocksdb/rate_limiter.h" -#include "rocksdb/stats_history.h" -#include "test_util/sync_point.h" -#include "test_util/testutil.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -class DBOptionsTest : public DBTestBase { - public: - DBOptionsTest() : DBTestBase("db_options_test", /*env_do_fsync=*/true) {} - - std::unordered_map GetMutableDBOptionsMap( - const DBOptions& options) { - std::string options_str; - std::unordered_map mutable_map; - ConfigOptions config_options(options); - config_options.delimiter = "; "; - - EXPECT_OK(GetStringFromMutableDBOptions( - config_options, MutableDBOptions(options), &options_str)); - EXPECT_OK(StringToMap(options_str, &mutable_map)); - - return mutable_map; - } - - std::unordered_map GetMutableCFOptionsMap( - const ColumnFamilyOptions& options) { - std::string options_str; - ConfigOptions config_options; - config_options.delimiter = "; "; - - std::unordered_map mutable_map; - EXPECT_OK(GetStringFromMutableCFOptions( - config_options, MutableCFOptions(options), &options_str)); - EXPECT_OK(StringToMap(options_str, &mutable_map)); - return mutable_map; - } - - std::unordered_map GetRandomizedMutableCFOptionsMap( - Random* rnd) { - Options options = CurrentOptions(); - options.env = env_; - ImmutableDBOptions db_options(options); - test::RandomInitCFOptions(&options, options, rnd); - auto sanitized_options = SanitizeOptions(db_options, options); - auto opt_map = GetMutableCFOptionsMap(sanitized_options); - delete options.compaction_filter; - return opt_map; - } - - std::unordered_map GetRandomizedMutableDBOptionsMap( - Random* rnd) { - DBOptions db_options; - test::RandomInitDBOptions(&db_options, rnd); - auto sanitized_options = SanitizeOptions(dbname_, db_options); - return GetMutableDBOptionsMap(sanitized_options); - } -}; - -TEST_F(DBOptionsTest, ImmutableTrackAndVerifyWalsInManifest) { - Options options; - options.env = env_; 
- options.track_and_verify_wals_in_manifest = true; - - ImmutableDBOptions db_options(options); - ASSERT_TRUE(db_options.track_and_verify_wals_in_manifest); - - Reopen(options); - ASSERT_TRUE(dbfull()->GetDBOptions().track_and_verify_wals_in_manifest); - - Status s = - dbfull()->SetDBOptions({{"track_and_verify_wals_in_manifest", "false"}}); - ASSERT_FALSE(s.ok()); -} - -TEST_F(DBOptionsTest, ImmutableVerifySstUniqueIdInManifest) { - Options options; - options.env = env_; - options.verify_sst_unique_id_in_manifest = true; - - ImmutableDBOptions db_options(options); - ASSERT_TRUE(db_options.verify_sst_unique_id_in_manifest); - - Reopen(options); - ASSERT_TRUE(dbfull()->GetDBOptions().verify_sst_unique_id_in_manifest); - - Status s = - dbfull()->SetDBOptions({{"verify_sst_unique_id_in_manifest", "false"}}); - ASSERT_FALSE(s.ok()); -} - -// RocksDB lite don't support dynamic options. - -TEST_F(DBOptionsTest, AvoidUpdatingOptions) { - Options options; - options.env = env_; - options.max_background_jobs = 4; - options.delayed_write_rate = 1024; - - Reopen(options); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - bool is_changed_stats = false; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::WriteOptionsFile:PersistOptions", [&](void* /*arg*/) { - ASSERT_FALSE(is_changed_stats); // should only save options file once - is_changed_stats = true; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - // helper function to check the status and reset after each check - auto is_changed = [&] { - bool ret = is_changed_stats; - is_changed_stats = false; - return ret; - }; - - // without changing the value, but it's sanitized to a different value - ASSERT_OK(dbfull()->SetDBOptions({{"bytes_per_sync", "0"}})); - ASSERT_TRUE(is_changed()); - - // without changing the value - ASSERT_OK(dbfull()->SetDBOptions({{"max_background_jobs", "4"}})); - ASSERT_FALSE(is_changed()); - - // changing the value - ASSERT_OK(dbfull()->SetDBOptions({{"bytes_per_sync", "123"}})); - ASSERT_TRUE(is_changed()); - - // update again - ASSERT_OK(dbfull()->SetDBOptions({{"bytes_per_sync", "123"}})); - ASSERT_FALSE(is_changed()); - - // without changing a default value - ASSERT_OK(dbfull()->SetDBOptions({{"strict_bytes_per_sync", "false"}})); - ASSERT_FALSE(is_changed()); - - // now change - ASSERT_OK(dbfull()->SetDBOptions({{"strict_bytes_per_sync", "true"}})); - ASSERT_TRUE(is_changed()); - - // multiple values without change - ASSERT_OK(dbfull()->SetDBOptions( - {{"max_total_wal_size", "0"}, {"stats_dump_period_sec", "600"}})); - ASSERT_FALSE(is_changed()); - - // multiple values with change - ASSERT_OK(dbfull()->SetDBOptions( - {{"max_open_files", "100"}, {"stats_dump_period_sec", "600"}})); - ASSERT_TRUE(is_changed()); -} - -TEST_F(DBOptionsTest, GetLatestDBOptions) { - // GetOptions should be able to get latest option changed by SetOptions. - Options options; - options.create_if_missing = true; - options.env = env_; - Random rnd(228); - Reopen(options); - auto new_options = GetRandomizedMutableDBOptionsMap(&rnd); - ASSERT_OK(dbfull()->SetDBOptions(new_options)); - ASSERT_EQ(new_options, GetMutableDBOptionsMap(dbfull()->GetDBOptions())); -} - -TEST_F(DBOptionsTest, GetLatestCFOptions) { - // GetOptions should be able to get latest option changed by SetOptions. 
- Options options; - options.create_if_missing = true; - options.env = env_; - Random rnd(228); - Reopen(options); - CreateColumnFamilies({"foo"}, options); - ReopenWithColumnFamilies({"default", "foo"}, options); - auto options_default = GetRandomizedMutableCFOptionsMap(&rnd); - auto options_foo = GetRandomizedMutableCFOptionsMap(&rnd); - ASSERT_OK(dbfull()->SetOptions(handles_[0], options_default)); - ASSERT_OK(dbfull()->SetOptions(handles_[1], options_foo)); - ASSERT_EQ(options_default, - GetMutableCFOptionsMap(dbfull()->GetOptions(handles_[0]))); - ASSERT_EQ(options_foo, - GetMutableCFOptionsMap(dbfull()->GetOptions(handles_[1]))); -} - -TEST_F(DBOptionsTest, SetMutableTableOptions) { - Options options; - options.create_if_missing = true; - options.env = env_; - options.blob_file_size = 16384; - BlockBasedTableOptions bbto; - bbto.no_block_cache = true; - bbto.block_size = 8192; - bbto.block_restart_interval = 7; - - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - Reopen(options); - - ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily(); - Options c_opts = dbfull()->GetOptions(cfh); - - const auto* c_bbto = - c_opts.table_factory->GetOptions(); - ASSERT_NE(c_bbto, nullptr); - ASSERT_EQ(c_opts.blob_file_size, 16384); - ASSERT_EQ(c_bbto->no_block_cache, true); - ASSERT_EQ(c_bbto->block_size, 8192); - ASSERT_EQ(c_bbto->block_restart_interval, 7); - ASSERT_OK(dbfull()->SetOptions( - cfh, {{"table_factory.block_size", "16384"}, - {"table_factory.block_restart_interval", "11"}})); - ASSERT_EQ(c_bbto->block_size, 16384); - ASSERT_EQ(c_bbto->block_restart_interval, 11); - - // Now set an option that is not mutable - options should not change - ASSERT_NOK( - dbfull()->SetOptions(cfh, {{"table_factory.no_block_cache", "false"}})); - ASSERT_EQ(c_bbto->no_block_cache, true); - ASSERT_EQ(c_bbto->block_size, 16384); - ASSERT_EQ(c_bbto->block_restart_interval, 11); - - // Set some that are mutable and some that are not - options should not change - ASSERT_NOK(dbfull()->SetOptions( - cfh, {{"table_factory.no_block_cache", "false"}, - {"table_factory.block_size", "8192"}, - {"table_factory.block_restart_interval", "7"}})); - ASSERT_EQ(c_bbto->no_block_cache, true); - ASSERT_EQ(c_bbto->block_size, 16384); - ASSERT_EQ(c_bbto->block_restart_interval, 11); - - // Set some that are mutable and some that do not exist - options should not - // change - ASSERT_NOK(dbfull()->SetOptions( - cfh, {{"table_factory.block_size", "8192"}, - {"table_factory.does_not_exist", "true"}, - {"table_factory.block_restart_interval", "7"}})); - ASSERT_EQ(c_bbto->no_block_cache, true); - ASSERT_EQ(c_bbto->block_size, 16384); - ASSERT_EQ(c_bbto->block_restart_interval, 11); - - // Trying to change the table factory fails - ASSERT_NOK(dbfull()->SetOptions( - cfh, {{"table_factory", TableFactory::kPlainTableName()}})); - - // Set some on the table and some on the Column Family - ASSERT_OK(dbfull()->SetOptions( - cfh, {{"table_factory.block_size", "16384"}, - {"blob_file_size", "32768"}, - {"table_factory.block_restart_interval", "13"}})); - c_opts = dbfull()->GetOptions(cfh); - ASSERT_EQ(c_opts.blob_file_size, 32768); - ASSERT_EQ(c_bbto->block_size, 16384); - ASSERT_EQ(c_bbto->block_restart_interval, 13); - // Set some on the table and a bad one on the ColumnFamily - options should - // not change - ASSERT_NOK(dbfull()->SetOptions( - cfh, {{"table_factory.block_size", "1024"}, - {"no_such_option", "32768"}, - {"table_factory.block_restart_interval", "7"}})); - ASSERT_EQ(c_bbto->block_size, 16384); - 
ASSERT_EQ(c_bbto->block_restart_interval, 13); -} - -TEST_F(DBOptionsTest, SetWithCustomMemTableFactory) { - class DummySkipListFactory : public SkipListFactory { - public: - static const char* kClassName() { return "DummySkipListFactory"; } - const char* Name() const override { return kClassName(); } - explicit DummySkipListFactory() : SkipListFactory(2) {} - }; - { - // Verify the DummySkipList cannot be created - ConfigOptions config_options; - config_options.ignore_unsupported_options = false; - std::unique_ptr factory; - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, DummySkipListFactory::kClassName(), &factory)); - } - Options options; - options.create_if_missing = true; - // Try with fail_if_options_file_error=false/true to update the options - for (bool on_error : {false, true}) { - options.fail_if_options_file_error = on_error; - options.env = env_; - options.disable_auto_compactions = false; - - options.memtable_factory.reset(new DummySkipListFactory()); - Reopen(options); - - ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily(); - ASSERT_OK( - dbfull()->SetOptions(cfh, {{"disable_auto_compactions", "true"}})); - ColumnFamilyDescriptor cfd; - ASSERT_OK(cfh->GetDescriptor(&cfd)); - ASSERT_STREQ(cfd.options.memtable_factory->Name(), - DummySkipListFactory::kClassName()); - ColumnFamilyHandle* test = nullptr; - ASSERT_OK(dbfull()->CreateColumnFamily(options, "test", &test)); - ASSERT_OK(test->GetDescriptor(&cfd)); - ASSERT_STREQ(cfd.options.memtable_factory->Name(), - DummySkipListFactory::kClassName()); - - ASSERT_OK(dbfull()->DropColumnFamily(test)); - delete test; - } -} - -TEST_F(DBOptionsTest, SetBytesPerSync) { - const size_t kValueSize = 1024 * 1024; // 1MB - Options options; - options.create_if_missing = true; - options.bytes_per_sync = 1024 * 1024; - options.use_direct_reads = false; - options.write_buffer_size = 400 * kValueSize; - options.disable_auto_compactions = true; - options.compression = kNoCompression; - options.env = env_; - Reopen(options); - int counter = 0; - int low_bytes_per_sync = 0; - int i = 0; - const std::string kValue(kValueSize, 'v'); - ASSERT_EQ(options.bytes_per_sync, dbfull()->GetDBOptions().bytes_per_sync); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WritableFileWriter::RangeSync:0", [&](void* /*arg*/) { counter++; }); - - WriteOptions write_opts; - // should sync approximately 40MB/1MB ~= 40 times. - for (i = 0; i < 40; i++) { - ASSERT_OK(Put(Key(i), kValue, write_opts)); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - low_bytes_per_sync = counter; - ASSERT_GT(low_bytes_per_sync, 35); - ASSERT_LT(low_bytes_per_sync, 45); - - counter = 0; - // 8388608 = 8 * 1024 * 1024 - ASSERT_OK(dbfull()->SetDBOptions({{"bytes_per_sync", "8388608"}})); - ASSERT_EQ(8388608, dbfull()->GetDBOptions().bytes_per_sync); - // should sync approximately 40MB*2/8MB ~= 10 times. - // data will be 40*2MB because of previous Puts too. - for (i = 0; i < 40; i++) { - ASSERT_OK(Put(Key(i), kValue, write_opts)); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_GT(counter, 5); - ASSERT_LT(counter, 15); - - // Redundant assert. But leaving it here just to get the point across that - // low_bytes_per_sync > counter. 
- ASSERT_GT(low_bytes_per_sync, counter); -} - -TEST_F(DBOptionsTest, SetWalBytesPerSync) { - const size_t kValueSize = 1024 * 1024 * 3; - Options options; - options.create_if_missing = true; - options.wal_bytes_per_sync = 512; - options.write_buffer_size = 100 * kValueSize; - options.disable_auto_compactions = true; - options.compression = kNoCompression; - options.env = env_; - Reopen(options); - ASSERT_EQ(512, dbfull()->GetDBOptions().wal_bytes_per_sync); - std::atomic_int counter{0}; - int low_bytes_per_sync = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WritableFileWriter::RangeSync:0", - [&](void* /*arg*/) { counter.fetch_add(1); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - const std::string kValue(kValueSize, 'v'); - int i = 0; - for (; i < 10; i++) { - ASSERT_OK(Put(Key(i), kValue)); - } - // Do not flush. If we flush here, SwitchWAL will reuse old WAL file since its - // empty and will not get the new wal_bytes_per_sync value. - low_bytes_per_sync = counter; - // 5242880 = 1024 * 1024 * 5 - ASSERT_OK(dbfull()->SetDBOptions({{"wal_bytes_per_sync", "5242880"}})); - ASSERT_EQ(5242880, dbfull()->GetDBOptions().wal_bytes_per_sync); - counter = 0; - i = 0; - for (; i < 10; i++) { - ASSERT_OK(Put(Key(i), kValue)); - } - ASSERT_GT(counter, 0); - ASSERT_GT(low_bytes_per_sync, 0); - ASSERT_GT(low_bytes_per_sync, counter); -} - -TEST_F(DBOptionsTest, WritableFileMaxBufferSize) { - Options options; - options.create_if_missing = true; - options.writable_file_max_buffer_size = 1024 * 1024; - options.level0_file_num_compaction_trigger = 3; - options.max_manifest_file_size = 1; - options.env = env_; - int buffer_size = 1024 * 1024; - Reopen(options); - ASSERT_EQ(buffer_size, - dbfull()->GetDBOptions().writable_file_max_buffer_size); - - std::atomic match_cnt(0); - std::atomic unmatch_cnt(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WritableFileWriter::WritableFileWriter:0", [&](void* arg) { - int value = static_cast(reinterpret_cast(arg)); - if (value == buffer_size) { - match_cnt++; - } else { - unmatch_cnt++; - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - int i = 0; - for (; i < 3; i++) { - ASSERT_OK(Put("foo", std::to_string(i))); - ASSERT_OK(Put("bar", std::to_string(i))); - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(unmatch_cnt, 0); - ASSERT_GE(match_cnt, 11); - - ASSERT_OK( - dbfull()->SetDBOptions({{"writable_file_max_buffer_size", "524288"}})); - buffer_size = 512 * 1024; - match_cnt = 0; - unmatch_cnt = 0; // SetDBOptions() will create a WritableFileWriter - - ASSERT_EQ(buffer_size, - dbfull()->GetDBOptions().writable_file_max_buffer_size); - i = 0; - for (; i < 3; i++) { - ASSERT_OK(Put("foo", std::to_string(i))); - ASSERT_OK(Put("bar", std::to_string(i))); - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(unmatch_cnt, 0); - ASSERT_GE(match_cnt, 11); -} - -TEST_F(DBOptionsTest, SetOptionsAndReopen) { - Random rnd(1044); - auto rand_opts = GetRandomizedMutableCFOptionsMap(&rnd); - ASSERT_OK(dbfull()->SetOptions(rand_opts)); - // Verify if DB can be reopen after setting options. 
- Options options; - options.env = env_; - ASSERT_OK(TryReopen(options)); -} - -TEST_F(DBOptionsTest, EnableAutoCompactionAndTriggerStall) { - const std::string kValue(1024, 'v'); - for (int method_type = 0; method_type < 2; method_type++) { - for (int option_type = 0; option_type < 4; option_type++) { - Options options; - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.write_buffer_size = 1024 * 1024 * 10; - options.compression = CompressionType::kNoCompression; - options.level0_file_num_compaction_trigger = 1; - options.level0_stop_writes_trigger = std::numeric_limits::max(); - options.level0_slowdown_writes_trigger = std::numeric_limits::max(); - options.hard_pending_compaction_bytes_limit = - std::numeric_limits::max(); - options.soft_pending_compaction_bytes_limit = - std::numeric_limits::max(); - options.env = env_; - - DestroyAndReopen(options); - int i = 0; - for (; i < 1024; i++) { - ASSERT_OK(Put(Key(i), kValue)); - } - ASSERT_OK(Flush()); - for (; i < 1024 * 2; i++) { - ASSERT_OK(Put(Key(i), kValue)); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - uint64_t l0_size = SizeAtLevel(0); - - switch (option_type) { - case 0: - // test with level0_stop_writes_trigger - options.level0_stop_writes_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - break; - case 1: - options.level0_slowdown_writes_trigger = 2; - break; - case 2: - options.hard_pending_compaction_bytes_limit = l0_size; - options.soft_pending_compaction_bytes_limit = l0_size; - break; - case 3: - options.soft_pending_compaction_bytes_limit = l0_size; - break; - } - Reopen(options); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_FALSE(dbfull()->TEST_write_controler().IsStopped()); - ASSERT_FALSE(dbfull()->TEST_write_controler().NeedsDelay()); - - SyncPoint::GetInstance()->LoadDependency( - {{"DBOptionsTest::EnableAutoCompactionAndTriggerStall:1", - "BackgroundCallCompaction:0"}, - {"DBImpl::BackgroundCompaction():BeforePickCompaction", - "DBOptionsTest::EnableAutoCompactionAndTriggerStall:2"}, - {"DBOptionsTest::EnableAutoCompactionAndTriggerStall:3", - "DBImpl::BackgroundCompaction():AfterPickCompaction"}}); - // Block background compaction. - SyncPoint::GetInstance()->EnableProcessing(); - - switch (method_type) { - case 0: - ASSERT_OK( - dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); - break; - case 1: - ASSERT_OK(dbfull()->EnableAutoCompaction( - {dbfull()->DefaultColumnFamily()})); - break; - } - TEST_SYNC_POINT("DBOptionsTest::EnableAutoCompactionAndTriggerStall:1"); - // Wait for stall condition recalculate. - TEST_SYNC_POINT("DBOptionsTest::EnableAutoCompactionAndTriggerStall:2"); - - switch (option_type) { - case 0: - ASSERT_TRUE(dbfull()->TEST_write_controler().IsStopped()); - break; - case 1: - ASSERT_FALSE(dbfull()->TEST_write_controler().IsStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - break; - case 2: - ASSERT_TRUE(dbfull()->TEST_write_controler().IsStopped()); - break; - case 3: - ASSERT_FALSE(dbfull()->TEST_write_controler().IsStopped()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - break; - } - TEST_SYNC_POINT("DBOptionsTest::EnableAutoCompactionAndTriggerStall:3"); - - // Background compaction executed. 
- ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_FALSE(dbfull()->TEST_write_controler().IsStopped()); - ASSERT_FALSE(dbfull()->TEST_write_controler().NeedsDelay()); - } - } -} - -TEST_F(DBOptionsTest, SetOptionsMayTriggerCompaction) { - Options options; - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 1000; - options.env = env_; - Reopen(options); - for (int i = 0; i < 3; i++) { - // Need to insert two keys to avoid trivial move. - ASSERT_OK(Put("foo", std::to_string(i))); - ASSERT_OK(Put("bar", std::to_string(i))); - ASSERT_OK(Flush()); - } - ASSERT_EQ("3", FilesPerLevel()); - ASSERT_OK( - dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "3"}})); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,1", FilesPerLevel()); -} - -TEST_F(DBOptionsTest, SetBackgroundCompactionThreads) { - Options options; - options.create_if_missing = true; - options.max_background_compactions = 1; // default value - options.env = env_; - Reopen(options); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - ASSERT_OK(dbfull()->SetDBOptions({{"max_background_compactions", "3"}})); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - auto stop_token = dbfull()->TEST_write_controler().GetStopToken(); - ASSERT_EQ(3, dbfull()->TEST_BGCompactionsAllowed()); -} - -TEST_F(DBOptionsTest, SetBackgroundFlushThreads) { - Options options; - options.create_if_missing = true; - options.max_background_flushes = 1; - options.env = env_; - Reopen(options); - ASSERT_EQ(1, dbfull()->TEST_BGFlushesAllowed()); - ASSERT_EQ(1, env_->GetBackgroundThreads(Env::Priority::HIGH)); - ASSERT_OK(dbfull()->SetDBOptions({{"max_background_flushes", "3"}})); - ASSERT_EQ(3, env_->GetBackgroundThreads(Env::Priority::HIGH)); - ASSERT_EQ(3, dbfull()->TEST_BGFlushesAllowed()); -} - -TEST_F(DBOptionsTest, SetBackgroundJobs) { - Options options; - options.create_if_missing = true; - options.max_background_jobs = 8; - options.env = env_; - Reopen(options); - - for (int i = 0; i < 2; ++i) { - if (i > 0) { - options.max_background_jobs = 12; - ASSERT_OK(dbfull()->SetDBOptions( - {{"max_background_jobs", - std::to_string(options.max_background_jobs)}})); - } - - const int expected_max_flushes = options.max_background_jobs / 4; - - ASSERT_EQ(expected_max_flushes, dbfull()->TEST_BGFlushesAllowed()); - ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); - - auto stop_token = dbfull()->TEST_write_controler().GetStopToken(); - - const int expected_max_compactions = 3 * expected_max_flushes; - - ASSERT_EQ(expected_max_flushes, dbfull()->TEST_BGFlushesAllowed()); - ASSERT_EQ(expected_max_compactions, dbfull()->TEST_BGCompactionsAllowed()); - - ASSERT_EQ(expected_max_flushes, - env_->GetBackgroundThreads(Env::Priority::HIGH)); - ASSERT_EQ(expected_max_compactions, - env_->GetBackgroundThreads(Env::Priority::LOW)); - } -} - -TEST_F(DBOptionsTest, AvoidFlushDuringShutdown) { - Options options; - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.env = env_; - WriteOptions write_without_wal; - write_without_wal.disableWAL = true; - - ASSERT_FALSE(options.avoid_flush_during_shutdown); - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "v1", write_without_wal)); - Reopen(options); - ASSERT_EQ("v1", Get("foo")); - ASSERT_EQ("1", FilesPerLevel()); - - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "v2", write_without_wal)); - ASSERT_OK(dbfull()->SetDBOptions({{"avoid_flush_during_shutdown", "true"}})); - Reopen(options); - ASSERT_EQ("NOT_FOUND", Get("foo")); - 
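// With avoid_flush_during_shutdown enabled and the WAL disabled for the write,
// the memtable is simply discarded on Close(), so the reopened DB should hold
// no data for "foo" and no SST file should have been written: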
ASSERT_EQ("", FilesPerLevel()); -} - -TEST_F(DBOptionsTest, SetDelayedWriteRateOption) { - Options options; - options.create_if_missing = true; - options.delayed_write_rate = 2 * 1024U * 1024U; - options.env = env_; - Reopen(options); - ASSERT_EQ(2 * 1024U * 1024U, - dbfull()->TEST_write_controler().max_delayed_write_rate()); - - ASSERT_OK(dbfull()->SetDBOptions({{"delayed_write_rate", "20000"}})); - ASSERT_EQ(20000, dbfull()->TEST_write_controler().max_delayed_write_rate()); -} - -TEST_F(DBOptionsTest, MaxTotalWalSizeChange) { - Random rnd(1044); - const auto value_size = size_t(1024); - std::string value = rnd.RandomString(value_size); - - Options options; - options.create_if_missing = true; - options.env = env_; - CreateColumnFamilies({"1", "2", "3"}, options); - ReopenWithColumnFamilies({"default", "1", "2", "3"}, options); - - WriteOptions write_options; - - const int key_count = 100; - for (int i = 0; i < key_count; ++i) { - for (size_t cf = 0; cf < handles_.size(); ++cf) { - ASSERT_OK(Put(static_cast(cf), Key(i), value)); - } - } - ASSERT_OK(dbfull()->SetDBOptions({{"max_total_wal_size", "10"}})); - - for (size_t cf = 0; cf < handles_.size(); ++cf) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); - ASSERT_EQ("1", FilesPerLevel(static_cast(cf))); - } -} - -TEST_F(DBOptionsTest, SetStatsDumpPeriodSec) { - Options options; - options.create_if_missing = true; - options.stats_dump_period_sec = 5; - options.env = env_; - Reopen(options); - ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_dump_period_sec); - - for (int i = 0; i < 20; i++) { - unsigned int num = rand() % 5000 + 1; - ASSERT_OK(dbfull()->SetDBOptions( - {{"stats_dump_period_sec", std::to_string(num)}})); - ASSERT_EQ(num, dbfull()->GetDBOptions().stats_dump_period_sec); - } - Close(); -} - -TEST_F(DBOptionsTest, SetOptionsStatsPersistPeriodSec) { - Options options; - options.create_if_missing = true; - options.stats_persist_period_sec = 5; - options.env = env_; - Reopen(options); - ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_persist_period_sec); - - ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "12345"}})); - ASSERT_EQ(12345u, dbfull()->GetDBOptions().stats_persist_period_sec); - ASSERT_NOK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "abcde"}})); - ASSERT_EQ(12345u, dbfull()->GetDBOptions().stats_persist_period_sec); -} - -static void assert_candidate_files_empty(DBImpl* dbfull, const bool empty) { - dbfull->TEST_LockMutex(); - JobContext job_context(0); - dbfull->FindObsoleteFiles(&job_context, false); - ASSERT_EQ(empty, job_context.full_scan_candidate_files.empty()); - dbfull->TEST_UnlockMutex(); - if (job_context.HaveSomethingToDelete()) { - // fulfill the contract of FindObsoleteFiles by calling PurgeObsoleteFiles - // afterwards; otherwise the test may hang on shutdown - dbfull->PurgeObsoleteFiles(job_context); - } - job_context.Clean(); -} - -TEST_F(DBOptionsTest, DeleteObsoleteFilesPeriodChange) { - Options options; - options.env = env_; - SetTimeElapseOnlySleepOnReopen(&options); - options.create_if_missing = true; - ASSERT_OK(TryReopen(options)); - - // Verify that candidate files set is empty when no full scan requested. 
- assert_candidate_files_empty(dbfull(), true); - - ASSERT_OK( - dbfull()->SetDBOptions({{"delete_obsolete_files_period_micros", "0"}})); - - // After delete_obsolete_files_period_micros updated to 0, the next call - // to FindObsoleteFiles should make a full scan - assert_candidate_files_empty(dbfull(), false); - - ASSERT_OK( - dbfull()->SetDBOptions({{"delete_obsolete_files_period_micros", "20"}})); - - assert_candidate_files_empty(dbfull(), true); - - env_->MockSleepForMicroseconds(20); - assert_candidate_files_empty(dbfull(), true); - - env_->MockSleepForMicroseconds(1); - assert_candidate_files_empty(dbfull(), false); - - Close(); -} - -TEST_F(DBOptionsTest, MaxOpenFilesChange) { - SpecialEnv env(env_); - Options options; - options.env = CurrentOptions().env; - options.max_open_files = -1; - - Reopen(options); - - Cache* tc = dbfull()->TEST_table_cache(); - - ASSERT_EQ(-1, dbfull()->GetDBOptions().max_open_files); - ASSERT_LT(2000, tc->GetCapacity()); - ASSERT_OK(dbfull()->SetDBOptions({{"max_open_files", "1024"}})); - ASSERT_EQ(1024, dbfull()->GetDBOptions().max_open_files); - // examine the table cache (actual size should be 1014) - ASSERT_GT(1500, tc->GetCapacity()); - Close(); -} - -TEST_F(DBOptionsTest, SanitizeDelayedWriteRate) { - Options options; - options.env = CurrentOptions().env; - options.delayed_write_rate = 0; - Reopen(options); - ASSERT_EQ(16 * 1024 * 1024, dbfull()->GetDBOptions().delayed_write_rate); - - options.rate_limiter.reset(NewGenericRateLimiter(31 * 1024 * 1024)); - Reopen(options); - ASSERT_EQ(31 * 1024 * 1024, dbfull()->GetDBOptions().delayed_write_rate); -} - -TEST_F(DBOptionsTest, SanitizeUniversalTTLCompaction) { - Options options; - options.env = CurrentOptions().env; - options.compaction_style = kCompactionStyleUniversal; - - options.ttl = 0; - options.periodic_compaction_seconds = 0; - Reopen(options); - ASSERT_EQ(0, dbfull()->GetOptions().ttl); - ASSERT_EQ(0, dbfull()->GetOptions().periodic_compaction_seconds); - - options.ttl = 0; - options.periodic_compaction_seconds = 100; - Reopen(options); - ASSERT_EQ(0, dbfull()->GetOptions().ttl); - ASSERT_EQ(100, dbfull()->GetOptions().periodic_compaction_seconds); - - options.ttl = 100; - options.periodic_compaction_seconds = 0; - Reopen(options); - ASSERT_EQ(100, dbfull()->GetOptions().ttl); - ASSERT_EQ(100, dbfull()->GetOptions().periodic_compaction_seconds); - - options.ttl = 100; - options.periodic_compaction_seconds = 500; - Reopen(options); - ASSERT_EQ(100, dbfull()->GetOptions().ttl); - ASSERT_EQ(100, dbfull()->GetOptions().periodic_compaction_seconds); -} - -TEST_F(DBOptionsTest, SanitizeTtlDefault) { - Options options; - options.env = CurrentOptions().env; - Reopen(options); - ASSERT_EQ(30 * 24 * 60 * 60, dbfull()->GetOptions().ttl); - - options.compaction_style = kCompactionStyleLevel; - options.ttl = 0; - Reopen(options); - ASSERT_EQ(0, dbfull()->GetOptions().ttl); - - options.ttl = 100; - Reopen(options); - ASSERT_EQ(100, dbfull()->GetOptions().ttl); -} - -TEST_F(DBOptionsTest, SanitizeFIFOPeriodicCompaction) { - Options options; - options.compaction_style = kCompactionStyleFIFO; - options.env = CurrentOptions().env; - options.ttl = 0; - Reopen(options); - ASSERT_EQ(30 * 24 * 60 * 60, dbfull()->GetOptions().ttl); - - options.ttl = 100; - Reopen(options); - ASSERT_EQ(100, dbfull()->GetOptions().ttl); - - options.ttl = 100 * 24 * 60 * 60; - Reopen(options); - ASSERT_EQ(100 * 24 * 60 * 60, dbfull()->GetOptions().ttl); - - options.ttl = 200; - options.periodic_compaction_seconds = 300; - 
Reopen(options); - ASSERT_EQ(200, dbfull()->GetOptions().ttl); - - options.ttl = 500; - options.periodic_compaction_seconds = 300; - Reopen(options); - ASSERT_EQ(300, dbfull()->GetOptions().ttl); -} - -TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { - Options options; - options.env = CurrentOptions().env; - options.compaction_style = kCompactionStyleFIFO; - options.write_buffer_size = 10 << 10; // 10KB - options.arena_block_size = 4096; - options.compression = kNoCompression; - options.create_if_missing = true; - options.compaction_options_fifo.allow_compaction = false; - env_->SetMockSleep(); - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - // Test dynamically changing ttl. - options.ttl = 1 * 60 * 60; // 1 hour - ASSERT_OK(TryReopen(options)); - - Random rnd(301); - for (int i = 0; i < 10; i++) { - // Generate and flush a file about 10KB. - for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - env_->MockSleepForSeconds(61); - - // No files should be compacted as ttl is set to 1 hour. - ASSERT_EQ(dbfull()->GetOptions().ttl, 3600); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - // Set ttl to 1 minute. So all files should get deleted. - ASSERT_OK(dbfull()->SetOptions({{"ttl", "60"}})); - ASSERT_EQ(dbfull()->GetOptions().ttl, 60); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - // Test dynamically changing compaction_options_fifo.max_table_files_size - options.compaction_options_fifo.max_table_files_size = 500 << 10; // 00KB - options.ttl = 0; - DestroyAndReopen(options); - - for (int i = 0; i < 10; i++) { - // Generate and flush a file about 10KB. - for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - // No files should be compacted as max_table_files_size is set to 500 KB. - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 500 << 10); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - // Set max_table_files_size to 12 KB. So only 1 file should remain now. - ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_fifo", "{max_table_files_size=12288;}"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 12 << 10); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 1); - - // Test dynamically changing compaction_options_fifo.allow_compaction - options.compaction_options_fifo.max_table_files_size = 500 << 10; // 500KB - options.ttl = 0; - options.compaction_options_fifo.allow_compaction = false; - options.level0_file_num_compaction_trigger = 6; - DestroyAndReopen(options); - - for (int i = 0; i < 10; i++) { - // Generate and flush a file about 10KB. 
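// (Each file holds 10 values of ~980 random bytes plus key overhead, roughly
// the 10KB write_buffer_size configured above, so each explicit Flush()
// produces one ~10KB L0 file.)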
- for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - // No files should be compacted as max_table_files_size is set to 500 KB and - // allow_compaction is false - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - false); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - // Set allow_compaction to true. So number of files should be between 1 and 5. - ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_fifo", "{allow_compaction=true;}"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - true); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_GE(NumTableFilesAtLevel(0), 1); - ASSERT_LE(NumTableFilesAtLevel(0), 5); -} - -TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) { - SpecialEnv env(env_); - Options options; - options.env = &env; - - options.compaction_readahead_size = 0; - options.level0_file_num_compaction_trigger = 2; - const std::string kValue(1024, 'v'); - Reopen(options); - - ASSERT_EQ(0, dbfull()->GetDBOptions().compaction_readahead_size); - ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}})); - ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size); - for (int i = 0; i < 1024; i++) { - ASSERT_OK(Put(Key(i), kValue)); - } - ASSERT_OK(Flush()); - for (int i = 0; i < 1024 * 2; i++) { - ASSERT_OK(Put(Key(i), kValue)); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(256, env_->compaction_readahead_size_); - Close(); -} - -TEST_F(DBOptionsTest, FIFOTtlBackwardCompatible) { - Options options; - options.compaction_style = kCompactionStyleFIFO; - options.write_buffer_size = 10 << 10; // 10KB - options.create_if_missing = true; - options.env = CurrentOptions().env; - - ASSERT_OK(TryReopen(options)); - - Random rnd(301); - for (int i = 0; i < 10; i++) { - // Generate and flush a file about 10KB. - for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - // In release 6.0, ttl was promoted from a secondary level option under - // compaction_options_fifo to a top level option under ColumnFamilyOptions. - // We still need to handle old SetOptions calls but should ignore - // ttl under compaction_options_fifo. - ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_fifo", - "{allow_compaction=true;max_table_files_size=1024;ttl=731;}"}, - {"ttl", "60"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - true); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 1024); - ASSERT_EQ(dbfull()->GetOptions().ttl, 60); - - // Put ttl as the first option inside compaction_options_fifo. That works as - // it doesn't overwrite any other option. 
- ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_fifo", - "{ttl=985;allow_compaction=true;max_table_files_size=1024;}"}, - {"ttl", "191"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - true); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 1024); - ASSERT_EQ(dbfull()->GetOptions().ttl, 191); -} - -TEST_F(DBOptionsTest, ChangeCompression) { - if (!Snappy_Supported() || !LZ4_Supported()) { - return; - } - Options options; - options.write_buffer_size = 10 << 10; // 10KB - options.level0_file_num_compaction_trigger = 2; - options.create_if_missing = true; - options.compression = CompressionType::kLZ4Compression; - options.bottommost_compression = CompressionType::kNoCompression; - options.bottommost_compression_opts.level = 2; - options.bottommost_compression_opts.parallel_threads = 1; - options.env = CurrentOptions().env; - - ASSERT_OK(TryReopen(options)); - - CompressionType compression_used = CompressionType::kLZ4Compression; - CompressionOptions compression_opt_used; - bool compacted = false; - SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* c = reinterpret_cast(arg); - compression_used = c->output_compression(); - compression_opt_used = c->output_compression_opts(); - compacted = true; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put("foo", "foofoofoo")); - ASSERT_OK(Put("bar", "foofoofoo")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo", "foofoofoo")); - ASSERT_OK(Put("bar", "foofoofoo")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(compacted); - ASSERT_EQ(CompressionType::kNoCompression, compression_used); - ASSERT_EQ(options.compression_opts.level, compression_opt_used.level); - ASSERT_EQ(options.compression_opts.parallel_threads, - compression_opt_used.parallel_threads); - - compression_used = CompressionType::kLZ4Compression; - compacted = false; - ASSERT_OK(dbfull()->SetOptions( - {{"bottommost_compression", "kSnappyCompression"}, - {"bottommost_compression_opts", "0:6:0:0:4:true"}})); - ASSERT_OK(Put("foo", "foofoofoo")); - ASSERT_OK(Put("bar", "foofoofoo")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo", "foofoofoo")); - ASSERT_OK(Put("bar", "foofoofoo")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(compacted); - ASSERT_EQ(CompressionType::kSnappyCompression, compression_used); - ASSERT_EQ(6, compression_opt_used.level); - // Right now parallel_level is not yet allowed to be changed. - - SyncPoint::GetInstance()->DisableProcessing(); -} - - -TEST_F(DBOptionsTest, BottommostCompressionOptsWithFallbackType) { - // Verify the bottommost compression options still take effect even when the - // bottommost compression type is left at its default value. Verify for both - // automatic and manual compaction. 
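// For instance (mirroring the configuration used below): with
//   options.compression = kLZ4Compression;                  // upper levels
//   options.compression_opts.level = 1;
//   options.bottommost_compression_opts.level = 2;
//   options.bottommost_compression_opts.enabled = true;
// and bottommost_compression left at its default (kDisableCompressionOption),
// bottommost output files are expected to fall back to LZ4 (the general
// compression type) while being built with level 2 taken from
// bottommost_compression_opts.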
- if (!Snappy_Supported() || !LZ4_Supported()) { - return; - } - - constexpr int kUpperCompressionLevel = 1; - constexpr int kBottommostCompressionLevel = 2; - constexpr int kNumL0Files = 2; - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = kNumL0Files; - options.compression = CompressionType::kLZ4Compression; - options.compression_opts.level = kUpperCompressionLevel; - options.bottommost_compression_opts.level = kBottommostCompressionLevel; - options.bottommost_compression_opts.enabled = true; - Reopen(options); - - CompressionType compression_used = CompressionType::kDisableCompressionOption; - CompressionOptions compression_opt_used; - bool compacted = false; - SyncPoint::GetInstance()->SetCallBack( - "CompactionPicker::RegisterCompaction:Registered", [&](void* arg) { - Compaction* c = static_cast(arg); - compression_used = c->output_compression(); - compression_opt_used = c->output_compression_opts(); - compacted = true; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - // First, verify for automatic compaction. - for (int i = 0; i < kNumL0Files; ++i) { - ASSERT_OK(Put("foo", "foofoofoo")); - ASSERT_OK(Put("bar", "foofoofoo")); - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_TRUE(compacted); - ASSERT_EQ(CompressionType::kLZ4Compression, compression_used); - ASSERT_EQ(kBottommostCompressionLevel, compression_opt_used.level); - - // Second, verify for manual compaction. - compacted = false; - compression_used = CompressionType::kDisableCompressionOption; - compression_opt_used = CompressionOptions(); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - - ASSERT_TRUE(compacted); - ASSERT_EQ(CompressionType::kLZ4Compression, compression_used); - ASSERT_EQ(kBottommostCompressionLevel, compression_opt_used.level); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc deleted file mode 100644 index 074f4e9a8..000000000 --- a/db/db_properties_test.cc +++ /dev/null @@ -1,2376 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include - -#include -#include - -#include "db/db_test_util.h" -#include "db/write_stall_stats.h" -#include "options/cf_options.h" -#include "port/stack_trace.h" -#include "rocksdb/listener.h" -#include "rocksdb/options.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/perf_level.h" -#include "rocksdb/table.h" -#include "table/block_based/block.h" -#include "table/format.h" -#include "table/meta_blocks.h" -#include "table/table_builder.h" -#include "test_util/mock_time_env.h" -#include "util/random.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class DBPropertiesTest : public DBTestBase { - public: - DBPropertiesTest() - : DBTestBase("db_properties_test", /*env_do_fsync=*/false) {} - - void AssertDbStats(const std::map& db_stats, - double expected_uptime, int expected_user_bytes_written, - int expected_wal_bytes_written, - int expected_user_writes_by_self, - int expected_user_writes_with_wal) { - ASSERT_EQ(std::to_string(expected_uptime), db_stats.at("db.uptime")); - ASSERT_EQ(std::to_string(expected_wal_bytes_written), - db_stats.at("db.wal_bytes_written")); - ASSERT_EQ("0", db_stats.at("db.wal_syncs")); - ASSERT_EQ(std::to_string(expected_user_bytes_written), - db_stats.at("db.user_bytes_written")); - ASSERT_EQ("0", db_stats.at("db.user_writes_by_other")); - ASSERT_EQ(std::to_string(expected_user_writes_by_self), - db_stats.at("db.user_writes_by_self")); - ASSERT_EQ(std::to_string(expected_user_writes_with_wal), - db_stats.at("db.user_writes_with_wal")); - ASSERT_EQ("0", db_stats.at("db.user_write_stall_micros")); - } -}; - -TEST_F(DBPropertiesTest, Empty) { - do { - Options options; - options.env = env_; - options.write_buffer_size = 100000; // Small write buffer - options.allow_concurrent_memtable_write = false; - options = CurrentOptions(options); - CreateAndReopenWithCF({"pikachu"}, options); - - std::string num; - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ("0", num); - - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ("1", num); - - // Block sync calls - env_->delay_sstable_sync_.store(true, std::memory_order_release); - ASSERT_OK(Put(1, "k1", std::string(100000, 'x'))); // Fill memtable - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ("2", num); - - ASSERT_OK(Put(1, "k2", std::string(100000, 'y'))); // Trigger compaction - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ("1", num); - - ASSERT_EQ("v1", Get(1, "foo")); - // Release sync calls - env_->delay_sstable_sync_.store(false, std::memory_order_release); - - ASSERT_OK(db_->DisableFileDeletions()); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - ASSERT_EQ("0", num); - - ASSERT_OK(db_->DisableFileDeletions()); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - ASSERT_EQ("0", num); - - ASSERT_OK(db_->DisableFileDeletions()); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - ASSERT_EQ("0", num); - - ASSERT_OK(db_->EnableFileDeletions(false)); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - ASSERT_EQ("0", num); - - ASSERT_OK(db_->EnableFileDeletions()); - ASSERT_TRUE( - dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); - 
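The Empty test above leans on a detail worth spelling out: DisableFileDeletions() calls nest, so "rocksdb.is-file-deletions-enabled" stays at "0" until the matching EnableFileDeletions() calls (or a forced one) bring the count back to zero. A rough sketch of the pairing, with the exact EnableFileDeletions signature depending on the RocksDB release:

    #include <cassert>
    #include <string>

    #include "rocksdb/db.h"

    // Sketch: pause file deletions around a backup-style operation.
    void WithFileDeletionsPaused(rocksdb::DB* db) {
      assert(db->DisableFileDeletions().ok());

      std::string enabled;
      db->GetProperty("rocksdb.is-file-deletions-enabled", &enabled);
      // enabled == "0" while at least one DisableFileDeletions() is outstanding.

      // Balance the disable; a non-forced enable only decrements the counter.
      assert(db->EnableFileDeletions(/*force=*/false).ok());
    }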
ASSERT_EQ("1", num); - } while (ChangeOptions()); -} - -TEST_F(DBPropertiesTest, CurrentVersionNumber) { - uint64_t v1, v2, v3; - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.current-super-version-number", &v1)); - ASSERT_OK(Put("12345678", "")); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.current-super-version-number", &v2)); - ASSERT_OK(Flush()); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.current-super-version-number", &v3)); - - ASSERT_EQ(v1, v2); - ASSERT_GT(v3, v2); -} - -TEST_F(DBPropertiesTest, GetAggregatedIntPropertyTest) { - const int kKeySize = 100; - const int kValueSize = 500; - const int kKeyNum = 100; - - Options options; - options.env = env_; - options.create_if_missing = true; - options.write_buffer_size = (kKeySize + kValueSize) * kKeyNum / 10; - // Make them never flush - options.min_write_buffer_number_to_merge = 1000; - options.max_write_buffer_number = 1000; - options = CurrentOptions(options); - CreateAndReopenWithCF({"one", "two", "three", "four"}, options); - - Random rnd(301); - for (auto* handle : handles_) { - for (int i = 0; i < kKeyNum; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), handle, rnd.RandomString(kKeySize), - rnd.RandomString(kValueSize))); - } - } - - uint64_t manual_sum = 0; - uint64_t api_sum = 0; - uint64_t value = 0; - for (auto* handle : handles_) { - ASSERT_TRUE( - db_->GetIntProperty(handle, DB::Properties::kSizeAllMemTables, &value)); - manual_sum += value; - } - ASSERT_TRUE(db_->GetAggregatedIntProperty(DB::Properties::kSizeAllMemTables, - &api_sum)); - ASSERT_GT(manual_sum, 0); - ASSERT_EQ(manual_sum, api_sum); - - ASSERT_FALSE(db_->GetAggregatedIntProperty(DB::Properties::kDBStats, &value)); - - uint64_t before_flush_trm; - uint64_t after_flush_trm; - for (auto* handle : handles_) { - ASSERT_TRUE(db_->GetAggregatedIntProperty( - DB::Properties::kEstimateTableReadersMem, &before_flush_trm)); - - // Issue flush and expect larger memory usage of table readers. - ASSERT_OK(db_->Flush(FlushOptions(), handle)); - - ASSERT_TRUE(db_->GetAggregatedIntProperty( - DB::Properties::kEstimateTableReadersMem, &after_flush_trm)); - ASSERT_GT(after_flush_trm, before_flush_trm); - } -} - -namespace { -void ResetTableProperties(TableProperties* tp) { - tp->data_size = 0; - tp->index_size = 0; - tp->filter_size = 0; - tp->raw_key_size = 0; - tp->raw_value_size = 0; - tp->num_data_blocks = 0; - tp->num_entries = 0; - tp->num_deletions = 0; - tp->num_merge_operands = 0; - tp->num_range_deletions = 0; -} - -void ParseTablePropertiesString(std::string tp_string, TableProperties* tp) { - double dummy_double; - std::replace(tp_string.begin(), tp_string.end(), ';', ' '); - std::replace(tp_string.begin(), tp_string.end(), '=', ' '); - ResetTableProperties(tp); - sscanf(tp_string.c_str(), - "# data blocks %" SCNu64 " # entries %" SCNu64 " # deletions %" SCNu64 - " # merge operands %" SCNu64 " # range deletions %" SCNu64 - " raw key size %" SCNu64 - " raw average key size %lf " - " raw value size %" SCNu64 - " raw average value size %lf " - " data block size %" SCNu64 " index block size (user-key? %" SCNu64 - ", delta-value? 
%" SCNu64 ") %" SCNu64 " filter block size %" SCNu64, - &tp->num_data_blocks, &tp->num_entries, &tp->num_deletions, - &tp->num_merge_operands, &tp->num_range_deletions, &tp->raw_key_size, - &dummy_double, &tp->raw_value_size, &dummy_double, &tp->data_size, - &tp->index_key_is_user_key, &tp->index_value_is_delta_encoded, - &tp->index_size, &tp->filter_size); -} - -void VerifySimilar(uint64_t a, uint64_t b, double bias) { - ASSERT_EQ(a == 0U, b == 0U); - if (a == 0) { - return; - } - double dbl_a = static_cast(a); - double dbl_b = static_cast(b); - if (dbl_a > dbl_b) { - ASSERT_LT(static_cast(dbl_a - dbl_b) / (dbl_a + dbl_b), bias); - } else { - ASSERT_LT(static_cast(dbl_b - dbl_a) / (dbl_a + dbl_b), bias); - } -} - -void VerifyTableProperties( - const TableProperties& base_tp, const TableProperties& new_tp, - double filter_size_bias = CACHE_LINE_SIZE >= 256 ? 0.18 : 0.1, - double index_size_bias = 0.1, double data_size_bias = 0.1, - double num_data_blocks_bias = 0.05) { - VerifySimilar(base_tp.data_size, new_tp.data_size, data_size_bias); - VerifySimilar(base_tp.index_size, new_tp.index_size, index_size_bias); - VerifySimilar(base_tp.filter_size, new_tp.filter_size, filter_size_bias); - VerifySimilar(base_tp.num_data_blocks, new_tp.num_data_blocks, - num_data_blocks_bias); - - ASSERT_EQ(base_tp.raw_key_size, new_tp.raw_key_size); - ASSERT_EQ(base_tp.raw_value_size, new_tp.raw_value_size); - ASSERT_EQ(base_tp.num_entries, new_tp.num_entries); - ASSERT_EQ(base_tp.num_deletions, new_tp.num_deletions); - ASSERT_EQ(base_tp.num_range_deletions, new_tp.num_range_deletions); - - // Merge operands may become Puts, so we only have an upper bound the exact - // number of merge operands. - ASSERT_GE(base_tp.num_merge_operands, new_tp.num_merge_operands); -} - -void GetExpectedTableProperties( - TableProperties* expected_tp, const int kKeySize, const int kValueSize, - const int kPutsPerTable, const int kDeletionsPerTable, - const int kMergeOperandsPerTable, const int kRangeDeletionsPerTable, - const int kTableCount, const int kBloomBitsPerKey, const size_t kBlockSize, - const bool index_key_is_user_key, const bool value_delta_encoding) { - const int kKeysPerTable = - kPutsPerTable + kDeletionsPerTable + kMergeOperandsPerTable; - const int kPutCount = kTableCount * kPutsPerTable; - const int kDeletionCount = kTableCount * kDeletionsPerTable; - const int kMergeCount = kTableCount * kMergeOperandsPerTable; - const int kRangeDeletionCount = kTableCount * kRangeDeletionsPerTable; - const int kKeyCount = - kPutCount + kDeletionCount + kMergeCount + kRangeDeletionCount; - const int kAvgSuccessorSize = kKeySize / 5; - const int kEncodingSavePerKey = kKeySize / 4; - expected_tp->raw_key_size = kKeyCount * (kKeySize + 8); - expected_tp->raw_value_size = - (kPutCount + kMergeCount + kRangeDeletionCount) * kValueSize; - expected_tp->num_entries = kKeyCount; - expected_tp->num_deletions = kDeletionCount + kRangeDeletionCount; - expected_tp->num_merge_operands = kMergeCount; - expected_tp->num_range_deletions = kRangeDeletionCount; - expected_tp->num_data_blocks = - kTableCount * - (kKeysPerTable * (kKeySize - kEncodingSavePerKey + kValueSize)) / - kBlockSize; - expected_tp->data_size = - kTableCount * (kKeysPerTable * (kKeySize + 8 + kValueSize)); - expected_tp->index_size = - expected_tp->num_data_blocks * - (kAvgSuccessorSize + (index_key_is_user_key ? 0 : 8) - - // discount 1 byte as value size is not encoded in value delta encoding - (value_delta_encoding ? 
1 : 0)); - expected_tp->filter_size = - kTableCount * ((kKeysPerTable * kBloomBitsPerKey + 7) / 8 + - /*average-ish overhead*/ CACHE_LINE_SIZE / 2); -} -} // anonymous namespace - -TEST_F(DBPropertiesTest, ValidatePropertyInfo) { - for (const auto& ppt_name_and_info : InternalStats::ppt_name_to_info) { - // If C++ gets a std::string_literal, this would be better to check at - // compile-time using static_assert. - ASSERT_TRUE(ppt_name_and_info.first.empty() || - !isdigit(ppt_name_and_info.first.back())); - - int count = 0; - count += (ppt_name_and_info.second.handle_string == nullptr) ? 0 : 1; - count += (ppt_name_and_info.second.handle_int == nullptr) ? 0 : 1; - count += (ppt_name_and_info.second.handle_string_dbimpl == nullptr) ? 0 : 1; - ASSERT_TRUE(count == 1); - } -} - -TEST_F(DBPropertiesTest, ValidateSampleNumber) { - // When "max_open_files" is -1, we read all the files for - // "rocksdb.estimate-num-keys" computation, which is the ground truth. - // Otherwise, we sample 20 newest files to make an estimation. - // Formula: lastest_20_files_active_key_ratio * total_files - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.level0_stop_writes_trigger = 1000; - DestroyAndReopen(options); - int key = 0; - for (int files = 20; files >= 10; files -= 10) { - for (int i = 0; i < files; i++) { - int rows = files / 10; - for (int j = 0; j < rows; j++) { - ASSERT_OK(db_->Put(WriteOptions(), std::to_string(++key), "foo")); - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - } - std::string num; - Reopen(options); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); - ASSERT_EQ("45", num); - options.max_open_files = -1; - Reopen(options); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); - ASSERT_EQ("50", num); -} - -TEST_F(DBPropertiesTest, AggregatedTableProperties) { - for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) { - const int kDeletionsPerTable = 0; - const int kMergeOperandsPerTable = 15; - const int kRangeDeletionsPerTable = 5; - const int kPutsPerTable = 100; - const int kKeySize = 80; - const int kValueSize = 200; - const int kBloomBitsPerKey = 20; - - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 8; - options.compression = kNoCompression; - options.create_if_missing = true; - options.merge_operator.reset(new TestPutOperator()); - - BlockBasedTableOptions table_options; - table_options.filter_policy.reset( - NewBloomFilterPolicy(kBloomBitsPerKey, false)); - table_options.block_size = 1024; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - - // Hold open a snapshot to prevent range tombstones from being compacted - // away. 
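The expected values in the ValidateSampleNumber test above follow directly from its write pattern. The first loop iteration writes 20 files with 2 keys each (40 keys) and the second writes 10 files with 1 key each (10 keys), for 30 files and 50 keys in total. With a small max_open_files, "rocksdb.estimate-num-keys" samples only the 20 newest files (the 10 one-key files plus 10 of the two-key files, 30 keys), so, per the formula in the deleted comment, the estimate is (30 / 20) * 30 files = 45. With max_open_files = -1 every file is read and the exact count, 50, is reported.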
- ManagedSnapshot snapshot(db_); - - Random rnd(5632); - for (int table = 1; table <= kTableCount; ++table) { - for (int i = 0; i < kPutsPerTable; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), rnd.RandomString(kKeySize), - rnd.RandomString(kValueSize))); - } - for (int i = 0; i < kDeletionsPerTable; i++) { - ASSERT_OK(db_->Delete(WriteOptions(), rnd.RandomString(kKeySize))); - } - for (int i = 0; i < kMergeOperandsPerTable; i++) { - ASSERT_OK(db_->Merge(WriteOptions(), rnd.RandomString(kKeySize), - rnd.RandomString(kValueSize))); - } - for (int i = 0; i < kRangeDeletionsPerTable; i++) { - std::string start = rnd.RandomString(kKeySize); - std::string end = start; - end.resize(kValueSize); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - start, end)); - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - std::string property; - db_->GetProperty(DB::Properties::kAggregatedTableProperties, &property); - TableProperties output_tp; - ParseTablePropertiesString(property, &output_tp); - bool index_key_is_user_key = output_tp.index_key_is_user_key > 0; - bool value_is_delta_encoded = output_tp.index_value_is_delta_encoded > 0; - - TableProperties expected_tp; - GetExpectedTableProperties( - &expected_tp, kKeySize, kValueSize, kPutsPerTable, kDeletionsPerTable, - kMergeOperandsPerTable, kRangeDeletionsPerTable, kTableCount, - kBloomBitsPerKey, table_options.block_size, index_key_is_user_key, - value_is_delta_encoded); - - VerifyTableProperties(expected_tp, output_tp); - } -} - -TEST_F(DBPropertiesTest, ReadLatencyHistogramByLevel) { - Options options = CurrentOptions(); - options.write_buffer_size = 110 << 10; - options.level0_file_num_compaction_trigger = 6; - options.num_levels = 4; - options.compression = kNoCompression; - options.max_bytes_for_level_base = 4500 << 10; - options.target_file_size_base = 98 << 10; - options.max_write_buffer_number = 2; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.max_open_files = 11; // Make sure no preloading of table readers - - // RocksDB sanitizes max open files to at least 20. Modify it back. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = static_cast<int*>(arg); - *max_open_files = 11; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - - CreateAndReopenWithCF({"pikachu"}, options); - int key_index = 0; - Random rnd(301); - for (int num = 0; num < 8; num++) { - ASSERT_OK(Put("foo", "bar")); - GenerateNewFile(&rnd, &key_index); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - std::string prop; - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); - - // Get() after flushes, see latency histogram tracked. - for (int key = 0; key < key_index; key++) { - Get(Key(key)); - } - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cfstats", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - - // Reopen and issue Get().
See the latency tracked - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - for (int key = 0; key < key_index; key++) { - Get(Key(key)); - } - - // Test for getting immutable_db_options_.statistics - ASSERT_TRUE(dbfull()->GetProperty(dbfull()->DefaultColumnFamily(), - "rocksdb.options-statistics", &prop)); - ASSERT_NE(std::string::npos, prop.find("rocksdb.block.cache.miss")); - ASSERT_EQ(std::string::npos, prop.find("rocksdb.db.f.micros")); - - ASSERT_TRUE(dbfull()->GetProperty(dbfull()->DefaultColumnFamily(), - "rocksdb.cf-file-histogram", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - - // Reopen and issue iterating. See the latency tracked - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cf-file-histogram", &prop)); - ASSERT_EQ(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - { - std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions())); - for (iter->Seek(Key(0)); iter->Valid(); iter->Next()) { - } - ASSERT_OK(iter->status()); - } - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cf-file-histogram", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - - // CF 1 should show no histogram. - ASSERT_TRUE( - dbfull()->GetProperty(handles_[1], "rocksdb.cf-file-histogram", &prop)); - ASSERT_EQ(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - // Put something and read it back; CF 1 should show a histogram. - ASSERT_OK(Put(1, "foo", "bar")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("bar", Get(1, "foo")); - - ASSERT_TRUE( - dbfull()->GetProperty(handles_[1], "rocksdb.cf-file-histogram", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - - // options.max_open_files = -1 preloads table readers.
- options.max_open_files = -1; - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_TRUE(dbfull()->GetProperty(dbfull()->DefaultColumnFamily(), - "rocksdb.cf-file-histogram", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - for (int key = 0; key < key_index; key++) { - Get(Key(key)); - } - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cfstats", &prop)); - ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); - - // Clear internal stats - ASSERT_OK(dbfull()->ResetStats()); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cfstats", &prop)); - ASSERT_EQ(std::string::npos, prop.find("** Level 0 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); - ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); -} - -TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) { - const int kTableCount = 100; - const int kDeletionsPerTable = 0; - const int kMergeOperandsPerTable = 2; - const int kRangeDeletionsPerTable = 2; - const int kPutsPerTable = 10; - const int kKeySize = 50; - const int kValueSize = 400; - const int kMaxLevel = 7; - const int kBloomBitsPerKey = 20; - Random rnd(301); - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 8; - options.compression = kNoCompression; - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.target_file_size_base = 8192; - options.max_bytes_for_level_base = 10000; - options.max_bytes_for_level_multiplier = 2; - // This ensures there is no compaction happening when we call GetProperty(). - options.disable_auto_compactions = true; - options.merge_operator.reset(new TestPutOperator()); - - BlockBasedTableOptions table_options; - table_options.filter_policy.reset( - NewBloomFilterPolicy(kBloomBitsPerKey, false)); - table_options.block_size = 1024; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - DestroyAndReopen(options); - - // Hold open a snapshot to prevent range tombstones from being compacted away.
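The histogram assertions above all grep the same property strings; outside of a test, the per-level read latency histograms can be pulled with a plain GetProperty() call. A minimal sketch (the property only lists levels that have actually served reads since the last stats reset):

    #include <iostream>
    #include <string>

    #include "rocksdb/db.h"

    // Sketch: print the per-level file read latency histograms for a column
    // family, e.g. lines like "** Level 0 read latency histogram ...".
    void PrintFileReadHistograms(rocksdb::DB* db) {
      std::string prop;
      if (db->GetProperty(db->DefaultColumnFamily(),
                          "rocksdb.cf-file-histogram", &prop)) {
        std::cout << prop << std::endl;
      }
    }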
- ManagedSnapshot snapshot(db_); - - std::string level_tp_strings[kMaxLevel]; - std::string tp_string; - TableProperties level_tps[kMaxLevel]; - TableProperties tp, sum_tp, expected_tp; - for (int table = 1; table <= kTableCount; ++table) { - for (int i = 0; i < kPutsPerTable; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), rnd.RandomString(kKeySize), - rnd.RandomString(kValueSize))); - } - for (int i = 0; i < kDeletionsPerTable; i++) { - ASSERT_OK(db_->Delete(WriteOptions(), rnd.RandomString(kKeySize))); - } - for (int i = 0; i < kMergeOperandsPerTable; i++) { - ASSERT_OK(db_->Merge(WriteOptions(), rnd.RandomString(kKeySize), - rnd.RandomString(kValueSize))); - } - for (int i = 0; i < kRangeDeletionsPerTable; i++) { - std::string start = rnd.RandomString(kKeySize); - std::string end = start; - end.resize(kValueSize); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - start, end)); - } - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ResetTableProperties(&sum_tp); - for (int level = 0; level < kMaxLevel; ++level) { - db_->GetProperty(DB::Properties::kAggregatedTablePropertiesAtLevel + - std::to_string(level), - &level_tp_strings[level]); - ParseTablePropertiesString(level_tp_strings[level], &level_tps[level]); - sum_tp.data_size += level_tps[level].data_size; - sum_tp.index_size += level_tps[level].index_size; - sum_tp.filter_size += level_tps[level].filter_size; - sum_tp.raw_key_size += level_tps[level].raw_key_size; - sum_tp.raw_value_size += level_tps[level].raw_value_size; - sum_tp.num_data_blocks += level_tps[level].num_data_blocks; - sum_tp.num_entries += level_tps[level].num_entries; - sum_tp.num_deletions += level_tps[level].num_deletions; - sum_tp.num_merge_operands += level_tps[level].num_merge_operands; - sum_tp.num_range_deletions += level_tps[level].num_range_deletions; - } - db_->GetProperty(DB::Properties::kAggregatedTableProperties, &tp_string); - ParseTablePropertiesString(tp_string, &tp); - bool index_key_is_user_key = tp.index_key_is_user_key > 0; - bool value_is_delta_encoded = tp.index_value_is_delta_encoded > 0; - ASSERT_EQ(sum_tp.data_size, tp.data_size); - ASSERT_EQ(sum_tp.index_size, tp.index_size); - ASSERT_EQ(sum_tp.filter_size, tp.filter_size); - ASSERT_EQ(sum_tp.raw_key_size, tp.raw_key_size); - ASSERT_EQ(sum_tp.raw_value_size, tp.raw_value_size); - ASSERT_EQ(sum_tp.num_data_blocks, tp.num_data_blocks); - ASSERT_EQ(sum_tp.num_entries, tp.num_entries); - ASSERT_EQ(sum_tp.num_deletions, tp.num_deletions); - ASSERT_EQ(sum_tp.num_merge_operands, tp.num_merge_operands); - ASSERT_EQ(sum_tp.num_range_deletions, tp.num_range_deletions); - if (table > 3) { - GetExpectedTableProperties( - &expected_tp, kKeySize, kValueSize, kPutsPerTable, kDeletionsPerTable, - kMergeOperandsPerTable, kRangeDeletionsPerTable, table, - kBloomBitsPerKey, table_options.block_size, index_key_is_user_key, - value_is_delta_encoded); - // Gives larger bias here as index block size, filter block size, - // and data block size become much harder to estimate in this test. - VerifyTableProperties(expected_tp, tp, CACHE_LINE_SIZE >= 256 ? 
0.6 : 0.5, - 0.5, 0.5, 0.25); - } - } -} - -TEST_F(DBPropertiesTest, NumImmutableMemTable) { - do { - Options options = CurrentOptions(); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - options.write_buffer_size = 1000000; - options.max_write_buffer_size_to_maintain = - 5 * static_cast(options.write_buffer_size); - CreateAndReopenWithCF({"pikachu"}, options); - - std::string big_value(1000000 * 2, 'x'); - std::string num; - uint64_t value; - SetPerfLevel(kEnableTime); - ASSERT_TRUE(GetPerfLevel() == kEnableTime); - - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k1", big_value)); - ASSERT_TRUE(dbfull()->GetProperty(handles_[1], - "rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], DB::Properties::kNumImmutableMemTableFlushed, &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ(num, "1"); - get_perf_context()->Reset(); - Get(1, "k1"); - ASSERT_EQ(1, static_cast(get_perf_context()->get_from_memtable_count)); - - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value)); - ASSERT_TRUE(dbfull()->GetProperty(handles_[1], - "rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-imm-mem-tables", &num)); - ASSERT_EQ(num, "1"); - - get_perf_context()->Reset(); - Get(1, "k1"); - ASSERT_EQ(2, static_cast(get_perf_context()->get_from_memtable_count)); - get_perf_context()->Reset(); - Get(1, "k2"); - ASSERT_EQ(1, static_cast(get_perf_context()->get_from_memtable_count)); - - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k3", big_value)); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.cur-size-active-mem-table", &num)); - ASSERT_TRUE(dbfull()->GetProperty(handles_[1], - "rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "2"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], "rocksdb.num-entries-imm-mem-tables", &num)); - ASSERT_EQ(num, "2"); - get_perf_context()->Reset(); - Get(1, "k2"); - ASSERT_EQ(2, static_cast(get_perf_context()->get_from_memtable_count)); - get_perf_context()->Reset(); - Get(1, "k3"); - ASSERT_EQ(1, static_cast(get_perf_context()->get_from_memtable_count)); - get_perf_context()->Reset(); - Get(1, "k1"); - ASSERT_EQ(3, static_cast(get_perf_context()->get_from_memtable_count)); - - ASSERT_OK(Flush(1)); - ASSERT_TRUE(dbfull()->GetProperty(handles_[1], - "rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty( - handles_[1], DB::Properties::kNumImmutableMemTableFlushed, &num)); - ASSERT_EQ(num, "3"); - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.cur-size-active-mem-table", &value)); - // "192" is the size of the metadata of two empty skiplists, this would - // break if we change the default skiplist implementation - ASSERT_GE(value, 192); - - uint64_t int_num; - uint64_t base_total_size; - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.estimate-num-keys", &base_total_size)); - - ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k2")); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], 
"k3", "")); - ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k3")); - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.num-deletes-active-mem-table", &int_num)); - ASSERT_EQ(int_num, 2U); - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.num-entries-active-mem-table", &int_num)); - ASSERT_EQ(int_num, 3U); - - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value)); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value)); - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.num-entries-imm-mem-tables", &int_num)); - ASSERT_EQ(int_num, 4U); - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.num-deletes-imm-mem-tables", &int_num)); - ASSERT_EQ(int_num, 2U); - - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.estimate-num-keys", &int_num)); - ASSERT_EQ(int_num, base_total_size + 1); - - SetPerfLevel(kDisable); - ASSERT_TRUE(GetPerfLevel() == kDisable); - } while (ChangeCompactOptions()); -} - -// TODO(techdept) : Disabled flaky test #12863555 -TEST_F(DBPropertiesTest, DISABLED_GetProperty) { - // Set sizes to both background thread pool to be 1 and block them. - env_->SetBackgroundThreads(1, Env::HIGH); - env_->SetBackgroundThreads(1, Env::LOW); - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - test::SleepingBackgroundTask sleeping_task_high; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_task_high, Env::Priority::HIGH); - - Options options = CurrentOptions(); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - options.compaction_style = kCompactionStyleUniversal; - options.level0_file_num_compaction_trigger = 1; - options.compaction_options_universal.size_ratio = 50; - options.max_background_compactions = 1; - options.max_background_flushes = 1; - options.max_write_buffer_number = 10; - options.min_write_buffer_number_to_merge = 1; - options.max_write_buffer_size_to_maintain = 0; - options.write_buffer_size = 1000000; - Reopen(options); - - std::string big_value(1000000 * 2, 'x'); - std::string num; - uint64_t int_num; - SetPerfLevel(kEnableTime); - - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_EQ(int_num, 0U); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-live-data-size", &int_num)); - ASSERT_EQ(int_num, 0U); - - ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value)); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); - ASSERT_EQ(num, "1"); - get_perf_context()->Reset(); - - ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value)); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "1"); - ASSERT_OK(dbfull()->Delete(writeOpt, "k-non-existing")); - ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value)); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); - ASSERT_EQ(num, "2"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); - ASSERT_EQ(num, "0"); - 
ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); - ASSERT_EQ(num, "2"); - // Verify the same set of properties through GetIntProperty - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-immutable-mem-table", &int_num)); - ASSERT_EQ(int_num, 2U); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.mem-table-flush-pending", &int_num)); - ASSERT_EQ(int_num, 1U); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.compaction-pending", &int_num)); - ASSERT_EQ(int_num, 0U); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num)); - ASSERT_EQ(int_num, 2U); - - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_EQ(int_num, 0U); - - sleeping_task_high.WakeUp(); - sleeping_task_high.WaitUntilDone(); - dbfull()->TEST_WaitForFlushMemTable(); - - ASSERT_OK(dbfull()->Put(writeOpt, "k4", big_value)); - ASSERT_OK(dbfull()->Put(writeOpt, "k5", big_value)); - dbfull()->TEST_WaitForFlushMemTable(); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); - ASSERT_EQ(num, "0"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); - ASSERT_EQ(num, "1"); - ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); - ASSERT_EQ(num, "4"); - - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_GT(int_num, 0U); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - - // Wait for compaction to be done. This is important because otherwise RocksDB - // might schedule a compaction when reopening the database, failing assertion - // (A) as a result. - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - options.max_open_files = 10; - Reopen(options); - // After reopening, no table reader is loaded, so no memory for table readers - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_EQ(int_num, 0U); // (A) - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num)); - ASSERT_GT(int_num, 0U); - - // After reading a key, at least one table reader is loaded. - Get("k5"); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_GT(int_num, 0U); - - // Test rocksdb.num-live-versions - { - options.level0_file_num_compaction_trigger = 20; - Reopen(options); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 1U); - - // Use an iterator to hold current version - std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); - - ASSERT_OK(dbfull()->Put(writeOpt, "k6", big_value)); - ASSERT_OK(Flush()); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 2U); - - // Use an iterator to hold current version - std::unique_ptr iter2(dbfull()->NewIterator(ReadOptions())); - - ASSERT_OK(dbfull()->Put(writeOpt, "k7", big_value)); - ASSERT_OK(Flush()); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 3U); - - iter2.reset(); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 2U); - - iter1.reset(); - ASSERT_TRUE( - dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num)); - ASSERT_EQ(int_num, 1U); - } -} - -TEST_F(DBPropertiesTest, ApproximateMemoryUsage) { - const int kNumRounds = 10; - // TODO(noetzli) kFlushesPerRound does not really correlate with how many - // flushes happen. 
- const int kFlushesPerRound = 10; - const int kWritesPerFlush = 10; - const int kKeySize = 100; - const int kValueSize = 1000; - Options options; - options.write_buffer_size = 1000; // small write buffer - options.min_write_buffer_number_to_merge = 4; - options.compression = kNoCompression; - options.create_if_missing = true; - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - - std::vector iters; - - uint64_t active_mem; - uint64_t unflushed_mem; - uint64_t all_mem; - uint64_t prev_all_mem; - - // Phase 0. The verify the initial value of all these properties are the same - // as we have no mem-tables. - dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - ASSERT_EQ(all_mem, active_mem); - ASSERT_EQ(all_mem, unflushed_mem); - - // Phase 1. Simply issue Put() and expect "cur-size-all-mem-tables" equals to - // "size-all-mem-tables" - for (int r = 0; r < kNumRounds; ++r) { - for (int f = 0; f < kFlushesPerRound; ++f) { - for (int w = 0; w < kWritesPerFlush; ++w) { - ASSERT_OK( - Put(rnd.RandomString(kKeySize), rnd.RandomString(kValueSize))); - } - } - // Make sure that there is no flush between getting the two properties. - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - // in no iterator case, these two number should be the same. - ASSERT_EQ(unflushed_mem, all_mem); - } - prev_all_mem = all_mem; - - // Phase 2. Keep issuing Put() but also create new iterators. This time we - // expect "size-all-mem-tables" > "cur-size-all-mem-tables". - for (int r = 0; r < kNumRounds; ++r) { - iters.push_back(db_->NewIterator(ReadOptions())); - for (int f = 0; f < kFlushesPerRound; ++f) { - for (int w = 0; w < kWritesPerFlush; ++w) { - ASSERT_OK( - Put(rnd.RandomString(kKeySize), rnd.RandomString(kValueSize))); - } - } - // Force flush to prevent flush from happening between getting the - // properties or after getting the properties and before the new round. - ASSERT_OK(Flush()); - - // In the second round, add iterators. - dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - ASSERT_GT(all_mem, active_mem); - ASSERT_GT(all_mem, unflushed_mem); - ASSERT_GT(all_mem, prev_all_mem); - prev_all_mem = all_mem; - } - - // Phase 3. Delete iterators and expect "size-all-mem-tables" shrinks - // whenever we release an iterator. - for (auto* iter : iters) { - ASSERT_OK(iter->status()); - delete iter; - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - // Expect the size shrinking - ASSERT_LT(all_mem, prev_all_mem); - prev_all_mem = all_mem; - } - - // Expect all these three counters to be the same. - dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - ASSERT_EQ(active_mem, unflushed_mem); - ASSERT_EQ(unflushed_mem, all_mem); - - // Phase 5. Reopen, and expect all these three counters to be the same again. 
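The three properties compared across the phases above are related but not interchangeable: cur-size-active-mem-table covers only the active memtable, cur-size-all-mem-tables adds the unflushed immutable ones, and size-all-mem-tables additionally counts flushed memtables still pinned by live iterators or snapshots. A small sketch of reading all three:

    #include <cstdint>

    #include "rocksdb/db.h"

    // Sketch: the three memtable-size properties, from narrowest to widest.
    void MemtableSizes(rocksdb::DB* db, uint64_t* active, uint64_t* unflushed,
                       uint64_t* all) {
      db->GetIntProperty("rocksdb.cur-size-active-mem-table", active);
      db->GetIntProperty("rocksdb.cur-size-all-mem-tables", unflushed);
      db->GetIntProperty("rocksdb.size-all-mem-tables", all);
    }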
- Reopen(options); - dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table", &active_mem); - dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables", &unflushed_mem); - dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem); - ASSERT_EQ(active_mem, unflushed_mem); - ASSERT_EQ(unflushed_mem, all_mem); -} - -TEST_F(DBPropertiesTest, EstimatePendingCompBytes) { - // Set sizes to both background thread pool to be 1 and block them. - env_->SetBackgroundThreads(1, Env::HIGH); - env_->SetBackgroundThreads(1, Env::LOW); - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - Options options = CurrentOptions(); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - options.compaction_style = kCompactionStyleLevel; - options.level0_file_num_compaction_trigger = 2; - options.max_background_compactions = 1; - options.max_background_flushes = 1; - options.max_write_buffer_number = 10; - options.min_write_buffer_number_to_merge = 1; - options.max_write_buffer_size_to_maintain = 0; - options.write_buffer_size = 1000000; - Reopen(options); - - std::string big_value(1000000 * 2, 'x'); - std::string num; - uint64_t int_num; - - ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value)); - ASSERT_OK(Flush()); - ASSERT_TRUE(dbfull()->GetIntProperty( - "rocksdb.estimate-pending-compaction-bytes", &int_num)); - ASSERT_EQ(int_num, 0U); - - ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value)); - ASSERT_OK(Flush()); - ASSERT_TRUE(dbfull()->GetIntProperty( - "rocksdb.estimate-pending-compaction-bytes", &int_num)); - ASSERT_GT(int_num, 0U); - - ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value)); - ASSERT_OK(Flush()); - ASSERT_TRUE(dbfull()->GetIntProperty( - "rocksdb.estimate-pending-compaction-bytes", &int_num)); - ASSERT_GT(int_num, 0U); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(dbfull()->GetIntProperty( - "rocksdb.estimate-pending-compaction-bytes", &int_num)); - ASSERT_EQ(int_num, 0U); -} - -TEST_F(DBPropertiesTest, EstimateCompressionRatio) { - if (!Snappy_Supported()) { - return; - } - const int kNumL0Files = 3; - const int kNumEntriesPerFile = 1000; - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.num_levels = 3; - Reopen(options); - - ASSERT_OK(db_->SetOptions( - {{"compression_per_level", "kNoCompression:kSnappyCompression"}})); - auto opts = db_->GetOptions(); - ASSERT_EQ(opts.compression_per_level.size(), 2); - ASSERT_EQ(opts.compression_per_level[0], kNoCompression); - ASSERT_EQ(opts.compression_per_level[1], kSnappyCompression); - - // compression ratio is -1.0 when no open files at level - ASSERT_EQ(CompressionRatioAtLevel(0), -1.0); - - const std::string kVal(100, 'a'); - for (int i = 0; i < kNumL0Files; ++i) { - for (int j = 0; j < kNumEntriesPerFile; ++j) { - // Put common data ("key") at end to prevent delta encoding from - // compressing the key effectively - std::string key = std::to_string(i) + std::to_string(j) + "key"; - ASSERT_OK(dbfull()->Put(WriteOptions(), key, kVal)); - } - ASSERT_OK(Flush()); - } - - // no compression at L0, so ratio is less than one - ASSERT_LT(CompressionRatioAtLevel(0), 1.0); - ASSERT_GT(CompressionRatioAtLevel(0), 0.0); - ASSERT_EQ(CompressionRatioAtLevel(1), -1.0); - - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); - - ASSERT_EQ(CompressionRatioAtLevel(0), -1.0); - // Data at L1 should be 
highly compressed thanks to Snappy and redundant data - // in values (ratio is 12.846 as of 4/19/2016). - ASSERT_GT(CompressionRatioAtLevel(1), 10.0); -} - - -class CountingUserTblPropCollector : public TablePropertiesCollector { - public: - const char* Name() const override { return "CountingUserTblPropCollector"; } - - Status Finish(UserCollectedProperties* properties) override { - std::string encoded; - PutVarint32(&encoded, count_); - *properties = UserCollectedProperties{ - {"CountingUserTblPropCollector", message_}, - {"Count", encoded}, - }; - return Status::OK(); - } - - Status AddUserKey(const Slice& /*user_key*/, const Slice& /*value*/, - EntryType /*type*/, SequenceNumber /*seq*/, - uint64_t /*file_size*/) override { - ++count_; - return Status::OK(); - } - - UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } - - private: - std::string message_ = "Rocksdb"; - uint32_t count_ = 0; -}; - -class CountingUserTblPropCollectorFactory - : public TablePropertiesCollectorFactory { - public: - explicit CountingUserTblPropCollectorFactory( - uint32_t expected_column_family_id) - : expected_column_family_id_(expected_column_family_id), - num_created_(0) {} - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context context) override { - EXPECT_EQ(expected_column_family_id_, context.column_family_id); - num_created_++; - return new CountingUserTblPropCollector(); - } - const char* Name() const override { - return "CountingUserTblPropCollectorFactory"; - } - void set_expected_column_family_id(uint32_t v) { - expected_column_family_id_ = v; - } - uint32_t expected_column_family_id_; - uint32_t num_created_; -}; - -class CountingDeleteTabPropCollector : public TablePropertiesCollector { - public: - const char* Name() const override { return "CountingDeleteTabPropCollector"; } - - Status AddUserKey(const Slice& /*user_key*/, const Slice& /*value*/, - EntryType type, SequenceNumber /*seq*/, - uint64_t /*file_size*/) override { - if (type == kEntryDelete) { - num_deletes_++; - } - return Status::OK(); - } - - bool NeedCompact() const override { return num_deletes_ > 10; } - - UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } - - Status Finish(UserCollectedProperties* properties) override { - *properties = - UserCollectedProperties{{"num_delete", std::to_string(num_deletes_)}}; - return Status::OK(); - } - - private: - uint32_t num_deletes_ = 0; -}; - -class CountingDeleteTabPropCollectorFactory - : public TablePropertiesCollectorFactory { - public: - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context /*context*/) override { - return new CountingDeleteTabPropCollector(); - } - const char* Name() const override { - return "CountingDeleteTabPropCollectorFactory"; - } -}; - -class BlockCountingTablePropertiesCollector : public TablePropertiesCollector { - public: - static const std::string kNumSampledBlocksPropertyName; - - const char* Name() const override { - return "BlockCountingTablePropertiesCollector"; - } - - Status Finish(UserCollectedProperties* properties) override { - (*properties)[kNumSampledBlocksPropertyName] = - std::to_string(num_sampled_blocks_); - return Status::OK(); - } - - Status AddUserKey(const Slice& /*user_key*/, const Slice& /*value*/, - EntryType /*type*/, SequenceNumber /*seq*/, - uint64_t /*file_size*/) override { - return Status::OK(); - } - - void BlockAdd(uint64_t /* 
block_uncomp_bytes */, - uint64_t block_compressed_bytes_fast, - uint64_t block_compressed_bytes_slow) override { - if (block_compressed_bytes_fast > 0 || block_compressed_bytes_slow > 0) { - num_sampled_blocks_++; - } - } - - UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{ - {kNumSampledBlocksPropertyName, std::to_string(num_sampled_blocks_)}, - }; - } - - private: - uint32_t num_sampled_blocks_ = 0; -}; - -const std::string - BlockCountingTablePropertiesCollector::kNumSampledBlocksPropertyName = - "NumSampledBlocks"; - -class BlockCountingTablePropertiesCollectorFactory - : public TablePropertiesCollectorFactory { - public: - const char* Name() const override { - return "BlockCountingTablePropertiesCollectorFactory"; - } - - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context /* context */) override { - return new BlockCountingTablePropertiesCollector(); - } -}; - -TEST_F(DBPropertiesTest, GetUserDefinedTableProperties) { - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = (1 << 30); - options.table_properties_collector_factories.resize(1); - std::shared_ptr collector_factory = - std::make_shared(0); - options.table_properties_collector_factories[0] = collector_factory; - Reopen(options); - // Create 4 tables - for (int table = 0; table < 4; ++table) { - for (int i = 0; i < 10 + table; ++i) { - ASSERT_OK( - db_->Put(WriteOptions(), std::to_string(table * 100 + i), "val")); - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - - TablePropertiesCollection props; - ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); - ASSERT_EQ(4U, props.size()); - uint32_t sum = 0; - for (const auto& item : props) { - auto& user_collected = item.second->user_collected_properties; - ASSERT_TRUE(user_collected.find("CountingUserTblPropCollector") != - user_collected.end()); - ASSERT_EQ(user_collected.at("CountingUserTblPropCollector"), "Rocksdb"); - ASSERT_TRUE(user_collected.find("Count") != user_collected.end()); - Slice key(user_collected.at("Count")); - uint32_t count; - ASSERT_TRUE(GetVarint32(&key, &count)); - sum += count; - } - ASSERT_EQ(10u + 11u + 12u + 13u, sum); - - ASSERT_GT(collector_factory->num_created_, 0U); - collector_factory->num_created_ = 0; - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); - ASSERT_GT(collector_factory->num_created_, 0U); -} - -TEST_F(DBPropertiesTest, UserDefinedTablePropertiesContext) { - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 3; - options.table_properties_collector_factories.resize(1); - std::shared_ptr collector_factory = - std::make_shared(1); - options.table_properties_collector_factories[0] = collector_factory, - CreateAndReopenWithCF({"pikachu"}, options); - // Create 2 files - for (int table = 0; table < 2; ++table) { - for (int i = 0; i < 10 + table; ++i) { - ASSERT_OK(Put(1, std::to_string(table * 100 + i), "val")); - } - ASSERT_OK(Flush(1)); - } - ASSERT_GT(collector_factory->num_created_, 0U); - - collector_factory->num_created_ = 0; - // Trigger automatic compactions. 
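For context on the collector classes above: a factory is attached through Options::table_properties_collector_factories before the DB is opened, and whatever each collector's Finish() stores ends up in user_collected_properties on every SST written afterwards. A sketch of the read-back side (MyCollectorFactory is a stand-in name, not a RocksDB type):

    #include <cassert>

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"
    #include "rocksdb/table_properties.h"

    // Sketch: read back user-defined table properties for every live SST file.
    // Wiring (before DB::Open):
    //   options.table_properties_collector_factories.emplace_back(
    //       std::make_shared<MyCollectorFactory>());  // hypothetical factory
    void DumpUserProperties(rocksdb::DB* db) {
      rocksdb::TablePropertiesCollection props;
      assert(db->GetPropertiesOfAllTables(&props).ok());
      for (const auto& file_and_props : props) {
        const auto& user = file_and_props.second->user_collected_properties;
        (void)user;  // e.g. user.at("Count") for the counting collector above
      }
    }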
- for (int table = 0; table < 3; ++table) { - for (int i = 0; i < 10 + table; ++i) { - ASSERT_OK(Put(1, std::to_string(table * 100 + i), "val")); - } - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_GT(collector_factory->num_created_, 0U); - - collector_factory->num_created_ = 0; - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); - ASSERT_GT(collector_factory->num_created_, 0U); - - // Come back to write to default column family - collector_factory->num_created_ = 0; - collector_factory->set_expected_column_family_id(0); // default CF - // Create 4 tables in default column family - for (int table = 0; table < 2; ++table) { - for (int i = 0; i < 10 + table; ++i) { - ASSERT_OK(Put(std::to_string(table * 100 + i), "val")); - } - ASSERT_OK(Flush()); - } - ASSERT_GT(collector_factory->num_created_, 0U); - - collector_factory->num_created_ = 0; - // Trigger automatic compactions. - for (int table = 0; table < 3; ++table) { - for (int i = 0; i < 10 + table; ++i) { - ASSERT_OK(Put(std::to_string(table * 100 + i), "val")); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_GT(collector_factory->num_created_, 0U); - - collector_factory->num_created_ = 0; - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); - ASSERT_GT(collector_factory->num_created_, 0U); -} - -TEST_F(DBPropertiesTest, TablePropertiesNeedCompactTest) { - Random rnd(301); - - Options options; - options.create_if_missing = true; - options.write_buffer_size = 4096; - options.max_write_buffer_number = 8; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 4; - options.target_file_size_base = 2048; - options.max_bytes_for_level_base = 10240; - options.max_bytes_for_level_multiplier = 4; - options.soft_pending_compaction_bytes_limit = 1024 * 1024; - options.num_levels = 8; - options.env = env_; - - std::shared_ptr collector_factory = - std::make_shared(); - options.table_properties_collector_factories.resize(1); - options.table_properties_collector_factories[0] = collector_factory; - - DestroyAndReopen(options); - - const int kMaxKey = 1000; - for (int i = 0; i < kMaxKey; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(102))); - ASSERT_OK(Put(Key(kMaxKey + i), rnd.RandomString(102))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - if (NumTableFilesAtLevel(0) == 1) { - // Clear Level 0 so that when later flush a file with deletions, - // we don't trigger an organic compaction. 
- ASSERT_OK(Put(Key(0), "")); - ASSERT_OK(Put(Key(kMaxKey * 2), "")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - - { - int c = 0; - std::unique_ptr iter(db_->NewIterator(ReadOptions())); - iter->Seek(Key(kMaxKey - 100)); - while (iter->Valid() && iter->key().compare(Key(kMaxKey + 100)) < 0) { - iter->Next(); - ++c; - } - ASSERT_OK(iter->status()); - ASSERT_EQ(c, 200); - } - - ASSERT_OK(Delete(Key(0))); - for (int i = kMaxKey - 100; i < kMaxKey + 100; i++) { - ASSERT_OK(Delete(Key(i))); - } - ASSERT_OK(Delete(Key(kMaxKey * 2))); - - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - { - SetPerfLevel(kEnableCount); - get_perf_context()->Reset(); - int c = 0; - std::unique_ptr iter(db_->NewIterator(ReadOptions())); - iter->Seek(Key(kMaxKey - 100)); - while (iter->Valid() && iter->key().compare(Key(kMaxKey + 100)) < 0) { - iter->Next(); - } - ASSERT_OK(iter->status()); - ASSERT_EQ(c, 0); - ASSERT_LT(get_perf_context()->internal_delete_skipped_count, 30u); - ASSERT_LT(get_perf_context()->internal_key_skipped_count, 30u); - SetPerfLevel(kDisable); - } -} - -TEST_F(DBPropertiesTest, NeedCompactHintPersistentTest) { - Random rnd(301); - - Options options; - options.create_if_missing = true; - options.max_write_buffer_number = 8; - options.level0_file_num_compaction_trigger = 10; - options.level0_slowdown_writes_trigger = 10; - options.level0_stop_writes_trigger = 10; - options.disable_auto_compactions = true; - options.env = env_; - - std::shared_ptr collector_factory = - std::make_shared(); - options.table_properties_collector_factories.resize(1); - options.table_properties_collector_factories[0] = collector_factory; - - DestroyAndReopen(options); - - const int kMaxKey = 100; - for (int i = 0; i < kMaxKey; i++) { - ASSERT_OK(Put(Key(i), "")); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - for (int i = 1; i < kMaxKey - 1; i++) { - ASSERT_OK(Delete(Key(i))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(NumTableFilesAtLevel(0), 2); - - // Restart the DB. Although number of files didn't reach - // options.level0_file_num_compaction_trigger, compaction should - // still be triggered because of the need-compaction hint. - options.disable_auto_compactions = false; - Reopen(options); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - { - SetPerfLevel(kEnableCount); - get_perf_context()->Reset(); - int c = 0; - std::unique_ptr iter(db_->NewIterator(ReadOptions())); - for (iter->Seek(Key(0)); iter->Valid(); iter->Next()) { - c++; - } - ASSERT_OK(iter->status()); - ASSERT_EQ(c, 2); - ASSERT_EQ(get_perf_context()->internal_delete_skipped_count, 0); - // We iterate every key twice. Is it a bug? - ASSERT_LE(get_perf_context()->internal_key_skipped_count, 2); - SetPerfLevel(kDisable); - } -} - -// Excluded from RocksDB lite tests due to `GetPropertiesOfAllTables()` usage. -TEST_F(DBPropertiesTest, BlockAddForCompressionSampling) { - // Sampled compression requires at least one of the following four types. 
- if (!Snappy_Supported() && !Zlib_Supported() && !LZ4_Supported() && - !ZSTD_Supported()) { - return; - } - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.table_properties_collector_factories.emplace_back( - std::make_shared()); - - for (bool sample_for_compression : {false, true}) { - // For simplicity/determinism, sample 100% when enabled, or 0% when disabled - options.sample_for_compression = sample_for_compression ? 1 : 0; - - DestroyAndReopen(options); - - // Setup the following LSM: - // - // L0_0 ["a", "b"] - // L1_0 ["a", "b"] - // - // L0_0 was created by flush. L1_0 was created by compaction. Each file - // contains one data block. - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("a", "val")); - ASSERT_OK(Put("b", "val")); - ASSERT_OK(Flush()); - if (i == 1) { - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - } - - // A `BlockAdd()` should have been seen for files generated by flush or - // compaction when `sample_for_compression` is enabled. - TablePropertiesCollection file_to_props; - ASSERT_OK(db_->GetPropertiesOfAllTables(&file_to_props)); - ASSERT_EQ(2, file_to_props.size()); - for (const auto& file_and_props : file_to_props) { - auto& user_props = file_and_props.second->user_collected_properties; - ASSERT_TRUE(user_props.find(BlockCountingTablePropertiesCollector:: - kNumSampledBlocksPropertyName) != - user_props.end()); - ASSERT_EQ(user_props.at(BlockCountingTablePropertiesCollector:: - kNumSampledBlocksPropertyName), - std::to_string(sample_for_compression ? 1 : 0)); - } - } -} - -class CompressionSamplingDBPropertiesTest - : public DBPropertiesTest, - public ::testing::WithParamInterface { - public: - CompressionSamplingDBPropertiesTest() : fast_(GetParam()) {} - - protected: - const bool fast_; -}; - -INSTANTIATE_TEST_CASE_P(CompressionSamplingDBPropertiesTest, - CompressionSamplingDBPropertiesTest, ::testing::Bool()); - -// Excluded from RocksDB lite tests due to `GetPropertiesOfAllTables()` usage. -TEST_P(CompressionSamplingDBPropertiesTest, - EstimateDataSizeWithCompressionSampling) { - Options options = CurrentOptions(); - if (fast_) { - // One of the following light compression libraries must be present. - if (LZ4_Supported()) { - options.compression = kLZ4Compression; - } else if (Snappy_Supported()) { - options.compression = kSnappyCompression; - } else { - return; - } - } else { - // One of the following heavy compression libraries must be present. - if (ZSTD_Supported()) { - options.compression = kZSTD; - } else if (Zlib_Supported()) { - options.compression = kZlibCompression; - } else { - return; - } - } - options.disable_auto_compactions = true; - // For simplicity/determinism, sample 100%. - options.sample_for_compression = 1; - Reopen(options); - - // Setup the following LSM: - // - // L0_0 ["a", "b"] - // L1_0 ["a", "b"] - // - // L0_0 was created by flush. L1_0 was created by compaction. Each file - // contains one data block. The value consists of compressible data so the - // data block should be stored compressed. 
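The two sampling tests above hinge on one option: when Options::sample_for_compression is non-zero, table building compresses a sample of blocks with a fast and a slow codec and records the projected sizes alongside data_size in each file's TableProperties. A sketch of inspecting those fields, assuming a DB opened with sampling enabled:

    #include <iostream>

    #include "rocksdb/db.h"
    #include "rocksdb/table_properties.h"

    // Sketch: compare on-disk data size with the sampled compression estimates.
    void PrintCompressionEstimates(rocksdb::DB* db) {
      rocksdb::TablePropertiesCollection props;
      if (!db->GetPropertiesOfAllTables(&props).ok()) {
        return;
      }
      for (const auto& file_and_props : props) {
        const rocksdb::TableProperties& tp = *file_and_props.second;
        std::cout << file_and_props.first << " data=" << tp.data_size
                  << " fast_est=" << tp.fast_compression_estimated_data_size
                  << " slow_est=" << tp.slow_compression_estimated_data_size
                  << std::endl;
      }
    }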
- std::string val(1024, 'a'); - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("a", val)); - ASSERT_OK(Put("b", val)); - ASSERT_OK(Flush()); - if (i == 1) { - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - } - - TablePropertiesCollection file_to_props; - ASSERT_OK(db_->GetPropertiesOfAllTables(&file_to_props)); - ASSERT_EQ(2, file_to_props.size()); - for (const auto& file_and_props : file_to_props) { - ASSERT_GT(file_and_props.second->data_size, 0); - if (fast_) { - ASSERT_EQ(file_and_props.second->data_size, - file_and_props.second->fast_compression_estimated_data_size); - } else { - ASSERT_EQ(file_and_props.second->data_size, - file_and_props.second->slow_compression_estimated_data_size); - } - } -} - -TEST_F(DBPropertiesTest, EstimateNumKeysUnderflow) { - Options options = CurrentOptions(); - Reopen(options); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Delete("foo")); - ASSERT_OK(Delete("foo")); - uint64_t num_keys = 0; - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &num_keys)); - ASSERT_EQ(0, num_keys); -} - -TEST_F(DBPropertiesTest, EstimateOldestKeyTime) { - uint64_t oldest_key_time = 0; - Options options = CurrentOptions(); - SetTimeElapseOnlySleepOnReopen(&options); - - // "rocksdb.estimate-oldest-key-time" only available to fifo compaction. - for (auto compaction : {kCompactionStyleLevel, kCompactionStyleUniversal, - kCompactionStyleNone}) { - options.compaction_style = compaction; - options.create_if_missing = true; - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "bar")); - ASSERT_FALSE(dbfull()->GetIntProperty( - DB::Properties::kEstimateOldestKeyTime, &oldest_key_time)); - } - - int64_t mock_start_time; - ASSERT_OK(env_->GetCurrentTime(&mock_start_time)); - - options.compaction_style = kCompactionStyleFIFO; - options.ttl = 300; - options.max_open_files = -1; - options.compaction_options_fifo.allow_compaction = false; - DestroyAndReopen(options); - - env_->MockSleepForSeconds(100); - ASSERT_OK(Put("k1", "v1")); - ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, - &oldest_key_time)); - ASSERT_EQ(100, oldest_key_time - mock_start_time); - ASSERT_OK(Flush()); - ASSERT_EQ("1", FilesPerLevel()); - ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, - &oldest_key_time)); - ASSERT_EQ(100, oldest_key_time - mock_start_time); - - env_->MockSleepForSeconds(100); // -> 200 - ASSERT_OK(Put("k2", "v2")); - ASSERT_OK(Flush()); - ASSERT_EQ("2", FilesPerLevel()); - ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, - &oldest_key_time)); - ASSERT_EQ(100, oldest_key_time - mock_start_time); - - env_->MockSleepForSeconds(100); // -> 300 - ASSERT_OK(Put("k3", "v3")); - ASSERT_OK(Flush()); - ASSERT_EQ("3", FilesPerLevel()); - ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, - &oldest_key_time)); - ASSERT_EQ(100, oldest_key_time - mock_start_time); - - env_->MockSleepForSeconds(150); // -> 450 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("2", FilesPerLevel()); - ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, - &oldest_key_time)); - ASSERT_EQ(200, oldest_key_time - mock_start_time); - - env_->MockSleepForSeconds(100); // -> 550 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("1", FilesPerLevel()); - ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, - &oldest_key_time)); - ASSERT_EQ(300, oldest_key_time 
- mock_start_time); - - env_->MockSleepForSeconds(100); // -> 650 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("", FilesPerLevel()); - ASSERT_FALSE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, - &oldest_key_time)); -} - -TEST_F(DBPropertiesTest, SstFilesSize) { - struct TestListener : public EventListener { - void OnCompactionCompleted(DB* db, - const CompactionJobInfo& /*info*/) override { - assert(callback_triggered == false); - assert(size_before_compaction > 0); - callback_triggered = true; - uint64_t total_sst_size = 0; - uint64_t live_sst_size = 0; - bool ok = db->GetIntProperty(DB::Properties::kTotalSstFilesSize, - &total_sst_size); - ASSERT_TRUE(ok); - // total_sst_size include files before and after compaction. - ASSERT_GT(total_sst_size, size_before_compaction); - ok = - db->GetIntProperty(DB::Properties::kLiveSstFilesSize, &live_sst_size); - ASSERT_TRUE(ok); - // live_sst_size only include files after compaction. - ASSERT_GT(live_sst_size, 0); - ASSERT_LT(live_sst_size, size_before_compaction); - } - - uint64_t size_before_compaction = 0; - bool callback_triggered = false; - }; - std::shared_ptr listener = std::make_shared(); - - Options options; - options.env = CurrentOptions().env; - options.disable_auto_compactions = true; - options.listeners.push_back(listener); - Reopen(options); - - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put("key" + std::to_string(i), std::string(1000, 'v'))); - } - ASSERT_OK(Flush()); - for (int i = 0; i < 5; i++) { - ASSERT_OK(Delete("key" + std::to_string(i))); - } - ASSERT_OK(Flush()); - uint64_t sst_size; - bool ok = db_->GetIntProperty(DB::Properties::kTotalSstFilesSize, &sst_size); - ASSERT_TRUE(ok); - ASSERT_GT(sst_size, 0); - listener->size_before_compaction = sst_size; - // Compact to clean all keys and trigger listener. - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_TRUE(listener->callback_triggered); -} - -TEST_F(DBPropertiesTest, MinObsoleteSstNumberToKeep) { - class TestListener : public EventListener { - public: - void OnTableFileCreated(const TableFileCreationInfo& info) override { - if (info.reason == TableFileCreationReason::kCompaction) { - // Verify the property indicates that SSTs created by a running - // compaction cannot be deleted. 
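The SstFilesSize test above distinguishes two related integer properties: per its own comments, rocksdb.total-sst-files-size counts SST files both before and after a compaction (anything still referenced), while rocksdb.live-sst-files-size counts only the files in the current version. A minimal sketch of querying both, assuming an open rocksdb::DB* db (the function is illustrative only):

    #include <cstdint>
    #include <iostream>
    #include "rocksdb/db.h"

    // Sketch: read the total vs. live SST file size properties.
    void ReportSstSizes(rocksdb::DB* db) {
      uint64_t total_sst = 0;
      uint64_t live_sst = 0;
      // Each call returns false if the property is unavailable.
      if (db->GetIntProperty(rocksdb::DB::Properties::kTotalSstFilesSize,
                             &total_sst) &&
          db->GetIntProperty(rocksdb::DB::Properties::kLiveSstFilesSize,
                             &live_sst)) {
        // total >= live; the difference is files kept only by ongoing
        // reads or compactions that have not been purged yet.
        std::cout << "total=" << total_sst << " live=" << live_sst << std::endl;
      }
    }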
- uint64_t created_file_num; - FileType created_file_type; - std::string filename = - info.file_path.substr(info.file_path.rfind('/') + 1); - ASSERT_TRUE( - ParseFileName(filename, &created_file_num, &created_file_type)); - ASSERT_EQ(kTableFile, created_file_type); - - uint64_t keep_sst_lower_bound; - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kMinObsoleteSstNumberToKeep, - &keep_sst_lower_bound)); - - ASSERT_LE(keep_sst_lower_bound, created_file_num); - validated_ = true; - } - } - - void SetDB(DB* db) { db_ = db; } - - int GetNumCompactions() { return num_compactions_; } - - // True if we've verified the property for at least one output file - bool Validated() { return validated_; } - - private: - int num_compactions_ = 0; - bool validated_ = false; - DB* db_ = nullptr; - }; - - const int kNumL0Files = 4; - - std::shared_ptr listener = std::make_shared(); - - Options options = CurrentOptions(); - options.listeners.push_back(listener); - options.level0_file_num_compaction_trigger = kNumL0Files; - DestroyAndReopen(options); - listener->SetDB(db_); - - for (int i = 0; i < kNumL0Files; ++i) { - // Make sure they overlap in keyspace to prevent trivial move - ASSERT_OK(Put("key1", "val")); - ASSERT_OK(Put("key2", "val")); - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_TRUE(listener->Validated()); -} - -TEST_F(DBPropertiesTest, BlobCacheProperties) { - Options options; - uint64_t value; - - options.env = CurrentOptions().env; - - // Test with empty blob cache. - constexpr size_t kCapacity = 100; - LRUCacheOptions co; - co.capacity = kCapacity; - co.num_shard_bits = 0; - co.metadata_charge_policy = kDontChargeCacheMetadata; - auto blob_cache = NewLRUCache(co); - options.blob_cache = blob_cache; - - Reopen(options); - - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value)); - ASSERT_EQ(0, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value)); - ASSERT_EQ(0, value); - - // Insert unpinned blob to the cache and check size. - constexpr size_t kSize1 = 70; - ASSERT_OK(blob_cache->Insert("blob1", nullptr /*value*/, - &kNoopCacheItemHelper, kSize1)); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value)); - ASSERT_EQ(kSize1, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value)); - ASSERT_EQ(0, value); - - // Insert pinned blob to the cache and check size. - constexpr size_t kSize2 = 60; - Cache::Handle* blob2 = nullptr; - ASSERT_OK(blob_cache->Insert("blob2", nullptr /*value*/, - &kNoopCacheItemHelper, kSize2, &blob2)); - ASSERT_NE(nullptr, blob2); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value)); - // blob1 is evicted. - ASSERT_EQ(kSize2, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value)); - ASSERT_EQ(kSize2, value); - - // Insert another pinned blob to make the cache over-sized. 
- constexpr size_t kSize3 = 80; - Cache::Handle* blob3 = nullptr; - ASSERT_OK(blob_cache->Insert("blob3", nullptr /*value*/, - &kNoopCacheItemHelper, kSize3, &blob3)); - ASSERT_NE(nullptr, blob3); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value)); - ASSERT_EQ(kSize2 + kSize3, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value)); - ASSERT_EQ(kSize2 + kSize3, value); - - // Check size after release. - blob_cache->Release(blob2); - blob_cache->Release(blob3); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value)); - // blob2 will be evicted, while blob3 remain in cache after release. - ASSERT_EQ(kSize3, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value)); - ASSERT_EQ(0, value); -} - -TEST_F(DBPropertiesTest, BlockCacheProperties) { - Options options; - uint64_t value; - - options.env = CurrentOptions().env; - - // Block cache properties are not available for tables other than - // block-based table. - options.table_factory.reset(NewPlainTableFactory()); - Reopen(options); - ASSERT_FALSE( - db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); - ASSERT_FALSE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); - ASSERT_FALSE( - db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); - - options.table_factory.reset(NewCuckooTableFactory()); - Reopen(options); - ASSERT_FALSE( - db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); - ASSERT_FALSE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); - ASSERT_FALSE( - db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); - - // Block cache properties are not available if block cache is not used. - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_FALSE( - db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); - ASSERT_FALSE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); - ASSERT_FALSE( - db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); - - // Test with empty block cache. - constexpr size_t kCapacity = 100; - LRUCacheOptions co; - co.capacity = kCapacity; - co.num_shard_bits = 0; - co.metadata_charge_policy = kDontChargeCacheMetadata; - auto block_cache = NewLRUCache(co); - table_options.block_cache = block_cache; - table_options.no_block_cache = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); - ASSERT_EQ(0, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); - ASSERT_EQ(0, value); - - // Insert unpinned item to the cache and check size. 
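As the BlockCacheProperties test demonstrates, the block-cache properties are only answerable when the column family uses a block-based table with a block cache attached; with PlainTable, Cuckoo, or no_block_cache the GetIntProperty calls return false. A hedged end-to-end sketch of wiring up an LRU block cache and polling it, assuming a hypothetical database path:

    #include <cassert>
    #include <cstdint>
    #include "rocksdb/cache.h"
    #include "rocksdb/db.h"
    #include "rocksdb/table.h"

    // Sketch: attach an LRU block cache and poll its usage through DB properties.
    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;

      rocksdb::BlockBasedTableOptions table_options;
      table_options.block_cache = rocksdb::NewLRUCache(64 << 20);  // 64 MB
      options.table_factory.reset(
          rocksdb::NewBlockBasedTableFactory(table_options));

      rocksdb::DB* db = nullptr;
      rocksdb::Status s = rocksdb::DB::Open(
          options, "/tmp/blockcache_demo" /* hypothetical path */, &db);
      assert(s.ok());

      uint64_t capacity = 0, usage = 0, pinned = 0;
      db->GetIntProperty(rocksdb::DB::Properties::kBlockCacheCapacity, &capacity);
      db->GetIntProperty(rocksdb::DB::Properties::kBlockCacheUsage, &usage);
      db->GetIntProperty(rocksdb::DB::Properties::kBlockCachePinnedUsage, &pinned);

      delete db;
      return 0;
    }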
- constexpr size_t kSize1 = 50; - ASSERT_OK(block_cache->Insert("item1", nullptr /*value*/, - &kNoopCacheItemHelper, kSize1)); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); - ASSERT_EQ(kSize1, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); - ASSERT_EQ(0, value); - - // Insert pinned item to the cache and check size. - constexpr size_t kSize2 = 30; - Cache::Handle* item2 = nullptr; - ASSERT_OK(block_cache->Insert("item2", nullptr /*value*/, - &kNoopCacheItemHelper, kSize2, &item2)); - ASSERT_NE(nullptr, item2); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); - ASSERT_EQ(kSize1 + kSize2, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); - ASSERT_EQ(kSize2, value); - - // Insert another pinned item to make the cache over-sized. - constexpr size_t kSize3 = 80; - Cache::Handle* item3 = nullptr; - ASSERT_OK(block_cache->Insert("item3", nullptr /*value*/, - &kNoopCacheItemHelper, kSize3, &item3)); - ASSERT_NE(nullptr, item2); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); - // Item 1 is evicted. - ASSERT_EQ(kSize2 + kSize3, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); - ASSERT_EQ(kSize2 + kSize3, value); - - // Check size after release. - block_cache->Release(item2); - block_cache->Release(item3); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); - ASSERT_EQ(kCapacity, value); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); - // item2 will be evicted, while item3 remain in cache after release. - ASSERT_EQ(kSize3, value); - ASSERT_TRUE( - db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); - ASSERT_EQ(0, value); -} - -TEST_F(DBPropertiesTest, GetMapPropertyDbStats) { - auto mock_clock = std::make_shared(env_->GetSystemClock()); - CompositeEnvWrapper env(env_, mock_clock); - - Options opts = CurrentOptions(); - opts.env = &env; - Reopen(opts); - - { - std::map db_stats; - ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats)); - AssertDbStats(db_stats, 0.0 /* expected_uptime */, - 0 /* expected_user_bytes_written */, - 0 /* expected_wal_bytes_written */, - 0 /* expected_user_writes_by_self */, - 0 /* expected_user_writes_with_wal */); - } - - { - mock_clock->SleepForMicroseconds(1500000); - - std::map db_stats; - ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats)); - AssertDbStats(db_stats, 1.5 /* expected_uptime */, - 0 /* expected_user_bytes_written */, - 0 /* expected_wal_bytes_written */, - 0 /* expected_user_writes_by_self */, - 0 /* expected_user_writes_with_wal */); - } - - int expected_user_bytes_written = 0; - { - // Write with WAL disabled. 
- WriteOptions write_opts; - write_opts.disableWAL = true; - - WriteBatch batch; - ASSERT_OK(batch.Put("key", "val")); - expected_user_bytes_written += static_cast(batch.GetDataSize()); - - ASSERT_OK(db_->Write(write_opts, &batch)); - - std::map db_stats; - ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats)); - AssertDbStats(db_stats, 1.5 /* expected_uptime */, - expected_user_bytes_written, - 0 /* expected_wal_bytes_written */, - 1 /* expected_user_writes_by_self */, - 0 /* expected_user_writes_with_wal */); - } - - int expected_wal_bytes_written = 0; - { - // Write with WAL enabled. - WriteBatch batch; - ASSERT_OK(batch.Delete("key")); - expected_user_bytes_written += static_cast(batch.GetDataSize()); - expected_wal_bytes_written += static_cast(batch.GetDataSize()); - - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - std::map db_stats; - ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kDBStats, &db_stats)); - AssertDbStats(db_stats, 1.5 /* expected_uptime */, - expected_user_bytes_written, expected_wal_bytes_written, - 2 /* expected_user_writes_by_self */, - 1 /* expected_user_writes_with_wal */); - } - - Close(); -} - -TEST_F(DBPropertiesTest, GetMapPropertyBlockCacheEntryStats) { - // Currently only verifies the expected properties are present - std::map values; - ASSERT_TRUE( - db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values)); - - ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::CacheId()) != - values.end()); - ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::CacheCapacityBytes()) != - values.end()); - ASSERT_TRUE( - values.find( - BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds()) != - values.end()); - ASSERT_TRUE( - values.find(BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds()) != - values.end()); - for (size_t i = 0; i < kNumCacheEntryRoles; ++i) { - CacheEntryRole role = static_cast(i); - ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::EntryCount(role)) != - values.end()); - ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::UsedBytes(role)) != - values.end()); - ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::UsedPercent(role)) != - values.end()); - } - - // There should be no extra values in the map. 
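Unlike the integer properties above, rocksdb.dbstats and rocksdb.block-cache-entry-stats are map properties: each call fills a std::map<std::string, std::string> whose keys are generated by helpers such as BlockCacheEntryStatsMapKeys. A minimal sketch of the read side, assuming an open rocksdb::DB* db:

    #include <iostream>
    #include <map>
    #include <string>
    #include "rocksdb/db.h"

    // Sketch: dump every key/value pair of the DB-stats map property.
    void DumpDbStats(rocksdb::DB* db) {
      std::map<std::string, std::string> stats;
      if (db->GetMapProperty(rocksdb::DB::Properties::kDBStats, &stats)) {
        for (const auto& kv : stats) {
          std::cout << kv.first << " = " << kv.second << "\n";
        }
      }
    }

The same call shape works for kBlockCacheEntryStats and the write-stall stats properties exercised below, only the set of keys differs.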
- ASSERT_EQ(3 * kNumCacheEntryRoles + 4, values.size()); -} - -TEST_F(DBPropertiesTest, WriteStallStatsSanityCheck) { - for (uint32_t i = 0; i < static_cast(WriteStallCause::kNone); ++i) { - WriteStallCause cause = static_cast(i); - const std::string& str = WriteStallCauseToHyphenString(cause); - ASSERT_TRUE(!str.empty()) - << "Please ensure mapping from `WriteStallCause` to " - "`WriteStallCauseToHyphenString` is complete"; - if (cause == WriteStallCause::kCFScopeWriteStallCauseEnumMax || - cause == WriteStallCause::kDBScopeWriteStallCauseEnumMax) { - ASSERT_EQ(str, InvalidWriteStallHyphenString()) - << "Please ensure order in `WriteStallCauseToHyphenString` is " - "consistent with `WriteStallCause`"; - } - } - - for (uint32_t i = 0; i < static_cast(WriteStallCondition::kNormal); - ++i) { - WriteStallCondition condition = static_cast(i); - const std::string& str = WriteStallConditionToHyphenString(condition); - ASSERT_TRUE(!str.empty()) - << "Please ensure mapping from `WriteStallCondition` to " - "`WriteStallConditionToHyphenString` is complete"; - } - - for (uint32_t i = 0; i < static_cast(WriteStallCause::kNone); ++i) { - for (uint32_t j = 0; - j < static_cast(WriteStallCondition::kNormal); ++j) { - WriteStallCause cause = static_cast(i); - WriteStallCondition condition = static_cast(j); - - if (isCFScopeWriteStallCause(cause)) { - ASSERT_TRUE(InternalCFStat(cause, condition) != - InternalStats::INTERNAL_CF_STATS_ENUM_MAX) - << "Please ensure the combination of WriteStallCause(" + - std::to_string(static_cast(cause)) + - ") + WriteStallCondition(" + - std::to_string(static_cast(condition)) + - ") is correctly mapped to a valid `InternalStats` or bypass " - "its check in this test"; - } else if (isDBScopeWriteStallCause(cause)) { - InternalStats::InternalDBStatsType internal_db_stat = - InternalDBStat(cause, condition); - if (internal_db_stat == InternalStats::kIntStatsNumMax) { - ASSERT_TRUE(cause == WriteStallCause::kWriteBufferManagerLimit && - condition == WriteStallCondition::kDelayed) - << "Please ensure the combination of WriteStallCause(" + - std::to_string(static_cast(cause)) + - ") + WriteStallCondition(" + - std::to_string(static_cast(condition)) + - ") is correctly mapped to a valid `InternalStats` or " - "bypass its check in this test"; - } - } else if (cause != WriteStallCause::kCFScopeWriteStallCauseEnumMax && - cause != WriteStallCause::kDBScopeWriteStallCauseEnumMax) { - ASSERT_TRUE(false) << "Please ensure the WriteStallCause(" + - std::to_string(static_cast(cause)) + - ") is either CF-scope or DB-scope write " - "stall cause in enum `WriteStallCause`"; - } - } - } -} -TEST_F(DBPropertiesTest, GetMapPropertyWriteStallStats) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"heavy_write_cf"}, options); - - for (auto test_cause : {WriteStallCause::kWriteBufferManagerLimit, - WriteStallCause::kMemtableLimit}) { - if (test_cause == WriteStallCause::kWriteBufferManagerLimit) { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, nullptr, true)); - } else if (test_cause == WriteStallCause::kMemtableLimit) { - options.max_write_buffer_number = 2; - options.disable_auto_compactions = true; - } - ReopenWithColumnFamilies({"default", "heavy_write_cf"}, options); - - // Assert initial write stall stats are all 0 - std::map db_values; - ASSERT_TRUE(dbfull()->GetMapProperty(DB::Properties::kDBWriteStallStats, - &db_values)); - ASSERT_EQ(std::stoi(db_values[WriteStallStatsMapKeys::CauseConditionCount( - WriteStallCause::kWriteBufferManagerLimit, - 
WriteStallCondition::kStopped)]), - 0); - - for (int cf = 0; cf <= 1; ++cf) { - std::map cf_values; - ASSERT_TRUE(dbfull()->GetMapProperty( - handles_[cf], DB::Properties::kCFWriteStallStats, &cf_values)); - ASSERT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalStops()]), 0); - ASSERT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalDelays()]), 0); - } - - // Pause flush thread to help coerce write stall - std::unique_ptr sleeping_task( - new test::SleepingBackgroundTask()); - env_->SetBackgroundThreads(1, Env::HIGH); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - sleeping_task.get(), Env::Priority::HIGH); - sleeping_task->WaitUntilSleeping(); - - // Coerce write stall - if (test_cause == WriteStallCause::kWriteBufferManagerLimit) { - ASSERT_OK(dbfull()->Put( - WriteOptions(), handles_[1], Key(1), - DummyString(options.write_buffer_manager->buffer_size()))); - - WriteOptions wo; - wo.no_slowdown = true; - Status s = dbfull()->Put( - wo, handles_[1], Key(2), - DummyString(options.write_buffer_manager->buffer_size())); - ASSERT_TRUE(s.IsIncomplete()); - ASSERT_TRUE(s.ToString().find("Write stall") != std::string::npos); - } else if (test_cause == WriteStallCause::kMemtableLimit) { - FlushOptions fo; - fo.allow_write_stall = true; - fo.wait = false; - - ASSERT_OK( - dbfull()->Put(WriteOptions(), handles_[1], Key(1), DummyString(1))); - ASSERT_OK(dbfull()->Flush(fo, handles_[1])); - - ASSERT_OK( - dbfull()->Put(WriteOptions(), handles_[1], Key(2), DummyString(1))); - ASSERT_OK(dbfull()->Flush(fo, handles_[1])); - } - - if (test_cause == WriteStallCause::kWriteBufferManagerLimit) { - db_values.clear(); - EXPECT_TRUE(dbfull()->GetMapProperty(DB::Properties::kDBWriteStallStats, - &db_values)); - EXPECT_EQ(std::stoi(db_values[WriteStallStatsMapKeys::CauseConditionCount( - WriteStallCause::kWriteBufferManagerLimit, - WriteStallCondition::kStopped)]), - 1); - // `WriteStallCause::kWriteBufferManagerLimit` should not result in any - // CF-scope write stall stats changes - for (int cf = 0; cf <= 1; ++cf) { - std::map cf_values; - EXPECT_TRUE(dbfull()->GetMapProperty( - handles_[cf], DB::Properties::kCFWriteStallStats, &cf_values)); - EXPECT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalStops()]), - 0); - EXPECT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalDelays()]), - 0); - } - } else if (test_cause == WriteStallCause::kMemtableLimit) { - for (int cf = 0; cf <= 1; ++cf) { - std::map cf_values; - EXPECT_TRUE(dbfull()->GetMapProperty( - handles_[cf], DB::Properties::kCFWriteStallStats, &cf_values)); - EXPECT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalStops()]), - cf == 1 ? 1 : 0); - EXPECT_EQ( - std::stoi(cf_values[WriteStallStatsMapKeys::CauseConditionCount( - WriteStallCause::kMemtableLimit, - WriteStallCondition::kStopped)]), - cf == 1 ? 
1 : 0); - EXPECT_EQ(std::stoi(cf_values[WriteStallStatsMapKeys::TotalDelays()]), - 0); - EXPECT_EQ( - std::stoi(cf_values[WriteStallStatsMapKeys::CauseConditionCount( - WriteStallCause::kMemtableLimit, - WriteStallCondition::kDelayed)]), - 0); - } - } - - sleeping_task->WakeUp(); - sleeping_task->WaitUntilDone(); - } -} - -namespace { -std::string PopMetaIndexKey(InternalIterator* meta_iter) { - Status s = meta_iter->status(); - if (!s.ok()) { - return s.ToString(); - } else if (meta_iter->Valid()) { - std::string rv = meta_iter->key().ToString(); - meta_iter->Next(); - return rv; - } else { - return "NOT_FOUND"; - } -} - -} // anonymous namespace - -TEST_F(DBPropertiesTest, TableMetaIndexKeys) { - // This is to detect unexpected churn in metaindex block keys. This is more - // of a "table test" but table_test.cc doesn't depend on db_test_util.h and - // we need ChangeOptions() for broad coverage. - constexpr int kKeyCount = 100; - do { - Options options; - options = CurrentOptions(options); - DestroyAndReopen(options); - - // Create an SST file - for (int key = 0; key < kKeyCount; key++) { - ASSERT_OK(Put(Key(key), "val")); - } - ASSERT_OK(Flush()); - - // Find its file number - std::vector files; - db_->GetLiveFilesMetaData(&files); - // 1 SST file - ASSERT_EQ(1, files.size()); - - // Open it for inspection - std::string sst_file = - files[0].directory + "/" + files[0].relative_filename; - std::unique_ptr f; - ASSERT_OK(env_->GetFileSystem()->NewRandomAccessFile( - sst_file, FileOptions(), &f, nullptr)); - std::unique_ptr r; - r.reset(new RandomAccessFileReader(std::move(f), sst_file)); - uint64_t file_size = 0; - ASSERT_OK(env_->GetFileSize(sst_file, &file_size)); - - // Read metaindex - BlockContents bc; - ASSERT_OK(ReadMetaIndexBlockInFile(r.get(), file_size, 0U, - ImmutableOptions(options), &bc)); - Block metaindex_block(std::move(bc)); - std::unique_ptr meta_iter; - meta_iter.reset(metaindex_block.NewMetaIterator()); - meta_iter->SeekToFirst(); - - if (strcmp(options.table_factory->Name(), - TableFactory::kBlockBasedTableName()) == 0) { - auto bbto = options.table_factory->GetOptions(); - if (bbto->filter_policy) { - if (bbto->partition_filters) { - // The key names are intentionally hard-coded here to detect - // accidental regression on compatibility. - EXPECT_EQ("partitionedfilter.rocksdb.BuiltinBloomFilter", - PopMetaIndexKey(meta_iter.get())); - } else { - EXPECT_EQ("fullfilter.rocksdb.BuiltinBloomFilter", - PopMetaIndexKey(meta_iter.get())); - } - } - if (bbto->index_type == BlockBasedTableOptions::kHashSearch) { - EXPECT_EQ("rocksdb.hashindex.metadata", - PopMetaIndexKey(meta_iter.get())); - EXPECT_EQ("rocksdb.hashindex.prefixes", - PopMetaIndexKey(meta_iter.get())); - } - } - EXPECT_EQ("rocksdb.properties", PopMetaIndexKey(meta_iter.get())); - EXPECT_EQ("NOT_FOUND", PopMetaIndexKey(meta_iter.get())); - } while (ChangeOptions()); -} - - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc deleted file mode 100644 index 08bd3af04..000000000 --- a/db/db_range_del_test.cc +++ /dev/null @@ -1,3414 +0,0 @@ -// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
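Before the individual tests, a minimal standalone sketch of the DeleteRange() API this whole file revolves around (not taken from the test file; the path is hypothetical). A single range tombstone covers the half-open interval [begin, end), and, as the first tests below check, an empty range covers nothing and a reversed range is rejected as InvalidArgument:

    #include <cassert>
    #include <string>
    #include "rocksdb/db.h"

    // Sketch: delete every key in ["b", "d") with one range tombstone.
    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;
      rocksdb::DB* db = nullptr;
      assert(rocksdb::DB::Open(options, "/tmp/range_del_demo" /* hypothetical */,
                               &db)
                 .ok());

      assert(db->Put(rocksdb::WriteOptions(), "a", "val").ok());
      assert(db->Put(rocksdb::WriteOptions(), "c", "val").ok());

      assert(db->DeleteRange(rocksdb::WriteOptions(), db->DefaultColumnFamily(),
                             "b", "d")
                 .ok());

      std::string value;
      assert(db->Get(rocksdb::ReadOptions(), "a", &value).ok());          // kept
      assert(db->Get(rocksdb::ReadOptions(), "c", &value).IsNotFound());  // covered

      delete db;
      return 0;
    }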
- -#include "db/db_test_util.h" -#include "db/version_set.h" -#include "port/stack_trace.h" -#include "rocksdb/utilities/write_batch_with_index.h" -#include "test_util/testutil.h" -#include "util/random.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -// TODO(cbi): parameterize the test to cover user-defined timestamp cases -class DBRangeDelTest : public DBTestBase { - public: - DBRangeDelTest() : DBTestBase("db_range_del_test", /*env_do_fsync=*/false) {} - - std::string GetNumericStr(int key) { - uint64_t uint64_key = static_cast(key); - std::string str; - str.resize(8); - memcpy(&str[0], static_cast(&uint64_key), 8); - return str; - } -}; - -TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) { - // TODO: figure out why MmapReads trips the iterator pinning assertion in - // RangeDelAggregator. Ideally it would be supported; otherwise it should at - // least be explicitly unsupported. - for (auto config : {kPlainTableAllBytesPrefix, /* kWalDirAndMmapReads */}) { - option_config_ = config; - DestroyAndReopen(CurrentOptions()); - ASSERT_TRUE(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - "dr1", "dr1") - .IsNotSupported()); - } -} - -TEST_F(DBRangeDelTest, WriteBatchWithIndexNotSupported) { - WriteBatchWithIndex indexedBatch{}; - ASSERT_TRUE(indexedBatch.DeleteRange(db_->DefaultColumnFamily(), "dr1", "dr1") - .IsNotSupported()); - ASSERT_TRUE(indexedBatch.DeleteRange("dr1", "dr1").IsNotSupported()); -} - -TEST_F(DBRangeDelTest, EndSameAsStartCoversNothing) { - ASSERT_OK(db_->Put(WriteOptions(), "b", "val")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "b")); - ASSERT_EQ("val", Get("b")); -} - -TEST_F(DBRangeDelTest, EndComesBeforeStartInvalidArgument) { - ASSERT_OK(db_->Put(WriteOptions(), "b", "val")); - ASSERT_TRUE( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "a") - .IsInvalidArgument()); - ASSERT_EQ("val", Get("b")); -} - -TEST_F(DBRangeDelTest, FlushOutputHasOnlyRangeTombstones) { - do { - DestroyAndReopen(CurrentOptions()); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - "dr1", "dr2")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - } while (ChangeOptions(kRangeDelSkipConfigs)); -} - -TEST_F(DBRangeDelTest, DictionaryCompressionWithOnlyRangeTombstones) { - Options opts = CurrentOptions(); - opts.compression_opts.max_dict_bytes = 16384; - Reopen(opts); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1", - "dr2")); - ASSERT_OK(db_->Flush(FlushOptions())); -} - -TEST_F(DBRangeDelTest, CompactionOutputHasOnlyRangeTombstone) { - do { - Options opts = CurrentOptions(); - opts.disable_auto_compactions = true; - opts.statistics = CreateDBStatistics(); - DestroyAndReopen(opts); - - // snapshot protects range tombstone from dropping due to becoming obsolete. - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - ASSERT_OK(db_->Flush(FlushOptions())); - - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - ASSERT_EQ(0, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE)); - db_->ReleaseSnapshot(snapshot); - // Skip cuckoo memtables, which do not support snapshots. 
Skip non-leveled - // compactions as the above assertions about the number of files in a level - // do not hold true. - } while (ChangeOptions(kRangeDelSkipConfigs | kSkipUniversalCompaction | - kSkipFIFOCompaction)); -} - -TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) { - // regression test for exactly filled compaction output files. Previously - // another file would be generated containing all range deletions, which - // could invalidate the non-overlapping file boundary invariant. - const int kNumPerFile = 4, kNumFiles = 2, kFileBytes = 9 << 10; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.level0_file_num_compaction_trigger = kNumFiles; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile)); - options.num_levels = 2; - options.target_file_size_base = kFileBytes; - BlockBasedTableOptions table_options; - table_options.block_size_deviation = 50; // each block holds two keys - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - - // snapshot protects range tombstone from dropping due to becoming obsolete. - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(1))); - - Random rnd(301); - for (int i = 0; i < kNumFiles; ++i) { - std::vector values; - // Write 12K (4 values, each 3K) - for (int j = 0; j < kNumPerFile; j++) { - values.push_back(rnd.RandomString(3 << 10)); - ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j])); - if (j == 0 && i > 0) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - } - } - // put extra key to trigger final flush - ASSERT_OK(Put("", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); - - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) { - // Ensures range deletion spanning multiple compaction output files that are - // cut by max_compaction_bytes will have non-overlapping key-ranges. - // https://github.com/facebook/rocksdb/issues/1778 - const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12; - Options opts = CurrentOptions(); - opts.comparator = test::Uint64Comparator(); - opts.disable_auto_compactions = true; - opts.level0_file_num_compaction_trigger = kNumFiles; - opts.max_compaction_bytes = kNumPerFile * kBytesPerVal; - opts.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile)); - // Want max_compaction_bytes to trigger the end of compaction output file, not - // target_file_size_base, so make the latter much bigger - // opts.target_file_size_base = 100 * opts.max_compaction_bytes; - opts.target_file_size_base = 1; - DestroyAndReopen(opts); - - // snapshot protects range tombstone from dropping due to becoming obsolete. 
- const Snapshot* snapshot = db_->GetSnapshot(); - - Random rnd(301); - - ASSERT_OK(Put(GetNumericStr(0), rnd.RandomString(kBytesPerVal))); - ASSERT_OK( - Put(GetNumericStr(kNumPerFile - 1), rnd.RandomString(kBytesPerVal))); - ASSERT_OK(Flush()); - ASSERT_OK(Put(GetNumericStr(kNumPerFile), rnd.RandomString(kBytesPerVal))); - ASSERT_OK( - Put(GetNumericStr(kNumPerFile * 2 - 1), rnd.RandomString(kBytesPerVal))); - ASSERT_OK(Flush()); - MoveFilesToLevel(2); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(NumTableFilesAtLevel(2), 2); - - ASSERT_OK( - db_->SetOptions(db_->DefaultColumnFamily(), - {{"target_file_size_base", - std::to_string(100 * opts.max_compaction_bytes)}})); - - // It spans the whole key-range, thus will be included in all output files - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - GetNumericStr(0), - GetNumericStr(kNumFiles * kNumPerFile - 1))); - - for (int i = 0; i < kNumFiles; ++i) { - std::vector values; - // Write 1MB (256 values, each 4K) - for (int j = 0; j < kNumPerFile; j++) { - values.push_back(rnd.RandomString(kBytesPerVal)); - ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j])); - } - // extra entry to trigger SpecialSkipListFactory's flush - ASSERT_OK(Put(GetNumericStr(kNumPerFile), "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); - } - - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, - /*column_family=*/nullptr, - /*disallow_trivial_move=*/true)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_GE(NumTableFilesAtLevel(1), 2); - std::vector> files; - dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files); - - for (size_t i = 0; i + 1 < files[1].size(); ++i) { - ASSERT_TRUE(InternalKeyComparator(opts.comparator) - .Compare(files[1][i].largest, files[1][i + 1].smallest) < - 0); - } - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, SentinelsOmittedFromOutputFile) { - // Regression test for bug where sentinel range deletions (i.e., ones with - // sequence number of zero) were included in output files. - // snapshot protects range tombstone from dropping due to becoming obsolete. 
- const Snapshot* snapshot = db_->GetSnapshot(); - - // gaps between ranges creates sentinels in our internal representation - std::vector> range_dels = { - {"a", "b"}, {"c", "d"}, {"e", "f"}}; - for (const auto& range_del : range_dels) { - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - range_del.first, range_del.second)); - } - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - std::vector> files; - dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files); - ASSERT_GT(files[0][0].fd.smallest_seqno, 0); - - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) { - ASSERT_OK(db_->Put(WriteOptions(), "b1", "val")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c")); - ASSERT_OK(db_->Put(WriteOptions(), "b2", "val")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b")); - // first iteration verifies query correctness in memtable, second verifies - // query correctness for a single SST file - for (int i = 0; i < 2; ++i) { - if (i > 0) { - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - } - std::string value; - ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound()); - ASSERT_OK(db_->Get(ReadOptions(), "b2", &value)); - } -} - -TEST_F(DBRangeDelTest, CompactRangeDelsSameStartKey) { - ASSERT_OK(db_->Put(WriteOptions(), "unused", - "val")); // prevents empty after compaction - ASSERT_OK(db_->Put(WriteOptions(), "b1", "val")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(3, NumTableFilesAtLevel(0)); - - for (int i = 0; i < 2; ++i) { - if (i > 0) { - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - } - std::string value; - ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound()); - } -} - -TEST_F(DBRangeDelTest, FlushRemovesCoveredKeys) { - const int kNum = 300, kRangeBegin = 50, kRangeEnd = 250; - Options opts = CurrentOptions(); - opts.comparator = test::Uint64Comparator(); - DestroyAndReopen(opts); - - // Write a third before snapshot, a third between snapshot and tombstone, and - // a third after the tombstone. Keys older than snapshot or newer than the - // tombstone should be preserved. 
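The comment above states the rule these flush and compaction tests rely on: a range tombstone covers only data that is older than the tombstone, and data protected by a live snapshot stays readable through that snapshot. A hedged sketch of the read side of that rule, assuming an open rocksdb::DB* db (the function is illustrative only):

    #include <cassert>
    #include <string>
    #include "rocksdb/db.h"

    // Sketch: a snapshot taken before DeleteRange still sees the covered key.
    void SnapshotSeesThroughRangeTombstone(rocksdb::DB* db) {
      assert(db->Put(rocksdb::WriteOptions(), "key1", "v1").ok());

      const rocksdb::Snapshot* snap = db->GetSnapshot();
      assert(db->DeleteRange(rocksdb::WriteOptions(), db->DefaultColumnFamily(),
                             "key0", "key9")
                 .ok());

      std::string value;
      rocksdb::ReadOptions read_opts;
      read_opts.snapshot = snap;
      assert(db->Get(read_opts, "key1", &value).ok());  // old view: visible

      assert(db->Get(rocksdb::ReadOptions(), "key1", &value)
                 .IsNotFound());  // current view: covered by the tombstone

      db->ReleaseSnapshot(snap);
    }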
- const Snapshot* snapshot = nullptr; - for (int i = 0; i < kNum; ++i) { - if (i == kNum / 3) { - snapshot = db_->GetSnapshot(); - } else if (i == 2 * kNum / 3) { - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - GetNumericStr(kRangeBegin), - GetNumericStr(kRangeEnd))); - } - ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(i), "val")); - } - ASSERT_OK(db_->Flush(FlushOptions())); - - for (int i = 0; i < kNum; ++i) { - ReadOptions read_opts; - read_opts.ignore_range_deletions = true; - std::string value; - if (i < kRangeBegin || i > kRangeEnd || i < kNum / 3 || i >= 2 * kNum / 3) { - ASSERT_OK(db_->Get(read_opts, GetNumericStr(i), &value)); - } else { - ASSERT_TRUE(db_->Get(read_opts, GetNumericStr(i), &value).IsNotFound()); - } - } - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, CompactionRemovesCoveredKeys) { - const int kNumPerFile = 100, kNumFiles = 4; - Options opts = CurrentOptions(); - opts.comparator = test::Uint64Comparator(); - opts.disable_auto_compactions = true; - opts.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile)); - opts.num_levels = 2; - opts.statistics = CreateDBStatistics(); - DestroyAndReopen(opts); - - for (int i = 0; i < kNumFiles; ++i) { - if (i > 0) { - // range tombstone covers first half of the previous file - ASSERT_OK(db_->DeleteRange( - WriteOptions(), db_->DefaultColumnFamily(), - GetNumericStr((i - 1) * kNumPerFile), - GetNumericStr((i - 1) * kNumPerFile + kNumPerFile / 2))); - } - // Make sure a given key appears in each file so compaction won't be able to - // use trivial move, which would happen if the ranges were non-overlapping. - // Also, we need an extra element since flush is only triggered when the - // number of keys is one greater than SpecialSkipListFactory's limit. - // We choose a key outside the key-range used by the test to avoid conflict. 
- ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(kNumPerFile * kNumFiles), - "val")); - - for (int j = 0; j < kNumPerFile; ++j) { - ASSERT_OK( - db_->Put(WriteOptions(), GetNumericStr(i * kNumPerFile + j), "val")); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); - } - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_GT(NumTableFilesAtLevel(1), 0); - ASSERT_EQ((kNumFiles - 1) * kNumPerFile / 2, - TestGetTickerCount(opts, COMPACTION_KEY_DROP_RANGE_DEL)); - - for (int i = 0; i < kNumFiles; ++i) { - for (int j = 0; j < kNumPerFile; ++j) { - ReadOptions read_opts; - read_opts.ignore_range_deletions = true; - std::string value; - if (i == kNumFiles - 1 || j >= kNumPerFile / 2) { - ASSERT_OK( - db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value)); - } else { - ASSERT_TRUE( - db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value) - .IsNotFound()); - } - } - } -} - -TEST_F(DBRangeDelTest, ValidLevelSubcompactionBoundaries) { - const int kNumPerFile = 100, kNumFiles = 4, kFileBytes = 100 << 10; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.level0_file_num_compaction_trigger = kNumFiles; - options.max_bytes_for_level_base = 2 * kFileBytes; - options.max_subcompactions = 4; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile)); - options.num_levels = 3; - options.target_file_size_base = kFileBytes; - options.target_file_size_multiplier = 1; - options.max_compaction_bytes = 1500; - Reopen(options); - - Random rnd(301); - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < kNumFiles; ++j) { - if (i > 0) { - // delete [95,105) in two files, [295,305) in next two - int mid = (j + (1 - j % 2)) * kNumPerFile; - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(mid - 5), Key(mid + 5))); - } - std::vector values; - // Write 100KB (100 values, each 1K) - for (int k = 0; k < kNumPerFile; k++) { - values.push_back(rnd.RandomString(990)); - ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k])); - } - // put extra key to trigger flush - ASSERT_OK(Put("", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - if (j < kNumFiles - 1) { - // background compaction may happen early for kNumFiles'th file - ASSERT_EQ(NumTableFilesAtLevel(0), j + 1); - } - if (j == options.level0_file_num_compaction_trigger - 1) { - // When i == 1, compaction will output some files to L1, at which point - // L1 is not bottommost so range deletions cannot be compacted away. The - // new L1 files must be generated with non-overlapping key ranges even - // though multiple subcompactions see the same ranges deleted, else an - // assertion will fail. - // - // Only enable auto-compactions when we're ready; otherwise, the - // oversized L0 (relative to base_level) causes the compaction to run - // earlier. 
- ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()})); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(), - {{"disable_auto_compactions", "true"}})); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_GT(NumTableFilesAtLevel(1), 0); - ASSERT_GT(NumTableFilesAtLevel(2), 0); - } - } - } -} - -TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) { - const int kNumPerFile = 100, kFilesPerLevel = 4, kNumLevels = 4; - Options options = CurrentOptions(); - options.compaction_options_universal.min_merge_width = kFilesPerLevel; - options.compaction_options_universal.max_merge_width = kFilesPerLevel; - options.compaction_options_universal.size_ratio = 10; - options.compaction_style = kCompactionStyleUniversal; - options.level0_file_num_compaction_trigger = kFilesPerLevel; - options.max_subcompactions = 4; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile)); - options.num_levels = kNumLevels; - options.target_file_size_base = kNumPerFile << 10; - options.target_file_size_multiplier = 1; - Reopen(options); - - Random rnd(301); - for (int i = 0; i < kNumLevels - 1; ++i) { - for (int j = 0; j < kFilesPerLevel; ++j) { - if (i == kNumLevels - 2) { - // insert range deletions [95,105) in two files, [295,305) in next two - // to prepare L1 for later manual compaction. - int mid = (j + (1 - j % 2)) * kNumPerFile; - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(mid - 5), Key(mid + 5))); - } - std::vector values; - // Write 100KB (100 values, each 1K) - for (int k = 0; k < kNumPerFile; k++) { - // For the highest level, use smaller value size such that it does not - // prematurely cause auto compaction due to range tombstone adding - // additional compensated file size - values.push_back(rnd.RandomString((i == kNumLevels - 2) ? 600 : 990)); - ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k])); - } - // put extra key to trigger flush - ASSERT_OK(Put("", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - if (j < kFilesPerLevel - 1) { - // background compaction may happen early for kFilesPerLevel'th file - ASSERT_EQ(NumTableFilesAtLevel(0), j + 1); - } - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - if (i == kNumLevels - 2) { - // For the highest level, value size is smaller (see Put() above), - // so output file number is smaller. - ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 2); - } else { - ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1); - } - } - // Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions - // happen since input level > 0; (2) range deletions are not dropped since - // output level is not bottommost. If no file boundary assertion fails, that - // probably means universal compaction + subcompaction + range deletion are - // compatible. 
- ASSERT_OK(dbfull()->RunManualCompaction( - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(), - 1 /* input_level */, 2 /* output_level */, CompactRangeOptions(), - nullptr /* begin */, nullptr /* end */, true /* exclusive */, - true /* disallow_trivial_move */, - std::numeric_limits::max() /* max_file_num_to_ignore */, - "" /*trim_ts*/)); -} - -TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) { - const int kNumPerFile = 3, kNumFiles = 3; - Options opts = CurrentOptions(); - opts.disable_auto_compactions = true; - opts.memtable_factory.reset(test::NewSpecialSkipListFactory(2 * kNumPerFile)); - opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); - opts.num_levels = 2; - Reopen(opts); - - // Iterates kNumFiles * kNumPerFile + 1 times since flushing the last file - // requires an extra entry. - for (int i = 0; i <= kNumFiles * kNumPerFile; ++i) { - if (i % kNumPerFile == 0 && i / kNumPerFile == kNumFiles - 1) { - // Delete merge operands from all but the last file - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - "key", "key_")); - } - std::string val; - PutFixed64(&val, i); - ASSERT_OK(db_->Merge(WriteOptions(), "key", val)); - // we need to prevent trivial move using Puts so compaction will actually - // process the merge operands. - ASSERT_OK(db_->Put(WriteOptions(), "prevent_trivial_move", "")); - if (i > 0 && i % kNumPerFile == 0) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - } - - ReadOptions read_opts; - read_opts.ignore_range_deletions = true; - std::string expected, actual; - ASSERT_OK(db_->Get(read_opts, "key", &actual)); - PutFixed64(&expected, 45); // 1+2+...+9 - ASSERT_EQ(expected, actual); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - expected.clear(); - ASSERT_OK(db_->Get(read_opts, "key", &actual)); - uint64_t tmp; - Slice tmp2(actual); - GetFixed64(&tmp2, &tmp); - PutFixed64(&expected, 30); // 6+7+8+9 (earlier operands covered by tombstone) - ASSERT_EQ(expected, actual); -} - -TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) { - // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4) - // Flush. The `CompactionIterator` previously had a bug where we forgot to - // check for covering range tombstones when processing the (1) Put, causing - // it to reappear after the flush. - Options opts = CurrentOptions(); - opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); - Reopen(opts); - - std::string val; - PutFixed64(&val, 1); - ASSERT_OK(db_->Put(WriteOptions(), "key", val)); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key", - "key_")); - ASSERT_OK(db_->Merge(WriteOptions(), "key", val)); - ASSERT_OK(db_->Flush(FlushOptions())); - - ReadOptions read_opts; - std::string expected, actual; - ASSERT_OK(db_->Get(read_opts, "key", &actual)); - PutFixed64(&expected, 1); - ASSERT_EQ(expected, actual); -} - -TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) { - // During compaction to bottommost level, verify range tombstones older than - // the oldest snapshot are removed, while others are preserved. 
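To make the setup of ObsoleteTombstoneCleanup easier to follow outside the test harness, here is a hedged standalone sketch of the same sequence (path hypothetical, counts not asserted): range tombstones no longer visible to any snapshot can be dropped once compaction reaches the bottommost level, and the COMPACTION_RANGE_DEL_DROP_OBSOLETE ticker counts those drops.

    #include <cassert>
    #include <cstdint>
    #include "rocksdb/db.h"
    #include "rocksdb/statistics.h"

    // Sketch: observe range tombstones dropped as obsolete during compaction.
    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;
      options.statistics = rocksdb::CreateDBStatistics();
      rocksdb::DB* db = nullptr;
      assert(rocksdb::DB::Open(options, "/tmp/obsolete_demo" /* hypothetical */,
                               &db)
                 .ok());

      // No snapshot protects this tombstone, so a full compaction to the
      // bottommost level is free to drop it.
      assert(db->DeleteRange(rocksdb::WriteOptions(), db->DefaultColumnFamily(),
                             "dr1", "dr10")
                 .ok());
      assert(db->Flush(rocksdb::FlushOptions()).ok());
      assert(db->CompactRange(rocksdb::CompactRangeOptions(), nullptr, nullptr)
                 .ok());

      uint64_t dropped = options.statistics->getTickerCount(
          rocksdb::COMPACTION_RANGE_DEL_DROP_OBSOLETE);
      (void)dropped;  // typically non-zero after the compaction above

      delete db;
      return 0;
    }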
- Options opts = CurrentOptions(); - opts.disable_auto_compactions = true; - opts.num_levels = 2; - opts.statistics = CreateDBStatistics(); - Reopen(opts); - - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1", - "dr10")); // obsolete after compaction - ASSERT_OK(db_->Put(WriteOptions(), "key", "val")); - ASSERT_OK(db_->Flush(FlushOptions())); - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr2", - "dr20")); // protected by snapshot - ASSERT_OK(db_->Put(WriteOptions(), "key", "val")); - ASSERT_OK(db_->Flush(FlushOptions())); - - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - ASSERT_EQ(1, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE)); - - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, TableEvictedDuringScan) { - // The RangeDelAggregator holds pointers into range deletion blocks created by - // table readers. This test ensures the aggregator can still access those - // blocks even if it outlives the table readers that created them. - // - // DBIter always keeps readers open for L0 files. So, in order to test - // aggregator outliving reader, we need to have deletions in L1 files, which - // are opened/closed on-demand during the scan. This is accomplished by - // setting kNumRanges > level0_stop_writes_trigger, which prevents deletions - // from all lingering in L0 (there is at most one range deletion per L0 file). - // - // The first L1 file will contain a range deletion since its begin key is 0. - // SeekToFirst() references that table's reader and adds its range tombstone - // to the aggregator. Upon advancing beyond that table's key-range via Next(), - // the table reader will be unreferenced by the iterator. Since we manually - // call Evict() on all readers before the full scan, this unreference causes - // the reader's refcount to drop to zero and thus be destroyed. - // - // When it is destroyed, we do not remove its range deletions from the - // aggregator. So, subsequent calls to Next() must be able to use these - // deletions to decide whether a key is covered. This will work as long as - // the aggregator properly references the range deletion block. - const int kNum = 25, kRangeBegin = 0, kRangeEnd = 7, kNumRanges = 5; - Options opts = CurrentOptions(); - opts.comparator = test::Uint64Comparator(); - opts.level0_file_num_compaction_trigger = 4; - opts.level0_stop_writes_trigger = 4; - opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); - opts.num_levels = 2; - BlockBasedTableOptions bbto; - bbto.cache_index_and_filter_blocks = true; - bbto.block_cache = NewLRUCache(8 << 20); - opts.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(opts); - - // Hold a snapshot so range deletions can't become obsolete during compaction - // to bottommost level (i.e., L1). 
- const Snapshot* snapshot = db_->GetSnapshot(); - for (int i = 0; i < kNum; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(i), "val")); - if (i > 0) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - if (i >= kNum / 2 && i < kNum / 2 + kNumRanges) { - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - GetNumericStr(kRangeBegin), - GetNumericStr(kRangeEnd))); - } - } - // Must be > 1 so the first L1 file can be closed before scan finishes - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_GT(NumTableFilesAtLevel(1), 1); - std::vector file_numbers = ListTableFiles(env_, dbname_); - - ReadOptions read_opts; - auto* iter = db_->NewIterator(read_opts); - ASSERT_OK(iter->status()); - int expected = kRangeEnd; - iter->SeekToFirst(); - for (auto file_number : file_numbers) { - // This puts table caches in the state of being externally referenced only - // so they are destroyed immediately upon iterator unreferencing. - TableCache::Evict(dbfull()->TEST_table_cache(), file_number); - } - for (; iter->Valid(); iter->Next()) { - ASSERT_EQ(GetNumericStr(expected), iter->key()); - ++expected; - // Keep clearing block cache's LRU so range deletion block can be freed as - // soon as its refcount drops to zero. - bbto.block_cache->EraseUnRefEntries(); - } - ASSERT_EQ(kNum, expected); - delete iter; - db_->ReleaseSnapshot(snapshot); - - // Also test proper cache handling in GetRangeTombstoneIterator, - // via TablesRangeTombstoneSummary. (This once triggered memory leak - // report with ASAN.) - opts.max_open_files = 1; - Reopen(opts); - - std::string str; - ASSERT_OK(dbfull()->TablesRangeTombstoneSummary(db_->DefaultColumnFamily(), - 100, &str)); -} - -TEST_F(DBRangeDelTest, GetCoveredKeyFromMutableMemtable) { - do { - DestroyAndReopen(CurrentOptions()); - ASSERT_OK(db_->Put(WriteOptions(), "key", "val")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - - ReadOptions read_opts; - std::string value; - ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound()); - } while (ChangeOptions(kRangeDelSkipConfigs)); -} - -TEST_F(DBRangeDelTest, GetCoveredKeyFromImmutableMemtable) { - do { - Options opts = CurrentOptions(); - opts.max_write_buffer_number = 3; - opts.min_write_buffer_number_to_merge = 2; - // SpecialSkipListFactory lets us specify maximum number of elements the - // memtable can hold. It switches the active memtable to immutable (flush is - // prevented by the above options) upon inserting an element that would - // overflow the memtable. 
- opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); - DestroyAndReopen(opts); - - ASSERT_OK(db_->Put(WriteOptions(), "key", "val")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - ASSERT_OK(db_->Put(WriteOptions(), "blah", "val")); - - ReadOptions read_opts; - std::string value; - ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound()); - } while (ChangeOptions(kRangeDelSkipConfigs)); -} - -TEST_F(DBRangeDelTest, GetCoveredKeyFromSst) { - do { - DestroyAndReopen(CurrentOptions()); - ASSERT_OK(db_->Put(WriteOptions(), "key", "val")); - // snapshot prevents key from being deleted during flush - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - ASSERT_OK(db_->Flush(FlushOptions())); - - ReadOptions read_opts; - std::string value; - ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound()); - db_->ReleaseSnapshot(snapshot); - } while (ChangeOptions(kRangeDelSkipConfigs)); -} - -TEST_F(DBRangeDelTest, GetCoveredMergeOperandFromMemtable) { - const int kNumMergeOps = 10; - Options opts = CurrentOptions(); - opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); - Reopen(opts); - - for (int i = 0; i < kNumMergeOps; ++i) { - std::string val; - PutFixed64(&val, i); - ASSERT_OK(db_->Merge(WriteOptions(), "key", val)); - if (i == kNumMergeOps / 2) { - // deletes [0, 5] - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - "key", "key_")); - } - } - - ReadOptions read_opts; - std::string expected, actual; - ASSERT_OK(db_->Get(read_opts, "key", &actual)); - PutFixed64(&expected, 30); // 6+7+8+9 - ASSERT_EQ(expected, actual); - - expected.clear(); - read_opts.ignore_range_deletions = true; - ASSERT_OK(db_->Get(read_opts, "key", &actual)); - PutFixed64(&expected, 45); // 0+1+2+...+9 - ASSERT_EQ(expected, actual); -} - -TEST_F(DBRangeDelTest, GetIgnoresRangeDeletions) { - Options opts = CurrentOptions(); - opts.max_write_buffer_number = 4; - opts.min_write_buffer_number_to_merge = 3; - opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); - Reopen(opts); - - ASSERT_OK(db_->Put(WriteOptions(), "sst_key", "val")); - // snapshot prevents key from being deleted during flush - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->Put(WriteOptions(), "imm_key", "val")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - ASSERT_OK(db_->Put(WriteOptions(), "mem_key", "val")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - - ReadOptions read_opts; - read_opts.ignore_range_deletions = true; - for (std::string key : {"sst_key", "imm_key", "mem_key"}) { - std::string value; - ASSERT_OK(db_->Get(read_opts, key, &value)); - } - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) { - const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25; - Options opts = CurrentOptions(); - opts.comparator = test::Uint64Comparator(); - opts.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile)); - DestroyAndReopen(opts); - - // Write half of the keys before the tombstone and half after the tombstone. - // Only covered keys (i.e., within the range and older than the tombstone) - // should be deleted. 
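Several of the surrounding tests flip ReadOptions::ignore_range_deletions, which makes point reads and iterators skip range-tombstone filtering entirely and is mainly useful for inspecting what data physically remains. A minimal sketch, assuming an open rocksdb::DB* db in which a range covering "key" was previously deleted:

    #include <string>
    #include "rocksdb/db.h"

    // Sketch: the same key looks deleted or present depending on whether
    // range tombstones are applied at read time.
    void CompareReadViews(rocksdb::DB* db) {
      std::string value;

      rocksdb::ReadOptions normal;
      bool covered = db->Get(normal, "key", &value).IsNotFound();

      rocksdb::ReadOptions raw;
      raw.ignore_range_deletions = true;  // skip range tombstone filtering
      bool physically_present = db->Get(raw, "key", &value).ok();

      (void)covered;
      (void)physically_present;
    }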
- for (int i = 0; i < kNum; ++i) { - if (i == kNum / 2) { - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - GetNumericStr(kRangeBegin), - GetNumericStr(kRangeEnd))); - } - ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(i), "val")); - } - ReadOptions read_opts; - auto* iter = db_->NewIterator(read_opts); - ASSERT_OK(iter->status()); - - int expected = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_EQ(GetNumericStr(expected), iter->key()); - if (expected == kRangeBegin - 1) { - expected = kNum / 2; - } else { - ++expected; - } - } - ASSERT_EQ(kNum, expected); - delete iter; -} - -TEST_F(DBRangeDelTest, IteratorOverUserSnapshot) { - const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25; - Options opts = CurrentOptions(); - opts.comparator = test::Uint64Comparator(); - opts.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile)); - DestroyAndReopen(opts); - - const Snapshot* snapshot = nullptr; - // Put a snapshot before the range tombstone, verify an iterator using that - // snapshot sees all inserted keys. - for (int i = 0; i < kNum; ++i) { - if (i == kNum / 2) { - snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - GetNumericStr(kRangeBegin), - GetNumericStr(kRangeEnd))); - } - ASSERT_OK(db_->Put(WriteOptions(), GetNumericStr(i), "val")); - } - ReadOptions read_opts; - read_opts.snapshot = snapshot; - auto* iter = db_->NewIterator(read_opts); - ASSERT_OK(iter->status()); - - int expected = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_EQ(GetNumericStr(expected), iter->key()); - ++expected; - } - ASSERT_EQ(kNum / 2, expected); - delete iter; - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) { - Options opts = CurrentOptions(); - opts.max_write_buffer_number = 4; - opts.min_write_buffer_number_to_merge = 3; - opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); - Reopen(opts); - - ASSERT_OK(db_->Put(WriteOptions(), "sst_key", "val")); - // snapshot prevents key from being deleted during flush - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->Put(WriteOptions(), "imm_key", "val")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - ASSERT_OK(db_->Put(WriteOptions(), "mem_key", "val")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - - ReadOptions read_opts; - read_opts.ignore_range_deletions = true; - auto* iter = db_->NewIterator(read_opts); - ASSERT_OK(iter->status()); - int i = 0; - std::string expected[] = {"imm_key", "mem_key", "sst_key"}; - for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++i) { - std::string key; - ASSERT_EQ(expected[i], iter->key()); - } - ASSERT_EQ(3, i); - delete iter; - db_->ReleaseSnapshot(snapshot); -} - -#ifndef ROCKSDB_UBSAN_RUN -TEST_F(DBRangeDelTest, TailingIteratorRangeTombstoneUnsupported) { - ASSERT_OK(db_->Put(WriteOptions(), "key", "val")); - // snapshot prevents key from being deleted during flush - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - - // iterations check unsupported in memtable, l0, and then l1 - for (int i = 0; i < 3; ++i) { - ReadOptions read_opts; - read_opts.tailing = true; - auto* iter = 
db_->NewIterator(read_opts); - if (i == 2) { - // For L1+, iterators over files are created on-demand, so need seek - iter->SeekToFirst(); - } - ASSERT_TRUE(iter->status().IsNotSupported()); - - delete iter; - if (i == 0) { - ASSERT_OK(db_->Flush(FlushOptions())); - } else if (i == 1) { - MoveFilesToLevel(1); - } - } - db_->ReleaseSnapshot(snapshot); -} -#endif // !ROCKSDB_UBSAN_RUN - -TEST_F(DBRangeDelTest, SubcompactionHasEmptyDedicatedRangeDelFile) { - const int kNumFiles = 2, kNumKeysPerFile = 4; - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.level0_file_num_compaction_trigger = kNumFiles; - options.max_subcompactions = 2; - options.num_levels = 2; - options.target_file_size_base = 4096; - Reopen(options); - - // need a L1 file for subcompaction to be triggered - ASSERT_OK( - db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(0), "val")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - - // put enough keys to fill up the first subcompaction, and later range-delete - // them so that the first subcompaction outputs no key-values. In that case - // it'll consider making an SST file dedicated to range deletions. - for (int i = 0; i < kNumKeysPerFile; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i), - std::string(1024, 'a'))); - } - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(kNumKeysPerFile))); - - // the above range tombstone can be dropped, so that one alone won't cause a - // dedicated file to be opened. We can make one protected by snapshot that - // must be considered. Make its range outside the first subcompaction's range - // to exercise the tricky part of the code. - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(kNumKeysPerFile + 1), - Key(kNumKeysPerFile + 2))); - ASSERT_OK(db_->Flush(FlushOptions())); - - ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()})); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, MemtableBloomFilter) { - // regression test for #2743. the range delete tombstones in memtable should - // be added even when Get() skips searching due to its prefix bloom filter - const int kMemtableSize = 1 << 20; // 1MB - const int kMemtablePrefixFilterSize = 1 << 13; // 8KB - const int kNumKeys = 1000; - const int kPrefixLen = 8; - Options options = CurrentOptions(); - options.memtable_prefix_bloom_size_ratio = - static_cast(kMemtablePrefixFilterSize) / kMemtableSize; - options.prefix_extractor.reset( - ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen)); - options.write_buffer_size = kMemtableSize; - Reopen(options); - - for (int i = 0; i < kNumKeys; ++i) { - ASSERT_OK(Put(Key(i), "val")); - } - ASSERT_OK(Flush()); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(kNumKeys))); - for (int i = 0; i < kNumKeys; ++i) { - std::string value; - ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &value).IsNotFound()); - } -} - -TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) { - // This test originally verified that compaction treated files containing a - // split range deletion in the input level as an atomic unit. 
I.e., - // compacting any input-level file(s) containing a portion of the range - // deletion causes all other input-level files containing portions of that - // same range deletion to be included in the compaction. Range deletion - // tombstones are now truncated to sstable boundaries which removed the need - // for that behavior (which could lead to excessively large - // compactions). - const int kNumFilesPerLevel = 4, kValueBytes = 4 << 10; - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.level0_file_num_compaction_trigger = kNumFilesPerLevel; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(2 /* num_entries_flush */)); - // max file size could be 2x of target file size, so set it to half of that - options.target_file_size_base = kValueBytes / 2; - // disable dynamic_file_size, as it will cut L1 files into more files (than - // kNumFilesPerLevel). - options.level_compaction_dynamic_file_size = false; - options.max_compaction_bytes = 1500; - // i == 0: CompactFiles - // i == 1: CompactRange - // i == 2: automatic compaction - for (int i = 0; i < 3; ++i) { - DestroyAndReopen(options); - - ASSERT_OK(Put(Key(0), "")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // snapshot protects range tombstone from dropping due to becoming obsolete. - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(0), Key(2 * kNumFilesPerLevel))); - - Random rnd(301); - std::string value = rnd.RandomString(kValueBytes); - for (int j = 0; j < kNumFilesPerLevel; ++j) { - // give files overlapping key-ranges to prevent trivial move - ASSERT_OK(Put(Key(j), value)); - ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value)); - if (j > 0) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(j, NumTableFilesAtLevel(0)); - } - } - // put extra key to trigger final flush - ASSERT_OK(Put("", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(kNumFilesPerLevel, NumTableFilesAtLevel(1)); - - ColumnFamilyMetaData meta; - db_->GetColumnFamilyMetaData(&meta); - if (i == 0) { - ASSERT_OK(db_->CompactFiles( - CompactionOptions(), {meta.levels[1].files[0].name}, 2 /* level */)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); - } else if (i == 1) { - auto begin_str = Key(0), end_str = Key(1); - Slice begin = begin_str, end = end_str; - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin, &end)); - ASSERT_EQ(3, NumTableFilesAtLevel(1)); - } else if (i == 2) { - ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(), - {{"max_bytes_for_level_base", "10000"}})); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - } - ASSERT_GT(NumTableFilesAtLevel(2), 0); - - db_->ReleaseSnapshot(snapshot); - } -} - -TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) { - // Test the handling of the range-tombstone end-key as the - // upper-bound for an sstable. 
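The #72057594037927935,15 suffixes in the expected boundaries below are internal-key trailers: the sequence number is the maximum 56-bit value, used as a sentinel when a range-tombstone end key becomes a file boundary, and the trailing 15 is the value-type tag for a range deletion. A quick check of the arithmetic:

#include <cstdint>

// Sequence numbers share the 8-byte internal-key trailer with an 8-bit value
// type, leaving 56 bits; the largest representable value matches the sentinel
// shown in the expected file boundaries.
static_assert((uint64_t{1} << 56) - 1 == 72057594037927935ULL,
              "max 56-bit sequence number");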
- - const int kNumFilesPerLevel = 2, kValueBytes = 4 << 10; - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.level0_file_num_compaction_trigger = kNumFilesPerLevel; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(2 /* num_entries_flush */)); - options.target_file_size_base = kValueBytes; - options.disable_auto_compactions = true; - // disable it for now, otherwise the L1 files are going be cut before data 1: - // L1: [0] [1,4] - // L2: [0,0] - // because the grandparent file is between [0]->[1] and it's size is more than - // 1/8 of target size (4k). - options.level_compaction_dynamic_file_size = false; - - DestroyAndReopen(options); - - // Create an initial sstable at L2: - // [key000000#1,1, key000000#1,1] - ASSERT_OK(Put(Key(0), "")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // A snapshot protects the range tombstone from dropping due to - // becoming obsolete. - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(2 * kNumFilesPerLevel))); - - // Create 2 additional sstables in L0. Note that the first sstable - // contains the range tombstone. - // [key000000#3,1, key000004#72057594037927935,15] - // [key000001#5,1, key000002#6,1] - Random rnd(301); - std::string value = rnd.RandomString(kValueBytes); - for (int j = 0; j < kNumFilesPerLevel; ++j) { - // Give files overlapping key-ranges to prevent a trivial move when we - // compact from L0 to L1. - ASSERT_OK(Put(Key(j), value)); - ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value)); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(j + 1, NumTableFilesAtLevel(0)); - } - // Compact the 2 L0 sstables to L1, resulting in the following LSM. There - // are 2 sstables generated in L1 due to the target_file_size_base setting. - // L1: - // [key000000#3,1, key000002#72057594037927935,15] - // [key000002#6,1, key000004#72057594037927935,15] - // L2: - // [key000000#1,1, key000000#1,1] - MoveFilesToLevel(1); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - - { - // Compact the second sstable in L1: - // L1: - // [key000000#3,1, key000002#72057594037927935,15] - // L2: - // [key000000#1,1, key000000#1,1] - // [key000002#6,1, key000004#72057594037927935,15] - // - // At the same time, verify the compaction does not cause the key at the - // endpoint (key000002#6,1) to disappear. - ASSERT_EQ(value, Get(Key(2))); - auto begin_str = Key(3); - const ROCKSDB_NAMESPACE::Slice begin = begin_str; - ASSERT_OK(dbfull()->TEST_CompactRange(1, &begin, nullptr)); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - ASSERT_EQ(2, NumTableFilesAtLevel(2)); - ASSERT_EQ(value, Get(Key(2))); - } - - { - // Compact the first sstable in L1. This should be copacetic, but - // was previously resulting in overlapping sstables in L2 due to - // mishandling of the range tombstone end-key when used as the - // largest key for an sstable. 
The resulting LSM structure should - // be: - // - // L2: - // [key000000#1,1, key000001#72057594037927935,15] - // [key000001#5,1, key000002#72057594037927935,15] - // [key000002#6,1, key000004#72057594037927935,15] - auto begin_str = Key(0); - const ROCKSDB_NAMESPACE::Slice begin = begin_str; - ASSERT_OK(dbfull()->TEST_CompactRange(1, &begin, &begin)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); - ASSERT_EQ(3, NumTableFilesAtLevel(2)); - } - - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, UnorderedTombstones) { - // Regression test for #2752. Range delete tombstones between - // different snapshot stripes are not stored in order, so the first - // tombstone of each snapshot stripe should be checked as a smallest - // candidate. - Options options = CurrentOptions(); - DestroyAndReopen(options); - - auto cf = db_->DefaultColumnFamily(); - - ASSERT_OK(db_->Put(WriteOptions(), cf, "a", "a")); - ASSERT_OK(db_->Flush(FlushOptions(), cf)); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "b", "c")); - // Hold a snapshot to separate these two delete ranges. - auto snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "a", "b")); - ASSERT_OK(db_->Flush(FlushOptions(), cf)); - db_->ReleaseSnapshot(snapshot); - - std::vector> files; - dbfull()->TEST_GetFilesMetaData(cf, &files); - ASSERT_EQ(1, files[0].size()); - ASSERT_EQ("a", files[0][0].smallest.user_key()); - ASSERT_EQ("c", files[0][0].largest.user_key()); - - std::string v; - auto s = db_->Get(ReadOptions(), "a", &v); - ASSERT_TRUE(s.IsNotFound()); -} - -class MockMergeOperator : public MergeOperator { - // Mock non-associative operator. Non-associativity is expressed by lack of - // implementation for any `PartialMerge*` functions. - public: - bool FullMergeV2(const MergeOperationInput& merge_in, - MergeOperationOutput* merge_out) const override { - assert(merge_out != nullptr); - merge_out->new_value = merge_in.operand_list.back().ToString(); - return true; - } - - const char* Name() const override { return "MockMergeOperator"; } -}; - -TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) { - // This test uses a non-associative merge operator since that is a convenient - // way to get compaction to write out files with overlapping user-keys at the - // endpoints. Note, however, overlapping endpoints can also occur with other - // value types (Put, etc.), assuming the right snapshots are present. 
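For contrast with the non-associative mock above, an operator that can combine neighboring operands would typically derive from AssociativeMergeOperator, which gives compaction a partial-merge path; a minimal sketch (the concatenation semantics are illustrative and not part of these tests):

#include <string>
#include "rocksdb/merge_operator.h"
#include "rocksdb/slice.h"

// Sketch of an associative operator: Merge() serves both as the full merge
// and as the rule for combining two neighboring operands during compaction.
class ConcatMergeOperator : public rocksdb::AssociativeMergeOperator {
 public:
  bool Merge(const rocksdb::Slice& /*key*/,
             const rocksdb::Slice* existing_value,
             const rocksdb::Slice& value, std::string* new_value,
             rocksdb::Logger* /*logger*/) const override {
    new_value->clear();
    if (existing_value != nullptr) {
      new_value->assign(existing_value->data(), existing_value->size());
    }
    new_value->append(value.data(), value.size());
    return true;
  }

  const char* Name() const override { return "ConcatMergeOperator"; }
};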
- const int kFileBytes = 1 << 20; - const int kValueBytes = 1 << 10; - const int kNumFiles = 4; - - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.merge_operator.reset(new MockMergeOperator()); - options.target_file_size_base = kFileBytes; - Reopen(options); - - // Push dummy data to L3 so that our actual test files on L0-L2 - // will not be considered "bottommost" level, otherwise compaction - // may prevent us from creating overlapping user keys - // as on the bottommost layer MergeHelper - ASSERT_OK(db_->Merge(WriteOptions(), "key", "dummy")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(3); - - Random rnd(301); - const Snapshot* snapshot = nullptr; - for (int i = 0; i < kNumFiles; ++i) { - for (int j = 0; j < kFileBytes / kValueBytes; ++j) { - auto value = rnd.RandomString(kValueBytes); - ASSERT_OK(db_->Merge(WriteOptions(), "key", value)); - } - if (i == kNumFiles - 1) { - // Take snapshot to prevent covered merge operands from being dropped by - // compaction. - snapshot = db_->GetSnapshot(); - // The DeleteRange is the last write so all merge operands are covered. - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - "key", "key_")); - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); - std::string value; - ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound()); - - ASSERT_OK(dbfull()->TEST_CompactRange( - 0 /* level */, nullptr /* begin */, nullptr /* end */, - nullptr /* column_family */, true /* disallow_trivial_move */)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - // Now we have multiple files at L1 all containing a single user key, thus - // guaranteeing overlap in the file endpoints. - ASSERT_GT(NumTableFilesAtLevel(1), 1); - - // Verify no merge operands reappeared after the compaction. - ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound()); - - // Compact and verify again. It's worthwhile because now the files have - // tighter endpoints, so we can verify that doesn't mess anything up. - ASSERT_OK(dbfull()->TEST_CompactRange( - 1 /* level */, nullptr /* begin */, nullptr /* end */, - nullptr /* column_family */, true /* disallow_trivial_move */)); - ASSERT_GT(NumTableFilesAtLevel(2), 1); - ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound()); - - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, UntruncatedTombstoneDoesNotDeleteNewerKey) { - // Verify a key newer than a range tombstone cannot be deleted by being - // compacted to the bottom level (and thus having its seqnum zeroed) before - // the range tombstone. This used to happen when range tombstones were - // untruncated on reads such that they extended past their file boundaries. - // - // Test summary: - // - // - L1 is bottommost. - // - A couple snapshots are strategically taken to prevent seqnums from being - // zeroed, range tombstone from being dropped, merge operands from being - // dropped, and merge operands from being combined. - // - Left half of files in L1 all have same user key, ensuring their file - // boundaries overlap. In the past this would cause range tombstones to be - // untruncated. - // - Right half of L1 files all have different keys, ensuring no overlap. - // - A range tombstone spans all L1 keys, so it is stored in every L1 file. - // - Keys in the right side of the key-range are overwritten. 
These are - // compacted down to L1 after releasing snapshots such that their seqnums - // will be zeroed. - // - A full range scan is performed. If the tombstone in the left L1 files - // were untruncated, it would now cover keys newer than it (but with zeroed - // seqnums) in the right L1 files. - const int kFileBytes = 1 << 20; - const int kValueBytes = 1 << 10; - const int kNumFiles = 4; - const int kMaxKey = kNumFiles * kFileBytes / kValueBytes; - const int kKeysOverwritten = 10; - - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.merge_operator.reset(new MockMergeOperator()); - options.num_levels = 2; - options.target_file_size_base = kFileBytes; - Reopen(options); - - Random rnd(301); - // - snapshots[0] prevents merge operands from being combined during - // compaction. - // - snapshots[1] prevents merge operands from being dropped due to the - // covering range tombstone. - const Snapshot* snapshots[] = {nullptr, nullptr}; - for (int i = 0; i < kNumFiles; ++i) { - for (int j = 0; j < kFileBytes / kValueBytes; ++j) { - auto value = rnd.RandomString(kValueBytes); - std::string key; - if (i < kNumFiles / 2) { - key = Key(0); - } else { - key = Key(1 + i * kFileBytes / kValueBytes + j); - } - ASSERT_OK(db_->Merge(WriteOptions(), key, value)); - } - if (i == 0) { - snapshots[0] = db_->GetSnapshot(); - } - if (i == kNumFiles - 1) { - snapshots[1] = db_->GetSnapshot(); - // The DeleteRange is the last write so all merge operands are covered. - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(0), Key(kMaxKey + 1))); - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); - - auto get_key_count = [this]() -> int { - auto* iter = db_->NewIterator(ReadOptions()); - assert(iter->status().ok()); - iter->SeekToFirst(); - int keys_found = 0; - for (; iter->Valid(); iter->Next()) { - ++keys_found; - } - delete iter; - return keys_found; - }; - - // All keys should be covered - ASSERT_EQ(0, get_key_count()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */, - nullptr /* end_key */)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - // Roughly the left half of L1 files should have overlapping boundary keys, - // while the right half should not. - ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles); - - // Now overwrite a few keys that are in L1 files that definitely don't have - // overlapping boundary keys. - for (int i = kMaxKey; i > kMaxKey - kKeysOverwritten; --i) { - auto value = rnd.RandomString(kValueBytes); - ASSERT_OK(db_->Merge(WriteOptions(), Key(i), value)); - } - ASSERT_OK(db_->Flush(FlushOptions())); - - // The overwritten keys are in L0 now, so clearly aren't covered by the range - // tombstone in L1. - ASSERT_EQ(kKeysOverwritten, get_key_count()); - - // Release snapshots so seqnums can be zeroed when L0->L1 happens. 
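Sequence-number zeroing is not asserted directly here, but per-file sequence ranges are visible through the public metadata API; a sketch, assuming db is an already-open rocksdb::DB*:

#include <cstdio>
#include <vector>
#include "rocksdb/db.h"
#include "rocksdb/metadata.h"

// Sketch: print each live SST's level and sequence-number range. Files whose
// entries had their seqnums zeroed during bottommost compaction report 0.
void PrintFileSeqnoRanges(rocksdb::DB* db) {
  std::vector<rocksdb::LiveFileMetaData> files;
  db->GetLiveFilesMetaData(&files);
  for (const auto& f : files) {
    std::fprintf(stderr, "L%d %s: seqnos [%llu, %llu]\n", f.level,
                 f.name.c_str(), (unsigned long long)f.smallest_seqno,
                 (unsigned long long)f.largest_seqno);
  }
}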
- db_->ReleaseSnapshot(snapshots[0]); - db_->ReleaseSnapshot(snapshots[1]); - - auto begin_key_storage = Key(kMaxKey - kKeysOverwritten + 1); - auto end_key_storage = Key(kMaxKey); - Slice begin_key(begin_key_storage); - Slice end_key(end_key_storage); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin_key, &end_key)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles); - - ASSERT_EQ(kKeysOverwritten, get_key_count()); -} - -TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) { - // Exposes a bug where we were using - // `RangeDelPositioningMode::kBackwardTraversal` while scanning merge operands - // in the forward direction. Confusingly, this case happened during - // `DBIter::Prev`. It could cause assertion failure, or reappearing keys. - const int kFileBytes = 1 << 20; - const int kValueBytes = 1 << 10; - // Need multiple keys so we can get results when calling `Prev()` after - // `SeekToLast()`. - const int kNumKeys = 3; - const int kNumFiles = 4; - - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.merge_operator.reset(new MockMergeOperator()); - options.target_file_size_base = kFileBytes; - Reopen(options); - - Random rnd(301); - const Snapshot* snapshot = nullptr; - for (int i = 0; i < kNumFiles; ++i) { - for (int j = 0; j < kFileBytes / kValueBytes; ++j) { - auto value = rnd.RandomString(kValueBytes); - ASSERT_OK(db_->Merge(WriteOptions(), Key(j % kNumKeys), value)); - if (i == 0 && j == kNumKeys) { - // Take snapshot to prevent covered merge operands from being dropped or - // merged by compaction. - snapshot = db_->GetSnapshot(); - // Do a DeleteRange near the beginning so only the oldest merge operand - // for each key is covered. This ensures the sequence of events: - // - // - `DBIter::Prev()` is called - // - After several same versions of the same user key are encountered, - // it decides to seek using `DBIter::FindValueForCurrentKeyUsingSeek`. - // - Binary searches to the newest version of the key, which is in the - // leftmost file containing the user key. - // - Scans forwards to collect all merge operands. Eventually reaches - // the rightmost file containing the oldest merge operand, which - // should be covered by the `DeleteRange`. If `RangeDelAggregator` - // were not properly using `kForwardTraversal` here, that operand - // would reappear. 
- ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(0), Key(kNumKeys + 1))); - } - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */, - nullptr /* end_key */)); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_GT(NumTableFilesAtLevel(1), 1); - - auto* iter = db_->NewIterator(ReadOptions()); - ASSERT_OK(iter->status()); - iter->SeekToLast(); - int keys_found = 0; - for (; iter->Valid(); iter->Prev()) { - ++keys_found; - } - delete iter; - ASSERT_EQ(kNumKeys, keys_found); - - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) { - const int kFileBytes = 1 << 20; - - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = kFileBytes; - Reopen(options); - - ASSERT_OK(Put(Key(0), "a")); - const Snapshot* snapshot = db_->GetSnapshot(); - - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(10))); - - ASSERT_OK(db_->Flush(FlushOptions())); - - ReadOptions read_opts; - read_opts.snapshot = snapshot; - auto* iter = db_->NewIterator(read_opts); - ASSERT_OK(iter->status()); - - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(Key(0), iter->key()); - - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - delete iter; - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeysInImmMemTables) { - const int kFileBytes = 1 << 20; - - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = kFileBytes; - Reopen(options); - - // block flush thread -> pin immtables in memory - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator", - "DBImpl::BGWorkFlush"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(0), "a")); - std::unique_ptr> - snapshot(db_->GetSnapshot(), - [this](const Snapshot* s) { db_->ReleaseSnapshot(s); }); - - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(10))); - - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - - ReadOptions read_opts; - read_opts.snapshot = snapshot.get(); - std::unique_ptr iter(db_->NewIterator(read_opts)); - ASSERT_OK(iter->status()); - - TEST_SYNC_POINT("SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator"); - - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(Key(0), iter->key()); - - iter->Next(); - ASSERT_FALSE(iter->Valid()); -} - -TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) { - // Adapted from - // https://github.com/cockroachdb/cockroach/blob/de8b3ea603dd1592d9dc26443c2cc92c356fbc2f/pkg/storage/engine/rocksdb_test.go#L1267-L1398. - // Regression test for issue where range tombstone was written to more files - // than necessary when it began exactly at the begin key in the next - // compaction output file. - const int kFileBytes = 1 << 20; - const int kValueBytes = 4 << 10; - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - // Have a bit of slack in the size limits but we enforce them more strictly - // when manually flushing/compacting. 
- options.max_compaction_bytes = 2 * kFileBytes; - options.target_file_size_base = 2 * kFileBytes; - options.write_buffer_size = 2 * kFileBytes; - Reopen(options); - - Random rnd(301); - for (char first_char : {'a', 'b', 'c'}) { - for (int i = 0; i < kFileBytes / kValueBytes; ++i) { - std::string key(1, first_char); - key.append(Key(i)); - std::string value = rnd.RandomString(kValueBytes); - ASSERT_OK(Put(key, value)); - } - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - } - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(3, NumTableFilesAtLevel(2)); - - // Populate the memtable lightly while spanning the whole key-space. The - // setting of `max_compaction_bytes` will cause the L0->L1 to output multiple - // files to prevent a large L1->L2 compaction later. - ASSERT_OK(Put("a", "val")); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - "c" + Key(1), "d")); - // Our compaction output file cutting logic currently only considers point - // keys. So, in order for the range tombstone to have a chance at landing at - // the start of a new file, we need a point key at the range tombstone's - // start. - // TODO(ajkr): remove this `Put` after file cutting accounts for range - // tombstones (#3977). - ASSERT_OK(Put("c" + Key(1), "value")); - ASSERT_OK(db_->Flush(FlushOptions())); - - // Ensure manual L0->L1 compaction cuts the outputs before the range tombstone - // and the range tombstone is only placed in the second SST. - std::string begin_key_storage("c" + Key(1)); - Slice begin_key(begin_key_storage); - std::string end_key_storage("d"); - Slice end_key(end_key_storage); - ASSERT_OK(dbfull()->TEST_CompactRange( - 0 /* level */, &begin_key /* begin */, &end_key /* end */, - nullptr /* column_family */, true /* disallow_trivial_move */)); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - - std::vector all_metadata; - std::vector l1_metadata; - db_->GetLiveFilesMetaData(&all_metadata); - for (const auto& metadata : all_metadata) { - if (metadata.level == 1) { - l1_metadata.push_back(metadata); - } - } - std::sort(l1_metadata.begin(), l1_metadata.end(), - [&](const LiveFileMetaData& a, const LiveFileMetaData& b) { - return options.comparator->Compare(a.smallestkey, b.smallestkey) < - 0; - }); - ASSERT_EQ("a", l1_metadata[0].smallestkey); - ASSERT_EQ("a", l1_metadata[0].largestkey); - ASSERT_EQ("c" + Key(1), l1_metadata[1].smallestkey); - ASSERT_EQ("d", l1_metadata[1].largestkey); - - TablePropertiesCollection all_table_props; - ASSERT_OK(db_->GetPropertiesOfAllTables(&all_table_props)); - int64_t num_range_deletions = 0; - for (const auto& name_and_table_props : all_table_props) { - const auto& name = name_and_table_props.first; - const auto& table_props = name_and_table_props.second; - // The range tombstone should only be output to the second L1 SST. 
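The per-SST assertion below reads table properties. The same information is available to applications, for example to count how many range-deletion entries the live SSTs of the default column family carry; a sketch, assuming db is an already-open rocksdb::DB*:

#include <cstdint>
#include "rocksdb/db.h"
#include "rocksdb/table_properties.h"

// Sketch: sum num_range_deletions across all live SSTs of the default
// column family.
uint64_t CountRangeTombstoneEntries(rocksdb::DB* db) {
  rocksdb::TablePropertiesCollection props;
  uint64_t total = 0;
  if (db->GetPropertiesOfAllTables(&props).ok()) {
    for (const auto& file_and_props : props) {
      total += file_and_props.second->num_range_deletions;
    }
  }
  return total;
}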
- if (name.size() >= l1_metadata[1].name.size() && - name.substr(name.size() - l1_metadata[1].name.size()) - .compare(l1_metadata[1].name) == 0) { - ASSERT_EQ(1, table_props->num_range_deletions); - ++num_range_deletions; - } else { - ASSERT_EQ(0, table_props->num_range_deletions); - } - } - ASSERT_EQ(1, num_range_deletions); -} - -TEST_F(DBRangeDelTest, LevelCompactOutputCutAtRangeTombstoneForTtlFiles) { - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.compaction_pri = kMinOverlappingRatio; - options.disable_auto_compactions = true; - options.ttl = 24 * 60 * 60; // 24 hours - options.target_file_size_base = 8 << 10; - env_->SetMockSleep(); - options.env = env_; - DestroyAndReopen(options); - - Random rnd(301); - // Fill some data so that future compactions are not bottommost level - // compaction, and hence they would try cut around files for ttl - for (int i = 5; i < 10; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(1 << 10))); - } - ASSERT_OK(Flush()); - MoveFilesToLevel(3); - ASSERT_EQ("0,0,0,1", FilesPerLevel()); - - for (int i = 5; i < 10; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(1 << 10))); - } - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - ASSERT_EQ("0,1,0,1", FilesPerLevel()); - - env_->MockSleepForSeconds(20 * 60 * 60); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(11), Key(12))); - ASSERT_OK(Put(Key(0), rnd.RandomString(1 << 10))); - ASSERT_OK(Flush()); - ASSERT_EQ("1,1,0,1", FilesPerLevel()); - // L0 file is new, L1 and L3 file are old and qualified for TTL - env_->MockSleepForSeconds(10 * 60 * 60); - MoveFilesToLevel(1); - // L1 output should be cut into 3 files: - // File 0: Key(0) - // File 1: (qualified for TTL): Key(5) - Key(10) - // File 1: DeleteRange [11, 12) - ASSERT_EQ("0,3,0,1", FilesPerLevel()); -} - -// Test SST partitioner cut after every single key -class SingleKeySstPartitioner : public SstPartitioner { - public: - const char* Name() const override { return "SingleKeySstPartitioner"; } - - PartitionerResult ShouldPartition( - const PartitionerRequest& /*request*/) override { - return kRequired; - } - - bool CanDoTrivialMove(const Slice& /*smallest_user_key*/, - const Slice& /*largest_user_key*/) override { - return false; - } -}; - -class SingleKeySstPartitionerFactory : public SstPartitionerFactory { - public: - static const char* kClassName() { return "SingleKeySstPartitionerFactory"; } - const char* Name() const override { return kClassName(); } - - std::unique_ptr CreatePartitioner( - const SstPartitioner::Context& /* context */) const override { - return std::unique_ptr(new SingleKeySstPartitioner()); - } -}; - -TEST_F(DBRangeDelTest, CompactionEmitRangeTombstoneToSSTPartitioner) { - Options options = CurrentOptions(); - auto factory = std::make_shared(); - options.sst_partitioner_factory = factory; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - Random rnd(301); - // range deletion keys are not processed when compacting to bottommost level, - // so creating a file at older level to make the next compaction not - // bottommost level - ASSERT_OK(db_->Put(WriteOptions(), Key(4), rnd.RandomString(10))); - ASSERT_OK(Flush()); - MoveFilesToLevel(5); - - ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(10))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2), - Key(5))); - ASSERT_OK(Flush()); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - MoveFilesToLevel(1); - // SSTPartitioner decides to cut when range tombstone start 
key is passed to - // it. Note that the range tombstone [2, 5) itself span multiple keys, but we - // are not able to partition within its range yet. - ASSERT_EQ(2, NumTableFilesAtLevel(1)); -} - -TEST_F(DBRangeDelTest, OversizeCompactionGapBetweenPointKeyAndTombstone) { - // L2 has 2 files - // L2_0: 0, 1, 2, 3, 4 - // L2_1: 5, 6, 7 - // L0 has 1 file - // L0: 0, [5, 6), 8 - // max_compaction_bytes is less than the size of L2_0 and L2_1. - // When compacting L0 into L1, it should split into 3 files: - // compaction output should cut before key 5 and key 8 to - // limit future compaction size. - const int kNumPerFile = 4, kNumFiles = 2; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.target_file_size_base = 9 * 1024; - options.max_compaction_bytes = 9 * 1024; - DestroyAndReopen(options); - Random rnd(301); - for (int i = 0; i < kNumFiles; ++i) { - std::vector values; - for (int j = 0; j < kNumPerFile; j++) { - values.push_back(rnd.RandomString(3 << 10)); - ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j])); - } - } - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - MoveFilesToLevel(2); - ASSERT_EQ(2, NumTableFilesAtLevel(2)); - ASSERT_OK(Put(Key(0), rnd.RandomString(1 << 10))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(5), - Key(6))); - ASSERT_OK(Put(Key(8), rnd.RandomString(1 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - ASSERT_EQ(3, NumTableFilesAtLevel(1)); -} - -TEST_F(DBRangeDelTest, OversizeCompactionGapBetweenTombstone) { - // L2 has two files - // L2_0: 0, 1, 2, 3, 4. L2_1: 5, 6, 7 - // L0 has two range tombstones [0, 1), [7, 8). - // max_compaction_bytes is less than the size of L2_0. - // When compacting L0 into L1, the two range tombstones should be - // split into two files. - const int kNumPerFile = 4, kNumFiles = 2; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.target_file_size_base = 9 * 1024; - options.max_compaction_bytes = 9 * 1024; - DestroyAndReopen(options); - Random rnd(301); - for (int i = 0; i < kNumFiles; ++i) { - std::vector values; - // Write 12K (4 values, each 3K) - for (int j = 0; j < kNumPerFile; j++) { - values.push_back(rnd.RandomString(3 << 10)); - ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j])); - } - } - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - MoveFilesToLevel(2); - ASSERT_EQ(2, NumTableFilesAtLevel(2)); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(1))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(7), - Key(8))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - // This is L0 -> L1 compaction - // The two range tombstones are broken up into two output files - // to limit compaction size. - ASSERT_EQ(2, NumTableFilesAtLevel(1)); -} - -TEST_F(DBRangeDelTest, OversizeCompactionPointKeyWithinRangetombstone) { - // L2 has two files - // L2_0: 0, 1, 2, 3, 4. L2_1: 6, 7, 8 - // L0 has [0, 9) and point key 5 - // max_compaction_bytes is less than the size of L2_0. - // When compacting L0 into L1, the compaction should cut at point key 5. 
- Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.target_file_size_base = 9 * 1024; - options.max_compaction_bytes = 9 * 1024; - DestroyAndReopen(options); - Random rnd(301); - for (int i = 0; i < 9; ++i) { - if (i == 5) { - ++i; - } - ASSERT_OK(Put(Key(i), rnd.RandomString(3 << 10))); - } - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - MoveFilesToLevel(2); - ASSERT_EQ(2, NumTableFilesAtLevel(2)); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(9))); - ASSERT_OK(Put(Key(5), rnd.RandomString(1 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); -} - -TEST_F(DBRangeDelTest, OverlappedTombstones) { - const int kNumPerFile = 4, kNumFiles = 2; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.target_file_size_base = 9 * 1024; - options.max_compaction_bytes = 9 * 1024; - DestroyAndReopen(options); - Random rnd(301); - for (int i = 0; i < kNumFiles; ++i) { - std::vector values; - // Write 12K (4 values, each 3K) - for (int j = 0; j < kNumPerFile; j++) { - values.push_back(rnd.RandomString(3 << 10)); - ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j])); - } - } - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - MoveFilesToLevel(2); - ASSERT_EQ(2, NumTableFilesAtLevel(2)); - - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1), - Key((kNumFiles)*kNumPerFile + 1))); - ASSERT_OK(db_->Flush(FlushOptions())); - - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - - // The tombstone range is not broken up into multiple SSTs which may incur a - // large compaction with L2. 
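NumTableFilesAtLevel() is a test-harness helper. Outside the harness, the per-level file counts asserted throughout these tests come from column-family metadata; a sketch, assuming db is an already-open rocksdb::DB*:

#include <cstddef>
#include "rocksdb/db.h"
#include "rocksdb/metadata.h"

// Sketch: number of SST files at a given level of the default column family,
// roughly what NumTableFilesAtLevel() reports in these tests.
size_t FilesAtLevel(rocksdb::DB* db, size_t level) {
  rocksdb::ColumnFamilyMetaData meta;
  db->GetColumnFamilyMetaData(&meta);
  if (level >= meta.levels.size()) {
    return 0;
  }
  return meta.levels[level].files.size();
}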
- ASSERT_EQ(1, NumTableFilesAtLevel(1)); - std::vector> files; - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); -} - -TEST_F(DBRangeDelTest, OverlappedKeys) { - const int kNumPerFile = 4, kNumFiles = 2; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.target_file_size_base = 9 * 1024; - options.max_compaction_bytes = 9 * 1024; - DestroyAndReopen(options); - Random rnd(301); - for (int i = 0; i < kNumFiles; ++i) { - std::vector values; - // Write 12K (4 values, each 3K) - for (int j = 0; j < kNumPerFile; j++) { - values.push_back(rnd.RandomString(3 << 10)); - ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j])); - } - } - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - MoveFilesToLevel(2); - ASSERT_EQ(2, NumTableFilesAtLevel(2)); - - for (int i = 1; i < kNumFiles * kNumPerFile + 1; i++) { - ASSERT_OK(Put(Key(i), "0x123")); - } - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - // The key range is broken up into three SSTs to avoid a future big compaction - // with the grandparent - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - ASSERT_EQ(3, NumTableFilesAtLevel(1)); - - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - // L1->L2 compaction size is limited to max_compaction_bytes - ASSERT_EQ(3, NumTableFilesAtLevel(2)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); -} - -TEST_F(DBRangeDelTest, IteratorRefresh) { - // Refreshing an iterator after a range tombstone is added should cause the - // deleted range of keys to disappear. - for (bool sv_changed : {false, true}) { - ASSERT_OK(db_->Put(WriteOptions(), "key1", "value1")); - ASSERT_OK(db_->Put(WriteOptions(), "key2", "value2")); - - auto* iter = db_->NewIterator(ReadOptions()); - ASSERT_OK(iter->status()); - - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - "key2", "key3")); - - if (sv_changed) { - ASSERT_OK(db_->Flush(FlushOptions())); - } - - ASSERT_OK(iter->Refresh()); - ASSERT_OK(iter->status()); - iter->SeekToFirst(); - ASSERT_EQ("key1", iter->key()); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - delete iter; - } -} - -void VerifyIteratorReachesEnd(InternalIterator* iter) { - ASSERT_TRUE(!iter->Valid() && iter->status().ok()); -} - -void VerifyIteratorReachesEnd(Iterator* iter) { - ASSERT_TRUE(!iter->Valid() && iter->status().ok()); -} - -TEST_F(DBRangeDelTest, IteratorReseek) { - // Range tombstone triggers reseek (seeking to a range tombstone end key) in - // merging iterator. Test set up: - // one memtable: range tombstone [0, 1) - // one immutable memtable: range tombstone [1, 2) - // one L0 file with range tombstone [2, 3) - // one L1 file with range tombstone [3, 4) - // Seek(0) should trigger cascading reseeks at all levels below memtable. - // Seek(1) should trigger cascading reseeks at all levels below immutable - // memtable. SeekToFirst and SeekToLast trigger no reseek. 
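The reseek counts asserted below are read from the thread-local perf context. An application can observe the same counter roughly as follows (a sketch, assuming db is an already-open rocksdb::DB*; the seek key is illustrative):

#include <cstdio>
#include "rocksdb/db.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/perf_level.h"

// Sketch: enable perf counters, run a seek, and report how many times the
// merging iterator reseeked because of range tombstones.
void ReportRangeDelReseeks(rocksdb::DB* db) {
  rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableCount);
  rocksdb::get_perf_context()->Reset();

  rocksdb::Iterator* iter = db->NewIterator(rocksdb::ReadOptions());
  iter->Seek("some_key");
  std::fprintf(stderr, "range-del reseeks: %llu\n",
               (unsigned long long)
                   rocksdb::get_perf_context()->internal_range_del_reseek_count);
  delete iter;
}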
- Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - - DestroyAndReopen(options); - // L1 - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3), - Key(4))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - // L0 - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2), - Key(3))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - // Immutable memtable - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1), - Key(2))); - ASSERT_OK(static_cast_with_check(db_)->TEST_SwitchMemtable()); - std::string value; - ASSERT_TRUE(dbfull()->GetProperty(db_->DefaultColumnFamily(), - "rocksdb.num-immutable-mem-table", &value)); - ASSERT_EQ(1, std::stoi(value)); - // live memtable - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(1))); - // this memtable is still active - ASSERT_TRUE(dbfull()->GetProperty(db_->DefaultColumnFamily(), - "rocksdb.num-immutable-mem-table", &value)); - ASSERT_EQ(1, std::stoi(value)); - - auto iter = db_->NewIterator(ReadOptions()); - get_perf_context()->Reset(); - iter->Seek(Key(0)); - // Reseeked immutable memtable, L0 and L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 3); - VerifyIteratorReachesEnd(iter); - get_perf_context()->Reset(); - iter->SeekForPrev(Key(1)); - // Reseeked L0 and L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2); - VerifyIteratorReachesEnd(iter); - get_perf_context()->Reset(); - iter->SeekToFirst(); - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0); - VerifyIteratorReachesEnd(iter); - iter->SeekToLast(); - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0); - VerifyIteratorReachesEnd(iter); - delete iter; -} - -TEST_F(DBRangeDelTest, ReseekDuringNextAndPrev) { - // Range tombstone triggers reseek during Next()/Prev() in merging iterator. - // Test set up: - // memtable has: [0, 1) [2, 3) - // L0 has: 2 - // L1 has: 1, 2, 3 - // Seek(0) will reseek to 1 for L0 and L1. Seek(1) will not trigger any - // reseek. Then Next() determines 2 is covered by [2, 3), it will try to - // reseek to 3 for L0 and L1. Similar story for Prev() and SeekForPrev() is - // tested. 
- Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - - DestroyAndReopen(options); - // L1 - ASSERT_OK(db_->Put(WriteOptions(), Key(1), "foo")); - ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo")); - ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - // L0 - ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo")); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - // Memtable - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(1))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2), - Key(3))); - - auto iter = db_->NewIterator(ReadOptions()); - auto iter_test_forward = [&] { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(1)); - - get_perf_context()->Reset(); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(3)); - // Reseeked L0 and L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2); - - // Next to Prev - get_perf_context()->Reset(); - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(1)); - // Reseeked L0 and L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2); - - // Prev to Next - get_perf_context()->Reset(); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(3)); - // Reseeked L0 and L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2); - - iter->Next(); - VerifyIteratorReachesEnd(iter); - }; - - get_perf_context()->Reset(); - iter->Seek(Key(0)); - // Reseeked L0 and L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2); - iter_test_forward(); - get_perf_context()->Reset(); - iter->Seek(Key(1)); - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0); - iter_test_forward(); - - get_perf_context()->Reset(); - iter->SeekForPrev(Key(2)); - // Reseeked L0 and L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2); - iter_test_forward(); - get_perf_context()->Reset(); - iter->SeekForPrev(Key(1)); - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0); - iter_test_forward(); - - get_perf_context()->Reset(); - iter->SeekToFirst(); - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 0); - iter_test_forward(); - - iter->SeekToLast(); - iter->Prev(); - iter_test_forward(); - delete iter; -} - -TEST_F(DBRangeDelTest, TombstoneFromCurrentLevel) { - // Range tombstone triggers reseek when covering key from the same level. - // in merging iterator. Test set up: - // memtable has: [0, 1) - // L0 has: [2, 3), 2 - // L1 has: 1, 2, 3 - // Seek(0) will reseek to 1 for L0 and L1. - // Then Next() will reseek to 3 for L1 since 2 in L0 is covered by [2, 3) in - // L0. 
- Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - - DestroyAndReopen(options); - // L1 - ASSERT_OK(db_->Put(WriteOptions(), Key(1), "foo")); - ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo")); - ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - // L0 - ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo")); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2), - Key(3))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - // Memtable - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(1))); - - auto iter = db_->NewIterator(ReadOptions()); - get_perf_context()->Reset(); - iter->Seek(Key(0)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(1)); - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 2); - - get_perf_context()->Reset(); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(3)); - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 1); - - delete iter; -} - -class TombstoneTestSstPartitioner : public SstPartitioner { - public: - const char* Name() const override { return "SingleKeySstPartitioner"; } - - PartitionerResult ShouldPartition( - const PartitionerRequest& request) override { - if (cmp->Compare(*request.current_user_key, DBTestBase::Key(5)) == 0) { - return kRequired; - } else { - return kNotRequired; - } - } - - bool CanDoTrivialMove(const Slice& /*smallest_user_key*/, - const Slice& /*largest_user_key*/) override { - return false; - } - - const Comparator* cmp = BytewiseComparator(); -}; - -class TombstoneTestSstPartitionerFactory : public SstPartitionerFactory { - public: - static const char* kClassName() { - return "TombstoneTestSstPartitionerFactory"; - } - const char* Name() const override { return kClassName(); } - - std::unique_ptr CreatePartitioner( - const SstPartitioner::Context& /* context */) const override { - return std::unique_ptr(new TombstoneTestSstPartitioner()); - } -}; - -TEST_F(DBRangeDelTest, TombstoneAcrossFileBoundary) { - // Verify that a range tombstone across file boundary covers keys from older - // levels. Test set up: - // L1_0: 1, 3, [2, 6) L1_1: 5, 7, [2, 6) ([2, 6) is from compaction with - // L1_0) L2 has: 5 - // Seek(1) and then Next() should move the L1 level iterator to - // L1_1. Check if 5 is returned after Next(). - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 2 * 1024; - options.max_compaction_bytes = 2 * 1024; - - // Make sure L1 files are split before "5" - auto factory = std::make_shared(); - options.sst_partitioner_factory = factory; - - DestroyAndReopen(options); - - Random rnd(301); - // L2 - // the file should be smaller than max_compaction_bytes, otherwise the file - // will be cut before 7. 
- ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(1 << 9))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1_1 - ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(1 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(7), rnd.RandomString(1 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - // L1_0 - ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(1 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(3), rnd.RandomString(1 << 10))); - // Prevent keys being compacted away - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2), - Key(6))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - MoveFilesToLevel(1); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - - auto iter = db_->NewIterator(ReadOptions()); - get_perf_context()->Reset(); - iter->Seek(Key(1)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(1)); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(7)); - // 1 reseek into L2 when key 5 in L2 is covered by [2, 6) from L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 1); - - delete iter; - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, NonOverlappingTombstonAtBoundary) { - // Verify that a range tombstone across file boundary covers keys from older - // levels. - // Test set up: - // L1_0: 1, 3, [4, 7) L1_1: 6, 8, [4, 7) - // L2: 5 - // Note that [4, 7) is at end of L1_0 and not overlapping with any point key - // in L1_0. [4, 7) from L1_0 should cover 5 is sentinel works - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 2 * 1024; - options.level_compaction_dynamic_file_size = false; - DestroyAndReopen(options); - - Random rnd(301); - // L2 - ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1_1 - ASSERT_OK(db_->Put(WriteOptions(), Key(6), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(8), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - // L1_0 - ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(3), rnd.RandomString(4 << 10))); - // Prevent keys being compacted away - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(4), - Key(7))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - MoveFilesToLevel(1); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - - auto iter = db_->NewIterator(ReadOptions()); - iter->Seek(Key(3)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(3)); - get_perf_context()->Reset(); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(8)); - // 1 reseek into L1 since 5 from L2 is covered by [4, 7) from L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 1); - for (auto& k : {4, 5, 6}) { - get_perf_context()->Reset(); - iter->Seek(Key(k)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(8)); - // 1 reseek into L1 - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, 1); - } - delete iter; -} - 
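Several tests above pin data with a snapshot taken before the DeleteRange. The visibility rule they rely on can be reproduced with the public API alone; a standalone sketch (DB path and keys are illustrative):

#include <cassert>
#include <string>
#include "rocksdb/db.h"

int main() {
  rocksdb::DB* db = nullptr;
  rocksdb::Options options;
  options.create_if_missing = true;
  assert(rocksdb::DB::Open(options, "/tmp/snapshot_range_del_sketch", &db).ok());

  assert(db->Put(rocksdb::WriteOptions(), "key", "val").ok());
  const rocksdb::Snapshot* snapshot = db->GetSnapshot();
  assert(db->DeleteRange(rocksdb::WriteOptions(), db->DefaultColumnFamily(),
                         "a", "z").ok());

  std::string value;
  // Latest view: the range tombstone covers "key".
  assert(db->Get(rocksdb::ReadOptions(), "key", &value).IsNotFound());

  // Snapshot view: taken before the tombstone, so the key is still visible.
  rocksdb::ReadOptions snap_opts;
  snap_opts.snapshot = snapshot;
  assert(db->Get(snap_opts, "key", &value).ok());
  assert(value == "val");

  db->ReleaseSnapshot(snapshot);
  delete db;
  return 0;
}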
-TEST_F(DBRangeDelTest, OlderLevelHasNewerData) { - // L1_0: 1, 3, [2, 7) L1_1: 5, 6 at a newer sequence number than [2, 7) - // Compact L1_1 to L2. Seek(3) should not skip 5 or 6. - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 3 * 1024; - DestroyAndReopen(options); - - Random rnd(301); - // L1_0 - ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(3), rnd.RandomString(4 << 10))); - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2), - Key(7))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - // L1_1 - ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(6), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - MoveFilesToLevel(1); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - - auto key = Key(6); - Slice begin(key); - EXPECT_OK(dbfull()->TEST_CompactRange(1, &begin, nullptr)); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - auto iter = db_->NewIterator(ReadOptions()); - iter->Seek(Key(3)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(5)); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), Key(6)); - delete iter; - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, LevelBoundaryDefinedByTombstone) { - // L1 has: 1, 2, [4, 5) - // L2 has: 4 - // Seek(3), which is over all points keys in L1, check whether - // sentinel key from L1 works in this case. - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 3 * 1024; - DestroyAndReopen(options); - Random rnd(301); - // L2 - ASSERT_OK(db_->Put(WriteOptions(), Key(4), "foo")); - ASSERT_OK(db_->Flush(FlushOptions())); - const Snapshot* snapshot = db_->GetSnapshot(); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1_0 - ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(2), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(4), - Key(5))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - auto iter = db_->NewIterator(ReadOptions()); - iter->Seek(Key(3)); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - - get_perf_context()->Reset(); - iter->SeekForPrev(Key(5)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(2)); - db_->ReleaseSnapshot(snapshot); - delete iter; -} - -TEST_F(DBRangeDelTest, TombstoneOnlyFile) { - // L1_0: 1, 2, L1_1: [3, 5) - // L2: 3 - // Seek(2) then Next() should advance L1 iterator into L1_1. - // If sentinel works with tombstone only file, it should cover the key in L2. - // Similar story for SeekForPrev(4). 
- Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 3 * 1024; - - DestroyAndReopen(options); - Random rnd(301); - // L2 - ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1_0 - ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(2), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1_1 - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3), - Key(5))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - auto iter = db_->NewIterator(ReadOptions()); - iter->Seek(Key(2)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(2)); - iter->Next(); - VerifyIteratorReachesEnd(iter); - iter->SeekForPrev(Key(4)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(2)); - iter->Next(); - VerifyIteratorReachesEnd(iter); - delete iter; -} - -void VerifyIteratorKey(InternalIterator* iter, - const std::vector& expected_keys, - bool forward = true) { - for (auto& key : expected_keys) { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->user_key(), key); - if (forward) { - iter->Next(); - } else { - iter->Prev(); - } - } -} - -TEST_F(DBRangeDelTest, TombstoneOnlyLevel) { - // L1 [3, 5) - // L2 has: 3, 4 - // Any kind of iterator seek should skip 3 and 4 in L2. - // L1 level iterator should produce sentinel key. - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 3 * 1024; - - DestroyAndReopen(options); - // L2 - ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo")); - ASSERT_OK(db_->Put(WriteOptions(), Key(4), "bar")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1 - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3), - Key(5))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - auto iter = db_->NewIterator(ReadOptions()); - get_perf_context()->Reset(); - uint64_t expected_reseek = 0; - for (auto i = 0; i < 7; ++i) { - iter->Seek(Key(i)); - VerifyIteratorReachesEnd(iter); - if (i < 5) { - ++expected_reseek; - } - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, - expected_reseek); - iter->SeekForPrev(Key(i)); - VerifyIteratorReachesEnd(iter); - if (i > 2) { - ++expected_reseek; - } - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, - expected_reseek); - iter->SeekToFirst(); - VerifyIteratorReachesEnd(iter); - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, - ++expected_reseek); - iter->SeekToLast(); - VerifyIteratorReachesEnd(iter); - ASSERT_EQ(get_perf_context()->internal_range_del_reseek_count, - ++expected_reseek); - } - delete iter; - - // Check L1 LevelIterator behavior - ColumnFamilyData* cfd = - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(); - SuperVersion* sv = cfd->GetSuperVersion(); - Arena arena; - ReadOptions read_options; - MergeIteratorBuilder merge_iter_builder(&cfd->internal_comparator(), &arena, - false /* prefix seek */); - InternalIterator* 
level_iter = sv->current->TEST_GetLevelIterator( - read_options, &merge_iter_builder, 1 /* level */, true); - // This is needed to make LevelIterator range tombstone aware - auto miter = merge_iter_builder.Finish(); - auto k = Key(3); - IterKey target; - target.SetInternalKey(k, kMaxSequenceNumber, kValueTypeForSeek); - level_iter->Seek(target.GetInternalKey()); - // sentinel key (file boundary as a fake key) - VerifyIteratorKey(level_iter, {Key(5)}); - VerifyIteratorReachesEnd(level_iter); - - k = Key(5); - target.SetInternalKey(k, 0, kValueTypeForSeekForPrev); - level_iter->SeekForPrev(target.GetInternalKey()); - VerifyIteratorKey(level_iter, {Key(3)}, false); - VerifyIteratorReachesEnd(level_iter); - - level_iter->SeekToFirst(); - VerifyIteratorKey(level_iter, {Key(5)}); - VerifyIteratorReachesEnd(level_iter); - - level_iter->SeekToLast(); - VerifyIteratorKey(level_iter, {Key(3)}, false); - VerifyIteratorReachesEnd(level_iter); - - miter->~InternalIterator(); -} - -TEST_F(DBRangeDelTest, TombstoneOnlyWithOlderVisibleKey) { - // L1: [3, 5) - // L2: 2, 4, 5 - // 2 and 5 should be visible - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 3 * 1024; - - DestroyAndReopen(options); - // L2 - ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo")); - ASSERT_OK(db_->Put(WriteOptions(), Key(4), "bar")); - ASSERT_OK(db_->Put(WriteOptions(), Key(5), "foobar")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // l1 - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3), - Key(5))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - auto iter = db_->NewIterator(ReadOptions()); - auto iter_test_backward = [&] { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(5)); - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(2)); - iter->Prev(); - VerifyIteratorReachesEnd(iter); - }; - auto iter_test_forward = [&] { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(2)); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(5)); - iter->Next(); - VerifyIteratorReachesEnd(iter); - }; - iter->Seek(Key(4)); - iter_test_backward(); - iter->SeekForPrev(Key(4)); - iter->Next(); - iter_test_backward(); - - iter->Seek(Key(4)); - iter->Prev(); - iter_test_forward(); - iter->SeekForPrev(Key(4)); - iter_test_forward(); - - iter->SeekToFirst(); - iter_test_forward(); - iter->SeekToLast(); - iter_test_backward(); - - delete iter; -} - -TEST_F(DBRangeDelTest, TombstoneSentinelDirectionChange) { - // L1: 7 - // L2: [4, 6) - // L3: 4 - // Seek(5) will have sentinel key 6 at the top of minHeap in merging iterator. - // then do a prev, how would sentinel work? - // Redo the test after Put(5) into L1 so that there is a visible key in range - // [4, 6). 
- Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 3 * 1024; - - DestroyAndReopen(options); - // L3 - ASSERT_OK(db_->Put(WriteOptions(), Key(4), "bar")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(3); - ASSERT_EQ(1, NumTableFilesAtLevel(3)); - // L2 - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(4), - Key(6))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1 - ASSERT_OK(db_->Put(WriteOptions(), Key(7), "foobar")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - auto iter = db_->NewIterator(ReadOptions()); - iter->Seek(Key(5)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(7)); - iter->Prev(); - ASSERT_TRUE(!iter->Valid() && iter->status().ok()); - delete iter; - - ASSERT_OK(db_->Put(WriteOptions(), Key(5), "foobar")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - - iter = db_->NewIterator(ReadOptions()); - iter->Seek(Key(5)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(5)); - iter->Prev(); - ASSERT_TRUE(!iter->Valid() && iter->status().ok()); - delete iter; -} - -// Right sentinel tested in many test cases above -TEST_F(DBRangeDelTest, LeftSentinelKeyTest) { - // L1_0: 0, 1 L1_1: [2, 3), 5 - // L2: 2 - // SeekForPrev(4) should give 1 due to sentinel key keeping [2, 3) alive. - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 3 * 1024; - options.max_compaction_bytes = 2048; - - DestroyAndReopen(options); - // L2 - ASSERT_OK(db_->Put(WriteOptions(), Key(2), "foo")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1_0 - Random rnd(301); - ASSERT_OK(db_->Put(WriteOptions(), Key(0), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - // L1_1 - ASSERT_OK(db_->Put(WriteOptions(), Key(5), "bar")); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2), - Key(3))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - - auto iter = db_->NewIterator(ReadOptions()); - iter->SeekForPrev(Key(4)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(1)); - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(0)); - iter->Prev(); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - delete iter; -} - -TEST_F(DBRangeDelTest, LeftSentinelKeyTestWithNewerKey) { - // L1_0: 1, 2 newer than L1_1, L1_1: [2, 4), 5 - // L2: 3 - // SeekForPrev(4) then Prev() should give 2 and then 1. 
- Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 3 * 1024; - options.max_compaction_bytes = 3 * 1024; - - DestroyAndReopen(options); - // L2 - ASSERT_OK(db_->Put(WriteOptions(), Key(3), "foo")); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1_1 - ASSERT_OK(db_->Put(WriteOptions(), Key(5), "bar")); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2), - Key(4))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - // L1_0 - Random rnd(301); - ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(2), rnd.RandomString(4 << 10))); - // Used to verify sequence number of iterator key later. - auto seq = dbfull()->TEST_GetLastVisibleSequence(); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - - Arena arena; - InternalKeyComparator icmp(options.comparator); - ReadOptions read_options; - ScopedArenaIterator iter; - iter.set( - dbfull()->NewInternalIterator(read_options, &arena, kMaxSequenceNumber)); - - auto k = Key(4); - IterKey target; - target.SetInternalKey(k, 0 /* sequence_number */, kValueTypeForSeekForPrev); - iter->SeekForPrev(target.GetInternalKey()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->user_key(), Key(2)); - SequenceNumber actual_seq; - ValueType type; - UnPackSequenceAndType(ExtractInternalKeyFooter(iter->key()), &actual_seq, - &type); - ASSERT_EQ(seq, actual_seq); - // might as well check type - ASSERT_EQ(type, kTypeValue); - - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->user_key(), Key(1)); - iter->Prev(); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); -} - -TEST_F(DBRangeDelTest, SentinelKeyCommonCaseTest) { - // L1 has 3 files - // L1_0: 1, 2 L1_1: [3, 4) 5, 6, [7, 8) L1_2: 9 - // Check iterator operations on LevelIterator. 
- Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.target_file_size_base = 3 * 1024; - - DestroyAndReopen(options); - Random rnd(301); - // L1_0 - ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(2), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - // L1_1 - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3), - Key(4))); - ASSERT_OK(db_->Put(WriteOptions(), Key(5), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Put(WriteOptions(), Key(6), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(7), - Key(8))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(2, NumTableFilesAtLevel(1)); - - // L1_2 - ASSERT_OK(db_->Put(WriteOptions(), Key(9), rnd.RandomString(4 << 10))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(3, NumTableFilesAtLevel(1)); - - ColumnFamilyData* cfd = - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(); - SuperVersion* sv = cfd->GetSuperVersion(); - Arena arena; - ReadOptions read_options; - MergeIteratorBuilder merge_iter_builder(&cfd->internal_comparator(), &arena, - false /* prefix seek */); - InternalIterator* level_iter = sv->current->TEST_GetLevelIterator( - read_options, &merge_iter_builder, 1 /* level */, true); - // This is needed to make LevelIterator range tombstone aware - auto miter = merge_iter_builder.Finish(); - auto k = Key(7); - IterKey target; - target.SetInternalKey(k, kMaxSequenceNumber, kValueTypeForSeek); - level_iter->Seek(target.GetInternalKey()); - // The last Key(9) is a sentinel key. - VerifyIteratorKey(level_iter, {Key(8), Key(9), Key(9)}); - ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok()); - - k = Key(6); - target.SetInternalKey(k, kMaxSequenceNumber, kValueTypeForSeek); - level_iter->Seek(target.GetInternalKey()); - VerifyIteratorKey(level_iter, {Key(6), Key(8), Key(9), Key(9)}); - ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok()); - - k = Key(4); - target.SetInternalKey(k, 0, kValueTypeForSeekForPrev); - level_iter->SeekForPrev(target.GetInternalKey()); - VerifyIteratorKey(level_iter, {Key(3), Key(2), Key(1), Key(1)}, false); - ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok()); - - k = Key(5); - target.SetInternalKey(k, 0, kValueTypeForSeekForPrev); - level_iter->SeekForPrev(target.GetInternalKey()); - VerifyIteratorKey(level_iter, {Key(5), Key(3), Key(2), Key(1), Key(1)}, - false); - - level_iter->SeekToFirst(); - VerifyIteratorKey(level_iter, {Key(1), Key(2), Key(2), Key(5), Key(6), Key(8), - Key(9), Key(9)}); - ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok()); - - level_iter->SeekToLast(); - VerifyIteratorKey( - level_iter, - {Key(9), Key(9), Key(6), Key(5), Key(3), Key(2), Key(1), Key(1)}, false); - ASSERT_TRUE(!level_iter->Valid() && level_iter->status().ok()); - - miter->~InternalIterator(); -} - -TEST_F(DBRangeDelTest, PrefixSentinelKey) { - // L1: ['aaaa', 'aaad'), 'bbbb' - // L2: 'aaac', 'aaae' - // Prefix extracts first 3 chars - // Seek('aaab') should give 'aaae' as first key. - // This is to test a previous bug where prefix seek sees there is no prefix in - // the SST file, and will just set file iter to null in LevelIterator and may - // just skip to the next SST file. 
But in this case, we should keep the file's - // tombstone alive. - Options options = CurrentOptions(); - options.compression = kNoCompression; - options.disable_auto_compactions = true; - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - table_options.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); - Random rnd(301); - - // L2: - ASSERT_OK(db_->Put(WriteOptions(), "aaac", rnd.RandomString(10))); - ASSERT_OK(db_->Put(WriteOptions(), "aaae", rnd.RandomString(10))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(2); - ASSERT_EQ(1, NumTableFilesAtLevel(2)); - - // L1 - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "aaaa", - "aaad")); - ASSERT_OK(db_->Put(WriteOptions(), "bbbb", rnd.RandomString(10))); - ASSERT_OK(db_->Flush(FlushOptions())); - MoveFilesToLevel(1); - ASSERT_EQ(1, NumTableFilesAtLevel(1)); - - auto iter = db_->NewIterator(ReadOptions()); - iter->Seek("aaab"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), "aaae"); - delete iter; -} - -TEST_F(DBRangeDelTest, RefreshMemtableIter) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - ReadOptions ro; - ro.read_tier = kMemtableTier; - std::unique_ptr iter{db_->NewIterator(ro)}; - ASSERT_OK(Flush()); - // First refresh reinits iter, which had a bug where - // iter.memtable_range_tombstone_iter_ was not set to nullptr, and caused - // subsequent refresh to double free. - ASSERT_OK(iter->Refresh()); - ASSERT_OK(iter->Refresh()); -} - -TEST_F(DBRangeDelTest, RangeTombstoneRespectIterateUpperBound) { - // Memtable: a, [b, bz) - // Do a Seek on `a` with iterate_upper_bound being az - // range tombstone [b, bz) should not be processed (added to and - // popped from the min_heap in MergingIterator). - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - ASSERT_OK(Put("a", "bar")); - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "bz")); - - // I could not find a cleaner way to test this without relying on - // implementation detail. Tried to test the value of - // `internal_range_del_reseek_count` but that did not work - // since BlockBasedTable iterator becomes !Valid() when point key - // is out of bound and that reseek only happens when a point key - // is covered by some range tombstone. - SyncPoint::GetInstance()->SetCallBack("MergeIterator::PopDeleteRangeStart", - [](void*) { - // there should not be any range - // tombstone in the heap. 
- FAIL(); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ReadOptions read_opts; - std::string upper_bound = "az"; - Slice upper_bound_slice = upper_bound; - read_opts.iterate_upper_bound = &upper_bound_slice; - std::unique_ptr iter{db_->NewIterator(read_opts)}; - iter->Seek("a"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), "a"); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); -} - -TEST_F(DBRangeDelTest, RangetombesoneCompensateFilesize) { - Options opts = CurrentOptions(); - opts.disable_auto_compactions = true; - DestroyAndReopen(opts); - - std::vector values; - Random rnd(301); - // file in L2 - values.push_back(rnd.RandomString(1 << 10)); - ASSERT_OK(Put("a", values.back())); - values.push_back(rnd.RandomString(1 << 10)); - ASSERT_OK(Put("b", values.back())); - ASSERT_OK(Flush()); - MoveFilesToLevel(2); - uint64_t l2_size = 0; - ASSERT_OK(Size("a", "c", 0 /* cf */, &l2_size)); - ASSERT_GT(l2_size, 0); - // file in L1 - values.push_back(rnd.RandomString(1 << 10)); - ASSERT_OK(Put("d", values.back())); - values.push_back(rnd.RandomString(1 << 10)); - ASSERT_OK(Put("e", values.back())); - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - uint64_t l1_size = 0; - ASSERT_OK(Size("d", "f", 0 /* cf */, &l1_size)); - ASSERT_GT(l1_size, 0); - - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "f")); - ASSERT_OK(Flush()); - // Range deletion compensated size computed during flush time - std::vector> level_to_files; - dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), - &level_to_files); - ASSERT_EQ(level_to_files[0].size(), 1); - ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size, - l1_size + l2_size); - ASSERT_EQ(level_to_files[1].size(), 1); - ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, 0); - ASSERT_EQ(level_to_files[2].size(), 1); - ASSERT_EQ(level_to_files[2][0].compensated_range_deletion_size, 0); - - // Range deletion compensated size computed during compaction time - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow_trivial_move */)); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_EQ(NumTableFilesAtLevel(1), 1); - ASSERT_EQ(NumTableFilesAtLevel(2), 1); - dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), - &level_to_files); - ASSERT_EQ(level_to_files[1].size(), 1); - ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size); - ASSERT_EQ(level_to_files[2].size(), 1); - ASSERT_EQ(level_to_files[2][0].compensated_range_deletion_size, 0); -} - -TEST_F(DBRangeDelTest, RangetombesoneCompensateFilesizePersistDuringReopen) { - Options opts = CurrentOptions(); - opts.disable_auto_compactions = true; - DestroyAndReopen(opts); - - std::vector values; - Random rnd(301); - values.push_back(rnd.RandomString(1 << 10)); - ASSERT_OK(Put("a", values.back())); - values.push_back(rnd.RandomString(1 << 10)); - ASSERT_OK(Put("b", values.back())); - ASSERT_OK(Flush()); - MoveFilesToLevel(2); - - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c")); - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - - ASSERT_OK( - db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); - ASSERT_OK(Flush()); - - std::vector> level_to_files; - dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), - &level_to_files); - ASSERT_EQ(level_to_files[0].size(), 1); - ASSERT_EQ(level_to_files[1].size(), 1); - ASSERT_EQ(level_to_files[2].size(), 1); - uint64_t l2_size = 
level_to_files[2][0].fd.GetFileSize();
-  uint64_t l1_size = level_to_files[1][0].fd.GetFileSize();
-  ASSERT_GT(l2_size, 0);
-  ASSERT_GT(l1_size, 0);
-  ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size,
-            l1_size + l2_size);
-  ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size);
-
-  Reopen(opts);
-  dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
-                                  &level_to_files);
-  ASSERT_EQ(level_to_files[0].size(), 1);
-  ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size,
-            l1_size + l2_size);
-  ASSERT_EQ(level_to_files[1].size(), 1);
-  ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size);
-}
-
-TEST_F(DBRangeDelTest, SingleKeyFile) {
-  // Test for a bug fix where a range tombstone could be added
-  // to an SST file while it is not within the file's key range.
-  // Create 3 files in L0 and then L1 where all keys have the same user key
-  // `Key(2)`. The middle file will contain Key(2)@6 and Key(2)@5. Before the
-  // fix, the range tombstone [Key(2), Key(5))@2 would be added to this file
-  // during compaction, but it is not in this file's key range.
-  Options opts = CurrentOptions();
-  opts.disable_auto_compactions = true;
-  opts.target_file_size_base = 1 << 10;
-  opts.level_compaction_dynamic_file_size = false;
-  DestroyAndReopen(opts);
-
-  // prevent range tombstone drop
-  std::vector<const Snapshot*> snapshots;
-  snapshots.push_back(db_->GetSnapshot());
-
-  // write a key to bottommost file so the compactions below
-  // are not bottommost compactions and will calculate
-  // compensated range tombstone size. Before the bug fix, an assert would
-  // fail during this process.
-  Random rnd(301);
-  ASSERT_OK(Put(Key(2), rnd.RandomString(8 << 10)));
-  ASSERT_OK(Flush());
-  MoveFilesToLevel(6);
-
-  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
-                             Key(5)));
-  snapshots.push_back(db_->GetSnapshot());
-  std::vector<std::string> values;
-
-  values.push_back(rnd.RandomString(8 << 10));
-  ASSERT_OK(Put(Key(2), rnd.RandomString(8 << 10)));
-  snapshots.push_back(db_->GetSnapshot());
-  ASSERT_OK(Flush());
-
-  ASSERT_OK(Put(Key(2), rnd.RandomString(8 << 10)));
-  snapshots.push_back(db_->GetSnapshot());
-  ASSERT_OK(Flush());
-
-  ASSERT_OK(Put(Key(2), rnd.RandomString(8 << 10)));
-  snapshots.push_back(db_->GetSnapshot());
-  ASSERT_OK(Flush());
-
-  ASSERT_EQ(NumTableFilesAtLevel(0), 3);
-  CompactRangeOptions co;
-  co.bottommost_level_compaction = BottommostLevelCompaction::kForce;
-
-  ASSERT_OK(dbfull()->RunManualCompaction(
-      static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
-          ->cfd(),
-      0, 1, co, nullptr, nullptr, true, true,
-      std::numeric_limits<uint64_t>::max() /*max_file_num_to_ignore*/,
-      "" /*trim_ts*/));
-
-  for (const auto s : snapshots) {
-    db_->ReleaseSnapshot(s);
-  }
-}
-
-TEST_F(DBRangeDelTest, DoubleCountRangeTombstoneCompensatedSize) {
-  // Test for a bug fix: if a file has multiple range tombstones
-  // with the same start and end key but different sequence numbers,
-  // we should only calculate compensated range tombstone size
-  // for one of them.
-  Options opts = CurrentOptions();
-  opts.disable_auto_compactions = true;
-  DestroyAndReopen(opts);
-
-  std::vector<std::string> values;
-  Random rnd(301);
-  // file in L2
-  ASSERT_OK(Put(Key(1), rnd.RandomString(1 << 10)));
-  ASSERT_OK(Put(Key(2), rnd.RandomString(1 << 10)));
-  ASSERT_OK(Flush());
-  MoveFilesToLevel(2);
-  uint64_t l2_size = 0;
-  ASSERT_OK(Size(Key(1), Key(3), 0 /* cf */, &l2_size));
-  ASSERT_GT(l2_size, 0);
-
-  // file in L1
-  ASSERT_OK(Put(Key(3), rnd.RandomString(1 << 10)));
-  ASSERT_OK(Put(Key(4), rnd.RandomString(1 << 10)));
-  ASSERT_OK(Flush());
-  MoveFilesToLevel(1);
-  uint64_t l1_size = 0;
-  ASSERT_OK(Size(Key(3), Key(5), 0 /* cf */, &l1_size));
-  ASSERT_GT(l1_size, 0);
-
-  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
-                             Key(5)));
-  // so that the range tombstone above is not dropped
-  const Snapshot* snapshot = db_->GetSnapshot();
-  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
-                             Key(5)));
-  ASSERT_OK(Flush());
-  // Range deletion compensated size computed during flush time
-  std::vector<std::vector<FileMetaData>> level_to_files;
-  dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
-                                  &level_to_files);
-  ASSERT_EQ(level_to_files[0].size(), 1);
-  // instead of 2 * (l1_size + l2_size)
-  ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size,
-            l1_size + l2_size);
-
-  // Range deletion compensated size computed during compaction time
-  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
-                                        true /* disallow_trivial_move */));
-  dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
-                                  &level_to_files);
-  ASSERT_EQ(level_to_files[1].size(), 1);
-  ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size);
-  db_->ReleaseSnapshot(snapshot);
-}
-
-TEST_F(DBRangeDelTest, AddRangeDelsSameLowerAndUpperBound) {
-  // Test for an edge case where CompactionOutputs::AddRangeDels()
-  // is called with an empty range: `range_tombstone_lower_bound_` is not empty
-  // and has the same user_key and sequence number as `next_table_min_key`.
-  // This used to cause a file's smallest and largest key to be incorrectly set
-  // such that smallest > largest, failing some assertions in the iterator
-  // and/or an assertion in VersionSet::ApproximateSize().
-  Options opts = CurrentOptions();
-  opts.disable_auto_compactions = true;
-  opts.target_file_size_base = 1 << 10;
-  opts.level_compaction_dynamic_file_size = false;
-  DestroyAndReopen(opts);
-
-  Random rnd(301);
-  // Create a file at the bottommost level so the manual compaction below is
-  // not a bottommost-level compaction and goes through code paths such as
-  // compensating range tombstone size.
-  ASSERT_OK(Put(Key(1), "v1"));
-  ASSERT_OK(Put(Key(4), "v2"));
-  ASSERT_OK(Flush());
-  MoveFilesToLevel(6);
-
-  ASSERT_OK(Put(Key(1), rnd.RandomString(4 << 10)));
-  ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10)));
-  // So Key(3) does not get dropped.
-  const Snapshot* snapshot = db_->GetSnapshot();
-  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
-                             Key(4)));
-  ASSERT_OK(Flush());
-
-  ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10)));
-  ASSERT_OK(Put(Key(4), rnd.RandomString(4 << 10)));
-  ASSERT_OK(Flush());
-
-  MoveFilesToLevel(1);
-  // Each file will have two keys, with Key(3) straddling the two files.
- // File 1: Key(1)@1, Key(3)@6, DeleteRange ends at Key(3)@6 - // File 2: Key(3)@4, Key(4)@7, DeleteRange start from Key(3)@4 - ASSERT_EQ(NumTableFilesAtLevel(1), 2); - - // Manually update compaction output file cutting decisions - // to cut before range tombstone sentinel Key(3)@4 - // and the point key Key(3)@4 itself - SyncPoint::GetInstance()->SetCallBack( - "CompactionOutputs::ShouldStopBefore::manual_decision", [opts](void* p) { - auto* pair = (std::pair*)p; - if ((opts.comparator->Compare(ExtractUserKey(pair->second), Key(3)) == - 0) && - (GetInternalKeySeqno(pair->second) <= 4)) { - *(pair->first) = true; - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - std::string begin_key = Key(0); - std::string end_key = Key(5); - Slice begin_slice{begin_key}; - Slice end_slice{end_key}; - ASSERT_OK(dbfull()->RunManualCompaction( - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(), - 1, 2, CompactRangeOptions(), &begin_slice, &end_slice, true, - true /* disallow_trivial_move */, - std::numeric_limits::max() /*max_file_num_to_ignore*/, - "" /*trim_ts*/)); - // iterate through to check if any assertion breaks - std::unique_ptr iter{db_->NewIterator(ReadOptions())}; - iter->SeekToFirst(); - std::vector expected{1, 3, 4}; - for (auto i : expected) { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(i)); - iter->Next(); - } - ASSERT_TRUE(iter->status().ok() && !iter->Valid()); - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(DBRangeDelTest, AddRangeDelsSingleUserKeyTombstoneOnlyFile) { - // Test for an edge case where CompactionOutputs::AddRangeDels() - // is called with an SST file that has no point keys, and that - // the lower bound and upper bound have the same user key. - // This could cause a file's smallest and largest key to be incorrectly set - // such that smallest > largest, and fail some assertions in iterator and/or - // assertion in VersionSet::ApproximateSize(). - Options opts = CurrentOptions(); - opts.disable_auto_compactions = true; - opts.target_file_size_base = 1 << 10; - opts.level_compaction_dynamic_file_size = false; - DestroyAndReopen(opts); - - Random rnd(301); - // Create file at bottommost level so the manual compaction below is - // non-bottommost level and goes through code path like compensate range - // tombstone size. 
- ASSERT_OK(Put(Key(1), "v1")); - ASSERT_OK(Put(Key(4), "v2")); - ASSERT_OK(Flush()); - MoveFilesToLevel(6); - - ASSERT_OK(Put(Key(1), rnd.RandomString(10))); - // Key(3)@4 - ASSERT_OK(Put(Key(3), rnd.RandomString(10))); - const Snapshot* snapshot1 = db_->GetSnapshot(); - // Key(3)@5 - ASSERT_OK(Put(Key(3), rnd.RandomString(10))); - const Snapshot* snapshot2 = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2), - Key(4))); - // Key(3)@7 - ASSERT_OK(Put(Key(3), rnd.RandomString(10))); - ASSERT_OK(Flush()); - - // L0 -> L1 compaction: cut output into two files: - // File 1: Key(1), Key(3)@7, Range tombstone ends at Key(3)@7 - // File 2: Key(3)@5, Key(3)@4, Range tombstone starts from Key(3)@5 - SyncPoint::GetInstance()->SetCallBack( - "CompactionOutputs::ShouldStopBefore::manual_decision", [opts](void* p) { - auto* pair = (std::pair*)p; - if ((opts.comparator->Compare(ExtractUserKey(pair->second), Key(3)) == - 0) && - (GetInternalKeySeqno(pair->second) <= 6)) { - *(pair->first) = true; - SyncPoint::GetInstance()->DisableProcessing(); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - std::string begin_key = Key(0); - std::string end_key = Key(5); - Slice begin_slice{begin_key}; - Slice end_slice{end_key}; - ASSERT_OK(dbfull()->RunManualCompaction( - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(), - 0, 1, CompactRangeOptions(), &begin_slice, &end_slice, true, - true /* disallow_trivial_move */, - std::numeric_limits::max() /*max_file_num_to_ignore*/, - "" /*trim_ts*/)); - ASSERT_EQ(NumTableFilesAtLevel(1), 2); - - // L1 -> L2 compaction, drop the snapshot protecting Key(3)@5. - // Let ShouldStopBefore() return true for Key(3)@5 (delete range sentinel) - // and Key(3)@4. - // Output should have two files: - // File 1: Key(1), Key(3)@7, range tombstone ends at Key(3)@7 - // File dropped: range tombstone only file (from Key(3)@5 to Key(3)@4) - // File 2: Range tombstone starting from Key(3)@4, Key(3)@4 - db_->ReleaseSnapshot(snapshot2); - SyncPoint::GetInstance()->SetCallBack( - "CompactionOutputs::ShouldStopBefore::manual_decision", [opts](void* p) { - auto* pair = (std::pair*)p; - if ((opts.comparator->Compare(ExtractUserKey(pair->second), Key(3)) == - 0) && - (GetInternalKeySeqno(pair->second) <= 6)) { - *(pair->first) = true; - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(dbfull()->RunManualCompaction( - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(), - 1, 2, CompactRangeOptions(), &begin_slice, &end_slice, true, - true /* disallow_trivial_move */, - std::numeric_limits::max() /*max_file_num_to_ignore*/, - "" /*trim_ts*/)); - ASSERT_EQ(NumTableFilesAtLevel(2), 2); - // iterate through to check if any assertion breaks - std::unique_ptr iter{db_->NewIterator(ReadOptions())}; - iter->SeekToFirst(); - std::vector expected{1, 3, 4}; - for (auto i : expected) { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key(i)); - iter->Next(); - } - ASSERT_TRUE(iter->status().ok() && !iter->Valid()); - db_->ReleaseSnapshot(snapshot1); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_rate_limiter_test.cc b/db/db_rate_limiter_test.cc deleted file mode 100644 index acea673cb..000000000 --- a/db/db_rate_limiter_test.cc +++ /dev/null @@ -1,436 +0,0 @@ -// Copyright (c) 2022-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include - -#include -#include - -#include "db/db_test_util.h" -#include "port/stack_trace.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "test_util/testharness.h" -#include "util/file_checksum_helper.h" - -namespace ROCKSDB_NAMESPACE { - -class DBRateLimiterOnReadTest - : public DBTestBase, - public ::testing::WithParamInterface> { - public: - explicit DBRateLimiterOnReadTest() - : DBTestBase("db_rate_limiter_on_read_test", /*env_do_fsync=*/false), - use_direct_io_(std::get<0>(GetParam())), - use_block_cache_(std::get<1>(GetParam())), - use_readahead_(std::get<2>(GetParam())) {} - - void Init() { - options_ = GetOptions(); - Reopen(options_); - for (int i = 0; i < kNumFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK(Put(Key(i * kNumKeysPerFile + j), "val")); - } - ASSERT_OK(Flush()); - } - MoveFilesToLevel(1); - } - - BlockBasedTableOptions GetTableOptions() { - BlockBasedTableOptions table_options; - table_options.no_block_cache = !use_block_cache_; - return table_options; - } - - ReadOptions GetReadOptions() { - ReadOptions read_options; - read_options.rate_limiter_priority = Env::IO_USER; - read_options.readahead_size = use_readahead_ ? kReadaheadBytes : 0; - return read_options; - } - - Options GetOptions() { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.file_checksum_gen_factory.reset(new FileChecksumGenCrc32cFactory()); - options.rate_limiter.reset(NewGenericRateLimiter( - 1 << 20 /* rate_bytes_per_sec */, 100 * 1000 /* refill_period_us */, - 10 /* fairness */, RateLimiter::Mode::kAllIo)); - options.table_factory.reset(NewBlockBasedTableFactory(GetTableOptions())); - options.use_direct_reads = use_direct_io_; - return options; - } - - protected: - const static int kNumKeysPerFile = 1; - const static int kNumFiles = 3; - const static int kReadaheadBytes = 32 << 10; // 32KB - - Options options_; - const bool use_direct_io_; - const bool use_block_cache_; - const bool use_readahead_; -}; - -std::string GetTestNameSuffix( - ::testing::TestParamInfo> info) { - std::ostringstream oss; - if (std::get<0>(info.param)) { - oss << "DirectIO"; - } else { - oss << "BufferedIO"; - } - if (std::get<1>(info.param)) { - oss << "_BlockCache"; - } else { - oss << "_NoBlockCache"; - } - if (std::get<2>(info.param)) { - oss << "_Readahead"; - } else { - oss << "_NoReadahead"; - } - return oss.str(); -} - -INSTANTIATE_TEST_CASE_P(DBRateLimiterOnReadTest, DBRateLimiterOnReadTest, - ::testing::Combine(::testing::Bool(), ::testing::Bool(), - ::testing::Bool()), - GetTestNameSuffix); - -TEST_P(DBRateLimiterOnReadTest, Get) { - if (use_direct_io_ && !IsDirectIOSupported()) { - return; - } - Init(); - - ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - - int expected = 0; - for (int i = 0; i < kNumFiles; ++i) { - { - std::string value; - ASSERT_OK(db_->Get(GetReadOptions(), Key(i * kNumKeysPerFile), &value)); - ++expected; - } - ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - - { - std::string value; - ASSERT_OK(db_->Get(GetReadOptions(), Key(i * kNumKeysPerFile), &value)); - if (!use_block_cache_) { - ++expected; - } - } - ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - } -} - -TEST_P(DBRateLimiterOnReadTest, NewMultiGet) { - if (use_direct_io_ && 
!IsDirectIOSupported()) { - return; - } - Init(); - - ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - - const int kNumKeys = kNumFiles * kNumKeysPerFile; - int64_t expected = 0; - { - std::vector key_bufs; - key_bufs.reserve(kNumKeys); - std::vector keys; - keys.reserve(kNumKeys); - for (int i = 0; i < kNumKeys; ++i) { - key_bufs.emplace_back(Key(i)); - keys.emplace_back(key_bufs[i]); - } - std::vector statuses(kNumKeys); - std::vector values(kNumKeys); - const int64_t prev_total_rl_req = options_.rate_limiter->GetTotalRequests(); - db_->MultiGet(GetReadOptions(), dbfull()->DefaultColumnFamily(), kNumKeys, - keys.data(), values.data(), statuses.data()); - const int64_t cur_total_rl_req = options_.rate_limiter->GetTotalRequests(); - for (int i = 0; i < kNumKeys; ++i) { - ASSERT_TRUE(statuses[i].ok()); - } - ASSERT_GT(cur_total_rl_req, prev_total_rl_req); - ASSERT_EQ(cur_total_rl_req - prev_total_rl_req, - options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - } - expected += kNumKeys; - ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); -} - -TEST_P(DBRateLimiterOnReadTest, OldMultiGet) { - // The old `vector`-returning `MultiGet()` APIs use `Read()`, which - // supports rate limiting. - if (use_direct_io_ && !IsDirectIOSupported()) { - return; - } - Init(); - - ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - - const int kNumKeys = kNumFiles * kNumKeysPerFile; - int expected = 0; - { - std::vector key_bufs; - key_bufs.reserve(kNumKeys); - std::vector keys; - keys.reserve(kNumKeys); - for (int i = 0; i < kNumKeys; ++i) { - key_bufs.emplace_back(Key(i)); - keys.emplace_back(key_bufs[i]); - } - std::vector values; - std::vector statuses = - db_->MultiGet(GetReadOptions(), keys, &values); - for (int i = 0; i < kNumKeys; ++i) { - ASSERT_OK(statuses[i]); - } - } - expected += kNumKeys; - ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); -} - -TEST_P(DBRateLimiterOnReadTest, Iterator) { - if (use_direct_io_ && !IsDirectIOSupported()) { - return; - } - Init(); - - std::unique_ptr iter(db_->NewIterator(GetReadOptions())); - ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - - int expected = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ++expected; - ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - } - - for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { - // When `use_block_cache_ == true`, the reverse scan will access the blocks - // loaded to cache during the above forward scan, in which case no further - // file reads are expected. - if (!use_block_cache_) { - ++expected; - } - } - // Reverse scan does not read evenly (one block per iteration) due to - // descending seqno ordering, so wait until after the loop to check total. - ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); -} - - -TEST_P(DBRateLimiterOnReadTest, VerifyChecksum) { - if (use_direct_io_ && !IsDirectIOSupported()) { - return; - } - Init(); - - ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - - ASSERT_OK(db_->VerifyChecksum(GetReadOptions())); - // The files are tiny so there should have just been one read per file. 
- int expected = kNumFiles; - ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); -} - -TEST_P(DBRateLimiterOnReadTest, VerifyFileChecksums) { - if (use_direct_io_ && !IsDirectIOSupported()) { - return; - } - Init(); - - ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - - ASSERT_OK(db_->VerifyFileChecksums(GetReadOptions())); - // The files are tiny so there should have just been one read per file. - int expected = kNumFiles; - ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); -} - - -class DBRateLimiterOnWriteTest : public DBTestBase { - public: - explicit DBRateLimiterOnWriteTest() - : DBTestBase("db_rate_limiter_on_write_test", /*env_do_fsync=*/false) {} - - void Init() { - options_ = GetOptions(); - ASSERT_OK(TryReopenWithColumnFamilies({"default"}, options_)); - Random rnd(301); - for (int i = 0; i < kNumFiles; i++) { - ASSERT_OK(Put(0, kStartKey, rnd.RandomString(2))); - ASSERT_OK(Put(0, kEndKey, rnd.RandomString(2))); - ASSERT_OK(Flush(0)); - } - } - - Options GetOptions() { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.rate_limiter.reset(NewGenericRateLimiter( - 1 << 20 /* rate_bytes_per_sec */, 100 * 1000 /* refill_period_us */, - 10 /* fairness */, RateLimiter::Mode::kWritesOnly)); - options.table_factory.reset( - NewBlockBasedTableFactory(BlockBasedTableOptions())); - return options; - } - - protected: - inline const static int64_t kNumFiles = 3; - inline const static std::string kStartKey = "a"; - inline const static std::string kEndKey = "b"; - Options options_; -}; - -TEST_F(DBRateLimiterOnWriteTest, Flush) { - std::int64_t prev_total_request = 0; - - Init(); - - std::int64_t actual_flush_request = - options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL) - - prev_total_request; - std::int64_t exepcted_flush_request = kNumFiles; - EXPECT_EQ(actual_flush_request, exepcted_flush_request); - EXPECT_EQ(actual_flush_request, - options_.rate_limiter->GetTotalRequests(Env::IO_HIGH)); -} - -TEST_F(DBRateLimiterOnWriteTest, Compact) { - Init(); - - // Pre-comaction: - // level-0 : `kNumFiles` SST files overlapping on [kStartKey, kEndKey] - std::string files_per_level_pre_compaction = std::to_string(kNumFiles); - ASSERT_EQ(files_per_level_pre_compaction, FilesPerLevel(0 /* cf */)); - - std::int64_t prev_total_request = - options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL); - ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_LOW)); - - Compact(kStartKey, kEndKey); - - std::int64_t actual_compaction_request = - options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL) - - prev_total_request; - - // Post-comaction: - // level-0 : 0 SST file - // level-1 : 1 SST file - std::string files_per_level_post_compaction = "0,1"; - ASSERT_EQ(files_per_level_post_compaction, FilesPerLevel(0 /* cf */)); - - std::int64_t exepcted_compaction_request = 1; - EXPECT_EQ(actual_compaction_request, exepcted_compaction_request); - EXPECT_EQ(actual_compaction_request, - options_.rate_limiter->GetTotalRequests(Env::IO_LOW)); -} - -class DBRateLimiterOnWriteWALTest - : public DBRateLimiterOnWriteTest, - public ::testing::WithParamInterface> { - public: - static std::string GetTestNameSuffix( - ::testing::TestParamInfo> info) { - std::ostringstream oss; - if (std::get<0>(info.param)) { - oss << "DisableWAL"; - } else { - oss << "EnableWAL"; - } - if (std::get<1>(info.param)) { - oss << "_ManualWALFlush"; - } else { - oss << "_AutoWALFlush"; - } - if (std::get<2>(info.param) == 
Env::IO_USER) { - oss << "_RateLimitAutoWALFlush"; - } else if (std::get<2>(info.param) == Env::IO_TOTAL) { - oss << "_NoRateLimitAutoWALFlush"; - } else { - oss << "_RateLimitAutoWALFlushWithIncorrectPriority"; - } - return oss.str(); - } - - explicit DBRateLimiterOnWriteWALTest() - : disable_wal_(std::get<0>(GetParam())), - manual_wal_flush_(std::get<1>(GetParam())), - rate_limiter_priority_(std::get<2>(GetParam())) {} - - void Init() { - options_ = GetOptions(); - options_.manual_wal_flush = manual_wal_flush_; - Reopen(options_); - } - - WriteOptions GetWriteOptions() { - WriteOptions write_options; - write_options.disableWAL = disable_wal_; - write_options.rate_limiter_priority = rate_limiter_priority_; - return write_options; - } - - protected: - bool disable_wal_; - bool manual_wal_flush_; - Env::IOPriority rate_limiter_priority_; -}; - -INSTANTIATE_TEST_CASE_P( - DBRateLimiterOnWriteWALTest, DBRateLimiterOnWriteWALTest, - ::testing::Values(std::make_tuple(false, false, Env::IO_TOTAL), - std::make_tuple(false, false, Env::IO_USER), - std::make_tuple(false, false, Env::IO_HIGH), - std::make_tuple(false, true, Env::IO_USER), - std::make_tuple(true, false, Env::IO_USER)), - DBRateLimiterOnWriteWALTest::GetTestNameSuffix); - -TEST_P(DBRateLimiterOnWriteWALTest, AutoWalFlush) { - Init(); - - const bool no_rate_limit_auto_wal_flush = - (rate_limiter_priority_ == Env::IO_TOTAL); - const bool valid_arg = (rate_limiter_priority_ == Env::IO_USER && - !disable_wal_ && !manual_wal_flush_); - - std::int64_t prev_total_request = - options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL); - ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); - - Status s = Put("foo", "v1", GetWriteOptions()); - - if (no_rate_limit_auto_wal_flush || valid_arg) { - EXPECT_TRUE(s.ok()); - } else { - EXPECT_TRUE(s.IsInvalidArgument()); - EXPECT_TRUE(s.ToString().find("WriteOptions::rate_limiter_priority") != - std::string::npos); - } - - std::int64_t actual_auto_wal_flush_request = - options_.rate_limiter->GetTotalRequests(Env::IO_TOTAL) - - prev_total_request; - std::int64_t expected_auto_wal_flush_request = valid_arg ? 1 : 0; - - EXPECT_EQ(actual_auto_wal_flush_request, expected_auto_wal_flush_request); - EXPECT_EQ(actual_auto_wal_flush_request, - options_.rate_limiter->GetTotalRequests(Env::IO_USER)); -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_readonly_with_timestamp_test.cc b/db/db_readonly_with_timestamp_test.cc deleted file mode 100644 index 675e4943b..000000000 --- a/db/db_readonly_with_timestamp_test.cc +++ /dev/null @@ -1,956 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include "db/db_with_timestamp_test_util.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { -class DBReadOnlyTestWithTimestamp : public DBBasicTestWithTimestampBase { - public: - DBReadOnlyTestWithTimestamp() - : DBBasicTestWithTimestampBase("db_readonly_test_with_timestamp") {} - - protected: - void CheckDBOpenedAsCompactedDBWithOneLevel0File() { - VersionSet* const versions = dbfull()->GetVersionSet(); - ASSERT_NE(versions, nullptr); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - - const VersionStorageInfo* const storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - - // Only 1 L0 file. - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - // L0 is the max level. - ASSERT_EQ(storage_info->num_non_empty_levels(), 1); - } - - void CheckDBOpenedAsCompactedDBWithOnlyHighestNonEmptyLevelFiles() { - VersionSet* const versions = dbfull()->GetVersionSet(); - ASSERT_NE(versions, nullptr); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - - const VersionStorageInfo* const storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - - // L0 has no files. - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - - // All other levels have no files except the highest level with files. - for (int i = 1; i < storage_info->num_non_empty_levels() - 1; ++i) { - ASSERT_FALSE(storage_info->LevelFilesBrief(i).num_files > 0); - } - - // The highest level with files have some files. - int highest_non_empty_level = storage_info->num_non_empty_levels() - 1; - ASSERT_TRUE( - storage_info->LevelFilesBrief(highest_non_empty_level).num_files > 0); - } -}; - -TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGetReadTimestampSizeMismatch) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database in read only mode to test its timestamp support. 
- Close(); - ASSERT_OK(ReadOnlyReopen(options)); - ReadOptions read_opts; - std::string different_size_read_timestamp; - PutFixed32(&different_size_read_timestamp, 2); - Slice different_size_read_ts = different_size_read_timestamp; - read_opts.timestamp = &different_size_read_ts; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsInvalidArgument()); - } - - for (uint64_t key = 0; key <= kMaxKey; ++key) { - std::string value_from_get; - std::string timestamp; - ASSERT_TRUE(db_->Get(read_opts, Key1(key), &value_from_get, ×tamp) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, - IteratorAndGetReadTimestampSpecifiedWithoutWriteTimestamp) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database in read only mode to test its timestamp support. - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - ReadOptions read_opts; - const std::string read_timestamp = Timestamp(2, 0); - Slice read_ts = read_timestamp; - read_opts.timestamp = &read_ts; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsInvalidArgument()); - } - - for (uint64_t key = 0; key <= kMaxKey; ++key) { - std::string value_from_get; - std::string timestamp; - ASSERT_TRUE(db_->Get(read_opts, Key1(key), &value_from_get, ×tamp) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, - IteratorAndGetWriteWithTimestampReadWithoutTimestamp) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database in read only mode to test its timestamp support. 
- Close(); - ASSERT_OK(ReadOnlyReopen(options)); - ReadOptions read_opts; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsInvalidArgument()); - } - - for (uint64_t key = 0; key <= kMaxKey; ++key) { - std::string value_from_get; - ASSERT_TRUE( - db_->Get(read_opts, Key1(key), &value_from_get).IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGet) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::vector start_keys = {1, 0}; - const std::vector write_timestamps = {Timestamp(1, 0), - Timestamp(3, 0)}; - const std::vector read_timestamps = {Timestamp(2, 0), - Timestamp(4, 0)}; - for (size_t i = 0; i < write_timestamps.size(); ++i) { - WriteOptions write_opts; - for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamps[i], - "value" + std::to_string(i)); - ASSERT_OK(s); - } - } - - // Reopen the database in read only mode to test its timestamp support. - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - - auto get_value_and_check = [](DB* db, ReadOptions read_opts, Slice key, - Slice expected_value, std::string expected_ts) { - std::string value_from_get; - std::string timestamp; - ASSERT_OK(db->Get(read_opts, key.ToString(), &value_from_get, ×tamp)); - ASSERT_EQ(expected_value, value_from_get); - ASSERT_EQ(expected_ts, timestamp); - }; - for (size_t i = 0; i < read_timestamps.size(); ++i) { - ReadOptions read_opts; - Slice read_ts = read_timestamps[i]; - read_opts.timestamp = &read_ts; - std::unique_ptr it(db_->NewIterator(read_opts)); - int count = 0; - uint64_t key = 0; - // Forward iterate. - for (it->Seek(Key1(0)), key = start_keys[i]; it->Valid(); - it->Next(), ++count, ++key) { - CheckIterUserEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - get_value_and_check(db_, read_opts, it->key(), it->value(), - write_timestamps[i]); - } - size_t expected_count = kMaxKey - start_keys[i] + 1; - ASSERT_EQ(expected_count, count); - - // Backward iterate. - count = 0; - for (it->SeekForPrev(Key1(kMaxKey)), key = kMaxKey; it->Valid(); - it->Prev(), ++count, --key) { - CheckIterUserEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - get_value_and_check(db_, read_opts, it->key(), it->value(), - write_timestamps[i]); - } - ASSERT_EQ(static_cast(kMaxKey) - start_keys[i] + 1, count); - - // SeekToFirst()/SeekToLast() with lower/upper bounds. - // Then iter with lower and upper bounds. 
- uint64_t l = 0; - uint64_t r = kMaxKey + 1; - while (l < r) { - std::string lb_str = Key1(l); - Slice lb = lb_str; - std::string ub_str = Key1(r); - Slice ub = ub_str; - read_opts.iterate_lower_bound = &lb; - read_opts.iterate_upper_bound = &ub; - it.reset(db_->NewIterator(read_opts)); - for (it->SeekToFirst(), key = std::max(l, start_keys[i]), count = 0; - it->Valid(); it->Next(), ++key, ++count) { - CheckIterUserEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - get_value_and_check(db_, read_opts, it->key(), it->value(), - write_timestamps[i]); - } - ASSERT_EQ(r - std::max(l, start_keys[i]), count); - - for (it->SeekToLast(), key = std::min(r, kMaxKey + 1), count = 0; - it->Valid(); it->Prev(), --key, ++count) { - CheckIterUserEntry(it.get(), Key1(key - 1), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - get_value_and_check(db_, read_opts, it->key(), it->value(), - write_timestamps[i]); - } - l += (kMaxKey / 100); - r -= (kMaxKey / 100); - } - } - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, Iterators) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - const std::string read_timestamp = Timestamp(2, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database in read only mode to test its timestamp support. - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - ReadOptions read_opts; - Slice read_ts = read_timestamp; - read_opts.timestamp = &read_ts; - std::vector iters; - ASSERT_OK(db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters)); - ASSERT_EQ(static_cast(1), iters.size()); - - int count = 0; - uint64_t key = 0; - // Forward iterate. - for (iters[0]->Seek(Key1(0)), key = 0; iters[0]->Valid(); - iters[0]->Next(), ++count, ++key) { - CheckIterUserEntry(iters[0], Key1(key), kTypeValue, - "value" + std::to_string(key), write_timestamp); - } - - size_t expected_count = kMaxKey - 0 + 1; - ASSERT_EQ(expected_count, count); - delete iters[0]; - - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, IteratorsReadTimestampSizeMismatch) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database in read only mode to test its timestamp support. 
- Close(); - ASSERT_OK(ReadOnlyReopen(options)); - ReadOptions read_opts; - std::string different_size_read_timestamp; - PutFixed32(&different_size_read_timestamp, 2); - Slice different_size_read_ts = different_size_read_timestamp; - read_opts.timestamp = &different_size_read_ts; - { - std::vector iters; - ASSERT_TRUE( - db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, - IteratorsReadTimestampSpecifiedWithoutWriteTimestamp) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database in read only mode to test its timestamp support. - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - ReadOptions read_opts; - const std::string read_timestamp = Timestamp(2, 0); - Slice read_ts = read_timestamp; - read_opts.timestamp = &read_ts; - { - std::vector iters; - ASSERT_TRUE( - db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, - IteratorsWriteWithTimestampReadWithoutTimestamp) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database in read only mode to test its timestamp support. - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - ReadOptions read_opts; - { - std::vector iters; - ASSERT_TRUE( - db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, CompactedDBGetReadTimestampSizeMismatch) { - const int kNumKeysPerFile = 1026; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.disable_auto_compactions = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(0)); - ASSERT_OK(s); - } - ASSERT_OK(db_->Flush(FlushOptions())); - Close(); - - // Reopen the database in read only mode as a Compacted DB to test its - // timestamp support. 
-  options.max_open_files = -1;
-  ASSERT_OK(ReadOnlyReopen(options));
-  CheckDBOpenedAsCompactedDBWithOneLevel0File();
-
-  ReadOptions read_opts;
-  std::string different_size_read_timestamp;
-  PutFixed32(&different_size_read_timestamp, 2);
-  Slice different_size_read_ts = different_size_read_timestamp;
-  read_opts.timestamp = &different_size_read_ts;
-  for (uint64_t key = 0; key <= kMaxKey; ++key) {
-    std::string value_from_get;
-    std::string timestamp;
-    ASSERT_TRUE(db_->Get(read_opts, Key1(key), &value_from_get, &timestamp)
-                    .IsInvalidArgument());
-  }
-  Close();
-}
-
-TEST_F(DBReadOnlyTestWithTimestamp,
-       CompactedDBGetReadTimestampSpecifiedWithoutWriteTimestamp) {
-  const int kNumKeysPerFile = 1026;
-  const uint64_t kMaxKey = 1024;
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-  options.disable_auto_compactions = true;
-  options.memtable_factory.reset(
-      test::NewSpecialSkipListFactory(kNumKeysPerFile));
-  DestroyAndReopen(options);
-  WriteOptions write_opts;
-  for (uint64_t key = 0; key <= kMaxKey; ++key) {
-    Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(0));
-    ASSERT_OK(s);
-  }
-  ASSERT_OK(db_->Flush(FlushOptions()));
-  Close();
-
-  // Reopen the database in read only mode as a Compacted DB to test its
-  // timestamp support.
-  options.max_open_files = -1;
-  ASSERT_OK(ReadOnlyReopen(options));
-  CheckDBOpenedAsCompactedDBWithOneLevel0File();
-
-  ReadOptions read_opts;
-  const std::string read_timestamp = Timestamp(2, 0);
-  Slice read_ts = read_timestamp;
-  read_opts.timestamp = &read_ts;
-  for (uint64_t key = 0; key <= kMaxKey; ++key) {
-    std::string value_from_get;
-    std::string timestamp;
-    ASSERT_TRUE(db_->Get(read_opts, Key1(key), &value_from_get, &timestamp)
-                    .IsInvalidArgument());
-  }
-  Close();
-}
-
-TEST_F(DBReadOnlyTestWithTimestamp,
-       CompactedDBGetWriteWithTimestampReadWithoutTimestamp) {
-  const int kNumKeysPerFile = 1026;
-  const uint64_t kMaxKey = 1024;
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-  options.disable_auto_compactions = true;
-  const size_t kTimestampSize = Timestamp(0, 0).size();
-  TestComparator test_cmp(kTimestampSize);
-  options.comparator = &test_cmp;
-  options.memtable_factory.reset(
-      test::NewSpecialSkipListFactory(kNumKeysPerFile));
-  DestroyAndReopen(options);
-  std::string write_timestamp = Timestamp(1, 0);
-  WriteOptions write_opts;
-  for (uint64_t key = 0; key <= kMaxKey; ++key) {
-    Status s = db_->Put(write_opts, Key1(key), write_timestamp,
-                        "value" + std::to_string(0));
-    ASSERT_OK(s);
-  }
-  ASSERT_OK(db_->Flush(FlushOptions()));
-  Close();
-
-  // Reopen the database in read only mode as a Compacted DB to test its
-  // timestamp support.
-  options.max_open_files = -1;
-  ASSERT_OK(ReadOnlyReopen(options));
-  CheckDBOpenedAsCompactedDBWithOneLevel0File();
-
-  ReadOptions read_opts;
-  for (uint64_t key = 0; key <= kMaxKey; ++key) {
-    std::string value_from_get;
-    ASSERT_TRUE(
-        db_->Get(read_opts, Key1(key), &value_from_get).IsInvalidArgument());
-  }
-  Close();
-}
-
-TEST_F(DBReadOnlyTestWithTimestamp, CompactedDBGetWithOnlyOneL0File) {
-  const int kNumKeysPerFile = 1026 * 2;
-  const uint64_t kMaxKey = 1024;
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-  options.disable_auto_compactions = true;
-  const size_t kTimestampSize = Timestamp(0, 0).size();
-  TestComparator test_cmp(kTimestampSize);
-  options.comparator = &test_cmp;
-  options.memtable_factory.reset(
-      test::NewSpecialSkipListFactory(kNumKeysPerFile));
-  DestroyAndReopen(options);
-  const std::vector<uint64_t> start_keys = {1, 0};
-  const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
-                                                     Timestamp(3, 0)};
-  const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
-                                                    Timestamp(4, 0)};
-  for (size_t i = 0; i < write_timestamps.size(); ++i) {
-    WriteOptions write_opts;
-    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
-      Status s = db_->Put(write_opts, Key1(key), write_timestamps[i],
-                          "value" + std::to_string(i));
-      ASSERT_OK(s);
-    }
-  }
-  ASSERT_OK(db_->Flush(FlushOptions()));
-  Close();
-
-  // Reopen the database in read only mode as a Compacted DB to test its
-  // timestamp support.
-  options.max_open_files = -1;
-  ASSERT_OK(ReadOnlyReopen(options));
-  CheckDBOpenedAsCompactedDBWithOneLevel0File();
-
-  for (size_t i = 0; i < read_timestamps.size(); ++i) {
-    ReadOptions read_opts;
-    Slice read_ts = read_timestamps[i];
-    read_opts.timestamp = &read_ts;
-    int count = 0;
-    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key, ++count) {
-      std::string value_from_get;
-      std::string timestamp;
-      ASSERT_OK(db_->Get(read_opts, Key1(key), &value_from_get, &timestamp));
-      ASSERT_EQ("value" + std::to_string(i), value_from_get);
-      ASSERT_EQ(write_timestamps[i], timestamp);
-    }
-    size_t expected_count = kMaxKey - start_keys[i] + 1;
-    ASSERT_EQ(expected_count, count);
-  }
-  Close();
-}
-
-TEST_F(DBReadOnlyTestWithTimestamp,
-       CompactedDBGetWithOnlyHighestNonEmptyLevelFiles) {
-  const int kNumKeysPerFile = 128;
-  const uint64_t kMaxKey = 1024;
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-  options.disable_auto_compactions = true;
-  const size_t kTimestampSize = Timestamp(0, 0).size();
-  TestComparator test_cmp(kTimestampSize);
-  options.comparator = &test_cmp;
-  options.memtable_factory.reset(
-      test::NewSpecialSkipListFactory(kNumKeysPerFile));
-  DestroyAndReopen(options);
-  const std::vector<uint64_t> start_keys = {1, 0};
-  const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
-                                                     Timestamp(3, 0)};
-  const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
-                                                    Timestamp(4, 0)};
-  for (size_t i = 0; i < write_timestamps.size(); ++i) {
-    WriteOptions write_opts;
-    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
-      Status s = db_->Put(write_opts, Key1(key), write_timestamps[i],
-                          "value" + std::to_string(i));
-      ASSERT_OK(s);
-    }
-  }
-  ASSERT_OK(db_->Flush(FlushOptions()));
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  Close();
-
-  // Reopen the database in read only mode as a Compacted DB to test its
-  // timestamp support.
- options.max_open_files = -1; - ASSERT_OK(ReadOnlyReopen(options)); - CheckDBOpenedAsCompactedDBWithOnlyHighestNonEmptyLevelFiles(); - - for (size_t i = 0; i < read_timestamps.size(); ++i) { - ReadOptions read_opts; - Slice read_ts = read_timestamps[i]; - read_opts.timestamp = &read_ts; - int count = 0; - for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key, ++count) { - std::string value_from_get; - std::string timestamp; - ASSERT_OK(db_->Get(read_opts, Key1(key), &value_from_get, ×tamp)); - ASSERT_EQ("value" + std::to_string(i), value_from_get); - ASSERT_EQ(write_timestamps[i], timestamp); - } - size_t expected_count = kMaxKey - start_keys[i] + 1; - ASSERT_EQ(expected_count, count); - } - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, - CompactedDBMultiGetReadTimestampSizeMismatch) { - const int kNumKeysPerFile = 1026; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.disable_auto_compactions = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(0)); - ASSERT_OK(s); - } - ASSERT_OK(db_->Flush(FlushOptions())); - Close(); - - // Reopen the database in read only mode as a Compacted DB to test its - // timestamp support. - options.max_open_files = -1; - ASSERT_OK(ReadOnlyReopen(options)); - CheckDBOpenedAsCompactedDBWithOneLevel0File(); - - ReadOptions read_opts; - std::string different_size_read_timestamp; - PutFixed32(&different_size_read_timestamp, 2); - Slice different_size_read_ts = different_size_read_timestamp; - read_opts.timestamp = &different_size_read_ts; - std::vector key_strs; - std::vector keys; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - key_strs.push_back(Key1(key)); - } - for (const auto& key_str : key_strs) { - keys.emplace_back(key_str); - } - std::vector values; - std::vector timestamps; - std::vector status_list = - db_->MultiGet(read_opts, keys, &values, ×tamps); - for (const auto& status : status_list) { - ASSERT_TRUE(status.IsInvalidArgument()); - } - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, - CompactedDBMultiGetReadTimestampSpecifiedWithoutWriteTimestamp) { - const int kNumKeysPerFile = 1026; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(0)); - ASSERT_OK(s); - } - ASSERT_OK(db_->Flush(FlushOptions())); - Close(); - - // Reopen the database in read only mode as a Compacted DB to test its - // timestamp support. 
- options.max_open_files = -1; - ASSERT_OK(ReadOnlyReopen(options)); - CheckDBOpenedAsCompactedDBWithOneLevel0File(); - - ReadOptions read_opts; - std::string read_timestamp = Timestamp(2, 0); - Slice read_ts = read_timestamp; - read_opts.timestamp = &read_ts; - std::vector key_strs; - std::vector keys; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - key_strs.push_back(Key1(key)); - } - for (const auto& key_str : key_strs) { - keys.emplace_back(key_str); - } - std::vector values; - std::vector timestamps; - std::vector status_list = - db_->MultiGet(read_opts, keys, &values, ×tamps); - for (const auto& status : status_list) { - ASSERT_TRUE(status.IsInvalidArgument()); - } - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, - CompactedDBMultiGetWriteWithTimestampReadWithoutTimestamp) { - const int kNumKeysPerFile = 1026; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.disable_auto_compactions = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(0)); - ASSERT_OK(s); - } - ASSERT_OK(db_->Flush(FlushOptions())); - Close(); - - // Reopen the database in read only mode as a Compacted DB to test its - // timestamp support. - options.max_open_files = -1; - ASSERT_OK(ReadOnlyReopen(options)); - CheckDBOpenedAsCompactedDBWithOneLevel0File(); - - ReadOptions read_opts; - std::vector key_strs; - std::vector keys; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - key_strs.push_back(Key1(key)); - } - for (const auto& key_str : key_strs) { - keys.emplace_back(key_str); - } - std::vector values; - std::vector status_list = db_->MultiGet(read_opts, keys, &values); - for (const auto& status : status_list) { - ASSERT_TRUE(status.IsInvalidArgument()); - } - Close(); -} - -TEST_F(DBReadOnlyTestWithTimestamp, CompactedDBMultiGetWithOnlyOneL0File) { - const int kNumKeysPerFile = 1026 * 2; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.disable_auto_compactions = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::vector start_keys = {1, 0}; - const std::vector write_timestamps = {Timestamp(1, 0), - Timestamp(3, 0)}; - const std::vector read_timestamps = {Timestamp(2, 0), - Timestamp(4, 0)}; - for (size_t i = 0; i < write_timestamps.size(); ++i) { - WriteOptions write_opts; - for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamps[i], - "value" + std::to_string(i)); - ASSERT_OK(s); - } - } - ASSERT_OK(db_->Flush(FlushOptions())); - Close(); - - // Reopen the database in read only mode as a Compacted DB to test its - // timestamp support. 
-  options.max_open_files = -1;
-  ASSERT_OK(ReadOnlyReopen(options));
-  CheckDBOpenedAsCompactedDBWithOneLevel0File();
-
-  for (size_t i = 0; i < write_timestamps.size(); ++i) {
-    ReadOptions read_opts;
-    Slice read_ts = read_timestamps[i];
-    read_opts.timestamp = &read_ts;
-    std::vector<std::string> key_strs;
-    std::vector<Slice> keys;
-    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
-      key_strs.push_back(Key1(key));
-    }
-    for (const auto& key_str : key_strs) {
-      keys.emplace_back(key_str);
-    }
-    size_t batch_size = kMaxKey - start_keys[i] + 1;
-    std::vector<std::string> values;
-    std::vector<std::string> timestamps;
-    std::vector<Status> status_list =
-        db_->MultiGet(read_opts, keys, &values, &timestamps);
-    ASSERT_EQ(batch_size, values.size());
-    ASSERT_EQ(batch_size, timestamps.size());
-    for (uint64_t idx = 0; idx < values.size(); ++idx) {
-      ASSERT_EQ("value" + std::to_string(i), values[idx]);
-      ASSERT_EQ(write_timestamps[i], timestamps[idx]);
-      ASSERT_OK(status_list[idx]);
-    }
-  }
-
-  Close();
-}
-
-TEST_F(DBReadOnlyTestWithTimestamp,
-       CompactedDBMultiGetWithOnlyHighestNonEmptyLevelFiles) {
-  const int kNumKeysPerFile = 128;
-  const uint64_t kMaxKey = 1024;
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-  options.disable_auto_compactions = true;
-  const size_t kTimestampSize = Timestamp(0, 0).size();
-  TestComparator test_cmp(kTimestampSize);
-  options.comparator = &test_cmp;
-  options.memtable_factory.reset(
-      test::NewSpecialSkipListFactory(kNumKeysPerFile));
-  DestroyAndReopen(options);
-  const std::vector<uint64_t> start_keys = {1, 0};
-  const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
-                                                     Timestamp(3, 0)};
-  const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
-                                                    Timestamp(4, 0)};
-  for (size_t i = 0; i < write_timestamps.size(); ++i) {
-    WriteOptions write_opts;
-    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
-      Status s = db_->Put(write_opts, Key1(key), write_timestamps[i],
-                          "value" + std::to_string(i));
-      ASSERT_OK(s);
-    }
-  }
-  ASSERT_OK(db_->Flush(FlushOptions()));
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  Close();
-
-  // Reopen the database in read only mode as a Compacted DB to test its
-  // timestamp support.
-  options.max_open_files = -1;
-  ASSERT_OK(ReadOnlyReopen(options));
-  CheckDBOpenedAsCompactedDBWithOnlyHighestNonEmptyLevelFiles();
-
-  for (size_t i = 0; i < write_timestamps.size(); ++i) {
-    ReadOptions read_opts;
-    Slice read_ts = read_timestamps[i];
-    read_opts.timestamp = &read_ts;
-    std::vector<std::string> key_strs;
-    std::vector<Slice> keys;
-    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
-      key_strs.push_back(Key1(key));
-    }
-    for (const auto& key_str : key_strs) {
-      keys.emplace_back(key_str);
-    }
-    size_t batch_size = kMaxKey - start_keys[i] + 1;
-    std::vector<std::string> values;
-    std::vector<std::string> timestamps;
-    std::vector<Status> status_list =
-        db_->MultiGet(read_opts, keys, &values, &timestamps);
-    ASSERT_EQ(batch_size, values.size());
-    ASSERT_EQ(batch_size, timestamps.size());
-    for (uint64_t idx = 0; idx < values.size(); ++idx) {
-      ASSERT_EQ("value" + std::to_string(i), values[idx]);
-      ASSERT_EQ(write_timestamps[i], timestamps[idx]);
-      ASSERT_OK(status_list[idx]);
-    }
-  }
-
-  Close();
-}
-}  // namespace ROCKSDB_NAMESPACE
-
-int main(int argc, char** argv) {
-  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
-  ::testing::InitGoogleTest(&argc, argv);
-  RegisterCustomObjects(argc, argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/db/db_secondary_test.cc b/db/db_secondary_test.cc
deleted file mode 100644
index f3f0a8d05..000000000
--- a/db/db_secondary_test.cc
+++ /dev/null
@@ -1,1691 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-//
-// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file. See the AUTHORS file for names of contributors.
- -#include "db/db_impl/db_impl_secondary.h" -#include "db/db_test_util.h" -#include "db/db_with_timestamp_test_util.h" -#include "port/stack_trace.h" -#include "rocksdb/utilities/transaction_db.h" -#include "test_util/sync_point.h" -#include "test_util/testutil.h" -#include "utilities/fault_injection_env.h" - -namespace ROCKSDB_NAMESPACE { - -class DBSecondaryTestBase : public DBBasicTestWithTimestampBase { - public: - explicit DBSecondaryTestBase(const std::string& dbname) - : DBBasicTestWithTimestampBase(dbname), - secondary_path_(), - handles_secondary_(), - db_secondary_(nullptr) { - secondary_path_ = - test::PerThreadDBPath(env_, "/db_secondary_test_secondary"); - } - - ~DBSecondaryTestBase() override { - CloseSecondary(); - if (getenv("KEEP_DB") != nullptr) { - fprintf(stdout, "Secondary DB is still at %s\n", secondary_path_.c_str()); - } else { - Options options; - options.env = env_; - EXPECT_OK(DestroyDB(secondary_path_, options)); - } - } - - protected: - Status ReopenAsSecondary(const Options& options) { - return DB::OpenAsSecondary(options, dbname_, secondary_path_, &db_); - } - - void OpenSecondary(const Options& options); - - Status TryOpenSecondary(const Options& options); - - void OpenSecondaryWithColumnFamilies( - const std::vector& column_families, const Options& options); - - void CloseSecondary() { - for (auto h : handles_secondary_) { - ASSERT_OK(db_secondary_->DestroyColumnFamilyHandle(h)); - } - handles_secondary_.clear(); - delete db_secondary_; - db_secondary_ = nullptr; - } - - DBImplSecondary* db_secondary_full() { - return static_cast(db_secondary_); - } - - void CheckFileTypeCounts(const std::string& dir, int expected_log, - int expected_sst, int expected_manifest) const; - - std::string secondary_path_; - std::vector handles_secondary_; - DB* db_secondary_; -}; - -void DBSecondaryTestBase::OpenSecondary(const Options& options) { - ASSERT_OK(TryOpenSecondary(options)); -} - -Status DBSecondaryTestBase::TryOpenSecondary(const Options& options) { - Status s = - DB::OpenAsSecondary(options, dbname_, secondary_path_, &db_secondary_); - return s; -} - -void DBSecondaryTestBase::OpenSecondaryWithColumnFamilies( - const std::vector& column_families, const Options& options) { - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, options); - for (const auto& cf_name : column_families) { - cf_descs.emplace_back(cf_name, options); - } - Status s = DB::OpenAsSecondary(options, dbname_, secondary_path_, cf_descs, - &handles_secondary_, &db_secondary_); - ASSERT_OK(s); -} - -void DBSecondaryTestBase::CheckFileTypeCounts(const std::string& dir, - int expected_log, - int expected_sst, - int expected_manifest) const { - std::vector filenames; - ASSERT_OK(env_->GetChildren(dir, &filenames)); - - int log_cnt = 0, sst_cnt = 0, manifest_cnt = 0; - for (auto file : filenames) { - uint64_t number; - FileType type; - if (ParseFileName(file, &number, &type)) { - log_cnt += (type == kWalFile); - sst_cnt += (type == kTableFile); - manifest_cnt += (type == kDescriptorFile); - } - } - ASSERT_EQ(expected_log, log_cnt); - ASSERT_EQ(expected_sst, sst_cnt); - ASSERT_EQ(expected_manifest, manifest_cnt); -} - -class DBSecondaryTest : public DBSecondaryTestBase { - public: - explicit DBSecondaryTest() : DBSecondaryTestBase("db_secondary_test") {} -}; - -TEST_F(DBSecondaryTest, FailOpenIfLoggerCreationFail) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - Reopen(options); - - SyncPoint::GetInstance()->DisableProcessing(); - 
SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "rocksdb::CreateLoggerFromOptions:AfterGetPath", [&](void* arg) { - auto* s = reinterpret_cast(arg); - assert(s); - *s = Status::IOError("Injected"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - options.max_open_files = -1; - Status s = TryOpenSecondary(options); - ASSERT_EQ(nullptr, options.info_log); - ASSERT_TRUE(s.IsIOError()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(DBSecondaryTest, NonExistingDb) { - Destroy(last_options_); - - Options options = GetDefaultOptions(); - options.env = env_; - options.max_open_files = -1; - const std::string dbname = "/doesnt/exist"; - Status s = - DB::OpenAsSecondary(options, dbname, secondary_path_, &db_secondary_); - ASSERT_TRUE(s.IsIOError()); -} - -TEST_F(DBSecondaryTest, ReopenAsSecondary) { - Options options; - options.env = env_; - Reopen(options); - ASSERT_OK(Put("foo", "foo_value")); - ASSERT_OK(Put("bar", "bar_value")); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - Close(); - - ASSERT_OK(ReopenAsSecondary(options)); - ASSERT_EQ("foo_value", Get("foo")); - ASSERT_EQ("bar_value", Get("bar")); - ReadOptions ropts; - ropts.verify_checksums = true; - auto db1 = static_cast(db_); - ASSERT_NE(nullptr, db1); - Iterator* iter = db1->NewIterator(ropts); - ASSERT_NE(nullptr, iter); - size_t count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - if (0 == count) { - ASSERT_EQ("bar", iter->key().ToString()); - ASSERT_EQ("bar_value", iter->value().ToString()); - } else if (1 == count) { - ASSERT_EQ("foo", iter->key().ToString()); - ASSERT_EQ("foo_value", iter->value().ToString()); - } - ++count; - } - delete iter; - ASSERT_EQ(2, count); -} - -TEST_F(DBSecondaryTest, SimpleInternalCompaction) { - Options options; - options.env = env_; - Reopen(options); - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("foo", "foo_value" + std::to_string(i))); - ASSERT_OK(Put("bar", "bar_value" + std::to_string(i))); - ASSERT_OK(Flush()); - } - CompactionServiceInput input; - - ColumnFamilyMetaData meta; - db_->GetColumnFamilyMetaData(&meta); - for (auto& file : meta.levels[0].files) { - ASSERT_EQ(0, meta.levels[0].level); - input.input_files.push_back(file.name); - } - ASSERT_EQ(input.input_files.size(), 3); - - input.output_level = 1; - ASSERT_OK(db_->GetDbIdentity(input.db_id)); - Close(); - - options.max_open_files = -1; - OpenSecondary(options); - auto cfh = db_secondary_->DefaultColumnFamily(); - - CompactionServiceResult result; - ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation( - OpenAndCompactOptions(), cfh, input, &result)); - - ASSERT_EQ(result.output_files.size(), 1); - InternalKey smallest, largest; - smallest.DecodeFrom(result.output_files[0].smallest_internal_key); - largest.DecodeFrom(result.output_files[0].largest_internal_key); - ASSERT_EQ(smallest.user_key().ToString(), "bar"); - ASSERT_EQ(largest.user_key().ToString(), "foo"); - ASSERT_EQ(result.output_level, 1); - ASSERT_EQ(result.output_path, this->secondary_path_); - ASSERT_EQ(result.num_output_records, 2); - ASSERT_GT(result.bytes_written, 0); - ASSERT_OK(result.status); -} - -TEST_F(DBSecondaryTest, InternalCompactionMultiLevels) { - Options options; - options.env = env_; - options.disable_auto_compactions = true; - Reopen(options); - const int kRangeL2 = 10; - const int kRangeL1 = 30; - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(Key(i * kRangeL2), "value" + std::to_string(i))); - 
ASSERT_OK(Put(Key((i + 1) * kRangeL2 - 1), "value" + std::to_string(i))); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - for (int i = 0; i < 5; i++) { - ASSERT_OK(Put(Key(i * kRangeL1), "value" + std::to_string(i))); - ASSERT_OK(Put(Key((i + 1) * kRangeL1 - 1), "value" + std::to_string(i))); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(1); - for (int i = 0; i < 4; i++) { - ASSERT_OK(Put(Key(i * 30), "value" + std::to_string(i))); - ASSERT_OK(Put(Key(i * 30 + 50), "value" + std::to_string(i))); - ASSERT_OK(Flush()); - } - - ColumnFamilyMetaData meta; - db_->GetColumnFamilyMetaData(&meta); - - // pick 2 files on level 0 for compaction, which has 3 overlap files on L1 - CompactionServiceInput input1; - input1.input_files.push_back(meta.levels[0].files[2].name); - input1.input_files.push_back(meta.levels[0].files[3].name); - input1.input_files.push_back(meta.levels[1].files[0].name); - input1.input_files.push_back(meta.levels[1].files[1].name); - input1.input_files.push_back(meta.levels[1].files[2].name); - - input1.output_level = 1; - ASSERT_OK(db_->GetDbIdentity(input1.db_id)); - - options.max_open_files = -1; - Close(); - - OpenSecondary(options); - auto cfh = db_secondary_->DefaultColumnFamily(); - CompactionServiceResult result; - ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation( - OpenAndCompactOptions(), cfh, input1, &result)); - ASSERT_OK(result.status); - - // pick 2 files on level 1 for compaction, which has 6 overlap files on L2 - CompactionServiceInput input2; - input2.input_files.push_back(meta.levels[1].files[1].name); - input2.input_files.push_back(meta.levels[1].files[2].name); - for (int i = 3; i < 9; i++) { - input2.input_files.push_back(meta.levels[2].files[i].name); - } - - input2.output_level = 2; - input2.db_id = input1.db_id; - ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation( - OpenAndCompactOptions(), cfh, input2, &result)); - ASSERT_OK(result.status); - - CloseSecondary(); - - // delete all l2 files, without update manifest - for (auto& file : meta.levels[2].files) { - ASSERT_OK(env_->DeleteFile(dbname_ + file.name)); - } - OpenSecondary(options); - cfh = db_secondary_->DefaultColumnFamily(); - Status s = db_secondary_full()->TEST_CompactWithoutInstallation( - OpenAndCompactOptions(), cfh, input2, &result); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_OK(result.status); - - // TODO: L0 -> L1 compaction should success, currently version is not built - // if files is missing. 
- // ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(OpenAndCompactOptions(), - // cfh, input1, &result)); -} - -TEST_F(DBSecondaryTest, InternalCompactionCompactedFiles) { - Options options; - options.env = env_; - options.level0_file_num_compaction_trigger = 4; - Reopen(options); - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("foo", "foo_value" + std::to_string(i))); - ASSERT_OK(Put("bar", "bar_value" + std::to_string(i))); - ASSERT_OK(Flush()); - } - CompactionServiceInput input; - - ColumnFamilyMetaData meta; - db_->GetColumnFamilyMetaData(&meta); - for (auto& file : meta.levels[0].files) { - ASSERT_EQ(0, meta.levels[0].level); - input.input_files.push_back(file.name); - } - ASSERT_EQ(input.input_files.size(), 3); - - input.output_level = 1; - ASSERT_OK(db_->GetDbIdentity(input.db_id)); - - // trigger compaction to delete the files for secondary instance compaction - ASSERT_OK(Put("foo", "foo_value" + std::to_string(3))); - ASSERT_OK(Put("bar", "bar_value" + std::to_string(3))); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - Close(); - - options.max_open_files = -1; - OpenSecondary(options); - auto cfh = db_secondary_->DefaultColumnFamily(); - - CompactionServiceResult result; - Status s = db_secondary_full()->TEST_CompactWithoutInstallation( - OpenAndCompactOptions(), cfh, input, &result); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_OK(result.status); -} - -TEST_F(DBSecondaryTest, InternalCompactionMissingFiles) { - Options options; - options.env = env_; - options.level0_file_num_compaction_trigger = 4; - Reopen(options); - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("foo", "foo_value" + std::to_string(i))); - ASSERT_OK(Put("bar", "bar_value" + std::to_string(i))); - ASSERT_OK(Flush()); - } - CompactionServiceInput input; - - ColumnFamilyMetaData meta; - db_->GetColumnFamilyMetaData(&meta); - for (auto& file : meta.levels[0].files) { - ASSERT_EQ(0, meta.levels[0].level); - input.input_files.push_back(file.name); - } - ASSERT_EQ(input.input_files.size(), 3); - - input.output_level = 1; - ASSERT_OK(db_->GetDbIdentity(input.db_id)); - - Close(); - - ASSERT_OK(env_->DeleteFile(dbname_ + input.input_files[0])); - - options.max_open_files = -1; - OpenSecondary(options); - auto cfh = db_secondary_->DefaultColumnFamily(); - - CompactionServiceResult result; - Status s = db_secondary_full()->TEST_CompactWithoutInstallation( - OpenAndCompactOptions(), cfh, input, &result); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_OK(result.status); - - input.input_files.erase(input.input_files.begin()); - - ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation( - OpenAndCompactOptions(), cfh, input, &result)); - ASSERT_OK(result.status); -} - -TEST_F(DBSecondaryTest, OpenAsSecondary) { - Options options; - options.env = env_; - options.level0_file_num_compaction_trigger = 4; - Reopen(options); - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("foo", "foo_value" + std::to_string(i))); - ASSERT_OK(Put("bar", "bar_value" + std::to_string(i))); - ASSERT_OK(Flush()); - } - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondary(options1); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ReadOptions ropts; - ropts.verify_checksums = true; - const auto verify_db_func = [&](const std::string& foo_val, - const std::string& bar_val) { - std::string value; - ASSERT_OK(db_secondary_->Get(ropts, "foo", &value)); - ASSERT_EQ(foo_val, value); - 
ASSERT_OK(db_secondary_->Get(ropts, "bar", &value)); - ASSERT_EQ(bar_val, value); - Iterator* iter = db_secondary_->NewIterator(ropts); - ASSERT_NE(nullptr, iter); - iter->Seek("foo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - ASSERT_EQ(foo_val, iter->value().ToString()); - iter->Seek("bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bar", iter->key().ToString()); - ASSERT_EQ(bar_val, iter->value().ToString()); - size_t count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ++count; - } - ASSERT_EQ(2, count); - delete iter; - }; - - verify_db_func("foo_value2", "bar_value2"); - - ASSERT_OK(Put("foo", "new_foo_value")); - ASSERT_OK(Put("bar", "new_bar_value")); - ASSERT_OK(Flush()); - - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - verify_db_func("new_foo_value", "new_bar_value"); -} - -namespace { -class TraceFileEnv : public EnvWrapper { - public: - explicit TraceFileEnv(Env* _target) : EnvWrapper(_target) {} - static const char* kClassName() { return "TraceFileEnv"; } - const char* Name() const override { return kClassName(); } - - Status NewRandomAccessFile(const std::string& f, - std::unique_ptr* r, - const EnvOptions& env_options) override { - class TracedRandomAccessFile : public RandomAccessFile { - public: - TracedRandomAccessFile(std::unique_ptr&& target, - std::atomic& counter) - : target_(std::move(target)), files_closed_(counter) {} - ~TracedRandomAccessFile() override { - files_closed_.fetch_add(1, std::memory_order_relaxed); - } - Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const override { - return target_->Read(offset, n, result, scratch); - } - - private: - std::unique_ptr target_; - std::atomic& files_closed_; - }; - Status s = target()->NewRandomAccessFile(f, r, env_options); - if (s.ok()) { - r->reset(new TracedRandomAccessFile(std::move(*r), files_closed_)); - } - return s; - } - - int files_closed() const { - return files_closed_.load(std::memory_order_relaxed); - } - - private: - std::atomic files_closed_{0}; -}; -} // anonymous namespace - -TEST_F(DBSecondaryTest, SecondaryCloseFiles) { - Options options; - options.env = env_; - options.max_open_files = 1; - options.disable_auto_compactions = true; - Reopen(options); - Options options1; - std::unique_ptr traced_env(new TraceFileEnv(env_)); - options1.env = traced_env.get(); - OpenSecondary(options1); - - static const auto verify_db = [&]() { - std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); - std::unique_ptr iter2(db_secondary_->NewIterator(ReadOptions())); - for (iter1->SeekToFirst(), iter2->SeekToFirst(); - iter1->Valid() && iter2->Valid(); iter1->Next(), iter2->Next()) { - ASSERT_EQ(iter1->key(), iter2->key()); - ASSERT_EQ(iter1->value(), iter2->value()); - } - ASSERT_FALSE(iter1->Valid()); - ASSERT_FALSE(iter2->Valid()); - }; - - ASSERT_OK(Put("a", "value")); - ASSERT_OK(Put("c", "value")); - ASSERT_OK(Flush()); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - verify_db(); - - ASSERT_OK(Put("b", "value")); - ASSERT_OK(Put("d", "value")); - ASSERT_OK(Flush()); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - verify_db(); - - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - ASSERT_EQ(2, static_cast(traced_env.get())->files_closed()); - - Status s = db_secondary_->SetDBOptions({{"max_open_files", "-1"}}); - ASSERT_TRUE(s.IsNotSupported()); - CloseSecondary(); -} - -TEST_F(DBSecondaryTest, OpenAsSecondaryWALTailing) { - 
Options options; - options.env = env_; - options.level0_file_num_compaction_trigger = 4; - Reopen(options); - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("foo", "foo_value" + std::to_string(i))); - ASSERT_OK(Put("bar", "bar_value" + std::to_string(i))); - } - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondary(options1); - - ReadOptions ropts; - ropts.verify_checksums = true; - const auto verify_db_func = [&](const std::string& foo_val, - const std::string& bar_val) { - std::string value; - ASSERT_OK(db_secondary_->Get(ropts, "foo", &value)); - ASSERT_EQ(foo_val, value); - ASSERT_OK(db_secondary_->Get(ropts, "bar", &value)); - ASSERT_EQ(bar_val, value); - Iterator* iter = db_secondary_->NewIterator(ropts); - ASSERT_NE(nullptr, iter); - iter->Seek("foo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - ASSERT_EQ(foo_val, iter->value().ToString()); - iter->Seek("bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bar", iter->key().ToString()); - ASSERT_EQ(bar_val, iter->value().ToString()); - size_t count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ++count; - } - ASSERT_EQ(2, count); - delete iter; - }; - - verify_db_func("foo_value2", "bar_value2"); - - ASSERT_OK(Put("foo", "new_foo_value")); - ASSERT_OK(Put("bar", "new_bar_value")); - - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - verify_db_func("new_foo_value", "new_bar_value"); - - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo", "new_foo_value_1")); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - verify_db_func("new_foo_value_1", "new_bar_value"); -} - -TEST_F(DBSecondaryTest, SecondaryTailingBug_ISSUE_8467) { - Options options; - options.env = env_; - Reopen(options); - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("foo", "foo_value" + std::to_string(i))); - ASSERT_OK(Put("bar", "bar_value" + std::to_string(i))); - } - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondary(options1); - - const auto verify_db = [&](const std::string& foo_val, - const std::string& bar_val) { - std::string value; - ReadOptions ropts; - Status s = db_secondary_->Get(ropts, "foo", &value); - ASSERT_OK(s); - ASSERT_EQ(foo_val, value); - - s = db_secondary_->Get(ropts, "bar", &value); - ASSERT_OK(s); - ASSERT_EQ(bar_val, value); - }; - - for (int i = 0; i < 2; ++i) { - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - verify_db("foo_value2", "bar_value2"); - } -} - -TEST_F(DBSecondaryTest, RefreshIterator) { - Options options; - options.env = env_; - Reopen(options); - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondary(options1); - - std::unique_ptr it(db_secondary_->NewIterator(ReadOptions())); - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("foo", "foo_value" + std::to_string(i))); - - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - if (0 == i) { - it->Seek("foo"); - ASSERT_FALSE(it->Valid()); - ASSERT_OK(it->status()); - - ASSERT_OK(it->Refresh()); - - it->Seek("foo"); - ASSERT_OK(it->status()); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("foo", it->key()); - ASSERT_EQ("foo_value0", it->value()); - } else { - it->Seek("foo"); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("foo", it->key()); - ASSERT_EQ("foo_value" + std::to_string(i - 1), it->value()); - ASSERT_OK(it->status()); - - ASSERT_OK(it->Refresh()); - - it->Seek("foo"); - ASSERT_OK(it->status()); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("foo", it->key()); - ASSERT_EQ("foo_value" + std::to_string(i), it->value()); - } - } -} - 
-TEST_F(DBSecondaryTest, OpenWithNonExistColumnFamily) { - Options options; - options.env = env_; - CreateAndReopenWithCF({"pikachu"}, options); - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, options1); - cf_descs.emplace_back("pikachu", options1); - cf_descs.emplace_back("eevee", options1); - Status s = DB::OpenAsSecondary(options1, dbname_, secondary_path_, cf_descs, - &handles_secondary_, &db_secondary_); - ASSERT_NOK(s); -} - -TEST_F(DBSecondaryTest, OpenWithSubsetOfColumnFamilies) { - Options options; - options.env = env_; - CreateAndReopenWithCF({"pikachu"}, options); - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondary(options1); - ASSERT_EQ(0, handles_secondary_.size()); - ASSERT_NE(nullptr, db_secondary_); - - ASSERT_OK(Put(0 /*cf*/, "foo", "foo_value")); - ASSERT_OK(Put(1 /*cf*/, "foo", "foo_value")); - ASSERT_OK(Flush(0 /*cf*/)); - ASSERT_OK(Flush(1 /*cf*/)); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - ReadOptions ropts; - ropts.verify_checksums = true; - std::string value; - ASSERT_OK(db_secondary_->Get(ropts, "foo", &value)); - ASSERT_EQ("foo_value", value); -} - -TEST_F(DBSecondaryTest, SwitchToNewManifestDuringOpen) { - Options options; - options.env = env_; - Reopen(options); - Close(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->LoadDependency( - {{"ReactiveVersionSet::MaybeSwitchManifest:AfterGetCurrentManifestPath:0", - "VersionSet::ProcessManifestWrites:BeforeNewManifest"}, - {"DBImpl::Open:AfterDeleteFiles", - "ReactiveVersionSet::MaybeSwitchManifest:AfterGetCurrentManifestPath:" - "1"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - port::Thread ro_db_thread([&]() { - Options options1; - options1.env = env_; - options1.max_open_files = -1; - Status s = TryOpenSecondary(options1); - ASSERT_TRUE(s.IsTryAgain()); - - // Try again - OpenSecondary(options1); - CloseSecondary(); - }); - Reopen(options); - ro_db_thread.join(); -} - -TEST_F(DBSecondaryTest, MissingTableFileDuringOpen) { - Options options; - options.env = env_; - options.level0_file_num_compaction_trigger = 4; - Reopen(options); - for (int i = 0; i != options.level0_file_num_compaction_trigger; ++i) { - ASSERT_OK(Put("foo", "foo_value" + std::to_string(i))); - ASSERT_OK(Put("bar", "bar_value" + std::to_string(i))); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondary(options1); - ReadOptions ropts; - ropts.verify_checksums = true; - std::string value; - ASSERT_OK(db_secondary_->Get(ropts, "foo", &value)); - ASSERT_EQ("foo_value" + - std::to_string(options.level0_file_num_compaction_trigger - 1), - value); - ASSERT_OK(db_secondary_->Get(ropts, "bar", &value)); - ASSERT_EQ("bar_value" + - std::to_string(options.level0_file_num_compaction_trigger - 1), - value); - Iterator* iter = db_secondary_->NewIterator(ropts); - ASSERT_NE(nullptr, iter); - iter->Seek("bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bar", iter->key().ToString()); - ASSERT_EQ("bar_value" + - std::to_string(options.level0_file_num_compaction_trigger - 1), - iter->value().ToString()); - iter->Seek("foo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - ASSERT_EQ("foo_value" + - 
std::to_string(options.level0_file_num_compaction_trigger - 1), - iter->value().ToString()); - size_t count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ++count; - } - ASSERT_EQ(2, count); - delete iter; -} - -TEST_F(DBSecondaryTest, MissingTableFile) { - Options options; - options.env = env_; - options.level0_file_num_compaction_trigger = 4; - Reopen(options); - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondary(options1); - - for (int i = 0; i != options.level0_file_num_compaction_trigger; ++i) { - ASSERT_OK(Put("foo", "foo_value" + std::to_string(i))); - ASSERT_OK(Put("bar", "bar_value" + std::to_string(i))); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_NE(nullptr, db_secondary_full()); - ReadOptions ropts; - ropts.verify_checksums = true; - std::string value; - ASSERT_NOK(db_secondary_->Get(ropts, "foo", &value)); - ASSERT_NOK(db_secondary_->Get(ropts, "bar", &value)); - - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - ASSERT_OK(db_secondary_->Get(ropts, "foo", &value)); - ASSERT_EQ("foo_value" + - std::to_string(options.level0_file_num_compaction_trigger - 1), - value); - ASSERT_OK(db_secondary_->Get(ropts, "bar", &value)); - ASSERT_EQ("bar_value" + - std::to_string(options.level0_file_num_compaction_trigger - 1), - value); - Iterator* iter = db_secondary_->NewIterator(ropts); - ASSERT_NE(nullptr, iter); - iter->Seek("bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bar", iter->key().ToString()); - ASSERT_EQ("bar_value" + - std::to_string(options.level0_file_num_compaction_trigger - 1), - iter->value().ToString()); - iter->Seek("foo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - ASSERT_EQ("foo_value" + - std::to_string(options.level0_file_num_compaction_trigger - 1), - iter->value().ToString()); - size_t count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ++count; - } - ASSERT_EQ(2, count); - delete iter; -} - -TEST_F(DBSecondaryTest, PrimaryDropColumnFamily) { - Options options; - options.env = env_; - const std::string kCfName1 = "pikachu"; - CreateAndReopenWithCF({kCfName1}, options); - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondaryWithColumnFamilies({kCfName1}, options1); - ASSERT_EQ(2, handles_secondary_.size()); - - ASSERT_OK(Put(1 /*cf*/, "foo", "foo_val_1")); - ASSERT_OK(Flush(1 /*cf*/)); - - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - ReadOptions ropts; - ropts.verify_checksums = true; - std::string value; - ASSERT_OK(db_secondary_->Get(ropts, handles_secondary_[1], "foo", &value)); - ASSERT_EQ("foo_val_1", value); - - ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - Close(); - CheckFileTypeCounts(dbname_, 1, 0, 1); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - value.clear(); - ASSERT_OK(db_secondary_->Get(ropts, handles_secondary_[1], "foo", &value)); - ASSERT_EQ("foo_val_1", value); -} - -TEST_F(DBSecondaryTest, SwitchManifest) { - Options options; - options.env = env_; - options.level0_file_num_compaction_trigger = 4; - const std::string cf1_name("test_cf"); - CreateAndReopenWithCF({cf1_name}, options); - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondaryWithColumnFamilies({kDefaultColumnFamilyName, cf1_name}, - options1); - - const int kNumFiles = options.level0_file_num_compaction_trigger - 1; - // Keep it smaller than 10 so that key0, 
key1, ..., key9 are sorted as 0, 1, - // ..., 9. - const int kNumKeys = 10; - // Create two sst - for (int i = 0; i != kNumFiles; ++i) { - for (int j = 0; j != kNumKeys; ++j) { - ASSERT_OK(Put("key" + std::to_string(j), "value_" + std::to_string(i))); - } - ASSERT_OK(Flush()); - } - - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - const auto& range_scan_db = [&]() { - ReadOptions tmp_ropts; - tmp_ropts.total_order_seek = true; - tmp_ropts.verify_checksums = true; - std::unique_ptr iter(db_secondary_->NewIterator(tmp_ropts)); - int cnt = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++cnt) { - ASSERT_EQ("key" + std::to_string(cnt), iter->key().ToString()); - ASSERT_EQ("value_" + std::to_string(kNumFiles - 1), - iter->value().ToString()); - } - }; - - range_scan_db(); - - // While secondary instance still keeps old MANIFEST open, we close primary, - // restart primary, performs full compaction, close again, restart again so - // that next time secondary tries to catch up with primary, the secondary - // will skip the MANIFEST in middle. - ReopenWithColumnFamilies({kDefaultColumnFamilyName, cf1_name}, options); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ReopenWithColumnFamilies({kDefaultColumnFamilyName, cf1_name}, options); - ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); - - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - range_scan_db(); -} - -TEST_F(DBSecondaryTest, SwitchManifestTwice) { - Options options; - options.env = env_; - options.disable_auto_compactions = true; - const std::string cf1_name("test_cf"); - CreateAndReopenWithCF({cf1_name}, options); - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondaryWithColumnFamilies({kDefaultColumnFamilyName, cf1_name}, - options1); - - ASSERT_OK(Put("0", "value0")); - ASSERT_OK(Flush()); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - std::string value; - ReadOptions ropts; - ropts.verify_checksums = true; - ASSERT_OK(db_secondary_->Get(ropts, "0", &value)); - ASSERT_EQ("value0", value); - - ReopenWithColumnFamilies({kDefaultColumnFamilyName, cf1_name}, options); - ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); - ReopenWithColumnFamilies({kDefaultColumnFamilyName, cf1_name}, options); - ASSERT_OK(Put("0", "value1")); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - - ASSERT_OK(db_secondary_->Get(ropts, "0", &value)); - ASSERT_EQ("value1", value); -} - -TEST_F(DBSecondaryTest, DISABLED_SwitchWAL) { - const int kNumKeysPerMemtable = 1; - Options options; - options.env = env_; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 2; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerMemtable)); - Reopen(options); - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondary(options1); - - const auto& verify_db = [](DB* db1, DB* db2) { - ASSERT_NE(nullptr, db1); - ASSERT_NE(nullptr, db2); - ReadOptions read_opts; - read_opts.verify_checksums = true; - std::unique_ptr it1(db1->NewIterator(read_opts)); - std::unique_ptr it2(db2->NewIterator(read_opts)); - it1->SeekToFirst(); - it2->SeekToFirst(); - for (; it1->Valid() && it2->Valid(); it1->Next(), it2->Next()) { - ASSERT_EQ(it1->key(), it2->key()); - ASSERT_EQ(it1->value(), it2->value()); - } - ASSERT_FALSE(it1->Valid()); - ASSERT_FALSE(it2->Valid()); - - for (it1->SeekToFirst(); it1->Valid(); 
it1->Next()) { - std::string value; - ASSERT_OK(db2->Get(read_opts, it1->key(), &value)); - ASSERT_EQ(it1->value(), value); - } - for (it2->SeekToFirst(); it2->Valid(); it2->Next()) { - std::string value; - ASSERT_OK(db1->Get(read_opts, it2->key(), &value)); - ASSERT_EQ(it2->value(), value); - } - }; - for (int k = 0; k != 16; ++k) { - ASSERT_OK(Put("key" + std::to_string(k), "value" + std::to_string(k))); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - verify_db(dbfull(), db_secondary_); - } -} - -TEST_F(DBSecondaryTest, DISABLED_SwitchWALMultiColumnFamilies) { - const int kNumKeysPerMemtable = 1; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BackgroundCallFlush:ContextCleanedUp", - "DBSecondaryTest::SwitchWALMultipleColumnFamilies:BeforeCatchUp"}}); - SyncPoint::GetInstance()->EnableProcessing(); - const std::string kCFName1 = "pikachu"; - Options options; - options.env = env_; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 2; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerMemtable)); - CreateAndReopenWithCF({kCFName1}, options); - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondaryWithColumnFamilies({kCFName1}, options1); - ASSERT_EQ(2, handles_secondary_.size()); - - const auto& verify_db = [](DB* db1, - const std::vector& handles1, - DB* db2, - const std::vector& handles2) { - ASSERT_NE(nullptr, db1); - ASSERT_NE(nullptr, db2); - ReadOptions read_opts; - read_opts.verify_checksums = true; - ASSERT_EQ(handles1.size(), handles2.size()); - for (size_t i = 0; i != handles1.size(); ++i) { - std::unique_ptr it1(db1->NewIterator(read_opts, handles1[i])); - std::unique_ptr it2(db2->NewIterator(read_opts, handles2[i])); - it1->SeekToFirst(); - it2->SeekToFirst(); - for (; it1->Valid() && it2->Valid(); it1->Next(), it2->Next()) { - ASSERT_EQ(it1->key(), it2->key()); - ASSERT_EQ(it1->value(), it2->value()); - } - ASSERT_FALSE(it1->Valid()); - ASSERT_FALSE(it2->Valid()); - - for (it1->SeekToFirst(); it1->Valid(); it1->Next()) { - std::string value; - ASSERT_OK(db2->Get(read_opts, handles2[i], it1->key(), &value)); - ASSERT_EQ(it1->value(), value); - } - for (it2->SeekToFirst(); it2->Valid(); it2->Next()) { - std::string value; - ASSERT_OK(db1->Get(read_opts, handles1[i], it2->key(), &value)); - ASSERT_EQ(it2->value(), value); - } - } - }; - for (int k = 0; k != 8; ++k) { - for (int j = 0; j < 2; ++j) { - ASSERT_OK(Put(0 /*cf*/, "key" + std::to_string(k), - "value" + std::to_string(k))); - ASSERT_OK(Put(1 /*cf*/, "key" + std::to_string(k), - "value" + std::to_string(k))); - } - TEST_SYNC_POINT( - "DBSecondaryTest::SwitchWALMultipleColumnFamilies:BeforeCatchUp"); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - verify_db(dbfull(), handles_, db_secondary_, handles_secondary_); - SyncPoint::GetInstance()->ClearTrace(); - } -} - -TEST_F(DBSecondaryTest, CatchUpAfterFlush) { - const int kNumKeysPerMemtable = 16; - Options options; - options.env = env_; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 2; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerMemtable)); - Reopen(options); - - Options options1; - options1.env = env_; - options1.max_open_files = -1; - OpenSecondary(options1); - - WriteOptions write_opts; - WriteBatch wb; - ASSERT_OK(wb.Put("key0", "value0")); - ASSERT_OK(wb.Put("key1", "value1")); - ASSERT_OK(dbfull()->Write(write_opts, &wb)); - 
ReadOptions read_opts; - std::unique_ptr iter1(db_secondary_->NewIterator(read_opts)); - iter1->Seek("key0"); - ASSERT_FALSE(iter1->Valid()); - iter1->Seek("key1"); - ASSERT_FALSE(iter1->Valid()); - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - iter1->Seek("key0"); - ASSERT_FALSE(iter1->Valid()); - iter1->Seek("key1"); - ASSERT_FALSE(iter1->Valid()); - ASSERT_OK(iter1->status()); - std::unique_ptr iter2(db_secondary_->NewIterator(read_opts)); - iter2->Seek("key0"); - ASSERT_TRUE(iter2->Valid()); - ASSERT_EQ("value0", iter2->value()); - iter2->Seek("key1"); - ASSERT_TRUE(iter2->Valid()); - ASSERT_OK(iter2->status()); - ASSERT_EQ("value1", iter2->value()); - - { - WriteBatch wb1; - ASSERT_OK(wb1.Put("key0", "value01")); - ASSERT_OK(wb1.Put("key1", "value11")); - ASSERT_OK(dbfull()->Write(write_opts, &wb1)); - } - - { - WriteBatch wb2; - ASSERT_OK(wb2.Put("key0", "new_value0")); - ASSERT_OK(wb2.Delete("key1")); - ASSERT_OK(dbfull()->Write(write_opts, &wb2)); - } - - ASSERT_OK(Flush()); - - ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); - std::unique_ptr iter3(db_secondary_->NewIterator(read_opts)); - // iter3 should not see value01 and value11 at all. - iter3->Seek("key0"); - ASSERT_TRUE(iter3->Valid()); - ASSERT_EQ("new_value0", iter3->value()); - iter3->Seek("key1"); - ASSERT_FALSE(iter3->Valid()); - ASSERT_OK(iter3->status()); -} - -TEST_F(DBSecondaryTest, CheckConsistencyWhenOpen) { - bool called = false; - Options options; - options.env = env_; - options.disable_auto_compactions = true; - Reopen(options); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "DBImplSecondary::CheckConsistency:AfterFirstAttempt", [&](void* arg) { - ASSERT_NE(nullptr, arg); - called = true; - auto* s = reinterpret_cast(arg); - ASSERT_NOK(*s); - }); - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::CheckConsistency:AfterGetLiveFilesMetaData", - "BackgroundCallCompaction:0"}, - {"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles", - "DBImpl::CheckConsistency:BeforeGetFileSize"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put("a", "value0")); - ASSERT_OK(Put("c", "value0")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("b", "value1")); - ASSERT_OK(Put("d", "value1")); - ASSERT_OK(Flush()); - port::Thread thread([this]() { - Options opts; - opts.env = env_; - opts.max_open_files = -1; - OpenSecondary(opts); - }); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - thread.join(); - ASSERT_TRUE(called); -} - -TEST_F(DBSecondaryTest, StartFromInconsistent) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Flush()); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) { - ASSERT_NE(nullptr, arg); - *(reinterpret_cast(arg)) = - Status::Corruption("Inject corruption"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - Options options1; - options1.env = env_; - Status s = TryOpenSecondary(options1); - ASSERT_TRUE(s.IsCorruption()); -} - -TEST_F(DBSecondaryTest, InconsistencyDuringCatchUp) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Flush()); - - Options options1; - options1.env = env_; - OpenSecondary(options1); - - { - std::string 
value; - ASSERT_OK(db_secondary_->Get(ReadOptions(), "foo", &value)); - ASSERT_EQ("value", value); - } - - ASSERT_OK(Put("bar", "value1")); - ASSERT_OK(Flush()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) { - ASSERT_NE(nullptr, arg); - *(reinterpret_cast(arg)) = - Status::Corruption("Inject corruption"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - Status s = db_secondary_->TryCatchUpWithPrimary(); - ASSERT_TRUE(s.IsCorruption()); -} - -TEST_F(DBSecondaryTest, OpenWithTransactionDB) { - Options options = CurrentOptions(); - options.create_if_missing = true; - - // Destroy the DB to recreate as a TransactionDB. - Close(); - Destroy(options, true); - - // Create a TransactionDB. - TransactionDB* txn_db = nullptr; - TransactionDBOptions txn_db_opts; - ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, &txn_db)); - ASSERT_NE(txn_db, nullptr); - db_ = txn_db; - - std::vector cfs = {"new_CF"}; - CreateColumnFamilies(cfs, options); - ASSERT_EQ(handles_.size(), 1); - - WriteOptions wopts; - TransactionOptions txn_opts; - Transaction* txn1 = txn_db->BeginTransaction(wopts, txn_opts, nullptr); - ASSERT_NE(txn1, nullptr); - ASSERT_OK(txn1->Put(handles_[0], "k1", "v1")); - ASSERT_OK(txn1->Commit()); - delete txn1; - - options = CurrentOptions(); - options.max_open_files = -1; - ASSERT_OK(TryOpenSecondary(options)); -} - -class DBSecondaryTestWithTimestamp : public DBSecondaryTestBase { - public: - explicit DBSecondaryTestWithTimestamp() - : DBSecondaryTestBase("db_secondary_test_with_timestamp") {} -}; -TEST_F(DBSecondaryTestWithTimestamp, IteratorAndGetReadTimestampSizeMismatch) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database as secondary instance to test its timestamp support. 
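A compact sketch of the user-defined timestamp plumbing the timestamp tests in this file rely on, assuming the built-in BytewiseComparatorWithU64Ts comparator rather than the tests' TestComparator; the path and the local EncodeU64TsSketch helper are illustrative stand-ins for the tests' Timestamp()/coding helpers.

#include <cassert>
#include <cstdint>
#include <string>

#include "rocksdb/comparator.h"
#include "rocksdb/db.h"

// Local little-endian fixed64 encoder for an 8-byte timestamp.
static std::string EncodeU64TsSketch(uint64_t ts) {
  std::string out(sizeof(ts), '\0');
  for (size_t i = 0; i < sizeof(ts); ++i) {
    out[i] = static_cast<char>((ts >> (8 * i)) & 0xff);
  }
  return out;
}

void UserTimestampSketch() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.comparator = rocksdb::BytewiseComparatorWithU64Ts();

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/ts_db", &db);
  assert(s.ok());

  // Writes carry a timestamp whose size matches the comparator's.
  const std::string write_ts = EncodeU64TsSketch(1);
  s = db->Put(rocksdb::WriteOptions(), "foo", write_ts, "v1");
  assert(s.ok());

  // Reads must supply a timestamp of the same size; they see the newest
  // version visible at that timestamp.
  const std::string read_ts = EncodeU64TsSketch(2);
  rocksdb::Slice read_ts_slice = read_ts;
  rocksdb::ReadOptions read_opts;
  read_opts.timestamp = &read_ts_slice;

  std::string value;
  std::string ts_of_value;
  s = db->Get(read_opts, "foo", &value, &ts_of_value);
  assert(s.ok() && value == "v1" && ts_of_value == write_ts);

  delete db;
}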
- Close(); - options.max_open_files = -1; - ASSERT_OK(ReopenAsSecondary(options)); - - ReadOptions read_opts; - std::string different_size_read_timestamp; - PutFixed32(&different_size_read_timestamp, 2); - Slice different_size_read_ts = different_size_read_timestamp; - read_opts.timestamp = &different_size_read_ts; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsInvalidArgument()); - } - - for (uint64_t key = 0; key <= kMaxKey; ++key) { - std::string value_from_get; - std::string timestamp; - ASSERT_TRUE(db_->Get(read_opts, Key1(key), &value_from_get, ×tamp) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBSecondaryTestWithTimestamp, - IteratorAndGetReadTimestampSpecifiedWithoutWriteTimestamp) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database as secondary instance to test its timestamp support. - Close(); - options.max_open_files = -1; - ASSERT_OK(ReopenAsSecondary(options)); - - ReadOptions read_opts; - const std::string read_timestamp = Timestamp(2, 0); - Slice read_ts = read_timestamp; - read_opts.timestamp = &read_ts; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsInvalidArgument()); - } - - for (uint64_t key = 0; key <= kMaxKey; ++key) { - std::string value_from_get; - std::string timestamp; - ASSERT_TRUE(db_->Get(read_opts, Key1(key), &value_from_get, ×tamp) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBSecondaryTestWithTimestamp, - IteratorAndGetWriteWithTimestampReadWithoutTimestamp) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database as secondary instance to test its timestamp support. 
- Close(); - options.max_open_files = -1; - ASSERT_OK(ReopenAsSecondary(options)); - - ReadOptions read_opts; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsInvalidArgument()); - } - - for (uint64_t key = 0; key <= kMaxKey; ++key) { - std::string value_from_get; - ASSERT_TRUE( - db_->Get(read_opts, Key1(key), &value_from_get).IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBSecondaryTestWithTimestamp, IteratorAndGet) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::vector start_keys = {1, 0}; - const std::vector write_timestamps = {Timestamp(1, 0), - Timestamp(3, 0)}; - const std::vector read_timestamps = {Timestamp(2, 0), - Timestamp(4, 0)}; - for (size_t i = 0; i < write_timestamps.size(); ++i) { - WriteOptions write_opts; - for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamps[i], - "value" + std::to_string(i)); - ASSERT_OK(s); - } - } - - // Reopen the database as secondary instance to test its timestamp support. - Close(); - options.max_open_files = -1; - ASSERT_OK(ReopenAsSecondary(options)); - - auto get_value_and_check = [](DB* db, ReadOptions read_opts, Slice key, - Slice expected_value, std::string expected_ts) { - std::string value_from_get; - std::string timestamp; - ASSERT_OK(db->Get(read_opts, key.ToString(), &value_from_get, ×tamp)); - ASSERT_EQ(expected_value, value_from_get); - ASSERT_EQ(expected_ts, timestamp); - }; - for (size_t i = 0; i < read_timestamps.size(); ++i) { - ReadOptions read_opts; - Slice read_ts = read_timestamps[i]; - read_opts.timestamp = &read_ts; - std::unique_ptr it(db_->NewIterator(read_opts)); - int count = 0; - uint64_t key = 0; - // Forward iterate. - for (it->Seek(Key1(0)), key = start_keys[i]; it->Valid(); - it->Next(), ++count, ++key) { - CheckIterUserEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - get_value_and_check(db_, read_opts, it->key(), it->value(), - write_timestamps[i]); - } - size_t expected_count = kMaxKey - start_keys[i] + 1; - ASSERT_EQ(expected_count, count); - - // Backward iterate. - count = 0; - for (it->SeekForPrev(Key1(kMaxKey)), key = kMaxKey; it->Valid(); - it->Prev(), ++count, --key) { - CheckIterUserEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - get_value_and_check(db_, read_opts, it->key(), it->value(), - write_timestamps[i]); - } - ASSERT_EQ(static_cast(kMaxKey) - start_keys[i] + 1, count); - - // SeekToFirst()/SeekToLast() with lower/upper bounds. - // Then iter with lower and upper bounds. 
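Before the bounded loop that follows, here is a minimal standalone sketch of iterate_lower_bound/iterate_upper_bound; timestamp setup is omitted for brevity, and the key range is illustrative. The lower bound is inclusive, the upper bound exclusive, and both Slices must outlive the iterator.

#include <cassert>
#include <memory>
#include <string>

#include "rocksdb/db.h"

void BoundedIterationSketch(rocksdb::DB* db) {
  const std::string lower = "key100";
  const std::string upper = "key200";
  rocksdb::Slice lower_bound = lower;
  rocksdb::Slice upper_bound = upper;

  rocksdb::ReadOptions read_opts;
  read_opts.iterate_lower_bound = &lower_bound;   // inclusive
  read_opts.iterate_upper_bound = &upper_bound;   // exclusive

  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(read_opts));
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // Only keys in ["key100", "key200") are visited.
  }
  assert(it->status().ok());
}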
- uint64_t l = 0; - uint64_t r = kMaxKey + 1; - while (l < r) { - std::string lb_str = Key1(l); - Slice lb = lb_str; - std::string ub_str = Key1(r); - Slice ub = ub_str; - read_opts.iterate_lower_bound = &lb; - read_opts.iterate_upper_bound = &ub; - it.reset(db_->NewIterator(read_opts)); - for (it->SeekToFirst(), key = std::max(l, start_keys[i]), count = 0; - it->Valid(); it->Next(), ++key, ++count) { - CheckIterUserEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - get_value_and_check(db_, read_opts, it->key(), it->value(), - write_timestamps[i]); - } - ASSERT_EQ(r - std::max(l, start_keys[i]), count); - - for (it->SeekToLast(), key = std::min(r, kMaxKey + 1), count = 0; - it->Valid(); it->Prev(), --key, ++count) { - CheckIterUserEntry(it.get(), Key1(key - 1), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - get_value_and_check(db_, read_opts, it->key(), it->value(), - write_timestamps[i]); - } - l += (kMaxKey / 100); - r -= (kMaxKey / 100); - } - } - Close(); -} - -TEST_F(DBSecondaryTestWithTimestamp, IteratorsReadTimestampSizeMismatch) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database as secondary instance to test its timestamp support. - Close(); - options.max_open_files = -1; - ASSERT_OK(ReopenAsSecondary(options)); - - ReadOptions read_opts; - std::string different_size_read_timestamp; - PutFixed32(&different_size_read_timestamp, 2); - Slice different_size_read_ts = different_size_read_timestamp; - read_opts.timestamp = &different_size_read_ts; - { - std::vector iters; - ASSERT_TRUE( - db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBSecondaryTestWithTimestamp, - IteratorsReadTimestampSpecifiedWithoutWriteTimestamp) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database as secondary instance to test its timestamp support. 
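The Iterators* tests around here use the batch NewIterators() API rather than NewIterator(); a rough sketch of that call across several column families, with the read timestamp passed in by the caller (nullptr when timestamps are not in use). The function name is illustrative.

#include <cassert>
#include <vector>

#include "rocksdb/db.h"

void MultiCfIteratorsSketch(
    rocksdb::DB* db, const std::vector<rocksdb::ColumnFamilyHandle*>& cfs,
    const rocksdb::Slice* read_ts) {
  rocksdb::ReadOptions read_opts;
  read_opts.timestamp = read_ts;  // must match the comparator's ts size, if any

  std::vector<rocksdb::Iterator*> iters;
  rocksdb::Status s = db->NewIterators(read_opts, cfs, &iters);
  if (!s.ok()) {
    // e.g. InvalidArgument when the timestamp size does not match.
    return;
  }
  for (rocksdb::Iterator* it : iters) {
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      // consume it->key() / it->value()
    }
    assert(it->status().ok());
    delete it;  // NewIterators hands ownership of the raw pointers to the caller
  }
}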
- Close(); - options.max_open_files = -1; - ASSERT_OK(ReopenAsSecondary(options)); - - ReadOptions read_opts; - const std::string read_timestamp = Timestamp(2, 0); - Slice read_ts = read_timestamp; - read_opts.timestamp = &read_ts; - { - std::vector iters; - ASSERT_TRUE( - db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBSecondaryTestWithTimestamp, - IteratorsWriteWithTimestampReadWithoutTimestamp) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database as secondary instance to test its timestamp support. - Close(); - options.max_open_files = -1; - ASSERT_OK(ReopenAsSecondary(options)); - - ReadOptions read_opts; - { - std::vector iters; - ASSERT_TRUE( - db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters) - .IsInvalidArgument()); - } - - Close(); -} - -TEST_F(DBSecondaryTestWithTimestamp, Iterators) { - const int kNumKeysPerFile = 128; - const uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::string write_timestamp = Timestamp(1, 0); - const std::string read_timestamp = Timestamp(2, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamp, - "value" + std::to_string(key)); - ASSERT_OK(s); - } - - // Reopen the database as secondary instance to test its timestamp support. - Close(); - options.max_open_files = -1; - ASSERT_OK(ReopenAsSecondary(options)); - - ReadOptions read_opts; - Slice read_ts = read_timestamp; - read_opts.timestamp = &read_ts; - std::vector iters; - ASSERT_OK(db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters)); - ASSERT_EQ(static_cast(1), iters.size()); - - int count = 0; - uint64_t key = 0; - // Forward iterate. - for (iters[0]->Seek(Key1(0)), key = 0; iters[0]->Valid(); - iters[0]->Next(), ++count, ++key) { - CheckIterUserEntry(iters[0], Key1(key), kTypeValue, - "value" + std::to_string(key), write_timestamp); - } - - size_t expected_count = kMaxKey - 0 + 1; - ASSERT_EQ(expected_count, count); - delete iters[0]; - - Close(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc deleted file mode 100644 index 11e7f49fa..000000000 --- a/db/db_sst_test.cc +++ /dev/null @@ -1,1864 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/db_test_util.h" -#include "env/mock_env.h" -#include "file/sst_file_manager_impl.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/cache.h" -#include "rocksdb/sst_file_manager.h" -#include "rocksdb/table.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -class DBSSTTest : public DBTestBase { - public: - DBSSTTest() : DBTestBase("db_sst_test", /*env_do_fsync=*/true) {} -}; - -// A class which remembers the name of each flushed file. -class FlushedFileCollector : public EventListener { - public: - FlushedFileCollector() {} - ~FlushedFileCollector() override {} - - void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { - std::lock_guard lock(mutex_); - flushed_files_.push_back(info.file_path); - } - - std::vector GetFlushedFiles() { - std::lock_guard lock(mutex_); - std::vector result; - for (auto fname : flushed_files_) { - result.push_back(fname); - } - return result; - } - void ClearFlushedFiles() { - std::lock_guard lock(mutex_); - flushed_files_.clear(); - } - - private: - std::vector flushed_files_; - std::mutex mutex_; -}; - -TEST_F(DBSSTTest, DontDeletePendingOutputs) { - Options options; - options.env = env_; - options.create_if_missing = true; - DestroyAndReopen(options); - - // Every time we write to a table file, call FOF/POF with full DB scan. This - // will make sure our pending_outputs_ protection work correctly - std::function purge_obsolete_files_function = [&]() { - JobContext job_context(0); - dbfull()->TEST_LockMutex(); - dbfull()->FindObsoleteFiles(&job_context, true /*force*/); - dbfull()->TEST_UnlockMutex(); - dbfull()->PurgeObsoleteFiles(job_context); - job_context.Clean(); - }; - - env_->table_write_callback_ = &purge_obsolete_files_function; - - for (int i = 0; i < 2; ++i) { - ASSERT_OK(Put("a", "begin")); - ASSERT_OK(Put("z", "end")); - ASSERT_OK(Flush()); - } - - // If pending output guard does not work correctly, PurgeObsoleteFiles() will - // delete the file that Compaction is trying to create, causing this: error - // db/db_test.cc:975: IO error: - // /tmp/rocksdbtest-1552237650/db_test/000009.sst: No such file or directory - Compact("a", "b"); -} - -// 1 Create some SST files by inserting K-V pairs into DB -// 2 Close DB and change suffix from ".sst" to ".ldb" for every other SST file -// 3 Open DB and check if all key can be read -TEST_F(DBSSTTest, SSTsWithLdbSuffixHandling) { - Options options = CurrentOptions(); - options.write_buffer_size = 110 << 10; // 110KB - options.num_levels = 4; - DestroyAndReopen(options); - - Random rnd(301); - int key_id = 0; - for (int i = 0; i < 10; ++i) { - GenerateNewFile(&rnd, &key_id, false); - } - ASSERT_OK(Flush()); - Close(); - int const num_files = GetSstFileCount(dbname_); - ASSERT_GT(num_files, 0); - - Reopen(options); - std::vector values; - values.reserve(key_id); - for (int k = 0; k < key_id; ++k) { - values.push_back(Get(Key(k))); - } - Close(); - - std::vector filenames; - GetSstFiles(env_, dbname_, &filenames); - int num_ldb_files = 0; - for (size_t i = 0; i < filenames.size(); ++i) { - if (i & 1) { - continue; - } - std::string 
const rdb_name = dbname_ + "/" + filenames[i]; - std::string const ldb_name = Rocks2LevelTableFileName(rdb_name); - ASSERT_TRUE(env_->RenameFile(rdb_name, ldb_name).ok()); - ++num_ldb_files; - } - ASSERT_GT(num_ldb_files, 0); - ASSERT_EQ(num_files, GetSstFileCount(dbname_)); - - Reopen(options); - for (int k = 0; k < key_id; ++k) { - ASSERT_EQ(values[k], Get(Key(k))); - } - Destroy(options); -} - -// Check that we don't crash when opening DB with -// DBOptions::skip_checking_sst_file_sizes_on_db_open = true. -TEST_F(DBSSTTest, SkipCheckingSSTFileSizesOnDBOpen) { - ASSERT_OK(Put("pika", "choo")); - ASSERT_OK(Flush()); - - // Just open the DB with the option set to true and check that we don't crash. - Options options; - options.env = env_; - options.skip_checking_sst_file_sizes_on_db_open = true; - Reopen(options); - - ASSERT_EQ("choo", Get("pika")); -} - -TEST_F(DBSSTTest, DontDeleteMovedFile) { - // This test triggers move compaction and verifies that the file is not - // deleted when it's part of move compaction - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.max_bytes_for_level_base = 1024 * 1024; // 1 MB - options.level0_file_num_compaction_trigger = - 2; // trigger compaction when we have 2 files - DestroyAndReopen(options); - - Random rnd(301); - // Create two 1MB sst files - for (int i = 0; i < 2; ++i) { - // Create 1MB sst file - for (int j = 0; j < 100; ++j) { - ASSERT_OK(Put(Key(i * 50 + j), rnd.RandomString(10 * 1024))); - } - ASSERT_OK(Flush()); - } - // this should execute both L0->L1 and L1->(move)->L2 compactions - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,0,1", FilesPerLevel(0)); - - // If the moved file is actually deleted (the move-safeguard in - // ~Version::Version() is not there), we get this failure: - // Corruption: Can't access /000009.sst - Reopen(options); -} - -// This reproduces a bug where we don't delete a file because when it was -// supposed to be deleted, it was blocked by pending_outputs -// Consider: -// 1. current file_number is 13 -// 2. compaction (1) starts, blocks deletion of all files starting with 13 -// (pending outputs) -// 3. file 13 is created by compaction (2) -// 4. file 13 is consumed by compaction (3) and file 15 was created. Since file -// 13 has no references, it is put into VersionSet::obsolete_files_ -// 5. FindObsoleteFiles() gets file 13 from VersionSet::obsolete_files_. File 13 -// is deleted from obsolete_files_ set. -// 6. PurgeObsoleteFiles() tries to delete file 13, but this file is blocked by -// pending outputs since compaction (1) is still running. It is not deleted and -// it is not present in obsolete_files_ anymore. Therefore, we never delete it. 
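The scenario above boils down to a file leaving the live Version while its number is still protected by pending_outputs_; the test that follows reproduces it. As a side note, the public way to enumerate the live SST set that these checks compare against the filesystem is GetLiveFilesMetaData(), sketched here with illustrative names.

#include <cassert>
#include <string>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/metadata.h"

void LiveFilesSketch(rocksdb::DB* db, rocksdb::Env* env,
                     const std::string& dbname) {
  std::vector<rocksdb::LiveFileMetaData> metadata;
  db->GetLiveFilesMetaData(&metadata);
  for (const auto& file : metadata) {
    // LiveFileMetaData::name starts with '/', which is what the
    // dbname_ + name concatenations in these tests rely on.
    assert(env->FileExists(dbname + file.name).ok());
  }
}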
-TEST_F(DBSSTTest, DeleteObsoleteFilesPendingOutputs) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 2 * 1024 * 1024; // 2 MB - options.max_bytes_for_level_base = 1024 * 1024; // 1 MB - options.level0_file_num_compaction_trigger = - 2; // trigger compaction when we have 2 files - options.max_background_flushes = 2; - options.max_background_compactions = 2; - - OnFileDeletionListener* listener = new OnFileDeletionListener(); - options.listeners.emplace_back(listener); - - Reopen(options); - - Random rnd(301); - // Create two 1MB sst files - for (int i = 0; i < 2; ++i) { - // Create 1MB sst file - for (int j = 0; j < 100; ++j) { - ASSERT_OK(Put(Key(i * 50 + j), rnd.RandomString(10 * 1024))); - } - ASSERT_OK(Flush()); - } - // this should execute both L0->L1 and L1->(move)->L2 compactions - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,0,1", FilesPerLevel(0)); - - test::SleepingBackgroundTask blocking_thread; - port::Mutex mutex_; - bool already_blocked(false); - - // block the flush - std::function block_first_time = [&]() { - bool blocking = false; - { - MutexLock l(&mutex_); - if (!already_blocked) { - blocking = true; - already_blocked = true; - } - } - if (blocking) { - blocking_thread.DoSleep(); - } - }; - env_->table_write_callback_ = &block_first_time; - // Insert 2.5MB data, which should trigger a flush because we exceed - // write_buffer_size. The flush will be blocked with block_first_time - // pending_file is protecting all the files created after - for (int j = 0; j < 256; ++j) { - ASSERT_OK(Put(Key(j), rnd.RandomString(10 * 1024))); - } - blocking_thread.WaitUntilSleeping(); - - ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr)); - - ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(metadata.size(), 1U); - auto file_on_L2 = metadata[0].name; - listener->SetExpectedFileName(dbname_ + file_on_L2); - - ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr, - true /* disallow trivial move */)); - ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0)); - - // finish the flush! - blocking_thread.WakeUp(); - blocking_thread.WaitUntilDone(); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - // File just flushed is too big for L0 and L1 so gets moved to L2. - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,0,1,0,1", FilesPerLevel(0)); - - metadata.clear(); - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(metadata.size(), 2U); - - // This file should have been deleted during last compaction - ASSERT_EQ(Status::NotFound(), env_->FileExists(dbname_ + file_on_L2)); - listener->VerifyMatchedCount(1); -} - -// Test that producing an empty .sst file does not write it out to -// disk, and that the DeleteFile() env method is not called for -// removing the non-existing file later. -TEST_F(DBSSTTest, DeleteFileNotCalledForNotCreatedSSTFile) { - Options options = CurrentOptions(); - options.env = env_; - - OnFileDeletionListener* listener = new OnFileDeletionListener(); - options.listeners.emplace_back(listener); - - Reopen(options); - - // Flush the empty database. - ASSERT_OK(Flush()); - ASSERT_EQ("", FilesPerLevel(0)); - - // We expect no .sst files. - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(metadata.size(), 0U); - - // We expect no file deletions. 
- listener->VerifyMatchedCount(0); -} - -// Test that producing a non-empty .sst file does write it out to -// disk, and that the DeleteFile() env method is not called for removing -// the file later. -TEST_F(DBSSTTest, DeleteFileNotCalledForCreatedSSTFile) { - Options options = CurrentOptions(); - options.env = env_; - - OnFileDeletionListener* listener = new OnFileDeletionListener(); - options.listeners.emplace_back(listener); - - Reopen(options); - - ASSERT_OK(Put("pika", "choo")); - - // Flush the non-empty database. - ASSERT_OK(Flush()); - ASSERT_EQ("1", FilesPerLevel(0)); - - // We expect 1 .sst files. - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(metadata.size(), 1U); - - // We expect no file deletions. - listener->VerifyMatchedCount(0); -} - -TEST_F(DBSSTTest, DBWithSstFileManager) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - - int files_added = 0; - int files_deleted = 0; - int files_moved = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnAddFile", [&](void* /*arg*/) { files_added++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnDeleteFile", - [&](void* /*arg*/) { files_deleted++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnMoveFile", [&](void* /*arg*/) { files_moved++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 25; i++) { - GenerateNewRandomFile(&rnd); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Verify that we are tracking all sst files in dbname_ - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db)); - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - } - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db)); - // Verify that we are tracking all sst files in dbname_ - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - // Verify the total files size - uint64_t total_files_size = 0; - for (auto& file_to_size : files_in_db) { - total_files_size += file_to_size.second; - } - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - // We flushed at least 25 files - ASSERT_GE(files_added, 25); - // Compaction must have deleted some files - ASSERT_GT(files_deleted, 0); - // No files were moved - ASSERT_EQ(files_moved, 0); - - Close(); - Reopen(options); - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - - // Verify that we track all the files again after the DB is closed and opened - Close(); - sst_file_manager.reset(NewSstFileManager(env_)); - options.sst_file_manager = sst_file_manager; - sfm = static_cast(sst_file_manager.get()); - - Reopen(options); - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFiles) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - - int files_added = 0; - int files_deleted = 0; - int files_moved = 0; - int 
files_scheduled_to_delete = 0; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnAddFile", [&](void* arg) { - const std::string* const file_path = - static_cast(arg); - if (file_path->find(".blob") != std::string::npos) { - files_added++; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnDeleteFile", [&](void* arg) { - const std::string* const file_path = - static_cast(arg); - if (file_path->find(".blob") != std::string::npos) { - files_deleted++; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::ScheduleFileDeletion", [&](void* arg) { - assert(arg); - const std::string* const file_path = - static_cast(arg); - if (file_path->find(".blob") != std::string::npos) { - ++files_scheduled_to_delete; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnMoveFile", [&](void* /*arg*/) { files_moved++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - options.enable_blob_files = true; - options.blob_file_size = 32; // create one blob per file - DestroyAndReopen(options); - Random rnd(301); - - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put("Key_" + std::to_string(i), "Value_" + std::to_string(i))); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // Verify that we are tracking all sst and blob files in dbname_ - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db)); - ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db)); - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - } - - std::vector blob_files = GetBlobFileNumbers(); - ASSERT_EQ(files_added, blob_files.size()); - // No blob file is obsoleted. - ASSERT_EQ(files_deleted, 0); - ASSERT_EQ(files_scheduled_to_delete, 0); - // No files were moved. - ASSERT_EQ(files_moved, 0); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db)); - ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db)); - - // Verify that we are tracking all sst and blob files in dbname_ - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - // Verify the total files size - uint64_t total_files_size = 0; - for (auto& file_to_size : files_in_db) { - total_files_size += file_to_size.second; - } - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - Close(); - - Reopen(options); - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - - // Verify that we track all the files again after the DB is closed and opened. - Close(); - - sst_file_manager.reset(NewSstFileManager(env_)); - options.sst_file_manager = sst_file_manager; - sfm = static_cast(sst_file_manager.get()); - - Reopen(options); - - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - - // Destroy DB and it will remove all the blob files from sst file manager and - // blob files deletion will go through ScheduleFileDeletion. 
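A minimal sketch of wiring up an SstFileManager and inspecting what it tracks, matching the pattern used throughout these tests: with enable_blob_files set, blob files are accounted for alongside SSTs. The path and function name are illustrative.

#include <cassert>
#include <cstdint>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/sst_file_manager.h"

void SstFileManagerTrackingSketch(rocksdb::Env* env) {
  std::shared_ptr<rocksdb::SstFileManager> sfm(
      rocksdb::NewSstFileManager(env));

  rocksdb::Options options;
  options.create_if_missing = true;
  options.sst_file_manager = sfm;
  options.enable_blob_files = true;  // blob files are tracked as well

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/sfm_db", &db);
  assert(s.ok());

  s = db->Put(rocksdb::WriteOptions(), "k", "v");
  assert(s.ok());
  s = db->Flush(rocksdb::FlushOptions());
  assert(s.ok());

  // Every data file the DB created is accounted for by the manager.
  const uint64_t total = sfm->GetTotalSize();
  const auto tracked = sfm->GetTrackedFiles();  // path -> size
  assert(total > 0 && !tracked.empty());

  delete db;
}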
- ASSERT_EQ(files_deleted, 0); - ASSERT_EQ(files_scheduled_to_delete, 0); - Close(); - ASSERT_OK(DestroyDB(dbname_, options)); - ASSERT_EQ(files_deleted, blob_files.size()); - ASSERT_EQ(files_scheduled_to_delete, blob_files.size()); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFilesWithGC) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - options.enable_blob_files = true; - options.blob_file_size = 32; // create one blob per file - options.disable_auto_compactions = true; - options.enable_blob_garbage_collection = true; - options.blob_garbage_collection_age_cutoff = 0.5; - - int files_added = 0; - int files_deleted = 0; - int files_moved = 0; - int files_scheduled_to_delete = 0; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnAddFile", [&](void* arg) { - const std::string* const file_path = - static_cast(arg); - if (file_path->find(".blob") != std::string::npos) { - files_added++; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnDeleteFile", [&](void* arg) { - const std::string* const file_path = - static_cast(arg); - if (file_path->find(".blob") != std::string::npos) { - files_deleted++; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::ScheduleFileDeletion", [&](void* arg) { - assert(arg); - const std::string* const file_path = - static_cast(arg); - if (file_path->find(".blob") != std::string::npos) { - ++files_scheduled_to_delete; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnMoveFile", [&](void* /*arg*/) { files_moved++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndReopen(options); - Random rnd(301); - - constexpr char first_key[] = "first_key"; - constexpr char first_value[] = "first_value"; - constexpr char second_key[] = "second_key"; - constexpr char second_value[] = "second_value"; - - ASSERT_OK(Put(first_key, first_value)); - ASSERT_OK(Put(second_key, second_value)); - ASSERT_OK(Flush()); - - constexpr char third_key[] = "third_key"; - constexpr char third_value[] = "third_value"; - constexpr char fourth_key[] = "fourth_key"; - constexpr char fourth_value[] = "fourth_value"; - constexpr char fifth_key[] = "fifth_key"; - constexpr char fifth_value[] = "fifth_value"; - - ASSERT_OK(Put(third_key, third_value)); - ASSERT_OK(Put(fourth_key, fourth_value)); - ASSERT_OK(Put(fifth_key, fifth_value)); - ASSERT_OK(Flush()); - - const std::vector original_blob_files = GetBlobFileNumbers(); - - ASSERT_EQ(original_blob_files.size(), 5); - ASSERT_EQ(files_added, 5); - ASSERT_EQ(files_deleted, 0); - ASSERT_EQ(files_scheduled_to_delete, 0); - ASSERT_EQ(files_moved, 0); - { - // Verify that we are tracking all sst and blob files in dbname_ - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db)); - ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db)); - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - } - - const size_t cutoff_index = static_cast( - options.blob_garbage_collection_age_cutoff * original_blob_files.size()); - - size_t expected_number_of_files = original_blob_files.size(); - // Note: turning off enable_blob_files before the compaction results 
in - // garbage collected values getting inlined. - ASSERT_OK(db_->SetOptions({{"enable_blob_files", "false"}})); - expected_number_of_files -= cutoff_index; - files_added = 0; - - constexpr Slice* begin = nullptr; - constexpr Slice* end = nullptr; - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - sfm->WaitForEmptyTrash(); - - ASSERT_EQ(Get(first_key), first_value); - ASSERT_EQ(Get(second_key), second_value); - ASSERT_EQ(Get(third_key), third_value); - ASSERT_EQ(Get(fourth_key), fourth_value); - ASSERT_EQ(Get(fifth_key), fifth_value); - - const std::vector new_blob_files = GetBlobFileNumbers(); - - ASSERT_EQ(new_blob_files.size(), expected_number_of_files); - // No new file is added. - ASSERT_EQ(files_added, 0); - ASSERT_EQ(files_deleted, cutoff_index); - ASSERT_EQ(files_scheduled_to_delete, cutoff_index); - ASSERT_EQ(files_moved, 0); - - // Original blob files below the cutoff should be gone, original blob files at - // or above the cutoff should be still there - for (size_t i = cutoff_index; i < original_blob_files.size(); ++i) { - ASSERT_EQ(new_blob_files[i - cutoff_index], original_blob_files[i]); - } - - { - // Verify that we are tracking all sst and blob files in dbname_ - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db)); - ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db)); - ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); - } - - Close(); - ASSERT_OK(DestroyDB(dbname_, options)); - sfm->WaitForEmptyTrash(); - ASSERT_EQ(files_deleted, 5); - ASSERT_EQ(files_scheduled_to_delete, 5); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -class DBSSTTestRateLimit : public DBSSTTest, - public ::testing::WithParamInterface { - public: - DBSSTTestRateLimit() : DBSSTTest() {} - ~DBSSTTestRateLimit() override {} -}; - -TEST_P(DBSSTTestRateLimit, RateLimitedDelete) { - Destroy(last_options_); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DBSSTTest::RateLimitedDelete:1", - "DeleteScheduler::BackgroundEmptyTrash"}, - }); - - std::vector penalties; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::BackgroundEmptyTrash:Wait", - [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) { - // Turn timed wait into a simulated sleep - uint64_t* abs_time_us = static_cast(arg); - uint64_t cur_time = env_->NowMicros(); - if (*abs_time_us > cur_time) { - env_->MockSleepForMicroseconds(*abs_time_us - cur_time); - } - - // Plus an additional short, random amount - env_->MockSleepForMicroseconds(Random::GetTLSInstance()->Uniform(10)); - - // Set wait until time to before (actual) current time to force not - // to sleep - *abs_time_us = Env::Default()->NowMicros(); - }); - - // Disable PeriodicTaskScheduler as it also has TimedWait, which could update - // the simulated sleep time - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::StartPeriodicTaskScheduler:DisableScheduler", [&](void* arg) { - bool* disable_scheduler = static_cast(arg); - *disable_scheduler = true; - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - bool different_wal_dir = GetParam(); - Options options = CurrentOptions(); - SetTimeElapseOnlySleepOnReopen(&options); - options.disable_auto_compactions = true; - options.env = env_; 
- options.statistics = CreateDBStatistics(); - if (different_wal_dir) { - options.wal_dir = alternative_wal_dir_; - } - - int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec - Status s; - options.sst_file_manager.reset( - NewSstFileManager(env_, nullptr, "", 0, false, &s, 0)); - ASSERT_OK(s); - options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec); - auto sfm = static_cast(options.sst_file_manager.get()); - sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1); - - WriteOptions wo; - if (!different_wal_dir) { - wo.disableWAL = true; - } - Reopen(options); - // Create 4 files in L0 - for (char v = 'a'; v <= 'd'; v++) { - ASSERT_OK(Put("Key2", DummyString(1024, v), wo)); - ASSERT_OK(Put("Key3", DummyString(1024, v), wo)); - ASSERT_OK(Put("Key4", DummyString(1024, v), wo)); - ASSERT_OK(Put("Key1", DummyString(1024, v), wo)); - ASSERT_OK(Put("Key4", DummyString(1024, v), wo)); - ASSERT_OK(Flush()); - } - // We created 4 sst files in L0 - ASSERT_EQ("4", FilesPerLevel(0)); - - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - - // Compaction will move the 4 files in L0 to trash and create 1 L1 file - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - uint64_t delete_start_time = env_->NowMicros(); - // Hold BackgroundEmptyTrash - TEST_SYNC_POINT("DBSSTTest::RateLimitedDelete:1"); - sfm->WaitForEmptyTrash(); - uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; - - uint64_t total_files_size = 0; - uint64_t expected_penlty = 0; - ASSERT_EQ(penalties.size(), metadata.size()); - for (size_t i = 0; i < metadata.size(); i++) { - total_files_size += metadata[i].size; - expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec); - ASSERT_EQ(expected_penlty, penalties[i]); - } - ASSERT_GT(time_spent_deleting, expected_penlty * 0.9); - ASSERT_LT(time_spent_deleting, expected_penlty * 1.1); - ASSERT_EQ(4, options.statistics->getAndResetTickerCount(FILES_MARKED_TRASH)); - ASSERT_EQ( - 0, options.statistics->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -INSTANTIATE_TEST_CASE_P(RateLimitedDelete, DBSSTTestRateLimit, - ::testing::Bool()); - -TEST_F(DBSSTTest, RateLimitedWALDelete) { - Destroy(last_options_); - - std::vector penalties; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::BackgroundEmptyTrash:Wait", - [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.compression = kNoCompression; - options.env = env_; - - int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec - Status s; - options.sst_file_manager.reset( - NewSstFileManager(env_, nullptr, "", 0, false, &s, 0)); - ASSERT_OK(s); - options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec); - auto sfm = static_cast(options.sst_file_manager.get()); - sfm->delete_scheduler()->SetMaxTrashDBRatio(3.1); - SetTimeElapseOnlySleepOnReopen(&options); - - ASSERT_OK(TryReopen(options)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Create 4 files in L0 - for (char v = 'a'; v <= 'd'; v++) { - ASSERT_OK(Put("Key2", DummyString(1024, v))); - ASSERT_OK(Put("Key3", DummyString(1024, v))); - ASSERT_OK(Put("Key4", DummyString(1024, v))); - ASSERT_OK(Put("Key1", DummyString(1024, v))); - ASSERT_OK(Put("Key4", DummyString(1024, v))); - 
ASSERT_OK(Flush()); - } - // We created 4 sst files in L0 - ASSERT_EQ("4", FilesPerLevel(0)); - - // Compaction will move the 4 files in L0 to trash and create 1 L1 file - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - sfm->WaitForEmptyTrash(); - ASSERT_EQ(penalties.size(), 8); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -class DBWALTestWithParam - : public DBTestBase, - public testing::WithParamInterface> { - public: - explicit DBWALTestWithParam() - : DBTestBase("db_wal_test_with_params", /*env_do_fsync=*/true) { - wal_dir_ = std::get<0>(GetParam()); - wal_dir_same_as_dbname_ = std::get<1>(GetParam()); - } - - std::string wal_dir_; - bool wal_dir_same_as_dbname_; -}; - -TEST_P(DBWALTestWithParam, WALTrashCleanupOnOpen) { - class MyEnv : public EnvWrapper { - public: - MyEnv(Env* t) : EnvWrapper(t), fake_log_delete(false) {} - const char* Name() const override { return "MyEnv"; } - Status DeleteFile(const std::string& fname) override { - if (fname.find(".log.trash") != std::string::npos && fake_log_delete) { - return Status::OK(); - } - - return target()->DeleteFile(fname); - } - - void set_fake_log_delete(bool fake) { fake_log_delete = fake; } - - private: - bool fake_log_delete; - }; - - std::unique_ptr env(new MyEnv(env_)); - Destroy(last_options_); - - env->set_fake_log_delete(true); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.compression = kNoCompression; - options.env = env.get(); - options.wal_dir = dbname_ + wal_dir_; - - int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec - Status s; - options.sst_file_manager.reset( - NewSstFileManager(env_, nullptr, "", 0, false, &s, 0)); - ASSERT_OK(s); - options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec); - auto sfm = static_cast(options.sst_file_manager.get()); - sfm->delete_scheduler()->SetMaxTrashDBRatio(3.1); - - Reopen(options); - - // Create 4 files in L0 - for (char v = 'a'; v <= 'd'; v++) { - if (v == 'c') { - // Maximize the change that the last log file will be preserved in trash - // before restarting the DB. - // We have to set this on the 2nd to last file for it to delay deletion - // on the last file. 
(Quirk of DeleteScheduler::BackgroundEmptyTrash()) - options.sst_file_manager->SetDeleteRateBytesPerSecond(1); - } - ASSERT_OK(Put("Key2", DummyString(1024, v))); - ASSERT_OK(Put("Key3", DummyString(1024, v))); - ASSERT_OK(Put("Key4", DummyString(1024, v))); - ASSERT_OK(Put("Key1", DummyString(1024, v))); - ASSERT_OK(Put("Key4", DummyString(1024, v))); - ASSERT_OK(Flush()); - } - // We created 4 sst files in L0 - ASSERT_EQ("4", FilesPerLevel(0)); - - Close(); - - options.sst_file_manager.reset(); - std::vector filenames; - int trash_log_count = 0; - if (!wal_dir_same_as_dbname_) { - // Forcibly create some trash log files - std::unique_ptr result; - ASSERT_OK(env->NewWritableFile(options.wal_dir + "/1000.log.trash", &result, - EnvOptions())); - result.reset(); - } - ASSERT_OK(env->GetChildren(options.wal_dir, &filenames)); - for (const std::string& fname : filenames) { - if (fname.find(".log.trash") != std::string::npos) { - trash_log_count++; - } - } - ASSERT_GE(trash_log_count, 1); - - env->set_fake_log_delete(false); - Reopen(options); - - filenames.clear(); - trash_log_count = 0; - ASSERT_OK(env->GetChildren(options.wal_dir, &filenames)); - for (const std::string& fname : filenames) { - if (fname.find(".log.trash") != std::string::npos) { - trash_log_count++; - } - } - ASSERT_EQ(trash_log_count, 0); - Close(); -} - -INSTANTIATE_TEST_CASE_P(DBWALTestWithParam, DBWALTestWithParam, - ::testing::Values(std::make_tuple("", true), - std::make_tuple("_wal_dir", false))); - -TEST_F(DBSSTTest, OpenDBWithExistingTrash) { - Options options = CurrentOptions(); - - options.sst_file_manager.reset( - NewSstFileManager(env_, nullptr, "", 1024 * 1024 /* 1 MB/sec */)); - auto sfm = static_cast(options.sst_file_manager.get()); - - Destroy(last_options_); - - // Add some trash files to the db directory so the DB can clean them up - ASSERT_OK(env_->CreateDirIfMissing(dbname_)); - ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "001.sst.trash")); - ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "002.sst.trash")); - ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "003.sst.trash")); - - // Reopen the DB and verify that it deletes existing trash files - Reopen(options); - sfm->WaitForEmptyTrash(); - ASSERT_NOK(env_->FileExists(dbname_ + "/" + "001.sst.trash")); - ASSERT_NOK(env_->FileExists(dbname_ + "/" + "002.sst.trash")); - ASSERT_NOK(env_->FileExists(dbname_ + "/" + "003.sst.trash")); -} - -// Create a DB with 2 db_paths, and generate multiple files in the 2 -// db_paths using CompactRangeOptions, make sure that files that were -// deleted from first db_path were deleted using DeleteScheduler and -// files in the second path were not. -TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) { - std::atomic bg_delete_file(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void* /*arg*/) { bg_delete_file++; }); - // The deletion scheduler sometimes skips marking file as trash according to - // a heuristic. In that case the deletion will go through the below SyncPoint. 
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { bg_delete_file++; }); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.db_paths.emplace_back(dbname_, 1024 * 100); - options.db_paths.emplace_back(dbname_ + "_2", 1024 * 100); - options.env = env_; - - int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec - Status s; - options.sst_file_manager.reset( - NewSstFileManager(env_, nullptr, "", rate_bytes_per_sec, false, &s, - /* max_trash_db_ratio= */ 1.1)); - - ASSERT_OK(s); - auto sfm = static_cast(options.sst_file_manager.get()); - - DestroyAndReopen(options); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - WriteOptions wo; - wo.disableWAL = true; - - // Create 4 files in L0 - for (int i = 0; i < 4; i++) { - ASSERT_OK(Put("Key" + std::to_string(i), DummyString(1024, 'A'), wo)); - ASSERT_OK(Flush()); - } - // We created 4 sst files in L0 - ASSERT_EQ("4", FilesPerLevel(0)); - // Compaction will delete files from L0 in first db path and generate a new - // file in L1 in second db path - CompactRangeOptions compact_options; - compact_options.target_path_id = 1; - Slice begin("Key0"); - Slice end("Key3"); - ASSERT_OK(db_->CompactRange(compact_options, &begin, &end)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - // Create 4 files in L0 - for (int i = 4; i < 8; i++) { - ASSERT_OK(Put("Key" + std::to_string(i), DummyString(1024, 'B'), wo)); - ASSERT_OK(Flush()); - } - ASSERT_EQ("4,1", FilesPerLevel(0)); - - // Compaction will delete files from L0 in first db path and generate a new - // file in L1 in second db path - begin = "Key4"; - end = "Key7"; - ASSERT_OK(db_->CompactRange(compact_options, &begin, &end)); - ASSERT_EQ("0,2", FilesPerLevel(0)); - - sfm->WaitForEmptyTrash(); - ASSERT_EQ(bg_delete_file, 8); - - // Compaction will delete both files and regenerate a file in L1 in second - // db path. The deleted files should still be cleaned up via delete scheduler. 
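A sketch of the multi-path, rate-limited-delete setup this test exercises: files are placed under two DbPath targets, and an SstFileManager created with a non-zero delete rate removes obsolete files through the trash mechanism instead of unlinking them immediately. The NewSstFileManager arguments mirror the call in the test; the sizes and names are illustrative.

#include <cassert>
#include <memory>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/sst_file_manager.h"

void MultiPathRateLimitedDeleteSketch(rocksdb::Env* env,
                                      const std::string& dbname) {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.db_paths.emplace_back(dbname, 100 * 1024 * 1024);
  options.db_paths.emplace_back(dbname + "_2", 100 * 1024 * 1024);

  rocksdb::Status s;
  options.sst_file_manager.reset(rocksdb::NewSstFileManager(
      env, /*info_log=*/nullptr, /*trash_dir=*/"",
      /*rate_bytes_per_sec=*/1024 * 1024, /*delete_existing_trash=*/false, &s,
      /*max_trash_db_ratio=*/1.1));
  assert(s.ok());

  rocksdb::DB* db = nullptr;
  s = rocksdb::DB::Open(options, dbname, &db);
  assert(s.ok());
  // ... writes, flushes and compactions follow; obsolete SSTs are renamed to
  // *.trash and drained at roughly 1 MB/s by the delete scheduler.
  delete db;
}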
- compact_options.bottommost_level_compaction = - BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - sfm->WaitForEmptyTrash(); - ASSERT_EQ(bg_delete_file, 10); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) { - int bg_delete_file = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void* /*arg*/) { bg_delete_file++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Status s; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.env = env_; - options.sst_file_manager.reset( - NewSstFileManager(env_, nullptr, "", 0, false, &s, 0)); - ASSERT_OK(s); - DestroyAndReopen(options); - - // Create 4 files in L0 - for (int i = 0; i < 4; i++) { - ASSERT_OK(Put("Key" + std::to_string(i), DummyString(1024, 'A'))); - ASSERT_OK(Flush()); - } - // We created 4 sst files in L0 - ASSERT_EQ("4", FilesPerLevel(0)); - - // Close DB and destroy it using DeleteScheduler - Close(); - - int num_sst_files = 0; - int num_wal_files = 0; - std::vector db_files; - ASSERT_OK(env_->GetChildren(dbname_, &db_files)); - for (std::string f : db_files) { - if (f.substr(f.find_last_of(".") + 1) == "sst") { - num_sst_files++; - } else if (f.substr(f.find_last_of(".") + 1) == "log") { - num_wal_files++; - } - } - ASSERT_GT(num_sst_files, 0); - ASSERT_GT(num_wal_files, 0); - - auto sfm = static_cast(options.sst_file_manager.get()); - - sfm->SetDeleteRateBytesPerSecond(1024 * 1024); - // Set an extra high trash ratio to prevent immediate/non-rate limited - // deletions - sfm->delete_scheduler()->SetMaxTrashDBRatio(1000.0); - ASSERT_OK(DestroyDB(dbname_, options)); - sfm->WaitForEmptyTrash(); - ASSERT_EQ(bg_delete_file, num_sst_files + num_wal_files); -} - -TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - Random rnd(301); - - // Generate a file containing 100 keys. - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(50))); - } - ASSERT_OK(Flush()); - - uint64_t first_file_size = 0; - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &first_file_size)); - ASSERT_EQ(sfm->GetTotalSize(), first_file_size); - - // Set the maximum allowed space usage to the current total size - sfm->SetMaxAllowedSpaceUsage(first_file_size + 1); - - ASSERT_OK(Put("key1", "val1")); - // This flush will cause bg_error_ and will fail - ASSERT_NOK(Flush()); -} - -TEST_F(DBSSTTest, DBWithMaxSpaceAllowedWithBlobFiles) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - options.disable_auto_compactions = true; - options.enable_blob_files = true; - DestroyAndReopen(options); - - Random rnd(301); - - // Generate a file containing keys. 
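A minimal sketch of the space-cap behaviour exercised by the DBWithMaxSpaceAllowed tests: once the budget set with SetMaxAllowedSpaceUsage() would be exceeded by a newly written file, the flush fails and IsMaxAllowedSpaceReached() reports it. Path and function name are illustrative.

#include <cassert>
#include <memory>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/sst_file_manager.h"

void MaxAllowedSpaceSketch(rocksdb::Env* env) {
  std::shared_ptr<rocksdb::SstFileManager> sfm(
      rocksdb::NewSstFileManager(env));

  rocksdb::Options options;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  options.sst_file_manager = sfm;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/space_db", &db);
  assert(s.ok());

  s = db->Put(rocksdb::WriteOptions(), "k1", std::string(1024, 'a'));
  assert(s.ok());
  s = db->Flush(rocksdb::FlushOptions());
  assert(s.ok());

  // Cap the budget just above the current usage; the next flushed file
  // cannot fit, so the flush is rejected.
  sfm->SetMaxAllowedSpaceUsage(sfm->GetTotalSize() + 1);
  s = db->Put(rocksdb::WriteOptions(), "k2", std::string(1024, 'a'));
  assert(s.ok());
  s = db->Flush(rocksdb::FlushOptions());
  assert(!s.ok());
  assert(sfm->IsMaxAllowedSpaceReached());

  delete db;
}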
- for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(50))); - } - ASSERT_OK(Flush()); - - uint64_t files_size = 0; - uint64_t total_files_size = 0; - std::unordered_map files_in_db; - - ASSERT_OK(GetAllDataFiles(kBlobFile, &files_in_db, &files_size)); - // Make sure blob files are considered by SSTFileManage in size limits. - ASSERT_GT(files_size, 0); - total_files_size = files_size; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &files_size)); - total_files_size += files_size; - ASSERT_EQ(sfm->GetTotalSize(), total_files_size); - - // Set the maximum allowed space usage to the current total size. - sfm->SetMaxAllowedSpaceUsage(total_files_size + 1); - - bool max_allowed_space_reached = false; - bool delete_blob_file = false; - // Sync point called after blob file is closed and max allowed space is - // checked. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BlobFileCompletionCallback::CallBack::MaxAllowedSpaceReached", - [&](void* /*arg*/) { max_allowed_space_reached = true; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BuildTable::AfterDeleteFile", - [&](void* /*arg*/) { delete_blob_file = true; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - { - "BuildTable::AfterDeleteFile", - "DBSSTTest::DBWithMaxSpaceAllowedWithBlobFiles:1", - }, - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put("key1", "val1")); - // This flush will fail - ASSERT_NOK(Flush()); - ASSERT_TRUE(max_allowed_space_reached); - - TEST_SYNC_POINT("DBSSTTest::DBWithMaxSpaceAllowedWithBlobFiles:1"); - ASSERT_TRUE(delete_blob_file); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBSSTTest, CancellingCompactionsWorks) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - options.level0_file_num_compaction_trigger = 2; - options.statistics = CreateDBStatistics(); - DestroyAndReopen(options); - - int completed_compactions = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction():CancelledCompaction", [&](void* /*arg*/) { - sfm->SetMaxAllowedSpaceUsage(0); - ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", - [&](void* /*arg*/) { completed_compactions++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - - // Generate a file containing 10 keys. - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(50))); - } - ASSERT_OK(Flush()); - uint64_t total_file_size = 0; - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &total_file_size)); - // Set the maximum allowed space usage to the current total size - sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1); - - // Generate another file to trigger compaction. 
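A small sketch of reading the tickers these assertions rely on; it assumes options.statistics was set to CreateDBStatistics() before Open(), as the test does, and the function name is illustrative.

#include <cstdint>
#include <memory>

#include "rocksdb/statistics.h"

void CompactionCancelledStatsSketch(
    const std::shared_ptr<rocksdb::Statistics>& stats) {
  // Compactions aborted because the space budget cannot be reserved bump
  // COMPACTION_CANCELLED.
  const uint64_t cancelled =
      stats->getTickerCount(rocksdb::COMPACTION_CANCELLED);
  // FILES_MARKED_TRASH vs FILES_DELETED_IMMEDIATELY distinguishes
  // rate-limited trash deletions from immediate unlinks.
  const uint64_t marked_trash =
      stats->getTickerCount(rocksdb::FILES_MARKED_TRASH);
  const uint64_t deleted_immediately =
      stats->getTickerCount(rocksdb::FILES_DELETED_IMMEDIATELY);
  (void)cancelled;
  (void)marked_trash;
  (void)deleted_immediately;
}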
- for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(50))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); - - // Because we set a callback in CancelledCompaction, we actually - // let the compaction run - ASSERT_GT(completed_compactions, 0); - ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0); - // Make sure the stat is bumped - ASSERT_GT(dbfull()->immutable_db_options().statistics.get()->getTickerCount( - COMPACTION_CANCELLED), - 0); - ASSERT_EQ(0, - dbfull()->immutable_db_options().statistics.get()->getTickerCount( - FILES_MARKED_TRASH)); - ASSERT_EQ(4, - dbfull()->immutable_db_options().statistics.get()->getTickerCount( - FILES_DELETED_IMMEDIATELY)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBSSTTest, CancellingManualCompactionsWorks) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - options.statistics = CreateDBStatistics(); - - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - - DestroyAndReopen(options); - - Random rnd(301); - - // Generate a file containing 10 keys. - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(50))); - } - ASSERT_OK(Flush()); - uint64_t total_file_size = 0; - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &total_file_size)); - // Set the maximum allowed space usage to the current total size - sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1); - - // Generate another file to trigger compaction. - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(50))); - } - ASSERT_OK(Flush()); - - // OK, now trigger a manual compaction - ASSERT_TRUE(dbfull() - ->CompactRange(CompactRangeOptions(), nullptr, nullptr) - .IsCompactionTooLarge()); - - // Wait for manual compaction to get scheduled and finish - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); - - ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0); - // Make sure the stat is bumped - ASSERT_EQ(dbfull()->immutable_db_options().statistics.get()->getTickerCount( - COMPACTION_CANCELLED), - 1); - - // Now make sure CompactFiles also gets cancelled - auto l0_files = collector->GetFlushedFiles(); - ASSERT_TRUE( - dbfull() - ->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), l0_files, 0) - .IsCompactionTooLarge()); - - // Wait for manual compaction to get scheduled and finish - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); - - ASSERT_EQ(dbfull()->immutable_db_options().statistics.get()->getTickerCount( - COMPACTION_CANCELLED), - 2); - ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0); - - // Now let the flush through and make sure GetCompactionsReservedSize - // returns to normal - sfm->SetMaxAllowedSpaceUsage(0); - int completed_compactions = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactFilesImpl:End", [&](void* /*arg*/) { completed_compactions++; }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(dbfull()->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), - l0_files, 0)); - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); - - ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0); - ASSERT_GT(completed_compactions, 0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBSSTTest, DBWithMaxSpaceAllowedRandomized) { - // This test will set a maximum allowed 
space for the DB, then it will
-  // keep filling the DB until the limit is reached and bg_error_ is set.
-  // When bg_error_ is set we will verify that the DB size is greater
-  // than the limit.
-
-  std::vector<int> max_space_limits_mbs = {1, 10};
-  std::atomic<bool> bg_error_set(false);
-
-  std::atomic<int> reached_max_space_on_flush(0);
-  std::atomic<int> reached_max_space_on_compaction(0);
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
-      "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached",
-      [&](void* arg) {
-        Status* bg_error = static_cast<Status*>(arg);
-        bg_error_set = true;
-        reached_max_space_on_flush++;
-        // clear error to ensure compaction callback is called
-        *bg_error = Status::OK();
-      });
-
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
-      "DBImpl::BackgroundCompaction():CancelledCompaction", [&](void* arg) {
-        bool* enough_room = static_cast<bool*>(arg);
-        *enough_room = true;
-      });
-
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
-      "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached",
-      [&](void* /*arg*/) {
-        bg_error_set = true;
-        reached_max_space_on_compaction++;
-      });
-
-  for (auto limit_mb : max_space_limits_mbs) {
-    bg_error_set = false;
-    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
-    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
-    std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
-    auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
-
-    Options options = CurrentOptions();
-    options.sst_file_manager = sst_file_manager;
-    options.write_buffer_size = 1024 * 512;  // 512 KB
-    DestroyAndReopen(options);
-    Random rnd(301);
-
-    sfm->SetMaxAllowedSpaceUsage(limit_mb * 1024 * 1024);
-
-    // It is easy to detect if the test is stuck in a loop. No need for
-    // complex termination logic.
- while (true) { - auto s = Put(rnd.RandomString(10), rnd.RandomString(50)); - if (!s.ok()) { - break; - } - } - ASSERT_TRUE(bg_error_set); - uint64_t total_sst_files_size = 0; - std::unordered_map files_in_db; - ASSERT_OK(GetAllDataFiles(kTableFile, &files_in_db, &total_sst_files_size)); - ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } - - ASSERT_GT(reached_max_space_on_flush, 0); - ASSERT_GT(reached_max_space_on_compaction, 0); -} - -TEST_F(DBSSTTest, OpenDBWithInfiniteMaxOpenFiles) { - // Open DB with infinite max open files - // - First iteration use 1 thread to open files - // - Second iteration use 5 threads to open files - for (int iter = 0; iter < 2; iter++) { - Options options; - options.create_if_missing = true; - options.write_buffer_size = 100000; - options.disable_auto_compactions = true; - options.max_open_files = -1; - if (iter == 0) { - options.max_file_opening_threads = 1; - } else { - options.max_file_opening_threads = 5; - } - options = CurrentOptions(options); - DestroyAndReopen(options); - - // Create 12 Files in L0 (then move then to L2) - for (int i = 0; i < 12; i++) { - std::string k = "L2_" + Key(i); - ASSERT_OK(Put(k, k + std::string(1000, 'a'))); - ASSERT_OK(Flush()); - } - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - - // Create 12 Files in L0 - for (int i = 0; i < 12; i++) { - std::string k = "L0_" + Key(i); - ASSERT_OK(Put(k, k + std::string(1000, 'a'))); - ASSERT_OK(Flush()); - } - Close(); - - // Reopening the DB will load all existing files - Reopen(options); - ASSERT_EQ("12,0,12", FilesPerLevel(0)); - std::vector> files; - dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files); - - for (const auto& level : files) { - for (const auto& file : level) { - ASSERT_TRUE(file.table_reader_handle != nullptr); - } - } - - for (int i = 0; i < 12; i++) { - ASSERT_EQ(Get("L0_" + Key(i)), "L0_" + Key(i) + std::string(1000, 'a')); - ASSERT_EQ(Get("L2_" + Key(i)), "L2_" + Key(i) + std::string(1000, 'a')); - } - } -} - -TEST_F(DBSSTTest, OpenDBWithInfiniteMaxOpenFilesSubjectToMemoryLimit) { - for (CacheEntryRoleOptions::Decision charge_table_reader : - {CacheEntryRoleOptions::Decision::kEnabled, - CacheEntryRoleOptions::Decision::kDisabled}) { - // Open DB with infinite max open files - // - First iteration use 1 thread to open files - // - Second iteration use 5 threads to open files - for (int iter = 0; iter < 2; iter++) { - Options options; - options.create_if_missing = true; - options.write_buffer_size = 100000; - options.disable_auto_compactions = true; - options.max_open_files = -1; - - BlockBasedTableOptions table_options; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - if (iter == 0) { - options.max_file_opening_threads = 1; - } else { - options.max_file_opening_threads = 5; - } - - DestroyAndReopen(options); - - // Create 5 Files in L0 (then move then to L2) - for (int i = 0; i < 5; i++) { - std::string k = "L2_" + Key(i); - ASSERT_OK(Put(k, k + std::string(1000, 'a'))); - ASSERT_OK(Flush()) << i; - } - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - - // Create 5 Files in L0 - for (int i = 0; i < 5; i++) { - std::string k = "L0_" + Key(i); - ASSERT_OK(Put(k, k + std::string(1000, 
'a'))); - ASSERT_OK(Flush()); - } - Close(); - - table_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kBlockBasedTableReader, - {/*.charged = */ charge_table_reader}}); - table_options.block_cache = - NewLRUCache(1024 /* capacity */, 0 /* num_shard_bits */, - true /* strict_capacity_limit */); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - // Reopening the DB will try to load all existing files, conditionally - // subject to memory limit - Status s = TryReopen(options); - - if (charge_table_reader == CacheEntryRoleOptions::Decision::kEnabled) { - EXPECT_TRUE(s.IsMemoryLimit()); - EXPECT_TRUE(s.ToString().find( - kCacheEntryRoleToCamelString[static_cast( - CacheEntryRole::kBlockBasedTableReader)]) != - std::string::npos); - EXPECT_TRUE(s.ToString().find("memory limit based on cache capacity") != - std::string::npos); - - } else { - EXPECT_TRUE(s.ok()); - ASSERT_EQ("5,0,5", FilesPerLevel(0)); - } - } - } -} - -TEST_F(DBSSTTest, GetTotalSstFilesSize) { - // We don't propagate oldest-key-time table property on compaction and - // just write 0 as default value. This affect the exact table size, since - // we encode table properties as varint64. Force time to be 0 to work around - // it. Should remove the workaround after we propagate the property on - // compaction. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "FlushJob::WriteLevel0Table:oldest_ancester_time", [&](void* arg) { - uint64_t* current_time = static_cast(arg); - *current_time = 0; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.compression = kNoCompression; - DestroyAndReopen(options); - // Generate 5 files in L0 - for (int i = 0; i < 5; i++) { - for (int j = 0; j < 10; j++) { - std::string val = "val_file_" + std::to_string(i); - ASSERT_OK(Put(Key(j), val)); - } - ASSERT_OK(Flush()); - } - ASSERT_EQ("5", FilesPerLevel(0)); - - std::vector live_files_meta; - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 5); - uint64_t single_file_size = live_files_meta[0].size; - - uint64_t live_sst_files_size = 0; - uint64_t total_sst_files_size = 0; - for (const auto& file_meta : live_files_meta) { - live_sst_files_size += file_meta.size; - } - - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 5 - // Total SST files = 5 - ASSERT_EQ(live_sst_files_size, 5 * single_file_size); - ASSERT_EQ(total_sst_files_size, 5 * single_file_size); - - // hold current version - std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); - ASSERT_OK(iter1->status()); - - // Compact 5 files into 1 file in L0 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - - live_files_meta.clear(); - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 1); - - live_sst_files_size = 0; - total_sst_files_size = 0; - for (const auto& file_meta : live_files_meta) { - live_sst_files_size += file_meta.size; - } - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 1 (compacted file) - // Total SST files = 6 (5 original files + compacted file) - ASSERT_EQ(live_sst_files_size, 1 * single_file_size); - ASSERT_EQ(total_sst_files_size, 6 * single_file_size); - - // hold current version - std::unique_ptr 
iter2(dbfull()->NewIterator(ReadOptions())); - ASSERT_OK(iter2->status()); - - // Delete all keys and compact, this will delete all live files - for (int i = 0; i < 10; i++) { - ASSERT_OK(Delete(Key(i))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("", FilesPerLevel(0)); - - live_files_meta.clear(); - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 0); - - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 0 - // Total SST files = 6 (5 original files + compacted file) - ASSERT_EQ(total_sst_files_size, 6 * single_file_size); - - ASSERT_OK(iter1->status()); - iter1.reset(); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 0 - // Total SST files = 1 (compacted file) - ASSERT_EQ(total_sst_files_size, 1 * single_file_size); - - ASSERT_OK(iter2->status()); - iter2.reset(); - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - // Live SST files = 0 - // Total SST files = 0 - ASSERT_EQ(total_sst_files_size, 0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBSSTTest, OpenDBWithoutGetFileSizeInvocations) { - Options options = CurrentOptions(); - std::unique_ptr env{MockEnv::Create(Env::Default())}; - options.env = env.get(); - options.disable_auto_compactions = true; - options.compression = kNoCompression; - options.enable_blob_files = true; - options.blob_file_size = 32; // create one blob per file - options.skip_checking_sst_file_sizes_on_db_open = true; - - DestroyAndReopen(options); - // Generate 5 files in L0 - for (int i = 0; i < 5; i++) { - for (int j = 0; j < 10; j++) { - std::string val = "val_file_" + std::to_string(i); - ASSERT_OK(Put(Key(j), val)); - } - ASSERT_OK(Flush()); - } - Close(); - - bool is_get_file_size_called = false; - SyncPoint::GetInstance()->SetCallBack( - "MockFileSystem::GetFileSize:CheckFileType", [&](void* arg) { - std::string* filename = reinterpret_cast(arg); - if (filename->find(".blob") != std::string::npos) { - is_get_file_size_called = true; - } - }); - - SyncPoint::GetInstance()->EnableProcessing(); - Reopen(options); - ASSERT_FALSE(is_get_file_size_called); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - Destroy(options); -} - -TEST_F(DBSSTTest, GetTotalSstFilesSizeVersionsFilesShared) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.compression = kNoCompression; - DestroyAndReopen(options); - // Generate 5 files in L0 - for (int i = 0; i < 5; i++) { - ASSERT_OK(Put(Key(i), "val")); - ASSERT_OK(Flush()); - } - ASSERT_EQ("5", FilesPerLevel(0)); - - std::vector live_files_meta; - dbfull()->GetLiveFilesMetaData(&live_files_meta); - ASSERT_EQ(live_files_meta.size(), 5); - uint64_t single_file_size = live_files_meta[0].size; - - uint64_t live_sst_files_size = 0; - uint64_t total_sst_files_size = 0; - for (const auto& file_meta : live_files_meta) { - live_sst_files_size += file_meta.size; - } - - ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", - &total_sst_files_size)); - - // Live SST files = 5 - // Total SST files = 5 - ASSERT_EQ(live_sst_files_size, 5 * single_file_size); - ASSERT_EQ(total_sst_files_size, 5 * single_file_size); - - // hold current version - std::unique_ptr iter1(dbfull()->NewIterator(ReadOptions())); - 
ASSERT_OK(iter1->status());
-
-  // Compaction will do a trivial move from L0 to L1
-  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  ASSERT_EQ("0,5", FilesPerLevel(0));
-
-  live_files_meta.clear();
-  dbfull()->GetLiveFilesMetaData(&live_files_meta);
-  ASSERT_EQ(live_files_meta.size(), 5);
-
-  live_sst_files_size = 0;
-  total_sst_files_size = 0;
-  for (const auto& file_meta : live_files_meta) {
-    live_sst_files_size += file_meta.size;
-  }
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 5
-  // Total SST files = 5 (used in 2 versions)
-  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
-  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
-
-  // hold current version
-  std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));
-  ASSERT_OK(iter2->status());
-
-  // Delete all keys and compact, this will delete all live files
-  for (int i = 0; i < 5; i++) {
-    ASSERT_OK(Delete(Key(i)));
-  }
-  ASSERT_OK(Flush());
-  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-  ASSERT_EQ("", FilesPerLevel(0));
-
-  live_files_meta.clear();
-  dbfull()->GetLiveFilesMetaData(&live_files_meta);
-  ASSERT_EQ(live_files_meta.size(), 0);
-
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 0
-  // Total SST files = 5 (used in 2 versions)
-  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);
-
-  ASSERT_OK(iter1->status());
-  iter1.reset();
-  ASSERT_OK(iter2->status());
-  iter2.reset();
-
-  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
-                                       &total_sst_files_size));
-  // Live SST files = 0
-  // Total SST files = 0
-  ASSERT_EQ(total_sst_files_size, 0);
-}
-
-// This tests whether blob files are recorded by the SST File Manager when the
-// compaction job creates/deletes them, and in the case of AtomicFlush.
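Before that test, it may help to recall how an SstFileManager is typically wired into a DB outside the test harness. A minimal sketch against the public rocksdb/sst_file_manager.h API; the path and the size budget are illustrative, not taken from the deleted test:

    #include <memory>
    #include "rocksdb/db.h"
    #include "rocksdb/env.h"
    #include "rocksdb/options.h"
    #include "rocksdb/sst_file_manager.h"

    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;

      // The SstFileManager tracks the size of SST (and blob) files as they are
      // added and deleted, and can enforce an overall space budget.
      std::shared_ptr<rocksdb::SstFileManager> sfm(
          rocksdb::NewSstFileManager(rocksdb::Env::Default()));
      sfm->SetMaxAllowedSpaceUsage(64ull << 20);  // 64 MB budget (illustrative)
      options.sst_file_manager = sfm;

      rocksdb::DB* db = nullptr;
      rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/sfm_example", &db);
      if (s.ok()) {
        s = db->Put(rocksdb::WriteOptions(), "key", "value");
        // Total size of the files currently tracked by the manager.
        uint64_t tracked_bytes = sfm->GetTotalSize();
        (void)tracked_bytes;
        delete db;
      }
      return s.ok() ? 0 : 1;
    }

When the budget would be exceeded, flushes and compactions surface the condition as a background error, which is exactly what the tests above exercise via sync points.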
-TEST_F(DBSSTTest, DBWithSFMForBlobFilesAtomicFlush) { - std::shared_ptr sst_file_manager(NewSstFileManager(env_)); - auto sfm = static_cast(sst_file_manager.get()); - Options options = CurrentOptions(); - options.sst_file_manager = sst_file_manager; - options.enable_blob_files = true; - options.min_blob_size = 0; - options.disable_auto_compactions = true; - options.enable_blob_garbage_collection = true; - options.blob_garbage_collection_age_cutoff = 0.5; - options.atomic_flush = true; - - int files_added = 0; - int files_deleted = 0; - int files_scheduled_to_delete = 0; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnAddFile", [&](void* arg) { - const std::string* const file_path = - static_cast(arg); - if (EndsWith(*file_path, ".blob")) { - files_added++; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::OnDeleteFile", [&](void* arg) { - const std::string* const file_path = - static_cast(arg); - if (EndsWith(*file_path, ".blob")) { - files_deleted++; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileManagerImpl::ScheduleFileDeletion", [&](void* arg) { - assert(arg); - const std::string* const file_path = - static_cast(arg); - if (EndsWith(*file_path, ".blob")) { - ++files_scheduled_to_delete; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndReopen(options); - Random rnd(301); - - ASSERT_OK(Put("key_1", "value_1")); - ASSERT_OK(Put("key_2", "value_2")); - ASSERT_OK(Put("key_3", "value_3")); - ASSERT_OK(Put("key_4", "value_4")); - ASSERT_OK(Flush()); - - // Overwrite will create the garbage data. - ASSERT_OK(Put("key_3", "new_value_3")); - ASSERT_OK(Put("key_4", "new_value_4")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key5", "blob_value5")); - ASSERT_OK(Put("Key6", "blob_value6")); - ASSERT_OK(Flush()); - - ASSERT_EQ(files_added, 3); - ASSERT_EQ(files_deleted, 0); - ASSERT_EQ(files_scheduled_to_delete, 0); - files_added = 0; - - constexpr Slice* begin = nullptr; - constexpr Slice* end = nullptr; - // Compaction job will create a new file and delete the older files. - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(files_added, 1); - ASSERT_EQ(files_scheduled_to_delete, 1); - - sfm->WaitForEmptyTrash(); - - ASSERT_EQ(files_deleted, 1); - - Close(); - ASSERT_OK(DestroyDB(dbname_, options)); - - ASSERT_EQ(files_scheduled_to_delete, 4); - - sfm->WaitForEmptyTrash(); - - ASSERT_EQ(files_deleted, 4); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_statistics_test.cc b/db/db_statistics_test.cc deleted file mode 100644 index 85a54aa94..000000000 --- a/db/db_statistics_test.cc +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
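The statistics tests removed below all revolve around the Statistics object attached to Options. For orientation, a small sketch of enabling and reading statistics in application code; the path is illustrative, while the ticker/histogram enums come from rocksdb/statistics.h:

    #include <iostream>
    #include <string>
    #include "rocksdb/db.h"
    #include "rocksdb/options.h"
    #include "rocksdb/statistics.h"

    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;
      options.statistics = rocksdb::CreateDBStatistics();
      options.statistics->set_stats_level(
          rocksdb::StatsLevel::kExceptTimeForMutex);

      rocksdb::DB* db = nullptr;
      rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/stats_example", &db);
      if (!s.ok()) {
        return 1;
      }

      db->Put(rocksdb::WriteOptions(), "k", "v");
      std::string v;
      db->Get(rocksdb::ReadOptions(), "k", &v);

      // Tickers are monotonically increasing counters; histograms carry
      // distribution data such as percentiles and the observed maximum.
      uint64_t bytes_written =
          options.statistics->getTickerCount(rocksdb::BYTES_WRITTEN);
      rocksdb::HistogramData get_hist;
      options.statistics->histogramData(rocksdb::DB_GET, &get_hist);
      std::cout << "bytes written: " << bytes_written
                << ", p99 get micros: " << get_hist.percentile99 << std::endl;

      delete db;
      return 0;
    }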
- -#include - -#include "db/db_test_util.h" -#include "monitoring/thread_status_util.h" -#include "port/stack_trace.h" -#include "rocksdb/statistics.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -class DBStatisticsTest : public DBTestBase { - public: - DBStatisticsTest() - : DBTestBase("db_statistics_test", /*env_do_fsync=*/true) {} -}; - -TEST_F(DBStatisticsTest, CompressionStatsTest) { - CompressionType type; - - if (Snappy_Supported()) { - type = kSnappyCompression; - fprintf(stderr, "using snappy\n"); - } else if (Zlib_Supported()) { - type = kZlibCompression; - fprintf(stderr, "using zlib\n"); - } else if (BZip2_Supported()) { - type = kBZip2Compression; - fprintf(stderr, "using bzip2\n"); - } else if (LZ4_Supported()) { - type = kLZ4Compression; - fprintf(stderr, "using lz4\n"); - } else if (XPRESS_Supported()) { - type = kXpressCompression; - fprintf(stderr, "using xpress\n"); - } else if (ZSTD_Supported()) { - type = kZSTD; - fprintf(stderr, "using ZSTD\n"); - } else { - fprintf(stderr, "skipping test, compression disabled\n"); - return; - } - - Options options = CurrentOptions(); - options.compression = type; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kExceptTimeForMutex); - DestroyAndReopen(options); - - int kNumKeysWritten = 100000; - - // Check that compressions occur and are counted when compression is turned on - Random rnd(301); - for (int i = 0; i < kNumKeysWritten; ++i) { - // compressible string - ASSERT_OK(Put(Key(i), rnd.RandomString(128) + std::string(128, 'a'))); - } - ASSERT_OK(Flush()); - ASSERT_GT(options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED), 0); - - for (int i = 0; i < kNumKeysWritten; ++i) { - auto r = Get(Key(i)); - } - ASSERT_GT(options.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED), 0); - - options.compression = kNoCompression; - DestroyAndReopen(options); - uint64_t currentCompressions = - options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED); - uint64_t currentDecompressions = - options.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED); - - // Check that compressions do not occur when turned off - for (int i = 0; i < kNumKeysWritten; ++i) { - // compressible string - ASSERT_OK(Put(Key(i), rnd.RandomString(128) + std::string(128, 'a'))); - } - ASSERT_OK(Flush()); - ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED) - - currentCompressions, - 0); - - for (int i = 0; i < kNumKeysWritten; ++i) { - auto r = Get(Key(i)); - } - ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED) - - currentDecompressions, - 0); -} - -TEST_F(DBStatisticsTest, MutexWaitStatsDisabledByDefault) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - CreateAndReopenWithCF({"pikachu"}, options); - const uint64_t kMutexWaitDelay = 100; - ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, - kMutexWaitDelay); - ASSERT_OK(Put("hello", "rocksdb")); - ASSERT_EQ(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), 0); - ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0); -} - -TEST_F(DBStatisticsTest, MutexWaitStats) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kAll); - CreateAndReopenWithCF({"pikachu"}, options); - const uint64_t kMutexWaitDelay = 100; - 
ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, - kMutexWaitDelay); - ASSERT_OK(Put("hello", "rocksdb")); - ASSERT_GE(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), kMutexWaitDelay); - ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0); -} - -TEST_F(DBStatisticsTest, ResetStats) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - for (int i = 0; i < 2; ++i) { - // pick arbitrary ticker and histogram. On first iteration they're zero - // because db is unused. On second iteration they're zero due to Reset(). - ASSERT_EQ(0, TestGetTickerCount(options, NUMBER_KEYS_WRITTEN)); - HistogramData histogram_data; - options.statistics->histogramData(DB_WRITE, &histogram_data); - ASSERT_EQ(0.0, histogram_data.max); - - if (i == 0) { - // The Put() makes some of the ticker/histogram stats nonzero until we - // Reset(). - ASSERT_OK(Put("hello", "rocksdb")); - ASSERT_EQ(1, TestGetTickerCount(options, NUMBER_KEYS_WRITTEN)); - options.statistics->histogramData(DB_WRITE, &histogram_data); - ASSERT_GT(histogram_data.max, 0.0); - ASSERT_OK(options.statistics->Reset()); - } - } -} - -TEST_F(DBStatisticsTest, ExcludeTickers) { - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - options.statistics->set_stats_level(StatsLevel::kExceptTickers); - ASSERT_OK(Put("foo", "value")); - ASSERT_EQ(0, options.statistics->getTickerCount(BYTES_WRITTEN)); - options.statistics->set_stats_level(StatsLevel::kExceptHistogramOrTimers); - Reopen(options); - ASSERT_EQ("value", Get("foo")); - ASSERT_GT(options.statistics->getTickerCount(BYTES_READ), 0); -} - - -TEST_F(DBStatisticsTest, VerifyChecksumReadStat) { - Options options = CurrentOptions(); - options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - Reopen(options); - - // Expected to be populated regardless of `PerfLevel` in user thread - SetPerfLevel(kDisable); - - { - // Scenario 0: only WAL data. Not verified so require ticker to be zero. - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); - ASSERT_OK(db_->VerifyChecksum()); - ASSERT_EQ(0, - options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES)); - } - - // Create one SST. - ASSERT_OK(Flush()); - std::unordered_map table_files; - uint64_t table_files_size = 0; - GetAllDataFiles(kTableFile, &table_files, &table_files_size); - - { - // Scenario 1: Table verified in `VerifyFileChecksums()`. This should read - // the whole file so we require the ticker stat exactly matches the file - // size. - ASSERT_OK(options.statistics->Reset()); - ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); - ASSERT_EQ(table_files_size, - options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES)); - } - - { - // Scenario 2: Table verified in `VerifyChecksum()`. This opens a - // `TableReader` to verify each block. It can involve duplicate reads of the - // same data so we set a lower-bound only. 
- ASSERT_OK(options.statistics->Reset()); - ASSERT_OK(db_->VerifyChecksum()); - ASSERT_GE(options.statistics->getTickerCount(VERIFY_CHECKSUM_READ_BYTES), - table_files_size); - } -} - - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_table_properties_test.cc b/db/db_table_properties_test.cc deleted file mode 100644 index 7be05e93c..000000000 --- a/db/db_table_properties_test.cc +++ /dev/null @@ -1,623 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include -#include - -#include "db/db_test_util.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/db.h" -#include "rocksdb/types.h" -#include "rocksdb/utilities/table_properties_collectors.h" -#include "table/format.h" -#include "table/meta_blocks.h" -#include "table/table_properties_internal.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/random.h" - - -namespace ROCKSDB_NAMESPACE { - -// A helper function that ensures the table properties returned in -// `GetPropertiesOfAllTablesTest` is correct. -// This test assumes entries size is different for each of the tables. -namespace { - -void VerifyTableProperties(DB* db, uint64_t expected_entries_size) { - TablePropertiesCollection props; - ASSERT_OK(db->GetPropertiesOfAllTables(&props)); - - ASSERT_EQ(4U, props.size()); - std::unordered_set unique_entries; - - // Indirect test - uint64_t sum = 0; - for (const auto& item : props) { - unique_entries.insert(item.second->num_entries); - sum += item.second->num_entries; - } - - ASSERT_EQ(props.size(), unique_entries.size()); - ASSERT_EQ(expected_entries_size, sum); - - VerifySstUniqueIds(props); -} -} // anonymous namespace - -class DBTablePropertiesTest : public DBTestBase, - public testing::WithParamInterface { - public: - DBTablePropertiesTest() - : DBTestBase("db_table_properties_test", /*env_do_fsync=*/false) {} - TablePropertiesCollection TestGetPropertiesOfTablesInRange( - std::vector ranges, std::size_t* num_properties = nullptr, - std::size_t* num_files = nullptr); -}; - -TEST_F(DBTablePropertiesTest, GetPropertiesOfAllTablesTest) { - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 8; - // Part of strategy to prevent pinning table files - options.max_open_files = 42; - Reopen(options); - - // Create 4 tables - for (int table = 0; table < 4; ++table) { - // Use old meta name for table properties for one file - if (table == 3) { - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTableBuilder::WritePropertiesBlock:Meta", [&](void* meta) { - *reinterpret_cast(meta) = - &kPropertiesBlockOldName; - }); - SyncPoint::GetInstance()->EnableProcessing(); - } - // Build file - for (int i = 0; i < 10 + table; ++i) { - ASSERT_OK( - db_->Put(WriteOptions(), std::to_string(table * 100 + i), "val")); - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - SyncPoint::GetInstance()->DisableProcessing(); - std::string original_session_id; - 
ASSERT_OK(db_->GetDbSessionId(original_session_id)); - - // Part of strategy to prevent pinning table files - SyncPoint::GetInstance()->SetCallBack( - "VersionEditHandler::LoadTables:skip_load_table_files", - [&](void* skip_load) { *reinterpret_cast(skip_load) = true; }); - SyncPoint::GetInstance()->EnableProcessing(); - - // 1. Read table properties directly from file - Reopen(options); - // Clear out auto-opened files - dbfull()->TEST_table_cache()->EraseUnRefEntries(); - ASSERT_EQ(dbfull()->TEST_table_cache()->GetUsage(), 0U); - VerifyTableProperties(db_, 10 + 11 + 12 + 13); - - // 2. Put two tables to table cache and - Reopen(options); - // Clear out auto-opened files - dbfull()->TEST_table_cache()->EraseUnRefEntries(); - ASSERT_EQ(dbfull()->TEST_table_cache()->GetUsage(), 0U); - // fetch key from 1st and 2nd table, which will internally place that table to - // the table cache. - for (int i = 0; i < 2; ++i) { - Get(std::to_string(i * 100 + 0)); - } - - VerifyTableProperties(db_, 10 + 11 + 12 + 13); - - // 3. Put all tables to table cache - Reopen(options); - // fetch key from all tables, which will place them in table cache. - for (int i = 0; i < 4; ++i) { - Get(std::to_string(i * 100 + 0)); - } - VerifyTableProperties(db_, 10 + 11 + 12 + 13); - - // 4. Try to read CORRUPT properties (a) directly from file, and (b) - // through reader on Get - - // It's not practical to prevent table file read on Open, so we - // corrupt after open and after purging table cache. - for (bool direct : {true, false}) { - Reopen(options); - // Clear out auto-opened files - dbfull()->TEST_table_cache()->EraseUnRefEntries(); - ASSERT_EQ(dbfull()->TEST_table_cache()->GetUsage(), 0U); - - TablePropertiesCollection props; - ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); - std::string sst_file = props.begin()->first; - - // Corrupt the file's TableProperties using session id - std::string contents; - ASSERT_OK( - ReadFileToString(env_->GetFileSystem().get(), sst_file, &contents)); - size_t pos = contents.find(original_session_id); - ASSERT_NE(pos, std::string::npos); - ASSERT_OK(test::CorruptFile(env_, sst_file, static_cast(pos), 1, - /*verify checksum fails*/ false)); - - // Try to read CORRUPT properties - if (direct) { - ASSERT_TRUE(db_->GetPropertiesOfAllTables(&props).IsCorruption()); - } else { - bool found_corruption = false; - for (int i = 0; i < 4; ++i) { - std::string result = Get(std::to_string(i * 100 + 0)); - if (result.find_first_of("Corruption: block checksum mismatch") != - std::string::npos) { - found_corruption = true; - } - } - ASSERT_TRUE(found_corruption); - } - - // UN-corrupt file for next iteration - ASSERT_OK(test::CorruptFile(env_, sst_file, static_cast(pos), 1, - /*verify checksum fails*/ false)); - } - - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBTablePropertiesTest, InvalidIgnored) { - // RocksDB versions 2.5 - 2.7 generate some properties that Block considers - // invalid in some way. This approximates that. - - // Inject properties block data that Block considers invalid - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTableBuilder::WritePropertiesBlock:BlockData", - [&](void* block_data) { - *reinterpret_cast(block_data) = Slice("X"); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - // Corrupting the table properties corrupts the unique id. - // Ignore the unique id recorded in the manifest. 
- auto options = CurrentOptions(); - options.verify_sst_unique_id_in_manifest = false; - Reopen(options); - - // Build file - for (int i = 0; i < 10; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), std::to_string(i), "val")); - } - ASSERT_OK(db_->Flush(FlushOptions())); - - SyncPoint::GetInstance()->DisableProcessing(); - - // Not crashing is good enough - TablePropertiesCollection props; - ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); -} - -TEST_F(DBTablePropertiesTest, CreateOnDeletionCollectorFactory) { - ConfigOptions options; - options.ignore_unsupported_options = false; - - std::shared_ptr factory; - std::string id = CompactOnDeletionCollectorFactory::kClassName(); - ASSERT_OK( - TablePropertiesCollectorFactory::CreateFromString(options, id, &factory)); - auto del_factory = factory->CheckedCast(); - ASSERT_NE(del_factory, nullptr); - ASSERT_EQ(0U, del_factory->GetWindowSize()); - ASSERT_EQ(0U, del_factory->GetDeletionTrigger()); - ASSERT_EQ(0.0, del_factory->GetDeletionRatio()); - ASSERT_OK(TablePropertiesCollectorFactory::CreateFromString( - options, "window_size=100; deletion_trigger=90; id=" + id, &factory)); - del_factory = factory->CheckedCast(); - ASSERT_NE(del_factory, nullptr); - ASSERT_EQ(100U, del_factory->GetWindowSize()); - ASSERT_EQ(90U, del_factory->GetDeletionTrigger()); - ASSERT_EQ(0.0, del_factory->GetDeletionRatio()); - ASSERT_OK(TablePropertiesCollectorFactory::CreateFromString( - options, - "window_size=100; deletion_trigger=90; deletion_ratio=0.5; id=" + id, - &factory)); - del_factory = factory->CheckedCast(); - ASSERT_NE(del_factory, nullptr); - ASSERT_EQ(100U, del_factory->GetWindowSize()); - ASSERT_EQ(90U, del_factory->GetDeletionTrigger()); - ASSERT_EQ(0.5, del_factory->GetDeletionRatio()); -} - -TablePropertiesCollection -DBTablePropertiesTest::TestGetPropertiesOfTablesInRange( - std::vector ranges, std::size_t* num_properties, - std::size_t* num_files) { - // Since we deref zero element in the vector it can not be empty - // otherwise we pass an address to some random memory - EXPECT_GT(ranges.size(), 0U); - // run the query - TablePropertiesCollection props; - EXPECT_OK(db_->GetPropertiesOfTablesInRange( - db_->DefaultColumnFamily(), &ranges[0], ranges.size(), &props)); - - // Make sure that we've received properties for those and for those files - // only which fall within requested ranges - std::vector vmd; - db_->GetLiveFilesMetaData(&vmd); - for (auto& md : vmd) { - std::string fn = md.db_path + md.name; - bool in_range = false; - for (auto& r : ranges) { - // smallestkey < limit && largestkey >= start - if (r.limit.compare(md.smallestkey) >= 0 && - r.start.compare(md.largestkey) <= 0) { - in_range = true; - EXPECT_GT(props.count(fn), 0); - } - } - if (!in_range) { - EXPECT_EQ(props.count(fn), 0); - } - } - - if (num_properties) { - *num_properties = props.size(); - } - - if (num_files) { - *num_files = vmd.size(); - } - return props; -} - -TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) { - // Fixed random sead - Random rnd(301); - - Options options; - options.create_if_missing = true; - options.write_buffer_size = 4096; - options.max_write_buffer_number = 2; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 2; - options.target_file_size_base = 2048; - options.max_bytes_for_level_base = 40960; - options.max_bytes_for_level_multiplier = 4; - options.hard_pending_compaction_bytes_limit = 16 * 1024; - options.num_levels = 8; - options.env = env_; - - 
DestroyAndReopen(options); - - // build a decent LSM - for (int i = 0; i < 10000; i++) { - ASSERT_OK(Put(test::RandomKey(&rnd, 5), rnd.RandomString(102))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - if (NumTableFilesAtLevel(0) == 0) { - ASSERT_OK(Put(test::RandomKey(&rnd, 5), rnd.RandomString(102))); - ASSERT_OK(Flush()); - } - - ASSERT_OK(db_->PauseBackgroundWork()); - - // Ensure that we have at least L0, L1 and L2 - ASSERT_GT(NumTableFilesAtLevel(0), 0); - ASSERT_GT(NumTableFilesAtLevel(1), 0); - ASSERT_GT(NumTableFilesAtLevel(2), 0); - - // Query the largest range - std::size_t num_properties, num_files; - TestGetPropertiesOfTablesInRange( - {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST), - test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))}, - &num_properties, &num_files); - ASSERT_EQ(num_properties, num_files); - - // Query the empty range - TestGetPropertiesOfTablesInRange( - {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST), - test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST))}, - &num_properties, &num_files); - ASSERT_GT(num_files, 0); - ASSERT_EQ(num_properties, 0); - - // Query the middle rangee - TestGetPropertiesOfTablesInRange( - {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::MIDDLE), - test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))}, - &num_properties, &num_files); - ASSERT_GT(num_files, 0); - ASSERT_GT(num_files, num_properties); - ASSERT_GT(num_properties, 0); - - // Query a bunch of random ranges - for (int j = 0; j < 100; j++) { - // create a bunch of ranges - std::vector random_keys; - // Random returns numbers with zero included - // when we pass empty ranges TestGetPropertiesOfTablesInRange() - // derefs random memory in the empty ranges[0] - // so want to be greater than zero and even since - // the below loop requires that random_keys.size() to be even. - auto n = 2 * (rnd.Uniform(50) + 1); - - for (uint32_t i = 0; i < n; ++i) { - random_keys.push_back(test::RandomKey(&rnd, 5)); - } - - ASSERT_GT(random_keys.size(), 0U); - ASSERT_EQ((random_keys.size() % 2), 0U); - - std::vector ranges; - auto it = random_keys.begin(); - while (it != random_keys.end()) { - ranges.push_back(Range(*it, *(it + 1))); - it += 2; - } - - TestGetPropertiesOfTablesInRange(std::move(ranges)); - } -} - -TEST_F(DBTablePropertiesTest, GetColumnFamilyNameProperty) { - std::string kExtraCfName = "pikachu"; - CreateAndReopenWithCF({kExtraCfName}, CurrentOptions()); - - // Create one table per CF, then verify it was created with the column family - // name property. 
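The loop below performs that check inside the test fixture. For reference, the same lookup in plain application code goes through DB::GetPropertiesOfAllTables; a rough sketch (the helper name and the decision to just print are illustrative):

    #include <iostream>
    #include "rocksdb/db.h"
    #include "rocksdb/options.h"
    #include "rocksdb/table_properties.h"

    // Assumes `db` is an open rocksdb::DB* and `cf` one of its column family
    // handles.
    void PrintTableProperties(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* cf) {
      rocksdb::TablePropertiesCollection props;
      rocksdb::Status s = db->GetPropertiesOfAllTables(cf, &props);
      if (!s.ok()) {
        return;
      }
      for (const auto& entry : props) {
        // entry.first is the SST file name; entry.second the parsed properties,
        // including the column family name checked by the test below.
        std::cout << entry.first
                  << " cf=" << entry.second->column_family_name
                  << " entries=" << entry.second->num_entries << std::endl;
      }
    }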
- for (uint32_t cf = 0; cf < 2; ++cf) { - ASSERT_OK(Put(cf, "key", "val")); - ASSERT_OK(Flush(cf)); - - TablePropertiesCollection fname_to_props; - ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &fname_to_props)); - ASSERT_EQ(1U, fname_to_props.size()); - - std::string expected_cf_name; - if (cf > 0) { - expected_cf_name = kExtraCfName; - } else { - expected_cf_name = kDefaultColumnFamilyName; - } - ASSERT_EQ(expected_cf_name, - fname_to_props.begin()->second->column_family_name); - ASSERT_EQ(cf, static_cast( - fname_to_props.begin()->second->column_family_id)); - } -} - -TEST_F(DBTablePropertiesTest, GetDbIdentifiersProperty) { - CreateAndReopenWithCF({"goku"}, CurrentOptions()); - - for (uint32_t cf = 0; cf < 2; ++cf) { - ASSERT_OK(Put(cf, "key", "val")); - ASSERT_OK(Put(cf, "foo", "bar")); - ASSERT_OK(Flush(cf)); - - TablePropertiesCollection fname_to_props; - ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &fname_to_props)); - ASSERT_EQ(1U, fname_to_props.size()); - - std::string id, sid; - ASSERT_OK(db_->GetDbIdentity(id)); - ASSERT_OK(db_->GetDbSessionId(sid)); - ASSERT_EQ(id, fname_to_props.begin()->second->db_id); - ASSERT_EQ(sid, fname_to_props.begin()->second->db_session_id); - } -} - -class DBTableHostnamePropertyTest - : public DBTestBase, - public ::testing::WithParamInterface> { - public: - DBTableHostnamePropertyTest() - : DBTestBase("db_table_hostname_property_test", - /*env_do_fsync=*/false) {} -}; - -TEST_P(DBTableHostnamePropertyTest, DbHostLocationProperty) { - option_config_ = std::get<0>(GetParam()); - Options opts = CurrentOptions(); - std::string expected_host_id = std::get<1>(GetParam()); - ; - if (expected_host_id == kHostnameForDbHostId) { - ASSERT_OK(env_->GetHostNameString(&expected_host_id)); - } else { - opts.db_host_id = expected_host_id; - } - CreateAndReopenWithCF({"goku"}, opts); - - for (uint32_t cf = 0; cf < 2; ++cf) { - ASSERT_OK(Put(cf, "key", "val")); - ASSERT_OK(Put(cf, "foo", "bar")); - ASSERT_OK(Flush(cf)); - - TablePropertiesCollection fname_to_props; - ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &fname_to_props)); - ASSERT_EQ(1U, fname_to_props.size()); - - ASSERT_EQ(fname_to_props.begin()->second->db_host_id, expected_host_id); - } -} - -INSTANTIATE_TEST_CASE_P( - DBTableHostnamePropertyTest, DBTableHostnamePropertyTest, - ::testing::Values( - // OptionConfig, override db_host_location - std::make_tuple(DBTestBase::OptionConfig::kDefault, - kHostnameForDbHostId), - std::make_tuple(DBTestBase::OptionConfig::kDefault, "foobar"), - std::make_tuple(DBTestBase::OptionConfig::kDefault, ""), - std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix, - kHostnameForDbHostId), - std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix, - "foobar"), - std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix, - ""))); - -class DeletionTriggeredCompactionTestListener : public EventListener { - public: - void OnCompactionBegin(DB*, const CompactionJobInfo& ci) override { - ASSERT_EQ(ci.compaction_reason, - CompactionReason::kFilesMarkedForCompaction); - } - - void OnCompactionCompleted(DB*, const CompactionJobInfo& ci) override { - ASSERT_EQ(ci.compaction_reason, - CompactionReason::kFilesMarkedForCompaction); - } -}; - -TEST_P(DBTablePropertiesTest, DeletionTriggeredCompactionMarking) { - int kNumKeys = 1000; - int kWindowSize = 100; - int kNumDelsTrigger = 90; - std::shared_ptr compact_on_del = - NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger); - - Options opts = 
CurrentOptions(); - opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - opts.table_properties_collector_factories.emplace_back(compact_on_del); - - if (GetParam() == "kCompactionStyleUniversal") { - opts.compaction_style = kCompactionStyleUniversal; - } - Reopen(opts); - - // add an L1 file to prevent tombstones from dropping due to obsolescence - // during flush - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - - DeletionTriggeredCompactionTestListener* listener = - new DeletionTriggeredCompactionTestListener(); - opts.listeners.emplace_back(listener); - Reopen(opts); - - for (int i = 0; i < kNumKeys; ++i) { - if (i >= kNumKeys - kWindowSize && - i < kNumKeys - kWindowSize + kNumDelsTrigger) { - ASSERT_OK(Delete(Key(i))); - } else { - ASSERT_OK(Put(Key(i), "val")); - } - } - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - - // Change the window size and deletion trigger and ensure new values take - // effect - kWindowSize = 50; - kNumDelsTrigger = 40; - static_cast(compact_on_del.get()) - ->SetWindowSize(kWindowSize); - static_cast(compact_on_del.get()) - ->SetDeletionTrigger(kNumDelsTrigger); - for (int i = 0; i < kNumKeys; ++i) { - if (i >= kNumKeys - kWindowSize && - i < kNumKeys - kWindowSize + kNumDelsTrigger) { - ASSERT_OK(Delete(Key(i))); - } else { - ASSERT_OK(Put(Key(i), "val")); - } - } - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - - // Change the window size to disable delete triggered compaction - kWindowSize = 0; - static_cast(compact_on_del.get()) - ->SetWindowSize(kWindowSize); - static_cast(compact_on_del.get()) - ->SetDeletionTrigger(kNumDelsTrigger); - for (int i = 0; i < kNumKeys; ++i) { - if (i >= kNumKeys - kWindowSize && - i < kNumKeys - kWindowSize + kNumDelsTrigger) { - ASSERT_OK(Delete(Key(i))); - } else { - ASSERT_OK(Put(Key(i), "val")); - } - } - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - ASSERT_LT(0, opts.statistics->getTickerCount(COMPACT_WRITE_BYTES_MARKED)); - ASSERT_LT(0, opts.statistics->getTickerCount(COMPACT_READ_BYTES_MARKED)); -} - -TEST_P(DBTablePropertiesTest, RatioBasedDeletionTriggeredCompactionMarking) { - constexpr int kNumKeys = 1000; - constexpr int kWindowSize = 0; - constexpr int kNumDelsTrigger = 0; - constexpr double kDeletionRatio = 0.1; - std::shared_ptr compact_on_del = - NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger, - kDeletionRatio); - - Options opts = CurrentOptions(); - opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - opts.table_properties_collector_factories.emplace_back(compact_on_del); - - Reopen(opts); - - // Add an L2 file to prevent tombstones from dropping due to obsolescence - // during flush - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - MoveFilesToLevel(2); - - auto* listener = new DeletionTriggeredCompactionTestListener(); - opts.listeners.emplace_back(listener); - Reopen(opts); - - // Generate one L0 with kNumKeys Put. - for (int i = 0; i < kNumKeys; ++i) { - ASSERT_OK(Put(Key(i), "not important")); - } - ASSERT_OK(Flush()); - - // Generate another L0 with kNumKeys Delete. - // This file, due to deletion ratio, will trigger compaction: 2@0 files to L1. - // The resulting L1 file has only one tombstone for user key 'Key(0)'. - // Again, due to deletion ratio, a compaction will be triggered: 1@1 + 1@2 - // files to L2. 
However, the resulting file is empty because the tombstone - // and value are both dropped. - for (int i = 0; i < kNumKeys; ++i) { - ASSERT_OK(Delete(Key(i))); - } - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - for (int i = 0; i < 3; ++i) { - ASSERT_EQ(0, NumTableFilesAtLevel(i)); - } -} - -INSTANTIATE_TEST_CASE_P(DBTablePropertiesTest, DBTablePropertiesTest, - ::testing::Values("kCompactionStyleLevel", - "kCompactionStyleUniversal")); - -} // namespace ROCKSDB_NAMESPACE - - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_tailing_iter_test.cc b/db/db_tailing_iter_test.cc deleted file mode 100644 index 964e06eb3..000000000 --- a/db/db_tailing_iter_test.cc +++ /dev/null @@ -1,595 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -// Introduction of SyncPoint effectively disabled building and running this test -// in Release build. -// which is a pity, it is a good test - -#include "db/db_test_util.h" -#include "db/forward_iterator.h" -#include "port/stack_trace.h" - -namespace ROCKSDB_NAMESPACE { - -class DBTestTailingIterator : public DBTestBase, - public ::testing::WithParamInterface { - public: - DBTestTailingIterator() - : DBTestBase("db_tailing_iterator_test", /*env_do_fsync=*/true) {} -}; - -INSTANTIATE_TEST_CASE_P(DBTestTailingIterator, DBTestTailingIterator, - ::testing::Bool()); - -TEST_P(DBTestTailingIterator, TailingIteratorSingle) { - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - - std::unique_ptr iter(db_->NewIterator(read_options)); - iter->SeekToFirst(); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - - // add a record and check that iter can see it - ASSERT_OK(db_->Put(WriteOptions(), "mirko", "fodor")); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), "mirko"); - - iter->Next(); - ASSERT_TRUE(!iter->Valid()); -} - -TEST_P(DBTestTailingIterator, TailingIteratorKeepAdding) { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(iter->status()); - std::string value(1024, 'a'); - - const int num_records = 10000; - for (int i = 0; i < num_records; ++i) { - char buf[32]; - snprintf(buf, sizeof(buf), "%016d", i); - - Slice key(buf, 16); - ASSERT_OK(Put(1, key, value)); - - iter->Seek(key); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(key), 0); - } -} - -TEST_P(DBTestTailingIterator, TailingIteratorSeekToNext) { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(iter->status()); - std::unique_ptr itern(db_->NewIterator(read_options, handles_[1])); - 
ASSERT_OK(itern->status()); - std::string value(1024, 'a'); - - const int num_records = 1000; - for (int i = 1; i < num_records; ++i) { - char buf1[32]; - char buf2[32]; - snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5); - - Slice key(buf1, 20); - ASSERT_OK(Put(1, key, value)); - - if (i % 100 == 99) { - ASSERT_OK(Flush(1)); - } - - snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2); - Slice target(buf2, 20); - iter->Seek(target); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(key), 0); - if (i == 1) { - itern->SeekToFirst(); - } else { - itern->Next(); - } - ASSERT_TRUE(itern->Valid()); - ASSERT_EQ(itern->key().compare(key), 0); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - for (int i = 2 * num_records; i > 0; --i) { - char buf1[32]; - char buf2[32]; - snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5); - - Slice key(buf1, 20); - ASSERT_OK(Put(1, key, value)); - - if (i % 100 == 99) { - ASSERT_OK(Flush(1)); - } - - snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2); - Slice target(buf2, 20); - iter->Seek(target); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(key), 0); - } -} - -TEST_P(DBTestTailingIterator, TailingIteratorTrimSeekToNext) { - const uint64_t k150KB = 150 * 1024; - Options options; - options.write_buffer_size = k150KB; - options.max_write_buffer_number = 3; - options.min_write_buffer_number_to_merge = 2; - options.env = env_; - CreateAndReopenWithCF({"pikachu"}, options); - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - int num_iters, deleted_iters; - - char bufe[32]; - snprintf(bufe, sizeof(bufe), "00b0%016d", 0); - Slice keyu(bufe, 20); - read_options.iterate_upper_bound = &keyu; - std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(iter->status()); - std::unique_ptr itern(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(itern->status()); - std::unique_ptr iterh(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(iterh->status()); - std::string value(1024, 'a'); - bool file_iters_deleted = false; - bool file_iters_renewed_null = false; - bool file_iters_renewed_copy = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ForwardIterator::SeekInternal:Return", [&](void* arg) { - ForwardIterator* fiter = reinterpret_cast(arg); - ASSERT_TRUE(!file_iters_deleted || - fiter->TEST_CheckDeletedIters(&deleted_iters, &num_iters)); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ForwardIterator::Next:Return", [&](void* arg) { - ForwardIterator* fiter = reinterpret_cast(arg); - ASSERT_TRUE(!file_iters_deleted || - fiter->TEST_CheckDeletedIters(&deleted_iters, &num_iters)); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ForwardIterator::RenewIterators:Null", - [&](void* /*arg*/) { file_iters_renewed_null = true; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ForwardIterator::RenewIterators:Copy", - [&](void* /*arg*/) { file_iters_renewed_copy = true; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - const int num_records = 1000; - for (int i = 1; i < num_records; ++i) { - char buf1[32]; - char buf2[32]; - char buf3[32]; - char buf4[32]; - snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5); - snprintf(buf3, sizeof(buf3), "00b0%016d", i * 5); - - Slice key(buf1, 20); - ASSERT_OK(Put(1, key, value)); - Slice keyn(buf3, 20); - ASSERT_OK(Put(1, keyn, 
value)); - - if (i % 100 == 99) { - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - if (i == 299) { - file_iters_deleted = true; - } - snprintf(buf4, sizeof(buf4), "00a0%016d", i * 5 / 2); - Slice target(buf4, 20); - iterh->Seek(target); - ASSERT_TRUE(iter->Valid()); - for (int j = (i + 1) * 5 / 2; j < i * 5; j += 5) { - iterh->Next(); - ASSERT_TRUE(iterh->Valid()); - } - if (i == 299) { - file_iters_deleted = false; - } - } - - file_iters_deleted = true; - snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2); - Slice target(buf2, 20); - iter->Seek(target); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(key), 0); - ASSERT_LE(num_iters, 1); - if (i == 1) { - itern->SeekToFirst(); - } else { - itern->Next(); - } - ASSERT_TRUE(itern->Valid()); - ASSERT_EQ(itern->key().compare(key), 0); - ASSERT_LE(num_iters, 1); - file_iters_deleted = false; - } - ASSERT_TRUE(file_iters_renewed_null); - ASSERT_TRUE(file_iters_renewed_copy); - iter = nullptr; - itern = nullptr; - iterh = nullptr; - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - read_options.read_tier = kBlockCacheTier; - std::unique_ptr iteri(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(iteri->status()); - char buf5[32]; - snprintf(buf5, sizeof(buf5), "00a0%016d", (num_records / 2) * 5 - 2); - Slice target1(buf5, 20); - iteri->Seek(target1); - ASSERT_TRUE(iteri->status().IsIncomplete()); - iteri = nullptr; - - read_options.read_tier = kReadAllTier; - options.table_factory.reset(NewBlockBasedTableFactory()); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - iter.reset(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(iter->status()); - for (int i = 2 * num_records; i > 0; --i) { - char buf1[32]; - char buf2[32]; - snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5); - - Slice key(buf1, 20); - ASSERT_OK(Put(1, key, value)); - - if (i % 100 == 99) { - ASSERT_OK(Flush(1)); - } - - snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2); - Slice target(buf2, 20); - iter->Seek(target); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(key), 0); - } -} - -TEST_P(DBTestTailingIterator, TailingIteratorDeletes) { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - - std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(iter->status()); - - // write a single record, read it using the iterator, then delete it - ASSERT_OK(Put(1, "0test", "test")); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), "0test"); - ASSERT_OK(Delete(1, "0test")); - - // write many more records - const int num_records = 10000; - std::string value(1024, 'A'); - - for (int i = 0; i < num_records; ++i) { - char buf[32]; - snprintf(buf, sizeof(buf), "1%015d", i); - - Slice key(buf, 16); - ASSERT_OK(Put(1, key, value)); - } - - // force a flush to make sure that no records are read from memtable - ASSERT_OK(Flush(1)); - - // skip "0test" - iter->Next(); - - // make sure we can read all new records using the existing iterator - int count = 0; - for (; iter->Valid(); iter->Next(), ++count) - ; - - ASSERT_EQ(count, num_records); -} - -TEST_P(DBTestTailingIterator, TailingIteratorPrefixSeek) { - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { 
- read_options.async_io = true; - } - Options options = CurrentOptions(); - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.prefix_extractor.reset(NewFixedPrefixTransform(2)); - options.memtable_factory.reset(NewHashSkipListRepFactory(16)); - options.allow_concurrent_memtable_write = false; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(iter->status()); - ASSERT_OK(Put(1, "0101", "test")); - - ASSERT_OK(Flush(1)); - - ASSERT_OK(Put(1, "0202", "test")); - - // Seek(0102) shouldn't find any records since 0202 has a different prefix - iter->Seek("0102"); - ASSERT_TRUE(!iter->Valid()); - - iter->Seek("0202"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), "0202"); - - iter->Next(); - ASSERT_TRUE(!iter->Valid()); -} - -TEST_P(DBTestTailingIterator, TailingIteratorIncomplete) { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - read_options.read_tier = kBlockCacheTier; - - std::string key("key"); - std::string value("value"); - - ASSERT_OK(db_->Put(WriteOptions(), key, value)); - - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_OK(iter->status()); - iter->SeekToFirst(); - // we either see the entry or it's not in cache - ASSERT_TRUE(iter->Valid() || iter->status().IsIncomplete()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - iter->SeekToFirst(); - // should still be true after compaction - ASSERT_TRUE(iter->Valid() || iter->status().IsIncomplete()); -} - -TEST_P(DBTestTailingIterator, TailingIteratorSeekToSame) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.write_buffer_size = 1000; - CreateAndReopenWithCF({"pikachu"}, options); - - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - const int NROWS = 10000; - // Write rows with keys 00000, 00002, 00004 etc. - for (int i = 0; i < NROWS; ++i) { - char buf[100]; - snprintf(buf, sizeof(buf), "%05d", 2 * i); - std::string key(buf); - std::string value("value"); - ASSERT_OK(db_->Put(WriteOptions(), key, value)); - } - - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_OK(iter->status()); - // Seek to 00001. We expect to find 00002. - std::string start_key = "00001"; - iter->Seek(start_key); - ASSERT_TRUE(iter->Valid()); - - std::string found = iter->key().ToString(); - ASSERT_EQ("00002", found); - - // Now seek to the same key. The iterator should remain in the same - // position. - iter->Seek(found); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(found, iter->key().ToString()); -} - -// Sets iterate_upper_bound and verifies that ForwardIterator doesn't call -// Seek() on immutable iterators when target key is >= prev_key and all -// iterators, including the memtable iterator, are over the upper bound. 
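As context for the upper-bound test that follows, this is roughly how a tailing iterator with iterate_upper_bound is set up in application code; the function name and key values are illustrative:

    #include <memory>
    #include "rocksdb/db.h"
    #include "rocksdb/iterator.h"
    #include "rocksdb/options.h"
    #include "rocksdb/slice.h"

    // Assumes `db` is an open rocksdb::DB*.
    void TailUpTo(rocksdb::DB* db) {
      rocksdb::ReadOptions read_options;
      read_options.tailing = true;  // the iterator can see newly written keys
      rocksdb::Slice upper_bound("20");
      read_options.iterate_upper_bound = &upper_bound;  // keys >= "20" excluded

      std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(read_options));
      for (it->Seek("10"); it->Valid(); it->Next()) {
        // process it->key() / it->value()
      }
      // A later Seek() on the same tailing iterator can pick up rows written
      // after the iterator was created, without reopening it.
    }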
-TEST_P(DBTestTailingIterator, TailingIteratorUpperBound) { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - - const Slice upper_bound("20", 3); - ReadOptions read_options; - read_options.tailing = true; - read_options.iterate_upper_bound = &upper_bound; - if (GetParam()) { - read_options.async_io = true; - } - ASSERT_OK(Put(1, "11", "11")); - ASSERT_OK(Put(1, "12", "12")); - ASSERT_OK(Put(1, "22", "22")); - ASSERT_OK(Flush(1)); // flush all those keys to an immutable SST file - - // Add another key to the memtable. - ASSERT_OK(Put(1, "21", "21")); - - std::unique_ptr it(db_->NewIterator(read_options, handles_[1])); - ASSERT_OK(it->status()); - it->Seek("12"); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("12", it->key().ToString()); - - it->Next(); - // Not valid since "21" is over the upper bound. - ASSERT_FALSE(it->Valid()); - ASSERT_OK(it->status()); - // This keeps track of the number of times NeedToSeekImmutable() was true. - int immutable_seeks = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ForwardIterator::SeekInternal:Immutable", - [&](void* /*arg*/) { ++immutable_seeks; }); - - // Seek to 13. This should not require any immutable seeks. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - it->Seek("13"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - ASSERT_FALSE(it->Valid()); - ASSERT_OK(it->status()); - if (GetParam()) { - ASSERT_EQ(1, immutable_seeks); - } else { - ASSERT_EQ(0, immutable_seeks); - } -} - -TEST_P(DBTestTailingIterator, TailingIteratorGap) { - // level 1: [20, 25] [35, 40] - // level 2: [10 - 15] [45 - 50] - // level 3: [20, 30, 40] - // Previously there is a bug in tailing_iterator that if there is a gap in - // lower level, the key will be skipped if it is within the range between - // the largest key of index n file and the smallest key of index n+1 file - // if both file fit in that gap. 
In this example, 25 < key < 35 - // https://github.com/facebook/rocksdb/issues/1372 - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - ASSERT_OK(Put(1, "20", "20")); - ASSERT_OK(Put(1, "30", "30")); - ASSERT_OK(Put(1, "40", "40")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(3, 1); - - ASSERT_OK(Put(1, "10", "10")); - ASSERT_OK(Put(1, "15", "15")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "45", "45")); - ASSERT_OK(Put(1, "50", "50")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(2, 1); - - ASSERT_OK(Put(1, "20", "20")); - ASSERT_OK(Put(1, "25", "25")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "35", "35")); - ASSERT_OK(Put(1, "40", "40")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(1, 1); - - ColumnFamilyMetaData meta; - db_->GetColumnFamilyMetaData(handles_[1], &meta); - - std::unique_ptr it(db_->NewIterator(read_options, handles_[1])); - it->Seek("30"); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("30", it->key().ToString()); - - it->Next(); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("35", it->key().ToString()); - - it->Next(); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ("40", it->key().ToString()); - - ASSERT_OK(it->status()); -} - -TEST_P(DBTestTailingIterator, SeekWithUpperBoundBug) { - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - const Slice upper_bound("cc", 3); - read_options.iterate_upper_bound = &upper_bound; - - // 1st L0 file - ASSERT_OK(db_->Put(WriteOptions(), "aa", "SEEN")); - ASSERT_OK(Flush()); - - // 2nd L0 file - ASSERT_OK(db_->Put(WriteOptions(), "zz", "NOT-SEEN")); - ASSERT_OK(Flush()); - - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_OK(iter->status()); - - iter->Seek("aa"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), "aa"); -} - -TEST_P(DBTestTailingIterator, SeekToFirstWithUpperBoundBug) { - ReadOptions read_options; - read_options.tailing = true; - if (GetParam()) { - read_options.async_io = true; - } - const Slice upper_bound("cc", 3); - read_options.iterate_upper_bound = &upper_bound; - - // 1st L0 file - ASSERT_OK(db_->Put(WriteOptions(), "aa", "SEEN")); - ASSERT_OK(Flush()); - - // 2nd L0 file - ASSERT_OK(db_->Put(WriteOptions(), "zz", "NOT-SEEN")); - ASSERT_OK(Flush()); - - std::unique_ptr iter(db_->NewIterator(read_options)); - ASSERT_OK(iter->status()); - - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), "aa"); - - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().ToString(), "aa"); -} - -} // namespace ROCKSDB_NAMESPACE - - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_test.cc b/db/db_test.cc deleted file mode 100644 index 05ee14fe2..000000000 --- a/db/db_test.cc +++ /dev/null @@ -1,7338 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -// Introduction of SyncPoint effectively disabled building and running this test -// in Release build. -// which is a pity, it is a good test -#include - -#include -#include -#include -#include -#include - -#ifndef OS_WIN -#include -#endif -#ifdef OS_SOLARIS -#include -#endif - -#include "cache/lru_cache.h" -#include "db/blob/blob_index.h" -#include "db/blob/blob_log_format.h" -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "db/dbformat.h" -#include "db/job_context.h" -#include "db/version_set.h" -#include "db/write_batch_internal.h" -#include "env/mock_env.h" -#include "file/filename.h" -#include "monitoring/thread_status_util.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/cache.h" -#include "rocksdb/compaction_filter.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/experimental.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/options.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/slice.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/snapshot.h" -#include "rocksdb/table.h" -#include "rocksdb/table_properties.h" -#include "rocksdb/thread_status.h" -#include "rocksdb/types.h" -#include "rocksdb/utilities/checkpoint.h" -#include "rocksdb/utilities/optimistic_transaction_db.h" -#include "rocksdb/utilities/write_batch_with_index.h" -#include "table/mock_table.h" -#include "table/scoped_arena_iterator.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/compression.h" -#include "util/mutexlock.h" -#include "util/random.h" -#include "util/rate_limiter.h" -#include "util/string_util.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -// Note that whole DBTest and its child classes disable fsync on files -// and directories for speed. -// If fsync needs to be covered in a test, put it in other places. 
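Editor's aside before the fixture classes that the note above describes: many tests in this file depend on the SyncPoint callback mechanism mentioned in the header comment, which is also why they are effectively debug-build only (sync points compile away in release builds). A small, hedged sketch of that pattern follows; the sync point name is taken from the tests below, while the helper function itself is purely illustrative.

#include <atomic>

#include "test_util/sync_point.h"

// Illustrative helper: count how many times DBImpl sleeps to throttle writes.
void CountDelayedWritesSketch() {
  std::atomic<int> sleep_count(0);
  // Register a callback fired whenever the named sync point is reached.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:Sleep",
      [&](void* /*arg*/) { sleep_count.fetch_add(1); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // ... perform writes that are expected to hit the write controller ...

  // Always disable processing and clear callbacks before locals go away.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}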
-class DBTest : public DBTestBase { - public: - DBTest() : DBTestBase("db_test", /*env_do_fsync=*/false) {} -}; - -class DBTestWithParam - : public DBTest, - public testing::WithParamInterface> { - public: - DBTestWithParam() { - max_subcompactions_ = std::get<0>(GetParam()); - exclusive_manual_compaction_ = std::get<1>(GetParam()); - } - - // Required if inheriting from testing::WithParamInterface<> - static void SetUpTestCase() {} - static void TearDownTestCase() {} - - uint32_t max_subcompactions_; - bool exclusive_manual_compaction_; -}; - -TEST_F(DBTest, MockEnvTest) { - std::unique_ptr env{MockEnv::Create(Env::Default())}; - Options options; - options.create_if_missing = true; - options.env = env.get(); - DB* db; - - const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")}; - const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")}; - - ASSERT_OK(DB::Open(options, "/dir/db", &db)); - for (size_t i = 0; i < 3; ++i) { - ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i])); - } - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - - Iterator* iterator = db->NewIterator(ReadOptions()); - iterator->SeekToFirst(); - for (size_t i = 0; i < 3; ++i) { - ASSERT_TRUE(iterator->Valid()); - ASSERT_TRUE(keys[i] == iterator->key()); - ASSERT_TRUE(vals[i] == iterator->value()); - iterator->Next(); - } - ASSERT_TRUE(!iterator->Valid()); - delete iterator; - - DBImpl* dbi = static_cast_with_check(db); - ASSERT_OK(dbi->TEST_FlushMemTable()); - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - - delete db; -} - -TEST_F(DBTest, MemEnvTest) { - std::unique_ptr env{NewMemEnv(Env::Default())}; - Options options; - options.create_if_missing = true; - options.env = env.get(); - DB* db; - - const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")}; - const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")}; - - ASSERT_OK(DB::Open(options, "/dir/db", &db)); - for (size_t i = 0; i < 3; ++i) { - ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i])); - } - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - - Iterator* iterator = db->NewIterator(ReadOptions()); - iterator->SeekToFirst(); - for (size_t i = 0; i < 3; ++i) { - ASSERT_TRUE(iterator->Valid()); - ASSERT_TRUE(keys[i] == iterator->key()); - ASSERT_TRUE(vals[i] == iterator->value()); - iterator->Next(); - } - ASSERT_TRUE(!iterator->Valid()); - delete iterator; - - DBImpl* dbi = static_cast_with_check(db); - ASSERT_OK(dbi->TEST_FlushMemTable()); - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - - delete db; - - options.create_if_missing = false; - ASSERT_OK(DB::Open(options, "/dir/db", &db)); - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - delete db; -} - -TEST_F(DBTest, WriteEmptyBatch) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "bar")); - WriteOptions wo; - wo.sync = true; - wo.disableWAL = false; - WriteBatch empty_batch; - ASSERT_OK(dbfull()->Write(wo, &empty_batch)); - - // make sure we can re-open it. 
- ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); - ASSERT_EQ("bar", Get(1, "foo")); -} - -TEST_F(DBTest, SkipDelay) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; - CreateAndReopenWithCF({"pikachu"}, options); - - for (bool sync : {true, false}) { - for (bool disableWAL : {true, false}) { - if (sync && disableWAL) { - // sync and disableWAL is incompatible. - continue; - } - // Use a small number to ensure a large delay that is still effective - // when we do Put - // TODO(myabandeh): this is time dependent and could potentially make - // the test flaky - auto token = dbfull()->TEST_write_controler().GetDelayToken(1); - std::atomic sleep_count(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DelayWrite:Sleep", - [&](void* /*arg*/) { sleep_count.fetch_add(1); }); - std::atomic wait_count(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DelayWrite:Wait", - [&](void* /*arg*/) { wait_count.fetch_add(1); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - WriteOptions wo; - wo.sync = sync; - wo.disableWAL = disableWAL; - wo.no_slowdown = true; - // Large enough to exceed allowance for one time interval - std::string large_value(1024, 'x'); - // Perhaps ideally this first write would fail because of delay, but - // the current implementation does not guarantee that. - dbfull()->Put(wo, "foo", large_value).PermitUncheckedError(); - // We need the 2nd write to trigger delay. This is because delay is - // estimated based on the last write size which is 0 for the first write. - ASSERT_NOK(dbfull()->Put(wo, "foo2", large_value)); - ASSERT_GE(sleep_count.load(), 0); - ASSERT_GE(wait_count.load(), 0); - token.reset(); - - token = dbfull()->TEST_write_controler().GetDelayToken(1000000); - wo.no_slowdown = false; - ASSERT_OK(dbfull()->Put(wo, "foo3", large_value)); - ASSERT_GE(sleep_count.load(), 1); - token.reset(); - } - } -} - -TEST_F(DBTest, MixedSlowdownOptions) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; - CreateAndReopenWithCF({"pikachu"}, options); - std::vector threads; - std::atomic thread_num(0); - - std::function write_slowdown_func = [&]() { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - wo.no_slowdown = false; - ASSERT_OK(dbfull()->Put(wo, key, "bar")); - }; - std::function write_no_slowdown_func = [&]() { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - wo.no_slowdown = true; - ASSERT_NOK(dbfull()->Put(wo, key, "bar")); - }; - // Use a small number to ensure a large delay that is still effective - // when we do Put - // TODO(myabandeh): this is time dependent and could potentially make - // the test flaky - auto token = dbfull()->TEST_write_controler().GetDelayToken(1); - std::atomic sleep_count(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DelayWrite:BeginWriteStallDone", [&](void* /*arg*/) { - sleep_count.fetch_add(1); - if (threads.empty()) { - for (int i = 0; i < 2; ++i) { - threads.emplace_back(write_slowdown_func); - } - for (int i = 0; i < 2; ++i) { - threads.emplace_back(write_no_slowdown_func); - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - WriteOptions wo; - wo.sync = false; - wo.disableWAL = false; - wo.no_slowdown = false; - ASSERT_OK(dbfull()->Put(wo, "foo", "bar")); - // We need the 2nd write to 
trigger delay. This is because delay is - // estimated based on the last write size which is 0 for the first write. - ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2")); - token.reset(); - - for (auto& t : threads) { - t.join(); - } - ASSERT_GE(sleep_count.load(), 1); - - wo.no_slowdown = true; - ASSERT_OK(dbfull()->Put(wo, "foo3", "bar")); -} - -TEST_F(DBTest, MixedSlowdownOptionsInQueue) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; - CreateAndReopenWithCF({"pikachu"}, options); - std::vector threads; - std::atomic thread_num(0); - - std::function write_no_slowdown_func = [&]() { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - wo.no_slowdown = true; - ASSERT_NOK(dbfull()->Put(wo, key, "bar")); - }; - // Use a small number to ensure a large delay that is still effective - // when we do Put - // TODO(myabandeh): this is time dependent and could potentially make - // the test flaky - auto token = dbfull()->TEST_write_controler().GetDelayToken(1); - std::atomic sleep_count(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DelayWrite:Sleep", [&](void* /*arg*/) { - sleep_count.fetch_add(1); - if (threads.empty()) { - for (int i = 0; i < 2; ++i) { - threads.emplace_back(write_no_slowdown_func); - } - // Sleep for 2s to allow the threads to insert themselves into the - // write queue - env_->SleepForMicroseconds(3000000ULL); - } - }); - std::atomic wait_count(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DelayWrite:Wait", - [&](void* /*arg*/) { wait_count.fetch_add(1); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - WriteOptions wo; - wo.sync = false; - wo.disableWAL = false; - wo.no_slowdown = false; - ASSERT_OK(dbfull()->Put(wo, "foo", "bar")); - // We need the 2nd write to trigger delay. This is because delay is - // estimated based on the last write size which is 0 for the first write. 
- ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2")); - token.reset(); - - for (auto& t : threads) { - t.join(); - } - ASSERT_EQ(sleep_count.load(), 1); - ASSERT_GE(wait_count.load(), 0); -} - -TEST_F(DBTest, MixedSlowdownOptionsStop) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; - CreateAndReopenWithCF({"pikachu"}, options); - std::vector threads; - std::atomic thread_num(0); - - std::function write_slowdown_func = [&]() { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - wo.no_slowdown = false; - ASSERT_OK(dbfull()->Put(wo, key, "bar")); - }; - std::function write_no_slowdown_func = [&]() { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - wo.no_slowdown = true; - ASSERT_NOK(dbfull()->Put(wo, key, "bar")); - }; - std::function wakeup_writer = [&]() { - dbfull()->mutex_.Lock(); - dbfull()->bg_cv_.SignalAll(); - dbfull()->mutex_.Unlock(); - }; - // Use a small number to ensure a large delay that is still effective - // when we do Put - // TODO(myabandeh): this is time dependent and could potentially make - // the test flaky - auto token = dbfull()->TEST_write_controler().GetStopToken(); - std::atomic wait_count(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) { - wait_count.fetch_add(1); - if (threads.empty()) { - for (int i = 0; i < 2; ++i) { - threads.emplace_back(write_slowdown_func); - } - for (int i = 0; i < 2; ++i) { - threads.emplace_back(write_no_slowdown_func); - } - // Sleep for 2s to allow the threads to insert themselves into the - // write queue - env_->SleepForMicroseconds(3000000ULL); - } - token.reset(); - threads.emplace_back(wakeup_writer); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - WriteOptions wo; - wo.sync = false; - wo.disableWAL = false; - wo.no_slowdown = false; - ASSERT_OK(dbfull()->Put(wo, "foo", "bar")); - // We need the 2nd write to trigger delay. This is because delay is - // estimated based on the last write size which is 0 for the first write. - ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2")); - token.reset(); - - for (auto& t : threads) { - t.join(); - } - ASSERT_GE(wait_count.load(), 1); - - wo.no_slowdown = true; - ASSERT_OK(dbfull()->Put(wo, "foo3", "bar")); -} - -TEST_F(DBTest, LevelLimitReopen) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - - const std::string value(1024 * 1024, ' '); - int i = 0; - while (NumTableFilesAtLevel(2, 1) == 0) { - ASSERT_OK(Put(1, Key(i++), value)); - } - - options.num_levels = 1; - options.max_bytes_for_level_multiplier_additional.resize(1, 1); - Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ(s.IsInvalidArgument(), true); - ASSERT_EQ(s.ToString(), - "Invalid argument: db has more levels than options.num_levels"); - - options.num_levels = 10; - options.max_bytes_for_level_multiplier_additional.resize(10, 1); - ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); -} - -TEST_F(DBTest, LevelReopenWithFIFO) { - const int kLevelCount = 4; - const int kKeyCount = 5; - const int kTotalSstFileCount = kLevelCount * kKeyCount; - const int kCF = 1; - - Options options = CurrentOptions(); - // Config level0_file_num_compaction_trigger to prevent L0 files being - // automatically compacted while we are constructing a LSM tree structure - // to test multi-level FIFO compaction. 
- options.level0_file_num_compaction_trigger = kKeyCount + 1; - CreateAndReopenWithCF({"pikachu"}, options); - - // The expected number of files per level after each file creation. - const std::string expected_files_per_level[kLevelCount][kKeyCount] = { - {"0,0,0,1", "0,0,0,2", "0,0,0,3", "0,0,0,4", "0,0,0,5"}, - {"0,0,1,5", "0,0,2,5", "0,0,3,5", "0,0,4,5", "0,0,5,5"}, - {"0,1,5,5", "0,2,5,5", "0,3,5,5", "0,4,5,5", "0,5,5,5"}, - {"1,5,5,5", "2,5,5,5", "3,5,5,5", "4,5,5,5", "5,5,5,5"}, - }; - - const std::string expected_entries[kKeyCount][kLevelCount + 1] = { - {"[ ]", "[ a3 ]", "[ a2, a3 ]", "[ a1, a2, a3 ]", "[ a0, a1, a2, a3 ]"}, - {"[ ]", "[ b3 ]", "[ b2, b3 ]", "[ b1, b2, b3 ]", "[ b0, b1, b2, b3 ]"}, - {"[ ]", "[ c3 ]", "[ c2, c3 ]", "[ c1, c2, c3 ]", "[ c0, c1, c2, c3 ]"}, - {"[ ]", "[ d3 ]", "[ d2, d3 ]", "[ d1, d2, d3 ]", "[ d0, d1, d2, d3 ]"}, - {"[ ]", "[ e3 ]", "[ e2, e3 ]", "[ e1, e2, e3 ]", "[ e0, e1, e2, e3 ]"}, - }; - - // The loop below creates the following LSM tree where each (k, v) pair - // represents a file that contains that entry. When a file is created, - // the db is reopend with FIFO compaction and verified the LSM tree - // structure is still the same. - // - // The resulting LSM tree will contain 5 different keys. Each key as - // 4 different versions, located in different level. - // - // L0: (e, e0) (d, d0) (c, c0) (b, b0) (a, a0) - // L1: (a, a1) (b, b1) (c, c1) (d, d1) (e, e1) - // L2: (a, a2) (b, b2) (c, c2) (d, d2) (e, e2) - // L3: (a, a3) (b, b3) (c, c3) (d, d3) (e, e3) - for (int l = 0; l < kLevelCount; ++l) { - int level = kLevelCount - 1 - l; - for (int p = 0; p < kKeyCount; ++p) { - std::string put_key = std::string(1, char('a' + p)); - ASSERT_OK(Put(kCF, put_key, put_key + std::to_string(level))); - ASSERT_OK(Flush(kCF)); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - for (int g = 0; g < kKeyCount; ++g) { - int entry_count = (p >= g) ? l + 1 : l; - std::string get_key = std::string(1, char('a' + g)); - CheckAllEntriesWithFifoReopen(expected_entries[g][entry_count], get_key, - kCF, {"pikachu"}, options); - } - if (level != 0) { - MoveFilesToLevel(level, kCF); - for (int g = 0; g < kKeyCount; ++g) { - int entry_count = (p >= g) ? l + 1 : l; - std::string get_key = std::string(1, char('a' + g)); - CheckAllEntriesWithFifoReopen(expected_entries[g][entry_count], - get_key, kCF, {"pikachu"}, options); - } - } - ASSERT_EQ(expected_files_per_level[l][p], FilesPerLevel(kCF)); - } - } - - // The expected number of sst files in each level after each FIFO compaction - // that deletes the oldest sst file. - const std::string expected_files_per_level_after_fifo[] = { - "5,5,5,4", "5,5,5,3", "5,5,5,2", "5,5,5,1", "5,5,5", "5,5,4", "5,5,3", - "5,5,2", "5,5,1", "5,5", "5,4", "5,3", "5,2", "5,1", - "5", "4", "3", "2", "1", "", - }; - - // The expected value entries of each key after each FIFO compaction. - // This verifies whether FIFO removes the file with the smallest key in non-L0 - // files first then the oldest files in L0. - const std::string expected_entries_after_fifo[kKeyCount][kLevelCount + 1] = { - {"[ a0, a1, a2, a3 ]", "[ a0, a1, a2 ]", "[ a0, a1 ]", "[ a0 ]", "[ ]"}, - {"[ b0, b1, b2, b3 ]", "[ b0, b1, b2 ]", "[ b0, b1 ]", "[ b0 ]", "[ ]"}, - {"[ c0, c1, c2, c3 ]", "[ c0, c1, c2 ]", "[ c0, c1 ]", "[ c0 ]", "[ ]"}, - {"[ d0, d1, d2, d3 ]", "[ d0, d1, d2 ]", "[ d0, d1 ]", "[ d0 ]", "[ ]"}, - {"[ e0, e1, e2, e3 ]", "[ e0, e1, e2 ]", "[ e0, e1 ]", "[ e0 ]", "[ ]"}, - }; - - // In the 2nd phase, we reopen the DB with FIFO compaction. 
In each reopen, - // we config max_table_files_size so that FIFO will remove exactly one file - // at a time upon compaction, and we will use it to verify whether the sst - // files are deleted in the correct order. - for (int i = 0; i < kTotalSstFileCount; ++i) { - uint64_t total_sst_files_size = 0; - ASSERT_TRUE(dbfull()->GetIntProperty( - handles_[1], "rocksdb.total-sst-files-size", &total_sst_files_size)); - ASSERT_TRUE(total_sst_files_size > 0); - - Options fifo_options(options); - fifo_options.compaction_style = kCompactionStyleFIFO; - options.create_if_missing = false; - fifo_options.max_open_files = -1; - fifo_options.disable_auto_compactions = false; - // Config max_table_files_size to be total_sst_files_size - 1 so that - // FIFO will delete one file. - fifo_options.compaction_options_fifo.max_table_files_size = - total_sst_files_size - 1; - ASSERT_OK( - TryReopenWithColumnFamilies({"default", "pikachu"}, fifo_options)); - // For FIFO to pick a compaction - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); - ASSERT_OK(dbfull()->TEST_WaitForCompact(false)); - for (int g = 0; g < kKeyCount; ++g) { - std::string get_key = std::string(1, char('a' + g)); - int status_index = i / kKeyCount; - if ((i % kKeyCount) >= g) { - // If true, then it means the sst file containing the get_key in the - // current level has already been deleted, so we need to move the - // status_index for checking the expected value. - status_index++; - } - CheckAllEntriesWithFifoReopen( - expected_entries_after_fifo[g][status_index], get_key, kCF, - {"pikachu"}, options); - } - ASSERT_EQ(expected_files_per_level_after_fifo[i], FilesPerLevel(kCF)); - } -} - -TEST_F(DBTest, PutSingleDeleteGet) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_OK(Put(1, "foo2", "v2")); - ASSERT_EQ("v2", Get(1, "foo2")); - ASSERT_OK(SingleDelete(1, "foo")); - ASSERT_EQ("NOT_FOUND", Get(1, "foo")); - // Skip FIFO and universal compaction because they do not apply to the test - // case. Skip MergePut because single delete does not get removed when it - // encounters a merge. - } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | - kSkipMergePut)); -} - -TEST_F(DBTest, ReadFromPersistedTier) { - do { - Random rnd(301); - Options options = CurrentOptions(); - for (int disableWAL = 0; disableWAL <= 1; ++disableWAL) { - CreateAndReopenWithCF({"pikachu"}, options); - WriteOptions wopt; - wopt.disableWAL = (disableWAL == 1); - // 1st round: put but not flush - ASSERT_OK(db_->Put(wopt, handles_[1], "foo", "first")); - ASSERT_OK(db_->Put(wopt, handles_[1], "bar", "one")); - ASSERT_EQ("first", Get(1, "foo")); - ASSERT_EQ("one", Get(1, "bar")); - - // Read directly from persited data. - ReadOptions ropt; - ropt.read_tier = kPersistedTier; - std::string value; - if (wopt.disableWAL) { - // as data has not yet being flushed, we expect not found. 
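        // (Editor's note: kPersistedTier restricts the read to data that has
        // reached persistent storage, i.e. SST files plus the WAL when the
        // WAL is enabled; with disableWAL the entries above live only in the
        // memtable, so this read tier cannot see them until a flush.)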
- ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound()); - ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound()); - } else { - ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value)); - ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value)); - } - - // Multiget - std::vector multiget_cfs; - multiget_cfs.push_back(handles_[1]); - multiget_cfs.push_back(handles_[1]); - std::vector multiget_keys; - multiget_keys.push_back("foo"); - multiget_keys.push_back("bar"); - std::vector multiget_values; - auto statuses = - db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values); - if (wopt.disableWAL) { - ASSERT_TRUE(statuses[0].IsNotFound()); - ASSERT_TRUE(statuses[1].IsNotFound()); - } else { - ASSERT_OK(statuses[0]); - ASSERT_OK(statuses[1]); - } - - // 2nd round: flush and put a new value in memtable. - ASSERT_OK(Flush(1)); - ASSERT_OK(db_->Put(wopt, handles_[1], "rocksdb", "hello")); - - // once the data has been flushed, we are able to get the - // data when kPersistedTier is used. - ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).ok()); - ASSERT_EQ(value, "first"); - ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok()); - ASSERT_EQ(value, "one"); - if (wopt.disableWAL) { - ASSERT_TRUE( - db_->Get(ropt, handles_[1], "rocksdb", &value).IsNotFound()); - } else { - ASSERT_OK(db_->Get(ropt, handles_[1], "rocksdb", &value)); - ASSERT_EQ(value, "hello"); - } - - // Expect same result in multiget - multiget_cfs.push_back(handles_[1]); - multiget_keys.push_back("rocksdb"); - statuses = - db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values); - ASSERT_TRUE(statuses[0].ok()); - ASSERT_EQ("first", multiget_values[0]); - ASSERT_TRUE(statuses[1].ok()); - ASSERT_EQ("one", multiget_values[1]); - if (wopt.disableWAL) { - ASSERT_TRUE(statuses[2].IsNotFound()); - } else { - ASSERT_OK(statuses[2]); - } - - // 3rd round: delete and flush - ASSERT_OK(db_->Delete(wopt, handles_[1], "foo")); - Flush(1); - ASSERT_OK(db_->Delete(wopt, handles_[1], "bar")); - - ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound()); - if (wopt.disableWAL) { - // Still expect finding the value as its delete has not yet being - // flushed. - ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok()); - ASSERT_EQ(value, "one"); - } else { - ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound()); - } - ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok()); - ASSERT_EQ(value, "hello"); - - statuses = - db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values); - ASSERT_TRUE(statuses[0].IsNotFound()); - if (wopt.disableWAL) { - ASSERT_TRUE(statuses[1].ok()); - ASSERT_EQ("one", multiget_values[1]); - } else { - ASSERT_TRUE(statuses[1].IsNotFound()); - } - ASSERT_TRUE(statuses[2].ok()); - ASSERT_EQ("hello", multiget_values[2]); - if (wopt.disableWAL == 0) { - DestroyAndReopen(options); - } - } - } while (ChangeOptions()); -} - -TEST_F(DBTest, SingleDeleteFlush) { - // Test to check whether flushing preserves a single delete hidden - // behind a put. - do { - Random rnd(301); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - CreateAndReopenWithCF({"pikachu"}, options); - - // Put values on second level (so that they will not be in the same - // compaction as the other operations. 
- ASSERT_OK(Put(1, "foo", "first")); - ASSERT_OK(Put(1, "bar", "one")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(2, 1); - - // (Single) delete hidden by a put - ASSERT_OK(SingleDelete(1, "foo")); - ASSERT_OK(Put(1, "foo", "second")); - ASSERT_OK(Delete(1, "bar")); - ASSERT_OK(Put(1, "bar", "two")); - ASSERT_OK(Flush(1)); - - ASSERT_OK(SingleDelete(1, "foo")); - ASSERT_OK(Delete(1, "bar")); - ASSERT_OK(Flush(1)); - - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - - ASSERT_EQ("NOT_FOUND", Get(1, "bar")); - ASSERT_EQ("NOT_FOUND", Get(1, "foo")); - // Skip FIFO and universal compaction beccaus they do not apply to the test - // case. Skip MergePut because single delete does not get removed when it - // encounters a merge. - } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | - kSkipMergePut)); -} - -TEST_F(DBTest, SingleDeletePutFlush) { - // Single deletes that encounter the matching put in a flush should get - // removed. - do { - Random rnd(301); - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", Slice())); - ASSERT_OK(Put(1, "a", Slice())); - ASSERT_OK(SingleDelete(1, "a")); - ASSERT_OK(Flush(1)); - - ASSERT_EQ("[ ]", AllEntriesFor("a", 1)); - // Skip FIFO and universal compaction because they do not apply to the test - // case. Skip MergePut because single delete does not get removed when it - // encounters a merge. - } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | - kSkipMergePut)); -} - -// Disable because not all platform can run it. -// It requires more than 9GB memory to run it, With single allocation -// of more than 3GB. -TEST_F(DBTest, DISABLED_SanitizeVeryVeryLargeValue) { - const size_t kValueSize = 4 * size_t{1024 * 1024 * 1024}; // 4GB value - std::string raw(kValueSize, 'v'); - Options options = CurrentOptions(); - options.env = env_; - options.merge_operator = MergeOperators::CreatePutOperator(); - options.write_buffer_size = 100000; // Small write buffer - options.paranoid_checks = true; - DestroyAndReopen(options); - - ASSERT_OK(Put("boo", "v1")); - ASSERT_TRUE(Put("foo", raw).IsInvalidArgument()); - ASSERT_TRUE(Merge("foo", raw).IsInvalidArgument()); - - WriteBatch wb; - ASSERT_TRUE(wb.Put("foo", raw).IsInvalidArgument()); - ASSERT_TRUE(wb.Merge("foo", raw).IsInvalidArgument()); - - Slice value_slice = raw; - Slice key_slice = "foo"; - SliceParts sp_key(&key_slice, 1); - SliceParts sp_value(&value_slice, 1); - - ASSERT_TRUE(wb.Put(sp_key, sp_value).IsInvalidArgument()); - ASSERT_TRUE(wb.Merge(sp_key, sp_value).IsInvalidArgument()); -} - -// Disable because not all platform can run it. -// It requires more than 9GB memory to run it, With single allocation -// of more than 3GB. 
-TEST_F(DBTest, DISABLED_VeryLargeValue) { - const size_t kValueSize = 3221225472u; // 3GB value - const size_t kKeySize = 8388608u; // 8MB key - std::string raw(kValueSize, 'v'); - std::string key1(kKeySize, 'c'); - std::string key2(kKeySize, 'd'); - - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; // Small write buffer - options.paranoid_checks = true; - DestroyAndReopen(options); - - ASSERT_OK(Put("boo", "v1")); - ASSERT_OK(Put("foo", "v1")); - ASSERT_OK(Put(key1, raw)); - raw[0] = 'w'; - ASSERT_OK(Put(key2, raw)); - dbfull()->TEST_WaitForFlushMemTable(); - - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - std::string value; - Status s = db_->Get(ReadOptions(), key1, &value); - ASSERT_OK(s); - ASSERT_EQ(kValueSize, value.size()); - ASSERT_EQ('v', value[0]); - - s = db_->Get(ReadOptions(), key2, &value); - ASSERT_OK(s); - ASSERT_EQ(kValueSize, value.size()); - ASSERT_EQ('w', value[0]); - - // Compact all files. - Flush(); - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); - - // Check DB is not in read-only state. - ASSERT_OK(Put("boo", "v1")); - - s = db_->Get(ReadOptions(), key1, &value); - ASSERT_OK(s); - ASSERT_EQ(kValueSize, value.size()); - ASSERT_EQ('v', value[0]); - - s = db_->Get(ReadOptions(), key2, &value); - ASSERT_OK(s); - ASSERT_EQ(kValueSize, value.size()); - ASSERT_EQ('w', value[0]); -} - -TEST_F(DBTest, GetFromImmutableLayer) { - do { - Options options = CurrentOptions(); - options.env = env_; - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_EQ("v1", Get(1, "foo")); - - // Block sync calls - env_->delay_sstable_sync_.store(true, std::memory_order_release); - ASSERT_OK(Put(1, "k1", std::string(100000, 'x'))); // Fill memtable - ASSERT_OK(Put(1, "k2", std::string(100000, 'y'))); // Trigger flush - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("NOT_FOUND", Get(0, "foo")); - // Release sync calls - env_->delay_sstable_sync_.store(false, std::memory_order_release); - } while (ChangeOptions()); -} - -TEST_F(DBTest, GetLevel0Ordering) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - // Check that we process level-0 files in correct order. The code - // below generates two level-0 files where the earlier one comes - // before the later one in the level-0 file list since the earlier - // one has a smaller "smallest" key. - ASSERT_OK(Put(1, "bar", "b")); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "foo", "v2")); - ASSERT_OK(Flush(1)); - ASSERT_EQ("v2", Get(1, "foo")); - } while (ChangeOptions()); -} - -TEST_F(DBTest, WrongLevel0Config) { - Options options = CurrentOptions(); - Close(); - ASSERT_OK(DestroyDB(dbname_, options)); - options.level0_stop_writes_trigger = 1; - options.level0_slowdown_writes_trigger = 2; - options.level0_file_num_compaction_trigger = 3; - ASSERT_OK(DB::Open(options, dbname_, &db_)); -} - -TEST_F(DBTest, GetOrderedByLevels) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - Compact(1, "a", "z"); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_OK(Put(1, "foo", "v2")); - ASSERT_EQ("v2", Get(1, "foo")); - ASSERT_OK(Flush(1)); - ASSERT_EQ("v2", Get(1, "foo")); - } while (ChangeOptions()); -} - -TEST_F(DBTest, GetPicksCorrectFile) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - // Arrange to have multiple files in a non-level-0 level. 
- ASSERT_OK(Put(1, "a", "va")); - Compact(1, "a", "b"); - ASSERT_OK(Put(1, "x", "vx")); - Compact(1, "x", "y"); - ASSERT_OK(Put(1, "f", "vf")); - Compact(1, "f", "g"); - ASSERT_EQ("va", Get(1, "a")); - ASSERT_EQ("vf", Get(1, "f")); - ASSERT_EQ("vx", Get(1, "x")); - } while (ChangeOptions()); -} - -TEST_F(DBTest, GetEncountersEmptyLevel) { - do { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - // Arrange for the following to happen: - // * sstable A in level 0 - // * nothing in level 1 - // * sstable B in level 2 - // Then do enough Get() calls to arrange for an automatic compaction - // of sstable A. A bug would cause the compaction to be marked as - // occurring at level 1 (instead of the correct level 0). - - // Step 1: First place sstables in levels 0 and 2 - ASSERT_OK(Put(1, "a", "begin")); - ASSERT_OK(Put(1, "z", "end")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); - ASSERT_OK(Put(1, "a", "begin")); - ASSERT_OK(Put(1, "z", "end")); - ASSERT_OK(Flush(1)); - ASSERT_GT(NumTableFilesAtLevel(0, 1), 0); - ASSERT_GT(NumTableFilesAtLevel(2, 1), 0); - - // Step 2: clear level 1 if necessary. - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1); - ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0); - ASSERT_EQ(NumTableFilesAtLevel(2, 1), 1); - - // Step 3: read a bunch of times - for (int i = 0; i < 1000; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, "missing")); - } - - // Step 4: Wait for compaction to finish - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1); // XXX - } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction)); -} - -TEST_F(DBTest, FlushMultipleMemtable) { - do { - Options options = CurrentOptions(); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - options.max_write_buffer_number = 4; - options.min_write_buffer_number_to_merge = 3; - options.max_write_buffer_size_to_maintain = -1; - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); - - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v1", Get(1, "bar")); - ASSERT_OK(Flush(1)); - } while (ChangeCompactOptions()); -} -TEST_F(DBTest, FlushSchedule) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.level0_stop_writes_trigger = 1 << 10; - options.level0_slowdown_writes_trigger = 1 << 10; - options.min_write_buffer_number_to_merge = 1; - options.max_write_buffer_size_to_maintain = - static_cast(options.write_buffer_size); - options.max_write_buffer_number = 2; - options.write_buffer_size = 120 * 1024; - auto flush_listener = std::make_shared(); - flush_listener->expected_flush_reason = FlushReason::kWriteBufferFull; - options.listeners.push_back(flush_listener); - CreateAndReopenWithCF({"pikachu"}, options); - std::vector threads; - - std::atomic thread_num(0); - // each column family will have 5 thread, each thread generating 2 memtables. 
- // each column family should end up with 10 table files - std::function fill_memtable_func = [&]() { - int a = thread_num.fetch_add(1); - Random rnd(a); - WriteOptions wo; - // this should fill up 2 memtables - for (int k = 0; k < 5000; ++k) { - ASSERT_OK(db_->Put(wo, handles_[a & 1], rnd.RandomString(13), "")); - } - }; - - for (int i = 0; i < 10; ++i) { - threads.emplace_back(fill_memtable_func); - } - - for (auto& t : threads) { - t.join(); - } - - auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default"); - auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu"); - ASSERT_LE(default_tables, static_cast(10)); - ASSERT_GT(default_tables, static_cast(0)); - ASSERT_LE(pikachu_tables, static_cast(10)); - ASSERT_GT(pikachu_tables, static_cast(0)); -} - -namespace { -class KeepFilter : public CompactionFilter { - public: - bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - return false; - } - - const char* Name() const override { return "KeepFilter"; } -}; - -class KeepFilterFactory : public CompactionFilterFactory { - public: - explicit KeepFilterFactory(bool check_context = false) - : check_context_(check_context) {} - - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& context) override { - if (check_context_) { - EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction); - EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction); - } - return std::unique_ptr(new KeepFilter()); - } - - const char* Name() const override { return "KeepFilterFactory"; } - bool check_context_; - std::atomic_bool expect_full_compaction_; - std::atomic_bool expect_manual_compaction_; -}; - -class DelayFilter : public CompactionFilter { - public: - explicit DelayFilter(DBTestBase* d) : db_test(d) {} - bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - db_test->env_->MockSleepForMicroseconds(1000); - return true; - } - - const char* Name() const override { return "DelayFilter"; } - - private: - DBTestBase* db_test; -}; - -class DelayFilterFactory : public CompactionFilterFactory { - public: - explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {} - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& /*context*/) override { - return std::unique_ptr(new DelayFilter(db_test)); - } - - const char* Name() const override { return "DelayFilterFactory"; } - - private: - DBTestBase* db_test; -}; -} // anonymous namespace - - -static std::string CompressibleString(Random* rnd, int len) { - std::string r; - test::CompressibleString(rnd, 0.8, len, &r); - return r; -} - -TEST_F(DBTest, FailMoreDbPaths) { - Options options = CurrentOptions(); - options.db_paths.emplace_back(dbname_, 10000000); - options.db_paths.emplace_back(dbname_ + "_2", 1000000); - options.db_paths.emplace_back(dbname_ + "_3", 1000000); - options.db_paths.emplace_back(dbname_ + "_4", 1000000); - options.db_paths.emplace_back(dbname_ + "_5", 1000000); - ASSERT_TRUE(TryReopen(options).IsNotSupported()); -} - -void CheckColumnFamilyMeta( - const ColumnFamilyMetaData& cf_meta, const std::string& cf_name, - const std::vector>& files_by_level, - uint64_t start_time, uint64_t end_time) { - ASSERT_EQ(cf_meta.name, cf_name); - ASSERT_EQ(cf_meta.levels.size(), files_by_level.size()); - - uint64_t cf_size = 0; - size_t file_count = 0; - - for (size_t i = 0; i < 
cf_meta.levels.size(); ++i) { - const auto& level_meta_from_cf = cf_meta.levels[i]; - const auto& level_meta_from_files = files_by_level[i]; - - ASSERT_EQ(level_meta_from_cf.level, i); - ASSERT_EQ(level_meta_from_cf.files.size(), level_meta_from_files.size()); - - file_count += level_meta_from_cf.files.size(); - - uint64_t level_size = 0; - for (size_t j = 0; j < level_meta_from_cf.files.size(); ++j) { - const auto& file_meta_from_cf = level_meta_from_cf.files[j]; - const auto& file_meta_from_files = level_meta_from_files[j]; - - level_size += file_meta_from_cf.size; - - ASSERT_EQ(file_meta_from_cf.file_number, - file_meta_from_files.fd.GetNumber()); - ASSERT_EQ(file_meta_from_cf.file_number, - TableFileNameToNumber(file_meta_from_cf.name)); - ASSERT_EQ(file_meta_from_cf.size, file_meta_from_files.fd.file_size); - ASSERT_EQ(file_meta_from_cf.smallest_seqno, - file_meta_from_files.fd.smallest_seqno); - ASSERT_EQ(file_meta_from_cf.largest_seqno, - file_meta_from_files.fd.largest_seqno); - ASSERT_EQ(file_meta_from_cf.smallestkey, - file_meta_from_files.smallest.user_key().ToString()); - ASSERT_EQ(file_meta_from_cf.largestkey, - file_meta_from_files.largest.user_key().ToString()); - ASSERT_EQ(file_meta_from_cf.oldest_blob_file_number, - file_meta_from_files.oldest_blob_file_number); - ASSERT_EQ(file_meta_from_cf.oldest_ancester_time, - file_meta_from_files.oldest_ancester_time); - ASSERT_EQ(file_meta_from_cf.file_creation_time, - file_meta_from_files.file_creation_time); - ASSERT_GE(file_meta_from_cf.file_creation_time, start_time); - ASSERT_LE(file_meta_from_cf.file_creation_time, end_time); - ASSERT_EQ(file_meta_from_cf.epoch_number, - file_meta_from_files.epoch_number); - ASSERT_GE(file_meta_from_cf.oldest_ancester_time, start_time); - ASSERT_LE(file_meta_from_cf.oldest_ancester_time, end_time); - // More from FileStorageInfo - ASSERT_EQ(file_meta_from_cf.file_type, kTableFile); - ASSERT_EQ(file_meta_from_cf.name, - "/" + file_meta_from_cf.relative_filename); - ASSERT_EQ(file_meta_from_cf.directory, file_meta_from_cf.db_path); - } - - ASSERT_EQ(level_meta_from_cf.size, level_size); - cf_size += level_size; - } - - ASSERT_EQ(cf_meta.file_count, file_count); - ASSERT_EQ(cf_meta.size, cf_size); -} - -void CheckLiveFilesMeta( - const std::vector& live_file_meta, - const std::vector>& files_by_level) { - size_t total_file_count = 0; - for (const auto& f : files_by_level) { - total_file_count += f.size(); - } - - ASSERT_EQ(live_file_meta.size(), total_file_count); - - int level = 0; - int i = 0; - - for (const auto& meta : live_file_meta) { - if (level != meta.level) { - level = meta.level; - i = 0; - } - - ASSERT_LT(i, files_by_level[level].size()); - - const auto& expected_meta = files_by_level[level][i]; - - ASSERT_EQ(meta.column_family_name, kDefaultColumnFamilyName); - ASSERT_EQ(meta.file_number, expected_meta.fd.GetNumber()); - ASSERT_EQ(meta.file_number, TableFileNameToNumber(meta.name)); - ASSERT_EQ(meta.size, expected_meta.fd.file_size); - ASSERT_EQ(meta.smallest_seqno, expected_meta.fd.smallest_seqno); - ASSERT_EQ(meta.largest_seqno, expected_meta.fd.largest_seqno); - ASSERT_EQ(meta.smallestkey, expected_meta.smallest.user_key().ToString()); - ASSERT_EQ(meta.largestkey, expected_meta.largest.user_key().ToString()); - ASSERT_EQ(meta.oldest_blob_file_number, - expected_meta.oldest_blob_file_number); - ASSERT_EQ(meta.epoch_number, expected_meta.epoch_number); - - // More from FileStorageInfo - ASSERT_EQ(meta.file_type, kTableFile); - ASSERT_EQ(meta.name, "/" + meta.relative_filename); - 
ASSERT_EQ(meta.directory, meta.db_path); - - ++i; - } -} - -void AddBlobFile(const ColumnFamilyHandle* cfh, uint64_t blob_file_number, - uint64_t total_blob_count, uint64_t total_blob_bytes, - const std::string& checksum_method, - const std::string& checksum_value, - uint64_t garbage_blob_count = 0, - uint64_t garbage_blob_bytes = 0) { - ColumnFamilyData* cfd = - (static_cast(cfh))->cfd(); - assert(cfd); - - Version* const version = cfd->current(); - assert(version); - - VersionStorageInfo* const storage_info = version->storage_info(); - assert(storage_info); - - // Add a live blob file. - - auto shared_meta = SharedBlobFileMetaData::Create( - blob_file_number, total_blob_count, total_blob_bytes, checksum_method, - checksum_value); - - auto meta = BlobFileMetaData::Create(std::move(shared_meta), - BlobFileMetaData::LinkedSsts(), - garbage_blob_count, garbage_blob_bytes); - - storage_info->AddBlobFile(std::move(meta)); -} - -static void CheckBlobMetaData( - const BlobMetaData& bmd, uint64_t blob_file_number, - uint64_t total_blob_count, uint64_t total_blob_bytes, - const std::string& checksum_method, const std::string& checksum_value, - uint64_t garbage_blob_count = 0, uint64_t garbage_blob_bytes = 0) { - ASSERT_EQ(bmd.blob_file_number, blob_file_number); - ASSERT_EQ(bmd.blob_file_name, BlobFileName("", blob_file_number)); - ASSERT_EQ(bmd.blob_file_size, - total_blob_bytes + BlobLogHeader::kSize + BlobLogFooter::kSize); - - ASSERT_EQ(bmd.total_blob_count, total_blob_count); - ASSERT_EQ(bmd.total_blob_bytes, total_blob_bytes); - ASSERT_EQ(bmd.garbage_blob_count, garbage_blob_count); - ASSERT_EQ(bmd.garbage_blob_bytes, garbage_blob_bytes); - ASSERT_EQ(bmd.checksum_method, checksum_method); - ASSERT_EQ(bmd.checksum_value, checksum_value); -} - -TEST_F(DBTest, MetaDataTest) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.disable_auto_compactions = true; - - int64_t temp_time = 0; - options.env->GetCurrentTime(&temp_time); - uint64_t start_time = static_cast(temp_time); - - DestroyAndReopen(options); - - Random rnd(301); - int key_index = 0; - for (int i = 0; i < 100; ++i) { - // Add a single blob reference to each file - std::string blob_index; - BlobIndex::EncodeBlob(&blob_index, /* blob_file_number */ i + 1000, - /* offset */ 1234, /* size */ 5678, kNoCompression); - - WriteBatch batch; - ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, Key(key_index), - blob_index)); - ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); - - ++key_index; - - // Fill up the rest of the file with random values. 
- GenerateNewFile(&rnd, &key_index, /* nowait */ true); - - ASSERT_OK(Flush()); - } - - std::vector> files_by_level; - dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files_by_level); - - options.env->GetCurrentTime(&temp_time); - uint64_t end_time = static_cast(temp_time); - - ColumnFamilyMetaData cf_meta; - db_->GetColumnFamilyMetaData(&cf_meta); - CheckColumnFamilyMeta(cf_meta, kDefaultColumnFamilyName, files_by_level, - start_time, end_time); - std::vector live_file_meta; - db_->GetLiveFilesMetaData(&live_file_meta); - CheckLiveFilesMeta(live_file_meta, files_by_level); -} - -TEST_F(DBTest, AllMetaDataTest) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - constexpr uint64_t blob_file_number = 234; - constexpr uint64_t total_blob_count = 555; - constexpr uint64_t total_blob_bytes = 66666; - constexpr char checksum_method[] = "CRC32"; - constexpr char checksum_value[] = "\x3d\x87\xff\x57"; - - int64_t temp_time = 0; - options.env->GetCurrentTime(&temp_time).PermitUncheckedError(); - uint64_t start_time = static_cast(temp_time); - - Random rnd(301); - dbfull()->TEST_LockMutex(); - for (int cf = 0; cf < 2; cf++) { - AddBlobFile(handles_[cf], blob_file_number * (cf + 1), - total_blob_count * (cf + 1), total_blob_bytes * (cf + 1), - checksum_method, checksum_value); - } - dbfull()->TEST_UnlockMutex(); - - std::vector all_meta; - db_->GetAllColumnFamilyMetaData(&all_meta); - - std::vector> default_files_by_level; - std::vector> pikachu_files_by_level; - dbfull()->TEST_GetFilesMetaData(handles_[0], &default_files_by_level); - dbfull()->TEST_GetFilesMetaData(handles_[1], &pikachu_files_by_level); - - options.env->GetCurrentTime(&temp_time).PermitUncheckedError(); - uint64_t end_time = static_cast(temp_time); - - ASSERT_EQ(all_meta.size(), 2); - for (int cf = 0; cf < 2; cf++) { - const auto& cfmd = all_meta[cf]; - if (cf == 0) { - CheckColumnFamilyMeta(cfmd, "default", default_files_by_level, start_time, - end_time); - } else { - CheckColumnFamilyMeta(cfmd, "pikachu", pikachu_files_by_level, start_time, - end_time); - } - ASSERT_EQ(cfmd.blob_files.size(), 1U); - const auto& bmd = cfmd.blob_files[0]; - ASSERT_EQ(cfmd.blob_file_count, 1U); - ASSERT_EQ(cfmd.blob_file_size, bmd.blob_file_size); - ASSERT_EQ(NormalizePath(bmd.blob_file_path), NormalizePath(dbname_)); - CheckBlobMetaData(bmd, blob_file_number * (cf + 1), - total_blob_count * (cf + 1), total_blob_bytes * (cf + 1), - checksum_method, checksum_value); - } -} - -namespace { -void MinLevelHelper(DBTest* self, Options& options) { - Random rnd(301); - - for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; - num++) { - std::vector values; - // Write 120KB (12 values, each 10K) - for (int i = 0; i < 12; i++) { - values.push_back(rnd.RandomString(10000)); - ASSERT_OK(self->Put(DBTestBase::Key(i), values[i])); - } - ASSERT_OK(self->dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1); - } - - // generate one more file in level-0, and should trigger level-0 compaction - std::vector values; - for (int i = 0; i < 12; i++) { - values.push_back(rnd.RandomString(10000)); - ASSERT_OK(self->Put(DBTestBase::Key(i), values[i])); - } - ASSERT_OK(self->dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(self->NumTableFilesAtLevel(0), 0); - ASSERT_EQ(self->NumTableFilesAtLevel(1), 1); -} - -// returns false if the calling-Test should be skipped 
-bool MinLevelToCompress(CompressionType& type, Options& options, int wbits, - int lev, int strategy) { - fprintf(stderr, - "Test with compression options : window_bits = %d, level = %d, " - "strategy = %d}\n", - wbits, lev, strategy); - options.write_buffer_size = 100 << 10; // 100KB - options.arena_block_size = 4096; - options.num_levels = 3; - options.level0_file_num_compaction_trigger = 3; - options.create_if_missing = true; - - if (Snappy_Supported()) { - type = kSnappyCompression; - fprintf(stderr, "using snappy\n"); - } else if (Zlib_Supported()) { - type = kZlibCompression; - fprintf(stderr, "using zlib\n"); - } else if (BZip2_Supported()) { - type = kBZip2Compression; - fprintf(stderr, "using bzip2\n"); - } else if (LZ4_Supported()) { - type = kLZ4Compression; - fprintf(stderr, "using lz4\n"); - } else if (XPRESS_Supported()) { - type = kXpressCompression; - fprintf(stderr, "using xpress\n"); - } else if (ZSTD_Supported()) { - type = kZSTD; - fprintf(stderr, "using ZSTD\n"); - } else { - fprintf(stderr, "skipping test, compression disabled\n"); - return false; - } - options.compression_per_level.resize(options.num_levels); - - // do not compress L0 - for (int i = 0; i < 1; i++) { - options.compression_per_level[i] = kNoCompression; - } - for (int i = 1; i < options.num_levels; i++) { - options.compression_per_level[i] = type; - } - return true; -} -} // anonymous namespace - -TEST_F(DBTest, MinLevelToCompress1) { - Options options = CurrentOptions(); - CompressionType type = kSnappyCompression; - if (!MinLevelToCompress(type, options, -14, -1, 0)) { - return; - } - Reopen(options); - MinLevelHelper(this, options); - - // do not compress L0 and L1 - for (int i = 0; i < 2; i++) { - options.compression_per_level[i] = kNoCompression; - } - for (int i = 2; i < options.num_levels; i++) { - options.compression_per_level[i] = type; - } - DestroyAndReopen(options); - MinLevelHelper(this, options); -} - -TEST_F(DBTest, MinLevelToCompress2) { - Options options = CurrentOptions(); - CompressionType type = kSnappyCompression; - if (!MinLevelToCompress(type, options, 15, -1, 0)) { - return; - } - Reopen(options); - MinLevelHelper(this, options); - - // do not compress L0 and L1 - for (int i = 0; i < 2; i++) { - options.compression_per_level[i] = kNoCompression; - } - for (int i = 2; i < options.num_levels; i++) { - options.compression_per_level[i] = type; - } - DestroyAndReopen(options); - MinLevelHelper(this, options); -} - -// This test may fail because of a legit case that multiple L0 files -// are trivial moved to L1. -TEST_F(DBTest, DISABLED_RepeatedWritesToSameKey) { - do { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; // Small write buffer - CreateAndReopenWithCF({"pikachu"}, options); - - // We must have at most one file per level except for level-0, - // which may have up to kL0_StopWritesTrigger files. 
- const int kMaxFiles = - options.num_levels + options.level0_stop_writes_trigger; - - Random rnd(301); - std::string value = - rnd.RandomString(static_cast(2 * options.write_buffer_size)); - for (int i = 0; i < 5 * kMaxFiles; i++) { - ASSERT_OK(Put(1, "key", value)); - ASSERT_LE(TotalTableFiles(1), kMaxFiles); - } - } while (ChangeCompactOptions()); -} - -static bool Between(uint64_t val, uint64_t low, uint64_t high) { - bool result = (val >= low) && (val <= high); - if (!result) { - fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", - (unsigned long long)(val), (unsigned long long)(low), - (unsigned long long)(high)); - } - return result; -} - -TEST_F(DBTest, ApproximateSizesMemTable) { - Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large write buffer - options.compression = kNoCompression; - options.create_if_missing = true; - DestroyAndReopen(options); - auto default_cf = db_->DefaultColumnFamily(); - - const int N = 128; - Random rnd(301); - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(1024))); - } - - uint64_t size; - std::string start = Key(50); - std::string end = Key(60); - Range r(start, end); - SizeApproximationOptions size_approx_options; - size_approx_options.include_memtables = true; - size_approx_options.include_files = true; - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); - ASSERT_GT(size, 6000); - ASSERT_LT(size, 204800); - // Zero if not including mem table - ASSERT_OK(db_->GetApproximateSizes(&r, 1, &size)); - ASSERT_EQ(size, 0); - - start = Key(500); - end = Key(600); - r = Range(start, end); - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); - ASSERT_EQ(size, 0); - - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(1000 + i), rnd.RandomString(1024))); - } - - start = Key(500); - end = Key(600); - r = Range(start, end); - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); - ASSERT_EQ(size, 0); - - start = Key(100); - end = Key(1020); - r = Range(start, end); - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); - ASSERT_GT(size, 6000); - - options.max_write_buffer_number = 8; - options.min_write_buffer_number_to_merge = 5; - options.write_buffer_size = 1024 * N; // Not very large - DestroyAndReopen(options); - default_cf = db_->DefaultColumnFamily(); - - int keys[N * 3]; - for (int i = 0; i < N; i++) { - keys[i * 3] = i * 5; - keys[i * 3 + 1] = i * 5 + 1; - keys[i * 3 + 2] = i * 5 + 2; - } - // MemTable entry counting is estimated and can vary greatly depending on - // layout. Thus, using deterministic seed for test stability. 
- RandomShuffle(std::begin(keys), std::end(keys), rnd.Next()); - - for (int i = 0; i < N * 3; i++) { - ASSERT_OK(Put(Key(keys[i] + 1000), rnd.RandomString(1024))); - } - - start = Key(100); - end = Key(300); - r = Range(start, end); - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); - ASSERT_EQ(size, 0); - - start = Key(1050); - end = Key(1080); - r = Range(start, end); - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); - ASSERT_GT(size, 6000); - - start = Key(2100); - end = Key(2300); - r = Range(start, end); - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); - ASSERT_EQ(size, 0); - - start = Key(1050); - end = Key(1080); - r = Range(start, end); - uint64_t size_with_mt, size_without_mt; - ASSERT_OK(db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, - &size_with_mt)); - ASSERT_GT(size_with_mt, 6000); - ASSERT_OK(db_->GetApproximateSizes(&r, 1, &size_without_mt)); - ASSERT_EQ(size_without_mt, 0); - - ASSERT_OK(Flush()); - - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(i + 1000), rnd.RandomString(1024))); - } - - start = Key(1050); - end = Key(1080); - r = Range(start, end); - ASSERT_OK(db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, - &size_with_mt)); - ASSERT_OK(db_->GetApproximateSizes(&r, 1, &size_without_mt)); - ASSERT_GT(size_with_mt, size_without_mt); - ASSERT_GT(size_without_mt, 6000); - - // Check that include_memtables flag works as expected - size_approx_options.include_memtables = false; - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); - ASSERT_EQ(size, size_without_mt); - - // Check that files_size_error_margin works as expected, when the heuristic - // conditions are not met - start = Key(1); - end = Key(1000 + N - 2); - r = Range(start, end); - size_approx_options.files_size_error_margin = -1.0; // disabled - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); - uint64_t size2; - size_approx_options.files_size_error_margin = 0.5; // enabled, but not used - ASSERT_OK( - db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size2)); - ASSERT_EQ(size, size2); -} - -TEST_F(DBTest, ApproximateSizesFilesWithErrorMargin) { - // Roughly 4 keys per data block, 1000 keys per file, - // with filter substantially larger than a data block - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(16)); - table_options.block_size = 100; - Options options = CurrentOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.write_buffer_size = 24 * 1024; - options.compression = kNoCompression; - options.create_if_missing = true; - options.target_file_size_base = 24 * 1024; - DestroyAndReopen(options); - const auto default_cf = db_->DefaultColumnFamily(); - - const int N = 64000; - Random rnd(301); - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(24))); - } - // Flush everything to files - ASSERT_OK(Flush()); - // Compact the entire key space into the next level - ASSERT_OK( - db_->CompactRange(CompactRangeOptions(), default_cf, nullptr, nullptr)); - - // Write more keys - for (int i = N; i < (N + N / 4); i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(24))); - } - // Flush everything to files again - ASSERT_OK(Flush()); - - // Wait for compaction to finish - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - { - const std::string start = Key(0); - const std::string 
end = Key(2 * N); - const Range r(start, end); - - SizeApproximationOptions size_approx_options; - size_approx_options.include_memtables = false; - size_approx_options.include_files = true; - size_approx_options.files_size_error_margin = -1.0; // disabled - - // Get the precise size without any approximation heuristic - uint64_t size; - ASSERT_OK(db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, - &size)); - ASSERT_NE(size, 0); - - // Get the size with an approximation heuristic - uint64_t size2; - const double error_margin = 0.2; - size_approx_options.files_size_error_margin = error_margin; - ASSERT_OK(db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, - &size2)); - ASSERT_LT(size2, size * (1 + error_margin)); - ASSERT_GT(size2, size * (1 - error_margin)); - } - - { - // Ensure that metadata is not falsely attributed only to the last data in - // the file. (In some applications, filters can be large portion of data - // size.) - // Perform many queries over small range, enough to ensure crossing file - // boundary, and make sure we never see a spike for large filter. - for (int i = 0; i < 3000; i += 10) { - const std::string start = Key(i); - const std::string end = Key(i + 11); // overlap by 1 key - const Range r(start, end); - uint64_t size; - ASSERT_OK(db_->GetApproximateSizes(&r, 1, &size)); - ASSERT_LE(size, 11 * 100); - } - } -} - -TEST_F(DBTest, GetApproximateMemTableStats) { - Options options = CurrentOptions(); - options.write_buffer_size = 100000000; - options.compression = kNoCompression; - options.create_if_missing = true; - DestroyAndReopen(options); - - const int N = 128; - Random rnd(301); - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(1024))); - } - - uint64_t count; - uint64_t size; - - std::string start = Key(50); - std::string end = Key(60); - Range r(start, end); - db_->GetApproximateMemTableStats(r, &count, &size); - ASSERT_GT(count, 0); - ASSERT_LE(count, N); - ASSERT_GT(size, 6000); - ASSERT_LT(size, 204800); - - start = Key(500); - end = Key(600); - r = Range(start, end); - db_->GetApproximateMemTableStats(r, &count, &size); - ASSERT_EQ(count, 0); - ASSERT_EQ(size, 0); - - ASSERT_OK(Flush()); - - start = Key(50); - end = Key(60); - r = Range(start, end); - db_->GetApproximateMemTableStats(r, &count, &size); - ASSERT_EQ(count, 0); - ASSERT_EQ(size, 0); - - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(1000 + i), rnd.RandomString(1024))); - } - - start = Key(100); - end = Key(1020); - r = Range(start, end); - db_->GetApproximateMemTableStats(r, &count, &size); - ASSERT_GT(count, 20); - ASSERT_GT(size, 6000); -} - -TEST_F(DBTest, ApproximateSizes) { - do { - Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large write buffer - options.compression = kNoCompression; - options.create_if_missing = true; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - uint64_t size; - ASSERT_OK(Size("", "xyz", 1, &size)); - ASSERT_TRUE(Between(size, 0, 0)); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_OK(Size("", "xyz", 1, &size)); - ASSERT_TRUE(Between(size, 0, 0)); - - // Write 8MB (80 values, each 100K) - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - const int N = 80; - static const int S1 = 100000; - static const int S2 = 105000; // Allow some expansion from metadata - Random rnd(301); - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(1, Key(i), rnd.RandomString(S1))); - } - - // 0 because GetApproximateSizes() does not account for memtable 
space - ASSERT_OK(Size("", Key(50), 1, &size)); - ASSERT_TRUE(Between(size, 0, 0)); - - // Check sizes across recovery by reopening a few times - for (int run = 0; run < 3; run++) { - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - for (int compact_start = 0; compact_start < N; compact_start += 10) { - for (int i = 0; i < N; i += 10) { - ASSERT_OK(Size("", Key(i), 1, &size)); - ASSERT_TRUE(Between(size, S1 * i, S2 * i)); - ASSERT_OK(Size("", Key(i) + ".suffix", 1, &size)); - ASSERT_TRUE(Between(size, S1 * (i + 1), S2 * (i + 1))); - ASSERT_OK(Size(Key(i), Key(i + 10), 1, &size)); - ASSERT_TRUE(Between(size, S1 * 10, S2 * 10)); - } - ASSERT_OK(Size("", Key(50), 1, &size)); - ASSERT_TRUE(Between(size, S1 * 50, S2 * 50)); - ASSERT_OK(Size("", Key(50) + ".suffix", 1, &size)); - ASSERT_TRUE(Between(size, S1 * 50, S2 * 50)); - - std::string cstart_str = Key(compact_start); - std::string cend_str = Key(compact_start + 9); - Slice cstart = cstart_str; - Slice cend = cend_str; - ASSERT_OK(dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1])); - } - - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - ASSERT_GT(NumTableFilesAtLevel(1, 1), 0); - } - // ApproximateOffsetOf() is not yet implemented in plain table format. - } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction | - kSkipPlainTable | kSkipHashIndex)); -} - -TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) { - do { - Options options = CurrentOptions(); - options.compression = kNoCompression; - CreateAndReopenWithCF({"pikachu"}, options); - - Random rnd(301); - std::string big1 = rnd.RandomString(100000); - ASSERT_OK(Put(1, Key(0), rnd.RandomString(10000))); - ASSERT_OK(Put(1, Key(1), rnd.RandomString(10000))); - ASSERT_OK(Put(1, Key(2), big1)); - ASSERT_OK(Put(1, Key(3), rnd.RandomString(10000))); - ASSERT_OK(Put(1, Key(4), big1)); - ASSERT_OK(Put(1, Key(5), rnd.RandomString(10000))); - ASSERT_OK(Put(1, Key(6), rnd.RandomString(300000))); - ASSERT_OK(Put(1, Key(7), rnd.RandomString(10000))); - - // Check sizes across recovery by reopening a few times - uint64_t size; - for (int run = 0; run < 3; run++) { - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - ASSERT_OK(Size("", Key(0), 1, &size)); - ASSERT_TRUE(Between(size, 0, 0)); - ASSERT_OK(Size("", Key(1), 1, &size)); - ASSERT_TRUE(Between(size, 10000, 11000)); - ASSERT_OK(Size("", Key(2), 1, &size)); - ASSERT_TRUE(Between(size, 20000, 21000)); - ASSERT_OK(Size("", Key(3), 1, &size)); - ASSERT_TRUE(Between(size, 120000, 121000)); - ASSERT_OK(Size("", Key(4), 1, &size)); - ASSERT_TRUE(Between(size, 130000, 131000)); - ASSERT_OK(Size("", Key(5), 1, &size)); - ASSERT_TRUE(Between(size, 230000, 232000)); - ASSERT_OK(Size("", Key(6), 1, &size)); - ASSERT_TRUE(Between(size, 240000, 242000)); - // Ensure some overhead is accounted for, even without including all - ASSERT_OK(Size("", Key(7), 1, &size)); - ASSERT_TRUE(Between(size, 540500, 545000)); - ASSERT_OK(Size("", Key(8), 1, &size)); - ASSERT_TRUE(Between(size, 550500, 555000)); - - ASSERT_OK(Size(Key(3), Key(5), 1, &size)); - ASSERT_TRUE(Between(size, 110100, 111000)); - - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); - } - // ApproximateOffsetOf() is not yet implemented in plain table format. 
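[Editorial aside, not part of the deleted test file: the GetApproximateMemTableStats test a little further above uses the default-column-family convenience overload, which looks only at memtable contents. A minimal sketch, with the `db` pointer, range, and helper name assumed for illustration:]

#include "rocksdb/db.h"

// Sketch only: flushed data is not counted here, unlike GetApproximateSizes.
void MemtableStatsForRange(rocksdb::DB* db) {
  rocksdb::Range r("key050", "key060");
  uint64_t count = 0, size = 0;
  db->GetApproximateMemTableStats(r, &count, &size);
}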
- } while (ChangeOptions(kSkipPlainTable)); -} - -TEST_F(DBTest, Snapshot) { - env_->SetMockSleep(); - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); - ASSERT_OK(Put(0, "foo", "0v1")); - ASSERT_OK(Put(1, "foo", "1v1")); - - const Snapshot* s1 = db_->GetSnapshot(); - ASSERT_EQ(1U, GetNumSnapshots()); - uint64_t time_snap1 = GetTimeOldestSnapshots(); - ASSERT_GT(time_snap1, 0U); - ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - ASSERT_EQ(GetTimeOldestSnapshots(), - static_cast(s1->GetUnixTime())); - ASSERT_OK(Put(0, "foo", "0v2")); - ASSERT_OK(Put(1, "foo", "1v2")); - - env_->MockSleepForSeconds(1); - - const Snapshot* s2 = db_->GetSnapshot(); - ASSERT_EQ(2U, GetNumSnapshots()); - ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - ASSERT_EQ(GetTimeOldestSnapshots(), - static_cast(s1->GetUnixTime())); - ASSERT_OK(Put(0, "foo", "0v3")); - ASSERT_OK(Put(1, "foo", "1v3")); - - { - ManagedSnapshot s3(db_); - ASSERT_EQ(3U, GetNumSnapshots()); - ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - ASSERT_EQ(GetTimeOldestSnapshots(), - static_cast(s1->GetUnixTime())); - - ASSERT_OK(Put(0, "foo", "0v4")); - ASSERT_OK(Put(1, "foo", "1v4")); - ASSERT_EQ("0v1", Get(0, "foo", s1)); - ASSERT_EQ("1v1", Get(1, "foo", s1)); - ASSERT_EQ("0v2", Get(0, "foo", s2)); - ASSERT_EQ("1v2", Get(1, "foo", s2)); - ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot())); - ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot())); - ASSERT_EQ("0v4", Get(0, "foo")); - ASSERT_EQ("1v4", Get(1, "foo")); - } - - ASSERT_EQ(2U, GetNumSnapshots()); - ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - ASSERT_EQ(GetTimeOldestSnapshots(), - static_cast(s1->GetUnixTime())); - ASSERT_EQ("0v1", Get(0, "foo", s1)); - ASSERT_EQ("1v1", Get(1, "foo", s1)); - ASSERT_EQ("0v2", Get(0, "foo", s2)); - ASSERT_EQ("1v2", Get(1, "foo", s2)); - ASSERT_EQ("0v4", Get(0, "foo")); - ASSERT_EQ("1v4", Get(1, "foo")); - - db_->ReleaseSnapshot(s1); - ASSERT_EQ("0v2", Get(0, "foo", s2)); - ASSERT_EQ("1v2", Get(1, "foo", s2)); - ASSERT_EQ("0v4", Get(0, "foo")); - ASSERT_EQ("1v4", Get(1, "foo")); - ASSERT_EQ(1U, GetNumSnapshots()); - ASSERT_LT(time_snap1, GetTimeOldestSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), s2->GetSequenceNumber()); - ASSERT_EQ(GetTimeOldestSnapshots(), - static_cast(s2->GetUnixTime())); - - db_->ReleaseSnapshot(s2); - ASSERT_EQ(0U, GetNumSnapshots()); - ASSERT_EQ(GetSequenceOldestSnapshots(), 0); - ASSERT_EQ("0v4", Get(0, "foo")); - ASSERT_EQ("1v4", Get(1, "foo")); - } while (ChangeOptions()); -} - -TEST_F(DBTest, HiddenValuesAreRemoved) { - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - uint64_t size; - do { - Options options = CurrentOptions(options_override); - CreateAndReopenWithCF({"pikachu"}, options); - Random rnd(301); - FillLevels("a", "z", 1); - - std::string big = rnd.RandomString(50000); - ASSERT_OK(Put(1, "foo", big)); - ASSERT_OK(Put(1, "pastfoo", "v")); - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(Put(1, "foo", "tiny")); - ASSERT_OK(Put(1, "pastfoo2", "v2")); // Advance sequence number one more - - ASSERT_OK(Flush(1)); - ASSERT_GT(NumTableFilesAtLevel(0, 1), 0); - - ASSERT_EQ(big, Get(1, "foo", snapshot)); - ASSERT_OK(Size("", 
"pastfoo", 1, &size)); - ASSERT_TRUE(Between(size, 50000, 60000)); - db_->ReleaseSnapshot(snapshot); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]"); - Slice x("x"); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1])); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]"); - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - ASSERT_GE(NumTableFilesAtLevel(1, 1), 1); - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1])); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]"); - - ASSERT_OK(Size("", "pastfoo", 1, &size)); - ASSERT_TRUE(Between(size, 0, 1000)); - // ApproximateOffsetOf() is not yet implemented in plain table format, - // which is used by Size(). - } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction | - kSkipPlainTable)); -} - -TEST_F(DBTest, UnremovableSingleDelete) { - // If we compact: - // - // Put(A, v1) Snapshot SingleDelete(A) Put(A, v2) - // - // We do not want to end up with: - // - // Put(A, v1) Snapshot Put(A, v2) - // - // Because a subsequent SingleDelete(A) would delete the Put(A, v2) - // but not Put(A, v1), so Get(A) would return v1. - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - do { - Options options = CurrentOptions(options_override); - options.disable_auto_compactions = true; - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "first")); - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(SingleDelete(1, "foo")); - ASSERT_OK(Put(1, "foo", "second")); - ASSERT_OK(Flush(1)); - - ASSERT_EQ("first", Get(1, "foo", snapshot)); - ASSERT_EQ("second", Get(1, "foo")); - - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1)); - - ASSERT_OK(SingleDelete(1, "foo")); - - ASSERT_EQ("first", Get(1, "foo", snapshot)); - ASSERT_EQ("NOT_FOUND", Get(1, "foo")); - - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], - nullptr, nullptr)); - - ASSERT_EQ("first", Get(1, "foo", snapshot)); - ASSERT_EQ("NOT_FOUND", Get(1, "foo")); - db_->ReleaseSnapshot(snapshot); - // Skip FIFO and universal compaction because they do not apply to the test - // case. Skip MergePut because single delete does not get removed when it - // encounters a merge. - } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | - kSkipMergePut)); -} - -TEST_F(DBTest, DeletionMarkers1) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Flush(1)); - const int last = 2; - MoveFilesToLevel(last, 1); - // foo => v1 is now in last level - ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1); - - // Place a table at level last-1 to prevent merging with preceding mutation - ASSERT_OK(Put(1, "a", "begin")); - ASSERT_OK(Put(1, "z", "end")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(last - 1, 1); - ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1); - ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1); - - ASSERT_OK(Delete(1, "foo")); - ASSERT_OK(Put(1, "foo", "v2")); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]"); - ASSERT_OK(Flush(1)); // Moves to level last-2 - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]"); - Slice z("z"); - ASSERT_OK(dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1])); - // DEL eliminated, but v1 remains because we aren't compacting that level - // (DEL can be eliminated because v2 hides v1). 
- ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]"); - ASSERT_OK( - dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1])); - // Merging last-1 w/ last, so we are the base level for "foo", so - // DEL is removed. (as is v1). - ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]"); -} - -TEST_F(DBTest, DeletionMarkers2) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Flush(1)); - const int last = 2; - MoveFilesToLevel(last, 1); - // foo => v1 is now in last level - ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1); - - // Place a table at level last-1 to prevent merging with preceding mutation - ASSERT_OK(Put(1, "a", "begin")); - ASSERT_OK(Put(1, "z", "end")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(last - 1, 1); - ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1); - ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1); - - ASSERT_OK(Delete(1, "foo")); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]"); - ASSERT_OK(Flush(1)); // Moves to level last-2 - ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]"); - ASSERT_OK( - dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1])); - // DEL kept: "last" file overlaps - ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]"); - ASSERT_OK( - dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1])); - // Merging last-1 w/ last, so we are the base level for "foo", so - // DEL is removed. (as is v1). - ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); -} - -TEST_F(DBTest, OverlapInLevel0) { - do { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - - // Fill levels 1 and 2 to disable the pushing of new memtables to levels > - // 0. - ASSERT_OK(Put(1, "100", "v100")); - ASSERT_OK(Put(1, "999", "v999")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(2, 1); - ASSERT_OK(Delete(1, "100")); - ASSERT_OK(Delete(1, "999")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(1, 1); - ASSERT_EQ("0,1,1", FilesPerLevel(1)); - - // Make files spanning the following ranges in level-0: - // files[0] 200 .. 900 - // files[1] 300 .. 500 - // Note that files are sorted by smallest key. - ASSERT_OK(Put(1, "300", "v300")); - ASSERT_OK(Put(1, "500", "v500")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "200", "v200")); - ASSERT_OK(Put(1, "600", "v600")); - ASSERT_OK(Put(1, "900", "v900")); - ASSERT_OK(Flush(1)); - ASSERT_EQ("2,1,1", FilesPerLevel(1)); - - // BEGIN addition to existing test - // Take this opportunity to verify SST unique ids (including Plain table) - TablePropertiesCollection tbc; - ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[1], &tbc)); - VerifySstUniqueIds(tbc); - // END addition to existing test - - // Compact away the placeholder files we created initially - ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); - ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1])); - ASSERT_EQ("2", FilesPerLevel(1)); - - // Do a memtable compaction. Before bug-fix, the compaction would - // not detect the overlap with level-0 files and would incorrectly place - // the deletion in a deeper level. 
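[Editorial aside, not part of the deleted test file: the OverlapInLevel0 test above verifies SST unique IDs via GetPropertiesOfAllTables. A minimal sketch of reading table properties, with the `db` pointer and helper name assumed for illustration:]

#include <cstdio>
#include "rocksdb/db.h"
#include "rocksdb/table_properties.h"

// Sketch only: dumps per-file entry counts for the default column family.
void DumpTableEntryCounts(rocksdb::DB* db) {
  rocksdb::TablePropertiesCollection props;
  rocksdb::Status s =
      db->GetPropertiesOfAllTables(db->DefaultColumnFamily(), &props);
  if (!s.ok()) return;
  for (const auto& kv : props) {
    // kv.first is the SST file path, kv.second its TableProperties.
    fprintf(stderr, "%s: %llu entries\n", kv.first.c_str(),
            static_cast<unsigned long long>(kv.second->num_entries));
  }
}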
- ASSERT_OK(Delete(1, "600")); - ASSERT_OK(Flush(1)); - ASSERT_EQ("3", FilesPerLevel(1)); - ASSERT_EQ("NOT_FOUND", Get(1, "600")); - } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction)); -} - -TEST_F(DBTest, ComparatorCheck) { - class NewComparator : public Comparator { - public: - const char* Name() const override { return "rocksdb.NewComparator"; } - int Compare(const Slice& a, const Slice& b) const override { - return BytewiseComparator()->Compare(a, b); - } - void FindShortestSeparator(std::string* s, const Slice& l) const override { - BytewiseComparator()->FindShortestSeparator(s, l); - } - void FindShortSuccessor(std::string* key) const override { - BytewiseComparator()->FindShortSuccessor(key); - } - }; - Options new_options, options; - NewComparator cmp; - do { - options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - new_options = CurrentOptions(); - new_options.comparator = &cmp; - // only the non-default column family has non-matching comparator - Status s = TryReopenWithColumnFamilies( - {"default", "pikachu"}, std::vector({options, new_options})); - ASSERT_TRUE(!s.ok()); - ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos) - << s.ToString(); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTest, CustomComparator) { - class NumberComparator : public Comparator { - public: - const char* Name() const override { return "test.NumberComparator"; } - int Compare(const Slice& a, const Slice& b) const override { - return ToNumber(a) - ToNumber(b); - } - void FindShortestSeparator(std::string* s, const Slice& l) const override { - ToNumber(*s); // Check format - ToNumber(l); // Check format - } - void FindShortSuccessor(std::string* key) const override { - ToNumber(*key); // Check format - } - - private: - static int ToNumber(const Slice& x) { - // Check that there are no extra characters. 
- EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']') - << EscapeString(x); - int val; - char ignored; - EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1) - << EscapeString(x); - return val; - } - }; - Options new_options; - NumberComparator cmp; - do { - new_options = CurrentOptions(); - new_options.create_if_missing = true; - new_options.comparator = &cmp; - new_options.write_buffer_size = 4096; // Compact more often - new_options.arena_block_size = 4096; - new_options = CurrentOptions(new_options); - DestroyAndReopen(new_options); - CreateAndReopenWithCF({"pikachu"}, new_options); - ASSERT_OK(Put(1, "[10]", "ten")); - ASSERT_OK(Put(1, "[0x14]", "twenty")); - for (int i = 0; i < 2; i++) { - ASSERT_EQ("ten", Get(1, "[10]")); - ASSERT_EQ("ten", Get(1, "[0xa]")); - ASSERT_EQ("twenty", Get(1, "[20]")); - ASSERT_EQ("twenty", Get(1, "[0x14]")); - ASSERT_EQ("NOT_FOUND", Get(1, "[15]")); - ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]")); - Compact(1, "[0]", "[9999]"); - } - - for (int run = 0; run < 2; run++) { - for (int i = 0; i < 1000; i++) { - char buf[100]; - snprintf(buf, sizeof(buf), "[%d]", i * 10); - ASSERT_OK(Put(1, buf, buf)); - } - Compact(1, "[0]", "[1000000]"); - } - } while (ChangeCompactOptions()); -} - -TEST_F(DBTest, DBOpen_Options) { - Options options = CurrentOptions(); - std::string dbname = test::PerThreadDBPath("db_options_test"); - ASSERT_OK(DestroyDB(dbname, options)); - - // Does not exist, and create_if_missing == false: error - DB* db = nullptr; - options.create_if_missing = false; - Status s = DB::Open(options, dbname, &db); - ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr); - ASSERT_TRUE(db == nullptr); - - // Does not exist, and create_if_missing == true: OK - options.create_if_missing = true; - s = DB::Open(options, dbname, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != nullptr); - - delete db; - db = nullptr; - - // Does exist, and error_if_exists == true: error - options.create_if_missing = false; - options.error_if_exists = true; - s = DB::Open(options, dbname, &db); - ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr); - ASSERT_TRUE(db == nullptr); - - // Does exist, and error_if_exists == false: OK - options.create_if_missing = true; - options.error_if_exists = false; - s = DB::Open(options, dbname, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != nullptr); - - delete db; - db = nullptr; -} - -TEST_F(DBTest, DBOpen_Change_NumLevels) { - Options options = CurrentOptions(); - options.create_if_missing = true; - DestroyAndReopen(options); - ASSERT_TRUE(db_ != nullptr); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "a", "123")); - ASSERT_OK(Put(1, "b", "234")); - ASSERT_OK(Flush(1)); - MoveFilesToLevel(3, 1); - Close(); - - options.create_if_missing = false; - options.num_levels = 2; - Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr); - ASSERT_TRUE(db_ == nullptr); -} - -TEST_F(DBTest, DestroyDBMetaDatabase) { - std::string dbname = test::PerThreadDBPath("db_meta"); - ASSERT_OK(env_->CreateDirIfMissing(dbname)); - std::string metadbname = MetaDatabaseName(dbname, 0); - ASSERT_OK(env_->CreateDirIfMissing(metadbname)); - std::string metametadbname = MetaDatabaseName(metadbname, 0); - ASSERT_OK(env_->CreateDirIfMissing(metametadbname)); - - // Destroy previous versions if they exist. Using the long way. 
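[Editorial aside, not part of the deleted test file: the DBOpen_Options test above exercises the create_if_missing / error_if_exists open semantics. A minimal sketch of the usual open-or-create pattern; the path argument and helper name are assumptions for illustration:]

#include <string>
#include "rocksdb/db.h"

// Sketch only: opens an existing DB or creates a new one at `path`.
rocksdb::Status OpenOrCreate(const std::string& path, rocksdb::DB** db) {
  rocksdb::Options options;
  options.create_if_missing = true;   // create the DB if it does not exist
  options.error_if_exists = false;    // opening an existing DB is fine
  return rocksdb::DB::Open(options, path, db);
}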
- Options options = CurrentOptions(); - ASSERT_OK(DestroyDB(metametadbname, options)); - ASSERT_OK(DestroyDB(metadbname, options)); - ASSERT_OK(DestroyDB(dbname, options)); - - // Setup databases - DB* db = nullptr; - ASSERT_OK(DB::Open(options, dbname, &db)); - delete db; - db = nullptr; - ASSERT_OK(DB::Open(options, metadbname, &db)); - delete db; - db = nullptr; - ASSERT_OK(DB::Open(options, metametadbname, &db)); - delete db; - db = nullptr; - - // Delete databases - ASSERT_OK(DestroyDB(dbname, options)); - - // Check if deletion worked. - options.create_if_missing = false; - ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok()); - ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok()); - ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok()); -} - -TEST_F(DBTest, SnapshotFiles) { - do { - Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large write buffer - CreateAndReopenWithCF({"pikachu"}, options); - - Random rnd(301); - - // Write 8MB (80 values, each 100K) - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - std::vector values; - for (int i = 0; i < 80; i++) { - values.push_back(rnd.RandomString(100000)); - ASSERT_OK(Put((i < 40), Key(i), values[i])); - } - - // assert that nothing makes it to disk yet. - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - - // get a file snapshot - uint64_t manifest_number = 0; - uint64_t manifest_size = 0; - std::vector files; - ASSERT_OK(dbfull()->DisableFileDeletions()); - ASSERT_OK(dbfull()->GetLiveFiles(files, &manifest_size)); - - // CURRENT, MANIFEST, OPTIONS, *.sst files (one for each CF) - ASSERT_EQ(files.size(), 5U); - - uint64_t number = 0; - FileType type; - - // copy these files to a new snapshot directory - std::string snapdir = dbname_ + ".snapdir/"; - if (env_->FileExists(snapdir).ok()) { - ASSERT_OK(DestroyDir(env_, snapdir)); - } - ASSERT_OK(env_->CreateDir(snapdir)); - - for (size_t i = 0; i < files.size(); i++) { - // our clients require that GetLiveFiles returns - // files with "/" as first character! 
- ASSERT_EQ(files[i][0], '/'); - std::string src = dbname_ + files[i]; - std::string dest = snapdir + files[i]; - - uint64_t size; - ASSERT_OK(env_->GetFileSize(src, &size)); - - // record the number and the size of the - // latest manifest file - if (ParseFileName(files[i].substr(1), &number, &type)) { - if (type == kDescriptorFile) { - ASSERT_EQ(manifest_number, 0); - manifest_number = number; - ASSERT_GE(size, manifest_size); - size = manifest_size; // copy only valid MANIFEST data - } - } - CopyFile(src, dest, size); - } - - // release file snapshot - ASSERT_OK(dbfull()->EnableFileDeletions(/*force*/ false)); - // overwrite one key, this key should not appear in the snapshot - std::vector extras; - for (unsigned int i = 0; i < 1; i++) { - extras.push_back(rnd.RandomString(100000)); - ASSERT_OK(Put(0, Key(i), extras[i])); - } - - // verify that data in the snapshot are correct - std::vector column_families; - column_families.emplace_back("default", ColumnFamilyOptions()); - column_families.emplace_back("pikachu", ColumnFamilyOptions()); - std::vector cf_handles; - DB* snapdb; - DBOptions opts; - opts.env = env_; - opts.create_if_missing = false; - Status stat = - DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb); - ASSERT_OK(stat); - - ReadOptions roptions; - std::string val; - for (unsigned int i = 0; i < 80; i++) { - ASSERT_OK(snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val)); - ASSERT_EQ(values[i].compare(val), 0); - } - for (auto cfh : cf_handles) { - delete cfh; - } - delete snapdb; - - // look at the new live files after we added an 'extra' key - // and after we took the first snapshot. - uint64_t new_manifest_number = 0; - uint64_t new_manifest_size = 0; - std::vector newfiles; - ASSERT_OK(dbfull()->DisableFileDeletions()); - ASSERT_OK(dbfull()->GetLiveFiles(newfiles, &new_manifest_size)); - - // find the new manifest file. assert that this manifest file is - // the same one as in the previous snapshot. But its size should be - // larger because we added an extra key after taking the - // previous shapshot. 
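[Editorial aside, not part of the deleted test file: the SnapshotFiles test above follows the classic backup pattern of pinning live files, copying them, then re-enabling deletions. A minimal sketch of that pattern; the `db` pointer and helper name are assumptions for illustration:]

#include <string>
#include <vector>
#include "rocksdb/db.h"

// Sketch only: pins live files, lists them, then unpins. Copying is elided.
rocksdb::Status PinLiveFilesForCopy(rocksdb::DB* db,
                                    std::vector<std::string>* files,
                                    uint64_t* manifest_size) {
  rocksdb::Status s = db->DisableFileDeletions();  // keep files stable while copying
  if (!s.ok()) return s;
  s = db->GetLiveFiles(*files, manifest_size, /*flush_memtable=*/true);
  // ... copy *files here; returned paths are relative to the DB dir and start with '/'.
  rocksdb::Status re = db->EnableFileDeletions(/*force=*/false);
  return s.ok() ? re : s;
}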
- for (size_t i = 0; i < newfiles.size(); i++) { - std::string src = dbname_ + "/" + newfiles[i]; - // record the lognumber and the size of the - // latest manifest file - if (ParseFileName(newfiles[i].substr(1), &number, &type)) { - if (type == kDescriptorFile) { - ASSERT_EQ(new_manifest_number, 0); - uint64_t size; - new_manifest_number = number; - ASSERT_OK(env_->GetFileSize(src, &size)); - ASSERT_GE(size, new_manifest_size); - } - } - } - ASSERT_EQ(manifest_number, new_manifest_number); - ASSERT_GT(new_manifest_size, manifest_size); - - // Also test GetLiveFilesStorageInfo - std::vector new_infos; - ASSERT_OK(db_->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(), - &new_infos)); - - // Close DB (while deletions disabled) - Close(); - - // Validate - for (auto& info : new_infos) { - std::string path = info.directory + "/" + info.relative_filename; - uint64_t size; - ASSERT_OK(env_->GetFileSize(path, &size)); - if (info.trim_to_size) { - ASSERT_LE(info.size, size); - } else if (!info.replacement_contents.empty()) { - ASSERT_EQ(info.size, info.replacement_contents.size()); - } else { - ASSERT_EQ(info.size, size); - } - if (info.file_type == kDescriptorFile) { - ASSERT_EQ(info.file_number, manifest_number); - } - } - } while (ChangeCompactOptions()); -} - -TEST_F(DBTest, ReadonlyDBGetLiveManifestSize) { - do { - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 2; - DestroyAndReopen(options); - - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - - uint64_t manifest_size = 0; - std::vector files; - ASSERT_OK(dbfull()->GetLiveFiles(files, &manifest_size)); - - for (const std::string& f : files) { - uint64_t number = 0; - FileType type; - if (ParseFileName(f.substr(1), &number, &type)) { - if (type == kDescriptorFile) { - uint64_t size_on_disk; - ASSERT_OK(env_->GetFileSize(dbname_ + "/" + f, &size_on_disk)); - ASSERT_EQ(manifest_size, size_on_disk); - break; - } - } - } - Close(); - } while (ChangeCompactOptions()); -} - -TEST_F(DBTest, GetLiveBlobFiles) { - // Note: the following prevents an otherwise harmless data race between the - // test setup code (AddBlobFile) below and the periodic stat dumping thread. - Options options = CurrentOptions(); - options.stats_dump_period_sec = 0; - - constexpr uint64_t blob_file_number = 234; - constexpr uint64_t total_blob_count = 555; - constexpr uint64_t total_blob_bytes = 66666; - constexpr char checksum_method[] = "CRC32"; - constexpr char checksum_value[] = "\x3d\x87\xff\x57"; - constexpr uint64_t garbage_blob_count = 0; - constexpr uint64_t garbage_blob_bytes = 0; - - Reopen(options); - - AddBlobFile(db_->DefaultColumnFamily(), blob_file_number, total_blob_count, - total_blob_bytes, checksum_method, checksum_value, - garbage_blob_count, garbage_blob_bytes); - // Make sure it appears in the results returned by GetLiveFiles. 
- uint64_t manifest_size = 0; - std::vector files; - ASSERT_OK(dbfull()->GetLiveFiles(files, &manifest_size)); - - ASSERT_FALSE(files.empty()); - ASSERT_EQ(files[0], BlobFileName("", blob_file_number)); - - ColumnFamilyMetaData cfmd; - - db_->GetColumnFamilyMetaData(&cfmd); - ASSERT_EQ(cfmd.blob_files.size(), 1); - const BlobMetaData& bmd = cfmd.blob_files[0]; - - CheckBlobMetaData(bmd, blob_file_number, total_blob_count, total_blob_bytes, - checksum_method, checksum_value, garbage_blob_count, - garbage_blob_bytes); - ASSERT_EQ(NormalizePath(bmd.blob_file_path), NormalizePath(dbname_)); - ASSERT_EQ(cfmd.blob_file_count, 1U); - ASSERT_EQ(cfmd.blob_file_size, bmd.blob_file_size); -} - -TEST_F(DBTest, PurgeInfoLogs) { - Options options = CurrentOptions(); - options.keep_log_file_num = 5; - options.create_if_missing = true; - options.env = env_; - for (int mode = 0; mode <= 1; mode++) { - if (mode == 1) { - options.db_log_dir = dbname_ + "_logs"; - ASSERT_OK(env_->CreateDirIfMissing(options.db_log_dir)); - } else { - options.db_log_dir = ""; - } - for (int i = 0; i < 8; i++) { - Reopen(options); - } - - std::vector files; - ASSERT_OK(env_->GetChildren( - options.db_log_dir.empty() ? dbname_ : options.db_log_dir, &files)); - int info_log_count = 0; - for (std::string file : files) { - if (file.find("LOG") != std::string::npos) { - info_log_count++; - } - } - ASSERT_EQ(5, info_log_count); - - Destroy(options); - // For mode (1), test DestroyDB() to delete all the logs under DB dir. - // For mode (2), no info log file should have been put under DB dir. - // Since dbname_ has no children, there is no need to loop db_files - std::vector db_files; - ASSERT_TRUE(env_->GetChildren(dbname_, &db_files).IsNotFound()); - ASSERT_TRUE(db_files.empty()); - - if (mode == 1) { - // Cleaning up - ASSERT_OK(env_->GetChildren(options.db_log_dir, &files)); - for (std::string file : files) { - ASSERT_OK(env_->DeleteFile(options.db_log_dir + "/" + file)); - } - ASSERT_OK(env_->DeleteDir(options.db_log_dir)); - } - } -} - -// Multi-threaded test: -namespace { - -static const int kColumnFamilies = 10; -static const int kNumThreads = 10; -static const int kTestSeconds = 10; -static const int kNumKeys = 1000; - -struct MTState { - DBTest* test; - std::atomic counter[kNumThreads]; -}; - -struct MTThread { - MTState* state; - int id; - bool multiget_batched; -}; - -static void MTThreadBody(void* arg) { - MTThread* t = reinterpret_cast(arg); - int id = t->id; - DB* db = t->state->test->db_; - int counter = 0; - std::shared_ptr clock = SystemClock::Default(); - auto end_micros = clock->NowMicros() + kTestSeconds * 1000000U; - - fprintf(stderr, "... starting thread %d\n", id); - Random rnd(1000 + id); - char valbuf[1500]; - while (clock->NowMicros() < end_micros) { - t->state->counter[id].store(counter, std::memory_order_release); - - int key = rnd.Uniform(kNumKeys); - char keybuf[20]; - snprintf(keybuf, sizeof(keybuf), "%016d", key); - - if (rnd.OneIn(2)) { - // Write values of the form . - // into each of the CFs - // We add some padding for force compactions. - int unique_id = rnd.Uniform(1000000); - - // Half of the time directly use WriteBatch. Half of the time use - // WriteBatchWithIndex. 
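[Editorial aside, not part of the deleted test file: the multi-threaded writer above relies on a single WriteBatch applying atomically across column families. A minimal sketch of that write pattern; the `db` pointer, handles, keys, and helper name are assumptions for illustration:]

#include "rocksdb/db.h"
#include "rocksdb/write_batch.h"

// Sketch only: one Write() applies the whole batch atomically across CFs.
rocksdb::Status AtomicPutAcrossCFs(rocksdb::DB* db,
                                   rocksdb::ColumnFamilyHandle* cf_a,
                                   rocksdb::ColumnFamilyHandle* cf_b) {
  rocksdb::WriteBatch batch;
  batch.Put(cf_a, "key", "value-for-a");
  batch.Put(cf_b, "key", "value-for-b");
  return db->Write(rocksdb::WriteOptions(), &batch);
}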
- if (rnd.OneIn(2)) { - WriteBatch batch; - for (int cf = 0; cf < kColumnFamilies; ++cf) { - snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id, - static_cast(counter), cf, unique_id); - ASSERT_OK(batch.Put(t->state->test->handles_[cf], Slice(keybuf), - Slice(valbuf))); - } - ASSERT_OK(db->Write(WriteOptions(), &batch)); - } else { - WriteBatchWithIndex batch(db->GetOptions().comparator); - for (int cf = 0; cf < kColumnFamilies; ++cf) { - snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id, - static_cast(counter), cf, unique_id); - ASSERT_OK(batch.Put(t->state->test->handles_[cf], Slice(keybuf), - Slice(valbuf))); - } - ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch())); - } - } else { - // Read a value and verify that it matches the pattern written above - // and that writes to all column families were atomic (unique_id is the - // same) - std::vector keys(kColumnFamilies, Slice(keybuf)); - std::vector values; - std::vector statuses; - if (!t->multiget_batched) { - statuses = db->MultiGet(ReadOptions(), t->state->test->handles_, keys, - &values); - } else { - std::vector pin_values(keys.size()); - statuses.resize(keys.size()); - const Snapshot* snapshot = db->GetSnapshot(); - ReadOptions ro; - ro.snapshot = snapshot; - for (int cf = 0; cf < kColumnFamilies; ++cf) { - db->MultiGet(ro, t->state->test->handles_[cf], 1, &keys[cf], - &pin_values[cf], &statuses[cf]); - } - db->ReleaseSnapshot(snapshot); - values.resize(keys.size()); - for (int cf = 0; cf < kColumnFamilies; ++cf) { - if (statuses[cf].ok()) { - values[cf].assign(pin_values[cf].data(), pin_values[cf].size()); - } - } - } - Status s = statuses[0]; - // all statuses have to be the same - for (size_t i = 1; i < statuses.size(); ++i) { - // they are either both ok or both not-found - ASSERT_TRUE((s.ok() && statuses[i].ok()) || - (s.IsNotFound() && statuses[i].IsNotFound())); - } - if (s.IsNotFound()) { - // Key has not yet been written - } else { - // Check that the writer thread counter is >= the counter in the value - ASSERT_OK(s); - int unique_id = -1; - for (int i = 0; i < kColumnFamilies; ++i) { - int k, w, c, cf, u; - ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w, &c, - &cf, &u)) - << values[i]; - ASSERT_EQ(k, key); - ASSERT_GE(w, 0); - ASSERT_LT(w, kNumThreads); - ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire)); - ASSERT_EQ(cf, i); - if (i == 0) { - unique_id = u; - } else { - // this checks that updates across column families happened - // atomically -- all unique ids are the same - ASSERT_EQ(u, unique_id); - } - } - } - } - counter++; - } - fprintf(stderr, "... 
stopping thread %d after %d ops\n", id, int(counter)); -} - -} // anonymous namespace - -class MultiThreadedDBTest - : public DBTest, - public ::testing::WithParamInterface> { - public: - void SetUp() override { - std::tie(option_config_, multiget_batched_) = GetParam(); - } - - static std::vector GenerateOptionConfigs() { - std::vector optionConfigs; - for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) { - optionConfigs.push_back(optionConfig); - } - return optionConfigs; - } - - bool multiget_batched_; -}; - -TEST_P(MultiThreadedDBTest, MultiThreaded) { - if (option_config_ == kPipelinedWrite) return; - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - Options options = CurrentOptions(options_override); - std::vector cfs; - for (int i = 1; i < kColumnFamilies; ++i) { - cfs.push_back(std::to_string(i)); - } - Reopen(options); - CreateAndReopenWithCF(cfs, options); - // Initialize state - MTState mt; - mt.test = this; - for (int id = 0; id < kNumThreads; id++) { - mt.counter[id].store(0, std::memory_order_release); - } - - // Start threads - MTThread thread[kNumThreads]; - for (int id = 0; id < kNumThreads; id++) { - thread[id].state = &mt; - thread[id].id = id; - thread[id].multiget_batched = multiget_batched_; - env_->StartThread(MTThreadBody, &thread[id]); - } - - env_->WaitForJoin(); -} - -INSTANTIATE_TEST_CASE_P( - MultiThreaded, MultiThreadedDBTest, - ::testing::Combine( - ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()), - ::testing::Bool())); - -// Group commit test: -#if !defined(OS_WIN) -// Disable this test temporarily on Travis and appveyor as it fails -// intermittently. Github issue: #4151 -namespace { - -static const int kGCNumThreads = 4; -static const int kGCNumKeys = 1000; - -struct GCThread { - DB* db; - int id; - std::atomic done; -}; - -static void GCThreadBody(void* arg) { - GCThread* t = reinterpret_cast(arg); - int id = t->id; - DB* db = t->db; - WriteOptions wo; - - for (int i = 0; i < kGCNumKeys; ++i) { - std::string kv(std::to_string(i + id * kGCNumKeys)); - ASSERT_OK(db->Put(wo, kv, kv)); - } - t->done = true; -} - -} // anonymous namespace - -TEST_F(DBTest, GroupCommitTest) { - do { - Options options = CurrentOptions(); - options.env = env_; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - Reopen(options); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"WriteThread::JoinBatchGroup:BeganWaiting", - "DBImpl::WriteImpl:BeforeLeaderEnters"}, - {"WriteThread::AwaitState:BlockingWaiting", - "WriteThread::EnterAsBatchGroupLeader:End"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Start threads - GCThread thread[kGCNumThreads]; - for (int id = 0; id < kGCNumThreads; id++) { - thread[id].id = id; - thread[id].db = db_; - thread[id].done = false; - env_->StartThread(GCThreadBody, &thread[id]); - } - env_->WaitForJoin(); - - ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0); - - std::vector expected_db; - for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) { - expected_db.push_back(std::to_string(i)); - } - std::sort(expected_db.begin(), expected_db.end()); - - Iterator* itr = db_->NewIterator(ReadOptions()); - itr->SeekToFirst(); - for (auto x : expected_db) { - ASSERT_TRUE(itr->Valid()); - ASSERT_EQ(itr->key().ToString(), x); - ASSERT_EQ(itr->value().ToString(), x); - itr->Next(); - } - ASSERT_TRUE(!itr->Valid()); - delete itr; - - HistogramData hist_data; - options.statistics->histogramData(DB_WRITE, 
&hist_data); - ASSERT_GT(hist_data.average, 0.0); - } while (ChangeOptions(kSkipNoSeekToLast)); -} -#endif // OS_WIN - -namespace { -using KVMap = std::map; -} - -class ModelDB : public DB { - public: - class ModelSnapshot : public Snapshot { - public: - KVMap map_; - - SequenceNumber GetSequenceNumber() const override { - // no need to call this - assert(false); - return 0; - } - - int64_t GetUnixTime() const override { - // no need to call this - assert(false); - return 0; - } - - uint64_t GetTimestamp() const override { - // no need to call this - assert(false); - return 0; - } - }; - - explicit ModelDB(const Options& options) : options_(options) {} - using DB::Put; - Status Put(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k, - const Slice& v) override { - WriteBatch batch; - Status s = batch.Put(cf, k, v); - if (!s.ok()) { - return s; - } - return Write(o, &batch); - } - Status Put(const WriteOptions& /*o*/, ColumnFamilyHandle* /*cf*/, - const Slice& /*k*/, const Slice& /*ts*/, - const Slice& /*v*/) override { - return Status::NotSupported(); - } - - using DB::PutEntity; - Status PutEntity(const WriteOptions& /* options */, - ColumnFamilyHandle* /* column_family */, - const Slice& /* key */, - const WideColumns& /* columns */) override { - return Status::NotSupported(); - } - - using DB::Close; - Status Close() override { return Status::OK(); } - using DB::Delete; - Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf, - const Slice& key) override { - WriteBatch batch; - Status s = batch.Delete(cf, key); - if (!s.ok()) { - return s; - } - return Write(o, &batch); - } - Status Delete(const WriteOptions& /*o*/, ColumnFamilyHandle* /*cf*/, - const Slice& /*key*/, const Slice& /*ts*/) override { - return Status::NotSupported(); - } - using DB::SingleDelete; - Status SingleDelete(const WriteOptions& o, ColumnFamilyHandle* cf, - const Slice& key) override { - WriteBatch batch; - Status s = batch.SingleDelete(cf, key); - if (!s.ok()) { - return s; - } - return Write(o, &batch); - } - Status SingleDelete(const WriteOptions& /*o*/, ColumnFamilyHandle* /*cf*/, - const Slice& /*key*/, const Slice& /*ts*/) override { - return Status::NotSupported(); - } - using DB::Merge; - Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k, - const Slice& v) override { - WriteBatch batch; - Status s = batch.Merge(cf, k, v); - if (!s.ok()) { - return s; - } - return Write(o, &batch); - } - Status Merge(const WriteOptions& /*o*/, ColumnFamilyHandle* /*cf*/, - const Slice& /*k*/, const Slice& /*ts*/, - const Slice& /*value*/) override { - return Status::NotSupported(); - } - using DB::Get; - Status Get(const ReadOptions& /*options*/, ColumnFamilyHandle* /*cf*/, - const Slice& key, PinnableSlice* /*value*/) override { - return Status::NotSupported(key); - } - - using DB::GetMergeOperands; - virtual Status GetMergeOperands( - const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, - const Slice& key, PinnableSlice* /*slice*/, - GetMergeOperandsOptions* /*merge_operands_options*/, - int* /*number_of_operands*/) override { - return Status::NotSupported(key); - } - - using DB::MultiGet; - std::vector MultiGet( - const ReadOptions& /*options*/, - const std::vector& /*column_family*/, - const std::vector& keys, - std::vector* /*values*/) override { - std::vector s(keys.size(), - Status::NotSupported("Not implemented.")); - return s; - } - - using DB::IngestExternalFile; - Status IngestExternalFile( - ColumnFamilyHandle* /*column_family*/, - const std::vector& 
/*external_files*/, - const IngestExternalFileOptions& /*options*/) override { - return Status::NotSupported("Not implemented."); - } - - using DB::IngestExternalFiles; - Status IngestExternalFiles( - const std::vector& /*args*/) override { - return Status::NotSupported("Not implemented"); - } - - using DB::CreateColumnFamilyWithImport; - virtual Status CreateColumnFamilyWithImport( - const ColumnFamilyOptions& /*options*/, - const std::string& /*column_family_name*/, - const ImportColumnFamilyOptions& /*import_options*/, - const ExportImportFilesMetaData& /*metadata*/, - ColumnFamilyHandle** /*handle*/) override { - return Status::NotSupported("Not implemented."); - } - - using DB::VerifyChecksum; - Status VerifyChecksum(const ReadOptions&) override { - return Status::NotSupported("Not implemented."); - } - - using DB::GetPropertiesOfAllTables; - Status GetPropertiesOfAllTables( - ColumnFamilyHandle* /*column_family*/, - TablePropertiesCollection* /*props*/) override { - return Status(); - } - - Status GetPropertiesOfTablesInRange( - ColumnFamilyHandle* /*column_family*/, const Range* /*range*/, - std::size_t /*n*/, TablePropertiesCollection* /*props*/) override { - return Status(); - } - - using DB::KeyMayExist; - bool KeyMayExist(const ReadOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, - std::string* /*value*/, - bool* value_found = nullptr) override { - if (value_found != nullptr) { - *value_found = false; - } - return true; // Not Supported directly - } - using DB::NewIterator; - Iterator* NewIterator(const ReadOptions& options, - ColumnFamilyHandle* /*column_family*/) override { - if (options.snapshot == nullptr) { - KVMap* saved = new KVMap; - *saved = map_; - return new ModelIter(saved, true); - } else { - const KVMap* snapshot_state = - &(reinterpret_cast(options.snapshot)->map_); - return new ModelIter(snapshot_state, false); - } - } - Status NewIterators(const ReadOptions& /*options*/, - const std::vector& /*column_family*/, - std::vector* /*iterators*/) override { - return Status::NotSupported("Not supported yet"); - } - const Snapshot* GetSnapshot() override { - ModelSnapshot* snapshot = new ModelSnapshot; - snapshot->map_ = map_; - return snapshot; - } - - void ReleaseSnapshot(const Snapshot* snapshot) override { - delete reinterpret_cast(snapshot); - } - - Status Write(const WriteOptions& /*options*/, WriteBatch* batch) override { - class Handler : public WriteBatch::Handler { - public: - KVMap* map_; - void Put(const Slice& key, const Slice& value) override { - (*map_)[key.ToString()] = value.ToString(); - } - void Merge(const Slice& /*key*/, const Slice& /*value*/) override { - // ignore merge for now - // (*map_)[key.ToString()] = value.ToString(); - } - void Delete(const Slice& key) override { map_->erase(key.ToString()); } - }; - Handler handler; - handler.map_ = &map_; - return batch->Iterate(&handler); - } - - using DB::GetProperty; - bool GetProperty(ColumnFamilyHandle* /*column_family*/, - const Slice& /*property*/, std::string* /*value*/) override { - return false; - } - using DB::GetIntProperty; - bool GetIntProperty(ColumnFamilyHandle* /*column_family*/, - const Slice& /*property*/, uint64_t* /*value*/) override { - return false; - } - using DB::GetMapProperty; - bool GetMapProperty(ColumnFamilyHandle* /*column_family*/, - const Slice& /*property*/, - std::map* /*value*/) override { - return false; - } - using DB::GetAggregatedIntProperty; - bool GetAggregatedIntProperty(const Slice& /*property*/, - uint64_t* /*value*/) 
override { - return false; - } - using DB::GetApproximateSizes; - Status GetApproximateSizes(const SizeApproximationOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/, - const Range* /*range*/, int n, - uint64_t* sizes) override { - for (int i = 0; i < n; i++) { - sizes[i] = 0; - } - return Status::OK(); - } - using DB::GetApproximateMemTableStats; - void GetApproximateMemTableStats(ColumnFamilyHandle* /*column_family*/, - const Range& /*range*/, - uint64_t* const count, - uint64_t* const size) override { - *count = 0; - *size = 0; - } - using DB::CompactRange; - Status CompactRange(const CompactRangeOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/, - const Slice* /*start*/, const Slice* /*end*/) override { - return Status::NotSupported("Not supported operation."); - } - - Status SetDBOptions( - const std::unordered_map& /*new_options*/) - override { - return Status::NotSupported("Not supported operation."); - } - - using DB::CompactFiles; - Status CompactFiles( - const CompactionOptions& /*compact_options*/, - ColumnFamilyHandle* /*column_family*/, - const std::vector& /*input_file_names*/, - const int /*output_level*/, const int /*output_path_id*/ = -1, - std::vector* const /*output_file_names*/ = nullptr, - CompactionJobInfo* /*compaction_job_info*/ = nullptr) override { - return Status::NotSupported("Not supported operation."); - } - - Status PauseBackgroundWork() override { - return Status::NotSupported("Not supported operation."); - } - - Status ContinueBackgroundWork() override { - return Status::NotSupported("Not supported operation."); - } - - Status EnableAutoCompaction( - const std::vector& /*column_family_handles*/) - override { - return Status::NotSupported("Not supported operation."); - } - - void EnableManualCompaction() override { return; } - - void DisableManualCompaction() override { return; } - - using DB::NumberLevels; - int NumberLevels(ColumnFamilyHandle* /*column_family*/) override { return 1; } - - using DB::MaxMemCompactionLevel; - int MaxMemCompactionLevel(ColumnFamilyHandle* /*column_family*/) override { - return 1; - } - - using DB::Level0StopWriteTrigger; - int Level0StopWriteTrigger(ColumnFamilyHandle* /*column_family*/) override { - return -1; - } - - const std::string& GetName() const override { return name_; } - - Env* GetEnv() const override { return nullptr; } - - using DB::GetOptions; - Options GetOptions(ColumnFamilyHandle* /*column_family*/) const override { - return options_; - } - - using DB::GetDBOptions; - DBOptions GetDBOptions() const override { return options_; } - - using DB::Flush; - Status Flush(const ROCKSDB_NAMESPACE::FlushOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/) override { - Status ret; - return ret; - } - Status Flush( - const ROCKSDB_NAMESPACE::FlushOptions& /*options*/, - const std::vector& /*column_families*/) override { - return Status::OK(); - } - - Status SyncWAL() override { return Status::OK(); } - - Status DisableFileDeletions() override { return Status::OK(); } - - Status EnableFileDeletions(bool /*force*/) override { return Status::OK(); } - - Status GetLiveFiles(std::vector&, uint64_t* /*size*/, - bool /*flush_memtable*/ = true) override { - return Status::OK(); - } - - Status GetLiveFilesChecksumInfo( - FileChecksumList* /*checksum_list*/) override { - return Status::OK(); - } - - Status GetLiveFilesStorageInfo( - const LiveFilesStorageInfoOptions& /*opts*/, - std::vector* /*files*/) override { - return Status::OK(); - } - - Status GetSortedWalFiles(VectorLogPtr& /*files*/) 
override { - return Status::OK(); - } - - Status GetCurrentWalFile( - std::unique_ptr* /*current_log_file*/) override { - return Status::OK(); - } - - virtual Status GetCreationTimeOfOldestFile( - uint64_t* /*creation_time*/) override { - return Status::NotSupported(); - } - - Status DeleteFile(std::string /*name*/) override { return Status::OK(); } - - Status GetUpdatesSince( - ROCKSDB_NAMESPACE::SequenceNumber, - std::unique_ptr*, - const TransactionLogIterator::ReadOptions& /*read_options*/ = - TransactionLogIterator::ReadOptions()) override { - return Status::NotSupported("Not supported in Model DB"); - } - - void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, - ColumnFamilyMetaData* /*metadata*/) override {} - - Status GetDbIdentity(std::string& /*identity*/) const override { - return Status::OK(); - } - - Status GetDbSessionId(std::string& /*session_id*/) const override { - return Status::OK(); - } - - SequenceNumber GetLatestSequenceNumber() const override { return 0; } - - Status IncreaseFullHistoryTsLow(ColumnFamilyHandle* /*cf*/, - std::string /*ts_low*/) override { - return Status::OK(); - } - - Status GetFullHistoryTsLow(ColumnFamilyHandle* /*cf*/, - std::string* /*ts_low*/) override { - return Status::OK(); - } - - ColumnFamilyHandle* DefaultColumnFamily() const override { return nullptr; } - - private: - class ModelIter : public Iterator { - public: - ModelIter(const KVMap* map, bool owned) - : map_(map), owned_(owned), iter_(map_->end()) {} - ~ModelIter() override { - if (owned_) delete map_; - } - bool Valid() const override { return iter_ != map_->end(); } - void SeekToFirst() override { iter_ = map_->begin(); } - void SeekToLast() override { - if (map_->empty()) { - iter_ = map_->end(); - } else { - iter_ = map_->find(map_->rbegin()->first); - } - } - void Seek(const Slice& k) override { - iter_ = map_->lower_bound(k.ToString()); - } - void SeekForPrev(const Slice& k) override { - iter_ = map_->upper_bound(k.ToString()); - Prev(); - } - void Next() override { ++iter_; } - void Prev() override { - if (iter_ == map_->begin()) { - iter_ = map_->end(); - return; - } - --iter_; - } - - Slice key() const override { return iter_->first; } - Slice value() const override { return iter_->second; } - Status status() const override { return Status::OK(); } - - private: - const KVMap* const map_; - const bool owned_; // Do we own map_ - KVMap::const_iterator iter_; - }; - const Options options_; - KVMap map_; - std::string name_ = ""; -}; - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -static std::string RandomKey(Random* rnd, int minimum = 0) { - int len; - do { - len = (rnd->OneIn(3) - ? 1 // Short sometimes to encourage collisions - : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10))); - } while (len < minimum); - return test::RandomKey(rnd, len); -} - -static bool CompareIterators(int step, DB* model, DB* db, - const Snapshot* model_snap, - const Snapshot* db_snap) { - ReadOptions options; - options.snapshot = model_snap; - Iterator* miter = model->NewIterator(options); - options.snapshot = db_snap; - Iterator* dbiter = db->NewIterator(options); - bool ok = true; - int count = 0; - for (miter->SeekToFirst(), dbiter->SeekToFirst(); - ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) { - count++; - if (miter->key().compare(dbiter->key()) != 0) { - fprintf(stderr, "step %d: Key mismatch: '%s' vs. 
'%s'\n", step, - EscapeString(miter->key()).c_str(), - EscapeString(dbiter->key()).c_str()); - ok = false; - break; - } - - if (miter->value().compare(dbiter->value()) != 0) { - fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n", - step, EscapeString(miter->key()).c_str(), - EscapeString(miter->value()).c_str(), - EscapeString(dbiter->value()).c_str()); - ok = false; - } - } - - if (ok) { - if (miter->Valid() != dbiter->Valid()) { - fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n", - step, miter->Valid(), dbiter->Valid()); - ok = false; - } - } - delete miter; - delete dbiter; - return ok; -} - -class DBTestRandomized : public DBTest, - public ::testing::WithParamInterface { - public: - void SetUp() override { option_config_ = GetParam(); } - - static std::vector GenerateOptionConfigs() { - std::vector option_configs; - // skip cuckoo hash as it does not support snapshot. - for (int option_config = kDefault; option_config < kEnd; ++option_config) { - if (!ShouldSkipOptions(option_config, - kSkipDeletesFilterFirst | kSkipNoSeekToLast)) { - option_configs.push_back(option_config); - } - } - option_configs.push_back(kBlockBasedTableWithIndexRestartInterval); - return option_configs; - } -}; - -INSTANTIATE_TEST_CASE_P( - DBTestRandomized, DBTestRandomized, - ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs())); - -TEST_P(DBTestRandomized, Randomized) { - anon::OptionsOverride options_override; - options_override.skip_policy = kSkipNoSnapshot; - Options options = CurrentOptions(options_override); - DestroyAndReopen(options); - - Random rnd(test::RandomSeed() + GetParam()); - ModelDB model(options); - const int N = 10000; - const Snapshot* model_snap = nullptr; - const Snapshot* db_snap = nullptr; - std::string k, v; - for (int step = 0; step < N; step++) { - // TODO(sanjay): Test Get() works - int p = rnd.Uniform(100); - int minimum = 0; - if (option_config_ == kHashSkipList || option_config_ == kHashLinkList || - option_config_ == kPlainTableFirstBytePrefix || - option_config_ == kBlockBasedTableWithWholeKeyHashIndex || - option_config_ == kBlockBasedTableWithPrefixHashIndex) { - minimum = 1; - } - if (p < 45) { // Put - k = RandomKey(&rnd, minimum); - v = rnd.RandomString(rnd.OneIn(20) ? 100 + rnd.Uniform(100) - : rnd.Uniform(8)); - ASSERT_OK(model.Put(WriteOptions(), k, v)); - ASSERT_OK(db_->Put(WriteOptions(), k, v)); - } else if (p < 90) { // Delete - k = RandomKey(&rnd, minimum); - ASSERT_OK(model.Delete(WriteOptions(), k)); - ASSERT_OK(db_->Delete(WriteOptions(), k)); - } else { // Multi-element batch - WriteBatch b; - const int num = rnd.Uniform(8); - for (int i = 0; i < num; i++) { - if (i == 0 || !rnd.OneIn(10)) { - k = RandomKey(&rnd, minimum); - } else { - // Periodically re-use the same key from the previous iter, so - // we have multiple entries in the write batch for the same key - } - if (rnd.OneIn(2)) { - v = rnd.RandomString(rnd.Uniform(10)); - ASSERT_OK(b.Put(k, v)); - } else { - ASSERT_OK(b.Delete(k)); - } - } - ASSERT_OK(model.Write(WriteOptions(), &b)); - ASSERT_OK(db_->Write(WriteOptions(), &b)); - } - - if ((step % 100) == 0) { - // For DB instances that use the hash index + block-based table, the - // iterator will be invalid right when seeking a non-existent key, right - // than return a key that is close to it. 
- if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex && - option_config_ != kBlockBasedTableWithPrefixHashIndex) { - ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); - ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); - } - - // Save a snapshot from each DB this time that we'll use next - // time we compare things, to make sure the current state is - // preserved with the snapshot - if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); - if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); - - Reopen(options); - ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); - - model_snap = model.GetSnapshot(); - db_snap = db_->GetSnapshot(); - } - } - if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); - if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); -} -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_F(DBTest, BlockBasedTablePrefixIndexTest) { - // create a DB with block prefix index - BlockBasedTableOptions table_options; - Options options = CurrentOptions(); - table_options.index_type = BlockBasedTableOptions::kHashSearch; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - - Reopen(options); - ASSERT_OK(Put("k1", "v1")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("k2", "v2")); - - // Reopen with different prefix extractor, make sure everything still works. - // RocksDB should just fall back to the binary index. - options.prefix_extractor.reset(NewFixedPrefixTransform(2)); - - Reopen(options); - ASSERT_EQ("v1", Get("k1")); - ASSERT_EQ("v2", Get("k2")); - - // Back to original - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}})); - ASSERT_EQ("v1", Get("k1")); - ASSERT_EQ("v2", Get("k2")); - - // Same if there's a problem initally loading prefix transform - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTable::Open::ForceNullTablePrefixExtractor", - [&](void* arg) { *static_cast(arg) = true; }); - SyncPoint::GetInstance()->EnableProcessing(); - Reopen(options); - ASSERT_EQ("v1", Get("k1")); - ASSERT_EQ("v2", Get("k2")); - - // Change again - ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}})); - ASSERT_EQ("v1", Get("k1")); - ASSERT_EQ("v2", Get("k2")); - SyncPoint::GetInstance()->DisableProcessing(); - - // Reopen with no prefix extractor, make sure everything still works. - // RocksDB should just fall back to the binary index. 
- table_options.index_type = BlockBasedTableOptions::kBinarySearch; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(); - - Reopen(options); - ASSERT_EQ("v1", Get("k1")); - ASSERT_EQ("v2", Get("k2")); -} - -TEST_F(DBTest, BlockBasedTablePrefixHashIndexTest) { - // create a DB with block prefix index - BlockBasedTableOptions table_options; - Options options = CurrentOptions(); - table_options.index_type = BlockBasedTableOptions::kHashSearch; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(NewCappedPrefixTransform(2)); - - Reopen(options); - ASSERT_OK(Put("kk1", "v1")); - ASSERT_OK(Put("kk2", "v2")); - ASSERT_OK(Put("kk", "v3")); - ASSERT_OK(Put("k", "v4")); - Flush(); - - ASSERT_EQ("v1", Get("kk1")); - ASSERT_EQ("v2", Get("kk2")); - - ASSERT_EQ("v3", Get("kk")); - ASSERT_EQ("v4", Get("k")); -} - -TEST_F(DBTest, BlockBasedTablePrefixIndexTotalOrderSeek) { - // create a DB with block prefix index - BlockBasedTableOptions table_options; - Options options = CurrentOptions(); - options.max_open_files = 10; - table_options.index_type = BlockBasedTableOptions::kHashSearch; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - - // RocksDB sanitize max open files to at least 20. Modify it back. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = static_cast(arg); - *max_open_files = 11; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Reopen(options); - ASSERT_OK(Put("k1", "v1")); - ASSERT_OK(Flush()); - - CompactRangeOptions cro; - cro.change_level = true; - cro.target_level = 1; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - // Force evict tables - dbfull()->TEST_table_cache()->SetCapacity(0); - // Make table cache to keep one entry. - dbfull()->TEST_table_cache()->SetCapacity(1); - - ReadOptions read_options; - read_options.total_order_seek = true; - { - std::unique_ptr iter(db_->NewIterator(read_options)); - iter->Seek("k1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("k1", iter->key().ToString()); - } - - // After total order seek, prefix index should still be used. 
- read_options.total_order_seek = false; - { - std::unique_ptr iter(db_->NewIterator(read_options)); - iter->Seek("k1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("k1", iter->key().ToString()); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBTest, ChecksumTest) { - BlockBasedTableOptions table_options; - Options options = CurrentOptions(); - - table_options.checksum = kCRC32c; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_OK(Put("a", "b")); - ASSERT_OK(Put("c", "d")); - ASSERT_OK(Flush()); // table with crc checksum - - table_options.checksum = kxxHash; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_OK(Put("e", "f")); - ASSERT_OK(Put("g", "h")); - ASSERT_OK(Flush()); // table with xxhash checksum - - table_options.checksum = kCRC32c; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_EQ("b", Get("a")); - ASSERT_EQ("d", Get("c")); - ASSERT_EQ("f", Get("e")); - ASSERT_EQ("h", Get("g")); - - table_options.checksum = kCRC32c; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_EQ("b", Get("a")); - ASSERT_EQ("d", Get("c")); - ASSERT_EQ("f", Get("e")); - ASSERT_EQ("h", Get("g")); -} - -TEST_P(DBTestWithParam, FIFOCompactionTest) { - for (int iter = 0; iter < 2; ++iter) { - // first iteration -- auto compaction - // second iteration -- manual compaction - Options options; - options.compaction_style = kCompactionStyleFIFO; - options.write_buffer_size = 100 << 10; // 100KB - options.arena_block_size = 4096; - options.compaction_options_fifo.max_table_files_size = 500 << 10; // 500KB - options.compression = kNoCompression; - options.create_if_missing = true; - options.max_subcompactions = max_subcompactions_; - if (iter == 1) { - options.disable_auto_compactions = true; - } - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 6; ++i) { - for (int j = 0; j < 110; ++j) { - ASSERT_OK(Put(std::to_string(i * 100 + j), rnd.RandomString(980))); - } - // flush should happen here - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - if (iter == 0) { - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } else { - CompactRangeOptions cro; - cro.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - } - // only 5 files should survive - ASSERT_EQ(NumTableFilesAtLevel(0), 5); - for (int i = 0; i < 50; ++i) { - // these keys should be deleted in previous compaction - ASSERT_EQ("NOT_FOUND", Get(std::to_string(i))); - } - } -} - -TEST_F(DBTest, FIFOCompactionTestWithCompaction) { - Options options; - options.compaction_style = kCompactionStyleFIFO; - options.write_buffer_size = 20 << 10; // 20K - options.arena_block_size = 4096; - options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1MB - options.compaction_options_fifo.allow_compaction = true; - options.level0_file_num_compaction_trigger = 6; - options.compression = kNoCompression; - options.create_if_missing = true; - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 60; i++) { - // Generate and flush a file about 20KB. - for (int j = 0; j < 20; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - // It should be compacted to 10 files. 
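// (Why 10 files: each of the 60 flushes above produces one ~20KB L0 file, and
// with allow_compaction=true every level0_file_num_compaction_trigger=6 newly
// flushed files are merged by an intra-L0 compaction into a single file, so
// roughly 60 / 6 = 10 compacted files remain, still within the 1.5MB
// max_table_files_size cap.)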
- ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - for (int i = 0; i < 60; i++) { - // Generate and flush a file about 20KB. - for (int j = 0; j < 20; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j + 2000), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - - // It should be compacted to no more than 20 files. - ASSERT_GT(NumTableFilesAtLevel(0), 10); - ASSERT_LT(NumTableFilesAtLevel(0), 18); - // Size limit is still guaranteed. - ASSERT_LE(SizeAtLevel(0), - options.compaction_options_fifo.max_table_files_size); -} - -TEST_F(DBTest, FIFOCompactionStyleWithCompactionAndDelete) { - Options options; - options.compaction_style = kCompactionStyleFIFO; - options.write_buffer_size = 20 << 10; // 20K - options.arena_block_size = 4096; - options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1MB - options.compaction_options_fifo.allow_compaction = true; - options.level0_file_num_compaction_trigger = 3; - options.compression = kNoCompression; - options.create_if_missing = true; - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 3; i++) { - // Each file contains a different key which will be dropped later. - ASSERT_OK(Put("a" + std::to_string(i), rnd.RandomString(500))); - ASSERT_OK(Put("key" + std::to_string(i), "")); - ASSERT_OK(Put("z" + std::to_string(i), rnd.RandomString(500))); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_EQ(NumTableFilesAtLevel(0), 1); - for (int i = 0; i < 3; i++) { - ASSERT_EQ("", Get("key" + std::to_string(i))); - } - for (int i = 0; i < 3; i++) { - // Each file contains a different key which will be dropped later. - ASSERT_OK(Put("a" + std::to_string(i), rnd.RandomString(500))); - ASSERT_OK(Delete("key" + std::to_string(i))); - ASSERT_OK(Put("z" + std::to_string(i), rnd.RandomString(500))); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_EQ(NumTableFilesAtLevel(0), 2); - for (int i = 0; i < 3; i++) { - ASSERT_EQ("NOT_FOUND", Get("key" + std::to_string(i))); - } -} - -// Check that FIFO-with-TTL is not supported with max_open_files != -1. -// Github issue #8014 -TEST_F(DBTest, FIFOCompactionWithTTLAndMaxOpenFilesTest) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleFIFO; - options.create_if_missing = true; - options.ttl = 600; // seconds - - // TTL is not supported with max_open_files != -1. - options.max_open_files = 0; - ASSERT_TRUE(TryReopen(options).IsNotSupported()); - - options.max_open_files = 100; - ASSERT_TRUE(TryReopen(options).IsNotSupported()); - - // TTL is supported with unlimited max_open_files - options.max_open_files = -1; - ASSERT_OK(TryReopen(options)); -} - -// Check that FIFO-with-TTL is supported only with BlockBasedTableFactory. 
-TEST_F(DBTest, FIFOCompactionWithTTLAndVariousTableFormatsTest) { - Options options; - options.compaction_style = kCompactionStyleFIFO; - options.create_if_missing = true; - options.ttl = 600; // seconds - - options = CurrentOptions(options); - options.table_factory.reset(NewBlockBasedTableFactory()); - ASSERT_OK(TryReopen(options)); - - Destroy(options); - options.table_factory.reset(NewPlainTableFactory()); - ASSERT_TRUE(TryReopen(options).IsNotSupported()); - - Destroy(options); - options.table_factory.reset(NewAdaptiveTableFactory()); - ASSERT_TRUE(TryReopen(options).IsNotSupported()); -} - -TEST_F(DBTest, FIFOCompactionWithTTLTest) { - Options options; - options.compaction_style = kCompactionStyleFIFO; - options.write_buffer_size = 10 << 10; // 10KB - options.arena_block_size = 4096; - options.compression = kNoCompression; - options.create_if_missing = true; - env_->SetMockSleep(); - options.env = env_; - - // Test to make sure that all files with expired ttl are deleted on next - // manual compaction. - { - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB - options.compaction_options_fifo.allow_compaction = false; - options.ttl = 1 * 60 * 60; // 1 hour - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 10; i++) { - // Generate and flush a file about 10KB. - for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - // Sleep for 2 hours -- which is much greater than TTL. - env_->MockSleepForSeconds(2 * 60 * 60); - - // Since no flushes and compactions have run, the db should still be in - // the same state even after considerable time has passed. - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - } - - // Test to make sure that all files with expired ttl are deleted on next - // automatic compaction. - { - options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB - options.compaction_options_fifo.allow_compaction = false; - options.ttl = 1 * 60 * 60; // 1 hour - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 10; i++) { - // Generate and flush a file about 10KB. - for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - // Sleep for 2 hours -- which is much greater than TTL. - env_->MockSleepForSeconds(2 * 60 * 60); - // Just to make sure that we are in the same state even after sleeping. - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - // Create 1 more file to trigger TTL compaction. The old files are dropped. - for (int i = 0; i < 1; i++) { - for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - } - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Only the new 10 files remain. 
- ASSERT_EQ(NumTableFilesAtLevel(0), 1); - ASSERT_LE(SizeAtLevel(0), - options.compaction_options_fifo.max_table_files_size); - } - - // Test that shows the fall back to size-based FIFO compaction if TTL-based - // deletion doesn't move the total size to be less than max_table_files_size. - { - options.write_buffer_size = 10 << 10; // 10KB - options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB - options.compaction_options_fifo.allow_compaction = false; - options.ttl = 1 * 60 * 60; // 1 hour - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 3; i++) { - // Generate and flush a file about 10KB. - for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_EQ(NumTableFilesAtLevel(0), 3); - - // Sleep for 2 hours -- which is much greater than TTL. - env_->MockSleepForSeconds(2 * 60 * 60); - // Just to make sure that we are in the same state even after sleeping. - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 3); - - for (int i = 0; i < 5; i++) { - for (int j = 0; j < 140; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - // Size limit is still guaranteed. - ASSERT_LE(SizeAtLevel(0), - options.compaction_options_fifo.max_table_files_size); - } - - // Test with TTL + Intra-L0 compactions. - { - options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB - options.compaction_options_fifo.allow_compaction = true; - options.ttl = 1 * 60 * 60; // 1 hour - options.level0_file_num_compaction_trigger = 6; - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 10; i++) { - // Generate and flush a file about 10KB. - for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - // With Intra-L0 compaction, out of 10 files, 6 files will be compacted to 1 - // (due to level0_file_num_compaction_trigger = 6). - // So total files = 1 + remaining 4 = 5. - ASSERT_EQ(NumTableFilesAtLevel(0), 5); - - // Sleep for 2 hours -- which is much greater than TTL. - env_->MockSleepForSeconds(2 * 60 * 60); - // Just to make sure that we are in the same state even after sleeping. - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 5); - - // Create 10 more files. The old 5 files are dropped as their ttl expired. - for (int i = 0; i < 10; i++) { - for (int j = 0; j < 10; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_EQ(NumTableFilesAtLevel(0), 5); - ASSERT_LE(SizeAtLevel(0), - options.compaction_options_fifo.max_table_files_size); - } - - // Test with large TTL + Intra-L0 compactions. - // Files dropped based on size, as ttl doesn't kick in. 
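// (FIFO applies both limits here: the oldest files are dropped either when
// their age exceeds options.ttl or when the total L0 size exceeds
// compaction_options_fifo.max_table_files_size, whichever is hit first.  In
// the block below the mock clock is never advanced past the 1-hour TTL, so
// only the size cap drives deletions.)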
- { - options.write_buffer_size = 20 << 10; // 20K - options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1.5MB - options.compaction_options_fifo.allow_compaction = true; - options.ttl = 1 * 60 * 60; // 1 hour - options.level0_file_num_compaction_trigger = 6; - options = CurrentOptions(options); - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < 60; i++) { - // Generate and flush a file about 20KB. - for (int j = 0; j < 20; j++) { - ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - // It should be compacted to 10 files. - ASSERT_EQ(NumTableFilesAtLevel(0), 10); - - for (int i = 0; i < 60; i++) { - // Generate and flush a file about 20KB. - for (int j = 0; j < 20; j++) { - ASSERT_OK( - Put(std::to_string(i * 20 + j + 2000), rnd.RandomString(980))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - - // It should be compacted to no more than 20 files. - ASSERT_GT(NumTableFilesAtLevel(0), 10); - ASSERT_LT(NumTableFilesAtLevel(0), 18); - // Size limit is still guaranteed. - ASSERT_LE(SizeAtLevel(0), - options.compaction_options_fifo.max_table_files_size); - } -} - -/* - * This test is not reliable enough as it heavily depends on disk behavior. - * Disable as it is flaky. - */ -TEST_F(DBTest, DISABLED_RateLimitingTest) { - Options options = CurrentOptions(); - options.write_buffer_size = 1 << 20; // 1MB - options.level0_file_num_compaction_trigger = 2; - options.target_file_size_base = 1 << 20; // 1MB - options.max_bytes_for_level_base = 4 << 20; // 4MB - options.max_bytes_for_level_multiplier = 4; - options.compression = kNoCompression; - options.create_if_missing = true; - options.env = env_; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.IncreaseParallelism(4); - DestroyAndReopen(options); - - WriteOptions wo; - wo.disableWAL = true; - - // # no rate limiting - Random rnd(301); - uint64_t start = env_->NowMicros(); - // Write ~96M data - for (int64_t i = 0; i < (96 << 10); ++i) { - ASSERT_OK(Put(rnd.RandomString(32), rnd.RandomString((1 << 10) + 1), wo)); - } - uint64_t elapsed = env_->NowMicros() - start; - double raw_rate = env_->bytes_written_ * 1000000.0 / elapsed; - uint64_t rate_limiter_drains = - TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS); - ASSERT_EQ(0, rate_limiter_drains); - Close(); - - // # rate limiting with 0.7 x threshold - options.rate_limiter.reset( - NewGenericRateLimiter(static_cast(0.7 * raw_rate))); - env_->bytes_written_ = 0; - DestroyAndReopen(options); - - start = env_->NowMicros(); - // Write ~96M data - for (int64_t i = 0; i < (96 << 10); ++i) { - ASSERT_OK(Put(rnd.RandomString(32), rnd.RandomString((1 << 10) + 1), wo)); - } - rate_limiter_drains = - TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) - - rate_limiter_drains; - elapsed = env_->NowMicros() - start; - Close(); - ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_); - // Most intervals should've been drained (interval time is 100ms, elapsed is - // micros) - ASSERT_GT(rate_limiter_drains, 0); - ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1); - double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate; - fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio); - ASSERT_TRUE(ratio < 0.8); - - // # rate limiting with half of the raw_rate - options.rate_limiter.reset( - NewGenericRateLimiter(static_cast(raw_rate / 2))); - env_->bytes_written_ = 0; - 
DestroyAndReopen(options);
-
-  start = env_->NowMicros();
-  // Write ~96M data
-  for (int64_t i = 0; i < (96 << 10); ++i) {
-    ASSERT_OK(Put(rnd.RandomString(32), rnd.RandomString((1 << 10) + 1), wo));
-  }
-  elapsed = env_->NowMicros() - start;
-  rate_limiter_drains =
-      TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) -
-      rate_limiter_drains;
-  Close();
-  ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
-  // Most intervals should've been drained (interval time is 100ms, elapsed is
-  // in micros)
-  ASSERT_GT(rate_limiter_drains, elapsed / 100000 / 2);
-  ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1);
-  ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
-  fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio);
-  ASSERT_LT(ratio, 0.6);
-}
-
-// This is a mocked custom rate limiter that does not implement the optional
-// APIs (e.g., RateLimiter::GetTotalPendingRequests())
-class MockedRateLimiterWithNoOptionalAPIImpl : public RateLimiter {
- public:
-  MockedRateLimiterWithNoOptionalAPIImpl() {}
-
-  ~MockedRateLimiterWithNoOptionalAPIImpl() override {}
-
-  void SetBytesPerSecond(int64_t bytes_per_second) override {
-    (void)bytes_per_second;
-  }
-
-  using RateLimiter::Request;
-  void Request(const int64_t bytes, const Env::IOPriority pri,
-               Statistics* stats) override {
-    (void)bytes;
-    (void)pri;
-    (void)stats;
-  }
-
-  int64_t GetSingleBurstBytes() const override { return 200; }
-
-  int64_t GetTotalBytesThrough(
-      const Env::IOPriority pri = Env::IO_TOTAL) const override {
-    (void)pri;
-    return 0;
-  }
-
-  int64_t GetTotalRequests(
-      const Env::IOPriority pri = Env::IO_TOTAL) const override {
-    (void)pri;
-    return 0;
-  }
-
-  int64_t GetBytesPerSecond() const override { return 0; }
-};
-
-// Tests that a custom rate limiter that does not implement the optional APIs
-// (e.g., RateLimiter::GetTotalPendingRequests()) works fine with basic
-// RocksDB operations (e.g., Put, Get, Flush)
-TEST_F(DBTest, CustomedRateLimiterWithNoOptionalAPIImplTest) {
-  Options options = CurrentOptions();
-  options.rate_limiter.reset(new MockedRateLimiterWithNoOptionalAPIImpl());
-  DestroyAndReopen(options);
-  ASSERT_OK(Put("abc", "def"));
-  ASSERT_EQ(Get("abc"), "def");
-  ASSERT_OK(Flush());
-  ASSERT_EQ(Get("abc"), "def");
-}
-
-TEST_F(DBTest, TableOptionsSanitizeTest) {
-  Options options = CurrentOptions();
-  options.create_if_missing = true;
-  DestroyAndReopen(options);
-  ASSERT_EQ(db_->GetOptions().allow_mmap_reads, false);
-
-  options.table_factory.reset(NewPlainTableFactory());
-  options.prefix_extractor.reset(NewNoopTransform());
-  Destroy(options);
-  ASSERT_TRUE(!TryReopen(options).IsNotSupported());
-
-  // Test for check of prefix_extractor when hash index is used for
-  // block-based table
-  BlockBasedTableOptions to;
-  to.index_type = BlockBasedTableOptions::kHashSearch;
-  options = CurrentOptions();
-  options.create_if_missing = true;
-  options.table_factory.reset(NewBlockBasedTableFactory(to));
-  ASSERT_TRUE(TryReopen(options).IsInvalidArgument());
-  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
-  ASSERT_OK(TryReopen(options));
-}
-
-TEST_F(DBTest, ConcurrentMemtableNotSupported) {
-  Options options = CurrentOptions();
-  options.allow_concurrent_memtable_write = true;
-  options.soft_pending_compaction_bytes_limit = 0;
-  options.hard_pending_compaction_bytes_limit = 100;
-  options.create_if_missing = true;
-
-  DestroyDB(dbname_, options);
-  options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4));
-
ASSERT_NOK(TryReopen(options)); - - options.memtable_factory.reset(new SkipListFactory); - ASSERT_OK(TryReopen(options)); - - ColumnFamilyOptions cf_options(options); - cf_options.memtable_factory.reset( - NewHashLinkListRepFactory(4, 0, 3, true, 4)); - ColumnFamilyHandle* handle; - ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle)); -} - - -TEST_F(DBTest, SanitizeNumThreads) { - for (int attempt = 0; attempt < 2; attempt++) { - const size_t kTotalTasks = 8; - test::SleepingBackgroundTask sleeping_tasks[kTotalTasks]; - - Options options = CurrentOptions(); - if (attempt == 0) { - options.max_background_compactions = 3; - options.max_background_flushes = 2; - } - options.create_if_missing = true; - DestroyAndReopen(options); - - for (size_t i = 0; i < kTotalTasks; i++) { - // Insert 5 tasks to low priority queue and 5 tasks to high priority queue - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_tasks[i], - (i < 4) ? Env::Priority::LOW : Env::Priority::HIGH); - } - - // Wait until 10s for they are scheduled. - for (int i = 0; i < 10000; i++) { - if (options.env->GetThreadPoolQueueLen(Env::Priority::LOW) <= 1 && - options.env->GetThreadPoolQueueLen(Env::Priority::HIGH) <= 2) { - break; - } - env_->SleepForMicroseconds(1000); - } - - // pool size 3, total task 4. Queue size should be 1. - ASSERT_EQ(1U, options.env->GetThreadPoolQueueLen(Env::Priority::LOW)); - // pool size 2, total task 4. Queue size should be 2. - ASSERT_EQ(2U, options.env->GetThreadPoolQueueLen(Env::Priority::HIGH)); - - for (size_t i = 0; i < kTotalTasks; i++) { - sleeping_tasks[i].WakeUp(); - sleeping_tasks[i].WaitUntilDone(); - } - - ASSERT_OK(Put("abc", "def")); - ASSERT_EQ("def", Get("abc")); - ASSERT_OK(Flush()); - ASSERT_EQ("def", Get("abc")); - } -} - -TEST_F(DBTest, WriteSingleThreadEntry) { - std::vector threads; - dbfull()->TEST_LockMutex(); - auto w = dbfull()->TEST_BeginWrite(); - threads.emplace_back([&] { ASSERT_OK(Put("a", "b")); }); - env_->SleepForMicroseconds(10000); - threads.emplace_back([&] { ASSERT_OK(Flush()); }); - env_->SleepForMicroseconds(10000); - dbfull()->TEST_UnlockMutex(); - dbfull()->TEST_LockMutex(); - dbfull()->TEST_EndWrite(w); - dbfull()->TEST_UnlockMutex(); - - for (auto& t : threads) { - t.join(); - } -} - -TEST_F(DBTest, ConcurrentFlushWAL) { - const size_t cnt = 100; - Options options; - options.env = env_; - WriteOptions wopt; - ReadOptions ropt; - for (bool two_write_queues : {false, true}) { - for (bool manual_wal_flush : {false, true}) { - options.two_write_queues = two_write_queues; - options.manual_wal_flush = manual_wal_flush; - options.create_if_missing = true; - DestroyAndReopen(options); - std::vector threads; - threads.emplace_back([&] { - for (size_t i = 0; i < cnt; i++) { - auto istr = std::to_string(i); - ASSERT_OK(db_->Put(wopt, db_->DefaultColumnFamily(), "a" + istr, - "b" + istr)); - } - }); - if (two_write_queues) { - threads.emplace_back([&] { - for (size_t i = cnt; i < 2 * cnt; i++) { - auto istr = std::to_string(i); - WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, - wopt.protection_bytes_per_key, - 0 /* default_cf_ts_sz */); - ASSERT_OK(batch.Put("a" + istr, "b" + istr)); - ASSERT_OK( - dbfull()->WriteImpl(wopt, &batch, nullptr, nullptr, 0, true)); - } - }); - } - threads.emplace_back([&] { - for (size_t i = 0; i < cnt * 100; i++) { // FlushWAL is faster than Put - ASSERT_OK(db_->FlushWAL(false)); - } - }); - for (auto& t : threads) { - t.join(); - } - options.create_if_missing = false; - // Recover from the wal 
and make sure that it is not corrupted - Reopen(options); - for (size_t i = 0; i < cnt; i++) { - PinnableSlice pval; - auto istr = std::to_string(i); - ASSERT_OK( - db_->Get(ropt, db_->DefaultColumnFamily(), "a" + istr, &pval)); - ASSERT_TRUE(pval == ("b" + istr)); - } - } - } -} - -// This test failure will be caught with a probability -TEST_F(DBTest, ManualFlushWalAndWriteRace) { - Options options; - options.env = env_; - options.manual_wal_flush = true; - options.create_if_missing = true; - - DestroyAndReopen(options); - - WriteOptions wopts; - wopts.sync = true; - - port::Thread writeThread([&]() { - for (int i = 0; i < 100; i++) { - auto istr = std::to_string(i); - ASSERT_OK(dbfull()->Put(wopts, "key_" + istr, "value_" + istr)); - } - }); - port::Thread flushThread([&]() { - for (int i = 0; i < 100; i++) { - ASSERT_OK(dbfull()->FlushWAL(false)); - } - }); - - writeThread.join(); - flushThread.join(); - ASSERT_OK(dbfull()->Put(wopts, "foo1", "value1")); - ASSERT_OK(dbfull()->Put(wopts, "foo2", "value2")); - Reopen(options); - ASSERT_EQ("value1", Get("foo1")); - ASSERT_EQ("value2", Get("foo2")); -} - -TEST_F(DBTest, DynamicMemtableOptions) { - const uint64_t k64KB = 1 << 16; - const uint64_t k128KB = 1 << 17; - const uint64_t k5KB = 5 * 1024; - Options options; - options.env = env_; - options.create_if_missing = true; - options.compression = kNoCompression; - options.max_background_compactions = 1; - options.write_buffer_size = k64KB; - options.arena_block_size = 16 * 1024; - options.max_write_buffer_number = 2; - // Don't trigger compact/slowdown/stop - options.level0_file_num_compaction_trigger = 1024; - options.level0_slowdown_writes_trigger = 1024; - options.level0_stop_writes_trigger = 1024; - DestroyAndReopen(options); - - auto gen_l0_kb = [this](int size) { - const int kNumPutsBeforeWaitForFlush = 64; - Random rnd(301); - for (int i = 0; i < size; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(1024))); - - // The following condition prevents a race condition between flush jobs - // acquiring work and this thread filling up multiple memtables. Without - // this, the flush might produce less files than expected because - // multiple memtables are flushed into a single L0 file. This race - // condition affects assertion (A). - if (i % kNumPutsBeforeWaitForFlush == kNumPutsBeforeWaitForFlush - 1) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - }; - - // Test write_buffer_size - gen_l0_kb(64); - ASSERT_EQ(NumTableFilesAtLevel(0), 1); - ASSERT_LT(SizeAtLevel(0), k64KB + k5KB); - ASSERT_GT(SizeAtLevel(0), k64KB - k5KB * 2); - - // Clean up L0 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - - // Increase buffer size - ASSERT_OK(dbfull()->SetOptions({ - {"write_buffer_size", "131072"}, - })); - - // The existing memtable inflated 64KB->128KB when we invoked SetOptions(). - // Write 192KB, we should have a 128KB L0 file and a memtable with 64KB data. - gen_l0_kb(192); - ASSERT_EQ(NumTableFilesAtLevel(0), 1); // (A) - ASSERT_LT(SizeAtLevel(0), k128KB + 2 * k5KB); - ASSERT_GT(SizeAtLevel(0), k128KB - 4 * k5KB); - - // Decrease buffer size below current usage - ASSERT_OK(dbfull()->SetOptions({ - {"write_buffer_size", "65536"}, - })); - // The existing memtable became eligible for flush when we reduced its - // capacity to 64KB. Two keys need to be added to trigger flush: first causes - // memtable to be marked full, second schedules the flush. 
Then we should have - // a 128KB L0 file, a 64KB L0 file, and a memtable with just one key. - gen_l0_kb(2); - ASSERT_EQ(NumTableFilesAtLevel(0), 2); - ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB); - ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 4 * k5KB); - - // Test max_write_buffer_number - // Block compaction thread, which will also block the flushes because - // max_background_flushes == 0, so flushes are getting executed by the - // compaction thread - env_->SetBackgroundThreads(1, Env::LOW); - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - // Start from scratch and disable compaction/flush. Flush can only happen - // during compaction but trigger is pretty high - options.disable_auto_compactions = true; - DestroyAndReopen(options); - env_->SetBackgroundThreads(0, Env::HIGH); - - // Put until writes are stopped, bounded by 256 puts. We should see stop at - // ~128KB - int count = 0; - Random rnd(301); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DelayWrite:Wait", - [&](void* /*arg*/) { sleeping_task_low.WakeUp(); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - while (!sleeping_task_low.WokenUp() && count < 256) { - ASSERT_OK(Put(Key(count), rnd.RandomString(1024), WriteOptions())); - count++; - } - ASSERT_GT(static_cast(count), 128 * 0.8); - ASSERT_LT(static_cast(count), 128 * 1.2); - - sleeping_task_low.WaitUntilDone(); - - // Increase - ASSERT_OK(dbfull()->SetOptions({ - {"max_write_buffer_number", "8"}, - })); - // Clean up memtable and L0 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - sleeping_task_low.Reset(); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - count = 0; - while (!sleeping_task_low.WokenUp() && count < 1024) { - ASSERT_OK(Put(Key(count), rnd.RandomString(1024), WriteOptions())); - count++; - } -// Windows fails this test. Will tune in the future and figure out -// approp number -#ifndef OS_WIN - ASSERT_GT(static_cast(count), 512 * 0.8); - ASSERT_LT(static_cast(count), 512 * 1.2); -#endif - sleeping_task_low.WaitUntilDone(); - - // Decrease - ASSERT_OK(dbfull()->SetOptions({ - {"max_write_buffer_number", "4"}, - })); - // Clean up memtable and L0 - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - sleeping_task_low.Reset(); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - count = 0; - while (!sleeping_task_low.WokenUp() && count < 1024) { - ASSERT_OK(Put(Key(count), rnd.RandomString(1024), WriteOptions())); - count++; - } -// Windows fails this test. 
Will tune in the future and figure out
-// an appropriate number
-#ifndef OS_WIN
-  ASSERT_GT(static_cast<double>(count), 256 * 0.8);
-  ASSERT_LT(static_cast<double>(count), 266 * 1.2);
-#endif
-  sleeping_task_low.WaitUntilDone();
-
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
-}
-
-#ifdef ROCKSDB_USING_THREAD_STATUS
-namespace {
-void VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type,
-                          int expected_count) {
-  int op_count = 0;
-  std::vector<ThreadStatus> thread_list;
-  ASSERT_OK(env->GetThreadList(&thread_list));
-  for (auto thread : thread_list) {
-    if (thread.operation_type == op_type) {
-      op_count++;
-    }
-  }
-  ASSERT_EQ(op_count, expected_count);
-}
-}  // anonymous namespace
-
-TEST_F(DBTest, GetThreadStatus) {
-  Options options;
-  options.env = env_;
-  options.enable_thread_tracking = true;
-  TryReopen(options);
-
-  std::vector<ThreadStatus> thread_list;
-  Status s = env_->GetThreadList(&thread_list);
-
-  for (int i = 0; i < 2; ++i) {
-    // repeat the test with a different number of high / low priority threads
-    const int kTestCount = 3;
-    const unsigned int kHighPriCounts[kTestCount] = {3, 2, 5};
-    const unsigned int kLowPriCounts[kTestCount] = {10, 15, 3};
-    const unsigned int kBottomPriCounts[kTestCount] = {2, 1, 4};
-    for (int test = 0; test < kTestCount; ++test) {
-      // Change the number of threads in high / low priority pool.
-      env_->SetBackgroundThreads(kHighPriCounts[test], Env::HIGH);
-      env_->SetBackgroundThreads(kLowPriCounts[test], Env::LOW);
-      env_->SetBackgroundThreads(kBottomPriCounts[test], Env::BOTTOM);
-      // Wait to ensure that all threads have been registered
-      unsigned int thread_type_counts[ThreadStatus::NUM_THREAD_TYPES];
-      // TODO(ajkr): it'd be better if SetBackgroundThreads returned only after
-      // all threads have been registered.
-      // Try up to 60 seconds.
- for (int num_try = 0; num_try < 60000; num_try++) { - env_->SleepForMicroseconds(1000); - thread_list.clear(); - s = env_->GetThreadList(&thread_list); - ASSERT_OK(s); - memset(thread_type_counts, 0, sizeof(thread_type_counts)); - for (auto thread : thread_list) { - ASSERT_LT(thread.thread_type, ThreadStatus::NUM_THREAD_TYPES); - thread_type_counts[thread.thread_type]++; - } - if (thread_type_counts[ThreadStatus::HIGH_PRIORITY] == - kHighPriCounts[test] && - thread_type_counts[ThreadStatus::LOW_PRIORITY] == - kLowPriCounts[test] && - thread_type_counts[ThreadStatus::BOTTOM_PRIORITY] == - kBottomPriCounts[test]) { - break; - } - } - // Verify the number of high-priority threads - ASSERT_EQ(thread_type_counts[ThreadStatus::HIGH_PRIORITY], - kHighPriCounts[test]); - // Verify the number of low-priority threads - ASSERT_EQ(thread_type_counts[ThreadStatus::LOW_PRIORITY], - kLowPriCounts[test]); - // Verify the number of bottom-priority threads - ASSERT_EQ(thread_type_counts[ThreadStatus::BOTTOM_PRIORITY], - kBottomPriCounts[test]); - } - if (i == 0) { - // repeat the test with multiple column families - CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options); - env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, - true); - } - } - ASSERT_OK(db_->DropColumnFamily(handles_[2])); - delete handles_[2]; - handles_.erase(handles_.begin() + 2); - env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, - true); - Close(); - env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, - true); -} - -TEST_F(DBTest, DisableThreadStatus) { - Options options; - options.env = env_; - options.enable_thread_tracking = false; - TryReopen(options); - CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options); - // Verify non of the column family info exists - env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, - false); -} - -TEST_F(DBTest, ThreadStatusFlush) { - Options options; - options.env = env_; - options.write_buffer_size = 100000; // Small write buffer - options.enable_thread_tracking = true; - options = CurrentOptions(options); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"}, - {"DBTest::ThreadStatusFlush:2", "FlushJob::WriteLevel0Table"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - CreateAndReopenWithCF({"pikachu"}, options); - VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0); - - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_EQ("v1", Get(1, "foo")); - VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0); - - uint64_t num_running_flushes = 0; - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningFlushes, - &num_running_flushes)); - ASSERT_EQ(num_running_flushes, 0); - - ASSERT_OK(Put(1, "k1", std::string(100000, 'x'))); // Fill memtable - ASSERT_OK(Put(1, "k2", std::string(100000, 'y'))); // Trigger flush - - // The first sync point is to make sure there's one flush job - // running when we perform VerifyOperationCount(). - TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1"); - VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningFlushes, - &num_running_flushes)); - ASSERT_EQ(num_running_flushes, 1); - // This second sync point is to ensure the flush job will not - // be completed until we already perform VerifyOperationCount(). 
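// (SyncPoint::LoadDependency takes {predecessor, successor} pairs, and
// execution may not pass a successor point until its predecessor has been
// reached.  The {"DBTest::ThreadStatusFlush:2", "FlushJob::WriteLevel0Table"}
// dependency above therefore parks the flush thread at WriteLevel0Table until
// this thread executes the TEST_SYNC_POINT below.)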
- TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) { - const int kTestKeySize = 16; - const int kTestValueSize = 984; - const int kEntrySize = kTestKeySize + kTestValueSize; - const int kEntriesPerBuffer = 100; - Options options; - options.create_if_missing = true; - options.write_buffer_size = kEntrySize * kEntriesPerBuffer; - options.compaction_style = kCompactionStyleLevel; - options.target_file_size_base = options.write_buffer_size; - options.max_bytes_for_level_base = options.target_file_size_base * 2; - options.max_bytes_for_level_multiplier = 2; - options.compression = kNoCompression; - options = CurrentOptions(options); - options.env = env_; - options.enable_thread_tracking = true; - const int kNumL0Files = 4; - options.level0_file_num_compaction_trigger = kNumL0Files; - options.max_subcompactions = max_subcompactions_; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DBTest::ThreadStatusSingleCompaction:0", "DBImpl::BGWorkCompaction"}, - {"CompactionJob::Run():Start", "DBTest::ThreadStatusSingleCompaction:1"}, - {"DBTest::ThreadStatusSingleCompaction:2", "CompactionJob::Run():End"}, - }); - for (int tests = 0; tests < 2; ++tests) { - DestroyAndReopen(options); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - // The Put Phase. - for (int file = 0; file < kNumL0Files; ++file) { - for (int key = 0; key < kEntriesPerBuffer; ++key) { - ASSERT_OK(Put(std::to_string(key + file * kEntriesPerBuffer), - rnd.RandomString(kTestValueSize))); - } - ASSERT_OK(Flush()); - } - // This makes sure a compaction won't be scheduled until - // we have done with the above Put Phase. - uint64_t num_running_compactions = 0; - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningCompactions, - &num_running_compactions)); - ASSERT_EQ(num_running_compactions, 0); - TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0"); - ASSERT_GE(NumTableFilesAtLevel(0), - options.level0_file_num_compaction_trigger); - - // This makes sure at least one compaction is running. - TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1"); - - if (options.enable_thread_tracking) { - // expecting one single L0 to L1 compaction - VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1); - } else { - // If thread tracking is not enabled, compaction count should be 0. - VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0); - } - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningCompactions, - &num_running_compactions)); - ASSERT_EQ(num_running_compactions, 1); - // TODO(yhchiang): adding assert to verify each compaction stage. - TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2"); - - // repeat the test with disabling thread tracking. 
- options.enable_thread_tracking = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_P(DBTestWithParam, PreShutdownManualCompaction) { - Options options = CurrentOptions(); - options.max_subcompactions = max_subcompactions_; - CreateAndReopenWithCF({"pikachu"}, options); - - // iter - 0 with 7 levels - // iter - 1 with 3 levels - for (int iter = 0; iter < 2; ++iter) { - MakeTables(3, "p", "q", 1); - ASSERT_EQ("1,1,1", FilesPerLevel(1)); - - // Compaction range falls before files - Compact(1, "", "c"); - ASSERT_EQ("1,1,1", FilesPerLevel(1)); - - // Compaction range falls after files - Compact(1, "r", "z"); - ASSERT_EQ("1,1,1", FilesPerLevel(1)); - - // Compaction range overlaps files - Compact(1, "p", "q"); - ASSERT_EQ("0,0,1", FilesPerLevel(1)); - - // Populate a different range - MakeTables(3, "c", "e", 1); - ASSERT_EQ("1,1,2", FilesPerLevel(1)); - - // Compact just the new range - Compact(1, "b", "f"); - ASSERT_EQ("0,0,2", FilesPerLevel(1)); - - // Compact all - MakeTables(1, "a", "z", 1); - ASSERT_EQ("1,0,2", FilesPerLevel(1)); - CancelAllBackgroundWork(db_); - ASSERT_TRUE( - db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr) - .IsShutdownInProgress()); - ASSERT_EQ("1,0,2", FilesPerLevel(1)); - - if (iter == 0) { - options = CurrentOptions(); - options.num_levels = 3; - options.create_if_missing = true; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - } - } -} - -TEST_F(DBTest, PreShutdownFlush) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put(1, "key", "value")); - CancelAllBackgroundWork(db_); - Status s = - db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); - ASSERT_TRUE(s.IsShutdownInProgress()); -} - -TEST_P(DBTestWithParam, PreShutdownMultipleCompaction) { - const int kTestKeySize = 16; - const int kTestValueSize = 984; - const int kEntrySize = kTestKeySize + kTestValueSize; - const int kEntriesPerBuffer = 40; - const int kNumL0Files = 4; - - const int kHighPriCount = 3; - const int kLowPriCount = 5; - env_->SetBackgroundThreads(kHighPriCount, Env::HIGH); - env_->SetBackgroundThreads(kLowPriCount, Env::LOW); - - Options options; - options.create_if_missing = true; - options.write_buffer_size = kEntrySize * kEntriesPerBuffer; - options.compaction_style = kCompactionStyleLevel; - options.target_file_size_base = options.write_buffer_size; - options.max_bytes_for_level_base = - options.target_file_size_base * kNumL0Files; - options.compression = kNoCompression; - options = CurrentOptions(options); - options.env = env_; - options.enable_thread_tracking = true; - options.level0_file_num_compaction_trigger = kNumL0Files; - options.max_bytes_for_level_multiplier = 2; - options.max_background_compactions = kLowPriCount; - options.level0_stop_writes_trigger = 1 << 10; - options.level0_slowdown_writes_trigger = 1 << 10; - options.max_subcompactions = max_subcompactions_; - - TryReopen(options); - Random rnd(301); - - std::vector thread_list; - // Delay both flush and compaction - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"}, - {"CompactionJob::Run():Start", - "DBTest::PreShutdownMultipleCompaction:Preshutdown"}, - {"CompactionJob::Run():Start", - "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"}, - {"DBTest::PreShutdownMultipleCompaction:Preshutdown", - "CompactionJob::Run():End"}, - {"CompactionJob::Run():End", - 
"DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Make rocksdb busy - int key = 0; - // check how many threads are doing compaction using GetThreadList - int operation_count[ThreadStatus::NUM_OP_TYPES] = {0}; - for (int file = 0; file < 16 * kNumL0Files; ++file) { - for (int k = 0; k < kEntriesPerBuffer; ++k) { - ASSERT_OK(Put(std::to_string(key++), rnd.RandomString(kTestValueSize))); - } - - ASSERT_OK(env_->GetThreadList(&thread_list)); - for (auto thread : thread_list) { - operation_count[thread.operation_type]++; - } - - // Speed up the test - if (operation_count[ThreadStatus::OP_FLUSH] > 1 && - operation_count[ThreadStatus::OP_COMPACTION] > - 0.6 * options.max_background_compactions) { - break; - } - if (file == 15 * kNumL0Files) { - TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown"); - } - } - - TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown"); - ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1); - CancelAllBackgroundWork(db_); - TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Record the number of compactions at a time. - for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) { - operation_count[i] = 0; - } - ASSERT_OK(env_->GetThreadList(&thread_list)); - for (auto thread : thread_list) { - operation_count[thread.operation_type]++; - } - ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0); -} - -TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) { - const int kTestKeySize = 16; - const int kTestValueSize = 984; - const int kEntrySize = kTestKeySize + kTestValueSize; - const int kEntriesPerBuffer = 40; - const int kNumL0Files = 4; - - const int kHighPriCount = 3; - const int kLowPriCount = 5; - env_->SetBackgroundThreads(kHighPriCount, Env::HIGH); - env_->SetBackgroundThreads(kLowPriCount, Env::LOW); - - Options options; - options.create_if_missing = true; - options.write_buffer_size = kEntrySize * kEntriesPerBuffer; - options.compaction_style = kCompactionStyleLevel; - options.target_file_size_base = options.write_buffer_size; - options.max_bytes_for_level_base = - options.target_file_size_base * kNumL0Files; - options.compression = kNoCompression; - options = CurrentOptions(options); - options.env = env_; - options.enable_thread_tracking = true; - options.level0_file_num_compaction_trigger = kNumL0Files; - options.max_bytes_for_level_multiplier = 2; - options.max_background_compactions = kLowPriCount; - options.level0_stop_writes_trigger = 1 << 10; - options.level0_slowdown_writes_trigger = 1 << 10; - options.max_subcompactions = max_subcompactions_; - - TryReopen(options); - Random rnd(301); - - std::vector thread_list; - // Delay both flush and compaction - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBTest::PreShutdownCompactionMiddle:Preshutdown", - "CompactionJob::Run():Inprogress"}, - {"CompactionJob::Run():Start", - "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"}, - {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"}, - {"CompactionJob::Run():End", - "DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Make rocksdb busy - int key = 0; - // check how many threads are doing compaction using GetThreadList - int operation_count[ThreadStatus::NUM_OP_TYPES] = {0}; - for (int file = 0; file < 16 * kNumL0Files; ++file) { - for (int k = 0; k < 
kEntriesPerBuffer; ++k) { - ASSERT_OK(Put(std::to_string(key++), rnd.RandomString(kTestValueSize))); - } - - ASSERT_OK(env_->GetThreadList(&thread_list)); - for (auto thread : thread_list) { - operation_count[thread.operation_type]++; - } - - // Speed up the test - if (operation_count[ThreadStatus::OP_FLUSH] > 1 && - operation_count[ThreadStatus::OP_COMPACTION] > - 0.6 * options.max_background_compactions) { - break; - } - if (file == 15 * kNumL0Files) { - TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyCompaction"); - } - } - - ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1); - CancelAllBackgroundWork(db_); - TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:Preshutdown"); - TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Record the number of compactions at a time. - for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) { - operation_count[i] = 0; - } - ASSERT_OK(env_->GetThreadList(&thread_list)); - for (auto thread : thread_list) { - operation_count[thread.operation_type]++; - } - ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0); -} - -#endif // ROCKSDB_USING_THREAD_STATUS - -TEST_F(DBTest, FlushOnDestroy) { - WriteOptions wo; - wo.disableWAL = true; - ASSERT_OK(Put("foo", "v1", wo)); - CancelAllBackgroundWork(db_); -} - -TEST_F(DBTest, DynamicLevelCompressionPerLevel) { - if (!Snappy_Supported()) { - return; - } - const int kNKeys = 120; - int keys[kNKeys]; - for (int i = 0; i < kNKeys; i++) { - keys[i] = i; - } - RandomShuffle(std::begin(keys), std::end(keys)); - - Random rnd(301); - Options options; - options.env = env_; - options.create_if_missing = true; - options.db_write_buffer_size = 20480; - options.write_buffer_size = 20480; - options.max_write_buffer_number = 2; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 2; - options.target_file_size_base = 20480; - options.level_compaction_dynamic_level_bytes = true; - options.max_bytes_for_level_base = 102400; - options.max_bytes_for_level_multiplier = 4; - options.max_background_compactions = 1; - options.num_levels = 5; - - options.compression_per_level.resize(3); - options.compression_per_level[0] = kNoCompression; - options.compression_per_level[1] = kNoCompression; - options.compression_per_level[2] = kSnappyCompression; - - OnFileDeletionListener* listener = new OnFileDeletionListener(); - options.listeners.emplace_back(listener); - - DestroyAndReopen(options); - - // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should - // be compressed, so total data size should be more than 80K. - for (int i = 0; i < 20; i++) { - ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(NumTableFilesAtLevel(1), 0); - ASSERT_EQ(NumTableFilesAtLevel(2), 0); - ASSERT_EQ(NumTableFilesAtLevel(3), 0); - // Assuming each files' metadata is at least 50 bytes/ - ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U + 50U * 4); - - // Insert 400KB. 
Some data will be compressed - for (int i = 21; i < 120; i++) { - ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000))); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(1), 0); - ASSERT_EQ(NumTableFilesAtLevel(2), 0); - - ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4), - 120U * 4000U + 50U * 24); - // Make sure data in files in L3 is not compacted by removing all files - // in L4 and calculate number of rows - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "true"}, - })); - ColumnFamilyMetaData cf_meta; - db_->GetColumnFamilyMetaData(&cf_meta); - for (auto file : cf_meta.levels[4].files) { - listener->SetExpectedFileName(dbname_ + file.name); - ASSERT_OK(dbfull()->DeleteFile(file.name)); - } - listener->VerifyMatchedCount(cf_meta.levels[4].files.size()); - - int num_keys = 0; - std::unique_ptr iter(db_->NewIterator(ReadOptions())); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - num_keys++; - } - ASSERT_OK(iter->status()); - ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys * 4000U + num_keys * 10U); -} - -TEST_F(DBTest, DynamicLevelCompressionPerLevel2) { - if (!Snappy_Supported() || !LZ4_Supported() || !Zlib_Supported()) { - return; - } - const int kNKeys = 500; - int keys[kNKeys]; - for (int i = 0; i < kNKeys; i++) { - keys[i] = i; - } - RandomShuffle(std::begin(keys), std::end(keys)); - - Random rnd(301); - Options options; - options.create_if_missing = true; - options.db_write_buffer_size = 6000000; - options.write_buffer_size = 600000; - options.max_write_buffer_number = 2; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 2; - options.soft_pending_compaction_bytes_limit = 1024 * 1024; - options.target_file_size_base = 20; - options.env = env_; - options.level_compaction_dynamic_level_bytes = true; - options.max_bytes_for_level_base = 200; - options.max_bytes_for_level_multiplier = 8; - options.max_background_compactions = 1; - options.num_levels = 5; - std::shared_ptr mtf(new mock::MockTableFactory); - options.table_factory = mtf; - - options.compression_per_level.resize(3); - options.compression_per_level[0] = kNoCompression; - options.compression_per_level[1] = kLZ4Compression; - options.compression_per_level[2] = kZlibCompression; - - DestroyAndReopen(options); - // When base level is L4, L4 is LZ4. 
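// (With level_compaction_dynamic_level_bytes=true, compression_per_level is
// interpreted relative to the base level rather than by absolute level
// number: roughly, entry 0 applies to L0/flush output, entry 1 to the current
// base level, and entry 2 to the level below it.  Under the options above
// that works out to:
//   base_level == 4:  L0 -> kNoCompression, L4 -> kLZ4Compression
//   base_level == 3:  L0 -> kNoCompression, L3 -> kLZ4Compression,
//                     L4 -> kZlibCompression
// which is what the sync-point callbacks below assert.)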
- std::atomic num_zlib(0); - std::atomic num_lz4(0); - std::atomic num_no(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - if (compaction->output_level() == 4) { - ASSERT_TRUE(compaction->output_compression() == kLZ4Compression); - num_lz4.fetch_add(1); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) { - auto* compression = reinterpret_cast(arg); - ASSERT_TRUE(*compression == kNoCompression); - num_no.fetch_add(1); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - for (int i = 0; i < 100; i++) { - std::string value = rnd.RandomString(200); - ASSERT_OK(Put(Key(keys[i]), value)); - if (i % 25 == 24) { - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - } - - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - - ASSERT_EQ(NumTableFilesAtLevel(1), 0); - ASSERT_EQ(NumTableFilesAtLevel(2), 0); - ASSERT_EQ(NumTableFilesAtLevel(3), 0); - ASSERT_GT(NumTableFilesAtLevel(4), 0); - ASSERT_GT(num_no.load(), 2); - ASSERT_GT(num_lz4.load(), 0); - int prev_num_files_l4 = NumTableFilesAtLevel(4); - - // After base level turn L4->L3, L3 becomes LZ4 and L4 becomes Zlib - num_lz4.store(0); - num_no.store(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - if (compaction->output_level() == 4 && compaction->start_level() == 3) { - ASSERT_TRUE(compaction->output_compression() == kZlibCompression); - num_zlib.fetch_add(1); - } else { - ASSERT_TRUE(compaction->output_compression() == kLZ4Compression); - num_lz4.fetch_add(1); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) { - auto* compression = reinterpret_cast(arg); - ASSERT_TRUE(*compression == kNoCompression); - num_no.fetch_add(1); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - for (int i = 101; i < 500; i++) { - std::string value = rnd.RandomString(200); - ASSERT_OK(Put(Key(keys[i]), value)); - if (i % 100 == 99) { - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_EQ(NumTableFilesAtLevel(1), 0); - ASSERT_EQ(NumTableFilesAtLevel(2), 0); - ASSERT_GT(NumTableFilesAtLevel(3), 0); - ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4); - ASSERT_GT(num_no.load(), 2); - ASSERT_GT(num_lz4.load(), 0); - ASSERT_GT(num_zlib.load(), 0); -} - -TEST_F(DBTest, DynamicCompactionOptions) { - // minimum write buffer size is enforced at 64KB - const uint64_t k32KB = 1 << 15; - const uint64_t k64KB = 1 << 16; - const uint64_t k128KB = 1 << 17; - const uint64_t k1MB = 1 << 20; - const uint64_t k4KB = 1 << 12; - Options options; - options.env = env_; - options.create_if_missing = true; - options.compression = kNoCompression; - options.soft_pending_compaction_bytes_limit = 1024 * 1024; - options.write_buffer_size = k64KB; - options.arena_block_size = 4 * k4KB; - options.max_write_buffer_number = 2; - 
// Compaction related options - options.level0_file_num_compaction_trigger = 3; - options.level0_slowdown_writes_trigger = 4; - options.level0_stop_writes_trigger = 8; - options.target_file_size_base = k64KB; - options.max_compaction_bytes = options.target_file_size_base * 10; - options.target_file_size_multiplier = 1; - options.max_bytes_for_level_base = k128KB; - options.max_bytes_for_level_multiplier = 4; - - // Block flush thread and disable compaction thread - env_->SetBackgroundThreads(1, Env::LOW); - env_->SetBackgroundThreads(1, Env::HIGH); - DestroyAndReopen(options); - - auto gen_l0_kb = [this](int start, int size, int stride) { - Random rnd(301); - for (int i = 0; i < size; i++) { - ASSERT_OK(Put(Key(start + stride * i), rnd.RandomString(1024))); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - }; - - // Write 3 files that have the same key range. - // Since level0_file_num_compaction_trigger is 3, compaction should be - // triggered. The compaction should result in one L1 file - gen_l0_kb(0, 64, 1); - ASSERT_EQ(NumTableFilesAtLevel(0), 1); - gen_l0_kb(0, 64, 1); - ASSERT_EQ(NumTableFilesAtLevel(0), 2); - gen_l0_kb(0, 64, 1); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,1", FilesPerLevel()); - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(1U, metadata.size()); - ASSERT_LE(metadata[0].size, k64KB + k4KB); - ASSERT_GE(metadata[0].size, k64KB - k4KB); - - // Test compaction trigger and target_file_size_base - // Reduce compaction trigger to 2, and reduce L1 file size to 32KB. - // Writing to 64KB L0 files should trigger a compaction. Since these - // 2 L0 files have the same key range, compaction merge them and should - // result in 2 32KB L1 files. - ASSERT_OK( - dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"}, - {"target_file_size_base", std::to_string(k32KB)}})); - - gen_l0_kb(0, 64, 1); - ASSERT_EQ("1,1", FilesPerLevel()); - gen_l0_kb(0, 64, 1); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ("0,2", FilesPerLevel()); - metadata.clear(); - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(2U, metadata.size()); - ASSERT_LE(metadata[0].size, k32KB + k4KB); - ASSERT_GE(metadata[0].size, k32KB - k4KB); - ASSERT_LE(metadata[1].size, k32KB + k4KB); - ASSERT_GE(metadata[1].size, k32KB - k4KB); - - // Test max_bytes_for_level_base - // Increase level base size to 256KB and write enough data that will - // fill L1 and L2. L1 size should be around 256KB while L2 size should be - // around 256KB x 4. - ASSERT_OK(dbfull()->SetOptions( - {{"max_bytes_for_level_base", std::to_string(k1MB)}})); - - // writing 96 x 64KB => 6 * 1024KB - // (L1 + L2) = (1 + 4) * 1024KB - for (int i = 0; i < 96; ++i) { - gen_l0_kb(i, 64, 96); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_GT(SizeAtLevel(1), k1MB / 2); - ASSERT_LT(SizeAtLevel(1), k1MB + k1MB / 2); - - // Within (0.5, 1.5) of 4MB. - ASSERT_GT(SizeAtLevel(2), 2 * k1MB); - ASSERT_LT(SizeAtLevel(2), 6 * k1MB); - - // Test max_bytes_for_level_multiplier and - // max_bytes_for_level_base. Now, reduce both mulitplier and level base, - // After filling enough data that can fit in L1 - L3, we should see L1 size - // reduces to 128KB from 256KB which was asserted previously. Same for L2. 
- ASSERT_OK(dbfull()->SetOptions( - {{"max_bytes_for_level_multiplier", "2"}, - {"max_bytes_for_level_base", std::to_string(k128KB)}})); - - // writing 20 x 64KB = 10 x 128KB - // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB - for (int i = 0; i < 20; ++i) { - gen_l0_kb(i, 64, 32); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - uint64_t total_size = SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3); - ASSERT_TRUE(total_size < k128KB * 7 * 1.5); - - // Test level0_stop_writes_trigger. - // Clean up memtable and L0. Block compaction threads. If continue to write - // and flush memtables. We should see put stop after 8 memtable flushes - // since level0_stop_writes_trigger = 8 - ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true)); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - // Block compaction - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - sleeping_task_low.WaitUntilSleeping(); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - int count = 0; - Random rnd(301); - WriteOptions wo; - while (count < 64) { - ASSERT_OK(Put(Key(count), rnd.RandomString(1024), wo)); - ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true)); - count++; - if (dbfull()->TEST_write_controler().IsStopped()) { - sleeping_task_low.WakeUp(); - break; - } - } - // Stop trigger = 8 - ASSERT_EQ(count, 8); - // Unblock - sleeping_task_low.WaitUntilDone(); - - // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0. - // Block compaction thread again. Perform the put and memtable flushes - // until we see the stop after 6 memtable flushes. - ASSERT_OK(dbfull()->SetOptions({{"level0_stop_writes_trigger", "6"}})); - ASSERT_OK(dbfull()->TEST_FlushMemTable(true)); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - - // Block compaction again - sleeping_task_low.Reset(); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - sleeping_task_low.WaitUntilSleeping(); - count = 0; - while (count < 64) { - ASSERT_OK(Put(Key(count), rnd.RandomString(1024), wo)); - ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true)); - count++; - if (dbfull()->TEST_write_controler().IsStopped()) { - sleeping_task_low.WakeUp(); - break; - } - } - ASSERT_EQ(count, 6); - // Unblock - sleeping_task_low.WaitUntilDone(); - - // Test disable_auto_compactions - // Compaction thread is unblocked but auto compaction is disabled. Write - // 4 L0 files and compaction should be triggered. If auto compaction is - // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of - // L0 files do not change after the call. - ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "true"}})); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - - for (int i = 0; i < 4; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(1024))); - // Wait for compaction so that put won't stop - ASSERT_OK(dbfull()->TEST_FlushMemTable(true)); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumTableFilesAtLevel(0), 4); - - // Enable auto compaction and perform the same test, # of L0 files should be - // reduced after compaction. 
- ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - - for (int i = 0; i < 4; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(1024))); - // Wait for compaction so that put won't stop - ASSERT_OK(dbfull()->TEST_FlushMemTable(true)); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_LT(NumTableFilesAtLevel(0), 4); -} - -// Test dynamic FIFO compaction options. -// This test covers just option parsing and makes sure that the options are -// correctly assigned. Also look at DBOptionsTest.SetFIFOCompactionOptions -// test which makes sure that the FIFO compaction funcionality is working -// as expected on dynamically changing the options. -// Even more FIFOCompactionTests are at DBTest.FIFOCompaction* . -TEST_F(DBTest, DynamicFIFOCompactionOptions) { - Options options; - options.ttl = 0; - options.create_if_missing = true; - options.env = env_; - DestroyAndReopen(options); - - // Initial defaults - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 1024 * 1024 * 1024); - ASSERT_EQ(dbfull()->GetOptions().ttl, 0); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - false); - - ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_fifo", "{max_table_files_size=23;}"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 23); - ASSERT_EQ(dbfull()->GetOptions().ttl, 0); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - false); - - ASSERT_OK(dbfull()->SetOptions({{"ttl", "97"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 23); - ASSERT_EQ(dbfull()->GetOptions().ttl, 97); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - false); - - ASSERT_OK(dbfull()->SetOptions({{"ttl", "203"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 23); - ASSERT_EQ(dbfull()->GetOptions().ttl, 203); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - false); - - ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_fifo", "{allow_compaction=true;}"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 23); - ASSERT_EQ(dbfull()->GetOptions().ttl, 203); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - true); - - ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_fifo", "{max_table_files_size=31;}"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 31); - ASSERT_EQ(dbfull()->GetOptions().ttl, 203); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - true); - - ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_fifo", - "{max_table_files_size=51;allow_compaction=true;}"}})); - ASSERT_OK(dbfull()->SetOptions({{"ttl", "49"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, - 51); - ASSERT_EQ(dbfull()->GetOptions().ttl, 49); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, - true); -} - -TEST_F(DBTest, DynamicUniversalCompactionOptions) { - Options options; - options.create_if_missing = true; - options.env = env_; - DestroyAndReopen(options); - - // Initial defaults - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 1U); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width, - 
2u); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width, - UINT_MAX); - ASSERT_EQ(dbfull() - ->GetOptions() - .compaction_options_universal.max_size_amplification_percent, - 200u); - ASSERT_EQ(dbfull() - ->GetOptions() - .compaction_options_universal.compression_size_percent, - -1); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style, - kCompactionStopStyleTotalSize); - ASSERT_EQ( - dbfull()->GetOptions().compaction_options_universal.allow_trivial_move, - false); - - ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_universal", "{size_ratio=7;}"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width, - 2u); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width, - UINT_MAX); - ASSERT_EQ(dbfull() - ->GetOptions() - .compaction_options_universal.max_size_amplification_percent, - 200u); - ASSERT_EQ(dbfull() - ->GetOptions() - .compaction_options_universal.compression_size_percent, - -1); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style, - kCompactionStopStyleTotalSize); - ASSERT_EQ( - dbfull()->GetOptions().compaction_options_universal.allow_trivial_move, - false); - - ASSERT_OK(dbfull()->SetOptions( - {{"compaction_options_universal", "{min_merge_width=11;}"}})); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width, - 11u); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width, - UINT_MAX); - ASSERT_EQ(dbfull() - ->GetOptions() - .compaction_options_universal.max_size_amplification_percent, - 200u); - ASSERT_EQ(dbfull() - ->GetOptions() - .compaction_options_universal.compression_size_percent, - -1); - ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style, - kCompactionStopStyleTotalSize); - ASSERT_EQ( - dbfull()->GetOptions().compaction_options_universal.allow_trivial_move, - false); -} - -TEST_F(DBTest, FileCreationRandomFailure) { - Options options; - options.env = env_; - options.create_if_missing = true; - options.write_buffer_size = 100000; // Small write buffer - options.target_file_size_base = 200000; - options.max_bytes_for_level_base = 1000000; - options.max_bytes_for_level_multiplier = 2; - - DestroyAndReopen(options); - Random rnd(301); - - constexpr int kCDTKeysPerBuffer = 4; - constexpr int kTestSize = kCDTKeysPerBuffer * 4096; - constexpr int kTotalIteration = 20; - // the second half of the test involves in random failure - // of file creation. - constexpr int kRandomFailureTest = kTotalIteration / 2; - - std::vector values; - for (int i = 0; i < kTestSize; ++i) { - values.push_back("NOT_FOUND"); - } - for (int j = 0; j < kTotalIteration; ++j) { - if (j == kRandomFailureTest) { - env_->non_writeable_rate_.store(90); - } - for (int k = 0; k < kTestSize; ++k) { - // here we expect some of the Put fails. - std::string value = rnd.RandomString(100); - Status s = Put(Key(k), Slice(value)); - if (s.ok()) { - // update the latest successful put - values[k] = value; - } - // But everything before we simulate the failure-test should succeed. - if (j < kRandomFailureTest) { - ASSERT_OK(s); - } - } - } - - // If rocksdb does not do the correct job, internal assert will fail here. 
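The two dynamic-options tests above rely on nested option structs such as compaction_options_fifo being updatable through DB::SetOptions with a "{name=value;...}" string. A small sketch of that syntax; the database path and the concrete values are illustrative, not from the tests:

```cpp
// Sketch: updating nested compaction option structs through DB::SetOptions.
// Path and values are illustrative; error handling is reduced to asserts.
#include <cassert>
#include <unordered_map>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.compaction_style = rocksdb::kCompactionStyleFIFO;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/fifo_options_example", &db);
  assert(s.ok());

  // Nested structs are encoded as "{field=value;field=value;}".
  s = db->SetOptions(
      {{"compaction_options_fifo",
        "{max_table_files_size=1073741824;allow_compaction=true;}"}});
  assert(s.ok());

  // Plain mutable options such as ttl can be changed the same way.
  s = db->SetOptions({{"ttl", "3600"}});
  assert(s.ok());

  // Read back the effective values.
  rocksdb::Options current = db->GetOptions();
  assert(current.compaction_options_fifo.allow_compaction);

  delete db;
  return 0;
}
```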
- ASSERT_TRUE(dbfull()->TEST_WaitForFlushMemTable().IsIOError()); - ASSERT_TRUE(dbfull()->TEST_WaitForCompact().IsIOError()); - - // verify we have the latest successful update - for (int k = 0; k < kTestSize; ++k) { - auto v = Get(Key(k)); - ASSERT_EQ(v, values[k]); - } - - // reopen and reverify we have the latest successful update - env_->non_writeable_rate_.store(0); - Reopen(options); - for (int k = 0; k < kTestSize; ++k) { - auto v = Get(Key(k)); - ASSERT_EQ(v, values[k]); - } -} - - -TEST_F(DBTest, DynamicMiscOptions) { - // Test max_sequential_skip_in_iterations - Options options; - options.env = env_; - options.create_if_missing = true; - options.max_sequential_skip_in_iterations = 16; - options.compression = kNoCompression; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - - auto assert_reseek_count = [this, &options](int key_start, int num_reseek) { - int key0 = key_start; - int key1 = key_start + 1; - int key2 = key_start + 2; - Random rnd(301); - ASSERT_OK(Put(Key(key0), rnd.RandomString(8))); - for (int i = 0; i < 10; ++i) { - ASSERT_OK(Put(Key(key1), rnd.RandomString(8))); - } - ASSERT_OK(Put(Key(key2), rnd.RandomString(8))); - std::unique_ptr iter(db_->NewIterator(ReadOptions())); - iter->Seek(Key(key1)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Key(key1)), 0); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Key(key2)), 0); - ASSERT_EQ(num_reseek, - TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION)); - }; - // No reseek - assert_reseek_count(100, 0); - - ASSERT_OK(dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "4"}})); - // Clear memtable and make new option effective - ASSERT_OK(dbfull()->TEST_FlushMemTable(true)); - // Trigger reseek - assert_reseek_count(200, 1); - - ASSERT_OK( - dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "16"}})); - // Clear memtable and make new option effective - ASSERT_OK(dbfull()->TEST_FlushMemTable(true)); - // No reseek - assert_reseek_count(300, 1); - - MutableCFOptions mutable_cf_options; - CreateAndReopenWithCF({"pikachu"}, options); - // Test soft_pending_compaction_bytes_limit, - // hard_pending_compaction_bytes_limit - ASSERT_OK(dbfull()->SetOptions( - handles_[1], {{"soft_pending_compaction_bytes_limit", "200"}, - {"hard_pending_compaction_bytes_limit", "300"}})); - ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], - &mutable_cf_options)); - ASSERT_EQ(200, mutable_cf_options.soft_pending_compaction_bytes_limit); - ASSERT_EQ(300, mutable_cf_options.hard_pending_compaction_bytes_limit); - // Test report_bg_io_stats - ASSERT_OK( - dbfull()->SetOptions(handles_[1], {{"report_bg_io_stats", "true"}})); - // sanity check - ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], - &mutable_cf_options)); - ASSERT_TRUE(mutable_cf_options.report_bg_io_stats); - // Test compression - // sanity check - ASSERT_OK(dbfull()->SetOptions({{"compression", "kNoCompression"}})); - ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0], - &mutable_cf_options)); - ASSERT_EQ(CompressionType::kNoCompression, mutable_cf_options.compression); - - if (Snappy_Supported()) { - ASSERT_OK(dbfull()->SetOptions({{"compression", "kSnappyCompression"}})); - ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0], - &mutable_cf_options)); - ASSERT_EQ(CompressionType::kSnappyCompression, - mutable_cf_options.compression); - } - - // Test paranoid_file_checks already done in db_block_cache_test - ASSERT_OK( - 
dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "true"}})); - ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], - &mutable_cf_options)); - ASSERT_TRUE(mutable_cf_options.report_bg_io_stats); - ASSERT_TRUE(mutable_cf_options.check_flush_compaction_key_order); - - ASSERT_OK(dbfull()->SetOptions( - handles_[1], {{"check_flush_compaction_key_order", "false"}})); - ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], - &mutable_cf_options)); - ASSERT_FALSE(mutable_cf_options.check_flush_compaction_key_order); -} - -TEST_F(DBTest, L0L1L2AndUpHitCounter) { - const int kNumLevels = 3; - const int kNumKeysPerLevel = 10000; - const int kNumKeysPerDb = kNumLevels * kNumKeysPerLevel; - - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - Reopen(options); - - // After the below loop there will be one file on each of L0, L1, and L2. - int key = 0; - for (int output_level = kNumLevels - 1; output_level >= 0; --output_level) { - for (int i = 0; i < kNumKeysPerLevel; ++i) { - ASSERT_OK(Put(Key(key), "val")); - key++; - } - ASSERT_OK(Flush()); - for (int input_level = 0; input_level < output_level; ++input_level) { - // `TEST_CompactRange(input_level, ...)` compacts from `input_level` to - // `input_level + 1`. - ASSERT_OK(dbfull()->TEST_CompactRange(input_level, nullptr, nullptr)); - } - } - assert(key == kNumKeysPerDb); - - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0)); - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1)); - ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); - - for (int i = 0; i < kNumKeysPerDb; i++) { - ASSERT_EQ(Get(Key(i)), "val"); - } - - ASSERT_EQ(kNumKeysPerLevel, TestGetTickerCount(options, GET_HIT_L0)); - ASSERT_EQ(kNumKeysPerLevel, TestGetTickerCount(options, GET_HIT_L1)); - ASSERT_EQ(kNumKeysPerLevel, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); - - ASSERT_EQ(kNumKeysPerDb, TestGetTickerCount(options, GET_HIT_L0) + - TestGetTickerCount(options, GET_HIT_L1) + - TestGetTickerCount(options, GET_HIT_L2_AND_UP)); -} - -TEST_F(DBTest, EncodeDecompressedBlockSizeTest) { - // iter 0 -- zlib - // iter 1 -- bzip2 - // iter 2 -- lz4 - // iter 3 -- lz4HC - // iter 4 -- xpress - CompressionType compressions[] = {kZlibCompression, kBZip2Compression, - kLZ4Compression, kLZ4HCCompression, - kXpressCompression}; - for (auto comp : compressions) { - if (!CompressionTypeSupported(comp)) { - continue; - } - // first_table_version 1 -- generate with table_version == 1, read with - // table_version == 2 - // first_table_version 2 -- generate with table_version == 2, read with - // table_version == 1 - for (int first_table_version = 1; first_table_version <= 2; - ++first_table_version) { - BlockBasedTableOptions table_options; - table_options.format_version = first_table_version; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - Options options = CurrentOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.create_if_missing = true; - options.compression = comp; - DestroyAndReopen(options); - - int kNumKeysWritten = 1000; - - Random rnd(301); - for (int i = 0; i < kNumKeysWritten; ++i) { - // compressible string - ASSERT_OK(Put(Key(i), rnd.RandomString(128) + std::string(128, 'a'))); - } - - table_options.format_version = first_table_version == 1 ? 
2 : 1; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - for (int i = 0; i < kNumKeysWritten; ++i) { - auto r = Get(Key(i)); - ASSERT_EQ(r.substr(128), std::string(128, 'a')); - } - } - } -} - -TEST_F(DBTest, CloseSpeedup) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleLevel; - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 4; - options.max_bytes_for_level_base = 400 * 1024; - options.max_write_buffer_number = 16; - - // Block background threads - env_->SetBackgroundThreads(1, Env::LOW); - env_->SetBackgroundThreads(1, Env::HIGH); - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - test::SleepingBackgroundTask sleeping_task_high; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_task_high, Env::Priority::HIGH); - - std::vector filenames; - ASSERT_OK(env_->GetChildren(dbname_, &filenames)); - // In Windows, LOCK file cannot be deleted because it is locked by db_test - // After closing db_test, the LOCK file is unlocked and can be deleted - // Delete archival files. - bool deleteDir = true; - for (size_t i = 0; i < filenames.size(); ++i) { - Status s = env_->DeleteFile(dbname_ + "/" + filenames[i]); - if (!s.ok()) { - deleteDir = false; - } - } - if (deleteDir) { - ASSERT_OK(env_->DeleteDir(dbname_)); - } - DestroyAndReopen(options); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - env_->SetBackgroundThreads(1, Env::LOW); - env_->SetBackgroundThreads(1, Env::HIGH); - Random rnd(301); - int key_idx = 0; - - // First three 110KB files are not going to level 2 - // After that, (100K, 200K) - for (int num = 0; num < 5; num++) { - GenerateNewFile(&rnd, &key_idx, true); - } - - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - Close(); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // Unblock background threads - sleeping_task_high.WakeUp(); - sleeping_task_high.WaitUntilDone(); - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - - Destroy(options); -} - -class DelayedMergeOperator : public MergeOperator { - private: - DBTest* db_test_; - - public: - explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {} - - bool FullMergeV2(const MergeOperationInput& merge_in, - MergeOperationOutput* merge_out) const override { - db_test_->env_->MockSleepForMicroseconds(1000 * - merge_in.operand_list.size()); - merge_out->new_value = ""; - return true; - } - - const char* Name() const override { return "DelayedMergeOperator"; } -}; - -TEST_F(DBTest, MergeTestTime) { - std::string one, two, three; - PutFixed64(&one, 1); - PutFixed64(&two, 2); - PutFixed64(&three, 3); - - // Enable time profiling - SetPerfLevel(kEnableTime); - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.merge_operator.reset(new DelayedMergeOperator(this)); - SetTimeElapseOnlySleepOnReopen(&options); - DestroyAndReopen(options); - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - ASSERT_EQ(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0); - ASSERT_OK(db_->Put(WriteOptions(), "foo", one)); - ASSERT_OK(Flush()); - ASSERT_OK(db_->Merge(WriteOptions(), "foo", two)); - ASSERT_OK(Flush()); - ASSERT_OK(db_->Merge(WriteOptions(), "foo", three)); - ASSERT_OK(Flush()); - - ReadOptions opt; - 
opt.verify_checksums = true; - opt.snapshot = nullptr; - std::string result; - ASSERT_OK(db_->Get(opt, "foo", &result)); - - ASSERT_EQ(2000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME)); - - ReadOptions read_options; - std::unique_ptr iter(db_->NewIterator(read_options)); - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - ++count; - } - - ASSERT_EQ(1, count); - ASSERT_EQ(4000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME)); -#ifdef ROCKSDB_USING_THREAD_STATUS - ASSERT_GT(TestGetTickerCount(options, FLUSH_WRITE_BYTES), 0); -#endif // ROCKSDB_USING_THREAD_STATUS -} - -TEST_P(DBTestWithParam, MergeCompactionTimeTest) { - SetPerfLevel(kEnableTime); - Options options = CurrentOptions(); - options.compaction_filter_factory = std::make_shared(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.merge_operator.reset(new DelayedMergeOperator(this)); - options.disable_auto_compactions = true; - options.max_subcompactions = max_subcompactions_; - SetTimeElapseOnlySleepOnReopen(&options); - DestroyAndReopen(options); - - constexpr unsigned n = 1000; - for (unsigned i = 0; i < n; i++) { - ASSERT_OK(db_->Merge(WriteOptions(), "foo", "TEST")); - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - CompactRangeOptions cro; - cro.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - ASSERT_EQ(uint64_t{n} * 1000000U, - TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME)); -} - -TEST_P(DBTestWithParam, FilterCompactionTimeTest) { - Options options = CurrentOptions(); - options.compaction_filter_factory = - std::make_shared(this); - options.disable_auto_compactions = true; - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.statistics->set_stats_level(kExceptTimeForMutex); - options.max_subcompactions = max_subcompactions_; - SetTimeElapseOnlySleepOnReopen(&options); - DestroyAndReopen(options); - - unsigned n = 0; - // put some data - for (int table = 0; table < 4; ++table) { - for (int i = 0; i < 10 + table; ++i) { - ASSERT_OK(Put(std::to_string(table * 100 + i), "val")); - ++n; - } - ASSERT_OK(Flush()); - } - - CompactRangeOptions cro; - cro.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_EQ(0U, CountLiveFiles()); - - Reopen(options); - - Iterator* itr = db_->NewIterator(ReadOptions()); - itr->SeekToFirst(); - ASSERT_OK(itr->status()); - ASSERT_EQ(uint64_t{n} * 1000000U, - TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME)); - delete itr; -} - -TEST_F(DBTest, TestLogCleanup) { - Options options = CurrentOptions(); - options.write_buffer_size = 64 * 1024; // very small - // only two memtables allowed ==> only two log files - options.max_write_buffer_number = 2; - Reopen(options); - - for (int i = 0; i < 100000; ++i) { - ASSERT_OK(Put(Key(i), "val")); - // only 2 memtables will be alive, so logs_to_free needs to always be below - // 2 - ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast(3)); - } -} - -TEST_F(DBTest, EmptyCompactedDB) { - Options options = CurrentOptions(); - options.max_open_files = -1; - Close(); - ASSERT_OK(ReadOnlyReopen(options)); - Status s = Put("new", "value"); - ASSERT_TRUE(s.IsNotSupported()); - Close(); -} - -TEST_F(DBTest, SuggestCompactRangeTest) { - class CompactionFilterFactoryGetContext : public CompactionFilterFactory { - public: - 
std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& context) override { - saved_context = context; - std::unique_ptr empty_filter; - return empty_filter; - } - const char* Name() const override { - return "CompactionFilterFactoryGetContext"; - } - static bool IsManual(CompactionFilterFactory* compaction_filter_factory) { - return reinterpret_cast( - compaction_filter_factory) - ->saved_context.is_manual_compaction; - } - CompactionFilter::Context saved_context; - }; - - Options options = CurrentOptions(); - options.memtable_factory.reset(test::NewSpecialSkipListFactory( - DBTestBase::kNumKeysByGenerateNewRandomFile)); - options.compaction_style = kCompactionStyleLevel; - options.compaction_filter_factory.reset( - new CompactionFilterFactoryGetContext()); - options.write_buffer_size = 200 << 10; - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 4; - options.num_levels = 4; - options.compression = kNoCompression; - options.max_bytes_for_level_base = 450 << 10; - options.target_file_size_base = 98 << 10; - options.max_compaction_bytes = static_cast(1) << 60; // inf - - Reopen(options); - - Random rnd(301); - - for (int num = 0; num < 10; num++) { - GenerateNewRandomFile(&rnd); - } - - ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual( - options.compaction_filter_factory.get())); - - // make sure either L0 or L1 has file - while (NumTableFilesAtLevel(0) == 0 && NumTableFilesAtLevel(1) == 0) { - GenerateNewRandomFile(&rnd); - } - - // compact it three times - for (int i = 0; i < 3; ++i) { - ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - - // All files are compacted - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); - - GenerateNewRandomFile(&rnd); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - // nonoverlapping with the file on level 0 - Slice start("a"), end("b"); - ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // should not compact the level 0 file - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - start = Slice("j"); - end = Slice("m"); - ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // SuggestCompactRange() is not going to be reported as manual compaction - ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual( - options.compaction_filter_factory.get())); - - // now it should compact the level 0 file - // as it's a trivial move to L1, it triggers another one to compact to L2 - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); -} - -TEST_F(DBTest, SuggestCompactRangeUniversal) { - Options options = CurrentOptions(); - options.memtable_factory.reset(test::NewSpecialSkipListFactory( - DBTestBase::kNumKeysByGenerateNewRandomFile)); - options.compaction_style = kCompactionStyleUniversal; - options.write_buffer_size = 200 << 10; - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 4; - options.num_levels = 4; - options.compression = kNoCompression; - options.max_bytes_for_level_base = 450 << 10; - options.target_file_size_base = 98 << 10; - options.max_compaction_bytes = static_cast(1) << 60; // inf - - Reopen(options); - - Random rnd(301); - - for (int num = 0; num < 10; num++) { - GenerateNewRandomFile(&rnd); - } - - ASSERT_EQ("1,2,3,4", FilesPerLevel()); - for (int i = 0; i < 3; i++) { - ASSERT_OK( - 
db_->SuggestCompactRange(db_->DefaultColumnFamily(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - - // All files are compacted - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); - ASSERT_EQ(0, NumTableFilesAtLevel(2)); - - GenerateNewRandomFile(&rnd); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - // nonoverlapping with the file on level 0 - Slice start("a"), end("b"); - ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // should not compact the level 0 file - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - - start = Slice("j"); - end = Slice("m"); - ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // now it should compact the level 0 file to the last level - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(1)); -} - -TEST_F(DBTest, PromoteL0) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.write_buffer_size = 10 * 1024 * 1024; - DestroyAndReopen(options); - - // non overlapping ranges - std::vector> ranges = { - {81, 160}, {0, 80}, {161, 240}, {241, 320}}; - - int32_t value_size = 10 * 1024; // 10 KB - - Random rnd(301); - std::map values; - for (const auto& range : ranges) { - for (int32_t j = range.first; j < range.second; j++) { - values[j] = rnd.RandomString(value_size); - ASSERT_OK(Put(Key(j), values[j])); - } - ASSERT_OK(Flush()); - } - - int32_t level0_files = NumTableFilesAtLevel(0, 0); - ASSERT_EQ(level0_files, ranges.size()); - ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // No files in L1 - - // Promote L0 level to L2. - ASSERT_OK(experimental::PromoteL0(db_, db_->DefaultColumnFamily(), 2)); - // We expect that all the files were trivially moved from L0 to L2 - ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); - ASSERT_EQ(NumTableFilesAtLevel(2, 0), level0_files); - - for (const auto& kv : values) { - ASSERT_EQ(Get(Key(kv.first)), kv.second); - } -} - -TEST_F(DBTest, PromoteL0Failure) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.write_buffer_size = 10 * 1024 * 1024; - DestroyAndReopen(options); - - // Produce two L0 files with overlapping ranges. - ASSERT_OK(Put(Key(0), "")); - ASSERT_OK(Put(Key(3), "")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(1), "")); - ASSERT_OK(Flush()); - - Status status; - // Fails because L0 has overlapping files. - status = experimental::PromoteL0(db_, db_->DefaultColumnFamily()); - ASSERT_TRUE(status.IsInvalidArgument()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - // Now there is a file in L1. - ASSERT_GE(NumTableFilesAtLevel(1, 0), 1); - - ASSERT_OK(Put(Key(5), "")); - ASSERT_OK(Flush()); - // Fails because L1 is non-empty. 
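The PromoteL0 and SuggestCompactRange calls exercised above come from the rocksdb::experimental namespace. A minimal standalone sketch of both calls; the database path, keys, and target level are illustrative:

```cpp
// Sketch: experimental::SuggestCompactRange and experimental::PromoteL0.
// The database path, keys and target level are illustrative only.
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/experimental.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;  // keep the LSM tree quiet for PromoteL0

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/promote_l0_example", &db);
  assert(s.ok());

  // Write a single, non-overlapping L0 file.
  assert(db->Put(rocksdb::WriteOptions(), "a", "1").ok());
  assert(db->Put(rocksdb::WriteOptions(), "b", "2").ok());
  assert(db->Flush(rocksdb::FlushOptions()).ok());

  // Hint that a key range would benefit from compaction; this is advisory
  // and is not reported as a manual compaction.
  rocksdb::Slice begin("a");
  rocksdb::Slice end("c");
  s = rocksdb::experimental::SuggestCompactRange(db, &begin, &end);
  assert(s.ok());

  // Trivially move all L0 files down; fails if L0 files overlap each other
  // or if the target level is not empty, as the tests above demonstrate.
  s = rocksdb::experimental::PromoteL0(db, db->DefaultColumnFamily(),
                                       2 /* target_level */);
  assert(s.ok());

  delete db;
  return 0;
}
```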
- status = experimental::PromoteL0(db_, db_->DefaultColumnFamily()); - ASSERT_TRUE(status.IsInvalidArgument()); -} - -// Github issue #596 -TEST_F(DBTest, CompactRangeWithEmptyBottomLevel) { - const int kNumLevels = 2; - const int kNumL0Files = 2; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.num_levels = kNumLevels; - DestroyAndReopen(options); - - Random rnd(301); - for (int i = 0; i < kNumL0Files; ++i) { - ASSERT_OK(Put(Key(0), rnd.RandomString(1024))); - ASSERT_OK(Flush()); - } - ASSERT_EQ(NumTableFilesAtLevel(0), kNumL0Files); - ASSERT_EQ(NumTableFilesAtLevel(1), 0); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_EQ(NumTableFilesAtLevel(1), kNumL0Files); -} - -TEST_F(DBTest, AutomaticConflictsWithManualCompaction) { - const int kNumL0Files = 50; - Options options = CurrentOptions(); - options.level0_file_num_compaction_trigger = 4; - // never slowdown / stop - options.level0_slowdown_writes_trigger = 999999; - options.level0_stop_writes_trigger = 999999; - options.max_background_compactions = 10; - DestroyAndReopen(options); - - // schedule automatic compactions after the manual one starts, but before it - // finishes to ensure conflict. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BackgroundCompaction:Start", - "DBTest::AutomaticConflictsWithManualCompaction:PrePuts"}, - {"DBTest::AutomaticConflictsWithManualCompaction:PostPuts", - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}}); - std::atomic callback_count(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::MaybeScheduleFlushOrCompaction:Conflict", - [&](void* /*arg*/) { callback_count.fetch_add(1); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int i = 0; i < 2; ++i) { - // put two keys to ensure no trivial move - for (int j = 0; j < 2; ++j) { - ASSERT_OK(Put(Key(j), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - port::Thread manual_compaction_thread([this]() { - CompactRangeOptions croptions; - croptions.exclusive_manual_compaction = true; - ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr)); - }); - - TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PrePuts"); - for (int i = 0; i < kNumL0Files; ++i) { - // put two keys to ensure no trivial move - for (int j = 0; j < 2; ++j) { - ASSERT_OK(Put(Key(j), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PostPuts"); - - ASSERT_GE(callback_count.load(), 1); - for (int i = 0; i < 2; ++i) { - ASSERT_NE("NOT_FOUND", Get(Key(i))); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - manual_compaction_thread.join(); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); -} - -TEST_F(DBTest, CompactFilesShouldTriggerAutoCompaction) { - Options options = CurrentOptions(); - options.max_background_compactions = 1; - options.level0_file_num_compaction_trigger = 4; - options.level0_slowdown_writes_trigger = 36; - options.level0_stop_writes_trigger = 36; - DestroyAndReopen(options); - - // generate files for manual compaction - Random rnd(301); - for (int i = 0; i < 2; ++i) { - // put two keys to ensure no trivial move - for (int j = 0; j < 2; ++j) { - ASSERT_OK(Put(Key(j), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - - ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta_data; - db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), 
&cf_meta_data); - - std::vector input_files; - input_files.push_back(cf_meta_data.levels[0].files[0].name); - - SyncPoint::GetInstance()->LoadDependency({ - {"CompactFilesImpl:0", - "DBTest::CompactFilesShouldTriggerAutoCompaction:Begin"}, - {"DBTest::CompactFilesShouldTriggerAutoCompaction:End", - "CompactFilesImpl:1"}, - }); - - SyncPoint::GetInstance()->EnableProcessing(); - - port::Thread manual_compaction_thread([&]() { - auto s = db_->CompactFiles(CompactionOptions(), db_->DefaultColumnFamily(), - input_files, 0); - ASSERT_OK(s); - }); - - TEST_SYNC_POINT("DBTest::CompactFilesShouldTriggerAutoCompaction:Begin"); - // generate enough files to trigger compaction - for (int i = 0; i < 20; ++i) { - for (int j = 0; j < 2; ++j) { - ASSERT_OK(Put(Key(j), rnd.RandomString(1024))); - } - ASSERT_OK(Flush()); - } - db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data); - ASSERT_GT(cf_meta_data.levels[0].files.size(), - options.level0_file_num_compaction_trigger); - TEST_SYNC_POINT("DBTest::CompactFilesShouldTriggerAutoCompaction:End"); - - manual_compaction_thread.join(); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data); - ASSERT_LE(cf_meta_data.levels[0].files.size(), - options.level0_file_num_compaction_trigger); -} - -// Github issue #595 -// Large write batch with column families -TEST_F(DBTest, LargeBatchWithColumnFamilies) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; // Small write buffer - CreateAndReopenWithCF({"pikachu"}, options); - int64_t j = 0; - for (int i = 0; i < 5; i++) { - for (int pass = 1; pass <= 3; pass++) { - WriteBatch batch; - size_t write_size = 1024 * 1024 * (5 + i); - fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n", - (write_size / 1024 / 1024), pass); - for (;;) { - std::string data(3000, j++ % 127 + 20); - data += std::to_string(j); - ASSERT_OK(batch.Put(handles_[0], Slice(data), Slice(data))); - if (batch.GetDataSize() > write_size) { - break; - } - } - fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n", - (batch.GetDataSize() / 1024 / 1024)); - ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); - fprintf(stderr, "done\n"); - } - } - // make sure we can re-open it. 
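LargeBatchWithColumnFamilies above builds multi-megabyte WriteBatches against a column family handle and commits them with DB::Write. A small self-contained sketch of the same pattern; the path, column family name, and payload sizes are illustrative:

```cpp
// Sketch: building a WriteBatch against an explicit column family handle.
// Path, column family name and payload sizes are illustrative only.
#include <cassert>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/write_batch.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/batch_cf_example", &db);
  assert(s.ok());

  rocksdb::ColumnFamilyHandle* cf = nullptr;
  s = db->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "pikachu", &cf);
  assert(s.ok());

  // Accumulate keys into one batch and commit it atomically.
  rocksdb::WriteBatch batch;
  for (int i = 0; i < 1000; ++i) {
    std::string key = "key" + std::to_string(i);
    batch.Put(cf, key, std::string(3000, 'v'));
  }
  // GetDataSize() reports the serialized size of the batch contents.
  assert(batch.GetDataSize() > 0);

  s = db->Write(rocksdb::WriteOptions(), &batch);
  assert(s.ok());

  s = db->DestroyColumnFamilyHandle(cf);
  assert(s.ok());
  delete db;
  return 0;
}
```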
- ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); -} - -// Make sure that Flushes can proceed in parallel with CompactRange() -TEST_F(DBTest, FlushesInParallelWithCompactRange) { - // iter == 0 -- leveled - // iter == 1 -- leveled, but throw in a flush between two levels compacting - // iter == 2 -- universal - for (int iter = 0; iter < 3; ++iter) { - Options options = CurrentOptions(); - if (iter < 2) { - options.compaction_style = kCompactionStyleLevel; - } else { - options.compaction_style = kCompactionStyleUniversal; - } - options.write_buffer_size = 110 << 10; - options.level0_file_num_compaction_trigger = 4; - options.num_levels = 4; - options.compression = kNoCompression; - options.max_bytes_for_level_base = 450 << 10; - options.target_file_size_base = 98 << 10; - options.max_write_buffer_number = 2; - - DestroyAndReopen(options); - - Random rnd(301); - for (int num = 0; num < 14; num++) { - GenerateNewRandomFile(&rnd); - } - - if (iter == 1) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::RunManualCompaction()::1", - "DBTest::FlushesInParallelWithCompactRange:1"}, - {"DBTest::FlushesInParallelWithCompactRange:2", - "DBImpl::RunManualCompaction()::2"}}); - } else { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"CompactionJob::Run():Start", - "DBTest::FlushesInParallelWithCompactRange:1"}, - {"DBTest::FlushesInParallelWithCompactRange:2", - "CompactionJob::Run():End"}}); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - std::vector threads; - threads.emplace_back([&]() { Compact("a", "z"); }); - - TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1"); - - // this has to start a flush. if flushes are blocked, this will try to - // create - // 3 memtables, and that will fail because max_write_buffer_number is 2 - for (int num = 0; num < 3; num++) { - GenerateNewRandomFile(&rnd, /* nowait */ true); - } - - TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2"); - - for (auto& t : threads) { - t.join(); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(DBTest, DelayedWriteRate) { - const int kEntriesPerMemTable = 100; - const int kTotalFlushes = 12; - - Options options = CurrentOptions(); - env_->SetBackgroundThreads(1, Env::LOW); - options.env = env_; - options.write_buffer_size = 100000000; - options.max_write_buffer_number = 256; - options.max_background_compactions = 1; - options.level0_file_num_compaction_trigger = 3; - options.level0_slowdown_writes_trigger = 3; - options.level0_stop_writes_trigger = 999999; - options.delayed_write_rate = 20000000; // Start with 200MB/s - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kEntriesPerMemTable)); - - SetTimeElapseOnlySleepOnReopen(&options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Block compactions - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - for (int i = 0; i < 3; i++) { - ASSERT_OK(Put(Key(i), std::string(10000, 'x'))); - ASSERT_OK(Flush()); - } - - // These writes will be slowed down to 1KB/s - uint64_t estimated_sleep_time = 0; - Random rnd(301); - ASSERT_OK(Put("", "")); - uint64_t cur_rate = options.delayed_write_rate; - for (int i = 0; i < kTotalFlushes; i++) { - uint64_t size_memtable = 0; - for (int j = 0; j < kEntriesPerMemTable; j++) { - auto rand_num = rnd.Uniform(20); - // Spread the size range to more. 
- size_t entry_size = rand_num * rand_num * rand_num; - WriteOptions wo; - ASSERT_OK(Put(Key(i), std::string(entry_size, 'x'), wo)); - size_memtable += entry_size + 18; - // Occasionally sleep a while - if (rnd.Uniform(20) == 6) { - env_->SleepForMicroseconds(2666); - } - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - estimated_sleep_time += size_memtable * 1000000u / cur_rate; - // Slow down twice. One for memtable switch and one for flush finishes. - cur_rate = static_cast(static_cast(cur_rate) * - kIncSlowdownRatio * kIncSlowdownRatio); - } - // Estimate the total sleep time fall into the rough range. - ASSERT_GT(env_->NowMicros(), estimated_sleep_time / 2); - ASSERT_LT(env_->NowMicros(), estimated_sleep_time * 2); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); -} - -TEST_F(DBTest, HardLimit) { - Options options = CurrentOptions(); - options.env = env_; - env_->SetBackgroundThreads(1, Env::LOW); - options.max_write_buffer_number = 256; - options.write_buffer_size = 110 << 10; // 110KB - options.arena_block_size = 4 * 1024; - options.level0_file_num_compaction_trigger = 4; - options.level0_slowdown_writes_trigger = 999999; - options.level0_stop_writes_trigger = 999999; - options.hard_pending_compaction_bytes_limit = 800 << 10; - options.max_bytes_for_level_base = 10000000000u; - options.max_background_compactions = 1; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - - env_->SetBackgroundThreads(1, Env::LOW); - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - CreateAndReopenWithCF({"pikachu"}, options); - - std::atomic callback_count(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) { - callback_count.fetch_add(1); - sleeping_task_low.WakeUp(); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - int key_idx = 0; - for (int num = 0; num < 5; num++) { - GenerateNewFile(&rnd, &key_idx, true); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - - ASSERT_EQ(0, callback_count.load()); - - for (int num = 0; num < 5; num++) { - GenerateNewFile(&rnd, &key_idx, true); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_GE(callback_count.load(), 1); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - sleeping_task_low.WaitUntilDone(); -} - -#if !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION) -class WriteStallListener : public EventListener { - public: - WriteStallListener() : condition_(WriteStallCondition::kNormal) {} - void OnStallConditionsChanged(const WriteStallInfo& info) override { - MutexLock l(&mutex_); - condition_ = info.condition.cur; - } - bool CheckCondition(WriteStallCondition expected) { - MutexLock l(&mutex_); - return expected == condition_; - } - - private: - port::Mutex mutex_; - WriteStallCondition condition_; -}; - -TEST_F(DBTest, SoftLimit) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; // Small write buffer - options.max_write_buffer_number = 256; - options.level0_file_num_compaction_trigger = 1; - options.level0_slowdown_writes_trigger = 3; - options.level0_stop_writes_trigger = 999999; - options.delayed_write_rate = 20000; // About 200KB/s limited rate - options.soft_pending_compaction_bytes_limit = 160000; - 
options.target_file_size_base = 99999999; // All into one file - options.max_bytes_for_level_base = 50000; - options.max_bytes_for_level_multiplier = 10; - options.max_background_compactions = 1; - options.compression = kNoCompression; - WriteStallListener* listener = new WriteStallListener(); - options.listeners.emplace_back(listener); - - // FlushMemtable with opt.wait=true does not wait for - // `OnStallConditionsChanged` being called. The event listener is triggered - // on `JobContext::Clean`, which happens after flush result is installed. - // We use sync point to create a custom WaitForFlush that waits for - // context cleanup. - port::Mutex flush_mutex; - port::CondVar flush_cv(&flush_mutex); - bool flush_finished = false; - auto InstallFlushCallback = [&]() { - { - MutexLock l(&flush_mutex); - flush_finished = false; - } - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCallFlush:ContextCleanedUp", [&](void*) { - { - MutexLock l(&flush_mutex); - flush_finished = true; - } - flush_cv.SignalAll(); - }); - }; - auto WaitForFlush = [&]() { - { - MutexLock l(&flush_mutex); - while (!flush_finished) { - flush_cv.Wait(); - } - } - SyncPoint::GetInstance()->ClearCallBack( - "DBImpl::BackgroundCallFlush:ContextCleanedUp"); - }; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Reopen(options); - - // Generating 360KB in Level 3 - for (int i = 0; i < 72; i++) { - ASSERT_OK(Put(Key(i), std::string(5000, 'x'))); - if (i % 10 == 0) { - ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true)); - } - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - MoveFilesToLevel(3); - - // Generating 360KB in Level 2 - for (int i = 0; i < 72; i++) { - ASSERT_OK(Put(Key(i), std::string(5000, 'x'))); - if (i % 10 == 0) { - ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true)); - } - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - MoveFilesToLevel(2); - - ASSERT_OK(Put(Key(0), "")); - - test::SleepingBackgroundTask sleeping_task_low; - // Block compactions - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - sleeping_task_low.WaitUntilSleeping(); - - // Create 3 L0 files, making score of L0 to be 3. - for (int i = 0; i < 3; i++) { - ASSERT_OK(Put(Key(i), std::string(5000, 'x'))); - ASSERT_OK(Put(Key(100 - i), std::string(5000, 'x'))); - // Flush the file. File size is around 30KB. - InstallFlushCallback(); - ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true)); - WaitForFlush(); - } - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed)); - - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - sleeping_task_low.Reset(); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // Now there is one L1 file but doesn't trigger soft_rate_limit - // - // TODO: soft_rate_limit is depreciated. If this test - // relies on soft_rate_limit, then we need to change the test. - // - // The L1 file size is around 30KB. - ASSERT_EQ(NumTableFilesAtLevel(1), 1); - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal)); - - // Only allow one compactin going through. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void* /*arg*/) { - // Schedule a sleeping task. 
-        sleeping_task_low.Reset();
-        env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
-                       &sleeping_task_low, Env::Priority::LOW);
-      });
-
-  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
-                 Env::Priority::LOW);
-  sleeping_task_low.WaitUntilSleeping();
-  // Create 3 L0 files, making the score of L0 be 3
-  for (int i = 0; i < 3; i++) {
-    ASSERT_OK(Put(Key(10 + i), std::string(5000, 'x')));
-    ASSERT_OK(Put(Key(90 - i), std::string(5000, 'x')));
-    // Flush the file. File size is around 30KB.
-    InstallFlushCallback();
-    ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
-    WaitForFlush();
-  }
-
-  // Wake up the sleeping task so compaction can run, then wait for it to go
-  // back to sleep to make sure exactly one compaction goes through.
-  sleeping_task_low.WakeUp();
-  sleeping_task_low.WaitUntilSleeping();
-
-  // Now there is one L1 file (around 60KB) which exceeds the 50KB base by 10KB.
-  // Given level multiplier 10, the estimated pending compaction is around 100KB,
-  // which doesn't trigger soft_pending_compaction_bytes_limit.
-  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
-  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
-  ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));
-
-  // Create 3 more L0 files, making the score of L0 be 3, higher than the L1 score.
-  for (int i = 0; i < 3; i++) {
-    ASSERT_OK(Put(Key(20 + i), std::string(5000, 'x')));
-    ASSERT_OK(Put(Key(80 - i), std::string(5000, 'x')));
-    // Flush the file. File size is around 30KB.
-    InstallFlushCallback();
-    ASSERT_OK(dbfull()->TEST_FlushMemTable(true, true));
-    WaitForFlush();
-  }
-  // Wake up the sleeping task so compaction can run, then wait for it to go
-  // back to sleep to make sure exactly one compaction goes through.
-  sleeping_task_low.WakeUp();
-  sleeping_task_low.WaitUntilSleeping();
-
-  // Now there is one L1 file (around 90KB) which exceeds the 50KB base by 40KB.
-  // L2 size is 360KB, so the estimated level fanout is 4 and the estimated
-  // pending compaction is around 200KB, which triggers
-  // soft_pending_compaction_bytes_limit.
-  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
-  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
-  ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));
-
-  sleeping_task_low.WakeUp();
-  sleeping_task_low.WaitUntilSleeping();
-
-  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
-  ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));
-
-  // Shrink the level base so L2 will hit the soft limit more easily.
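The SoftLimit test observes stall-state transitions through the WriteStallListener defined above. A minimal sketch of such a listener outside the test harness; the class name, database path, and log output are illustrative:

```cpp
// Sketch: observing write-stall transitions with an EventListener.
// The listener class name, database path and log output are illustrative only.
#include <cstdio>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/listener.h"
#include "rocksdb/options.h"

class StallLogger : public rocksdb::EventListener {
 public:
  void OnStallConditionsChanged(const rocksdb::WriteStallInfo& info) override {
    // info.condition.prev/cur are kNormal, kDelayed or kStopped.
    std::fprintf(stderr, "write stall for CF %s changed: %d -> %d\n",
                 info.cf_name.c_str(), static_cast<int>(info.condition.prev),
                 static_cast<int>(info.condition.cur));
  }
};

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.listeners.emplace_back(std::make_shared<StallLogger>());

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/stall_listener_example", &db);
  if (!s.ok()) {
    std::fprintf(stderr, "open failed: %s\n", s.ToString().c_str());
    return 1;
  }
  // ... write workload; the callback fires whenever the stall condition changes ...
  delete db;
  return 0;
}
```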
- ASSERT_OK(dbfull()->SetOptions({ - {"max_bytes_for_level_base", "5000"}, - })); - - ASSERT_OK(Put("", "")); - ASSERT_OK(Flush()); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed)); - - sleeping_task_low.WaitUntilSleeping(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); -} - -TEST_F(DBTest, LastWriteBufferDelay) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; - options.max_write_buffer_number = 4; - options.delayed_write_rate = 20000; - options.compression = kNoCompression; - options.disable_auto_compactions = true; - int kNumKeysPerMemtable = 3; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerMemtable)); - - Reopen(options); - test::SleepingBackgroundTask sleeping_task; - // Block flushes - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, - Env::Priority::HIGH); - sleeping_task.WaitUntilSleeping(); - - // Create 3 L0 files, making score of L0 to be 3. - for (int i = 0; i < 3; i++) { - // Fill one mem table - for (int j = 0; j < kNumKeysPerMemtable; j++) { - ASSERT_OK(Put(Key(j), "")); - } - ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); - } - // Inserting a new entry would create a new mem table, triggering slow down. - ASSERT_OK(Put(Key(0), "")); - ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); - - sleeping_task.WakeUp(); - sleeping_task.WaitUntilDone(); -} -#endif // !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION) - -TEST_F(DBTest, FailWhenCompressionNotSupportedTest) { - CompressionType compressions[] = {kZlibCompression, kBZip2Compression, - kLZ4Compression, kLZ4HCCompression, - kXpressCompression}; - for (auto comp : compressions) { - if (!CompressionTypeSupported(comp)) { - // not supported, we should fail the Open() - Options options = CurrentOptions(); - options.compression = comp; - ASSERT_TRUE(!TryReopen(options).ok()); - // Try if CreateColumnFamily also fails - options.compression = kNoCompression; - ASSERT_OK(TryReopen(options)); - ColumnFamilyOptions cf_options(options); - cf_options.compression = comp; - ColumnFamilyHandle* handle; - ASSERT_TRUE(!db_->CreateColumnFamily(cf_options, "name", &handle).ok()); - } - } -} - -TEST_F(DBTest, CreateColumnFamilyShouldFailOnIncompatibleOptions) { - Options options = CurrentOptions(); - options.max_open_files = 100; - Reopen(options); - - ColumnFamilyOptions cf_options(options); - // ttl is now supported when max_open_files is -1. 
- cf_options.ttl = 3600; - ColumnFamilyHandle* handle; - ASSERT_OK(db_->CreateColumnFamily(cf_options, "pikachu", &handle)); - delete handle; -} - -TEST_F(DBTest, RowCache) { - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.row_cache = NewLRUCache(8192); - DestroyAndReopen(options); - - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Flush()); - - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0); - ASSERT_EQ(Get("foo"), "bar"); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); - ASSERT_EQ(Get("foo"), "bar"); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); -} - -TEST_F(DBTest, PinnableSliceAndRowCache) { - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.row_cache = NewLRUCache(8192); - DestroyAndReopen(options); - - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Flush()); - - ASSERT_EQ(Get("foo"), "bar"); - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 1); - - { - PinnableSlice pin_slice; - ASSERT_EQ(Get("foo", &pin_slice), Status::OK()); - ASSERT_EQ(pin_slice.ToString(), "bar"); - // Entry is already in cache, lookup will remove the element from lru - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 0); - } - // After PinnableSlice destruction element is added back in LRU - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 1); -} - -TEST_F(DBTest, ReusePinnableSlice) { - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.row_cache = NewLRUCache(8192); - DestroyAndReopen(options); - - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Flush()); - - ASSERT_EQ(Get("foo"), "bar"); - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 1); - - { - PinnableSlice pin_slice; - ASSERT_EQ(Get("foo", &pin_slice), Status::OK()); - ASSERT_EQ(Get("foo", &pin_slice), Status::OK()); - ASSERT_EQ(pin_slice.ToString(), "bar"); - - // Entry is already in cache, lookup will remove the element from lru - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 0); - } - // After PinnableSlice destruction element is added back in LRU - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 1); - - { - std::vector multiget_keys; - multiget_keys.push_back("foo"); - std::vector multiget_values(1); - std::vector statuses({Status::NotFound()}); - ReadOptions ropt; - dbfull()->MultiGet(ropt, dbfull()->DefaultColumnFamily(), - multiget_keys.size(), multiget_keys.data(), - multiget_values.data(), statuses.data()); - ASSERT_EQ(Status::OK(), statuses[0]); - dbfull()->MultiGet(ropt, dbfull()->DefaultColumnFamily(), - multiget_keys.size(), multiget_keys.data(), - multiget_values.data(), statuses.data()); - ASSERT_EQ(Status::OK(), statuses[0]); - - // Entry is already in cache, lookup will remove the element from lru - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 0); - } - // After PinnableSlice destruction element is added back in LRU - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 1); - - { - std::vector multiget_cfs; - multiget_cfs.push_back(dbfull()->DefaultColumnFamily()); - std::vector multiget_keys; - multiget_keys.push_back("foo"); - 
std::vector multiget_values(1); - std::vector statuses({Status::NotFound()}); - ReadOptions ropt; - dbfull()->MultiGet(ropt, multiget_keys.size(), multiget_cfs.data(), - multiget_keys.data(), multiget_values.data(), - statuses.data()); - ASSERT_EQ(Status::OK(), statuses[0]); - dbfull()->MultiGet(ropt, multiget_keys.size(), multiget_cfs.data(), - multiget_keys.data(), multiget_values.data(), - statuses.data()); - ASSERT_EQ(Status::OK(), statuses[0]); - - // Entry is already in cache, lookup will remove the element from lru - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 0); - } - // After PinnableSlice destruction element is added back in LRU - ASSERT_EQ( - reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), - 1); -} - - -TEST_F(DBTest, DeletingOldWalAfterDrop) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"Test:AllowFlushes", "DBImpl::BGWorkFlush"}, - {"DBImpl::BGWorkFlush:done", "Test:WaitForFlush"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - Options options = CurrentOptions(); - options.max_total_wal_size = 8192; - options.compression = kNoCompression; - options.write_buffer_size = 1 << 20; - options.level0_file_num_compaction_trigger = (1 << 30); - options.level0_slowdown_writes_trigger = (1 << 30); - options.level0_stop_writes_trigger = (1 << 30); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - CreateColumnFamilies({"cf1", "cf2"}, options); - ASSERT_OK(Put(0, "key1", DummyString(8192))); - ASSERT_OK(Put(0, "key2", DummyString(8192))); - // the oldest wal should now be getting_flushed - ASSERT_OK(db_->DropColumnFamily(handles_[0])); - // all flushes should now do nothing because their CF is dropped - TEST_SYNC_POINT("Test:AllowFlushes"); - TEST_SYNC_POINT("Test:WaitForFlush"); - uint64_t lognum1 = dbfull()->TEST_LogfileNumber(); - ASSERT_OK(Put(1, "key3", DummyString(8192))); - ASSERT_OK(Put(1, "key4", DummyString(8192))); - // new wal should have been created - uint64_t lognum2 = dbfull()->TEST_LogfileNumber(); - EXPECT_GT(lognum2, lognum1); -} - -TEST_F(DBTest, UnsupportedManualSync) { - DestroyAndReopen(CurrentOptions()); - env_->is_wal_sync_thread_safe_.store(false); - Status s = db_->SyncWAL(); - ASSERT_TRUE(s.IsNotSupported()); -} - -INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam, - ::testing::Combine(::testing::Values(1, 4), - ::testing::Bool())); - -TEST_F(DBTest, PauseBackgroundWorkTest) { - Options options = CurrentOptions(); - options.write_buffer_size = 100000; // Small write buffer - Reopen(options); - - std::vector threads; - std::atomic done(false); - ASSERT_OK(db_->PauseBackgroundWork()); - threads.emplace_back([&]() { - Random rnd(301); - for (int i = 0; i < 10000; ++i) { - ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10))); - } - done.store(true); - }); - env_->SleepForMicroseconds(200000); - // make sure the thread is not done - ASSERT_FALSE(done.load()); - ASSERT_OK(db_->ContinueBackgroundWork()); - for (auto& t : threads) { - t.join(); - } - // now it's done - ASSERT_TRUE(done.load()); -} - -// Keep spawning short-living threads that create an iterator and quit. -// Meanwhile in another thread keep flushing memtables. -// This used to cause a deadlock. 
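// A minimal standalone sketch of the access pattern described in the comment
// above (and exercised by ThreadLocalPtrDeadlock below): one thread keeps
// flushing while short-lived threads each open an iterator and exit. Path and
// loop counts are arbitrary.
#include <atomic>
#include <cassert>
#include <string>
#include <thread>
#include "rocksdb/db.h"

void IteratorsOnShortLivedThreadsSketch() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/iter_thread_example", &db).ok());

  std::atomic<bool> stop{false};
  std::thread flusher([&] {
    for (int i = 0; i < 20; ++i) {
      assert(db->Put(rocksdb::WriteOptions(), "hi", std::to_string(i)).ok());
      assert(db->Flush(rocksdb::FlushOptions()).ok());
    }
    stop = true;
  });

  while (!stop) {
    // Each iterator is created and destroyed on its own short-lived thread.
    std::thread reader([&] {
      rocksdb::Iterator* it = db->NewIterator(rocksdb::ReadOptions());
      it->SeekToFirst();
      delete it;
    });
    reader.join();
  }

  flusher.join();
  delete db;
}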
-TEST_F(DBTest, ThreadLocalPtrDeadlock) { - std::atomic flushes_done{0}; - std::atomic threads_destroyed{0}; - auto done = [&] { return flushes_done.load() > 10; }; - - port::Thread flushing_thread([&] { - for (int i = 0; !done(); ++i) { - ASSERT_OK(db_->Put(WriteOptions(), Slice("hi"), - Slice(std::to_string(i).c_str()))); - ASSERT_OK(db_->Flush(FlushOptions())); - int cnt = ++flushes_done; - fprintf(stderr, "Flushed %d times\n", cnt); - } - }); - - std::vector thread_spawning_threads(10); - for (auto& t : thread_spawning_threads) { - t = port::Thread([&] { - while (!done()) { - { - port::Thread tmp_thread([&] { - auto it = db_->NewIterator(ReadOptions()); - ASSERT_OK(it->status()); - delete it; - }); - tmp_thread.join(); - } - ++threads_destroyed; - } - }); - } - - for (auto& t : thread_spawning_threads) { - t.join(); - } - flushing_thread.join(); - fprintf(stderr, "Done. Flushed %d times, destroyed %d threads\n", - flushes_done.load(), threads_destroyed.load()); -} - -TEST_F(DBTest, LargeBlockSizeTest) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put(0, "foo", "bar")); - BlockBasedTableOptions table_options; - table_options.block_size = 8LL * 1024 * 1024 * 1024LL; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - ASSERT_NOK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); -} - - -TEST_F(DBTest, CreationTimeOfOldestFile) { - const int kNumKeysPerFile = 32; - const int kNumLevelFiles = 2; - const int kValueSize = 100; - - Options options = CurrentOptions(); - options.max_open_files = -1; - env_->SetMockSleep(); - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - DestroyAndReopen(options); - - bool set_file_creation_time_to_zero = true; - int idx = 0; - - int64_t time_1 = 0; - env_->GetCurrentTime(&time_1); - const uint64_t uint_time_1 = static_cast(time_1); - - // Add 50 hours - env_->MockSleepForSeconds(50 * 60 * 60); - - int64_t time_2 = 0; - env_->GetCurrentTime(&time_2); - const uint64_t uint_time_2 = static_cast(time_2); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) { - TableProperties* props = reinterpret_cast(arg); - if (set_file_creation_time_to_zero) { - if (idx == 0) { - props->file_creation_time = 0; - idx++; - } else if (idx == 1) { - props->file_creation_time = uint_time_1; - idx = 0; - } - } else { - if (idx == 0) { - props->file_creation_time = uint_time_1; - idx++; - } else if (idx == 1) { - props->file_creation_time = uint_time_2; - } - } - }); - // Set file creation time in manifest all to 0. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "FileMetaData::FileMetaData", [&](void* arg) { - FileMetaData* meta = static_cast(arg); - meta->file_creation_time = 0; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - } - - // At this point there should be 2 files, one with file_creation_time = 0 and - // the other non-zero. GetCreationTimeOfOldestFile API should return 0. - uint64_t creation_time; - Status s1 = dbfull()->GetCreationTimeOfOldestFile(&creation_time); - ASSERT_EQ(0, creation_time); - ASSERT_EQ(s1, Status::OK()); - - // Testing with non-zero file creation time. 
- set_file_creation_time_to_zero = false; - options = CurrentOptions(); - options.max_open_files = -1; - options.env = env_; - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - DestroyAndReopen(options); - - for (int i = 0; i < kNumLevelFiles; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); - } - ASSERT_OK(Flush()); - } - - // At this point there should be 2 files with non-zero file creation time. - // GetCreationTimeOfOldestFile API should return non-zero value. - uint64_t ctime; - Status s2 = dbfull()->GetCreationTimeOfOldestFile(&ctime); - ASSERT_EQ(uint_time_1, ctime); - ASSERT_EQ(s2, Status::OK()); - - // Testing with max_open_files != -1 - options = CurrentOptions(); - options.max_open_files = 10; - DestroyAndReopen(options); - Status s3 = dbfull()->GetCreationTimeOfOldestFile(&ctime); - ASSERT_EQ(s3, Status::NotSupported()); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBTest, MemoryUsageWithMaxWriteBufferSizeToMaintain) { - Options options = CurrentOptions(); - options.max_write_buffer_size_to_maintain = 10000; - options.write_buffer_size = 160000; - Reopen(options); - Random rnd(301); - bool memory_limit_exceeded = false; - - ColumnFamilyData* cfd = - static_cast(db_->DefaultColumnFamily())->cfd(); - - for (int i = 0; i < 1000; i++) { - std::string value = rnd.RandomString(1000); - ASSERT_OK(Put("keykey_" + std::to_string(i), value)); - - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - const uint64_t cur_active_mem = cfd->mem()->ApproximateMemoryUsage(); - const uint64_t size_all_mem_table = - cur_active_mem + cfd->imm()->ApproximateMemoryUsage(); - - // Errors out if memory usage keeps on increasing beyond the limit. - // Once memory limit exceeds, memory_limit_exceeded is set and if - // size_all_mem_table doesn't drop out in the next write then it errors out - // (not expected behaviour). If memory usage drops then - // memory_limit_exceeded is set to false. - if ((size_all_mem_table > cur_active_mem) && - (cur_active_mem >= - static_cast(options.max_write_buffer_size_to_maintain)) && - (size_all_mem_table > - static_cast(options.max_write_buffer_size_to_maintain) + - options.write_buffer_size)) { - ASSERT_FALSE(memory_limit_exceeded); - memory_limit_exceeded = true; - } else { - memory_limit_exceeded = false; - } - } -} - -TEST_F(DBTest, ShuttingDownNotBlockStalledWrites) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - Reopen(options); - Random rnd(403); - - for (int i = 0; i < 20; i++) { - ASSERT_OK(Put("key_" + std::to_string(i), rnd.RandomString(10))); - ASSERT_OK(Flush()); - } - ASSERT_EQ(GetSstFileCount(dbname_), 20); - - // We need !disable_auto_compactions for writes to stall but also want to - // delay compaction so stalled writes unblocked due to kShutdownInProgress. BG - // compaction will first wait for the sync point - // DBTest::ShuttingDownNotBlockStalledWrites. Then it waits extra 2 sec to - // allow CancelAllBackgroundWork() to set shutting_down_. 
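// A rough sketch of the shutdown call mentioned in the comment above.
// CancelAllBackgroundWork() comes from rocksdb/convenience.h; with wait=true it
// stops scheduling new flushes/compactions and blocks until running jobs
// finish, after which stalled writers are expected to fail with
// Status::ShutdownInProgress instead of hanging.
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"

void ShutdownSketch(rocksdb::DB* db) {
  rocksdb::CancelAllBackgroundWork(db, /*wait=*/true);
  rocksdb::Status s = db->Close();
  if (!s.ok()) {
    // A sketch only; real code would log or surface the error.
  }
  delete db;
}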
- SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", - [&](void* /* arg */) { env_->SleepForMicroseconds(2 * 1000 * 1000); }); - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::DelayWrite:Wait", "DBTest::ShuttingDownNotBlockStalledWrites"}, - {"DBTest::ShuttingDownNotBlockStalledWrites", - "BackgroundCallCompaction:0"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - options.level0_stop_writes_trigger = 20; - options.disable_auto_compactions = false; - Reopen(options); - - std::thread thd([&]() { - Status s = Put("key_" + std::to_string(101), "101"); - ASSERT_EQ(s.code(), Status::kShutdownInProgress); - }); - - TEST_SYNC_POINT("DBTest::ShuttingDownNotBlockStalledWrites"); - CancelAllBackgroundWork(db_, true); - - thd.join(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc deleted file mode 100644 index bb6b67d9b..000000000 --- a/db/db_universal_compaction_test.cc +++ /dev/null @@ -1,2227 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/db_test_util.h" -#include "port/stack_trace.h" -#include "rocksdb/utilities/table_properties_collectors.h" -#include "test_util/sync_point.h" -#include "test_util/testutil.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -static std::string CompressibleString(Random* rnd, int len) { - std::string r; - test::CompressibleString(rnd, 0.8, len, &r); - return r; -} - -class DBTestUniversalCompactionBase - : public DBTestBase, - public ::testing::WithParamInterface> { - public: - explicit DBTestUniversalCompactionBase(const std::string& path) - : DBTestBase(path, /*env_do_fsync=*/false) {} - void SetUp() override { - num_levels_ = std::get<0>(GetParam()); - exclusive_manual_compaction_ = std::get<1>(GetParam()); - } - int num_levels_; - bool exclusive_manual_compaction_; -}; - -class DBTestUniversalCompaction : public DBTestUniversalCompactionBase { - public: - DBTestUniversalCompaction() - : DBTestUniversalCompactionBase("/db_universal_compaction_test") {} -}; - -class DBTestUniversalCompaction2 : public DBTestBase { - public: - DBTestUniversalCompaction2() - : DBTestBase("db_universal_compaction_test2", /*env_do_fsync=*/false) {} -}; - -namespace { -void VerifyCompactionResult( - const ColumnFamilyMetaData& cf_meta, - const std::set& overlapping_file_numbers) { -#ifndef NDEBUG - for (auto& level : cf_meta.levels) { - for (auto& file : level.files) { - assert(overlapping_file_numbers.find(file.name) == - overlapping_file_numbers.end()); - } - } -#endif -} - -class KeepFilter : public CompactionFilter { - public: - bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - return false; - } - - const char* Name() const override { return "KeepFilter"; } -}; - -class KeepFilterFactory : public 
CompactionFilterFactory {
- public:
-  explicit KeepFilterFactory(bool check_context = false)
-      : check_context_(check_context) {}
-
-  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
-      const CompactionFilter::Context& context) override {
-    if (check_context_) {
-      EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
-      EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
-    }
-    return std::unique_ptr<CompactionFilter>(new KeepFilter());
-  }
-
-  const char* Name() const override { return "KeepFilterFactory"; }
-  bool check_context_;
-  std::atomic_bool expect_full_compaction_;
-  std::atomic_bool expect_manual_compaction_;
-};
-}  // anonymous namespace
-
-// Make sure we don't trigger a problem if the trigger condition is given
-// as 0, which is invalid.
-TEST_P(DBTestUniversalCompaction, UniversalCompactionSingleSortedRun) {
-  Options options = CurrentOptions();
-
-  options.compaction_style = kCompactionStyleUniversal;
-  options.num_levels = num_levels_;
-  // Config universal compaction to always compact to one single sorted run.
-  options.level0_file_num_compaction_trigger = 0;
-  options.compaction_options_universal.size_ratio = 10;
-  options.compaction_options_universal.min_merge_width = 2;
-  options.compaction_options_universal.max_size_amplification_percent = 0;
-
-  options.write_buffer_size = 105 << 10;     // 105KB
-  options.arena_block_size = 4 << 10;
-  options.target_file_size_base = 32 << 10;  // 32KB
-  // trigger compaction if there are >= 4 files
-  KeepFilterFactory* filter = new KeepFilterFactory(true);
-  filter->expect_manual_compaction_.store(false);
-  options.compaction_filter_factory.reset(filter);
-
-  DestroyAndReopen(options);
-  ASSERT_EQ(1, db_->GetOptions().level0_file_num_compaction_trigger);
-
-  Random rnd(301);
-  int key_idx = 0;
-
-  filter->expect_full_compaction_.store(true);
-
-  for (int num = 0; num < 16; num++) {
-    // Write 100KB file. And immediately it should be compacted to one file.
- GenerateNewFile(&rnd, &key_idx); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumSortedRuns(0), 1); - } - ASSERT_OK(Put(Key(key_idx), "")); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumSortedRuns(0), 1); -} - -TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.compaction_options_universal.size_ratio = 5; - options.num_levels = num_levels_; - options.write_buffer_size = 105 << 10; // 105KB - options.arena_block_size = 4 << 10; - options.target_file_size_base = 32 << 10; // 32KB - // trigger compaction if there are >= 4 files - options.level0_file_num_compaction_trigger = 4; - BlockBasedTableOptions bbto; - bbto.cache_index_and_filter_blocks = true; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.optimize_filters_for_hits = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.memtable_factory.reset(test::NewSpecialSkipListFactory(3)); - - DestroyAndReopen(options); - - // block compaction from happening - env_->SetBackgroundThreads(1, Env::LOW); - test::SleepingBackgroundTask sleeping_task_low; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::LOW); - - for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { - ASSERT_OK(Put(Key(num * 10), "val")); - if (num) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_OK(Put(Key(30 + num * 10), "val")); - ASSERT_OK(Put(Key(60 + num * 10), "val")); - } - ASSERT_OK(Put("", "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - - // Query set of non existing keys - for (int i = 5; i < 90; i += 10) { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - - // Make sure bloom filter is used at least once. - ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - auto prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL); - - // Make sure bloom filter is used for all but the last L0 file when looking - // up a non-existent key that's in the range of all L0 files. - ASSERT_EQ(Get(Key(35)), "NOT_FOUND"); - ASSERT_EQ(prev_counter + NumTableFilesAtLevel(0) - 1, - TestGetTickerCount(options, BLOOM_FILTER_USEFUL)); - prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL); - - // Unblock compaction and wait it for happening. - sleeping_task_low.WakeUp(); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // The same queries will not trigger bloom filter - for (int i = 5; i < 90; i += 10) { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - ASSERT_EQ(prev_counter, TestGetTickerCount(options, BLOOM_FILTER_USEFUL)); -} - -// TODO(kailiu) The tests on UniversalCompaction has some issues: -// 1. A lot of magic numbers ("11" or "12"). -// 2. Made assumption on the memtable flush conditions, which may change from -// time to time. 
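// Before the UniversalCompactionTrigger test below, a compact reference sketch
// of the universal-compaction knobs these tests keep re-using. The values
// mirror the tests above and are not a tuning recommendation.
#include <cassert>
#include "rocksdb/db.h"
#include "rocksdb/options.h"

void OpenUniversalCompactionDbSketch() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.compaction_style = rocksdb::kCompactionStyleUniversal;
  options.write_buffer_size = 105 << 10;            // ~105KB memtables
  options.target_file_size_base = 32 << 10;         // ~32KB output files
  options.level0_file_num_compaction_trigger = 4;   // compact at >= 4 sorted runs
  options.compaction_options_universal.size_ratio = 5;
  options.compaction_options_universal.min_merge_width = 2;
  options.compaction_options_universal.max_size_amplification_percent = 200;

  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/universal_example", &db).ok());
  delete db;
}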
-TEST_P(DBTestUniversalCompaction, UniversalCompactionTrigger) { - Options options; - options.compaction_style = kCompactionStyleUniversal; - options.compaction_options_universal.size_ratio = 5; - options.num_levels = num_levels_; - options.write_buffer_size = 105 << 10; // 105KB - options.arena_block_size = 4 << 10; - options.target_file_size_base = 32 << 10; // 32KB - // trigger compaction if there are >= 4 files - options.level0_file_num_compaction_trigger = 4; - KeepFilterFactory* filter = new KeepFilterFactory(true); - filter->expect_manual_compaction_.store(false); - options.compaction_filter_factory.reset(filter); - - options = CurrentOptions(options); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBTestWritableFile.GetPreallocationStatus", [&](void* arg) { - ASSERT_TRUE(arg != nullptr); - size_t preallocation_size = *(static_cast(arg)); - if (num_levels_ > 3) { - ASSERT_LE(preallocation_size, options.target_file_size_base * 1.1); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - int key_idx = 0; - - filter->expect_full_compaction_.store(true); - // Stage 1: - // Generate a set of files at level 0, but don't trigger level-0 - // compaction. - for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; - num++) { - // Write 100KB - GenerateNewFile(1, &rnd, &key_idx); - } - - // Generate one more file at level-0, which should trigger level-0 - // compaction. - GenerateNewFile(1, &rnd, &key_idx); - // Suppose each file flushed from mem table has size 1. Now we compact - // (level0_file_num_compaction_trigger+1)=4 files and should have a big - // file of size 4. - ASSERT_EQ(NumSortedRuns(1), 1); - - // Stage 2: - // Now we have one file at level 0, with size 4. We also have some data in - // mem table. Let's continue generating new files at level 0, but don't - // trigger level-0 compaction. - // First, clean up memtable before inserting new data. This will generate - // a level-0 file, with size around 0.4 (according to previously written - // data amount). - filter->expect_full_compaction_.store(false); - ASSERT_OK(Flush(1)); - for (int num = 0; num < options.level0_file_num_compaction_trigger - 3; - num++) { - GenerateNewFile(1, &rnd, &key_idx); - ASSERT_EQ(NumSortedRuns(1), num + 3); - } - - // Generate one more file at level-0, which should trigger level-0 - // compaction. - GenerateNewFile(1, &rnd, &key_idx); - // Before compaction, we have 4 files at level 0, with size 4, 0.4, 1, 1. - // After compaction, we should have 2 files, with size 4, 2.4. - ASSERT_EQ(NumSortedRuns(1), 2); - - // Stage 3: - // Now we have 2 files at level 0, with size 4 and 2.4. Continue - // generating new files at level 0. - for (int num = 0; num < options.level0_file_num_compaction_trigger - 3; - num++) { - GenerateNewFile(1, &rnd, &key_idx); - ASSERT_EQ(NumSortedRuns(1), num + 3); - } - - // Generate one more file at level-0, which should trigger level-0 - // compaction. - GenerateNewFile(1, &rnd, &key_idx); - // Before compaction, we have 4 files at level 0, with size 4, 2.4, 1, 1. - // After compaction, we should have 3 files, with size 4, 2.4, 2. - ASSERT_EQ(NumSortedRuns(1), 3); - - // Stage 4: - // Now we have 3 files at level 0, with size 4, 2.4, 2. Let's generate a - // new file of size 1. - GenerateNewFile(1, &rnd, &key_idx); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Level-0 compaction is triggered, but no file will be picked up. 
- ASSERT_EQ(NumSortedRuns(1), 4); - - // Stage 5: - // Now we have 4 files at level 0, with size 4, 2.4, 2, 1. Let's generate - // a new file of size 1. - filter->expect_full_compaction_.store(true); - GenerateNewFile(1, &rnd, &key_idx); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // All files at level 0 will be compacted into a single one. - ASSERT_EQ(NumSortedRuns(1), 1); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_P(DBTestUniversalCompaction, UniversalCompactionSizeAmplification) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = num_levels_; - options.write_buffer_size = 100 << 10; // 100KB - options.target_file_size_base = 32 << 10; // 32KB - options.level0_file_num_compaction_trigger = 3; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Trigger compaction if size amplification exceeds 110% - options.compaction_options_universal.max_size_amplification_percent = 110; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - Random rnd(301); - int key_idx = 0; - - // Generate two files in Level 0. Both files are approx the same size. - for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; - num++) { - // Write 110KB (11 values, each 10K) - for (int i = 0; i < 11; i++) { - ASSERT_OK(Put(1, Key(key_idx), rnd.RandomString(10000))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - ASSERT_EQ(NumSortedRuns(1), num + 1); - } - ASSERT_EQ(NumSortedRuns(1), 2); - - // Flush whatever is remaining in memtable. This is typically - // small, which should not trigger size ratio based compaction - // but will instead trigger size amplification. - ASSERT_OK(Flush(1)); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // Verify that size amplification did occur - ASSERT_EQ(NumSortedRuns(1), 1); -} - -TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionSizeAmplification) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 1; - options.write_buffer_size = 100 << 10; // 100KB - options.target_file_size_base = 32 << 10; // 32KB - options.level0_file_num_compaction_trigger = 3; - // Initial setup of compaction_options_universal will prevent universal - // compaction from happening - options.compaction_options_universal.size_ratio = 100; - options.compaction_options_universal.min_merge_width = 100; - DestroyAndReopen(options); - - int total_picked_compactions = 0; - int total_size_amp_compactions = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) { - if (arg) { - total_picked_compactions++; - Compaction* c = static_cast(arg); - if (c->compaction_reason() == - CompactionReason::kUniversalSizeAmplification) { - total_size_amp_compactions++; - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - MutableCFOptions mutable_cf_options; - CreateAndReopenWithCF({"pikachu"}, options); - - Random rnd(301); - int key_idx = 0; - - // Generate two files in Level 0. Both files are approx the same size. 
- for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; - num++) { - // Write 110KB (11 values, each 10K) - for (int i = 0; i < 11; i++) { - ASSERT_OK(Put(1, Key(key_idx), rnd.RandomString(10000))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - ASSERT_EQ(NumSortedRuns(1), num + 1); - } - ASSERT_EQ(NumSortedRuns(1), 2); - - // Flush whatever is remaining in memtable. This is typically - // small, which should not trigger size ratio based compaction - // but could instead trigger size amplification if it's set - // to 110. - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Verify compaction did not happen - ASSERT_EQ(NumSortedRuns(1), 3); - - // Trigger compaction if size amplification exceeds 110% without reopening DB - ASSERT_EQ(dbfull() - ->GetOptions(handles_[1]) - .compaction_options_universal.max_size_amplification_percent, - 200U); - ASSERT_OK(dbfull()->SetOptions(handles_[1], - {{"compaction_options_universal", - "{max_size_amplification_percent=110;}"}})); - ASSERT_EQ(dbfull() - ->GetOptions(handles_[1]) - .compaction_options_universal.max_size_amplification_percent, - 110u); - ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], - &mutable_cf_options)); - ASSERT_EQ(110u, mutable_cf_options.compaction_options_universal - .max_size_amplification_percent); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Verify that size amplification did happen - ASSERT_EQ(NumSortedRuns(1), 1); - ASSERT_EQ(total_picked_compactions, 1); - ASSERT_EQ(total_size_amp_compactions, 1); -} - -TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionReadAmplification) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 1; - options.write_buffer_size = 100 << 10; // 100KB - options.target_file_size_base = 32 << 10; // 32KB - options.level0_file_num_compaction_trigger = 3; - // Initial setup of compaction_options_universal will prevent universal - // compaction from happening - options.compaction_options_universal.max_size_amplification_percent = 2000; - options.compaction_options_universal.size_ratio = 0; - options.compaction_options_universal.min_merge_width = 100; - DestroyAndReopen(options); - - int total_picked_compactions = 0; - int total_size_ratio_compactions = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) { - if (arg) { - total_picked_compactions++; - Compaction* c = static_cast(arg); - if (c->compaction_reason() == CompactionReason::kUniversalSizeRatio) { - total_size_ratio_compactions++; - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - MutableCFOptions mutable_cf_options; - CreateAndReopenWithCF({"pikachu"}, options); - - Random rnd(301); - int key_idx = 0; - - // Generate three files in Level 0. All files are approx the same size. - for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { - // Write 110KB (11 values, each 10K) - for (int i = 0; i < 11; i++) { - ASSERT_OK(Put(1, Key(key_idx), rnd.RandomString(10000))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - ASSERT_EQ(NumSortedRuns(1), num + 1); - } - ASSERT_EQ(NumSortedRuns(1), options.level0_file_num_compaction_trigger); - - // Flush whatever is remaining in memtable. This is typically small, about - // 30KB. 
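// Sketch of the runtime-tuning call the two Dynamic* tests above depend on:
// CompactionOptionsUniversal is a mutable column-family option, so SetOptions()
// can change it on a live DB using the same "{key=value;...}" string syntax the
// tests pass in.
#include <cassert>
#include "rocksdb/db.h"

void TuneUniversalCompactionAtRuntimeSketch(rocksdb::DB* db) {
  rocksdb::Status s = db->SetOptions(
      db->DefaultColumnFamily(),
      {{"compaction_options_universal",
        "{max_size_amplification_percent=110;}"}});
  assert(s.ok());

  // Read the option back to confirm the new value is visible.
  rocksdb::Options current = db->GetOptions(db->DefaultColumnFamily());
  assert(current.compaction_options_universal.max_size_amplification_percent ==
         110u);
}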
- ASSERT_OK(Flush(1));
-  ASSERT_OK(dbfull()->TEST_WaitForCompact());
-  // Verify compaction did not happen
-  ASSERT_EQ(NumSortedRuns(1), options.level0_file_num_compaction_trigger + 1);
-  ASSERT_EQ(total_picked_compactions, 0);
-
-  ASSERT_OK(dbfull()->SetOptions(
-      handles_[1],
-      {{"compaction_options_universal",
-        "{min_merge_width=2;max_merge_width=2;size_ratio=100;}"}}));
-  ASSERT_EQ(dbfull()
-                ->GetOptions(handles_[1])
-                .compaction_options_universal.min_merge_width,
-            2u);
-  ASSERT_EQ(dbfull()
-                ->GetOptions(handles_[1])
-                .compaction_options_universal.max_merge_width,
-            2u);
-  ASSERT_EQ(
-      dbfull()->GetOptions(handles_[1]).compaction_options_universal.size_ratio,
-      100u);
-
-  ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
-                                                     &mutable_cf_options));
-  ASSERT_EQ(mutable_cf_options.compaction_options_universal.size_ratio, 100u);
-  ASSERT_EQ(mutable_cf_options.compaction_options_universal.min_merge_width,
-            2u);
-  ASSERT_EQ(mutable_cf_options.compaction_options_universal.max_merge_width,
-            2u);
-
-  ASSERT_OK(dbfull()->TEST_WaitForCompact());
-
-  // Files in L0 are approx: 0.3 (30KB), 1, 1, 1.
-  // On compaction: the files are below the size amp threshold, so we fall
-  // through to checking read amp conditions. The configured size ratio is
-  // not big enough to take 0.3 into consideration. So the next files 1 and 1
-  // are compacted together first, as they satisfy the size ratio condition and
-  // the (min_merge_width, max_merge_width) condition, producing a file of
-  // size 2. Next, the newly generated 2 and the last file 1 are compacted
-  // together. So at the end: #sortedRuns = 2, #picked_compactions = 2, and all
-  // the picked ones are size ratio based compactions.
-  ASSERT_EQ(NumSortedRuns(1), 2);
-  // If max_merge_width had not been changed dynamically above, and if it
-  // had kept its default value of UINT_MAX, total_picked_compactions
-  // would have been 1.
- ASSERT_EQ(total_picked_compactions, 2); - ASSERT_EQ(total_size_ratio_compactions, 2); -} - -TEST_P(DBTestUniversalCompaction, CompactFilesOnUniversalCompaction) { - const int kTestKeySize = 16; - const int kTestValueSize = 984; - const int kEntrySize = kTestKeySize + kTestValueSize; - const int kEntriesPerBuffer = 10; - - ChangeCompactOptions(); - Options options; - options.create_if_missing = true; - options.compaction_style = kCompactionStyleLevel; - options.num_levels = 1; - options.target_file_size_base = options.write_buffer_size; - options.compression = kNoCompression; - options = CurrentOptions(options); - options.write_buffer_size = kEntrySize * kEntriesPerBuffer; - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_EQ(options.compaction_style, kCompactionStyleUniversal); - Random rnd(301); - for (int key = 1024 * kEntriesPerBuffer; key >= 0; --key) { - ASSERT_OK(Put(1, std::to_string(key), rnd.RandomString(kTestValueSize))); - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ColumnFamilyMetaData cf_meta; - dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); - std::vector compaction_input_file_names; - for (auto file : cf_meta.levels[0].files) { - if (rnd.OneIn(2)) { - compaction_input_file_names.push_back(file.name); - } - } - - if (compaction_input_file_names.size() == 0) { - compaction_input_file_names.push_back(cf_meta.levels[0].files[0].name); - } - - // expect fail since universal compaction only allow L0 output - ASSERT_FALSE(dbfull() - ->CompactFiles(CompactionOptions(), handles_[1], - compaction_input_file_names, 1) - .ok()); - - // expect ok and verify the compacted files no longer exist. - ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), handles_[1], - compaction_input_file_names, 0)); - - dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); - VerifyCompactionResult( - cf_meta, std::set(compaction_input_file_names.begin(), - compaction_input_file_names.end())); - - compaction_input_file_names.clear(); - - // Pick the first and the last file, expect everything is - // compacted into one single file. 
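// Sketch of the manual CompactFiles() flow used above: collect live L0 file
// names from GetColumnFamilyMetaData() and compact them back into level 0, the
// only output level the universal-compaction case above accepts.
#include <cassert>
#include <string>
#include <vector>
#include "rocksdb/db.h"
#include "rocksdb/metadata.h"

void CompactPickedL0FilesSketch(rocksdb::DB* db) {
  rocksdb::ColumnFamilyMetaData cf_meta;
  db->GetColumnFamilyMetaData(db->DefaultColumnFamily(), &cf_meta);

  std::vector<std::string> input_files;
  for (const auto& file : cf_meta.levels[0].files) {
    input_files.push_back(file.name);
  }
  if (input_files.empty()) {
    return;  // nothing to compact yet
  }

  // Output level 0; a non-zero output level is rejected in the test above.
  rocksdb::Status s = db->CompactFiles(rocksdb::CompactionOptions(),
                                       input_files, /*output_level=*/0);
  assert(s.ok());
}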
- compaction_input_file_names.push_back(cf_meta.levels[0].files[0].name); - compaction_input_file_names.push_back( - cf_meta.levels[0].files[cf_meta.levels[0].files.size() - 1].name); - ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), handles_[1], - compaction_input_file_names, 0)); - - dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); - ASSERT_EQ(cf_meta.levels[0].files.size(), 1U); -} - -TEST_P(DBTestUniversalCompaction, UniversalCompactionTargetLevel) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.write_buffer_size = 100 << 10; // 100KB - options.num_levels = 7; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - // Generate 3 overlapping files - Random rnd(301); - for (int i = 0; i < 210; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(100))); - } - ASSERT_OK(Flush()); - - for (int i = 200; i < 300; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(100))); - } - ASSERT_OK(Flush()); - - for (int i = 250; i < 260; i++) { - ASSERT_OK(Put(Key(i), rnd.RandomString(100))); - } - ASSERT_OK(Flush()); - - ASSERT_EQ("3", FilesPerLevel(0)); - // Compact all files into 1 file and put it in L4 - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 4; - compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0)); -} - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -class DBTestUniversalCompactionMultiLevels - : public DBTestUniversalCompactionBase { - public: - DBTestUniversalCompactionMultiLevels() - : DBTestUniversalCompactionBase( - "/db_universal_compaction_multi_levels_test") {} -}; - -TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionMultiLevels) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = num_levels_; - options.write_buffer_size = 100 << 10; // 100KB - options.level0_file_num_compaction_trigger = 8; - options.max_background_compactions = 3; - options.target_file_size_base = 32 * 1024; - CreateAndReopenWithCF({"pikachu"}, options); - - // Trigger compaction if size amplification exceeds 110% - options.compaction_options_universal.max_size_amplification_percent = 110; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - Random rnd(301); - int num_keys = 100000; - for (int i = 0; i < num_keys * 2; i++) { - ASSERT_OK(Put(1, Key(i % num_keys), Key(i))); - } - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - for (int i = num_keys; i < num_keys * 2; i++) { - ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i)); - } -} - -// Tests universal compaction with trivial move enabled -TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionTrivialMove) { - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { - non_trivial_move++; - ASSERT_TRUE(arg != nullptr); - int output_level = *(static_cast(arg)); - ASSERT_EQ(output_level, 0); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - 
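// Referring back to UniversalCompactionTargetLevel above: a sketch of the
// manual-compaction pattern it verifies, where CompactRange() over the whole
// key space with change_level=true places the resulting sorted run at the
// requested target level.
#include <cassert>
#include "rocksdb/db.h"

void CompactEverythingToLevelSketch(rocksdb::DB* db, int target_level) {
  rocksdb::CompactRangeOptions cro;
  cro.change_level = true;
  cro.target_level = target_level;  // e.g. 4, as in the test above
  // nullptr begin/end means "compact the entire key range".
  rocksdb::Status s = db->CompactRange(cro, nullptr, nullptr);
  assert(s.ok());
}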
options.compaction_options_universal.allow_trivial_move = true; - options.num_levels = 3; - options.write_buffer_size = 100 << 10; // 100KB - options.level0_file_num_compaction_trigger = 3; - options.max_background_compactions = 2; - options.target_file_size_base = 32 * 1024; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Trigger compaction if size amplification exceeds 110% - options.compaction_options_universal.max_size_amplification_percent = 110; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - Random rnd(301); - int num_keys = 150000; - for (int i = 0; i < num_keys; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - std::vector values; - - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_GT(trivial_move, 0); - ASSERT_GT(non_trivial_move, 0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -INSTANTIATE_TEST_CASE_P(MultiLevels, DBTestUniversalCompactionMultiLevels, - ::testing::Combine(::testing::Values(3, 20), - ::testing::Bool())); - -class DBTestUniversalCompactionParallel : public DBTestUniversalCompactionBase { - public: - DBTestUniversalCompactionParallel() - : DBTestUniversalCompactionBase("/db_universal_compaction_prallel_test") { - } -}; - -TEST_P(DBTestUniversalCompactionParallel, UniversalCompactionParallel) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = num_levels_; - options.env = env_; - options.write_buffer_size = 1 << 10; // 1KB - options.level0_file_num_compaction_trigger = 3; - options.max_background_compactions = 3; - options.max_background_flushes = 3; - options.target_file_size_base = 1 * 1024; - options.compaction_options_universal.max_size_amplification_percent = 110; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Delay every compaction so multiple compactions will happen. - std::atomic num_compactions_running(0); - std::atomic has_parallel(false); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::Run():Start", [&](void* /*arg*/) { - if (num_compactions_running.fetch_add(1) > 0) { - has_parallel.store(true); - return; - } - for (int nwait = 0; nwait < 20000; nwait++) { - if (has_parallel.load() || num_compactions_running.load() > 1) { - has_parallel.store(true); - break; - } - env_->SleepForMicroseconds(1000); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::Run():End", - [&](void* /*arg*/) { num_compactions_running.fetch_add(-1); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - Random rnd(301); - int num_keys = 30000; - for (int i = 0; i < num_keys * 2; i++) { - ASSERT_OK(Put(1, Key(i % num_keys), Key(i))); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_EQ(num_compactions_running.load(), 0); - ASSERT_TRUE(has_parallel.load()); - - for (int i = num_keys; i < num_keys * 2; i++) { - ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i)); - } - - // Reopen and check. 
- ReopenWithColumnFamilies({"default", "pikachu"}, options); - for (int i = num_keys; i < num_keys * 2; i++) { - ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i)); - } -} - -TEST_P(DBTestUniversalCompactionParallel, PickByFileNumberBug) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = num_levels_; - options.write_buffer_size = 1 * 1024; // 1KB - options.level0_file_num_compaction_trigger = 7; - options.max_background_compactions = 2; - options.target_file_size_base = 1024 * 1024; // 1MB - - // Disable size amplifiction compaction - options.compaction_options_universal.max_size_amplification_percent = - UINT_MAX; - DestroyAndReopen(options); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBTestUniversalCompactionParallel::PickByFileNumberBug:0", - "BackgroundCallCompaction:0"}, - {"UniversalCompactionBuilder::PickCompaction:Return", - "DBTestUniversalCompactionParallel::PickByFileNumberBug:1"}, - {"DBTestUniversalCompactionParallel::PickByFileNumberBug:2", - "CompactionJob::Run():Start"}}); - - int total_picked_compactions = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) { - if (arg) { - total_picked_compactions++; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Write 7 files to trigger compaction - int key_idx = 1; - for (int i = 1; i <= 70; i++) { - std::string k = Key(key_idx++); - ASSERT_OK(Put(k, k)); - if (i % 10 == 0) { - ASSERT_OK(Flush()); - } - } - - // Wait for the 1st background compaction process to start - TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:0"); - TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:1"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); - - // Write 3 files while 1st compaction is held - // These 3 files have different sizes to avoid compacting based on size_ratio - int num_keys = 1000; - for (int i = 0; i < 3; i++) { - for (int j = 1; j <= num_keys; j++) { - std::string k = Key(key_idx++); - ASSERT_OK(Put(k, k)); - } - ASSERT_OK(Flush()); - num_keys -= 100; - } - - // Hold the 1st compaction from finishing - TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:2"); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // There should only be one picked compaction as the score drops below one - // after the first one is picked. 
- EXPECT_EQ(total_picked_compactions, 1); - EXPECT_EQ(TotalTableFiles(), 4); - - // Stop SyncPoint and destroy the DB and reopen it again - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - key_idx = 1; - total_picked_compactions = 0; - DestroyAndReopen(options); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Write 7 files to trigger compaction - for (int i = 1; i <= 70; i++) { - std::string k = Key(key_idx++); - ASSERT_OK(Put(k, k)); - if (i % 10 == 0) { - ASSERT_OK(Flush()); - } - } - - // Wait for the 1st background compaction process to start - TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:0"); - TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:1"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); - - // Write 8 files while 1st compaction is held - // These 8 files have different sizes to avoid compacting based on size_ratio - num_keys = 1000; - for (int i = 0; i < 8; i++) { - for (int j = 1; j <= num_keys; j++) { - std::string k = Key(key_idx++); - ASSERT_OK(Put(k, k)); - } - ASSERT_OK(Flush()); - num_keys -= 100; - } - - // Wait for the 2nd background compaction process to start - TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:0"); - TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:1"); - - // Hold the 1st and 2nd compaction from finishing - TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:2"); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // This time we will trigger a compaction because of size ratio and - // another compaction because of number of files that are not compacted - // greater than 7 - EXPECT_GE(total_picked_compactions, 2); -} - -INSTANTIATE_TEST_CASE_P(Parallel, DBTestUniversalCompactionParallel, - ::testing::Combine(::testing::Values(1, 10), - ::testing::Values(false))); -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_P(DBTestUniversalCompaction, UniversalCompactionOptions) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.write_buffer_size = 105 << 10; // 105KB - options.arena_block_size = 4 << 10; // 4KB - options.target_file_size_base = 32 << 10; // 32KB - options.level0_file_num_compaction_trigger = 4; - options.num_levels = num_levels_; - options.compaction_options_universal.compression_size_percent = -1; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - Random rnd(301); - int key_idx = 0; - - for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { - // Write 100KB (100 values, each 1K) - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(1, Key(key_idx), rnd.RandomString(990))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - - if (num < options.level0_file_num_compaction_trigger - 1) { - ASSERT_EQ(NumSortedRuns(1), num + 1); - } - } - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(NumSortedRuns(1), 1); -} - -TEST_P(DBTestUniversalCompaction, UniversalCompactionStopStyleSimilarSize) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.write_buffer_size = 105 << 10; // 105KB - options.arena_block_size = 4 << 10; // 4KB - options.target_file_size_base = 32 << 10; // 32KB - // trigger compaction if there are >= 4 files - options.level0_file_num_compaction_trigger = 4; - 
options.compaction_options_universal.size_ratio = 10; - options.compaction_options_universal.stop_style = - kCompactionStopStyleSimilarSize; - options.num_levels = num_levels_; - DestroyAndReopen(options); - - Random rnd(301); - int key_idx = 0; - - // Stage 1: - // Generate a set of files at level 0, but don't trigger level-0 - // compaction. - for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; - num++) { - // Write 100KB (100 values, each 1K) - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(key_idx), rnd.RandomString(990))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(NumSortedRuns(), num + 1); - } - - // Generate one more file at level-0, which should trigger level-0 - // compaction. - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(key_idx), rnd.RandomString(990))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Suppose each file flushed from mem table has size 1. Now we compact - // (level0_file_num_compaction_trigger+1)=4 files and should have a big - // file of size 4. - ASSERT_EQ(NumSortedRuns(), 1); - - // Stage 2: - // Now we have one file at level 0, with size 4. We also have some data in - // mem table. Let's continue generating new files at level 0, but don't - // trigger level-0 compaction. - // First, clean up memtable before inserting new data. This will generate - // a level-0 file, with size around 0.4 (according to previously written - // data amount). - ASSERT_OK(dbfull()->Flush(FlushOptions())); - for (int num = 0; num < options.level0_file_num_compaction_trigger - 3; - num++) { - // Write 110KB (11 values, each 10K) - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(key_idx), rnd.RandomString(990))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(NumSortedRuns(), num + 3); - } - - // Generate one more file at level-0, which should trigger level-0 - // compaction. - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(key_idx), rnd.RandomString(990))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Before compaction, we have 4 files at level 0, with size 4, 0.4, 1, 1. - // After compaction, we should have 3 files, with size 4, 0.4, 2. - ASSERT_EQ(NumSortedRuns(), 3); - // Stage 3: - // Now we have 3 files at level 0, with size 4, 0.4, 2. Generate one - // more file at level-0, which should trigger level-0 compaction. - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(key_idx), rnd.RandomString(990))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Level-0 compaction is triggered, but no file will be picked up. - ASSERT_EQ(NumSortedRuns(), 4); -} - -TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio1) { - if (!Snappy_Supported()) { - return; - } - - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.write_buffer_size = 100 << 10; // 100KB - options.target_file_size_base = 32 << 10; // 32KB - options.level0_file_num_compaction_trigger = 2; - options.num_levels = num_levels_; - options.compaction_options_universal.compression_size_percent = 70; - DestroyAndReopen(options); - - Random rnd(301); - int key_idx = 0; - - // The first compaction (2) is compressed. 
- for (int num = 0; num < 2; num++) { - // Write 110KB (11 values, each 10K) - for (int i = 0; i < 11; i++) { - ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_LT(TotalSize(), 110000U * 2 * 0.9); - - // The second compaction (4) is compressed - for (int num = 0; num < 2; num++) { - // Write 110KB (11 values, each 10K) - for (int i = 0; i < 11; i++) { - ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_LT(TotalSize(), 110000 * 4 * 0.9); - - // The third compaction (2 4) is compressed since this time it is - // (1 1 3.2) and 3.2/5.2 doesn't reach ratio. - for (int num = 0; num < 2; num++) { - // Write 110KB (11 values, each 10K) - for (int i = 0; i < 11; i++) { - ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_LT(TotalSize(), 110000 * 6 * 0.9); - - // When we start for the compaction up to (2 4 8), the latest - // compressed is not compressed. - for (int num = 0; num < 8; num++) { - // Write 110KB (11 values, each 10K) - for (int i = 0; i < 11; i++) { - ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_GT(TotalSize(), 110000 * 11 * 0.8 + 110000 * 2); -} - -TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio2) { - if (!Snappy_Supported()) { - return; - } - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.write_buffer_size = 100 << 10; // 100KB - options.target_file_size_base = 32 << 10; // 32KB - options.level0_file_num_compaction_trigger = 2; - options.num_levels = num_levels_; - options.compaction_options_universal.compression_size_percent = 95; - DestroyAndReopen(options); - - Random rnd(301); - int key_idx = 0; - - // When we start for the compaction up to (2 4 8), the latest - // compressed is compressed given the size ratio to compress. 
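// A sketch for the compression_size_percent knob the two CompressRatio tests
// above probe. As those tests describe, a non-negative value asks universal
// compaction to keep roughly that percentage of the data compressed, leaving
// the newest (smallest) sorted runs uncompressed; -1 (the default) compresses
// everything according to options.compression.
#include <cassert>
#include "rocksdb/db.h"

void OpenWithPartialCompressionSketch() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.compaction_style = rocksdb::kCompactionStyleUniversal;
  options.compression = rocksdb::kSnappyCompression;  // requires Snappy support
  options.compaction_options_universal.compression_size_percent = 70;

  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/compress_ratio_example", &db).ok());
  delete db;
}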
- for (int num = 0; num < 14; num++) { - // Write 120KB (12 values, each 10K) - for (int i = 0; i < 12; i++) { - ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); - key_idx++; - } - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_LT(TotalSize(), 120000U * 12 * 0.82 + 120000 * 2); -} - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -// Test that checks trivial move in universal compaction -TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest1) { - int32_t trivial_move = 0; - int32_t non_trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { - non_trivial_move++; - ASSERT_TRUE(arg != nullptr); - int output_level = *(static_cast(arg)); - ASSERT_EQ(output_level, 0); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.compaction_options_universal.allow_trivial_move = true; - options.num_levels = 2; - options.write_buffer_size = 100 << 10; // 100KB - options.level0_file_num_compaction_trigger = 3; - options.max_background_compactions = 1; - options.target_file_size_base = 32 * 1024; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Trigger compaction if size amplification exceeds 110% - options.compaction_options_universal.max_size_amplification_percent = 110; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - Random rnd(301); - int num_keys = 250000; - for (int i = 0; i < num_keys; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - std::vector values; - - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_GT(trivial_move, 0); - ASSERT_GT(non_trivial_move, 0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} -// Test that checks trivial move in universal compaction -TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest2) { - int32_t trivial_move = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:TrivialMove", - [&](void* /*arg*/) { trivial_move++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { - ASSERT_TRUE(arg != nullptr); - int output_level = *(static_cast(arg)); - ASSERT_EQ(output_level, 0); - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.compaction_options_universal.allow_trivial_move = true; - options.num_levels = 15; - options.write_buffer_size = 100 << 10; // 100KB - options.level0_file_num_compaction_trigger = 8; - options.max_background_compactions = 2; - options.target_file_size_base = 64 * 1024; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - // Trigger compaction if size amplification exceeds 110% - options.compaction_options_universal.max_size_amplification_percent = 110; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - Random rnd(301); - int num_keys = 500000; - for (int i = 0; i < num_keys; i++) { - ASSERT_OK(Put(1, Key(i), 
Key(i))); - } - std::vector values; - - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_GT(trivial_move, 0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_P(DBTestUniversalCompaction, UniversalCompactionFourPaths) { - Options options = CurrentOptions(); - options.db_paths.emplace_back(dbname_, 300 * 1024); - options.db_paths.emplace_back(dbname_ + "_2", 300 * 1024); - options.db_paths.emplace_back(dbname_ + "_3", 500 * 1024); - options.db_paths.emplace_back(dbname_ + "_4", 1024 * 1024 * 1024); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - options.compaction_style = kCompactionStyleUniversal; - options.compaction_options_universal.size_ratio = 5; - options.write_buffer_size = 111 << 10; // 114KB - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 1; - - std::vector filenames; - if (env_->GetChildren(options.db_paths[1].path, &filenames).ok()) { - // Delete archival files. - for (size_t i = 0; i < filenames.size(); ++i) { - ASSERT_OK( - env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i])); - } - ASSERT_OK(env_->DeleteDir(options.db_paths[1].path)); - } - Reopen(options); - - Random rnd(301); - int key_idx = 0; - - // First three 110KB files are not going to second path. - // After that, (100K, 200K) - for (int num = 0; num < 3; num++) { - GenerateNewFile(&rnd, &key_idx); - } - - // Another 110KB triggers a compaction to 400K file to second path - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); - - // (1, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1,1,4) -> (2, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // (1, 2, 4) -> (3, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // (1, 3, 4) -> (8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); - - // (1, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 1, 8) -> (2, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - - // (1, 2, 8) -> (3, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // (1, 3, 8) -> (4, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); - - // (1, 4, 8) -> (5, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - 
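// Sketch of the multi-path layout UniversalCompactionFourPaths above exercises:
// each DbPath pairs a directory with a target size, and, as the comments in the
// test describe, larger compaction outputs spill into the later, larger paths.
// Directory names and byte budgets are placeholders.
#include <cassert>
#include "rocksdb/db.h"
#include "rocksdb/options.h"

void OpenWithMultipleDbPathsSketch() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.compaction_style = rocksdb::kCompactionStyleUniversal;
  options.db_paths.emplace_back("/tmp/multi_path_example", 300 * 1024);
  options.db_paths.emplace_back("/tmp/multi_path_example_2", 300 * 1024);
  options.db_paths.emplace_back("/tmp/multi_path_example_3", 500 * 1024);
  options.db_paths.emplace_back("/tmp/multi_path_example_4",
                                1024ull * 1024 * 1024);

  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/multi_path_example", &db).ok());
  delete db;
}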
Reopen(options); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - Destroy(options); -} - -TEST_P(DBTestUniversalCompaction, UniversalCompactionCFPathUse) { - Options options = CurrentOptions(); - options.db_paths.emplace_back(dbname_, 300 * 1024); - options.db_paths.emplace_back(dbname_ + "_2", 300 * 1024); - options.db_paths.emplace_back(dbname_ + "_3", 500 * 1024); - options.db_paths.emplace_back(dbname_ + "_4", 1024 * 1024 * 1024); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - options.compaction_style = kCompactionStyleUniversal; - options.compaction_options_universal.size_ratio = 10; - options.write_buffer_size = 111 << 10; // 114KB - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 1; - - std::vector option_vector; - option_vector.emplace_back(options); - ColumnFamilyOptions cf_opt1(options), cf_opt2(options); - // Configure CF1 specific paths. - cf_opt1.cf_paths.emplace_back(dbname_ + "cf1", 300 * 1024); - cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_2", 300 * 1024); - cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_3", 500 * 1024); - cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_4", 1024 * 1024 * 1024); - option_vector.emplace_back(DBOptions(options), cf_opt1); - CreateColumnFamilies({"one"}, option_vector[1]); - - // Configura CF2 specific paths. - cf_opt2.cf_paths.emplace_back(dbname_ + "cf2", 300 * 1024); - cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_2", 300 * 1024); - cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_3", 500 * 1024); - cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_4", 1024 * 1024 * 1024); - option_vector.emplace_back(DBOptions(options), cf_opt2); - CreateColumnFamilies({"two"}, option_vector[2]); - - ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); - - Random rnd(301); - int key_idx = 0; - int key_idx1 = 0; - int key_idx2 = 0; - - auto generate_file = [&]() { - GenerateNewFile(0, &rnd, &key_idx); - GenerateNewFile(1, &rnd, &key_idx1); - GenerateNewFile(2, &rnd, &key_idx2); - }; - - auto check_sstfilecount = [&](int path_id, int expected) { - ASSERT_EQ(expected, GetSstFileCount(options.db_paths[path_id].path)); - ASSERT_EQ(expected, GetSstFileCount(cf_opt1.cf_paths[path_id].path)); - ASSERT_EQ(expected, GetSstFileCount(cf_opt2.cf_paths[path_id].path)); - }; - - auto check_getvalues = [&]() { - for (int i = 0; i < key_idx; i++) { - auto v = Get(0, Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - for (int i = 0; i < key_idx1; i++) { - auto v = Get(1, Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - for (int i = 0; i < key_idx2; i++) { - auto v = Get(2, Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - }; - - // First three 110KB files are not going to second path. 
- // After that, (100K, 200K) - for (int num = 0; num < 3; num++) { - generate_file(); - } - - // Another 110KB triggers a compaction to 400K file to second path - generate_file(); - check_sstfilecount(2, 1); - - // (1, 4) - generate_file(); - check_sstfilecount(2, 1); - check_sstfilecount(0, 1); - - // (1,1,4) -> (2, 4) - generate_file(); - check_sstfilecount(2, 1); - check_sstfilecount(1, 1); - check_sstfilecount(0, 0); - - // (1, 2, 4) -> (3, 4) - generate_file(); - check_sstfilecount(2, 1); - check_sstfilecount(1, 1); - check_sstfilecount(0, 0); - - // (1, 3, 4) -> (8) - generate_file(); - check_sstfilecount(3, 1); - - // (1, 8) - generate_file(); - check_sstfilecount(3, 1); - check_sstfilecount(0, 1); - - // (1, 1, 8) -> (2, 8) - generate_file(); - check_sstfilecount(3, 1); - check_sstfilecount(1, 1); - - // (1, 2, 8) -> (3, 8) - generate_file(); - check_sstfilecount(3, 1); - check_sstfilecount(1, 1); - check_sstfilecount(0, 0); - - // (1, 3, 8) -> (4, 8) - generate_file(); - check_sstfilecount(2, 1); - check_sstfilecount(3, 1); - - // (1, 4, 8) -> (5, 8) - generate_file(); - check_sstfilecount(3, 1); - check_sstfilecount(2, 1); - check_sstfilecount(0, 0); - - check_getvalues(); - - ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); - - check_getvalues(); - - Destroy(options, true); -} - -TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) { - std::function verify_func = [&](int num_keys_in_db) { - std::string keys_in_db; - Iterator* iter = dbfull()->NewIterator(ReadOptions(), handles_[1]); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - keys_in_db.append(iter->key().ToString()); - keys_in_db.push_back(','); - } - delete iter; - - std::string expected_keys; - for (int i = 0; i <= num_keys_in_db; i++) { - expected_keys.append(Key(i)); - expected_keys.push_back(','); - } - - ASSERT_EQ(keys_in_db, expected_keys); - }; - - Random rnd(301); - int max_key1 = 200; - int max_key2 = 600; - int max_key3 = 800; - const int KNumKeysPerFile = 10; - - // Stage 1: open a DB with universal compaction, num_levels=1 - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 1; - options.write_buffer_size = 200 << 10; // 200KB - options.level0_file_num_compaction_trigger = 3; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysPerFile)); - options = CurrentOptions(options); - CreateAndReopenWithCF({"pikachu"}, options); - - for (int i = 0; i <= max_key1; i++) { - // each value is 10K - ASSERT_OK(Put(1, Key(i), rnd.RandomString(10000))); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // Stage 2: reopen with universal compaction, num_levels=4 - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 4; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - verify_func(max_key1); - - // Insert more keys - for (int i = max_key1 + 1; i <= max_key2; i++) { - // each value is 10K - ASSERT_OK(Put(1, Key(i), rnd.RandomString(10000))); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - verify_func(max_key2); - // Compaction to non-L0 has happened. 
- ASSERT_GT(NumTableFilesAtLevel(options.num_levels - 1, 1), 0); - - // Stage 3: Revert it back to one level and revert to num_levels=1. - options.num_levels = 4; - options.target_file_size_base = INT_MAX; - ReopenWithColumnFamilies({"default", "pikachu"}, options); - // Compact all to level 0 - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 0; - compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK( - dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - // Need to restart it once to remove higher level records in manifest. - ReopenWithColumnFamilies({"default", "pikachu"}, options); - // Final reopen - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 1; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - // Insert more keys - for (int i = max_key2 + 1; i <= max_key3; i++) { - // each value is 10K - ASSERT_OK(Put(1, Key(i), rnd.RandomString(10000))); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - ASSERT_OK(Flush(1)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - verify_func(max_key3); -} - -TEST_P(DBTestUniversalCompaction, UniversalCompactionSecondPathRatio) { - if (!Snappy_Supported()) { - return; - } - Options options = CurrentOptions(); - options.db_paths.emplace_back(dbname_, 500 * 1024); - options.db_paths.emplace_back(dbname_ + "_2", 1024 * 1024 * 1024); - options.compaction_style = kCompactionStyleUniversal; - options.compaction_options_universal.size_ratio = 5; - options.write_buffer_size = 111 << 10; // 114KB - options.arena_block_size = 4 << 10; - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 1; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - - std::vector filenames; - if (env_->GetChildren(options.db_paths[1].path, &filenames).ok()) { - // Delete archival files. - for (size_t i = 0; i < filenames.size(); ++i) { - ASSERT_OK( - env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i])); - } - ASSERT_OK(env_->DeleteDir(options.db_paths[1].path)); - } - Reopen(options); - - Random rnd(301); - int key_idx = 0; - - // First three 110KB files are not going to second path. 
- // After that, (100K, 200K) - for (int num = 0; num < 3; num++) { - GenerateNewFile(&rnd, &key_idx); - } - - // Another 110KB triggers a compaction to 400K file to second path - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - - // (1, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1,1,4) -> (2, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 2, 4) -> (3, 4) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // (1, 3, 4) -> (8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // (1, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 1, 8) -> (2, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(1, GetSstFileCount(dbname_)); - - // (1, 2, 8) -> (3, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // (1, 3, 8) -> (4, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - // (1, 4, 8) -> (5, 8) - GenerateNewFile(&rnd, &key_idx); - ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); - ASSERT_EQ(0, GetSstFileCount(dbname_)); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - Reopen(options); - - for (int i = 0; i < key_idx; i++) { - auto v = Get(Key(i)); - ASSERT_NE(v, "NOT_FOUND"); - ASSERT_TRUE(v.size() == 1 || v.size() == 990); - } - - Destroy(options); -} - -TEST_P(DBTestUniversalCompaction, ConcurrentBottomPriLowPriCompactions) { - if (num_levels_ == 1) { - // for single-level universal, everything's bottom level so nothing should - // be executed in bottom-pri thread pool. - return; - } - const int kNumFilesTrigger = 3; - Env::Default()->SetBackgroundThreads(1, Env::Priority::BOTTOM); - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.max_background_compactions = 2; - options.num_levels = num_levels_; - options.write_buffer_size = 100 << 10; // 100KB - options.target_file_size_base = 32 << 10; // 32KB - options.level0_file_num_compaction_trigger = kNumFilesTrigger; - // Trigger compaction if size amplification exceeds 110% - options.compaction_options_universal.max_size_amplification_percent = 110; - DestroyAndReopen(options); - - // Need to get a token to enable compaction parallelism up to - // `max_background_compactions` jobs. - auto pressure_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {// wait for the full compaction to be picked before adding files intended - // for the second one. - {"DBImpl::BackgroundCompaction:ForwardToBottomPriPool", - "DBTestUniversalCompaction:ConcurrentBottomPriLowPriCompactions:0"}, - // the full (bottom-pri) compaction waits until a partial (low-pri) - // compaction has started to verify they can run in parallel. 
- {"DBImpl::BackgroundCompaction:NonTrivial", - "DBImpl::BGWorkBottomCompaction"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int i = 0; i < 2; ++i) { - for (int num = 0; num < kNumFilesTrigger; num++) { - int key_idx = 0; - GenerateNewFile(&rnd, &key_idx, true /* no_wait */); - // use no_wait above because that one waits for flush and compaction. We - // don't want to wait for compaction because the full compaction is - // intentionally blocked while more files are flushed. - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - if (i == 0) { - TEST_SYNC_POINT( - "DBTestUniversalCompaction:ConcurrentBottomPriLowPriCompactions:0"); - } - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - // First compaction should output to bottom level. Second should output to L0 - // since older L0 files pending compaction prevent it from being placed lower. - ASSERT_EQ(NumSortedRuns(), 2); - ASSERT_GT(NumTableFilesAtLevel(0), 0); - ASSERT_GT(NumTableFilesAtLevel(num_levels_ - 1), 0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - Env::Default()->SetBackgroundThreads(0, Env::Priority::BOTTOM); -} - -TEST_P(DBTestUniversalCompaction, RecalculateScoreAfterPicking) { - // Regression test for extra compactions scheduled. Once enough compactions - // have been scheduled to bring the score below one, we should stop - // scheduling more; otherwise, other CFs/DBs may be delayed unnecessarily. - const int kNumFilesTrigger = 8; - Options options = CurrentOptions(); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); - options.compaction_options_universal.max_merge_width = kNumFilesTrigger / 2; - options.compaction_options_universal.max_size_amplification_percent = - static_cast(-1); - options.compaction_style = kCompactionStyleUniversal; - options.level0_file_num_compaction_trigger = kNumFilesTrigger; - options.num_levels = num_levels_; - Reopen(options); - - std::atomic num_compactions_attempted(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:Start", - [&](void* /*arg*/) { ++num_compactions_attempted; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - for (int num = 0; num < kNumFilesTrigger; num++) { - ASSERT_EQ(NumSortedRuns(), num); - int key_idx = 0; - GenerateNewFile(&rnd, &key_idx); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Compacting the first four files was enough to bring the score below one so - // there's no need to schedule any more compactions. - ASSERT_EQ(1, num_compactions_attempted); - ASSERT_EQ(NumSortedRuns(), 5); -} - -TEST_P(DBTestUniversalCompaction, FinalSortedRunCompactFilesConflict) { - // Regression test for conflict between: - // (1) Running CompactFiles including file in the final sorted run; and - // (2) Picking universal size-amp-triggered compaction, which always includes - // the final sorted run. 
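Both the test above and the one that follows serialize their threads through RocksDB's internal sync-point facility: `LoadDependency` declares that one named point must fire before another is allowed to proceed. A rough sketch of the mechanism, assuming the internal `test_util/sync_point.h` header and a debug build (the point names here are made up):

    #include "port/port.h"
    #include "test_util/sync_point.h"

    void SyncPointOrderingSketch() {
      // Declare: "Sketch:Step:1" must be reached before "Sketch:Step:2" may pass.
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
          {{"Sketch:Step:1", "Sketch:Step:2"}});
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

      ROCKSDB_NAMESPACE::port::Thread waiter([&]() {
        TEST_SYNC_POINT("Sketch:Step:2");  // blocks until Step:1 has fired
      });
      TEST_SYNC_POINT("Sketch:Step:1");  // unblocks the waiter thread
      waiter.join();
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
    }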
- if (exclusive_manual_compaction_) { - return; - } - - Options opts = CurrentOptions(); - opts.compaction_style = kCompactionStyleUniversal; - opts.compaction_options_universal.max_size_amplification_percent = 50; - opts.compaction_options_universal.min_merge_width = 2; - opts.compression = kNoCompression; - opts.level0_file_num_compaction_trigger = 2; - opts.max_background_compactions = 2; - opts.num_levels = num_levels_; - Reopen(opts); - - // make sure compaction jobs can be parallelized - auto stop_token = - dbfull()->TEST_write_controler().GetCompactionPressureToken(); - - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(NumTableFilesAtLevel(num_levels_ - 1), 1); - ColumnFamilyMetaData cf_meta; - ColumnFamilyHandle* default_cfh = db_->DefaultColumnFamily(); - dbfull()->GetColumnFamilyMetaData(default_cfh, &cf_meta); - ASSERT_EQ(1, cf_meta.levels[num_levels_ - 1].files.size()); - std::string first_sst_filename = - cf_meta.levels[num_levels_ - 1].files[0].name; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"CompactFilesImpl:0", - "DBTestUniversalCompaction:FinalSortedRunCompactFilesConflict:0"}, - {"DBImpl::BackgroundCompaction():AfterPickCompaction", - "CompactFilesImpl:1"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - port::Thread compact_files_thread([&]() { - ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), default_cfh, - {first_sst_filename}, num_levels_ - 1)); - }); - - TEST_SYNC_POINT( - "DBTestUniversalCompaction:FinalSortedRunCompactFilesConflict:0"); - for (int i = 0; i < 2; ++i) { - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - compact_files_thread.join(); -} - -INSTANTIATE_TEST_CASE_P(NumLevels, DBTestUniversalCompaction, - ::testing::Combine(::testing::Values(1, 3, 5), - ::testing::Bool())); - -class DBTestUniversalManualCompactionOutputPathId - : public DBTestUniversalCompactionBase { - public: - DBTestUniversalManualCompactionOutputPathId() - : DBTestUniversalCompactionBase( - "/db_universal_compaction_manual_pid_test") {} -}; - -TEST_P(DBTestUniversalManualCompactionOutputPathId, - ManualCompactionOutputPathId) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.db_paths.emplace_back(dbname_, 1000000000); - options.db_paths.emplace_back(dbname_ + "_2", 1000000000); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = num_levels_; - options.target_file_size_base = 1 << 30; // Big size - options.level0_file_num_compaction_trigger = 10; - Destroy(options); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - MakeTables(3, "p", "q", 1); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(2, TotalLiveFiles(1)); - ASSERT_EQ(2, GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[1].path)); - - // Full compaction to DB path 0 - CompactRangeOptions compact_options; - compact_options.target_path_id = 1; - compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - ASSERT_EQ(1, TotalLiveFiles(1)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); - ASSERT_EQ(1, TotalLiveFiles(1)); - ASSERT_EQ(0, 
GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - - MakeTables(1, "p", "q", 1); - ASSERT_EQ(2, TotalLiveFiles(1)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); - ASSERT_EQ(2, TotalLiveFiles(1)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); - - // Full compaction to DB path 0 - compact_options.target_path_id = 0; - compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_OK(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); - ASSERT_EQ(1, TotalLiveFiles(1)); - ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path)); - ASSERT_EQ(0, GetSstFileCount(options.db_paths[1].path)); - - // Fail when compacting to an invalid path ID - compact_options.target_path_id = 2; - compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - ASSERT_TRUE(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr) - .IsInvalidArgument()); -} - -INSTANTIATE_TEST_CASE_P(OutputPathId, - DBTestUniversalManualCompactionOutputPathId, - ::testing::Combine(::testing::Values(1, 8), - ::testing::Bool())); - -TEST_F(DBTestUniversalCompaction2, BasicL0toL1) { - const int kNumKeys = 3000; - const int kWindowSize = 100; - const int kNumDelsTrigger = 90; - - Options opts = CurrentOptions(); - opts.table_properties_collector_factories.emplace_back( - NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger)); - opts.compaction_style = kCompactionStyleUniversal; - opts.level0_file_num_compaction_trigger = 2; - opts.compression = kNoCompression; - opts.compaction_options_universal.size_ratio = 10; - opts.compaction_options_universal.min_merge_width = 2; - opts.compaction_options_universal.max_size_amplification_percent = 200; - Reopen(opts); - - // add an L1 file to prevent tombstones from dropping due to obsolescence - // during flush - int i; - for (i = 0; i < 2000; ++i) { - ASSERT_OK(Put(Key(i), "val")); - } - ASSERT_OK(Flush()); - // MoveFilesToLevel(6); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - for (i = 1999; i < kNumKeys; ++i) { - if (i >= kNumKeys - kWindowSize && - i < kNumKeys - kWindowSize + kNumDelsTrigger) { - ASSERT_OK(Delete(Key(i))); - } else { - ASSERT_OK(Put(Key(i), "val")); - } - } - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_GT(NumTableFilesAtLevel(6), 0); -} - -#if defined(ENABLE_SINGLE_LEVEL_DTC) -TEST_F(DBTestUniversalCompaction2, SingleLevel) { - const int kNumKeys = 3000; - const int kWindowSize = 100; - const int kNumDelsTrigger = 90; - - Options opts = CurrentOptions(); - opts.table_properties_collector_factories.emplace_back( - NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger)); - opts.compaction_style = kCompactionStyleUniversal; - opts.level0_file_num_compaction_trigger = 2; - opts.compression = kNoCompression; - opts.num_levels = 1; - opts.compaction_options_universal.size_ratio = 10; - opts.compaction_options_universal.min_merge_width = 2; - opts.compaction_options_universal.max_size_amplification_percent = 200; - Reopen(opts); - - // add an L1 file to prevent tombstones from dropping due to obsolescence - // during flush - int i; - for (i = 0; i < 2000; ++i) { - ASSERT_OK(Put(Key(i), "val")); - } - ASSERT_OK(Flush()); - 
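The deletion-triggered compaction tests in this group all hang off the public `NewCompactOnDeletionCollectorFactory()`, which marks an SST file for compaction once a sliding window of entries contains enough tombstones. A minimal configuration sketch; the helper name and the window/trigger values are illustrative:

    #include "rocksdb/options.h"
    #include "rocksdb/utilities/table_properties_collectors.h"

    rocksdb::Options DeletionTriggeredCompactionOptionsSketch() {
      rocksdb::Options options;
      options.compaction_style = rocksdb::kCompactionStyleUniversal;
      // Request compaction once any window of 100 consecutive entries
      // holds at least 90 deletions.
      options.table_properties_collector_factories.emplace_back(
          rocksdb::NewCompactOnDeletionCollectorFactory(
              /*sliding_window_size=*/100, /*deletion_trigger=*/90));
      return options;
    }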
-  for (i = 1999; i < kNumKeys; ++i) {
-    if (i >= kNumKeys - kWindowSize &&
-        i < kNumKeys - kWindowSize + kNumDelsTrigger) {
-      ASSERT_OK(Delete(Key(i)));
-    } else {
-      ASSERT_OK(Put(Key(i), "val"));
-    }
-  }
-  ASSERT_OK(Flush());
-
-  ASSERT_OK(dbfull()->TEST_WaitForCompact());
-  ASSERT_EQ(1, NumTableFilesAtLevel(0));
-}
-#endif  // ENABLE_SINGLE_LEVEL_DTC
-
-TEST_F(DBTestUniversalCompaction2, MultipleLevels) {
-  const int kWindowSize = 100;
-  const int kNumDelsTrigger = 90;
-
-  Options opts = CurrentOptions();
-  opts.table_properties_collector_factories.emplace_back(
-      NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger));
-  opts.compaction_style = kCompactionStyleUniversal;
-  opts.level0_file_num_compaction_trigger = 4;
-  opts.compression = kNoCompression;
-  opts.compaction_options_universal.size_ratio = 10;
-  opts.compaction_options_universal.min_merge_width = 2;
-  opts.compaction_options_universal.max_size_amplification_percent = 200;
-  Reopen(opts);
-
-  // add an L1 file to prevent tombstones from dropping due to obsolescence
-  // during flush
-  int i;
-  for (i = 0; i < 500; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-  for (i = 500; i < 1000; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-  for (i = 1000; i < 1500; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-  for (i = 1500; i < 2000; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-
-  ASSERT_OK(dbfull()->TEST_WaitForCompact());
-  ASSERT_EQ(0, NumTableFilesAtLevel(0));
-  ASSERT_GT(NumTableFilesAtLevel(6), 0);
-
-  for (i = 1999; i < 2333; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-  for (i = 2333; i < 2666; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-  for (i = 2666; i < 2999; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-
-  ASSERT_OK(dbfull()->TEST_WaitForCompact());
-  ASSERT_EQ(0, NumTableFilesAtLevel(0));
-  ASSERT_GT(NumTableFilesAtLevel(6), 0);
-  ASSERT_GT(NumTableFilesAtLevel(5), 0);
-
-  for (i = 1900; i < 2100; ++i) {
-    ASSERT_OK(Delete(Key(i)));
-  }
-  ASSERT_OK(Flush());
-
-  ASSERT_OK(dbfull()->TEST_WaitForCompact());
-  ASSERT_EQ(0, NumTableFilesAtLevel(0));
-  ASSERT_EQ(0, NumTableFilesAtLevel(1));
-  ASSERT_EQ(0, NumTableFilesAtLevel(2));
-  ASSERT_EQ(0, NumTableFilesAtLevel(3));
-  ASSERT_EQ(0, NumTableFilesAtLevel(4));
-  ASSERT_EQ(0, NumTableFilesAtLevel(5));
-  ASSERT_GT(NumTableFilesAtLevel(6), 0);
-}
-
-TEST_F(DBTestUniversalCompaction2, OverlappingL0) {
-  const int kWindowSize = 100;
-  const int kNumDelsTrigger = 90;
-
-  Options opts = CurrentOptions();
-  opts.table_properties_collector_factories.emplace_back(
-      NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger));
-  opts.compaction_style = kCompactionStyleUniversal;
-  opts.level0_file_num_compaction_trigger = 5;
-  opts.compression = kNoCompression;
-  opts.compaction_options_universal.size_ratio = 10;
-  opts.compaction_options_universal.min_merge_width = 2;
-  opts.compaction_options_universal.max_size_amplification_percent = 200;
-  Reopen(opts);
-
-  // add an L1 file to prevent tombstones from dropping due to obsolescence
-  // during flush
-  int i;
-  for (i = 0; i < 2000; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-  for (i = 2000; i < 3000; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-  for (i = 3500; i < 4000; ++i) {
-    ASSERT_OK(Put(Key(i), "val"));
-  }
-  ASSERT_OK(Flush());
-  for (i = 2900; i < 3100; ++i) {
-    ASSERT_OK(Delete(Key(i)));
-  }
-  ASSERT_OK(Flush());
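The `NumTableFilesAtLevel()` assertions sprinkled through these tests are a thin wrapper over a public DB property; outside the test harness the same count can be read roughly as below, assuming a hypothetical open `db` handle:

    #include <string>
    #include "rocksdb/db.h"

    int NumFilesAtLevelSketch(rocksdb::DB* db, int level) {
      std::string value;
      // Property name pattern: "rocksdb.num-files-at-level<N>".
      if (!db->GetProperty(
              "rocksdb.num-files-at-level" + std::to_string(level), &value)) {
        return -1;  // property unavailable
      }
      return std::stoi(value);
    }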
- - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - ASSERT_GT(NumTableFilesAtLevel(6), 0); -} - -TEST_F(DBTestUniversalCompaction2, IngestBehind) { - const int kNumKeys = 3000; - const int kWindowSize = 100; - const int kNumDelsTrigger = 90; - - Options opts = CurrentOptions(); - opts.table_properties_collector_factories.emplace_back( - NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger)); - opts.compaction_style = kCompactionStyleUniversal; - opts.level0_file_num_compaction_trigger = 2; - opts.compression = kNoCompression; - opts.allow_ingest_behind = true; - opts.compaction_options_universal.size_ratio = 10; - opts.compaction_options_universal.min_merge_width = 2; - opts.compaction_options_universal.max_size_amplification_percent = 200; - Reopen(opts); - - // add an L1 file to prevent tombstones from dropping due to obsolescence - // during flush - int i; - for (i = 0; i < 2000; ++i) { - ASSERT_OK(Put(Key(i), "val")); - } - ASSERT_OK(Flush()); - // MoveFilesToLevel(6); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - for (i = 1999; i < kNumKeys; ++i) { - if (i >= kNumKeys - kWindowSize && - i < kNumKeys - kWindowSize + kNumDelsTrigger) { - ASSERT_OK(Delete(Key(i))); - } else { - ASSERT_OK(Put(Key(i), "val")); - } - } - ASSERT_OK(Flush()); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(0, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(6)); - ASSERT_GT(NumTableFilesAtLevel(5), 0); -} - -TEST_F(DBTestUniversalCompaction2, PeriodicCompactionDefault) { - Options options; - options.compaction_style = kCompactionStyleUniversal; - options.env = env_; - KeepFilterFactory* filter = new KeepFilterFactory(true); - options.compaction_filter_factory.reset(filter); - Reopen(options); - ASSERT_EQ(30 * 24 * 60 * 60, - dbfull()->GetOptions().periodic_compaction_seconds); - - KeepFilter df; - options.compaction_filter_factory.reset(); - options.compaction_filter = &df; - Reopen(options); - ASSERT_EQ(30 * 24 * 60 * 60, - dbfull()->GetOptions().periodic_compaction_seconds); - - options.ttl = 60 * 24 * 60 * 60; - options.compaction_filter = nullptr; - Reopen(options); - ASSERT_EQ(60 * 24 * 60 * 60, - dbfull()->GetOptions().periodic_compaction_seconds); -} - -TEST_F(DBTestUniversalCompaction2, PeriodicCompaction) { - Options opts = CurrentOptions(); - opts.env = env_; - opts.compaction_style = kCompactionStyleUniversal; - opts.level0_file_num_compaction_trigger = 10; - opts.max_open_files = -1; - opts.compaction_options_universal.size_ratio = 10; - opts.compaction_options_universal.min_merge_width = 2; - opts.compaction_options_universal.max_size_amplification_percent = 200; - opts.periodic_compaction_seconds = 48 * 60 * 60; // 2 days - opts.num_levels = 5; - env_->SetMockSleep(); - Reopen(opts); - - // NOTE: Presumed unnecessary and removed: resetting mock time in env - - int periodic_compactions = 0; - int start_level = -1; - int output_level = -1; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "UniversalCompactionPicker::PickPeriodicCompaction:Return", - [&](void* arg) { - Compaction* compaction = reinterpret_cast(arg); - ASSERT_TRUE(arg != nullptr); - ASSERT_TRUE(compaction->compaction_reason() == - CompactionReason::kPeriodicCompaction); - start_level = compaction->start_level(); - output_level = compaction->output_level(); - periodic_compactions++; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Case 1: Oldest flushed file excceeds periodic 
compaction threshold. - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Flush()); - ASSERT_EQ(0, periodic_compactions); - // Move clock forward so that the flushed file would qualify periodic - // compaction. - env_->MockSleepForSeconds(48 * 60 * 60 + 100); - - // Another flush would trigger compaction the oldest file. - ASSERT_OK(Put("foo", "bar2")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(1, periodic_compactions); - ASSERT_EQ(0, start_level); - ASSERT_EQ(4, output_level); - - // Case 2: Oldest compacted file excceeds periodic compaction threshold - periodic_compactions = 0; - // A flush doesn't trigger a periodic compaction when threshold not hit - ASSERT_OK(Put("foo", "bar2")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(0, periodic_compactions); - - // After periodic compaction threshold hits, a flush will trigger - // a compaction - ASSERT_OK(Put("foo", "bar2")); - env_->MockSleepForSeconds(48 * 60 * 60 + 100); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(1, periodic_compactions); - ASSERT_EQ(0, start_level); - ASSERT_EQ(4, output_level); -} - -} // namespace ROCKSDB_NAMESPACE - - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc deleted file mode 100644 index 705f53f90..000000000 --- a/db/db_wal_test.cc +++ /dev/null @@ -1,2408 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/db_test_util.h" -#include "options/options_helper.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/file_system.h" -#include "test_util/sync_point.h" -#include "utilities/fault_injection_env.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { -class DBWALTestBase : public DBTestBase { - protected: - explicit DBWALTestBase(const std::string& dir_name) - : DBTestBase(dir_name, /*env_do_fsync=*/true) {} - -#if defined(ROCKSDB_PLATFORM_POSIX) - public: -#if defined(ROCKSDB_FALLOCATE_PRESENT) - bool IsFallocateSupported() { - // Test fallocate support of running file system. - // Skip this test if fallocate is not supported. 
- std::string fname_test_fallocate = dbname_ + "/preallocate_testfile"; - int fd = -1; - do { - fd = open(fname_test_fallocate.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); - } while (fd < 0 && errno == EINTR); - assert(fd > 0); - int alloc_status = fallocate(fd, 0, 0, 1); - int err_number = errno; - close(fd); - assert(env_->DeleteFile(fname_test_fallocate) == Status::OK()); - if (err_number == ENOSYS || err_number == EOPNOTSUPP) { - fprintf(stderr, "Skipped preallocated space check: %s\n", - errnoStr(err_number).c_str()); - return false; - } - assert(alloc_status == 0); - return true; - } -#endif // ROCKSDB_FALLOCATE_PRESENT - - uint64_t GetAllocatedFileSize(std::string file_name) { - struct stat sbuf; - int err = stat(file_name.c_str(), &sbuf); - assert(err == 0); - return sbuf.st_blocks * 512; - } -#endif // ROCKSDB_PLATFORM_POSIX -}; - -class DBWALTest : public DBWALTestBase { - public: - DBWALTest() : DBWALTestBase("/db_wal_test") {} -}; - -// A SpecialEnv enriched to give more insight about deleted files -class EnrichedSpecialEnv : public SpecialEnv { - public: - explicit EnrichedSpecialEnv(Env* base) : SpecialEnv(base) {} - Status NewSequentialFile(const std::string& f, - std::unique_ptr* r, - const EnvOptions& soptions) override { - InstrumentedMutexLock l(&env_mutex_); - if (f == skipped_wal) { - deleted_wal_reopened = true; - if (IsWAL(f) && largest_deleted_wal.size() != 0 && - f.compare(largest_deleted_wal) <= 0) { - gap_in_wals = true; - } - } - return SpecialEnv::NewSequentialFile(f, r, soptions); - } - Status DeleteFile(const std::string& fname) override { - if (IsWAL(fname)) { - deleted_wal_cnt++; - InstrumentedMutexLock l(&env_mutex_); - // If this is the first WAL, remember its name and skip deleting it. We - // remember its name partly because the application might attempt to - // delete the file again. - if (skipped_wal.size() != 0 && skipped_wal != fname) { - if (largest_deleted_wal.size() == 0 || - largest_deleted_wal.compare(fname) < 0) { - largest_deleted_wal = fname; - } - } else { - skipped_wal = fname; - return Status::OK(); - } - } - return SpecialEnv::DeleteFile(fname); - } - bool IsWAL(const std::string& fname) { - // printf("iswal %s\n", fname.c_str()); - return fname.compare(fname.size() - 3, 3, "log") == 0; - } - - InstrumentedMutex env_mutex_; - // the wal whose actual delete was skipped by the env - std::string skipped_wal = ""; - // the largest WAL that was requested to be deleted - std::string largest_deleted_wal = ""; - // number of WALs that were successfully deleted - std::atomic deleted_wal_cnt = {0}; - // the WAL whose delete from fs was skipped is reopened during recovery - std::atomic deleted_wal_reopened = {false}; - // whether a gap in the WALs was detected during recovery - std::atomic gap_in_wals = {false}; -}; - -class DBWALTestWithEnrichedEnv : public DBTestBase { - public: - DBWALTestWithEnrichedEnv() - : DBTestBase("db_wal_test", /*env_do_fsync=*/true) { - enriched_env_ = new EnrichedSpecialEnv(env_->target()); - auto options = CurrentOptions(); - options.env = enriched_env_; - options.allow_2pc = true; - Reopen(options); - delete env_; - // to be deleted by the parent class - env_ = enriched_env_; - } - - protected: - EnrichedSpecialEnv* enriched_env_; -}; - -// Test that the recovery would successfully avoid the gaps between the logs. -// One known scenario that could cause this is that the application issue the -// WAL deletion out of order. 
For the sake of simplicity in the test, here we -// create the gap by manipulating the env to skip deletion of the first WAL but -// not the ones after it. -TEST_F(DBWALTestWithEnrichedEnv, SkipDeletedWALs) { - auto options = last_options_; - // To cause frequent WAL deletion - options.write_buffer_size = 128; - Reopen(options); - - WriteOptions writeOpt = WriteOptions(); - for (int i = 0; i < 128 * 5; i++) { - ASSERT_OK(dbfull()->Put(writeOpt, "foo", "v1")); - } - FlushOptions fo; - fo.wait = true; - ASSERT_OK(db_->Flush(fo)); - - // some wals are deleted - ASSERT_NE(0, enriched_env_->deleted_wal_cnt); - // but not the first one - ASSERT_NE(0, enriched_env_->skipped_wal.size()); - - // Test that the WAL that was not deleted will be skipped during recovery - options = last_options_; - Reopen(options); - ASSERT_FALSE(enriched_env_->deleted_wal_reopened); - ASSERT_FALSE(enriched_env_->gap_in_wals); -} - -TEST_F(DBWALTest, WAL) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v1", Get(1, "bar")); - - writeOpt.disableWAL = false; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2")); - writeOpt.disableWAL = true; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2")); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - // Both value's should be present. - ASSERT_EQ("v2", Get(1, "bar")); - ASSERT_EQ("v2", Get(1, "foo")); - - writeOpt.disableWAL = true; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3")); - writeOpt.disableWAL = false; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3")); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - // again both values should be present. 
- ASSERT_EQ("v3", Get(1, "foo")); - ASSERT_EQ("v3", Get(1, "bar")); - } while (ChangeWalOptions()); -} - -TEST_F(DBWALTest, RollLog) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Put(1, "baz", "v5")); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - for (int i = 0; i < 10; i++) { - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - } - ASSERT_OK(Put(1, "foo", "v4")); - for (int i = 0; i < 10; i++) { - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - } - } while (ChangeWalOptions()); -} - -TEST_F(DBWALTest, SyncWALNotBlockWrite) { - Options options = CurrentOptions(); - options.max_write_buffer_number = 4; - DestroyAndReopen(options); - - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("foo5", "bar5")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"WritableFileWriter::SyncWithoutFlush:1", - "DBWALTest::SyncWALNotBlockWrite:1"}, - {"DBWALTest::SyncWALNotBlockWrite:2", - "WritableFileWriter::SyncWithoutFlush:2"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ROCKSDB_NAMESPACE::port::Thread thread([&]() { ASSERT_OK(db_->SyncWAL()); }); - - TEST_SYNC_POINT("DBWALTest::SyncWALNotBlockWrite:1"); - ASSERT_OK(Put("foo2", "bar2")); - ASSERT_OK(Put("foo3", "bar3")); - FlushOptions fo; - fo.wait = false; - ASSERT_OK(db_->Flush(fo)); - ASSERT_OK(Put("foo4", "bar4")); - - TEST_SYNC_POINT("DBWALTest::SyncWALNotBlockWrite:2"); - - thread.join(); - - ASSERT_EQ(Get("foo1"), "bar1"); - ASSERT_EQ(Get("foo2"), "bar2"); - ASSERT_EQ(Get("foo3"), "bar3"); - ASSERT_EQ(Get("foo4"), "bar4"); - ASSERT_EQ(Get("foo5"), "bar5"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBWALTest, SyncWALNotWaitWrite) { - ASSERT_OK(Put("foo1", "bar1")); - ASSERT_OK(Put("foo3", "bar3")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"SpecialEnv::WalFile::Append:1", "DBWALTest::SyncWALNotWaitWrite:1"}, - {"DBWALTest::SyncWALNotWaitWrite:2", "SpecialEnv::WalFile::Append:2"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ROCKSDB_NAMESPACE::port::Thread thread( - [&]() { ASSERT_OK(Put("foo2", "bar2")); }); - // Moving this to SyncWAL before the actual fsync - // TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:1"); - ASSERT_OK(db_->SyncWAL()); - // Moving this to SyncWAL after actual fsync - // TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:2"); - - thread.join(); - - ASSERT_EQ(Get("foo1"), "bar1"); - ASSERT_EQ(Get("foo2"), "bar2"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBWALTest, Recover) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Put(1, "baz", "v5")); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v5", Get(1, "baz")); - ASSERT_OK(Put(1, "bar", "v2")); - ASSERT_OK(Put(1, "foo", "v3")); - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("v3", Get(1, "foo")); - ASSERT_OK(Put(1, "foo", "v4")); - ASSERT_EQ("v4", Get(1, "foo")); - ASSERT_EQ("v2", Get(1, "bar")); - ASSERT_EQ("v5", Get(1, "baz")); - } while (ChangeWalOptions()); -} - -TEST_F(DBWALTest, RecoverWithTableHandle) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.disable_auto_compactions = true; - 
options.avoid_flush_during_recovery = false; - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Put(1, "bar", "v2")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "foo", "v3")); - ASSERT_OK(Put(1, "bar", "v4")); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(1, "big", std::string(100, 'a'))); - - options = CurrentOptions(); - const int kSmallMaxOpenFiles = 13; - if (option_config_ == kDBLogDir) { - // Use this option to check not preloading files - // Set the max open files to be small enough so no preload will - // happen. - options.max_open_files = kSmallMaxOpenFiles; - // RocksDB sanitize max open files to at least 20. Modify it back. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = static_cast(arg); - *max_open_files = kSmallMaxOpenFiles; - }); - - } else if (option_config_ == kWalDirAndMmapReads) { - // Use this option to check always loading all files. - options.max_open_files = 100; - } else { - options.max_open_files = -1; - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - - std::vector> files; - dbfull()->TEST_GetFilesMetaData(handles_[1], &files); - size_t total_files = 0; - for (const auto& level : files) { - total_files += level.size(); - } - ASSERT_EQ(total_files, 3); - for (const auto& level : files) { - for (const auto& file : level) { - if (options.max_open_files == kSmallMaxOpenFiles) { - ASSERT_TRUE(file.table_reader_handle == nullptr); - } else { - ASSERT_TRUE(file.table_reader_handle != nullptr); - } - } - } - } while (ChangeWalOptions()); -} - -TEST_F(DBWALTest, RecoverWithBlob) { - // Write a value that's below the prospective size limit for blobs and another - // one that's above. Note that blob files are not actually enabled at this - // point. - constexpr uint64_t min_blob_size = 10; - - constexpr char short_value[] = "short"; - static_assert(sizeof(short_value) - 1 < min_blob_size, - "short_value too long"); - - constexpr char long_value[] = "long_value"; - static_assert(sizeof(long_value) - 1 >= min_blob_size, - "long_value too short"); - - ASSERT_OK(Put("key1", short_value)); - ASSERT_OK(Put("key2", long_value)); - - // There should be no files just yet since we haven't flushed. - { - VersionSet* const versions = dbfull()->GetVersionSet(); - ASSERT_NE(versions, nullptr); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - - const VersionStorageInfo* const storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - - ASSERT_EQ(storage_info->num_non_empty_levels(), 0); - ASSERT_TRUE(storage_info->GetBlobFiles().empty()); - } - - // Reopen the database with blob files enabled. A new table file/blob file - // pair should be written during recovery. 
- Options options; - options.enable_blob_files = true; - options.min_blob_size = min_blob_size; - options.avoid_flush_during_recovery = false; - options.disable_auto_compactions = true; - options.env = env_; - - Reopen(options); - - ASSERT_EQ(Get("key1"), short_value); - ASSERT_EQ(Get("key2"), long_value); - - VersionSet* const versions = dbfull()->GetVersionSet(); - ASSERT_NE(versions, nullptr); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - - const VersionStorageInfo* const storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - - const auto& l0_files = storage_info->LevelFiles(0); - ASSERT_EQ(l0_files.size(), 1); - - const FileMetaData* const table_file = l0_files[0]; - ASSERT_NE(table_file, nullptr); - - const auto& blob_files = storage_info->GetBlobFiles(); - ASSERT_EQ(blob_files.size(), 1); - - const auto& blob_file = blob_files.front(); - ASSERT_NE(blob_file, nullptr); - - ASSERT_EQ(table_file->smallest.user_key(), "key1"); - ASSERT_EQ(table_file->largest.user_key(), "key2"); - ASSERT_EQ(table_file->fd.smallest_seqno, 1); - ASSERT_EQ(table_file->fd.largest_seqno, 2); - ASSERT_EQ(table_file->oldest_blob_file_number, - blob_file->GetBlobFileNumber()); - - ASSERT_EQ(blob_file->GetTotalBlobCount(), 1); - - const InternalStats* const internal_stats = cfd->internal_stats(); - ASSERT_NE(internal_stats, nullptr); - - const auto& compaction_stats = internal_stats->TEST_GetCompactionStats(); - ASSERT_FALSE(compaction_stats.empty()); - ASSERT_EQ(compaction_stats[0].bytes_written, table_file->fd.GetFileSize()); - ASSERT_EQ(compaction_stats[0].bytes_written_blob, - blob_file->GetTotalBlobBytes()); - ASSERT_EQ(compaction_stats[0].num_output_files, 1); - ASSERT_EQ(compaction_stats[0].num_output_files_blob, 1); - - const uint64_t* const cf_stats_value = internal_stats->TEST_GetCFStatsValue(); - ASSERT_EQ(cf_stats_value[InternalStats::BYTES_FLUSHED], - compaction_stats[0].bytes_written + - compaction_stats[0].bytes_written_blob); -} - -TEST_F(DBWALTest, RecoverWithBlobMultiSST) { - // Write several large (4 KB) values without flushing. Note that blob files - // are not actually enabled at this point. - std::string large_value(1 << 12, 'a'); - - constexpr int num_keys = 64; - - for (int i = 0; i < num_keys; ++i) { - ASSERT_OK(Put(Key(i), large_value)); - } - - // There should be no files just yet since we haven't flushed. - { - VersionSet* const versions = dbfull()->GetVersionSet(); - ASSERT_NE(versions, nullptr); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - - const VersionStorageInfo* const storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - - ASSERT_EQ(storage_info->num_non_empty_levels(), 0); - ASSERT_TRUE(storage_info->GetBlobFiles().empty()); - } - - // Reopen the database with blob files enabled and write buffer size set to a - // smaller value. Multiple table files+blob files should be written and added - // to the Version during recovery. 
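The blob-recovery checks above, and the multi-SST variant that follows, revolve around the integrated BlobDB options; a minimal sketch of the relevant knobs, with illustrative values rather than the ones from the deleted test:

    #include "rocksdb/options.h"

    rocksdb::Options BlobOptionsSketch() {
      rocksdb::Options options;
      options.enable_blob_files = true;    // large values are written to blob files
      options.min_blob_size = 10;          // values of at least 10 bytes qualify
      options.blob_file_size = 256 << 20;  // target blob file size (~256 MB)
      return options;
    }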
- Options options; - options.write_buffer_size = 1 << 16; // 64 KB - options.enable_blob_files = true; - options.avoid_flush_during_recovery = false; - options.disable_auto_compactions = true; - options.env = env_; - - Reopen(options); - - for (int i = 0; i < num_keys; ++i) { - ASSERT_EQ(Get(Key(i)), large_value); - } - - VersionSet* const versions = dbfull()->GetVersionSet(); - ASSERT_NE(versions, nullptr); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - ASSERT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - ASSERT_NE(current, nullptr); - - const VersionStorageInfo* const storage_info = current->storage_info(); - ASSERT_NE(storage_info, nullptr); - - const auto& l0_files = storage_info->LevelFiles(0); - ASSERT_GT(l0_files.size(), 1); - - const auto& blob_files = storage_info->GetBlobFiles(); - ASSERT_GT(blob_files.size(), 1); - - ASSERT_EQ(l0_files.size(), blob_files.size()); -} - -TEST_F(DBWALTest, WALWithChecksumHandoff) { -#ifndef ROCKSDB_ASSERT_STATUS_CHECKED - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - do { - Options options = CurrentOptions(); - - options.checksum_handoff_file_types.Add(FileType::kWalFile); - options.env = fault_fs_env.get(); - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - - CreateAndReopenWithCF({"pikachu"}, options); - WriteOptions writeOpt = WriteOptions(); - writeOpt.disableWAL = true; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("v1", Get(1, "foo")); - ASSERT_EQ("v1", Get(1, "bar")); - - writeOpt.disableWAL = false; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2")); - writeOpt.disableWAL = true; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2")); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - // Both value's should be present. - ASSERT_EQ("v2", Get(1, "bar")); - ASSERT_EQ("v2", Get(1, "foo")); - - writeOpt.disableWAL = true; - // This put, data is persisted by Flush - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3")); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - writeOpt.disableWAL = false; - // Data is persisted in the WAL - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "zoo", "v3")); - // The hash does not match, write fails - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); - writeOpt.disableWAL = false; - ASSERT_NOK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3")); - - ReopenWithColumnFamilies({"default", "pikachu"}, options); - // Due to the write failure, Get should not find - ASSERT_NE("v3", Get(1, "foo")); - ASSERT_EQ("v3", Get(1, "zoo")); - ASSERT_EQ("v3", Get(1, "bar")); - - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - // Each write will be similated as corrupted. 
- fault_fs->IngestDataCorruptionBeforeWrite(); - writeOpt.disableWAL = true; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v4")); - writeOpt.disableWAL = false; - ASSERT_NOK(dbfull()->Put(writeOpt, handles_[1], "foo", "v4")); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_NE("v4", Get(1, "foo")); - ASSERT_NE("v4", Get(1, "bar")); - fault_fs->NoDataCorruptionBeforeWrite(); - - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum); - // The file system does not provide checksum method and verification. - writeOpt.disableWAL = true; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v5")); - writeOpt.disableWAL = false; - ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v5")); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ("v5", Get(1, "foo")); - ASSERT_EQ("v5", Get(1, "bar")); - - Destroy(options); - } while (ChangeWalOptions()); -#endif // ROCKSDB_ASSERT_STATUS_CHECKED -} - -TEST_F(DBWALTest, LockWal) { - do { - Options options = CurrentOptions(); - options.create_if_missing = true; - DestroyAndReopen(options); - - ASSERT_OK(Put("foo", "v")); - ASSERT_OK(Put("bar", "v")); - - ASSERT_OK(db_->LockWAL()); - // Verify writes are stopped - WriteOptions wopts; - wopts.no_slowdown = true; - Status s = db_->Put(wopts, "foo", "dontcare"); - ASSERT_TRUE(s.IsIncomplete()); - { - VectorLogPtr wals; - ASSERT_OK(db_->GetSortedWalFiles(wals)); - ASSERT_FALSE(wals.empty()); - } - port::Thread worker([&]() { - Status tmp_s = db_->Flush(FlushOptions()); - ASSERT_OK(tmp_s); - }); - FlushOptions flush_opts; - flush_opts.wait = false; - s = db_->Flush(flush_opts); - ASSERT_TRUE(s.IsTryAgain()); - ASSERT_OK(db_->UnlockWAL()); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "dontcare")); - - worker.join(); - } while (ChangeWalOptions()); -} - -class DBRecoveryTestBlobError - : public DBWALTest, - public testing::WithParamInterface { - public: - DBRecoveryTestBlobError() : sync_point_(GetParam()) {} - - std::string sync_point_; -}; - -INSTANTIATE_TEST_CASE_P(DBRecoveryTestBlobError, DBRecoveryTestBlobError, - ::testing::ValuesIn(std::vector{ - "BlobFileBuilder::WriteBlobToFile:AddRecord", - "BlobFileBuilder::WriteBlobToFile:AppendFooter"})); - -TEST_P(DBRecoveryTestBlobError, RecoverWithBlobError) { - // Write a value. Note that blob files are not actually enabled at this point. - ASSERT_OK(Put("key", "blob")); - - // Reopen with blob files enabled but make blob file writing fail during - // recovery. - SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* arg) { - Status* const s = static_cast(arg); - assert(s); - - (*s) = Status::IOError(sync_point_); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - Options options; - options.enable_blob_files = true; - options.avoid_flush_during_recovery = false; - options.disable_auto_compactions = true; - options.env = env_; - - ASSERT_NOK(TryReopen(options)); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - // Make sure the files generated by the failed recovery have been deleted. 
- std::vector files; - ASSERT_OK(env_->GetChildren(dbname_, &files)); - for (const auto& file : files) { - uint64_t number = 0; - FileType type = kTableFile; - - if (!ParseFileName(file, &number, &type)) { - continue; - } - - ASSERT_NE(type, kTableFile); - ASSERT_NE(type, kBlobFile); - } -} - -TEST_F(DBWALTest, IgnoreRecoveredLog) { - std::string backup_logs = dbname_ + "/backup_logs"; - - do { - // delete old files in backup_logs directory - ASSERT_OK(env_->CreateDirIfMissing(backup_logs)); - std::vector old_files; - ASSERT_OK(env_->GetChildren(backup_logs, &old_files)); - for (auto& file : old_files) { - ASSERT_OK(env_->DeleteFile(backup_logs + "/" + file)); - } - Options options = CurrentOptions(); - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreateUInt64AddOperator(); - options.wal_dir = dbname_ + "/logs"; - DestroyAndReopen(options); - - // fill up the DB - std::string one, two; - PutFixed64(&one, 1); - PutFixed64(&two, 2); - ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); - ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); - ASSERT_OK(db_->Merge(WriteOptions(), Slice("bar"), Slice(one))); - - // copy the logs to backup - std::vector logs; - ASSERT_OK(env_->GetChildren(options.wal_dir, &logs)); - for (auto& log : logs) { - CopyFile(options.wal_dir + "/" + log, backup_logs + "/" + log); - } - - // recover the DB - Reopen(options); - ASSERT_EQ(two, Get("foo")); - ASSERT_EQ(one, Get("bar")); - Close(); - - // copy the logs from backup back to wal dir - for (auto& log : logs) { - CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); - } - // this should ignore the log files, recovery should not happen again - // if the recovery happens, the same merge operator would be called twice, - // leading to incorrect results - Reopen(options); - ASSERT_EQ(two, Get("foo")); - ASSERT_EQ(one, Get("bar")); - Close(); - Destroy(options); - Reopen(options); - Close(); - - // copy the logs from backup back to wal dir - ASSERT_OK(env_->CreateDirIfMissing(options.wal_dir)); - for (auto& log : logs) { - CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); - } - // assert that we successfully recovered only from logs, even though we - // destroyed the DB - Reopen(options); - ASSERT_EQ(two, Get("foo")); - ASSERT_EQ(one, Get("bar")); - - // Recovery will fail if DB directory doesn't exist. 
- Destroy(options); - // copy the logs from backup back to wal dir - ASSERT_OK(env_->CreateDirIfMissing(options.wal_dir)); - for (auto& log : logs) { - CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); - // we won't be needing this file no more - ASSERT_OK(env_->DeleteFile(backup_logs + "/" + log)); - } - Status s = TryReopen(options); - ASSERT_NOK(s); - Destroy(options); - } while (ChangeWalOptions()); -} - -TEST_F(DBWALTest, RecoveryWithEmptyLog) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Put(1, "foo", "v2")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v3")); - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - ASSERT_EQ("v3", Get(1, "foo")); - } while (ChangeWalOptions()); -} - -#if !(defined NDEBUG) || !defined(OS_WIN) -TEST_F(DBWALTest, PreallocateBlock) { - Options options = CurrentOptions(); - options.write_buffer_size = 10 * 1000 * 1000; - options.max_total_wal_size = 0; - - size_t expected_preallocation_size = static_cast( - options.write_buffer_size + options.write_buffer_size / 10); - - DestroyAndReopen(options); - - std::atomic called(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBTestWalFile.GetPreallocationStatus", [&](void* arg) { - ASSERT_TRUE(arg != nullptr); - size_t preallocation_size = *(static_cast(arg)); - ASSERT_EQ(expected_preallocation_size, preallocation_size); - called.fetch_add(1); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put("", "")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("", "")); - Close(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_EQ(2, called.load()); - - options.max_total_wal_size = 1000 * 1000; - expected_preallocation_size = static_cast(options.max_total_wal_size); - Reopen(options); - called.store(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBTestWalFile.GetPreallocationStatus", [&](void* arg) { - ASSERT_TRUE(arg != nullptr); - size_t preallocation_size = *(static_cast(arg)); - ASSERT_EQ(expected_preallocation_size, preallocation_size); - called.fetch_add(1); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put("", "")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("", "")); - Close(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_EQ(2, called.load()); - - options.db_write_buffer_size = 800 * 1000; - expected_preallocation_size = - static_cast(options.db_write_buffer_size); - Reopen(options); - called.store(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBTestWalFile.GetPreallocationStatus", [&](void* arg) { - ASSERT_TRUE(arg != nullptr); - size_t preallocation_size = *(static_cast(arg)); - ASSERT_EQ(expected_preallocation_size, preallocation_size); - called.fetch_add(1); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put("", "")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("", "")); - Close(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_EQ(2, called.load()); - - expected_preallocation_size = 700 * 1000; - std::shared_ptr write_buffer_manager = - std::make_shared(static_cast(700 * 1000)); - options.write_buffer_manager = write_buffer_manager; - Reopen(options); - called.store(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - 
"DBTestWalFile.GetPreallocationStatus", [&](void* arg) { - ASSERT_TRUE(arg != nullptr); - size_t preallocation_size = *(static_cast(arg)); - ASSERT_EQ(expected_preallocation_size, preallocation_size); - called.fetch_add(1); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(Put("", "")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("", "")); - Close(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_EQ(2, called.load()); -} -#endif // !(defined NDEBUG) || !defined(OS_WIN) - -TEST_F(DBWALTest, DISABLED_FullPurgePreservesRecycledLog) { - // TODO(ajkr): Disabled until WAL recycling is fixed for - // `kPointInTimeRecovery`. - - // For github issue #1303 - for (int i = 0; i < 2; ++i) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.recycle_log_file_num = 2; - if (i != 0) { - options.wal_dir = alternative_wal_dir_; - } - - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "v1")); - VectorLogPtr log_files; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files)); - ASSERT_GT(log_files.size(), 0); - ASSERT_OK(Flush()); - - // Now the original WAL is in log_files[0] and should be marked for - // recycling. - // Verify full purge cannot remove this file. - JobContext job_context(0); - dbfull()->TEST_LockMutex(); - dbfull()->FindObsoleteFiles(&job_context, true /* force */); - dbfull()->TEST_UnlockMutex(); - dbfull()->PurgeObsoleteFiles(job_context); - - if (i == 0) { - ASSERT_OK( - env_->FileExists(LogFileName(dbname_, log_files[0]->LogNumber()))); - } else { - ASSERT_OK(env_->FileExists( - LogFileName(alternative_wal_dir_, log_files[0]->LogNumber()))); - } - } -} - -TEST_F(DBWALTest, DISABLED_FullPurgePreservesLogPendingReuse) { - // TODO(ajkr): Disabled until WAL recycling is fixed for - // `kPointInTimeRecovery`. - - // Ensures full purge cannot delete a WAL while it's in the process of being - // recycled. In particular, we force the full purge after a file has been - // chosen for reuse, but before it has been renamed. - for (int i = 0; i < 2; ++i) { - Options options = CurrentOptions(); - options.recycle_log_file_num = 1; - if (i != 0) { - options.wal_dir = alternative_wal_dir_; - } - DestroyAndReopen(options); - - // The first flush creates a second log so writes can continue before the - // flush finishes. - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Flush()); - - // The second flush can recycle the first log. Sync points enforce the - // full purge happens after choosing the log to recycle and before it is - // renamed. 
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::CreateWAL:BeforeReuseWritableFile1", - "DBWALTest::FullPurgePreservesLogPendingReuse:PreFullPurge"}, - {"DBWALTest::FullPurgePreservesLogPendingReuse:PostFullPurge", - "DBImpl::CreateWAL:BeforeReuseWritableFile2"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ROCKSDB_NAMESPACE::port::Thread thread([&]() { - TEST_SYNC_POINT( - "DBWALTest::FullPurgePreservesLogPendingReuse:PreFullPurge"); - ASSERT_OK(db_->EnableFileDeletions(true)); - TEST_SYNC_POINT( - "DBWALTest::FullPurgePreservesLogPendingReuse:PostFullPurge"); - }); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Flush()); - thread.join(); - } -} - -TEST_F(DBWALTest, GetSortedWalFiles) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - VectorLogPtr log_files; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files)); - ASSERT_EQ(0, log_files.size()); - - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files)); - ASSERT_EQ(1, log_files.size()); - } while (ChangeWalOptions()); -} - -TEST_F(DBWALTest, GetCurrentWalFile) { - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - - std::unique_ptr* bad_log_file = nullptr; - ASSERT_NOK(dbfull()->GetCurrentWalFile(bad_log_file)); - - std::unique_ptr log_file; - ASSERT_OK(dbfull()->GetCurrentWalFile(&log_file)); - - // nothing has been written to the log yet - ASSERT_EQ(log_file->StartSequence(), 0); - ASSERT_EQ(log_file->SizeFileBytes(), 0); - ASSERT_EQ(log_file->Type(), kAliveLogFile); - ASSERT_GT(log_file->LogNumber(), 0); - - // add some data and verify that the file size actually moves foward - ASSERT_OK(Put(0, "foo", "v1")); - ASSERT_OK(Put(0, "foo2", "v2")); - ASSERT_OK(Put(0, "foo3", "v3")); - - ASSERT_OK(dbfull()->GetCurrentWalFile(&log_file)); - - ASSERT_EQ(log_file->StartSequence(), 0); - ASSERT_GT(log_file->SizeFileBytes(), 0); - ASSERT_EQ(log_file->Type(), kAliveLogFile); - ASSERT_GT(log_file->LogNumber(), 0); - - // force log files to cycle and add some more data, then check if - // log number moves forward - - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - for (int i = 0; i < 10; i++) { - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - } - - ASSERT_OK(Put(0, "foo4", "v4")); - ASSERT_OK(Put(0, "foo5", "v5")); - ASSERT_OK(Put(0, "foo6", "v6")); - - ASSERT_OK(dbfull()->GetCurrentWalFile(&log_file)); - - ASSERT_EQ(log_file->StartSequence(), 0); - ASSERT_GT(log_file->SizeFileBytes(), 0); - ASSERT_EQ(log_file->Type(), kAliveLogFile); - ASSERT_GT(log_file->LogNumber(), 0); - - } while (ChangeWalOptions()); -} - -TEST_F(DBWALTest, RecoveryWithLogDataForSomeCFs) { - // Test for regression of WAL cleanup missing files that don't contain data - // for every column family. - do { - CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - ASSERT_OK(Put(1, "foo", "v1")); - ASSERT_OK(Put(1, "foo", "v2")); - uint64_t earliest_log_nums[2]; - for (int i = 0; i < 2; ++i) { - if (i > 0) { - ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - } - VectorLogPtr log_files; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files)); - if (log_files.size() > 0) { - earliest_log_nums[i] = log_files[0]->LogNumber(); - } else { - earliest_log_nums[i] = std::numeric_limits::max(); - } - } - // Check at least the first WAL was cleaned up during the recovery. 
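The checks in this test, and several tests below, rely on the WAL-introspection API; a short sketch of those calls (a sketch only, assuming an open DB handle `db`):

VectorLogPtr wals;                                 // std::vector<std::unique_ptr<LogFile>>
ASSERT_OK(db->GetSortedWalFiles(wals));            // live WAL files, sorted by log number
for (const auto& wal : wals) {
  // wal->LogNumber(), wal->StartSequence(), wal->SizeFileBytes(), wal->Type()
}

std::unique_ptr<LogFile> current;
ASSERT_OK(db->GetCurrentWalFile(&current));        // the active (kAliveLogFile) WAL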
- ASSERT_LT(earliest_log_nums[0], earliest_log_nums[1]); - } while (ChangeWalOptions()); -} - -TEST_F(DBWALTest, RecoverWithLargeLog) { - do { - { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"pikachu"}, options); - ASSERT_OK(Put(1, "big1", std::string(200000, '1'))); - ASSERT_OK(Put(1, "big2", std::string(200000, '2'))); - ASSERT_OK(Put(1, "small3", std::string(10, '3'))); - ASSERT_OK(Put(1, "small4", std::string(10, '4'))); - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); - } - - // Make sure that if we re-open with a small write buffer size that - // we flush table files in the middle of a large log file. - Options options; - options.write_buffer_size = 100000; - options = CurrentOptions(options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ(NumTableFilesAtLevel(0, 1), 3); - ASSERT_EQ(std::string(200000, '1'), Get(1, "big1")); - ASSERT_EQ(std::string(200000, '2'), Get(1, "big2")); - ASSERT_EQ(std::string(10, '3'), Get(1, "small3")); - ASSERT_EQ(std::string(10, '4'), Get(1, "small4")); - ASSERT_GT(NumTableFilesAtLevel(0, 1), 1); - } while (ChangeWalOptions()); -} - -// In https://reviews.facebook.net/D20661 we change -// recovery behavior: previously for each log file each column family -// memtable was flushed, even it was empty. Now it's changed: -// we try to create the smallest number of table files by merging -// updates from multiple logs -TEST_F(DBWALTest, RecoverCheckFileAmountWithSmallWriteBuffer) { - Options options = CurrentOptions(); - options.write_buffer_size = 5000000; - CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); - - // Since we will reopen DB with smaller write_buffer_size, - // each key will go to new SST file - ASSERT_OK(Put(1, Key(10), DummyString(1000000))); - ASSERT_OK(Put(1, Key(10), DummyString(1000000))); - ASSERT_OK(Put(1, Key(10), DummyString(1000000))); - ASSERT_OK(Put(1, Key(10), DummyString(1000000))); - - ASSERT_OK(Put(3, Key(10), DummyString(1))); - // Make 'dobrynia' to be flushed and new WAL file to be created - ASSERT_OK(Put(2, Key(10), DummyString(7500000))); - ASSERT_OK(Put(2, Key(1), DummyString(1))); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2])); - { - auto tables = ListTableFiles(env_, dbname_); - ASSERT_EQ(tables.size(), static_cast(1)); - // Make sure 'dobrynia' was flushed: check sst files amount - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), - static_cast(1)); - } - // New WAL file - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(3, Key(10), DummyString(1))); - ASSERT_OK(Put(3, Key(10), DummyString(1))); - ASSERT_OK(Put(3, Key(10), DummyString(1))); - - options.write_buffer_size = 4096; - options.arena_block_size = 4096; - ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"}, - options); - { - // No inserts => default is empty - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), - static_cast(0)); - // First 4 keys goes to separate SSTs + 1 more SST for 2 smaller keys - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), - static_cast(5)); - // 1 SST for big key + 1 SST for small one - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), - static_cast(2)); - // 1 SST for all keys - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), - static_cast(1)); - } -} - -// In https://reviews.facebook.net/D20661 we change -// recovery behavior: previously for each log file each column family -// memtable was flushed, even it wasn't empty. 
Now it's changed: -// we try to create the smallest number of table files by merging -// updates from multiple logs -TEST_F(DBWALTest, RecoverCheckFileAmount) { - Options options = CurrentOptions(); - options.write_buffer_size = 100000; - options.arena_block_size = 4 * 1024; - options.avoid_flush_during_recovery = false; - CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); - - ASSERT_OK(Put(0, Key(1), DummyString(1))); - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(2, Key(1), DummyString(1))); - - // Make 'nikitich' memtable to be flushed - ASSERT_OK(Put(3, Key(10), DummyString(1002400))); - ASSERT_OK(Put(3, Key(1), DummyString(1))); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3])); - // 4 memtable are not flushed, 1 sst file - { - auto tables = ListTableFiles(env_, dbname_); - ASSERT_EQ(tables.size(), static_cast(1)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), - static_cast(1)); - } - // Memtable for 'nikitich' has flushed, new WAL file has opened - // 4 memtable still not flushed - - // Write to new WAL file - ASSERT_OK(Put(0, Key(1), DummyString(1))); - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(2, Key(1), DummyString(1))); - - // Fill up 'nikitich' one more time - ASSERT_OK(Put(3, Key(10), DummyString(1002400))); - // make it flush - ASSERT_OK(Put(3, Key(1), DummyString(1))); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3])); - // There are still 4 memtable not flushed, and 2 sst tables - ASSERT_OK(Put(0, Key(1), DummyString(1))); - ASSERT_OK(Put(1, Key(1), DummyString(1))); - ASSERT_OK(Put(2, Key(1), DummyString(1))); - - { - auto tables = ListTableFiles(env_, dbname_); - ASSERT_EQ(tables.size(), static_cast(2)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), - static_cast(2)); - } - - ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"}, - options); - { - std::vector table_files = ListTableFiles(env_, dbname_); - // Check, that records for 'default', 'dobrynia' and 'pikachu' from - // first, second and third WALs went to the same SST. - // So, there is 6 SSTs: three for 'nikitich', one for 'default', one for - // 'dobrynia', one for 'pikachu' - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), - static_cast(1)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), - static_cast(3)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), - static_cast(1)); - ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), - static_cast(1)); - } -} - -TEST_F(DBWALTest, SyncMultipleLogs) { - const uint64_t kNumBatches = 2; - const int kBatchSize = 1000; - - Options options = CurrentOptions(); - options.create_if_missing = true; - options.write_buffer_size = 4096; - Reopen(options); - - WriteBatch batch; - WriteOptions wo; - wo.sync = true; - - for (uint64_t b = 0; b < kNumBatches; b++) { - batch.Clear(); - for (int i = 0; i < kBatchSize; i++) { - ASSERT_OK(batch.Put(Key(i), DummyString(128))); - } - - ASSERT_OK(dbfull()->Write(wo, &batch)); - } - - ASSERT_OK(dbfull()->SyncWAL()); -} - -// Github issue 1339. Prior the fix we read sequence id from the first log to -// a local variable, then keep increase the variable as we replay logs, -// ignoring actual sequence id of the records. This is incorrect if some writes -// come with WAL disabled. 
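The subtlety is that a write with the WAL disabled still consumes a sequence number even though no record for it is ever replayed, so recovery has to take sequence numbers from the WAL records themselves. A minimal sketch of the option involved, assuming an open DB handle `db`:

WriteOptions wal_on, wal_off;
wal_off.disableWAL = true;                  // memtable only; nothing is appended to the WAL
ASSERT_OK(db->Put(wal_on, "a", "v1"));      // e.g. seq 1, present in the WAL
ASSERT_OK(db->Put(wal_off, "b", "v2"));     // e.g. seq 2, absent from the WAL
ASSERT_OK(db->Put(wal_on, "c", "v3"));      // e.g. seq 3; its WAL record must carry seq 3, not 2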
-TEST_F(DBWALTest, PartOfWritesWithWALDisabled) { - std::unique_ptr fault_env( - new FaultInjectionTestEnv(env_)); - Options options = CurrentOptions(); - options.env = fault_env.get(); - options.disable_auto_compactions = true; - WriteOptions wal_on, wal_off; - wal_on.sync = true; - wal_on.disableWAL = false; - wal_off.disableWAL = true; - CreateAndReopenWithCF({"dummy"}, options); - ASSERT_OK(Put(1, "dummy", "d1", wal_on)); // seq id 1 - ASSERT_OK(Put(1, "dummy", "d2", wal_off)); - ASSERT_OK(Put(1, "dummy", "d3", wal_off)); - ASSERT_OK(Put(0, "key", "v4", wal_on)); // seq id 4 - ASSERT_OK(Flush(0)); - ASSERT_OK(Put(0, "key", "v5", wal_on)); // seq id 5 - ASSERT_EQ("v5", Get(0, "key")); - ASSERT_OK(dbfull()->FlushWAL(false)); - // Simulate a crash. - fault_env->SetFilesystemActive(false); - Close(); - fault_env->ResetState(); - ReopenWithColumnFamilies({"default", "dummy"}, options); - // Prior to the fix, we may incorrectly recover "v5" with sequence id = 3. - ASSERT_EQ("v5", Get(0, "key")); - // Destroy DB before destruct fault_env. - Destroy(options); -} - -// -// Test WAL recovery for the various modes available -// -class RecoveryTestHelper { - public: - // Number of WAL files to generate - static constexpr int kWALFilesCount = 10; - // Starting number for the WAL file name like 00010.log - static constexpr int kWALFileOffset = 10; - // Keys to be written per WAL file - static constexpr int kKeysPerWALFile = 133; - // Size of the value - static constexpr int kValueSize = 96; - - // Create WAL files with values filled in - static void FillData(DBWALTestBase* test, const Options& options, - const size_t wal_count, size_t* count) { - // Calling internal functions requires sanitized options. - Options sanitized_options = SanitizeOptions(test->dbname_, options); - const ImmutableDBOptions db_options(sanitized_options); - - *count = 0; - - std::shared_ptr table_cache = NewLRUCache(50, 0); - FileOptions file_options; - WriteBufferManager write_buffer_manager(db_options.db_write_buffer_size); - - std::unique_ptr versions; - std::unique_ptr wal_manager; - WriteController write_controller; - - versions.reset(new VersionSet( - test->dbname_, &db_options, file_options, table_cache.get(), - &write_buffer_manager, &write_controller, - /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ "")); - - wal_manager.reset( - new WalManager(db_options, file_options, /*io_tracer=*/nullptr)); - - std::unique_ptr current_log_writer; - - for (size_t j = kWALFileOffset; j < wal_count + kWALFileOffset; j++) { - uint64_t current_log_number = j; - std::string fname = LogFileName(test->dbname_, current_log_number); - std::unique_ptr file_writer; - ASSERT_OK(WritableFileWriter::Create(db_options.env->GetFileSystem(), - fname, file_options, &file_writer, - nullptr)); - log::Writer* log_writer = - new log::Writer(std::move(file_writer), current_log_number, - db_options.recycle_log_file_num > 0, false, - db_options.wal_compression); - ASSERT_OK(log_writer->AddCompressionTypeRecord()); - current_log_writer.reset(log_writer); - - WriteBatch batch; - for (int i = 0; i < kKeysPerWALFile; i++) { - std::string key = "key" + std::to_string((*count)++); - std::string value = test->DummyString(kValueSize); - ASSERT_NE(current_log_writer.get(), nullptr); - uint64_t seq = versions->LastSequence() + 1; - batch.Clear(); - ASSERT_OK(batch.Put(key, value)); - WriteBatchInternal::SetSequence(&batch, seq); - ASSERT_OK(current_log_writer->AddRecord( - WriteBatchInternal::Contents(&batch))); - 
versions->SetLastAllocatedSequence(seq); - versions->SetLastPublishedSequence(seq); - versions->SetLastSequence(seq); - } - } - } - - // Recreate and fill the store with some data - static size_t FillData(DBWALTestBase* test, Options* options) { - options->create_if_missing = true; - test->DestroyAndReopen(*options); - test->Close(); - - size_t count = 0; - FillData(test, *options, kWALFilesCount, &count); - return count; - } - - // Read back all the keys we wrote and return the number of keys found - static size_t GetData(DBWALTestBase* test) { - size_t count = 0; - for (size_t i = 0; i < kWALFilesCount * kKeysPerWALFile; i++) { - if (test->Get("key" + std::to_string(i)) != "NOT_FOUND") { - ++count; - } - } - return count; - } - - // Manuall corrupt the specified WAL - static void CorruptWAL(DBWALTestBase* test, const Options& options, - const double off, const double len, - const int wal_file_id, const bool trunc = false) { - Env* env = options.env; - std::string fname = LogFileName(test->dbname_, wal_file_id); - uint64_t size; - ASSERT_OK(env->GetFileSize(fname, &size)); - ASSERT_GT(size, 0); -#ifdef OS_WIN - // Windows disk cache behaves differently. When we truncate - // the original content is still in the cache due to the original - // handle is still open. Generally, in Windows, one prohibits - // shared access to files and it is not needed for WAL but we allow - // it to induce corruption at various tests. - test->Close(); -#endif - if (trunc) { - ASSERT_OK( - test::TruncateFile(env, fname, static_cast(size * off))); - } else { - ASSERT_OK(test::CorruptFile(env, fname, static_cast(size * off + 8), - static_cast(size * len), false)); - } - } -}; - -class DBWALTestWithParams : public DBWALTestBase, - public ::testing::WithParamInterface< - std::tuple> { - public: - DBWALTestWithParams() : DBWALTestBase("/db_wal_test_with_params") {} -}; - -INSTANTIATE_TEST_CASE_P( - Wal, DBWALTestWithParams, - ::testing::Combine(::testing::Bool(), ::testing::Range(0, 4, 1), - ::testing::Range(RecoveryTestHelper::kWALFileOffset, - RecoveryTestHelper::kWALFileOffset + - RecoveryTestHelper::kWALFilesCount, - 1), - ::testing::Values(CompressionType::kNoCompression, - CompressionType::kZSTD))); - -class DBWALTestWithParamsVaryingRecoveryMode - : public DBWALTestBase, - public ::testing::WithParamInterface< - std::tuple> { - public: - DBWALTestWithParamsVaryingRecoveryMode() - : DBWALTestBase("/db_wal_test_with_params_mode") {} -}; - -INSTANTIATE_TEST_CASE_P( - Wal, DBWALTestWithParamsVaryingRecoveryMode, - ::testing::Combine( - ::testing::Bool(), ::testing::Range(0, 4, 1), - ::testing::Range(RecoveryTestHelper::kWALFileOffset, - RecoveryTestHelper::kWALFileOffset + - RecoveryTestHelper::kWALFilesCount, - 1), - ::testing::Values(WALRecoveryMode::kTolerateCorruptedTailRecords, - WALRecoveryMode::kAbsoluteConsistency, - WALRecoveryMode::kPointInTimeRecovery, - WALRecoveryMode::kSkipAnyCorruptedRecords), - ::testing::Values(CompressionType::kNoCompression, - CompressionType::kZSTD))); - -// Test scope: -// - We expect to open the data store when there is incomplete trailing writes -// at the end of any of the logs -// - We do not expect to open the data store for corruption -TEST_P(DBWALTestWithParams, kTolerateCorruptedTailRecords) { - bool trunc = std::get<0>(GetParam()); // Corruption style - // Corruption offset position - int corrupt_offset = std::get<1>(GetParam()); - int wal_file_id = std::get<2>(GetParam()); // WAL file - - // Fill data for testing - Options options = CurrentOptions(); - const size_t 
row_count = RecoveryTestHelper::FillData(this, &options); - // test checksum failure or parsing - RecoveryTestHelper::CorruptWAL(this, options, corrupt_offset * .3, - /*len%=*/.1, wal_file_id, trunc); - - options.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; - if (trunc) { - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - const size_t recovered_row_count = RecoveryTestHelper::GetData(this); - ASSERT_TRUE(corrupt_offset == 0 || recovered_row_count > 0); - ASSERT_LT(recovered_row_count, row_count); - } else { - ASSERT_NOK(TryReopen(options)); - } -} - -// Test scope: -// We don't expect the data store to be opened if there is any corruption -// (leading, middle or trailing -- incomplete writes or corruption) -TEST_P(DBWALTestWithParams, kAbsoluteConsistency) { - // Verify clean slate behavior - Options options = CurrentOptions(); - const size_t row_count = RecoveryTestHelper::FillData(this, &options); - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - ASSERT_EQ(RecoveryTestHelper::GetData(this), row_count); - - bool trunc = std::get<0>(GetParam()); // Corruption style - // Corruption offset position - int corrupt_offset = std::get<1>(GetParam()); - int wal_file_id = std::get<2>(GetParam()); // WAL file - // WAL compression type - CompressionType compression_type = std::get<3>(GetParam()); - options.wal_compression = compression_type; - - if (trunc && corrupt_offset == 0) { - return; - } - - // fill with new date - RecoveryTestHelper::FillData(this, &options); - // corrupt the wal - RecoveryTestHelper::CorruptWAL(this, options, corrupt_offset * .33, - /*len%=*/.1, wal_file_id, trunc); - // verify - options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; - options.create_if_missing = false; - ASSERT_NOK(TryReopen(options)); -} - -// Test scope: -// We don't expect the data store to be opened if there is any inconsistency -// between WAL and SST files -TEST_F(DBWALTest, kPointInTimeRecoveryCFConsistency) { - Options options = CurrentOptions(); - options.avoid_flush_during_recovery = true; - - // Create DB with multiple column families. - CreateAndReopenWithCF({"one", "two"}, options); - ASSERT_OK(Put(1, "key1", "val1")); - ASSERT_OK(Put(2, "key2", "val2")); - - // Record the offset at this point - Env* env = options.env; - uint64_t wal_file_id = dbfull()->TEST_LogfileNumber(); - std::string fname = LogFileName(dbname_, wal_file_id); - uint64_t offset_to_corrupt; - ASSERT_OK(env->GetFileSize(fname, &offset_to_corrupt)); - ASSERT_GT(offset_to_corrupt, 0); - - ASSERT_OK(Put(1, "key3", "val3")); - // Corrupt WAL at location of key3 - ASSERT_OK(test::CorruptFile(env, fname, static_cast(offset_to_corrupt), - 4, false)); - ASSERT_OK(Put(2, "key4", "val4")); - ASSERT_OK(Put(1, "key5", "val5")); - ASSERT_OK(Flush(2)); - - // PIT recovery & verify - options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - ASSERT_NOK(TryReopenWithColumnFamilies({"default", "one", "two"}, options)); -} - -TEST_F(DBWALTest, RaceInstallFlushResultsWithWalObsoletion) { - Options options = CurrentOptions(); - options.env = env_; - options.track_and_verify_wals_in_manifest = true; - // The following make sure there are two bg flush threads. 
- options.max_background_jobs = 8; - - DestroyAndReopen(options); - - const std::string cf1_name("cf1"); - CreateAndReopenWithCF({cf1_name}, options); - assert(handles_.size() == 2); - - { - dbfull()->TEST_LockMutex(); - ASSERT_LE(2, dbfull()->GetBGJobLimits().max_flushes); - dbfull()->TEST_UnlockMutex(); - } - - ASSERT_OK(dbfull()->PauseBackgroundWork()); - - ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "foo", "value")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "value")); - - ASSERT_OK(dbfull()->TEST_FlushMemTable( - /*wait=*/false, /*allow_write_stall=*/true, handles_[1])); - - ASSERT_OK(db_->Put(WriteOptions(), "foo", "value")); - - ASSERT_OK(dbfull()->TEST_FlushMemTable( - /*wait=*/false, /*allow_write_stall=*/true, handles_[0])); - - bool called = false; - std::atomic bg_flush_threads{0}; - std::atomic wal_synced{false}; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCallFlush:start", [&](void* /*arg*/) { - int cur = bg_flush_threads.load(); - int desired = cur + 1; - if (cur > 0 || - !bg_flush_threads.compare_exchange_strong(cur, desired)) { - while (!wal_synced.load()) { - // Wait until the other bg flush thread finishes committing WAL sync - // operation to the MANIFEST. - } - } - }); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushMemTableToOutputFile:CommitWal:1", - [&](void* /*arg*/) { wal_synced.store(true); }); - // This callback will be called when the first bg flush thread reaches the - // point before entering the MANIFEST write queue after flushing the SST - // file. - // The purpose of the sync points here is to ensure both bg flush threads - // finish computing `min_wal_number_to_keep` before any of them updates the - // `log_number` for the column family that's being flushed. - SyncPoint::GetInstance()->SetCallBack( - "MemTableList::TryInstallMemtableFlushResults:AfterComputeMinWalToKeep", - [&](void* /*arg*/) { - dbfull()->mutex()->AssertHeld(); - if (!called) { - // We are the first bg flush thread in the MANIFEST write queue. - // We set up the dependency between sync points for two threads that - // will be executing the same code. - // For the interleaving of events, see - // https://github.com/facebook/rocksdb/pull/9715. - // bg flush thread1 will release the db mutex while in the MANIFEST - // write queue. In the meantime, bg flush thread2 locks db mutex and - // computes the min_wal_number_to_keep (before thread1 writes to - // MANIFEST thus before cf1->log_number is updated). Bg thread2 joins - // the MANIFEST write queue afterwards and bg flush thread1 proceeds - // with writing to MANIFEST. - called = true; - SyncPoint::GetInstance()->LoadDependency({ - {"VersionSet::LogAndApply:WriteManifestStart", - "DBWALTest::RaceInstallFlushResultsWithWalObsoletion:BgFlush2"}, - {"DBWALTest::RaceInstallFlushResultsWithWalObsoletion:BgFlush2", - "VersionSet::LogAndApply:WriteManifest"}, - }); - } else { - // The other bg flush thread has already been in the MANIFEST write - // queue, and we are after. 
- TEST_SYNC_POINT( - "DBWALTest::RaceInstallFlushResultsWithWalObsoletion:BgFlush2"); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(dbfull()->ContinueBackgroundWork()); - - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0])); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); - - ASSERT_TRUE(called); - - Close(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - DB* db1 = nullptr; - Status s = DB::OpenForReadOnly(options, dbname_, &db1); - ASSERT_OK(s); - assert(db1); - delete db1; -} - -TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) { - Options options = CurrentOptions(); - // Small size to force manifest creation - options.max_manifest_file_size = 1; - options.track_and_verify_wals_in_manifest = true; - DestroyAndReopen(options); - - // Accumulate memtable m1 and create the 1st wal (i.e, 4.log) - ASSERT_OK(Put(Key(1), "")); - ASSERT_OK(Put(Key(2), "")); - ASSERT_OK(Put(Key(3), "")); - - const std::string wal_file_path = db_->GetName() + "/000004.log"; - - // Coerce the following sequence of events: - // (1) Flush() marks 4.log to be obsoleted, 8.log to be the latest (i.e, - // active) log and release the lock - // (2) SyncWAL() proceeds with the lock. It - // creates a new manifest and syncs all the inactive wals before the latest - // (i.e, active log), which is 4.log. Note that SyncWAL() is not aware of the - // fact that 4.log has marked as to be obseleted. Such wal - // sync will then add a WAL addition record of 4.log to the new manifest - // without any special treatment. Prior to the fix, there is no WAL deletion - // record to offset it. (3) BackgroundFlush() will eventually purge 4.log. - - bool wal_synced = false; - SyncPoint::GetInstance()->SetCallBack( - "FindObsoleteFiles::PostMutexUnlock", [&](void*) { - ASSERT_OK(env_->FileExists(wal_file_path)); - uint64_t pre_sync_wal_manifest_no = - dbfull()->TEST_Current_Manifest_FileNo(); - ASSERT_OK(db_->SyncWAL()); - uint64_t post_sync_wal_manifest_no = - dbfull()->TEST_Current_Manifest_FileNo(); - bool new_manifest_created = - post_sync_wal_manifest_no == pre_sync_wal_manifest_no + 1; - ASSERT_TRUE(new_manifest_created); - wal_synced = true; - }); - - - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork()); - - ASSERT_TRUE(wal_synced); - // BackgroundFlush() purged 4.log - // because the memtable associated with the WAL was flushed and new WAL was - // created (i.e, 8.log) - ASSERT_TRUE(env_->FileExists(wal_file_path).IsNotFound()); - - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - - // To verify the corruption of "Missing WAL with log number: 4" under - // `options.track_and_verify_wals_in_manifest = true` is fixed. - // - // Before the fix, `db_->SyncWAL()` will sync and record WAL addtion of the - // obseleted WAL 4.log in a new manifest without any special treament. - // This will result in missing-wal corruption in DB::Reopen(). 
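A sketch of the verification that `track_and_verify_wals_in_manifest` adds (the path below is hypothetical); with the option on, a WAL recorded in the MANIFEST but missing on disk fails the open instead of being silently ignored:

Options opts;
opts.track_and_verify_wals_in_manifest = true;   // record WAL additions/deletions in the MANIFEST
DB* db = nullptr;
Status s = DB::Open(opts, "/tmp/example_db", &db);
// If a tracked WAL is gone, s is a Corruption status along the lines of
// "Missing WAL with log number: 4" rather than an apparently successful open.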
- Status s = TryReopen(options); - EXPECT_OK(s); -} - -// Test scope: -// - We expect to open data store under all circumstances -// - We expect only data upto the point where the first error was encountered -TEST_P(DBWALTestWithParams, kPointInTimeRecovery) { - const int maxkeys = - RecoveryTestHelper::kWALFilesCount * RecoveryTestHelper::kKeysPerWALFile; - - bool trunc = std::get<0>(GetParam()); // Corruption style - // Corruption offset position - int corrupt_offset = std::get<1>(GetParam()); - int wal_file_id = std::get<2>(GetParam()); // WAL file - // WAL compression type - CompressionType compression_type = std::get<3>(GetParam()); - - // Fill data for testing - Options options = CurrentOptions(); - options.wal_compression = compression_type; - const size_t row_count = RecoveryTestHelper::FillData(this, &options); - - // Corrupt the wal - // The offset here was 0.3 which cuts off right at the end of a - // valid fragment after wal zstd compression checksum is enabled, - // so changed the value to 0.33. - RecoveryTestHelper::CorruptWAL(this, options, corrupt_offset * .33, - /*len%=*/.1, wal_file_id, trunc); - - // Verify - options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - - // Probe data for invariants - size_t recovered_row_count = RecoveryTestHelper::GetData(this); - ASSERT_LT(recovered_row_count, row_count); - - // Verify a prefix of keys were recovered. But not in the case of full WAL - // truncation, because we have no way to know there was a corruption when - // truncation happened on record boundaries (preventing recovery holes in - // that case requires using `track_and_verify_wals_in_manifest`). - if (!trunc || corrupt_offset != 0) { - bool expect_data = true; - for (size_t k = 0; k < maxkeys; ++k) { - bool found = Get("key" + std::to_string(k)) != "NOT_FOUND"; - if (expect_data && !found) { - expect_data = false; - } - ASSERT_EQ(found, expect_data); - } - } - - const size_t min = RecoveryTestHelper::kKeysPerWALFile * - (wal_file_id - RecoveryTestHelper::kWALFileOffset); - ASSERT_GE(recovered_row_count, min); - if (!trunc && corrupt_offset != 0) { - const size_t max = RecoveryTestHelper::kKeysPerWALFile * - (wal_file_id - RecoveryTestHelper::kWALFileOffset + 1); - ASSERT_LE(recovered_row_count, max); - } -} - -// Test scope: -// - We expect to open the data store under all scenarios -// - We expect to have recovered records past the corruption zone -TEST_P(DBWALTestWithParams, kSkipAnyCorruptedRecords) { - bool trunc = std::get<0>(GetParam()); // Corruption style - // Corruption offset position - int corrupt_offset = std::get<1>(GetParam()); - int wal_file_id = std::get<2>(GetParam()); // WAL file - // WAL compression type - CompressionType compression_type = std::get<3>(GetParam()); - - // Fill data for testing - Options options = CurrentOptions(); - options.wal_compression = compression_type; - const size_t row_count = RecoveryTestHelper::FillData(this, &options); - - // Corrupt the WAL - RecoveryTestHelper::CorruptWAL(this, options, corrupt_offset * .3, - /*len%=*/.1, wal_file_id, trunc); - - // Verify behavior - options.wal_recovery_mode = WALRecoveryMode::kSkipAnyCorruptedRecords; - options.create_if_missing = false; - ASSERT_OK(TryReopen(options)); - - // Probe data for invariants - size_t recovered_row_count = RecoveryTestHelper::GetData(this); - ASSERT_LT(recovered_row_count, row_count); - - if (!trunc) { - ASSERT_TRUE(corrupt_offset != 0 || recovered_row_count > 0); - } -} - 
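The four parameterized tests above map one-to-one onto the `WALRecoveryMode` values; a compact sketch of selecting a mode, with each value summarized from the test-scope comments:

Options opts;
// kTolerateCorruptedTailRecords: accept an incomplete trailing write, fail on other corruption
// kAbsoluteConsistency:          fail recovery on any corruption, even a torn tail record
// kPointInTimeRecovery:          replay up to the first error and drop everything after it
// kSkipAnyCorruptedRecords:      skip corrupted records and keep whatever still parses
opts.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;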
-TEST_F(DBWALTest, AvoidFlushDuringRecovery) { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.avoid_flush_during_recovery = false; - - // Test with flush after recovery. - Reopen(options); - ASSERT_OK(Put("foo", "v1")); - ASSERT_OK(Put("bar", "v2")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo", "v3")); - ASSERT_OK(Put("bar", "v4")); - ASSERT_EQ(1, TotalTableFiles()); - // Reopen DB. Check if WAL logs flushed. - Reopen(options); - ASSERT_EQ("v3", Get("foo")); - ASSERT_EQ("v4", Get("bar")); - ASSERT_EQ(2, TotalTableFiles()); - - // Test without flush after recovery. - options.avoid_flush_during_recovery = true; - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "v5")); - ASSERT_OK(Put("bar", "v6")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo", "v7")); - ASSERT_OK(Put("bar", "v8")); - ASSERT_EQ(1, TotalTableFiles()); - // Reopen DB. WAL logs should not be flushed this time. - Reopen(options); - ASSERT_EQ("v7", Get("foo")); - ASSERT_EQ("v8", Get("bar")); - ASSERT_EQ(1, TotalTableFiles()); - - // Force flush with allow_2pc. - options.avoid_flush_during_recovery = true; - options.allow_2pc = true; - ASSERT_OK(Put("foo", "v9")); - ASSERT_OK(Put("bar", "v10")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo", "v11")); - ASSERT_OK(Put("bar", "v12")); - Reopen(options); - ASSERT_EQ("v11", Get("foo")); - ASSERT_EQ("v12", Get("bar")); - ASSERT_EQ(3, TotalTableFiles()); -} - -TEST_F(DBWALTest, WalCleanupAfterAvoidFlushDuringRecovery) { - // Verifies WAL files that were present during recovery, but not flushed due - // to avoid_flush_during_recovery, will be considered for deletion at a later - // stage. We check at least one such file is deleted during Flush(). - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.avoid_flush_during_recovery = true; - Reopen(options); - - ASSERT_OK(Put("foo", "v1")); - Reopen(options); - for (int i = 0; i < 2; ++i) { - if (i > 0) { - // Flush() triggers deletion of obsolete tracked files - ASSERT_OK(Flush()); - } - VectorLogPtr log_files; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files)); - if (i == 0) { - ASSERT_GT(log_files.size(), 0); - } else { - ASSERT_EQ(0, log_files.size()); - } - } -} - -TEST_F(DBWALTest, RecoverWithoutFlush) { - Options options = CurrentOptions(); - options.avoid_flush_during_recovery = true; - options.create_if_missing = false; - options.disable_auto_compactions = true; - options.write_buffer_size = 64 * 1024 * 1024; - - size_t count = RecoveryTestHelper::FillData(this, &options); - auto validateData = [this, count]() { - for (size_t i = 0; i < count; i++) { - ASSERT_NE(Get("key" + std::to_string(i)), "NOT_FOUND"); - } - }; - Reopen(options); - validateData(); - // Insert some data without flush - ASSERT_OK(Put("foo", "foo_v1")); - ASSERT_OK(Put("bar", "bar_v1")); - Reopen(options); - validateData(); - ASSERT_EQ(Get("foo"), "foo_v1"); - ASSERT_EQ(Get("bar"), "bar_v1"); - // Insert again and reopen - ASSERT_OK(Put("foo", "foo_v2")); - ASSERT_OK(Put("bar", "bar_v2")); - Reopen(options); - validateData(); - ASSERT_EQ(Get("foo"), "foo_v2"); - ASSERT_EQ(Get("bar"), "bar_v2"); - // manual flush and insert again - ASSERT_OK(Flush()); - ASSERT_EQ(Get("foo"), "foo_v2"); - ASSERT_EQ(Get("bar"), "bar_v2"); - ASSERT_OK(Put("foo", "foo_v3")); - ASSERT_OK(Put("bar", "bar_v3")); - Reopen(options); - validateData(); - ASSERT_EQ(Get("foo"), "foo_v3"); - ASSERT_EQ(Get("bar"), "bar_v3"); -} - -TEST_F(DBWALTest, RecoverWithoutFlushMultipleCF) { - const std::string kSmallValue = "v"; 
- const std::string kLargeValue = DummyString(1024); - Options options = CurrentOptions(); - options.avoid_flush_during_recovery = true; - options.create_if_missing = false; - options.disable_auto_compactions = true; - - auto countWalFiles = [this]() { - VectorLogPtr log_files; - if (!dbfull()->GetSortedWalFiles(log_files).ok()) { - return size_t{0}; - } - return log_files.size(); - }; - - // Create DB with multiple column families and multiple log files. - CreateAndReopenWithCF({"one", "two"}, options); - ASSERT_OK(Put(0, "key1", kSmallValue)); - ASSERT_OK(Put(1, "key2", kLargeValue)); - ASSERT_OK(Flush(1)); - ASSERT_EQ(1, countWalFiles()); - ASSERT_OK(Put(0, "key3", kSmallValue)); - ASSERT_OK(Put(2, "key4", kLargeValue)); - ASSERT_OK(Flush(2)); - ASSERT_EQ(2, countWalFiles()); - - // Reopen, insert and flush. - options.db_write_buffer_size = 64 * 1024 * 1024; - ReopenWithColumnFamilies({"default", "one", "two"}, options); - ASSERT_EQ(Get(0, "key1"), kSmallValue); - ASSERT_EQ(Get(1, "key2"), kLargeValue); - ASSERT_EQ(Get(0, "key3"), kSmallValue); - ASSERT_EQ(Get(2, "key4"), kLargeValue); - // Insert more data. - ASSERT_OK(Put(0, "key5", kLargeValue)); - ASSERT_OK(Put(1, "key6", kLargeValue)); - ASSERT_EQ(3, countWalFiles()); - ASSERT_OK(Flush(1)); - ASSERT_OK(Put(2, "key7", kLargeValue)); - ASSERT_OK(dbfull()->FlushWAL(false)); - ASSERT_EQ(4, countWalFiles()); - - // Reopen twice and validate. - for (int i = 0; i < 2; i++) { - ReopenWithColumnFamilies({"default", "one", "two"}, options); - ASSERT_EQ(Get(0, "key1"), kSmallValue); - ASSERT_EQ(Get(1, "key2"), kLargeValue); - ASSERT_EQ(Get(0, "key3"), kSmallValue); - ASSERT_EQ(Get(2, "key4"), kLargeValue); - ASSERT_EQ(Get(0, "key5"), kLargeValue); - ASSERT_EQ(Get(1, "key6"), kLargeValue); - ASSERT_EQ(Get(2, "key7"), kLargeValue); - ASSERT_EQ(4, countWalFiles()); - } -} - -// In this test we are trying to do the following: -// 1. Create a DB with corrupted WAL log; -// 2. Open with avoid_flush_during_recovery = true; -// 3. Append more data without flushing, which creates new WAL log. -// 4. Open again. See if it can correctly handle previous corruption. -TEST_P(DBWALTestWithParamsVaryingRecoveryMode, - RecoverFromCorruptedWALWithoutFlush) { - const int kAppendKeys = 100; - Options options = CurrentOptions(); - options.avoid_flush_during_recovery = true; - options.create_if_missing = false; - options.disable_auto_compactions = true; - options.write_buffer_size = 64 * 1024 * 1024; - - auto getAll = [this]() { - std::vector> data; - ReadOptions ropt; - Iterator* iter = dbfull()->NewIterator(ropt); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - data.push_back( - std::make_pair(iter->key().ToString(), iter->value().ToString())); - } - delete iter; - return data; - }; - - bool trunc = std::get<0>(GetParam()); // Corruption style - // Corruption offset position - int corrupt_offset = std::get<1>(GetParam()); - int wal_file_id = std::get<2>(GetParam()); // WAL file - WALRecoveryMode recovery_mode = std::get<3>(GetParam()); - // WAL compression type - CompressionType compression_type = std::get<4>(GetParam()); - - options.wal_recovery_mode = recovery_mode; - options.wal_compression = compression_type; - // Create corrupted WAL - RecoveryTestHelper::FillData(this, &options); - RecoveryTestHelper::CorruptWAL(this, options, corrupt_offset * .3, - /*len%=*/.1, wal_file_id, trunc); - // Skip the test if DB won't open. 
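The tests from AvoidFlushDuringRecovery onward toggle `avoid_flush_during_recovery`; a short sketch of its effect, summarized from the assertions above:

Options opts;
opts.avoid_flush_during_recovery = true;   // keep recovered data in the memtable; Open() does not
                                           // write new SST files just to retire the old WALs
// The recovered WALs stay live until a later flush makes them obsolete, and their size still
// counts toward max_total_wal_size, so a WAL-full flush can trigger soon after reopening.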
- if (!TryReopen(options).ok()) { - ASSERT_TRUE(options.wal_recovery_mode == - WALRecoveryMode::kAbsoluteConsistency || - (!trunc && options.wal_recovery_mode == - WALRecoveryMode::kTolerateCorruptedTailRecords)); - return; - } - ASSERT_OK(TryReopen(options)); - // Append some more data. - for (int k = 0; k < kAppendKeys; k++) { - std::string key = "extra_key" + std::to_string(k); - std::string value = DummyString(RecoveryTestHelper::kValueSize); - ASSERT_OK(Put(key, value)); - } - // Save data for comparison. - auto data = getAll(); - // Reopen. Verify data. - ASSERT_OK(TryReopen(options)); - auto actual_data = getAll(); - ASSERT_EQ(data, actual_data); -} - -// Tests that total log size is recovered if we set -// avoid_flush_during_recovery=true. -// Flush should trigger if max_total_wal_size is reached. -TEST_F(DBWALTest, RestoreTotalLogSizeAfterRecoverWithoutFlush) { - auto test_listener = std::make_shared(); - test_listener->expected_flush_reason = FlushReason::kWalFull; - - constexpr size_t kKB = 1024; - constexpr size_t kMB = 1024 * 1024; - Options options = CurrentOptions(); - options.avoid_flush_during_recovery = true; - options.max_total_wal_size = 1 * kMB; - options.listeners.push_back(test_listener); - // Have to open DB in multi-CF mode to trigger flush when - // max_total_wal_size is reached. - CreateAndReopenWithCF({"one"}, options); - // Write some keys and we will end up with one log file which is slightly - // smaller than 1MB. - std::string value_100k(100 * kKB, 'v'); - std::string value_300k(300 * kKB, 'v'); - ASSERT_OK(Put(0, "foo", "v1")); - for (int i = 0; i < 9; i++) { - ASSERT_OK(Put(1, "key" + std::to_string(i), value_100k)); - } - // Get log files before reopen. - VectorLogPtr log_files_before; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_before)); - ASSERT_EQ(1, log_files_before.size()); - uint64_t log_size_before = log_files_before[0]->SizeFileBytes(); - ASSERT_GT(log_size_before, 900 * kKB); - ASSERT_LT(log_size_before, 1 * kMB); - ReopenWithColumnFamilies({"default", "one"}, options); - // Write one more value to make log larger than 1MB. - ASSERT_OK(Put(1, "bar", value_300k)); - // Get log files again. A new log file will be opened. - VectorLogPtr log_files_after_reopen; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_after_reopen)); - ASSERT_EQ(2, log_files_after_reopen.size()); - ASSERT_EQ(log_files_before[0]->LogNumber(), - log_files_after_reopen[0]->LogNumber()); - ASSERT_GT(log_files_after_reopen[0]->SizeFileBytes() + - log_files_after_reopen[1]->SizeFileBytes(), - 1 * kMB); - // Write one more key to trigger flush. - ASSERT_OK(Put(0, "foo", "v2")); - for (auto* h : handles_) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(h)); - } - // Flushed two column families. - ASSERT_EQ(2, test_listener->count.load()); -} - -#if defined(ROCKSDB_PLATFORM_POSIX) -#if defined(ROCKSDB_FALLOCATE_PRESENT) -// Tests that we will truncate the preallocated space of the last log from -// previous. 
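The flush in RestoreTotalLogSizeAfterRecoverWithoutFlush above is driven by `max_total_wal_size`; a sketch of the option, hedged as a summary of the behavior that test checks:

Options opts;
opts.max_total_wal_size = 1024 * 1024;   // once live WALs exceed this total, RocksDB flushes the
                                         // column families still holding data in the oldest WAL
                                         // (FlushReason::kWalFull) so that the WAL can be freed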
-TEST_F(DBWALTest, TruncateLastLogAfterRecoverWithoutFlush) { - constexpr size_t kKB = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.avoid_flush_during_recovery = true; - if (mem_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem environment"); - return; - } - if (!IsFallocateSupported()) { - return; - } - - DestroyAndReopen(options); - size_t preallocated_size = - dbfull()->TEST_GetWalPreallocateBlockSize(options.write_buffer_size); - ASSERT_OK(Put("foo", "v1")); - VectorLogPtr log_files_before; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_before)); - ASSERT_EQ(1, log_files_before.size()); - auto& file_before = log_files_before[0]; - ASSERT_LT(file_before->SizeFileBytes(), 1 * kKB); - // The log file has preallocated space. - ASSERT_GE(GetAllocatedFileSize(dbname_ + file_before->PathName()), - preallocated_size); - Reopen(options); - VectorLogPtr log_files_after; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_after)); - ASSERT_EQ(1, log_files_after.size()); - ASSERT_LT(log_files_after[0]->SizeFileBytes(), 1 * kKB); - // The preallocated space should be truncated. - ASSERT_LT(GetAllocatedFileSize(dbname_ + file_before->PathName()), - preallocated_size); -} -// Tests that we will truncate the preallocated space of the last log from -// previous. -TEST_F(DBWALTest, TruncateLastLogAfterRecoverWithFlush) { - constexpr size_t kKB = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.avoid_flush_during_recovery = false; - options.avoid_flush_during_shutdown = true; - if (mem_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem environment"); - return; - } - if (!IsFallocateSupported()) { - return; - } - - DestroyAndReopen(options); - size_t preallocated_size = - dbfull()->TEST_GetWalPreallocateBlockSize(options.write_buffer_size); - ASSERT_OK(Put("foo", "v1")); - VectorLogPtr log_files_before; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_before)); - ASSERT_EQ(1, log_files_before.size()); - auto& file_before = log_files_before[0]; - ASSERT_LT(file_before->SizeFileBytes(), 1 * kKB); - ASSERT_GE(GetAllocatedFileSize(dbname_ + file_before->PathName()), - preallocated_size); - // The log file has preallocated space. - Close(); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::PurgeObsoleteFiles:Begin", - "DBWALTest::TruncateLastLogAfterRecoverWithFlush:AfterRecover"}, - {"DBWALTest::TruncateLastLogAfterRecoverWithFlush:AfterTruncate", - "DBImpl::DeleteObsoleteFileImpl::BeforeDeletion"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - port::Thread reopen_thread([&]() { Reopen(options); }); - - TEST_SYNC_POINT( - "DBWALTest::TruncateLastLogAfterRecoverWithFlush:AfterRecover"); - // After the flush during Open, the log file should get deleted. However, - // if the process is in a crash loop, the log file may not get - // deleted and thte preallocated space will keep accumulating. So we need - // to ensure it gets trtuncated. 
- EXPECT_LT(GetAllocatedFileSize(dbname_ + file_before->PathName()), - preallocated_size); - TEST_SYNC_POINT( - "DBWALTest::TruncateLastLogAfterRecoverWithFlush:AfterTruncate"); - reopen_thread.join(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DBWALTest, TruncateLastLogAfterRecoverWALEmpty) { - Options options = CurrentOptions(); - options.env = env_; - options.avoid_flush_during_recovery = false; - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem/non-encrypted environment"); - return; - } - if (!IsFallocateSupported()) { - return; - } - - DestroyAndReopen(options); - size_t preallocated_size = - dbfull()->TEST_GetWalPreallocateBlockSize(options.write_buffer_size); - Close(); - std::vector filenames; - std::string last_log; - uint64_t last_log_num = 0; - ASSERT_OK(env_->GetChildren(dbname_, &filenames)); - for (auto fname : filenames) { - uint64_t number; - FileType type; - if (ParseFileName(fname, &number, &type, nullptr)) { - if (type == kWalFile && number > last_log_num) { - last_log = fname; - } - } - } - ASSERT_NE(last_log, ""); - last_log = dbname_ + '/' + last_log; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::PurgeObsoleteFiles:Begin", - "DBWALTest::TruncateLastLogAfterRecoverWithFlush:AfterRecover"}, - {"DBWALTest::TruncateLastLogAfterRecoverWithFlush:AfterTruncate", - "DBImpl::DeleteObsoleteFileImpl::BeforeDeletion"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "PosixWritableFile::Close", - [](void* arg) { *(reinterpret_cast(arg)) = 0; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - // Preallocate space for the empty log file. This could happen if WAL data - // was buffered in memory and the process crashed. - std::unique_ptr log_file; - ASSERT_OK(env_->ReopenWritableFile(last_log, &log_file, EnvOptions())); - log_file->SetPreallocationBlockSize(preallocated_size); - log_file->PrepareWrite(0, 4096); - log_file.reset(); - - ASSERT_GE(GetAllocatedFileSize(last_log), preallocated_size); - - port::Thread reopen_thread([&]() { Reopen(options); }); - - TEST_SYNC_POINT( - "DBWALTest::TruncateLastLogAfterRecoverWithFlush:AfterRecover"); - // The preallocated space should be truncated. 
- EXPECT_LT(GetAllocatedFileSize(last_log), preallocated_size); - TEST_SYNC_POINT( - "DBWALTest::TruncateLastLogAfterRecoverWithFlush:AfterTruncate"); - reopen_thread.join(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(DBWALTest, ReadOnlyRecoveryNoTruncate) { - constexpr size_t kKB = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.avoid_flush_during_recovery = true; - if (mem_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem environment"); - return; - } - if (!IsFallocateSupported()) { - return; - } - - // create DB and close with file truncate disabled - std::atomic_bool enable_truncate{false}; - - SyncPoint::GetInstance()->SetCallBack( - "PosixWritableFile::Close", [&](void* arg) { - if (!enable_truncate) { - *(reinterpret_cast(arg)) = 0; - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndReopen(options); - size_t preallocated_size = - dbfull()->TEST_GetWalPreallocateBlockSize(options.write_buffer_size); - ASSERT_OK(Put("foo", "v1")); - VectorLogPtr log_files_before; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_before)); - ASSERT_EQ(1, log_files_before.size()); - auto& file_before = log_files_before[0]; - ASSERT_LT(file_before->SizeFileBytes(), 1 * kKB); - // The log file has preallocated space. - auto db_size = GetAllocatedFileSize(dbname_ + file_before->PathName()); - ASSERT_GE(db_size, preallocated_size); - Close(); - - // enable truncate and open DB as readonly, the file should not be truncated - // and DB size is not changed. - enable_truncate = true; - ASSERT_OK(ReadOnlyReopen(options)); - VectorLogPtr log_files_after; - ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_after)); - ASSERT_EQ(1, log_files_after.size()); - ASSERT_LT(log_files_after[0]->SizeFileBytes(), 1 * kKB); - ASSERT_EQ(log_files_after[0]->PathName(), file_before->PathName()); - // The preallocated space should NOT be truncated. - // the DB size is almost the same. 
- ASSERT_NEAR(GetAllocatedFileSize(dbname_ + file_before->PathName()), db_size, - db_size / 100); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} -#endif // ROCKSDB_FALLOCATE_PRESENT -#endif // ROCKSDB_PLATFORM_POSIX - -TEST_F(DBWALTest, WalInManifestButNotInSortedWals) { - Options options = CurrentOptions(); - options.track_and_verify_wals_in_manifest = true; - options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; - - // Build a way to make wal files selectively go missing - bool wals_go_missing = false; - struct MissingWalFs : public FileSystemWrapper { - MissingWalFs(const std::shared_ptr& t, - bool* _wals_go_missing_flag) - : FileSystemWrapper(t), wals_go_missing_flag(_wals_go_missing_flag) {} - bool* wals_go_missing_flag; - IOStatus GetChildren(const std::string& dir, const IOOptions& io_opts, - std::vector* r, - IODebugContext* dbg) override { - IOStatus s = target_->GetChildren(dir, io_opts, r, dbg); - if (s.ok() && *wals_go_missing_flag) { - for (size_t i = 0; i < r->size();) { - if (EndsWith(r->at(i), ".log")) { - r->erase(r->begin() + i); - } else { - ++i; - } - } - } - return s; - } - const char* Name() const override { return "MissingWalFs"; } - }; - auto my_fs = - std::make_shared(env_->GetFileSystem(), &wals_go_missing); - std::unique_ptr my_env(NewCompositeEnv(my_fs)); - options.env = my_env.get(); - - CreateAndReopenWithCF({"blah"}, options); - - // Currently necessary to get a WAL tracked in manifest; see - // https://github.com/facebook/rocksdb/issues/10080 - ASSERT_OK(Put(0, "x", "y")); - ASSERT_OK(db_->SyncWAL()); - ASSERT_OK(Put(1, "x", "y")); - ASSERT_OK(db_->SyncWAL()); - ASSERT_OK(Flush(1)); - - ASSERT_FALSE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); - std::vector> wals; - ASSERT_OK(db_->GetSortedWalFiles(wals)); - wals_go_missing = true; - ASSERT_NOK(db_->GetSortedWalFiles(wals)); - wals_go_missing = false; - Close(); -} - - -TEST_F(DBWALTest, WalTermTest) { - Options options = CurrentOptions(); - options.env = env_; - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(1, "foo", "bar")); - - WriteOptions wo; - wo.sync = true; - wo.disableWAL = false; - - WriteBatch batch; - ASSERT_OK(batch.Put("foo", "bar")); - batch.MarkWalTerminationPoint(); - ASSERT_OK(batch.Put("foo2", "bar2")); - - ASSERT_OK(dbfull()->Write(wo, &batch)); - - // make sure we can re-open it. - ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); - ASSERT_EQ("bar", Get(1, "foo")); - ASSERT_EQ("NOT_FOUND", Get(1, "foo2")); -} - -TEST_F(DBWALTest, GetCompressedWalsAfterSync) { - if (db_->GetOptions().wal_compression == kNoCompression) { - ROCKSDB_GTEST_BYPASS("stream compression not present"); - return; - } - Options options = GetDefaultOptions(); - options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; - options.create_if_missing = true; - options.env = env_; - options.avoid_flush_during_recovery = true; - options.track_and_verify_wals_in_manifest = true; - // Enable WAL compression so that the newly-created WAL will be non-empty - // after DB open, even if point-in-time WAL recovery encounters no - // corruption. - options.wal_compression = kZSTD; - DestroyAndReopen(options); - - // Write something to memtable and WAL so that log_empty_ will be false after - // next DB::Open(). - ASSERT_OK(Put("a", "v")); - - Reopen(options); - - // New WAL is created, thanks to !log_empty_. 
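A sketch of the compression option this last test depends on; with it enabled, every new WAL begins with a compression-type record, which is why the freshly created WAL is non-empty even before any user write reaches it:

Options opts;
opts.wal_compression = kZSTD;            // stream compression for records in newly created WALs
                                         // (kNoCompression, the default, leaves WALs uncompressed)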
- ASSERT_OK(dbfull()->TEST_SwitchWAL()); - - ASSERT_OK(Put("b", "v")); - - ASSERT_OK(db_->SyncWAL()); - - VectorLogPtr wals; - Status s = dbfull()->GetSortedWalFiles(wals); - ASSERT_OK(s); -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc deleted file mode 100644 index 4b8132df3..000000000 --- a/db/db_with_timestamp_basic_test.cc +++ /dev/null @@ -1,3928 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/db_with_timestamp_test_util.h" -#include "port/stack_trace.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/utilities/debug.h" -#include "table/block_based/block_based_table_reader.h" -#include "table/block_based/block_builder.h" -#include "test_util/sync_point.h" -#include "test_util/testutil.h" -#include "utilities/fault_injection_env.h" -#include "utilities/merge_operators/string_append/stringappend2.h" - -namespace ROCKSDB_NAMESPACE { -class DBBasicTestWithTimestamp : public DBBasicTestWithTimestampBase { - public: - DBBasicTestWithTimestamp() - : DBBasicTestWithTimestampBase("db_basic_test_with_timestamp") {} -}; - -TEST_F(DBBasicTestWithTimestamp, SanityChecks) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.avoid_flush_during_shutdown = true; - options.merge_operator = MergeOperators::CreateStringAppendTESTOperator(); - DestroyAndReopen(options); - - Options options1 = CurrentOptions(); - options1.env = env_; - options1.comparator = test::BytewiseComparatorWithU64TsWrapper(); - options1.merge_operator = MergeOperators::CreateStringAppendTESTOperator(); - assert(options1.comparator && - options1.comparator->timestamp_size() == sizeof(uint64_t)); - ColumnFamilyHandle* handle = nullptr; - Status s = db_->CreateColumnFamily(options1, "data", &handle); - ASSERT_OK(s); - - std::string dummy_ts(sizeof(uint64_t), '\0'); - // Perform timestamp operations on default cf. - ASSERT_TRUE( - db_->Put(WriteOptions(), "key", dummy_ts, "value").IsInvalidArgument()); - ASSERT_TRUE(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), "key", - dummy_ts, "value") - .IsInvalidArgument()); - ASSERT_TRUE(db_->Delete(WriteOptions(), "key", dummy_ts).IsInvalidArgument()); - ASSERT_TRUE( - db_->SingleDelete(WriteOptions(), "key", dummy_ts).IsInvalidArgument()); - ASSERT_TRUE(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - "begin_key", "end_key", dummy_ts) - .IsInvalidArgument()); - - // Perform non-timestamp operations on "data" cf. 
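Before the non-timestamp failure cases below, a sketch of the intended usage against a timestamp-aware column family, assuming an 8-byte (uint64) timestamp comparator such as `test::BytewiseComparatorWithU64TsWrapper()` and a hypothetical handle `handle`:

std::string ts(sizeof(uint64_t), '\0');
EncodeFixed64(&ts[0], 42);                                       // hypothetical timestamp value
ASSERT_OK(db->Put(WriteOptions(), handle, "key", ts, "value"));  // timestamp-aware Put overload

Slice read_ts(ts);
ReadOptions ropts;
ropts.timestamp = &read_ts;                            // read the latest version visible at ts
std::string value;
ASSERT_OK(db->Get(ropts, handle, "key", &value));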
- ASSERT_TRUE( - db_->Put(WriteOptions(), handle, "key", "value").IsInvalidArgument()); - ASSERT_TRUE(db_->Delete(WriteOptions(), handle, "key").IsInvalidArgument()); - ASSERT_TRUE( - db_->SingleDelete(WriteOptions(), handle, "key").IsInvalidArgument()); - - ASSERT_TRUE( - db_->Merge(WriteOptions(), handle, "key", "value").IsInvalidArgument()); - ASSERT_TRUE(db_->DeleteRange(WriteOptions(), handle, "begin_key", "end_key") - .IsInvalidArgument()); - - { - WriteBatch wb; - ASSERT_OK(wb.Put(handle, "key", "value")); - ASSERT_TRUE(db_->Write(WriteOptions(), &wb).IsInvalidArgument()); - } - { - WriteBatch wb; - ASSERT_OK(wb.Delete(handle, "key")); - ASSERT_TRUE(db_->Write(WriteOptions(), &wb).IsInvalidArgument()); - } - { - WriteBatch wb; - ASSERT_OK(wb.SingleDelete(handle, "key")); - ASSERT_TRUE(db_->Write(WriteOptions(), &wb).IsInvalidArgument()); - } - { - WriteBatch wb; - ASSERT_OK(wb.DeleteRange(handle, "begin_key", "end_key")); - ASSERT_TRUE(db_->Write(WriteOptions(), &wb).IsInvalidArgument()); - } - - // Perform timestamp operations with timestamps of incorrect size. - const std::string wrong_ts(sizeof(uint32_t), '\0'); - ASSERT_TRUE(db_->Put(WriteOptions(), handle, "key", wrong_ts, "value") - .IsInvalidArgument()); - ASSERT_TRUE(db_->Merge(WriteOptions(), handle, "key", wrong_ts, "value") - .IsInvalidArgument()); - ASSERT_TRUE( - db_->Delete(WriteOptions(), handle, "key", wrong_ts).IsInvalidArgument()); - ASSERT_TRUE(db_->SingleDelete(WriteOptions(), handle, "key", wrong_ts) - .IsInvalidArgument()); - ASSERT_TRUE( - db_->DeleteRange(WriteOptions(), handle, "begin_key", "end_key", wrong_ts) - .IsInvalidArgument()); - - delete handle; -} - -TEST_F(DBBasicTestWithTimestamp, MixedCfs) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.avoid_flush_during_shutdown = true; - DestroyAndReopen(options); - - Options options1 = CurrentOptions(); - options1.env = env_; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options1.comparator = &test_cmp; - ColumnFamilyHandle* handle = nullptr; - Status s = db_->CreateColumnFamily(options1, "data", &handle); - ASSERT_OK(s); - - WriteBatch wb; - ASSERT_OK(wb.Put("a", "value")); - ASSERT_OK(wb.Put(handle, "a", "value")); - { - std::string ts = Timestamp(1, 0); - const auto ts_sz_func = [kTimestampSize, handle](uint32_t cf_id) { - assert(handle); - if (cf_id == 0) { - return static_cast(0); - } else if (cf_id == handle->GetID()) { - return kTimestampSize; - } else { - assert(false); - return std::numeric_limits::max(); - } - }; - ASSERT_OK(wb.UpdateTimestamps(ts, ts_sz_func)); - ASSERT_OK(db_->Write(WriteOptions(), &wb)); - } - - const auto verify_db = [this](ColumnFamilyHandle* h, const std::string& key, - const std::string& ts, - const std::string& expected_value) { - ASSERT_EQ(expected_value, Get(key)); - Slice read_ts_slice(ts); - ReadOptions read_opts; - read_opts.timestamp = &read_ts_slice; - std::string value; - ASSERT_OK(db_->Get(read_opts, h, key, &value)); - ASSERT_EQ(expected_value, value); - }; - - verify_db(handle, "a", Timestamp(1, 0), "value"); - - delete handle; - Close(); - - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, options); - cf_descs.emplace_back("data", options1); - options.create_if_missing = false; - s = DB::Open(options, dbname_, cf_descs, &handles_, &db_); - ASSERT_OK(s); - - verify_db(handles_[1], "a", Timestamp(1, 0), "value"); - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, 
CompactRangeWithSpecifiedRange) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - - ASSERT_OK(db_->Put(write_opts, "foo1", ts, "bar")); - ASSERT_OK(Flush()); - - ASSERT_OK(db_->Put(write_opts, "foo2", ts, "bar")); - ASSERT_OK(Flush()); - - std::string start_str = "foo"; - std::string end_str = "foo2"; - Slice start(start_str), end(end_str); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end)); - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, GcPreserveLatestVersionBelowFullHistoryLow) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - std::string ts_str = Timestamp(1, 0); - WriteOptions wopts; - ASSERT_OK(db_->Put(wopts, "k1", ts_str, "v1")); - ASSERT_OK(db_->Put(wopts, "k2", ts_str, "v2")); - ASSERT_OK(db_->Put(wopts, "k3", ts_str, "v3")); - - ts_str = Timestamp(2, 0); - ASSERT_OK(db_->Delete(wopts, "k3", ts_str)); - - ts_str = Timestamp(4, 0); - ASSERT_OK(db_->Put(wopts, "k1", ts_str, "v5")); - - ts_str = Timestamp(5, 0); - ASSERT_OK( - db_->DeleteRange(wopts, db_->DefaultColumnFamily(), "k0", "k9", ts_str)); - - ts_str = Timestamp(3, 0); - Slice ts = ts_str; - CompactRangeOptions cro; - cro.full_history_ts_low = &ts; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - ASSERT_OK(Flush()); - - ReadOptions ropts; - ropts.timestamp = &ts; - std::string value; - Status s = db_->Get(ropts, "k1", &value); - ASSERT_OK(s); - ASSERT_EQ("v1", value); - - std::string key_ts; - ASSERT_TRUE(db_->Get(ropts, "k3", &value, &key_ts).IsNotFound()); - ASSERT_EQ(Timestamp(2, 0), key_ts); - - ts_str = Timestamp(5, 0); - ts = ts_str; - ropts.timestamp = &ts; - ASSERT_TRUE(db_->Get(ropts, "k2", &value, &key_ts).IsNotFound()); - ASSERT_EQ(Timestamp(5, 0), key_ts); - ASSERT_TRUE(db_->Get(ropts, "k2", &value).IsNotFound()); - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, UpdateFullHistoryTsLow) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - const std::string kKey = "test kKey"; - - // Test set ts_low first and flush() - int current_ts_low = 5; - std::string ts_low_str = Timestamp(current_ts_low, 0); - Slice ts_low = ts_low_str; - CompactRangeOptions comp_opts; - comp_opts.full_history_ts_low = &ts_low; - comp_opts.bottommost_level_compaction = BottommostLevelCompaction::kForce; - - ASSERT_OK(db_->CompactRange(comp_opts, nullptr, nullptr)); - - auto* cfd = - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(); - auto result_ts_low = cfd->GetFullHistoryTsLow(); - - ASSERT_TRUE(test_cmp.CompareTimestamp(ts_low, result_ts_low) == 0); - - for (int i = 0; i < 10; i++) { - WriteOptions write_opts; - std::string ts = Timestamp(i, 0); - ASSERT_OK(db_->Put(write_opts, kKey, ts, Key(i))); - } - ASSERT_OK(Flush()); - - for (int i = 0; i < 10; i++) { - ReadOptions read_opts; - std::string ts_str = Timestamp(i, 0); - Slice ts = ts_str; - read_opts.timestamp = &ts; - std::string value; - Status 
status = db_->Get(read_opts, kKey, &value); - if (i < current_ts_low) { - ASSERT_TRUE(status.IsInvalidArgument()); - } else { - ASSERT_OK(status); - ASSERT_TRUE(value.compare(Key(i)) == 0); - } - } - - // Test set ts_low and then trigger compaction - for (int i = 10; i < 20; i++) { - WriteOptions write_opts; - std::string ts = Timestamp(i, 0); - ASSERT_OK(db_->Put(write_opts, kKey, ts, Key(i))); - } - - ASSERT_OK(Flush()); - - current_ts_low = 15; - ts_low_str = Timestamp(current_ts_low, 0); - ts_low = ts_low_str; - comp_opts.full_history_ts_low = &ts_low; - ASSERT_OK(db_->CompactRange(comp_opts, nullptr, nullptr)); - result_ts_low = cfd->GetFullHistoryTsLow(); - ASSERT_TRUE(test_cmp.CompareTimestamp(ts_low, result_ts_low) == 0); - - for (int i = current_ts_low; i < 20; i++) { - ReadOptions read_opts; - std::string ts_str = Timestamp(i, 0); - Slice ts = ts_str; - read_opts.timestamp = &ts; - std::string value; - Status status = db_->Get(read_opts, kKey, &value); - ASSERT_OK(status); - ASSERT_TRUE(value.compare(Key(i)) == 0); - } - - // Test invalid compaction with range - Slice start(kKey), end(kKey); - Status s = db_->CompactRange(comp_opts, &start, &end); - ASSERT_TRUE(s.IsInvalidArgument()); - s = db_->CompactRange(comp_opts, &start, nullptr); - ASSERT_TRUE(s.IsInvalidArgument()); - s = db_->CompactRange(comp_opts, nullptr, &end); - ASSERT_TRUE(s.IsInvalidArgument()); - - // Test invalid compaction with the decreasing ts_low - ts_low_str = Timestamp(current_ts_low - 1, 0); - ts_low = ts_low_str; - comp_opts.full_history_ts_low = &ts_low; - s = db_->CompactRange(comp_opts, nullptr, nullptr); - ASSERT_TRUE(s.IsInvalidArgument()); - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, UpdateFullHistoryTsLowWithPublicAPI) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - std::string ts_low_str = Timestamp(9, 0); - ASSERT_OK( - db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), ts_low_str)); - std::string result_ts_low; - ASSERT_OK(db_->GetFullHistoryTsLow(nullptr, &result_ts_low)); - ASSERT_TRUE(test_cmp.CompareTimestamp(ts_low_str, result_ts_low) == 0); - // test increase full_history_low backward - std::string ts_low_str_back = Timestamp(8, 0); - auto s = db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), - ts_low_str_back); - ASSERT_EQ(s, Status::InvalidArgument()); - // test IncreaseFullHistoryTsLow with a timestamp whose length is longger - // than the cf's timestamp size - std::string ts_low_str_long(Timestamp(0, 0).size() + 1, 'a'); - s = db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), - ts_low_str_long); - ASSERT_EQ(s, Status::InvalidArgument()); - // test IncreaseFullHistoryTsLow with a timestamp which is null - std::string ts_low_str_null = ""; - s = db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), - ts_low_str_null); - ASSERT_EQ(s, Status::InvalidArgument()); - // test IncreaseFullHistoryTsLow for a column family that does not enable - // timestamp - options.comparator = BytewiseComparator(); - DestroyAndReopen(options); - ts_low_str = Timestamp(10, 0); - s = db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), ts_low_str); - ASSERT_EQ(s, Status::InvalidArgument()); - // test GetFullHistoryTsLow for a column family that does not enable - // timestamp - std::string current_ts_low; - s = db_->GetFullHistoryTsLow(db_->DefaultColumnFamily(), 
&current_ts_low);
-  ASSERT_EQ(s, Status::InvalidArgument());
-  Close();
-}
-
-TEST_F(DBBasicTestWithTimestamp, GetApproximateSizes) {
-  Options options = CurrentOptions();
-  options.write_buffer_size = 100000000;  // Large write buffer
-  options.compression = kNoCompression;
-  options.create_if_missing = true;
-  const size_t kTimestampSize = Timestamp(0, 0).size();
-  TestComparator test_cmp(kTimestampSize);
-  options.comparator = &test_cmp;
-  DestroyAndReopen(options);
-  auto default_cf = db_->DefaultColumnFamily();
-
-  WriteOptions write_opts;
-  std::string ts = Timestamp(1, 0);
-
-  const int N = 128;
-  Random rnd(301);
-  for (int i = 0; i < N; i++) {
-    ASSERT_OK(db_->Put(write_opts, Key(i), ts, rnd.RandomString(1024)));
-  }
-
-  uint64_t size;
-  std::string start = Key(50);
-  std::string end = Key(60);
-  Range r(start, end);
-  SizeApproximationOptions size_approx_options;
-  size_approx_options.include_memtables = true;
-  size_approx_options.include_files = true;
-  ASSERT_OK(
-      db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
-  ASSERT_GT(size, 6000);
-  ASSERT_LT(size, 204800);
-
-  // test multiple ranges
-  std::vector<Range> ranges;
-  std::string start_tmp = Key(10);
-  std::string end_tmp = Key(20);
-  ranges.emplace_back(Range(start_tmp, end_tmp));
-  ranges.emplace_back(Range(start, end));
-  uint64_t range_sizes[2];
-  ASSERT_OK(db_->GetApproximateSizes(size_approx_options, default_cf,
-                                     ranges.data(), 2, range_sizes));
-
-  ASSERT_EQ(range_sizes[1], size);
-
-  // Zero if not including mem table
-  ASSERT_OK(db_->GetApproximateSizes(&r, 1, &size));
-  ASSERT_EQ(size, 0);
-
-  start = Key(500);
-  end = Key(600);
-  r = Range(start, end);
-  ASSERT_OK(
-      db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
-  ASSERT_EQ(size, 0);
-
-  // Test range boundaries
-  ASSERT_OK(db_->Put(write_opts, Key(1000), ts, rnd.RandomString(1024)));
-  // Should include start key
-  start = Key(1000);
-  end = Key(1100);
-  r = Range(start, end);
-  ASSERT_OK(
-      db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
-  ASSERT_GT(size, 0);
-
-  // Should exclude end key
-  start = Key(900);
-  end = Key(1000);
-  r = Range(start, end);
-  ASSERT_OK(
-      db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size));
-  ASSERT_EQ(size, 0);
-
-  Close();
-}
-
-TEST_F(DBBasicTestWithTimestamp, SimpleIterate) {
-  const int kNumKeysPerFile = 128;
-  const uint64_t kMaxKey = 1024;
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-  const size_t kTimestampSize = Timestamp(0, 0).size();
-  TestComparator test_cmp(kTimestampSize);
-  options.comparator = &test_cmp;
-  options.memtable_factory.reset(
-      test::NewSpecialSkipListFactory(kNumKeysPerFile));
-  DestroyAndReopen(options);
-  const std::vector<uint64_t> start_keys = {1, 0};
-  const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
-                                                     Timestamp(3, 0)};
-  const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
-                                                    Timestamp(4, 0)};
-  for (size_t i = 0; i < write_timestamps.size(); ++i) {
-    WriteOptions write_opts;
-    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
-      Status s = db_->Put(write_opts, Key1(key), write_timestamps[i],
-                          "value" + std::to_string(i));
-      ASSERT_OK(s);
-    }
-  }
-  for (size_t i = 0; i < read_timestamps.size(); ++i) {
-    ReadOptions read_opts;
-    Slice read_ts = read_timestamps[i];
-    read_opts.timestamp = &read_ts;
-    std::unique_ptr<Iterator> it(db_->NewIterator(read_opts));
-    int count = 0;
-    uint64_t key = 0;
-    // Forward iterate.
- for (it->Seek(Key1(0)), key = start_keys[i]; it->Valid(); - it->Next(), ++count, ++key) { - CheckIterUserEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - } - size_t expected_count = kMaxKey - start_keys[i] + 1; - ASSERT_EQ(expected_count, count); - - // Backward iterate. - count = 0; - for (it->SeekForPrev(Key1(kMaxKey)), key = kMaxKey; it->Valid(); - it->Prev(), ++count, --key) { - CheckIterUserEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - } - ASSERT_EQ(static_cast(kMaxKey) - start_keys[i] + 1, count); - - // SeekToFirst()/SeekToLast() with lower/upper bounds. - // Then iter with lower and upper bounds. - uint64_t l = 0; - uint64_t r = kMaxKey + 1; - while (l < r) { - std::string lb_str = Key1(l); - Slice lb = lb_str; - std::string ub_str = Key1(r); - Slice ub = ub_str; - read_opts.iterate_lower_bound = &lb; - read_opts.iterate_upper_bound = &ub; - it.reset(db_->NewIterator(read_opts)); - for (it->SeekToFirst(), key = std::max(l, start_keys[i]), count = 0; - it->Valid(); it->Next(), ++key, ++count) { - CheckIterUserEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - } - ASSERT_EQ(r - std::max(l, start_keys[i]), count); - - for (it->SeekToLast(), key = std::min(r, kMaxKey + 1), count = 0; - it->Valid(); it->Prev(), --key, ++count) { - CheckIterUserEntry(it.get(), Key1(key - 1), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - } - l += (kMaxKey / 100); - r -= (kMaxKey / 100); - } - } - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, TrimHistoryTest) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - auto check_value_by_ts = [](DB* db, Slice key, std::string readTs, - Status status, std::string checkValue, - std::string expected_ts) { - ReadOptions ropts; - Slice ts = readTs; - ropts.timestamp = &ts; - std::string value; - std::string key_ts; - Status s = db->Get(ropts, key, &value, &key_ts); - ASSERT_TRUE(s == status); - if (s.ok()) { - ASSERT_EQ(checkValue, value); - } - if (s.ok() || s.IsNotFound()) { - ASSERT_EQ(expected_ts, key_ts); - } - }; - // Construct data of different versions with different ts - ASSERT_OK(db_->Put(WriteOptions(), "k1", Timestamp(2, 0), "v1")); - ASSERT_OK(db_->Put(WriteOptions(), "k1", Timestamp(4, 0), "v2")); - ASSERT_OK(db_->Delete(WriteOptions(), "k1", Timestamp(5, 0))); - ASSERT_OK(db_->Put(WriteOptions(), "k1", Timestamp(6, 0), "v3")); - check_value_by_ts(db_, "k1", Timestamp(7, 0), Status::OK(), "v3", - Timestamp(6, 0)); - ASSERT_OK(Flush()); - Close(); - - ColumnFamilyOptions cf_options(options); - std::vector column_families; - column_families.push_back( - ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); - DBOptions db_options(options); - - // Trim data whose version > Timestamp(5, 0), read(k1, ts(7)) <- NOT_FOUND. 
- ASSERT_OK(DB::OpenAndTrimHistory(db_options, dbname_, column_families, - &handles_, &db_, Timestamp(5, 0))); - check_value_by_ts(db_, "k1", Timestamp(7, 0), Status::NotFound(), "", - Timestamp(5, 0)); - Close(); - - // Trim data whose timestamp > Timestamp(4, 0), read(k1, ts(7)) <- v2 - ASSERT_OK(DB::OpenAndTrimHistory(db_options, dbname_, column_families, - &handles_, &db_, Timestamp(4, 0))); - check_value_by_ts(db_, "k1", Timestamp(7, 0), Status::OK(), "v2", - Timestamp(4, 0)); - Close(); - - Reopen(options); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "k1", - "k3", Timestamp(7, 0))); - check_value_by_ts(db_, "k1", Timestamp(8, 0), Status::NotFound(), "", - Timestamp(7, 0)); - Close(); - // Trim data whose timestamp > Timestamp(6, 0), read(k1, ts(8)) <- v2 - ASSERT_OK(DB::OpenAndTrimHistory(db_options, dbname_, column_families, - &handles_, &db_, Timestamp(6, 0))); - check_value_by_ts(db_, "k1", Timestamp(8, 0), Status::OK(), "v2", - Timestamp(4, 0)); - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, OpenAndTrimHistoryInvalidOptionTest) { - Destroy(last_options_); - - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - - ColumnFamilyOptions cf_options(options); - std::vector column_families; - column_families.push_back( - ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); - DBOptions db_options(options); - - // OpenAndTrimHistory should not work with avoid_flush_during_recovery - db_options.avoid_flush_during_recovery = true; - ASSERT_TRUE(DB::OpenAndTrimHistory(db_options, dbname_, column_families, - &handles_, &db_, Timestamp(0, 0)) - .IsInvalidArgument()); -} - -TEST_F(DBBasicTestWithTimestamp, GetTimestampTableProperties) { - Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - // Create 2 tables - for (int table = 0; table < 2; ++table) { - for (int i = 0; i < 10; i++) { - std::string ts = Timestamp(i, 0); - ASSERT_OK(db_->Put(WriteOptions(), "key", ts, Key(i))); - } - ASSERT_OK(Flush()); - } - - TablePropertiesCollection props; - ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); - ASSERT_EQ(2U, props.size()); - for (const auto& item : props) { - auto& user_collected = item.second->user_collected_properties; - ASSERT_TRUE(user_collected.find("rocksdb.timestamp_min") != - user_collected.end()); - ASSERT_TRUE(user_collected.find("rocksdb.timestamp_max") != - user_collected.end()); - ASSERT_EQ(user_collected.at("rocksdb.timestamp_min"), Timestamp(0, 0)); - ASSERT_EQ(user_collected.at("rocksdb.timestamp_max"), Timestamp(9, 0)); - } - Close(); -} - -class DBBasicTestWithTimestampTableOptions - : public DBBasicTestWithTimestampBase, - public testing::WithParamInterface { - public: - explicit DBBasicTestWithTimestampTableOptions() - : DBBasicTestWithTimestampBase( - "db_basic_test_with_timestamp_table_options") {} -}; - -INSTANTIATE_TEST_CASE_P( - Timestamp, DBBasicTestWithTimestampTableOptions, - testing::Values( - BlockBasedTableOptions::IndexType::kBinarySearch, - BlockBasedTableOptions::IndexType::kHashSearch, - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch, - BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey)); - -TEST_P(DBBasicTestWithTimestampTableOptions, GetAndMultiGet) { - Options options = 
GetDefaultOptions();
-  options.create_if_missing = true;
-  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
-  options.compression = kNoCompression;
-  BlockBasedTableOptions bbto;
-  bbto.index_type = GetParam();
-  bbto.block_size = 100;
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  const size_t kTimestampSize = Timestamp(0, 0).size();
-  TestComparator cmp(kTimestampSize);
-  options.comparator = &cmp;
-  DestroyAndReopen(options);
-  constexpr uint64_t kNumKeys = 1024;
-  for (uint64_t k = 0; k < kNumKeys; ++k) {
-    WriteOptions write_opts;
-    ASSERT_OK(db_->Put(write_opts, Key1(k), Timestamp(1, 0),
-                       "value" + std::to_string(k)));
-  }
-  ASSERT_OK(Flush());
-  {
-    ReadOptions read_opts;
-    read_opts.total_order_seek = true;
-    std::string ts_str = Timestamp(2, 0);
-    Slice ts = ts_str;
-    read_opts.timestamp = &ts;
-    std::unique_ptr<Iterator> it(db_->NewIterator(read_opts));
-    // verify Get()
-    for (it->SeekToFirst(); it->Valid(); it->Next()) {
-      std::string value_from_get;
-      std::string key_str(it->key().data(), it->key().size());
-      std::string timestamp;
-      ASSERT_OK(db_->Get(read_opts, key_str, &value_from_get, &timestamp));
-      ASSERT_EQ(it->value(), value_from_get);
-      ASSERT_EQ(Timestamp(1, 0), timestamp);
-    }
-
-    // verify MultiGet()
-    constexpr uint64_t step = 2;
-    static_assert(0 == (kNumKeys % step),
-                  "kNumKeys must be a multiple of step");
-    for (uint64_t k = 0; k < kNumKeys; k += 2) {
-      std::vector<std::string> key_strs;
-      std::vector<Slice> keys;
-      for (size_t i = 0; i < step; ++i) {
-        key_strs.push_back(Key1(k + i));
-      }
-      for (size_t i = 0; i < step; ++i) {
-        keys.emplace_back(key_strs[i]);
-      }
-      std::vector<std::string> values;
-      std::vector<std::string> timestamps;
-      std::vector<Status> statuses =
-          db_->MultiGet(read_opts, keys, &values, &timestamps);
-      ASSERT_EQ(step, statuses.size());
-      ASSERT_EQ(step, values.size());
-      ASSERT_EQ(step, timestamps.size());
-      for (uint64_t i = 0; i < step; ++i) {
-        ASSERT_OK(statuses[i]);
-        ASSERT_EQ("value" + std::to_string(k + i), values[i]);
-        ASSERT_EQ(Timestamp(1, 0), timestamps[i]);
-      }
-    }
-  }
-  Close();
-}
-
-TEST_P(DBBasicTestWithTimestampTableOptions, SeekWithPrefixLessThanKey) {
-  Options options = CurrentOptions();
-  options.env = env_;
-  options.create_if_missing = true;
-  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
-  options.memtable_whole_key_filtering = true;
-  options.memtable_prefix_bloom_size_ratio = 0.1;
-  BlockBasedTableOptions bbto;
-  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
-  bbto.cache_index_and_filter_blocks = true;
-  bbto.whole_key_filtering = true;
-  bbto.index_type = GetParam();
-  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
-  const size_t kTimestampSize = Timestamp(0, 0).size();
-  TestComparator test_cmp(kTimestampSize);
-  options.comparator = &test_cmp;
-  DestroyAndReopen(options);
-
-  WriteOptions write_opts;
-  std::string ts = Timestamp(1, 0);
-
-  ASSERT_OK(db_->Put(write_opts, "foo1", ts, "bar"));
-  ASSERT_OK(Flush());
-
-  ASSERT_OK(db_->Put(write_opts, "foo2", ts, "bar"));
-  ASSERT_OK(Flush());
-
-  // Move sst file to next level
-  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
-
-  ASSERT_OK(db_->Put(write_opts, "foo3", ts, "bar"));
-  ASSERT_OK(Flush());
-
-  ReadOptions read_opts;
-  Slice read_ts = ts;
-  read_opts.timestamp = &read_ts;
-  {
-    std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
-    iter->Seek("foo");
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_OK(iter->status());
-    iter->Next();
-    ASSERT_TRUE(iter->Valid());
-    ASSERT_OK(iter->status());
-
-    iter->Seek("bbb");
-
ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - } - - Close(); -} - -TEST_P(DBBasicTestWithTimestampTableOptions, SeekWithCappedPrefix) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - // All of the keys or this test must be longer than 3 characters - constexpr int kMinKeyLen = 3; - options.prefix_extractor.reset(NewCappedPrefixTransform(kMinKeyLen)); - options.memtable_whole_key_filtering = true; - options.memtable_prefix_bloom_size_ratio = 0.1; - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.cache_index_and_filter_blocks = true; - bbto.whole_key_filtering = true; - bbto.index_type = GetParam(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - - ASSERT_OK(db_->Put(write_opts, "foo1", ts, "bar")); - ASSERT_OK(Flush()); - - ASSERT_OK(db_->Put(write_opts, "foo2", ts, "bar")); - ASSERT_OK(Flush()); - - // Move sst file to next level - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - ASSERT_OK(db_->Put(write_opts, "foo3", ts, "bar")); - ASSERT_OK(Flush()); - - ReadOptions read_opts; - ts = Timestamp(2, 0); - Slice read_ts = ts; - read_opts.timestamp = &read_ts; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - // Make sure the prefix extractor doesn't include timestamp, otherwise it - // may return invalid result. - iter->Seek("foo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_OK(iter->status()); - } - - Close(); -} - -TEST_P(DBBasicTestWithTimestampTableOptions, SeekWithBound) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.prefix_extractor.reset(NewFixedPrefixTransform(2)); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.cache_index_and_filter_blocks = true; - bbto.whole_key_filtering = true; - bbto.index_type = GetParam(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - - ASSERT_OK(db_->Put(write_opts, "foo1", ts, "bar1")); - ASSERT_OK(Flush()); - - ASSERT_OK(db_->Put(write_opts, "foo2", ts, "bar2")); - ASSERT_OK(Flush()); - - // Move sst file to next level - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - for (int i = 3; i < 9; ++i) { - ASSERT_OK(db_->Put(write_opts, "foo" + std::to_string(i), ts, - "bar" + std::to_string(i))); - } - ASSERT_OK(Flush()); - - ReadOptions read_opts; - ts = Timestamp(2, 0); - Slice read_ts = ts; - read_opts.timestamp = &read_ts; - std::string up_bound = "foo5"; // exclusive - Slice up_bound_slice = up_bound; - std::string lo_bound = "foo2"; // inclusive - Slice lo_bound_slice = lo_bound; - read_opts.iterate_upper_bound = &up_bound_slice; - read_opts.iterate_lower_bound = &lo_bound_slice; - read_opts.auto_prefix_mode = true; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - // Make sure the prefix extractor doesn't include timestamp, otherwise it - // may return invalid result. 
- iter->Seek("foo"); - CheckIterUserEntry(iter.get(), lo_bound, kTypeValue, "bar2", - Timestamp(1, 0)); - iter->SeekToFirst(); - CheckIterUserEntry(iter.get(), lo_bound, kTypeValue, "bar2", - Timestamp(1, 0)); - iter->SeekForPrev("g"); - CheckIterUserEntry(iter.get(), "foo4", kTypeValue, "bar4", Timestamp(1, 0)); - iter->SeekToLast(); - CheckIterUserEntry(iter.get(), "foo4", kTypeValue, "bar4", Timestamp(1, 0)); - } - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, ChangeIterationDirection) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.env = env_; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - const std::vector timestamps = {Timestamp(1, 1), Timestamp(0, 2), - Timestamp(4, 3)}; - const std::vector> kvs = { - std::make_tuple("aa", "value1"), std::make_tuple("ab", "value2")}; - for (const auto& ts : timestamps) { - WriteBatch wb(0, 0, 0, kTimestampSize); - for (const auto& kv : kvs) { - const std::string& key = std::get<0>(kv); - const std::string& value = std::get<1>(kv); - ASSERT_OK(wb.Put(key, value)); - } - - ASSERT_OK(wb.UpdateTimestamps( - ts, [kTimestampSize](uint32_t) { return kTimestampSize; })); - ASSERT_OK(db_->Write(WriteOptions(), &wb)); - } - std::string read_ts_str = Timestamp(5, 3); - Slice read_ts = read_ts_str; - ReadOptions read_opts; - read_opts.timestamp = &read_ts; - std::unique_ptr it(db_->NewIterator(read_opts)); - - it->SeekToFirst(); - ASSERT_TRUE(it->Valid()); - it->Prev(); - ASSERT_FALSE(it->Valid()); - - it->SeekToLast(); - ASSERT_TRUE(it->Valid()); - uint64_t prev_reseek_count = - options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION); - ASSERT_EQ(0, prev_reseek_count); - it->Next(); - ASSERT_FALSE(it->Valid()); - ASSERT_EQ(1 + prev_reseek_count, - options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); - - it->Seek(std::get<0>(kvs[0])); - CheckIterUserEntry(it.get(), std::get<0>(kvs[0]), kTypeValue, - std::get<1>(kvs[0]), Timestamp(4, 3)); - it->Next(); - CheckIterUserEntry(it.get(), std::get<0>(kvs[1]), kTypeValue, - std::get<1>(kvs[1]), Timestamp(4, 3)); - it->Prev(); - CheckIterUserEntry(it.get(), std::get<0>(kvs[0]), kTypeValue, - std::get<1>(kvs[0]), Timestamp(4, 3)); - - prev_reseek_count = - options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION); - ASSERT_EQ(1, prev_reseek_count); - it->Next(); - CheckIterUserEntry(it.get(), std::get<0>(kvs[1]), kTypeValue, - std::get<1>(kvs[1]), Timestamp(4, 3)); - ASSERT_EQ(1 + prev_reseek_count, - options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); - - it->SeekForPrev(std::get<0>(kvs[1])); - CheckIterUserEntry(it.get(), std::get<0>(kvs[1]), kTypeValue, - std::get<1>(kvs[1]), Timestamp(4, 3)); - it->Prev(); - CheckIterUserEntry(it.get(), std::get<0>(kvs[0]), kTypeValue, - std::get<1>(kvs[0]), Timestamp(4, 3)); - - prev_reseek_count = - options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION); - it->Next(); - CheckIterUserEntry(it.get(), std::get<0>(kvs[1]), kTypeValue, - std::get<1>(kvs[1]), Timestamp(4, 3)); - ASSERT_EQ(1 + prev_reseek_count, - options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); - - it.reset(); - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, SimpleForwardIterateLowerTsBound) { - constexpr int kNumKeysPerFile = 128; - constexpr uint64_t 
kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::vector write_timestamps = {Timestamp(1, 0), - Timestamp(3, 0)}; - const std::vector read_timestamps = {Timestamp(2, 0), - Timestamp(4, 0)}; - const std::vector read_timestamps_lb = {Timestamp(1, 0), - Timestamp(1, 0)}; - for (size_t i = 0; i < write_timestamps.size(); ++i) { - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamps[i], - "value" + std::to_string(i)); - ASSERT_OK(s); - } - } - for (size_t i = 0; i < read_timestamps.size(); ++i) { - ReadOptions read_opts; - Slice read_ts = read_timestamps[i]; - Slice read_ts_lb = read_timestamps_lb[i]; - read_opts.timestamp = &read_ts; - read_opts.iter_start_ts = &read_ts_lb; - std::unique_ptr it(db_->NewIterator(read_opts)); - int count = 0; - uint64_t key = 0; - for (it->Seek(Key1(0)), key = 0; it->Valid(); it->Next(), ++count, ++key) { - CheckIterEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i), write_timestamps[i]); - if (i > 0) { - it->Next(); - CheckIterEntry(it.get(), Key1(key), kTypeValue, - "value" + std::to_string(i - 1), - write_timestamps[i - 1]); - } - } - size_t expected_count = kMaxKey + 1; - ASSERT_EQ(expected_count, count); - } - // Delete all keys@ts=5 and check iteration result with start ts set - { - std::string write_timestamp = Timestamp(5, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key < kMaxKey + 1; ++key) { - Status s = db_->Delete(write_opts, Key1(key), write_timestamp); - ASSERT_OK(s); - } - - std::string read_timestamp = Timestamp(6, 0); - ReadOptions read_opts; - Slice read_ts = read_timestamp; - read_opts.timestamp = &read_ts; - std::string read_timestamp_lb = Timestamp(2, 0); - Slice read_ts_lb = read_timestamp_lb; - read_opts.iter_start_ts = &read_ts_lb; - std::unique_ptr it(db_->NewIterator(read_opts)); - int count = 0; - uint64_t key = 0; - for (it->Seek(Key1(0)), key = 0; it->Valid(); it->Next(), ++count, ++key) { - CheckIterEntry(it.get(), Key1(key), kTypeDeletionWithTimestamp, Slice(), - write_timestamp); - // Skip key@ts=3 and land on tombstone key@ts=5 - it->Next(); - } - ASSERT_EQ(kMaxKey + 1, count); - } - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, BackwardIterateLowerTsBound) { - constexpr int kNumKeysPerFile = 128; - constexpr uint64_t kMaxKey = 1024; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - const std::vector write_timestamps = {Timestamp(1, 0), - Timestamp(3, 0)}; - const std::vector read_timestamps = {Timestamp(2, 0), - Timestamp(4, 0)}; - const std::vector read_timestamps_lb = {Timestamp(1, 0), - Timestamp(1, 0)}; - for (size_t i = 0; i < write_timestamps.size(); ++i) { - WriteOptions write_opts; - for (uint64_t key = 0; key <= kMaxKey; ++key) { - Status s = db_->Put(write_opts, Key1(key), write_timestamps[i], - "value" + std::to_string(i)); - ASSERT_OK(s); - } - } - for (size_t i = 0; i < read_timestamps.size(); 
++i) { - ReadOptions read_opts; - Slice read_ts = read_timestamps[i]; - Slice read_ts_lb = read_timestamps_lb[i]; - read_opts.timestamp = &read_ts; - read_opts.iter_start_ts = &read_ts_lb; - std::unique_ptr it(db_->NewIterator(read_opts)); - int count = 0; - uint64_t key = 0; - for (it->SeekForPrev(Key1(kMaxKey)), key = kMaxKey; it->Valid(); - it->Prev(), ++count, --key) { - CheckIterEntry(it.get(), Key1(key), kTypeValue, "value0", - write_timestamps[0]); - if (i > 0) { - it->Prev(); - CheckIterEntry(it.get(), Key1(key), kTypeValue, "value1", - write_timestamps[1]); - } - } - size_t expected_count = kMaxKey + 1; - ASSERT_EQ(expected_count, count); - } - // Delete all keys@ts=5 and check iteration result with start ts set - { - std::string write_timestamp = Timestamp(5, 0); - WriteOptions write_opts; - for (uint64_t key = 0; key < kMaxKey + 1; ++key) { - Status s = db_->Delete(write_opts, Key1(key), write_timestamp); - ASSERT_OK(s); - } - - std::string read_timestamp = Timestamp(6, 0); - ReadOptions read_opts; - Slice read_ts = read_timestamp; - read_opts.timestamp = &read_ts; - std::string read_timestamp_lb = Timestamp(2, 0); - Slice read_ts_lb = read_timestamp_lb; - read_opts.iter_start_ts = &read_ts_lb; - std::unique_ptr it(db_->NewIterator(read_opts)); - int count = 0; - uint64_t key = kMaxKey; - for (it->SeekForPrev(Key1(key)), key = kMaxKey; it->Valid(); - it->Prev(), ++count, --key) { - CheckIterEntry(it.get(), Key1(key), kTypeValue, "value1", - Timestamp(3, 0)); - it->Prev(); - CheckIterEntry(it.get(), Key1(key), kTypeDeletionWithTimestamp, Slice(), - write_timestamp); - } - ASSERT_EQ(kMaxKey + 1, count); - } - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, SimpleBackwardIterateLowerTsBound) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - std::string ts_ub_buf = Timestamp(5, 0); - Slice ts_ub = ts_ub_buf; - std::string ts_lb_buf = Timestamp(1, 0); - Slice ts_lb = ts_lb_buf; - - { - ReadOptions read_opts; - read_opts.timestamp = &ts_ub; - read_opts.iter_start_ts = &ts_lb; - std::unique_ptr it(db_->NewIterator(read_opts)); - it->SeekToLast(); - ASSERT_FALSE(it->Valid()); - ASSERT_OK(it->status()); - - it->SeekForPrev("foo"); - ASSERT_FALSE(it->Valid()); - ASSERT_OK(it->status()); - } - - // Test iterate_upper_bound - ASSERT_OK(db_->Put(WriteOptions(), "a", Timestamp(0, 0), "v0")); - ASSERT_OK(db_->SingleDelete(WriteOptions(), "a", Timestamp(1, 0))); - - for (int i = 0; i < 5; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), "b", Timestamp(i, 0), - "v" + std::to_string(i))); - } - - { - ReadOptions read_opts; - read_opts.timestamp = &ts_ub; - read_opts.iter_start_ts = &ts_lb; - std::string key_ub_str = "b"; // exclusive - Slice key_ub = key_ub_str; - read_opts.iterate_upper_bound = &key_ub; - std::unique_ptr it(db_->NewIterator(read_opts)); - it->SeekToLast(); - CheckIterEntry(it.get(), "a", kTypeSingleDeletion, Slice(), - Timestamp(1, 0)); - - key_ub_str = "a"; // exclusive - key_ub = key_ub_str; - read_opts.iterate_upper_bound = &key_ub; - it.reset(db_->NewIterator(read_opts)); - it->SeekToLast(); - ASSERT_FALSE(it->Valid()); - ASSERT_OK(it->status()); - } - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, BackwardIterateLowerTsBound_Reseek) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - 
options.max_sequential_skip_in_iterations = 2; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - for (int i = 0; i < 10; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), "a", Timestamp(i, 0), - "v" + std::to_string(i))); - } - - for (int i = 0; i < 10; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), "b", Timestamp(i, 0), - "v" + std::to_string(i))); - } - - { - std::string ts_ub_buf = Timestamp(6, 0); - Slice ts_ub = ts_ub_buf; - std::string ts_lb_buf = Timestamp(4, 0); - Slice ts_lb = ts_lb_buf; - - ReadOptions read_opts; - read_opts.timestamp = &ts_ub; - read_opts.iter_start_ts = &ts_lb; - std::unique_ptr it(db_->NewIterator(read_opts)); - it->SeekToLast(); - for (int i = 0; i < 3 && it->Valid(); it->Prev(), ++i) { - CheckIterEntry(it.get(), "b", kTypeValue, "v" + std::to_string(4 + i), - Timestamp(4 + i, 0)); - } - for (int i = 0; i < 3 && it->Valid(); it->Prev(), ++i) { - CheckIterEntry(it.get(), "a", kTypeValue, "v" + std::to_string(4 + i), - Timestamp(4 + i, 0)); - } - } - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, ReseekToTargetTimestamp) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - constexpr size_t kNumKeys = 16; - options.max_sequential_skip_in_iterations = kNumKeys / 2; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - // Insert kNumKeys - WriteOptions write_opts; - Status s; - for (size_t i = 0; i != kNumKeys; ++i) { - std::string ts = Timestamp(static_cast(i + 1), 0); - s = db_->Put(write_opts, "foo", ts, "value" + std::to_string(i)); - ASSERT_OK(s); - } - { - ReadOptions read_opts; - std::string ts_str = Timestamp(1, 0); - Slice ts = ts_str; - read_opts.timestamp = &ts; - std::unique_ptr iter(db_->NewIterator(read_opts)); - iter->SeekToFirst(); - CheckIterUserEntry(iter.get(), "foo", kTypeValue, "value0", ts_str); - ASSERT_EQ( - 1, options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); - - ts_str = Timestamp(kNumKeys, 0); - ts = ts_str; - read_opts.timestamp = &ts; - iter.reset(db_->NewIterator(read_opts)); - iter->SeekToLast(); - CheckIterUserEntry(iter.get(), "foo", kTypeValue, - "value" + std::to_string(kNumKeys - 1), ts_str); - ASSERT_EQ( - 2, options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); - } - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, ReseekToNextUserKey) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - constexpr size_t kNumKeys = 16; - options.max_sequential_skip_in_iterations = kNumKeys / 2; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - // Write kNumKeys + 1 keys - WriteOptions write_opts; - Status s; - for (size_t i = 0; i != kNumKeys; ++i) { - std::string ts = Timestamp(static_cast(i + 1), 0); - s = db_->Put(write_opts, "a", ts, "value" + std::to_string(i)); - ASSERT_OK(s); - } - { - std::string ts_str = Timestamp(static_cast(kNumKeys + 1), 0); - WriteBatch batch(0, 0, 0, kTimestampSize); - { ASSERT_OK(batch.Put("a", "new_value")); } - { ASSERT_OK(batch.Put("b", "new_value")); } - s = batch.UpdateTimestamps( - ts_str, [kTimestampSize](uint32_t) { return 
kTimestampSize; }); - ASSERT_OK(s); - s = db_->Write(write_opts, &batch); - ASSERT_OK(s); - } - { - ReadOptions read_opts; - std::string ts_str = Timestamp(static_cast(kNumKeys + 1), 0); - Slice ts = ts_str; - read_opts.timestamp = &ts; - std::unique_ptr iter(db_->NewIterator(read_opts)); - iter->Seek("a"); - iter->Next(); - CheckIterUserEntry(iter.get(), "b", kTypeValue, "new_value", ts_str); - ASSERT_EQ( - 1, options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); - } - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, ReseekToUserKeyBeforeSavedKey) { - Options options = GetDefaultOptions(); - options.env = env_; - options.create_if_missing = true; - constexpr size_t kNumKeys = 16; - options.max_sequential_skip_in_iterations = kNumKeys / 2; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - for (size_t i = 0; i < kNumKeys; ++i) { - std::string ts = Timestamp(static_cast(i + 1), 0); - WriteOptions write_opts; - Status s = db_->Put(write_opts, "b", ts, "value" + std::to_string(i)); - ASSERT_OK(s); - } - { - std::string ts = Timestamp(1, 0); - WriteOptions write_opts; - ASSERT_OK(db_->Put(write_opts, "a", ts, "value")); - } - { - ReadOptions read_opts; - std::string ts_str = Timestamp(1, 0); - Slice ts = ts_str; - read_opts.timestamp = &ts; - std::unique_ptr iter(db_->NewIterator(read_opts)); - iter->SeekToLast(); - iter->Prev(); - CheckIterUserEntry(iter.get(), "a", kTypeValue, "value", ts_str); - ASSERT_EQ( - 1, options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); - } - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, MultiGetWithFastLocalBloom) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.cache_index_and_filter_blocks = true; - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - // Write any value - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - - ASSERT_OK(db_->Put(write_opts, "foo", ts, "bar")); - - ASSERT_OK(Flush()); - - // Read with MultiGet - ReadOptions read_opts; - Slice read_ts = ts; - read_opts.timestamp = &read_ts; - size_t batch_size = 1; - std::vector keys(batch_size); - std::vector values(batch_size); - std::vector statuses(batch_size); - std::vector timestamps(batch_size); - keys[0] = "foo"; - ColumnFamilyHandle* cfh = db_->DefaultColumnFamily(); - db_->MultiGet(read_opts, cfh, batch_size, keys.data(), values.data(), - timestamps.data(), statuses.data(), true); - - ASSERT_OK(statuses[0]); - ASSERT_EQ(Timestamp(1, 0), timestamps[0]); - for (auto& elem : values) { - elem.Reset(); - } - - ASSERT_OK(db_->SingleDelete(WriteOptions(), "foo", Timestamp(2, 0))); - ts = Timestamp(3, 0); - read_ts = ts; - read_opts.timestamp = &read_ts; - db_->MultiGet(read_opts, cfh, batch_size, keys.data(), values.data(), - timestamps.data(), statuses.data(), true); - ASSERT_TRUE(statuses[0].IsNotFound()); - ASSERT_EQ(Timestamp(2, 0), timestamps[0]); - - Close(); -} - -TEST_P(DBBasicTestWithTimestampTableOptions, MultiGetWithPrefix) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = 
true; - options.prefix_extractor.reset(NewCappedPrefixTransform(5)); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.cache_index_and_filter_blocks = true; - bbto.whole_key_filtering = false; - bbto.index_type = GetParam(); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - // Write any value - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - - ASSERT_OK(db_->Put(write_opts, "foo", ts, "bar")); - - ASSERT_OK(Flush()); - - // Read with MultiGet - ReadOptions read_opts; - Slice read_ts = ts; - read_opts.timestamp = &read_ts; - size_t batch_size = 1; - std::vector keys(batch_size); - std::vector values(batch_size); - std::vector statuses(batch_size); - std::vector timestamps(batch_size); - keys[0] = "foo"; - ColumnFamilyHandle* cfh = db_->DefaultColumnFamily(); - db_->MultiGet(read_opts, cfh, batch_size, keys.data(), values.data(), - timestamps.data(), statuses.data(), true); - - ASSERT_OK(statuses[0]); - ASSERT_EQ(Timestamp(1, 0), timestamps[0]); - for (auto& elem : values) { - elem.Reset(); - } - - ASSERT_OK(db_->SingleDelete(WriteOptions(), "foo", Timestamp(2, 0))); - // TODO re-enable after fixing a bug of kHashSearch - if (GetParam() != BlockBasedTableOptions::IndexType::kHashSearch) { - ASSERT_OK(Flush()); - } - - ts = Timestamp(3, 0); - read_ts = ts; - db_->MultiGet(read_opts, cfh, batch_size, keys.data(), values.data(), - timestamps.data(), statuses.data(), true); - ASSERT_TRUE(statuses[0].IsNotFound()); - ASSERT_EQ(Timestamp(2, 0), timestamps[0]); - - Close(); -} - -TEST_P(DBBasicTestWithTimestampTableOptions, MultiGetWithMemBloomFilter) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.prefix_extractor.reset(NewCappedPrefixTransform(5)); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.cache_index_and_filter_blocks = true; - bbto.whole_key_filtering = false; - bbto.index_type = GetParam(); - options.memtable_prefix_bloom_size_ratio = 0.1; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - // Write any value - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - - ASSERT_OK(db_->Put(write_opts, "foo", ts, "bar")); - - // Read with MultiGet - ts = Timestamp(2, 0); - Slice read_ts = ts; - ReadOptions read_opts; - read_opts.timestamp = &read_ts; - size_t batch_size = 1; - std::vector keys(batch_size); - std::vector values(batch_size); - std::vector statuses(batch_size); - keys[0] = "foo"; - ColumnFamilyHandle* cfh = db_->DefaultColumnFamily(); - db_->MultiGet(read_opts, cfh, batch_size, keys.data(), values.data(), - statuses.data()); - - ASSERT_OK(statuses[0]); - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, MultiGetRangeFiltering) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.cache_index_and_filter_blocks = true; - bbto.whole_key_filtering = false; - options.memtable_prefix_bloom_size_ratio = 0.1; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const size_t kTimestampSize = Timestamp(0, 
0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - // Write any value - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - - // random data - for (int i = 0; i < 3; i++) { - auto key = std::to_string(i * 10); - auto value = std::to_string(i * 10); - Slice key_slice = key; - Slice value_slice = value; - ASSERT_OK(db_->Put(write_opts, key_slice, ts, value_slice)); - ASSERT_OK(Flush()); - } - - // Make num_levels to 2 to do key range filtering of sst files - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - ASSERT_OK(db_->Put(write_opts, "foo", ts, "bar")); - - ASSERT_OK(Flush()); - - // Read with MultiGet - ts = Timestamp(2, 0); - Slice read_ts = ts; - ReadOptions read_opts; - read_opts.timestamp = &read_ts; - size_t batch_size = 1; - std::vector keys(batch_size); - std::vector values(batch_size); - std::vector statuses(batch_size); - keys[0] = "foo"; - ColumnFamilyHandle* cfh = db_->DefaultColumnFamily(); - db_->MultiGet(read_opts, cfh, batch_size, keys.data(), values.data(), - statuses.data()); - - ASSERT_OK(statuses[0]); - Close(); -} - -TEST_P(DBBasicTestWithTimestampTableOptions, MultiGetPrefixFilter) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - options.prefix_extractor.reset(NewCappedPrefixTransform(3)); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.cache_index_and_filter_blocks = true; - bbto.whole_key_filtering = false; - bbto.index_type = GetParam(); - options.memtable_prefix_bloom_size_ratio = 0.1; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - - ASSERT_OK(db_->Put(write_opts, "foo", ts, "bar")); - - ASSERT_OK(Flush()); - // Read with MultiGet - ts = Timestamp(2, 0); - Slice read_ts = ts; - ReadOptions read_opts; - read_opts.timestamp = &read_ts; - size_t batch_size = 1; - std::vector keys(batch_size); - std::vector values(batch_size); - std::vector timestamps(batch_size); - keys[0] = "foo"; - ColumnFamilyHandle* cfh = db_->DefaultColumnFamily(); - std::vector cfhs(keys.size(), cfh); - std::vector statuses = - db_->MultiGet(read_opts, cfhs, keys, &values, ×tamps); - - ASSERT_OK(statuses[0]); - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, MaxKeysSkippedDuringNext) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - constexpr size_t max_skippable_internal_keys = 2; - const size_t kNumKeys = max_skippable_internal_keys + 2; - WriteOptions write_opts; - Status s; - { - std::string ts = Timestamp(1, 0); - ASSERT_OK(db_->Put(write_opts, "a", ts, "value")); - } - for (size_t i = 0; i < kNumKeys; ++i) { - std::string ts = Timestamp(static_cast(i + 1), 0); - s = db_->Put(write_opts, "b", ts, "value" + std::to_string(i)); - ASSERT_OK(s); - } - { - ReadOptions read_opts; - read_opts.max_skippable_internal_keys = max_skippable_internal_keys; - std::string ts_str = Timestamp(1, 0); - Slice ts = ts_str; - read_opts.timestamp = &ts; - std::unique_ptr iter(db_->NewIterator(read_opts)); - iter->SeekToFirst(); - iter->Next(); - 
ASSERT_TRUE(iter->status().IsIncomplete()); - } - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, MaxKeysSkippedDuringPrev) { - Options options = GetDefaultOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - constexpr size_t max_skippable_internal_keys = 2; - const size_t kNumKeys = max_skippable_internal_keys + 2; - WriteOptions write_opts; - Status s; - { - std::string ts = Timestamp(1, 0); - ASSERT_OK(db_->Put(write_opts, "b", ts, "value")); - } - for (size_t i = 0; i < kNumKeys; ++i) { - std::string ts = Timestamp(static_cast(i + 1), 0); - s = db_->Put(write_opts, "a", ts, "value" + std::to_string(i)); - ASSERT_OK(s); - } - { - ReadOptions read_opts; - read_opts.max_skippable_internal_keys = max_skippable_internal_keys; - std::string ts_str = Timestamp(1, 0); - Slice ts = ts_str; - read_opts.timestamp = &ts; - std::unique_ptr iter(db_->NewIterator(read_opts)); - iter->SeekToLast(); - iter->Prev(); - ASSERT_TRUE(iter->status().IsIncomplete()); - } - Close(); -} - -// Create two L0, and compact them to a new L1. In this test, L1 is L_bottom. -// Two L0s: -// f1 f2 -// ... -// Since f2.smallest < f1.largest < f2.largest -// f1 and f2 will be the inputs of a real compaction instead of trivial move. -TEST_F(DBBasicTestWithTimestamp, CompactDeletionWithTimestampMarkerToBottom) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.num_levels = 2; - options.level0_file_num_compaction_trigger = 2; - DestroyAndReopen(options); - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - ASSERT_OK(db_->Put(write_opts, "a", ts, "value0")); - ASSERT_OK(Flush()); - - ts = Timestamp(2, 0); - ASSERT_OK(db_->Put(write_opts, "b", ts, "value0")); - ts = Timestamp(3, 0); - ASSERT_OK(db_->Delete(write_opts, "a", ts)); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ReadOptions read_opts; - ts = Timestamp(1, 0); - Slice read_ts = ts; - read_opts.timestamp = &read_ts; - std::string value; - Status s = db_->Get(read_opts, "a", &value); - ASSERT_OK(s); - ASSERT_EQ("value0", value); - - ts = Timestamp(3, 0); - read_ts = ts; - read_opts.timestamp = &read_ts; - std::string key_ts; - s = db_->Get(read_opts, "a", &value, &key_ts); - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ(Timestamp(3, 0), key_ts); - - // Time-travel to the past before deletion - ts = Timestamp(2, 0); - read_ts = ts; - read_opts.timestamp = &read_ts; - s = db_->Get(read_opts, "a", &value); - ASSERT_OK(s); - ASSERT_EQ("value0", value); - Close(); -} - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -class DBBasicTestWithTimestampFilterPrefixSettings - : public DBBasicTestWithTimestampBase, - public testing::WithParamInterface< - std::tuple, bool, bool, - std::shared_ptr, bool, double, - BlockBasedTableOptions::IndexType>> { - public: - DBBasicTestWithTimestampFilterPrefixSettings() - : DBBasicTestWithTimestampBase( - "db_basic_test_with_timestamp_filter_prefix") {} -}; - -TEST_P(DBBasicTestWithTimestampFilterPrefixSettings, GetAndMultiGet) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - BlockBasedTableOptions bbto; - bbto.filter_policy = std::get<0>(GetParam()); - 
bbto.whole_key_filtering = std::get<1>(GetParam()); - bbto.cache_index_and_filter_blocks = std::get<2>(GetParam()); - bbto.index_type = std::get<6>(GetParam()); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.prefix_extractor = std::get<3>(GetParam()); - options.memtable_whole_key_filtering = std::get<4>(GetParam()); - options.memtable_prefix_bloom_size_ratio = std::get<5>(GetParam()); - - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - const int kMaxKey = 1000; - - // Write any value - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - - int idx = 0; - for (; idx < kMaxKey / 4; idx++) { - ASSERT_OK(db_->Put(write_opts, Key1(idx), ts, "bar")); - ASSERT_OK(db_->Put(write_opts, KeyWithPrefix("foo", idx), ts, "bar")); - } - - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - for (; idx < kMaxKey / 2; idx++) { - ASSERT_OK(db_->Put(write_opts, Key1(idx), ts, "bar")); - ASSERT_OK(db_->Put(write_opts, KeyWithPrefix("foo", idx), ts, "bar")); - } - - ASSERT_OK(Flush()); - - for (; idx < kMaxKey; idx++) { - ASSERT_OK(db_->Put(write_opts, Key1(idx), ts, "bar")); - ASSERT_OK(db_->Put(write_opts, KeyWithPrefix("foo", idx), ts, "bar")); - } - - // Read with MultiGet - ReadOptions read_opts; - Slice read_ts = ts; - read_opts.timestamp = &read_ts; - - for (idx = 0; idx < kMaxKey; idx++) { - size_t batch_size = 4; - std::vector keys_str(batch_size); - std::vector values(batch_size); - std::vector statuses(batch_size); - ColumnFamilyHandle* cfh = db_->DefaultColumnFamily(); - - keys_str[0] = Key1(idx); - keys_str[1] = KeyWithPrefix("foo", idx); - keys_str[2] = Key1(kMaxKey + idx); - keys_str[3] = KeyWithPrefix("foo", kMaxKey + idx); - - auto keys = ConvertStrToSlice(keys_str); - - db_->MultiGet(read_opts, cfh, batch_size, keys.data(), values.data(), - statuses.data()); - - for (int i = 0; i < 2; i++) { - ASSERT_OK(statuses[i]); - } - for (int i = 2; i < 4; i++) { - ASSERT_TRUE(statuses[i].IsNotFound()); - } - - for (int i = 0; i < 2; i++) { - std::string value; - ASSERT_OK(db_->Get(read_opts, keys[i], &value)); - std::unique_ptr it1(db_->NewIterator(read_opts)); - ASSERT_NE(nullptr, it1); - ASSERT_OK(it1->status()); - it1->Seek(keys[i]); - ASSERT_TRUE(it1->Valid()); - } - - for (int i = 2; i < 4; i++) { - std::string value; - Status s = db_->Get(read_opts, keys[i], &value); - ASSERT_TRUE(s.IsNotFound()); - } - } - Close(); -} - -INSTANTIATE_TEST_CASE_P( - Timestamp, DBBasicTestWithTimestampFilterPrefixSettings, - ::testing::Combine( - ::testing::Values( - std::shared_ptr(nullptr), - std::shared_ptr(NewBloomFilterPolicy(10, true)), - std::shared_ptr(NewBloomFilterPolicy(10, - false))), - ::testing::Bool(), ::testing::Bool(), - ::testing::Values( - std::shared_ptr(NewFixedPrefixTransform(1)), - std::shared_ptr(NewFixedPrefixTransform(4)), - std::shared_ptr(NewFixedPrefixTransform(7)), - std::shared_ptr(NewFixedPrefixTransform(8))), - ::testing::Bool(), ::testing::Values(0, 0.1), - ::testing::Values( - BlockBasedTableOptions::IndexType::kBinarySearch, - BlockBasedTableOptions::IndexType::kHashSearch, - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch, - BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey))); -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -class DataVisibilityTest : public DBBasicTestWithTimestampBase { - public: - DataVisibilityTest() : 
DBBasicTestWithTimestampBase("data_visibility_test") { - // Initialize test data - for (int i = 0; i < kTestDataSize; i++) { - test_data_[i].key = "key" + std::to_string(i); - test_data_[i].value = "value" + std::to_string(i); - test_data_[i].timestamp = Timestamp(i, 0); - test_data_[i].ts = i; - test_data_[i].seq_num = kMaxSequenceNumber; - } - } - - protected: - struct TestData { - std::string key; - std::string value; - int ts; - std::string timestamp; - SequenceNumber seq_num; - }; - - constexpr static int kTestDataSize = 3; - TestData test_data_[kTestDataSize]; - - void PutTestData(int index, ColumnFamilyHandle* cfh = nullptr) { - ASSERT_LE(index, kTestDataSize); - WriteOptions write_opts; - - if (cfh == nullptr) { - ASSERT_OK(db_->Put(write_opts, test_data_[index].key, - test_data_[index].timestamp, test_data_[index].value)); - const Snapshot* snap = db_->GetSnapshot(); - test_data_[index].seq_num = snap->GetSequenceNumber(); - if (index > 0) { - ASSERT_GT(test_data_[index].seq_num, test_data_[index - 1].seq_num); - } - db_->ReleaseSnapshot(snap); - } else { - ASSERT_OK(db_->Put(write_opts, cfh, test_data_[index].key, - test_data_[index].timestamp, test_data_[index].value)); - } - } - - void AssertVisibility(int ts, SequenceNumber seq, - std::vector statuses) { - ASSERT_EQ(kTestDataSize, statuses.size()); - for (int i = 0; i < kTestDataSize; i++) { - if (test_data_[i].seq_num <= seq && test_data_[i].ts <= ts) { - ASSERT_OK(statuses[i]); - } else { - ASSERT_TRUE(statuses[i].IsNotFound()); - } - } - } - - std::vector GetKeys() { - std::vector ret(kTestDataSize); - for (int i = 0; i < kTestDataSize; i++) { - ret[i] = test_data_[i].key; - } - return ret; - } - - void VerifyDefaultCF(int ts, const Snapshot* snap = nullptr) { - ReadOptions read_opts; - std::string read_ts = Timestamp(ts, 0); - Slice read_ts_slice = read_ts; - read_opts.timestamp = &read_ts_slice; - read_opts.snapshot = snap; - - ColumnFamilyHandle* cfh = db_->DefaultColumnFamily(); - std::vector cfs(kTestDataSize, cfh); - SequenceNumber seq = - snap ? snap->GetSequenceNumber() : kMaxSequenceNumber - 1; - - // There're several MultiGet interfaces with not exactly the same - // implementations, query data with all of them. 
- auto keys = GetKeys(); - std::vector values; - auto s1 = db_->MultiGet(read_opts, cfs, keys, &values); - AssertVisibility(ts, seq, s1); - - auto s2 = db_->MultiGet(read_opts, keys, &values); - AssertVisibility(ts, seq, s2); - - std::vector timestamps; - auto s3 = db_->MultiGet(read_opts, cfs, keys, &values, ×tamps); - AssertVisibility(ts, seq, s3); - - auto s4 = db_->MultiGet(read_opts, keys, &values, ×tamps); - AssertVisibility(ts, seq, s4); - - std::vector values_ps5(kTestDataSize); - std::vector s5(kTestDataSize); - db_->MultiGet(read_opts, cfh, kTestDataSize, keys.data(), values_ps5.data(), - s5.data()); - AssertVisibility(ts, seq, s5); - - std::vector values_ps6(kTestDataSize); - std::vector s6(kTestDataSize); - std::vector timestamps_array(kTestDataSize); - db_->MultiGet(read_opts, cfh, kTestDataSize, keys.data(), values_ps6.data(), - timestamps_array.data(), s6.data()); - AssertVisibility(ts, seq, s6); - - std::vector values_ps7(kTestDataSize); - std::vector s7(kTestDataSize); - db_->MultiGet(read_opts, kTestDataSize, cfs.data(), keys.data(), - values_ps7.data(), s7.data()); - AssertVisibility(ts, seq, s7); - - std::vector values_ps8(kTestDataSize); - std::vector s8(kTestDataSize); - db_->MultiGet(read_opts, kTestDataSize, cfs.data(), keys.data(), - values_ps8.data(), timestamps_array.data(), s8.data()); - AssertVisibility(ts, seq, s8); - } - - void VerifyDefaultCF(const Snapshot* snap = nullptr) { - for (int i = 0; i <= kTestDataSize; i++) { - VerifyDefaultCF(i, snap); - } - } -}; -constexpr int DataVisibilityTest::kTestDataSize; - -// Application specifies timestamp but not snapshot. -// reader writer -// ts'=90 -// ts=100 -// seq=10 -// seq'=11 -// write finishes -// GetImpl(ts,seq) -// It is OK to return if ts>=t1 AND seq>=s1. If ts>=t1 but seqDisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::GetImpl:3", - "DataVisibilityTest::PointLookupWithoutSnapshot1:BeforePut"}, - {"DataVisibilityTest::PointLookupWithoutSnapshot1:AfterPut", - "DBImpl::GetImpl:4"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer_thread([this]() { - std::string write_ts = Timestamp(1, 0); - WriteOptions write_opts; - TEST_SYNC_POINT( - "DataVisibilityTest::PointLookupWithoutSnapshot1:BeforePut"); - Status s = db_->Put(write_opts, "foo", write_ts, "value"); - ASSERT_OK(s); - TEST_SYNC_POINT("DataVisibilityTest::PointLookupWithoutSnapshot1:AfterPut"); - }); - ReadOptions read_opts; - std::string read_ts_str = Timestamp(3, 0); - Slice read_ts = read_ts_str; - read_opts.timestamp = &read_ts; - std::string value; - Status s = db_->Get(read_opts, "foo", &value); - - writer_thread.join(); - ASSERT_TRUE(s.IsNotFound()); - Close(); -} - -// Application specifies timestamp but not snapshot. -// reader writer -// ts'=90 -// ts=100 -// seq=10 -// seq'=11 -// write finishes -// Flush -// GetImpl(ts,seq) -// It is OK to return if ts>=t1 AND seq>=s1. 
If ts>=t1 but seqDisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::GetImpl:3", - "DataVisibilityTest::PointLookupWithoutSnapshot2:BeforePut"}, - {"DataVisibilityTest::PointLookupWithoutSnapshot2:AfterPut", - "DBImpl::GetImpl:4"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer_thread([this]() { - std::string write_ts = Timestamp(1, 0); - WriteOptions write_opts; - TEST_SYNC_POINT( - "DataVisibilityTest::PointLookupWithoutSnapshot2:BeforePut"); - Status s = db_->Put(write_opts, "foo", write_ts, "value"); - ASSERT_OK(s); - ASSERT_OK(Flush()); - - write_ts = Timestamp(2, 0); - s = db_->Put(write_opts, "bar", write_ts, "value"); - ASSERT_OK(s); - TEST_SYNC_POINT("DataVisibilityTest::PointLookupWithoutSnapshot2:AfterPut"); - }); - ReadOptions read_opts; - std::string read_ts_str = Timestamp(3, 0); - Slice read_ts = read_ts_str; - read_opts.timestamp = &read_ts; - std::string value; - Status s = db_->Get(read_opts, "foo", &value); - writer_thread.join(); - ASSERT_TRUE(s.IsNotFound()); - Close(); -} - -// Application specifies both timestamp and snapshot. -// reader writer -// seq=10 -// ts'=90 -// ts=100 -// seq'=11 -// write finishes -// GetImpl(ts,seq) -// Since application specifies both timestamp and snapshot, application expects -// to see data that visible in BOTH timestamp and sequence number. Therefore, -// can be returned only if t1<=ts AND s1<=seq. -TEST_F(DataVisibilityTest, PointLookupWithSnapshot1) { - Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"DataVisibilityTest::PointLookupWithSnapshot1:AfterTakingSnap", - "DataVisibilityTest::PointLookupWithSnapshot1:BeforePut"}, - {"DataVisibilityTest::PointLookupWithSnapshot1:AfterPut", - "DBImpl::GetImpl:1"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer_thread([this]() { - std::string write_ts = Timestamp(1, 0); - WriteOptions write_opts; - TEST_SYNC_POINT("DataVisibilityTest::PointLookupWithSnapshot1:BeforePut"); - Status s = db_->Put(write_opts, "foo", write_ts, "value"); - TEST_SYNC_POINT("DataVisibilityTest::PointLookupWithSnapshot1:AfterPut"); - ASSERT_OK(s); - }); - ReadOptions read_opts; - const Snapshot* snap = db_->GetSnapshot(); - TEST_SYNC_POINT( - "DataVisibilityTest::PointLookupWithSnapshot1:AfterTakingSnap"); - read_opts.snapshot = snap; - std::string read_ts_str = Timestamp(3, 0); - Slice read_ts = read_ts_str; - read_opts.timestamp = &read_ts; - std::string value; - Status s = db_->Get(read_opts, "foo", &value); - writer_thread.join(); - - ASSERT_TRUE(s.IsNotFound()); - - db_->ReleaseSnapshot(snap); - Close(); -} - -// Application specifies both timestamp and snapshot. -// reader writer -// seq=10 -// ts'=90 -// ts=100 -// seq'=11 -// write finishes -// Flush -// GetImpl(ts,seq) -// Since application specifies both timestamp and snapshot, application expects -// to see data that visible in BOTH timestamp and sequence number. Therefore, -// can be returned only if t1<=ts AND s1<=seq. 
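// Editor's sketch (not part of the original diff): the visibility rule stated in the
// comment above, written out as a small predicate. ts/seq are the reader's timestamp
// and snapshot sequence number; entry_ts/entry_seq belong to the stored key version.
// The function and parameter names are illustrative.
#include <cstdint>

bool VisibleWithSnapshotAndTimestamp(uint64_t ts, uint64_t seq, uint64_t entry_ts,
                                     uint64_t entry_seq) {
  // With both a timestamp and a snapshot supplied, a key version may be returned only
  // if it is at or below BOTH bounds: t1 <= ts AND s1 <= seq.
  return entry_ts <= ts && entry_seq <= seq;
}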
-TEST_F(DataVisibilityTest, PointLookupWithSnapshot2) { - Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"DataVisibilityTest::PointLookupWithSnapshot2:AfterTakingSnap", - "DataVisibilityTest::PointLookupWithSnapshot2:BeforePut"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer_thread([this]() { - std::string write_ts = Timestamp(1, 0); - WriteOptions write_opts; - TEST_SYNC_POINT("DataVisibilityTest::PointLookupWithSnapshot2:BeforePut"); - Status s = db_->Put(write_opts, "foo", write_ts, "value1"); - ASSERT_OK(s); - ASSERT_OK(Flush()); - - write_ts = Timestamp(2, 0); - s = db_->Put(write_opts, "bar", write_ts, "value2"); - ASSERT_OK(s); - }); - const Snapshot* snap = db_->GetSnapshot(); - TEST_SYNC_POINT( - "DataVisibilityTest::PointLookupWithSnapshot2:AfterTakingSnap"); - writer_thread.join(); - std::string read_ts_str = Timestamp(3, 0); - Slice read_ts = read_ts_str; - ReadOptions read_opts; - read_opts.snapshot = snap; - read_opts.timestamp = &read_ts; - std::string value; - Status s = db_->Get(read_opts, "foo", &value); - ASSERT_TRUE(s.IsNotFound()); - db_->ReleaseSnapshot(snap); - Close(); -} - -// Application specifies timestamp but not snapshot. -// reader writer -// ts'=90 -// ts=100 -// seq=10 -// seq'=11 -// write finishes -// scan(ts,seq) -// can be seen in scan as long as ts>=t1 AND seq>=s1. If ts>=t1 but -// seqDisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::NewIterator:3", - "DataVisibilityTest::RangeScanWithoutSnapshot:BeforePut"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer_thread([this]() { - WriteOptions write_opts; - TEST_SYNC_POINT("DataVisibilityTest::RangeScanWithoutSnapshot:BeforePut"); - for (int i = 0; i < 3; ++i) { - std::string write_ts = Timestamp(i + 1, 0); - Status s = db_->Put(write_opts, "key" + std::to_string(i), write_ts, - "value" + std::to_string(i)); - ASSERT_OK(s); - } - }); - std::string read_ts_str = Timestamp(10, 0); - Slice read_ts = read_ts_str; - ReadOptions read_opts; - read_opts.total_order_seek = true; - read_opts.timestamp = &read_ts; - Iterator* it = db_->NewIterator(read_opts); - ASSERT_NE(nullptr, it); - writer_thread.join(); - it->SeekToFirst(); - ASSERT_FALSE(it->Valid()); - delete it; - Close(); -} - -// Application specifies both timestamp and snapshot. -// reader writer -// seq=10 -// ts'=90 -// ts=100 seq'=11 -// write finishes -// scan(ts,seq) -// can be seen by the scan only if t1<=ts AND s1<=seq. If t1<=ts -// but s1>seq, then the key should not be returned. 
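// Editor's sketch (not part of the original diff): how a scan pins both a snapshot and
// a read timestamp, matching the rule in the comment above. The function name and the
// raw timestamp encoding are assumptions; the tests use their own Timestamp() helper
// and comparator.
#include <memory>
#include <string>
#include "rocksdb/db.h"

void ScanAtSnapshotAndTimestampSketch(rocksdb::DB* db, const std::string& read_ts_str) {
  const rocksdb::Snapshot* snap = db->GetSnapshot();  // fixes the sequence-number bound
  rocksdb::Slice read_ts = read_ts_str;               // fixes the timestamp bound
  rocksdb::ReadOptions read_opts;
  read_opts.snapshot = snap;
  read_opts.timestamp = &read_ts;
  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(read_opts));
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // Only versions with entry_ts <= read_ts AND entry_seq <= snapshot sequence number
    // are surfaced here.
  }
  db->ReleaseSnapshot(snap);
}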
-TEST_F(DataVisibilityTest, RangeScanWithSnapshot) { - Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"DataVisibilityTest::RangeScanWithSnapshot:AfterTakingSnapshot", - "DataVisibilityTest::RangeScanWithSnapshot:BeforePut"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer_thread([this]() { - WriteOptions write_opts; - TEST_SYNC_POINT("DataVisibilityTest::RangeScanWithSnapshot:BeforePut"); - for (int i = 0; i < 3; ++i) { - std::string write_ts = Timestamp(i + 1, 0); - Status s = db_->Put(write_opts, "key" + std::to_string(i), write_ts, - "value" + std::to_string(i)); - ASSERT_OK(s); - } - }); - const Snapshot* snap = db_->GetSnapshot(); - TEST_SYNC_POINT( - "DataVisibilityTest::RangeScanWithSnapshot:AfterTakingSnapshot"); - - writer_thread.join(); - - std::string read_ts_str = Timestamp(10, 0); - Slice read_ts = read_ts_str; - ReadOptions read_opts; - read_opts.snapshot = snap; - read_opts.total_order_seek = true; - read_opts.timestamp = &read_ts; - Iterator* it = db_->NewIterator(read_opts); - ASSERT_NE(nullptr, it); - it->Seek("key0"); - ASSERT_FALSE(it->Valid()); - - delete it; - db_->ReleaseSnapshot(snap); - Close(); -} - -// Application specifies both timestamp and snapshot. -// Query each combination and make sure for MultiGet key , only -// return keys that ts>=t1 AND seq>=s1. -TEST_F(DataVisibilityTest, MultiGetWithTimestamp) { - Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - const Snapshot* snap0 = db_->GetSnapshot(); - PutTestData(0); - VerifyDefaultCF(); - VerifyDefaultCF(snap0); - - const Snapshot* snap1 = db_->GetSnapshot(); - PutTestData(1); - VerifyDefaultCF(); - VerifyDefaultCF(snap0); - VerifyDefaultCF(snap1); - - ASSERT_OK(Flush()); - - const Snapshot* snap2 = db_->GetSnapshot(); - PutTestData(2); - VerifyDefaultCF(); - VerifyDefaultCF(snap0); - VerifyDefaultCF(snap1); - VerifyDefaultCF(snap2); - - db_->ReleaseSnapshot(snap0); - db_->ReleaseSnapshot(snap1); - db_->ReleaseSnapshot(snap2); - - Close(); -} - -// Application specifies timestamp but not snapshot. -// reader writer -// ts'=0, 1 -// ts=3 -// seq=10 -// seq'=11, 12 -// write finishes -// MultiGet(ts,seq) -// For MultiGet , only return keys that ts>=t1 AND seq>=s1. 
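// Editor's sketch (not part of the original diff): the tests below interleave the
// reader and writer threads with RocksDB's internal SyncPoint facility. This is a
// hedged outline of that pattern under made-up sync-point names ("Sketch:MarkerA/B");
// SyncPoint and TEST_SYNC_POINT are test-only internals, not public API.
#include "port/port.h"
#include "test_util/sync_point.h"

void SyncPointOrderingSketch() {
  using ROCKSDB_NAMESPACE::SyncPoint;
  SyncPoint::GetInstance()->DisableProcessing();
  // Declare that "Sketch:MarkerA" must fire before "Sketch:MarkerB" may proceed.
  SyncPoint::GetInstance()->LoadDependency({{"Sketch:MarkerA", "Sketch:MarkerB"}});
  SyncPoint::GetInstance()->EnableProcessing();
  ROCKSDB_NAMESPACE::port::Thread writer([]() {
    TEST_SYNC_POINT("Sketch:MarkerA");  // unblocks the waiter on MarkerB
  });
  TEST_SYNC_POINT("Sketch:MarkerB");  // blocks until MarkerA has been processed
  writer.join();
  SyncPoint::GetInstance()->DisableProcessing();
}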
-TEST_F(DataVisibilityTest, MultiGetWithoutSnapshot) { - Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::MultiGet:AfterGetSeqNum1", - "DataVisibilityTest::MultiGetWithoutSnapshot:BeforePut"}, - {"DataVisibilityTest::MultiGetWithoutSnapshot:AfterPut", - "DBImpl::MultiGet:AfterGetSeqNum2"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer_thread([this]() { - TEST_SYNC_POINT("DataVisibilityTest::MultiGetWithoutSnapshot:BeforePut"); - PutTestData(0); - PutTestData(1); - TEST_SYNC_POINT("DataVisibilityTest::MultiGetWithoutSnapshot:AfterPut"); - }); - - ReadOptions read_opts; - std::string read_ts = Timestamp(kTestDataSize, 0); - Slice read_ts_slice = read_ts; - read_opts.timestamp = &read_ts_slice; - auto keys = GetKeys(); - std::vector values; - auto ss = db_->MultiGet(read_opts, keys, &values); - - writer_thread.join(); - for (auto s : ss) { - ASSERT_TRUE(s.IsNotFound()); - } - VerifyDefaultCF(); - Close(); -} - -TEST_F(DataVisibilityTest, MultiGetCrossCF) { - Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - CreateAndReopenWithCF({"second"}, options); - ColumnFamilyHandle* second_cf = handles_[1]; - - const Snapshot* snap0 = db_->GetSnapshot(); - PutTestData(0); - PutTestData(0, second_cf); - VerifyDefaultCF(); - VerifyDefaultCF(snap0); - - const Snapshot* snap1 = db_->GetSnapshot(); - PutTestData(1); - PutTestData(1, second_cf); - VerifyDefaultCF(); - VerifyDefaultCF(snap0); - VerifyDefaultCF(snap1); - - ASSERT_OK(Flush()); - - const Snapshot* snap2 = db_->GetSnapshot(); - PutTestData(2); - PutTestData(2, second_cf); - VerifyDefaultCF(); - VerifyDefaultCF(snap0); - VerifyDefaultCF(snap1); - VerifyDefaultCF(snap2); - - ReadOptions read_opts; - std::string read_ts = Timestamp(kTestDataSize, 0); - Slice read_ts_slice = read_ts; - read_opts.timestamp = &read_ts_slice; - read_opts.snapshot = snap1; - auto keys = GetKeys(); - auto keys2 = GetKeys(); - keys.insert(keys.end(), keys2.begin(), keys2.end()); - std::vector cfs(kTestDataSize, - db_->DefaultColumnFamily()); - std::vector cfs2(kTestDataSize, second_cf); - cfs.insert(cfs.end(), cfs2.begin(), cfs2.end()); - - std::vector values; - auto ss = db_->MultiGet(read_opts, cfs, keys, &values); - for (int i = 0; i < 2 * kTestDataSize; i++) { - if (i % 3 == 0) { - // only the first key for each column family should be returned - ASSERT_OK(ss[i]); - } else { - ASSERT_TRUE(ss[i].IsNotFound()); - } - } - - db_->ReleaseSnapshot(snap0); - db_->ReleaseSnapshot(snap1); - db_->ReleaseSnapshot(snap2); - Close(); -} - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -class DBBasicTestWithTimestampCompressionSettings - : public DBBasicTestWithTimestampBase, - public testing::WithParamInterface< - std::tuple, CompressionType, - uint32_t, uint32_t>> { - public: - DBBasicTestWithTimestampCompressionSettings() - : DBBasicTestWithTimestampBase( - "db_basic_test_with_timestamp_compression") {} -}; - -TEST_P(DBBasicTestWithTimestampCompressionSettings, PutAndGet) { - const int kNumKeysPerFile = 1024; - const size_t kNumTimestamps = 4; - Options options = CurrentOptions(); - options.create_if_missing = true; - 
options.env = env_; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - size_t ts_sz = Timestamp(0, 0).size(); - TestComparator test_cmp(ts_sz); - options.comparator = &test_cmp; - BlockBasedTableOptions bbto; - bbto.filter_policy = std::get<0>(GetParam()); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - const CompressionType comp_type = std::get<1>(GetParam()); -#if LZ4_VERSION_NUMBER < 10400 // r124+ - if (comp_type == kLZ4Compression || comp_type == kLZ4HCCompression) { - return; - } -#endif // LZ4_VERSION_NUMBER >= 10400 - if (!ZSTD_Supported() && comp_type == kZSTD) { - return; - } - if (!Zlib_Supported() && comp_type == kZlibCompression) { - return; - } - - options.compression = comp_type; - options.compression_opts.max_dict_bytes = std::get<2>(GetParam()); - if (comp_type == kZSTD) { - options.compression_opts.zstd_max_train_bytes = std::get<2>(GetParam()); - } - options.compression_opts.parallel_threads = std::get<3>(GetParam()); - options.target_file_size_base = 1 << 26; // 64MB - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(2, num_cfs); - std::vector write_ts_list; - std::vector read_ts_list; - - for (size_t i = 0; i != kNumTimestamps; ++i) { - write_ts_list.push_back(Timestamp(i * 2, 0)); - read_ts_list.push_back(Timestamp(1 + i * 2, 0)); - const Slice write_ts = write_ts_list.back(); - WriteOptions wopts; - for (int cf = 0; cf != static_cast(num_cfs); ++cf) { - for (size_t j = 0; j != (kNumKeysPerFile - 1) / kNumTimestamps; ++j) { - ASSERT_OK( - db_->Put(wopts, handles_[cf], Key1(j), write_ts, - "value_" + std::to_string(j) + "_" + std::to_string(i))); - } - } - } - const auto& verify_db_func = [&]() { - for (size_t i = 0; i != kNumTimestamps; ++i) { - ReadOptions ropts; - const Slice read_ts = read_ts_list[i]; - ropts.timestamp = &read_ts; - for (int cf = 0; cf != static_cast(num_cfs); ++cf) { - ColumnFamilyHandle* cfh = handles_[cf]; - for (size_t j = 0; j != (kNumKeysPerFile - 1) / kNumTimestamps; ++j) { - std::string value; - ASSERT_OK(db_->Get(ropts, cfh, Key1(j), &value)); - ASSERT_EQ("value_" + std::to_string(j) + "_" + std::to_string(i), - value); - } - } - } - }; - verify_db_func(); - Close(); -} - -TEST_P(DBBasicTestWithTimestampCompressionSettings, PutDeleteGet) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - const int kNumKeysPerFile = 1024; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - BlockBasedTableOptions bbto; - bbto.filter_policy = std::get<0>(GetParam()); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - const CompressionType comp_type = std::get<1>(GetParam()); -#if LZ4_VERSION_NUMBER < 10400 // r124+ - if (comp_type == kLZ4Compression || comp_type == kLZ4HCCompression) { - return; - } -#endif // LZ4_VERSION_NUMBER >= 10400 - if (!ZSTD_Supported() && comp_type == kZSTD) { - return; - } - if (!Zlib_Supported() && comp_type == kZlibCompression) { - return; - } - - options.compression = comp_type; - options.compression_opts.max_dict_bytes = std::get<2>(GetParam()); - if (comp_type == kZSTD) { - options.compression_opts.zstd_max_train_bytes = std::get<2>(GetParam()); - } - options.compression_opts.parallel_threads = 
std::get<3>(GetParam()); - options.target_file_size_base = 1 << 26; // 64MB - - DestroyAndReopen(options); - - const size_t kNumL0Files = - static_cast(Options().level0_file_num_compaction_trigger); - { - // Half of the keys will go through Deletion and remaining half with - // SingleDeletion. Generate enough L0 files with ts=1 to trigger compaction - // to L1 - std::string ts = Timestamp(1, 0); - WriteOptions wopts; - for (size_t i = 0; i < kNumL0Files; ++i) { - for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK(db_->Put(wopts, Key1(j), ts, "value" + std::to_string(i))); - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - // Generate another L0 at ts=3 - ts = Timestamp(3, 0); - for (int i = 0; i < kNumKeysPerFile; ++i) { - std::string key_str = Key1(i); - Slice key(key_str); - if ((i % 3) == 0) { - if (i < kNumKeysPerFile / 2) { - ASSERT_OK(db_->Delete(wopts, key, ts)); - } else { - ASSERT_OK(db_->SingleDelete(wopts, key, ts)); - } - } else { - ASSERT_OK(db_->Put(wopts, key, ts, "new_value")); - } - } - ASSERT_OK(db_->Flush(FlushOptions())); - // Populate memtable at ts=5 - ts = Timestamp(5, 0); - for (int i = 0; i != kNumKeysPerFile; ++i) { - std::string key_str = Key1(i); - Slice key(key_str); - if ((i % 3) == 1) { - if (i < kNumKeysPerFile / 2) { - ASSERT_OK(db_->Delete(wopts, key, ts)); - } else { - ASSERT_OK(db_->SingleDelete(wopts, key, ts)); - } - } else if ((i % 3) == 2) { - ASSERT_OK(db_->Put(wopts, key, ts, "new_value_2")); - } - } - } - { - std::string ts_str = Timestamp(6, 0); - Slice ts = ts_str; - ReadOptions ropts; - ropts.timestamp = &ts; - for (uint64_t i = 0; i != static_cast(kNumKeysPerFile); ++i) { - std::string value; - std::string key_ts; - Status s = db_->Get(ropts, Key1(i), &value, &key_ts); - if ((i % 3) == 2) { - ASSERT_OK(s); - ASSERT_EQ("new_value_2", value); - ASSERT_EQ(Timestamp(5, 0), key_ts); - } else if ((i % 3) == 1) { - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ(Timestamp(5, 0), key_ts); - } else { - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ(Timestamp(3, 0), key_ts); - } - } - } -} - -// A class which remembers the name of each flushed file. 
-class FlushedFileCollector : public EventListener { - public: - FlushedFileCollector() {} - ~FlushedFileCollector() override {} - - void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { - InstrumentedMutexLock lock(&mutex_); - flushed_files_.push_back(info.file_path); - } - - std::vector GetFlushedFiles() { - std::vector result; - { - InstrumentedMutexLock lock(&mutex_); - result = flushed_files_; - } - return result; - } - - void ClearFlushedFiles() { - InstrumentedMutexLock lock(&mutex_); - flushed_files_.clear(); - } - - private: - std::vector flushed_files_; - InstrumentedMutex mutex_; -}; - -TEST_P(DBBasicTestWithTimestampCompressionSettings, PutAndGetWithCompaction) { - const int kNumKeysPerFile = 1024; - const size_t kNumTimestamps = 2; - const size_t kNumKeysPerTimestamp = (kNumKeysPerFile - 1) / kNumTimestamps; - const size_t kSplitPosBase = kNumKeysPerTimestamp / 2; - Options options = CurrentOptions(); - options.create_if_missing = true; - options.env = env_; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); - - size_t ts_sz = Timestamp(0, 0).size(); - TestComparator test_cmp(ts_sz); - options.comparator = &test_cmp; - BlockBasedTableOptions bbto; - bbto.filter_policy = std::get<0>(GetParam()); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - const CompressionType comp_type = std::get<1>(GetParam()); -#if LZ4_VERSION_NUMBER < 10400 // r124+ - if (comp_type == kLZ4Compression || comp_type == kLZ4HCCompression) { - return; - } -#endif // LZ4_VERSION_NUMBER >= 10400 - if (!ZSTD_Supported() && comp_type == kZSTD) { - return; - } - if (!Zlib_Supported() && comp_type == kZlibCompression) { - return; - } - - options.compression = comp_type; - options.compression_opts.max_dict_bytes = std::get<2>(GetParam()); - if (comp_type == kZSTD) { - options.compression_opts.zstd_max_train_bytes = std::get<2>(GetParam()); - } - options.compression_opts.parallel_threads = std::get<3>(GetParam()); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - - size_t num_cfs = handles_.size(); - ASSERT_EQ(2, num_cfs); - std::vector write_ts_list; - std::vector read_ts_list; - - const auto& verify_records_func = [&](size_t i, size_t begin, size_t end, - ColumnFamilyHandle* cfh) { - std::string value; - std::string timestamp; - - ReadOptions ropts; - const Slice read_ts = read_ts_list[i]; - ropts.timestamp = &read_ts; - std::string expected_timestamp = - std::string(write_ts_list[i].data(), write_ts_list[i].size()); - - for (size_t j = begin; j <= end; ++j) { - ASSERT_OK(db_->Get(ropts, cfh, Key1(j), &value, ×tamp)); - ASSERT_EQ("value_" + std::to_string(j) + "_" + std::to_string(i), value); - ASSERT_EQ(expected_timestamp, timestamp); - } - }; - - for (size_t i = 0; i != kNumTimestamps; ++i) { - write_ts_list.push_back(Timestamp(i * 2, 0)); - read_ts_list.push_back(Timestamp(1 + i * 2, 0)); - const Slice write_ts = write_ts_list.back(); - WriteOptions wopts; - for (int cf = 0; cf != static_cast(num_cfs); ++cf) { - size_t memtable_get_start = 0; - for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { - ASSERT_OK( - db_->Put(wopts, handles_[cf], Key1(j), write_ts, - "value_" + std::to_string(j) + "_" + std::to_string(i))); - if (j == kSplitPosBase + i || j == kNumKeysPerTimestamp - 1) { - verify_records_func(i, memtable_get_start, j, handles_[cf]); - memtable_get_start = j 
+ 1; - - // flush all keys with the same timestamp to two sst files, split at - // incremental positions such that lowerlevel[1].smallest.userkey == - // higherlevel[0].largest.userkey - ASSERT_OK(Flush(cf)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); // wait for flush (which - // is also a compaction) - - // compact files (2 at each level) to a lower level such that all - // keys with the same timestamp is at one level, with newer versions - // at higher levels. - CompactionOptions compact_opt; - compact_opt.compression = kNoCompression; - ASSERT_OK(db_->CompactFiles(compact_opt, handles_[cf], - collector->GetFlushedFiles(), - static_cast(kNumTimestamps - i))); - collector->ClearFlushedFiles(); - } - } - } - } - const auto& verify_db_func = [&]() { - for (size_t i = 0; i != kNumTimestamps; ++i) { - ReadOptions ropts; - const Slice read_ts = read_ts_list[i]; - ropts.timestamp = &read_ts; - std::string expected_timestamp(write_ts_list[i].data(), - write_ts_list[i].size()); - for (int cf = 0; cf != static_cast(num_cfs); ++cf) { - ColumnFamilyHandle* cfh = handles_[cf]; - verify_records_func(i, 0, kNumKeysPerTimestamp - 1, cfh); - } - } - }; - verify_db_func(); - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, BatchWriteAndMultiGet) { - const int kNumKeysPerFile = 8192; - const size_t kNumTimestamps = 2; - const size_t kNumKeysPerTimestamp = (kNumKeysPerFile - 1) / kNumTimestamps; - Options options = CurrentOptions(); - options.create_if_missing = true; - options.env = env_; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - options.memtable_prefix_bloom_size_ratio = 0.1; - options.memtable_whole_key_filtering = true; - - size_t ts_sz = Timestamp(0, 0).size(); - TestComparator test_cmp(ts_sz); - options.comparator = &test_cmp; - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy( - 10 /*bits_per_key*/, false /*use_block_based_builder*/)); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(2, num_cfs); - std::vector write_ts_list; - std::vector read_ts_list; - - const auto& verify_records_func = [&](size_t i, ColumnFamilyHandle* cfh) { - std::vector keys; - std::vector key_vals; - std::vector values; - std::vector timestamps; - - for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { - key_vals.push_back(Key1(j)); - } - for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { - keys.push_back(key_vals[j]); - } - - ReadOptions ropts; - const Slice read_ts = read_ts_list[i]; - ropts.timestamp = &read_ts; - std::string expected_timestamp(write_ts_list[i].data(), - write_ts_list[i].size()); - - std::vector cfhs(keys.size(), cfh); - std::vector statuses = - db_->MultiGet(ropts, cfhs, keys, &values, ×tamps); - for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { - ASSERT_OK(statuses[j]); - ASSERT_EQ("value_" + std::to_string(j) + "_" + std::to_string(i), - values[j]); - ASSERT_EQ(expected_timestamp, timestamps[j]); - } - }; - - const std::string dummy_ts(ts_sz, '\0'); - for (size_t i = 0; i != kNumTimestamps; ++i) { - write_ts_list.push_back(Timestamp(i * 2, 0)); - read_ts_list.push_back(Timestamp(1 + i * 2, 0)); - const Slice& write_ts = write_ts_list.back(); - for (int cf = 0; cf != static_cast(num_cfs); ++cf) { - WriteOptions wopts; - WriteBatch batch(0, 0, 0, ts_sz); - for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { - const std::string key = Key1(j); - const 
std::string value = - "value_" + std::to_string(j) + "_" + std::to_string(i); - ASSERT_OK(batch.Put(handles_[cf], key, value)); - } - ASSERT_OK(batch.UpdateTimestamps(write_ts, - [ts_sz](uint32_t) { return ts_sz; })); - ASSERT_OK(db_->Write(wopts, &batch)); - - verify_records_func(i, handles_[cf]); - - ASSERT_OK(Flush(cf)); - } - } - - const auto& verify_db_func = [&]() { - for (size_t i = 0; i != kNumTimestamps; ++i) { - ReadOptions ropts; - const Slice read_ts = read_ts_list[i]; - ropts.timestamp = &read_ts; - for (int cf = 0; cf != static_cast(num_cfs); ++cf) { - ColumnFamilyHandle* cfh = handles_[cf]; - verify_records_func(i, cfh); - } - } - }; - verify_db_func(); - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, MultiGetNoReturnTs) { - Options options = CurrentOptions(); - options.env = env_; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - WriteOptions write_opts; - std::string ts = Timestamp(1, 0); - ASSERT_OK(db_->Put(write_opts, "foo", ts, "value")); - ASSERT_OK(db_->Put(write_opts, "bar", ts, "value")); - ASSERT_OK(db_->Put(write_opts, "fooxxxxxxxxxxxxxxxx", ts, "value")); - ASSERT_OK(db_->Put(write_opts, "barxxxxxxxxxxxxxxxx", ts, "value")); - ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily(); - ts = Timestamp(2, 0); - Slice read_ts = ts; - ReadOptions read_opts; - read_opts.timestamp = &read_ts; - { - ColumnFamilyHandle* column_families[] = {cfh, cfh}; - Slice keys[] = {"foo", "bar"}; - PinnableSlice values[] = {PinnableSlice(), PinnableSlice()}; - Status statuses[] = {Status::OK(), Status::OK()}; - dbfull()->MultiGet(read_opts, /*num_keys=*/2, &column_families[0], &keys[0], - &values[0], &statuses[0], /*sorted_input=*/false); - for (const auto& s : statuses) { - ASSERT_OK(s); - } - } - { - ColumnFamilyHandle* column_families[] = {cfh, cfh, cfh, cfh}; - // Make user keys longer than configured timestamp size (16 bytes) to - // verify RocksDB does not use the trailing bytes 'x' as timestamp. 
- Slice keys[] = {"fooxxxxxxxxxxxxxxxx", "barxxxxxxxxxxxxxxxx", "foo", "bar"}; - PinnableSlice values[] = {PinnableSlice(), PinnableSlice(), PinnableSlice(), - PinnableSlice()}; - Status statuses[] = {Status::OK(), Status::OK(), Status::OK(), - Status::OK()}; - dbfull()->MultiGet(read_opts, /*num_keys=*/4, &column_families[0], &keys[0], - &values[0], &statuses[0], /*sorted_input=*/false); - for (const auto& s : statuses) { - ASSERT_OK(s); - } - } - Close(); -} - - -INSTANTIATE_TEST_CASE_P( - Timestamp, DBBasicTestWithTimestampCompressionSettings, - ::testing::Combine( - ::testing::Values(std::shared_ptr(nullptr), - std::shared_ptr( - NewBloomFilterPolicy(10, false))), - ::testing::Values(kNoCompression, kZlibCompression, kLZ4Compression, - kLZ4HCCompression, kZSTD), - ::testing::Values(0, 1 << 14), ::testing::Values(1, 4))); - -class DBBasicTestWithTimestampPrefixSeek - : public DBBasicTestWithTimestampBase, - public testing::WithParamInterface< - std::tuple, - std::shared_ptr, bool, - BlockBasedTableOptions::IndexType>> { - public: - DBBasicTestWithTimestampPrefixSeek() - : DBBasicTestWithTimestampBase( - "/db_basic_test_with_timestamp_prefix_seek") {} -}; - -TEST_P(DBBasicTestWithTimestampPrefixSeek, IterateWithPrefix) { - const size_t kNumKeysPerFile = 128; - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.prefix_extractor = std::get<0>(GetParam()); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - BlockBasedTableOptions bbto; - bbto.filter_policy = std::get<1>(GetParam()); - bbto.index_type = std::get<3>(GetParam()); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - - const uint64_t kMaxKey = 0xffffffffffffffff; - const uint64_t kMinKey = 0xfffffffffffff000; - const std::vector write_ts_list = {Timestamp(3, 0xffffffff), - Timestamp(6, 0xffffffff)}; - WriteOptions write_opts; - { - for (size_t i = 0; i != write_ts_list.size(); ++i) { - for (uint64_t key = kMaxKey; key >= kMinKey; --key) { - Status s = db_->Put(write_opts, Key1(key), write_ts_list[i], - "value" + std::to_string(i)); - ASSERT_OK(s); - } - } - } - const std::vector read_ts_list = {Timestamp(5, 0xffffffff), - Timestamp(9, 0xffffffff)}; - { - ReadOptions read_opts; - read_opts.total_order_seek = false; - read_opts.prefix_same_as_start = std::get<2>(GetParam()); - fprintf(stdout, "%s %s %d\n", options.prefix_extractor->Name(), - bbto.filter_policy ? 
bbto.filter_policy->Name() : "null", - static_cast(read_opts.prefix_same_as_start)); - for (size_t i = 0; i != read_ts_list.size(); ++i) { - Slice read_ts = read_ts_list[i]; - read_opts.timestamp = &read_ts; - std::unique_ptr iter(db_->NewIterator(read_opts)); - - // Seek to kMaxKey - iter->Seek(Key1(kMaxKey)); - CheckIterUserEntry(iter.get(), Key1(kMaxKey), kTypeValue, - "value" + std::to_string(i), write_ts_list[i]); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - - // Seek to kMinKey - iter->Seek(Key1(kMinKey)); - CheckIterUserEntry(iter.get(), Key1(kMinKey), kTypeValue, - "value" + std::to_string(i), write_ts_list[i]); - iter->Prev(); - ASSERT_FALSE(iter->Valid()); - } - const std::vector targets = {kMinKey, kMinKey + 0x10, - kMinKey + 0x100, kMaxKey}; - const SliceTransform* const pe = options.prefix_extractor.get(); - ASSERT_NE(nullptr, pe); - const size_t kPrefixShift = - 8 * (Key1(0).size() - pe->Transform(Key1(0)).size()); - const uint64_t kPrefixMask = - ~((static_cast(1) << kPrefixShift) - 1); - const uint64_t kNumKeysWithinPrefix = - (static_cast(1) << kPrefixShift); - for (size_t i = 0; i != read_ts_list.size(); ++i) { - Slice read_ts = read_ts_list[i]; - read_opts.timestamp = &read_ts; - std::unique_ptr it(db_->NewIterator(read_opts)); - // Forward and backward iterate. - for (size_t j = 0; j != targets.size(); ++j) { - std::string start_key = Key1(targets[j]); - uint64_t expected_ub = - (targets[j] & kPrefixMask) - 1 + kNumKeysWithinPrefix; - uint64_t expected_key = targets[j]; - size_t count = 0; - it->Seek(Key1(targets[j])); - while (it->Valid()) { - std::string saved_prev_key; - saved_prev_key.assign(it->key().data(), it->key().size()); - - // Out of prefix - if (!read_opts.prefix_same_as_start && - pe->Transform(saved_prev_key) != pe->Transform(start_key)) { - break; - } - CheckIterUserEntry(it.get(), Key1(expected_key), kTypeValue, - "value" + std::to_string(i), write_ts_list[i]); - ++count; - ++expected_key; - it->Next(); - } - ASSERT_EQ(expected_ub - targets[j] + 1, count); - - count = 0; - expected_key = targets[j]; - it->SeekForPrev(start_key); - uint64_t expected_lb = (targets[j] & kPrefixMask); - while (it->Valid()) { - // Out of prefix - if (!read_opts.prefix_same_as_start && - pe->Transform(it->key()) != pe->Transform(start_key)) { - break; - } - CheckIterUserEntry(it.get(), Key1(expected_key), kTypeValue, - "value" + std::to_string(i), write_ts_list[i]); - ++count; - --expected_key; - it->Prev(); - } - ASSERT_EQ(targets[j] - std::max(expected_lb, kMinKey) + 1, count); - } - } - } - Close(); -} - -// TODO(yanqin): consider handling non-fixed-length prefix extractors, e.g. -// NoopTransform. 
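// Editor's sketch (not part of the original diff): the option wiring behind the
// parameterized prefix-seek cases above -- a fixed-length prefix extractor plus a
// bloom filter in the block-based table. The prefix length of 4 and bits_per_key of 10
// are illustrative values, not the test's parameters.
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"

rocksdb::Options PrefixSeekOptionsSketch() {
  rocksdb::Options options;
  options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
  rocksdb::BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10 /*bits_per_key*/, false));
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbto));
  return options;
}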
-INSTANTIATE_TEST_CASE_P( - Timestamp, DBBasicTestWithTimestampPrefixSeek, - ::testing::Combine( - ::testing::Values( - std::shared_ptr(NewFixedPrefixTransform(1)), - std::shared_ptr(NewFixedPrefixTransform(4)), - std::shared_ptr(NewFixedPrefixTransform(7)), - std::shared_ptr(NewFixedPrefixTransform(8))), - ::testing::Values(std::shared_ptr(nullptr), - std::shared_ptr( - NewBloomFilterPolicy(10 /*bits_per_key*/, false)), - std::shared_ptr( - NewBloomFilterPolicy(20 /*bits_per_key*/, - false))), - ::testing::Bool(), - ::testing::Values( - BlockBasedTableOptions::IndexType::kBinarySearch, - BlockBasedTableOptions::IndexType::kHashSearch, - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch, - BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey))); - -class DBBasicTestWithTsIterTombstones - : public DBBasicTestWithTimestampBase, - public testing::WithParamInterface< - std::tuple, - std::shared_ptr, int, - BlockBasedTableOptions::IndexType>> { - public: - DBBasicTestWithTsIterTombstones() - : DBBasicTestWithTimestampBase("/db_basic_ts_iter_tombstones") {} -}; - -TEST_P(DBBasicTestWithTsIterTombstones, IterWithDelete) { - constexpr size_t kNumKeysPerFile = 128; - Options options = CurrentOptions(); - options.env = env_; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.prefix_extractor = std::get<0>(GetParam()); - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - BlockBasedTableOptions bbto; - bbto.filter_policy = std::get<1>(GetParam()); - bbto.index_type = std::get<3>(GetParam()); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.num_levels = std::get<2>(GetParam()); - DestroyAndReopen(options); - std::vector write_ts_strs = {Timestamp(2, 0), Timestamp(4, 0)}; - constexpr uint64_t kMaxKey = 0xffffffffffffffff; - constexpr uint64_t kMinKey = 0xfffffffffffff000; - // Insert kMinKey...kMaxKey - uint64_t key = kMinKey; - WriteOptions write_opts; - Slice ts = write_ts_strs[0]; - do { - Status s = db_->Put(write_opts, Key1(key), write_ts_strs[0], - "value" + std::to_string(key)); - ASSERT_OK(s); - if (kMaxKey == key) { - break; - } - ++key; - } while (true); - - for (key = kMaxKey; key >= kMinKey; --key) { - Status s; - if (0 != (key % 2)) { - s = db_->Put(write_opts, Key1(key), write_ts_strs[1], - "value1" + std::to_string(key)); - } else { - s = db_->Delete(write_opts, Key1(key), write_ts_strs[1]); - } - ASSERT_OK(s); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - { - std::string read_ts = Timestamp(4, 0); - ts = read_ts; - ReadOptions read_opts; - read_opts.total_order_seek = true; - read_opts.timestamp = &ts; - std::unique_ptr iter(db_->NewIterator(read_opts)); - size_t count = 0; - key = kMinKey + 1; - for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++count, key += 2) { - ASSERT_EQ(Key1(key), iter->key()); - ASSERT_EQ("value1" + std::to_string(key), iter->value()); - } - ASSERT_EQ((kMaxKey - kMinKey + 1) / 2, count); - - for (iter->SeekToLast(), count = 0, key = kMaxKey; iter->Valid(); - key -= 2, ++count, iter->Prev()) { - ASSERT_EQ(Key1(key), iter->key()); - ASSERT_EQ("value1" + std::to_string(key), iter->value()); - } - ASSERT_EQ((kMaxKey - kMinKey + 1) / 2, count); - } - Close(); -} - -INSTANTIATE_TEST_CASE_P( - Timestamp, DBBasicTestWithTsIterTombstones, - ::testing::Combine( - ::testing::Values( - std::shared_ptr(NewFixedPrefixTransform(7)), - std::shared_ptr(NewFixedPrefixTransform(8))), - 
::testing::Values(std::shared_ptr(nullptr), - std::shared_ptr( - NewBloomFilterPolicy(10, false)), - std::shared_ptr( - NewBloomFilterPolicy(20, false))), - ::testing::Values(2, 6), - ::testing::Values( - BlockBasedTableOptions::IndexType::kBinarySearch, - BlockBasedTableOptions::IndexType::kHashSearch, - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch, - BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey))); -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -class UpdateFullHistoryTsLowTest : public DBBasicTestWithTimestampBase { - public: - UpdateFullHistoryTsLowTest() - : DBBasicTestWithTimestampBase("/update_full_history_ts_low_test") {} -}; - -TEST_F(UpdateFullHistoryTsLowTest, ConcurrentUpdate) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - std::string lower_ts_low = Timestamp(10, 0); - std::string higher_ts_low = Timestamp(25, 0); - const size_t kTimestampSize = lower_ts_low.size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - - DestroyAndReopen(options); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - // This workaround swaps `lower_ts_low` originally used for update by the - // caller to `higher_ts_low` after its writer is queued to make sure - // the caller will always get a TryAgain error. - // It mimics cases where two threads update full_history_ts_low concurrently - // with one thread writing a higher ts_low and one thread writing a lower - // ts_low. - VersionEdit* version_edit; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::IncreaseFullHistoryTsLowImpl:BeforeEdit", - [&](void* arg) { version_edit = reinterpret_cast(arg); }); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:BeforeWriterWaiting", - [&](void* /*arg*/) { version_edit->SetFullHistoryTsLow(higher_ts_low); }); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_TRUE( - db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), lower_ts_low) - .IsTryAgain()); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, - GCPreserveRangeTombstoneWhenNoOrSmallFullHistoryLow) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - std::string ts_str = Timestamp(1, 0); - WriteOptions wopts; - ASSERT_OK(db_->Put(wopts, "k1", ts_str, "v1")); - ASSERT_OK(db_->Put(wopts, "k2", ts_str, "v2")); - ASSERT_OK(db_->Put(wopts, "k3", ts_str, "v3")); - ts_str = Timestamp(2, 0); - ASSERT_OK( - db_->DeleteRange(wopts, db_->DefaultColumnFamily(), "k1", "k3", ts_str)); - - ts_str = Timestamp(3, 0); - Slice ts = ts_str; - ReadOptions ropts; - ropts.timestamp = &ts; - CompactRangeOptions cro; - cro.full_history_ts_low = nullptr; - std::string value, key_ts; - Status s; - auto verify = [&] { - s = db_->Get(ropts, "k1", &value); - ASSERT_TRUE(s.IsNotFound()); - - s = db_->Get(ropts, "k2", &value, &key_ts); - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ(key_ts, Timestamp(2, 0)); - - ASSERT_OK(db_->Get(ropts, "k3", &value, &key_ts)); - ASSERT_EQ(value, "v3"); - ASSERT_EQ(Timestamp(1, 0), key_ts); - - size_t batch_size = 3; - std::vector key_strs = {"k1", "k2", "k3"}; - std::vector keys{key_strs.begin(), key_strs.end()}; - std::vector 
values(batch_size); - std::vector statuses(batch_size); - db_->MultiGet(ropts, db_->DefaultColumnFamily(), batch_size, keys.data(), - values.data(), statuses.data(), true /* sorted_input */); - ASSERT_TRUE(statuses[0].IsNotFound()); - ASSERT_TRUE(statuses[1].IsNotFound()); - ASSERT_OK(statuses[2]); - ; - ASSERT_EQ(values[2], "v3"); - }; - verify(); - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - verify(); - std::string lb = Timestamp(0, 0); - Slice lb_slice = lb; - cro.full_history_ts_low = &lb_slice; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - verify(); - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, - GCRangeTombstonesAndCoveredKeysRespectingTslow) { - Options options = CurrentOptions(); - options.env = env_; - options.create_if_missing = true; - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.cache_index_and_filter_blocks = true; - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.num_levels = 2; - DestroyAndReopen(options); - - WriteOptions wopts; - ASSERT_OK(db_->Put(wopts, "k1", Timestamp(1, 0), "v1")); - ASSERT_OK(db_->Delete(wopts, "k2", Timestamp(2, 0))); - ASSERT_OK(db_->DeleteRange(wopts, db_->DefaultColumnFamily(), "k1", "k3", - Timestamp(3, 0))); - ASSERT_OK(db_->Put(wopts, "k3", Timestamp(4, 0), "v3")); - - ReadOptions ropts; - std::string read_ts = Timestamp(5, 0); - Slice read_ts_slice = read_ts; - ropts.timestamp = &read_ts_slice; - size_t batch_size = 3; - std::vector key_strs = {"k1", "k2", "k3"}; - std::vector keys = {key_strs.begin(), key_strs.end()}; - std::vector values(batch_size); - std::vector statuses(batch_size); - std::vector timestamps(batch_size); - db_->MultiGet(ropts, db_->DefaultColumnFamily(), batch_size, keys.data(), - values.data(), timestamps.data(), statuses.data(), - true /* sorted_input */); - ASSERT_TRUE(statuses[0].IsNotFound()); - ASSERT_EQ(timestamps[0], Timestamp(3, 0)); - ASSERT_TRUE(statuses[1].IsNotFound()); - // DeleteRange has a higher timestamp than Delete for "k2" - ASSERT_EQ(timestamps[1], Timestamp(3, 0)); - ASSERT_OK(statuses[2]); - ASSERT_EQ(values[2], "v3"); - ASSERT_EQ(timestamps[2], Timestamp(4, 0)); - - CompactRangeOptions cro; - // Range tombstone has timestamp >= full_history_ts_low, covered keys - // are not dropped. 
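// Editor's sketch (not part of the original diff): passing full_history_ts_low to a
// manual compaction, the knob exercised just below. History at timestamps >=
// full_history_ts_low must be preserved, so the range tombstone above survives; the
// function name and the timestamp encoding are assumptions here.
#include <string>
#include "rocksdb/db.h"

rocksdb::Status CompactWithTsLowSketch(rocksdb::DB* db, const std::string& ts_low_str) {
  rocksdb::Slice ts_low = ts_low_str;
  rocksdb::CompactRangeOptions cro;
  cro.full_history_ts_low = &ts_low;  // versions with ts >= ts_low are kept readable
  return db->CompactRange(cro, /*begin=*/nullptr, /*end=*/nullptr);
}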
- std::string compaction_ts_str = Timestamp(2, 0); - Slice compaction_ts = compaction_ts_str; - cro.full_history_ts_low = &compaction_ts; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ropts.timestamp = &compaction_ts; - std::string value, ts; - ASSERT_OK(db_->Get(ropts, "k1", &value, &ts)); - ASSERT_EQ(value, "v1"); - // timestamp is below full_history_ts_low, zeroed out as the key goes into - // bottommost level - ASSERT_EQ(ts, Timestamp(0, 0)); - ASSERT_TRUE(db_->Get(ropts, "k2", &value, &ts).IsNotFound()); - ASSERT_EQ(ts, Timestamp(2, 0)); - - compaction_ts_str = Timestamp(4, 0); - compaction_ts = compaction_ts_str; - cro.full_history_ts_low = &compaction_ts; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ropts.timestamp = &read_ts_slice; - // k1, k2 and the range tombstone should be dropped - // k3 should still exist - db_->MultiGet(ropts, db_->DefaultColumnFamily(), batch_size, keys.data(), - values.data(), timestamps.data(), statuses.data(), - true /* sorted_input */); - ASSERT_TRUE(statuses[0].IsNotFound()); - ASSERT_TRUE(timestamps[0].empty()); - ASSERT_TRUE(statuses[1].IsNotFound()); - ASSERT_TRUE(timestamps[1].empty()); - ASSERT_OK(statuses[2]); - ASSERT_EQ(values[2], "v3"); - ASSERT_EQ(timestamps[2], Timestamp(4, 0)); - - Close(); -} - -TEST_P(DBBasicTestWithTimestampTableOptions, DeleteRangeBaiscReadAndIterate) { - const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25; - Options options = CurrentOptions(); - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - options.compression = kNoCompression; - BlockBasedTableOptions bbto; - bbto.index_type = GetParam(); - bbto.block_size = 100; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.env = env_; - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile)); - DestroyAndReopen(options); - - // Write half of the keys before the tombstone and half after the tombstone. - // Only covered keys (i.e., within the range and older than the tombstone) - // should be deleted. 
- for (int i = 0; i < kNum; ++i) { - if (i == kNum / 2) { - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key1(kRangeBegin), Key1(kRangeEnd), - Timestamp(i, 0))); - } - ASSERT_OK(db_->Put(WriteOptions(), Key1(i), Timestamp(i, 0), - "val" + std::to_string(i))); - if (i == kNum - kNumPerFile) { - ASSERT_OK(Flush()); - } - } - - ReadOptions read_opts; - read_opts.total_order_seek = true; - std::string read_ts = Timestamp(kNum, 0); - Slice read_ts_slice = read_ts; - read_opts.timestamp = &read_ts_slice; - { - std::unique_ptr iter(db_->NewIterator(read_opts)); - ASSERT_OK(iter->status()); - - int expected = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_EQ(Key1(expected), iter->key()); - if (expected == kRangeBegin - 1) { - expected = kNum / 2; - } else { - ++expected; - } - } - ASSERT_EQ(kNum, expected); - - expected = kNum / 2; - for (iter->Seek(Key1(kNum / 2)); iter->Valid(); iter->Next()) { - ASSERT_EQ(Key1(expected), iter->key()); - ++expected; - } - ASSERT_EQ(kNum, expected); - - expected = kRangeBegin - 1; - for (iter->SeekForPrev(Key1(kNum / 2 - 1)); iter->Valid(); iter->Prev()) { - ASSERT_EQ(Key1(expected), iter->key()); - --expected; - } - ASSERT_EQ(-1, expected); - - read_ts = Timestamp(0, 0); - read_ts_slice = read_ts; - read_opts.timestamp = &read_ts_slice; - iter.reset(db_->NewIterator(read_opts)); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), Key1(0)); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - } - - read_ts = Timestamp(kNum, 0); - read_ts_slice = read_ts; - read_opts.timestamp = &read_ts_slice; - std::string value, timestamp; - Status s; - for (int i = 0; i < kNum; ++i) { - s = db_->Get(read_opts, Key1(i), &value, ×tamp); - if (i >= kRangeBegin && i < kNum / 2) { - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ(timestamp, Timestamp(kNum / 2, 0)); - } else { - ASSERT_OK(s); - ASSERT_EQ(value, "val" + std::to_string(i)); - ASSERT_EQ(timestamp, Timestamp(i, 0)); - } - } - - size_t batch_size = kNum; - std::vector key_strs(batch_size); - std::vector keys(batch_size); - std::vector values(batch_size); - std::vector statuses(batch_size); - std::vector timestamps(batch_size); - for (int i = 0; i < kNum; ++i) { - key_strs[i] = Key1(i); - keys[i] = key_strs[i]; - } - db_->MultiGet(read_opts, db_->DefaultColumnFamily(), batch_size, keys.data(), - values.data(), timestamps.data(), statuses.data(), - true /* sorted_input */); - for (int i = 0; i < kNum; ++i) { - if (i >= kRangeBegin && i < kNum / 2) { - ASSERT_TRUE(statuses[i].IsNotFound()); - ASSERT_EQ(timestamps[i], Timestamp(kNum / 2, 0)); - } else { - ASSERT_OK(statuses[i]); - ASSERT_EQ(values[i], "val" + std::to_string(i)); - ASSERT_EQ(timestamps[i], Timestamp(i, 0)); - } - } - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, DeleteRangeGetIteratorWithSnapshot) { - // 4 keys 0, 1, 2, 3 at timestamps 0, 1, 2, 3 respectively. - // A range tombstone [1, 3) at timestamp 1 and has a sequence number between - // key 1 and 2. 
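// Editor's sketch (not part of the original diff): issuing a range tombstone at a
// user-defined timestamp, as the scenario above does for [Key1(1), Key1(3)) at
// timestamp 1. The literal keys and the function name are illustrative assumptions.
#include <string>
#include "rocksdb/db.h"

rocksdb::Status DeleteRangeAtTimestampSketch(rocksdb::DB* db, const std::string& ts) {
  // Hides older versions (smaller timestamp and smaller sequence number) of every key
  // in ["key1", "key3") from readers whose read timestamp is at or above ts.
  return db->DeleteRange(rocksdb::WriteOptions(), db->DefaultColumnFamily(), "key1",
                         "key3", ts);
}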
- Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - WriteOptions write_opts; - std::string put_ts = Timestamp(0, 0); - const int kNum = 4, kNumPerFile = 1, kRangeBegin = 1, kRangeEnd = 3; - options.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile)); - const Snapshot* before_tombstone = nullptr; - const Snapshot* after_tombstone = nullptr; - for (int i = 0; i < kNum; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), Key1(i), Timestamp(i, 0), - "val" + std::to_string(i))); - if (i == kRangeBegin) { - before_tombstone = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key1(kRangeBegin), Key1(kRangeEnd), - Timestamp(kRangeBegin, 0))); - } - if (i == kNum / 2) { - ASSERT_OK(Flush()); - } - } - assert(before_tombstone); - after_tombstone = db_->GetSnapshot(); - // snapshot and ts before tombstone - std::string read_ts_str = Timestamp(kRangeBegin - 1, 0); // (0, 0) - Slice read_ts = read_ts_str; - ReadOptions read_opts; - read_opts.timestamp = &read_ts; - read_opts.snapshot = before_tombstone; - std::vector expected_status = { - Status::OK(), Status::NotFound(), Status::NotFound(), Status::NotFound()}; - std::vector expected_values(kNum); - expected_values[0] = "val" + std::to_string(0); - std::vector expected_timestamps(kNum); - expected_timestamps[0] = Timestamp(0, 0); - - size_t batch_size = kNum; - std::vector key_strs(batch_size); - std::vector keys(batch_size); - std::vector values(batch_size); - std::vector statuses(batch_size); - std::vector timestamps(batch_size); - for (int i = 0; i < kNum; ++i) { - key_strs[i] = Key1(i); - keys[i] = key_strs[i]; - } - - auto verify = [&] { - db_->MultiGet(read_opts, db_->DefaultColumnFamily(), batch_size, - keys.data(), values.data(), timestamps.data(), - statuses.data(), true /* sorted_input */); - std::string value, timestamp; - Status s; - for (int i = 0; i < kNum; ++i) { - s = db_->Get(read_opts, Key1(i), &value, ×tamp); - ASSERT_EQ(s, expected_status[i]); - ASSERT_EQ(statuses[i], expected_status[i]); - if (s.ok()) { - ASSERT_EQ(value, expected_values[i]); - ASSERT_EQ(values[i], expected_values[i]); - } - if (!timestamp.empty()) { - ASSERT_EQ(timestamp, expected_timestamps[i]); - ASSERT_EQ(timestamps[i], expected_timestamps[i]); - } else { - ASSERT_TRUE(timestamps[i].empty()); - } - } - std::unique_ptr iter(db_->NewIterator(read_opts)); - std::unique_ptr iter_for_seek(db_->NewIterator(read_opts)); - iter->SeekToFirst(); - for (int i = 0; i < kNum; ++i) { - if (expected_status[i].ok()) { - auto verify_iter = [&](Iterator* iter_ptr) { - ASSERT_TRUE(iter_ptr->Valid()); - ASSERT_EQ(iter_ptr->key(), keys[i]); - ASSERT_EQ(iter_ptr->value(), expected_values[i]); - ASSERT_EQ(iter_ptr->timestamp(), expected_timestamps[i]); - }; - verify_iter(iter.get()); - iter->Next(); - - iter_for_seek->Seek(keys[i]); - verify_iter(iter_for_seek.get()); - - iter_for_seek->SeekForPrev(keys[i]); - verify_iter(iter_for_seek.get()); - } - } - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - }; - - verify(); - - // snapshot before tombstone and ts after tombstone - read_ts_str = Timestamp(kNum, 0); // (4, 0) - read_ts = read_ts_str; - read_opts.timestamp = &read_ts; - read_opts.snapshot = before_tombstone; - expected_status[1] = Status::OK(); - expected_timestamps[1] = Timestamp(1, 0); - expected_values[1] = "val" + std::to_string(1); - verify(); - - // snapshot 
after tombstone and ts before tombstone - read_ts_str = Timestamp(kRangeBegin - 1, 0); // (0, 0) - read_ts = read_ts_str; - read_opts.timestamp = &read_ts; - read_opts.snapshot = after_tombstone; - expected_status[1] = Status::NotFound(); - expected_timestamps[1].clear(); - expected_values[1].clear(); - verify(); - - // snapshot and ts after tombstone - read_ts_str = Timestamp(kNum, 0); // (4, 0) - read_ts = read_ts_str; - read_opts.timestamp = &read_ts; - read_opts.snapshot = after_tombstone; - for (int i = 0; i < kNum; ++i) { - if (i == kRangeBegin) { - expected_status[i] = Status::NotFound(); - expected_values[i].clear(); - } else { - expected_status[i] = Status::OK(); - expected_values[i] = "val" + std::to_string(i); - } - expected_timestamps[i] = Timestamp(i, 0); - } - verify(); - - db_->ReleaseSnapshot(before_tombstone); - db_->ReleaseSnapshot(after_tombstone); - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, MergeBasic) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.merge_operator = std::make_shared('.'); - DestroyAndReopen(options); - - const std::array write_ts_strs = { - Timestamp(100, 0), Timestamp(200, 0), Timestamp(300, 0)}; - constexpr size_t kNumOfUniqKeys = 100; - ColumnFamilyHandle* default_cf = db_->DefaultColumnFamily(); - - for (size_t i = 0; i < write_ts_strs.size(); ++i) { - for (size_t j = 0; j < kNumOfUniqKeys; ++j) { - Status s; - if (i == 0) { - const std::string val = "v" + std::to_string(j) + "_0"; - s = db_->Put(WriteOptions(), Key1(j), write_ts_strs[i], val); - } else { - const std::string merge_op = std::to_string(i); - s = db_->Merge(WriteOptions(), default_cf, Key1(j), write_ts_strs[i], - merge_op); - } - ASSERT_OK(s); - } - } - - std::array read_ts_strs = { - Timestamp(150, 0), Timestamp(250, 0), Timestamp(350, 0)}; - - const auto verify_db_with_get = [&]() { - for (size_t i = 0; i < kNumOfUniqKeys; ++i) { - const std::string base_val = "v" + std::to_string(i) + "_0"; - const std::array expected_values = { - base_val, base_val + ".1", base_val + ".1.2"}; - const std::array& expected_ts = write_ts_strs; - ReadOptions read_opts; - for (size_t j = 0; j < read_ts_strs.size(); ++j) { - Slice read_ts = read_ts_strs[j]; - read_opts.timestamp = &read_ts; - std::string value; - std::string ts; - const Status s = db_->Get(read_opts, Key1(i), &value, &ts); - ASSERT_OK(s); - ASSERT_EQ(expected_values[j], value); - ASSERT_EQ(expected_ts[j], ts); - - // Do Seek/SeekForPrev - std::unique_ptr it(db_->NewIterator(read_opts)); - it->Seek(Key1(i)); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ(expected_values[j], it->value()); - ASSERT_EQ(expected_ts[j], it->timestamp()); - - it->SeekForPrev(Key1(i)); - ASSERT_TRUE(it->Valid()); - ASSERT_EQ(expected_values[j], it->value()); - ASSERT_EQ(expected_ts[j], it->timestamp()); - } - } - }; - - const auto verify_db_with_iterator = [&]() { - std::string value_suffix; - for (size_t i = 0; i < read_ts_strs.size(); ++i) { - ReadOptions read_opts; - Slice read_ts = read_ts_strs[i]; - read_opts.timestamp = &read_ts; - std::unique_ptr it(db_->NewIterator(read_opts)); - size_t key_int_val = 0; - for (it->SeekToFirst(); it->Valid(); it->Next(), ++key_int_val) { - const std::string key = Key1(key_int_val); - const std::string value = - "v" + std::to_string(key_int_val) + "_0" + value_suffix; - ASSERT_EQ(key, it->key()); - ASSERT_EQ(value, it->value()); - 
ASSERT_EQ(write_ts_strs[i], it->timestamp()); - } - ASSERT_EQ(kNumOfUniqKeys, key_int_val); - - key_int_val = kNumOfUniqKeys - 1; - for (it->SeekToLast(); it->Valid(); it->Prev(), --key_int_val) { - const std::string key = Key1(key_int_val); - const std::string value = - "v" + std::to_string(key_int_val) + "_0" + value_suffix; - ASSERT_EQ(key, it->key()); - ASSERT_EQ(value, it->value()); - ASSERT_EQ(write_ts_strs[i], it->timestamp()); - } - ASSERT_EQ(std::numeric_limits::max(), key_int_val); - - value_suffix = value_suffix + "." + std::to_string(i + 1); - } - }; - - verify_db_with_get(); - verify_db_with_iterator(); - - ASSERT_OK(db_->Flush(FlushOptions())); - - verify_db_with_get(); - verify_db_with_iterator(); - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, MergeAfterDeletion) { - Options options = GetDefaultOptions(); - options.create_if_missing = true; - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - options.merge_operator = std::make_shared('.'); - DestroyAndReopen(options); - - ColumnFamilyHandle* const column_family = db_->DefaultColumnFamily(); - - const size_t num_keys_per_file = 10; - const size_t num_merges_per_key = 2; - for (size_t i = 0; i < num_keys_per_file; ++i) { - std::string ts = Timestamp(i + 10000, 0); - Status s = db_->Delete(WriteOptions(), Key1(i), ts); - ASSERT_OK(s); - for (size_t j = 1; j <= num_merges_per_key; ++j) { - ts = Timestamp(i + 10000 + j, 0); - s = db_->Merge(WriteOptions(), column_family, Key1(i), ts, - std::to_string(j)); - ASSERT_OK(s); - } - } - - const auto verify_db = [&]() { - ReadOptions read_opts; - std::string read_ts_str = Timestamp(20000, 0); - Slice ts = read_ts_str; - read_opts.timestamp = &ts; - std::unique_ptr it(db_->NewIterator(read_opts)); - size_t count = 0; - for (it->SeekToFirst(); it->Valid(); it->Next(), ++count) { - std::string key = Key1(count); - ASSERT_EQ(key, it->key()); - std::string value; - for (size_t j = 1; j <= num_merges_per_key; ++j) { - value.append(std::to_string(j)); - if (j < num_merges_per_key) { - value.push_back('.'); - } - } - ASSERT_EQ(value, it->value()); - std::string ts1 = Timestamp(count + 10000 + num_merges_per_key, 0); - ASSERT_EQ(ts1, it->timestamp()); - } - ASSERT_OK(it->status()); - ASSERT_EQ(num_keys_per_file, count); - for (it->SeekToLast(); it->Valid(); it->Prev(), --count) { - std::string key = Key1(count - 1); - ASSERT_EQ(key, it->key()); - std::string value; - for (size_t j = 1; j <= num_merges_per_key; ++j) { - value.append(std::to_string(j)); - if (j < num_merges_per_key) { - value.push_back('.'); - } - } - ASSERT_EQ(value, it->value()); - std::string ts1 = Timestamp(count - 1 + 10000 + num_merges_per_key, 0); - ASSERT_EQ(ts1, it->timestamp()); - } - ASSERT_OK(it->status()); - ASSERT_EQ(0, count); - }; - - verify_db(); - - Close(); -} - -TEST_F(DBBasicTestWithTimestamp, RangeTombstoneApproximateSize) { - // Test code path for calculating range tombstone compensated size - // during flush and compaction. - Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - // So that the compaction below is non-bottommost and will calcualte - // compensated range tombstone size. 
- ASSERT_OK(db_->Put(WriteOptions(), Key(1), Timestamp(1, 0), "val")); - ASSERT_OK(Flush()); - MoveFilesToLevel(5); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), - Key(1), Timestamp(1, 0))); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1), - Key(2), Timestamp(2, 0))); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->RunManualCompaction( - static_cast_with_check(db_->DefaultColumnFamily()) - ->cfd(), - 0 /* input_level */, 1 /* output_level */, CompactRangeOptions(), - nullptr /* begin */, nullptr /* end */, true /* exclusive */, - true /* disallow_trivial_move */, - std::numeric_limits::max() /* max_file_num_to_ignore */, - "" /*trim_ts*/)); -} - -TEST_F(DBBasicTestWithTimestamp, IterSeekToLastWithIterateUpperbound) { - // Test for a bug fix where DBIter::SeekToLast() could fail when - // iterate_upper_bound and iter_start_ts are both set. - Options options = CurrentOptions(); - const size_t kTimestampSize = Timestamp(0, 0).size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; - DestroyAndReopen(options); - - ASSERT_OK(db_->Put(WriteOptions(), Key(1), Timestamp(2, 0), "val")); - ReadOptions ro; - std::string k = Key(1); - Slice k_slice = k; - ro.iterate_upper_bound = &k_slice; - std::string ts = Timestamp(3, 0); - Slice read_ts = ts; - ro.timestamp = &read_ts; - std::string start_ts = Timestamp(0, 0); - Slice start_ts_slice = start_ts; - ro.iter_start_ts = &start_ts_slice; - std::unique_ptr iter{db_->NewIterator(ro)}; - iter->SeekToLast(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_with_timestamp_compaction_test.cc b/db/db_with_timestamp_compaction_test.cc deleted file mode 100644 index 7d80c85c4..000000000 --- a/db/db_with_timestamp_compaction_test.cc +++ /dev/null @@ -1,353 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
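The DBBasicTestWithTimestamp cases above all exercise the same narrow API surface: every write carries a fixed-width encoded timestamp, and a read only sees versions whose timestamp is less than or equal to the one passed through ReadOptions::timestamp. The standalone sketch below shows that round trip outside the test harness. It is illustrative only: the database path and timestamp values are made up, the little-endian encoder mirrors the fixed64 encoding the tests build with PutFixed64, and it assumes the public BytewiseComparatorWithU64Ts() helper is available in this RocksDB version (the tests themselves rely on an equivalent test-util comparator).

// Sketch only: a point write and read with user-defined timestamps.
#include <cassert>
#include <cstdint>
#include <string>

#include "rocksdb/comparator.h"
#include "rocksdb/db.h"

int main() {
  using namespace ROCKSDB_NAMESPACE;

  // Little-endian fixed64 encoding, matching what the tests produce via PutFixed64.
  auto EncodeU64Ts = [](uint64_t ts) {
    std::string out(8, '\0');
    for (int i = 0; i < 8; ++i) {
      out[i] = static_cast<char>(ts >> (8 * i));
    }
    return out;
  };

  Options options;
  options.create_if_missing = true;
  // Assumed available: a bytewise comparator with an 8-byte timestamp suffix.
  options.comparator = BytewiseComparatorWithU64Ts();

  DB* db = nullptr;
  Status s = DB::Open(options, "/tmp/ts_sketch_db" /* illustrative path */, &db);
  assert(s.ok());

  // Put overload that takes an encoded timestamp alongside key and value.
  s = db->Put(WriteOptions(), "key", EncodeU64Ts(100), "value");
  assert(s.ok());

  // Read as of timestamp 150: only versions written at ts <= 150 are visible.
  std::string read_ts = EncodeU64Ts(150);
  Slice read_ts_slice = read_ts;
  ReadOptions read_opts;
  read_opts.timestamp = &read_ts_slice;

  std::string value, ts;
  s = db->Get(read_opts, "key", &value, &ts);  // `ts` returns the version's write timestamp
  assert(s.ok() && value == "value" && ts == EncodeU64Ts(100));

  delete db;
  return 0;
}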
- -#include "db/compaction/compaction.h" -#include "db/db_test_util.h" -#include "port/stack_trace.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -std::string Key1(uint64_t key) { - std::string ret; - PutFixed64(&ret, key); - std::reverse(ret.begin(), ret.end()); - return ret; -} - -std::string Timestamp(uint64_t ts) { - std::string ret; - PutFixed64(&ret, ts); - return ret; -} -} // anonymous namespace - -class TimestampCompatibleCompactionTest : public DBTestBase { - public: - TimestampCompatibleCompactionTest() - : DBTestBase("ts_compatible_compaction_test", /*env_do_fsync=*/true) {} - - std::string Get(const std::string& key, uint64_t ts) { - ReadOptions read_opts; - std::string ts_str = Timestamp(ts); - Slice ts_slice = ts_str; - read_opts.timestamp = &ts_slice; - std::string value; - Status s = db_->Get(read_opts, key, &value); - if (s.IsNotFound()) { - value.assign("NOT_FOUND"); - } else if (!s.ok()) { - value.assign(s.ToString()); - } - return value; - } -}; - -TEST_F(TimestampCompatibleCompactionTest, UserKeyCrossFileBoundary) { - Options options = CurrentOptions(); - options.env = env_; - options.compaction_style = kCompactionStyleLevel; - options.comparator = test::BytewiseComparatorWithU64TsWrapper(); - options.level0_file_num_compaction_trigger = 3; - constexpr size_t kNumKeysPerFile = 101; - options.memtable_factory.reset( - test::NewSpecialSkipListFactory(kNumKeysPerFile)); - DestroyAndReopen(options); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { - const auto* compaction = reinterpret_cast(arg); - ASSERT_NE(nullptr, compaction); - ASSERT_EQ(0, compaction->start_level()); - ASSERT_EQ(1, compaction->num_input_levels()); - // Check that all 3 L0 ssts are picked for level compaction. - ASSERT_EQ(3, compaction->num_input_files(0)); - }); - SyncPoint::GetInstance()->EnableProcessing(); - // Write a L0 with keys 0, 1, ..., 99 with ts from 100 to 199. - uint64_t ts = 100; - uint64_t key = 0; - WriteOptions write_opts; - for (; key < kNumKeysPerFile - 1; ++key, ++ts) { - std::string ts_str = Timestamp(ts); - ASSERT_OK( - db_->Put(write_opts, Key1(key), ts_str, "foo_" + std::to_string(key))); - } - // Write another L0 with keys 99 with newer ts. 
- ASSERT_OK(Flush()); - uint64_t saved_read_ts1 = ts++; - key = 99; - for (int i = 0; i < 4; ++i, ++ts) { - std::string ts_str = Timestamp(ts); - ASSERT_OK( - db_->Put(write_opts, Key1(key), ts_str, "bar_" + std::to_string(key))); - } - ASSERT_OK(Flush()); - uint64_t saved_read_ts2 = ts++; - // Write another L0 with keys 99, 100, 101, ..., 150 - for (; key <= 150; ++key, ++ts) { - std::string ts_str = Timestamp(ts); - ASSERT_OK( - db_->Put(write_opts, Key1(key), ts_str, "foo1_" + std::to_string(key))); - } - ASSERT_OK(Flush()); - // Wait for compaction to finish - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - uint64_t read_ts = ts; - ASSERT_EQ("foo_99", Get(Key1(99), saved_read_ts1)); - ASSERT_EQ("bar_99", Get(Key1(99), saved_read_ts2)); - ASSERT_EQ("foo1_99", Get(Key1(99), read_ts)); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(TimestampCompatibleCompactionTest, MultipleSubCompactions) { - Options options = CurrentOptions(); - options.env = env_; - options.compaction_style = kCompactionStyleUniversal; - options.comparator = test::BytewiseComparatorWithU64TsWrapper(); - options.level0_file_num_compaction_trigger = 3; - options.max_subcompactions = 3; - options.target_file_size_base = 1024; - options.statistics = CreateDBStatistics(); - DestroyAndReopen(options); - - uint64_t ts = 100; - uint64_t key = 0; - WriteOptions write_opts; - - // Write keys 0, 1, ..., 499 with ts from 100 to 599. - { - for (; key <= 499; ++key, ++ts) { - std::string ts_str = Timestamp(ts); - ASSERT_OK(db_->Put(write_opts, Key1(key), ts_str, - "foo_" + std::to_string(key))); - } - } - - // Write keys 500, ..., 999 with ts from 600 to 1099. - { - for (; key <= 999; ++key, ++ts) { - std::string ts_str = Timestamp(ts); - ASSERT_OK(db_->Put(write_opts, Key1(key), ts_str, - "foo_" + std::to_string(key))); - } - ASSERT_OK(Flush()); - } - - // Wait for compaction to finish - { - ASSERT_OK(dbfull()->RunManualCompaction( - static_cast_with_check( - db_->DefaultColumnFamily()) - ->cfd(), - 0 /* input_level */, 1 /* output_level */, CompactRangeOptions(), - nullptr /* begin */, nullptr /* end */, true /* exclusive */, - true /* disallow_trivial_move */, - std::numeric_limits::max() /* max_file_num_to_ignore */, - "" /*trim_ts*/)); - } - - // Check stats to make sure multiple subcompactions were scheduled for - // boundaries not to be nullptr. 
- { - HistogramData num_sub_compactions; - options.statistics->histogramData(NUM_SUBCOMPACTIONS_SCHEDULED, - &num_sub_compactions); - ASSERT_GT(num_sub_compactions.sum, 1); - } - - for (key = 0; key <= 999; ++key) { - ASSERT_EQ("foo_" + std::to_string(key), Get(Key1(key), ts)); - } -} - -class TestFilePartitioner : public SstPartitioner { - public: - explicit TestFilePartitioner() {} - ~TestFilePartitioner() override {} - - const char* Name() const override { return "TestFilePartitioner"; } - PartitionerResult ShouldPartition( - const PartitionerRequest& /*request*/) override { - return PartitionerResult::kRequired; - } - bool CanDoTrivialMove(const Slice& /*smallest_user_key*/, - const Slice& /*largest_user_key*/) override { - return false; - } -}; - -class TestFilePartitionerFactory : public SstPartitionerFactory { - public: - explicit TestFilePartitionerFactory() {} - std::unique_ptr CreatePartitioner( - const SstPartitioner::Context& /*context*/) const override { - std::unique_ptr ret = - std::make_unique(); - return ret; - } - const char* Name() const override { return "TestFilePartitionerFactory"; } -}; - -TEST_F(TimestampCompatibleCompactionTest, CompactFilesRangeCheckL0) { - Options options = CurrentOptions(); - options.env = env_; - options.sst_partitioner_factory = - std::make_shared(); - options.comparator = test::BytewiseComparatorWithU64TsWrapper(); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - constexpr int kNumFiles = 10; - constexpr int kKeysPerFile = 2; - const std::string user_key = "foo"; - constexpr uint64_t start_ts = 10000; - - uint64_t cur_ts = start_ts; - for (int k = 0; k < kNumFiles; ++k) { - for (int i = 0; i < kKeysPerFile; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), user_key, Timestamp(cur_ts), - "v" + std::to_string(i))); - ++cur_ts; - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - - std::vector input_files{}; - { - std::vector files; - ASSERT_OK(env_->GetChildren(dbname_, &files)); - for (const auto& f : files) { - uint64_t file_num = 0; - FileType file_type = FileType::kWalFile; - if (!ParseFileName(f, &file_num, &file_type) || - file_type != FileType::kTableFile) { - continue; - } - input_files.emplace_back(f); - } - // sorting here by name, which also happens to sort by generation date. - std::sort(input_files.begin(), input_files.end()); - assert(kNumFiles == input_files.size()); - std::vector tmp; - tmp.emplace_back(input_files[input_files.size() / 2]); - input_files.swap(tmp); - } - - { - std::vector output_file_names; - CompactionJobInfo compaction_job_info; - ASSERT_OK(db_->CompactFiles(CompactionOptions(), input_files, - /*output_level=*/1, /*output_path_id=*/-1, - &output_file_names, &compaction_job_info)); - // We expect the L0 files older than the original provided input were all - // included in the compaction. - ASSERT_EQ(static_cast(kNumFiles / 2 + 1), - compaction_job_info.input_files.size()); - } -} - -TEST_F(TimestampCompatibleCompactionTest, CompactFilesRangeCheckL1) { - Options options = CurrentOptions(); - options.env = env_; - options.sst_partitioner_factory = - std::make_shared(); - options.comparator = test::BytewiseComparatorWithU64TsWrapper(); - - constexpr int kNumFiles = 4; - options.level0_file_num_compaction_trigger = kNumFiles; - - DestroyAndReopen(options); - - constexpr int kKeysPerFile = 2; - const std::string user_key = "foo"; - constexpr uint64_t start_ts = 10000; - - uint64_t cur_ts = start_ts; - // Generate some initial files in both L0 and L1. 
- for (int k = 0; k < kNumFiles; ++k) { - for (int i = 0; i < kKeysPerFile; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), user_key, Timestamp(cur_ts), - "v" + std::to_string(i))); - ++cur_ts; - } - ASSERT_OK(db_->Flush(FlushOptions())); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(0, NumTableFilesAtLevel(/*level=*/0, /*cf=*/0)); - ASSERT_EQ(kNumFiles * kKeysPerFile, - NumTableFilesAtLevel(/*level=*/1, /*cf=*/0)); - - constexpr int additional_l0s = 2; - for (int i = 0; i < additional_l0s; ++i, ++cur_ts) { - ASSERT_OK(db_->Put(WriteOptions(), user_key, Timestamp(cur_ts), "v")); - ASSERT_OK(db_->Flush(FlushOptions())); - } - ASSERT_EQ(additional_l0s, NumTableFilesAtLevel(/*level=*/0, /*cf=*/0)); - - std::vector inputs; - { - std::vector fmetas; - db_->GetLiveFilesMetaData(&fmetas); - bool included_one_l1 = false; - for (const auto& meta : fmetas) { - if (meta.level == 0) { - inputs.emplace_back(meta.relative_filename); - } else if (!included_one_l1) { - inputs.emplace_back(meta.relative_filename); - included_one_l1 = true; - } - } - } - ASSERT_EQ(static_cast(3), inputs.size()); - { - std::vector output_file_names; - CompactionJobInfo compaction_job_info; - - ASSERT_OK(db_->CompactFiles(CompactionOptions(), inputs, /*output_level=*/1, - /*output_path_id=*/-1, &output_file_names, - &compaction_job_info)); - ASSERT_EQ(kNumFiles * kKeysPerFile + 2, output_file_names.size()); - ASSERT_EQ(kNumFiles * kKeysPerFile + 2, - static_cast(compaction_job_info.input_files.size())); - } -} - -TEST_F(TimestampCompatibleCompactionTest, EmptyCompactionOutput) { - Options options = CurrentOptions(); - options.env = env_; - options.comparator = test::BytewiseComparatorWithU64TsWrapper(); - DestroyAndReopen(options); - - std::string ts_str = Timestamp(1); - WriteOptions wopts; - ASSERT_OK( - db_->DeleteRange(wopts, db_->DefaultColumnFamily(), "k1", "k3", ts_str)); - ASSERT_OK(Flush()); - - ts_str = Timestamp(3); - Slice ts = ts_str; - CompactRangeOptions cro; - // range tombstone will be dropped during compaction - cro.full_history_ts_low = &ts; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_write_buffer_manager_test.cc b/db/db_write_buffer_manager_test.cc deleted file mode 100644 index 294244547..000000000 --- a/db/db_write_buffer_manager_test.cc +++ /dev/null @@ -1,860 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
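The last compaction test above (EmptyCompactionOutput) exercises CompactRangeOptions::full_history_ts_low, the timestamp below which compaction is allowed to collapse history and drop the range tombstone. Below is a hedged sketch of that call lifted out of the test harness; the function name, the caller-supplied db pointer, and the encoded timestamp are illustrative, and the options mirror what the test sets rather than any required configuration.

// Sketch only: treat `encoded_ts_low` as the low watermark for full history, so
// versions and tombstones older than it may be dropped during compaction.
#include <string>

#include "rocksdb/db.h"

ROCKSDB_NAMESPACE::Status CompactDroppingOldHistory(
    ROCKSDB_NAMESPACE::DB* db, const std::string& encoded_ts_low) {
  using namespace ROCKSDB_NAMESPACE;
  CompactRangeOptions cro;
  Slice ts_low = encoded_ts_low;       // fixed-width encoded timestamp, e.g. fixed64
  cro.full_history_ts_low = &ts_low;   // history below this timestamp becomes droppable
  // Force the bottommost level to be rewritten as well, as the test above does.
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  return db->CompactRange(cro, nullptr /* begin */, nullptr /* end */);
}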
- -#include "db/db_test_util.h" -#include "db/write_thread.h" -#include "port/stack_trace.h" - -namespace ROCKSDB_NAMESPACE { - -class DBWriteBufferManagerTest : public DBTestBase, - public testing::WithParamInterface { - public: - DBWriteBufferManagerTest() - : DBTestBase("db_write_buffer_manager_test", /*env_do_fsync=*/false) {} - bool cost_cache_; -}; - -TEST_P(DBWriteBufferManagerTest, SharedBufferAcrossCFs1) { - Options options = CurrentOptions(); - options.arena_block_size = 4096; - options.write_buffer_size = 500000; // this is never hit - std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); - ASSERT_LT(cache->GetUsage(), 256 * 1024); - cost_cache_ = GetParam(); - - if (cost_cache_) { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, cache, true)); - } else { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, nullptr, true)); - } - - WriteOptions wo; - wo.disableWAL = true; - - CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options); - ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - Flush(3); - ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); - Flush(0); - - // Write to "Default", "cf2" and "cf3". - ASSERT_OK(Put(3, Key(1), DummyString(30000), wo)); - ASSERT_OK(Put(0, Key(1), DummyString(40000), wo)); - ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); - - ASSERT_OK(Put(3, Key(2), DummyString(40000), wo)); - // WriteBufferManager::buffer_size_ has exceeded after the previous write is - // completed. - - // This make sures write will go through and if stall was in effect, it will - // end. - ASSERT_OK(Put(0, Key(2), DummyString(1), wo)); -} - -// Test Single DB with multiple writer threads get blocked when -// WriteBufferManager execeeds buffer_size_ and flush is waiting to be -// finished. -TEST_P(DBWriteBufferManagerTest, SharedWriteBufferAcrossCFs2) { - Options options = CurrentOptions(); - options.arena_block_size = 4096; - options.write_buffer_size = 500000; // this is never hit - std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); - ASSERT_LT(cache->GetUsage(), 256 * 1024); - cost_cache_ = GetParam(); - - if (cost_cache_) { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, cache, true)); - } else { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, nullptr, true)); - } - WriteOptions wo; - wo.disableWAL = true; - - CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options); - ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - Flush(3); - ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); - Flush(0); - - // Write to "Default", "cf2" and "cf3". No flush will be triggered. - ASSERT_OK(Put(3, Key(1), DummyString(30000), wo)); - ASSERT_OK(Put(0, Key(1), DummyString(40000), wo)); - ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); - - ASSERT_OK(Put(3, Key(2), DummyString(40000), wo)); - // WriteBufferManager::buffer_size_ has exceeded after the previous write is - // completed. 
- - std::unordered_set w_set; - std::vector threads; - int wait_count_db = 0; - int num_writers = 4; - InstrumentedMutex mutex; - InstrumentedCondVar cv(&mutex); - std::atomic thread_num(0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0", - "DBImpl::BackgroundCallFlush:start"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WBMStallInterface::BlockDB", [&](void*) { - InstrumentedMutexLock lock(&mutex); - wait_count_db++; - cv.SignalAll(); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::WriteStall::Wait", [&](void* arg) { - InstrumentedMutexLock lock(&mutex); - WriteThread::Writer* w = reinterpret_cast(arg); - w_set.insert(w); - // Allow the flush to continue if all writer threads are blocked. - if (w_set.size() == (unsigned long)num_writers) { - TEST_SYNC_POINT( - "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0"); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - bool s = true; - - std::function writer = [&](int cf) { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - Status tmp = Put(cf, Slice(key), DummyString(1), wo); - InstrumentedMutexLock lock(&mutex); - s = s && tmp.ok(); - }; - - // Flow: - // main_writer thread will write but will be blocked (as Flush will on hold, - // buffer_size_ has exceeded, thus will create stall in effect). - // | - // | - // multiple writer threads will be created to write across multiple columns - // and they will be blocked. - // | - // | - // Last writer thread will write and when its blocked it will signal Flush to - // continue to clear the stall. - - threads.emplace_back(writer, 1); - // Wait untill first thread (main_writer) writing to DB is blocked and then - // create the multiple writers which will be blocked from getting added to the - // queue because stall is in effect. - { - InstrumentedMutexLock lock(&mutex); - while (wait_count_db != 1) { - cv.Wait(); - } - } - for (int i = 0; i < num_writers; i++) { - threads.emplace_back(writer, i % 4); - } - for (auto& t : threads) { - t.join(); - } - - ASSERT_TRUE(s); - - // Number of DBs blocked. - ASSERT_EQ(wait_count_db, 1); - // Number of Writer threads blocked. - ASSERT_EQ(w_set.size(), num_writers); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -// Test multiple DBs get blocked when WriteBufferManager limit exceeds and flush -// is waiting to be finished but DBs tries to write meanwhile. 
-TEST_P(DBWriteBufferManagerTest, SharedWriteBufferLimitAcrossDB) { - std::vector dbnames; - std::vector dbs; - int num_dbs = 3; - - for (int i = 0; i < num_dbs; i++) { - dbs.push_back(nullptr); - dbnames.push_back( - test::PerThreadDBPath("db_shared_wb_db" + std::to_string(i))); - } - - Options options = CurrentOptions(); - options.arena_block_size = 4096; - options.write_buffer_size = 500000; // this is never hit - std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); - ASSERT_LT(cache->GetUsage(), 256 * 1024); - cost_cache_ = GetParam(); - - if (cost_cache_) { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, cache, true)); - } else { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, nullptr, true)); - } - CreateAndReopenWithCF({"cf1", "cf2"}, options); - - for (int i = 0; i < num_dbs; i++) { - ASSERT_OK(DestroyDB(dbnames[i], options)); - ASSERT_OK(DB::Open(options, dbnames[i], &(dbs[i]))); - } - WriteOptions wo; - wo.disableWAL = true; - - for (int i = 0; i < num_dbs; i++) { - ASSERT_OK(dbs[i]->Put(wo, Key(1), DummyString(20000))); - } - // Insert to db_. - ASSERT_OK(Put(0, Key(1), DummyString(30000), wo)); - - // WriteBufferManager Limit exceeded. - std::vector threads; - int wait_count_db = 0; - InstrumentedMutex mutex; - InstrumentedCondVar cv(&mutex); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0", - "DBImpl::BackgroundCallFlush:start"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WBMStallInterface::BlockDB", [&](void*) { - { - InstrumentedMutexLock lock(&mutex); - wait_count_db++; - cv.Signal(); - // Since this is the last DB, signal Flush to continue. - if (wait_count_db == num_dbs + 1) { - TEST_SYNC_POINT( - "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0"); - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - bool s = true; - - // Write to DB. - std::function write_db = [&](DB* db) { - Status tmp = db->Put(wo, Key(3), DummyString(1)); - InstrumentedMutexLock lock(&mutex); - s = s && tmp.ok(); - }; - - // Flow: - // db_ will write and will be blocked (as Flush will on hold and will create - // stall in effect). - // | - // multiple dbs writers will be created to write to that db and they will be - // blocked. - // | - // | - // Last writer will write and when its blocked it will signal Flush to - // continue to clear the stall. - - threads.emplace_back(write_db, db_); - // Wait untill first DB is blocked and then create the multiple writers for - // different DBs which will be blocked from getting added to the queue because - // stall is in effect. - { - InstrumentedMutexLock lock(&mutex); - while (wait_count_db != 1) { - cv.Wait(); - } - } - for (int i = 0; i < num_dbs; i++) { - threads.emplace_back(write_db, dbs[i]); - } - for (auto& t : threads) { - t.join(); - } - - ASSERT_TRUE(s); - ASSERT_EQ(num_dbs + 1, wait_count_db); - // Clean up DBs. - for (int i = 0; i < num_dbs; i++) { - ASSERT_OK(dbs[i]->Close()); - ASSERT_OK(DestroyDB(dbnames[i], options)); - delete dbs[i]; - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -// Test multiple threads writing across multiple DBs and multiple columns get -// blocked when stall by WriteBufferManager is in effect. 
-TEST_P(DBWriteBufferManagerTest, SharedWriteBufferLimitAcrossDB1) { - std::vector dbnames; - std::vector dbs; - int num_dbs = 3; - - for (int i = 0; i < num_dbs; i++) { - dbs.push_back(nullptr); - dbnames.push_back( - test::PerThreadDBPath("db_shared_wb_db" + std::to_string(i))); - } - - Options options = CurrentOptions(); - options.arena_block_size = 4096; - options.write_buffer_size = 500000; // this is never hit - std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); - ASSERT_LT(cache->GetUsage(), 256 * 1024); - cost_cache_ = GetParam(); - - if (cost_cache_) { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, cache, true)); - } else { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, nullptr, true)); - } - CreateAndReopenWithCF({"cf1", "cf2"}, options); - - for (int i = 0; i < num_dbs; i++) { - ASSERT_OK(DestroyDB(dbnames[i], options)); - ASSERT_OK(DB::Open(options, dbnames[i], &(dbs[i]))); - } - WriteOptions wo; - wo.disableWAL = true; - - for (int i = 0; i < num_dbs; i++) { - ASSERT_OK(dbs[i]->Put(wo, Key(1), DummyString(20000))); - } - // Insert to db_. - ASSERT_OK(Put(0, Key(1), DummyString(30000), wo)); - - // WriteBufferManager::buffer_size_ has exceeded after the previous write to - // dbs[0] is completed. - std::vector threads; - int wait_count_db = 0; - InstrumentedMutex mutex; - InstrumentedCondVar cv(&mutex); - std::unordered_set w_set; - std::vector writer_threads; - std::atomic thread_num(0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0", - "DBImpl::BackgroundCallFlush:start"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WBMStallInterface::BlockDB", [&](void*) { - { - InstrumentedMutexLock lock(&mutex); - wait_count_db++; - thread_num.fetch_add(1); - cv.Signal(); - // Allow the flush to continue if all writer threads are blocked. - if (thread_num.load(std::memory_order_relaxed) == 2 * num_dbs + 1) { - TEST_SYNC_POINT( - "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0"); - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::WriteStall::Wait", [&](void* arg) { - WriteThread::Writer* w = reinterpret_cast(arg); - { - InstrumentedMutexLock lock(&mutex); - w_set.insert(w); - thread_num.fetch_add(1); - // Allow the flush continue if all writer threads are blocked. - if (thread_num.load(std::memory_order_relaxed) == 2 * num_dbs + 1) { - TEST_SYNC_POINT( - "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0"); - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - bool s1 = true, s2 = true; - // Write to multiple columns of db_. - std::function write_cf = [&](int cf) { - Status tmp = Put(cf, Key(3), DummyString(1), wo); - InstrumentedMutexLock lock(&mutex); - s1 = s1 && tmp.ok(); - }; - // Write to multiple DBs. - std::function write_db = [&](DB* db) { - Status tmp = db->Put(wo, Key(3), DummyString(1)); - InstrumentedMutexLock lock(&mutex); - s2 = s2 && tmp.ok(); - }; - - // Flow: - // thread will write to db_ will be blocked (as Flush will on hold, - // buffer_size_ has exceeded and will create stall in effect). - // | - // | - // multiple writers threads writing to different DBs and to db_ across - // multiple columns will be created and they will be blocked due to stall. - // | - // | - // Last writer thread will write and when its blocked it will signal Flush to - // continue to clear the stall. 
- threads.emplace_back(write_db, db_); - // Wait untill first thread is blocked and then create the multiple writer - // threads. - { - InstrumentedMutexLock lock(&mutex); - while (wait_count_db != 1) { - cv.Wait(); - } - } - - for (int i = 0; i < num_dbs; i++) { - // Write to multiple columns of db_. - writer_threads.emplace_back(write_cf, i % 3); - // Write to different dbs. - threads.emplace_back(write_db, dbs[i]); - } - for (auto& t : threads) { - t.join(); - } - for (auto& t : writer_threads) { - t.join(); - } - - ASSERT_TRUE(s1); - ASSERT_TRUE(s2); - - // Number of DBs blocked. - ASSERT_EQ(num_dbs + 1, wait_count_db); - // Number of Writer threads blocked. - ASSERT_EQ(w_set.size(), num_dbs); - // Clean up DBs. - for (int i = 0; i < num_dbs; i++) { - ASSERT_OK(dbs[i]->Close()); - ASSERT_OK(DestroyDB(dbnames[i], options)); - delete dbs[i]; - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -// Test multiple threads writing across multiple columns of db_ by passing -// different values to WriteOption.no_slown_down. -TEST_P(DBWriteBufferManagerTest, MixedSlowDownOptionsSingleDB) { - Options options = CurrentOptions(); - options.arena_block_size = 4096; - options.write_buffer_size = 500000; // this is never hit - std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); - ASSERT_LT(cache->GetUsage(), 256 * 1024); - cost_cache_ = GetParam(); - - if (cost_cache_) { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, cache, true)); - } else { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, nullptr, true)); - } - WriteOptions wo; - wo.disableWAL = true; - - CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options); - - ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - Flush(3); - ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); - Flush(0); - - // Write to "Default", "cf2" and "cf3". No flush will be triggered. - ASSERT_OK(Put(3, Key(1), DummyString(30000), wo)); - ASSERT_OK(Put(0, Key(1), DummyString(40000), wo)); - ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); - ASSERT_OK(Put(3, Key(2), DummyString(40000), wo)); - - // WriteBufferManager::buffer_size_ has exceeded after the previous write to - // db_ is completed. - - std::unordered_set w_slowdown_set; - std::vector threads; - int wait_count_db = 0; - int num_writers = 4; - InstrumentedMutex mutex; - InstrumentedCondVar cv(&mutex); - std::atomic thread_num(0); - std::atomic w_no_slowdown(0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0", - "DBImpl::BackgroundCallFlush:start"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WBMStallInterface::BlockDB", [&](void*) { - { - InstrumentedMutexLock lock(&mutex); - wait_count_db++; - cv.SignalAll(); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::WriteStall::Wait", [&](void* arg) { - { - InstrumentedMutexLock lock(&mutex); - WriteThread::Writer* w = reinterpret_cast(arg); - w_slowdown_set.insert(w); - // Allow the flush continue if all writer threads are blocked. 
- if (w_slowdown_set.size() + (unsigned long)w_no_slowdown.load( - std::memory_order_relaxed) == - (unsigned long)num_writers) { - TEST_SYNC_POINT( - "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0"); - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - bool s1 = true, s2 = true; - - std::function write_slow_down = [&](int cf) { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions write_op; - write_op.no_slowdown = false; - Status tmp = Put(cf, Slice(key), DummyString(1), write_op); - InstrumentedMutexLock lock(&mutex); - s1 = s1 && tmp.ok(); - }; - - std::function write_no_slow_down = [&](int cf) { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions write_op; - write_op.no_slowdown = true; - Status tmp = Put(cf, Slice(key), DummyString(1), write_op); - { - InstrumentedMutexLock lock(&mutex); - s2 = s2 && !tmp.ok(); - w_no_slowdown.fetch_add(1); - // Allow the flush continue if all writer threads are blocked. - if (w_slowdown_set.size() + - (unsigned long)w_no_slowdown.load(std::memory_order_relaxed) == - (unsigned long)num_writers) { - TEST_SYNC_POINT( - "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0"); - } - } - }; - - // Flow: - // main_writer thread will write but will be blocked (as Flush will on hold, - // buffer_size_ has exceeded, thus will create stall in effect). - // | - // | - // multiple writer threads will be created to write across multiple columns - // with different values of WriteOptions.no_slowdown. Some of them will - // be blocked and some of them will return with Incomplete status. - // | - // | - // Last writer thread will write and when its blocked/return it will signal - // Flush to continue to clear the stall. - threads.emplace_back(write_slow_down, 1); - // Wait untill first thread (main_writer) writing to DB is blocked and then - // create the multiple writers which will be blocked from getting added to the - // queue because stall is in effect. - { - InstrumentedMutexLock lock(&mutex); - while (wait_count_db != 1) { - cv.Wait(); - } - } - - for (int i = 0; i < num_writers; i += 2) { - threads.emplace_back(write_no_slow_down, (i) % 4); - threads.emplace_back(write_slow_down, (i + 1) % 4); - } - for (auto& t : threads) { - t.join(); - } - - ASSERT_TRUE(s1); - ASSERT_TRUE(s2); - // Number of DBs blocked. - ASSERT_EQ(wait_count_db, 1); - // Number of Writer threads blocked. - ASSERT_EQ(w_slowdown_set.size(), num_writers / 2); - // Number of Writer threads with WriteOptions.no_slowdown = true. - ASSERT_EQ(w_no_slowdown.load(std::memory_order_relaxed), num_writers / 2); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -// Test multiple threads writing across multiple columns of db_ and different -// dbs by passing different values to WriteOption.no_slown_down. 
-TEST_P(DBWriteBufferManagerTest, MixedSlowDownOptionsMultipleDB) { - std::vector dbnames; - std::vector dbs; - int num_dbs = 4; - - for (int i = 0; i < num_dbs; i++) { - dbs.push_back(nullptr); - dbnames.push_back( - test::PerThreadDBPath("db_shared_wb_db" + std::to_string(i))); - } - - Options options = CurrentOptions(); - options.arena_block_size = 4096; - options.write_buffer_size = 500000; // this is never hit - std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); - ASSERT_LT(cache->GetUsage(), 256 * 1024); - cost_cache_ = GetParam(); - - if (cost_cache_) { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, cache, true)); - } else { - options.write_buffer_manager.reset( - new WriteBufferManager(100000, nullptr, true)); - } - CreateAndReopenWithCF({"cf1", "cf2"}, options); - - for (int i = 0; i < num_dbs; i++) { - ASSERT_OK(DestroyDB(dbnames[i], options)); - ASSERT_OK(DB::Open(options, dbnames[i], &(dbs[i]))); - } - WriteOptions wo; - wo.disableWAL = true; - - for (int i = 0; i < num_dbs; i++) { - ASSERT_OK(dbs[i]->Put(wo, Key(1), DummyString(20000))); - } - // Insert to db_. - ASSERT_OK(Put(0, Key(1), DummyString(30000), wo)); - - // WriteBufferManager::buffer_size_ has exceeded after the previous write to - // dbs[0] is completed. - std::vector threads; - int wait_count_db = 0; - InstrumentedMutex mutex; - InstrumentedCondVar cv(&mutex); - std::unordered_set w_slowdown_set; - std::vector writer_threads; - std::atomic thread_num(0); - std::atomic w_no_slowdown(0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0", - "DBImpl::BackgroundCallFlush:start"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WBMStallInterface::BlockDB", [&](void*) { - InstrumentedMutexLock lock(&mutex); - wait_count_db++; - cv.Signal(); - // Allow the flush continue if all writer threads are blocked. - if (w_slowdown_set.size() + - (unsigned long)(w_no_slowdown.load(std::memory_order_relaxed) + - wait_count_db) == - (unsigned long)(2 * num_dbs + 1)) { - TEST_SYNC_POINT( - "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0"); - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::WriteStall::Wait", [&](void* arg) { - WriteThread::Writer* w = reinterpret_cast(arg); - InstrumentedMutexLock lock(&mutex); - w_slowdown_set.insert(w); - // Allow the flush continue if all writer threads are blocked. 
- if (w_slowdown_set.size() + - (unsigned long)(w_no_slowdown.load(std::memory_order_relaxed) + - wait_count_db) == - (unsigned long)(2 * num_dbs + 1)) { - TEST_SYNC_POINT( - "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0"); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - bool s1 = true, s2 = true; - std::function write_slow_down = [&](DB* db) { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions write_op; - write_op.no_slowdown = false; - Status tmp = db->Put(write_op, Slice(key), DummyString(1)); - InstrumentedMutexLock lock(&mutex); - s1 = s1 && tmp.ok(); - }; - - std::function write_no_slow_down = [&](DB* db) { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions write_op; - write_op.no_slowdown = true; - Status tmp = db->Put(write_op, Slice(key), DummyString(1)); - { - InstrumentedMutexLock lock(&mutex); - s2 = s2 && !tmp.ok(); - w_no_slowdown.fetch_add(1); - if (w_slowdown_set.size() + - (unsigned long)(w_no_slowdown.load(std::memory_order_relaxed) + - wait_count_db) == - (unsigned long)(2 * num_dbs + 1)) { - TEST_SYNC_POINT( - "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0"); - } - } - }; - - // Flow: - // first thread will write but will be blocked (as Flush will on hold, - // buffer_size_ has exceeded, thus will create stall in effect). - // | - // | - // multiple writer threads will be created to write across multiple columns - // of db_ and different DBs with different values of - // WriteOptions.no_slowdown. Some of them will be blocked and some of them - // will return with Incomplete status. - // | - // | - // Last writer thread will write and when its blocked/return it will signal - // Flush to continue to clear the stall. - threads.emplace_back(write_slow_down, db_); - // Wait untill first thread writing to DB is blocked and then - // create the multiple writers. - { - InstrumentedMutexLock lock(&mutex); - while (wait_count_db != 1) { - cv.Wait(); - } - } - - for (int i = 0; i < num_dbs; i += 2) { - // Write to multiple columns of db_. - writer_threads.emplace_back(write_slow_down, db_); - writer_threads.emplace_back(write_no_slow_down, db_); - // Write to different DBs. - threads.emplace_back(write_slow_down, dbs[i]); - threads.emplace_back(write_no_slow_down, dbs[i + 1]); - } - - for (auto& t : threads) { - t.join(); - } - - for (auto& t : writer_threads) { - t.join(); - } - - ASSERT_TRUE(s1); - ASSERT_TRUE(s2); - // Number of DBs blocked. - ASSERT_EQ((num_dbs / 2) + 1, wait_count_db); - // Number of writer threads writing to db_ blocked from getting added to the - // queue. - ASSERT_EQ(w_slowdown_set.size(), num_dbs / 2); - // Number of threads with WriteOptions.no_slowdown = true. - ASSERT_EQ(w_no_slowdown.load(std::memory_order_relaxed), num_dbs); - - // Clean up DBs. - for (int i = 0; i < num_dbs; i++) { - ASSERT_OK(dbs[i]->Close()); - ASSERT_OK(DestroyDB(dbnames[i], options)); - delete dbs[i]; - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - - -// Tests a `WriteBufferManager` constructed with `allow_stall == false` does not -// thrash memtable switching when full and a CF receives multiple writes. -// Instead, we expect to switch a CF's memtable for flush only when that CF does -// not have any pending or running flush. -// -// This test uses multiple DBs each with a single CF instead of a single DB -// with multiple CFs. 
That way we can control which CF is considered for switch -// by writing to that CF's DB. -// -// Not supported in LITE mode due to `GetProperty()` unavailable. -TEST_P(DBWriteBufferManagerTest, StopSwitchingMemTablesOnceFlushing) { - Options options = CurrentOptions(); - options.arena_block_size = 4 << 10; // 4KB - options.write_buffer_size = 1 << 20; // 1MB - std::shared_ptr cache = - NewLRUCache(4 << 20 /* capacity (4MB) */, 2 /* num_shard_bits */); - ASSERT_LT(cache->GetUsage(), 256 << 10 /* 256KB */); - cost_cache_ = GetParam(); - if (cost_cache_) { - options.write_buffer_manager.reset(new WriteBufferManager( - 512 << 10 /* buffer_size (512KB) */, cache, false /* allow_stall */)); - } else { - options.write_buffer_manager.reset( - new WriteBufferManager(512 << 10 /* buffer_size (512KB) */, - nullptr /* cache */, false /* allow_stall */)); - } - - Reopen(options); - std::string dbname = test::PerThreadDBPath("db_shared_wbm_db"); - DB* shared_wbm_db = nullptr; - - ASSERT_OK(DestroyDB(dbname, options)); - ASSERT_OK(DB::Open(options, dbname, &shared_wbm_db)); - - // The last write will make WBM need flush, but it won't flush yet. - ASSERT_OK(Put(Key(1), DummyString(256 << 10 /* 256KB */), WriteOptions())); - ASSERT_FALSE(options.write_buffer_manager->ShouldFlush()); - ASSERT_OK(Put(Key(1), DummyString(256 << 10 /* 256KB */), WriteOptions())); - ASSERT_TRUE(options.write_buffer_manager->ShouldFlush()); - - // Flushes will be pending, not running because flush threads are blocked. - test::SleepingBackgroundTask sleeping_task_high; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_task_high, Env::Priority::HIGH); - - for (int i = 0; i < 3; ++i) { - ASSERT_OK( - shared_wbm_db->Put(WriteOptions(), Key(1), DummyString(1 /* len */))); - std::string prop; - ASSERT_TRUE( - shared_wbm_db->GetProperty("rocksdb.num-immutable-mem-table", &prop)); - ASSERT_EQ(std::to_string(i > 0 ? 1 : 0), prop); - ASSERT_TRUE( - shared_wbm_db->GetProperty("rocksdb.mem-table-flush-pending", &prop)); - ASSERT_EQ(std::to_string(i > 0 ? 1 : 0), prop); - } - - // Clean up DBs. - sleeping_task_high.WakeUp(); - sleeping_task_high.WaitUntilDone(); - ASSERT_OK(shared_wbm_db->Close()); - ASSERT_OK(DestroyDB(dbname, options)); - delete shared_wbm_db; -} - - -INSTANTIATE_TEST_CASE_P(DBWriteBufferManagerTest, DBWriteBufferManagerTest, - testing::Bool()); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/db_write_test.cc b/db/db_write_test.cc deleted file mode 100644 index d82c57376..000000000 --- a/db/db_write_test.cc +++ /dev/null @@ -1,790 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include -#include -#include -#include -#include -#include - -#include "db/db_test_util.h" -#include "db/write_batch_internal.h" -#include "db/write_thread.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "test_util/sync_point.h" -#include "util/random.h" -#include "util/string_util.h" -#include "utilities/fault_injection_env.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { - -// Test variations of WriteImpl. 
-class DBWriteTest : public DBTestBase, public testing::WithParamInterface { - public: - DBWriteTest() : DBTestBase("db_write_test", /*env_do_fsync=*/true) {} - - Options GetOptions() { return DBTestBase::GetOptions(GetParam()); } - - void Open() { DBTestBase::Reopen(GetOptions()); } -}; - -class DBWriteTestUnparameterized : public DBTestBase { - public: - explicit DBWriteTestUnparameterized() - : DBTestBase("pipelined_write_test", /*env_do_fsync=*/false) {} -}; - -// It is invalid to do sync write while disabling WAL. -TEST_P(DBWriteTest, SyncAndDisableWAL) { - WriteOptions write_options; - write_options.sync = true; - write_options.disableWAL = true; - ASSERT_TRUE(dbfull()->Put(write_options, "foo", "bar").IsInvalidArgument()); - WriteBatch batch; - ASSERT_OK(batch.Put("foo", "bar")); - ASSERT_TRUE(dbfull()->Write(write_options, &batch).IsInvalidArgument()); -} - -TEST_P(DBWriteTest, WriteStallRemoveNoSlowdownWrite) { - Options options = GetOptions(); - options.level0_stop_writes_trigger = options.level0_slowdown_writes_trigger = - 4; - std::vector threads; - std::atomic thread_num(0); - port::Mutex mutex; - port::CondVar cv(&mutex); - // Guarded by mutex - int writers = 0; - - Reopen(options); - - std::function write_slowdown_func = [&]() { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - wo.no_slowdown = false; - ASSERT_OK(dbfull()->Put(wo, key, "bar")); - }; - std::function write_no_slowdown_func = [&]() { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - wo.no_slowdown = true; - Status s = dbfull()->Put(wo, key, "bar"); - ASSERT_TRUE(s.ok() || s.IsIncomplete()); - }; - std::function unblock_main_thread_func = [&](void*) { - mutex.Lock(); - ++writers; - cv.SignalAll(); - mutex.Unlock(); - }; - - // Create 3 L0 files and schedule 4th without waiting - ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Start", unblock_main_thread_func); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBWriteTest::WriteStallRemoveNoSlowdownWrite:1", - "DBImpl::BackgroundCallFlush:start"}, - {"DBWriteTest::WriteStallRemoveNoSlowdownWrite:2", - "DBImplWrite::PipelinedWriteImpl:AfterJoinBatchGroup"}, - // Make compaction start wait for the write stall to be detected and - // implemented by a write group leader - {"DBWriteTest::WriteStallRemoveNoSlowdownWrite:3", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Schedule creation of 4th L0 file without waiting. This will seal the - // memtable and then wait for a sync point before writing the file. We need - // to do it this way because SwitchMemtable() needs to enter the - // write_thread - FlushOptions fopt; - fopt.wait = false; - ASSERT_OK(dbfull()->Flush(fopt)); - - // Create a mix of slowdown/no_slowdown write threads - mutex.Lock(); - // First leader - threads.emplace_back(write_slowdown_func); - while (writers != 1) { - cv.Wait(); - } - - // Second leader. 
Will stall writes - // Build a writers list with no slowdown in the middle: - // +-------------+ - // | slowdown +<----+ newest - // +--+----------+ - // | - // v - // +--+----------+ - // | no slowdown | - // +--+----------+ - // | - // v - // +--+----------+ - // | slowdown + - // +-------------+ - threads.emplace_back(write_slowdown_func); - while (writers != 2) { - cv.Wait(); - } - threads.emplace_back(write_no_slowdown_func); - while (writers != 3) { - cv.Wait(); - } - threads.emplace_back(write_slowdown_func); - while (writers != 4) { - cv.Wait(); - } - - mutex.Unlock(); - - TEST_SYNC_POINT("DBWriteTest::WriteStallRemoveNoSlowdownWrite:1"); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(nullptr)); - // This would have triggered a write stall. Unblock the write group leader - TEST_SYNC_POINT("DBWriteTest::WriteStallRemoveNoSlowdownWrite:2"); - // The leader is going to create missing newer links. When the leader - // finishes, the next leader is going to delay writes and fail writers with - // no_slowdown - - TEST_SYNC_POINT("DBWriteTest::WriteStallRemoveNoSlowdownWrite:3"); - for (auto& t : threads) { - t.join(); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(DBWriteTest, WriteThreadHangOnWriteStall) { - Options options = GetOptions(); - options.level0_stop_writes_trigger = options.level0_slowdown_writes_trigger = - 4; - std::vector threads; - std::atomic thread_num(0); - port::Mutex mutex; - port::CondVar cv(&mutex); - // Guarded by mutex - int writers = 0; - - Reopen(options); - - std::function write_slowdown_func = [&]() { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - wo.no_slowdown = false; - ASSERT_OK(dbfull()->Put(wo, key, "bar")); - }; - std::function write_no_slowdown_func = [&]() { - int a = thread_num.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - wo.no_slowdown = true; - Status s = dbfull()->Put(wo, key, "bar"); - ASSERT_TRUE(s.ok() || s.IsIncomplete()); - }; - std::function unblock_main_thread_func = [&](void*) { - mutex.Lock(); - ++writers; - cv.SignalAll(); - mutex.Unlock(); - }; - - // Create 3 L0 files and schedule 4th without waiting - ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar")); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Start", unblock_main_thread_func); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBWriteTest::WriteThreadHangOnWriteStall:1", - "DBImpl::BackgroundCallFlush:start"}, - {"DBWriteTest::WriteThreadHangOnWriteStall:2", - "DBImpl::WriteImpl:BeforeLeaderEnters"}, - // Make compaction start wait for the write stall to be detected and - // implemented by a write group leader - {"DBWriteTest::WriteThreadHangOnWriteStall:3", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Schedule creation of 4th L0 file without waiting. This will seal the - // memtable and then wait for a sync point before writing the file. 
We need - // to do it this way because SwitchMemtable() needs to enter the - // write_thread - FlushOptions fopt; - fopt.wait = false; - ASSERT_OK(dbfull()->Flush(fopt)); - - // Create a mix of slowdown/no_slowdown write threads - mutex.Lock(); - // First leader - threads.emplace_back(write_slowdown_func); - while (writers != 1) { - cv.Wait(); - } - // Second leader. Will stall writes - threads.emplace_back(write_slowdown_func); - threads.emplace_back(write_no_slowdown_func); - threads.emplace_back(write_slowdown_func); - threads.emplace_back(write_no_slowdown_func); - threads.emplace_back(write_slowdown_func); - while (writers != 6) { - cv.Wait(); - } - mutex.Unlock(); - - TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:1"); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(nullptr)); - // This would have triggered a write stall. Unblock the write group leader - TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:2"); - // The leader is going to create missing newer links. When the leader - // finishes, the next leader is going to delay writes and fail writers with - // no_slowdown - - TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:3"); - for (auto& t : threads) { - t.join(); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(DBWriteTest, IOErrorOnWALWritePropagateToWriteThreadFollower) { - constexpr int kNumThreads = 5; - std::unique_ptr mock_env( - new FaultInjectionTestEnv(env_)); - Options options = GetOptions(); - options.env = mock_env.get(); - Reopen(options); - std::atomic ready_count{0}; - std::atomic leader_count{0}; - std::vector threads; - mock_env->SetFilesystemActive(false); - - // Wait until all threads linked to write threads, to make sure - // all threads join the same batch group. - SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Wait", [&](void* arg) { - ready_count++; - auto* w = reinterpret_cast(arg); - if (w->state == WriteThread::STATE_GROUP_LEADER) { - leader_count++; - while (ready_count < kNumThreads) { - // busy waiting - } - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - for (int i = 0; i < kNumThreads; i++) { - threads.push_back(port::Thread( - [&](int index) { - // All threads should fail. - auto res = Put("key" + std::to_string(index), "value"); - if (options.manual_wal_flush) { - ASSERT_TRUE(res.ok()); - // we should see fs error when we do the flush - - // TSAN reports a false alarm for lock-order-inversion but Open and - // FlushWAL are not run concurrently. Disabling this until TSAN is - // fixed. - // res = dbfull()->FlushWAL(false); - // ASSERT_FALSE(res.ok()); - } else { - ASSERT_FALSE(res.ok()); - } - }, - i)); - } - for (int i = 0; i < kNumThreads; i++) { - threads[i].join(); - } - ASSERT_EQ(1, leader_count); - - // The Failed PUT operations can cause a BG error to be set. - // Mark it as Checked for the ASSERT_STATUS_CHECKED - dbfull()->Resume().PermitUncheckedError(); - - // Close before mock_env destruct. - Close(); -} - -TEST_F(DBWriteTestUnparameterized, PipelinedWriteRace) { - // This test was written to trigger a race in ExitAsBatchGroupLeader in case - // enable_pipelined_write_ was true. - // Writers for which ShouldWriteToMemtable() evaluates to false are removed - // from the write_group via CompleteFollower/ CompleteLeader. 
Writers in the - // middle of the group are fully unlinked, but if that writers is the - // last_writer, then we did not update the predecessor's link_older, i.e., - // this writer was still reachable via newest_writer_. - // - // But the problem was, that CompleteFollower already wakes up the thread - // owning that writer before the writer has been removed. This resulted in a - // race - if the leader thread was fast enough, then everything was fine. - // However, if the woken up thread finished the current write operation and - // then performed yet another write, then a new writer instance was added - // to newest_writer_. It is possible that the new writer is located on the - // same address on stack, and if this happened, then we had a problem, - // because the old code tried to find the last_writer in the list to unlink - // it, which in this case produced a cycle in the list. - // Whether two invocations of PipelinedWriteImpl() by the same thread actually - // allocate the writer on the same address depends on the OS and/or compiler, - // so it is rather hard to create a deterministic test for this. - - Options options = GetDefaultOptions(); - options.create_if_missing = true; - options.enable_pipelined_write = true; - std::vector threads; - - std::atomic write_counter{0}; - std::atomic active_writers{0}; - std::atomic second_write_starting{false}; - std::atomic second_write_in_progress{false}; - std::atomic leader{nullptr}; - std::atomic finished_WAL_write{false}; - - DestroyAndReopen(options); - - auto write_one_doc = [&]() { - int a = write_counter.fetch_add(1); - std::string key = "foo" + std::to_string(a); - WriteOptions wo; - ASSERT_OK(dbfull()->Put(wo, key, "bar")); - --active_writers; - }; - - auto write_two_docs = [&]() { - write_one_doc(); - second_write_starting = true; - write_one_doc(); - }; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Wait", [&](void* arg) { - if (second_write_starting.load()) { - second_write_in_progress = true; - return; - } - auto* w = reinterpret_cast(arg); - if (w->state == WriteThread::STATE_GROUP_LEADER) { - active_writers++; - if (leader.load() == nullptr) { - leader.store(w); - while (active_writers.load() < 2) { - // wait for another thread to join the write_group - } - } - } else { - // we disable the memtable for all followers so that they they are - // removed from the write_group before enqueuing it for the memtable - // write - w->disable_memtable = true; - active_writers++; - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::ExitAsBatchGroupLeader:Start", [&](void* arg) { - auto* wg = reinterpret_cast(arg); - if (wg->leader == leader && !finished_WAL_write) { - finished_WAL_write = true; - while (active_writers.load() < 3) { - // wait for the new writer to be enqueued - } - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::ExitAsBatchGroupLeader:AfterCompleteWriters", - [&](void* arg) { - auto* wg = reinterpret_cast(arg); - if (wg->leader == leader) { - while (!second_write_in_progress.load()) { - // wait for the old follower thread to start the next write - } - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // start leader + one follower - threads.emplace_back(write_one_doc); - while (leader.load() == nullptr) { - // wait for leader - } - - // we perform two writes in the follower, so that for the second write - // the thread reinserts a Writer with the same address - 
threads.emplace_back(write_two_docs); - - // wait for the leader to enter ExitAsBatchGroupLeader - while (!finished_WAL_write.load()) { - // wait for write_group to have finished the WAL writes - } - - // start another writer thread to be enqueued before the leader can - // complete the writers from its write_group - threads.emplace_back(write_one_doc); - - for (auto& t : threads) { - t.join(); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(DBWriteTest, ManualWalFlushInEffect) { - Options options = GetOptions(); - Reopen(options); - // try the 1st WAL created during open - ASSERT_TRUE(Put("key" + std::to_string(0), "value").ok()); - ASSERT_TRUE(options.manual_wal_flush != dbfull()->WALBufferIsEmpty()); - ASSERT_TRUE(dbfull()->FlushWAL(false).ok()); - ASSERT_TRUE(dbfull()->WALBufferIsEmpty()); - // try the 2nd wal created during SwitchWAL - ASSERT_OK(dbfull()->TEST_SwitchWAL()); - ASSERT_TRUE(Put("key" + std::to_string(0), "value").ok()); - ASSERT_TRUE(options.manual_wal_flush != dbfull()->WALBufferIsEmpty()); - ASSERT_TRUE(dbfull()->FlushWAL(false).ok()); - ASSERT_TRUE(dbfull()->WALBufferIsEmpty()); -} - -TEST_P(DBWriteTest, UnflushedPutRaceWithTrackedWalSync) { - // Repro race condition bug where unflushed WAL data extended the synced size - // recorded to MANIFEST despite being unrecoverable. - Options options = GetOptions(); - std::unique_ptr fault_env( - new FaultInjectionTestEnv(env_)); - options.env = fault_env.get(); - options.manual_wal_flush = true; - options.track_and_verify_wals_in_manifest = true; - Reopen(options); - - ASSERT_OK(Put("key1", "val1")); - - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::SyncWAL:Begin", - [this](void* /* arg */) { ASSERT_OK(Put("key2", "val2")); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(db_->FlushWAL(true /* sync */)); - - // Ensure callback ran. - ASSERT_EQ("val2", Get("key2")); - - Close(); - - // Simulate full loss of unsynced data. This drops "key2" -> "val2" from the - // DB WAL. - fault_env->DropUnsyncedFileData(); - - Reopen(options); - - // Need to close before `fault_env` goes out of scope. - Close(); -} - -TEST_P(DBWriteTest, InactiveWalFullySyncedBeforeUntracked) { - // Repro bug where a WAL is appended and switched after - // `FlushWAL(true /* sync */)`'s sync finishes and before it untracks fully - // synced inactive logs. Previously such a WAL would be wrongly untracked - // so the final append would never be synced. - Options options = GetOptions(); - std::unique_ptr fault_env( - new FaultInjectionTestEnv(env_)); - options.env = fault_env.get(); - Reopen(options); - - ASSERT_OK(Put("key1", "val1")); - - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::SyncWAL:BeforeMarkLogsSynced:1", [this](void* /* arg */) { - ASSERT_OK(Put("key2", "val2")); - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(db_->FlushWAL(true /* sync */)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - - ASSERT_OK(Put("key3", "val3")); - - ASSERT_OK(db_->FlushWAL(true /* sync */)); - - Close(); - - // Simulate full loss of unsynced data. This should drop nothing since we did - // `FlushWAL(true /* sync */)` before `Close()`. 
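A minimal sketch of the durability contract these WAL tests exercise, using only public API calls that already appear above (Options::manual_wal_flush, DB::FlushWAL); the helper name and database path are illustrative placeholders, not part of the deleted test:

#include <cassert>
#include "rocksdb/db.h"

void WalFlushExample() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.manual_wal_flush = true;  // WAL appends stay buffered until FlushWAL()
  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/wal_flush_example", &db);
  assert(s.ok());
  s = db->Put(rocksdb::WriteOptions(), "key1", "val1");
  assert(s.ok());
  // FlushWAL(true) both writes the buffered WAL data and syncs it, so losing
  // unsynced file data afterwards (as the fault-injection env simulates) drops
  // nothing that was acknowledged here.
  s = db->FlushWAL(true /* sync */);
  assert(s.ok());
  delete db;
}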
- fault_env->DropUnsyncedFileData(); - - Reopen(options); - - ASSERT_EQ("val1", Get("key1")); - ASSERT_EQ("val2", Get("key2")); - ASSERT_EQ("val3", Get("key3")); - - // Need to close before `fault_env` goes out of scope. - Close(); -} - -TEST_P(DBWriteTest, IOErrorOnWALWriteTriggersReadOnlyMode) { - std::unique_ptr mock_env( - new FaultInjectionTestEnv(env_)); - Options options = GetOptions(); - options.env = mock_env.get(); - Reopen(options); - for (int i = 0; i < 2; i++) { - // Forcibly fail WAL write for the first Put only. Subsequent Puts should - // fail due to read-only mode - mock_env->SetFilesystemActive(i != 0); - auto res = Put("key" + std::to_string(i), "value"); - // TSAN reports a false alarm for lock-order-inversion but Open and - // FlushWAL are not run concurrently. Disabling this until TSAN is - // fixed. - /* - if (options.manual_wal_flush && i == 0) { - // even with manual_wal_flush the 2nd Put should return error because of - // the read-only mode - ASSERT_TRUE(res.ok()); - // we should see fs error when we do the flush - res = dbfull()->FlushWAL(false); - } - */ - if (!options.manual_wal_flush) { - ASSERT_NOK(res); - } else { - ASSERT_OK(res); - } - } - // Close before mock_env destruct. - Close(); -} - -TEST_P(DBWriteTest, IOErrorOnSwitchMemtable) { - Random rnd(301); - std::unique_ptr mock_env( - new FaultInjectionTestEnv(env_)); - Options options = GetOptions(); - options.env = mock_env.get(); - options.writable_file_max_buffer_size = 4 * 1024 * 1024; - options.write_buffer_size = 3 * 512 * 1024; - options.wal_bytes_per_sync = 256 * 1024; - options.manual_wal_flush = true; - Reopen(options); - mock_env->SetFilesystemActive(false, Status::IOError("Not active")); - Status s; - for (int i = 0; i < 4 * 512; ++i) { - s = Put(Key(i), rnd.RandomString(1024)); - if (!s.ok()) { - break; - } - } - ASSERT_EQ(s.severity(), Status::Severity::kFatalError); - - mock_env->SetFilesystemActive(true); - // Close before mock_env destruct. 
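The preceding test ends with a write failing at Status::Severity::kFatalError. As a rough sketch of how a caller can distinguish recoverable from fatal background errors and use DB::Resume() (the same call these tests issue after re-enabling the filesystem); the helper name and logging are illustrative only:

#include <cstdio>
#include "rocksdb/db.h"

void HandleWriteFailure(rocksdb::DB* db, const rocksdb::Status& s) {
  if (s.ok()) {
    return;
  }
  if (s.severity() >= rocksdb::Status::Severity::kFatalError) {
    // Fatal or unrecoverable errors cannot be cleared in-process; the DB has
    // to be closed and reopened.
    std::fprintf(stderr, "fatal background error: %s\n", s.ToString().c_str());
    return;
  }
  // Soft/hard errors can be cleared by Resume(), which retries the failed
  // flush/compaction work and unsets the background error on success.
  rocksdb::Status r = db->Resume();
  std::fprintf(stderr, "Resume(): %s\n", r.ToString().c_str());
}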
- Close(); -} - -// Test that db->LockWAL() flushes the WAL after locking, which can fail -TEST_P(DBWriteTest, LockWALInEffect) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - Options options = GetOptions(); - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - options.env = fault_fs_env.get(); - options.disable_auto_compactions = true; - options.paranoid_checks = false; - options.max_bgerror_resume_count = 0; // manual Resume() - Reopen(options); - // try the 1st WAL created during open - ASSERT_OK(Put("key0", "value")); - ASSERT_NE(options.manual_wal_flush, dbfull()->WALBufferIsEmpty()); - ASSERT_OK(db_->LockWAL()); - ASSERT_TRUE(dbfull()->WALBufferIsEmpty()); - ASSERT_OK(db_->UnlockWAL()); - // try the 2nd wal created during SwitchWAL - ASSERT_OK(dbfull()->TEST_SwitchWAL()); - ASSERT_OK(Put("key1", "value")); - ASSERT_NE(options.manual_wal_flush, dbfull()->WALBufferIsEmpty()); - ASSERT_OK(db_->LockWAL()); - ASSERT_TRUE(dbfull()->WALBufferIsEmpty()); - ASSERT_OK(db_->UnlockWAL()); - - // Fail the WAL flush if applicable - fault_fs->SetFilesystemActive(false); - Status s = Put("key2", "value"); - if (options.manual_wal_flush) { - ASSERT_OK(s); - // I/O failure - ASSERT_NOK(db_->LockWAL()); - // Should not need UnlockWAL after LockWAL fails - } else { - ASSERT_NOK(s); - ASSERT_OK(db_->LockWAL()); - ASSERT_OK(db_->UnlockWAL()); - } - fault_fs->SetFilesystemActive(true); - ASSERT_OK(db_->Resume()); - // Writes should work again - ASSERT_OK(Put("key3", "value")); - ASSERT_EQ(Get("key3"), "value"); - - // Should be extraneous, but allowed - ASSERT_NOK(db_->UnlockWAL()); - - // Close before mock_env destruct. 
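A short sketch of the LockWAL()/UnlockWAL() pairing that LockWALInEffect above (and LockWALConcurrentRecursive below) verify: LockWAL() flushes the WAL buffer and freezes further WAL writes until the matching UnlockWAL(), while reads remain allowed. The helper name and key are illustrative:

#include <cassert>
#include <string>
#include "rocksdb/db.h"

void ReadWhileWalLocked(rocksdb::DB* db) {
  // LockWAL() flushes any buffered WAL data, then blocks further WAL writes.
  assert(db->LockWAL().ok());
  std::string value;
  rocksdb::Status s = db->Get(rocksdb::ReadOptions(), "key0", &value);
  (void)s;  // reads and other read-only operations remain allowed
  // Every successful LockWAL() must be paired with exactly one UnlockWAL().
  assert(db->UnlockWAL().ok());
}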
- Close(); -} - -TEST_P(DBWriteTest, LockWALConcurrentRecursive) { - Options options = GetOptions(); - Reopen(options); - ASSERT_OK(Put("k1", "val")); - ASSERT_OK(db_->LockWAL()); // 0 -> 1 - auto frozen_seqno = db_->GetLatestSequenceNumber(); - std::atomic t1_completed{false}; - port::Thread t1{[&]() { - // Won't finish until WAL unlocked - ASSERT_OK(Put("k1", "val2")); - t1_completed = true; - }}; - - ASSERT_OK(db_->LockWAL()); // 1 -> 2 - // Read-only ops are OK - ASSERT_EQ(Get("k1"), "val"); - { - std::vector files; - LiveFilesStorageInfoOptions lf_opts; - // A DB flush could deadlock - lf_opts.wal_size_for_flush = UINT64_MAX; - ASSERT_OK(db_->GetLiveFilesStorageInfo({lf_opts}, &files)); - } - - port::Thread t2{[&]() { - ASSERT_OK(db_->LockWAL()); // 2 -> 3 or 1 -> 2 - }}; - - ASSERT_OK(db_->UnlockWAL()); // 2 -> 1 or 3 -> 2 - // Give t1 an extra chance to jump in case of bug - std::this_thread::yield(); - t2.join(); - ASSERT_FALSE(t1_completed.load()); - - // Should now have 2 outstanding LockWAL - ASSERT_EQ(Get("k1"), "val"); - - ASSERT_OK(db_->UnlockWAL()); // 2 -> 1 - - ASSERT_FALSE(t1_completed.load()); - ASSERT_EQ(Get("k1"), "val"); - ASSERT_EQ(frozen_seqno, db_->GetLatestSequenceNumber()); - - // Ensure final Unlock is concurrency safe and extra Unlock is safe but - // non-OK - std::atomic unlock_ok{0}; - port::Thread t3{[&]() { - if (db_->UnlockWAL().ok()) { - unlock_ok++; - } - ASSERT_OK(db_->LockWAL()); - if (db_->UnlockWAL().ok()) { - unlock_ok++; - } - }}; - - if (db_->UnlockWAL().ok()) { - unlock_ok++; - } - t3.join(); - - // There was one extra unlock, so just one non-ok - ASSERT_EQ(unlock_ok.load(), 2); - - // Write can proceed - t1.join(); - ASSERT_TRUE(t1_completed.load()); - ASSERT_EQ(Get("k1"), "val2"); - // And new writes - ASSERT_OK(Put("k2", "val")); - ASSERT_EQ(Get("k2"), "val"); -} - -TEST_P(DBWriteTest, ConcurrentlyDisabledWAL) { - Options options = GetOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kAll); - Reopen(options); - std::string wal_key_prefix = "WAL_KEY_"; - std::string no_wal_key_prefix = "K_"; - // 100 KB value each for NO-WAL operation - std::string no_wal_value(1024 * 100, 'X'); - // 1B value each for WAL operation - std::string wal_value = "0"; - std::thread threads[10]; - for (int t = 0; t < 10; t++) { - threads[t] = std::thread([t, wal_key_prefix, wal_value, no_wal_key_prefix, - no_wal_value, this] { - for (int i = 0; i < 10; i++) { - ROCKSDB_NAMESPACE::WriteOptions write_option_disable; - write_option_disable.disableWAL = true; - ROCKSDB_NAMESPACE::WriteOptions write_option_default; - std::string no_wal_key = - no_wal_key_prefix + std::to_string(t) + "_" + std::to_string(i); - ASSERT_OK(this->Put(no_wal_key, no_wal_value, write_option_disable)); - std::string wal_key = - wal_key_prefix + std::to_string(i) + "_" + std::to_string(i); - ASSERT_OK(this->Put(wal_key, wal_value, write_option_default)); - ASSERT_OK(dbfull()->SyncWAL()); - } - return; - }); - } - for (auto& t : threads) { - t.join(); - } - uint64_t bytes_num = options.statistics->getTickerCount( - ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES); - // written WAL size should less than 100KB (even included HEADER & FOOTER - // overhead) - ASSERT_LE(bytes_num, 1024 * 100); -} - -INSTANTIATE_TEST_CASE_P(DBWriteTestInstance, DBWriteTest, - testing::Values(DBTestBase::kDefault, - DBTestBase::kConcurrentWALWrites, - DBTestBase::kPipelinedWrite)); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { 
- ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/dbformat_test.cc b/db/dbformat_test.cc deleted file mode 100644 index 8dc3387df..000000000 --- a/db/dbformat_test.cc +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/dbformat.h" - -#include "table/block_based/index_builder.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -static std::string IKey(const std::string& user_key, uint64_t seq, - ValueType vt) { - std::string encoded; - AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt)); - return encoded; -} - -static std::string Shorten(const std::string& s, const std::string& l) { - std::string result = s; - ShortenedIndexBuilder::FindShortestInternalKeySeparator(*BytewiseComparator(), - &result, l); - return result; -} - -static std::string ShortSuccessor(const std::string& s) { - std::string result = s; - ShortenedIndexBuilder::FindShortInternalKeySuccessor(*BytewiseComparator(), - &result); - return result; -} - -static void TestKey(const std::string& key, uint64_t seq, ValueType vt) { - std::string encoded = IKey(key, seq, vt); - - Slice in(encoded); - ParsedInternalKey decoded("", 0, kTypeValue); - - ASSERT_OK(ParseInternalKey(in, &decoded, true /* log_err_key */)); - ASSERT_EQ(key, decoded.user_key.ToString()); - ASSERT_EQ(seq, decoded.sequence); - ASSERT_EQ(vt, decoded.type); - - ASSERT_NOK(ParseInternalKey(Slice("bar"), &decoded, true /* log_err_key */)); -} - -class FormatTest : public testing::Test {}; - -TEST_F(FormatTest, InternalKey_EncodeDecode) { - const char* keys[] = {"", "k", "hello", "longggggggggggggggggggggg"}; - const uint64_t seq[] = {1, - 2, - 3, - (1ull << 8) - 1, - 1ull << 8, - (1ull << 8) + 1, - (1ull << 16) - 1, - 1ull << 16, - (1ull << 16) + 1, - (1ull << 32) - 1, - 1ull << 32, - (1ull << 32) + 1}; - for (unsigned int k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) { - for (unsigned int s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) { - TestKey(keys[k], seq[s], kTypeValue); - TestKey("hello", 1, kTypeDeletion); - } - } -} - -TEST_F(FormatTest, InternalKeyShortSeparator) { - // When user keys are same - ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 99, kTypeValue))); - ASSERT_EQ( - IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 101, kTypeValue))); - ASSERT_EQ( - IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeValue))); - ASSERT_EQ( - IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeDeletion))); - - // When user keys are misordered - ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), IKey("bar", 99, kTypeValue))); - - // When user keys are different, but correctly ordered - ASSERT_EQ( - IKey("g", kMaxSequenceNumber, kValueTypeForSeek), - Shorten(IKey("foo", 100, kTypeValue), IKey("hello", 200, 
kTypeValue))); - - ASSERT_EQ(IKey("ABC2", kMaxSequenceNumber, kValueTypeForSeek), - Shorten(IKey("ABC1AAAAA", 100, kTypeValue), - IKey("ABC2ABB", 200, kTypeValue))); - - ASSERT_EQ(IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek), - Shorten(IKey("AAA1AAA", 100, kTypeValue), - IKey("AAA2AA", 200, kTypeValue))); - - ASSERT_EQ( - IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek), - Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA4", 200, kTypeValue))); - - ASSERT_EQ( - IKey("AAA1B", kMaxSequenceNumber, kValueTypeForSeek), - Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA2", 200, kTypeValue))); - - ASSERT_EQ(IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek), - Shorten(IKey("AAA1AAA", 100, kTypeValue), - IKey("AAA2A", 200, kTypeValue))); - - ASSERT_EQ( - IKey("AAA1", 100, kTypeValue), - Shorten(IKey("AAA1", 100, kTypeValue), IKey("AAA2", 200, kTypeValue))); - - // When start user key is prefix of limit user key - ASSERT_EQ( - IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), IKey("foobar", 200, kTypeValue))); - - // When limit user key is prefix of start user key - ASSERT_EQ( - IKey("foobar", 100, kTypeValue), - Shorten(IKey("foobar", 100, kTypeValue), IKey("foo", 200, kTypeValue))); -} - -TEST_F(FormatTest, InternalKeyShortestSuccessor) { - ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek), - ShortSuccessor(IKey("foo", 100, kTypeValue))); - ASSERT_EQ(IKey("\xff\xff", 100, kTypeValue), - ShortSuccessor(IKey("\xff\xff", 100, kTypeValue))); -} - -TEST_F(FormatTest, IterKeyOperation) { - IterKey k; - const char p[] = "abcdefghijklmnopqrstuvwxyz"; - const char q[] = "0123456789"; - - ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), - std::string("")); - - k.TrimAppend(0, p, 3); - ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), - std::string("abc")); - - k.TrimAppend(1, p, 3); - ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), - std::string("aabc")); - - k.TrimAppend(0, p, 26); - ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), - std::string("abcdefghijklmnopqrstuvwxyz")); - - k.TrimAppend(26, q, 10); - ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), - std::string("abcdefghijklmnopqrstuvwxyz0123456789")); - - k.TrimAppend(36, q, 1); - ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), - std::string("abcdefghijklmnopqrstuvwxyz01234567890")); - - k.TrimAppend(26, q, 1); - ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), - std::string("abcdefghijklmnopqrstuvwxyz0")); - - // Size going up, memory allocation is triggered - k.TrimAppend(27, p, 26); - ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), - std::string("abcdefghijklmnopqrstuvwxyz0" - "abcdefghijklmnopqrstuvwxyz")); -} - -TEST_F(FormatTest, UpdateInternalKey) { - std::string user_key("abcdefghijklmnopqrstuvwxyz"); - uint64_t new_seq = 0x123456; - ValueType new_val_type = kTypeDeletion; - - std::string ikey; - AppendInternalKey(&ikey, ParsedInternalKey(user_key, 100U, kTypeValue)); - size_t ikey_size = ikey.size(); - UpdateInternalKey(&ikey, new_seq, new_val_type); - ASSERT_EQ(ikey_size, ikey.size()); - - Slice in(ikey); - ParsedInternalKey decoded; - ASSERT_OK(ParseInternalKey(in, &decoded, true /* log_err_key */)); - ASSERT_EQ(user_key, decoded.user_key.ToString()); - ASSERT_EQ(new_seq, decoded.sequence); - ASSERT_EQ(new_val_type, decoded.type); -} - -TEST_F(FormatTest, RangeTombstoneSerializeEndKey) { - RangeTombstone t("a", "b", 2); - 
InternalKey k("b", 3, kTypeValue); - const InternalKeyComparator cmp(BytewiseComparator()); - ASSERT_LT(cmp.Compare(t.SerializeEndKey(), k), 0); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/deletefile_test.cc b/db/deletefile_test.cc deleted file mode 100644 index 481eda7dd..000000000 --- a/db/deletefile_test.cc +++ /dev/null @@ -1,603 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - - -#include - -#include -#include -#include - -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "db/version_set.h" -#include "db/write_batch_internal.h" -#include "file/filename.h" -#include "port/stack_trace.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/transaction_log.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class DeleteFileTest : public DBTestBase { - public: - const int numlevels_; - const std::string wal_dir_; - - DeleteFileTest() - : DBTestBase("deletefile_test", /*env_do_fsync=*/true), - numlevels_(7), - wal_dir_(dbname_ + "/wal_files") {} - - void SetOptions(Options* options) { - ASSERT_NE(options, nullptr); - options->delete_obsolete_files_period_micros = 0; // always do full purge - options->enable_thread_tracking = true; - options->write_buffer_size = 1024 * 1024 * 1000; - options->target_file_size_base = 1024 * 1024 * 1000; - options->max_bytes_for_level_base = 1024 * 1024 * 1000; - options->WAL_ttl_seconds = 300; // Used to test log files - options->WAL_size_limit_MB = 1024; // Used to test log files - options->wal_dir = wal_dir_; - } - - void AddKeys(int numkeys, int startkey = 0) { - WriteOptions options; - options.sync = false; - ReadOptions roptions; - for (int i = startkey; i < (numkeys + startkey); i++) { - std::string temp = std::to_string(i); - Slice key(temp); - Slice value(temp); - ASSERT_OK(db_->Put(options, key, value)); - } - } - - int numKeysInLevels(std::vector& metadata, - std::vector* keysperlevel = nullptr) { - if (keysperlevel != nullptr) { - keysperlevel->resize(numlevels_); - } - - int numKeys = 0; - for (size_t i = 0; i < metadata.size(); i++) { - int startkey = atoi(metadata[i].smallestkey.c_str()); - int endkey = atoi(metadata[i].largestkey.c_str()); - int numkeysinfile = (endkey - startkey + 1); - numKeys += numkeysinfile; - if (keysperlevel != nullptr) { - (*keysperlevel)[(int)metadata[i].level] += numkeysinfile; - } - fprintf(stderr, "level %d name %s smallest %s largest %s\n", - metadata[i].level, metadata[i].name.c_str(), - metadata[i].smallestkey.c_str(), metadata[i].largestkey.c_str()); - } - return numKeys; - } - - void CreateTwoLevels() { - AddKeys(50000, 10000); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - for (int i = 0; i < 2; ++i) { - ASSERT_OK(dbfull()->TEST_CompactRange(i, nullptr, 
nullptr)); - } - - AddKeys(50000, 10000); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); - } - - void CheckFileTypeCounts(const std::string& dir, int required_log, - int required_sst, int required_manifest) { - std::vector filenames; - ASSERT_OK(env_->GetChildren(dir, &filenames)); - - int log_cnt = 0, sst_cnt = 0, manifest_cnt = 0; - for (auto file : filenames) { - uint64_t number; - FileType type; - if (ParseFileName(file, &number, &type)) { - log_cnt += (type == kWalFile); - sst_cnt += (type == kTableFile); - manifest_cnt += (type == kDescriptorFile); - } - } - if (required_log >= 0) { - ASSERT_EQ(required_log, log_cnt); - } - if (required_sst >= 0) { - ASSERT_EQ(required_sst, sst_cnt); - } - if (required_manifest >= 0) { - ASSERT_EQ(required_manifest, manifest_cnt); - } - } - - static void DoSleep(void* arg) { - auto test = reinterpret_cast(arg); - test->env_->SleepForMicroseconds(2 * 1000 * 1000); - } - - // An empty job to guard all jobs are processed - static void GuardFinish(void* /*arg*/) { - TEST_SYNC_POINT("DeleteFileTest::GuardFinish"); - } -}; - -TEST_F(DeleteFileTest, AddKeysAndQueryLevels) { - Options options = CurrentOptions(); - SetOptions(&options); - Destroy(options); - options.create_if_missing = true; - Reopen(options); - - CreateTwoLevels(); - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - - std::string level1file = ""; - int level1keycount = 0; - std::string level2file = ""; - int level2keycount = 0; - int level1index = 0; - int level2index = 1; - - ASSERT_EQ((int)metadata.size(), 2); - if (metadata[0].level == 2) { - level1index = 1; - level2index = 0; - } - - level1file = metadata[level1index].name; - int startkey = atoi(metadata[level1index].smallestkey.c_str()); - int endkey = atoi(metadata[level1index].largestkey.c_str()); - level1keycount = (endkey - startkey + 1); - level2file = metadata[level2index].name; - startkey = atoi(metadata[level2index].smallestkey.c_str()); - endkey = atoi(metadata[level2index].largestkey.c_str()); - level2keycount = (endkey - startkey + 1); - - // COntrolled setup. Levels 1 and 2 should both have 50K files. - // This is a little fragile as it depends on the current - // compaction heuristics. - ASSERT_EQ(level1keycount, 50000); - ASSERT_EQ(level2keycount, 50000); - - Status status = db_->DeleteFile("0.sst"); - ASSERT_TRUE(status.IsInvalidArgument()); - - // intermediate level files cannot be deleted. - status = db_->DeleteFile(level1file); - ASSERT_TRUE(status.IsInvalidArgument()); - - // Lowest level file deletion should succeed. 
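As a companion to the DeleteFile() checks above, a small sketch of the contract being tested, written against the public DB API (GetLiveFilesMetaData(), DeleteFile()); the helper is illustrative and simply reports which deletions RocksDB accepts:

#include <cstdio>
#include <vector>
#include "rocksdb/db.h"
#include "rocksdb/metadata.h"

void ReportDeletableFiles(rocksdb::DB* db) {
  std::vector<rocksdb::LiveFileMetaData> metadata;
  db->GetLiveFilesMetaData(&metadata);
  for (const auto& file : metadata) {
    // DeleteFile() only accepts files at the bottom-most level of their key
    // range; for every other file it returns Status::InvalidArgument().
    rocksdb::Status s = db->DeleteFile(file.name);
    std::fprintf(stderr, "DeleteFile(%s) at level %d: %s\n", file.name.c_str(),
                 file.level, s.ToString().c_str());
  }
}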
- status = db_->DeleteFile(level2file); - ASSERT_OK(status); -} - -TEST_F(DeleteFileTest, PurgeObsoleteFilesTest) { - Options options = CurrentOptions(); - SetOptions(&options); - Destroy(options); - options.create_if_missing = true; - Reopen(options); - - CreateTwoLevels(); - // there should be only one (empty) log file because CreateTwoLevels() - // flushes the memtables to disk - CheckFileTypeCounts(wal_dir_, 1, 0, 0); - // 2 ssts, 1 manifest - CheckFileTypeCounts(dbname_, 0, 2, 1); - std::string first("0"), last("999999"); - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - Slice first_slice(first), last_slice(last); - ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice)); - // 1 sst after compaction - CheckFileTypeCounts(dbname_, 0, 1, 1); - - // this time, we keep an iterator alive - Reopen(options); - Iterator* itr = nullptr; - CreateTwoLevels(); - itr = db_->NewIterator(ReadOptions()); - ASSERT_OK(itr->status()); - ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice)); - ASSERT_OK(itr->status()); - // 3 sst after compaction with live iterator - CheckFileTypeCounts(dbname_, 0, 3, 1); - delete itr; - // 1 sst after iterator deletion - CheckFileTypeCounts(dbname_, 0, 1, 1); -} - -TEST_F(DeleteFileTest, BackgroundPurgeIteratorTest) { - Options options = CurrentOptions(); - SetOptions(&options); - Destroy(options); - options.create_if_missing = true; - Reopen(options); - - std::string first("0"), last("999999"); - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - Slice first_slice(first), last_slice(last); - - // We keep an iterator alive - Iterator* itr = nullptr; - CreateTwoLevels(); - ReadOptions read_options; - read_options.background_purge_on_iterator_cleanup = true; - itr = db_->NewIterator(read_options); - ASSERT_OK(itr->status()); - ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice)); - // 3 sst after compaction with live iterator - CheckFileTypeCounts(dbname_, 0, 3, 1); - test::SleepingBackgroundTask sleeping_task_before; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_task_before, Env::Priority::HIGH); - delete itr; - test::SleepingBackgroundTask sleeping_task_after; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_task_after, Env::Priority::HIGH); - - // Make sure no purges are executed foreground - CheckFileTypeCounts(dbname_, 0, 3, 1); - sleeping_task_before.WakeUp(); - sleeping_task_before.WaitUntilDone(); - - // Make sure all background purges are executed - sleeping_task_after.WakeUp(); - sleeping_task_after.WaitUntilDone(); - // 1 sst after iterator deletion - CheckFileTypeCounts(dbname_, 0, 1, 1); -} - -TEST_F(DeleteFileTest, PurgeDuringOpen) { - Options options = CurrentOptions(); - CheckFileTypeCounts(dbname_, -1, 0, -1); - Close(); - std::unique_ptr file; - ASSERT_OK(options.env->NewWritableFile(dbname_ + "/000002.sst", &file, - EnvOptions())); - ASSERT_OK(file->Close()); - CheckFileTypeCounts(dbname_, -1, 1, -1); - options.avoid_unnecessary_blocking_io = false; - options.create_if_missing = false; - Reopen(options); - CheckFileTypeCounts(dbname_, -1, 0, -1); - Close(); - - // test background purge - options.avoid_unnecessary_blocking_io = true; - options.create_if_missing = false; - ASSERT_OK(options.env->NewWritableFile(dbname_ + "/000002.sst", &file, - EnvOptions())); - ASSERT_OK(file->Close()); - CheckFileTypeCounts(dbname_, -1, 
1, -1); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->LoadDependency( - {{"DeleteFileTest::PurgeDuringOpen:1", "DBImpl::BGWorkPurge:start"}}); - SyncPoint::GetInstance()->EnableProcessing(); - Reopen(options); - // the obsolete file is not deleted until the background purge job is ran - CheckFileTypeCounts(dbname_, -1, 1, -1); - TEST_SYNC_POINT("DeleteFileTest::PurgeDuringOpen:1"); - ASSERT_OK(dbfull()->TEST_WaitForPurge()); - CheckFileTypeCounts(dbname_, -1, 0, -1); -} - -TEST_F(DeleteFileTest, BackgroundPurgeCFDropTest) { - Options options = CurrentOptions(); - SetOptions(&options); - Destroy(options); - options.create_if_missing = true; - Reopen(options); - - auto do_test = [&](bool bg_purge) { - ColumnFamilyOptions co; - co.max_write_buffer_size_to_maintain = - static_cast(co.write_buffer_size); - WriteOptions wo; - FlushOptions fo; - ColumnFamilyHandle* cfh = nullptr; - - ASSERT_OK(db_->CreateColumnFamily(co, "dropme", &cfh)); - - ASSERT_OK(db_->Put(wo, cfh, "pika", "chu")); - ASSERT_OK(db_->Flush(fo, cfh)); - // Expect 1 sst file. - CheckFileTypeCounts(dbname_, 0, 1, 1); - - ASSERT_OK(db_->DropColumnFamily(cfh)); - // Still 1 file, it won't be deleted while ColumnFamilyHandle is alive. - CheckFileTypeCounts(dbname_, 0, 1, 1); - - delete cfh; - test::SleepingBackgroundTask sleeping_task_after; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_task_after, Env::Priority::HIGH); - // If background purge is enabled, the file should still be there. - CheckFileTypeCounts(dbname_, 0, bg_purge ? 1 : 0, 1); - TEST_SYNC_POINT("DeleteFileTest::BackgroundPurgeCFDropTest:1"); - - // Execute background purges. - sleeping_task_after.WakeUp(); - sleeping_task_after.WaitUntilDone(); - // The file should have been deleted. 
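The purge tests around here toggle ReadOptions::background_purge_on_iterator_cleanup (and DBOptions::avoid_unnecessary_blocking_io). A minimal sketch of what the iterator option changes for a normal caller; the helper name is illustrative:

#include "rocksdb/db.h"

void IterateWithBackgroundPurge(rocksdb::DB* db) {
  rocksdb::ReadOptions read_options;
  // With this flag set, the obsolete SST files pinned by the iterator are
  // deleted by a background job when the iterator is destroyed, instead of in
  // the thread that calls `delete it`.
  read_options.background_purge_on_iterator_cleanup = true;
  rocksdb::Iterator* it = db->NewIterator(read_options);
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // ... consume it->key() / it->value() ...
  }
  delete it;
}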
- CheckFileTypeCounts(dbname_, 0, 0, 1); - }; - - { - SCOPED_TRACE("avoid_unnecessary_blocking_io = false"); - do_test(false); - } - - options.avoid_unnecessary_blocking_io = true; - options.create_if_missing = false; - Reopen(options); - ASSERT_OK(dbfull()->TEST_WaitForPurge()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->LoadDependency( - {{"DeleteFileTest::BackgroundPurgeCFDropTest:1", - "DBImpl::BGWorkPurge:start"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - { - SCOPED_TRACE("avoid_unnecessary_blocking_io = true"); - do_test(true); - } -} - -// This test is to reproduce a bug that read invalid ReadOption in iterator -// cleanup function -TEST_F(DeleteFileTest, BackgroundPurgeCopyOptions) { - Options options = CurrentOptions(); - SetOptions(&options); - Destroy(options); - options.create_if_missing = true; - Reopen(options); - - std::string first("0"), last("999999"); - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - Slice first_slice(first), last_slice(last); - - // We keep an iterator alive - Iterator* itr = nullptr; - CreateTwoLevels(); - { - ReadOptions read_options; - read_options.background_purge_on_iterator_cleanup = true; - itr = db_->NewIterator(read_options); - ASSERT_OK(itr->status()); - // ReadOptions is deleted, but iterator cleanup function should not be - // affected - } - - ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice)); - // 3 sst after compaction with live iterator - CheckFileTypeCounts(dbname_, 0, 3, 1); - delete itr; - - test::SleepingBackgroundTask sleeping_task_after; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, - &sleeping_task_after, Env::Priority::HIGH); - - // Make sure all background purges are executed - sleeping_task_after.WakeUp(); - sleeping_task_after.WaitUntilDone(); - // 1 sst after iterator deletion - CheckFileTypeCounts(dbname_, 0, 1, 1); -} - -TEST_F(DeleteFileTest, BackgroundPurgeTestMultipleJobs) { - Options options = CurrentOptions(); - SetOptions(&options); - Destroy(options); - options.create_if_missing = true; - Reopen(options); - - std::string first("0"), last("999999"); - CompactRangeOptions compact_options; - compact_options.change_level = true; - compact_options.target_level = 2; - Slice first_slice(first), last_slice(last); - - // We keep an iterator alive - CreateTwoLevels(); - ReadOptions read_options; - read_options.background_purge_on_iterator_cleanup = true; - Iterator* itr1 = db_->NewIterator(read_options); - ASSERT_OK(itr1->status()); - CreateTwoLevels(); - Iterator* itr2 = db_->NewIterator(read_options); - ASSERT_OK(itr2->status()); - ASSERT_OK(db_->CompactRange(compact_options, &first_slice, &last_slice)); - // 5 sst files after 2 compactions with 2 live iterators - CheckFileTypeCounts(dbname_, 0, 5, 1); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - // ~DBImpl should wait until all BGWorkPurge are finished - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::~DBImpl:WaitJob", "DBImpl::BGWorkPurge"}, - {"DeleteFileTest::GuardFinish", - "DeleteFileTest::BackgroundPurgeTestMultipleJobs:DBClose"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - delete itr1; - env_->Schedule(&DeleteFileTest::DoSleep, this, Env::Priority::HIGH); - delete itr2; - env_->Schedule(&DeleteFileTest::GuardFinish, nullptr, Env::Priority::HIGH); - Close(); - - 
TEST_SYNC_POINT("DeleteFileTest::BackgroundPurgeTestMultipleJobs:DBClose"); - // 1 sst after iterator deletion - CheckFileTypeCounts(dbname_, 0, 1, 1); -} - -TEST_F(DeleteFileTest, DeleteFileWithIterator) { - Options options = CurrentOptions(); - SetOptions(&options); - Destroy(options); - options.create_if_missing = true; - Reopen(options); - - CreateTwoLevels(); - ReadOptions read_options; - Iterator* it = db_->NewIterator(read_options); - ASSERT_OK(it->status()); - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - - std::string level2file; - - ASSERT_EQ(metadata.size(), static_cast(2)); - if (metadata[0].level == 1) { - level2file = metadata[1].name; - } else { - level2file = metadata[0].name; - } - - Status status = db_->DeleteFile(level2file); - fprintf(stdout, "Deletion status %s: %s\n", level2file.c_str(), - status.ToString().c_str()); - ASSERT_OK(status); - it->SeekToFirst(); - int numKeysIterated = 0; - while (it->Valid()) { - numKeysIterated++; - it->Next(); - } - ASSERT_EQ(numKeysIterated, 50000); - delete it; -} - -TEST_F(DeleteFileTest, DeleteLogFiles) { - Options options = CurrentOptions(); - SetOptions(&options); - Destroy(options); - options.create_if_missing = true; - Reopen(options); - - AddKeys(10, 0); - VectorLogPtr logfiles; - ASSERT_OK(db_->GetSortedWalFiles(logfiles)); - ASSERT_GT(logfiles.size(), 0UL); - // Take the last log file which is expected to be alive and try to delete it - // Should not succeed because live logs are not allowed to be deleted - std::unique_ptr alive_log = std::move(logfiles.back()); - ASSERT_EQ(alive_log->Type(), kAliveLogFile); - ASSERT_OK(env_->FileExists(wal_dir_ + "/" + alive_log->PathName())); - fprintf(stdout, "Deleting alive log file %s\n", - alive_log->PathName().c_str()); - ASSERT_NOK(db_->DeleteFile(alive_log->PathName())); - ASSERT_OK(env_->FileExists(wal_dir_ + "/" + alive_log->PathName())); - logfiles.clear(); - - // Call Flush to bring about a new working log file and add more keys - // Call Flush again to flush out memtable and move alive log to archived log - // and try to delete the archived log file - FlushOptions fopts; - ASSERT_OK(db_->Flush(fopts)); - AddKeys(10, 0); - ASSERT_OK(db_->Flush(fopts)); - ASSERT_OK(db_->GetSortedWalFiles(logfiles)); - ASSERT_GT(logfiles.size(), 0UL); - std::unique_ptr archived_log = std::move(logfiles.front()); - ASSERT_EQ(archived_log->Type(), kArchivedLogFile); - ASSERT_OK(env_->FileExists(wal_dir_ + "/" + archived_log->PathName())); - fprintf(stdout, "Deleting archived log file %s\n", - archived_log->PathName().c_str()); - ASSERT_OK(db_->DeleteFile(archived_log->PathName())); - ASSERT_TRUE( - env_->FileExists(wal_dir_ + "/" + archived_log->PathName()).IsNotFound()); -} - -TEST_F(DeleteFileTest, DeleteNonDefaultColumnFamily) { - Options options = CurrentOptions(); - SetOptions(&options); - Destroy(options); - options.create_if_missing = true; - Reopen(options); - CreateAndReopenWithCF({"new_cf"}, options); - - Random rnd(5); - for (int i = 0; i < 1000; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), handles_[1], test::RandomKey(&rnd, 10), - test::RandomKey(&rnd, 10))); - } - ASSERT_OK(db_->Flush(FlushOptions(), handles_[1])); - for (int i = 0; i < 1000; ++i) { - ASSERT_OK(db_->Put(WriteOptions(), handles_[1], test::RandomKey(&rnd, 10), - test::RandomKey(&rnd, 10))); - } - ASSERT_OK(db_->Flush(FlushOptions(), handles_[1])); - - std::vector metadata; - db_->GetLiveFilesMetaData(&metadata); - ASSERT_EQ(2U, metadata.size()); - ASSERT_EQ("new_cf", metadata[0].column_family_name); - 
ASSERT_EQ("new_cf", metadata[1].column_family_name); - auto old_file = metadata[0].smallest_seqno < metadata[1].smallest_seqno - ? metadata[0].name - : metadata[1].name; - auto new_file = metadata[0].smallest_seqno > metadata[1].smallest_seqno - ? metadata[0].name - : metadata[1].name; - ASSERT_TRUE(db_->DeleteFile(new_file).IsInvalidArgument()); - ASSERT_OK(db_->DeleteFile(old_file)); - - { - std::unique_ptr itr(db_->NewIterator(ReadOptions(), handles_[1])); - ASSERT_OK(itr->status()); - int count = 0; - for (itr->SeekToFirst(); itr->Valid(); itr->Next()) { - ASSERT_OK(itr->status()); - ++count; - } - ASSERT_EQ(count, 1000); - } - - Close(); - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "new_cf"}, options); - - { - std::unique_ptr itr(db_->NewIterator(ReadOptions(), handles_[1])); - int count = 0; - for (itr->SeekToFirst(); itr->Valid(); itr->Next()) { - ASSERT_OK(itr->status()); - ++count; - } - ASSERT_EQ(count, 1000); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc deleted file mode 100644 index 82008705d..000000000 --- a/db/error_handler_fs_test.cc +++ /dev/null @@ -1,2862 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include "db/db_test_util.h" -#include "file/sst_file_manager_impl.h" -#include "port/stack_trace.h" -#include "rocksdb/io_status.h" -#include "rocksdb/sst_file_manager.h" -#include "test_util/sync_point.h" -#include "util/random.h" -#include "utilities/fault_injection_env.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { - -class DBErrorHandlingFSTest : public DBTestBase { - public: - DBErrorHandlingFSTest() - : DBTestBase("db_error_handling_fs_test", /*env_do_fsync=*/true) { - fault_fs_.reset(new FaultInjectionTestFS(env_->GetFileSystem())); - fault_env_.reset(new CompositeEnvWrapper(env_, fault_fs_)); - } - - std::string GetManifestNameFromLiveFiles() { - std::vector live_files; - uint64_t manifest_size; - - Status s = dbfull()->GetLiveFiles(live_files, &manifest_size, false); - if (!s.ok()) { - return ""; - } - for (auto& file : live_files) { - uint64_t num = 0; - FileType type; - if (ParseFileName(file, &num, &type) && type == kDescriptorFile) { - return file; - } - } - return ""; - } - - std::shared_ptr fault_fs_; - std::unique_ptr fault_env_; -}; - -class ErrorHandlerFSListener : public EventListener { - public: - ErrorHandlerFSListener() - : mutex_(), - cv_(&mutex_), - no_auto_recovery_(false), - recovery_complete_(false), - file_creation_started_(false), - override_bg_error_(false), - file_count_(0), - fault_fs_(nullptr) {} - ~ErrorHandlerFSListener() { - file_creation_error_.PermitUncheckedError(); - bg_error_.PermitUncheckedError(); - new_bg_error_.PermitUncheckedError(); - } - - void OnTableFileCreationStarted( - const TableFileCreationBriefInfo& /*ti*/) override { - InstrumentedMutexLock l(&mutex_); - file_creation_started_ = true; - if (file_count_ > 0) { - if (--file_count_ == 0) { - fault_fs_->SetFilesystemActive(false, file_creation_error_); - file_creation_error_ = IOStatus::OK(); - } - } - cv_.SignalAll(); - } - - void OnErrorRecoveryBegin(BackgroundErrorReason /*reason*/, Status bg_error, - bool* auto_recovery) override { - bg_error.PermitUncheckedError(); - if (*auto_recovery && no_auto_recovery_) { - *auto_recovery = false; - } - } - - void OnErrorRecoveryEnd(const BackgroundErrorRecoveryInfo& info) override { - InstrumentedMutexLock l(&mutex_); - recovery_complete_ = true; - cv_.SignalAll(); - new_bg_error_ = info.new_bg_error; - } - - bool WaitForRecovery(uint64_t /*abs_time_us*/) { - InstrumentedMutexLock l(&mutex_); - while (!recovery_complete_) { - cv_.Wait(/*abs_time_us*/); - } - if (recovery_complete_) { - recovery_complete_ = false; - return true; - } - return false; - } - - void WaitForTableFileCreationStarted(uint64_t /*abs_time_us*/) { - InstrumentedMutexLock l(&mutex_); - while (!file_creation_started_) { - cv_.Wait(/*abs_time_us*/); - } - file_creation_started_ = false; - } - - void OnBackgroundError(BackgroundErrorReason /*reason*/, - Status* bg_error) override { - if (override_bg_error_) { - *bg_error = bg_error_; - override_bg_error_ = false; - } - } - - void EnableAutoRecovery(bool enable = true) { no_auto_recovery_ = !enable; } - - void OverrideBGError(Status bg_err) { - bg_error_ = bg_err; - override_bg_error_ = true; - } - - void InjectFileCreationError(FaultInjectionTestFS* fs, int file_count, - IOStatus io_s) { - fault_fs_ = fs; - file_count_ = file_count; - file_creation_error_ = io_s; - } - - Status new_bg_error() { return new_bg_error_; } - - private: - InstrumentedMutex mutex_; - InstrumentedCondVar cv_; - bool no_auto_recovery_; - bool recovery_complete_; - bool file_creation_started_; - bool 
override_bg_error_; - int file_count_; - IOStatus file_creation_error_; - Status bg_error_; - Status new_bg_error_; - FaultInjectionTestFS* fault_fs_; -}; - -TEST_F(DBErrorHandlingFSTest, FLushWriteError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - ASSERT_OK(Put(Key(0), "val")); - SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { - fault_fs_->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); - }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT)); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - Destroy(options); -} - -// All the NoSpace IOError will be handled as the regular BG Error no matter the -// retryable flag is set of not. So the auto resume for retryable IO Error will -// not be triggered. Also, it is mapped as hard error. 
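The comment above notes that NoSpace errors are escalated to a hard error and bypass automatic recovery, while the test below sets the two DBOptions that control that recovery path. A small illustrative sketch (the helper name is made up) of how a caller would enable auto-resume for genuinely retryable background I/O errors:

#include "rocksdb/options.h"

rocksdb::Options AutoResumeOptions() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Retry recovery from retryable background I/O errors up to 4 times, waiting
  // 1 second (the interval is in microseconds) between attempts.
  options.max_bgerror_resume_count = 4;
  options.bgerror_resume_retry_interval = 1000000;
  return options;
}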
-TEST_F(DBErrorHandlingFSTest, FLushWriteNoSpaceError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::NoSpace("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(1), "val1")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT)); - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(1), "val1")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT)); - Reopen(options); - ASSERT_EQ("val1", Get(Key(1))); - - ASSERT_OK(Put(Key(2), "val2")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeSyncTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - 
SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - Reopen(options); - ASSERT_EQ("val2", Get(Key(2))); - - ASSERT_OK(Put(Key(3), "val3")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeCloseTableFile", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - Reopen(options); - ASSERT_EQ("val3", Get(Key(3))); - - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("File Scope Data Loss Error"); - error_msg.SetDataLoss(true); - error_msg.SetScope( - ROCKSDB_NAMESPACE::IOStatus::IOErrorScope::kIOErrorScopeFile); - error_msg.SetRetryable(false); - - ASSERT_OK(Put(Key(1), "val1")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - Reopen(options); - ASSERT_EQ("val1", Get(Key(1))); - - ASSERT_OK(Put(Key(2), "val2")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeSyncTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - Reopen(options); - ASSERT_EQ("val2", Get(Key(2))); - - ASSERT_OK(Put(Key(3), "val3")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeCloseTableFile", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - Reopen(options); - ASSERT_EQ("val3", Get(Key(3))); - - // not file scope, but retyrable set - error_msg.SetDataLoss(false); - error_msg.SetScope( - ROCKSDB_NAMESPACE::IOStatus::IOErrorScope::kIOErrorScopeFileSystem); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(3), "val3")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeCloseTableFile", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - 
SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - Reopen(options); - ASSERT_EQ("val3", Get(Key(3))); - - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, FLushWALWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - Status s; - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - listener->EnableAutoRecovery(false); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::SyncClosedLogs:Start", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - - CreateAndReopenWithCF({"pikachu, sdfsdfsdf"}, options); - - WriteOptions wo = WriteOptions(); - wo.disableWAL = false; - ASSERT_OK(Put(Key(1), "val1", wo)); - - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - auto cfh = dbfull()->GetColumnFamilyHandle(1); - s = dbfull()->DropColumnFamily(cfh); - - s = dbfull()->Resume(); - ASSERT_OK(s); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_OK(Put(Key(3), "val3", wo)); - ASSERT_EQ("val3", Get(Key(3))); - s = Flush(); - ASSERT_OK(s); - ASSERT_EQ("val3", Get(Key(3))); - - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, FLushWALAtomicWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - options.atomic_flush = true; - Status s; - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - listener->EnableAutoRecovery(false); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::SyncClosedLogs:Start", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - - CreateAndReopenWithCF({"pikachu, sdfsdfsdf"}, options); - - WriteOptions wo = WriteOptions(); - wo.disableWAL = false; - ASSERT_OK(Put(Key(1), "val1", wo)); - - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - auto cfh = dbfull()->GetColumnFamilyHandle(1); - s = dbfull()->DropColumnFamily(cfh); - - s = dbfull()->Resume(); - ASSERT_OK(s); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_OK(Put(Key(3), "val3", wo)); - ASSERT_EQ("val3", Get(Key(3))); - s = Flush(); - ASSERT_OK(s); - ASSERT_EQ("val3", Get(Key(3))); - - Destroy(options); -} - -// The flush error is injected before we finish the table build -TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - WriteOptions wo = 
WriteOptions(); - wo.disableWAL = true; - ASSERT_OK(Put(Key(1), "val1", wo)); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_OK(Put(Key(2), "val2", wo)); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - ASSERT_EQ("val2", Get(Key(2))); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_EQ("val2", Get(Key(2))); - ASSERT_OK(Put(Key(3), "val3", wo)); - ASSERT_EQ("val3", Get(Key(3))); - s = Flush(); - ASSERT_OK(s); - ASSERT_EQ("val3", Get(Key(3))); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT)); - - Destroy(options); -} - -// The retryable IO error is injected before we sync table -TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError2) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - WriteOptions wo = WriteOptions(); - wo.disableWAL = true; - - ASSERT_OK(Put(Key(1), "val1", wo)); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeSyncTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_OK(Put(Key(2), "val2", wo)); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - ASSERT_EQ("val2", Get(Key(2))); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_EQ("val2", Get(Key(2))); - ASSERT_OK(Put(Key(3), "val3", wo)); - ASSERT_EQ("val3", Get(Key(3))); - s = Flush(); - ASSERT_OK(s); - ASSERT_EQ("val3", Get(Key(3))); - - Destroy(options); -} - -// The retryable IO error is injected before we close the table file -TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError3) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - WriteOptions wo = WriteOptions(); - wo.disableWAL = true; - - ASSERT_OK(Put(Key(1), "val1", wo)); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeCloseTableFile", - [&](void*) { 
fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_OK(Put(Key(2), "val2", wo)); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - ASSERT_EQ("val2", Get(Key(2))); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_EQ("val2", Get(Key(2))); - ASSERT_OK(Put(Key(3), "val3", wo)); - ASSERT_EQ("val3", Get(Key(3))); - s = Flush(); - ASSERT_OK(s); - ASSERT_EQ("val3", Get(Key(3))); - - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, ManifestWriteError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - Status s; - std::string old_manifest; - std::string new_manifest; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(1), "val")); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void*) { - fault_fs_->SetFilesystemActive(false, - IOStatus::NoSpace("Out of space")); - }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - Status s; - std::string old_manifest; - std::string new_manifest; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(1), "val")); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, ManifestWriteFileScopeError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - 
Status s; - std::string old_manifest; - std::string new_manifest; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - IOStatus error_msg = IOStatus::IOError("File Scope Data Loss Error"); - error_msg.SetDataLoss(true); - error_msg.SetScope( - ROCKSDB_NAMESPACE::IOStatus::IOErrorScope::kIOErrorScopeFile); - error_msg.SetRetryable(false); - - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(1), "val")); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - Status s; - std::string old_manifest; - std::string new_manifest; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - WriteOptions wo = WriteOptions(); - wo.disableWAL = true; - ASSERT_OK(Put(Key(0), "val", wo)); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(1), "val", wo)); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - Status s; - std::string old_manifest; - std::string new_manifest; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(1), "val")); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void*) { - fault_fs_->SetFilesystemActive(false, - IOStatus::NoSpace("Out of space")); - }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - fault_fs_->SetFilesystemActive(true); - - // This Resume() will attempt to 
create a new manifest file and fail again - s = dbfull()->Resume(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - fault_fs_->SetFilesystemActive(true); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - - // A successful Resume() will create a new manifest file - s = dbfull()->Resume(); - ASSERT_OK(s); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteError) { - if (mem_env_ != nullptr) { - ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); - return; - } - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(listener); - Status s; - std::string old_manifest; - std::string new_manifest; - std::atomic fail_manifest(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Put(Key(2), "val")); - s = Flush(); - ASSERT_OK(s); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - // Wait for flush of 2nd L0 file before starting compaction - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}, - // Wait for compaction to detect manifest write error - {"BackgroundCallCompaction:1", "CompactionManifestWriteError:0"}, - // Make compaction thread wait for error to be cleared - {"CompactionManifestWriteError:1", - "DBImpl::BackgroundCallCompaction:FoundObsoleteFiles"}, - // Wait for DB instance to clear bg_error before calling - // TEST_WaitForCompact - {"SstFileManagerImpl::ErrorCleared", "CompactionManifestWriteError:2"}}); - // trigger manifest write failure in compaction thread - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { fail_manifest.store(true); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void*) { - if (fail_manifest.load()) { - fault_fs_->SetFilesystemActive(false, - IOStatus::NoSpace("Out of space")); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - // This Flush will trigger a compaction, which will fail when appending to - // the manifest - s = Flush(); - ASSERT_OK(s); - - TEST_SYNC_POINT("CompactionManifestWriteError:0"); - // Clear all errors so when the compaction is retried, it will succeed - fault_fs_->SetFilesystemActive(true); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - TEST_SYNC_POINT("CompactionManifestWriteError:1"); - TEST_SYNC_POINT("CompactionManifestWriteError:2"); - - s = dbfull()->TEST_WaitForCompact(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ASSERT_OK(s); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - ASSERT_EQ("val", Get(Key(2))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - 
options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - Status s; - std::string old_manifest; - std::string new_manifest; - std::atomic fail_manifest(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Put(Key(2), "val")); - s = Flush(); - ASSERT_OK(s); - - listener->OverrideBGError(Status(error_msg, Status::Severity::kHardError)); - listener->EnableAutoRecovery(false); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - // Wait for flush of 2nd L0 file before starting compaction - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}, - // Wait for compaction to detect manifest write error - {"BackgroundCallCompaction:1", "CompactionManifestWriteError:0"}, - // Make compaction thread wait for error to be cleared - {"CompactionManifestWriteError:1", - "DBImpl::BackgroundCallCompaction:FoundObsoleteFiles"}}); - // trigger manifest write failure in compaction thread - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { fail_manifest.store(true); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void*) { - if (fail_manifest.load()) { - fault_fs_->SetFilesystemActive(false, error_msg); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - s = Flush(); - ASSERT_OK(s); - - TEST_SYNC_POINT("CompactionManifestWriteError:0"); - TEST_SYNC_POINT("CompactionManifestWriteError:1"); - - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - - fault_fs_->SetFilesystemActive(true); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - s = dbfull()->Resume(); - ASSERT_OK(s); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - ASSERT_EQ("val", Get(Key(2))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, CompactionWriteError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(listener); - Status s; - DestroyAndReopen(options); - - ASSERT_OK(Put(Key(0), "va;")); - ASSERT_OK(Put(Key(2), "va;")); - s = Flush(); - ASSERT_OK(s); - - listener->OverrideBGError( - Status(Status::NoSpace(), Status::Severity::kHardError)); - listener->EnableAutoRecovery(false); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { - fault_fs_->SetFilesystemActive(false, - IOStatus::NoSpace("Out of space")); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - s = Flush(); - ASSERT_OK(s); - - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_OK(s); - 
Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, DISABLED_CompactionWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - Status s; - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(0), "va;")); - ASSERT_OK(Put(Key(2), "va;")); - s = Flush(); - ASSERT_OK(s); - - listener->OverrideBGError(Status(error_msg, Status::Severity::kHardError)); - listener->EnableAutoRecovery(false); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::OpenCompactionOutputFile", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:Finish", - [&](void*) { CancelAllBackgroundWork(dbfull()); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - s = Flush(); - ASSERT_OK(s); - - s = dbfull()->TEST_GetBGError(); - ASSERT_OK(s); - fault_fs_->SetFilesystemActive(true); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - s = dbfull()->Resume(); - ASSERT_OK(s); - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, DISABLED_CompactionWriteFileScopeError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 0; - Status s; - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("File Scope Data Loss Error"); - error_msg.SetDataLoss(true); - error_msg.SetScope( - ROCKSDB_NAMESPACE::IOStatus::IOErrorScope::kIOErrorScopeFile); - error_msg.SetRetryable(false); - - ASSERT_OK(Put(Key(0), "va;")); - ASSERT_OK(Put(Key(2), "va;")); - s = Flush(); - ASSERT_OK(s); - - listener->OverrideBGError(Status(error_msg, Status::Severity::kHardError)); - listener->EnableAutoRecovery(false); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::OpenCompactionOutputFile", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:Finish", - [&](void*) { CancelAllBackgroundWork(dbfull()); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - s = Flush(); - ASSERT_OK(s); - - s = dbfull()->TEST_GetBGError(); - ASSERT_OK(s); - - fault_fs_->SetFilesystemActive(true); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - s = dbfull()->Resume(); - ASSERT_OK(s); - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, CorruptionError) { - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - 
options.level0_file_num_compaction_trigger = 2; - Status s; - DestroyAndReopen(options); - - ASSERT_OK(Put(Key(0), "va;")); - ASSERT_OK(Put(Key(2), "va;")); - s = Flush(); - ASSERT_OK(s); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { - fault_fs_->SetFilesystemActive(false, - IOStatus::Corruption("Corruption")); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - s = Flush(); - ASSERT_OK(s); - - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), - ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError); - - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_NOK(s); - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, AutoRecoverFlushError) { - if (mem_env_ != nullptr) { - ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); - return; - } - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(); - DestroyAndReopen(options); - - ASSERT_OK(Put(Key(0), "val")); - SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { - fault_fs_->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); - }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - ASSERT_EQ(listener->WaitForRecovery(5000000), true); - - s = Put(Key(1), "val"); - ASSERT_OK(s); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT)); - ASSERT_EQ(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT)); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, FailRecoverFlushError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - Status s; - - listener->EnableAutoRecovery(); - DestroyAndReopen(options); - - ASSERT_OK(Put(Key(0), "val")); - SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { - fault_fs_->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); - }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - // We should be able to shutdown the database while auto recovery is going - // on in the background - Close(); - DestroyDB(dbname_, options).PermitUncheckedError(); -} - -TEST_F(DBErrorHandlingFSTest, WALWriteError) { - if (mem_env_ != nullptr) { - 
ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); - return; - } - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.writable_file_max_buffer_size = 32768; - options.listeners.emplace_back(listener); - Status s; - Random rnd(301); - - listener->EnableAutoRecovery(); - DestroyAndReopen(options); - - { - WriteBatch batch; - - for (auto i = 0; i < 100; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(dbfull()->Write(wopts, &batch)); - }; - - { - WriteBatch batch; - int write_error = 0; - - for (auto i = 100; i < 199; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - SyncPoint::GetInstance()->SetCallBack( - "WritableFileWriter::Append:BeforePrepareWrite", [&](void*) { - write_error++; - if (write_error > 2) { - fault_fs_->SetFilesystemActive(false, - IOStatus::NoSpace("Out of space")); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - WriteOptions wopts; - wopts.sync = true; - s = dbfull()->Write(wopts, &batch); - ASSERT_EQ(s, s.NoSpace()); - } - SyncPoint::GetInstance()->DisableProcessing(); - // `ClearAllCallBacks()` is needed in addition to `DisableProcessing()` to - // drain all callbacks. Otherwise, a pending callback in the background - // could re-disable `fault_fs_` after we enable it below. - SyncPoint::GetInstance()->ClearAllCallBacks(); - fault_fs_->SetFilesystemActive(true); - ASSERT_EQ(listener->WaitForRecovery(5000000), true); - for (auto i = 0; i < 199; ++i) { - if (i < 100) { - ASSERT_NE(Get(Key(i)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - } - Reopen(options); - for (auto i = 0; i < 199; ++i) { - if (i < 100) { - ASSERT_NE(Get(Key(i)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - } - Close(); -} - -TEST_F(DBErrorHandlingFSTest, WALWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.writable_file_max_buffer_size = 32768; - options.listeners.emplace_back(listener); - options.paranoid_checks = true; - options.max_bgerror_resume_count = 0; - Random rnd(301); - - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - // For the first batch, write is successful, require sync - { - WriteBatch batch; - - for (auto i = 0; i < 100; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(dbfull()->Write(wopts, &batch)); - }; - - // For the second batch, the first 2 file Append are successful, then the - // following Append fails due to file system retryable IOError. 
- { - WriteBatch batch; - int write_error = 0; - - for (auto i = 100; i < 200; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - SyncPoint::GetInstance()->SetCallBack( - "WritableFileWriter::Append:BeforePrepareWrite", [&](void*) { - write_error++; - if (write_error > 2) { - fault_fs_->SetFilesystemActive(false, error_msg); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - WriteOptions wopts; - wopts.sync = true; - Status s = dbfull()->Write(wopts, &batch); - ASSERT_TRUE(s.IsIOError()); - } - fault_fs_->SetFilesystemActive(true); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - - // Data in corrupted WAL are not stored - for (auto i = 0; i < 199; ++i) { - if (i < 100) { - ASSERT_NE(Get(Key(i)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - } - - // Resume and write a new batch, should be in the WAL - ASSERT_OK(dbfull()->Resume()); - { - WriteBatch batch; - - for (auto i = 200; i < 300; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(dbfull()->Write(wopts, &batch)); - }; - - Reopen(options); - for (auto i = 0; i < 300; ++i) { - if (i < 100 || i >= 200) { - ASSERT_NE(Get(Key(i)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - } - Close(); -} - -TEST_F(DBErrorHandlingFSTest, MultiCFWALWriteError) { - if (mem_env_ != nullptr) { - ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); - return; - } - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.writable_file_max_buffer_size = 32768; - options.listeners.emplace_back(listener); - Random rnd(301); - - listener->EnableAutoRecovery(); - CreateAndReopenWithCF({"one", "two", "three"}, options); - - { - WriteBatch batch; - - for (auto i = 1; i < 4; ++i) { - for (auto j = 0; j < 100; ++j) { - ASSERT_OK(batch.Put(handles_[i], Key(j), rnd.RandomString(1024))); - } - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(dbfull()->Write(wopts, &batch)); - }; - - { - WriteBatch batch; - int write_error = 0; - - // Write to one CF - for (auto i = 100; i < 199; ++i) { - ASSERT_OK(batch.Put(handles_[2], Key(i), rnd.RandomString(1024))); - } - - SyncPoint::GetInstance()->SetCallBack( - "WritableFileWriter::Append:BeforePrepareWrite", [&](void*) { - write_error++; - if (write_error > 2) { - fault_fs_->SetFilesystemActive(false, - IOStatus::NoSpace("Out of space")); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - WriteOptions wopts; - wopts.sync = true; - Status s = dbfull()->Write(wopts, &batch); - ASSERT_TRUE(s.IsNoSpace()); - } - SyncPoint::GetInstance()->DisableProcessing(); - // `ClearAllCallBacks()` is needed in addition to `DisableProcessing()` to - // drain all callbacks. Otherwise, a pending callback in the background - // could re-disable `fault_fs_` after we enable it below. 
- SyncPoint::GetInstance()->ClearAllCallBacks(); - fault_fs_->SetFilesystemActive(true); - ASSERT_EQ(listener->WaitForRecovery(5000000), true); - - for (auto i = 1; i < 4; ++i) { - // Every CF should have been flushed - ASSERT_EQ(NumTableFilesAtLevel(0, i), 1); - } - - for (auto i = 1; i < 4; ++i) { - for (auto j = 0; j < 199; ++j) { - if (j < 100) { - ASSERT_NE(Get(i, Key(j)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(i, Key(j)), "NOT_FOUND"); - } - } - } - ReopenWithColumnFamilies({"default", "one", "two", "three"}, options); - for (auto i = 1; i < 4; ++i) { - for (auto j = 0; j < 199; ++j) { - if (j < 100) { - ASSERT_NE(Get(i, Key(j)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(i, Key(j)), "NOT_FOUND"); - } - } - } - Close(); -} - -TEST_F(DBErrorHandlingFSTest, MultiDBCompactionError) { - if (mem_env_ != nullptr) { - ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); - return; - } - FaultInjectionTestEnv* def_env = new FaultInjectionTestEnv(env_); - std::vector> fault_envs; - std::vector fault_fs; - std::vector options; - std::vector> listener; - std::vector db; - std::shared_ptr sfm(NewSstFileManager(def_env)); - int kNumDbInstances = 3; - Random rnd(301); - - for (auto i = 0; i < kNumDbInstances; ++i) { - listener.emplace_back(new ErrorHandlerFSListener()); - options.emplace_back(GetDefaultOptions()); - fault_fs.emplace_back(new FaultInjectionTestFS(env_->GetFileSystem())); - std::shared_ptr fs(fault_fs.back()); - fault_envs.emplace_back(new CompositeEnvWrapper(def_env, fs)); - options[i].env = fault_envs.back().get(); - options[i].create_if_missing = true; - options[i].level0_file_num_compaction_trigger = 2; - options[i].writable_file_max_buffer_size = 32768; - options[i].listeners.emplace_back(listener[i]); - options[i].sst_file_manager = sfm; - DB* dbptr; - char buf[16]; - - listener[i]->EnableAutoRecovery(); - // Setup for returning error for the 3rd SST, which would be level 1 - listener[i]->InjectFileCreationError(fault_fs[i], 3, - IOStatus::NoSpace("Out of space")); - snprintf(buf, sizeof(buf), "_%d", i); - ASSERT_OK(DestroyDB(dbname_ + std::string(buf), options[i])); - ASSERT_OK(DB::Open(options[i], dbname_ + std::string(buf), &dbptr)); - db.emplace_back(dbptr); - } - - for (auto i = 0; i < kNumDbInstances; ++i) { - WriteBatch batch; - - for (auto j = 0; j <= 100; ++j) { - ASSERT_OK(batch.Put(Key(j), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(db[i]->Write(wopts, &batch)); - ASSERT_OK(db[i]->Flush(FlushOptions())); - } - - def_env->SetFilesystemActive(false, Status::NoSpace("Out of space")); - for (auto i = 0; i < kNumDbInstances; ++i) { - WriteBatch batch; - - // Write to one CF - for (auto j = 100; j < 199; ++j) { - ASSERT_OK(batch.Put(Key(j), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(db[i]->Write(wopts, &batch)); - ASSERT_OK(db[i]->Flush(FlushOptions())); - } - - for (auto i = 0; i < kNumDbInstances; ++i) { - Status s = static_cast(db[i])->TEST_WaitForCompact(true); - ASSERT_EQ(s.severity(), Status::Severity::kSoftError); - fault_fs[i]->SetFilesystemActive(true); - } - - def_env->SetFilesystemActive(true); - for (auto i = 0; i < kNumDbInstances; ++i) { - std::string prop; - ASSERT_EQ(listener[i]->WaitForRecovery(5000000), true); - ASSERT_OK(static_cast(db[i])->TEST_WaitForCompact(true)); - EXPECT_TRUE(db[i]->GetProperty( - "rocksdb.num-files-at-level" + std::to_string(0), &prop)); - EXPECT_EQ(atoi(prop.c_str()), 0); - EXPECT_TRUE(db[i]->GetProperty( - "rocksdb.num-files-at-level" + 
std::to_string(1), &prop)); - EXPECT_EQ(atoi(prop.c_str()), 1); - } - - SstFileManagerImpl* sfmImpl = - static_cast_with_check(sfm.get()); - sfmImpl->Close(); - - for (auto i = 0; i < kNumDbInstances; ++i) { - char buf[16]; - snprintf(buf, sizeof(buf), "_%d", i); - delete db[i]; - fault_fs[i]->SetFilesystemActive(true); - if (getenv("KEEP_DB")) { - printf("DB is still at %s%s\n", dbname_.c_str(), buf); - } else { - ASSERT_OK(DestroyDB(dbname_ + std::string(buf), options[i])); - } - } - options.clear(); - sfm.reset(); - delete def_env; -} - -TEST_F(DBErrorHandlingFSTest, MultiDBVariousErrors) { - if (mem_env_ != nullptr) { - ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); - return; - } - FaultInjectionTestEnv* def_env = new FaultInjectionTestEnv(env_); - std::vector> fault_envs; - std::vector fault_fs; - std::vector options; - std::vector> listener; - std::vector db; - std::shared_ptr sfm(NewSstFileManager(def_env)); - int kNumDbInstances = 3; - Random rnd(301); - - for (auto i = 0; i < kNumDbInstances; ++i) { - listener.emplace_back(new ErrorHandlerFSListener()); - options.emplace_back(GetDefaultOptions()); - fault_fs.emplace_back(new FaultInjectionTestFS(env_->GetFileSystem())); - std::shared_ptr fs(fault_fs.back()); - fault_envs.emplace_back(new CompositeEnvWrapper(def_env, fs)); - options[i].env = fault_envs.back().get(); - options[i].create_if_missing = true; - options[i].level0_file_num_compaction_trigger = 2; - options[i].writable_file_max_buffer_size = 32768; - options[i].listeners.emplace_back(listener[i]); - options[i].sst_file_manager = sfm; - DB* dbptr; - char buf[16]; - - listener[i]->EnableAutoRecovery(); - switch (i) { - case 0: - // Setup for returning error for the 3rd SST, which would be level 1 - listener[i]->InjectFileCreationError(fault_fs[i], 3, - IOStatus::NoSpace("Out of space")); - break; - case 1: - // Setup for returning error after the 1st SST, which would result - // in a hard error - listener[i]->InjectFileCreationError(fault_fs[i], 2, - IOStatus::NoSpace("Out of space")); - break; - default: - break; - } - snprintf(buf, sizeof(buf), "_%d", i); - ASSERT_OK(DestroyDB(dbname_ + std::string(buf), options[i])); - ASSERT_OK(DB::Open(options[i], dbname_ + std::string(buf), &dbptr)); - db.emplace_back(dbptr); - } - - for (auto i = 0; i < kNumDbInstances; ++i) { - WriteBatch batch; - - for (auto j = 0; j <= 100; ++j) { - ASSERT_OK(batch.Put(Key(j), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(db[i]->Write(wopts, &batch)); - ASSERT_OK(db[i]->Flush(FlushOptions())); - } - - def_env->SetFilesystemActive(false, Status::NoSpace("Out of space")); - for (auto i = 0; i < kNumDbInstances; ++i) { - WriteBatch batch; - - // Write to one CF - for (auto j = 100; j < 199; ++j) { - ASSERT_OK(batch.Put(Key(j), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(db[i]->Write(wopts, &batch)); - if (i != 1) { - ASSERT_OK(db[i]->Flush(FlushOptions())); - } else { - ASSERT_TRUE(db[i]->Flush(FlushOptions()).IsNoSpace()); - } - } - - for (auto i = 0; i < kNumDbInstances; ++i) { - Status s = static_cast(db[i])->TEST_WaitForCompact(true); - switch (i) { - case 0: - ASSERT_EQ(s.severity(), Status::Severity::kSoftError); - break; - case 1: - ASSERT_EQ(s.severity(), Status::Severity::kHardError); - break; - case 2: - ASSERT_OK(s); - break; - } - fault_fs[i]->SetFilesystemActive(true); - } - - def_env->SetFilesystemActive(true); - for (auto i = 0; i < kNumDbInstances; ++i) { - std::string prop; - if (i < 2) { - 
ASSERT_EQ(listener[i]->WaitForRecovery(5000000), true); - } - if (i == 1) { - ASSERT_OK(static_cast(db[i])->TEST_WaitForCompact(true)); - } - EXPECT_TRUE(db[i]->GetProperty( - "rocksdb.num-files-at-level" + std::to_string(0), &prop)); - EXPECT_EQ(atoi(prop.c_str()), 0); - EXPECT_TRUE(db[i]->GetProperty( - "rocksdb.num-files-at-level" + std::to_string(1), &prop)); - EXPECT_EQ(atoi(prop.c_str()), 1); - } - - SstFileManagerImpl* sfmImpl = - static_cast_with_check(sfm.get()); - sfmImpl->Close(); - - for (auto i = 0; i < kNumDbInstances; ++i) { - char buf[16]; - snprintf(buf, sizeof(buf), "_%d", i); - fault_fs[i]->SetFilesystemActive(true); - delete db[i]; - if (getenv("KEEP_DB")) { - printf("DB is still at %s%s\n", dbname_.c_str(), buf); - } else { - EXPECT_OK(DestroyDB(dbname_ + std::string(buf), options[i])); - } - } - options.clear(); - delete def_env; -} - -// When Put the KV-pair, the write option is set to disable WAL. -// If retryable error happens in this condition, map the bg error -// to soft error and trigger auto resume. During auto resume, SwitchMemtable -// is disabled to avoid small SST tables. Write can still be applied before -// the bg error is cleaned unless the memtable is full. -TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover1) { - // Activate the FS before the first resume - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - WriteOptions wo = WriteOptions(); - wo.disableWAL = true; - ASSERT_OK(Put(Key(1), "val1", wo)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"RecoverFromRetryableBGIOError:LoopOut", - "FLushWritNoWALRetryableeErrorAutoRecover1:1"}}); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("FLushWritNoWALRetryableeErrorAutoRecover1:1"); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_EQ("val1", Get(Key(1))); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - ASSERT_EQ(3, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_EQ(3, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - ASSERT_EQ(3, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_COUNT)); - ASSERT_LE(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT)); - ASSERT_LE(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT)); - HistogramData autoresume_retry; - options.statistics->histogramData(ERROR_HANDLER_AUTORESUME_RETRY_COUNT, - &autoresume_retry); - ASSERT_GE(autoresume_retry.max, 0); - ASSERT_OK(Put(Key(2), "val2", wo)); - s = Flush(); - // Since auto resume fails, the bg error is 
not cleand, flush will - // return the bg_error set before. - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - ASSERT_EQ("val2", Get(Key(2))); - - // call auto resume - ASSERT_OK(dbfull()->Resume()); - ASSERT_OK(Put(Key(3), "val3", wo)); - // After resume is successful, the flush should be ok. - ASSERT_OK(Flush()); - ASSERT_EQ("val3", Get(Key(3))); - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover2) { - // Activate the FS before the first resume - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - WriteOptions wo = WriteOptions(); - wo.disableWAL = true; - ASSERT_OK(Put(Key(1), "val1", wo)); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - ASSERT_EQ(listener->WaitForRecovery(5000000), true); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_COUNT)); - ASSERT_LE(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT)); - ASSERT_LE(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT)); - HistogramData autoresume_retry; - options.statistics->histogramData(ERROR_HANDLER_AUTORESUME_RETRY_COUNT, - &autoresume_retry); - ASSERT_GE(autoresume_retry.max, 0); - ASSERT_OK(Put(Key(2), "val2", wo)); - s = Flush(); - // Since auto resume is successful, the bg error is cleaned, flush will - // be successful. - ASSERT_OK(s); - ASSERT_EQ("val2", Get(Key(2))); - Destroy(options); -} - -// Auto resume fromt the flush retryable IO error. Activate the FS before the -// first resume. 
Resume is successful -TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover1) { - // Activate the FS before the first resume - std::shared_ptr<ErrorHandlerFSListener> listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(1), "val1")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - ASSERT_EQ(listener->WaitForRecovery(5000000), true); - - ASSERT_EQ("val1", Get(Key(1))); - Reopen(options); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_OK(Put(Key(2), "val2")); - ASSERT_OK(Flush()); - ASSERT_EQ("val2", Get(Key(2))); - - Destroy(options); -} - -// Auto resume from the flush retryable IO error and set the retry limit count. -// Never activate the FS, so auto resume should fail at the end -TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { - // Fail all the resumes and let the user resume - std::shared_ptr<ErrorHandlerFSListener> listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(1), "val1")); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"FLushWritRetryableeErrorAutoRecover2:0", - "RecoverFromRetryableBGIOError:BeforeStart"}, - {"RecoverFromRetryableBGIOError:LoopOut", - "FLushWritRetryableeErrorAutoRecover2:1"}}); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover2:0"); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover2:1"); - fault_fs_->SetFilesystemActive(true); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - - ASSERT_EQ("val1", Get(Key(1))); - // Auto resume fails because the FS does not recover during resume. The user - // calls resume manually here. - s = dbfull()->Resume(); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_OK(s); - ASSERT_OK(Put(Key(2), "val2")); - ASSERT_OK(Flush()); - ASSERT_EQ("val2", Get(Key(2))); - - Destroy(options); -} - -// Auto resume from the flush retryable IO error and set the retry limit count. -// Fail the first resume and let the second resume be successful. 
-TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) { - // Fail the first resume and let the second resume be successful - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - Status s; - std::string old_manifest; - std::string new_manifest; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(1), "val")); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"RecoverFromRetryableBGIOError:BeforeStart", - "ManifestWriteRetryableErrorAutoRecover:0"}, - {"ManifestWriteRetryableErrorAutoRecover:1", - "RecoverFromRetryableBGIOError:BeforeWait1"}, - {"RecoverFromRetryableBGIOError:RecoverSuccess", - "ManifestWriteRetryableErrorAutoRecover:2"}}); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("ManifestWriteRetryableErrorAutoRecover:0"); - fault_fs_->SetFilesystemActive(true); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - TEST_SYNC_POINT("ManifestWriteRetryableErrorAutoRecover:1"); - TEST_SYNC_POINT("ManifestWriteRetryableErrorAutoRecover:2"); - SyncPoint::GetInstance()->DisableProcessing(); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableErrorAutoRecover) { - // Fail the first resume and let the second resume be successful - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - Status s; - std::string old_manifest; - std::string new_manifest; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - WriteOptions wo = WriteOptions(); - wo.disableWAL = true; - ASSERT_OK(Put(Key(0), "val", wo)); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(1), "val", wo)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"RecoverFromRetryableBGIOError:BeforeStart", - "ManifestWriteNoWALRetryableErrorAutoRecover:0"}, - {"ManifestWriteNoWALRetryableErrorAutoRecover:1", - "RecoverFromRetryableBGIOError:BeforeWait1"}, - {"RecoverFromRetryableBGIOError:RecoverSuccess", - "ManifestWriteNoWALRetryableErrorAutoRecover:2"}}); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), 
ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("ManifestWriteNoWALRetryableErrorAutoRecover:0"); - fault_fs_->SetFilesystemActive(true); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - TEST_SYNC_POINT("ManifestWriteNoWALRetryableErrorAutoRecover:1"); - TEST_SYNC_POINT("ManifestWriteNoWALRetryableErrorAutoRecover:2"); - SyncPoint::GetInstance()->DisableProcessing(); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, - CompactionManifestWriteRetryableErrorAutoRecover) { - std::shared_ptr<ErrorHandlerFSListener> listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - Status s; - std::string old_manifest; - std::string new_manifest; - std::atomic<bool> fail_manifest(false); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Put(Key(2), "val")); - ASSERT_OK(Flush()); - - listener->OverrideBGError(Status(error_msg, Status::Severity::kHardError)); - listener->EnableAutoRecovery(false); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - // Wait for flush of 2nd L0 file before starting compaction - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}, - // Wait for compaction to detect manifest write error - {"BackgroundCallCompaction:1", "CompactionManifestWriteErrorAR:0"}, - // Make compaction thread wait for error to be cleared - {"CompactionManifestWriteErrorAR:1", - "DBImpl::BackgroundCallCompaction:FoundObsoleteFiles"}, - {"CompactionManifestWriteErrorAR:2", - "RecoverFromRetryableBGIOError:BeforeStart"}, - // Fail the first resume, before the wait in resume - {"RecoverFromRetryableBGIOError:BeforeResume0", - "CompactionManifestWriteErrorAR:3"}, - // Activate the FS before the second resume - {"CompactionManifestWriteErrorAR:4", - "RecoverFromRetryableBGIOError:BeforeResume1"}, - // Wait for the auto resume to be successful - {"RecoverFromRetryableBGIOError:RecoverSuccess", - "CompactionManifestWriteErrorAR:5"}}); - // trigger manifest write failure in compaction thread - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { fail_manifest.store(true); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void*) { - if (fail_manifest.load()) { - fault_fs_->SetFilesystemActive(false, error_msg); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - s = Flush(); - ASSERT_OK(s); - - TEST_SYNC_POINT("CompactionManifestWriteErrorAR:0"); - TEST_SYNC_POINT("CompactionManifestWriteErrorAR:1"); - - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - TEST_SYNC_POINT("CompactionManifestWriteErrorAR:2"); - TEST_SYNC_POINT("CompactionManifestWriteErrorAR:3"); - fault_fs_->SetFilesystemActive(true); - SyncPoint::GetInstance()->ClearAllCallBacks(); - TEST_SYNC_POINT("CompactionManifestWriteErrorAR:4"); - 
TEST_SYNC_POINT("CompactionManifestWriteErrorAR:5"); - SyncPoint::GetInstance()->DisableProcessing(); - - new_manifest = GetManifestNameFromLiveFiles(); - ASSERT_NE(new_manifest, old_manifest); - - Reopen(options); - ASSERT_EQ("val", Get(Key(0))); - ASSERT_EQ("val", Get(Key(1))); - ASSERT_EQ("val", Get(Key(2))); - Close(); -} - -TEST_F(DBErrorHandlingFSTest, CompactionWriteRetryableErrorAutoRecover) { - // In this test, in the first round of compaction, the FS is set to error. - // So the first compaction fails due to retryable IO error and it is mapped - // to soft error. Then, compaction is rescheduled, in the second round of - // compaction, the FS is set to active and compaction is successful, so - // the test will hit the CompactionJob::FinishCompactionOutputFile1 sync - // point. - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(listener); - Status s; - std::atomic fail_first(false); - std::atomic fail_second(true); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(0), "va;")); - ASSERT_OK(Put(Key(2), "va;")); - s = Flush(); - ASSERT_OK(s); - - listener->OverrideBGError(Status(error_msg, Status::Severity::kHardError)); - listener->EnableAutoRecovery(false); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}, - {"CompactionJob::FinishCompactionOutputFile1", - "CompactionWriteRetryableErrorAutoRecover0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:Start", - [&](void*) { fault_fs_->SetFilesystemActive(true); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { fail_first.store(true); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::OpenCompactionOutputFile", [&](void*) { - if (fail_first.load() && fail_second.load()) { - fault_fs_->SetFilesystemActive(false, error_msg); - fail_second.store(false); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - s = Flush(); - ASSERT_OK(s); - - s = dbfull()->TEST_WaitForCompact(); - ASSERT_OK(s); - TEST_SYNC_POINT("CompactionWriteRetryableErrorAutoRecover0"); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover1) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.writable_file_max_buffer_size = 32768; - options.listeners.emplace_back(listener); - options.paranoid_checks = true; - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - Status s; - Random rnd(301); - - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - // For the first batch, write is successful, require sync - { - WriteBatch batch; - - for (auto i = 0; i < 100; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(dbfull()->Write(wopts, 
&batch)); - }; - - // For the second batch, the first 2 file Append are successful, then the - // following Append fails due to file system retryable IOError. - { - WriteBatch batch; - int write_error = 0; - - for (auto i = 100; i < 200; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"WALWriteErrorDone", "RecoverFromRetryableBGIOError:BeforeStart"}, - {"RecoverFromRetryableBGIOError:BeforeResume0", "WALWriteError1:0"}, - {"WALWriteError1:1", "RecoverFromRetryableBGIOError:BeforeResume1"}, - {"RecoverFromRetryableBGIOError:RecoverSuccess", "WALWriteError1:2"}}); - - SyncPoint::GetInstance()->SetCallBack( - "WritableFileWriter::Append:BeforePrepareWrite", [&](void*) { - write_error++; - if (write_error > 2) { - fault_fs_->SetFilesystemActive(false, error_msg); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - WriteOptions wopts; - wopts.sync = true; - s = dbfull()->Write(wopts, &batch); - ASSERT_EQ(true, s.IsIOError()); - TEST_SYNC_POINT("WALWriteErrorDone"); - - TEST_SYNC_POINT("WALWriteError1:0"); - fault_fs_->SetFilesystemActive(true); - SyncPoint::GetInstance()->ClearAllCallBacks(); - TEST_SYNC_POINT("WALWriteError1:1"); - TEST_SYNC_POINT("WALWriteError1:2"); - } - SyncPoint::GetInstance()->DisableProcessing(); - - // Data in corrupted WAL are not stored - for (auto i = 0; i < 199; ++i) { - if (i < 100) { - ASSERT_NE(Get(Key(i)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - } - - // Resume and write a new batch, should be in the WAL - { - WriteBatch batch; - - for (auto i = 200; i < 300; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(dbfull()->Write(wopts, &batch)); - }; - - Reopen(options); - for (auto i = 0; i < 300; ++i) { - if (i < 100 || i >= 200) { - ASSERT_NE(Get(Key(i)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - } - Close(); -} - -TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover2) { - // Fail the first recover and try second time. - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.writable_file_max_buffer_size = 32768; - options.listeners.emplace_back(listener); - options.paranoid_checks = true; - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - Status s; - Random rnd(301); - - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - // For the first batch, write is successful, require sync - { - WriteBatch batch; - - for (auto i = 0; i < 100; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(dbfull()->Write(wopts, &batch)); - }; - - // For the second batch, the first 2 file Append are successful, then the - // following Append fails due to file system retryable IOError. 
- { - WriteBatch batch; - int write_error = 0; - - for (auto i = 100; i < 200; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"RecoverFromRetryableBGIOError:BeforeWait0", "WALWriteError2:0"}, - {"WALWriteError2:1", "RecoverFromRetryableBGIOError:BeforeWait1"}, - {"RecoverFromRetryableBGIOError:RecoverSuccess", "WALWriteError2:2"}}); - - SyncPoint::GetInstance()->SetCallBack( - "WritableFileWriter::Append:BeforePrepareWrite", [&](void*) { - write_error++; - if (write_error > 2) { - fault_fs_->SetFilesystemActive(false, error_msg); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - WriteOptions wopts; - wopts.sync = true; - s = dbfull()->Write(wopts, &batch); - ASSERT_EQ(true, s.IsIOError()); - - TEST_SYNC_POINT("WALWriteError2:0"); - fault_fs_->SetFilesystemActive(true); - SyncPoint::GetInstance()->ClearAllCallBacks(); - TEST_SYNC_POINT("WALWriteError2:1"); - TEST_SYNC_POINT("WALWriteError2:2"); - } - SyncPoint::GetInstance()->DisableProcessing(); - - // Data in corrupted WAL are not stored - for (auto i = 0; i < 199; ++i) { - if (i < 100) { - ASSERT_NE(Get(Key(i)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - } - - // Resume and write a new batch, should be in the WAL - { - WriteBatch batch; - - for (auto i = 200; i < 300; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(dbfull()->Write(wopts, &batch)); - }; - - Reopen(options); - for (auto i = 0; i < 300; ++i) { - if (i < 100 || i >= 200) { - ASSERT_NE(Get(Key(i)), "NOT_FOUND"); - } else { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - } - Close(); -} - -// Fail auto resume from a flush retryable error and verify that -// OnErrorRecoveryEnd listener callback is called -TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAbortRecovery) { - // Activate the FS before the first resume - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(1), "val1")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - ASSERT_EQ(listener->WaitForRecovery(5000000), true); - ASSERT_EQ(listener->new_bg_error(), Status::Aborted()); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, FlushReadError) { - std::shared_ptr listener = - std::make_shared(); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - ASSERT_OK(Put(Key(0), "val")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeOutputValidation", [&](void*) { - IOStatus st = 
IOStatus::IOError(); - st.SetRetryable(true); - st.SetScope(IOStatus::IOErrorScope::kIOErrorScopeFile); - fault_fs_->SetFilesystemActive(false, st); - }); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeDeleteFile", - [&](void*) { fault_fs_->SetFilesystemActive(true, IOStatus::OK()); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - ASSERT_EQ(listener->WaitForRecovery(5000000), true); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT)); - ASSERT_LE(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_COUNT)); - ASSERT_LE(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT)); - s = dbfull()->TEST_GetBGError(); - ASSERT_OK(s); - - Reopen(GetDefaultOptions()); - ASSERT_EQ("val", Get(Key(0))); -} - -TEST_F(DBErrorHandlingFSTest, AtomicFlushReadError) { - std::shared_ptr listener = - std::make_shared(); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(false); - options.atomic_flush = true; - CreateAndReopenWithCF({"pikachu"}, options); - - ASSERT_OK(Put(0, Key(0), "val")); - ASSERT_OK(Put(1, Key(0), "val")); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeOutputValidation", [&](void*) { - IOStatus st = IOStatus::IOError(); - st.SetRetryable(true); - st.SetScope(IOStatus::IOErrorScope::kIOErrorScopeFile); - fault_fs_->SetFilesystemActive(false, st); - }); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeDeleteFile", - [&](void*) { fault_fs_->SetFilesystemActive(true, IOStatus::OK()); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush({0, 1}); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - ASSERT_EQ(listener->WaitForRecovery(5000000), true); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - ASSERT_EQ(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT)); - ASSERT_LE(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_COUNT)); - ASSERT_LE(0, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT)); - s = dbfull()->TEST_GetBGError(); - ASSERT_OK(s); - - TryReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, - GetDefaultOptions()); - ASSERT_EQ("val", Get(Key(0))); -} - -TEST_F(DBErrorHandlingFSTest, AtomicFlushNoSpaceError) { - std::shared_ptr listener = - std::make_shared(); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.statistics = CreateDBStatistics(); - Status s; - - listener->EnableAutoRecovery(true); - options.atomic_flush = true; - CreateAndReopenWithCF({"pikachu"}, options); - - 
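FlushReadError and AtomicFlushReadError above both hinge on the shape of the injected error: a retryable IOError scoped to a single file is what lets the error handler downgrade the failure to a soft background error and schedule auto-resume. A minimal sketch of that injection, assuming a FaultInjectionTestFS wrapper like the fault_fs_ used in these tests:

#include "rocksdb/io_status.h"
#include "utilities/fault_injection_fs.h"

void InjectRetryableFileScopedError(
    ROCKSDB_NAMESPACE::FaultInjectionTestFS* fault_fs) {
  ROCKSDB_NAMESPACE::IOStatus st = ROCKSDB_NAMESPACE::IOStatus::IOError();
  st.SetRetryable(true);  // eligible for automatic resume
  st.SetScope(ROCKSDB_NAMESPACE::IOStatus::IOErrorScope::kIOErrorScopeFile);
  // Every subsequent IO through the wrapper fails with st until the
  // filesystem is set active again.
  fault_fs->SetFilesystemActive(false, st);
}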
ASSERT_OK(Put(0, Key(0), "val")); - ASSERT_OK(Put(1, Key(0), "val")); - SyncPoint::GetInstance()->SetCallBack("BuildTable:create_file", [&](void*) { - IOStatus st = IOStatus::NoSpace(); - fault_fs_->SetFilesystemActive(false, st); - }); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeDeleteFile", - [&](void*) { fault_fs_->SetFilesystemActive(true, IOStatus::OK()); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush({0, 1}); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - ASSERT_EQ(listener->WaitForRecovery(5000000), true); - ASSERT_LE(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_ERROR_COUNT)); - ASSERT_LE(1, options.statistics->getAndResetTickerCount( - ERROR_HANDLER_BG_IO_ERROR_COUNT)); - s = dbfull()->TEST_GetBGError(); - ASSERT_OK(s); - - TryReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, - GetDefaultOptions()); - ASSERT_EQ("val", Get(Key(0))); -} - -TEST_F(DBErrorHandlingFSTest, CompactionReadRetryableErrorAutoRecover) { - // In this test, in the first round of compaction, the FS is set to error. - // So the first compaction fails due to retryable IO error and it is mapped - // to soft error. Then, compaction is rescheduled, in the second round of - // compaction, the FS is set to active and compaction is successful, so - // the test will hit the CompactionJob::FinishCompactionOutputFile1 sync - // point. - std::shared_ptr listener = - std::make_shared(); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(listener); - BlockBasedTableOptions table_options; - table_options.no_block_cache = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Status s; - std::atomic fail_first(false); - std::atomic fail_second(true); - Random rnd(301); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - for (int i = 0; i < 100; ++i) { - ASSERT_OK(Put(Key(i), rnd.RandomString(1024))); - } - s = Flush(); - ASSERT_OK(s); - - listener->OverrideBGError(Status(error_msg, Status::Severity::kHardError)); - listener->EnableAutoRecovery(false); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}, - {"CompactionJob::FinishCompactionOutputFile1", - "CompactionWriteRetryableErrorAutoRecover0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BackgroundCompaction:Start", - [&](void*) { fault_fs_->SetFilesystemActive(true); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { fail_first.store(true); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::Run():PausingManualCompaction:2", [&](void*) { - if (fail_first.load() && fail_second.load()) { - fault_fs_->SetFilesystemActive(false, error_msg); - fail_second.store(false); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - s = Flush(); - ASSERT_OK(s); - - s = dbfull()->TEST_WaitForCompact(); - ASSERT_OK(s); - TEST_SYNC_POINT("CompactionWriteRetryableErrorAutoRecover0"); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - - 
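CompactionReadRetryableErrorAutoRecover above relies on two SyncPoint features: LoadDependency to force the flush to finish before the background compaction starts, and a callback at compaction start to heal the filesystem so the rescheduled compaction can succeed. A stripped-down sketch of that choreography, using the marker names that appear in the test:

#include <functional>

#include "test_util/sync_point.h"

void OrderFlushBeforeCompactionAndHeal(
    const std::function<void()>& heal_filesystem) {
  using ROCKSDB_NAMESPACE::SyncPoint;
  // Compaction may not start until the flush has finished.
  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::FlushMemTable:FlushMemTableFinished",
        "BackgroundCallCompaction:0"}});
  // Callbacks run synchronously at the named point in the production code.
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:Start",
      [heal_filesystem](void* /*arg*/) { heal_filesystem(); });
  SyncPoint::GetInstance()->EnableProcessing();
}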
Reopen(GetDefaultOptions()); -} - -class DBErrorHandlingFencingTest : public DBErrorHandlingFSTest, - public testing::WithParamInterface {}; - -TEST_P(DBErrorHandlingFencingTest, FLushWriteFenced) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.paranoid_checks = GetParam(); - Status s; - - listener->EnableAutoRecovery(true); - DestroyAndReopen(options); - - ASSERT_OK(Put(Key(0), "val")); - SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { - fault_fs_->SetFilesystemActive(false, IOStatus::IOFenced("IO fenced")); - }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError); - ASSERT_TRUE(s.IsIOFenced()); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_TRUE(s.IsIOFenced()); - Destroy(options); -} - -TEST_P(DBErrorHandlingFencingTest, ManifestWriteFenced) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.paranoid_checks = GetParam(); - Status s; - std::string old_manifest; - std::string new_manifest; - - listener->EnableAutoRecovery(true); - DestroyAndReopen(options); - old_manifest = GetManifestNameFromLiveFiles(); - - ASSERT_OK(Put(Key(0), "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put(Key(1), "val")); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void*) { - fault_fs_->SetFilesystemActive(false, IOStatus::IOFenced("IO fenced")); - }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError); - ASSERT_TRUE(s.IsIOFenced()); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_TRUE(s.IsIOFenced()); - Close(); -} - -TEST_P(DBErrorHandlingFencingTest, CompactionWriteFenced) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.listeners.emplace_back(listener); - options.paranoid_checks = GetParam(); - Status s; - DestroyAndReopen(options); - - ASSERT_OK(Put(Key(0), "va;")); - ASSERT_OK(Put(Key(2), "va;")); - s = Flush(); - ASSERT_OK(s); - - listener->EnableAutoRecovery(true); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::FlushMemTable:FlushMemTableFinished", - "BackgroundCallCompaction:0"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BackgroundCallCompaction:0", [&](void*) { - fault_fs_->SetFilesystemActive(false, IOStatus::IOFenced("IO fenced")); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(Put(Key(1), "val")); - s = Flush(); - ASSERT_OK(s); - - s = dbfull()->TEST_WaitForCompact(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError); - ASSERT_TRUE(s.IsIOFenced()); - - fault_fs_->SetFilesystemActive(true); - s = dbfull()->Resume(); - ASSERT_TRUE(s.IsIOFenced()); - Destroy(options); -} - -TEST_P(DBErrorHandlingFencingTest, WALWriteFenced) 
{ - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.writable_file_max_buffer_size = 32768; - options.listeners.emplace_back(listener); - options.paranoid_checks = GetParam(); - Status s; - Random rnd(301); - - listener->EnableAutoRecovery(true); - DestroyAndReopen(options); - - { - WriteBatch batch; - - for (auto i = 0; i < 100; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - ASSERT_OK(dbfull()->Write(wopts, &batch)); - }; - - { - WriteBatch batch; - int write_error = 0; - - for (auto i = 100; i < 199; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - SyncPoint::GetInstance()->SetCallBack( - "WritableFileWriter::Append:BeforePrepareWrite", [&](void*) { - write_error++; - if (write_error > 2) { - fault_fs_->SetFilesystemActive(false, - IOStatus::IOFenced("IO fenced")); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - WriteOptions wopts; - wopts.sync = true; - s = dbfull()->Write(wopts, &batch); - ASSERT_TRUE(s.IsIOFenced()); - } - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - { - WriteBatch batch; - - for (auto i = 0; i < 100; ++i) { - ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); - } - - WriteOptions wopts; - wopts.sync = true; - s = dbfull()->Write(wopts, &batch); - ASSERT_TRUE(s.IsIOFenced()); - } - Close(); -} - -INSTANTIATE_TEST_CASE_P(DBErrorHandlingFSTest, DBErrorHandlingFencingTest, - ::testing::Bool()); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc deleted file mode 100644 index 7fc5bc260..000000000 --- a/db/external_sst_file_basic_test.cc +++ /dev/null @@ -1,1999 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
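One property the fencing tests above pin down is that IOStatus::IOFenced is terminal: the error is raised to kFatalError and DB::Resume() keeps returning an IOFenced status even after the fault filesystem has been made healthy again. A minimal sketch of that check, assuming an already-open DB handle:

#include "rocksdb/db.h"

// Returns true if the DB is fenced off for good: Resume() still fails with
// IOFenced, so the only way forward is to close the DB (or fail over).
bool IsPermanentlyFenced(ROCKSDB_NAMESPACE::DB* db) {
  ROCKSDB_NAMESPACE::Status s = db->Resume();
  return s.IsIOFenced();
}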
- -#include - -#include "db/db_test_util.h" -#include "db/version_edit.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/sst_file_writer.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/random.h" -#include "utilities/fault_injection_env.h" - -namespace ROCKSDB_NAMESPACE { - -class ExternalSSTFileBasicTest - : public DBTestBase, - public ::testing::WithParamInterface> { - public: - ExternalSSTFileBasicTest() - : DBTestBase("external_sst_file_basic_test", /*env_do_fsync=*/true) { - sst_files_dir_ = dbname_ + "_sst_files/"; - fault_injection_test_env_.reset(new FaultInjectionTestEnv(env_)); - DestroyAndRecreateExternalSSTFilesDir(); - - // Check if the Env supports RandomRWFile - std::string file_path = sst_files_dir_ + "test_random_rw_file"; - std::unique_ptr wfile; - assert(env_->NewWritableFile(file_path, &wfile, EnvOptions()).ok()); - wfile.reset(); - std::unique_ptr rwfile; - Status s = env_->NewRandomRWFile(file_path, &rwfile, EnvOptions()); - if (s.IsNotSupported()) { - random_rwfile_supported_ = false; - } else { - EXPECT_OK(s); - random_rwfile_supported_ = true; - } - rwfile.reset(); - EXPECT_OK(env_->DeleteFile(file_path)); - } - - void DestroyAndRecreateExternalSSTFilesDir() { - ASSERT_OK(DestroyDir(env_, sst_files_dir_)); - ASSERT_OK(env_->CreateDir(sst_files_dir_)); - } - - Status DeprecatedAddFile(const std::vector& files, - bool move_files = false, - bool skip_snapshot_check = false) { - IngestExternalFileOptions opts; - opts.move_files = move_files; - opts.snapshot_consistency = !skip_snapshot_check; - opts.allow_global_seqno = false; - opts.allow_blocking_flush = false; - return db_->IngestExternalFile(files, opts); - } - - Status AddFileWithFileChecksum( - const std::vector& files, - const std::vector& files_checksums, - const std::vector& files_checksum_func_names, - bool verify_file_checksum = true, bool move_files = false, - bool skip_snapshot_check = false, bool write_global_seqno = true) { - IngestExternalFileOptions opts; - opts.move_files = move_files; - opts.snapshot_consistency = !skip_snapshot_check; - opts.allow_global_seqno = false; - opts.allow_blocking_flush = false; - opts.write_global_seqno = write_global_seqno; - opts.verify_file_checksum = verify_file_checksum; - - IngestExternalFileArg arg; - arg.column_family = db_->DefaultColumnFamily(); - arg.external_files = files; - arg.options = opts; - arg.files_checksums = files_checksums; - arg.files_checksum_func_names = files_checksum_func_names; - return db_->IngestExternalFiles({arg}); - } - - Status GenerateAndAddExternalFile( - const Options options, std::vector keys, - const std::vector& value_types, - std::vector> range_deletions, int file_id, - bool write_global_seqno, bool verify_checksums_before_ingest, - std::map* true_data) { - assert(value_types.size() == 1 || keys.size() == value_types.size()); - std::string file_path = sst_files_dir_ + std::to_string(file_id); - SstFileWriter sst_file_writer(EnvOptions(), options); - - Status s = sst_file_writer.Open(file_path); - if (!s.ok()) { - return s; - } - for (size_t i = 0; i < range_deletions.size(); i++) { - // Account for the effect of range deletions on true_data before - // all point operators, even though sst_file_writer.DeleteRange - // must be called before other sst_file_writer methods. This is - // because point writes take precedence over range deletions - // in the same ingested sst. 
- std::string start_key = Key(range_deletions[i].first); - std::string end_key = Key(range_deletions[i].second); - s = sst_file_writer.DeleteRange(start_key, end_key); - if (!s.ok()) { - sst_file_writer.Finish(); - return s; - } - auto start_key_it = true_data->find(start_key); - if (start_key_it == true_data->end()) { - start_key_it = true_data->upper_bound(start_key); - } - auto end_key_it = true_data->find(end_key); - if (end_key_it == true_data->end()) { - end_key_it = true_data->upper_bound(end_key); - } - true_data->erase(start_key_it, end_key_it); - } - for (size_t i = 0; i < keys.size(); i++) { - std::string key = Key(keys[i]); - std::string value = Key(keys[i]) + std::to_string(file_id); - ValueType value_type = - (value_types.size() == 1 ? value_types[0] : value_types[i]); - switch (value_type) { - case ValueType::kTypeValue: - s = sst_file_writer.Put(key, value); - (*true_data)[key] = value; - break; - case ValueType::kTypeMerge: - s = sst_file_writer.Merge(key, value); - // we only use TestPutOperator in this test - (*true_data)[key] = value; - break; - case ValueType::kTypeDeletion: - s = sst_file_writer.Delete(key); - true_data->erase(key); - break; - default: - return Status::InvalidArgument("Value type is not supported"); - } - if (!s.ok()) { - sst_file_writer.Finish(); - return s; - } - } - s = sst_file_writer.Finish(); - - if (s.ok()) { - IngestExternalFileOptions ifo; - ifo.allow_global_seqno = true; - ifo.write_global_seqno = write_global_seqno; - ifo.verify_checksums_before_ingest = verify_checksums_before_ingest; - s = db_->IngestExternalFile({file_path}, ifo); - } - return s; - } - - Status GenerateAndAddExternalFile( - const Options options, std::vector keys, - const std::vector& value_types, int file_id, - bool write_global_seqno, bool verify_checksums_before_ingest, - std::map* true_data) { - return GenerateAndAddExternalFile( - options, keys, value_types, {}, file_id, write_global_seqno, - verify_checksums_before_ingest, true_data); - } - - Status GenerateAndAddExternalFile( - const Options options, std::vector keys, const ValueType value_type, - int file_id, bool write_global_seqno, bool verify_checksums_before_ingest, - std::map* true_data) { - return GenerateAndAddExternalFile( - options, keys, std::vector(1, value_type), file_id, - write_global_seqno, verify_checksums_before_ingest, true_data); - } - - ~ExternalSSTFileBasicTest() override { - DestroyDir(env_, sst_files_dir_).PermitUncheckedError(); - } - - protected: - std::string sst_files_dir_; - std::unique_ptr fault_injection_test_env_; - bool random_rwfile_supported_; -}; - -TEST_F(ExternalSSTFileBasicTest, Basic) { - Options options = CurrentOptions(); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // Current file size should be 0 after sst_file_writer init and before open a - // file. - ASSERT_EQ(sst_file_writer.FileSize(), 0); - - // file1.sst (0 => 99) - std::string file1 = sst_files_dir_ + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 0; k < 100; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - Status s = sst_file_writer.Finish(&file1_info); - ASSERT_OK(s) << s.ToString(); - - // Current file size should be non-zero after success write. 
- ASSERT_GT(sst_file_writer.FileSize(), 0); - - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(0)); - ASSERT_EQ(file1_info.largest_key, Key(99)); - ASSERT_EQ(file1_info.num_range_del_entries, 0); - ASSERT_EQ(file1_info.smallest_range_del_key, ""); - ASSERT_EQ(file1_info.largest_range_del_key, ""); - ASSERT_EQ(file1_info.file_checksum, kUnknownFileChecksum); - ASSERT_EQ(file1_info.file_checksum_func_name, kUnknownFileChecksumFuncName); - // sst_file_writer already finished, cannot add this value - s = sst_file_writer.Put(Key(100), "bad_val"); - ASSERT_NOK(s) << s.ToString(); - s = sst_file_writer.DeleteRange(Key(100), Key(200)); - ASSERT_NOK(s) << s.ToString(); - - DestroyAndReopen(options); - // Add file using file path - s = DeprecatedAddFile({file1}); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); - for (int k = 0; k < 100; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } - - DestroyAndRecreateExternalSSTFilesDir(); -} - -class ChecksumVerifyHelper { - private: - Options options_; - - public: - ChecksumVerifyHelper(Options& options) : options_(options) {} - ~ChecksumVerifyHelper() {} - - Status GetSingleFileChecksumAndFuncName( - const std::string& file_path, std::string* file_checksum, - std::string* file_checksum_func_name) { - Status s; - EnvOptions soptions; - std::unique_ptr file_reader; - s = options_.env->NewSequentialFile(file_path, &file_reader, soptions); - if (!s.ok()) { - return s; - } - std::unique_ptr scratch(new char[2048]); - Slice result; - FileChecksumGenFactory* file_checksum_gen_factory = - options_.file_checksum_gen_factory.get(); - if (file_checksum_gen_factory == nullptr) { - *file_checksum = kUnknownFileChecksum; - *file_checksum_func_name = kUnknownFileChecksumFuncName; - return Status::OK(); - } else { - FileChecksumGenContext gen_context; - std::unique_ptr file_checksum_gen = - file_checksum_gen_factory->CreateFileChecksumGenerator(gen_context); - *file_checksum_func_name = file_checksum_gen->Name(); - s = file_reader->Read(2048, &result, scratch.get()); - if (!s.ok()) { - return s; - } - while (result.size() != 0) { - file_checksum_gen->Update(scratch.get(), result.size()); - s = file_reader->Read(2048, &result, scratch.get()); - if (!s.ok()) { - return s; - } - } - file_checksum_gen->Finalize(); - *file_checksum = file_checksum_gen->GetChecksum(); - } - return Status::OK(); - } -}; - -TEST_F(ExternalSSTFileBasicTest, BasicWithFileChecksumCrc32c) { - Options options = CurrentOptions(); - options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); - ChecksumVerifyHelper checksum_helper(options); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // Current file size should be 0 after sst_file_writer init and before open a - // file. - ASSERT_EQ(sst_file_writer.FileSize(), 0); - - // file1.sst (0 => 99) - std::string file1 = sst_files_dir_ + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 0; k < 100; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - Status s = sst_file_writer.Finish(&file1_info); - ASSERT_OK(s) << s.ToString(); - std::string file_checksum, file_checksum_func_name; - ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( - file1, &file_checksum, &file_checksum_func_name)); - - // Current file size should be non-zero after success write. 
- ASSERT_GT(sst_file_writer.FileSize(), 0); - - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(0)); - ASSERT_EQ(file1_info.largest_key, Key(99)); - ASSERT_EQ(file1_info.num_range_del_entries, 0); - ASSERT_EQ(file1_info.smallest_range_del_key, ""); - ASSERT_EQ(file1_info.largest_range_del_key, ""); - ASSERT_EQ(file1_info.file_checksum, file_checksum); - ASSERT_EQ(file1_info.file_checksum_func_name, file_checksum_func_name); - // sst_file_writer already finished, cannot add this value - s = sst_file_writer.Put(Key(100), "bad_val"); - ASSERT_NOK(s) << s.ToString(); - s = sst_file_writer.DeleteRange(Key(100), Key(200)); - ASSERT_NOK(s) << s.ToString(); - - DestroyAndReopen(options); - // Add file using file path - s = DeprecatedAddFile({file1}); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); - for (int k = 0; k < 100; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } - - DestroyAndRecreateExternalSSTFilesDir(); -} - -TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) { - Options old_options = CurrentOptions(); - Options options = CurrentOptions(); - options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); - const ImmutableCFOptions ioptions(options); - ChecksumVerifyHelper checksum_helper(options); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // file01.sst (1000 => 1099) - std::string file1 = sst_files_dir_ + "file01.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 1000; k < 1100; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - Status s = sst_file_writer.Finish(&file1_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(1000)); - ASSERT_EQ(file1_info.largest_key, Key(1099)); - std::string file_checksum1, file_checksum_func_name1; - ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( - file1, &file_checksum1, &file_checksum_func_name1)); - ASSERT_EQ(file1_info.file_checksum, file_checksum1); - ASSERT_EQ(file1_info.file_checksum_func_name, file_checksum_func_name1); - - // file02.sst (1100 => 1299) - std::string file2 = sst_files_dir_ + "file02.sst"; - ASSERT_OK(sst_file_writer.Open(file2)); - for (int k = 1100; k < 1300; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file2_info; - s = sst_file_writer.Finish(&file2_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file2_info.file_path, file2); - ASSERT_EQ(file2_info.num_entries, 200); - ASSERT_EQ(file2_info.smallest_key, Key(1100)); - ASSERT_EQ(file2_info.largest_key, Key(1299)); - std::string file_checksum2, file_checksum_func_name2; - ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( - file2, &file_checksum2, &file_checksum_func_name2)); - ASSERT_EQ(file2_info.file_checksum, file_checksum2); - ASSERT_EQ(file2_info.file_checksum_func_name, file_checksum_func_name2); - - // file03.sst (1300 => 1499) - std::string file3 = sst_files_dir_ + "file03.sst"; - ASSERT_OK(sst_file_writer.Open(file3)); - for (int k = 1300; k < 1500; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file3_info; - s = sst_file_writer.Finish(&file3_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file3_info.file_path, file3); - ASSERT_EQ(file3_info.num_entries, 200); - ASSERT_EQ(file3_info.smallest_key, Key(1300)); - 
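The ChecksumVerifyHelper defined earlier amounts to "stream the finished SST through a generator from the configured factory". A compact sketch of the same computation, hardcoded to the crc32c factory these tests configure:

#include <memory>
#include <string>

#include "rocksdb/env.h"
#include "rocksdb/file_checksum.h"
#include "rocksdb/slice.h"

ROCKSDB_NAMESPACE::Status Crc32cFileChecksum(ROCKSDB_NAMESPACE::Env* env,
                                             const std::string& path,
                                             std::string* checksum) {
  using namespace ROCKSDB_NAMESPACE;
  std::unique_ptr<SequentialFile> reader;
  Status s = env->NewSequentialFile(path, &reader, EnvOptions());
  if (!s.ok()) return s;
  std::shared_ptr<FileChecksumGenFactory> factory =
      GetFileChecksumGenCrc32cFactory();
  std::unique_ptr<FileChecksumGenerator> gen =
      factory->CreateFileChecksumGenerator(FileChecksumGenContext());
  char scratch[2048];
  Slice chunk;
  do {
    s = reader->Read(sizeof(scratch), &chunk, scratch);
    if (!s.ok()) return s;
    gen->Update(chunk.data(), chunk.size());
  } while (chunk.size() > 0);
  gen->Finalize();
  *checksum = gen->GetChecksum();
  return Status::OK();
}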
ASSERT_EQ(file3_info.largest_key, Key(1499)); - std::string file_checksum3, file_checksum_func_name3; - ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( - file3, &file_checksum3, &file_checksum_func_name3)); - ASSERT_EQ(file3_info.file_checksum, file_checksum3); - ASSERT_EQ(file3_info.file_checksum_func_name, file_checksum_func_name3); - - // file04.sst (1500 => 1799) - std::string file4 = sst_files_dir_ + "file04.sst"; - ASSERT_OK(sst_file_writer.Open(file4)); - for (int k = 1500; k < 1800; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file4_info; - s = sst_file_writer.Finish(&file4_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file4_info.file_path, file4); - ASSERT_EQ(file4_info.num_entries, 300); - ASSERT_EQ(file4_info.smallest_key, Key(1500)); - ASSERT_EQ(file4_info.largest_key, Key(1799)); - std::string file_checksum4, file_checksum_func_name4; - ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( - file4, &file_checksum4, &file_checksum_func_name4)); - ASSERT_EQ(file4_info.file_checksum, file_checksum4); - ASSERT_EQ(file4_info.file_checksum_func_name, file_checksum_func_name4); - - // file05.sst (1800 => 1899) - std::string file5 = sst_files_dir_ + "file05.sst"; - ASSERT_OK(sst_file_writer.Open(file5)); - for (int k = 1800; k < 2000; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file5_info; - s = sst_file_writer.Finish(&file5_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file5_info.file_path, file5); - ASSERT_EQ(file5_info.num_entries, 200); - ASSERT_EQ(file5_info.smallest_key, Key(1800)); - ASSERT_EQ(file5_info.largest_key, Key(1999)); - std::string file_checksum5, file_checksum_func_name5; - ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( - file5, &file_checksum5, &file_checksum_func_name5)); - ASSERT_EQ(file5_info.file_checksum, file_checksum5); - ASSERT_EQ(file5_info.file_checksum_func_name, file_checksum_func_name5); - - // file06.sst (2000 => 2199) - std::string file6 = sst_files_dir_ + "file06.sst"; - ASSERT_OK(sst_file_writer.Open(file6)); - for (int k = 2000; k < 2200; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file6_info; - s = sst_file_writer.Finish(&file6_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file6_info.file_path, file6); - ASSERT_EQ(file6_info.num_entries, 200); - ASSERT_EQ(file6_info.smallest_key, Key(2000)); - ASSERT_EQ(file6_info.largest_key, Key(2199)); - std::string file_checksum6, file_checksum_func_name6; - ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( - file6, &file_checksum6, &file_checksum_func_name6)); - ASSERT_EQ(file6_info.file_checksum, file_checksum6); - ASSERT_EQ(file6_info.file_checksum_func_name, file_checksum_func_name6); - - s = AddFileWithFileChecksum({file1}, {file_checksum1, "xyz"}, - {file_checksum1}, true, false, false, false); - // does not care the checksum input since db does not enable file checksum - ASSERT_OK(s) << s.ToString(); - ASSERT_OK(env_->FileExists(file1)); - std::vector live_files; - dbfull()->GetLiveFilesMetaData(&live_files); - std::set set1; - for (auto f : live_files) { - set1.insert(f.name); - ASSERT_EQ(f.file_checksum, kUnknownFileChecksum); - ASSERT_EQ(f.file_checksum_func_name, kUnknownFileChecksumFuncName); - } - - // check the temperature of the file being ingested - ColumnFamilyMetaData metadata; - db_->GetColumnFamilyMetaData(&metadata); - ASSERT_EQ(1, metadata.file_count); - 
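The assertions above read the ingested files back out of the DB's metadata rather than trusting the ingestion call itself. A small sketch of that inspection step; with no file_checksum_gen_factory set on the DB, each entry is expected to report kUnknownFileChecksum:

#include <cstdio>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/metadata.h"

void DumpLiveFileChecksums(ROCKSDB_NAMESPACE::DB* db) {
  std::vector<ROCKSDB_NAMESPACE::LiveFileMetaData> files;
  db->GetLiveFilesMetaData(&files);
  for (const auto& f : files) {
    // name is relative to the DB directory, e.g. "/000012.sst"
    std::fprintf(stderr, "%s checksum=%s func=%s\n", f.name.c_str(),
                 f.file_checksum.c_str(), f.file_checksum_func_name.c_str());
  }
}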
ASSERT_EQ(Temperature::kUnknown, metadata.levels[6].files[0].temperature); - auto size = GetSstSizeHelper(Temperature::kUnknown); - ASSERT_GT(size, 0); - size = GetSstSizeHelper(Temperature::kWarm); - ASSERT_EQ(size, 0); - size = GetSstSizeHelper(Temperature::kHot); - ASSERT_EQ(size, 0); - size = GetSstSizeHelper(Temperature::kCold); - ASSERT_EQ(size, 0); - - // Reopen Db with checksum enabled - Reopen(options); - // Enable verify_file_checksum option - // The checksum vector does not match, fail the ingestion - s = AddFileWithFileChecksum({file2}, {file_checksum2, "xyz"}, - {file_checksum_func_name2}, true, false, false, - false); - ASSERT_NOK(s) << s.ToString(); - - // Enable verify_file_checksum option - // The checksum name does not match, fail the ingestion - s = AddFileWithFileChecksum({file2}, {file_checksum2}, {"xyz"}, true, false, - false, false); - ASSERT_NOK(s) << s.ToString(); - - // Enable verify_file_checksum option - // The checksum itself does not match, fail the ingestion - s = AddFileWithFileChecksum({file2}, {"xyz"}, {file_checksum_func_name2}, - true, false, false, false); - ASSERT_NOK(s) << s.ToString(); - - // Enable verify_file_checksum option - // All matches, ingestion is successful - s = AddFileWithFileChecksum({file2}, {file_checksum2}, - {file_checksum_func_name2}, true, false, false, - false); - ASSERT_OK(s) << s.ToString(); - std::vector live_files1; - dbfull()->GetLiveFilesMetaData(&live_files1); - for (auto f : live_files1) { - if (set1.find(f.name) == set1.end()) { - ASSERT_EQ(f.file_checksum, file_checksum2); - ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name2); - set1.insert(f.name); - } - } - ASSERT_OK(env_->FileExists(file2)); - - // Enable verify_file_checksum option - // No checksum information is provided, generate it when ingesting - std::vector checksum, checksum_func; - s = AddFileWithFileChecksum({file3}, checksum, checksum_func, true, false, - false, false); - ASSERT_OK(s) << s.ToString(); - std::vector live_files2; - dbfull()->GetLiveFilesMetaData(&live_files2); - for (auto f : live_files2) { - if (set1.find(f.name) == set1.end()) { - ASSERT_EQ(f.file_checksum, file_checksum3); - ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name3); - set1.insert(f.name); - } - } - ASSERT_OK(s) << s.ToString(); - ASSERT_OK(env_->FileExists(file3)); - - // Does not enable verify_file_checksum options - // The checksum name does not match, fail the ingestion - s = AddFileWithFileChecksum({file4}, {file_checksum4}, {"xyz"}, false, false, - false, false); - ASSERT_NOK(s) << s.ToString(); - - // Does not enable verify_file_checksum options - // Checksum function name matches, store the checksum being ingested. - s = AddFileWithFileChecksum({file4}, {"asd"}, {file_checksum_func_name4}, - false, false, false, false); - ASSERT_OK(s) << s.ToString(); - std::vector live_files3; - dbfull()->GetLiveFilesMetaData(&live_files3); - for (auto f : live_files3) { - if (set1.find(f.name) == set1.end()) { - ASSERT_FALSE(f.file_checksum == file_checksum4); - ASSERT_EQ(f.file_checksum, "asd"); - ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name4); - set1.insert(f.name); - } - } - ASSERT_OK(s) << s.ToString(); - ASSERT_OK(env_->FileExists(file4)); - - // enable verify_file_checksum options, DB enable checksum, and enable - // write_global_seq. So the checksum stored is different from the one - // ingested due to the sequence number changes. 
- s = AddFileWithFileChecksum({file5}, {file_checksum5}, - {file_checksum_func_name5}, true, false, false, - true); - ASSERT_OK(s) << s.ToString(); - std::vector live_files4; - dbfull()->GetLiveFilesMetaData(&live_files4); - for (auto f : live_files4) { - if (set1.find(f.name) == set1.end()) { - std::string cur_checksum5, cur_checksum_func_name5; - ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( - dbname_ + f.name, &cur_checksum5, &cur_checksum_func_name5)); - ASSERT_EQ(f.file_checksum, cur_checksum5); - ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name5); - set1.insert(f.name); - } - } - ASSERT_OK(s) << s.ToString(); - ASSERT_OK(env_->FileExists(file5)); - - // Does not enable verify_file_checksum options and also the ingested file - // checksum information is empty. DB will generate and store the checksum - // in Manifest. - std::vector files_c6, files_name6; - s = AddFileWithFileChecksum({file6}, files_c6, files_name6, false, false, - false, false); - ASSERT_OK(s) << s.ToString(); - std::vector live_files6; - dbfull()->GetLiveFilesMetaData(&live_files6); - for (auto f : live_files6) { - if (set1.find(f.name) == set1.end()) { - ASSERT_EQ(f.file_checksum, file_checksum6); - ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name6); - set1.insert(f.name); - } - } - ASSERT_OK(s) << s.ToString(); - ASSERT_OK(env_->FileExists(file6)); - db_->GetColumnFamilyMetaData(&metadata); - size = GetSstSizeHelper(Temperature::kUnknown); - ASSERT_GT(size, 0); - size = GetSstSizeHelper(Temperature::kWarm); - ASSERT_EQ(size, 0); - size = GetSstSizeHelper(Temperature::kHot); - ASSERT_EQ(size, 0); - size = GetSstSizeHelper(Temperature::kCold); - ASSERT_EQ(size, 0); -} - -TEST_F(ExternalSSTFileBasicTest, NoCopy) { - Options options = CurrentOptions(); - const ImmutableCFOptions ioptions(options); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // file1.sst (0 => 99) - std::string file1 = sst_files_dir_ + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 0; k < 100; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - Status s = sst_file_writer.Finish(&file1_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(0)); - ASSERT_EQ(file1_info.largest_key, Key(99)); - - // file2.sst (100 => 299) - std::string file2 = sst_files_dir_ + "file2.sst"; - ASSERT_OK(sst_file_writer.Open(file2)); - for (int k = 100; k < 300; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file2_info; - s = sst_file_writer.Finish(&file2_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file2_info.file_path, file2); - ASSERT_EQ(file2_info.num_entries, 200); - ASSERT_EQ(file2_info.smallest_key, Key(100)); - ASSERT_EQ(file2_info.largest_key, Key(299)); - - // file3.sst (110 => 124) .. 
overlap with file2.sst - std::string file3 = sst_files_dir_ + "file3.sst"; - ASSERT_OK(sst_file_writer.Open(file3)); - for (int k = 110; k < 125; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file3_info; - s = sst_file_writer.Finish(&file3_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file3_info.file_path, file3); - ASSERT_EQ(file3_info.num_entries, 15); - ASSERT_EQ(file3_info.smallest_key, Key(110)); - ASSERT_EQ(file3_info.largest_key, Key(124)); - - s = DeprecatedAddFile({file1}, true /* move file */); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(Status::NotFound(), env_->FileExists(file1)); - - s = DeprecatedAddFile({file2}, false /* copy file */); - ASSERT_OK(s) << s.ToString(); - ASSERT_OK(env_->FileExists(file2)); - - // This file has overlapping values with the existing data - s = DeprecatedAddFile({file3}, true /* move file */); - ASSERT_NOK(s) << s.ToString(); - ASSERT_OK(env_->FileExists(file3)); - - for (int k = 0; k < 300; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } -} - -TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) { - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - do { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - std::map true_data; - - int file_id = 1; - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 4, 6}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {11, 15, 19}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {120, 130}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 130}, ValueType::kTypeValue, file_id++, write_global_seqno, - verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); - - // Write some keys through normal write path - for (int i = 0; i < 50; i++) { - ASSERT_OK(Put(Key(i), "memtable")); - true_data[Key(i)] = "memtable"; - } - SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {60, 61, 62}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - 
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {40, 41, 42}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {20, 30, 40}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2); - - const Snapshot* snapshot = db_->GetSnapshot(); - - // We will need a seqno for the file regardless if the file overwrite - // keys in the DB or not because we have a snapshot - ASSERT_OK(GenerateAndAddExternalFile( - options, {1000, 1002}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // A global seqno will be assigned anyway because of the snapshot - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {2000, 3002}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // A global seqno will be assigned anyway because of the snapshot - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 20, 40, 100, 150}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // A global seqno will be assigned anyway because of the snapshot - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); - - db_->ReleaseSnapshot(snapshot); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {5000, 5001}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // No snapshot anymore, no need to assign a seqno - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); - - size_t kcnt = 0; - VerifyDBFromMap(true_data, &kcnt, false); - } while (ChangeOptionsForFileIngestionTest()); -} - -TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) { - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - do { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.merge_operator.reset(new TestPutOperator()); - DestroyAndReopen(options); - std::map true_data; - - int file_id = 1; - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 4, 6}, ValueType::kTypeMerge, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {11, 15, 19}, ValueType::kTypeDeletion, file_id++, - write_global_seqno, 
verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {120, 130}, ValueType::kTypeMerge, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 130}, ValueType::kTypeDeletion, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {120}, {ValueType::kTypeValue}, {{120, 135}}, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {}, {}, {{110, 120}}, file_id++, write_global_seqno, - verify_checksums_before_ingest, &true_data)); - // The range deletion ends on a key, but it doesn't actually delete - // this key because the largest key in the range is exclusive. Still, - // it counts as an overlap so a new seqno will be assigned. - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {}, {}, {{100, 109}}, file_id++, write_global_seqno, - verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5); - - // Write some keys through normal write path - for (int i = 0; i < 50; i++) { - ASSERT_OK(Put(Key(i), "memtable")); - true_data[Key(i)] = "memtable"; - } - SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {60, 61, 62}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {40, 41, 42}, ValueType::kTypeMerge, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {20, 30, 40}, ValueType::kTypeDeletion, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2); - - const Snapshot* snapshot = db_->GetSnapshot(); - - // We will need a seqno for the file regardless if the file overwrite - // keys in the DB or not because we have a snapshot - ASSERT_OK(GenerateAndAddExternalFile( - options, {1000, 1002}, ValueType::kTypeMerge, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // A global seqno will be assigned anyway because of the snapshot - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {2000, 3002}, ValueType::kTypeMerge, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // A global seqno will be assigned anyway because of the snapshot - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4); - - 
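All of the sequence-number expectations in this test follow from one rule: a fresh global seqno is assigned only when the ingested file overwrites existing keys or a snapshot is live; otherwise the file keeps seqno 0. The options merely permit the assignment. A sketch of the option set the helper above passes down, mirroring GenerateAndAddExternalFile:

#include "rocksdb/options.h"

ROCKSDB_NAMESPACE::IngestExternalFileOptions MakeIngestOptions(
    bool write_global_seqno, bool verify_checksums_before_ingest) {
  ROCKSDB_NAMESPACE::IngestExternalFileOptions ifo;
  ifo.allow_global_seqno = true;  // required whenever a seqno may be assigned
  // Whether the assigned seqno is also rewritten into the file itself; the
  // assignment decision does not depend on this flag.
  ifo.write_global_seqno = write_global_seqno;
  ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
  return ifo;
}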
ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 20, 40, 100, 150}, ValueType::kTypeMerge, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // A global seqno will be assigned anyway because of the snapshot - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); - - db_->ReleaseSnapshot(snapshot); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {5000, 5001}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data)); - // No snapshot anymore, no need to assign a seqno - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); - - size_t kcnt = 0; - VerifyDBFromMap(true_data, &kcnt, false); - } while (ChangeOptionsForFileIngestionTest()); -} - -TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) { - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - do { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.merge_operator.reset(new TestPutOperator()); - DestroyAndReopen(options); - std::map true_data; - - int file_id = 1; - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 2, 3, 4, 5, 6}, - {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue, - ValueType::kTypeMerge, ValueType::kTypeValue, ValueType::kTypeMerge}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {10, 11, 12, 13}, - {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue, - ValueType::kTypeMerge}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 4, 6}, - {ValueType::kTypeDeletion, ValueType::kTypeValue, - ValueType::kTypeMerge}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {11, 15, 19}, - {ValueType::kTypeDeletion, ValueType::kTypeMerge, - ValueType::kTypeValue}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {120, 130}, {ValueType::kTypeValue, ValueType::kTypeMerge}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 130}, {ValueType::kTypeMerge, ValueType::kTypeDeletion}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {150, 151, 152}, - {ValueType::kTypeValue, ValueType::kTypeMerge, - ValueType::kTypeDeletion}, - {{150, 160}, {180, 190}}, file_id++, write_global_seqno, - verify_checksums_before_ingest, &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); - - 
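The range-deletion cases above lean on two SstFileWriter rules spelled out in the test helper: DeleteRange must be issued before any point entries, and a point write in the same ingested file takes precedence over that file's own range tombstone. A short sketch under those rules; the path argument and keys are illustrative:

#include <string>

#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/sst_file_writer.h"

ROCKSDB_NAMESPACE::Status WriteRangeDelThenPoints(
    const ROCKSDB_NAMESPACE::Options& options, const std::string& path) {
  using namespace ROCKSDB_NAMESPACE;
  SstFileWriter writer(EnvOptions(), options);
  Status s = writer.Open(path);
  if (s.ok()) s = writer.DeleteRange("key150", "key160");  // tombstone first
  if (s.ok()) s = writer.Put("key150", "v150");    // survives the tombstone
  if (s.ok()) s = writer.Merge("key151", "v151");  // point keys in ascending order
  if (s.ok()) s = writer.Delete("key152");
  if (s.ok()) s = writer.Finish();
  return s;
}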
ASSERT_OK(GenerateAndAddExternalFile( - options, {150, 151, 152}, - {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue}, - {{200, 250}}, file_id++, write_global_seqno, - verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {300, 301, 302}, - {ValueType::kTypeValue, ValueType::kTypeMerge, - ValueType::kTypeDeletion}, - {{1, 2}, {152, 154}}, file_id++, write_global_seqno, - verify_checksums_before_ingest, &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5); - - // Write some keys through normal write path - for (int i = 0; i < 50; i++) { - ASSERT_OK(Put(Key(i), "memtable")); - true_data[Key(i)] = "memtable"; - } - SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {60, 61, 62}, - {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // File doesn't overwrite any keys, no seqno needed - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {40, 41, 42}, - {ValueType::kTypeValue, ValueType::kTypeDeletion, - ValueType::kTypeDeletion}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {20, 30, 40}, - {ValueType::kTypeDeletion, ValueType::kTypeDeletion, - ValueType::kTypeDeletion}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // File overwrites some keys, a seqno will be assigned - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2); - - const Snapshot* snapshot = db_->GetSnapshot(); - - // We will need a seqno for the file regardless if the file overwrite - // keys in the DB or not because we have a snapshot - ASSERT_OK(GenerateAndAddExternalFile( - options, {1000, 1002}, {ValueType::kTypeValue, ValueType::kTypeMerge}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // A global seqno will be assigned anyway because of the snapshot - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {2000, 3002}, {ValueType::kTypeValue, ValueType::kTypeMerge}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // A global seqno will be assigned anyway because of the snapshot - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 20, 40, 100, 150}, - {ValueType::kTypeDeletion, ValueType::kTypeDeletion, - ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeMerge}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // A global seqno will be assigned anyway because of the snapshot - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); - - db_->ReleaseSnapshot(snapshot); - - ASSERT_OK(GenerateAndAddExternalFile( - options, {5000, 5001}, {ValueType::kTypeValue, ValueType::kTypeMerge}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - // No snapshot anymore, no need to assign a seqno - 
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); - - size_t kcnt = 0; - VerifyDBFromMap(true_data, &kcnt, false); - } while (ChangeOptionsForFileIngestionTest()); -} - -TEST_F(ExternalSSTFileBasicTest, FadviseTrigger) { - Options options = CurrentOptions(); - const int kNumKeys = 10000; - - size_t total_fadvised_bytes = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "SstFileWriter::Rep::InvalidatePageCache", [&](void* arg) { - size_t fadvise_size = *(reinterpret_cast(arg)); - total_fadvised_bytes += fadvise_size; - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - std::unique_ptr sst_file_writer; - - std::string sst_file_path = sst_files_dir_ + "file_fadvise_disable.sst"; - sst_file_writer.reset( - new SstFileWriter(EnvOptions(), options, nullptr, false)); - ASSERT_OK(sst_file_writer->Open(sst_file_path)); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(sst_file_writer->Put(Key(i), Key(i))); - } - ASSERT_OK(sst_file_writer->Finish()); - // fadvise disabled - ASSERT_EQ(total_fadvised_bytes, 0); - - sst_file_path = sst_files_dir_ + "file_fadvise_enable.sst"; - sst_file_writer.reset( - new SstFileWriter(EnvOptions(), options, nullptr, true)); - ASSERT_OK(sst_file_writer->Open(sst_file_path)); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(sst_file_writer->Put(Key(i), Key(i))); - } - ASSERT_OK(sst_file_writer->Finish()); - // fadvise enabled - ASSERT_EQ(total_fadvised_bytes, sst_file_writer->FileSize()); - ASSERT_GT(total_fadvised_bytes, 0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(ExternalSSTFileBasicTest, SyncFailure) { - Options options; - options.create_if_missing = true; - options.env = fault_injection_test_env_.get(); - - std::vector> test_cases = { - {"ExternalSstFileIngestionJob::BeforeSyncIngestedFile", - "ExternalSstFileIngestionJob::AfterSyncIngestedFile"}, - {"ExternalSstFileIngestionJob::BeforeSyncDir", - "ExternalSstFileIngestionJob::AfterSyncDir"}, - {"ExternalSstFileIngestionJob::BeforeSyncGlobalSeqno", - "ExternalSstFileIngestionJob::AfterSyncGlobalSeqno"}}; - - for (size_t i = 0; i < test_cases.size(); i++) { - bool no_sync = false; - SyncPoint::GetInstance()->SetCallBack(test_cases[i].first, [&](void*) { - fault_injection_test_env_->SetFilesystemActive(false); - }); - SyncPoint::GetInstance()->SetCallBack(test_cases[i].second, [&](void*) { - fault_injection_test_env_->SetFilesystemActive(true); - }); - if (i == 0) { - SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::Prepare:Reopen", [&](void* s) { - Status* status = static_cast(s); - if (status->IsNotSupported()) { - no_sync = true; - } - }); - } - if (i == 2) { - SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::NewRandomRWFile", [&](void* s) { - Status* status = static_cast(s); - if (status->IsNotSupported()) { - no_sync = true; - } - }); - } - SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndReopen(options); - if (i == 2) { - ASSERT_OK(Put("foo", "v1")); - } - - Options sst_file_writer_options; - sst_file_writer_options.env = fault_injection_test_env_.get(); - std::unique_ptr sst_file_writer( - new SstFileWriter(EnvOptions(), sst_file_writer_options)); - std::string file_name = - sst_files_dir_ + "sync_failure_test_" + std::to_string(i) + ".sst"; - ASSERT_OK(sst_file_writer->Open(file_name)); - ASSERT_OK(sst_file_writer->Put("bar", "v2")); - ASSERT_OK(sst_file_writer->Finish()); - - IngestExternalFileOptions ingest_opt; - 
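// The fault-injection and byte-counting tests in this file share one SyncPoint
// pattern: register a callback on a named point inside RocksDB, enable
// processing, run the operation under test, then disable and clear. A rough
// sketch of that pattern (debug builds only; "SomeNamedSyncPoint" is a
// placeholder, not a real sync point, and the in-tree header path is assumed):
#include "test_util/sync_point.h"
void SyncPointPatternSketch() {
  auto* sp = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sp->SetCallBack("SomeNamedSyncPoint", [](void* /*arg*/) {
    // Inspect or mutate state each time execution reaches the named point.
  });
  sp->EnableProcessing();
  // ... run the code under test ...
  sp->DisableProcessing();
  sp->ClearAllCallBacks();
}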
ASSERT_FALSE(ingest_opt.write_global_seqno); // new default - if (i == 0) { - ingest_opt.move_files = true; - } - const Snapshot* snapshot = db_->GetSnapshot(); - if (i == 2) { - ingest_opt.write_global_seqno = true; - } - Status s = db_->IngestExternalFile({file_name}, ingest_opt); - if (no_sync) { - ASSERT_OK(s); - } else { - ASSERT_NOK(s); - } - db_->ReleaseSnapshot(snapshot); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - Destroy(options); - } -} - -TEST_F(ExternalSSTFileBasicTest, ReopenNotSupported) { - Options options; - options.create_if_missing = true; - options.env = env_; - - SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::Prepare:Reopen", [&](void* arg) { - Status* s = static_cast(arg); - *s = Status::NotSupported(); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndReopen(options); - - Options sst_file_writer_options; - sst_file_writer_options.env = env_; - std::unique_ptr sst_file_writer( - new SstFileWriter(EnvOptions(), sst_file_writer_options)); - std::string file_name = - sst_files_dir_ + "reopen_not_supported_test_" + ".sst"; - ASSERT_OK(sst_file_writer->Open(file_name)); - ASSERT_OK(sst_file_writer->Put("bar", "v2")); - ASSERT_OK(sst_file_writer->Finish()); - - IngestExternalFileOptions ingest_opt; - ingest_opt.move_files = true; - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->IngestExternalFile({file_name}, ingest_opt)); - db_->ReleaseSnapshot(snapshot); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - Destroy(options); -} - -TEST_F(ExternalSSTFileBasicTest, VerifyChecksumReadahead) { - Options options; - options.create_if_missing = true; - SpecialEnv senv(env_); - options.env = &senv; - DestroyAndReopen(options); - - Options sst_file_writer_options; - sst_file_writer_options.env = env_; - std::unique_ptr sst_file_writer( - new SstFileWriter(EnvOptions(), sst_file_writer_options)); - std::string file_name = sst_files_dir_ + "verify_checksum_readahead_test.sst"; - ASSERT_OK(sst_file_writer->Open(file_name)); - Random rnd(301); - std::string value = rnd.RandomString(4000); - for (int i = 0; i < 5000; i++) { - ASSERT_OK(sst_file_writer->Put(DBTestBase::Key(i), value)); - } - ASSERT_OK(sst_file_writer->Finish()); - - // Ingest it once without verifying checksums to see the baseline - // preads. - IngestExternalFileOptions ingest_opt; - ingest_opt.move_files = false; - senv.count_random_reads_ = true; - senv.random_read_bytes_counter_ = 0; - ASSERT_OK(db_->IngestExternalFile({file_name}, ingest_opt)); - - auto base_num_reads = senv.random_read_counter_.Read(); - // Make sure the counter is enabled. - ASSERT_GT(base_num_reads, 0); - - // Ingest again and observe the reads made for for readahead. - ingest_opt.move_files = false; - ingest_opt.verify_checksums_before_ingest = true; - ingest_opt.verify_checksums_readahead_size = size_t{2 * 1024 * 1024}; - - senv.count_random_reads_ = true; - senv.random_read_bytes_counter_ = 0; - ASSERT_OK(db_->IngestExternalFile({file_name}, ingest_opt)); - - // Make sure the counter is enabled. - ASSERT_GT(senv.random_read_counter_.Read() - base_num_reads, 0); - - // The SST file is about 20MB. Readahead size is 2MB. - // Give a conservative 15 reads for metadata blocks, the number - // of random reads should be within 20 MB / 2MB + 15 = 25. 
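// A small sketch (placeholders, not from this test) of the two options the
// readahead test exercises: verifying block checksums at ingestion time, and the
// readahead size used while doing so. A larger readahead turns many small random
// reads of the candidate file into a few large sequential ones, which is what
// the read-count bound below is measuring.
#include <cassert>
#include <rocksdb/db.h>
void IngestWithChecksumReadahead(rocksdb::DB* db, const std::string& file) {
  rocksdb::IngestExternalFileOptions ifo;
  ifo.verify_checksums_before_ingest = true;               // read + verify blocks
  ifo.verify_checksums_readahead_size = 2 * 1024 * 1024;   // 2MB readahead
  assert(db->IngestExternalFile({file}, ifo).ok());
}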
- ASSERT_LE(senv.random_read_counter_.Read() - base_num_reads, 40); - - Destroy(options); -} - -TEST_F(ExternalSSTFileBasicTest, IngestRangeDeletionTombstoneWithGlobalSeqno) { - for (int i = 5; i < 25; i++) { - ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i), - Key(i) + "_val")); - } - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - Reopen(options); - SstFileWriter sst_file_writer(EnvOptions(), options); - - // file.sst (delete 0 => 30) - std::string file = sst_files_dir_ + "file.sst"; - ASSERT_OK(sst_file_writer.Open(file)); - ASSERT_OK(sst_file_writer.DeleteRange(Key(0), Key(30))); - ExternalSstFileInfo file_info; - ASSERT_OK(sst_file_writer.Finish(&file_info)); - ASSERT_EQ(file_info.file_path, file); - ASSERT_EQ(file_info.num_entries, 0); - ASSERT_EQ(file_info.smallest_key, ""); - ASSERT_EQ(file_info.largest_key, ""); - ASSERT_EQ(file_info.num_range_del_entries, 1); - ASSERT_EQ(file_info.smallest_range_del_key, Key(0)); - ASSERT_EQ(file_info.largest_range_del_key, Key(30)); - - IngestExternalFileOptions ifo; - ifo.move_files = true; - ifo.snapshot_consistency = true; - ifo.allow_global_seqno = true; - ifo.write_global_seqno = true; - ifo.verify_checksums_before_ingest = false; - ASSERT_OK(db_->IngestExternalFile({file}, ifo)); - - for (int i = 5; i < 25; i++) { - std::string res; - ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &res).IsNotFound()); - } -} - -TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) { - int kNumLevels = 7; - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.num_levels = kNumLevels; - Reopen(options); - - std::map true_data; - int file_id = 1; - // prevent range deletions from being dropped due to becoming obsolete. - const Snapshot* snapshot = db_->GetSnapshot(); - - // range del [0, 50) in L6 file, [50, 100) in L0 file, [100, 150) in memtable - for (int i = 0; i < 3; i++) { - if (i != 0) { - db_->Flush(FlushOptions()); - if (i == 1) { - MoveFilesToLevel(kNumLevels - 1); - } - } - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - Key(50 * i), Key(50 * (i + 1)))); - } - ASSERT_EQ(1, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2)); - ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 1)); - - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - // overlaps with L0 file but not memtable, so flush is skipped and file is - // ingested into L0 - SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); - ASSERT_OK(GenerateAndAddExternalFile( - options, {60, 90}, {ValueType::kTypeValue, ValueType::kTypeValue}, - {{65, 70}, {70, 85}}, file_id++, write_global_seqno, - verify_checksums_before_ingest, &true_data)); - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2)); - ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); - - // overlaps with L6 file but not memtable or L0 file, so flush is skipped and - // file is ingested into L5 - ASSERT_OK(GenerateAndAddExternalFile( - options, {10, 40}, {ValueType::kTypeValue, ValueType::kTypeValue}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); - ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); - - 
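// A minimal sketch (placeholder names, not from this test) of the pattern the
// range-deletion tests use: an SST containing only a DeleteRange tombstone can be
// built with SstFileWriter and ingested, after which the covered keys stop being
// visible once the file's global seqno shadows the older data.
#include <cassert>
#include <rocksdb/db.h>
#include <rocksdb/env.h>
#include <rocksdb/sst_file_writer.h>
void IngestRangeTombstone(rocksdb::DB* db, const rocksdb::Options& options,
                          const std::string& path) {
  rocksdb::SstFileWriter writer(rocksdb::EnvOptions(), options);
  assert(writer.Open(path).ok());
  assert(writer.DeleteRange("key000", "key100").ok());  // deletes [key000, key100)
  assert(writer.Finish().ok());
  rocksdb::IngestExternalFileOptions ifo;
  ifo.allow_global_seqno = true;  // needed when the tombstone covers live keys
  assert(db->IngestExternalFile({path}, ifo).ok());
}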
// overlaps with L5 file but not memtable or L0 file, so flush is skipped and - // file is ingested into L4 - ASSERT_OK(GenerateAndAddExternalFile( - options, {}, {}, {{5, 15}}, file_id++, write_global_seqno, - verify_checksums_before_ingest, &true_data)); - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); - ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 2)); - ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); - - // ingested file overlaps with memtable, so flush is triggered before the file - // is ingested such that the ingested data is considered newest. So L0 file - // count increases by two. - ASSERT_OK(GenerateAndAddExternalFile( - options, {100, 140}, {ValueType::kTypeValue, ValueType::kTypeValue}, - file_id++, write_global_seqno, verify_checksums_before_ingest, - &true_data)); - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); - ASSERT_EQ(4, NumTableFilesAtLevel(0)); - ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); - ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); - - // snapshot unneeded now that all range deletions are persisted - db_->ReleaseSnapshot(snapshot); - - // overlaps with nothing, so places at bottom level and skips incrementing - // seqnum. - ASSERT_OK(GenerateAndAddExternalFile( - options, {151, 175}, {ValueType::kTypeValue, ValueType::kTypeValue}, - {{160, 200}}, file_id++, write_global_seqno, - verify_checksums_before_ingest, &true_data)); - ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); - ASSERT_EQ(4, NumTableFilesAtLevel(0)); - ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); - ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1)); -} - -TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) { - Options options = CurrentOptions(); - SstFileWriter sst_file_writer(EnvOptions(), options); - - // file8.sst (delete 300 => 400) - std::string file8 = sst_files_dir_ + "file8.sst"; - ASSERT_OK(sst_file_writer.Open(file8)); - ASSERT_OK(sst_file_writer.DeleteRange(Key(300), Key(400))); - ExternalSstFileInfo file8_info; - Status s = sst_file_writer.Finish(&file8_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file8_info.file_path, file8); - ASSERT_EQ(file8_info.num_entries, 0); - ASSERT_EQ(file8_info.smallest_key, ""); - ASSERT_EQ(file8_info.largest_key, ""); - ASSERT_EQ(file8_info.num_range_del_entries, 1); - ASSERT_EQ(file8_info.smallest_range_del_key, Key(300)); - ASSERT_EQ(file8_info.largest_range_del_key, Key(400)); - - // file9.sst (delete 400 => 500) - std::string file9 = sst_files_dir_ + "file9.sst"; - ASSERT_OK(sst_file_writer.Open(file9)); - ASSERT_OK(sst_file_writer.DeleteRange(Key(400), Key(500))); - ExternalSstFileInfo file9_info; - s = sst_file_writer.Finish(&file9_info); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(file9_info.file_path, file9); - ASSERT_EQ(file9_info.num_entries, 0); - ASSERT_EQ(file9_info.smallest_key, ""); - ASSERT_EQ(file9_info.largest_key, ""); - ASSERT_EQ(file9_info.num_range_del_entries, 1); - ASSERT_EQ(file9_info.smallest_range_del_key, Key(400)); - ASSERT_EQ(file9_info.largest_range_del_key, Key(500)); - - // Range deletion tombstones are exclusive on their end key, so these SSTs - // should not be considered as overlapping. 
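// Because the end key of a range tombstone is exclusive, two half-open ranges
// [a, b) and [c, d) conflict only when a < d and c < b; [300, 400) and
// [400, 500) touch at 400 but do not overlap, which is why both files below can
// be added in a single call at sequence number 0. A tiny illustration of that
// check (integers stand in for the encoded keys):
constexpr bool HalfOpenRangesOverlap(int a, int b, int c, int d) {
  return a < d && c < b;  // [a, b) vs [c, d)
}
static_assert(!HalfOpenRangesOverlap(300, 400, 400, 500),
              "adjacent half-open ranges do not overlap");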
- s = DeprecatedAddFile({file8, file9}); - ASSERT_OK(s) << s.ToString(); - ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); - DestroyAndRecreateExternalSSTFilesDir(); -} - -TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) { - bool change_checksum_called = false; - const auto& change_checksum = [&](void* arg) { - if (!change_checksum_called) { - char* buf = reinterpret_cast(arg); - assert(nullptr != buf); - buf[0] ^= 0x1; - change_checksum_called = true; - } - }; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTableBuilder::WriteMaybeCompressedBlock:TamperWithChecksum", - change_checksum); - SyncPoint::GetInstance()->EnableProcessing(); - int file_id = 0; - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - do { - Options options = CurrentOptions(); - DestroyAndReopen(options); - std::map true_data; - Status s = GenerateAndAddExternalFile( - options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data); - if (verify_checksums_before_ingest) { - ASSERT_NOK(s); - } else { - ASSERT_OK(s); - } - change_checksum_called = false; - } while (ChangeOptionsForFileIngestionTest()); -} - -TEST_P(ExternalSSTFileBasicTest, IngestFileWithFirstByteTampered) { - if (!random_rwfile_supported_) { - ROCKSDB_GTEST_SKIP("Test requires NewRandomRWFile support"); - return; - } - SyncPoint::GetInstance()->DisableProcessing(); - int file_id = 0; - EnvOptions env_options; - do { - Options options = CurrentOptions(); - std::string file_path = sst_files_dir_ + std::to_string(file_id++); - SstFileWriter sst_file_writer(env_options, options); - Status s = sst_file_writer.Open(file_path); - ASSERT_OK(s); - for (int i = 0; i != 100; ++i) { - std::string key = Key(i); - std::string value = Key(i) + std::to_string(0); - ASSERT_OK(sst_file_writer.Put(key, value)); - } - ASSERT_OK(sst_file_writer.Finish()); - { - // Get file size - uint64_t file_size = 0; - ASSERT_OK(env_->GetFileSize(file_path, &file_size)); - ASSERT_GT(file_size, 8); - std::unique_ptr rwfile; - ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions())); - // Manually corrupt the file - // We deterministically corrupt the first byte because we currently - // cannot choose a random offset. The reason for this limitation is that - // we do not checksum property block at present. - const uint64_t offset = 0; - char scratch[8] = {0}; - Slice buf; - ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch)); - scratch[0] ^= 0xff; // flip one bit - ASSERT_OK(rwfile->Write(offset, buf)); - } - // Ingest file. 
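// The corruption tests in this file rely on one contract, sketched below with
// placeholder names: with verify_checksums_before_ingest = true the ingestion
// job reads the candidate file and fails on a bad block checksum, while with the
// option left false a file corrupted after Finish() can still be ingested and
// the damage is only noticed later (e.g. by reads or VerifyChecksum()).
#include <cassert>
#include <rocksdb/db.h>
void ExpectCorruptFileRejected(rocksdb::DB* db,
                               const std::string& tampered_file) {
  rocksdb::IngestExternalFileOptions ifo;
  ifo.verify_checksums_before_ingest = true;
  rocksdb::Status s = db->IngestExternalFile({tampered_file}, ifo);
  assert(!s.ok());  // corruption is detected before the file enters the DB
}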
- IngestExternalFileOptions ifo; - ifo.write_global_seqno = std::get<0>(GetParam()); - ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); - s = db_->IngestExternalFile({file_path}, ifo); - if (ifo.verify_checksums_before_ingest) { - ASSERT_NOK(s); - } else { - ASSERT_OK(s); - } - } while (ChangeOptionsForFileIngestionTest()); -} - -TEST_P(ExternalSSTFileBasicTest, IngestExternalFileWithCorruptedPropsBlock) { - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - if (!verify_checksums_before_ingest) { - ROCKSDB_GTEST_BYPASS("Bypassing test when !verify_checksums_before_ingest"); - return; - } - if (!random_rwfile_supported_) { - ROCKSDB_GTEST_SKIP("Test requires NewRandomRWFile support"); - return; - } - uint64_t props_block_offset = 0; - size_t props_block_size = 0; - const auto& get_props_block_offset = [&](void* arg) { - props_block_offset = *reinterpret_cast(arg); - }; - const auto& get_props_block_size = [&](void* arg) { - props_block_size = *reinterpret_cast(arg); - }; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockOffset", - get_props_block_offset); - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockSize", - get_props_block_size); - SyncPoint::GetInstance()->EnableProcessing(); - int file_id = 0; - Random64 rand(time(nullptr)); - do { - std::string file_path = sst_files_dir_ + std::to_string(file_id++); - Options options = CurrentOptions(); - SstFileWriter sst_file_writer(EnvOptions(), options); - Status s = sst_file_writer.Open(file_path); - ASSERT_OK(s); - for (int i = 0; i != 100; ++i) { - std::string key = Key(i); - std::string value = Key(i) + std::to_string(0); - ASSERT_OK(sst_file_writer.Put(key, value)); - } - ASSERT_OK(sst_file_writer.Finish()); - - { - std::unique_ptr rwfile; - ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions())); - // Manually corrupt the file - ASSERT_GT(props_block_size, 8); - uint64_t offset = - props_block_offset + rand.Next() % (props_block_size - 8); - char scratch[8] = {0}; - Slice buf; - ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch)); - scratch[0] ^= 0xff; // flip one bit - ASSERT_OK(rwfile->Write(offset, buf)); - } - - // Ingest file. 
- IngestExternalFileOptions ifo; - ifo.write_global_seqno = std::get<0>(GetParam()); - ifo.verify_checksums_before_ingest = true; - s = db_->IngestExternalFile({file_path}, ifo); - ASSERT_NOK(s); - } while (ChangeOptionsForFileIngestionTest()); -} - -TEST_F(ExternalSSTFileBasicTest, OverlappingFiles) { - Options options = CurrentOptions(); - - std::vector files; - { - SstFileWriter sst_file_writer(EnvOptions(), options); - std::string file1 = sst_files_dir_ + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - ASSERT_OK(sst_file_writer.Put("a", "z")); - ASSERT_OK(sst_file_writer.Put("i", "m")); - ExternalSstFileInfo file1_info; - ASSERT_OK(sst_file_writer.Finish(&file1_info)); - files.push_back(std::move(file1)); - } - { - SstFileWriter sst_file_writer(EnvOptions(), options); - std::string file2 = sst_files_dir_ + "file2.sst"; - ASSERT_OK(sst_file_writer.Open(file2)); - ASSERT_OK(sst_file_writer.Put("i", "k")); - ExternalSstFileInfo file2_info; - ASSERT_OK(sst_file_writer.Finish(&file2_info)); - files.push_back(std::move(file2)); - } - - IngestExternalFileOptions ifo; - ASSERT_OK(db_->IngestExternalFile(files, ifo)); - ASSERT_EQ(Get("a"), "z"); - ASSERT_EQ(Get("i"), "k"); - - int total_keys = 0; - Iterator* iter = db_->NewIterator(ReadOptions()); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - total_keys++; - } - delete iter; - ASSERT_EQ(total_keys, 2); - - ASSERT_EQ(2, NumTableFilesAtLevel(0)); -} - -TEST_F(ExternalSSTFileBasicTest, IngestFileAfterDBPut) { - // Repro https://github.com/facebook/rocksdb/issues/6245. - // Flush three files to L0. Ingest one more file to trigger L0->L1 compaction - // via trivial move. The bug happened when L1 files were incorrectly sorted - // resulting in an old value for "k" returned by `Get()`. - Options options = CurrentOptions(); - - ASSERT_OK(Put("k", "a")); - Flush(); - ASSERT_OK(Put("k", "a")); - Flush(); - ASSERT_OK(Put("k", "a")); - Flush(); - SstFileWriter sst_file_writer(EnvOptions(), options); - - // Current file size should be 0 after sst_file_writer init and before open a - // file. - ASSERT_EQ(sst_file_writer.FileSize(), 0); - - std::string file1 = sst_files_dir_ + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - ASSERT_OK(sst_file_writer.Put("k", "b")); - - ExternalSstFileInfo file1_info; - Status s = sst_file_writer.Finish(&file1_info); - ASSERT_OK(s) << s.ToString(); - - // Current file size should be non-zero after success write. 
- ASSERT_GT(sst_file_writer.FileSize(), 0); - - IngestExternalFileOptions ifo; - s = db_->IngestExternalFile({file1}, ifo); - ASSERT_OK(s); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(Get("k"), "b"); -} - -TEST_F(ExternalSSTFileBasicTest, IngestWithTemperature) { - Options options = CurrentOptions(); - const ImmutableCFOptions ioptions(options); - options.bottommost_temperature = Temperature::kWarm; - SstFileWriter sst_file_writer(EnvOptions(), options); - options.level0_file_num_compaction_trigger = 2; - Reopen(options); - - auto size = GetSstSizeHelper(Temperature::kUnknown); - ASSERT_EQ(size, 0); - size = GetSstSizeHelper(Temperature::kWarm); - ASSERT_EQ(size, 0); - size = GetSstSizeHelper(Temperature::kHot); - ASSERT_EQ(size, 0); - - // create file01.sst (1000 => 1099) and ingest it - std::string file1 = sst_files_dir_ + "file01.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 1000; k < 1100; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - Status s = sst_file_writer.Finish(&file1_info); - ASSERT_OK(s); - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(1000)); - ASSERT_EQ(file1_info.largest_key, Key(1099)); - - std::vector files; - std::vector files_checksums; - std::vector files_checksum_func_names; - Temperature file_temperature = Temperature::kWarm; - - files.push_back(file1); - IngestExternalFileOptions in_opts; - in_opts.move_files = false; - in_opts.snapshot_consistency = true; - in_opts.allow_global_seqno = false; - in_opts.allow_blocking_flush = false; - in_opts.write_global_seqno = true; - in_opts.verify_file_checksum = false; - IngestExternalFileArg arg; - arg.column_family = db_->DefaultColumnFamily(); - arg.external_files = files; - arg.options = in_opts; - arg.files_checksums = files_checksums; - arg.files_checksum_func_names = files_checksum_func_names; - arg.file_temperature = file_temperature; - s = db_->IngestExternalFiles({arg}); - ASSERT_OK(s); - - // check the temperature of the file being ingested - ColumnFamilyMetaData metadata; - db_->GetColumnFamilyMetaData(&metadata); - ASSERT_EQ(1, metadata.file_count); - ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature); - size = GetSstSizeHelper(Temperature::kUnknown); - ASSERT_EQ(size, 0); - size = GetSstSizeHelper(Temperature::kWarm); - ASSERT_GT(size, 1); - - // non-bottommost file still has unknown temperature - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(Put("bar", "bar")); - ASSERT_OK(Flush()); - db_->GetColumnFamilyMetaData(&metadata); - ASSERT_EQ(2, metadata.file_count); - ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); - size = GetSstSizeHelper(Temperature::kUnknown); - ASSERT_GT(size, 0); - size = GetSstSizeHelper(Temperature::kWarm); - ASSERT_GT(size, 0); - - // reopen and check the information is persisted - Reopen(options); - db_->GetColumnFamilyMetaData(&metadata); - ASSERT_EQ(2, metadata.file_count); - ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); - ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature); - size = GetSstSizeHelper(Temperature::kUnknown); - ASSERT_GT(size, 0); - size = GetSstSizeHelper(Temperature::kWarm); - ASSERT_GT(size, 0); - - // check other non-exist temperatures - size = GetSstSizeHelper(Temperature::kHot); - ASSERT_EQ(size, 0); - size = GetSstSizeHelper(Temperature::kCold); - ASSERT_EQ(size, 0); - std::string prop; - 
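// A short sketch (placeholder handles and paths) of the argument-based API the
// temperature test uses: IngestExternalFiles() takes a vector of
// IngestExternalFileArg, each naming a column family, the files to ingest, the
// per-ingestion options, optional precomputed file checksums, and a file
// temperature used for placement.
#include <cassert>
#include <rocksdb/db.h>
void IngestWarmFile(rocksdb::DB* db, const std::string& file) {
  rocksdb::IngestExternalFileArg arg;
  arg.column_family = db->DefaultColumnFamily();
  arg.external_files = {file};
  arg.options = rocksdb::IngestExternalFileOptions();
  arg.file_temperature = rocksdb::Temperature::kWarm;
  assert(db->IngestExternalFiles({arg}).ok());
}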
ASSERT_TRUE(dbfull()->GetProperty( - DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22), - &prop)); - ASSERT_EQ(std::atoi(prop.c_str()), 0); -} - -TEST_F(ExternalSSTFileBasicTest, FailIfNotBottommostLevel) { - Options options = GetDefaultOptions(); - - std::string file_path = sst_files_dir_ + std::to_string(1); - SstFileWriter sfw(EnvOptions(), options); - - ASSERT_OK(sfw.Open(file_path)); - ASSERT_OK(sfw.Put("b", "dontcare")); - ASSERT_OK(sfw.Finish()); - - // Test universal compaction + ingest with snapshot consistency - options.create_if_missing = true; - options.compaction_style = CompactionStyle::kCompactionStyleUniversal; - DestroyAndReopen(options); - { - const Snapshot* snapshot = db_->GetSnapshot(); - ManagedSnapshot snapshot_guard(db_, snapshot); - IngestExternalFileOptions ifo; - ifo.fail_if_not_bottommost_level = true; - ifo.snapshot_consistency = true; - const Status s = db_->IngestExternalFile({file_path}, ifo); - ASSERT_TRUE(s.IsTryAgain()); - } - - // Test level compaction - options.compaction_style = CompactionStyle::kCompactionStyleLevel; - options.num_levels = 2; - DestroyAndReopen(options); - ASSERT_OK(db_->Put(WriteOptions(), "a", "dontcare")); - ASSERT_OK(db_->Put(WriteOptions(), "c", "dontcare")); - ASSERT_OK(db_->Flush(FlushOptions())); - - ASSERT_OK(db_->Put(WriteOptions(), "b", "dontcare")); - ASSERT_OK(db_->Put(WriteOptions(), "d", "dontcare")); - ASSERT_OK(db_->Flush(FlushOptions())); - - { - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - IngestExternalFileOptions ifo; - ifo.fail_if_not_bottommost_level = true; - const Status s = db_->IngestExternalFile({file_path}, ifo); - ASSERT_TRUE(s.IsTryAgain()); - } -} - -TEST_F(ExternalSSTFileBasicTest, VerifyChecksum) { - const std::string kPutVal = "put_val"; - const std::string kIngestedVal = "ingested_val"; - - ASSERT_OK(Put("k", kPutVal, WriteOptions())); - ASSERT_OK(Flush()); - - std::string external_file = sst_files_dir_ + "/file_to_ingest.sst"; - { - SstFileWriter sst_file_writer{EnvOptions(), CurrentOptions()}; - - ASSERT_OK(sst_file_writer.Open(external_file)); - ASSERT_OK(sst_file_writer.Put("k", kIngestedVal)); - ASSERT_OK(sst_file_writer.Finish()); - } - - ASSERT_OK(db_->IngestExternalFile(db_->DefaultColumnFamily(), {external_file}, - IngestExternalFileOptions())); - - ASSERT_OK(db_->VerifyChecksum()); -} - -TEST_F(ExternalSSTFileBasicTest, VerifySstUniqueId) { - const std::string kPutVal = "put_val"; - const std::string kIngestedVal = "ingested_val"; - - ASSERT_OK(Put("k", kPutVal, WriteOptions())); - ASSERT_OK(Flush()); - - std::string external_file = sst_files_dir_ + "/file_to_ingest.sst"; - { - SstFileWriter sst_file_writer{EnvOptions(), CurrentOptions()}; - - ASSERT_OK(sst_file_writer.Open(external_file)); - ASSERT_OK(sst_file_writer.Put("k", kIngestedVal)); - ASSERT_OK(sst_file_writer.Finish()); - } - - ASSERT_OK(db_->IngestExternalFile(db_->DefaultColumnFamily(), {external_file}, - IngestExternalFileOptions())); - - // Test ingest file without session_id and db_id (for example generated by an - // older version of sst_writer) - SyncPoint::GetInstance()->SetCallBack( - "PropertyBlockBuilder::AddTableProperty:Start", [&](void* props_vs) { - auto props = static_cast(props_vs); - // update table property session_id to a different one - props->db_session_id = ""; - props->db_id = ""; - }); - std::atomic_int skipped = 0, passed = 0; - SyncPoint::GetInstance()->SetCallBack( - 
"BlockBasedTable::Open::SkippedVerifyUniqueId", - [&](void* /*arg*/) { skipped++; }); - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTable::Open::PassedVerifyUniqueId", - [&](void* /*arg*/) { passed++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - auto options = CurrentOptions(); - ASSERT_TRUE(options.verify_sst_unique_id_in_manifest); - Reopen(options); - ASSERT_EQ(skipped, 0); - ASSERT_EQ(passed, 2); // one flushed + one ingested - - external_file = sst_files_dir_ + "/file_to_ingest2.sst"; - { - SstFileWriter sst_file_writer{EnvOptions(), CurrentOptions()}; - - ASSERT_OK(sst_file_writer.Open(external_file)); - ASSERT_OK(sst_file_writer.Put("k", kIngestedVal)); - ASSERT_OK(sst_file_writer.Finish()); - } - - ASSERT_OK(db_->IngestExternalFile(db_->DefaultColumnFamily(), {external_file}, - IngestExternalFileOptions())); - - // Two table file opens skipping verification: - // * ExternalSstFileIngestionJob::GetIngestedFileInfo - // * TableCache::GetTableReader - ASSERT_EQ(skipped, 2); - ASSERT_EQ(passed, 2); - - // Check same after re-open (except no GetIngestedFileInfo) - skipped = 0; - passed = 0; - Reopen(options); - ASSERT_EQ(skipped, 1); - ASSERT_EQ(passed, 2); -} - -TEST_F(ExternalSSTFileBasicTest, StableSnapshotWhileLoggingToManifest) { - const std::string kPutVal = "put_val"; - const std::string kIngestedVal = "ingested_val"; - - ASSERT_OK(Put("k", kPutVal, WriteOptions())); - ASSERT_OK(Flush()); - - std::string external_file = sst_files_dir_ + "/file_to_ingest.sst"; - { - SstFileWriter sst_file_writer{EnvOptions(), CurrentOptions()}; - ASSERT_OK(sst_file_writer.Open(external_file)); - ASSERT_OK(sst_file_writer.Put("k", kIngestedVal)); - ASSERT_OK(sst_file_writer.Finish()); - } - - const Snapshot* snapshot = nullptr; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void* /* arg */) { - // prevent background compaction job to call this callback - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - snapshot = db_->GetSnapshot(); - ReadOptions read_opts; - read_opts.snapshot = snapshot; - std::string value; - ASSERT_OK(db_->Get(read_opts, "k", &value)); - ASSERT_EQ(kPutVal, value); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(db_->IngestExternalFile(db_->DefaultColumnFamily(), {external_file}, - IngestExternalFileOptions())); - auto ingested_file_seqno = db_->GetLatestSequenceNumber(); - ASSERT_NE(nullptr, snapshot); - // snapshot is taken before SST ingestion is done - ASSERT_EQ(ingested_file_seqno, snapshot->GetSequenceNumber() + 1); - - ReadOptions read_opts; - read_opts.snapshot = snapshot; - std::string value; - ASSERT_OK(db_->Get(read_opts, "k", &value)); - ASSERT_EQ(kPutVal, value); - db_->ReleaseSnapshot(snapshot); - - // After reopen, sequence number should be up current such that - // ingested value is read - Reopen(CurrentOptions()); - ASSERT_OK(db_->Get(ReadOptions(), "k", &value)); - ASSERT_EQ(kIngestedVal, value); - - // New write should get higher seqno compared to ingested file - ASSERT_OK(Put("k", kPutVal, WriteOptions())); - ASSERT_EQ(db_->GetLatestSequenceNumber(), ingested_file_seqno + 1); -} - -INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest, - testing::Values(std::make_tuple(true, true), - std::make_tuple(true, false), - std::make_tuple(false, true), - std::make_tuple(false, false))); - - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - 
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc deleted file mode 100644 index 63627c27e..000000000 --- a/db/external_sst_file_test.cc +++ /dev/null @@ -1,2860 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include -#include - -#include "db/db_test_util.h" -#include "db/dbformat.h" -#include "file/filename.h" -#include "options/options_helper.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/sst_file_reader.h" -#include "rocksdb/sst_file_writer.h" -#include "test_util/testutil.h" -#include "util/random.h" -#include "util/thread_guard.h" -#include "utilities/fault_injection_env.h" - -namespace ROCKSDB_NAMESPACE { - -// A test environment that can be configured to fail the Link operation. -class ExternalSSTTestFS : public FileSystemWrapper { - public: - ExternalSSTTestFS(const std::shared_ptr& t, bool fail_link) - : FileSystemWrapper(t), fail_link_(fail_link) {} - static const char* kClassName() { return "ExternalSSTTestFS"; } - const char* Name() const override { return kClassName(); } - - IOStatus LinkFile(const std::string& s, const std::string& t, - const IOOptions& options, IODebugContext* dbg) override { - if (fail_link_) { - return IOStatus::NotSupported("Link failed"); - } - return target()->LinkFile(s, t, options, dbg); - } - - void set_fail_link(bool fail_link) { fail_link_ = fail_link; } - - private: - bool fail_link_; -}; - -class ExternalSSTFileTestBase : public DBTestBase { - public: - ExternalSSTFileTestBase() - : DBTestBase("external_sst_file_test", /*env_do_fsync=*/true) { - sst_files_dir_ = dbname_ + "/sst_files/"; - DestroyAndRecreateExternalSSTFilesDir(); - } - - void DestroyAndRecreateExternalSSTFilesDir() { - ASSERT_OK(DestroyDir(env_, sst_files_dir_)); - ASSERT_OK(env_->CreateDir(sst_files_dir_)); - } - - ~ExternalSSTFileTestBase() override { - DestroyDir(env_, sst_files_dir_).PermitUncheckedError(); - } - - protected: - std::string sst_files_dir_; -}; - -class ExternSSTFileLinkFailFallbackTest - : public ExternalSSTFileTestBase, - public ::testing::WithParamInterface> { - public: - ExternSSTFileLinkFailFallbackTest() { - fs_ = std::make_shared(env_->GetFileSystem(), true); - test_env_.reset(new CompositeEnvWrapper(env_, fs_)); - options_ = CurrentOptions(); - options_.disable_auto_compactions = true; - options_.env = test_env_.get(); - } - - void TearDown() override { - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, options_)); - } - - protected: - Options options_; - std::shared_ptr fs_; - std::unique_ptr test_env_; -}; - -class ExternalSSTFileTest - : public ExternalSSTFileTestBase, - public ::testing::WithParamInterface> { - public: - ExternalSSTFileTest() {} - - Status GenerateOneExternalFile( - const Options& options, ColumnFamilyHandle* cfh, - std::vector>& data, int file_id, - bool sort_data, std::string* external_file_path, - std::map* true_data) { - // Generate a file id if not provided - if (-1 == file_id) { - file_id = (++last_file_id_); - } - // Sort data if asked to do so - if (sort_data) { - std::sort(data.begin(), data.end(), - [&](const std::pair& e1, - const std::pair& e2) { - return 
options.comparator->Compare(e1.first, e2.first) < 0; - }); - auto uniq_iter = std::unique( - data.begin(), data.end(), - [&](const std::pair& e1, - const std::pair& e2) { - return options.comparator->Compare(e1.first, e2.first) == 0; - }); - data.resize(uniq_iter - data.begin()); - } - std::string file_path = sst_files_dir_ + std::to_string(file_id); - SstFileWriter sst_file_writer(EnvOptions(), options, cfh); - Status s = sst_file_writer.Open(file_path); - if (!s.ok()) { - return s; - } - for (const auto& entry : data) { - s = sst_file_writer.Put(entry.first, entry.second); - if (!s.ok()) { - sst_file_writer.Finish().PermitUncheckedError(); - return s; - } - } - s = sst_file_writer.Finish(); - if (s.ok() && external_file_path != nullptr) { - *external_file_path = file_path; - } - if (s.ok() && nullptr != true_data) { - for (const auto& entry : data) { - true_data->insert({entry.first, entry.second}); - } - } - return s; - } - - Status GenerateAndAddExternalFile( - const Options options, - std::vector> data, int file_id = -1, - bool allow_global_seqno = false, bool write_global_seqno = false, - bool verify_checksums_before_ingest = true, bool ingest_behind = false, - bool sort_data = false, - std::map* true_data = nullptr, - ColumnFamilyHandle* cfh = nullptr) { - // Generate a file id if not provided - if (file_id == -1) { - file_id = last_file_id_ + 1; - last_file_id_++; - } - - // Sort data if asked to do so - if (sort_data) { - std::sort(data.begin(), data.end(), - [&](const std::pair& e1, - const std::pair& e2) { - return options.comparator->Compare(e1.first, e2.first) < 0; - }); - auto uniq_iter = std::unique( - data.begin(), data.end(), - [&](const std::pair& e1, - const std::pair& e2) { - return options.comparator->Compare(e1.first, e2.first) == 0; - }); - data.resize(uniq_iter - data.begin()); - } - std::string file_path = sst_files_dir_ + std::to_string(file_id); - SstFileWriter sst_file_writer(EnvOptions(), options, cfh); - - Status s = sst_file_writer.Open(file_path); - if (!s.ok()) { - return s; - } - for (auto& entry : data) { - s = sst_file_writer.Put(entry.first, entry.second); - if (!s.ok()) { - sst_file_writer.Finish().PermitUncheckedError(); - return s; - } - } - s = sst_file_writer.Finish(); - - if (s.ok()) { - IngestExternalFileOptions ifo; - ifo.allow_global_seqno = allow_global_seqno; - ifo.write_global_seqno = allow_global_seqno ? write_global_seqno : false; - ifo.verify_checksums_before_ingest = verify_checksums_before_ingest; - ifo.ingest_behind = ingest_behind; - if (cfh) { - s = db_->IngestExternalFile(cfh, {file_path}, ifo); - } else { - s = db_->IngestExternalFile({file_path}, ifo); - } - } - - if (s.ok() && true_data) { - for (auto& entry : data) { - (*true_data)[entry.first] = entry.second; - } - } - - return s; - } - - Status GenerateAndAddExternalFiles( - const Options& options, - const std::vector& column_families, - const std::vector& ifos, - std::vector>>& data, - int file_id, bool sort_data, - std::vector>& true_data) { - if (-1 == file_id) { - file_id = (++last_file_id_); - } - // Generate external SST files, one for each column family - size_t num_cfs = column_families.size(); - assert(ifos.size() == num_cfs); - assert(data.size() == num_cfs); - std::vector args(num_cfs); - for (size_t i = 0; i != num_cfs; ++i) { - std::string external_file_path; - Status s = GenerateOneExternalFile( - options, column_families[i], data[i], file_id, sort_data, - &external_file_path, - true_data.size() == num_cfs ? 
&true_data[i] : nullptr); - if (!s.ok()) { - return s; - } - ++file_id; - - args[i].column_family = column_families[i]; - args[i].external_files.push_back(external_file_path); - args[i].options = ifos[i]; - } - return db_->IngestExternalFiles(args); - } - - Status GenerateAndAddExternalFile( - const Options options, std::vector> data, - int file_id = -1, bool allow_global_seqno = false, - bool write_global_seqno = false, - bool verify_checksums_before_ingest = true, bool ingest_behind = false, - bool sort_data = false, - std::map* true_data = nullptr, - ColumnFamilyHandle* cfh = nullptr) { - std::vector> file_data; - for (auto& entry : data) { - file_data.emplace_back(Key(entry.first), entry.second); - } - return GenerateAndAddExternalFile(options, file_data, file_id, - allow_global_seqno, write_global_seqno, - verify_checksums_before_ingest, - ingest_behind, sort_data, true_data, cfh); - } - - Status GenerateAndAddExternalFile( - const Options options, std::vector keys, int file_id = -1, - bool allow_global_seqno = false, bool write_global_seqno = false, - bool verify_checksums_before_ingest = true, bool ingest_behind = false, - bool sort_data = false, - std::map* true_data = nullptr, - ColumnFamilyHandle* cfh = nullptr) { - std::vector> file_data; - for (auto& k : keys) { - file_data.emplace_back(Key(k), Key(k) + std::to_string(file_id)); - } - return GenerateAndAddExternalFile(options, file_data, file_id, - allow_global_seqno, write_global_seqno, - verify_checksums_before_ingest, - ingest_behind, sort_data, true_data, cfh); - } - - Status DeprecatedAddFile(const std::vector& files, - bool move_files = false, - bool skip_snapshot_check = false, - bool skip_write_global_seqno = false) { - IngestExternalFileOptions opts; - opts.move_files = move_files; - opts.snapshot_consistency = !skip_snapshot_check; - opts.allow_global_seqno = false; - opts.allow_blocking_flush = false; - opts.write_global_seqno = !skip_write_global_seqno; - return db_->IngestExternalFile(files, opts); - } - - protected: - int last_file_id_ = 0; -}; - -TEST_F(ExternalSSTFileTest, Basic) { - do { - Options options = CurrentOptions(); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // Current file size should be 0 after sst_file_writer init and before open - // a file. - ASSERT_EQ(sst_file_writer.FileSize(), 0); - - // file1.sst (0 => 99) - std::string file1 = sst_files_dir_ + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 0; k < 100; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - ASSERT_OK(sst_file_writer.Finish(&file1_info)); - - // Current file size should be non-zero after success write. 
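// For reference alongside the Basic test, a self-contained sketch (paths and key
// names are placeholders) of the full flow it exercises: write keys to an
// external SST in the comparator's order, Finish() to obtain the file metadata,
// then ingest the file into an open DB and read the data back.
#include <cassert>
#include <string>
#include <rocksdb/db.h>
#include <rocksdb/env.h>
#include <rocksdb/sst_file_writer.h>
void BasicIngestFlowSketch() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/ingest_demo_db", &db).ok());

  rocksdb::SstFileWriter writer(rocksdb::EnvOptions(), options);
  assert(writer.Open("/tmp/ingest_demo.sst").ok());
  for (int k = 0; k < 100; k++) {
    // Keys must be added in sorted order; an out-of-order Put() fails.
    std::string key = "key" + std::to_string(k / 10) + std::to_string(k % 10);
    assert(writer.Put(key, "value").ok());
  }
  rocksdb::ExternalSstFileInfo info;
  assert(writer.Finish(&info).ok());  // info describes the finished file

  assert(db->IngestExternalFile({info.file_path},
                                rocksdb::IngestExternalFileOptions()).ok());
  std::string value;
  assert(db->Get(rocksdb::ReadOptions(), "key00", &value).ok());
  delete db;
}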
- ASSERT_GT(sst_file_writer.FileSize(), 0); - - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(0)); - ASSERT_EQ(file1_info.largest_key, Key(99)); - ASSERT_EQ(file1_info.num_range_del_entries, 0); - ASSERT_EQ(file1_info.smallest_range_del_key, ""); - ASSERT_EQ(file1_info.largest_range_del_key, ""); - // sst_file_writer already finished, cannot add this value - ASSERT_NOK(sst_file_writer.Put(Key(100), "bad_val")); - - // file2.sst (100 => 199) - std::string file2 = sst_files_dir_ + "file2.sst"; - ASSERT_OK(sst_file_writer.Open(file2)); - for (int k = 100; k < 200; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - // Cannot add this key because it's not after last added key - ASSERT_NOK(sst_file_writer.Put(Key(99), "bad_val")); - ExternalSstFileInfo file2_info; - ASSERT_OK(sst_file_writer.Finish(&file2_info)); - ASSERT_EQ(file2_info.file_path, file2); - ASSERT_EQ(file2_info.num_entries, 100); - ASSERT_EQ(file2_info.smallest_key, Key(100)); - ASSERT_EQ(file2_info.largest_key, Key(199)); - - // file3.sst (195 => 299) - // This file values overlap with file2 values - std::string file3 = sst_files_dir_ + "file3.sst"; - ASSERT_OK(sst_file_writer.Open(file3)); - for (int k = 195; k < 300; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file3_info; - ASSERT_OK(sst_file_writer.Finish(&file3_info)); - - // Current file size should be non-zero after success finish. - ASSERT_GT(sst_file_writer.FileSize(), 0); - ASSERT_EQ(file3_info.file_path, file3); - ASSERT_EQ(file3_info.num_entries, 105); - ASSERT_EQ(file3_info.smallest_key, Key(195)); - ASSERT_EQ(file3_info.largest_key, Key(299)); - - // file4.sst (30 => 39) - // This file values overlap with file1 values - std::string file4 = sst_files_dir_ + "file4.sst"; - ASSERT_OK(sst_file_writer.Open(file4)); - for (int k = 30; k < 40; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file4_info; - ASSERT_OK(sst_file_writer.Finish(&file4_info)); - ASSERT_EQ(file4_info.file_path, file4); - ASSERT_EQ(file4_info.num_entries, 10); - ASSERT_EQ(file4_info.smallest_key, Key(30)); - ASSERT_EQ(file4_info.largest_key, Key(39)); - - // file5.sst (400 => 499) - std::string file5 = sst_files_dir_ + "file5.sst"; - ASSERT_OK(sst_file_writer.Open(file5)); - for (int k = 400; k < 500; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file5_info; - ASSERT_OK(sst_file_writer.Finish(&file5_info)); - ASSERT_EQ(file5_info.file_path, file5); - ASSERT_EQ(file5_info.num_entries, 100); - ASSERT_EQ(file5_info.smallest_key, Key(400)); - ASSERT_EQ(file5_info.largest_key, Key(499)); - - // file6.sst (delete 400 => 500) - std::string file6 = sst_files_dir_ + "file6.sst"; - ASSERT_OK(sst_file_writer.Open(file6)); - ASSERT_OK(sst_file_writer.DeleteRange(Key(400), Key(500))); - ExternalSstFileInfo file6_info; - ASSERT_OK(sst_file_writer.Finish(&file6_info)); - ASSERT_EQ(file6_info.file_path, file6); - ASSERT_EQ(file6_info.num_entries, 0); - ASSERT_EQ(file6_info.smallest_key, ""); - ASSERT_EQ(file6_info.largest_key, ""); - ASSERT_EQ(file6_info.num_range_del_entries, 1); - ASSERT_EQ(file6_info.smallest_range_del_key, Key(400)); - ASSERT_EQ(file6_info.largest_range_del_key, Key(500)); - - // file7.sst (delete 500 => 570, put 520 => 599 divisible by 2) - std::string file7 = sst_files_dir_ + "file7.sst"; - ASSERT_OK(sst_file_writer.Open(file7)); - 
ASSERT_OK(sst_file_writer.DeleteRange(Key(500), Key(550))); - for (int k = 520; k < 560; k += 2) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ASSERT_OK(sst_file_writer.DeleteRange(Key(525), Key(575))); - for (int k = 560; k < 600; k += 2) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file7_info; - ASSERT_OK(sst_file_writer.Finish(&file7_info)); - ASSERT_EQ(file7_info.file_path, file7); - ASSERT_EQ(file7_info.num_entries, 40); - ASSERT_EQ(file7_info.smallest_key, Key(520)); - ASSERT_EQ(file7_info.largest_key, Key(598)); - ASSERT_EQ(file7_info.num_range_del_entries, 2); - ASSERT_EQ(file7_info.smallest_range_del_key, Key(500)); - ASSERT_EQ(file7_info.largest_range_del_key, Key(575)); - - // file8.sst (delete 600 => 700) - std::string file8 = sst_files_dir_ + "file8.sst"; - ASSERT_OK(sst_file_writer.Open(file8)); - ASSERT_OK(sst_file_writer.DeleteRange(Key(600), Key(700))); - ExternalSstFileInfo file8_info; - ASSERT_OK(sst_file_writer.Finish(&file8_info)); - ASSERT_EQ(file8_info.file_path, file8); - ASSERT_EQ(file8_info.num_entries, 0); - ASSERT_EQ(file8_info.smallest_key, ""); - ASSERT_EQ(file8_info.largest_key, ""); - ASSERT_EQ(file8_info.num_range_del_entries, 1); - ASSERT_EQ(file8_info.smallest_range_del_key, Key(600)); - ASSERT_EQ(file8_info.largest_range_del_key, Key(700)); - - // Cannot create an empty sst file - std::string file_empty = sst_files_dir_ + "file_empty.sst"; - ExternalSstFileInfo file_empty_info; - ASSERT_NOK(sst_file_writer.Finish(&file_empty_info)); - - DestroyAndReopen(options); - // Add file using file path - ASSERT_OK(DeprecatedAddFile({file1})); - ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); - for (int k = 0; k < 100; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } - - // Add file while holding a snapshot will fail - const Snapshot* s1 = db_->GetSnapshot(); - if (s1 != nullptr) { - ASSERT_NOK(DeprecatedAddFile({file2})); - db_->ReleaseSnapshot(s1); - } - // We can add the file after releaseing the snapshot - ASSERT_OK(DeprecatedAddFile({file2})); - - ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); - for (int k = 0; k < 200; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } - - // This file has overlapping values with the existing data - ASSERT_NOK(DeprecatedAddFile({file3})); - - // This file has overlapping values with the existing data - ASSERT_NOK(DeprecatedAddFile({file4})); - - // Overwrite values of keys divisible by 5 - for (int k = 0; k < 200; k += 5) { - ASSERT_OK(Put(Key(k), Key(k) + "_val_new")); - } - ASSERT_NE(db_->GetLatestSequenceNumber(), 0U); - - // Key range of file5 (400 => 499) don't overlap with any keys in DB - ASSERT_OK(DeprecatedAddFile({file5})); - - // This file has overlapping values with the existing data - ASSERT_NOK(DeprecatedAddFile({file6})); - - // Key range of file7 (500 => 598) don't overlap with any keys in DB - ASSERT_OK(DeprecatedAddFile({file7})); - - // Key range of file7 (600 => 700) don't overlap with any keys in DB - ASSERT_OK(DeprecatedAddFile({file8})); - - // Make sure values are correct before and after flush/compaction - for (int i = 0; i < 2; i++) { - for (int k = 0; k < 200; k++) { - std::string value = Key(k) + "_val"; - if (k % 5 == 0) { - value += "_new"; - } - ASSERT_EQ(Get(Key(k)), value); - } - for (int k = 400; k < 500; k++) { - std::string value = Key(k) + "_val"; - ASSERT_EQ(Get(Key(k)), value); - } - for (int k = 500; k < 600; k++) { - std::string value = Key(k) + "_val"; - if (k < 520 || k % 2 == 1) { - value = "NOT_FOUND"; - } - 
ASSERT_EQ(Get(Key(k)), value); - } - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - - Close(); - options.disable_auto_compactions = true; - Reopen(options); - - // Delete keys in range (400 => 499) - for (int k = 400; k < 500; k++) { - ASSERT_OK(Delete(Key(k))); - } - // We deleted range (400 => 499) but cannot add file5 because - // of the range tombstones - ASSERT_NOK(DeprecatedAddFile({file5})); - - // Compacting the DB will remove the tombstones - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // Now we can add the file - ASSERT_OK(DeprecatedAddFile({file5})); - - // Verify values of file5 in DB - for (int k = 400; k < 500; k++) { - std::string value = Key(k) + "_val"; - ASSERT_EQ(Get(Key(k)), value); - } - DestroyAndRecreateExternalSSTFilesDir(); - } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction | - kRangeDelSkipConfigs)); -} - -class SstFileWriterCollector : public TablePropertiesCollector { - public: - explicit SstFileWriterCollector(const std::string prefix) : prefix_(prefix) { - name_ = prefix_ + "_SstFileWriterCollector"; - } - - const char* Name() const override { return name_.c_str(); } - - Status Finish(UserCollectedProperties* properties) override { - std::string count = std::to_string(count_); - *properties = UserCollectedProperties{ - {prefix_ + "_SstFileWriterCollector", "YES"}, - {prefix_ + "_Count", count}, - }; - return Status::OK(); - } - - Status AddUserKey(const Slice& /*user_key*/, const Slice& /*value*/, - EntryType /*type*/, SequenceNumber /*seq*/, - uint64_t /*file_size*/) override { - ++count_; - return Status::OK(); - } - - UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } - - private: - uint32_t count_ = 0; - std::string prefix_; - std::string name_; -}; - -class SstFileWriterCollectorFactory : public TablePropertiesCollectorFactory { - public: - explicit SstFileWriterCollectorFactory(std::string prefix) - : prefix_(prefix), num_created_(0) {} - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context /*context*/) override { - num_created_++; - return new SstFileWriterCollector(prefix_); - } - const char* Name() const override { return "SstFileWriterCollectorFactory"; } - - std::string prefix_; - uint32_t num_created_; -}; - -TEST_F(ExternalSSTFileTest, AddList) { - do { - Options options = CurrentOptions(); - - auto abc_collector = std::make_shared("abc"); - auto xyz_collector = std::make_shared("xyz"); - - options.table_properties_collector_factories.emplace_back(abc_collector); - options.table_properties_collector_factories.emplace_back(xyz_collector); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // file1.sst (0 => 99) - std::string file1 = sst_files_dir_ + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 0; k < 100; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - ASSERT_OK(sst_file_writer.Finish(&file1_info)); - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(0)); - ASSERT_EQ(file1_info.largest_key, Key(99)); - // sst_file_writer already finished, cannot add this value - ASSERT_NOK(sst_file_writer.Put(Key(100), "bad_val")); - - // file2.sst (100 => 199) - std::string file2 = sst_files_dir_ + "file2.sst"; - ASSERT_OK(sst_file_writer.Open(file2)); - for (int k = 100; k < 200; k++) { - 
ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - // Cannot add this key because it's not after last added key - ASSERT_NOK(sst_file_writer.Put(Key(99), "bad_val")); - ExternalSstFileInfo file2_info; - ASSERT_OK(sst_file_writer.Finish(&file2_info)); - ASSERT_EQ(file2_info.file_path, file2); - ASSERT_EQ(file2_info.num_entries, 100); - ASSERT_EQ(file2_info.smallest_key, Key(100)); - ASSERT_EQ(file2_info.largest_key, Key(199)); - - // file3.sst (195 => 199) - // This file values overlap with file2 values - std::string file3 = sst_files_dir_ + "file3.sst"; - ASSERT_OK(sst_file_writer.Open(file3)); - for (int k = 195; k < 200; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file3_info; - ASSERT_OK(sst_file_writer.Finish(&file3_info)); - ASSERT_EQ(file3_info.file_path, file3); - ASSERT_EQ(file3_info.num_entries, 5); - ASSERT_EQ(file3_info.smallest_key, Key(195)); - ASSERT_EQ(file3_info.largest_key, Key(199)); - - // file4.sst (30 => 39) - // This file values overlap with file1 values - std::string file4 = sst_files_dir_ + "file4.sst"; - ASSERT_OK(sst_file_writer.Open(file4)); - for (int k = 30; k < 40; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); - } - ExternalSstFileInfo file4_info; - ASSERT_OK(sst_file_writer.Finish(&file4_info)); - ASSERT_EQ(file4_info.file_path, file4); - ASSERT_EQ(file4_info.num_entries, 10); - ASSERT_EQ(file4_info.smallest_key, Key(30)); - ASSERT_EQ(file4_info.largest_key, Key(39)); - - // file5.sst (200 => 299) - std::string file5 = sst_files_dir_ + "file5.sst"; - ASSERT_OK(sst_file_writer.Open(file5)); - for (int k = 200; k < 300; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file5_info; - ASSERT_OK(sst_file_writer.Finish(&file5_info)); - ASSERT_EQ(file5_info.file_path, file5); - ASSERT_EQ(file5_info.num_entries, 100); - ASSERT_EQ(file5_info.smallest_key, Key(200)); - ASSERT_EQ(file5_info.largest_key, Key(299)); - - // file6.sst (delete 0 => 100) - std::string file6 = sst_files_dir_ + "file6.sst"; - ASSERT_OK(sst_file_writer.Open(file6)); - ASSERT_OK(sst_file_writer.DeleteRange(Key(0), Key(75))); - ASSERT_OK(sst_file_writer.DeleteRange(Key(25), Key(100))); - ExternalSstFileInfo file6_info; - ASSERT_OK(sst_file_writer.Finish(&file6_info)); - ASSERT_EQ(file6_info.file_path, file6); - ASSERT_EQ(file6_info.num_entries, 0); - ASSERT_EQ(file6_info.smallest_key, ""); - ASSERT_EQ(file6_info.largest_key, ""); - ASSERT_EQ(file6_info.num_range_del_entries, 2); - ASSERT_EQ(file6_info.smallest_range_del_key, Key(0)); - ASSERT_EQ(file6_info.largest_range_del_key, Key(100)); - - // file7.sst (delete 99 => 201) - std::string file7 = sst_files_dir_ + "file7.sst"; - ASSERT_OK(sst_file_writer.Open(file7)); - ASSERT_OK(sst_file_writer.DeleteRange(Key(99), Key(201))); - ExternalSstFileInfo file7_info; - ASSERT_OK(sst_file_writer.Finish(&file7_info)); - ASSERT_EQ(file7_info.file_path, file7); - ASSERT_EQ(file7_info.num_entries, 0); - ASSERT_EQ(file7_info.smallest_key, ""); - ASSERT_EQ(file7_info.largest_key, ""); - ASSERT_EQ(file7_info.num_range_del_entries, 1); - ASSERT_EQ(file7_info.smallest_range_del_key, Key(99)); - ASSERT_EQ(file7_info.largest_range_del_key, Key(201)); - - // list 1 has internal key range conflict - std::vector file_list0({file1, file2}); - std::vector file_list1({file3, file2, file1}); - std::vector file_list2({file5}); - std::vector file_list3({file3, file4}); - std::vector file_list4({file5, file7}); - std::vector 
file_list5({file6, file7}); - - DestroyAndReopen(options); - - // These lists of files have key ranges that overlap with each other - ASSERT_NOK(DeprecatedAddFile(file_list1)); - // Both of the following overlap on the range deletion tombstone. - ASSERT_NOK(DeprecatedAddFile(file_list4)); - ASSERT_NOK(DeprecatedAddFile(file_list5)); - - // Add files using file path list - ASSERT_OK(DeprecatedAddFile(file_list0)); - ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); - for (int k = 0; k < 200; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } - - TablePropertiesCollection props; - ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); - ASSERT_EQ(props.size(), 2); - for (auto file_props : props) { - auto user_props = file_props.second->user_collected_properties; - ASSERT_EQ(user_props["abc_SstFileWriterCollector"], "YES"); - ASSERT_EQ(user_props["xyz_SstFileWriterCollector"], "YES"); - ASSERT_EQ(user_props["abc_Count"], "100"); - ASSERT_EQ(user_props["xyz_Count"], "100"); - } - - // Add file while holding a snapshot will fail - const Snapshot* s1 = db_->GetSnapshot(); - if (s1 != nullptr) { - ASSERT_NOK(DeprecatedAddFile(file_list2)); - db_->ReleaseSnapshot(s1); - } - // We can add the file after releaseing the snapshot - ASSERT_OK(DeprecatedAddFile(file_list2)); - ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); - for (int k = 0; k < 300; k++) { - ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); - } - - ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); - ASSERT_EQ(props.size(), 3); - for (auto file_props : props) { - auto user_props = file_props.second->user_collected_properties; - ASSERT_EQ(user_props["abc_SstFileWriterCollector"], "YES"); - ASSERT_EQ(user_props["xyz_SstFileWriterCollector"], "YES"); - ASSERT_EQ(user_props["abc_Count"], "100"); - ASSERT_EQ(user_props["xyz_Count"], "100"); - } - - // This file list has overlapping values with the existing data - ASSERT_NOK(DeprecatedAddFile(file_list3)); - - // Overwrite values of keys divisible by 5 - for (int k = 0; k < 200; k += 5) { - ASSERT_OK(Put(Key(k), Key(k) + "_val_new")); - } - ASSERT_NE(db_->GetLatestSequenceNumber(), 0U); - - // Make sure values are correct before and after flush/compaction - for (int i = 0; i < 2; i++) { - for (int k = 0; k < 200; k++) { - std::string value = Key(k) + "_val"; - if (k % 5 == 0) { - value += "_new"; - } - ASSERT_EQ(Get(Key(k)), value); - } - for (int k = 200; k < 300; k++) { - std::string value = Key(k) + "_val"; - ASSERT_EQ(Get(Key(k)), value); - } - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - - // Delete keys in range (200 => 299) - for (int k = 200; k < 300; k++) { - ASSERT_OK(Delete(Key(k))); - } - // We deleted range (200 => 299) but cannot add file5 because - // of the range tombstones - ASSERT_NOK(DeprecatedAddFile(file_list2)); - - // Compacting the DB will remove the tombstones - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // Now we can add the file - ASSERT_OK(DeprecatedAddFile(file_list2)); - - // Verify values of file5 in DB - for (int k = 200; k < 300; k++) { - std::string value = Key(k) + "_val"; - ASSERT_EQ(Get(Key(k)), value); - } - DestroyAndRecreateExternalSSTFilesDir(); - } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction | - kRangeDelSkipConfigs)); -} - -TEST_F(ExternalSSTFileTest, AddListAtomicity) { - do { - Options options = CurrentOptions(); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // files[0].sst (0 => 99) - // files[1].sst (100 => 199) - // ... 
- // file[8].sst (800 => 899) - int n = 9; - std::vector files(n); - std::vector files_info(n); - for (int i = 0; i < n; i++) { - files[i] = sst_files_dir_ + "file" + std::to_string(i) + ".sst"; - ASSERT_OK(sst_file_writer.Open(files[i])); - for (int k = i * 100; k < (i + 1) * 100; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ASSERT_OK(sst_file_writer.Finish(&files_info[i])); - ASSERT_EQ(files_info[i].file_path, files[i]); - ASSERT_EQ(files_info[i].num_entries, 100); - ASSERT_EQ(files_info[i].smallest_key, Key(i * 100)); - ASSERT_EQ(files_info[i].largest_key, Key((i + 1) * 100 - 1)); - } - files.push_back(sst_files_dir_ + "file" + std::to_string(n) + ".sst"); - ASSERT_NOK(DeprecatedAddFile(files)); - for (int k = 0; k < n * 100; k++) { - ASSERT_EQ("NOT_FOUND", Get(Key(k))); - } - files.pop_back(); - ASSERT_OK(DeprecatedAddFile(files)); - for (int k = 0; k < n * 100; k++) { - std::string value = Key(k) + "_val"; - ASSERT_EQ(Get(Key(k)), value); - } - DestroyAndRecreateExternalSSTFilesDir(); - } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction)); -} -// This test reporduce a bug that can happen in some cases if the DB started -// purging obsolete files when we are adding an external sst file. -// This situation may result in deleting the file while it's being added. -TEST_F(ExternalSSTFileTest, PurgeObsoleteFilesBug) { - Options options = CurrentOptions(); - SstFileWriter sst_file_writer(EnvOptions(), options); - - // file1.sst (0 => 500) - std::string sst_file_path = sst_files_dir_ + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(sst_file_path)); - for (int i = 0; i < 500; i++) { - std::string k = Key(i); - ASSERT_OK(sst_file_writer.Put(k, k + "_val")); - } - - ExternalSstFileInfo sst_file_info; - ASSERT_OK(sst_file_writer.Finish(&sst_file_info)); - - options.delete_obsolete_files_period_micros = 0; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::Prepare:FileAdded", [&](void* /* arg */) { - ASSERT_OK(Put("aaa", "bbb")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("aaa", "xxx")); - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(DeprecatedAddFile({sst_file_path})); - - for (int i = 0; i < 500; i++) { - std::string k = Key(i); - std::string v = k + "_val"; - ASSERT_EQ(Get(k), v); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(ExternalSSTFileTest, SkipSnapshot) { - Options options = CurrentOptions(); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // file1.sst (0 => 99) - std::string file1 = sst_files_dir_ + "file1.sst"; - ASSERT_OK(sst_file_writer.Open(file1)); - for (int k = 0; k < 100; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file1_info; - ASSERT_OK(sst_file_writer.Finish(&file1_info)); - ASSERT_EQ(file1_info.file_path, file1); - ASSERT_EQ(file1_info.num_entries, 100); - ASSERT_EQ(file1_info.smallest_key, Key(0)); - ASSERT_EQ(file1_info.largest_key, Key(99)); - - // file2.sst (100 => 299) - std::string file2 = sst_files_dir_ + "file2.sst"; - ASSERT_OK(sst_file_writer.Open(file2)); - for (int k = 100; k < 300; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file2_info; - ASSERT_OK(sst_file_writer.Finish(&file2_info)); - ASSERT_EQ(file2_info.file_path, file2); - 
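The tests above drive the core external-file workflow: keys must be handed to SstFileWriter in comparator order, Finish() reports the file's key range and entry count through ExternalSstFileInfo, and the finished file can then be ingested into a DB. A minimal standalone sketch of that workflow follows; the /tmp paths and key names are illustrative placeholders, not taken from the test.

#include <cassert>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/sst_file_writer.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;

  // Write an external SST file. Keys must be added in increasing order
  // according to options.comparator, or Put() returns a non-OK status.
  rocksdb::SstFileWriter writer(rocksdb::EnvOptions(), options);
  const std::string sst_path = "/tmp/example_external.sst";  // placeholder path
  rocksdb::Status s = writer.Open(sst_path);
  assert(s.ok());
  for (int k = 0; k < 100; k++) {
    s = writer.Put("key" + std::to_string(1000 + k), "value");
    assert(s.ok());
  }
  rocksdb::ExternalSstFileInfo info;
  s = writer.Finish(&info);
  assert(s.ok());
  // info.smallest_key / info.largest_key / info.num_entries describe the file.

  // Ingest the finished file into a DB.
  rocksdb::DB* db = nullptr;
  s = rocksdb::DB::Open(options, "/tmp/example_db", &db);  // placeholder path
  assert(s.ok());
  s = db->IngestExternalFile({sst_path}, rocksdb::IngestExternalFileOptions());
  assert(s.ok());

  std::string value;
  s = db->Get(rocksdb::ReadOptions(), "key1000", &value);
  assert(s.ok() && value == "value");

  delete db;
  return 0;
}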
ASSERT_EQ(file2_info.num_entries, 200); - ASSERT_EQ(file2_info.smallest_key, Key(100)); - ASSERT_EQ(file2_info.largest_key, Key(299)); - - ASSERT_OK(DeprecatedAddFile({file1})); - - // Add file will fail when holding snapshot and use the default - // skip_snapshot_check to false - const Snapshot* s1 = db_->GetSnapshot(); - if (s1 != nullptr) { - ASSERT_NOK(DeprecatedAddFile({file2})); - } - - // Add file will success when set skip_snapshot_check to true even db holding - // snapshot - if (s1 != nullptr) { - ASSERT_OK(DeprecatedAddFile({file2}, false, true)); - db_->ReleaseSnapshot(s1); - } - - // file3.sst (300 => 399) - std::string file3 = sst_files_dir_ + "file3.sst"; - ASSERT_OK(sst_file_writer.Open(file3)); - for (int k = 300; k < 400; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); - } - ExternalSstFileInfo file3_info; - ASSERT_OK(sst_file_writer.Finish(&file3_info)); - ASSERT_EQ(file3_info.file_path, file3); - ASSERT_EQ(file3_info.num_entries, 100); - ASSERT_EQ(file3_info.smallest_key, Key(300)); - ASSERT_EQ(file3_info.largest_key, Key(399)); - - // check that we have change the old key - ASSERT_EQ(Get(Key(300)), "NOT_FOUND"); - const Snapshot* s2 = db_->GetSnapshot(); - ASSERT_OK(DeprecatedAddFile({file3}, false, true)); - ASSERT_EQ(Get(Key(300)), Key(300) + ("_val")); - ASSERT_EQ(Get(Key(300), s2), Key(300) + ("_val")); - - db_->ReleaseSnapshot(s2); -} - -TEST_F(ExternalSSTFileTest, MultiThreaded) { - env_->skip_fsync_ = true; - // Bulk load 10 files every file contain 1000 keys - int num_files = 10; - int keys_per_file = 1000; - - // Generate file names - std::vector file_names; - for (int i = 0; i < num_files; i++) { - std::string file_name = "file_" + std::to_string(i) + ".sst"; - file_names.push_back(sst_files_dir_ + file_name); - } - - do { - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - std::atomic thread_num(0); - std::function write_file_func = [&]() { - int file_idx = thread_num.fetch_add(1); - int range_start = file_idx * keys_per_file; - int range_end = range_start + keys_per_file; - - SstFileWriter sst_file_writer(EnvOptions(), options); - - ASSERT_OK(sst_file_writer.Open(file_names[file_idx])); - - for (int k = range_start; k < range_end; k++) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k))); - } - - ASSERT_OK(sst_file_writer.Finish()); - }; - // Write num_files files in parallel - std::vector sst_writer_threads; - for (int i = 0; i < num_files; ++i) { - sst_writer_threads.emplace_back(write_file_func); - } - - for (auto& t : sst_writer_threads) { - t.join(); - } - - fprintf(stderr, "Wrote %d files (%d keys)\n", num_files, - num_files * keys_per_file); - - thread_num.store(0); - std::atomic files_added(0); - // Thread 0 -> Load {f0,f1} - // Thread 1 -> Load {f0,f1} - // Thread 2 -> Load {f2,f3} - // Thread 3 -> Load {f2,f3} - // Thread 4 -> Load {f4,f5} - // Thread 5 -> Load {f4,f5} - // ... - std::function load_file_func = [&]() { - // We intentionally add every file twice, and assert that it was added - // only once and the other add failed - int thread_id = thread_num.fetch_add(1); - int file_idx = (thread_id / 2) * 2; - // sometimes we use copy, sometimes link .. 
the result should be the same - bool move_file = (thread_id % 3 == 0); - - std::vector files_to_add; - - files_to_add = {file_names[file_idx]}; - if (static_cast(file_idx + 1) < file_names.size()) { - files_to_add.push_back(file_names[file_idx + 1]); - } - - Status s = DeprecatedAddFile(files_to_add, move_file); - if (s.ok()) { - files_added += static_cast(files_to_add.size()); - } - }; - - // Bulk load num_files files in parallel - std::vector add_file_threads; - DestroyAndReopen(options); - for (int i = 0; i < num_files; ++i) { - add_file_threads.emplace_back(load_file_func); - } - - for (auto& t : add_file_threads) { - t.join(); - } - ASSERT_EQ(files_added.load(), num_files); - fprintf(stderr, "Loaded %d files (%d keys)\n", num_files, - num_files * keys_per_file); - - // Overwrite values of keys divisible by 100 - for (int k = 0; k < num_files * keys_per_file; k += 100) { - std::string key = Key(k); - ASSERT_OK(Put(key, key + "_new")); - } - - for (int i = 0; i < 2; i++) { - // Make sure the values are correct before and after flush/compaction - for (int k = 0; k < num_files * keys_per_file; ++k) { - std::string key = Key(k); - std::string value = (k % 100 == 0) ? (key + "_new") : key; - ASSERT_EQ(Get(key), value); - } - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - - fprintf(stderr, "Verified %d values\n", num_files * keys_per_file); - DestroyAndRecreateExternalSSTFilesDir(); - } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction)); -} - -TEST_F(ExternalSSTFileTest, OverlappingRanges) { - env_->skip_fsync_ = true; - Random rnd(301); - SequenceNumber assigned_seqno = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::Run", [&assigned_seqno](void* arg) { - ASSERT_TRUE(arg != nullptr); - assigned_seqno = *(static_cast(arg)); - }); - bool need_flush = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::IngestExternalFile:NeedFlush", [&need_flush](void* arg) { - ASSERT_TRUE(arg != nullptr); - need_flush = *(static_cast(arg)); - }); - bool overlap_with_db = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile", - [&overlap_with_db](void* arg) { - ASSERT_TRUE(arg != nullptr); - overlap_with_db = *(static_cast(arg)); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - do { - Options options = CurrentOptions(); - env_->skip_fsync_ = true; - DestroyAndReopen(options); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - printf("Option config = %d\n", option_config_); - std::vector> key_ranges; - for (int i = 0; i < 100; i++) { - int range_start = rnd.Uniform(20000); - int keys_per_range = 10 + rnd.Uniform(41); - - key_ranges.emplace_back(range_start, range_start + keys_per_range); - } - - int memtable_add = 0; - int success_add_file = 0; - int failed_add_file = 0; - std::map true_data; - for (size_t i = 0; i < key_ranges.size(); i++) { - int range_start = key_ranges[i].first; - int range_end = key_ranges[i].second; - - Status s; - std::string range_val = "range_" + std::to_string(i); - - // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile - if (i && i % 5 == 0) { - // Use DB::Put to insert range (insert into memtable) - range_val += "_put"; - for (int k = range_start; k <= range_end; k++) { - s = Put(Key(k), range_val); - ASSERT_OK(s); - } - memtable_add++; - } else { - // Use DB::AddFile to insert range - range_val += "_add_file"; - - // 
Generate the file containing the range - std::string file_name = sst_files_dir_ + env_->GenerateUniqueId(); - s = sst_file_writer.Open(file_name); - ASSERT_OK(s); - for (int k = range_start; k <= range_end; k++) { - s = sst_file_writer.Put(Key(k), range_val); - ASSERT_OK(s); - } - ExternalSstFileInfo file_info; - s = sst_file_writer.Finish(&file_info); - ASSERT_OK(s); - - // Insert the generated file - s = DeprecatedAddFile({file_name}); - auto it = true_data.lower_bound(Key(range_start)); - if (option_config_ != kUniversalCompaction && - option_config_ != kUniversalCompactionMultiLevel && - option_config_ != kUniversalSubcompactions) { - if (it != true_data.end() && it->first <= Key(range_end)) { - // This range overlap with data already exist in DB - ASSERT_NOK(s); - failed_add_file++; - } else { - ASSERT_OK(s); - success_add_file++; - } - } else { - if ((it != true_data.end() && it->first <= Key(range_end)) || - need_flush || assigned_seqno > 0 || overlap_with_db) { - // This range overlap with data already exist in DB - ASSERT_NOK(s); - failed_add_file++; - } else { - ASSERT_OK(s); - success_add_file++; - } - } - } - - if (s.ok()) { - // Update true_data map to include the new inserted data - for (int k = range_start; k <= range_end; k++) { - true_data[Key(k)] = range_val; - } - } - - // Flush / Compact the DB - if (i && i % 50 == 0) { - ASSERT_OK(Flush()); - } - if (i && i % 75 == 0) { - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - } - - printf("Total: %" ROCKSDB_PRIszt - " ranges\n" - "AddFile()|Success: %d ranges\n" - "AddFile()|RangeConflict: %d ranges\n" - "Put(): %d ranges\n", - key_ranges.size(), success_add_file, failed_add_file, memtable_add); - - // Verify the correctness of the data - for (const auto& kv : true_data) { - ASSERT_EQ(Get(kv.first), kv.second); - } - printf("keys/values verified\n"); - DestroyAndRecreateExternalSSTFilesDir(); - } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction)); -} - -TEST_P(ExternalSSTFileTest, PickedLevel) { - env_->skip_fsync_ = true; - Options options = CurrentOptions(); - options.disable_auto_compactions = false; - options.level0_file_num_compaction_trigger = 4; - options.num_levels = 4; - DestroyAndReopen(options); - - std::map true_data; - - // File 0 will go to last level (L3) - ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, -1, false, false, true, - false, false, &true_data)); - EXPECT_EQ(FilesPerLevel(), "0,0,0,1"); - - // File 1 will go to level L2 (since it overlap with file 0 in L3) - ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, -1, false, false, true, - false, false, &true_data)); - EXPECT_EQ(FilesPerLevel(), "0,0,1,1"); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"ExternalSSTFileTest::PickedLevel:0", "BackgroundCallCompaction:0"}, - {"DBImpl::BackgroundCompaction:Start", - "ExternalSSTFileTest::PickedLevel:1"}, - {"ExternalSSTFileTest::PickedLevel:2", - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Flush 4 files containing the same keys - for (int i = 0; i < 4; i++) { - ASSERT_OK(Put(Key(3), Key(3) + "put")); - ASSERT_OK(Put(Key(8), Key(8) + "put")); - true_data[Key(3)] = Key(3) + "put"; - true_data[Key(8)] = Key(8) + "put"; - ASSERT_OK(Flush()); - } - - // Wait for BackgroundCompaction() to be called - TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevel:0"); - TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevel:1"); - - EXPECT_EQ(FilesPerLevel(), "4,0,1,1"); - - 
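In the OverlappingRanges test above, an AddFile call that collides with existing data is expected to fail, and the overlapping range is then written through regular Put()s instead. The helper below is a hypothetical sketch of that fall-back pattern for callers of the public API; the function name and parameters are invented for illustration, not part of the test.

#include <string>
#include <utility>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

rocksdb::Status IngestOrFallbackToPut(
    rocksdb::DB* db, const std::string& sst_path,
    const std::vector<std::pair<std::string, std::string>>& kvs) {
  rocksdb::IngestExternalFileOptions ifo;
  // Refuse files whose range overlaps data already in the DB
  // (no global sequence number assignment).
  ifo.allow_global_seqno = false;
  rocksdb::Status s = db->IngestExternalFile({sst_path}, ifo);
  if (s.ok()) {
    return s;
  }
  // Overlap (or another ingestion failure): write the keys through the
  // normal write path instead, mirroring the Put() branch of the test.
  for (const auto& kv : kvs) {
    s = db->Put(rocksdb::WriteOptions(), kv.first, kv.second);
    if (!s.ok()) {
      return s;
    }
  }
  return rocksdb::Status::OK();
}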
// This file overlaps with file 0 (L3), file 1 (L2) and the - // output of compaction going to L1 - ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, -1, - true /* allow_global_seqno */, false, - true, false, false, &true_data)); - EXPECT_EQ(FilesPerLevel(), "5,0,1,1"); - - // This file does not overlap with any file or with the running compaction - ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false, - false, false, false, &true_data)); - EXPECT_EQ(FilesPerLevel(), "5,0,1,2"); - - // Hold compaction from finishing - TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevel:2"); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - EXPECT_EQ(FilesPerLevel(), "1,1,1,2"); - - size_t kcnt = 0; - VerifyDBFromMap(true_data, &kcnt, false); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(ExternalSSTFileTest, IngestNonExistingFile) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - - Status s = db_->IngestExternalFile({"non_existing_file"}, - IngestExternalFileOptions()); - ASSERT_NOK(s); - - // Verify file deletion is not impacted (verify a bug fix) - ASSERT_OK(Put(Key(1), Key(1))); - ASSERT_OK(Put(Key(9), Key(9))); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(Key(1), Key(1))); - ASSERT_OK(Put(Key(9), Key(9))); - ASSERT_OK(Flush()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); - - // After full compaction, there should be only 1 file. - std::vector files; - ASSERT_OK(env_->GetChildren(dbname_, &files)); - int num_sst_files = 0; - for (auto& f : files) { - uint64_t number; - FileType type; - if (ParseFileName(f, &number, &type) && type == kTableFile) { - num_sst_files++; - } - } - ASSERT_EQ(1, num_sst_files); -} - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -TEST_F(ExternalSSTFileTest, CompactDuringAddFileRandom) { - env_->skip_fsync_ = true; - Options options = CurrentOptions(); - options.disable_auto_compactions = false; - options.level0_file_num_compaction_trigger = 2; - options.num_levels = 2; - DestroyAndReopen(options); - - std::function bg_compact = [&]() { - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - }; - - int range_id = 0; - std::vector file_keys; - std::function bg_addfile = [&]() { - ASSERT_OK(GenerateAndAddExternalFile(options, file_keys, range_id, - true /* allow_global_seqno */)); - }; - - const int num_of_ranges = 1000; - std::vector threads; - while (range_id < num_of_ranges) { - int range_start = range_id * 10; - int range_end = range_start + 10; - - file_keys.clear(); - for (int k = range_start + 1; k < range_end; k++) { - file_keys.push_back(k); - } - ASSERT_OK(Put(Key(range_start), Key(range_start))); - ASSERT_OK(Put(Key(range_end), Key(range_end))); - ASSERT_OK(Flush()); - - if (range_id % 10 == 0) { - threads.emplace_back(bg_compact); - } - threads.emplace_back(bg_addfile); - - for (auto& t : threads) { - t.join(); - } - threads.clear(); - - range_id++; - } - - for (int rid = 0; rid < num_of_ranges; rid++) { - int range_start = rid * 10; - int range_end = range_start + 10; - - ASSERT_EQ(Get(Key(range_start)), Key(range_start)) << rid; - ASSERT_EQ(Get(Key(range_end)), Key(range_end)) << rid; - for (int k = range_start + 1; k < range_end; k++) { - std::string v = Key(k) + std::to_string(rid); - ASSERT_EQ(Get(Key(k)), v) << rid; - } - } -} -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_F(ExternalSSTFileTest, PickedLevelDynamic) { - 
env_->skip_fsync_ = true; - Options options = CurrentOptions(); - options.disable_auto_compactions = false; - options.level0_file_num_compaction_trigger = 4; - options.level_compaction_dynamic_level_bytes = true; - options.num_levels = 4; - DestroyAndReopen(options); - std::map true_data; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"ExternalSSTFileTest::PickedLevelDynamic:0", - "BackgroundCallCompaction:0"}, - {"DBImpl::BackgroundCompaction:Start", - "ExternalSSTFileTest::PickedLevelDynamic:1"}, - {"ExternalSSTFileTest::PickedLevelDynamic:2", - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Flush 4 files containing the same keys - for (int i = 0; i < 4; i++) { - for (int k = 20; k <= 30; k++) { - ASSERT_OK(Put(Key(k), Key(k) + "put")); - true_data[Key(k)] = Key(k) + "put"; - } - for (int k = 50; k <= 60; k++) { - ASSERT_OK(Put(Key(k), Key(k) + "put")); - true_data[Key(k)] = Key(k) + "put"; - } - ASSERT_OK(Flush()); - } - - // Wait for BackgroundCompaction() to be called - TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelDynamic:0"); - TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelDynamic:1"); - - // This file overlaps with the output of the compaction (going to L3) - // so the file will be added to L0 since L3 is the base level - ASSERT_OK(GenerateAndAddExternalFile(options, {31, 32, 33, 34}, -1, - true /* allow_global_seqno */, false, - true, false, false, &true_data)); - EXPECT_EQ(FilesPerLevel(), "5"); - - // This file does not overlap with the current running compactiong - ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false, - true, false, false, &true_data)); - EXPECT_EQ(FilesPerLevel(), "5,0,0,1"); - - // Hold compaction from finishing - TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelDynamic:2"); - - // Output of the compaction will go to L3 - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - EXPECT_EQ(FilesPerLevel(), "1,0,0,2"); - - Close(); - options.disable_auto_compactions = true; - Reopen(options); - - ASSERT_OK(GenerateAndAddExternalFile(options, {1, 15, 19}, -1, false, false, - true, false, false, &true_data)); - ASSERT_EQ(FilesPerLevel(), "1,0,0,3"); - - ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1001, 1002}, -1, false, - false, true, false, false, &true_data)); - ASSERT_EQ(FilesPerLevel(), "1,0,0,4"); - - ASSERT_OK(GenerateAndAddExternalFile(options, {500, 600, 700}, -1, false, - false, true, false, false, &true_data)); - ASSERT_EQ(FilesPerLevel(), "1,0,0,5"); - - // File 5 overlaps with file 2 (L3 / base level) - ASSERT_OK(GenerateAndAddExternalFile(options, {2, 10}, -1, false, false, true, - false, false, &true_data)); - ASSERT_EQ(FilesPerLevel(), "2,0,0,5"); - - // File 6 overlaps with file 2 (L3 / base level) and file 5 (L0) - ASSERT_OK(GenerateAndAddExternalFile(options, {3, 9}, -1, false, false, true, - false, false, &true_data)); - ASSERT_EQ(FilesPerLevel(), "3,0,0,5"); - - // Verify data in files - size_t kcnt = 0; - VerifyDBFromMap(true_data, &kcnt, false); - - // Write range [5 => 10] to L0 - for (int i = 5; i <= 10; i++) { - std::string k = Key(i); - std::string v = k + "put"; - ASSERT_OK(Put(k, v)); - true_data[k] = v; - } - ASSERT_OK(Flush()); - ASSERT_EQ(FilesPerLevel(), "4,0,0,5"); - - // File 7 overlaps with file 4 (L3) - ASSERT_OK(GenerateAndAddExternalFile(options, {650, 651, 652}, -1, false, - false, true, false, false, &true_data)); - ASSERT_EQ(FilesPerLevel(), "5,0,0,5"); - - VerifyDBFromMap(true_data, &kcnt, 
false); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(ExternalSSTFileTest, AddExternalSstFileWithCustomCompartor) { - Options options = CurrentOptions(); - options.comparator = ReverseBytewiseComparator(); - DestroyAndReopen(options); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // Generate files with these key ranges - // {14 -> 0} - // {24 -> 10} - // {34 -> 20} - // {44 -> 30} - // .. - std::vector generated_files; - for (int i = 0; i < 10; i++) { - std::string file_name = sst_files_dir_ + env_->GenerateUniqueId(); - ASSERT_OK(sst_file_writer.Open(file_name)); - - int range_end = i * 10; - int range_start = range_end + 15; - for (int k = (range_start - 1); k >= range_end; k--) { - ASSERT_OK(sst_file_writer.Put(Key(k), Key(k))); - } - ExternalSstFileInfo file_info; - ASSERT_OK(sst_file_writer.Finish(&file_info)); - generated_files.push_back(file_name); - } - - std::vector in_files; - - // These 2nd and 3rd files overlap with each other - in_files = {generated_files[0], generated_files[4], generated_files[5], - generated_files[7]}; - ASSERT_NOK(DeprecatedAddFile(in_files)); - - // These 2 files don't overlap with each other - in_files = {generated_files[0], generated_files[2]}; - ASSERT_OK(DeprecatedAddFile(in_files)); - - // These 2 files don't overlap with each other but overlap with keys in DB - in_files = {generated_files[3], generated_files[7]}; - ASSERT_NOK(DeprecatedAddFile(in_files)); - - // Files don't overlap and don't overlap with DB key range - in_files = {generated_files[4], generated_files[6], generated_files[8]}; - ASSERT_OK(DeprecatedAddFile(in_files)); - - for (int i = 0; i < 100; i++) { - if (i % 20 <= 14) { - ASSERT_EQ(Get(Key(i)), Key(i)); - } else { - ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); - } - } -} - -TEST_F(ExternalSSTFileTest, AddFileTrivialMoveBug) { - Options options = CurrentOptions(); - options.num_levels = 3; - options.IncreaseParallelism(20); - DestroyAndReopen(options); - - ASSERT_OK(GenerateAndAddExternalFile(options, {1, 4}, 1)); // L3 - ASSERT_OK(GenerateAndAddExternalFile(options, {2, 3}, 2)); // L2 - - ASSERT_OK(GenerateAndAddExternalFile(options, {10, 14}, 3)); // L3 - ASSERT_OK(GenerateAndAddExternalFile(options, {12, 13}, 4)); // L2 - - ASSERT_OK(GenerateAndAddExternalFile(options, {20, 24}, 5)); // L3 - ASSERT_OK(GenerateAndAddExternalFile(options, {22, 23}, 6)); // L2 - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "CompactionJob::Run():Start", [&](void* /*arg*/) { - // Fit in L3 but will overlap with the compaction output so will be - // added to L2. 
Prior to the fix, a compaction will then trivially move - // this file to L3 and break LSM consistency - static std::atomic called = {false}; - if (!called) { - called = true; - ASSERT_OK(dbfull()->SetOptions({{"max_bytes_for_level_base", "1"}})); - ASSERT_OK(GenerateAndAddExternalFile(options, {15, 16}, 7, - true /* allow_global_seqno */)); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - CompactRangeOptions cro; - cro.exclusive_manual_compaction = false; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(ExternalSSTFileTest, CompactAddedFiles) { - Options options = CurrentOptions(); - options.num_levels = 3; - DestroyAndReopen(options); - - ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, 1)); // L3 - ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, 2)); // L2 - ASSERT_OK(GenerateAndAddExternalFile(options, {3, 8}, 3)); // L1 - ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, 4)); // L0 - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); -} - -TEST_F(ExternalSSTFileTest, SstFileWriterNonSharedKeys) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - std::string file_path = sst_files_dir_ + "/not_shared"; - SstFileWriter sst_file_writer(EnvOptions(), options); - - std::string suffix(100, 'X'); - ASSERT_OK(sst_file_writer.Open(file_path)); - ASSERT_OK(sst_file_writer.Put("A" + suffix, "VAL")); - ASSERT_OK(sst_file_writer.Put("BB" + suffix, "VAL")); - ASSERT_OK(sst_file_writer.Put("CC" + suffix, "VAL")); - ASSERT_OK(sst_file_writer.Put("CXD" + suffix, "VAL")); - ASSERT_OK(sst_file_writer.Put("CZZZ" + suffix, "VAL")); - ASSERT_OK(sst_file_writer.Put("ZAAAX" + suffix, "VAL")); - - ASSERT_OK(sst_file_writer.Finish()); - ASSERT_OK(DeprecatedAddFile({file_path})); -} - -TEST_F(ExternalSSTFileTest, WithUnorderedWrite) { - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL", - "ExternalSSTFileTest::WithUnorderedWrite:WaitWriteWAL"}, - {"DBImpl::WaitForPendingWrites:BeforeBlock", - "DBImpl::WriteImpl:BeforeUnorderedWriteMemtable"}}); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::IngestExternalFile:NeedFlush", [&](void* need_flush) { - ASSERT_TRUE(*reinterpret_cast(need_flush)); - }); - - Options options = CurrentOptions(); - options.unordered_write = true; - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "v1")); - SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer([&]() { ASSERT_OK(Put("bar", "v2")); }); - - TEST_SYNC_POINT("ExternalSSTFileTest::WithUnorderedWrite:WaitWriteWAL"); - ASSERT_OK(GenerateAndAddExternalFile(options, {{"bar", "v3"}}, -1, - true /* allow_global_seqno */)); - ASSERT_EQ(Get("bar"), "v3"); - - writer.join(); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoRandomized) { - env_->skip_fsync_ = true; - Options options = CurrentOptions(); - options.IncreaseParallelism(20); - options.level0_slowdown_writes_trigger = 256; - options.level0_stop_writes_trigger = 256; - - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - for (int iter = 0; iter < 2; iter++) { - bool write_to_memtable = (iter == 0); - 
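The parameterized global-seqno tests below vary two ingestion knobs. A compact sketch of how those options are set on IngestExternalFileOptions (assuming an already-open db and a finished external file at sst_path):

#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Configure ingestion the way the parameterized tests do: always allow a
// global sequence number, optionally persist it into the file, and optionally
// verify block checksums before ingestion.
rocksdb::Status IngestWithSeqnoOptions(rocksdb::DB* db,
                                       const std::string& sst_path,
                                       bool write_global_seqno,
                                       bool verify_checksums_before_ingest) {
  rocksdb::IngestExternalFileOptions ifo;
  ifo.allow_global_seqno = true;                // file may overlap existing keys
  ifo.write_global_seqno = write_global_seqno;  // rewrite the seqno field in the file
  ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
  return db->IngestExternalFile({sst_path}, ifo);
}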
DestroyAndReopen(options); - - Random rnd(301); - std::map true_data; - for (int i = 0; i < 500; i++) { - std::vector> random_data; - for (int j = 0; j < 100; j++) { - std::string k = rnd.RandomString(rnd.Next() % 20); - std::string v = rnd.RandomString(rnd.Next() % 50); - random_data.emplace_back(k, v); - } - - if (write_to_memtable && rnd.OneIn(4)) { - // 25% of writes go through memtable - for (auto& entry : random_data) { - ASSERT_OK(Put(entry.first, entry.second)); - true_data[entry.first] = entry.second; - } - } else { - ASSERT_OK(GenerateAndAddExternalFile( - options, random_data, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, true, &true_data)); - } - } - size_t kcnt = 0; - VerifyDBFromMap(true_data, &kcnt, false); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - VerifyDBFromMap(true_data, &kcnt, false); - } -} -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) { - Options options = CurrentOptions(); - options.num_levels = 5; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - std::vector> file_data; - std::map true_data; - - // Insert 100 -> 200 into the memtable - for (int i = 100; i <= 200; i++) { - ASSERT_OK(Put(Key(i), "memtable")); - true_data[Key(i)] = "memtable"; - } - - // Insert 0 -> 20 using AddFile - file_data.clear(); - for (int i = 0; i <= 20; i++) { - file_data.emplace_back(Key(i), "L4"); - } - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - ASSERT_OK(GenerateAndAddExternalFile( - options, file_data, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false, &true_data)); - - // This file don't overlap with anything in the DB, will go to L4 - ASSERT_EQ("0,0,0,0,1", FilesPerLevel()); - - // Insert 80 -> 130 using AddFile - file_data.clear(); - for (int i = 80; i <= 130; i++) { - file_data.emplace_back(Key(i), "L0"); - } - ASSERT_OK(GenerateAndAddExternalFile( - options, file_data, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false, &true_data)); - - // This file overlap with the memtable, so it will flush it and add - // it self to L0 - ASSERT_EQ("2,0,0,0,1", FilesPerLevel()); - - // Insert 30 -> 50 using AddFile - file_data.clear(); - for (int i = 30; i <= 50; i++) { - file_data.emplace_back(Key(i), "L4"); - } - ASSERT_OK(GenerateAndAddExternalFile( - options, file_data, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false, &true_data)); - - // This file don't overlap with anything in the DB and fit in L4 as well - ASSERT_EQ("2,0,0,0,2", FilesPerLevel()); - - // Insert 10 -> 40 using AddFile - file_data.clear(); - for (int i = 10; i <= 40; i++) { - file_data.emplace_back(Key(i), "L3"); - } - ASSERT_OK(GenerateAndAddExternalFile( - options, file_data, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false, &true_data)); - - // This file overlap with files in L4, we will ingest it in L3 - ASSERT_EQ("2,0,0,1,2", FilesPerLevel()); - - size_t kcnt = 0; - VerifyDBFromMap(true_data, &kcnt, false); -} - -TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - uint64_t entries_in_memtable; - std::map true_data; - - for (int k : {10, 20, 40, 80}) { - ASSERT_OK(Put(Key(k), "memtable")); - true_data[Key(k)] = "memtable"; - } - 
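The surrounding memtable-flush test relies on ingestion flushing the memtable only when the external file's key range overlaps keys still sitting in the memtable. Below is a hypothetical helper sketching how a caller could observe that, using the same kNumEntriesActiveMemTable property the test reads; db and sst_path are assumed to exist already.

#include <cstdint>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

rocksdb::Status IngestAndReportFlush(rocksdb::DB* db,
                                     const std::string& sst_path,
                                     bool* memtable_was_flushed) {
  uint64_t before = 0, after = 0;
  db->GetIntProperty(rocksdb::DB::Properties::kNumEntriesActiveMemTable,
                     &before);

  rocksdb::IngestExternalFileOptions ifo;
  ifo.allow_global_seqno = true;
  // If a flush is required, let the ingestion call block on it instead of
  // failing the ingestion.
  ifo.allow_blocking_flush = true;
  rocksdb::Status s = db->IngestExternalFile({sst_path}, ifo);

  db->GetIntProperty(rocksdb::DB::Properties::kNumEntriesActiveMemTable,
                     &after);
  *memtable_was_flushed = (before > 0 && after == 0);
  return s;
}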
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable, - &entries_in_memtable)); - ASSERT_GE(entries_in_memtable, 1); - - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - // No need for flush - ASSERT_OK(GenerateAndAddExternalFile( - options, {90, 100, 110}, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false, &true_data)); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable, - &entries_in_memtable)); - ASSERT_GE(entries_in_memtable, 1); - - // This file will flush the memtable - ASSERT_OK(GenerateAndAddExternalFile( - options, {19, 20, 21}, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false, &true_data)); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable, - &entries_in_memtable)); - ASSERT_EQ(entries_in_memtable, 0); - - for (int k : {200, 201, 205, 206}) { - ASSERT_OK(Put(Key(k), "memtable")); - true_data[Key(k)] = "memtable"; - } - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable, - &entries_in_memtable)); - ASSERT_GE(entries_in_memtable, 1); - - // No need for flush, this file keys fit between the memtable keys - ASSERT_OK(GenerateAndAddExternalFile( - options, {202, 203, 204}, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false, &true_data)); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable, - &entries_in_memtable)); - ASSERT_GE(entries_in_memtable, 1); - - // This file will flush the memtable - ASSERT_OK(GenerateAndAddExternalFile( - options, {206, 207}, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false, &true_data)); - ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable, - &entries_in_memtable)); - ASSERT_EQ(entries_in_memtable, 0); - - size_t kcnt = 0; - VerifyDBFromMap(true_data, &kcnt, false); -} - -TEST_P(ExternalSSTFileTest, L0SortingIssue) { - Options options = CurrentOptions(); - options.num_levels = 2; - DestroyAndReopen(options); - std::map true_data; - - ASSERT_OK(Put(Key(1), "memtable")); - ASSERT_OK(Put(Key(10), "memtable")); - - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - // No Flush needed, No global seqno needed, Ingest in L1 - ASSERT_OK( - GenerateAndAddExternalFile(options, {7, 8}, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false)); - // No Flush needed, but need a global seqno, Ingest in L0 - ASSERT_OK( - GenerateAndAddExternalFile(options, {7, 8}, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, false)); - printf("%s\n", FilesPerLevel().c_str()); - - // Overwrite what we added using external files - ASSERT_OK(Put(Key(7), "memtable")); - ASSERT_OK(Put(Key(8), "memtable")); - - // Read values from memtable - ASSERT_EQ(Get(Key(7)), "memtable"); - ASSERT_EQ(Get(Key(8)), "memtable"); - - // Flush and read from L0 - ASSERT_OK(Flush()); - printf("%s\n", FilesPerLevel().c_str()); - ASSERT_EQ(Get(Key(7)), "memtable"); - ASSERT_EQ(Get(Key(8)), "memtable"); -} - -TEST_F(ExternalSSTFileTest, CompactionDeadlock) { - Options options = CurrentOptions(); - options.num_levels = 2; - options.level0_file_num_compaction_trigger = 4; - options.level0_slowdown_writes_trigger = 4; - options.level0_stop_writes_trigger = 4; - DestroyAndReopen(options); - - // atomic conter of currently running bg threads - std::atomic running_threads(0); - - 
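The level-assignment and L0-sorting checks above use the test-only FilesPerLevel() helper. Outside the test harness, roughly the same information is available through the public metadata API; a small sketch, assuming an already-open db:

#include <cstdio>

#include "rocksdb/db.h"
#include "rocksdb/metadata.h"

// Print how many SST files sit on each level of the default column family,
// e.g. to see which level an ingested file landed in.
void PrintFilesPerLevel(rocksdb::DB* db) {
  rocksdb::ColumnFamilyMetaData meta;
  db->GetColumnFamilyMetaData(&meta);
  for (const auto& level : meta.levels) {
    std::printf("L%d: %zu file(s)\n", level.level, level.files.size());
  }
}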
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::DelayWrite:Wait", "ExternalSSTFileTest::DeadLock:0"}, - {"ExternalSSTFileTest::DeadLock:1", "DBImpl::AddFile:Start"}, - {"DBImpl::AddFile:MutexLock", "ExternalSSTFileTest::DeadLock:2"}, - {"ExternalSSTFileTest::DeadLock:3", "BackgroundCallCompaction:0"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Start ingesting an external file in the background - ROCKSDB_NAMESPACE::port::Thread bg_ingest_file([&]() { - running_threads += 1; - ASSERT_OK(GenerateAndAddExternalFile(options, {5, 6})); - running_threads -= 1; - }); - - ASSERT_OK(Put(Key(1), "memtable")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(Key(2), "memtable")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(Key(3), "memtable")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put(Key(4), "memtable")); - ASSERT_OK(Flush()); - - // This thread will try to insert into the memtable, but since we have 4 L0 - // files it will be blocked and will hold the writer thread - ROCKSDB_NAMESPACE::port::Thread bg_block_put([&]() { - running_threads += 1; - ASSERT_OK(Put(Key(10), "memtable")); - running_threads -= 1; - }); - - // Make sure DelayWrite is called first - TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:0"); - - // `DBImpl::AddFile:Start` will wait until we reach this point - TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:1"); - - // Wait for IngestExternalFile() to start and acquire the mutex - TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:2"); - - // Now let compaction start - TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:3"); - - // Wait for at most 5 seconds; if the background threads have not finished - // by then, we hit the deadlock bug - for (int i = 0; i < 10; i++) { - if (running_threads.load() == 0) { - break; - } - // Make sure we do a "real sleep", not a mock one. 
- SystemClock::Default()->SleepForMicroseconds(500000); - } - - ASSERT_EQ(running_threads.load(), 0); - - bg_ingest_file.join(); - bg_block_put.join(); -} - -TEST_F(ExternalSSTFileTest, DirtyExit) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - std::string file_path = sst_files_dir_ + "/dirty_exit"; - std::unique_ptr sst_file_writer; - - // Destruct SstFileWriter without calling Finish() - sst_file_writer.reset(new SstFileWriter(EnvOptions(), options)); - ASSERT_OK(sst_file_writer->Open(file_path)); - sst_file_writer.reset(); - - // Destruct SstFileWriter with a failing Finish - sst_file_writer.reset(new SstFileWriter(EnvOptions(), options)); - ASSERT_OK(sst_file_writer->Open(file_path)); - ASSERT_NOK(sst_file_writer->Finish()); -} - -TEST_F(ExternalSSTFileTest, FileWithCFInfo) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"koko", "toto"}, options); - - SstFileWriter sfw_default(EnvOptions(), options, handles_[0]); - SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); - SstFileWriter sfw_cf2(EnvOptions(), options, handles_[2]); - SstFileWriter sfw_unknown(EnvOptions(), options); - - // default_cf.sst - const std::string cf_default_sst = sst_files_dir_ + "/default_cf.sst"; - ASSERT_OK(sfw_default.Open(cf_default_sst)); - ASSERT_OK(sfw_default.Put("K1", "V1")); - ASSERT_OK(sfw_default.Put("K2", "V2")); - ASSERT_OK(sfw_default.Finish()); - - // cf1.sst - const std::string cf1_sst = sst_files_dir_ + "/cf1.sst"; - ASSERT_OK(sfw_cf1.Open(cf1_sst)); - ASSERT_OK(sfw_cf1.Put("K3", "V1")); - ASSERT_OK(sfw_cf1.Put("K4", "V2")); - ASSERT_OK(sfw_cf1.Finish()); - - // cf_unknown.sst - const std::string unknown_sst = sst_files_dir_ + "/cf_unknown.sst"; - ASSERT_OK(sfw_unknown.Open(unknown_sst)); - ASSERT_OK(sfw_unknown.Put("K5", "V1")); - ASSERT_OK(sfw_unknown.Put("K6", "V2")); - ASSERT_OK(sfw_unknown.Finish()); - - IngestExternalFileOptions ifo; - - // SST CF don't match - ASSERT_NOK(db_->IngestExternalFile(handles_[0], {cf1_sst}, ifo)); - // SST CF don't match - ASSERT_NOK(db_->IngestExternalFile(handles_[2], {cf1_sst}, ifo)); - // SST CF match - ASSERT_OK(db_->IngestExternalFile(handles_[1], {cf1_sst}, ifo)); - - // SST CF don't match - ASSERT_NOK(db_->IngestExternalFile(handles_[1], {cf_default_sst}, ifo)); - // SST CF don't match - ASSERT_NOK(db_->IngestExternalFile(handles_[2], {cf_default_sst}, ifo)); - // SST CF match - ASSERT_OK(db_->IngestExternalFile(handles_[0], {cf_default_sst}, ifo)); - - // SST CF unknown - ASSERT_OK(db_->IngestExternalFile(handles_[1], {unknown_sst}, ifo)); - // SST CF unknown - ASSERT_OK(db_->IngestExternalFile(handles_[2], {unknown_sst}, ifo)); - // SST CF unknown - ASSERT_OK(db_->IngestExternalFile(handles_[0], {unknown_sst}, ifo)); - - // Cannot ingest a file into a dropped CF - ASSERT_OK(db_->DropColumnFamily(handles_[1])); - ASSERT_NOK(db_->IngestExternalFile(handles_[1], {unknown_sst}, ifo)); - - // CF was not dropped, ok to Ingest - ASSERT_OK(db_->IngestExternalFile(handles_[2], {unknown_sst}, ifo)); -} - -/* - * Test and verify the functionality of ingestion_options.move_files and - * ingestion_options.failed_move_fall_back_to_copy - */ -TEST_P(ExternSSTFileLinkFailFallbackTest, LinkFailFallBackExternalSst) { - const bool fail_link = std::get<0>(GetParam()); - const bool failed_move_fall_back_to_copy = std::get<1>(GetParam()); - fs_->set_fail_link(fail_link); - const EnvOptions env_options; - DestroyAndReopen(options_); - const int kNumKeys = 10000; - IngestExternalFileOptions ifo; - ifo.move_files = true; - 
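move_files and the failed_move_fall_back_to_copy flag set just below are the two knobs this link-fallback test exercises. A minimal sketch of how an application might set them when calling the public API (db and sst_path assumed to exist):

#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Try to hard-link (move) the external file into the DB directory; if the
// link fails (e.g. the file lives on a different filesystem), optionally fall
// back to copying it instead of failing the ingestion.
rocksdb::Status IngestByMoveWithFallback(rocksdb::DB* db,
                                         const std::string& sst_path,
                                         bool fall_back_to_copy) {
  rocksdb::IngestExternalFileOptions ifo;
  ifo.move_files = true;
  ifo.failed_move_fall_back_to_copy = fall_back_to_copy;
  return db->IngestExternalFile({sst_path}, ifo);
}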
ifo.failed_move_fall_back_to_copy = failed_move_fall_back_to_copy; - - std::string file_path = sst_files_dir_ + "file1.sst"; - // Create SstFileWriter for default column family - SstFileWriter sst_file_writer(env_options, options_); - ASSERT_OK(sst_file_writer.Open(file_path)); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(sst_file_writer.Put(Key(i), Key(i) + "_value")); - } - ASSERT_OK(sst_file_writer.Finish()); - uint64_t file_size = 0; - ASSERT_OK(env_->GetFileSize(file_path, &file_size)); - - bool copyfile = false; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ExternalSstFileIngestionJob::Prepare:CopyFile", - [&](void* /* arg */) { copyfile = true; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - const Status s = db_->IngestExternalFile({file_path}, ifo); - - ColumnFamilyHandleImpl* cfh = - static_cast(dbfull()->DefaultColumnFamily()); - ColumnFamilyData* cfd = cfh->cfd(); - const InternalStats* internal_stats_ptr = cfd->internal_stats(); - const std::vector& comp_stats = - internal_stats_ptr->TEST_GetCompactionStats(); - uint64_t bytes_copied = 0; - uint64_t bytes_moved = 0; - for (const auto& stats : comp_stats) { - bytes_copied += stats.bytes_written; - bytes_moved += stats.bytes_moved; - } - - if (!fail_link) { - // Link operation succeeds. External SST should be moved. - ASSERT_OK(s); - ASSERT_EQ(0, bytes_copied); - ASSERT_EQ(file_size, bytes_moved); - ASSERT_FALSE(copyfile); - } else { - // Link operation fails. - ASSERT_EQ(0, bytes_moved); - if (failed_move_fall_back_to_copy) { - ASSERT_OK(s); - // Copy file is true since a failed link falls back to copy file. - ASSERT_TRUE(copyfile); - ASSERT_EQ(file_size, bytes_copied); - } else { - ASSERT_TRUE(s.IsNotSupported()); - // Copy file is false since a failed link does not fall back to copy file. 
- ASSERT_FALSE(copyfile); - ASSERT_EQ(0, bytes_copied); - } - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -class TestIngestExternalFileListener : public EventListener { - public: - void OnExternalFileIngested(DB* /*db*/, - const ExternalFileIngestionInfo& info) override { - ingested_files.push_back(info); - } - - std::vector ingested_files; -}; - -TEST_P(ExternalSSTFileTest, IngestionListener) { - Options options = CurrentOptions(); - TestIngestExternalFileListener* listener = - new TestIngestExternalFileListener(); - options.listeners.emplace_back(listener); - CreateAndReopenWithCF({"koko", "toto"}, options); - - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - // Ingest into default cf - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 2}, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, true, nullptr, handles_[0])); - ASSERT_EQ(listener->ingested_files.size(), 1); - ASSERT_EQ(listener->ingested_files.back().cf_name, "default"); - ASSERT_EQ(listener->ingested_files.back().global_seqno, 0); - ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_id, - 0); - ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_name, - "default"); - - // Ingest into cf1 - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 2}, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, true, nullptr, handles_[1])); - ASSERT_EQ(listener->ingested_files.size(), 2); - ASSERT_EQ(listener->ingested_files.back().cf_name, "koko"); - ASSERT_EQ(listener->ingested_files.back().global_seqno, 0); - ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_id, - 1); - ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_name, - "koko"); - - // Ingest into cf2 - ASSERT_OK(GenerateAndAddExternalFile( - options, {1, 2}, -1, true, write_global_seqno, - verify_checksums_before_ingest, false, true, nullptr, handles_[2])); - ASSERT_EQ(listener->ingested_files.size(), 3); - ASSERT_EQ(listener->ingested_files.back().cf_name, "toto"); - ASSERT_EQ(listener->ingested_files.back().global_seqno, 0); - ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_id, - 2); - ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_name, - "toto"); -} - -TEST_F(ExternalSSTFileTest, SnapshotInconsistencyBug) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - const int kNumKeys = 10000; - - // Insert keys using normal path and take a snapshot - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(Put(Key(i), Key(i) + "_V1")); - } - const Snapshot* snap = db_->GetSnapshot(); - - // Overwrite all keys using IngestExternalFile - std::string sst_file_path = sst_files_dir_ + "file1.sst"; - SstFileWriter sst_file_writer(EnvOptions(), options); - ASSERT_OK(sst_file_writer.Open(sst_file_path)); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(sst_file_writer.Put(Key(i), Key(i) + "_V2")); - } - ASSERT_OK(sst_file_writer.Finish()); - - IngestExternalFileOptions ifo; - ifo.move_files = true; - ASSERT_OK(db_->IngestExternalFile({sst_file_path}, ifo)); - - for (int i = 0; i < kNumKeys; i++) { - ASSERT_EQ(Get(Key(i), snap), Key(i) + "_V1"); - ASSERT_EQ(Get(Key(i)), Key(i) + "_V2"); - } - - db_->ReleaseSnapshot(snap); -} - -TEST_P(ExternalSSTFileTest, IngestBehind) { - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = 3; - 
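The IngestBehind test being set up here depends on two settings working together: the DB option allow_ingest_behind and the per-call ingest_behind flag. A standalone sketch under those assumptions (the paths and function name are placeholders):

#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Open a DB that reserves its bottommost level for ingest-behind files, then
// ingest a file beneath all existing data.
rocksdb::Status OpenAndIngestBehind(const std::string& db_path,
                                    const std::string& sst_path,
                                    rocksdb::DB** db) {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.allow_ingest_behind = true;  // reserves the bottommost level
  rocksdb::Status s = rocksdb::DB::Open(options, db_path, db);
  if (!s.ok()) {
    return s;
  }
  rocksdb::IngestExternalFileOptions ifo;
  ifo.ingest_behind = true;
  ifo.allow_global_seqno = true;  // the tests enable this alongside ingest_behind
  return (*db)->IngestExternalFile({sst_path}, ifo);
}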
options.disable_auto_compactions = false; - DestroyAndReopen(options); - std::vector> file_data; - std::map true_data; - - // Insert 100 -> 200 into the memtable - for (int i = 100; i <= 200; i++) { - ASSERT_OK(Put(Key(i), "memtable")); - true_data[Key(i)] = "memtable"; - } - - // Insert 100 -> 200 using IngestExternalFile - file_data.clear(); - for (int i = 0; i <= 20; i++) { - file_data.emplace_back(Key(i), "ingest_behind"); - } - - bool allow_global_seqno = true; - bool ingest_behind = true; - bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); - - // Can't ingest behind since allow_ingest_behind isn't set to true - ASSERT_NOK(GenerateAndAddExternalFile( - options, file_data, -1, allow_global_seqno, write_global_seqno, - verify_checksums_before_ingest, ingest_behind, false /*sort_data*/, - &true_data)); - - options.allow_ingest_behind = true; - // check that we still can open the DB, as num_levels should be - // sanitized to 3 - options.num_levels = 2; - DestroyAndReopen(options); - - options.num_levels = 3; - DestroyAndReopen(options); - // Insert 100 -> 200 into the memtable - for (int i = 100; i <= 200; i++) { - ASSERT_OK(Put(Key(i), "memtable")); - true_data[Key(i)] = "memtable"; - } - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - // Universal picker should go at second from the bottom level - ASSERT_EQ("0,1", FilesPerLevel()); - ASSERT_OK(GenerateAndAddExternalFile( - options, file_data, -1, allow_global_seqno, write_global_seqno, - verify_checksums_before_ingest, true /*ingest_behind*/, - false /*sort_data*/, &true_data)); - ASSERT_EQ("0,1,1", FilesPerLevel()); - // this time ingest should fail as the file doesn't fit to the bottom level - ASSERT_NOK(GenerateAndAddExternalFile( - options, file_data, -1, allow_global_seqno, write_global_seqno, - verify_checksums_before_ingest, true /*ingest_behind*/, - false /*sort_data*/, &true_data)); - ASSERT_EQ("0,1,1", FilesPerLevel()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - // bottom level should be empty - ASSERT_EQ("0,1", FilesPerLevel()); - - size_t kcnt = 0; - VerifyDBFromMap(true_data, &kcnt, false); -} - -TEST_F(ExternalSSTFileTest, SkipBloomFilter) { - Options options = CurrentOptions(); - - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - table_options.cache_index_and_filter_blocks = true; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - // Create external SST file and include bloom filters - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - { - std::string file_path = sst_files_dir_ + "sst_with_bloom.sst"; - SstFileWriter sst_file_writer(EnvOptions(), options); - ASSERT_OK(sst_file_writer.Open(file_path)); - ASSERT_OK(sst_file_writer.Put("Key1", "Value1")); - ASSERT_OK(sst_file_writer.Finish()); - - ASSERT_OK( - db_->IngestExternalFile({file_path}, IngestExternalFileOptions())); - - ASSERT_EQ(Get("Key1"), "Value1"); - ASSERT_GE( - options.statistics->getTickerCount(Tickers::BLOCK_CACHE_FILTER_ADD), 1); - } - - // Create external SST file but skip bloom filters - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - DestroyAndReopen(options); - { - std::string file_path = sst_files_dir_ + "sst_with_no_bloom.sst"; - SstFileWriter sst_file_writer(EnvOptions(), options, nullptr, true, - Env::IOPriority::IO_TOTAL, - true /* skip_filters */); - 
ASSERT_OK(sst_file_writer.Open(file_path)); - ASSERT_OK(sst_file_writer.Put("Key1", "Value1")); - ASSERT_OK(sst_file_writer.Finish()); - - ASSERT_OK( - db_->IngestExternalFile({file_path}, IngestExternalFileOptions())); - - ASSERT_EQ(Get("Key1"), "Value1"); - ASSERT_EQ( - options.statistics->getTickerCount(Tickers::BLOCK_CACHE_FILTER_ADD), 0); - } -} - -TEST_F(ExternalSSTFileTest, IngestFileWrittenWithCompressionDictionary) { - if (!ZSTD_Supported()) { - return; - } - const int kNumEntries = 1 << 10; - const int kNumBytesPerEntry = 1 << 10; - Options options = CurrentOptions(); - options.compression = kZSTD; - options.compression_opts.max_dict_bytes = 1 << 14; // 16KB - options.compression_opts.zstd_max_train_bytes = 1 << 18; // 256KB - DestroyAndReopen(options); - - std::atomic num_compression_dicts(0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTableBuilder::WriteCompressionDictBlock:RawDict", - [&](void* /* arg */) { ++num_compression_dicts; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - Random rnd(301); - std::vector> random_data; - for (int i = 0; i < kNumEntries; i++) { - std::string val = rnd.RandomString(kNumBytesPerEntry); - random_data.emplace_back(Key(i), std::move(val)); - } - ASSERT_OK(GenerateAndAddExternalFile(options, std::move(random_data))); - ASSERT_EQ(1, num_compression_dicts); -} - -class ExternalSSTBlockChecksumTest - : public ExternalSSTFileTestBase, - public testing::WithParamInterface {}; - -INSTANTIATE_TEST_CASE_P(FormatVersions, ExternalSSTBlockChecksumTest, - testing::ValuesIn(test::kFooterFormatVersionsToTest)); - -// Very slow, not worth the cost to run regularly -TEST_P(ExternalSSTBlockChecksumTest, DISABLED_HugeBlockChecksum) { - BlockBasedTableOptions table_options; - table_options.format_version = GetParam(); - for (auto t : GetSupportedChecksums()) { - table_options.checksum = t; - Options options = CurrentOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - SstFileWriter sst_file_writer(EnvOptions(), options); - - // 2^32 - 1, will lead to data block with more than 2^32 bytes - size_t huge_size = std::numeric_limits::max(); - - std::string f = sst_files_dir_ + "f.sst"; - ASSERT_OK(sst_file_writer.Open(f)); - { - Random64 r(123); - std::string huge(huge_size, 0); - for (size_t j = 0; j + 7 < huge_size; j += 8) { - EncodeFixed64(&huge[j], r.Next()); - } - ASSERT_OK(sst_file_writer.Put("Huge", huge)); - } - - ExternalSstFileInfo f_info; - ASSERT_OK(sst_file_writer.Finish(&f_info)); - ASSERT_GT(f_info.file_size, uint64_t{huge_size} + 10); - - SstFileReader sst_file_reader(options); - ASSERT_OK(sst_file_reader.Open(f)); - ASSERT_OK(sst_file_reader.VerifyChecksum()); - } -} - -TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_Success) { - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - Options options = CurrentOptions(); - options.env = fault_injection_env.get(); - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - - // Exercise different situations in different column families: two are empty - // (so no new sequence number is needed), but at least one overlaps with the - // DB and needs to bump the sequence number. 
- ASSERT_OK(db_->Put(WriteOptions(), "foo1", "oldvalue")); - - std::vector column_families; - column_families.push_back(handles_[0]); - column_families.push_back(handles_[1]); - column_families.push_back(handles_[2]); - std::vector ifos(column_families.size()); - for (auto& ifo : ifos) { - ifo.allow_global_seqno = true; // Always allow global_seqno - // May or may not write global_seqno - ifo.write_global_seqno = std::get<0>(GetParam()); - // Whether to verify checksums before ingestion - ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); - } - std::vector>> data; - data.push_back( - {std::make_pair("foo1", "fv1"), std::make_pair("foo2", "fv2")}); - data.push_back( - {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); - data.push_back( - {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); - - // Resize the true_data vector upon construction to avoid re-alloc - std::vector> true_data( - column_families.size()); - ASSERT_OK(GenerateAndAddExternalFiles(options, column_families, ifos, data, - -1, true, true_data)); - Close(); - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, - options); - ASSERT_EQ(3, handles_.size()); - int cf = 0; - for (const auto& verify_map : true_data) { - for (const auto& elem : verify_map) { - const std::string& key = elem.first; - const std::string& value = elem.second; - ASSERT_EQ(value, Get(cf, key)); - } - ++cf; - } - Close(); - Destroy(options, true /* delete_cf_paths */); -} - -TEST_P(ExternalSSTFileTest, - IngestFilesIntoMultipleColumnFamilies_NoMixedStateWithSnapshot) { - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::IngestExternalFiles:InstallSVForFirstCF:0", - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:" - "BeforeRead"}, - {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:" - "AfterRead", - "DBImpl::IngestExternalFiles:InstallSVForFirstCF:1"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - - Options options = CurrentOptions(); - options.env = fault_injection_env.get(); - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - const std::vector> data_before_ingestion = - {{{"foo1", "fv1_0"}, {"foo2", "fv2_0"}, {"foo3", "fv3_0"}}, - {{"bar1", "bv1_0"}, {"bar2", "bv2_0"}, {"bar3", "bv3_0"}}, - {{"bar4", "bv4_0"}, {"bar5", "bv5_0"}, {"bar6", "bv6_0"}}}; - for (size_t i = 0; i != handles_.size(); ++i) { - int cf = static_cast(i); - const auto& orig_data = data_before_ingestion[i]; - for (const auto& kv : orig_data) { - ASSERT_OK(Put(cf, kv.first, kv.second)); - } - ASSERT_OK(Flush(cf)); - } - - std::vector column_families; - column_families.push_back(handles_[0]); - column_families.push_back(handles_[1]); - column_families.push_back(handles_[2]); - std::vector ifos(column_families.size()); - for (auto& ifo : ifos) { - ifo.allow_global_seqno = true; // Always allow global_seqno - // May or may not write global_seqno - ifo.write_global_seqno = std::get<0>(GetParam()); - // Whether to verify checksums before ingestion - ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); - } - std::vector>> data; - data.push_back( - {std::make_pair("foo1", "fv1"), std::make_pair("foo2", "fv2")}); - data.push_back( - {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); - data.push_back( - {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); - // Resize 
the true_data vector upon construction to avoid re-alloc - std::vector> true_data( - column_families.size()); - // Take snapshot before ingestion starts - ReadOptions read_opts; - read_opts.total_order_seek = true; - read_opts.snapshot = dbfull()->GetSnapshot(); - std::vector iters(handles_.size()); - - // Range scan checks first kv of each CF before ingestion starts. - for (size_t i = 0; i != handles_.size(); ++i) { - iters[i] = dbfull()->NewIterator(read_opts, handles_[i]); - iters[i]->SeekToFirst(); - ASSERT_TRUE(iters[i]->Valid()); - const std::string& key = iters[i]->key().ToString(); - const std::string& value = iters[i]->value().ToString(); - const std::map& orig_data = - data_before_ingestion[i]; - std::map::const_iterator it = orig_data.find(key); - ASSERT_NE(orig_data.end(), it); - ASSERT_EQ(it->second, value); - iters[i]->Next(); - } - port::Thread ingest_thread([&]() { - ASSERT_OK(GenerateAndAddExternalFiles(options, column_families, ifos, data, - -1, true, true_data)); - }); - TEST_SYNC_POINT( - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:" - "BeforeRead"); - // Should see only data before ingestion - for (size_t i = 0; i != handles_.size(); ++i) { - const auto& orig_data = data_before_ingestion[i]; - for (; iters[i]->Valid(); iters[i]->Next()) { - const std::string& key = iters[i]->key().ToString(); - const std::string& value = iters[i]->value().ToString(); - std::map::const_iterator it = - orig_data.find(key); - ASSERT_NE(orig_data.end(), it); - ASSERT_EQ(it->second, value); - } - } - TEST_SYNC_POINT( - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:" - "AfterRead"); - ingest_thread.join(); - for (auto* iter : iters) { - delete iter; - } - iters.clear(); - dbfull()->ReleaseSnapshot(read_opts.snapshot); - - Close(); - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, - options); - // Should see consistent state after ingestion for all column families even - // without snapshot. 
- ASSERT_EQ(3, handles_.size()); - int cf = 0; - for (const auto& verify_map : true_data) { - for (const auto& elem : verify_map) { - const std::string& key = elem.first; - const std::string& value = elem.second; - ASSERT_EQ(value, Get(cf, key)); - } - ++cf; - } - Close(); - Destroy(options, true /* delete_cf_paths */); -} - -TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_PrepareFail) { - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - Options options = CurrentOptions(); - options.env = fault_injection_env.get(); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::IngestExternalFiles:BeforeLastJobPrepare:0", - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_PrepareFail:" - "0"}, - {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies:PrepareFail:" - "1", - "DBImpl::IngestExternalFiles:BeforeLastJobPrepare:1"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - std::vector column_families; - column_families.push_back(handles_[0]); - column_families.push_back(handles_[1]); - column_families.push_back(handles_[2]); - std::vector ifos(column_families.size()); - for (auto& ifo : ifos) { - ifo.allow_global_seqno = true; // Always allow global_seqno - // May or may not write global_seqno - ifo.write_global_seqno = std::get<0>(GetParam()); - // Whether to verify block checksums before ingest - ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); - } - std::vector>> data; - data.push_back( - {std::make_pair("foo1", "fv1"), std::make_pair("foo2", "fv2")}); - data.push_back( - {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); - data.push_back( - {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); - - // Resize the true_data vector upon construction to avoid re-alloc - std::vector> true_data( - column_families.size()); - port::Thread ingest_thread([&]() { - ASSERT_NOK(GenerateAndAddExternalFiles(options, column_families, ifos, data, - -1, true, true_data)); - }); - TEST_SYNC_POINT( - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_PrepareFail:" - "0"); - fault_injection_env->SetFilesystemActive(false); - TEST_SYNC_POINT( - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies:PrepareFail:" - "1"); - ingest_thread.join(); - - fault_injection_env->SetFilesystemActive(true); - Close(); - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, - options); - ASSERT_EQ(3, handles_.size()); - int cf = 0; - for (const auto& verify_map : true_data) { - for (const auto& elem : verify_map) { - const std::string& key = elem.first; - ASSERT_EQ("NOT_FOUND", Get(cf, key)); - } - ++cf; - } - Close(); - Destroy(options, true /* delete_cf_paths */); -} - -TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_CommitFail) { - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - Options options = CurrentOptions(); - options.env = fault_injection_env.get(); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::IngestExternalFiles:BeforeJobsRun:0", - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:" - "0"}, - {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:" - "1", - "DBImpl::IngestExternalFiles:BeforeJobsRun:1"}, - }); - 
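// A note on the sync-point pattern used throughout these ingestion tests:
// each {"A", "B"} pair passed to LoadDependency() makes the thread that
// reaches TEST_SYNC_POINT("B") wait until some thread has passed
// TEST_SYNC_POINT("A"). A minimal sketch of that ordering, using only the
// test_util/sync_point.h helpers already used in this file (the point names
// below are illustrative, not real RocksDB sync points):
//
//   SyncPoint::GetInstance()->LoadDependency(
//       {{"Example:IngestDone", "Example:VerifyStart"}});
//   SyncPoint::GetInstance()->EnableProcessing();
//   port::Thread ingest([&]() {
//     // ... perform the ingestion ...
//     TEST_SYNC_POINT("Example:IngestDone");
//   });
//   TEST_SYNC_POINT("Example:VerifyStart");  // blocks until IngestDone fires
//   // ... read and assert on the post-ingestion state ...
//   ingest.join();
//   SyncPoint::GetInstance()->DisableProcessing();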
SyncPoint::GetInstance()->EnableProcessing(); - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - std::vector column_families; - column_families.push_back(handles_[0]); - column_families.push_back(handles_[1]); - column_families.push_back(handles_[2]); - std::vector ifos(column_families.size()); - for (auto& ifo : ifos) { - ifo.allow_global_seqno = true; // Always allow global_seqno - // May or may not write global_seqno - ifo.write_global_seqno = std::get<0>(GetParam()); - // Whether to verify block checksums before ingestion - ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); - } - std::vector>> data; - data.push_back( - {std::make_pair("foo1", "fv1"), std::make_pair("foo2", "fv2")}); - data.push_back( - {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); - data.push_back( - {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); - // Resize the true_data vector upon construction to avoid re-alloc - std::vector> true_data( - column_families.size()); - port::Thread ingest_thread([&]() { - ASSERT_NOK(GenerateAndAddExternalFiles(options, column_families, ifos, data, - -1, true, true_data)); - }); - TEST_SYNC_POINT( - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:" - "0"); - fault_injection_env->SetFilesystemActive(false); - TEST_SYNC_POINT( - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:" - "1"); - ingest_thread.join(); - - fault_injection_env->SetFilesystemActive(true); - Close(); - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, - options); - ASSERT_EQ(3, handles_.size()); - int cf = 0; - for (const auto& verify_map : true_data) { - for (const auto& elem : verify_map) { - const std::string& key = elem.first; - ASSERT_EQ("NOT_FOUND", Get(cf, key)); - } - ++cf; - } - Close(); - Destroy(options, true /* delete_cf_paths */); -} - -TEST_P(ExternalSSTFileTest, - IngestFilesIntoMultipleColumnFamilies_PartialManifestWriteFail) { - std::unique_ptr fault_injection_env( - new FaultInjectionTestEnv(env_)); - Options options = CurrentOptions(); - options.env = fault_injection_env.get(); - - CreateAndReopenWithCF({"pikachu", "eevee"}, options); - - SyncPoint::GetInstance()->ClearTrace(); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->LoadDependency({ - {"VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0", - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_" - "PartialManifestWriteFail:0"}, - {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_" - "PartialManifestWriteFail:1", - "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:1"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - - std::vector column_families; - column_families.push_back(handles_[0]); - column_families.push_back(handles_[1]); - column_families.push_back(handles_[2]); - std::vector ifos(column_families.size()); - for (auto& ifo : ifos) { - ifo.allow_global_seqno = true; // Always allow global_seqno - // May or may not write global_seqno - ifo.write_global_seqno = std::get<0>(GetParam()); - // Whether to verify block checksums before ingestion - ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); - } - std::vector>> data; - data.push_back( - {std::make_pair("foo1", "fv1"), std::make_pair("foo2", "fv2")}); - data.push_back( - {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); - data.push_back( - {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); - // Resize 
the true_data vector upon construction to avoid re-alloc - std::vector> true_data( - column_families.size()); - port::Thread ingest_thread([&]() { - ASSERT_NOK(GenerateAndAddExternalFiles(options, column_families, ifos, data, - -1, true, true_data)); - }); - TEST_SYNC_POINT( - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_" - "PartialManifestWriteFail:0"); - fault_injection_env->SetFilesystemActive(false); - TEST_SYNC_POINT( - "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_" - "PartialManifestWriteFail:1"); - ingest_thread.join(); - - ASSERT_OK(fault_injection_env->DropUnsyncedFileData()); - fault_injection_env->SetFilesystemActive(true); - Close(); - ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, - options); - ASSERT_EQ(3, handles_.size()); - int cf = 0; - for (const auto& verify_map : true_data) { - for (const auto& elem : verify_map) { - const std::string& key = elem.first; - ASSERT_EQ("NOT_FOUND", Get(cf, key)); - } - ++cf; - } - Close(); - Destroy(options, true /* delete_cf_paths */); -} - -TEST_P(ExternalSSTFileTest, IngestFilesTriggerFlushingWithTwoWriteQueue) { - Options options = CurrentOptions(); - // Use large buffer to avoid memtable flush - options.write_buffer_size = 1024 * 1024; - options.two_write_queues = true; - DestroyAndReopen(options); - - ASSERT_OK(dbfull()->Put(WriteOptions(), "1000", "v1")); - ASSERT_OK(dbfull()->Put(WriteOptions(), "1001", "v1")); - ASSERT_OK(dbfull()->Put(WriteOptions(), "9999", "v1")); - - // Put one key which is overlap with keys in memtable. - // It will trigger flushing memtable and require this thread is - // currently at the front of the 2nd writer queue. We must make - // sure that it won't enter the 2nd writer queue for the second time. - std::vector> data; - data.push_back(std::make_pair("1001", "v2")); - ASSERT_OK(GenerateAndAddExternalFile(options, data, -1, true)); -} - -TEST_P(ExternalSSTFileTest, DeltaEncodingWhileGlobalSeqnoPresent) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - constexpr size_t kValueSize = 8; - Random rnd(301); - std::string value = rnd.RandomString(kValueSize); - - // Write some key to make global seqno larger than zero - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put("ab" + Key(i), value)); - } - // Get a Snapshot to make RocksDB assign global seqno to ingested sst files. - auto snap = dbfull()->GetSnapshot(); - - std::string fname = sst_files_dir_ + "test_file"; - ROCKSDB_NAMESPACE::SstFileWriter writer(EnvOptions(), options); - ASSERT_OK(writer.Open(fname)); - std::string key1 = "ab"; - std::string key2 = "ab"; - - // Make the prefix of key2 is same with key1 add zero seqno. The tail of every - // key is composed as (seqno << 8 | value_type), and here `1` represents - // ValueType::kTypeValue - - PutFixed64(&key2, PackSequenceAndType(0, kTypeValue)); - key2 += "cdefghijkl"; - - ASSERT_OK(writer.Put(key1, value)); - ASSERT_OK(writer.Put(key2, value)); - - ExternalSstFileInfo info; - ASSERT_OK(writer.Finish(&info)); - - ASSERT_OK(dbfull()->IngestExternalFile({info.file_path}, - IngestExternalFileOptions())); - dbfull()->ReleaseSnapshot(snap); - ASSERT_EQ(value, Get(key1)); - // You will get error here - ASSERT_EQ(value, Get(key2)); -} - -TEST_P(ExternalSSTFileTest, - DeltaEncodingWhileGlobalSeqnoPresentIteratorSwitch) { - // Regression test for bug where global seqno corrupted the shared bytes - // buffer when switching from reverse iteration to forward iteration. 
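// Background for the key1/key2 construction used here and in the test above:
// an internal key is the user key followed by an 8-byte trailer that packs
// (sequence_number << 8) | value_type, appended with PutFixed64 (so the
// trailer is written little-endian, and `1` is kTypeValue as noted above).
// A rough sketch of how key2 is assembled so that its *user* key shares a
// prefix with key1's *internal* encoding:
//
//   std::string key1 = "ab";
//   std::string key2 = "ab";
//   PutFixed64(&key2, PackSequenceAndType(0, kTypeValue));
//   // key2 now holds "ab" followed by bytes 01 00 00 00 00 00 00 00
//   key2 += "cdefghijkl";
//   // key2 is a longer user key whose first 10 bytes equal key1's internal
//   // key at seqno 0; that shared prefix is what stresses delta encoding
//   // once a nonzero global seqno is applied to the ingested file.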
- constexpr size_t kValueSize = 8; - Options options = CurrentOptions(); - - Random rnd(301); - std::string value = rnd.RandomString(kValueSize); - - std::string key0 = "aa"; - std::string key1 = "ab"; - // Make the prefix of key2 is same with key1 add zero seqno. The tail of every - // key is composed as (seqno << 8 | value_type), and here `1` represents - // ValueType::kTypeValue - std::string key2 = "ab"; - PutFixed64(&key2, PackSequenceAndType(0, kTypeValue)); - key2 += "cdefghijkl"; - std::string key3 = key2 + "_"; - - // Write some key to make global seqno larger than zero - ASSERT_OK(Put(key0, value)); - - std::string fname = sst_files_dir_ + "test_file"; - ROCKSDB_NAMESPACE::SstFileWriter writer(EnvOptions(), options); - ASSERT_OK(writer.Open(fname)); - - // key0 is a dummy to ensure the turnaround point (key1) comes from Prev - // cache rather than block (restart keys are pinned in block). - ASSERT_OK(writer.Put(key0, value)); - ASSERT_OK(writer.Put(key1, value)); - ASSERT_OK(writer.Put(key2, value)); - ASSERT_OK(writer.Put(key3, value)); - - ExternalSstFileInfo info; - ASSERT_OK(writer.Finish(&info)); - - ASSERT_OK(dbfull()->IngestExternalFile({info.file_path}, - IngestExternalFileOptions())); - ReadOptions read_opts; - // Prevents Seek() when switching directions, which circumvents the bug. - read_opts.total_order_seek = true; - Iterator* iter = db_->NewIterator(read_opts); - // Scan backwards to key2. File iterator will then be positioned at key1. - iter->Seek(key3); - ASSERT_EQ(key3, iter->key()); - iter->Prev(); - ASSERT_EQ(key2, iter->key()); - // Scan forwards and make sure key3 is present. Previously key3 would be - // corrupted by the global seqno from key1. - iter->Next(); - ASSERT_EQ(key3, iter->key()); - delete iter; -} - -INSTANTIATE_TEST_CASE_P(ExternalSSTFileTest, ExternalSSTFileTest, - testing::Values(std::make_tuple(false, false), - std::make_tuple(false, true), - std::make_tuple(true, false), - std::make_tuple(true, true))); - -INSTANTIATE_TEST_CASE_P(ExternSSTFileLinkFailFallbackTest, - ExternSSTFileLinkFailFallbackTest, - testing::Values(std::make_tuple(true, false), - std::make_tuple(true, true), - std::make_tuple(false, false))); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/db/fault_injection_test.cc b/db/fault_injection_test.cc deleted file mode 100644 index ddd4b47cc..000000000 --- a/db/fault_injection_test.cc +++ /dev/null @@ -1,637 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright 2014 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -// This test uses a custom Env to keep track of the state of a filesystem as of -// the last "sync". It then checks for data loss errors by purposely dropping -// file data (or entire files) not protected by a "sync". 
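// In outline, most tests in this file follow the same crash-simulation cycle,
// sketched here with the FaultInjectionTestEnv API from
// utilities/fault_injection_env.h and the helpers defined below
// (Build/CloseDB/OpenDB/Verify); treat it as a simplified summary, not a
// literal excerpt:
//
//   Build(write_options, 0, num_pre_sync);        // writes meant to survive
//   /* sync them: CompactRange() or FlushWAL(true), depending on the config */
//   Build(write_options, num_pre_sync, num_post_sync);  // unsynced writes
//   env_->SetFilesystemActive(false);             // freeze the fake filesystem
//   CloseDB();
//   ASSERT_OK(env_->DropUnsyncedFileData());      // simulate the crash
//   ASSERT_OK(OpenDB());
//   // keys written before the sync must be found; keys written after it may
//   // be missing, but recovery must not report corruption.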
- -#include "db/db_impl/db_impl.h" -#include "db/log_format.h" -#include "db/version_set.h" -#include "env/mock_env.h" -#include "file/filename.h" -#include "rocksdb/cache.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/table.h" -#include "rocksdb/write_batch.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/mutexlock.h" -#include "util/random.h" -#include "utilities/fault_injection_env.h" -#ifndef NDEBUG -#include "utilities/fault_injection_fs.h" -#endif - -namespace ROCKSDB_NAMESPACE { - -static const int kValueSize = 1000; -static const int kMaxNumValues = 2000; -static const size_t kNumIterations = 3; - -enum FaultInjectionOptionConfig { - kDefault, - kDifferentDataDir, - kWalDir, - kSyncWal, - kWalDirSyncWal, - kMultiLevels, - kEnd, -}; -class FaultInjectionTest - : public testing::Test, - public testing::WithParamInterface> { - protected: - int option_config_; - int non_inclusive_end_range_; // kEnd or equivalent to that - // When need to make sure data is persistent, sync WAL - bool sync_use_wal_; - // When need to make sure data is persistent, call DB::CompactRange() - bool sync_use_compact_; - - bool sequential_order_; - - public: - enum ExpectedVerifResult { kValExpectFound, kValExpectNoError }; - enum ResetMethod { - kResetDropUnsyncedData, - kResetDropRandomUnsyncedData, - kResetDeleteUnsyncedFiles, - kResetDropAndDeleteUnsynced - }; - - std::unique_ptr base_env_; - FaultInjectionTestEnv* env_; - std::string dbname_; - std::shared_ptr tiny_cache_; - Options options_; - DB* db_; - - FaultInjectionTest() - : option_config_(std::get<1>(GetParam())), - non_inclusive_end_range_(std::get<2>(GetParam())), - sync_use_wal_(false), - sync_use_compact_(true), - base_env_(nullptr), - env_(nullptr), - db_(nullptr) { - EXPECT_OK( - test::CreateEnvFromSystem(ConfigOptions(), &system_env_, &env_guard_)); - EXPECT_NE(system_env_, nullptr); - } - - ~FaultInjectionTest() override { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - } - - bool ChangeOptions() { - option_config_++; - if (option_config_ >= non_inclusive_end_range_) { - return false; - } else { - if (option_config_ == kMultiLevels) { - base_env_.reset(MockEnv::Create(system_env_)); - } - return true; - } - } - - // Return the current option configuration. 
- Options CurrentOptions() { - sync_use_wal_ = false; - sync_use_compact_ = true; - Options options; - switch (option_config_) { - case kWalDir: - options.wal_dir = test::PerThreadDBPath(env_, "fault_test_wal"); - break; - case kDifferentDataDir: - options.db_paths.emplace_back( - test::PerThreadDBPath(env_, "fault_test_data"), 1000000U); - break; - case kSyncWal: - sync_use_wal_ = true; - sync_use_compact_ = false; - break; - case kWalDirSyncWal: - options.wal_dir = test::PerThreadDBPath(env_, "/fault_test_wal"); - sync_use_wal_ = true; - sync_use_compact_ = false; - break; - case kMultiLevels: - options.write_buffer_size = 64 * 1024; - options.target_file_size_base = 64 * 1024; - options.level0_file_num_compaction_trigger = 2; - options.level0_slowdown_writes_trigger = 2; - options.level0_stop_writes_trigger = 4; - options.max_bytes_for_level_base = 128 * 1024; - options.max_write_buffer_number = 2; - options.max_background_compactions = 8; - options.max_background_flushes = 8; - sync_use_wal_ = true; - sync_use_compact_ = false; - break; - default: - break; - } - return options; - } - - Status NewDB() { - assert(db_ == nullptr); - assert(tiny_cache_ == nullptr); - assert(env_ == nullptr); - - env_ = new FaultInjectionTestEnv(base_env_ ? base_env_.get() : system_env_); - - options_ = CurrentOptions(); - options_.env = env_; - options_.paranoid_checks = true; - - BlockBasedTableOptions table_options; - tiny_cache_ = NewLRUCache(100); - table_options.block_cache = tiny_cache_; - options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - dbname_ = test::PerThreadDBPath("fault_test"); - - EXPECT_OK(DestroyDB(dbname_, options_)); - - options_.create_if_missing = true; - Status s = OpenDB(); - options_.create_if_missing = false; - return s; - } - - void SetUp() override { - sequential_order_ = std::get<0>(GetParam()); - ASSERT_OK(NewDB()); - } - - void TearDown() override { - CloseDB(); - - Status s = DestroyDB(dbname_, options_); - - delete env_; - env_ = nullptr; - - tiny_cache_.reset(); - - ASSERT_OK(s); - } - - void Build(const WriteOptions& write_options, int start_idx, int num_vals) { - std::string key_space, value_space; - WriteBatch batch; - for (int i = start_idx; i < start_idx + num_vals; i++) { - Slice key = Key(i, &key_space); - batch.Clear(); - ASSERT_OK(batch.Put(key, Value(i, &value_space))); - ASSERT_OK(db_->Write(write_options, &batch)); - } - } - - Status ReadValue(int i, std::string* val) const { - std::string key_space, value_space; - Slice key = Key(i, &key_space); - Value(i, &value_space); - ReadOptions options; - return db_->Get(options, key, val); - } - - Status Verify(int start_idx, int num_vals, - ExpectedVerifResult expected) const { - std::string val; - std::string value_space; - Status s; - for (int i = start_idx; i < start_idx + num_vals && s.ok(); i++) { - Value(i, &value_space); - s = ReadValue(i, &val); - if (s.ok()) { - EXPECT_EQ(value_space, val); - } - if (expected == kValExpectFound) { - if (!s.ok()) { - fprintf(stderr, "Error when read %dth record (expect found): %s\n", i, - s.ToString().c_str()); - return s; - } - } else if (!s.ok() && !s.IsNotFound()) { - fprintf(stderr, "Error when read %dth record: %s\n", i, - s.ToString().c_str()); - return s; - } - } - return Status::OK(); - } - - // Return the ith key - Slice Key(int i, std::string* storage) const { - unsigned long long num = i; - if (!sequential_order_) { - // random transfer - const int m = 0x5bd1e995; - num *= m; - num ^= num << 24; - } - char buf[100]; - snprintf(buf, 
sizeof(buf), "%016d", static_cast(num)); - storage->assign(buf, strlen(buf)); - return Slice(*storage); - } - - // Return the value to associate with the specified key - Slice Value(int k, std::string* storage) const { - Random r(k); - *storage = r.RandomString(kValueSize); - return Slice(*storage); - } - - void CloseDB() { - delete db_; - db_ = nullptr; - } - - Status OpenDB() { - CloseDB(); - env_->ResetState(); - Status s = DB::Open(options_, dbname_, &db_); - assert(db_ != nullptr); - return s; - } - - void DeleteAllData() { - Iterator* iter = db_->NewIterator(ReadOptions()); - WriteOptions options; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(db_->Delete(WriteOptions(), iter->key())); - } - ASSERT_OK(iter->status()); - delete iter; - - FlushOptions flush_options; - flush_options.wait = true; - ASSERT_OK(db_->Flush(flush_options)); - } - - // rnd cannot be null for kResetDropRandomUnsyncedData - void ResetDBState(ResetMethod reset_method, Random* rnd = nullptr) { - env_->AssertNoOpenFile(); - switch (reset_method) { - case kResetDropUnsyncedData: - ASSERT_OK(env_->DropUnsyncedFileData()); - break; - case kResetDropRandomUnsyncedData: - ASSERT_OK(env_->DropRandomUnsyncedFileData(rnd)); - break; - case kResetDeleteUnsyncedFiles: - ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync()); - break; - case kResetDropAndDeleteUnsynced: - ASSERT_OK(env_->DropUnsyncedFileData()); - ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync()); - break; - default: - assert(false); - } - } - - void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) { - DeleteAllData(); - - WriteOptions write_options; - write_options.sync = sync_use_wal_; - - Build(write_options, 0, num_pre_sync); - if (sync_use_compact_) { - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - write_options.sync = false; - Build(write_options, num_pre_sync, num_post_sync); - } - - void PartialCompactTestReopenWithFault(ResetMethod reset_method, - int num_pre_sync, int num_post_sync, - Random* rnd = nullptr) { - env_->SetFilesystemActive(false); - CloseDB(); - ResetDBState(reset_method, rnd); - ASSERT_OK(OpenDB()); - ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::kValExpectFound)); - ASSERT_OK(Verify(num_pre_sync, num_post_sync, - FaultInjectionTest::kValExpectNoError)); - WaitCompactionFinish(); - ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::kValExpectFound)); - ASSERT_OK(Verify(num_pre_sync, num_post_sync, - FaultInjectionTest::kValExpectNoError)); - } - - void NoWriteTestPreFault() {} - - void NoWriteTestReopenWithFault(ResetMethod reset_method) { - CloseDB(); - ResetDBState(reset_method); - ASSERT_OK(OpenDB()); - } - - void WaitCompactionFinish() { - ASSERT_OK(static_cast(db_->GetRootDB())->TEST_WaitForCompact()); - ASSERT_OK(db_->Put(WriteOptions(), "", "")); - } - - private: - Env* system_env_; - std::shared_ptr env_guard_; -}; - -class FaultInjectionTestSplitted : public FaultInjectionTest {}; - -TEST_P(FaultInjectionTestSplitted, FaultTest) { - do { - Random rnd(301); - - for (size_t idx = 0; idx < kNumIterations; idx++) { - int num_pre_sync = rnd.Uniform(kMaxNumValues); - int num_post_sync = rnd.Uniform(kMaxNumValues); - - PartialCompactTestPreFault(num_pre_sync, num_post_sync); - PartialCompactTestReopenWithFault(kResetDropUnsyncedData, num_pre_sync, - num_post_sync); - NoWriteTestPreFault(); - NoWriteTestReopenWithFault(kResetDropUnsyncedData); - - PartialCompactTestPreFault(num_pre_sync, num_post_sync); - 
PartialCompactTestReopenWithFault(kResetDropRandomUnsyncedData, - num_pre_sync, num_post_sync, &rnd); - NoWriteTestPreFault(); - NoWriteTestReopenWithFault(kResetDropUnsyncedData); - - // Setting a separate data path won't pass the test as we don't sync - // it after creating new files, - PartialCompactTestPreFault(num_pre_sync, num_post_sync); - PartialCompactTestReopenWithFault(kResetDropAndDeleteUnsynced, - num_pre_sync, num_post_sync); - NoWriteTestPreFault(); - NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced); - - PartialCompactTestPreFault(num_pre_sync, num_post_sync); - // No new files created so we expect all values since no files will be - // dropped. - PartialCompactTestReopenWithFault(kResetDeleteUnsyncedFiles, num_pre_sync, - num_post_sync); - NoWriteTestPreFault(); - NoWriteTestReopenWithFault(kResetDeleteUnsyncedFiles); - } - } while (ChangeOptions()); -} - -// Previous log file is not fsynced if sync is forced after log rolling. -TEST_P(FaultInjectionTest, WriteOptionSyncTest) { - test::SleepingBackgroundTask sleeping_task_low; - env_->SetBackgroundThreads(1, Env::HIGH); - // Block the job queue to prevent flush job from running. - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::HIGH); - sleeping_task_low.WaitUntilSleeping(); - - WriteOptions write_options; - write_options.sync = false; - - std::string key_space, value_space; - ASSERT_OK( - db_->Put(write_options, Key(1, &key_space), Value(1, &value_space))); - FlushOptions flush_options; - flush_options.wait = false; - ASSERT_OK(db_->Flush(flush_options)); - write_options.sync = true; - ASSERT_OK( - db_->Put(write_options, Key(2, &key_space), Value(2, &value_space))); - ASSERT_OK(db_->FlushWAL(false)); - - env_->SetFilesystemActive(false); - NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced); - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - - ASSERT_OK(OpenDB()); - std::string val; - Value(2, &value_space); - ASSERT_OK(ReadValue(2, &val)); - ASSERT_EQ(value_space, val); - - Value(1, &value_space); - ASSERT_OK(ReadValue(1, &val)); - ASSERT_EQ(value_space, val); -} - -TEST_P(FaultInjectionTest, UninstalledCompaction) { - options_.target_file_size_base = 32 * 1024; - options_.write_buffer_size = 100 << 10; // 100KB - options_.level0_file_num_compaction_trigger = 6; - options_.level0_stop_writes_trigger = 1 << 10; - options_.level0_slowdown_writes_trigger = 1 << 10; - options_.max_background_compactions = 1; - OpenDB(); - - if (!sequential_order_) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"FaultInjectionTest::FaultTest:0", "DBImpl::BGWorkCompaction"}, - {"CompactionJob::Run():End", "FaultInjectionTest::FaultTest:1"}, - {"FaultInjectionTest::FaultTest:2", - "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}, - }); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - int kNumKeys = 1000; - Build(WriteOptions(), 0, kNumKeys); - FlushOptions flush_options; - flush_options.wait = true; - ASSERT_OK(db_->Flush(flush_options)); - ASSERT_OK(db_->Put(WriteOptions(), "", "")); - TEST_SYNC_POINT("FaultInjectionTest::FaultTest:0"); - TEST_SYNC_POINT("FaultInjectionTest::FaultTest:1"); - env_->SetFilesystemActive(false); - TEST_SYNC_POINT("FaultInjectionTest::FaultTest:2"); - CloseDB(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ResetDBState(kResetDropUnsyncedData); - - std::atomic opened(false); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::Open:Opened", 
[&](void* /*arg*/) { opened.store(true); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::BGWorkCompaction", - [&](void* /*arg*/) { ASSERT_TRUE(opened.load()); }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(OpenDB()); - ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound)); - WaitCompactionFinish(); - ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_P(FaultInjectionTest, ManualLogSyncTest) { - test::SleepingBackgroundTask sleeping_task_low; - env_->SetBackgroundThreads(1, Env::HIGH); - // Block the job queue to prevent flush job from running. - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, - Env::Priority::HIGH); - sleeping_task_low.WaitUntilSleeping(); - - WriteOptions write_options; - write_options.sync = false; - - std::string key_space, value_space; - ASSERT_OK( - db_->Put(write_options, Key(1, &key_space), Value(1, &value_space))); - FlushOptions flush_options; - flush_options.wait = false; - ASSERT_OK(db_->Flush(flush_options)); - ASSERT_OK( - db_->Put(write_options, Key(2, &key_space), Value(2, &value_space))); - ASSERT_OK(db_->FlushWAL(true)); - - env_->SetFilesystemActive(false); - NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced); - sleeping_task_low.WakeUp(); - sleeping_task_low.WaitUntilDone(); - - ASSERT_OK(OpenDB()); - std::string val; - Value(2, &value_space); - ASSERT_OK(ReadValue(2, &val)); - ASSERT_EQ(value_space, val); - - Value(1, &value_space); - ASSERT_OK(ReadValue(1, &val)); - ASSERT_EQ(value_space, val); -} - -TEST_P(FaultInjectionTest, WriteBatchWalTerminationTest) { - ReadOptions ro; - Options options = CurrentOptions(); - options.env = env_; - - WriteOptions wo; - wo.sync = true; - wo.disableWAL = false; - WriteBatch batch; - ASSERT_OK(batch.Put("cats", "dogs")); - batch.MarkWalTerminationPoint(); - ASSERT_OK(batch.Put("boys", "girls")); - ASSERT_OK(db_->Write(wo, &batch)); - - env_->SetFilesystemActive(false); - NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced); - ASSERT_OK(OpenDB()); - - std::string val; - ASSERT_OK(db_->Get(ro, "cats", &val)); - ASSERT_EQ("dogs", val); - ASSERT_EQ(db_->Get(ro, "boys", &val), Status::NotFound()); -} - -TEST_P(FaultInjectionTest, NoDuplicateTrailingEntries) { - auto fault_fs = std::make_shared(FileSystem::Default()); - fault_fs->EnableWriteErrorInjection(); - fault_fs->SetFilesystemDirectWritable(false); - const std::string file_name = NormalizePath(dbname_ + "/test_file"); - std::unique_ptr log_writer = nullptr; - constexpr uint64_t log_number = 0; - { - std::unique_ptr file; - const Status s = - fault_fs->NewWritableFile(file_name, FileOptions(), &file, nullptr); - ASSERT_OK(s); - std::unique_ptr fwriter( - new WritableFileWriter(std::move(file), file_name, FileOptions())); - log_writer.reset(new log::Writer(std::move(fwriter), log_number, - /*recycle_log_files=*/false)); - } - - fault_fs->SetRandomWriteError( - 0xdeadbeef, /*one_in=*/1, IOStatus::IOError("Injected IOError"), - /*inject_for_all_file_types=*/true, /*types=*/{}); - - { - VersionEdit edit; - edit.SetColumnFamily(0); - std::string buf; - assert(edit.EncodeTo(&buf)); - const Status s = log_writer->AddRecord(buf); - ASSERT_NOK(s); - } - - fault_fs->DisableWriteErrorInjection(); - - // Closing the log writer will cause WritableFileWriter::Close() and flush - // remaining data from its 
buffer to underlying file. - log_writer.reset(); - - { - std::unique_ptr file; - Status s = - fault_fs->NewSequentialFile(file_name, FileOptions(), &file, nullptr); - ASSERT_OK(s); - std::unique_ptr freader( - new SequentialFileReader(std::move(file), file_name)); - Status log_read_s; - class LogReporter : public log::Reader::Reporter { - public: - Status* status_; - explicit LogReporter(Status* _s) : status_(_s) {} - void Corruption(size_t /*bytes*/, const Status& _s) override { - if (status_->ok()) { - *status_ = _s; - } - } - } reporter(&log_read_s); - std::unique_ptr log_reader(new log::Reader( - nullptr, std::move(freader), &reporter, /*checksum=*/true, log_number)); - Slice record; - std::string data; - size_t count = 0; - while (log_reader->ReadRecord(&record, &data) && log_read_s.ok()) { - VersionEdit edit; - ASSERT_OK(edit.DecodeFrom(data)); - ++count; - } - // Verify that only one version edit exists in the file. - ASSERT_EQ(1, count); - } -} - -INSTANTIATE_TEST_CASE_P( - FaultTest, FaultInjectionTest, - ::testing::Values(std::make_tuple(false, kDefault, kEnd), - std::make_tuple(true, kDefault, kEnd))); - -INSTANTIATE_TEST_CASE_P( - FaultTest, FaultInjectionTestSplitted, - ::testing::Values(std::make_tuple(false, kDefault, kSyncWal), - std::make_tuple(true, kDefault, kSyncWal), - std::make_tuple(false, kSyncWal, kEnd), - std::make_tuple(true, kSyncWal, kEnd))); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/file_indexer_test.cc b/db/file_indexer_test.cc deleted file mode 100644 index 5c82189ef..000000000 --- a/db/file_indexer_test.cc +++ /dev/null @@ -1,352 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
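// The tests below drive FileIndexer::GetNextLevelIndex(). As exercised here,
// cmp_smallest and cmp_largest are the comparator results (-1, 0, +1) of a
// search key against one level-N file's smallest and largest keys, and the
// two output indexes bound the files in level N+1 that could still contain
// that key. A hypothetical point-lookup sketch over that interface (names and
// surrounding plumbing are illustrative, not taken from the real Version::Get
// path):
//
//   int32_t left = 0;
//   int32_t right = static_cast<int32_t>(files[level + 1].size()) - 1;
//   int cmp_smallest =
//       ucmp.Compare(user_key, files[level][i]->smallest.user_key());
//   int cmp_largest =
//       ucmp.Compare(user_key, files[level][i]->largest.user_key());
//   indexer->GetNextLevelIndex(level, i, cmp_smallest, cmp_largest,
//                              &left, &right);
//   // Only files[level + 1][left .. right] need to be searched for user_key.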
- -#include "db/file_indexer.h" - -#include - -#include "db/dbformat.h" -#include "db/version_edit.h" -#include "port/stack_trace.h" -#include "rocksdb/comparator.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -class IntComparator : public Comparator { - public: - int Compare(const Slice& a, const Slice& b) const override { - assert(a.size() == 8); - assert(b.size() == 8); - int64_t diff = *reinterpret_cast(a.data()) - - *reinterpret_cast(b.data()); - if (diff < 0) { - return -1; - } else if (diff == 0) { - return 0; - } else { - return 1; - } - } - - const char* Name() const override { return "IntComparator"; } - - void FindShortestSeparator(std::string* /*start*/, - const Slice& /*limit*/) const override {} - - void FindShortSuccessor(std::string* /*key*/) const override {} -}; - -class FileIndexerTest : public testing::Test { - public: - FileIndexerTest() - : kNumLevels(4), files(new std::vector[kNumLevels]) {} - - ~FileIndexerTest() override { - ClearFiles(); - delete[] files; - } - - void AddFile(int level, int64_t smallest, int64_t largest) { - auto* f = new FileMetaData(); - f->smallest = IntKey(smallest); - f->largest = IntKey(largest); - files[level].push_back(f); - } - - InternalKey IntKey(int64_t v) { - return InternalKey(Slice(reinterpret_cast(&v), 8), 0, kTypeValue); - } - - void ClearFiles() { - for (uint32_t i = 0; i < kNumLevels; ++i) { - for (auto* f : files[i]) { - delete f; - } - files[i].clear(); - } - } - - void GetNextLevelIndex(const uint32_t level, const uint32_t file_index, - const int cmp_smallest, const int cmp_largest, - int32_t* left_index, int32_t* right_index) { - *left_index = 100; - *right_index = 100; - indexer->GetNextLevelIndex(level, file_index, cmp_smallest, cmp_largest, - left_index, right_index); - } - - int32_t left = 100; - int32_t right = 100; - const uint32_t kNumLevels; - IntComparator ucmp; - FileIndexer* indexer; - - std::vector* files; -}; - -// Case 0: Empty -TEST_F(FileIndexerTest, Empty) { - Arena arena; - indexer = new FileIndexer(&ucmp); - indexer->UpdateIndex(&arena, 0, files); - delete indexer; -} - -// Case 1: no overlap, files are on the left of next level files -TEST_F(FileIndexerTest, no_overlap_left) { - Arena arena; - indexer = new FileIndexer(&ucmp); - // level 1 - AddFile(1, 100, 200); - AddFile(1, 300, 400); - AddFile(1, 500, 600); - // level 2 - AddFile(2, 1500, 1600); - AddFile(2, 1601, 1699); - AddFile(2, 1700, 1800); - // level 3 - AddFile(3, 2500, 2600); - AddFile(3, 2601, 2699); - AddFile(3, 2700, 2800); - indexer->UpdateIndex(&arena, kNumLevels, files); - for (uint32_t level = 1; level < 3; ++level) { - for (uint32_t f = 0; f < 3; ++f) { - GetNextLevelIndex(level, f, -1, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - GetNextLevelIndex(level, f, 0, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - GetNextLevelIndex(level, f, 1, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - GetNextLevelIndex(level, f, 1, 0, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - GetNextLevelIndex(level, f, 1, 1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(2, right); - } - } - delete indexer; - ClearFiles(); -} - -// Case 2: no overlap, files are on the right of next level files -TEST_F(FileIndexerTest, no_overlap_right) { - Arena arena; - indexer = new FileIndexer(&ucmp); - // level 1 - AddFile(1, 2100, 2200); - AddFile(1, 2300, 2400); - AddFile(1, 2500, 2600); - // level 2 - AddFile(2, 1500, 1600); - 
AddFile(2, 1501, 1699); - AddFile(2, 1700, 1800); - // level 3 - AddFile(3, 500, 600); - AddFile(3, 501, 699); - AddFile(3, 700, 800); - indexer->UpdateIndex(&arena, kNumLevels, files); - for (uint32_t level = 1; level < 3; ++level) { - for (uint32_t f = 0; f < 3; ++f) { - GetNextLevelIndex(level, f, -1, -1, &left, &right); - ASSERT_EQ(f == 0 ? 0 : 3, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(level, f, 0, -1, &left, &right); - ASSERT_EQ(3, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(level, f, 1, -1, &left, &right); - ASSERT_EQ(3, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(level, f, 1, -1, &left, &right); - ASSERT_EQ(3, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(level, f, 1, 0, &left, &right); - ASSERT_EQ(3, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(level, f, 1, 1, &left, &right); - ASSERT_EQ(3, left); - ASSERT_EQ(2, right); - } - } - delete indexer; -} - -// Case 3: empty L2 -TEST_F(FileIndexerTest, empty_L2) { - Arena arena; - indexer = new FileIndexer(&ucmp); - for (uint32_t i = 1; i < kNumLevels; ++i) { - ASSERT_EQ(0U, indexer->LevelIndexSize(i)); - } - // level 1 - AddFile(1, 2100, 2200); - AddFile(1, 2300, 2400); - AddFile(1, 2500, 2600); - // level 3 - AddFile(3, 500, 600); - AddFile(3, 501, 699); - AddFile(3, 700, 800); - indexer->UpdateIndex(&arena, kNumLevels, files); - for (uint32_t f = 0; f < 3; ++f) { - GetNextLevelIndex(1, f, -1, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - GetNextLevelIndex(1, f, 0, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - GetNextLevelIndex(1, f, 1, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - GetNextLevelIndex(1, f, 1, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - GetNextLevelIndex(1, f, 1, 0, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - GetNextLevelIndex(1, f, 1, 1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(-1, right); - } - delete indexer; - ClearFiles(); -} - -// Case 4: mixed -TEST_F(FileIndexerTest, mixed) { - Arena arena; - indexer = new FileIndexer(&ucmp); - // level 1 - AddFile(1, 100, 200); - AddFile(1, 250, 400); - AddFile(1, 450, 500); - // level 2 - AddFile(2, 100, 150); // 0 - AddFile(2, 200, 250); // 1 - AddFile(2, 251, 300); // 2 - AddFile(2, 301, 350); // 3 - AddFile(2, 500, 600); // 4 - // level 3 - AddFile(3, 0, 50); - AddFile(3, 100, 200); - AddFile(3, 201, 250); - indexer->UpdateIndex(&arena, kNumLevels, files); - // level 1, 0 - GetNextLevelIndex(1, 0, -1, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(0, right); - GetNextLevelIndex(1, 0, 0, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(0, right); - GetNextLevelIndex(1, 0, 1, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(1, 0, 1, 0, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(1, 0, 1, 1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(4, right); - // level 1, 1 - GetNextLevelIndex(1, 1, -1, -1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(1, 1, 0, -1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(1, 1, 1, -1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(3, right); - GetNextLevelIndex(1, 1, 1, 0, &left, &right); - ASSERT_EQ(4, left); - ASSERT_EQ(3, right); - GetNextLevelIndex(1, 1, 1, 1, &left, &right); - ASSERT_EQ(4, left); - ASSERT_EQ(4, right); - // level 1, 2 - GetNextLevelIndex(1, 2, -1, -1, &left, &right); - ASSERT_EQ(4, left); - ASSERT_EQ(3, right); - 
GetNextLevelIndex(1, 2, 0, -1, &left, &right); - ASSERT_EQ(4, left); - ASSERT_EQ(3, right); - GetNextLevelIndex(1, 2, 1, -1, &left, &right); - ASSERT_EQ(4, left); - ASSERT_EQ(4, right); - GetNextLevelIndex(1, 2, 1, 0, &left, &right); - ASSERT_EQ(4, left); - ASSERT_EQ(4, right); - GetNextLevelIndex(1, 2, 1, 1, &left, &right); - ASSERT_EQ(4, left); - ASSERT_EQ(4, right); - // level 2, 0 - GetNextLevelIndex(2, 0, -1, -1, &left, &right); - ASSERT_EQ(0, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(2, 0, 0, -1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(2, 0, 1, -1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(2, 0, 1, 0, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(2, 0, 1, 1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(2, right); - // level 2, 1 - GetNextLevelIndex(2, 1, -1, -1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(2, 1, 0, -1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(1, right); - GetNextLevelIndex(2, 1, 1, -1, &left, &right); - ASSERT_EQ(1, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(2, 1, 1, 0, &left, &right); - ASSERT_EQ(2, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(2, 1, 1, 1, &left, &right); - ASSERT_EQ(2, left); - ASSERT_EQ(2, right); - // level 2, [2 - 4], no overlap - for (uint32_t f = 2; f <= 4; ++f) { - GetNextLevelIndex(2, f, -1, -1, &left, &right); - ASSERT_EQ(f == 2 ? 2 : 3, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(2, f, 0, -1, &left, &right); - ASSERT_EQ(3, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(2, f, 1, -1, &left, &right); - ASSERT_EQ(3, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(2, f, 1, 0, &left, &right); - ASSERT_EQ(3, left); - ASSERT_EQ(2, right); - GetNextLevelIndex(2, f, 1, 1, &left, &right); - ASSERT_EQ(3, left); - ASSERT_EQ(2, right); - } - delete indexer; - ClearFiles(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/filename_test.cc b/db/filename_test.cc deleted file mode 100644 index 04c81b333..000000000 --- a/db/filename_test.cc +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
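// The Parse test below is essentially a table of RocksDB's on-disk naming
// scheme ("<number>.log" for WALs, "<number>.sst" for tables, "MANIFEST-<n>",
// "CURRENT", "LOCK", "LOG[.old[.<n>]]", "METADB-<n>"). A minimal usage sketch
// of the API it exercises, assuming only file/filename.h:
//
//   uint64_t number = 0;
//   FileType type;
//   if (ParseFileName("000123.sst", &number, &type)) {
//     // number == 123, type == kTableFile
//   }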
- -#include "file/filename.h" - -#include "db/dbformat.h" -#include "port/port.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -class FileNameTest : public testing::Test {}; - -TEST_F(FileNameTest, Parse) { - Slice db; - FileType type; - uint64_t number; - - char kDefautInfoLogDir = 1; - char kDifferentInfoLogDir = 2; - char kNoCheckLogDir = 4; - char kAllMode = kDefautInfoLogDir | kDifferentInfoLogDir | kNoCheckLogDir; - - // Successful parses - static struct { - const char* fname; - uint64_t number; - FileType type; - char mode; - } cases[] = { - {"100.log", 100, kWalFile, kAllMode}, - {"0.log", 0, kWalFile, kAllMode}, - {"0.sst", 0, kTableFile, kAllMode}, - {"CURRENT", 0, kCurrentFile, kAllMode}, - {"LOCK", 0, kDBLockFile, kAllMode}, - {"MANIFEST-2", 2, kDescriptorFile, kAllMode}, - {"MANIFEST-7", 7, kDescriptorFile, kAllMode}, - {"METADB-2", 2, kMetaDatabase, kAllMode}, - {"METADB-7", 7, kMetaDatabase, kAllMode}, - {"LOG", 0, kInfoLogFile, kDefautInfoLogDir}, - {"LOG.old", 0, kInfoLogFile, kDefautInfoLogDir}, - {"LOG.old.6688", 6688, kInfoLogFile, kDefautInfoLogDir}, - {"rocksdb_dir_LOG", 0, kInfoLogFile, kDifferentInfoLogDir}, - {"rocksdb_dir_LOG.old", 0, kInfoLogFile, kDifferentInfoLogDir}, - {"rocksdb_dir_LOG.old.6688", 6688, kInfoLogFile, kDifferentInfoLogDir}, - {"18446744073709551615.log", 18446744073709551615ull, kWalFile, kAllMode}, - }; - for (char mode : {kDifferentInfoLogDir, kDefautInfoLogDir, kNoCheckLogDir}) { - for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { - InfoLogPrefix info_log_prefix(mode != kDefautInfoLogDir, "/rocksdb/dir"); - if (cases[i].mode & mode) { - std::string f = cases[i].fname; - if (mode == kNoCheckLogDir) { - ASSERT_TRUE(ParseFileName(f, &number, &type)) << f; - } else { - ASSERT_TRUE(ParseFileName(f, &number, info_log_prefix.prefix, &type)) - << f; - } - ASSERT_EQ(cases[i].type, type) << f; - ASSERT_EQ(cases[i].number, number) << f; - } - } - } - - // Errors - static const char* errors[] = {"", - "foo", - "foo-dx-100.log", - ".log", - "", - "manifest", - "CURREN", - "CURRENTX", - "MANIFES", - "MANIFEST", - "MANIFEST-", - "XMANIFEST-3", - "MANIFEST-3x", - "META", - "METADB", - "METADB-", - "XMETADB-3", - "METADB-3x", - "LOC", - "LOCKx", - "LO", - "LOGx", - "18446744073709551616.log", - "184467440737095516150.log", - "100", - "100.", - "100.lop"}; - for (unsigned int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) { - std::string f = errors[i]; - ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f; - }; -} - -TEST_F(FileNameTest, InfoLogFileName) { - std::string dbname = ("/data/rocksdb"); - std::string db_absolute_path; - ASSERT_OK(Env::Default()->GetAbsolutePath(dbname, &db_absolute_path)); - - ASSERT_EQ("/data/rocksdb/LOG", InfoLogFileName(dbname, db_absolute_path, "")); - ASSERT_EQ("/data/rocksdb/LOG.old.666", - OldInfoLogFileName(dbname, 666u, db_absolute_path, "")); - - ASSERT_EQ("/data/rocksdb_log/data_rocksdb_LOG", - InfoLogFileName(dbname, db_absolute_path, "/data/rocksdb_log")); - ASSERT_EQ( - "/data/rocksdb_log/data_rocksdb_LOG.old.666", - OldInfoLogFileName(dbname, 666u, db_absolute_path, "/data/rocksdb_log")); -} - -TEST_F(FileNameTest, Construction) { - uint64_t number; - FileType type; - std::string fname; - - fname = CurrentFileName("foo"); - ASSERT_EQ("foo/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(0U, number); - ASSERT_EQ(kCurrentFile, type); - - fname = LockFileName("foo"); - ASSERT_EQ("foo/", 
std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(0U, number); - ASSERT_EQ(kDBLockFile, type); - - fname = LogFileName("foo", 192); - ASSERT_EQ("foo/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(192U, number); - ASSERT_EQ(kWalFile, type); - - fname = TableFileName({DbPath("bar", 0)}, 200, 0); - std::string fname1 = - TableFileName({DbPath("foo", 0), DbPath("bar", 0)}, 200, 1); - ASSERT_EQ(fname, fname1); - ASSERT_EQ("bar/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(200U, number); - ASSERT_EQ(kTableFile, type); - - fname = DescriptorFileName("bar", 100); - ASSERT_EQ("bar/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(100U, number); - ASSERT_EQ(kDescriptorFile, type); - - fname = TempFileName("tmp", 999); - ASSERT_EQ("tmp/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(999U, number); - ASSERT_EQ(kTempFile, type); - - fname = MetaDatabaseName("met", 100); - ASSERT_EQ("met/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(100U, number); - ASSERT_EQ(kMetaDatabase, type); -} - -TEST_F(FileNameTest, NormalizePath) { - // No leading slash - const std::string sep = std::string(1, kFilePathSeparator); - - std::string expected = "FOLDER" + sep + "filename.ext"; - std::string given = "FOLDER" + sep + "filename.ext"; - - ASSERT_EQ(expected, NormalizePath(given)); - - // Two chars /a - - expected = sep + "a"; - given = expected; - ASSERT_EQ(expected, NormalizePath(given)); - - // Two chars a/ - expected = "a" + sep; - given = expected; - ASSERT_EQ(expected, NormalizePath(given)); - - // Server only - expected = sep + sep + "a"; - given = expected; - ASSERT_EQ(expected, NormalizePath(given)); - - // Two slashes after character - expected = "a" + sep; - given = "a" + sep + sep; - - ASSERT_EQ(expected, NormalizePath(given)); - - // slash only / - expected = sep; - given = expected; - ASSERT_EQ(expected, NormalizePath(given)); - - // UNC only // - expected = sep; - given = sep + sep; - - ASSERT_EQ(expected, NormalizePath(given)); - - // 3 slashesy // - expected = sep + sep; - given = sep + sep + sep; - ASSERT_EQ(expected, NormalizePath(given)); - - // 3 slashes // - expected = sep + sep + "a" + sep; - given = sep + sep + sep + "a" + sep; - ASSERT_EQ(expected, NormalizePath(given)); - - // 2 separators in the middle - expected = "a" + sep + "b"; - given = "a" + sep + sep + "b"; - ASSERT_EQ(expected, NormalizePath(given)); - - // UNC with duplicate slashes - expected = sep + sep + "SERVER" + sep + "a" + sep + "b" + sep + "c"; - given = sep + sep + "SERVER" + sep + "a" + sep + sep + "b" + sep + "c"; - ASSERT_EQ(expected, NormalizePath(given)); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc deleted file mode 100644 index 72332fc3a..000000000 --- a/db/flush_job_test.cc +++ /dev/null @@ -1,743 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/flush_job.h" - -#include -#include -#include -#include - -#include "db/blob/blob_index.h" -#include "db/column_family.h" -#include "db/db_impl/db_impl.h" -#include "db/version_set.h" -#include "file/writable_file_writer.h" -#include "rocksdb/cache.h" -#include "rocksdb/file_system.h" -#include "rocksdb/write_buffer_manager.h" -#include "table/mock_table.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/random.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -// TODO(icanadi) Mock out everything else: -// 1. VersionSet -// 2. Memtable -class FlushJobTestBase : public testing::Test { - protected: - FlushJobTestBase(std::string dbname, const Comparator* ucmp) - : env_(Env::Default()), - fs_(env_->GetFileSystem()), - dbname_(std::move(dbname)), - ucmp_(ucmp), - options_(), - db_options_(options_), - column_family_names_({kDefaultColumnFamilyName, "foo", "bar"}), - table_cache_(NewLRUCache(50000, 16)), - write_buffer_manager_(db_options_.db_write_buffer_size), - shutting_down_(false), - mock_table_factory_(new mock::MockTableFactory()) {} - - virtual ~FlushJobTestBase() { - if (getenv("KEEP_DB")) { - fprintf(stdout, "db is still in %s\n", dbname_.c_str()); - } else { - // destroy versions_ to release all file handles - versions_.reset(); - EXPECT_OK(DestroyDir(env_, dbname_)); - } - } - - void NewDB() { - ASSERT_OK(SetIdentityFile(env_, dbname_)); - VersionEdit new_db; - - new_db.SetLogNumber(0); - new_db.SetNextFile(2); - new_db.SetLastSequence(0); - - autovector new_cfs; - SequenceNumber last_seq = 1; - uint32_t cf_id = 1; - for (size_t i = 1; i != column_family_names_.size(); ++i) { - VersionEdit new_cf; - new_cf.AddColumnFamily(column_family_names_[i]); - new_cf.SetColumnFamily(cf_id++); - new_cf.SetComparatorName(ucmp_->Name()); - new_cf.SetLogNumber(0); - new_cf.SetNextFile(2); - new_cf.SetLastSequence(last_seq++); - new_cfs.emplace_back(new_cf); - } - - const std::string manifest = DescriptorFileName(dbname_, 1); - const auto& fs = env_->GetFileSystem(); - std::unique_ptr file_writer; - Status s = WritableFileWriter::Create( - fs, manifest, fs->OptimizeForManifestWrite(env_options_), &file_writer, - nullptr); - ASSERT_OK(s); - - { - log::Writer log(std::move(file_writer), 0, false); - std::string record; - new_db.EncodeTo(&record); - s = log.AddRecord(record); - ASSERT_OK(s); - - for (const auto& e : new_cfs) { - record.clear(); - e.EncodeTo(&record); - s = log.AddRecord(record); - ASSERT_OK(s); - } - } - ASSERT_OK(s); - // Make "CURRENT" file that points to the new manifest file. 
- s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); - } - - void SetUp() override { - EXPECT_OK(env_->CreateDirIfMissing(dbname_)); - - // TODO(icanadi) Remove this once we mock out VersionSet - NewDB(); - - db_options_.env = env_; - db_options_.fs = fs_; - db_options_.db_paths.emplace_back(dbname_, - std::numeric_limits::max()); - db_options_.statistics = CreateDBStatistics(); - - cf_options_.comparator = ucmp_; - - std::vector column_families; - cf_options_.table_factory = mock_table_factory_; - for (const auto& cf_name : column_family_names_) { - column_families.emplace_back(cf_name, cf_options_); - } - - versions_.reset( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); - EXPECT_OK(versions_->Recover(column_families, false)); - } - - Env* env_; - std::shared_ptr fs_; - std::string dbname_; - const Comparator* const ucmp_; - EnvOptions env_options_; - Options options_; - ImmutableDBOptions db_options_; - const std::vector column_family_names_; - std::shared_ptr table_cache_; - WriteController write_controller_; - WriteBufferManager write_buffer_manager_; - ColumnFamilyOptions cf_options_; - std::unique_ptr versions_; - InstrumentedMutex mutex_; - std::atomic shutting_down_; - std::shared_ptr mock_table_factory_; - - SeqnoToTimeMapping empty_seqno_to_time_mapping_; -}; - -class FlushJobTest : public FlushJobTestBase { - public: - FlushJobTest() - : FlushJobTestBase(test::PerThreadDBPath("flush_job_test"), - BytewiseComparator()) {} -}; - -TEST_F(FlushJobTest, Empty) { - JobContext job_context(0); - auto cfd = versions_->GetColumnFamilySet()->GetDefault(); - EventLogger event_logger(db_options_.info_log.get()); - SnapshotChecker* snapshot_checker = nullptr; // not relavant - FlushJob flush_job( - dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, - *cfd->GetLatestMutableCFOptions(), - std::numeric_limits::max() /* memtable_id */, env_options_, - versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber, - snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr, - nullptr, kNoCompression, nullptr, &event_logger, false, - true /* sync_output_directory */, true /* write_manifest */, - Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_); - { - InstrumentedMutexLock l(&mutex_); - flush_job.PickMemTable(); - ASSERT_OK(flush_job.Run()); - } - job_context.Clean(); -} - -TEST_F(FlushJobTest, NonEmpty) { - JobContext job_context(0); - auto cfd = versions_->GetColumnFamilySet()->GetDefault(); - auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(), - kMaxSequenceNumber); - new_mem->Ref(); - auto inserted_keys = mock::MakeMockFile(); - // Test data: - // seqno [ 1, 2 ... 8998, 8999, 9000, 9001, 9002 ... 9999 ] - // key [ 1001, 1002 ... 9998, 9999, 0, 1, 2 ... 
999 ] - // range-delete "9995" -> "9999" at seqno 10000 - // blob references with seqnos 10001..10006 - for (int i = 1; i < 10000; ++i) { - std::string key(std::to_string((i + 1000) % 10000)); - std::string value("value" + key); - ASSERT_OK(new_mem->Add(SequenceNumber(i), kTypeValue, key, value, - nullptr /* kv_prot_info */)); - if ((i + 1000) % 10000 < 9995) { - InternalKey internal_key(key, SequenceNumber(i), kTypeValue); - inserted_keys.push_back({internal_key.Encode().ToString(), value}); - } - } - - { - ASSERT_OK(new_mem->Add(SequenceNumber(10000), kTypeRangeDeletion, "9995", - "9999a", nullptr /* kv_prot_info */)); - InternalKey internal_key("9995", SequenceNumber(10000), kTypeRangeDeletion); - inserted_keys.push_back({internal_key.Encode().ToString(), "9999a"}); - } - - // Note: the first two blob references will not be considered when resolving - // the oldest blob file referenced (the first one is inlined TTL, while the - // second one is TTL and thus points to a TTL blob file). - constexpr std::array blob_file_numbers{ - {kInvalidBlobFileNumber, 5, 103, 17, 102, 101}}; - for (size_t i = 0; i < blob_file_numbers.size(); ++i) { - std::string key(std::to_string(i + 10001)); - std::string blob_index; - if (i == 0) { - BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 1234567890ULL, - "foo"); - } else if (i == 1) { - BlobIndex::EncodeBlobTTL(&blob_index, /* expiration */ 1234567890ULL, - blob_file_numbers[i], /* offset */ i << 10, - /* size */ i << 20, kNoCompression); - } else { - BlobIndex::EncodeBlob(&blob_index, blob_file_numbers[i], - /* offset */ i << 10, /* size */ i << 20, - kNoCompression); - } - - const SequenceNumber seq(i + 10001); - ASSERT_OK(new_mem->Add(seq, kTypeBlobIndex, key, blob_index, - nullptr /* kv_prot_info */)); - - InternalKey internal_key(key, seq, kTypeBlobIndex); - inserted_keys.push_back({internal_key.Encode().ToString(), blob_index}); - } - mock::SortKVVector(&inserted_keys); - - autovector to_delete; - new_mem->ConstructFragmentedRangeTombstones(); - cfd->imm()->Add(new_mem, &to_delete); - for (auto& m : to_delete) { - delete m; - } - - EventLogger event_logger(db_options_.info_log.get()); - SnapshotChecker* snapshot_checker = nullptr; // not relavant - FlushJob flush_job( - dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, - *cfd->GetLatestMutableCFOptions(), - std::numeric_limits::max() /* memtable_id */, env_options_, - versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber, - snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr, - nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, - true, true /* sync_output_directory */, true /* write_manifest */, - Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_); - - HistogramData hist; - FileMetaData file_meta; - mutex_.Lock(); - flush_job.PickMemTable(); - ASSERT_OK(flush_job.Run(nullptr, &file_meta)); - mutex_.Unlock(); - db_options_.statistics->histogramData(FLUSH_TIME, &hist); - ASSERT_GT(hist.average, 0.0); - - ASSERT_EQ(std::to_string(0), file_meta.smallest.user_key().ToString()); - ASSERT_EQ("9999a", file_meta.largest.user_key().ToString()); - ASSERT_EQ(1, file_meta.fd.smallest_seqno); - ASSERT_EQ(10006, file_meta.fd.largest_seqno); - ASSERT_EQ(17, file_meta.oldest_blob_file_number); - mock_table_factory_->AssertSingleFile(inserted_keys); - job_context.Clean(); -} - -TEST_F(FlushJobTest, FlushMemTablesSingleColumnFamily) { - const size_t num_mems = 2; - const size_t num_mems_to_flush = 1; - const size_t 
num_keys_per_table = 100; - JobContext job_context(0); - ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault(); - std::vector memtable_ids; - std::vector new_mems; - for (size_t i = 0; i != num_mems; ++i) { - MemTable* mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(), - kMaxSequenceNumber); - mem->SetID(i); - mem->Ref(); - new_mems.emplace_back(mem); - memtable_ids.push_back(mem->GetID()); - - for (size_t j = 0; j < num_keys_per_table; ++j) { - std::string key(std::to_string(j + i * num_keys_per_table)); - std::string value("value" + key); - ASSERT_OK(mem->Add(SequenceNumber(j + i * num_keys_per_table), kTypeValue, - key, value, nullptr /* kv_prot_info */)); - } - } - - autovector to_delete; - for (auto mem : new_mems) { - mem->ConstructFragmentedRangeTombstones(); - cfd->imm()->Add(mem, &to_delete); - } - - EventLogger event_logger(db_options_.info_log.get()); - SnapshotChecker* snapshot_checker = nullptr; // not relavant - - assert(memtable_ids.size() == num_mems); - uint64_t smallest_memtable_id = memtable_ids.front(); - uint64_t flush_memtable_id = smallest_memtable_id + num_mems_to_flush - 1; - FlushJob flush_job( - dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, - *cfd->GetLatestMutableCFOptions(), flush_memtable_id, env_options_, - versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber, - snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr, - nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, - true, true /* sync_output_directory */, true /* write_manifest */, - Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_); - HistogramData hist; - FileMetaData file_meta; - mutex_.Lock(); - flush_job.PickMemTable(); - ASSERT_OK(flush_job.Run(nullptr /* prep_tracker */, &file_meta)); - mutex_.Unlock(); - db_options_.statistics->histogramData(FLUSH_TIME, &hist); - ASSERT_GT(hist.average, 0.0); - - ASSERT_EQ(std::to_string(0), file_meta.smallest.user_key().ToString()); - ASSERT_EQ("99", file_meta.largest.user_key().ToString()); - ASSERT_EQ(0, file_meta.fd.smallest_seqno); - ASSERT_EQ(SequenceNumber(num_mems_to_flush * num_keys_per_table - 1), - file_meta.fd.largest_seqno); - ASSERT_EQ(kInvalidBlobFileNumber, file_meta.oldest_blob_file_number); - - for (auto m : to_delete) { - delete m; - } - to_delete.clear(); - job_context.Clean(); -} - -TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) { - autovector all_cfds; - for (auto cfd : *versions_->GetColumnFamilySet()) { - all_cfds.push_back(cfd); - } - const std::vector num_memtables = {2, 1, 3}; - assert(num_memtables.size() == column_family_names_.size()); - const size_t num_keys_per_memtable = 1000; - JobContext job_context(0); - std::vector memtable_ids; - std::vector smallest_seqs; - std::vector largest_seqs; - autovector to_delete; - SequenceNumber curr_seqno = 0; - size_t k = 0; - for (auto cfd : all_cfds) { - smallest_seqs.push_back(curr_seqno); - for (size_t i = 0; i != num_memtables[k]; ++i) { - MemTable* mem = cfd->ConstructNewMemtable( - *cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); - mem->SetID(i); - mem->Ref(); - - for (size_t j = 0; j != num_keys_per_memtable; ++j) { - std::string key(std::to_string(j + i * num_keys_per_memtable)); - std::string value("value" + key); - ASSERT_OK(mem->Add(curr_seqno++, kTypeValue, key, value, - nullptr /* kv_prot_info */)); - } - mem->ConstructFragmentedRangeTombstones(); - cfd->imm()->Add(mem, &to_delete); - } - largest_seqs.push_back(curr_seqno - 1); - 
memtable_ids.push_back(num_memtables[k++] - 1); - } - - EventLogger event_logger(db_options_.info_log.get()); - SnapshotChecker* snapshot_checker = nullptr; // not relevant - std::vector> flush_jobs; - k = 0; - for (auto cfd : all_cfds) { - std::vector snapshot_seqs; - flush_jobs.emplace_back(new FlushJob( - dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(), - memtable_ids[k], env_options_, versions_.get(), &mutex_, - &shutting_down_, snapshot_seqs, kMaxSequenceNumber, snapshot_checker, - &job_context, FlushReason::kTest, nullptr, nullptr, nullptr, - kNoCompression, db_options_.statistics.get(), &event_logger, true, - false /* sync_output_directory */, false /* write_manifest */, - Env::Priority::USER, nullptr /*IOTracer*/, - empty_seqno_to_time_mapping_)); - k++; - } - HistogramData hist; - std::vector file_metas; - // Call reserve to avoid auto-resizing - file_metas.reserve(flush_jobs.size()); - mutex_.Lock(); - for (auto& job : flush_jobs) { - job->PickMemTable(); - } - for (auto& job : flush_jobs) { - FileMetaData meta; - // Run will release and re-acquire mutex - ASSERT_OK(job->Run(nullptr /**/, &meta)); - file_metas.emplace_back(meta); - } - autovector file_meta_ptrs; - for (auto& meta : file_metas) { - file_meta_ptrs.push_back(&meta); - } - autovector*> mems_list; - for (size_t i = 0; i != all_cfds.size(); ++i) { - const auto& mems = flush_jobs[i]->GetMemTables(); - mems_list.push_back(&mems); - } - autovector mutable_cf_options_list; - for (auto cfd : all_cfds) { - mutable_cf_options_list.push_back(cfd->GetLatestMutableCFOptions()); - } - autovector>*> - committed_flush_jobs_info; - for (auto& job : flush_jobs) { - committed_flush_jobs_info.push_back(job->GetCommittedFlushJobsInfo()); - } - - Status s = InstallMemtableAtomicFlushResults( - nullptr /* imm_lists */, all_cfds, mutable_cf_options_list, mems_list, - versions_.get(), nullptr /* prep_tracker */, &mutex_, file_meta_ptrs, - committed_flush_jobs_info, &job_context.memtables_to_free, - nullptr /* db_directory */, nullptr /* log_buffer */); - ASSERT_OK(s); - - mutex_.Unlock(); - db_options_.statistics->histogramData(FLUSH_TIME, &hist); - ASSERT_GT(hist.average, 0.0); - k = 0; - for (const auto& file_meta : file_metas) { - ASSERT_EQ(std::to_string(0), file_meta.smallest.user_key().ToString()); - ASSERT_EQ("999", file_meta.largest.user_key() - .ToString()); // max key by bytewise comparator - ASSERT_EQ(smallest_seqs[k], file_meta.fd.smallest_seqno); - ASSERT_EQ(largest_seqs[k], file_meta.fd.largest_seqno); - // Verify that imm is empty - ASSERT_EQ(std::numeric_limits::max(), - all_cfds[k]->imm()->GetEarliestMemTableID()); - ASSERT_EQ(0, all_cfds[k]->imm()->GetLatestMemTableID()); - ++k; - } - - for (auto m : to_delete) { - delete m; - } - to_delete.clear(); - job_context.Clean(); -} - -TEST_F(FlushJobTest, Snapshots) { - JobContext job_context(0); - auto cfd = versions_->GetColumnFamilySet()->GetDefault(); - auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(), - kMaxSequenceNumber); - - std::set snapshots_set; - int keys = 10000; - int max_inserts_per_keys = 8; - - Random rnd(301); - for (int i = 0; i < keys / 2; ++i) { - snapshots_set.insert(rnd.Uniform(keys * (max_inserts_per_keys / 2)) + 1); - } - // set has already removed the duplicate snapshots - std::vector snapshots(snapshots_set.begin(), - snapshots_set.end()); - - new_mem->Ref(); - SequenceNumber current_seqno = 0; - auto inserted_keys = mock::MakeMockFile(); - for (int i = 1; i < keys; ++i) { - std::string key(std::to_string(i)); - 
int insertions = rnd.Uniform(max_inserts_per_keys); - for (int j = 0; j < insertions; ++j) { - std::string value(rnd.HumanReadableString(10)); - auto seqno = ++current_seqno; - ASSERT_OK(new_mem->Add(SequenceNumber(seqno), kTypeValue, key, value, - nullptr /* kv_prot_info */)); - // a key is visible only if: - // 1. it's the last one written (j == insertions - 1) - // 2. there's a snapshot pointing at it - bool visible = (j == insertions - 1) || - (snapshots_set.find(seqno) != snapshots_set.end()); - if (visible) { - InternalKey internal_key(key, seqno, kTypeValue); - inserted_keys.push_back({internal_key.Encode().ToString(), value}); - } - } - } - mock::SortKVVector(&inserted_keys); - - autovector<MemTable*> to_delete; - new_mem->ConstructFragmentedRangeTombstones(); - cfd->imm()->Add(new_mem, &to_delete); - for (auto& m : to_delete) { - delete m; - } - - EventLogger event_logger(db_options_.info_log.get()); - SnapshotChecker* snapshot_checker = nullptr; // not relevant - FlushJob flush_job( - dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, - *cfd->GetLatestMutableCFOptions(), - std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_, - versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber, - snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr, - nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, - true, true /* sync_output_directory */, true /* write_manifest */, - Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_); - mutex_.Lock(); - flush_job.PickMemTable(); - ASSERT_OK(flush_job.Run()); - mutex_.Unlock(); - mock_table_factory_->AssertSingleFile(inserted_keys); - HistogramData hist; - db_options_.statistics->histogramData(FLUSH_TIME, &hist); - ASSERT_GT(hist.average, 0.0); - job_context.Clean(); -} - -TEST_F(FlushJobTest, GetRateLimiterPriorityForWrite) { - // Prepare a FlushJob that flushes memtables of a single column family. 
- const size_t num_mems = 2; - const size_t num_mems_to_flush = 1; - const size_t num_keys_per_table = 100; - JobContext job_context(0); - ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault(); - std::vector<uint64_t> memtable_ids; - std::vector<MemTable*> new_mems; - for (size_t i = 0; i != num_mems; ++i) { - MemTable* mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(), - kMaxSequenceNumber); - mem->SetID(i); - mem->Ref(); - new_mems.emplace_back(mem); - memtable_ids.push_back(mem->GetID()); - - for (size_t j = 0; j < num_keys_per_table; ++j) { - std::string key(std::to_string(j + i * num_keys_per_table)); - std::string value("value" + key); - ASSERT_OK(mem->Add(SequenceNumber(j + i * num_keys_per_table), kTypeValue, - key, value, nullptr /* kv_prot_info */)); - } - } - - autovector<MemTable*> to_delete; - for (auto mem : new_mems) { - mem->ConstructFragmentedRangeTombstones(); - cfd->imm()->Add(mem, &to_delete); - } - - EventLogger event_logger(db_options_.info_log.get()); - SnapshotChecker* snapshot_checker = nullptr; // not relevant - - assert(memtable_ids.size() == num_mems); - uint64_t smallest_memtable_id = memtable_ids.front(); - uint64_t flush_memtable_id = smallest_memtable_id + num_mems_to_flush - 1; - FlushJob flush_job( - dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, - *cfd->GetLatestMutableCFOptions(), flush_memtable_id, env_options_, - versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber, - snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr, - nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, - true, true /* sync_output_directory */, true /* write_manifest */, - Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_); - - // When the state from WriteController is normal. - ASSERT_EQ(flush_job.GetRateLimiterPriorityForWrite(), Env::IO_HIGH); - - WriteController* write_controller = - flush_job.versions_->GetColumnFamilySet()->write_controller(); - - { - // When the state from WriteController is Delayed. - std::unique_ptr<WriteControllerToken> delay_token = - write_controller->GetDelayToken(1000000); - ASSERT_EQ(flush_job.GetRateLimiterPriorityForWrite(), Env::IO_USER); - } - - { - // When the state from WriteController is Stopped. 
- std::unique_ptr stop_token = - write_controller->GetStopToken(); - ASSERT_EQ(flush_job.GetRateLimiterPriorityForWrite(), Env::IO_USER); - } -} - -class FlushJobTimestampTest : public FlushJobTestBase { - public: - FlushJobTimestampTest() - : FlushJobTestBase(test::PerThreadDBPath("flush_job_ts_gc_test"), - test::BytewiseComparatorWithU64TsWrapper()) {} - - void AddKeyValueToMemtable(MemTable* memtable, std::string key, uint64_t ts, - SequenceNumber seq, ValueType value_type, - Slice value) { - std::string key_str(std::move(key)); - PutFixed64(&key_str, ts); - ASSERT_OK(memtable->Add(seq, value_type, key_str, value, - nullptr /* kv_prot_info */)); - } - - protected: - static constexpr uint64_t kStartTs = 10; - static constexpr SequenceNumber kStartSeq = 0; - SequenceNumber curr_seq_{kStartSeq}; - std::atomic curr_ts_{kStartTs}; -}; - -TEST_F(FlushJobTimestampTest, AllKeysExpired) { - ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault(); - autovector to_delete; - - { - MemTable* new_mem = cfd->ConstructNewMemtable( - *cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); - new_mem->Ref(); - for (int i = 0; i < 100; ++i) { - uint64_t ts = curr_ts_.fetch_add(1); - SequenceNumber seq = (curr_seq_++); - AddKeyValueToMemtable(new_mem, test::EncodeInt(0), ts, seq, - ValueType::kTypeValue, "0_value"); - } - uint64_t ts = curr_ts_.fetch_add(1); - SequenceNumber seq = (curr_seq_++); - AddKeyValueToMemtable(new_mem, test::EncodeInt(0), ts, seq, - ValueType::kTypeDeletionWithTimestamp, ""); - new_mem->ConstructFragmentedRangeTombstones(); - cfd->imm()->Add(new_mem, &to_delete); - } - - std::vector snapshots; - constexpr SnapshotChecker* const snapshot_checker = nullptr; - JobContext job_context(0); - EventLogger event_logger(db_options_.info_log.get()); - std::string full_history_ts_low; - PutFixed64(&full_history_ts_low, std::numeric_limits::max()); - FlushJob flush_job( - dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(), - std::numeric_limits::max() /* memtable_id */, env_options_, - versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber, - snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr, - nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, - true, true /* sync_output_directory */, true /* write_manifest */, - Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_, - /*db_id=*/"", - /*db_session_id=*/"", full_history_ts_low); - - FileMetaData fmeta; - mutex_.Lock(); - flush_job.PickMemTable(); - ASSERT_OK(flush_job.Run(/*prep_tracker=*/nullptr, &fmeta)); - mutex_.Unlock(); - - { - std::string key = test::EncodeInt(0); - key.append(test::EncodeInt(curr_ts_.load(std::memory_order_relaxed) - 1)); - InternalKey ikey(key, curr_seq_ - 1, ValueType::kTypeDeletionWithTimestamp); - ASSERT_EQ(ikey.Encode(), fmeta.smallest.Encode()); - ASSERT_EQ(ikey.Encode(), fmeta.largest.Encode()); - } - - job_context.Clean(); - ASSERT_TRUE(to_delete.empty()); -} - -TEST_F(FlushJobTimestampTest, NoKeyExpired) { - ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault(); - autovector to_delete; - - { - MemTable* new_mem = cfd->ConstructNewMemtable( - *cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); - new_mem->Ref(); - for (int i = 0; i < 100; ++i) { - uint64_t ts = curr_ts_.fetch_add(1); - SequenceNumber seq = (curr_seq_++); - AddKeyValueToMemtable(new_mem, test::EncodeInt(0), ts, seq, - ValueType::kTypeValue, "0_value"); - } - new_mem->ConstructFragmentedRangeTombstones(); - cfd->imm()->Add(new_mem, 
&to_delete); - } - - std::vector snapshots; - SnapshotChecker* const snapshot_checker = nullptr; - JobContext job_context(0); - EventLogger event_logger(db_options_.info_log.get()); - std::string full_history_ts_low; - PutFixed64(&full_history_ts_low, 0); - FlushJob flush_job( - dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(), - std::numeric_limits::max() /* memtable_id */, env_options_, - versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber, - snapshot_checker, &job_context, FlushReason::kTest, nullptr, nullptr, - nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, - true, true /* sync_output_directory */, true /* write_manifest */, - Env::Priority::USER, nullptr /*IOTracer*/, empty_seqno_to_time_mapping_, - /*db_id=*/"", - /*db_session_id=*/"", full_history_ts_low); - - FileMetaData fmeta; - mutex_.Lock(); - flush_job.PickMemTable(); - ASSERT_OK(flush_job.Run(/*prep_tracker=*/nullptr, &fmeta)); - mutex_.Unlock(); - - { - std::string ukey = test::EncodeInt(0); - std::string smallest_key = - ukey + test::EncodeInt(curr_ts_.load(std::memory_order_relaxed) - 1); - std::string largest_key = ukey + test::EncodeInt(kStartTs); - InternalKey smallest(smallest_key, curr_seq_ - 1, ValueType::kTypeValue); - InternalKey largest(largest_key, kStartSeq, ValueType::kTypeValue); - ASSERT_EQ(smallest.Encode(), fmeta.smallest.Encode()); - ASSERT_EQ(largest.Encode(), fmeta.largest.Encode()); - } - job_context.Clean(); - ASSERT_TRUE(to_delete.empty()); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/import_column_family_test.cc b/db/import_column_family_test.cc deleted file mode 100644 index c7940a374..000000000 --- a/db/import_column_family_test.cc +++ /dev/null @@ -1,746 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- - -#include - -#include "db/db_test_util.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/sst_file_writer.h" -#include "test_util/testutil.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -class ImportColumnFamilyTest : public DBTestBase { - public: - ImportColumnFamilyTest() - : DBTestBase("import_column_family_test", /*env_do_fsync=*/true) { - sst_files_dir_ = dbname_ + "/sst_files/"; - export_files_dir_ = test::PerThreadDBPath(env_, "export"); - DestroyAndRecreateExternalSSTFilesDir(); - import_cfh_ = nullptr; - import_cfh2_ = nullptr; - metadata_ptr_ = nullptr; - } - - ~ImportColumnFamilyTest() { - if (import_cfh_) { - EXPECT_OK(db_->DropColumnFamily(import_cfh_)); - EXPECT_OK(db_->DestroyColumnFamilyHandle(import_cfh_)); - import_cfh_ = nullptr; - } - if (import_cfh2_) { - EXPECT_OK(db_->DropColumnFamily(import_cfh2_)); - EXPECT_OK(db_->DestroyColumnFamilyHandle(import_cfh2_)); - import_cfh2_ = nullptr; - } - if (metadata_ptr_) { - delete metadata_ptr_; - metadata_ptr_ = nullptr; - } - EXPECT_OK(DestroyDir(env_, sst_files_dir_)); - EXPECT_OK(DestroyDir(env_, export_files_dir_)); - } - - void DestroyAndRecreateExternalSSTFilesDir() { - EXPECT_OK(DestroyDir(env_, sst_files_dir_)); - EXPECT_OK(env_->CreateDir(sst_files_dir_)); - EXPECT_OK(DestroyDir(env_, export_files_dir_)); - } - - LiveFileMetaData LiveFileMetaDataInit(std::string name, std::string path, - int level, - SequenceNumber smallest_seqno, - SequenceNumber largest_seqno) { - LiveFileMetaData metadata; - metadata.name = name; - metadata.db_path = path; - metadata.smallest_seqno = smallest_seqno; - metadata.largest_seqno = largest_seqno; - metadata.level = level; - return metadata; - } - - protected: - std::string sst_files_dir_; - std::string export_files_dir_; - ColumnFamilyHandle* import_cfh_; - ColumnFamilyHandle* import_cfh2_; - ExportImportFilesMetaData* metadata_ptr_; -}; - -TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFiles) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"koko"}, options); - - SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); - SstFileWriter sfw_unknown(EnvOptions(), options); - - // cf1.sst - const std::string cf1_sst_name = "cf1.sst"; - const std::string cf1_sst = sst_files_dir_ + cf1_sst_name; - ASSERT_OK(sfw_cf1.Open(cf1_sst)); - ASSERT_OK(sfw_cf1.Put("K1", "V1")); - ASSERT_OK(sfw_cf1.Put("K2", "V2")); - ASSERT_OK(sfw_cf1.Finish()); - - // cf_unknown.sst - const std::string unknown_sst_name = "cf_unknown.sst"; - const std::string unknown_sst = sst_files_dir_ + unknown_sst_name; - ASSERT_OK(sfw_unknown.Open(unknown_sst)); - ASSERT_OK(sfw_unknown.Put("K3", "V1")); - ASSERT_OK(sfw_unknown.Put("K4", "V2")); - ASSERT_OK(sfw_unknown.Finish()); - - { - // Import sst file corresponding to cf1 onto a new cf and verify - ExportImportFilesMetaData metadata; - metadata.files.push_back( - LiveFileMetaDataInit(cf1_sst_name, sst_files_dir_, 0, 10, 19)); - metadata.db_comparator_name = options.comparator->Name(); - - ASSERT_OK(db_->CreateColumnFamilyWithImport( - options, "toto", ImportColumnFamilyOptions(), metadata, &import_cfh_)); - ASSERT_NE(import_cfh_, nullptr); - - std::string value; - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K1", &value)); - ASSERT_EQ(value, "V1"); - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K2", &value)); - ASSERT_EQ(value, "V2"); - ASSERT_OK(db_->DropColumnFamily(import_cfh_)); - ASSERT_OK(db_->DestroyColumnFamilyHandle(import_cfh_)); - import_cfh_ = nullptr; - } - - { - // Import sst file 
corresponding to unknown cf onto a new cf and verify - ExportImportFilesMetaData metadata; - metadata.files.push_back( - LiveFileMetaDataInit(unknown_sst_name, sst_files_dir_, 0, 20, 29)); - metadata.db_comparator_name = options.comparator->Name(); - - ASSERT_OK(db_->CreateColumnFamilyWithImport( - options, "yoyo", ImportColumnFamilyOptions(), metadata, &import_cfh_)); - ASSERT_NE(import_cfh_, nullptr); - - std::string value; - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K3", &value)); - ASSERT_EQ(value, "V1"); - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K4", &value)); - ASSERT_EQ(value, "V2"); - } - EXPECT_OK(db_->DestroyColumnFamilyHandle(import_cfh_)); - import_cfh_ = nullptr; - - // verify sst unique id during reopen - options.verify_sst_unique_id_in_manifest = true; - ReopenWithColumnFamilies({"default", "koko", "yoyo"}, options); -} - -TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFilesWithOverlap) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"koko"}, options); - - SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); - - // file3.sst - const std::string file3_sst_name = "file3.sst"; - const std::string file3_sst = sst_files_dir_ + file3_sst_name; - ASSERT_OK(sfw_cf1.Open(file3_sst)); - for (int i = 0; i < 100; ++i) { - ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_val")); - } - ASSERT_OK(sfw_cf1.Finish()); - - // file2.sst - const std::string file2_sst_name = "file2.sst"; - const std::string file2_sst = sst_files_dir_ + file2_sst_name; - ASSERT_OK(sfw_cf1.Open(file2_sst)); - for (int i = 0; i < 100; i += 2) { - ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite1")); - } - ASSERT_OK(sfw_cf1.Finish()); - - // file1a.sst - const std::string file1a_sst_name = "file1a.sst"; - const std::string file1a_sst = sst_files_dir_ + file1a_sst_name; - ASSERT_OK(sfw_cf1.Open(file1a_sst)); - for (int i = 0; i < 52; i += 4) { - ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite2")); - } - ASSERT_OK(sfw_cf1.Finish()); - - // file1b.sst - const std::string file1b_sst_name = "file1b.sst"; - const std::string file1b_sst = sst_files_dir_ + file1b_sst_name; - ASSERT_OK(sfw_cf1.Open(file1b_sst)); - for (int i = 52; i < 100; i += 4) { - ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite2")); - } - ASSERT_OK(sfw_cf1.Finish()); - - // file0a.sst - const std::string file0a_sst_name = "file0a.sst"; - const std::string file0a_sst = sst_files_dir_ + file0a_sst_name; - ASSERT_OK(sfw_cf1.Open(file0a_sst)); - for (int i = 0; i < 100; i += 16) { - ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite3")); - } - ASSERT_OK(sfw_cf1.Finish()); - - // file0b.sst - const std::string file0b_sst_name = "file0b.sst"; - const std::string file0b_sst = sst_files_dir_ + file0b_sst_name; - ASSERT_OK(sfw_cf1.Open(file0b_sst)); - for (int i = 0; i < 100; i += 16) { - ASSERT_OK(sfw_cf1.Put(Key(i), Key(i) + "_overwrite4")); - } - ASSERT_OK(sfw_cf1.Finish()); - - // Import sst files and verify - ExportImportFilesMetaData metadata; - metadata.files.push_back( - LiveFileMetaDataInit(file3_sst_name, sst_files_dir_, 3, 10, 19)); - metadata.files.push_back( - LiveFileMetaDataInit(file2_sst_name, sst_files_dir_, 2, 20, 29)); - metadata.files.push_back( - LiveFileMetaDataInit(file1a_sst_name, sst_files_dir_, 1, 30, 34)); - metadata.files.push_back( - LiveFileMetaDataInit(file1b_sst_name, sst_files_dir_, 1, 35, 39)); - metadata.files.push_back( - LiveFileMetaDataInit(file0a_sst_name, sst_files_dir_, 0, 40, 49)); - metadata.files.push_back( - LiveFileMetaDataInit(file0b_sst_name, sst_files_dir_, 0, 50, 59)); - 
metadata.db_comparator_name = options.comparator->Name(); - - ASSERT_OK(db_->CreateColumnFamilyWithImport( - options, "toto", ImportColumnFamilyOptions(), metadata, &import_cfh_)); - ASSERT_NE(import_cfh_, nullptr); - - for (int i = 0; i < 100; i++) { - std::string value; - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value)); - if (i % 16 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite4"); - } else if (i % 4 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite2"); - } else if (i % 2 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite1"); - } else { - ASSERT_EQ(value, Key(i) + "_val"); - } - } - - for (int i = 0; i < 100; i += 5) { - ASSERT_OK( - db_->Put(WriteOptions(), import_cfh_, Key(i), Key(i) + "_overwrite5")); - } - - // Flush and check again - ASSERT_OK(db_->Flush(FlushOptions(), import_cfh_)); - for (int i = 0; i < 100; i++) { - std::string value; - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value)); - if (i % 5 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite5"); - } else if (i % 16 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite4"); - } else if (i % 4 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite2"); - } else if (i % 2 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite1"); - } else { - ASSERT_EQ(value, Key(i) + "_val"); - } - } - - // Compact and check again. - ASSERT_OK( - db_->CompactRange(CompactRangeOptions(), import_cfh_, nullptr, nullptr)); - for (int i = 0; i < 100; i++) { - std::string value; - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value)); - if (i % 5 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite5"); - } else if (i % 16 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite4"); - } else if (i % 4 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite2"); - } else if (i % 2 == 0) { - ASSERT_EQ(value, Key(i) + "_overwrite1"); - } else { - ASSERT_EQ(value, Key(i) + "_val"); - } - } -} - -TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFilesWithRangeTombstone) { - // Test for a bug where import file's smallest and largest key did not - // consider range tombstone. 
- Options options = CurrentOptions(); - CreateAndReopenWithCF({"koko"}, options); - - SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); - // cf1.sst - const std::string cf1_sst_name = "cf1.sst"; - const std::string cf1_sst = sst_files_dir_ + cf1_sst_name; - ASSERT_OK(sfw_cf1.Open(cf1_sst)); - ASSERT_OK(sfw_cf1.Put("K1", "V1")); - ASSERT_OK(sfw_cf1.Put("K2", "V2")); - ASSERT_OK(sfw_cf1.DeleteRange("K3", "K4")); - ASSERT_OK(sfw_cf1.Finish()); - - // Import sst file corresponding to cf1 onto a new cf and verify - ExportImportFilesMetaData metadata; - metadata.files.push_back( - LiveFileMetaDataInit(cf1_sst_name, sst_files_dir_, 0, 0, 19)); - metadata.db_comparator_name = options.comparator->Name(); - - ASSERT_OK(db_->CreateColumnFamilyWithImport( - options, "toto", ImportColumnFamilyOptions(), metadata, &import_cfh_)); - ASSERT_NE(import_cfh_, nullptr); - - ColumnFamilyMetaData import_cf_meta; - db_->GetColumnFamilyMetaData(import_cfh_, &import_cf_meta); - ASSERT_EQ(import_cf_meta.file_count, 1); - const SstFileMetaData* file_meta = nullptr; - for (const auto& level_meta : import_cf_meta.levels) { - if (!level_meta.files.empty()) { - file_meta = &(level_meta.files[0]); - break; - } - } - ASSERT_TRUE(file_meta != nullptr); - InternalKey largest; - largest.DecodeFrom(file_meta->largest); - ASSERT_EQ(largest.user_key(), "K4"); - - std::string value; - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K1", &value)); - ASSERT_EQ(value, "V1"); - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K2", &value)); - ASSERT_EQ(value, "V2"); - ASSERT_OK(db_->DropColumnFamily(import_cfh_)); - ASSERT_OK(db_->DestroyColumnFamilyHandle(import_cfh_)); - import_cfh_ = nullptr; -} - -TEST_F(ImportColumnFamilyTest, ImportExportedSSTFromAnotherCF) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"koko"}, options); - - for (int i = 0; i < 100; ++i) { - ASSERT_OK(Put(1, Key(i), Key(i) + "_val")); - } - ASSERT_OK(Flush(1)); - - ASSERT_OK( - db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr)); - - // Overwrite the value in the same set of keys. - for (int i = 0; i < 100; ++i) { - ASSERT_OK(Put(1, Key(i), Key(i) + "_overwrite")); - } - - // Flush to create L0 file. - ASSERT_OK(Flush(1)); - for (int i = 0; i < 100; ++i) { - ASSERT_OK(Put(1, Key(i), Key(i) + "_overwrite2")); - } - - // Flush again to create another L0 file. It should have higher sequencer. - ASSERT_OK(Flush(1)); - - Checkpoint* checkpoint; - ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); - ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir_, - &metadata_ptr_)); - ASSERT_NE(metadata_ptr_, nullptr); - delete checkpoint; - - ImportColumnFamilyOptions import_options; - import_options.move_files = false; - ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "toto", import_options, - *metadata_ptr_, &import_cfh_)); - ASSERT_NE(import_cfh_, nullptr); - - import_options.move_files = true; - ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "yoyo", import_options, - *metadata_ptr_, &import_cfh2_)); - ASSERT_NE(import_cfh2_, nullptr); - delete metadata_ptr_; - metadata_ptr_ = NULL; - - std::string value1, value2; - - for (int i = 0; i < 100; ++i) { - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1)); - ASSERT_EQ(Get(1, Key(i)), value1); - } - - for (int i = 0; i < 100; ++i) { - ASSERT_OK(db_->Get(ReadOptions(), import_cfh2_, Key(i), &value2)); - ASSERT_EQ(Get(1, Key(i)), value2); - } - - // Modify keys in cf1 and verify. 
- for (int i = 0; i < 25; i++) { - ASSERT_OK(db_->Delete(WriteOptions(), import_cfh_, Key(i))); - } - for (int i = 25; i < 50; i++) { - ASSERT_OK( - db_->Put(WriteOptions(), import_cfh_, Key(i), Key(i) + "_overwrite3")); - } - for (int i = 0; i < 25; ++i) { - ASSERT_TRUE( - db_->Get(ReadOptions(), import_cfh_, Key(i), &value1).IsNotFound()); - } - for (int i = 25; i < 50; ++i) { - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1)); - ASSERT_EQ(Key(i) + "_overwrite3", value1); - } - for (int i = 50; i < 100; ++i) { - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1)); - ASSERT_EQ(Key(i) + "_overwrite2", value1); - } - - for (int i = 0; i < 100; ++i) { - ASSERT_OK(db_->Get(ReadOptions(), import_cfh2_, Key(i), &value2)); - ASSERT_EQ(Get(1, Key(i)), value2); - } - - // Compact and check again. - ASSERT_OK(db_->Flush(FlushOptions(), import_cfh_)); - ASSERT_OK( - db_->CompactRange(CompactRangeOptions(), import_cfh_, nullptr, nullptr)); - - for (int i = 0; i < 25; ++i) { - ASSERT_TRUE( - db_->Get(ReadOptions(), import_cfh_, Key(i), &value1).IsNotFound()); - } - for (int i = 25; i < 50; ++i) { - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1)); - ASSERT_EQ(Key(i) + "_overwrite3", value1); - } - for (int i = 50; i < 100; ++i) { - ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, Key(i), &value1)); - ASSERT_EQ(Key(i) + "_overwrite2", value1); - } - - for (int i = 0; i < 100; ++i) { - ASSERT_OK(db_->Get(ReadOptions(), import_cfh2_, Key(i), &value2)); - ASSERT_EQ(Get(1, Key(i)), value2); - } -} - -TEST_F(ImportColumnFamilyTest, ImportExportedSSTFromAnotherDB) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"koko"}, options); - - for (int i = 0; i < 100; ++i) { - ASSERT_OK(Put(1, Key(i), Key(i) + "_val")); - } - ASSERT_OK(Flush(1)); - - // Compact to create a L1 file. - ASSERT_OK( - db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr)); - - // Overwrite the value in the same set of keys. - for (int i = 0; i < 50; ++i) { - ASSERT_OK(Put(1, Key(i), Key(i) + "_overwrite")); - } - - // Flush to create L0 file. - ASSERT_OK(Flush(1)); - - for (int i = 0; i < 25; ++i) { - ASSERT_OK(Put(1, Key(i), Key(i) + "_overwrite2")); - } - - // Flush again to create another L0 file. It should have higher sequencer. - ASSERT_OK(Flush(1)); - - Checkpoint* checkpoint; - ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); - ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir_, - &metadata_ptr_)); - ASSERT_NE(metadata_ptr_, nullptr); - delete checkpoint; - - // Create a new db and import the files. - DB* db_copy; - ASSERT_OK(DestroyDir(env_, dbname_ + "/db_copy")); - ASSERT_OK(DB::Open(options, dbname_ + "/db_copy", &db_copy)); - ColumnFamilyHandle* cfh = nullptr; - ASSERT_OK(db_copy->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", - ImportColumnFamilyOptions(), - *metadata_ptr_, &cfh)); - ASSERT_NE(cfh, nullptr); - - for (int i = 0; i < 100; ++i) { - std::string value; - ASSERT_OK(db_copy->Get(ReadOptions(), cfh, Key(i), &value)); - ASSERT_EQ(Get(1, Key(i)), value); - } - ASSERT_OK(db_copy->DropColumnFamily(cfh)); - ASSERT_OK(db_copy->DestroyColumnFamilyHandle(cfh)); - delete db_copy; - ASSERT_OK(DestroyDir(env_, dbname_ + "/db_copy")); -} - -TEST_F(ImportColumnFamilyTest, - ImportExportedSSTFromAnotherCFWithRangeTombstone) { - // Test for a bug where import file's smallest and largest key did not - // consider range tombstone. 
- Options options = CurrentOptions(); - options.disable_auto_compactions = true; - CreateAndReopenWithCF({"koko"}, options); - - for (int i = 10; i < 20; ++i) { - ASSERT_OK(Put(1, Key(i), Key(i) + "_val")); - } - ASSERT_OK(Flush(1 /* cf */)); - MoveFilesToLevel(1 /* level */, 1 /* cf */); - const Snapshot* snapshot = db_->GetSnapshot(); - ASSERT_OK(db_->DeleteRange(WriteOptions(), handles_[1], Key(0), Key(25))); - ASSERT_OK(Put(1, Key(1), "t")); - ASSERT_OK(Flush(1)); - // Tests importing a range tombstone only file - ASSERT_OK(db_->DeleteRange(WriteOptions(), handles_[1], Key(0), Key(2))); - - Checkpoint* checkpoint; - ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); - ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir_, - &metadata_ptr_)); - ASSERT_NE(metadata_ptr_, nullptr); - delete checkpoint; - - ImportColumnFamilyOptions import_options; - import_options.move_files = false; - ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "toto", import_options, - *metadata_ptr_, &import_cfh_)); - ASSERT_NE(import_cfh_, nullptr); - - import_options.move_files = true; - ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "yoyo", import_options, - *metadata_ptr_, &import_cfh2_)); - ASSERT_NE(import_cfh2_, nullptr); - delete metadata_ptr_; - metadata_ptr_ = nullptr; - - std::string value1, value2; - ReadOptions ro_latest; - ReadOptions ro_snapshot; - ro_snapshot.snapshot = snapshot; - - for (int i = 10; i < 20; ++i) { - ASSERT_TRUE(db_->Get(ro_latest, import_cfh_, Key(i), &value1).IsNotFound()); - ASSERT_OK(db_->Get(ro_snapshot, import_cfh_, Key(i), &value1)); - ASSERT_EQ(Get(1, Key(i), snapshot), value1); - } - ASSERT_TRUE(db_->Get(ro_latest, import_cfh_, Key(1), &value1).IsNotFound()); - - for (int i = 10; i < 20; ++i) { - ASSERT_TRUE( - db_->Get(ro_latest, import_cfh2_, Key(i), &value1).IsNotFound()); - - ASSERT_OK(db_->Get(ro_snapshot, import_cfh2_, Key(i), &value2)); - ASSERT_EQ(Get(1, Key(i), snapshot), value2); - } - ASSERT_TRUE(db_->Get(ro_latest, import_cfh2_, Key(1), &value1).IsNotFound()); - - db_->ReleaseSnapshot(snapshot); -} - -TEST_F(ImportColumnFamilyTest, LevelFilesOverlappingAtEndpoints) { - // Imports a column family containing a level where two files overlap at their - // endpoints. "Overlap" means the largest user key in one file is the same as - // the smallest user key in the second file. - const int kFileBytes = 128 << 10; // 128KB - const int kValueBytes = 1 << 10; // 1KB - const int kNumFiles = 4; - - Options options = CurrentOptions(); - options.disable_auto_compactions = true; - options.num_levels = 2; - CreateAndReopenWithCF({"koko"}, options); - - Random rnd(301); - // Every key is snapshot protected to ensure older versions will not be - // dropped during compaction. - std::vector snapshots; - snapshots.reserve(kFileBytes / kValueBytes * kNumFiles); - for (int i = 0; i < kNumFiles; ++i) { - for (int j = 0; j < kFileBytes / kValueBytes; ++j) { - auto value = rnd.RandomString(kValueBytes); - ASSERT_OK(Put(1, "key", value)); - snapshots.push_back(db_->GetSnapshot()); - } - ASSERT_OK(Flush(1)); - } - - // Compact to create overlapping L1 files. - ASSERT_OK( - db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr)); - ASSERT_GT(NumTableFilesAtLevel(1, 1), 1); - - Checkpoint* checkpoint; - ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); - ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir_, - &metadata_ptr_)); - ASSERT_NE(metadata_ptr_, nullptr); - delete checkpoint; - - // Create a new db and import the files. 
- DB* db_copy; - ASSERT_OK(DestroyDir(env_, dbname_ + "/db_copy")); - ASSERT_OK(DB::Open(options, dbname_ + "/db_copy", &db_copy)); - ColumnFamilyHandle* cfh = nullptr; - ASSERT_OK(db_copy->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", - ImportColumnFamilyOptions(), - *metadata_ptr_, &cfh)); - ASSERT_NE(cfh, nullptr); - - { - std::string value; - ASSERT_OK(db_copy->Get(ReadOptions(), cfh, "key", &value)); - } - ASSERT_OK(db_copy->DropColumnFamily(cfh)); - ASSERT_OK(db_copy->DestroyColumnFamilyHandle(cfh)); - delete db_copy; - ASSERT_OK(DestroyDir(env_, dbname_ + "/db_copy")); - for (const Snapshot* snapshot : snapshots) { - db_->ReleaseSnapshot(snapshot); - } -} - -TEST_F(ImportColumnFamilyTest, ImportColumnFamilyNegativeTest) { - Options options = CurrentOptions(); - CreateAndReopenWithCF({"koko"}, options); - - { - // Create column family with existing cf name. - ExportImportFilesMetaData metadata; - - ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "koko", - ImportColumnFamilyOptions(), - metadata, &import_cfh_), - Status::InvalidArgument("Column family already exists")); - ASSERT_EQ(import_cfh_, nullptr); - } - - { - // Import with no files specified. - ExportImportFilesMetaData metadata; - - ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", - ImportColumnFamilyOptions(), - metadata, &import_cfh_), - Status::InvalidArgument("The list of files is empty")); - ASSERT_EQ(import_cfh_, nullptr); - } - - { - // Import with overlapping keys in sst files. - ExportImportFilesMetaData metadata; - SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); - const std::string file1_sst_name = "file1.sst"; - const std::string file1_sst = sst_files_dir_ + file1_sst_name; - ASSERT_OK(sfw_cf1.Open(file1_sst)); - ASSERT_OK(sfw_cf1.Put("K1", "V1")); - ASSERT_OK(sfw_cf1.Put("K2", "V2")); - ASSERT_OK(sfw_cf1.Finish()); - const std::string file2_sst_name = "file2.sst"; - const std::string file2_sst = sst_files_dir_ + file2_sst_name; - ASSERT_OK(sfw_cf1.Open(file2_sst)); - ASSERT_OK(sfw_cf1.Put("K2", "V2")); - ASSERT_OK(sfw_cf1.Put("K3", "V3")); - ASSERT_OK(sfw_cf1.Finish()); - - metadata.files.push_back( - LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 10, 19)); - metadata.files.push_back( - LiveFileMetaDataInit(file2_sst_name, sst_files_dir_, 1, 10, 19)); - metadata.db_comparator_name = options.comparator->Name(); - - ASSERT_NOK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", - ImportColumnFamilyOptions(), - metadata, &import_cfh_)); - ASSERT_EQ(import_cfh_, nullptr); - } - - { - // Import with a mismatching comparator, should fail with appropriate error. 
- ExportImportFilesMetaData metadata; - Options mismatch_options = CurrentOptions(); - mismatch_options.comparator = ReverseBytewiseComparator(); - SstFileWriter sfw_cf1(EnvOptions(), mismatch_options, handles_[1]); - const std::string file1_sst_name = "file1.sst"; - const std::string file1_sst = sst_files_dir_ + file1_sst_name; - ASSERT_OK(sfw_cf1.Open(file1_sst)); - ASSERT_OK(sfw_cf1.Put("K2", "V2")); - ASSERT_OK(sfw_cf1.Put("K1", "V1")); - ASSERT_OK(sfw_cf1.Finish()); - - metadata.files.push_back( - LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 10, 19)); - metadata.db_comparator_name = mismatch_options.comparator->Name(); - - ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "coco", - ImportColumnFamilyOptions(), - metadata, &import_cfh_), - Status::InvalidArgument("Comparator name mismatch")); - ASSERT_EQ(import_cfh_, nullptr); - } - - { - // Import with non existent sst file should fail with appropriate error - ExportImportFilesMetaData metadata; - SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); - const std::string file1_sst_name = "file1.sst"; - const std::string file1_sst = sst_files_dir_ + file1_sst_name; - ASSERT_OK(sfw_cf1.Open(file1_sst)); - ASSERT_OK(sfw_cf1.Put("K1", "V1")); - ASSERT_OK(sfw_cf1.Put("K2", "V2")); - ASSERT_OK(sfw_cf1.Finish()); - const std::string file3_sst_name = "file3.sst"; - - metadata.files.push_back( - LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 10, 19)); - metadata.files.push_back( - LiveFileMetaDataInit(file3_sst_name, sst_files_dir_, 1, 10, 19)); - metadata.db_comparator_name = options.comparator->Name(); - - ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", - ImportColumnFamilyOptions(), - metadata, &import_cfh_), - Status::IOError("No such file or directory")); - ASSERT_EQ(import_cfh_, nullptr); - - // Test successful import after a failure with the same CF name. Ensures - // there is no side effect with CF when there is a failed import - metadata.files.pop_back(); - metadata.db_comparator_name = options.comparator->Name(); - - ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", - ImportColumnFamilyOptions(), - metadata, &import_cfh_)); - ASSERT_NE(import_cfh_, nullptr); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/db/listener_test.cc b/db/listener_test.cc deleted file mode 100644 index 7c96bfd34..000000000 --- a/db/listener_test.cc +++ /dev/null @@ -1,1598 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -#include - -#include "db/blob/blob_index.h" -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "db/dbformat.h" -#include "db/version_set.h" -#include "db/write_batch_internal.h" -#include "file/filename.h" -#include "monitoring/statistics.h" -#include "rocksdb/cache.h" -#include "rocksdb/compaction_filter.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/options.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/slice.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/table.h" -#include "rocksdb/table_properties.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/hash.h" -#include "util/mutexlock.h" -#include "util/rate_limiter.h" -#include "util/string_util.h" -#include "utilities/merge_operators.h" - - -namespace ROCKSDB_NAMESPACE { - -class EventListenerTest : public DBTestBase { - public: - EventListenerTest() : DBTestBase("listener_test", /*env_do_fsync=*/true) {} - - static std::string BlobStr(uint64_t blob_file_number, uint64_t offset, - uint64_t size) { - std::string blob_index; - BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size, - kNoCompression); - return blob_index; - } - - const size_t k110KB = 110 << 10; -}; - -struct TestPropertiesCollector - : public ROCKSDB_NAMESPACE::TablePropertiesCollector { - ROCKSDB_NAMESPACE::Status AddUserKey( - const ROCKSDB_NAMESPACE::Slice& /*key*/, - const ROCKSDB_NAMESPACE::Slice& /*value*/, - ROCKSDB_NAMESPACE::EntryType /*type*/, - ROCKSDB_NAMESPACE::SequenceNumber /*seq*/, - uint64_t /*file_size*/) override { - return Status::OK(); - } - ROCKSDB_NAMESPACE::Status Finish( - ROCKSDB_NAMESPACE::UserCollectedProperties* properties) override { - properties->insert({"0", "1"}); - return Status::OK(); - } - - const char* Name() const override { return "TestTablePropertiesCollector"; } - - ROCKSDB_NAMESPACE::UserCollectedProperties GetReadableProperties() - const override { - ROCKSDB_NAMESPACE::UserCollectedProperties ret; - ret["2"] = "3"; - return ret; - } -}; - -class TestPropertiesCollectorFactory : public TablePropertiesCollectorFactory { - public: - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context /*context*/) override { - return new TestPropertiesCollector; - } - const char* Name() const override { return "TestTablePropertiesCollector"; } -}; - -class TestCompactionListener : public EventListener { - public: - explicit TestCompactionListener(EventListenerTest* test) : test_(test) {} - - void OnCompactionCompleted(DB* db, const CompactionJobInfo& ci) override { - std::lock_guard lock(mutex_); - compacted_dbs_.push_back(db); - ASSERT_GT(ci.input_files.size(), 0U); - ASSERT_EQ(ci.input_files.size(), ci.input_file_infos.size()); - - for (size_t i = 0; i < ci.input_file_infos.size(); ++i) { - ASSERT_EQ(ci.input_file_infos[i].level, ci.base_input_level); - ASSERT_EQ(ci.input_file_infos[i].file_number, - TableFileNameToNumber(ci.input_files[i])); - } - - ASSERT_GT(ci.output_files.size(), 0U); - ASSERT_EQ(ci.output_files.size(), ci.output_file_infos.size()); - - ASSERT_TRUE(test_); - ASSERT_EQ(test_->db_, db); - - std::vector> files_by_level; - test_->dbfull()->TEST_GetFilesMetaData(test_->handles_[ci.cf_id], - &files_by_level); - ASSERT_GT(files_by_level.size(), ci.output_level); - - for (size_t i = 0; i < ci.output_file_infos.size(); ++i) { - ASSERT_EQ(ci.output_file_infos[i].level, ci.output_level); - 
ASSERT_EQ(ci.output_file_infos[i].file_number, - TableFileNameToNumber(ci.output_files[i])); - - auto it = std::find_if( - files_by_level[ci.output_level].begin(), - files_by_level[ci.output_level].end(), [&](const FileMetaData& meta) { - return meta.fd.GetNumber() == ci.output_file_infos[i].file_number; - }); - ASSERT_NE(it, files_by_level[ci.output_level].end()); - - ASSERT_EQ(ci.output_file_infos[i].oldest_blob_file_number, - it->oldest_blob_file_number); - } - - ASSERT_EQ(db->GetEnv()->GetThreadID(), ci.thread_id); - ASSERT_GT(ci.thread_id, 0U); - - for (auto fl : {ci.input_files, ci.output_files}) { - for (auto fn : fl) { - auto it = ci.table_properties.find(fn); - ASSERT_NE(it, ci.table_properties.end()); - auto tp = it->second; - ASSERT_TRUE(tp != nullptr); - ASSERT_EQ(tp->user_collected_properties.find("0")->second, "1"); - } - } - } - - EventListenerTest* test_; - std::vector compacted_dbs_; - std::mutex mutex_; -}; - -TEST_F(EventListenerTest, OnSingleDBCompactionTest) { - const int kTestKeySize = 16; - const int kTestValueSize = 984; - const int kEntrySize = kTestKeySize + kTestValueSize; - const int kEntriesPerBuffer = 100; - const int kNumL0Files = 4; - - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - options.write_buffer_size = kEntrySize * kEntriesPerBuffer; - options.compaction_style = kCompactionStyleLevel; - options.target_file_size_base = options.write_buffer_size; - options.max_bytes_for_level_base = options.target_file_size_base * 2; - options.max_bytes_for_level_multiplier = 2; - options.compression = kNoCompression; -#ifdef ROCKSDB_USING_THREAD_STATUS - options.enable_thread_tracking = true; -#endif // ROCKSDB_USING_THREAD_STATUS - options.level0_file_num_compaction_trigger = kNumL0Files; - options.table_properties_collector_factories.push_back( - std::make_shared()); - - TestCompactionListener* listener = new TestCompactionListener(this); - options.listeners.emplace_back(listener); - std::vector cf_names = {"pikachu", "ilya", "muromec", - "dobrynia", "nikitich", "alyosha", - "popovich"}; - CreateAndReopenWithCF(cf_names, options); - ASSERT_OK(Put(1, "pikachu", std::string(90000, 'p'))); - - WriteBatch batch; - ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 1, "ditto", - BlobStr(123, 0, 1 << 10))); - ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); - - ASSERT_OK(Put(2, "ilya", std::string(90000, 'i'))); - ASSERT_OK(Put(3, "muromec", std::string(90000, 'm'))); - ASSERT_OK(Put(4, "dobrynia", std::string(90000, 'd'))); - ASSERT_OK(Put(5, "nikitich", std::string(90000, 'n'))); - ASSERT_OK(Put(6, "alyosha", std::string(90000, 'a'))); - ASSERT_OK(Put(7, "popovich", std::string(90000, 'p'))); - for (int i = 1; i < 8; ++i) { - ASSERT_OK(Flush(i)); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[i], - nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - } - - ASSERT_EQ(listener->compacted_dbs_.size(), cf_names.size()); - for (size_t i = 0; i < cf_names.size(); ++i) { - ASSERT_EQ(listener->compacted_dbs_[i], db_); - } -} - -// This simple Listener can only handle one flush at a time. 
-class TestFlushListener : public EventListener { - public: - TestFlushListener(Env* env, EventListenerTest* test) - : slowdown_count(0), stop_count(0), db_closed(), env_(env), test_(test) { - db_closed = false; - } - - virtual ~TestFlushListener() { - prev_fc_info_.status.PermitUncheckedError(); // Ignore the status - } - void OnTableFileCreated(const TableFileCreationInfo& info) override { - // remember the info for later checking the FlushJobInfo. - prev_fc_info_ = info; - ASSERT_GT(info.db_name.size(), 0U); - ASSERT_GT(info.cf_name.size(), 0U); - ASSERT_GT(info.file_path.size(), 0U); - ASSERT_GT(info.job_id, 0); - ASSERT_GT(info.table_properties.data_size, 0U); - ASSERT_GT(info.table_properties.raw_key_size, 0U); - ASSERT_GT(info.table_properties.raw_value_size, 0U); - ASSERT_GT(info.table_properties.num_data_blocks, 0U); - ASSERT_GT(info.table_properties.num_entries, 0U); - ASSERT_EQ(info.file_checksum, kUnknownFileChecksum); - ASSERT_EQ(info.file_checksum_func_name, kUnknownFileChecksumFuncName); - -#ifdef ROCKSDB_USING_THREAD_STATUS - // Verify the id of the current thread that created this table - // file matches the id of any active flush or compaction thread. - uint64_t thread_id = env_->GetThreadID(); - std::vector thread_list; - ASSERT_OK(env_->GetThreadList(&thread_list)); - bool found_match = false; - for (auto thread_status : thread_list) { - if (thread_status.operation_type == ThreadStatus::OP_FLUSH || - thread_status.operation_type == ThreadStatus::OP_COMPACTION) { - if (thread_id == thread_status.thread_id) { - found_match = true; - break; - } - } - } - ASSERT_TRUE(found_match); -#endif // ROCKSDB_USING_THREAD_STATUS - } - - void OnFlushCompleted(DB* db, const FlushJobInfo& info) override { - flushed_dbs_.push_back(db); - flushed_column_family_names_.push_back(info.cf_name); - if (info.triggered_writes_slowdown) { - slowdown_count++; - } - if (info.triggered_writes_stop) { - stop_count++; - } - // verify whether the previously created file matches the flushed file. - ASSERT_EQ(prev_fc_info_.db_name, db->GetName()); - ASSERT_EQ(prev_fc_info_.cf_name, info.cf_name); - ASSERT_EQ(prev_fc_info_.job_id, info.job_id); - ASSERT_EQ(prev_fc_info_.file_path, info.file_path); - ASSERT_EQ(TableFileNameToNumber(info.file_path), info.file_number); - - // Note: the following chunk relies on the notification pertaining to the - // database pointed to by DBTestBase::db_, and is thus bypassed when - // that assumption does not hold (see the test case MultiDBMultiListeners - // below). 
- ASSERT_TRUE(test_); - if (db == test_->db_) { - std::vector> files_by_level; - ASSERT_LT(info.cf_id, test_->handles_.size()); - ASSERT_GE(info.cf_id, 0u); - ASSERT_NE(test_->handles_[info.cf_id], nullptr); - test_->dbfull()->TEST_GetFilesMetaData(test_->handles_[info.cf_id], - &files_by_level); - - ASSERT_FALSE(files_by_level.empty()); - auto it = std::find_if(files_by_level[0].begin(), files_by_level[0].end(), - [&](const FileMetaData& meta) { - return meta.fd.GetNumber() == info.file_number; - }); - ASSERT_NE(it, files_by_level[0].end()); - ASSERT_EQ(info.oldest_blob_file_number, it->oldest_blob_file_number); - } - - ASSERT_EQ(db->GetEnv()->GetThreadID(), info.thread_id); - ASSERT_GT(info.thread_id, 0U); - ASSERT_EQ(info.table_properties.user_collected_properties.find("0")->second, - "1"); - } - - std::vector flushed_column_family_names_; - std::vector flushed_dbs_; - int slowdown_count; - int stop_count; - bool db_closing; - std::atomic_bool db_closed; - TableFileCreationInfo prev_fc_info_; - - protected: - Env* env_; - EventListenerTest* test_; -}; - -TEST_F(EventListenerTest, OnSingleDBFlushTest) { - Options options; - options.env = CurrentOptions().env; - options.write_buffer_size = k110KB; -#ifdef ROCKSDB_USING_THREAD_STATUS - options.enable_thread_tracking = true; -#endif // ROCKSDB_USING_THREAD_STATUS - TestFlushListener* listener = new TestFlushListener(options.env, this); - options.listeners.emplace_back(listener); - std::vector cf_names = {"pikachu", "ilya", "muromec", - "dobrynia", "nikitich", "alyosha", - "popovich"}; - options.table_properties_collector_factories.push_back( - std::make_shared()); - CreateAndReopenWithCF(cf_names, options); - - ASSERT_OK(Put(1, "pikachu", std::string(90000, 'p'))); - - WriteBatch batch; - ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 1, "ditto", - BlobStr(456, 0, 1 << 10))); - ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); - - ASSERT_OK(Put(2, "ilya", std::string(90000, 'i'))); - ASSERT_OK(Put(3, "muromec", std::string(90000, 'm'))); - ASSERT_OK(Put(4, "dobrynia", std::string(90000, 'd'))); - ASSERT_OK(Put(5, "nikitich", std::string(90000, 'n'))); - ASSERT_OK(Put(6, "alyosha", std::string(90000, 'a'))); - ASSERT_OK(Put(7, "popovich", std::string(90000, 'p'))); - for (int i = 1; i < 8; ++i) { - ASSERT_OK(Flush(i)); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - // Ensure background work is fully finished including listener callbacks - // before accessing listener state. 
- ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork()); - ASSERT_EQ(listener->flushed_dbs_.size(), i); - ASSERT_EQ(listener->flushed_column_family_names_.size(), i); - } - - // make sure callback functions are called in the right order - for (size_t i = 0; i < cf_names.size(); ++i) { - ASSERT_EQ(listener->flushed_dbs_[i], db_); - ASSERT_EQ(listener->flushed_column_family_names_[i], cf_names[i]); - } -} - -TEST_F(EventListenerTest, MultiCF) { - Options options; - options.env = CurrentOptions().env; - options.write_buffer_size = k110KB; -#ifdef ROCKSDB_USING_THREAD_STATUS - options.enable_thread_tracking = true; -#endif // ROCKSDB_USING_THREAD_STATUS - for (auto atomic_flush : {false, true}) { - options.atomic_flush = atomic_flush; - options.create_if_missing = true; - DestroyAndReopen(options); - TestFlushListener* listener = new TestFlushListener(options.env, this); - options.listeners.emplace_back(listener); - options.table_properties_collector_factories.push_back( - std::make_shared()); - std::vector cf_names = {"pikachu", "ilya", "muromec", - "dobrynia", "nikitich", "alyosha", - "popovich"}; - CreateAndReopenWithCF(cf_names, options); - - ASSERT_OK(Put(1, "pikachu", std::string(90000, 'p'))); - ASSERT_OK(Put(2, "ilya", std::string(90000, 'i'))); - ASSERT_OK(Put(3, "muromec", std::string(90000, 'm'))); - ASSERT_OK(Put(4, "dobrynia", std::string(90000, 'd'))); - ASSERT_OK(Put(5, "nikitich", std::string(90000, 'n'))); - ASSERT_OK(Put(6, "alyosha", std::string(90000, 'a'))); - ASSERT_OK(Put(7, "popovich", std::string(90000, 'p'))); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - for (int i = 1; i < 8; ++i) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::NotifyOnFlushCompleted::PostAllOnFlushCompleted", - "EventListenerTest.MultiCF:PreVerifyListener"}}); - ASSERT_OK(Flush(i)); - TEST_SYNC_POINT("EventListenerTest.MultiCF:PreVerifyListener"); - ASSERT_EQ(listener->flushed_dbs_.size(), i); - ASSERT_EQ(listener->flushed_column_family_names_.size(), i); - // make sure callback functions are called in the right order - if (i == 7) { - for (size_t j = 0; j < cf_names.size(); j++) { - ASSERT_EQ(listener->flushed_dbs_[j], db_); - ASSERT_EQ(listener->flushed_column_family_names_[j], cf_names[j]); - } - } - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - Close(); - } -} - -TEST_F(EventListenerTest, MultiDBMultiListeners) { - Options options; - options.env = CurrentOptions().env; -#ifdef ROCKSDB_USING_THREAD_STATUS - options.enable_thread_tracking = true; -#endif // ROCKSDB_USING_THREAD_STATUS - options.table_properties_collector_factories.push_back( - std::make_shared()); - std::vector listeners; - const int kNumDBs = 5; - const int kNumListeners = 10; - for (int i = 0; i < kNumListeners; ++i) { - listeners.emplace_back(new TestFlushListener(options.env, this)); - } - - std::vector cf_names = {"pikachu", "ilya", "muromec", - "dobrynia", "nikitich", "alyosha", - "popovich"}; - - options.create_if_missing = true; - for (int i = 0; i < kNumListeners; ++i) { - options.listeners.emplace_back(listeners[i]); - } - DBOptions db_opts(options); - ColumnFamilyOptions cf_opts(options); - - std::vector dbs; - std::vector> vec_handles; - - for (int d = 0; d < kNumDBs; ++d) { - ASSERT_OK(DestroyDB(dbname_ + std::to_string(d), options)); - DB* db; - std::vector handles; - ASSERT_OK(DB::Open(options, dbname_ + std::to_string(d), &db)); - for (size_t c = 0; c < cf_names.size(); ++c) { - ColumnFamilyHandle* handle; - 
ASSERT_OK(db->CreateColumnFamily(cf_opts, cf_names[c], &handle)); - handles.push_back(handle); - } - - vec_handles.push_back(std::move(handles)); - dbs.push_back(db); - } - - for (int d = 0; d < kNumDBs; ++d) { - for (size_t c = 0; c < cf_names.size(); ++c) { - ASSERT_OK(dbs[d]->Put(WriteOptions(), vec_handles[d][c], cf_names[c], - cf_names[c])); - } - } - - for (size_t c = 0; c < cf_names.size(); ++c) { - for (int d = 0; d < kNumDBs; ++d) { - ASSERT_OK(dbs[d]->Flush(FlushOptions(), vec_handles[d][c])); - ASSERT_OK( - static_cast_with_check(dbs[d])->TEST_WaitForFlushMemTable()); - } - } - - for (int d = 0; d < kNumDBs; ++d) { - // Ensure background work is fully finished including listener callbacks - // before accessing listener state. - ASSERT_OK( - static_cast_with_check(dbs[d])->TEST_WaitForBackgroundWork()); - } - - for (auto* listener : listeners) { - int pos = 0; - for (size_t c = 0; c < cf_names.size(); ++c) { - for (int d = 0; d < kNumDBs; ++d) { - ASSERT_EQ(listener->flushed_dbs_[pos], dbs[d]); - ASSERT_EQ(listener->flushed_column_family_names_[pos], cf_names[c]); - pos++; - } - } - } - - for (auto handles : vec_handles) { - for (auto h : handles) { - delete h; - } - handles.clear(); - } - vec_handles.clear(); - - for (auto db : dbs) { - delete db; - } -} - -TEST_F(EventListenerTest, DisableBGCompaction) { - Options options; - options.env = CurrentOptions().env; -#ifdef ROCKSDB_USING_THREAD_STATUS - options.enable_thread_tracking = true; -#endif // ROCKSDB_USING_THREAD_STATUS - TestFlushListener* listener = new TestFlushListener(options.env, this); - const int kCompactionTrigger = 1; - const int kSlowdownTrigger = 5; - const int kStopTrigger = 100; - options.level0_file_num_compaction_trigger = kCompactionTrigger; - options.level0_slowdown_writes_trigger = kSlowdownTrigger; - options.level0_stop_writes_trigger = kStopTrigger; - options.max_write_buffer_number = 10; - options.listeners.emplace_back(listener); - // BG compaction is disabled. Number of L0 files will simply keeps - // increasing in this test. - options.compaction_style = kCompactionStyleNone; - options.compression = kNoCompression; - options.write_buffer_size = 100000; // Small write buffer - options.table_properties_collector_factories.push_back( - std::make_shared()); - - CreateAndReopenWithCF({"pikachu"}, options); - ColumnFamilyMetaData cf_meta; - db_->GetColumnFamilyMetaData(handles_[1], &cf_meta); - - // keep writing until writes are forced to stop. - for (int i = 0; static_cast(cf_meta.file_count) < kSlowdownTrigger * 10; - ++i) { - ASSERT_OK( - Put(1, std::to_string(i), std::string(10000, 'x'), WriteOptions())); - FlushOptions fo; - fo.allow_write_stall = true; - ASSERT_OK(db_->Flush(fo, handles_[1])); - db_->GetColumnFamilyMetaData(handles_[1], &cf_meta); - } - // Ensure background work is fully finished including listener callbacks - // before accessing listener state. 
- ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork()); - ASSERT_GE(listener->slowdown_count, kSlowdownTrigger * 9); -} - -class TestCompactionReasonListener : public EventListener { - public: - void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override { - std::lock_guard lock(mutex_); - compaction_reasons_.push_back(ci.compaction_reason); - } - - std::vector compaction_reasons_; - std::mutex mutex_; -}; - -TEST_F(EventListenerTest, CompactionReasonLevel) { - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - options.memtable_factory.reset(test::NewSpecialSkipListFactory( - DBTestBase::kNumKeysByGenerateNewRandomFile)); - - TestCompactionReasonListener* listener = new TestCompactionReasonListener(); - options.listeners.emplace_back(listener); - - options.level0_file_num_compaction_trigger = 4; - options.compaction_style = kCompactionStyleLevel; - - DestroyAndReopen(options); - Random rnd(301); - - // Write 4 files in L0 - for (int i = 0; i < 4; i++) { - GenerateNewRandomFile(&rnd); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(listener->compaction_reasons_.size(), 1); - ASSERT_EQ(listener->compaction_reasons_[0], - CompactionReason::kLevelL0FilesNum); - - DestroyAndReopen(options); - - // Write 3 non-overlapping files in L0 - for (int k = 1; k <= 30; k++) { - ASSERT_OK(Put(Key(k), Key(k))); - if (k % 10 == 0) { - Flush(); - } - } - - // Do a trivial move from L0 -> L1 - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - options.max_bytes_for_level_base = 1; - Close(); - listener->compaction_reasons_.clear(); - Reopen(options); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_GT(listener->compaction_reasons_.size(), 1); - - for (auto compaction_reason : listener->compaction_reasons_) { - ASSERT_EQ(compaction_reason, CompactionReason::kLevelMaxLevelSize); - } - - options.disable_auto_compactions = true; - Close(); - listener->compaction_reasons_.clear(); - Reopen(options); - - ASSERT_OK(Put("key", "value")); - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_GT(listener->compaction_reasons_.size(), 0); - for (auto compaction_reason : listener->compaction_reasons_) { - ASSERT_EQ(compaction_reason, CompactionReason::kManualCompaction); - } -} - -TEST_F(EventListenerTest, CompactionReasonUniversal) { - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - options.memtable_factory.reset(test::NewSpecialSkipListFactory( - DBTestBase::kNumKeysByGenerateNewRandomFile)); - - TestCompactionReasonListener* listener = new TestCompactionReasonListener(); - options.listeners.emplace_back(listener); - - options.compaction_style = kCompactionStyleUniversal; - - Random rnd(301); - - options.level0_file_num_compaction_trigger = 8; - options.compaction_options_universal.max_size_amplification_percent = 100000; - options.compaction_options_universal.size_ratio = 100000; - DestroyAndReopen(options); - listener->compaction_reasons_.clear(); - - // Write 8 files in L0 - for (int i = 0; i < 8; i++) { - GenerateNewRandomFile(&rnd); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_GT(listener->compaction_reasons_.size(), 0); - for (auto compaction_reason : listener->compaction_reasons_) { - ASSERT_EQ(compaction_reason, CompactionReason::kUniversalSizeRatio); - } - - options.level0_file_num_compaction_trigger = 8; - 
options.compaction_options_universal.max_size_amplification_percent = 1; - options.compaction_options_universal.size_ratio = 100000; - - DestroyAndReopen(options); - listener->compaction_reasons_.clear(); - - // Write 8 files in L0 - for (int i = 0; i < 8; i++) { - GenerateNewRandomFile(&rnd); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_GT(listener->compaction_reasons_.size(), 0); - for (auto compaction_reason : listener->compaction_reasons_) { - ASSERT_EQ(compaction_reason, CompactionReason::kUniversalSizeAmplification); - } - - options.disable_auto_compactions = true; - Close(); - listener->compaction_reasons_.clear(); - Reopen(options); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - ASSERT_GT(listener->compaction_reasons_.size(), 0); - for (auto compaction_reason : listener->compaction_reasons_) { - ASSERT_EQ(compaction_reason, CompactionReason::kManualCompaction); - } -} - -TEST_F(EventListenerTest, CompactionReasonFIFO) { - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - options.memtable_factory.reset(test::NewSpecialSkipListFactory( - DBTestBase::kNumKeysByGenerateNewRandomFile)); - - TestCompactionReasonListener* listener = new TestCompactionReasonListener(); - options.listeners.emplace_back(listener); - - options.level0_file_num_compaction_trigger = 4; - options.compaction_style = kCompactionStyleFIFO; - options.compaction_options_fifo.max_table_files_size = 1; - - DestroyAndReopen(options); - Random rnd(301); - - // Write 4 files in L0 - for (int i = 0; i < 4; i++) { - GenerateNewRandomFile(&rnd); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_GT(listener->compaction_reasons_.size(), 0); - for (auto compaction_reason : listener->compaction_reasons_) { - ASSERT_EQ(compaction_reason, CompactionReason::kFIFOMaxSize); - } -} - -class TableFileCreationListener : public EventListener { - public: - class TestFS : public FileSystemWrapper { - public: - explicit TestFS(const std::shared_ptr& t) - : FileSystemWrapper(t) {} - static const char* kClassName() { return "TestEnv"; } - const char* Name() const override { return kClassName(); } - - void SetStatus(IOStatus s) { status_ = s; } - - IOStatus NewWritableFile(const std::string& fname, const FileOptions& opts, - std::unique_ptr* result, - IODebugContext* dbg) override { - if (fname.size() > 4 && fname.substr(fname.size() - 4) == ".sst") { - if (!status_.ok()) { - return status_; - } - } - return target()->NewWritableFile(fname, opts, result, dbg); - } - - private: - IOStatus status_; - }; - - TableFileCreationListener() { - for (int i = 0; i < 2; i++) { - started_[i] = finished_[i] = failure_[i] = 0; - } - } - - int Index(TableFileCreationReason reason) { - int idx; - switch (reason) { - case TableFileCreationReason::kFlush: - idx = 0; - break; - case TableFileCreationReason::kCompaction: - idx = 1; - break; - default: - idx = -1; - } - return idx; - } - - void CheckAndResetCounters(int flush_started, int flush_finished, - int flush_failure, int compaction_started, - int compaction_finished, int compaction_failure) { - ASSERT_EQ(started_[0], flush_started); - ASSERT_EQ(finished_[0], flush_finished); - ASSERT_EQ(failure_[0], flush_failure); - ASSERT_EQ(started_[1], compaction_started); - ASSERT_EQ(finished_[1], compaction_finished); - ASSERT_EQ(failure_[1], compaction_failure); - for (int i = 0; i < 2; i++) { - started_[i] = finished_[i] = failure_[i] = 0; - } - } - - void OnTableFileCreationStarted( - const 
TableFileCreationBriefInfo& info) override { - int idx = Index(info.reason); - if (idx >= 0) { - started_[idx]++; - } - ASSERT_GT(info.db_name.size(), 0U); - ASSERT_GT(info.cf_name.size(), 0U); - ASSERT_GT(info.file_path.size(), 0U); - ASSERT_GT(info.job_id, 0); - } - - void OnTableFileCreated(const TableFileCreationInfo& info) override { - int idx = Index(info.reason); - if (idx >= 0) { - finished_[idx]++; - } - ASSERT_GT(info.db_name.size(), 0U); - ASSERT_GT(info.cf_name.size(), 0U); - ASSERT_GT(info.file_path.size(), 0U); - ASSERT_GT(info.job_id, 0); - ASSERT_EQ(info.file_checksum, kUnknownFileChecksum); - ASSERT_EQ(info.file_checksum_func_name, kUnknownFileChecksumFuncName); - if (info.status.ok()) { - if (info.table_properties.num_range_deletions == 0U) { - ASSERT_GT(info.table_properties.data_size, 0U); - ASSERT_GT(info.table_properties.raw_key_size, 0U); - ASSERT_GT(info.table_properties.raw_value_size, 0U); - ASSERT_GT(info.table_properties.num_data_blocks, 0U); - ASSERT_GT(info.table_properties.num_entries, 0U); - } - } else { - if (idx >= 0) { - failure_[idx]++; - last_failure_ = info.status; - } - } - } - - int started_[2]; - int finished_[2]; - int failure_[2]; - Status last_failure_; -}; - -TEST_F(EventListenerTest, TableFileCreationListenersTest) { - auto listener = std::make_shared(); - Options options; - std::shared_ptr test_fs = - std::make_shared( - CurrentOptions().env->GetFileSystem()); - std::unique_ptr test_env = NewCompositeEnv(test_fs); - options.create_if_missing = true; - options.listeners.push_back(listener); - options.env = test_env.get(); - DestroyAndReopen(options); - - ASSERT_OK(Put("foo", "aaa")); - ASSERT_OK(Put("bar", "bbb")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - listener->CheckAndResetCounters(1, 1, 0, 0, 0, 0); - ASSERT_OK(Put("foo", "aaa1")); - ASSERT_OK(Put("bar", "bbb1")); - test_fs->SetStatus(IOStatus::NotSupported("not supported")); - ASSERT_NOK(Flush()); - listener->CheckAndResetCounters(1, 1, 1, 0, 0, 0); - ASSERT_TRUE(listener->last_failure_.IsNotSupported()); - test_fs->SetStatus(IOStatus::OK()); - - Reopen(options); - ASSERT_OK(Put("foo", "aaa2")); - ASSERT_OK(Put("bar", "bbb2")); - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - listener->CheckAndResetCounters(1, 1, 0, 0, 0, 0); - - const Slice kRangeStart = "a"; - const Slice kRangeEnd = "z"; - ASSERT_OK( - dbfull()->CompactRange(CompactRangeOptions(), &kRangeStart, &kRangeEnd)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - listener->CheckAndResetCounters(0, 0, 0, 1, 1, 0); - - ASSERT_OK(Put("foo", "aaa3")); - ASSERT_OK(Put("bar", "bbb3")); - ASSERT_OK(Flush()); - test_fs->SetStatus(IOStatus::NotSupported("not supported")); - ASSERT_NOK( - dbfull()->CompactRange(CompactRangeOptions(), &kRangeStart, &kRangeEnd)); - ASSERT_NOK(dbfull()->TEST_WaitForCompact()); - listener->CheckAndResetCounters(1, 1, 0, 1, 1, 1); - ASSERT_TRUE(listener->last_failure_.IsNotSupported()); - - // Reset - test_fs->SetStatus(IOStatus::OK()); - DestroyAndReopen(options); - - // Verify that an empty table file that is immediately deleted gives Aborted - // status to listener. 
- ASSERT_OK(Put("baz", "z")); - ASSERT_OK(SingleDelete("baz")); - ASSERT_OK(Flush()); - listener->CheckAndResetCounters(1, 1, 1, 0, 0, 0); - ASSERT_TRUE(listener->last_failure_.IsAborted()); - - // Also in compaction - ASSERT_OK(Put("baz", "z")); - ASSERT_OK(Flush()); - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), - kRangeStart, kRangeEnd)); - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - listener->CheckAndResetCounters(2, 2, 0, 1, 1, 1); - ASSERT_TRUE(listener->last_failure_.IsAborted()); - - Close(); // Avoid UAF on listener -} - -class MemTableSealedListener : public EventListener { - private: - SequenceNumber latest_seq_number_; - - public: - MemTableSealedListener() {} - void OnMemTableSealed(const MemTableInfo& info) override { - latest_seq_number_ = info.first_seqno; - } - - void OnFlushCompleted(DB* /*db*/, - const FlushJobInfo& flush_job_info) override { - ASSERT_LE(flush_job_info.smallest_seqno, latest_seq_number_); - } -}; - -TEST_F(EventListenerTest, MemTableSealedListenerTest) { - auto listener = std::make_shared(); - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - options.listeners.push_back(listener); - DestroyAndReopen(options); - - for (unsigned int i = 0; i < 10; i++) { - std::string tag = std::to_string(i); - ASSERT_OK(Put("foo" + tag, "aaa")); - ASSERT_OK(Put("bar" + tag, "bbb")); - - ASSERT_OK(Flush()); - } -} - -class ColumnFamilyHandleDeletionStartedListener : public EventListener { - private: - std::vector cfs_; - int counter; - - public: - explicit ColumnFamilyHandleDeletionStartedListener( - const std::vector& cfs) - : cfs_(cfs), counter(0) { - cfs_.insert(cfs_.begin(), kDefaultColumnFamilyName); - } - void OnColumnFamilyHandleDeletionStarted( - ColumnFamilyHandle* handle) override { - ASSERT_EQ(cfs_[handle->GetID()], handle->GetName()); - counter++; - } - int getCounter() { return counter; } -}; - -TEST_F(EventListenerTest, ColumnFamilyHandleDeletionStartedListenerTest) { - std::vector cfs{"pikachu", "eevee", "Mewtwo"}; - auto listener = - std::make_shared(cfs); - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - options.listeners.push_back(listener); - CreateAndReopenWithCF(cfs, options); - ASSERT_EQ(handles_.size(), 4); - delete handles_[3]; - delete handles_[2]; - delete handles_[1]; - handles_.resize(1); - ASSERT_EQ(listener->getCounter(), 3); -} - -class BackgroundErrorListener : public EventListener { - private: - SpecialEnv* env_; - int counter_; - - public: - BackgroundErrorListener(SpecialEnv* env) : env_(env), counter_(0) {} - - void OnBackgroundError(BackgroundErrorReason /*reason*/, - Status* bg_error) override { - if (counter_ == 0) { - // suppress the first error and disable write-dropping such that a retry - // can succeed. 
- *bg_error = Status::OK(); - env_->drop_writes_.store(false, std::memory_order_release); - env_->SetMockSleep(false); - } - ++counter_; - } - - int counter() { return counter_; } -}; - -TEST_F(EventListenerTest, BackgroundErrorListenerFailedFlushTest) { - auto listener = std::make_shared(env_); - Options options; - options.create_if_missing = true; - options.env = env_; - options.listeners.push_back(listener); - options.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); - options.paranoid_checks = true; - DestroyAndReopen(options); - - // the usual TEST_WaitForFlushMemTable() doesn't work for failed flushes, so - // forge a custom one for the failed flush case. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BGWorkFlush:done", - "EventListenerTest:BackgroundErrorListenerFailedFlushTest:1"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - env_->drop_writes_.store(true, std::memory_order_release); - env_->SetMockSleep(); - - ASSERT_OK(Put("key0", "val")); - ASSERT_OK(Put("key1", "val")); - TEST_SYNC_POINT("EventListenerTest:BackgroundErrorListenerFailedFlushTest:1"); - ASSERT_EQ(1, listener->counter()); - ASSERT_OK(Put("key2", "val")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(1, NumTableFilesAtLevel(0)); -} - -TEST_F(EventListenerTest, BackgroundErrorListenerFailedCompactionTest) { - auto listener = std::make_shared(env_); - Options options; - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.env = env_; - options.level0_file_num_compaction_trigger = 2; - options.listeners.push_back(listener); - options.memtable_factory.reset(test::NewSpecialSkipListFactory(2)); - options.paranoid_checks = true; - DestroyAndReopen(options); - - // third iteration triggers the second memtable's flush - for (int i = 0; i < 3; ++i) { - ASSERT_OK(Put("key0", "val")); - if (i > 0) { - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - } - ASSERT_OK(Put("key1", "val")); - } - ASSERT_EQ(2, NumTableFilesAtLevel(0)); - - env_->drop_writes_.store(true, std::memory_order_release); - env_->SetMockSleep(); - ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(1, listener->counter()); - - // trigger flush so compaction is triggered again; this time it succeeds - // The previous failed compaction may get retried automatically, so we may - // be left with 0 or 1 files in level 1, depending on when the retry gets - // scheduled - ASSERT_OK(Put("key0", "val")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_LE(1, NumTableFilesAtLevel(0)); -} - -class TestFileOperationListener : public EventListener { - public: - TestFileOperationListener() { - file_reads_.store(0); - file_reads_success_.store(0); - file_writes_.store(0); - file_writes_success_.store(0); - file_flushes_.store(0); - file_flushes_success_.store(0); - file_closes_.store(0); - file_closes_success_.store(0); - file_syncs_.store(0); - file_syncs_success_.store(0); - file_truncates_.store(0); - file_truncates_success_.store(0); - file_seq_reads_.store(0); - blob_file_reads_.store(0); - blob_file_writes_.store(0); - blob_file_flushes_.store(0); - blob_file_closes_.store(0); - blob_file_syncs_.store(0); - blob_file_truncates_.store(0); - } - - void OnFileReadFinish(const FileOperationInfo& info) override { - ++file_reads_; - if (info.status.ok()) { - ++file_reads_success_; - } - if (info.path.find("MANIFEST") 
!= std::string::npos) { - ++file_seq_reads_; - } - if (EndsWith(info.path, ".blob")) { - ++blob_file_reads_; - } - ReportDuration(info); - } - - void OnFileWriteFinish(const FileOperationInfo& info) override { - ++file_writes_; - if (info.status.ok()) { - ++file_writes_success_; - } - if (EndsWith(info.path, ".blob")) { - ++blob_file_writes_; - } - ReportDuration(info); - } - - void OnFileFlushFinish(const FileOperationInfo& info) override { - ++file_flushes_; - if (info.status.ok()) { - ++file_flushes_success_; - } - if (EndsWith(info.path, ".blob")) { - ++blob_file_flushes_; - } - ReportDuration(info); - } - - void OnFileCloseFinish(const FileOperationInfo& info) override { - ++file_closes_; - if (info.status.ok()) { - ++file_closes_success_; - } - if (EndsWith(info.path, ".blob")) { - ++blob_file_closes_; - } - ReportDuration(info); - } - - void OnFileSyncFinish(const FileOperationInfo& info) override { - ++file_syncs_; - if (info.status.ok()) { - ++file_syncs_success_; - } - if (EndsWith(info.path, ".blob")) { - ++blob_file_syncs_; - } - ReportDuration(info); - } - - void OnFileTruncateFinish(const FileOperationInfo& info) override { - ++file_truncates_; - if (info.status.ok()) { - ++file_truncates_success_; - } - if (EndsWith(info.path, ".blob")) { - ++blob_file_truncates_; - } - ReportDuration(info); - } - - bool ShouldBeNotifiedOnFileIO() override { return true; } - - std::atomic file_reads_; - std::atomic file_reads_success_; - std::atomic file_writes_; - std::atomic file_writes_success_; - std::atomic file_flushes_; - std::atomic file_flushes_success_; - std::atomic file_closes_; - std::atomic file_closes_success_; - std::atomic file_syncs_; - std::atomic file_syncs_success_; - std::atomic file_truncates_; - std::atomic file_truncates_success_; - std::atomic file_seq_reads_; - std::atomic blob_file_reads_; - std::atomic blob_file_writes_; - std::atomic blob_file_flushes_; - std::atomic blob_file_closes_; - std::atomic blob_file_syncs_; - std::atomic blob_file_truncates_; - - private: - void ReportDuration(const FileOperationInfo& info) const { - ASSERT_GT(info.duration.count(), 0); - } -}; - -TEST_F(EventListenerTest, OnFileOperationTest) { - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - - TestFileOperationListener* listener = new TestFileOperationListener(); - options.listeners.emplace_back(listener); - - options.use_direct_io_for_flush_and_compaction = false; - Status s = TryReopen(options); - if (s.IsInvalidArgument()) { - options.use_direct_io_for_flush_and_compaction = false; - } else { - ASSERT_OK(s); - } - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "aaa")); - ASSERT_OK(dbfull()->Flush(FlushOptions())); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_GE(listener->file_writes_.load(), - listener->file_writes_success_.load()); - ASSERT_GT(listener->file_writes_.load(), 0); - ASSERT_GE(listener->file_flushes_.load(), - listener->file_flushes_success_.load()); - ASSERT_GT(listener->file_flushes_.load(), 0); - Close(); - - Reopen(options); - ASSERT_GE(listener->file_reads_.load(), listener->file_reads_success_.load()); - ASSERT_GT(listener->file_reads_.load(), 0); - ASSERT_GE(listener->file_closes_.load(), - listener->file_closes_success_.load()); - ASSERT_GT(listener->file_closes_.load(), 0); - ASSERT_GE(listener->file_syncs_.load(), listener->file_syncs_success_.load()); - ASSERT_GT(listener->file_syncs_.load(), 0); - if (true == options.use_direct_io_for_flush_and_compaction) { - 
ASSERT_GE(listener->file_truncates_.load(), - listener->file_truncates_success_.load()); - ASSERT_GT(listener->file_truncates_.load(), 0); - } -} - -TEST_F(EventListenerTest, OnBlobFileOperationTest) { - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - TestFileOperationListener* listener = new TestFileOperationListener(); - options.listeners.emplace_back(listener); - options.disable_auto_compactions = true; - options.enable_blob_files = true; - options.min_blob_size = 0; - options.enable_blob_garbage_collection = true; - options.blob_garbage_collection_age_cutoff = 0.5; - - DestroyAndReopen(options); - - ASSERT_OK(Put("Key1", "blob_value1")); - ASSERT_OK(Put("Key2", "blob_value2")); - ASSERT_OK(Put("Key3", "blob_value3")); - ASSERT_OK(Put("Key4", "blob_value4")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key3", "new_blob_value3")); - ASSERT_OK(Put("Key4", "new_blob_value4")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key5", "blob_value5")); - ASSERT_OK(Put("Key6", "blob_value6")); - ASSERT_OK(Flush()); - - ASSERT_GT(listener->blob_file_writes_.load(), 0U); - ASSERT_GT(listener->blob_file_flushes_.load(), 0U); - Close(); - - Reopen(options); - ASSERT_GT(listener->blob_file_closes_.load(), 0U); - ASSERT_GT(listener->blob_file_syncs_.load(), 0U); - if (true == options.use_direct_io_for_flush_and_compaction) { - ASSERT_GT(listener->blob_file_truncates_.load(), 0U); - } -} - -TEST_F(EventListenerTest, ReadManifestAndWALOnRecovery) { - Options options; - options.env = CurrentOptions().env; - options.create_if_missing = true; - - TestFileOperationListener* listener = new TestFileOperationListener(); - options.listeners.emplace_back(listener); - - options.use_direct_io_for_flush_and_compaction = false; - Status s = TryReopen(options); - if (s.IsInvalidArgument()) { - options.use_direct_io_for_flush_and_compaction = false; - } else { - ASSERT_OK(s); - } - DestroyAndReopen(options); - ASSERT_OK(Put("foo", "aaa")); - Close(); - - size_t seq_reads = listener->file_seq_reads_.load(); - Reopen(options); - ASSERT_GT(listener->file_seq_reads_.load(), seq_reads); -} - -class BlobDBJobLevelEventListenerTest : public EventListener { - public: - explicit BlobDBJobLevelEventListenerTest(EventListenerTest* test) - : test_(test), call_count_(0) {} - - const VersionStorageInfo* GetVersionStorageInfo() const { - VersionSet* const versions = test_->dbfull()->GetVersionSet(); - assert(versions); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - EXPECT_NE(cfd, nullptr); - - Version* const current = cfd->current(); - EXPECT_NE(current, nullptr); - - const VersionStorageInfo* const storage_info = current->storage_info(); - EXPECT_NE(storage_info, nullptr); - - return storage_info; - } - - void CheckBlobFileAdditions( - const std::vector& blob_file_addition_infos) const { - const auto* vstorage = GetVersionStorageInfo(); - - EXPECT_FALSE(blob_file_addition_infos.empty()); - - for (const auto& blob_file_addition_info : blob_file_addition_infos) { - const auto meta = vstorage->GetBlobFileMetaData( - blob_file_addition_info.blob_file_number); - - EXPECT_NE(meta, nullptr); - EXPECT_EQ(meta->GetBlobFileNumber(), - blob_file_addition_info.blob_file_number); - EXPECT_EQ(meta->GetTotalBlobBytes(), - blob_file_addition_info.total_blob_bytes); - EXPECT_EQ(meta->GetTotalBlobCount(), - blob_file_addition_info.total_blob_count); - EXPECT_FALSE(blob_file_addition_info.blob_file_path.empty()); - } - } - - std::vector GetFlushedFiles() { - std::lock_guard 
lock(mutex_); - std::vector result; - for (const auto& fname : flushed_files_) { - result.push_back(fname); - } - return result; - } - - void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { - call_count_++; - - { - std::lock_guard lock(mutex_); - flushed_files_.push_back(info.file_path); - } - - EXPECT_EQ(info.blob_compression_type, kNoCompression); - - CheckBlobFileAdditions(info.blob_file_addition_infos); - } - - void OnCompactionCompleted(DB* /*db*/, - const CompactionJobInfo& info) override { - call_count_++; - - EXPECT_EQ(info.blob_compression_type, kNoCompression); - - CheckBlobFileAdditions(info.blob_file_addition_infos); - - EXPECT_FALSE(info.blob_file_garbage_infos.empty()); - - for (const auto& blob_file_garbage_info : info.blob_file_garbage_infos) { - EXPECT_GT(blob_file_garbage_info.blob_file_number, 0U); - EXPECT_GT(blob_file_garbage_info.garbage_blob_count, 0U); - EXPECT_GT(blob_file_garbage_info.garbage_blob_bytes, 0U); - EXPECT_FALSE(blob_file_garbage_info.blob_file_path.empty()); - } - } - - EventListenerTest* test_; - uint32_t call_count_; - - private: - std::vector flushed_files_; - std::mutex mutex_; -}; - -// Test OnFlushCompleted EventListener called for blob files -TEST_F(EventListenerTest, BlobDBOnFlushCompleted) { - Options options; - options.env = CurrentOptions().env; - options.enable_blob_files = true; - options.create_if_missing = true; - options.disable_auto_compactions = true; - - options.min_blob_size = 0; - BlobDBJobLevelEventListenerTest* blob_event_listener = - new BlobDBJobLevelEventListenerTest(this); - options.listeners.emplace_back(blob_event_listener); - - DestroyAndReopen(options); - - ASSERT_OK(Put("Key1", "blob_value1")); - ASSERT_OK(Put("Key2", "blob_value2")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key3", "blob_value3")); - ASSERT_OK(Flush()); - - ASSERT_EQ(Get("Key1"), "blob_value1"); - ASSERT_EQ(Get("Key2"), "blob_value2"); - ASSERT_EQ(Get("Key3"), "blob_value3"); - - ASSERT_GT(blob_event_listener->call_count_, 0U); -} - -// Test OnCompactionCompleted EventListener called for blob files -TEST_F(EventListenerTest, BlobDBOnCompactionCompleted) { - Options options; - options.env = CurrentOptions().env; - options.enable_blob_files = true; - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.min_blob_size = 0; - BlobDBJobLevelEventListenerTest* blob_event_listener = - new BlobDBJobLevelEventListenerTest(this); - options.listeners.emplace_back(blob_event_listener); - - options.enable_blob_garbage_collection = true; - options.blob_garbage_collection_age_cutoff = 0.5; - - DestroyAndReopen(options); - - ASSERT_OK(Put("Key1", "blob_value1")); - ASSERT_OK(Put("Key2", "blob_value2")); - ASSERT_OK(Put("Key3", "blob_value3")); - ASSERT_OK(Put("Key4", "blob_value4")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key3", "new_blob_value3")); - ASSERT_OK(Put("Key4", "new_blob_value4")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key5", "blob_value5")); - ASSERT_OK(Put("Key6", "blob_value6")); - ASSERT_OK(Flush()); - - blob_event_listener->call_count_ = 0; - constexpr Slice* begin = nullptr; - constexpr Slice* end = nullptr; - - // On compaction, because of blob_garbage_collection_age_cutoff, it will - // delete the oldest blob file and create new blob file during compaction. - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end)); - - // Make sure, OnCompactionCompleted is called. 
- ASSERT_GT(blob_event_listener->call_count_, 0U); -} - -// Test CompactFiles calls OnCompactionCompleted EventListener for blob files -// and populate the blob files info. -TEST_F(EventListenerTest, BlobDBCompactFiles) { - Options options; - options.env = CurrentOptions().env; - options.enable_blob_files = true; - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.min_blob_size = 0; - options.enable_blob_garbage_collection = true; - options.blob_garbage_collection_age_cutoff = 0.5; - - BlobDBJobLevelEventListenerTest* blob_event_listener = - new BlobDBJobLevelEventListenerTest(this); - options.listeners.emplace_back(blob_event_listener); - - DestroyAndReopen(options); - - ASSERT_OK(Put("Key1", "blob_value1")); - ASSERT_OK(Put("Key2", "blob_value2")); - ASSERT_OK(Put("Key3", "blob_value3")); - ASSERT_OK(Put("Key4", "blob_value4")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key3", "new_blob_value3")); - ASSERT_OK(Put("Key4", "new_blob_value4")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key5", "blob_value5")); - ASSERT_OK(Put("Key6", "blob_value6")); - ASSERT_OK(Flush()); - - std::vector output_file_names; - CompactionJobInfo compaction_job_info; - - // On compaction, because of blob_garbage_collection_age_cutoff, it will - // delete the oldest blob file and create new blob file during compaction - // which will be populated in output_files_names. - ASSERT_OK(dbfull()->CompactFiles( - CompactionOptions(), blob_event_listener->GetFlushedFiles(), 1, -1, - &output_file_names, &compaction_job_info)); - - bool is_blob_in_output = false; - for (const auto& file : output_file_names) { - if (EndsWith(file, ".blob")) { - is_blob_in_output = true; - } - } - ASSERT_TRUE(is_blob_in_output); - - for (const auto& blob_file_addition_info : - compaction_job_info.blob_file_addition_infos) { - EXPECT_GT(blob_file_addition_info.blob_file_number, 0U); - EXPECT_GT(blob_file_addition_info.total_blob_bytes, 0U); - EXPECT_GT(blob_file_addition_info.total_blob_count, 0U); - EXPECT_FALSE(blob_file_addition_info.blob_file_path.empty()); - } - - for (const auto& blob_file_garbage_info : - compaction_job_info.blob_file_garbage_infos) { - EXPECT_GT(blob_file_garbage_info.blob_file_number, 0U); - EXPECT_GT(blob_file_garbage_info.garbage_blob_count, 0U); - EXPECT_GT(blob_file_garbage_info.garbage_blob_bytes, 0U); - EXPECT_FALSE(blob_file_garbage_info.blob_file_path.empty()); - } -} - -class BlobDBFileLevelEventListener : public EventListener { - public: - void OnBlobFileCreationStarted( - const BlobFileCreationBriefInfo& info) override { - files_started_++; - EXPECT_FALSE(info.db_name.empty()); - EXPECT_FALSE(info.cf_name.empty()); - EXPECT_FALSE(info.file_path.empty()); - EXPECT_GT(info.job_id, 0); - } - - void OnBlobFileCreated(const BlobFileCreationInfo& info) override { - files_created_++; - EXPECT_FALSE(info.db_name.empty()); - EXPECT_FALSE(info.cf_name.empty()); - EXPECT_FALSE(info.file_path.empty()); - EXPECT_GT(info.job_id, 0); - EXPECT_GT(info.total_blob_count, 0U); - EXPECT_GT(info.total_blob_bytes, 0U); - EXPECT_EQ(info.file_checksum, kUnknownFileChecksum); - EXPECT_EQ(info.file_checksum_func_name, kUnknownFileChecksumFuncName); - EXPECT_TRUE(info.status.ok()); - } - - void OnBlobFileDeleted(const BlobFileDeletionInfo& info) override { - files_deleted_++; - EXPECT_FALSE(info.db_name.empty()); - EXPECT_FALSE(info.file_path.empty()); - EXPECT_GT(info.job_id, 0); - EXPECT_TRUE(info.status.ok()); - } - - void CheckCounters() { - EXPECT_EQ(files_started_, files_created_); - 
EXPECT_GT(files_started_, 0U); - EXPECT_GT(files_deleted_, 0U); - EXPECT_LT(files_deleted_, files_created_); - } - - private: - std::atomic files_started_{}; - std::atomic files_created_{}; - std::atomic files_deleted_{}; -}; - -TEST_F(EventListenerTest, BlobDBFileTest) { - Options options; - options.env = CurrentOptions().env; - options.enable_blob_files = true; - options.create_if_missing = true; - options.disable_auto_compactions = true; - options.min_blob_size = 0; - options.enable_blob_garbage_collection = true; - options.blob_garbage_collection_age_cutoff = 0.5; - - BlobDBFileLevelEventListener* blob_event_listener = - new BlobDBFileLevelEventListener(); - options.listeners.emplace_back(blob_event_listener); - - DestroyAndReopen(options); - - ASSERT_OK(Put("Key1", "blob_value1")); - ASSERT_OK(Put("Key2", "blob_value2")); - ASSERT_OK(Put("Key3", "blob_value3")); - ASSERT_OK(Put("Key4", "blob_value4")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key3", "new_blob_value3")); - ASSERT_OK(Put("Key4", "new_blob_value4")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("Key5", "blob_value5")); - ASSERT_OK(Put("Key6", "blob_value6")); - ASSERT_OK(Flush()); - - constexpr Slice* begin = nullptr; - constexpr Slice* end = nullptr; - - // On compaction, because of blob_garbage_collection_age_cutoff, it will - // delete the oldest blob file and create new blob file during compaction. - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end)); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - blob_event_listener->CheckCounters(); -} - -} // namespace ROCKSDB_NAMESPACE - - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/log_test.cc b/db/log_test.cc deleted file mode 100644 index f4d388f41..000000000 --- a/db/log_test.cc +++ /dev/null @@ -1,1095 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/log_reader.h" -#include "db/log_writer.h" -#include "file/sequence_file_reader.h" -#include "file/writable_file_writer.h" -#include "rocksdb/env.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/coding.h" -#include "util/crc32c.h" -#include "util/random.h" -#include "utilities/memory_allocators.h" - -namespace ROCKSDB_NAMESPACE { -namespace log { - -// Construct a string of the specified length made out of the supplied -// partial string. 
-static std::string BigString(const std::string& partial_string, size_t n) {
-  std::string result;
-  while (result.size() < n) {
-    result.append(partial_string);
-  }
-  result.resize(n);
-  return result;
-}
-
-// Construct a string from a number
-static std::string NumberString(int n) {
-  char buf[50];
-  snprintf(buf, sizeof(buf), "%d.", n);
-  return std::string(buf);
-}
-
-// Return a skewed potentially long string
-static std::string RandomSkewedString(int i, Random* rnd) {
-  return BigString(NumberString(i), rnd->Skewed(17));
-}
-
-// Param type is tuple<int, bool, CompressionType>
-// get<0>(tuple): non-zero if recycling log, zero if regular log
-// get<1>(tuple): true if allow retry after read EOF, false otherwise
-// get<2>(tuple): compression type used when writing records
-class LogTest
-    : public ::testing::TestWithParam<std::tuple<int, bool, CompressionType>> {
- private:
-  class StringSource : public FSSequentialFile {
-   public:
-    Slice& contents_;
-    bool force_error_;
-    size_t force_error_position_;
-    bool force_eof_;
-    size_t force_eof_position_;
-    bool returned_partial_;
-    bool fail_after_read_partial_;
-    explicit StringSource(Slice& contents, bool fail_after_read_partial)
-        : contents_(contents),
-          force_error_(false),
-          force_error_position_(0),
-          force_eof_(false),
-          force_eof_position_(0),
-          returned_partial_(false),
-          fail_after_read_partial_(fail_after_read_partial) {}
-
-    IOStatus Read(size_t n, const IOOptions& /*opts*/, Slice* result,
-                  char* scratch, IODebugContext* /*dbg*/) override {
-      if (fail_after_read_partial_) {
-        EXPECT_TRUE(!returned_partial_) << "must not Read() after eof/error";
-      }
-
-      if (force_error_) {
-        if (force_error_position_ >= n) {
-          force_error_position_ -= n;
-        } else {
-          *result = Slice(contents_.data(), force_error_position_);
-          contents_.remove_prefix(force_error_position_);
-          force_error_ = false;
-          returned_partial_ = true;
-          return IOStatus::Corruption("read error");
-        }
-      }
-
-      if (contents_.size() < n) {
-        n = contents_.size();
-        returned_partial_ = true;
-      }
-
-      if (force_eof_) {
-        if (force_eof_position_ >= n) {
-          force_eof_position_ -= n;
-        } else {
-          force_eof_ = false;
-          n = force_eof_position_;
-          returned_partial_ = true;
-        }
-      }
-
-      // By using scratch we ensure that caller has control over the
-      // lifetime of result.data()
-      memcpy(scratch, contents_.data(), n);
-      *result = Slice(scratch, n);
-
-      contents_.remove_prefix(n);
-      return IOStatus::OK();
-    }
-
-    IOStatus Skip(uint64_t n) override {
-      if (n > contents_.size()) {
-        contents_.clear();
-        return IOStatus::NotFound("in-memory file skipped past end");
-      }
-
-      contents_.remove_prefix(n);
-
-      return IOStatus::OK();
-    }
-  };
-
-  class ReportCollector : public Reader::Reporter {
-   public:
-    size_t dropped_bytes_;
-    std::string message_;
-
-    ReportCollector() : dropped_bytes_(0) {}
-    void Corruption(size_t bytes, const Status& status) override {
-      dropped_bytes_ += bytes;
-      message_.append(status.ToString());
-    }
-  };
-
-  std::string& dest_contents() { return sink_->contents_; }
-
-  const std::string& dest_contents() const { return sink_->contents_; }
-
-  void reset_source_contents() { source_->contents_ = dest_contents(); }
-
-  Slice reader_contents_;
-  test::StringSink* sink_;
-  StringSource* source_;
-  ReportCollector report_;
-
- protected:
-  std::unique_ptr<Writer> writer_;
-  std::unique_ptr<Reader> reader_;
-  bool allow_retry_read_;
-  CompressionType compression_type_;
-
- public:
-  LogTest()
-      : reader_contents_(),
-        sink_(new test::StringSink(&reader_contents_)),
-        source_(new StringSource(reader_contents_, !std::get<1>(GetParam()))),
-        allow_retry_read_(std::get<1>(GetParam())),
-        compression_type_(std::get<2>(GetParam())) {
-    std::unique_ptr<FSWritableFile> sink_holder(sink_);
-    std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
-        std::move(sink_holder), "" /* don't care */, FileOptions()));
-    Writer* writer =
-        new Writer(std::move(file_writer), 123, std::get<0>(GetParam()), false,
-                   compression_type_);
-    writer_.reset(writer);
-    std::unique_ptr<FSSequentialFile> source_holder(source_);
-    std::unique_ptr<SequentialFileReader> file_reader(
-        new SequentialFileReader(std::move(source_holder), "" /* file name */));
-    if (allow_retry_read_) {
-      reader_.reset(new FragmentBufferedReader(nullptr, std::move(file_reader),
-                                               &report_, true /* checksum */,
-                                               123 /* log_number */));
-    } else {
-      reader_.reset(new Reader(nullptr, std::move(file_reader), &report_,
-                               true /* checksum */, 123 /* log_number */));
-    }
-  }
-
-  Slice* get_reader_contents() { return &reader_contents_; }
-
-  void Write(const std::string& msg) {
-    ASSERT_OK(writer_->AddRecord(Slice(msg)));
-  }
-
-  size_t WrittenBytes() const { return dest_contents().size(); }
-
-  std::string Read(const WALRecoveryMode wal_recovery_mode =
-                       WALRecoveryMode::kTolerateCorruptedTailRecords) {
-    std::string scratch;
-    Slice record;
-    bool ret = false;
-    uint64_t record_checksum;
-    ret = reader_->ReadRecord(&record, &scratch, wal_recovery_mode,
-                              &record_checksum);
-    if (ret) {
-      if (!allow_retry_read_) {
-        // allow_retry_read_ means using FragmentBufferedReader which does not
-        // support record checksum yet.
-        uint64_t actual_record_checksum =
-            XXH3_64bits(record.data(), record.size());
-        assert(actual_record_checksum == record_checksum);
-      }
-      return record.ToString();
-    } else {
-      return "EOF";
-    }
-  }
-
-  void IncrementByte(int offset, char delta) {
-    dest_contents()[offset] += delta;
-  }
-
-  void SetByte(int offset, char new_byte) {
-    dest_contents()[offset] = new_byte;
-  }
-
-  void ShrinkSize(int bytes) { sink_->Drop(bytes); }
-
-  void FixChecksum(int header_offset, int len, bool recyclable) {
-    // Compute crc of type/len/data
-    int header_size = recyclable ?
kRecyclableHeaderSize : kHeaderSize; - uint32_t crc = crc32c::Value(&dest_contents()[header_offset + 6], - header_size - 6 + len); - crc = crc32c::Mask(crc); - EncodeFixed32(&dest_contents()[header_offset], crc); - } - - void ForceError(size_t position = 0) { - source_->force_error_ = true; - source_->force_error_position_ = position; - } - - size_t DroppedBytes() const { return report_.dropped_bytes_; } - - std::string ReportMessage() const { return report_.message_; } - - void ForceEOF(size_t position = 0) { - source_->force_eof_ = true; - source_->force_eof_position_ = position; - } - - void UnmarkEOF() { - source_->returned_partial_ = false; - reader_->UnmarkEOF(); - } - - bool IsEOF() { return reader_->IsEOF(); } - - // Returns OK iff recorded error message contains "msg" - std::string MatchError(const std::string& msg) const { - if (report_.message_.find(msg) == std::string::npos) { - return report_.message_; - } else { - return "OK"; - } - } -}; - -TEST_P(LogTest, Empty) { ASSERT_EQ("EOF", Read()); } - -TEST_P(LogTest, ReadWrite) { - Write("foo"); - Write("bar"); - Write(""); - Write("xxxx"); - ASSERT_EQ("foo", Read()); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("", Read()); - ASSERT_EQ("xxxx", Read()); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ("EOF", Read()); // Make sure reads at eof work -} - -TEST_P(LogTest, ManyBlocks) { - for (int i = 0; i < 100000; i++) { - Write(NumberString(i)); - } - for (int i = 0; i < 100000; i++) { - ASSERT_EQ(NumberString(i), Read()); - } - ASSERT_EQ("EOF", Read()); -} - -TEST_P(LogTest, Fragmentation) { - Write("small"); - Write(BigString("medium", 50000)); - Write(BigString("large", 100000)); - ASSERT_EQ("small", Read()); - ASSERT_EQ(BigString("medium", 50000), Read()); - ASSERT_EQ(BigString("large", 100000), Read()); - ASSERT_EQ("EOF", Read()); -} - -TEST_P(LogTest, MarginalTrailer) { - // Make a trailer that is exactly the same length as an empty record. - int header_size = - std::get<0>(GetParam()) ? kRecyclableHeaderSize : kHeaderSize; - const int n = kBlockSize - 2 * header_size; - Write(BigString("foo", n)); - ASSERT_EQ((unsigned int)(kBlockSize - header_size), WrittenBytes()); - Write(""); - Write("bar"); - ASSERT_EQ(BigString("foo", n), Read()); - ASSERT_EQ("", Read()); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("EOF", Read()); -} - -TEST_P(LogTest, MarginalTrailer2) { - // Make a trailer that is exactly the same length as an empty record. - int header_size = - std::get<0>(GetParam()) ? kRecyclableHeaderSize : kHeaderSize; - const int n = kBlockSize - 2 * header_size; - Write(BigString("foo", n)); - ASSERT_EQ((unsigned int)(kBlockSize - header_size), WrittenBytes()); - Write("bar"); - ASSERT_EQ(BigString("foo", n), Read()); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(0U, DroppedBytes()); - ASSERT_EQ("", ReportMessage()); -} - -TEST_P(LogTest, ShortTrailer) { - int header_size = - std::get<0>(GetParam()) ? kRecyclableHeaderSize : kHeaderSize; - const int n = kBlockSize - 2 * header_size + 4; - Write(BigString("foo", n)); - ASSERT_EQ((unsigned int)(kBlockSize - header_size + 4), WrittenBytes()); - Write(""); - Write("bar"); - ASSERT_EQ(BigString("foo", n), Read()); - ASSERT_EQ("", Read()); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("EOF", Read()); -} - -TEST_P(LogTest, AlignedEof) { - int header_size = - std::get<0>(GetParam()) ? 
kRecyclableHeaderSize : kHeaderSize; - const int n = kBlockSize - 2 * header_size + 4; - Write(BigString("foo", n)); - ASSERT_EQ((unsigned int)(kBlockSize - header_size + 4), WrittenBytes()); - ASSERT_EQ(BigString("foo", n), Read()); - ASSERT_EQ("EOF", Read()); -} - -TEST_P(LogTest, RandomRead) { - const int N = 500; - Random write_rnd(301); - for (int i = 0; i < N; i++) { - Write(RandomSkewedString(i, &write_rnd)); - } - Random read_rnd(301); - for (int i = 0; i < N; i++) { - ASSERT_EQ(RandomSkewedString(i, &read_rnd), Read()); - } - ASSERT_EQ("EOF", Read()); -} - -// Tests of all the error paths in log_reader.cc follow: - -TEST_P(LogTest, ReadError) { - Write("foo"); - ForceError(); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ((unsigned int)kBlockSize, DroppedBytes()); - ASSERT_EQ("OK", MatchError("read error")); -} - -TEST_P(LogTest, BadRecordType) { - Write("foo"); - // Type is stored in header[6] - IncrementByte(6, 100); - FixChecksum(0, 3, false); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3U, DroppedBytes()); - ASSERT_EQ("OK", MatchError("unknown record type")); -} - -TEST_P(LogTest, TruncatedTrailingRecordIsIgnored) { - Write("foo"); - ShrinkSize(4); // Drop all payload as well as a header byte - ASSERT_EQ("EOF", Read()); - // Truncated last record is ignored, not treated as an error - ASSERT_EQ(0U, DroppedBytes()); - ASSERT_EQ("", ReportMessage()); -} - -TEST_P(LogTest, TruncatedTrailingRecordIsNotIgnored) { - if (allow_retry_read_) { - // If read retry is allowed, then truncated trailing record should not - // raise an error. - return; - } - Write("foo"); - ShrinkSize(4); // Drop all payload as well as a header byte - ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency)); - // Truncated last record is ignored, not treated as an error - ASSERT_GT(DroppedBytes(), 0U); - ASSERT_EQ("OK", MatchError("Corruption: truncated header")); -} - -TEST_P(LogTest, BadLength) { - if (allow_retry_read_) { - // If read retry is allowed, then we should not raise an error when the - // record length specified in header is longer than data currently - // available. It's possible that the body of the record is not written yet. - return; - } - bool recyclable_log = (std::get<0>(GetParam()) != 0); - int header_size = recyclable_log ? kRecyclableHeaderSize : kHeaderSize; - const int kPayloadSize = kBlockSize - header_size; - Write(BigString("bar", kPayloadSize)); - Write("foo"); - // Least significant size byte is stored in header[4]. - IncrementByte(4, 1); - if (!recyclable_log) { - ASSERT_EQ("foo", Read()); - ASSERT_EQ(kBlockSize, DroppedBytes()); - ASSERT_EQ("OK", MatchError("bad record length")); - } else { - ASSERT_EQ("EOF", Read()); - } -} - -TEST_P(LogTest, BadLengthAtEndIsIgnored) { - if (allow_retry_read_) { - // If read retry is allowed, then we should not raise an error when the - // record length specified in header is longer than data currently - // available. It's possible that the body of the record is not written yet. - return; - } - Write("foo"); - ShrinkSize(1); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(0U, DroppedBytes()); - ASSERT_EQ("", ReportMessage()); -} - -TEST_P(LogTest, BadLengthAtEndIsNotIgnored) { - if (allow_retry_read_) { - // If read retry is allowed, then we should not raise an error when the - // record length specified in header is longer than data currently - // available. It's possible that the body of the record is not written yet. 
- return; - } - Write("foo"); - ShrinkSize(1); - ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency)); - ASSERT_GT(DroppedBytes(), 0U); - ASSERT_EQ("OK", MatchError("Corruption: truncated record body")); -} - -TEST_P(LogTest, ChecksumMismatch) { - Write("foooooo"); - IncrementByte(0, 14); - ASSERT_EQ("EOF", Read()); - bool recyclable_log = (std::get<0>(GetParam()) != 0); - if (!recyclable_log) { - ASSERT_EQ(14U, DroppedBytes()); - ASSERT_EQ("OK", MatchError("checksum mismatch")); - } else { - ASSERT_EQ(0U, DroppedBytes()); - ASSERT_EQ("", ReportMessage()); - } -} - -TEST_P(LogTest, UnexpectedMiddleType) { - Write("foo"); - bool recyclable_log = (std::get<0>(GetParam()) != 0); - SetByte(6, static_cast(recyclable_log ? kRecyclableMiddleType - : kMiddleType)); - FixChecksum(0, 3, !!recyclable_log); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3U, DroppedBytes()); - ASSERT_EQ("OK", MatchError("missing start")); -} - -TEST_P(LogTest, UnexpectedLastType) { - Write("foo"); - bool recyclable_log = (std::get<0>(GetParam()) != 0); - SetByte(6, - static_cast(recyclable_log ? kRecyclableLastType : kLastType)); - FixChecksum(0, 3, !!recyclable_log); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3U, DroppedBytes()); - ASSERT_EQ("OK", MatchError("missing start")); -} - -TEST_P(LogTest, UnexpectedFullType) { - Write("foo"); - Write("bar"); - bool recyclable_log = (std::get<0>(GetParam()) != 0); - SetByte( - 6, static_cast(recyclable_log ? kRecyclableFirstType : kFirstType)); - FixChecksum(0, 3, !!recyclable_log); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3U, DroppedBytes()); - ASSERT_EQ("OK", MatchError("partial record without end")); -} - -TEST_P(LogTest, UnexpectedFirstType) { - Write("foo"); - Write(BigString("bar", 100000)); - bool recyclable_log = (std::get<0>(GetParam()) != 0); - SetByte( - 6, static_cast(recyclable_log ? kRecyclableFirstType : kFirstType)); - FixChecksum(0, 3, !!recyclable_log); - ASSERT_EQ(BigString("bar", 100000), Read()); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3U, DroppedBytes()); - ASSERT_EQ("OK", MatchError("partial record without end")); -} - -TEST_P(LogTest, MissingLastIsIgnored) { - Write(BigString("bar", kBlockSize)); - // Remove the LAST block, including header. - ShrinkSize(14); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ("", ReportMessage()); - ASSERT_EQ(0U, DroppedBytes()); -} - -TEST_P(LogTest, MissingLastIsNotIgnored) { - if (allow_retry_read_) { - // If read retry is allowed, then truncated trailing record should not - // raise an error. - return; - } - Write(BigString("bar", kBlockSize)); - // Remove the LAST block, including header. - ShrinkSize(14); - ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency)); - ASSERT_GT(DroppedBytes(), 0U); - ASSERT_EQ("OK", MatchError("Corruption: error reading trailing data")); -} - -TEST_P(LogTest, PartialLastIsIgnored) { - Write(BigString("bar", kBlockSize)); - // Cause a bad record length in the LAST block. - ShrinkSize(1); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ("", ReportMessage()); - ASSERT_EQ(0U, DroppedBytes()); -} - -TEST_P(LogTest, PartialLastIsNotIgnored) { - if (allow_retry_read_) { - // If read retry is allowed, then truncated trailing record should not - // raise an error. - return; - } - Write(BigString("bar", kBlockSize)); - // Cause a bad record length in the LAST block. 
- ShrinkSize(1); - ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency)); - ASSERT_GT(DroppedBytes(), 0U); - ASSERT_EQ("OK", MatchError("Corruption: truncated record body")); -} - -TEST_P(LogTest, ErrorJoinsRecords) { - // Consider two fragmented records: - // first(R1) last(R1) first(R2) last(R2) - // where the middle two fragments disappear. We do not want - // first(R1),last(R2) to get joined and returned as a valid record. - - // Write records that span two blocks - Write(BigString("foo", kBlockSize)); - Write(BigString("bar", kBlockSize)); - Write("correct"); - - // Wipe the middle block - for (unsigned int offset = kBlockSize; offset < 2 * kBlockSize; offset++) { - SetByte(offset, 'x'); - } - - bool recyclable_log = (std::get<0>(GetParam()) != 0); - if (!recyclable_log) { - ASSERT_EQ("correct", Read()); - ASSERT_EQ("EOF", Read()); - size_t dropped = DroppedBytes(); - ASSERT_LE(dropped, 2 * kBlockSize + 100); - ASSERT_GE(dropped, 2 * kBlockSize); - } else { - ASSERT_EQ("EOF", Read()); - } -} - -TEST_P(LogTest, ClearEofSingleBlock) { - Write("foo"); - Write("bar"); - bool recyclable_log = (std::get<0>(GetParam()) != 0); - int header_size = recyclable_log ? kRecyclableHeaderSize : kHeaderSize; - ForceEOF(3 + header_size + 2); - ASSERT_EQ("foo", Read()); - UnmarkEOF(); - ASSERT_EQ("bar", Read()); - ASSERT_TRUE(IsEOF()); - ASSERT_EQ("EOF", Read()); - Write("xxx"); - UnmarkEOF(); - ASSERT_EQ("xxx", Read()); - ASSERT_TRUE(IsEOF()); -} - -TEST_P(LogTest, ClearEofMultiBlock) { - size_t num_full_blocks = 5; - bool recyclable_log = (std::get<0>(GetParam()) != 0); - int header_size = recyclable_log ? kRecyclableHeaderSize : kHeaderSize; - size_t n = (kBlockSize - header_size) * num_full_blocks + 25; - Write(BigString("foo", n)); - Write(BigString("bar", n)); - ForceEOF(n + num_full_blocks * header_size + header_size + 3); - ASSERT_EQ(BigString("foo", n), Read()); - ASSERT_TRUE(IsEOF()); - UnmarkEOF(); - ASSERT_EQ(BigString("bar", n), Read()); - ASSERT_TRUE(IsEOF()); - Write(BigString("xxx", n)); - UnmarkEOF(); - ASSERT_EQ(BigString("xxx", n), Read()); - ASSERT_TRUE(IsEOF()); -} - -TEST_P(LogTest, ClearEofError) { - // If an error occurs during Read() in UnmarkEOF(), the records contained - // in the buffer should be returned on subsequent calls of ReadRecord() - // until no more full records are left, whereafter ReadRecord() should return - // false to indicate that it cannot read any further. 
-
-  Write("foo");
-  Write("bar");
-  UnmarkEOF();
-  ASSERT_EQ("foo", Read());
-  ASSERT_TRUE(IsEOF());
-  Write("xxx");
-  ForceError(0);
-  UnmarkEOF();
-  ASSERT_EQ("bar", Read());
-  ASSERT_EQ("EOF", Read());
-}
-
-TEST_P(LogTest, ClearEofError2) {
-  Write("foo");
-  Write("bar");
-  UnmarkEOF();
-  ASSERT_EQ("foo", Read());
-  Write("xxx");
-  ForceError(3);
-  UnmarkEOF();
-  ASSERT_EQ("bar", Read());
-  ASSERT_EQ("EOF", Read());
-  ASSERT_EQ(3U, DroppedBytes());
-  ASSERT_EQ("OK", MatchError("read error"));
-}
-
-TEST_P(LogTest, Recycle) {
-  bool recyclable_log = (std::get<0>(GetParam()) != 0);
-  if (!recyclable_log) {
-    return;  // test is only valid for recycled logs
-  }
-  Write("foo");
-  Write("bar");
-  Write("baz");
-  Write("bif");
-  Write("blitz");
-  while (get_reader_contents()->size() < log::kBlockSize * 2) {
-    Write("xxxxxxxxxxxxxxxx");
-  }
-  std::unique_ptr<FSWritableFile> sink(
-      new test::OverwritingStringSink(get_reader_contents()));
-  std::unique_ptr<WritableFileWriter> dest_holder(new WritableFileWriter(
-      std::move(sink), "" /* don't care */, FileOptions()));
-  Writer recycle_writer(std::move(dest_holder), 123, true);
-  ASSERT_OK(recycle_writer.AddRecord(Slice("foooo")));
-  ASSERT_OK(recycle_writer.AddRecord(Slice("bar")));
-  ASSERT_GE(get_reader_contents()->size(), log::kBlockSize * 2);
-  ASSERT_EQ("foooo", Read());
-  ASSERT_EQ("bar", Read());
-  ASSERT_EQ("EOF", Read());
-}
-
-// Do NOT enable compression for this instantiation.
-INSTANTIATE_TEST_CASE_P(
-    Log, LogTest,
-    ::testing::Combine(::testing::Values(0, 1), ::testing::Bool(),
-                       ::testing::Values(CompressionType::kNoCompression)));
-
-class RetriableLogTest : public ::testing::TestWithParam<int> {
- private:
-  class ReportCollector : public Reader::Reporter {
-   public:
-    size_t dropped_bytes_;
-    std::string message_;
-
-    ReportCollector() : dropped_bytes_(0) {}
-    void Corruption(size_t bytes, const Status& status) override {
-      dropped_bytes_ += bytes;
-      message_.append(status.ToString());
-    }
-  };
-
-  Slice contents_;
-  test::StringSink* sink_;
-  std::unique_ptr<Writer> log_writer_;
-  Env* env_;
-  const std::string test_dir_;
-  const std::string log_file_;
-  std::unique_ptr<WritableFileWriter> writer_;
-  std::unique_ptr<SequentialFileReader> reader_;
-  ReportCollector report_;
-  std::unique_ptr<FragmentBufferedReader> log_reader_;
-
- public:
-  RetriableLogTest()
-      : contents_(),
-        sink_(new test::StringSink(&contents_)),
-        log_writer_(nullptr),
-        env_(Env::Default()),
-        test_dir_(test::PerThreadDBPath("retriable_log_test")),
-        log_file_(test_dir_ + "/log"),
-        writer_(nullptr),
-        reader_(nullptr),
-        log_reader_(nullptr) {
-    std::unique_ptr<FSWritableFile> sink_holder(sink_);
-    std::unique_ptr<WritableFileWriter> wfw(new WritableFileWriter(
-        std::move(sink_holder), "" /* file name */, FileOptions()));
-    log_writer_.reset(new Writer(std::move(wfw), 123, GetParam()));
-  }
-
-  Status SetupTestEnv() {
-    Status s;
-    FileOptions fopts;
-    auto fs = env_->GetFileSystem();
-    s = fs->CreateDirIfMissing(test_dir_, IOOptions(), nullptr);
-    std::unique_ptr<FSWritableFile> writable_file;
-    if (s.ok()) {
-      s = fs->NewWritableFile(log_file_, fopts, &writable_file, nullptr);
-    }
-    if (s.ok()) {
-      writer_.reset(
-          new WritableFileWriter(std::move(writable_file), log_file_, fopts));
-      EXPECT_NE(writer_, nullptr);
-    }
-    std::unique_ptr<FSSequentialFile> seq_file;
-    if (s.ok()) {
-      s = fs->NewSequentialFile(log_file_, fopts, &seq_file, nullptr);
-    }
-    if (s.ok()) {
-      reader_.reset(new SequentialFileReader(std::move(seq_file), log_file_));
-      EXPECT_NE(reader_, nullptr);
-      log_reader_.reset(new FragmentBufferedReader(
-          nullptr, std::move(reader_), &report_, true /* checksum */,
-          123 /* log_number */));
-      EXPECT_NE(log_reader_,
nullptr); - } - return s; - } - - std::string contents() { return sink_->contents_; } - - void Encode(const std::string& msg) { - ASSERT_OK(log_writer_->AddRecord(Slice(msg))); - } - - void Write(const Slice& data) { - ASSERT_OK(writer_->Append(data)); - ASSERT_OK(writer_->Sync(true)); - } - - bool TryRead(std::string* result) { - assert(result != nullptr); - result->clear(); - std::string scratch; - Slice record; - bool r = log_reader_->ReadRecord(&record, &scratch); - if (r) { - result->assign(record.data(), record.size()); - return true; - } else { - return false; - } - } -}; - -TEST_P(RetriableLogTest, TailLog_PartialHeader) { - ASSERT_OK(SetupTestEnv()); - std::vector remaining_bytes_in_last_record; - size_t header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; - bool eof = false; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency( - {{"RetriableLogTest::TailLog:AfterPart1", - "RetriableLogTest::TailLog:BeforeReadRecord"}, - {"FragmentBufferedLogReader::TryReadMore:FirstEOF", - "RetriableLogTest::TailLog:BeforePart2"}}); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "FragmentBufferedLogReader::TryReadMore:FirstEOF", - [&](void* /*arg*/) { eof = true; }); - SyncPoint::GetInstance()->EnableProcessing(); - - size_t delta = header_size - 1; - port::Thread log_writer_thread([&]() { - size_t old_sz = contents().size(); - Encode("foo"); - size_t new_sz = contents().size(); - std::string part1 = contents().substr(old_sz, delta); - std::string part2 = - contents().substr(old_sz + delta, new_sz - old_sz - delta); - Write(Slice(part1)); - TEST_SYNC_POINT("RetriableLogTest::TailLog:AfterPart1"); - TEST_SYNC_POINT("RetriableLogTest::TailLog:BeforePart2"); - Write(Slice(part2)); - }); - - std::string record; - port::Thread log_reader_thread([&]() { - TEST_SYNC_POINT("RetriableLogTest::TailLog:BeforeReadRecord"); - while (!TryRead(&record)) { - } - }); - log_reader_thread.join(); - log_writer_thread.join(); - ASSERT_EQ("foo", record); - ASSERT_TRUE(eof); -} - -TEST_P(RetriableLogTest, TailLog_FullHeader) { - ASSERT_OK(SetupTestEnv()); - std::vector remaining_bytes_in_last_record; - size_t header_size = GetParam() ? 
kRecyclableHeaderSize : kHeaderSize; - bool eof = false; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency( - {{"RetriableLogTest::TailLog:AfterPart1", - "RetriableLogTest::TailLog:BeforeReadRecord"}, - {"FragmentBufferedLogReader::TryReadMore:FirstEOF", - "RetriableLogTest::TailLog:BeforePart2"}}); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "FragmentBufferedLogReader::TryReadMore:FirstEOF", - [&](void* /*arg*/) { eof = true; }); - SyncPoint::GetInstance()->EnableProcessing(); - - size_t delta = header_size + 1; - port::Thread log_writer_thread([&]() { - size_t old_sz = contents().size(); - Encode("foo"); - size_t new_sz = contents().size(); - std::string part1 = contents().substr(old_sz, delta); - std::string part2 = - contents().substr(old_sz + delta, new_sz - old_sz - delta); - Write(Slice(part1)); - TEST_SYNC_POINT("RetriableLogTest::TailLog:AfterPart1"); - TEST_SYNC_POINT("RetriableLogTest::TailLog:BeforePart2"); - Write(Slice(part2)); - ASSERT_TRUE(eof); - }); - - std::string record; - port::Thread log_reader_thread([&]() { - TEST_SYNC_POINT("RetriableLogTest::TailLog:BeforeReadRecord"); - while (!TryRead(&record)) { - } - }); - log_reader_thread.join(); - log_writer_thread.join(); - ASSERT_EQ("foo", record); -} - -TEST_P(RetriableLogTest, NonBlockingReadFullRecord) { - // Clear all sync point callbacks even if this test does not use sync point. - // It is necessary, otherwise the execute of this test may hit a sync point - // with which a callback is registered. The registered callback may access - // some dead variable, causing segfault. - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - ASSERT_OK(SetupTestEnv()); - size_t header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize; - size_t delta = header_size - 1; - size_t old_sz = contents().size(); - Encode("foo-bar"); - size_t new_sz = contents().size(); - std::string part1 = contents().substr(old_sz, delta); - std::string part2 = - contents().substr(old_sz + delta, new_sz - old_sz - delta); - Write(Slice(part1)); - std::string record; - ASSERT_FALSE(TryRead(&record)); - ASSERT_TRUE(record.empty()); - Write(Slice(part2)); - ASSERT_TRUE(TryRead(&record)); - ASSERT_EQ("foo-bar", record); -} - -INSTANTIATE_TEST_CASE_P(bool, RetriableLogTest, ::testing::Values(0, 2)); - -class CompressionLogTest : public LogTest { - public: - Status SetupTestEnv() { return writer_->AddCompressionTypeRecord(); } -}; - -TEST_P(CompressionLogTest, Empty) { - CompressionType compression_type = std::get<2>(GetParam()); - if (!StreamingCompressionTypeSupported(compression_type)) { - ROCKSDB_GTEST_SKIP("Test requires support for compression type"); - return; - } - ASSERT_OK(SetupTestEnv()); - const bool compression_enabled = - std::get<2>(GetParam()) == kNoCompression ? false : true; - // If WAL compression is enabled, a record is added for the compression type - const int compression_record_size = compression_enabled ? 
kHeaderSize + 4 : 0; - ASSERT_EQ(compression_record_size, WrittenBytes()); - ASSERT_EQ("EOF", Read()); -} - -TEST_P(CompressionLogTest, ReadWrite) { - CompressionType compression_type = std::get<2>(GetParam()); - if (!StreamingCompressionTypeSupported(compression_type)) { - ROCKSDB_GTEST_SKIP("Test requires support for compression type"); - return; - } - ASSERT_OK(SetupTestEnv()); - Write("foo"); - Write("bar"); - Write(""); - Write("xxxx"); - ASSERT_EQ("foo", Read()); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("", Read()); - ASSERT_EQ("xxxx", Read()); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ("EOF", Read()); // Make sure reads at eof work -} - -TEST_P(CompressionLogTest, ManyBlocks) { - CompressionType compression_type = std::get<2>(GetParam()); - if (!StreamingCompressionTypeSupported(compression_type)) { - ROCKSDB_GTEST_SKIP("Test requires support for compression type"); - return; - } - ASSERT_OK(SetupTestEnv()); - for (int i = 0; i < 100000; i++) { - Write(NumberString(i)); - } - for (int i = 0; i < 100000; i++) { - ASSERT_EQ(NumberString(i), Read()); - } - ASSERT_EQ("EOF", Read()); -} - -TEST_P(CompressionLogTest, Fragmentation) { - CompressionType compression_type = std::get<2>(GetParam()); - if (!StreamingCompressionTypeSupported(compression_type)) { - ROCKSDB_GTEST_SKIP("Test requires support for compression type"); - return; - } - ASSERT_OK(SetupTestEnv()); - Random rnd(301); - const std::vector wal_entries = { - "small", - rnd.RandomBinaryString(3 * kBlockSize / 2), // Spans into block 2 - rnd.RandomBinaryString(3 * kBlockSize), // Spans into block 5 - }; - for (const std::string& wal_entry : wal_entries) { - Write(wal_entry); - } - - for (const std::string& wal_entry : wal_entries) { - ASSERT_EQ(wal_entry, Read()); - } - ASSERT_EQ("EOF", Read()); -} - -TEST_P(CompressionLogTest, AlignedFragmentation) { - CompressionType compression_type = std::get<2>(GetParam()); - if (!StreamingCompressionTypeSupported(compression_type)) { - ROCKSDB_GTEST_SKIP("Test requires support for compression type"); - return; - } - ASSERT_OK(SetupTestEnv()); - Random rnd(301); - int num_filler_records = 0; - // Keep writing small records until the next record will be aligned at the - // beginning of the block. 
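The filler loop that follows relies on kBlockSize being a power of two, so masking with (kBlockSize - 1) yields the write offset inside the block currently being filled. A small self-contained illustration of that arithmetic; the constant below is local to the sketch and mirrors log::kBlockSize.

#include <cstddef>

// For a power-of-two block size, x & (block_size - 1) == x % block_size,
// i.e. the offset of the next byte within the block currently being filled.
constexpr size_t kIllustrativeBlockSize = 32768;  // matches log::kBlockSize
static_assert((kIllustrativeBlockSize & (kIllustrativeBlockSize - 1)) == 0,
              "the mask trick requires a power-of-two block size");

constexpr size_t OffsetInBlock(size_t bytes_written) {
  return bytes_written & (kIllustrativeBlockSize - 1);
}

static_assert(OffsetInBlock(0) == 0, "a fresh file starts on a block boundary");
static_assert(OffsetInBlock(kIllustrativeBlockSize + 7) == 7,
              "the offset wraps every kIllustrativeBlockSize bytes");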
- while ((WrittenBytes() & (kBlockSize - 1)) >= kHeaderSize) { - char entry = 'a'; - ASSERT_OK(writer_->AddRecord(Slice(&entry, 1))); - num_filler_records++; - } - const std::vector wal_entries = { - rnd.RandomBinaryString(3 * kBlockSize), - }; - for (const std::string& wal_entry : wal_entries) { - Write(wal_entry); - } - - for (int i = 0; i < num_filler_records; ++i) { - ASSERT_EQ("a", Read()); - } - for (const std::string& wal_entry : wal_entries) { - ASSERT_EQ(wal_entry, Read()); - } - ASSERT_EQ("EOF", Read()); -} - -INSTANTIATE_TEST_CASE_P( - Compression, CompressionLogTest, - ::testing::Combine(::testing::Values(0, 1), ::testing::Bool(), - ::testing::Values(CompressionType::kNoCompression, - CompressionType::kZSTD))); - -class StreamingCompressionTest - : public ::testing::TestWithParam> {}; - -TEST_P(StreamingCompressionTest, Basic) { - size_t input_size = std::get<0>(GetParam()); - CompressionType compression_type = std::get<1>(GetParam()); - if (!StreamingCompressionTypeSupported(compression_type)) { - ROCKSDB_GTEST_SKIP("Test requires support for compression type"); - return; - } - CompressionOptions opts; - constexpr uint32_t compression_format_version = 2; - StreamingCompress* compress = StreamingCompress::Create( - compression_type, opts, compression_format_version, kBlockSize); - StreamingUncompress* uncompress = StreamingUncompress::Create( - compression_type, compression_format_version, kBlockSize); - MemoryAllocator* allocator = new DefaultMemoryAllocator(); - std::string input_buffer = BigString("abc", input_size); - std::vector compressed_buffers; - size_t remaining; - // Call compress till the entire input is consumed - do { - char* output_buffer = (char*)allocator->Allocate(kBlockSize); - size_t output_pos; - remaining = compress->Compress(input_buffer.c_str(), input_size, - output_buffer, &output_pos); - if (output_pos > 0) { - std::string compressed_buffer; - compressed_buffer.assign(output_buffer, output_pos); - compressed_buffers.emplace_back(std::move(compressed_buffer)); - } - allocator->Deallocate((void*)output_buffer); - } while (remaining > 0); - std::string uncompressed_buffer = ""; - int ret_val = 0; - size_t output_pos; - char* uncompressed_output_buffer = (char*)allocator->Allocate(kBlockSize); - // Uncompress the fragments and concatenate them. - for (int i = 0; i < (int)compressed_buffers.size(); i++) { - // Call uncompress till either the entire input is consumed or the output - // buffer size is equal to the allocated output buffer size. - const char* input = compressed_buffers[i].c_str(); - do { - ret_val = uncompress->Uncompress(input, compressed_buffers[i].size(), - uncompressed_output_buffer, &output_pos); - input = nullptr; - if (output_pos > 0) { - std::string uncompressed_fragment; - uncompressed_fragment.assign(uncompressed_output_buffer, output_pos); - uncompressed_buffer += uncompressed_fragment; - } - } while (ret_val > 0 || output_pos == kBlockSize); - } - allocator->Deallocate((void*)uncompressed_output_buffer); - delete allocator; - delete compress; - delete uncompress; - // The final return value from uncompress() should be 0. 
- ASSERT_EQ(ret_val, 0); - ASSERT_EQ(input_buffer, uncompressed_buffer); -} - -INSTANTIATE_TEST_CASE_P( - StreamingCompression, StreamingCompressionTest, - ::testing::Combine(::testing::Values(10, 100, 1000, kBlockSize, - kBlockSize * 2), - ::testing::Values(CompressionType::kZSTD))); - -} // namespace log -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/manual_compaction_test.cc b/db/manual_compaction_test.cc deleted file mode 100644 index b92cb794b..000000000 --- a/db/manual_compaction_test.cc +++ /dev/null @@ -1,308 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Test for issue 178: a manual compaction causes deleted data to reappear. -#include - -#include "port/port.h" -#include "rocksdb/compaction_filter.h" -#include "rocksdb/db.h" -#include "rocksdb/slice.h" -#include "rocksdb/write_batch.h" -#include "test_util/testharness.h" - -using ROCKSDB_NAMESPACE::CompactionFilter; -using ROCKSDB_NAMESPACE::CompactionStyle; -using ROCKSDB_NAMESPACE::CompactRangeOptions; -using ROCKSDB_NAMESPACE::CompressionType; -using ROCKSDB_NAMESPACE::DB; -using ROCKSDB_NAMESPACE::DestroyDB; -using ROCKSDB_NAMESPACE::FlushOptions; -using ROCKSDB_NAMESPACE::Iterator; -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::ReadOptions; -using ROCKSDB_NAMESPACE::Slice; -using ROCKSDB_NAMESPACE::WriteBatch; -using ROCKSDB_NAMESPACE::WriteOptions; - -namespace { - -// Reasoning: previously the number was 1100000. Since the keys are written to -// the batch in one write, each write will result in one SST file. We reduced -// the write_buffer_size to 1K to get essentially the same effect with far -// fewer keys, which shortens the test runtime. -const int kNumKeys = 1100; - -std::string Key1(int i) { - char buf[100]; - snprintf(buf, sizeof(buf), "my_key_%d", i); - return buf; -} - -std::string Key2(int i) { return Key1(i) + "_xxx"; } - -class ManualCompactionTest : public testing::Test { - public: - ManualCompactionTest() { - // Get rid of any state from an old run.
- dbname_ = ROCKSDB_NAMESPACE::test::PerThreadDBPath( - "rocksdb_manual_compaction_test"); - EXPECT_OK(DestroyDB(dbname_, Options())); - } - - std::string dbname_; -}; - -class DestroyAllCompactionFilter : public CompactionFilter { - public: - DestroyAllCompactionFilter() {} - - bool Filter(int /*level*/, const Slice& /*key*/, const Slice& existing_value, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - return existing_value.ToString() == "destroy"; - } - - const char* Name() const override { return "DestroyAllCompactionFilter"; } -}; - -class LogCompactionFilter : public CompactionFilter { - public: - const char* Name() const override { return "LogCompactionFilter"; } - - bool Filter(int level, const Slice& key, const Slice& /*existing_value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - key_level_[key.ToString()] = level; - return false; - } - - void Reset() { key_level_.clear(); } - - size_t NumKeys() const { return key_level_.size(); } - - int KeyLevel(const Slice& key) { - auto it = key_level_.find(key.ToString()); - if (it == key_level_.end()) { - return -1; - } - return it->second; - } - - private: - mutable std::map key_level_; -}; - -TEST_F(ManualCompactionTest, CompactTouchesAllKeys) { - for (int iter = 0; iter < 2; ++iter) { - DB* db; - Options options; - if (iter == 0) { // level compaction - options.num_levels = 3; - options.compaction_style = CompactionStyle::kCompactionStyleLevel; - } else { // universal compaction - options.compaction_style = CompactionStyle::kCompactionStyleUniversal; - } - options.create_if_missing = true; - options.compression = CompressionType::kNoCompression; - options.compaction_filter = new DestroyAllCompactionFilter(); - ASSERT_OK(DB::Open(options, dbname_, &db)); - - ASSERT_OK(db->Put(WriteOptions(), Slice("key1"), Slice("destroy"))); - ASSERT_OK(db->Put(WriteOptions(), Slice("key2"), Slice("destroy"))); - ASSERT_OK(db->Put(WriteOptions(), Slice("key3"), Slice("value3"))); - ASSERT_OK(db->Put(WriteOptions(), Slice("key4"), Slice("destroy"))); - - Slice key4("key4"); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, &key4)); - Iterator* itr = db->NewIterator(ReadOptions()); - itr->SeekToFirst(); - ASSERT_TRUE(itr->Valid()); - ASSERT_EQ("key3", itr->key().ToString()); - itr->Next(); - ASSERT_TRUE(!itr->Valid()); - delete itr; - - delete options.compaction_filter; - delete db; - ASSERT_OK(DestroyDB(dbname_, options)); - } -} - -TEST_F(ManualCompactionTest, Test) { - // Open database. Disable compression since it affects the creation - // of layers and the code below is trying to test against a very - // specific scenario. 
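For orientation, the range compactions issued by these tests correspond to the public DB::CompactRange() call, where a nullptr bound leaves that side of the range open. A minimal usage sketch against the public API; the database path is illustrative and error handling is reduced to asserts.

#include <cassert>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"

// Compact the whole key space, then a bounded sub-range, of an existing DB.
// A nullptr bound means "from the start" / "to the end" of the key space.
void ManualCompactionSketch(const std::string& db_path /* illustrative */) {
  ROCKSDB_NAMESPACE::Options options;
  options.create_if_missing = true;
  ROCKSDB_NAMESPACE::DB* db = nullptr;
  ROCKSDB_NAMESPACE::Status s =
      ROCKSDB_NAMESPACE::DB::Open(options, db_path, &db);
  assert(s.ok());

  // Full-range manual compaction.
  s = db->CompactRange(ROCKSDB_NAMESPACE::CompactRangeOptions(), nullptr,
                       nullptr);
  assert(s.ok());

  // Bounded manual compaction: only data overlapping ["a", "m"] is rewritten.
  ROCKSDB_NAMESPACE::Slice begin("a");
  ROCKSDB_NAMESPACE::Slice end("m");
  s = db->CompactRange(ROCKSDB_NAMESPACE::CompactRangeOptions(), &begin, &end);
  assert(s.ok());
  delete db;
}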
- DB* db; - Options db_options; - db_options.write_buffer_size = 1024; - db_options.create_if_missing = true; - db_options.compression = CompressionType::kNoCompression; - ASSERT_OK(DB::Open(db_options, dbname_, &db)); - - // create first key range - WriteBatch batch; - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(Key1(i), "value for range 1 key")); - } - ASSERT_OK(db->Write(WriteOptions(), &batch)); - - // create second key range - batch.Clear(); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(Key2(i), "value for range 2 key")); - } - ASSERT_OK(db->Write(WriteOptions(), &batch)); - - // delete second key range - batch.Clear(); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Delete(Key2(i))); - } - ASSERT_OK(db->Write(WriteOptions(), &batch)); - - // compact database - std::string start_key = Key1(0); - std::string end_key = Key1(kNumKeys - 1); - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - // commenting out the line below causes the example to work correctly - ASSERT_OK(db->CompactRange(CompactRangeOptions(), &least, &greatest)); - - // count the keys - Iterator* iter = db->NewIterator(ReadOptions()); - int num_keys = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - num_keys++; - } - delete iter; - ASSERT_EQ(kNumKeys, num_keys) << "Bad number of keys"; - - // close database - delete db; - ASSERT_OK(DestroyDB(dbname_, Options())); -} - -TEST_F(ManualCompactionTest, SkipLevel) { - DB* db; - Options options; - options.num_levels = 3; - // Initially, flushed L0 files won't exceed 100. - options.level0_file_num_compaction_trigger = 100; - options.compaction_style = CompactionStyle::kCompactionStyleLevel; - options.create_if_missing = true; - options.compression = CompressionType::kNoCompression; - LogCompactionFilter* filter = new LogCompactionFilter(); - options.compaction_filter = filter; - ASSERT_OK(DB::Open(options, dbname_, &db)); - - WriteOptions wo; - FlushOptions fo; - ASSERT_OK(db->Put(wo, "1", "")); - ASSERT_OK(db->Flush(fo)); - ASSERT_OK(db->Put(wo, "2", "")); - ASSERT_OK(db->Flush(fo)); - ASSERT_OK(db->Put(wo, "4", "")); - ASSERT_OK(db->Put(wo, "8", "")); - ASSERT_OK(db->Flush(fo)); - - { - // L0: 1, 2, [4, 8] - // no file has keys in range [5, 7] - Slice start("5"); - Slice end("7"); - filter->Reset(); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, &end)); - ASSERT_EQ(0, filter->NumKeys()); - } - - { - // L0: 1, 2, [4, 8] - // [3, 7] overlaps with 4 in L0 - Slice start("3"); - Slice end("7"); - filter->Reset(); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, &end)); - ASSERT_EQ(2, filter->NumKeys()); - ASSERT_EQ(0, filter->KeyLevel("4")); - ASSERT_EQ(0, filter->KeyLevel("8")); - } - - { - // L0: 1, 2 - // L1: [4, 8] - // no file has keys in range (-inf, 0] - Slice end("0"); - filter->Reset(); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, &end)); - ASSERT_EQ(0, filter->NumKeys()); - } - - { - // L0: 1, 2 - // L1: [4, 8] - // no file has keys in range [9, inf) - Slice start("9"); - filter->Reset(); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, nullptr)); - ASSERT_EQ(0, filter->NumKeys()); - } - - { - // L0: 1, 2 - // L1: [4, 8] - // [2, 2] overlaps with 2 in L0 - Slice start("2"); - Slice end("2"); - filter->Reset(); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, &end)); - ASSERT_EQ(1, filter->NumKeys()); - ASSERT_EQ(0, filter->KeyLevel("2")); - } - - { - // L0: 1 - // L1: 2, [4, 8] - // [2, 5] overlaps with 2 
and [4, 8) in L1, skip L0 - Slice start("2"); - Slice end("5"); - filter->Reset(); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, &end)); - ASSERT_EQ(3, filter->NumKeys()); - ASSERT_EQ(1, filter->KeyLevel("2")); - ASSERT_EQ(1, filter->KeyLevel("4")); - ASSERT_EQ(1, filter->KeyLevel("8")); - } - - { - // L0: 1 - // L1: [2, 4, 8] - // [0, inf) overlaps all files - Slice start("0"); - filter->Reset(); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, nullptr)); - ASSERT_EQ(4, filter->NumKeys()); - // 1 is first compacted to L1 and then further compacted into [2, 4, 8], - // so finally the logged level for 1 is L1. - ASSERT_EQ(1, filter->KeyLevel("1")); - ASSERT_EQ(1, filter->KeyLevel("2")); - ASSERT_EQ(1, filter->KeyLevel("4")); - ASSERT_EQ(1, filter->KeyLevel("8")); - } - - delete filter; - delete db; - ASSERT_OK(DestroyDB(dbname_, options)); -} - -} // anonymous namespace - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc deleted file mode 100644 index c63952b12..000000000 --- a/db/memtable_list_test.cc +++ /dev/null @@ -1,1037 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/memtable_list.h" - -#include -#include -#include - -#include "db/merge_context.h" -#include "db/version_set.h" -#include "db/write_controller.h" -#include "rocksdb/db.h" -#include "rocksdb/status.h" -#include "rocksdb/write_buffer_manager.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class MemTableListTest : public testing::Test { - public: - std::string dbname; - DB* db; - Options options; - std::vector handles; - std::atomic file_number; - - MemTableListTest() : db(nullptr), file_number(1) { - dbname = test::PerThreadDBPath("memtable_list_test"); - options.create_if_missing = true; - EXPECT_OK(DestroyDB(dbname, options)); - } - - // Create a test db if not yet created - void CreateDB() { - if (db == nullptr) { - options.create_if_missing = true; - EXPECT_OK(DestroyDB(dbname, options)); - // Open DB only with default column family - ColumnFamilyOptions cf_options; - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, cf_options); - Status s = DB::Open(options, dbname, cf_descs, &handles, &db); - EXPECT_OK(s); - - ColumnFamilyOptions cf_opt1, cf_opt2; - cf_opt1.cf_paths.emplace_back(dbname + "_one_1", - std::numeric_limits::max()); - cf_opt2.cf_paths.emplace_back(dbname + "_two_1", - std::numeric_limits::max()); - int sz = static_cast(handles.size()); - handles.resize(sz + 2); - s = db->CreateColumnFamily(cf_opt1, "one", &handles[1]); - EXPECT_OK(s); - s = db->CreateColumnFamily(cf_opt2, "two", &handles[2]); - EXPECT_OK(s); - - cf_descs.emplace_back("one", cf_options); - cf_descs.emplace_back("two", cf_options); - } - } - - ~MemTableListTest() override { - if (db) { - std::vector cf_descs(handles.size()); - for (int i = 0; i != static_cast(handles.size()); ++i) { - EXPECT_OK(handles[i]->GetDescriptor(&cf_descs[i])); - } - for (auto h : handles) { - if (h) { - EXPECT_OK(db->DestroyColumnFamilyHandle(h)); - } - } - handles.clear(); - delete db; - db = nullptr; - 
EXPECT_OK(DestroyDB(dbname, options, cf_descs)); - } - } - - // Calls MemTableList::TryInstallMemtableFlushResults() and sets up all - // structures needed to call this function. - Status Mock_InstallMemtableFlushResults( - MemTableList* list, const MutableCFOptions& mutable_cf_options, - const autovector& m, autovector* to_delete) { - // Create a mock Logger - test::NullLogger logger; - LogBuffer log_buffer(DEBUG_LEVEL, &logger); - - CreateDB(); - // Create a mock VersionSet - DBOptions db_options; - ImmutableDBOptions immutable_db_options(db_options); - EnvOptions env_options; - std::shared_ptr table_cache(NewLRUCache(50000, 16)); - WriteBufferManager write_buffer_manager(db_options.db_write_buffer_size); - WriteController write_controller(10000000u); - - VersionSet versions(dbname, &immutable_db_options, env_options, - table_cache.get(), &write_buffer_manager, - &write_controller, /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_id*/ "", - /*db_session_id*/ ""); - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions()); - cf_descs.emplace_back("one", ColumnFamilyOptions()); - cf_descs.emplace_back("two", ColumnFamilyOptions()); - - EXPECT_OK(versions.Recover(cf_descs, false)); - - // Create mock default ColumnFamilyData - auto column_family_set = versions.GetColumnFamilySet(); - LogsWithPrepTracker dummy_prep_tracker; - auto cfd = column_family_set->GetDefault(); - EXPECT_TRUE(nullptr != cfd); - uint64_t file_num = file_number.fetch_add(1); - IOStatus io_s; - // Create dummy mutex. - InstrumentedMutex mutex; - InstrumentedMutexLock l(&mutex); - std::list> flush_jobs_info; - Status s = list->TryInstallMemtableFlushResults( - cfd, mutable_cf_options, m, &dummy_prep_tracker, &versions, &mutex, - file_num, to_delete, nullptr, &log_buffer, &flush_jobs_info); - EXPECT_OK(io_s); - return s; - } - - // Calls MemTableList::InstallMemtableFlushResults() and sets up all - // structures needed to call this function. 
- Status Mock_InstallMemtableAtomicFlushResults( - autovector& lists, const autovector& cf_ids, - const autovector& mutable_cf_options_list, - const autovector*>& mems_list, - autovector* to_delete) { - // Create a mock Logger - test::NullLogger logger; - LogBuffer log_buffer(DEBUG_LEVEL, &logger); - - CreateDB(); - // Create a mock VersionSet - DBOptions db_options; - - ImmutableDBOptions immutable_db_options(db_options); - EnvOptions env_options; - std::shared_ptr table_cache(NewLRUCache(50000, 16)); - WriteBufferManager write_buffer_manager(db_options.db_write_buffer_size); - WriteController write_controller(10000000u); - - VersionSet versions(dbname, &immutable_db_options, env_options, - table_cache.get(), &write_buffer_manager, - &write_controller, /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_id*/ "", - /*db_session_id*/ ""); - std::vector cf_descs; - cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions()); - cf_descs.emplace_back("one", ColumnFamilyOptions()); - cf_descs.emplace_back("two", ColumnFamilyOptions()); - EXPECT_OK(versions.Recover(cf_descs, false)); - - // Create mock default ColumnFamilyData - - auto column_family_set = versions.GetColumnFamilySet(); - - LogsWithPrepTracker dummy_prep_tracker; - autovector cfds; - for (int i = 0; i != static_cast(cf_ids.size()); ++i) { - cfds.emplace_back(column_family_set->GetColumnFamily(cf_ids[i])); - EXPECT_NE(nullptr, cfds[i]); - } - std::vector file_metas; - file_metas.reserve(cf_ids.size()); - for (size_t i = 0; i != cf_ids.size(); ++i) { - FileMetaData meta; - uint64_t file_num = file_number.fetch_add(1); - meta.fd = FileDescriptor(file_num, 0, 0); - file_metas.emplace_back(meta); - } - autovector file_meta_ptrs; - for (auto& meta : file_metas) { - file_meta_ptrs.push_back(&meta); - } - std::vector>> - committed_flush_jobs_info_storage(cf_ids.size()); - autovector>*> - committed_flush_jobs_info; - for (int i = 0; i < static_cast(cf_ids.size()); ++i) { - committed_flush_jobs_info.push_back( - &committed_flush_jobs_info_storage[i]); - } - - InstrumentedMutex mutex; - InstrumentedMutexLock l(&mutex); - return InstallMemtableAtomicFlushResults( - &lists, cfds, mutable_cf_options_list, mems_list, &versions, - nullptr /* prep_tracker */, &mutex, file_meta_ptrs, - committed_flush_jobs_info, to_delete, nullptr, &log_buffer); - } -}; - -TEST_F(MemTableListTest, Empty) { - // Create an empty MemTableList and validate basic functions. 
- MemTableList list(1, 0, 0); - - ASSERT_EQ(0, list.NumNotFlushed()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - ASSERT_FALSE(list.IsFlushPending()); - - autovector mems; - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &mems); - ASSERT_EQ(0, mems.size()); - - autovector to_delete; - list.current()->Unref(&to_delete); - ASSERT_EQ(0, to_delete.size()); -} - -TEST_F(MemTableListTest, GetTest) { - // Create MemTableList - int min_write_buffer_number_to_merge = 2; - int max_write_buffer_number_to_maintain = 0; - int64_t max_write_buffer_size_to_maintain = 0; - MemTableList list(min_write_buffer_number_to_merge, - max_write_buffer_number_to_maintain, - max_write_buffer_size_to_maintain); - - SequenceNumber seq = 1; - std::string value; - Status s; - MergeContext merge_context; - InternalKeyComparator ikey_cmp(options.comparator); - SequenceNumber max_covering_tombstone_seq = 0; - autovector to_delete; - - LookupKey lkey("key1", seq); - bool found = list.current()->Get(lkey, &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_FALSE(found); - - // Create a MemTable - InternalKeyComparator cmp(BytewiseComparator()); - auto factory = std::make_shared(); - options.memtable_factory = factory; - ImmutableOptions ioptions(options); - - WriteBufferManager wb(options.db_write_buffer_size); - MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, - kMaxSequenceNumber, 0 /* column_family_id */); - mem->Ref(); - - // Write some keys to this memtable. - ASSERT_OK( - mem->Add(++seq, kTypeDeletion, "key1", "", nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "key2", "value2", - nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", "value1", - nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "key2", "value2.2", - nullptr /* kv_prot_info */)); - - // Fetch the newly written keys - merge_context.Clear(); - found = mem->Get(LookupKey("key1", seq), &value, /*columns*/ nullptr, - /*timestamp*/ nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions(), - false /* immutable_memtable */); - ASSERT_TRUE(s.ok() && found); - ASSERT_EQ(value, "value1"); - - merge_context.Clear(); - found = mem->Get(LookupKey("key1", 2), &value, /*columns*/ nullptr, - /*timestamp*/ nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions(), - false /* immutable_memtable */); - // MemTable found out that this key is *not* found (at this sequence#) - ASSERT_TRUE(found && s.IsNotFound()); - - merge_context.Clear(); - found = mem->Get(LookupKey("key2", seq), &value, /*columns*/ nullptr, - /*timestamp*/ nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions(), - false /* immutable_memtable */); - ASSERT_TRUE(s.ok() && found); - ASSERT_EQ(value, "value2.2"); - - ASSERT_EQ(4, mem->num_entries()); - ASSERT_EQ(1, mem->num_deletes()); - - // Add memtable to list - // This is to make assert(memtable->IsFragmentedRangeTombstonesConstructed()) - // in MemTableListVersion::GetFromList work. 
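GetTest above reads the same key at different sequence numbers: the lookup at sequence 2 sees the deletion written at that point (found, with s.IsNotFound()), while the lookup at the latest sequence sees "value1". The rule being exercised, namely that a read at sequence S sees the newest entry whose sequence is at most S, can be modelled without any MemTable machinery; the types and data below are illustrative only.

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

// One versioned entry for a single user key; the vector is ordered newest
// first, mirroring how newer writes shadow older ones.
struct VersionedEntry {
  uint64_t seq;
  bool is_delete;
  std::string value;
};

// A read at snapshot_seq returns the newest entry whose sequence is at most
// snapshot_seq; std::nullopt stands for "not found at that sequence", either
// because nothing was visible yet or because the visible entry is a deletion.
std::optional<std::string> GetAtSequence(
    const std::vector<VersionedEntry>& newest_first, uint64_t snapshot_seq) {
  for (const VersionedEntry& e : newest_first) {
    if (e.seq <= snapshot_seq) {
      if (e.is_delete) {
        return std::nullopt;
      }
      return e.value;
    }
  }
  return std::nullopt;
}

For "key1" in GetTest the newest-first list is {seq 4: "value1", seq 2: deletion}, so a read at sequence 2 reports not-found and a read at the latest sequence returns "value1", matching the assertions above.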
- mem->ConstructFragmentedRangeTombstones(); - list.Add(mem, &to_delete); - - SequenceNumber saved_seq = seq; - - // Create another memtable and write some keys to it - WriteBufferManager wb2(options.db_write_buffer_size); - MemTable* mem2 = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb2, - kMaxSequenceNumber, 0 /* column_family_id */); - mem2->Ref(); - - ASSERT_OK( - mem2->Add(++seq, kTypeDeletion, "key1", "", nullptr /* kv_prot_info */)); - ASSERT_OK(mem2->Add(++seq, kTypeValue, "key2", "value2.3", - nullptr /* kv_prot_info */)); - - // Add second memtable to list - // This is to make assert(memtable->IsFragmentedRangeTombstonesConstructed()) - // in MemTableListVersion::GetFromList work. - mem2->ConstructFragmentedRangeTombstones(); - list.Add(mem2, &to_delete); - - // Fetch keys via MemTableList - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key1", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_TRUE(found && s.IsNotFound()); - - merge_context.Clear(); - found = list.current()->Get(LookupKey("key1", saved_seq), &value, - /*columns=*/nullptr, /*timestamp=*/nullptr, &s, - &merge_context, &max_covering_tombstone_seq, - ReadOptions()); - ASSERT_TRUE(s.ok() && found); - ASSERT_EQ("value1", value); - - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key2", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_TRUE(s.ok() && found); - ASSERT_EQ(value, "value2.3"); - - merge_context.Clear(); - found = list.current()->Get(LookupKey("key2", 1), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_FALSE(found); - - ASSERT_EQ(2, list.NumNotFlushed()); - - list.current()->Unref(&to_delete); - for (MemTable* m : to_delete) { - delete m; - } -} - -TEST_F(MemTableListTest, GetFromHistoryTest) { - // Create MemTableList - int min_write_buffer_number_to_merge = 2; - int max_write_buffer_number_to_maintain = 2; - int64_t max_write_buffer_size_to_maintain = 2 * Arena::kInlineSize; - MemTableList list(min_write_buffer_number_to_merge, - max_write_buffer_number_to_maintain, - max_write_buffer_size_to_maintain); - - SequenceNumber seq = 1; - std::string value; - Status s; - MergeContext merge_context; - InternalKeyComparator ikey_cmp(options.comparator); - SequenceNumber max_covering_tombstone_seq = 0; - autovector to_delete; - - LookupKey lkey("key1", seq); - bool found = list.current()->Get(lkey, &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_FALSE(found); - - // Create a MemTable - InternalKeyComparator cmp(BytewiseComparator()); - auto factory = std::make_shared(); - options.memtable_factory = factory; - ImmutableOptions ioptions(options); - - WriteBufferManager wb(options.db_write_buffer_size); - MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, - kMaxSequenceNumber, 0 /* column_family_id */); - mem->Ref(); - - // Write some keys to this memtable. 
- ASSERT_OK( - mem->Add(++seq, kTypeDeletion, "key1", "", nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "key2", "value2", - nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "key2", "value2.2", - nullptr /* kv_prot_info */)); - - // Fetch the newly written keys - merge_context.Clear(); - found = mem->Get(LookupKey("key1", seq), &value, /*columns*/ nullptr, - /*timestamp*/ nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions(), - false /* immutable_memtable */); - // MemTable found out that this key is *not* found (at this sequence#) - ASSERT_TRUE(found && s.IsNotFound()); - - merge_context.Clear(); - found = mem->Get(LookupKey("key2", seq), &value, /*columns*/ nullptr, - /*timestamp*/ nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions(), - false /* immutable_memtable */); - ASSERT_TRUE(s.ok() && found); - ASSERT_EQ(value, "value2.2"); - - // Add memtable to list - // This is to make assert(memtable->IsFragmentedRangeTombstonesConstructed()) - // in MemTableListVersion::GetFromList work. - mem->ConstructFragmentedRangeTombstones(); - list.Add(mem, &to_delete); - ASSERT_EQ(0, to_delete.size()); - - // Fetch keys via MemTableList - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key1", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_TRUE(found && s.IsNotFound()); - - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key2", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_TRUE(s.ok() && found); - ASSERT_EQ("value2.2", value); - - // Flush this memtable from the list. - // (It will then be a part of the memtable history). 
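The history mentioned in the comment above is the same mechanism applications configure through max_write_buffer_size_to_maintain (or the older max_write_buffer_number_to_maintain): flushed memtables are retained in memory up to that budget so their contents can still be consulted, as GetFromHistory() does in this test. A hedged sketch of setting that budget through the public options; the sizes are illustrative.

#include <cstdint>

#include "rocksdb/options.h"

// Retain roughly two write buffers' worth of flushed memtables as in-memory
// history; this is the same budget the MemTableList in this test receives
// directly as max_write_buffer_size_to_maintain.
ROCKSDB_NAMESPACE::Options MakeOptionsWithMemtableHistory() {
  ROCKSDB_NAMESPACE::Options options;
  options.write_buffer_size = 64 << 20;  // 64 MiB per memtable
  options.max_write_buffer_size_to_maintain =
      2 * static_cast<int64_t>(options.write_buffer_size);
  return options;
}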
- autovector to_flush; - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush); - ASSERT_EQ(1, to_flush.size()); - - MutableCFOptions mutable_cf_options(options); - s = Mock_InstallMemtableFlushResults(&list, mutable_cf_options, to_flush, - &to_delete); - ASSERT_OK(s); - ASSERT_EQ(0, list.NumNotFlushed()); - ASSERT_EQ(1, list.NumFlushed()); - ASSERT_EQ(0, to_delete.size()); - - // Verify keys are no longer in MemTableList - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key1", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_FALSE(found); - - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key2", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_FALSE(found); - - // Verify keys are present in history - merge_context.Clear(); - found = list.current()->GetFromHistory( - LookupKey("key1", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, - ReadOptions()); - ASSERT_TRUE(found && s.IsNotFound()); - - merge_context.Clear(); - found = list.current()->GetFromHistory( - LookupKey("key2", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, - ReadOptions()); - ASSERT_TRUE(found); - ASSERT_EQ("value2.2", value); - - // Create another memtable and write some keys to it - WriteBufferManager wb2(options.db_write_buffer_size); - MemTable* mem2 = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb2, - kMaxSequenceNumber, 0 /* column_family_id */); - mem2->Ref(); - - ASSERT_OK( - mem2->Add(++seq, kTypeDeletion, "key1", "", nullptr /* kv_prot_info */)); - ASSERT_OK(mem2->Add(++seq, kTypeValue, "key3", "value3", - nullptr /* kv_prot_info */)); - - // Add second memtable to list - // This is to make assert(memtable->IsFragmentedRangeTombstonesConstructed()) - // in MemTableListVersion::GetFromList work. - mem2->ConstructFragmentedRangeTombstones(); - list.Add(mem2, &to_delete); - ASSERT_EQ(0, to_delete.size()); - - to_flush.clear(); - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush); - ASSERT_EQ(1, to_flush.size()); - - // Flush second memtable - s = Mock_InstallMemtableFlushResults(&list, mutable_cf_options, to_flush, - &to_delete); - ASSERT_OK(s); - ASSERT_EQ(0, list.NumNotFlushed()); - ASSERT_EQ(2, list.NumFlushed()); - ASSERT_EQ(0, to_delete.size()); - - // Add a third memtable to push the first memtable out of the history - WriteBufferManager wb3(options.db_write_buffer_size); - MemTable* mem3 = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb3, - kMaxSequenceNumber, 0 /* column_family_id */); - mem3->Ref(); - // This is to make assert(memtable->IsFragmentedRangeTombstonesConstructed()) - // in MemTableListVersion::GetFromList work. 
- mem3->ConstructFragmentedRangeTombstones(); - list.Add(mem3, &to_delete); - ASSERT_EQ(1, list.NumNotFlushed()); - ASSERT_EQ(1, list.NumFlushed()); - ASSERT_EQ(1, to_delete.size()); - - // Verify keys are no longer in MemTableList - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key1", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_FALSE(found); - - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key2", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_FALSE(found); - - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key3", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_FALSE(found); - - // Verify that the second memtable's keys are in the history - merge_context.Clear(); - found = list.current()->GetFromHistory( - LookupKey("key1", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, - ReadOptions()); - ASSERT_TRUE(found && s.IsNotFound()); - - merge_context.Clear(); - found = list.current()->GetFromHistory( - LookupKey("key3", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, - ReadOptions()); - ASSERT_TRUE(found); - ASSERT_EQ("value3", value); - - // Verify that key2 from the first memtable is no longer in the history - merge_context.Clear(); - found = - list.current()->Get(LookupKey("key2", seq), &value, /*columns=*/nullptr, - /*timestamp=*/nullptr, &s, &merge_context, - &max_covering_tombstone_seq, ReadOptions()); - ASSERT_FALSE(found); - - // Cleanup - list.current()->Unref(&to_delete); - ASSERT_EQ(3, to_delete.size()); - for (MemTable* m : to_delete) { - delete m; - } -} - -TEST_F(MemTableListTest, FlushPendingTest) { - const int num_tables = 6; - SequenceNumber seq = 1; - Status s; - - auto factory = std::make_shared(); - options.memtable_factory = factory; - ImmutableOptions ioptions(options); - InternalKeyComparator cmp(BytewiseComparator()); - WriteBufferManager wb(options.db_write_buffer_size); - autovector to_delete; - - // Create MemTableList - int min_write_buffer_number_to_merge = 3; - int max_write_buffer_number_to_maintain = 7; - int64_t max_write_buffer_size_to_maintain = - 7 * static_cast(options.write_buffer_size); - MemTableList list(min_write_buffer_number_to_merge, - max_write_buffer_number_to_maintain, - max_write_buffer_size_to_maintain); - - // Create some MemTables - uint64_t memtable_id = 0; - std::vector tables; - MutableCFOptions mutable_cf_options(options); - for (int i = 0; i < num_tables; i++) { - MemTable* mem = new MemTable(cmp, ioptions, mutable_cf_options, &wb, - kMaxSequenceNumber, 0 /* column_family_id */); - mem->SetID(memtable_id++); - mem->Ref(); - - std::string value; - MergeContext merge_context; - - ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", std::to_string(i), - nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "keyN" + std::to_string(i), "valueN", - nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "keyX" + std::to_string(i), "value", - nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "keyM" + std::to_string(i), "valueM", - nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeDeletion, "keyX" + std::to_string(i), "", - nullptr /* kv_prot_info 
*/)); - - tables.push_back(mem); - } - - // Nothing to flush - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - autovector to_flush; - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush); - ASSERT_EQ(0, to_flush.size()); - - // Request a flush even though there is nothing to flush - list.FlushRequested(); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Attempt to 'flush' to clear request for flush - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush); - ASSERT_EQ(0, to_flush.size()); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Request a flush again - list.FlushRequested(); - // No flush pending since the list is empty. - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Add 2 tables - list.Add(tables[0], &to_delete); - list.Add(tables[1], &to_delete); - ASSERT_EQ(2, list.NumNotFlushed()); - ASSERT_EQ(0, to_delete.size()); - - // Even though we have less than the minimum to flush, a flush is - // pending since we had previously requested a flush and never called - // PickMemtablesToFlush() to clear the flush. - ASSERT_TRUE(list.IsFlushPending()); - ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Pick tables to flush - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush); - ASSERT_EQ(2, to_flush.size()); - ASSERT_EQ(2, list.NumNotFlushed()); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Revert flush - list.RollbackMemtableFlush(to_flush, 0); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); - to_flush.clear(); - - // Add another table - list.Add(tables[2], &to_delete); - // We now have the minimum to flush regardles of whether FlushRequested() - // was called. 
- ASSERT_TRUE(list.IsFlushPending()); - ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); - ASSERT_EQ(0, to_delete.size()); - - // Pick tables to flush - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush); - ASSERT_EQ(3, to_flush.size()); - ASSERT_EQ(3, list.NumNotFlushed()); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Pick tables to flush again - autovector to_flush2; - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush2); - ASSERT_EQ(0, to_flush2.size()); - ASSERT_EQ(3, list.NumNotFlushed()); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Add another table - list.Add(tables[3], &to_delete); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); - ASSERT_EQ(0, to_delete.size()); - - // Request a flush again - list.FlushRequested(); - ASSERT_TRUE(list.IsFlushPending()); - ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Pick tables to flush again - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush2); - ASSERT_EQ(1, to_flush2.size()); - ASSERT_EQ(4, list.NumNotFlushed()); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Rollback first pick of tables - list.RollbackMemtableFlush(to_flush, 0); - ASSERT_TRUE(list.IsFlushPending()); - ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); - to_flush.clear(); - - // Add another tables - list.Add(tables[4], &to_delete); - ASSERT_EQ(5, list.NumNotFlushed()); - // We now have the minimum to flush regardles of whether FlushRequested() - ASSERT_TRUE(list.IsFlushPending()); - ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); - ASSERT_EQ(0, to_delete.size()); - - // Pick tables to flush - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush); - // Picks three oldest memtables. The fourth oldest is picked in `to_flush2` so - // must be excluded. The newest (fifth oldest) is non-consecutive with the - // three oldest due to omitting the fourth oldest so must not be picked. - ASSERT_EQ(3, to_flush.size()); - ASSERT_EQ(5, list.NumNotFlushed()); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Pick tables to flush again - autovector to_flush3; - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush3); - // Picks newest (fifth oldest) - ASSERT_EQ(1, to_flush3.size()); - ASSERT_EQ(5, list.NumNotFlushed()); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Nothing left to flush - autovector to_flush4; - list.PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, &to_flush4); - ASSERT_EQ(0, to_flush4.size()); - ASSERT_EQ(5, list.NumNotFlushed()); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Flush the 3 memtables that were picked in to_flush - s = Mock_InstallMemtableFlushResults(&list, mutable_cf_options, to_flush, - &to_delete); - ASSERT_OK(s); - - // Note: now to_flush contains tables[0,1,2]. to_flush2 contains - // tables[3]. to_flush3 contains tables[4]. 
- // Current implementation will only commit memtables in the order they were - // created. So TryInstallMemtableFlushResults will install the first 3 tables - // in to_flush and stop when it encounters a table not yet flushed. - ASSERT_EQ(2, list.NumNotFlushed()); - int num_in_history = - std::min(3, static_cast(max_write_buffer_size_to_maintain) / - static_cast(options.write_buffer_size)); - ASSERT_EQ(num_in_history, list.NumFlushed()); - ASSERT_EQ(5 - list.NumNotFlushed() - num_in_history, to_delete.size()); - - // Request a flush again. Should be nothing to flush - list.FlushRequested(); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - - // Flush the 1 memtable (tables[4]) that was picked in to_flush3 - s = MemTableListTest::Mock_InstallMemtableFlushResults( - &list, mutable_cf_options, to_flush3, &to_delete); - ASSERT_OK(s); - - // This will install 0 tables since tables[4] flushed while tables[3] has not - // yet flushed. - ASSERT_EQ(2, list.NumNotFlushed()); - ASSERT_EQ(0, to_delete.size()); - - // Flush the 1 memtable (tables[3]) that was picked in to_flush2 - s = MemTableListTest::Mock_InstallMemtableFlushResults( - &list, mutable_cf_options, to_flush2, &to_delete); - ASSERT_OK(s); - - // This will actually install 2 tables. The 1 we told it to flush, and also - // tables[4] which has been waiting for tables[3] to commit. - ASSERT_EQ(0, list.NumNotFlushed()); - num_in_history = - std::min(5, static_cast(max_write_buffer_size_to_maintain) / - static_cast(options.write_buffer_size)); - ASSERT_EQ(num_in_history, list.NumFlushed()); - ASSERT_EQ(5 - list.NumNotFlushed() - num_in_history, to_delete.size()); - - for (const auto& m : to_delete) { - // Refcount should be 0 after calling TryInstallMemtableFlushResults. - // Verify this, by Ref'ing then UnRef'ing: - m->Ref(); - ASSERT_EQ(m, m->Unref()); - delete m; - } - to_delete.clear(); - - // Add another table - list.Add(tables[5], &to_delete); - ASSERT_EQ(1, list.NumNotFlushed()); - ASSERT_EQ(5, list.GetLatestMemTableID()); - memtable_id = 4; - // Pick tables to flush. The tables to pick must have ID smaller than or - // equal to 4. Therefore, no table will be selected in this case. - autovector to_flush5; - list.FlushRequested(); - ASSERT_TRUE(list.HasFlushRequested()); - list.PickMemtablesToFlush(memtable_id, &to_flush5); - ASSERT_TRUE(to_flush5.empty()); - ASSERT_EQ(1, list.NumNotFlushed()); - ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); - ASSERT_FALSE(list.IsFlushPending()); - ASSERT_FALSE(list.HasFlushRequested()); - - // Pick tables to flush. The tables to pick must have ID smaller than or - // equal to 5. Therefore, only tables[5] will be selected. - memtable_id = 5; - list.FlushRequested(); - list.PickMemtablesToFlush(memtable_id, &to_flush5); - ASSERT_EQ(1, static_cast(to_flush5.size())); - ASSERT_EQ(1, list.NumNotFlushed()); - ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); - ASSERT_FALSE(list.IsFlushPending()); - to_delete.clear(); - - list.current()->Unref(&to_delete); - int to_delete_size = - std::min(num_tables, static_cast(max_write_buffer_size_to_maintain) / - static_cast(options.write_buffer_size)); - ASSERT_EQ(to_delete_size, to_delete.size()); - - for (const auto& m : to_delete) { - // Refcount should be 0 after calling TryInstallMemtableFlushResults. 
- // Verify this, by Ref'ing then UnRef'ing: - m->Ref(); - ASSERT_EQ(m, m->Unref()); - delete m; - } - to_delete.clear(); -} - -TEST_F(MemTableListTest, EmptyAtomicFlusTest) { - autovector lists; - autovector cf_ids; - autovector options_list; - autovector*> to_flush; - autovector to_delete; - Status s = Mock_InstallMemtableAtomicFlushResults(lists, cf_ids, options_list, - to_flush, &to_delete); - ASSERT_OK(s); - ASSERT_TRUE(to_delete.empty()); -} - -TEST_F(MemTableListTest, AtomicFlusTest) { - const int num_cfs = 3; - const int num_tables_per_cf = 2; - SequenceNumber seq = 1; - - auto factory = std::make_shared(); - options.memtable_factory = factory; - ImmutableOptions ioptions(options); - InternalKeyComparator cmp(BytewiseComparator()); - WriteBufferManager wb(options.db_write_buffer_size); - - // Create MemTableLists - int min_write_buffer_number_to_merge = 3; - int max_write_buffer_number_to_maintain = 7; - int64_t max_write_buffer_size_to_maintain = - 7 * static_cast(options.write_buffer_size); - autovector lists; - for (int i = 0; i != num_cfs; ++i) { - lists.emplace_back(new MemTableList(min_write_buffer_number_to_merge, - max_write_buffer_number_to_maintain, - max_write_buffer_size_to_maintain)); - } - - autovector cf_ids; - std::vector> tables(num_cfs); - autovector mutable_cf_options_list; - uint32_t cf_id = 0; - for (auto& elem : tables) { - mutable_cf_options_list.emplace_back(new MutableCFOptions(options)); - uint64_t memtable_id = 0; - for (int i = 0; i != num_tables_per_cf; ++i) { - MemTable* mem = - new MemTable(cmp, ioptions, *(mutable_cf_options_list.back()), &wb, - kMaxSequenceNumber, cf_id); - mem->SetID(memtable_id++); - mem->Ref(); - - std::string value; - - ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", std::to_string(i), - nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "keyN" + std::to_string(i), - "valueN", nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "keyX" + std::to_string(i), "value", - nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeValue, "keyM" + std::to_string(i), - "valueM", nullptr /* kv_prot_info */)); - ASSERT_OK(mem->Add(++seq, kTypeDeletion, "keyX" + std::to_string(i), "", - nullptr /* kv_prot_info */)); - - elem.push_back(mem); - } - cf_ids.push_back(cf_id++); - } - - std::vector> flush_candidates(num_cfs); - - // Nothing to flush - for (auto i = 0; i != num_cfs; ++i) { - auto* list = lists[i]; - ASSERT_FALSE(list->IsFlushPending()); - ASSERT_FALSE(list->imm_flush_needed.load(std::memory_order_acquire)); - list->PickMemtablesToFlush( - std::numeric_limits::max() /* memtable_id */, - &flush_candidates[i]); - ASSERT_EQ(0, flush_candidates[i].size()); - } - // Request flush even though there is nothing to flush - for (auto i = 0; i != num_cfs; ++i) { - auto* list = lists[i]; - list->FlushRequested(); - ASSERT_FALSE(list->IsFlushPending()); - ASSERT_FALSE(list->imm_flush_needed.load(std::memory_order_acquire)); - } - autovector to_delete; - // Add tables to the immutable memtalbe lists associated with column families - for (auto i = 0; i != num_cfs; ++i) { - for (auto j = 0; j != num_tables_per_cf; ++j) { - lists[i]->Add(tables[i][j], &to_delete); - } - ASSERT_EQ(num_tables_per_cf, lists[i]->NumNotFlushed()); - ASSERT_TRUE(lists[i]->IsFlushPending()); - ASSERT_TRUE(lists[i]->imm_flush_needed.load(std::memory_order_acquire)); - } - std::vector flush_memtable_ids = {1, 1, 0}; - // +----+ - // list[0]: |0 1| - // list[1]: |0 1| - // | +--+ - // list[2]: |0| 1 - // +-+ - // Pick memtables to flush 
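The picks asserted in the loop that follows obey two rules already spelled out in FlushPendingTest above: a memtable is eligible only if its ID is at most the requested cutoff and it has not been handed out yet, and the result must be a consecutive run starting at the oldest eligible memtable, so an already-picked memtable in the middle ends the run. The stand-alone model below restates that selection over plain IDs; it is an illustration, not the MemTableList implementation.

#include <cstdint>
#include <set>
#include <vector>

// Model of the pick: starting from the oldest not-yet-picked memtable, take
// IDs while they are consecutive in the list, not already picked, and no
// greater than max_id. An already-picked ID after the run has started (a
// gap) stops the pick.
std::vector<uint64_t> PickIds(const std::vector<uint64_t>& oldest_first,
                              const std::set<uint64_t>& already_picked,
                              uint64_t max_id) {
  std::vector<uint64_t> picked;
  for (uint64_t id : oldest_first) {
    if (already_picked.count(id) > 0) {
      if (!picked.empty()) {
        break;  // a gap after the run has started ends the pick
      }
      continue;  // skip memtables at the front that are already being flushed
    }
    if (id > max_id) {
      break;
    }
    picked.push_back(id);
  }
  return picked;
}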
- for (auto i = 0; i != num_cfs; ++i) { - flush_candidates[i].clear(); - lists[i]->PickMemtablesToFlush(flush_memtable_ids[i], &flush_candidates[i]); - ASSERT_EQ(flush_memtable_ids[i] - 0 + 1, - static_cast(flush_candidates[i].size())); - } - autovector tmp_lists; - autovector tmp_cf_ids; - autovector tmp_options_list; - autovector*> to_flush; - for (auto i = 0; i != num_cfs; ++i) { - if (!flush_candidates[i].empty()) { - to_flush.push_back(&flush_candidates[i]); - tmp_lists.push_back(lists[i]); - tmp_cf_ids.push_back(i); - tmp_options_list.push_back(mutable_cf_options_list[i]); - } - } - Status s = Mock_InstallMemtableAtomicFlushResults( - tmp_lists, tmp_cf_ids, tmp_options_list, to_flush, &to_delete); - ASSERT_OK(s); - - for (auto i = 0; i != num_cfs; ++i) { - for (auto j = 0; j != num_tables_per_cf; ++j) { - if (static_cast(j) <= flush_memtable_ids[i]) { - ASSERT_LT(0, tables[i][j]->GetFileNumber()); - } - } - ASSERT_EQ( - static_cast(num_tables_per_cf) - flush_candidates[i].size(), - lists[i]->NumNotFlushed()); - } - - to_delete.clear(); - for (auto list : lists) { - list->current()->Unref(&to_delete); - delete list; - } - for (auto& mutable_cf_options : mutable_cf_options_list) { - if (mutable_cf_options != nullptr) { - delete mutable_cf_options; - mutable_cf_options = nullptr; - } - } - // All memtables in tables array must have been flushed, thus ready to be - // deleted. - ASSERT_EQ(to_delete.size(), tables.size() * tables.front().size()); - for (const auto& m : to_delete) { - // Refcount should be 0 after calling InstallMemtableFlushResults. - // Verify this by Ref'ing and then Unref'ing. - m->Ref(); - ASSERT_EQ(m, m->Unref()); - delete m; - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/merge_helper_test.cc b/db/merge_helper_test.cc deleted file mode 100644 index 05408d5b9..000000000 --- a/db/merge_helper_test.cc +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -#include "db/merge_helper.h" - -#include -#include -#include - -#include "db/dbformat.h" -#include "rocksdb/comparator.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/coding.h" -#include "util/vector_iterator.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -class MergeHelperTest : public testing::Test { - public: - MergeHelperTest() : icmp_(BytewiseComparator()) { env_ = Env::Default(); } - - ~MergeHelperTest() override = default; - - Status Run(SequenceNumber stop_before, bool at_bottom, - SequenceNumber latest_snapshot = 0) { - iter_.reset(new VectorIterator(ks_, vs_, &icmp_)); - iter_->SeekToFirst(); - merge_helper_.reset(new MergeHelper(env_, icmp_.user_comparator(), - merge_op_.get(), filter_.get(), nullptr, - false, latest_snapshot)); - return merge_helper_->MergeUntil( - iter_.get(), nullptr /* range_del_agg */, stop_before, at_bottom, - false /* allow_data_in_errors */, nullptr /* blob_fetcher */, - nullptr /* full_history_ts_low */, nullptr /* prefetch_buffers */, - nullptr /* c_iter_stats */); - } - - void AddKeyVal(const std::string& user_key, const SequenceNumber& seq, - const ValueType& t, const std::string& val, - bool corrupt = false) { - InternalKey ikey(user_key, seq, t); - if (corrupt) { - test::CorruptKeyType(&ikey); - } - ks_.push_back(ikey.Encode().ToString()); - vs_.push_back(val); - } - - Env* env_; - InternalKeyComparator icmp_; - std::unique_ptr iter_; - std::shared_ptr merge_op_; - std::unique_ptr merge_helper_; - std::vector ks_; - std::vector vs_; - std::unique_ptr filter_; -}; - -// If MergeHelper encounters a new key on the last level, we know that -// the key has no more history and it can merge keys. -TEST_F(MergeHelperTest, MergeAtBottomSuccess) { - merge_op_ = MergeOperators::CreateUInt64AddOperator(); - - AddKeyVal("a", 20, kTypeMerge, test::EncodeInt(1U)); - AddKeyVal("a", 10, kTypeMerge, test::EncodeInt(3U)); - AddKeyVal("b", 10, kTypeMerge, test::EncodeInt(4U)); // <- iter_ after merge - - ASSERT_TRUE(Run(0, true).ok()); - ASSERT_EQ(ks_[2], iter_->key()); - ASSERT_EQ(test::KeyStr("a", 20, kTypeValue), merge_helper_->keys()[0]); - ASSERT_EQ(test::EncodeInt(4U), merge_helper_->values()[0]); - ASSERT_EQ(1U, merge_helper_->keys().size()); - ASSERT_EQ(1U, merge_helper_->values().size()); -} - -// Merging with a value results in a successful merge. -TEST_F(MergeHelperTest, MergeValue) { - merge_op_ = MergeOperators::CreateUInt64AddOperator(); - - AddKeyVal("a", 40, kTypeMerge, test::EncodeInt(1U)); - AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U)); - AddKeyVal("a", 20, kTypeValue, test::EncodeInt(4U)); // <- iter_ after merge - AddKeyVal("a", 10, kTypeMerge, test::EncodeInt(1U)); - - ASSERT_TRUE(Run(0, false).ok()); - ASSERT_EQ(ks_[3], iter_->key()); - ASSERT_EQ(test::KeyStr("a", 40, kTypeValue), merge_helper_->keys()[0]); - ASSERT_EQ(test::EncodeInt(8U), merge_helper_->values()[0]); - ASSERT_EQ(1U, merge_helper_->keys().size()); - ASSERT_EQ(1U, merge_helper_->values().size()); -} - -// Merging stops before a snapshot. 
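MergeHelper is the compaction-side half of the Merge API; for orientation, the user-facing half with the same uint64-add operator these tests use looks roughly like the sketch below. The path, the key, and the fixed-width little-endian encoding assumption are illustrative, and error handling is reduced to asserts.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "utilities/merge_operators.h"

// The uint64-add operator treats values as 8-byte integers and sums operands.
std::string EncodeUint64(uint64_t v) {
  std::string s(sizeof(v), '\0');
  std::memcpy(&s[0], &v, sizeof(v));  // little-endian layout assumed here
  return s;
}

void MergeCounterSketch(const std::string& db_path /* illustrative */) {
  ROCKSDB_NAMESPACE::Options options;
  options.create_if_missing = true;
  options.merge_operator =
      ROCKSDB_NAMESPACE::MergeOperators::CreateUInt64AddOperator();

  ROCKSDB_NAMESPACE::DB* db = nullptr;
  ROCKSDB_NAMESPACE::Status s =
      ROCKSDB_NAMESPACE::DB::Open(options, db_path, &db);
  assert(s.ok());

  // Each Merge() records an operand; reads and compactions (via MergeHelper)
  // combine the operands with the base value.
  s = db->Put(ROCKSDB_NAMESPACE::WriteOptions(), "counter", EncodeUint64(4));
  assert(s.ok());
  s = db->Merge(ROCKSDB_NAMESPACE::WriteOptions(), "counter", EncodeUint64(1));
  assert(s.ok());
  s = db->Merge(ROCKSDB_NAMESPACE::WriteOptions(), "counter", EncodeUint64(3));
  assert(s.ok());

  std::string value;
  s = db->Get(ROCKSDB_NAMESPACE::ReadOptions(), "counter", &value);
  assert(s.ok());
  uint64_t total = 0;
  std::memcpy(&total, value.data(), sizeof(total));
  assert(total == 8);  // 4 + 1 + 3, as these tests compute with EncodeInt()
  delete db;
}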
-TEST_F(MergeHelperTest, SnapshotBeforeValue) { - merge_op_ = MergeOperators::CreateUInt64AddOperator(); - - AddKeyVal("a", 50, kTypeMerge, test::EncodeInt(1U)); - AddKeyVal("a", 40, kTypeMerge, test::EncodeInt(3U)); // <- iter_ after merge - AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(1U)); - AddKeyVal("a", 20, kTypeValue, test::EncodeInt(4U)); - AddKeyVal("a", 10, kTypeMerge, test::EncodeInt(1U)); - - ASSERT_TRUE(Run(31, true).IsMergeInProgress()); - ASSERT_EQ(ks_[2], iter_->key()); - ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[0]); - ASSERT_EQ(test::EncodeInt(4U), merge_helper_->values()[0]); - ASSERT_EQ(1U, merge_helper_->keys().size()); - ASSERT_EQ(1U, merge_helper_->values().size()); -} - -// MergeHelper preserves the operand stack for merge operators that -// cannot do a partial merge. -TEST_F(MergeHelperTest, NoPartialMerge) { - merge_op_ = MergeOperators::CreateStringAppendTESTOperator(); - - AddKeyVal("a", 50, kTypeMerge, "v2"); - AddKeyVal("a", 40, kTypeMerge, "v"); // <- iter_ after merge - AddKeyVal("a", 30, kTypeMerge, "v"); - - ASSERT_TRUE(Run(31, true).IsMergeInProgress()); - ASSERT_EQ(ks_[2], iter_->key()); - ASSERT_EQ(test::KeyStr("a", 40, kTypeMerge), merge_helper_->keys()[0]); - ASSERT_EQ("v", merge_helper_->values()[0]); - ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[1]); - ASSERT_EQ("v2", merge_helper_->values()[1]); - ASSERT_EQ(2U, merge_helper_->keys().size()); - ASSERT_EQ(2U, merge_helper_->values().size()); -} - -// A single operand can not be merged. -TEST_F(MergeHelperTest, SingleOperand) { - merge_op_ = MergeOperators::CreateUInt64AddOperator(); - - AddKeyVal("a", 50, kTypeMerge, test::EncodeInt(1U)); - - ASSERT_TRUE(Run(31, false).IsMergeInProgress()); - ASSERT_FALSE(iter_->Valid()); - ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[0]); - ASSERT_EQ(test::EncodeInt(1U), merge_helper_->values()[0]); - ASSERT_EQ(1U, merge_helper_->keys().size()); - ASSERT_EQ(1U, merge_helper_->values().size()); -} - -// Merging with a deletion turns the deletion into a value -TEST_F(MergeHelperTest, MergeDeletion) { - merge_op_ = MergeOperators::CreateUInt64AddOperator(); - - AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U)); - AddKeyVal("a", 20, kTypeDeletion, ""); - - ASSERT_TRUE(Run(15, false).ok()); - ASSERT_FALSE(iter_->Valid()); - ASSERT_EQ(test::KeyStr("a", 30, kTypeValue), merge_helper_->keys()[0]); - ASSERT_EQ(test::EncodeInt(3U), merge_helper_->values()[0]); - ASSERT_EQ(1U, merge_helper_->keys().size()); - ASSERT_EQ(1U, merge_helper_->values().size()); -} - -// The merge helper stops upon encountering a corrupt key -TEST_F(MergeHelperTest, CorruptKey) { - merge_op_ = MergeOperators::CreateUInt64AddOperator(); - - AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U)); - AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(1U)); - // Corrupt key - AddKeyVal("a", 20, kTypeDeletion, "", true); // <- iter_ after merge - - ASSERT_TRUE(Run(15, false).IsMergeInProgress()); - ASSERT_EQ(ks_[2], iter_->key()); - ASSERT_EQ(test::KeyStr("a", 30, kTypeMerge), merge_helper_->keys()[0]); - ASSERT_EQ(test::EncodeInt(4U), merge_helper_->values()[0]); - ASSERT_EQ(1U, merge_helper_->keys().size()); - ASSERT_EQ(1U, merge_helper_->values().size()); -} - -// The compaction filter is called on every merge operand -TEST_F(MergeHelperTest, FilterMergeOperands) { - merge_op_ = MergeOperators::CreateUInt64AddOperator(); - filter_.reset(new test::FilterNumber(5U)); - - AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U)); - AddKeyVal("a", 
29, kTypeMerge, test::EncodeInt(5U)); // Filtered - AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(3U)); - AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(1U)); - AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U)); // Filtered - AddKeyVal("a", 25, kTypeValue, test::EncodeInt(1U)); - - ASSERT_TRUE(Run(15, false).ok()); - ASSERT_FALSE(iter_->Valid()); - MergeOutputIterator merge_output_iter(merge_helper_.get()); - merge_output_iter.SeekToFirst(); - ASSERT_EQ(test::KeyStr("a", 30, kTypeValue), - merge_output_iter.key().ToString()); - ASSERT_EQ(test::EncodeInt(8U), merge_output_iter.value().ToString()); - merge_output_iter.Next(); - ASSERT_FALSE(merge_output_iter.Valid()); -} - -TEST_F(MergeHelperTest, FilterAllMergeOperands) { - merge_op_ = MergeOperators::CreateUInt64AddOperator(); - filter_.reset(new test::FilterNumber(5U)); - - AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(5U)); - AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(5U)); - AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(5U)); - AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(5U)); - AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U)); - AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(5U)); - - // filtered out all - ASSERT_TRUE(Run(15, false).ok()); - ASSERT_FALSE(iter_->Valid()); - MergeOutputIterator merge_output_iter(merge_helper_.get()); - merge_output_iter.SeekToFirst(); - ASSERT_FALSE(merge_output_iter.Valid()); - - // we have one operand that will survive because it's a delete - AddKeyVal("a", 24, kTypeDeletion, test::EncodeInt(5U)); - AddKeyVal("b", 23, kTypeValue, test::EncodeInt(5U)); - ASSERT_TRUE(Run(15, true).ok()); - merge_output_iter = MergeOutputIterator(merge_helper_.get()); - ASSERT_TRUE(iter_->Valid()); - merge_output_iter.SeekToFirst(); - ASSERT_FALSE(merge_output_iter.Valid()); - - // when all merge operands are filtered out, we leave the iterator pointing to - // the Put/Delete that survived - ASSERT_EQ(test::KeyStr("a", 24, kTypeDeletion), iter_->key().ToString()); - ASSERT_EQ(test::EncodeInt(5U), iter_->value().ToString()); -} - -// Make sure that merge operands are filtered at the beginning -TEST_F(MergeHelperTest, FilterFirstMergeOperand) { - merge_op_ = MergeOperators::CreateUInt64AddOperator(); - filter_.reset(new test::FilterNumber(5U)); - - AddKeyVal("a", 31, kTypeMerge, test::EncodeInt(5U)); // Filtered - AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(5U)); // Filtered - AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(2U)); - AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(1U)); - AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(3U)); - AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U)); // Filtered - AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(5U)); // Filtered - AddKeyVal("b", 24, kTypeValue, test::EncodeInt(5U)); // next user key - - ASSERT_OK(Run(15, true)); - ASSERT_TRUE(iter_->Valid()); - MergeOutputIterator merge_output_iter(merge_helper_.get()); - merge_output_iter.SeekToFirst(); - // sequence number is 29 here, because the first merge operand got filtered - // out - ASSERT_EQ(test::KeyStr("a", 29, kTypeValue), - merge_output_iter.key().ToString()); - ASSERT_EQ(test::EncodeInt(6U), merge_output_iter.value().ToString()); - merge_output_iter.Next(); - ASSERT_FALSE(merge_output_iter.Valid()); - - // make sure that we're passing user keys into the filter - ASSERT_EQ("a", filter_->last_merge_operand_key()); -} - -// Make sure that merge operands are not filtered out if there's a snapshot -// pointing at them -TEST_F(MergeHelperTest, DontFilterMergeOperandsBeforeSnapshotTest) { - merge_op_ = 
MergeOperators::CreateUInt64AddOperator(); - filter_.reset(new test::FilterNumber(5U)); - - AddKeyVal("a", 31, kTypeMerge, test::EncodeInt(5U)); - AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(5U)); - AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(2U)); - AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(1U)); - AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(3U)); - AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U)); - AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(5U)); - AddKeyVal("b", 24, kTypeValue, test::EncodeInt(5U)); - - ASSERT_OK(Run(15, true, 32)); - ASSERT_TRUE(iter_->Valid()); - MergeOutputIterator merge_output_iter(merge_helper_.get()); - merge_output_iter.SeekToFirst(); - ASSERT_EQ(test::KeyStr("a", 31, kTypeValue), - merge_output_iter.key().ToString()); - ASSERT_EQ(test::EncodeInt(26U), merge_output_iter.value().ToString()); - merge_output_iter.Next(); - ASSERT_FALSE(merge_output_iter.Valid()); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/merge_test.cc b/db/merge_test.cc deleted file mode 100644 index 6d1333e55..000000000 --- a/db/merge_test.cc +++ /dev/null @@ -1,621 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#include - -#include -#include - -#include "db/db_impl/db_impl.h" -#include "db/dbformat.h" -#include "db/write_batch_internal.h" -#include "port/stack_trace.h" -#include "rocksdb/cache.h" -#include "rocksdb/comparator.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/merge_operator.h" -#include "rocksdb/utilities/db_ttl.h" -#include "test_util/testharness.h" -#include "util/coding.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -bool use_compression; - -class MergeTest : public testing::Test {}; - -size_t num_merge_operator_calls; -void resetNumMergeOperatorCalls() { num_merge_operator_calls = 0; } - -size_t num_partial_merge_calls; -void resetNumPartialMergeCalls() { num_partial_merge_calls = 0; } - -class CountMergeOperator : public AssociativeMergeOperator { - public: - CountMergeOperator() { - mergeOperator_ = MergeOperators::CreateUInt64AddOperator(); - } - - bool Merge(const Slice& key, const Slice* existing_value, const Slice& value, - std::string* new_value, Logger* logger) const override { - assert(new_value->empty()); - ++num_merge_operator_calls; - if (existing_value == nullptr) { - new_value->assign(value.data(), value.size()); - return true; - } - - return mergeOperator_->PartialMerge(key, *existing_value, value, new_value, - logger); - } - - bool PartialMergeMulti(const Slice& key, - const std::deque& operand_list, - std::string* new_value, - Logger* logger) const override { - assert(new_value->empty()); - ++num_partial_merge_calls; - return mergeOperator_->PartialMergeMulti(key, operand_list, new_value, - logger); - } - - const char* Name() const override { return "UInt64AddOperator"; } - - private: - std::shared_ptr mergeOperator_; -}; - -class EnvMergeTest : public EnvWrapper { - public: - EnvMergeTest() : EnvWrapper(Env::Default()) {} - static const char* kClassName() { return "MergeEnv"; } - const char* Name() const override { return kClassName(); } - // ~EnvMergeTest() override {} - - uint64_t 
NowNanos() override { - ++now_nanos_count_; - return target()->NowNanos(); - } - - static uint64_t now_nanos_count_; - - static std::unique_ptr singleton_; - - static EnvMergeTest* GetInstance() { - if (nullptr == singleton_) singleton_.reset(new EnvMergeTest); - return singleton_.get(); - } -}; - -uint64_t EnvMergeTest::now_nanos_count_{0}; -std::unique_ptr EnvMergeTest::singleton_; - -std::shared_ptr OpenDb(const std::string& dbname, const bool ttl = false, - const size_t max_successive_merges = 0) { - DB* db; - Options options; - options.create_if_missing = true; - options.merge_operator = std::make_shared(); - options.max_successive_merges = max_successive_merges; - options.env = EnvMergeTest::GetInstance(); - EXPECT_OK(DestroyDB(dbname, Options())); - Status s; - if (ttl) { - DBWithTTL* db_with_ttl; - s = DBWithTTL::Open(options, dbname, &db_with_ttl); - db = db_with_ttl; - } else { - s = DB::Open(options, dbname, &db); - } - EXPECT_OK(s); - assert(s.ok()); - // Allowed to call NowNanos during DB creation (in GenerateRawUniqueId() for - // session ID) - EnvMergeTest::now_nanos_count_ = 0; - return std::shared_ptr(db); -} - -// Imagine we are maintaining a set of uint64 counters. -// Each counter has a distinct name. And we would like -// to support four high level operations: -// set, add, get and remove -// This is a quick implementation without a Merge operation. -class Counters { - protected: - std::shared_ptr db_; - - WriteOptions put_option_; - ReadOptions get_option_; - WriteOptions delete_option_; - - uint64_t default_; - - public: - explicit Counters(std::shared_ptr db, uint64_t defaultCount = 0) - : db_(db), - put_option_(), - get_option_(), - delete_option_(), - default_(defaultCount) { - assert(db_); - } - - virtual ~Counters() {} - - // public interface of Counters. - // All four functions return false - // if the underlying level db operation failed. 
- - // mapped to a levedb Put - bool set(const std::string& key, uint64_t value) { - // just treat the internal rep of int64 as the string - char buf[sizeof(value)]; - EncodeFixed64(buf, value); - Slice slice(buf, sizeof(value)); - auto s = db_->Put(put_option_, key, slice); - - if (s.ok()) { - return true; - } else { - std::cerr << s.ToString() << std::endl; - return false; - } - } - - // mapped to a rocksdb Delete - bool remove(const std::string& key) { - auto s = db_->Delete(delete_option_, key); - - if (s.ok()) { - return true; - } else { - std::cerr << s.ToString() << std::endl; - return false; - } - } - - // mapped to a rocksdb Get - bool get(const std::string& key, uint64_t* value) { - std::string str; - auto s = db_->Get(get_option_, key, &str); - - if (s.IsNotFound()) { - // return default value if not found; - *value = default_; - return true; - } else if (s.ok()) { - // deserialization - if (str.size() != sizeof(uint64_t)) { - std::cerr << "value corruption\n"; - return false; - } - *value = DecodeFixed64(&str[0]); - return true; - } else { - std::cerr << s.ToString() << std::endl; - return false; - } - } - - // 'add' is implemented as get -> modify -> set - // An alternative is a single merge operation, see MergeBasedCounters - virtual bool add(const std::string& key, uint64_t value) { - uint64_t base = default_; - return get(key, &base) && set(key, base + value); - } - - // convenience functions for testing - void assert_set(const std::string& key, uint64_t value) { - assert(set(key, value)); - } - - void assert_remove(const std::string& key) { assert(remove(key)); } - - uint64_t assert_get(const std::string& key) { - uint64_t value = default_; - int result = get(key, &value); - assert(result); - if (result == 0) exit(1); // Disable unused variable warning. - return value; - } - - void assert_add(const std::string& key, uint64_t value) { - int result = add(key, value); - assert(result); - if (result == 0) exit(1); // Disable unused variable warning. 
- } -}; - -// Implement 'add' directly with the new Merge operation -class MergeBasedCounters : public Counters { - private: - WriteOptions merge_option_; // for merge - - public: - explicit MergeBasedCounters(std::shared_ptr db, uint64_t defaultCount = 0) - : Counters(db, defaultCount), merge_option_() {} - - // mapped to a rocksdb Merge operation - bool add(const std::string& key, uint64_t value) override { - char encoded[sizeof(uint64_t)]; - EncodeFixed64(encoded, value); - Slice slice(encoded, sizeof(uint64_t)); - auto s = db_->Merge(merge_option_, key, slice); - - if (s.ok()) { - return true; - } else { - std::cerr << s.ToString() << std::endl; - return false; - } - } -}; - -void dumpDb(DB* db) { - auto it = std::unique_ptr(db->NewIterator(ReadOptions())); - for (it->SeekToFirst(); it->Valid(); it->Next()) { - // uint64_t value = DecodeFixed64(it->value().data()); - // std::cout << it->key().ToString() << ": " << value << std::endl; - } - assert(it->status().ok()); // Check for any errors found during the scan -} - -void testCounters(Counters& counters, DB* db, bool test_compaction) { - FlushOptions o; - o.wait = true; - - counters.assert_set("a", 1); - - if (test_compaction) { - ASSERT_OK(db->Flush(o)); - } - - ASSERT_EQ(counters.assert_get("a"), 1); - - counters.assert_remove("b"); - - // defaut value is 0 if non-existent - ASSERT_EQ(counters.assert_get("b"), 0); - - counters.assert_add("a", 2); - - if (test_compaction) { - ASSERT_OK(db->Flush(o)); - } - - // 1+2 = 3 - ASSERT_EQ(counters.assert_get("a"), 3); - - dumpDb(db); - - // 1+...+49 = ? - uint64_t sum = 0; - for (int i = 1; i < 50; i++) { - counters.assert_add("b", i); - sum += i; - } - ASSERT_EQ(counters.assert_get("b"), sum); - - dumpDb(db); - - if (test_compaction) { - ASSERT_OK(db->Flush(o)); - - ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - dumpDb(db); - - ASSERT_EQ(counters.assert_get("a"), 3); - ASSERT_EQ(counters.assert_get("b"), sum); - } -} - -void testCountersWithFlushAndCompaction(Counters& counters, DB* db) { - ASSERT_OK(db->Put({}, "1", "1")); - ASSERT_OK(db->Flush(FlushOptions())); - - std::atomic cnt{0}; - const auto get_thread_id = [&cnt]() { - thread_local int thread_id{cnt++}; - return thread_id; - }; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:BeforeWriterWaiting", [&](void* /*arg*/) { - int thread_id = get_thread_id(); - if (1 == thread_id) { - TEST_SYNC_POINT( - "testCountersWithFlushAndCompaction::bg_compact_thread:0"); - } else if (2 == thread_id) { - TEST_SYNC_POINT( - "testCountersWithFlushAndCompaction::bg_flush_thread:0"); - } - }); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WriteManifest", [&](void* /*arg*/) { - int thread_id = get_thread_id(); - if (0 == thread_id) { - TEST_SYNC_POINT( - "testCountersWithFlushAndCompaction::set_options_thread:0"); - TEST_SYNC_POINT( - "testCountersWithFlushAndCompaction::set_options_thread:1"); - } - }); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::LogAndApply:WakeUpAndDone", [&](void* arg) { - auto* mutex = reinterpret_cast(arg); - mutex->AssertHeld(); - int thread_id = get_thread_id(); - ASSERT_EQ(2, thread_id); - mutex->Unlock(); - TEST_SYNC_POINT( - "testCountersWithFlushAndCompaction::bg_flush_thread:1"); - TEST_SYNC_POINT( - "testCountersWithFlushAndCompaction::bg_flush_thread:2"); - mutex->Lock(); - }); - SyncPoint::GetInstance()->LoadDependency({ - 
{"testCountersWithFlushAndCompaction::set_options_thread:0", - "testCountersWithCompactionAndFlush:BeforeCompact"}, - {"testCountersWithFlushAndCompaction::bg_compact_thread:0", - "testCountersWithFlushAndCompaction:BeforeIncCounters"}, - {"testCountersWithFlushAndCompaction::bg_flush_thread:0", - "testCountersWithFlushAndCompaction::set_options_thread:1"}, - {"testCountersWithFlushAndCompaction::bg_flush_thread:1", - "testCountersWithFlushAndCompaction:BeforeVerification"}, - {"testCountersWithFlushAndCompaction:AfterGet", - "testCountersWithFlushAndCompaction::bg_flush_thread:2"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - - port::Thread set_options_thread([&]() { - ASSERT_OK(reinterpret_cast(db)->SetOptions( - {{"disable_auto_compactions", "false"}})); - }); - TEST_SYNC_POINT("testCountersWithCompactionAndFlush:BeforeCompact"); - port::Thread compact_thread([&]() { - ASSERT_OK(reinterpret_cast(db)->CompactRange( - CompactRangeOptions(), db->DefaultColumnFamily(), nullptr, nullptr)); - }); - - TEST_SYNC_POINT("testCountersWithFlushAndCompaction:BeforeIncCounters"); - counters.add("test-key", 1); - - FlushOptions flush_opts; - flush_opts.wait = false; - ASSERT_OK(db->Flush(flush_opts)); - - TEST_SYNC_POINT("testCountersWithFlushAndCompaction:BeforeVerification"); - std::string expected; - PutFixed64(&expected, 1); - std::string actual; - Status s = db->Get(ReadOptions(), "test-key", &actual); - TEST_SYNC_POINT("testCountersWithFlushAndCompaction:AfterGet"); - set_options_thread.join(); - compact_thread.join(); - ASSERT_OK(s); - ASSERT_EQ(expected, actual); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -void testSuccessiveMerge(Counters& counters, size_t max_num_merges, - size_t num_merges) { - counters.assert_remove("z"); - uint64_t sum = 0; - - for (size_t i = 1; i <= num_merges; ++i) { - resetNumMergeOperatorCalls(); - counters.assert_add("z", i); - sum += i; - - if (i % (max_num_merges + 1) == 0) { - ASSERT_EQ(num_merge_operator_calls, max_num_merges + 1); - } else { - ASSERT_EQ(num_merge_operator_calls, 0); - } - - resetNumMergeOperatorCalls(); - ASSERT_EQ(counters.assert_get("z"), sum); - ASSERT_EQ(num_merge_operator_calls, i % (max_num_merges + 1)); - } -} - -void testPartialMerge(Counters* counters, DB* db, size_t max_merge, - size_t min_merge, size_t count) { - FlushOptions o; - o.wait = true; - - // Test case 1: partial merge should be called when the number of merge - // operands exceeds the threshold. - uint64_t tmp_sum = 0; - resetNumPartialMergeCalls(); - for (size_t i = 1; i <= count; i++) { - counters->assert_add("b", i); - tmp_sum += i; - } - ASSERT_OK(db->Flush(o)); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(tmp_sum, counters->assert_get("b")); - if (count > max_merge) { - // in this case, FullMerge should be called instead. - ASSERT_EQ(num_partial_merge_calls, 0U); - } else { - // if count >= min_merge, then partial merge should be called once. - ASSERT_EQ((count >= min_merge), (num_partial_merge_calls == 1)); - } - - // Test case 2: partial merge should not be called when a put is found. 
- resetNumPartialMergeCalls(); - tmp_sum = 0; - ASSERT_OK(db->Put(ROCKSDB_NAMESPACE::WriteOptions(), "c", "10")); - for (size_t i = 1; i <= count; i++) { - counters->assert_add("c", i); - tmp_sum += i; - } - ASSERT_OK(db->Flush(o)); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ(tmp_sum, counters->assert_get("c")); - ASSERT_EQ(num_partial_merge_calls, 0U); - // NowNanos was previously called in MergeHelper::FilterMerge(), which - // harmed performance. - ASSERT_EQ(EnvMergeTest::now_nanos_count_, 0U); -} - -void testSingleBatchSuccessiveMerge(DB* db, size_t max_num_merges, - size_t num_merges) { - ASSERT_GT(num_merges, max_num_merges); - - Slice key("BatchSuccessiveMerge"); - uint64_t merge_value = 1; - char buf[sizeof(merge_value)]; - EncodeFixed64(buf, merge_value); - Slice merge_value_slice(buf, sizeof(merge_value)); - - // Create the batch - WriteBatch batch; - for (size_t i = 0; i < num_merges; ++i) { - ASSERT_OK(batch.Merge(key, merge_value_slice)); - } - - // Apply to memtable and count the number of merges - resetNumMergeOperatorCalls(); - ASSERT_OK(db->Write(WriteOptions(), &batch)); - ASSERT_EQ( - num_merge_operator_calls, - static_cast(num_merges - (num_merges % (max_num_merges + 1)))); - - // Get the value - resetNumMergeOperatorCalls(); - std::string get_value_str; - ASSERT_OK(db->Get(ReadOptions(), key, &get_value_str)); - assert(get_value_str.size() == sizeof(uint64_t)); - uint64_t get_value = DecodeFixed64(&get_value_str[0]); - ASSERT_EQ(get_value, num_merges * merge_value); - ASSERT_EQ(num_merge_operator_calls, - static_cast((num_merges % (max_num_merges + 1)))); -} - -void runTest(const std::string& dbname, const bool use_ttl = false) { - { - auto db = OpenDb(dbname, use_ttl); - - { - Counters counters(db, 0); - testCounters(counters, db.get(), true); - } - - { - MergeBasedCounters counters(db, 0); - testCounters(counters, db.get(), use_compression); - } - } - - ASSERT_OK(DestroyDB(dbname, Options())); - - { - size_t max_merge = 5; - auto db = OpenDb(dbname, use_ttl, max_merge); - MergeBasedCounters counters(db, 0); - testCounters(counters, db.get(), use_compression); - testSuccessiveMerge(counters, max_merge, max_merge * 2); - testSingleBatchSuccessiveMerge(db.get(), 5, 7); - ASSERT_OK(db->Close()); - ASSERT_OK(DestroyDB(dbname, Options())); - } - - { - size_t max_merge = 100; - // Min merge is hard-coded to 2. 
- uint32_t min_merge = 2; - for (uint32_t count = min_merge - 1; count <= min_merge + 1; count++) { - auto db = OpenDb(dbname, use_ttl, max_merge); - MergeBasedCounters counters(db, 0); - testPartialMerge(&counters, db.get(), max_merge, min_merge, count); - ASSERT_OK(db->Close()); - ASSERT_OK(DestroyDB(dbname, Options())); - } - { - auto db = OpenDb(dbname, use_ttl, max_merge); - MergeBasedCounters counters(db, 0); - testPartialMerge(&counters, db.get(), max_merge, min_merge, - min_merge * 10); - ASSERT_OK(db->Close()); - ASSERT_OK(DestroyDB(dbname, Options())); - } - } - - { - { - auto db = OpenDb(dbname); - MergeBasedCounters counters(db, 0); - counters.add("test-key", 1); - counters.add("test-key", 1); - counters.add("test-key", 1); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - } - - DB* reopen_db; - ASSERT_OK(DB::Open(Options(), dbname, &reopen_db)); - std::string value; - ASSERT_NOK(reopen_db->Get(ReadOptions(), "test-key", &value)); - delete reopen_db; - ASSERT_OK(DestroyDB(dbname, Options())); - } - - /* Temporary remove this test - { - std::cout << "Test merge-operator not set after reopen (recovery case)\n"; - { - auto db = OpenDb(dbname); - MergeBasedCounters counters(db, 0); - counters.add("test-key", 1); - counters.add("test-key", 1); - counters.add("test-key", 1); - } - - DB* reopen_db; - ASSERT_TRUE(DB::Open(Options(), dbname, &reopen_db).IsInvalidArgument()); - } - */ -} - -TEST_F(MergeTest, MergeDbTest) { - runTest(test::PerThreadDBPath("merge_testdb")); -} - -TEST_F(MergeTest, MergeDbTtlTest) { - runTest(test::PerThreadDBPath("merge_testdbttl"), - true); // Run test on TTL database -} - -TEST_F(MergeTest, MergeWithCompactionAndFlush) { - const std::string dbname = - test::PerThreadDBPath("merge_with_compaction_and_flush"); - { - auto db = OpenDb(dbname); - { - MergeBasedCounters counters(db, 0); - testCountersWithFlushAndCompaction(counters, db.get()); - } - } - ASSERT_OK(DestroyDB(dbname, Options())); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::use_compression = false; - if (argc > 1) { - ROCKSDB_NAMESPACE::use_compression = true; - } - - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/obsolete_files_test.cc b/db/obsolete_files_test.cc deleted file mode 100644 index 03f38c09f..000000000 --- a/db/obsolete_files_test.cc +++ /dev/null @@ -1,317 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- - -#include - -#include -#include -#include -#include - -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "db/version_set.h" -#include "db/write_batch_internal.h" -#include "file/filename.h" -#include "port/stack_trace.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/transaction_log.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class ObsoleteFilesTest : public DBTestBase { - public: - ObsoleteFilesTest() - : DBTestBase("obsolete_files_test", /*env_do_fsync=*/true), - wal_dir_(dbname_ + "/wal_files") {} - - void AddKeys(int numkeys, int startkey) { - WriteOptions options; - options.sync = false; - for (int i = startkey; i < (numkeys + startkey); i++) { - std::string temp = std::to_string(i); - Slice key(temp); - Slice value(temp); - ASSERT_OK(db_->Put(options, key, value)); - } - } - - void createLevel0Files(int numFiles, int numKeysPerFile) { - int startKey = 0; - for (int i = 0; i < numFiles; i++) { - AddKeys(numKeysPerFile, startKey); - startKey += numKeysPerFile; - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_OK( - dbfull()->TEST_WaitForCompact()); // wait for background flush (flush - // is also a kind of compaction). - } - } - - void CheckFileTypeCounts(const std::string& dir, int required_log, - int required_sst, int required_manifest) { - std::vector filenames; - ASSERT_OK(env_->GetChildren(dir, &filenames)); - - int log_cnt = 0; - int sst_cnt = 0; - int manifest_cnt = 0; - for (auto file : filenames) { - uint64_t number; - FileType type; - if (ParseFileName(file, &number, &type)) { - log_cnt += (type == kWalFile); - sst_cnt += (type == kTableFile); - manifest_cnt += (type == kDescriptorFile); - } - } - ASSERT_EQ(required_log, log_cnt); - ASSERT_EQ(required_sst, sst_cnt); - ASSERT_EQ(required_manifest, manifest_cnt); - } - - void ReopenDB() { - Options options = CurrentOptions(); - // Trigger compaction when the number of level 0 files reaches 2. - options.create_if_missing = true; - options.level0_file_num_compaction_trigger = 2; - options.disable_auto_compactions = false; - options.delete_obsolete_files_period_micros = 0; // always do full purge - options.enable_thread_tracking = true; - options.write_buffer_size = 1024 * 1024 * 1000; - options.target_file_size_base = 1024 * 1024 * 1000; - options.max_bytes_for_level_base = 1024 * 1024 * 1000; - options.WAL_ttl_seconds = 300; // Used to test log files - options.WAL_size_limit_MB = 1024; // Used to test log files - options.wal_dir = wal_dir_; - - // Note: the following prevents an otherwise harmless data race between the - // test setup code (AddBlobFile) in ObsoleteFilesTest.BlobFiles and the - // periodic stat dumping thread. 
- options.stats_dump_period_sec = 0; - - Destroy(options); - Reopen(options); - } - - const std::string wal_dir_; -}; - -TEST_F(ObsoleteFilesTest, RaceForObsoleteFileDeletion) { - ReopenDB(); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"DBImpl::BackgroundCallCompaction:FoundObsoleteFiles", - "ObsoleteFilesTest::RaceForObsoleteFileDeletion:1"}, - {"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles", - "ObsoleteFilesTest::RaceForObsoleteFileDeletion:2"}, - }); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DeleteObsoleteFileImpl:AfterDeletion", [&](void* arg) { - Status* p_status = reinterpret_cast(arg); - ASSERT_OK(*p_status); - }); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::CloseHelper:PendingPurgeFinished", [&](void* arg) { - std::unordered_set* files_grabbed_for_purge_ptr = - reinterpret_cast*>(arg); - ASSERT_TRUE(files_grabbed_for_purge_ptr->empty()); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - createLevel0Files(2, 50000); - CheckFileTypeCounts(wal_dir_, 1, 0, 0); - - port::Thread user_thread([this]() { - JobContext jobCxt(0); - TEST_SYNC_POINT("ObsoleteFilesTest::RaceForObsoleteFileDeletion:1"); - dbfull()->TEST_LockMutex(); - dbfull()->FindObsoleteFiles(&jobCxt, true /* force=true */, - false /* no_full_scan=false */); - dbfull()->TEST_UnlockMutex(); - TEST_SYNC_POINT("ObsoleteFilesTest::RaceForObsoleteFileDeletion:2"); - dbfull()->PurgeObsoleteFiles(jobCxt); - jobCxt.Clean(); - }); - - user_thread.join(); -} - -TEST_F(ObsoleteFilesTest, DeleteObsoleteOptionsFile) { - ReopenDB(); - - createLevel0Files(2, 50000); - CheckFileTypeCounts(wal_dir_, 1, 0, 0); - - ASSERT_OK(dbfull()->DisableFileDeletions()); - for (int i = 0; i != 4; ++i) { - if (i % 2) { - ASSERT_OK(dbfull()->SetOptions(dbfull()->DefaultColumnFamily(), - {{"paranoid_file_checks", "false"}})); - } else { - ASSERT_OK(dbfull()->SetOptions(dbfull()->DefaultColumnFamily(), - {{"paranoid_file_checks", "true"}})); - } - } - ASSERT_OK(dbfull()->EnableFileDeletions(true /* force */)); - - Close(); - - std::vector files; - int opts_file_count = 0; - ASSERT_OK(env_->GetChildren(dbname_, &files)); - for (const auto& file : files) { - uint64_t file_num; - Slice dummy_info_log_name_prefix; - FileType type; - WalFileType log_type; - if (ParseFileName(file, &file_num, dummy_info_log_name_prefix, &type, - &log_type) && - type == kOptionsFile) { - opts_file_count++; - } - } - ASSERT_EQ(2, opts_file_count); -} - -TEST_F(ObsoleteFilesTest, BlobFiles) { - ReopenDB(); - - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - assert(versions->GetColumnFamilySet()); - - ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); - assert(cfd); - - const ImmutableCFOptions* const ioptions = cfd->ioptions(); - assert(ioptions); - assert(!ioptions->cf_paths.empty()); - - const std::string& path = ioptions->cf_paths.front().path; - - // Add an obsolete blob file. - constexpr uint64_t first_blob_file_number = 234; - versions->AddObsoleteBlobFile(first_blob_file_number, path); - - // Add a live blob file. 
- Version* const version = cfd->current(); - assert(version); - - VersionStorageInfo* const storage_info = version->storage_info(); - assert(storage_info); - - constexpr uint64_t second_blob_file_number = 456; - constexpr uint64_t second_total_blob_count = 100; - constexpr uint64_t second_total_blob_bytes = 2000000; - constexpr char second_checksum_method[] = "CRC32B"; - constexpr char second_checksum_value[] = "\x6d\xbd\xf2\x3a"; - - auto shared_meta = SharedBlobFileMetaData::Create( - second_blob_file_number, second_total_blob_count, second_total_blob_bytes, - second_checksum_method, second_checksum_value); - - constexpr uint64_t second_garbage_blob_count = 0; - constexpr uint64_t second_garbage_blob_bytes = 0; - - auto meta = BlobFileMetaData::Create( - std::move(shared_meta), BlobFileMetaData::LinkedSsts(), - second_garbage_blob_count, second_garbage_blob_bytes); - - storage_info->AddBlobFile(std::move(meta)); - - // Check for obsolete files and make sure the first blob file is picked up - // and grabbed for purge. The second blob file should be on the live list. - constexpr int job_id = 0; - JobContext job_context{job_id}; - - dbfull()->TEST_LockMutex(); - constexpr bool force_full_scan = false; - dbfull()->FindObsoleteFiles(&job_context, force_full_scan); - dbfull()->TEST_UnlockMutex(); - - ASSERT_TRUE(job_context.HaveSomethingToDelete()); - ASSERT_EQ(job_context.blob_delete_files.size(), 1); - ASSERT_EQ(job_context.blob_delete_files[0].GetBlobFileNumber(), - first_blob_file_number); - - const auto& files_grabbed_for_purge = - dbfull()->TEST_GetFilesGrabbedForPurge(); - ASSERT_NE(files_grabbed_for_purge.find(first_blob_file_number), - files_grabbed_for_purge.end()); - - ASSERT_EQ(job_context.blob_live.size(), 1); - ASSERT_EQ(job_context.blob_live[0], second_blob_file_number); - - // Hack the job context a bit by adding a few files to the full scan - // list and adjusting the pending file number. We add the two files - // above as well as two additional ones, where one is old - // and should be cleaned up, and the other is still pending. - constexpr uint64_t old_blob_file_number = 123; - constexpr uint64_t pending_blob_file_number = 567; - - job_context.full_scan_candidate_files.emplace_back( - BlobFileName(old_blob_file_number), path); - job_context.full_scan_candidate_files.emplace_back( - BlobFileName(first_blob_file_number), path); - job_context.full_scan_candidate_files.emplace_back( - BlobFileName(second_blob_file_number), path); - job_context.full_scan_candidate_files.emplace_back( - BlobFileName(pending_blob_file_number), path); - - job_context.min_pending_output = pending_blob_file_number; - - // Purge obsolete files and make sure we purge the old file and the first file - // (and keep the second file and the pending file). 
- std::vector deleted_files; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::DeleteObsoleteFileImpl::BeforeDeletion", [&](void* arg) { - const std::string* file = static_cast(arg); - assert(file); - - constexpr char blob_extension[] = ".blob"; - - if (file->find(blob_extension) != std::string::npos) { - deleted_files.emplace_back(*file); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - - dbfull()->PurgeObsoleteFiles(job_context); - job_context.Clean(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - ASSERT_EQ(files_grabbed_for_purge.find(first_blob_file_number), - files_grabbed_for_purge.end()); - - std::sort(deleted_files.begin(), deleted_files.end()); - const std::vector expected_deleted_files{ - BlobFileName(path, old_blob_file_number), - BlobFileName(path, first_blob_file_number)}; - - ASSERT_EQ(deleted_files, expected_deleted_files); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/db/options_file_test.cc b/db/options_file_test.cc deleted file mode 100644 index c3adbeb64..000000000 --- a/db/options_file_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include - -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "rocksdb/options.h" -#include "rocksdb/table.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { -class OptionsFileTest : public testing::Test { - public: - OptionsFileTest() : dbname_(test::PerThreadDBPath("options_file_test")) {} - - std::string dbname_; -}; - -namespace { -void UpdateOptionsFiles(DB* db, - std::unordered_set* filename_history, - int* options_files_count) { - std::vector filenames; - EXPECT_OK(db->GetEnv()->GetChildren(db->GetName(), &filenames)); - uint64_t number; - FileType type; - *options_files_count = 0; - for (auto filename : filenames) { - if (ParseFileName(filename, &number, &type) && type == kOptionsFile) { - filename_history->insert(filename); - (*options_files_count)++; - } - } -} - -// Verify whether the current Options Files are the latest ones. 
-void VerifyOptionsFileName( - DB* db, const std::unordered_set& past_filenames) { - std::vector filenames; - std::unordered_set current_filenames; - EXPECT_OK(db->GetEnv()->GetChildren(db->GetName(), &filenames)); - uint64_t number; - FileType type; - for (auto filename : filenames) { - if (ParseFileName(filename, &number, &type) && type == kOptionsFile) { - current_filenames.insert(filename); - } - } - for (auto past_filename : past_filenames) { - if (current_filenames.find(past_filename) != current_filenames.end()) { - continue; - } - for (auto filename : current_filenames) { - ASSERT_GT(filename, past_filename); - } - } -} -} // anonymous namespace - -TEST_F(OptionsFileTest, NumberOfOptionsFiles) { - const int kReopenCount = 20; - Options opt; - opt.create_if_missing = true; - ASSERT_OK(DestroyDB(dbname_, opt)); - std::unordered_set filename_history; - DB* db; - for (int i = 0; i < kReopenCount; ++i) { - ASSERT_OK(DB::Open(opt, dbname_, &db)); - int num_options_files = 0; - UpdateOptionsFiles(db, &filename_history, &num_options_files); - ASSERT_GT(num_options_files, 0); - ASSERT_LE(num_options_files, 2); - // Make sure we always keep the latest option files. - VerifyOptionsFileName(db, filename_history); - delete db; - } -} - -TEST_F(OptionsFileTest, OptionsFileName) { - const uint64_t kOptionsFileNum = 12345; - uint64_t number; - FileType type; - - auto options_file_name = OptionsFileName("", kOptionsFileNum); - ASSERT_TRUE(ParseFileName(options_file_name, &number, &type, nullptr)); - ASSERT_EQ(type, kOptionsFile); - ASSERT_EQ(number, kOptionsFileNum); - - const uint64_t kTempOptionsFileNum = 54352; - auto temp_options_file_name = TempOptionsFileName("", kTempOptionsFileNum); - ASSERT_TRUE(ParseFileName(temp_options_file_name, &number, &type, nullptr)); - ASSERT_NE(temp_options_file_name.find(kTempFileNameSuffix), - std::string::npos); - ASSERT_EQ(type, kTempFile); - ASSERT_EQ(number, kTempOptionsFileNum); -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { -#if !(defined NDEBUG) || !defined(OS_WIN) - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -#else - return 0; -#endif // !(defined NDEBUG) || !defined(OS_WIN) -} diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc deleted file mode 100644 index 3e78dbe27..000000000 --- a/db/perf_context_test.cc +++ /dev/null @@ -1,1157 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
-// -#include "rocksdb/perf_context.h" - -#include -#include -#include -#include - -#include "monitoring/histogram.h" -#include "monitoring/instrumented_mutex.h" -#include "monitoring/perf_context_imp.h" -#include "monitoring/thread_status_util.h" -#include "port/port.h" -#include "rocksdb/db.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/system_clock.h" -#include "test_util/testharness.h" -#include "util/stop_watch.h" -#include "util/string_util.h" -#include "utilities/merge_operators.h" - -bool FLAGS_random_key = false; -bool FLAGS_use_set_based_memetable = false; -int FLAGS_total_keys = 100; -int FLAGS_write_buffer_size = 1000000000; -int FLAGS_max_write_buffer_number = 8; -int FLAGS_min_write_buffer_number_to_merge = 7; -bool FLAGS_verbose = false; - -// Path to the database on file system -const std::string kDbName = - ROCKSDB_NAMESPACE::test::PerThreadDBPath("perf_context_test"); - -namespace ROCKSDB_NAMESPACE { - -std::shared_ptr OpenDb(bool read_only = false) { - DB* db; - Options options; - options.create_if_missing = true; - options.max_open_files = -1; - options.write_buffer_size = FLAGS_write_buffer_size; - options.max_write_buffer_number = FLAGS_max_write_buffer_number; - options.min_write_buffer_number_to_merge = - FLAGS_min_write_buffer_number_to_merge; - - if (FLAGS_use_set_based_memetable) { - options.prefix_extractor.reset( - ROCKSDB_NAMESPACE::NewFixedPrefixTransform(0)); - options.memtable_factory.reset(NewHashSkipListRepFactory()); - } - - Status s; - if (!read_only) { - s = DB::Open(options, kDbName, &db); - } else { - s = DB::OpenForReadOnly(options, kDbName, &db); - } - EXPECT_OK(s); - return std::shared_ptr(db); -} - -class PerfContextTest : public testing::Test {}; - -TEST_F(PerfContextTest, SeekIntoDeletion) { - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - - for (int i = 0; i < FLAGS_total_keys; ++i) { - std::string key = "k" + std::to_string(i); - std::string value = "v" + std::to_string(i); - - ASSERT_OK(db->Put(write_options, key, value)); - } - - for (int i = 0; i < FLAGS_total_keys - 1; ++i) { - std::string key = "k" + std::to_string(i); - ASSERT_OK(db->Delete(write_options, key)); - } - - HistogramImpl hist_get; - HistogramImpl hist_get_time; - for (int i = 0; i < FLAGS_total_keys - 1; ++i) { - std::string key = "k" + std::to_string(i); - std::string value; - - get_perf_context()->Reset(); - StopWatchNano timer(SystemClock::Default().get()); - timer.Start(); - auto status = db->Get(read_options, key, &value); - auto elapsed_nanos = timer.ElapsedNanos(); - ASSERT_TRUE(status.IsNotFound()); - hist_get.Add(get_perf_context()->user_key_comparison_count); - hist_get_time.Add(elapsed_nanos); - } - - if (FLAGS_verbose) { - std::cout << "Get user key comparison: \n" - << hist_get.ToString() << "Get time: \n" - << hist_get_time.ToString(); - } - - { - HistogramImpl hist_seek_to_first; - std::unique_ptr iter(db->NewIterator(read_options)); - - get_perf_context()->Reset(); - StopWatchNano timer(SystemClock::Default().get(), true); - iter->SeekToFirst(); - hist_seek_to_first.Add(get_perf_context()->user_key_comparison_count); - auto elapsed_nanos = timer.ElapsedNanos(); - - if (FLAGS_verbose) { - std::cout << "SeekToFirst user key comparison: \n" - << hist_seek_to_first.ToString() << "ikey skipped: " - << get_perf_context()->internal_key_skipped_count << "\n" - << "idelete skipped: " - << get_perf_context()->internal_delete_skipped_count << "\n" 
- << "elapsed: " << elapsed_nanos << "\n"; - } - } - - HistogramImpl hist_seek; - for (int i = 0; i < FLAGS_total_keys; ++i) { - std::unique_ptr iter(db->NewIterator(read_options)); - std::string key = "k" + std::to_string(i); - - get_perf_context()->Reset(); - StopWatchNano timer(SystemClock::Default().get(), true); - iter->Seek(key); - auto elapsed_nanos = timer.ElapsedNanos(); - hist_seek.Add(get_perf_context()->user_key_comparison_count); - if (FLAGS_verbose) { - std::cout << "seek cmp: " << get_perf_context()->user_key_comparison_count - << " ikey skipped " - << get_perf_context()->internal_key_skipped_count - << " idelete skipped " - << get_perf_context()->internal_delete_skipped_count - << " elapsed: " << elapsed_nanos << "ns\n"; - } - - get_perf_context()->Reset(); - ASSERT_TRUE(iter->Valid()); - StopWatchNano timer2(SystemClock::Default().get(), true); - iter->Next(); - auto elapsed_nanos2 = timer2.ElapsedNanos(); - if (FLAGS_verbose) { - std::cout << "next cmp: " << get_perf_context()->user_key_comparison_count - << "elapsed: " << elapsed_nanos2 << "ns\n"; - } - } - - if (FLAGS_verbose) { - std::cout << "Seek user key comparison: \n" << hist_seek.ToString(); - } -} - -TEST_F(PerfContextTest, StopWatchNanoOverhead) { - // profile the timer cost by itself! - const int kTotalIterations = 1000000; - std::vector timings(kTotalIterations); - - StopWatchNano timer(SystemClock::Default().get(), true); - for (auto& timing : timings) { - timing = timer.ElapsedNanos(true /* reset */); - } - - HistogramImpl histogram; - for (const auto timing : timings) { - histogram.Add(timing); - } - - if (FLAGS_verbose) { - std::cout << histogram.ToString(); - } -} - -TEST_F(PerfContextTest, StopWatchOverhead) { - // profile the timer cost by itself! - const int kTotalIterations = 1000000; - uint64_t elapsed = 0; - std::vector timings(kTotalIterations); - - StopWatch timer(SystemClock::Default().get(), nullptr, 0, &elapsed); - for (auto& timing : timings) { - timing = elapsed; - } - - HistogramImpl histogram; - uint64_t prev_timing = 0; - for (const auto timing : timings) { - histogram.Add(timing - prev_timing); - prev_timing = timing; - } - - if (FLAGS_verbose) { - std::cout << histogram.ToString(); - } -} - -void ProfileQueries(bool enabled_time = false) { - ASSERT_OK(DestroyDB(kDbName, Options())); // Start this test with a fresh DB - - auto db = OpenDb(); - - WriteOptions write_options; - ReadOptions read_options; - - HistogramImpl hist_put; - - HistogramImpl hist_get; - HistogramImpl hist_get_snapshot; - HistogramImpl hist_get_memtable; - HistogramImpl hist_get_files; - HistogramImpl hist_get_post_process; - HistogramImpl hist_num_memtable_checked; - - HistogramImpl hist_mget; - HistogramImpl hist_mget_snapshot; - HistogramImpl hist_mget_memtable; - HistogramImpl hist_mget_files; - HistogramImpl hist_mget_post_process; - HistogramImpl hist_mget_num_memtable_checked; - - HistogramImpl hist_write_pre_post; - HistogramImpl hist_write_wal_time; - HistogramImpl hist_write_memtable_time; - HistogramImpl hist_write_delay_time; - HistogramImpl hist_write_thread_wait_nanos; - HistogramImpl hist_write_scheduling_time; - - uint64_t total_db_mutex_nanos = 0; - - if (FLAGS_verbose) { - std::cout << "Inserting " << FLAGS_total_keys << " key/value pairs\n...\n"; - } - - std::vector keys; - const int kFlushFlag = -1; - for (int i = 0; i < FLAGS_total_keys; ++i) { - keys.push_back(i); - if (i == FLAGS_total_keys / 2) { - // Issuing a flush in the middle. 
- keys.push_back(kFlushFlag); - } - } - - if (FLAGS_random_key) { - RandomShuffle(std::begin(keys), std::end(keys)); - } -#ifndef NDEBUG - ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 1U); -#endif - int num_mutex_waited = 0; - for (const int i : keys) { - if (i == kFlushFlag) { - FlushOptions fo; - db->Flush(fo); - continue; - } - - std::string key = "k" + std::to_string(i); - std::string value = "v" + std::to_string(i); - - std::vector values; - - get_perf_context()->Reset(); - ASSERT_OK(db->Put(write_options, key, value)); - if (++num_mutex_waited > 3) { -#ifndef NDEBUG - ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0U); -#endif - } - hist_write_pre_post.Add( - get_perf_context()->write_pre_and_post_process_time); - hist_write_wal_time.Add(get_perf_context()->write_wal_time); - hist_write_memtable_time.Add(get_perf_context()->write_memtable_time); - hist_write_delay_time.Add(get_perf_context()->write_delay_time); - hist_write_thread_wait_nanos.Add( - get_perf_context()->write_thread_wait_nanos); - hist_write_scheduling_time.Add( - get_perf_context()->write_scheduling_flushes_compactions_time); - hist_put.Add(get_perf_context()->user_key_comparison_count); - total_db_mutex_nanos += get_perf_context()->db_mutex_lock_nanos; - } -#ifndef NDEBUG - ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0U); -#endif - - for (const int i : keys) { - if (i == kFlushFlag) { - continue; - } - std::string key = "k" + std::to_string(i); - std::string expected_value = "v" + std::to_string(i); - std::string value; - - std::vector multiget_keys = {Slice(key)}; - std::vector values; - - get_perf_context()->Reset(); - ASSERT_OK(db->Get(read_options, key, &value)); - ASSERT_EQ(expected_value, value); - hist_get_snapshot.Add(get_perf_context()->get_snapshot_time); - hist_get_memtable.Add(get_perf_context()->get_from_memtable_time); - hist_get_files.Add(get_perf_context()->get_from_output_files_time); - hist_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count); - hist_get_post_process.Add(get_perf_context()->get_post_process_time); - hist_get.Add(get_perf_context()->user_key_comparison_count); - - get_perf_context()->Reset(); - auto statuses = db->MultiGet(read_options, multiget_keys, &values); - for (const auto& s : statuses) { - ASSERT_OK(s); - } - hist_mget_snapshot.Add(get_perf_context()->get_snapshot_time); - hist_mget_memtable.Add(get_perf_context()->get_from_memtable_time); - hist_mget_files.Add(get_perf_context()->get_from_output_files_time); - hist_mget_num_memtable_checked.Add( - get_perf_context()->get_from_memtable_count); - hist_mget_post_process.Add(get_perf_context()->get_post_process_time); - hist_mget.Add(get_perf_context()->user_key_comparison_count); - } - - if (FLAGS_verbose) { - std::cout << "Put user key comparison: \n" - << hist_put.ToString() << "Get user key comparison: \n" - << hist_get.ToString() << "MultiGet user key comparison: \n" - << hist_get.ToString(); - std::cout << "Put(): Pre and Post Process Time: \n" - << hist_write_pre_post.ToString() << " Writing WAL time: \n" - << hist_write_wal_time.ToString() << "\n" - << " Writing Mem Table time: \n" - << hist_write_memtable_time.ToString() << "\n" - << " Write Delay: \n" - << hist_write_delay_time.ToString() << "\n" - << " Waiting for Batch time: \n" - << hist_write_thread_wait_nanos.ToString() << "\n" - << " Scheduling Flushes and Compactions Time: \n" - << hist_write_scheduling_time.ToString() << "\n" - << " Total DB mutex nanos: \n" - << 
total_db_mutex_nanos << "\n"; - - std::cout << "Get(): Time to get snapshot: \n" - << hist_get_snapshot.ToString() - << " Time to get value from memtables: \n" - << hist_get_memtable.ToString() << "\n" - << " Time to get value from output files: \n" - << hist_get_files.ToString() << "\n" - << " Number of memtables checked: \n" - << hist_num_memtable_checked.ToString() << "\n" - << " Time to post process: \n" - << hist_get_post_process.ToString() << "\n"; - - std::cout << "MultiGet(): Time to get snapshot: \n" - << hist_mget_snapshot.ToString() - << " Time to get value from memtables: \n" - << hist_mget_memtable.ToString() << "\n" - << " Time to get value from output files: \n" - << hist_mget_files.ToString() << "\n" - << " Number of memtables checked: \n" - << hist_mget_num_memtable_checked.ToString() << "\n" - << " Time to post process: \n" - << hist_mget_post_process.ToString() << "\n"; - } - - if (enabled_time) { - ASSERT_GT(hist_get.Average(), 0); - ASSERT_GT(hist_get_snapshot.Average(), 0); - ASSERT_GT(hist_get_memtable.Average(), 0); - ASSERT_GT(hist_get_files.Average(), 0); - ASSERT_GT(hist_get_post_process.Average(), 0); - ASSERT_GT(hist_num_memtable_checked.Average(), 0); - - ASSERT_GT(hist_mget.Average(), 0); - ASSERT_GT(hist_mget_snapshot.Average(), 0); - ASSERT_GT(hist_mget_memtable.Average(), 0); - ASSERT_GT(hist_mget_files.Average(), 0); - ASSERT_GT(hist_mget_post_process.Average(), 0); - ASSERT_GT(hist_mget_num_memtable_checked.Average(), 0); - - EXPECT_GT(hist_write_pre_post.Average(), 0); - EXPECT_GT(hist_write_wal_time.Average(), 0); - EXPECT_GT(hist_write_memtable_time.Average(), 0); - EXPECT_EQ(hist_write_delay_time.Average(), 0); - EXPECT_EQ(hist_write_thread_wait_nanos.Average(), 0); - EXPECT_GT(hist_write_scheduling_time.Average(), 0); - -#ifndef NDEBUG - ASSERT_LT(total_db_mutex_nanos, 100U); -#endif - } - - db.reset(); - db = OpenDb(true); - - hist_get.Clear(); - hist_get_snapshot.Clear(); - hist_get_memtable.Clear(); - hist_get_files.Clear(); - hist_get_post_process.Clear(); - hist_num_memtable_checked.Clear(); - - hist_mget.Clear(); - hist_mget_snapshot.Clear(); - hist_mget_memtable.Clear(); - hist_mget_files.Clear(); - hist_mget_post_process.Clear(); - hist_mget_num_memtable_checked.Clear(); - - for (const int i : keys) { - if (i == kFlushFlag) { - continue; - } - std::string key = "k" + std::to_string(i); - std::string expected_value = "v" + std::to_string(i); - std::string value; - - std::vector multiget_keys = {Slice(key)}; - std::vector values; - - get_perf_context()->Reset(); - ASSERT_OK(db->Get(read_options, key, &value)); - ASSERT_EQ(expected_value, value); - hist_get_snapshot.Add(get_perf_context()->get_snapshot_time); - hist_get_memtable.Add(get_perf_context()->get_from_memtable_time); - hist_get_files.Add(get_perf_context()->get_from_output_files_time); - hist_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count); - hist_get_post_process.Add(get_perf_context()->get_post_process_time); - hist_get.Add(get_perf_context()->user_key_comparison_count); - - get_perf_context()->Reset(); - auto statuses = db->MultiGet(read_options, multiget_keys, &values); - for (const auto& s : statuses) { - ASSERT_OK(s); - } - hist_mget_snapshot.Add(get_perf_context()->get_snapshot_time); - hist_mget_memtable.Add(get_perf_context()->get_from_memtable_time); - hist_mget_files.Add(get_perf_context()->get_from_output_files_time); - hist_mget_num_memtable_checked.Add( - get_perf_context()->get_from_memtable_count); - 
hist_mget_post_process.Add(get_perf_context()->get_post_process_time); - hist_mget.Add(get_perf_context()->user_key_comparison_count); - } - - if (FLAGS_verbose) { - std::cout << "ReadOnly Get user key comparison: \n" - << hist_get.ToString() - << "ReadOnly MultiGet user key comparison: \n" - << hist_mget.ToString(); - - std::cout << "ReadOnly Get(): Time to get snapshot: \n" - << hist_get_snapshot.ToString() - << " Time to get value from memtables: \n" - << hist_get_memtable.ToString() << "\n" - << " Time to get value from output files: \n" - << hist_get_files.ToString() << "\n" - << " Number of memtables checked: \n" - << hist_num_memtable_checked.ToString() << "\n" - << " Time to post process: \n" - << hist_get_post_process.ToString() << "\n"; - - std::cout << "ReadOnly MultiGet(): Time to get snapshot: \n" - << hist_mget_snapshot.ToString() - << " Time to get value from memtables: \n" - << hist_mget_memtable.ToString() << "\n" - << " Time to get value from output files: \n" - << hist_mget_files.ToString() << "\n" - << " Number of memtables checked: \n" - << hist_mget_num_memtable_checked.ToString() << "\n" - << " Time to post process: \n" - << hist_mget_post_process.ToString() << "\n"; - } - - if (enabled_time) { - ASSERT_GT(hist_get.Average(), 0); - ASSERT_GT(hist_get_memtable.Average(), 0); - ASSERT_GT(hist_get_files.Average(), 0); - ASSERT_GT(hist_num_memtable_checked.Average(), 0); - // In read-only mode Get(), no super version operation is needed - ASSERT_EQ(hist_get_post_process.Average(), 0); - ASSERT_GT(hist_get_snapshot.Average(), 0); - - ASSERT_GT(hist_mget.Average(), 0); - ASSERT_GT(hist_mget_snapshot.Average(), 0); - ASSERT_GT(hist_mget_memtable.Average(), 0); - ASSERT_GT(hist_mget_files.Average(), 0); - ASSERT_GT(hist_mget_post_process.Average(), 0); - ASSERT_GT(hist_mget_num_memtable_checked.Average(), 0); - } -} - -TEST_F(PerfContextTest, KeyComparisonCount) { - SetPerfLevel(kEnableCount); - ProfileQueries(); - - SetPerfLevel(kDisable); - ProfileQueries(); - - SetPerfLevel(kEnableTime); - ProfileQueries(true); -} - -// make perf_context_test -// export ROCKSDB_TESTS=PerfContextTest.SeekKeyComparison -// For one memtable: -// ./perf_context_test --write_buffer_size=500000 --total_keys=10000 -// For two memtables: -// ./perf_context_test --write_buffer_size=250000 --total_keys=10000 -// Specify --random_key=1 to shuffle the key before insertion -// Results show that, for sequential insertion, worst-case Seek Key comparison -// is close to the total number of keys (linear), when there is only one -// memtable. When there are two memtables, even the avg Seek Key comparison -// starts to become linear to the input size. 
- -TEST_F(PerfContextTest, SeekKeyComparison) { - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - - if (FLAGS_verbose) { - std::cout << "Inserting " << FLAGS_total_keys << " key/value pairs\n...\n"; - } - - std::vector<int> keys; - for (int i = 0; i < FLAGS_total_keys; ++i) { - keys.push_back(i); - } - - if (FLAGS_random_key) { - RandomShuffle(std::begin(keys), std::end(keys)); - } - - HistogramImpl hist_put_time; - HistogramImpl hist_wal_time; - HistogramImpl hist_time_diff; - - SetPerfLevel(kEnableTime); - StopWatchNano timer(SystemClock::Default().get()); - for (const int i : keys) { - std::string key = "k" + std::to_string(i); - std::string value = "v" + std::to_string(i); - - get_perf_context()->Reset(); - timer.Start(); - ASSERT_OK(db->Put(write_options, key, value)); - auto put_time = timer.ElapsedNanos(); - hist_put_time.Add(put_time); - hist_wal_time.Add(get_perf_context()->write_wal_time); - hist_time_diff.Add(put_time - get_perf_context()->write_wal_time); - } - - if (FLAGS_verbose) { - std::cout << "Put time:\n" - << hist_put_time.ToString() << "WAL time:\n" - << hist_wal_time.ToString() << "time diff:\n" - << hist_time_diff.ToString(); - } - - HistogramImpl hist_seek; - HistogramImpl hist_next; - - for (int i = 0; i < FLAGS_total_keys; ++i) { - std::string key = "k" + std::to_string(i); - std::string value = "v" + std::to_string(i); - - std::unique_ptr<Iterator> iter(db->NewIterator(read_options)); - get_perf_context()->Reset(); - iter->Seek(key); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->value().ToString(), value); - hist_seek.Add(get_perf_context()->user_key_comparison_count); - } - - std::unique_ptr<Iterator> iter(db->NewIterator(read_options)); - for (iter->SeekToFirst(); iter->Valid();) { - get_perf_context()->Reset(); - iter->Next(); - hist_next.Add(get_perf_context()->user_key_comparison_count); - } - ASSERT_OK(iter->status()); - if (FLAGS_verbose) { - std::cout << "Seek:\n" - << hist_seek.ToString() << "Next:\n" - << hist_next.ToString(); - } -} - -TEST_F(PerfContextTest, DBMutexLockCounter) { - int stats_code[] = {0, static_cast<int>(DB_MUTEX_WAIT_MICROS)}; - for (PerfLevel perf_level_test : - {PerfLevel::kEnableTimeExceptForMutex, PerfLevel::kEnableTime}) { - for (int c = 0; c < 2; ++c) { - InstrumentedMutex mutex(nullptr, SystemClock::Default().get(), - stats_code[c]); - mutex.Lock(); - ROCKSDB_NAMESPACE::port::Thread child_thread([&] { - SetPerfLevel(perf_level_test); - get_perf_context()->Reset(); - ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0); - mutex.Lock(); - mutex.Unlock(); - if (perf_level_test == PerfLevel::kEnableTimeExceptForMutex || - stats_code[c] != DB_MUTEX_WAIT_MICROS) { - ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0); - } else { - // increment the counter only when it's a DB Mutex - ASSERT_GT(get_perf_context()->db_mutex_lock_nanos, 0); - } - }); - SystemClock::Default()->SleepForMicroseconds(100); - mutex.Unlock(); - child_thread.join(); - } - } -} - -TEST_F(PerfContextTest, FalseDBMutexWait) { - SetPerfLevel(kEnableTime); - int stats_code[] = {0, static_cast<int>(DB_MUTEX_WAIT_MICROS)}; - for (int c = 0; c < 2; ++c) { - InstrumentedMutex mutex(nullptr, SystemClock::Default().get(), - stats_code[c]); - InstrumentedCondVar lock(&mutex); - get_perf_context()->Reset(); - mutex.Lock(); - lock.TimedWait(100); - mutex.Unlock(); - if (stats_code[c] == static_cast<int>(DB_MUTEX_WAIT_MICROS)) { - // increment the counter only when it's a DB Mutex - 
ASSERT_GT(get_perf_context()->db_condition_wait_nanos, 0); - } else { - ASSERT_EQ(get_perf_context()->db_condition_wait_nanos, 0); - } - } -} - -TEST_F(PerfContextTest, ToString) { - get_perf_context()->Reset(); - get_perf_context()->block_read_count = 12345; - - std::string zero_included = get_perf_context()->ToString(); - ASSERT_NE(std::string::npos, zero_included.find("= 0")); - ASSERT_NE(std::string::npos, zero_included.find("= 12345")); - - std::string zero_excluded = get_perf_context()->ToString(true); - ASSERT_EQ(std::string::npos, zero_excluded.find("= 0")); - ASSERT_NE(std::string::npos, zero_excluded.find("= 12345")); -} - -TEST_F(PerfContextTest, MergeOperatorTime) { - ASSERT_OK(DestroyDB(kDbName, Options())); - DB* db; - Options options; - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - Status s = DB::Open(options, kDbName, &db); - EXPECT_OK(s); - - std::string val; - ASSERT_OK(db->Merge(WriteOptions(), "k1", "val1")); - ASSERT_OK(db->Merge(WriteOptions(), "k1", "val2")); - ASSERT_OK(db->Merge(WriteOptions(), "k1", "val3")); - ASSERT_OK(db->Merge(WriteOptions(), "k1", "val4")); - - SetPerfLevel(kEnableTime); - get_perf_context()->Reset(); - ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); -#ifdef OS_SOLARIS - for (int i = 0; i < 100; i++) { - ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); - } -#endif - EXPECT_GT(get_perf_context()->merge_operator_time_nanos, 0); - - ASSERT_OK(db->Flush(FlushOptions())); - - get_perf_context()->Reset(); - ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); -#ifdef OS_SOLARIS - for (int i = 0; i < 100; i++) { - ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); - } -#endif - EXPECT_GT(get_perf_context()->merge_operator_time_nanos, 0); - - ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - get_perf_context()->Reset(); - ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); -#ifdef OS_SOLARIS - for (int i = 0; i < 100; i++) { - ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); - } -#endif - EXPECT_GT(get_perf_context()->merge_operator_time_nanos, 0); - - delete db; -} - -TEST_F(PerfContextTest, CopyAndMove) { - // Assignment operator - { - get_perf_context()->Reset(); - get_perf_context()->EnablePerLevelPerfContext(); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); - ASSERT_EQ( - 1, - (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful); - PerfContext perf_context_assign; - perf_context_assign = *get_perf_context(); - ASSERT_EQ( - 1, - (*(perf_context_assign.level_to_perf_context))[5].bloom_filter_useful); - get_perf_context()->ClearPerLevelPerfContext(); - get_perf_context()->Reset(); - ASSERT_EQ( - 1, - (*(perf_context_assign.level_to_perf_context))[5].bloom_filter_useful); - perf_context_assign.ClearPerLevelPerfContext(); - perf_context_assign.Reset(); - } - // Copy constructor - { - get_perf_context()->Reset(); - get_perf_context()->EnablePerLevelPerfContext(); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); - ASSERT_EQ( - 1, - (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful); - PerfContext perf_context_copy(*get_perf_context()); - ASSERT_EQ( - 1, (*(perf_context_copy.level_to_perf_context))[5].bloom_filter_useful); - get_perf_context()->ClearPerLevelPerfContext(); - get_perf_context()->Reset(); - ASSERT_EQ( - 1, (*(perf_context_copy.level_to_perf_context))[5].bloom_filter_useful); - perf_context_copy.ClearPerLevelPerfContext(); - perf_context_copy.Reset(); - } - // Move constructor - { - get_perf_context()->Reset(); - 
get_perf_context()->EnablePerLevelPerfContext(); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); - ASSERT_EQ( - 1, - (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful); - PerfContext perf_context_move = std::move(*get_perf_context()); - ASSERT_EQ( - 1, (*(perf_context_move.level_to_perf_context))[5].bloom_filter_useful); - get_perf_context()->ClearPerLevelPerfContext(); - get_perf_context()->Reset(); - ASSERT_EQ( - 1, (*(perf_context_move.level_to_perf_context))[5].bloom_filter_useful); - perf_context_move.ClearPerLevelPerfContext(); - perf_context_move.Reset(); - } -} - -TEST_F(PerfContextTest, PerfContextDisableEnable) { - get_perf_context()->Reset(); - get_perf_context()->EnablePerLevelPerfContext(); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, 0); - get_perf_context()->DisablePerLevelPerfContext(); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); - get_perf_context()->EnablePerLevelPerfContext(); - PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, 0); - get_perf_context()->DisablePerLevelPerfContext(); - PerfContext perf_context_copy(*get_perf_context()); - ASSERT_EQ(1, (*(perf_context_copy.level_to_perf_context))[0] - .bloom_filter_full_positive); - // this was set when per level perf context is disabled, should not be copied - ASSERT_NE( - 1, (*(perf_context_copy.level_to_perf_context))[5].bloom_filter_useful); - ASSERT_EQ( - 1, (*(perf_context_copy.level_to_perf_context))[0].block_cache_hit_count); - perf_context_copy.ClearPerLevelPerfContext(); - perf_context_copy.Reset(); - get_perf_context()->ClearPerLevelPerfContext(); - get_perf_context()->Reset(); -} - -TEST_F(PerfContextTest, PerfContextByLevelGetSet) { - get_perf_context()->Reset(); - get_perf_context()->EnablePerLevelPerfContext(); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, 0); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7); - PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, 2); - PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, 0); - PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 5, 2); - PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 2, 3); - PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 4, 1); - ASSERT_EQ( - 0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); - ASSERT_EQ( - 1, (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful); - ASSERT_EQ( - 2, (*(get_perf_context()->level_to_perf_context))[7].bloom_filter_useful); - ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[0] - .bloom_filter_full_positive); - ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[2] - .bloom_filter_full_true_positive); - ASSERT_EQ( - 1, - (*(get_perf_context()->level_to_perf_context))[0].block_cache_hit_count); - ASSERT_EQ( - 5, - (*(get_perf_context()->level_to_perf_context))[2].block_cache_hit_count); - ASSERT_EQ( - 2, - (*(get_perf_context()->level_to_perf_context))[3].block_cache_miss_count); - ASSERT_EQ( - 4, - (*(get_perf_context()->level_to_perf_context))[1].block_cache_miss_count); - std::string zero_excluded = get_perf_context()->ToString(true); - ASSERT_NE(std::string::npos, - zero_excluded.find("bloom_filter_useful = 1@level5, 2@level7")); - ASSERT_NE(std::string::npos, - zero_excluded.find("bloom_filter_full_positive = 1@level0")); - ASSERT_NE(std::string::npos, - zero_excluded.find("bloom_filter_full_true_positive = 1@level2")); - ASSERT_NE(std::string::npos, - 
zero_excluded.find("block_cache_hit_count = 1@level0, 5@level2")); - ASSERT_NE(std::string::npos, - zero_excluded.find("block_cache_miss_count = 4@level1, 2@level3")); -} - -TEST_F(PerfContextTest, CPUTimer) { - if (SystemClock::Default()->CPUNanos() == 0) { - ROCKSDB_GTEST_SKIP("Target without CPUNanos support"); - return; - } - - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); - - std::string max_str = "0"; - for (int i = 0; i < FLAGS_total_keys; ++i) { - std::string i_str = std::to_string(i); - std::string key = "k" + i_str; - std::string value = "v" + i_str; - max_str = max_str > i_str ? max_str : i_str; - - ASSERT_OK(db->Put(write_options, key, value)); - } - std::string last_key = "k" + max_str; - std::string last_value = "v" + max_str; - - { - // Get - get_perf_context()->Reset(); - std::string value; - ASSERT_OK(db->Get(read_options, "k0", &value)); - ASSERT_EQ(value, "v0"); - - if (FLAGS_verbose) { - std::cout << "Get CPU time nanos: " << get_perf_context()->get_cpu_nanos - << "ns\n"; - } - - // Iter - std::unique_ptr iter(db->NewIterator(read_options)); - - // Seek - get_perf_context()->Reset(); - iter->Seek(last_key); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(last_value, iter->value().ToString()); - - if (FLAGS_verbose) { - std::cout << "Iter Seek CPU time nanos: " - << get_perf_context()->iter_seek_cpu_nanos << "ns\n"; - } - - // SeekForPrev - get_perf_context()->Reset(); - iter->SeekForPrev(last_key); - ASSERT_TRUE(iter->Valid()); - - if (FLAGS_verbose) { - std::cout << "Iter SeekForPrev CPU time nanos: " - << get_perf_context()->iter_seek_cpu_nanos << "ns\n"; - } - - // SeekToLast - get_perf_context()->Reset(); - iter->SeekToLast(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(last_value, iter->value().ToString()); - - if (FLAGS_verbose) { - std::cout << "Iter SeekToLast CPU time nanos: " - << get_perf_context()->iter_seek_cpu_nanos << "ns\n"; - } - - // SeekToFirst - get_perf_context()->Reset(); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("v0", iter->value().ToString()); - - if (FLAGS_verbose) { - std::cout << "Iter SeekToFirst CPU time nanos: " - << get_perf_context()->iter_seek_cpu_nanos << "ns\n"; - } - - // Next - get_perf_context()->Reset(); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("v1", iter->value().ToString()); - - if (FLAGS_verbose) { - std::cout << "Iter Next CPU time nanos: " - << get_perf_context()->iter_next_cpu_nanos << "ns\n"; - } - - // Prev - get_perf_context()->Reset(); - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("v0", iter->value().ToString()); - - if (FLAGS_verbose) { - std::cout << "Iter Prev CPU time nanos: " - << get_perf_context()->iter_prev_cpu_nanos << "ns\n"; - } - - // monotonically increasing - get_perf_context()->Reset(); - auto count = get_perf_context()->iter_seek_cpu_nanos; - for (int i = 0; i < FLAGS_total_keys; ++i) { - iter->Seek("k" + std::to_string(i)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("v" + std::to_string(i), iter->value().ToString()); - auto next_count = get_perf_context()->iter_seek_cpu_nanos; - ASSERT_GT(next_count, count); - count = next_count; - } - - // iterator creation/destruction; multiple iterators - { - std::unique_ptr iter2(db->NewIterator(read_options)); - ASSERT_EQ(count, get_perf_context()->iter_seek_cpu_nanos); - iter2->Seek(last_key); - ASSERT_TRUE(iter2->Valid()); - ASSERT_EQ(last_value, iter2->value().ToString()); - 
ASSERT_GT(get_perf_context()->iter_seek_cpu_nanos, count); - count = get_perf_context()->iter_seek_cpu_nanos; - } - ASSERT_EQ(count, get_perf_context()->iter_seek_cpu_nanos); - } -} - -TEST_F(PerfContextTest, MergeOperandCount) { - ASSERT_OK(DestroyDB(kDbName, Options())); - - DB* db = nullptr; - Options options; - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - - ASSERT_OK(DB::Open(options, kDbName, &db)); - std::unique_ptr<DB> db_guard(db); - - constexpr size_t num_keys = 3; - const std::string key_prefix("key"); - const std::string value_prefix("value"); - - std::vector<std::string> keys; - keys.reserve(num_keys); - - for (size_t i = 0; i < num_keys; ++i) { - keys.emplace_back(key_prefix + std::to_string(i)); - } - - // Write three keys with one Put each followed by 1, 2, and 3 - // Merge operations respectively. - constexpr size_t total_merges = num_keys * (num_keys + 1) / 2; - - std::vector<ManagedSnapshot> snapshots; - snapshots.reserve(total_merges); - - for (size_t i = 0; i < num_keys; ++i) { - const std::string suffix = std::to_string(i); - const std::string value = value_prefix + suffix; - - ASSERT_OK(db->Put(WriteOptions(), keys[i], value)); - - for (size_t j = 0; j <= i; ++j) { - // Take a snapshot before each Merge so they are preserved and not - // collapsed during flush. - snapshots.emplace_back(db); - - ASSERT_OK(db->Merge(WriteOptions(), keys[i], value + std::to_string(j))); - } - } - - auto verify = [&]() { - get_perf_context()->Reset(); - - for (size_t i = 0; i < num_keys; ++i) { - // Get - { - PinnableSlice result; - ASSERT_OK(db->Get(ReadOptions(), db->DefaultColumnFamily(), keys[i], - &result)); - ASSERT_EQ(get_perf_context()->internal_merge_point_lookup_count, i + 1); - - get_perf_context()->Reset(); - } - - // GetEntity - { - PinnableWideColumns result; - ASSERT_OK(db->GetEntity(ReadOptions(), db->DefaultColumnFamily(), - keys[i], &result)); - ASSERT_EQ(get_perf_context()->internal_merge_point_lookup_count, i + 1); - - get_perf_context()->Reset(); - } - } - - { - std::vector<Slice> key_slices; - key_slices.reserve(num_keys); - - for (size_t i = 0; i < num_keys; ++i) { - key_slices.emplace_back(keys[i]); - } - - // MultiGet - { - std::vector<PinnableSlice> results(num_keys); - std::vector<Status> statuses(num_keys); - - db->MultiGet(ReadOptions(), db->DefaultColumnFamily(), num_keys, - &key_slices[0], &results[0], &statuses[0]); - - for (size_t i = 0; i < num_keys; ++i) { - ASSERT_OK(statuses[i]); - } - - ASSERT_EQ(get_perf_context()->internal_merge_point_lookup_count, - total_merges); - - get_perf_context()->Reset(); - } - - // MultiGetEntity - { - std::vector<PinnableWideColumns> results(num_keys); - std::vector<Status> statuses(num_keys); - - db->MultiGetEntity(ReadOptions(), db->DefaultColumnFamily(), num_keys, - &key_slices[0], &results[0], &statuses[0]); - - for (size_t i = 0; i < num_keys; ++i) { - ASSERT_OK(statuses[i]); - } - - ASSERT_EQ(get_perf_context()->internal_merge_point_lookup_count, - total_merges); - - get_perf_context()->Reset(); - } - } - - std::unique_ptr<Iterator> it(db->NewIterator(ReadOptions())); - - // Forward iteration - { - size_t i = 0; - - for (it->SeekToFirst(); it->Valid(); it->Next(), ++i) { - ASSERT_EQ(it->key(), keys[i]); - ASSERT_EQ(get_perf_context()->internal_merge_count, i + 1); - - get_perf_context()->Reset(); - } - } - - // Backward iteration - { - size_t i = num_keys - 1; - - for (it->SeekToLast(); it->Valid(); it->Prev(), --i) { - ASSERT_EQ(it->key(), keys[i]); - ASSERT_EQ(get_perf_context()->internal_merge_count, i + 1); - - get_perf_context()->Reset(); - } - } - 
}; - - // Verify counters when reading from memtable - verify(); - - // Verify counters when reading from table files - db->Flush(FlushOptions()); - - verify(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - - for (int i = 1; i < argc; i++) { - int n; - char junk; - - if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) { - FLAGS_write_buffer_size = n; - } - - if (sscanf(argv[i], "--total_keys=%d%c", &n, &junk) == 1) { - FLAGS_total_keys = n; - } - - if (sscanf(argv[i], "--random_key=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { - FLAGS_random_key = n; - } - - if (sscanf(argv[i], "--use_set_based_memetable=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { - FLAGS_use_set_based_memetable = n; - } - - if (sscanf(argv[i], "--verbose=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { - FLAGS_verbose = n; - } - } - - if (FLAGS_verbose) { - std::cout << kDbName << "\n"; - } - - return RUN_ALL_TESTS(); -} diff --git a/db/periodic_task_scheduler_test.cc b/db/periodic_task_scheduler_test.cc deleted file mode 100644 index c1205bcf6..000000000 --- a/db/periodic_task_scheduler_test.cc +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/periodic_task_scheduler.h" - -#include "db/db_test_util.h" -#include "env/composite_env_wrapper.h" -#include "test_util/mock_time_env.h" - -namespace ROCKSDB_NAMESPACE { - -class PeriodicTaskSchedulerTest : public DBTestBase { - public: - PeriodicTaskSchedulerTest() - : DBTestBase("periodic_task_scheduler_test", /*env_do_fsync=*/true) { - mock_clock_ = std::make_shared(env_->GetSystemClock()); - mock_env_.reset(new CompositeEnvWrapper(env_, mock_clock_)); - } - - protected: - std::unique_ptr mock_env_; - std::shared_ptr mock_clock_; - - void SetUp() override { - mock_clock_->InstallTimedWaitFixCallback(); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::StartPeriodicTaskScheduler:Init", [&](void* arg) { - auto periodic_task_scheduler_ptr = - reinterpret_cast(arg); - periodic_task_scheduler_ptr->TEST_OverrideTimer(mock_clock_.get()); - }); - } -}; - -TEST_F(PeriodicTaskSchedulerTest, Basic) { - constexpr unsigned int kPeriodSec = 10; - Close(); - Options options; - options.stats_dump_period_sec = kPeriodSec; - options.stats_persist_period_sec = kPeriodSec; - options.create_if_missing = true; - options.env = mock_env_.get(); - - int dump_st_counter = 0; - SyncPoint::GetInstance()->SetCallBack("DBImpl::DumpStats:StartRunning", - [&](void*) { dump_st_counter++; }); - - int pst_st_counter = 0; - SyncPoint::GetInstance()->SetCallBack("DBImpl::PersistStats:StartRunning", - [&](void*) { pst_st_counter++; }); - - int flush_info_log_counter = 0; - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::FlushInfoLog:StartRunning", - [&](void*) { flush_info_log_counter++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - Reopen(options); - - ASSERT_EQ(kPeriodSec, dbfull()->GetDBOptions().stats_dump_period_sec); - ASSERT_EQ(kPeriodSec, dbfull()->GetDBOptions().stats_persist_period_sec); - - ASSERT_GT(kPeriodSec, 1u); - dbfull()->TEST_WaitForPeriodicTaskRun([&] { - mock_clock_->MockSleepForSeconds(static_cast(kPeriodSec) - 1); - }); - - const PeriodicTaskScheduler& scheduler = - 
dbfull()->TEST_GetPeriodicTaskScheduler(); - ASSERT_EQ(3, scheduler.TEST_GetValidTaskNum()); - - ASSERT_EQ(1, dump_st_counter); - ASSERT_EQ(1, pst_st_counter); - ASSERT_EQ(1, flush_info_log_counter); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kPeriodSec)); }); - - ASSERT_EQ(2, dump_st_counter); - ASSERT_EQ(2, pst_st_counter); - ASSERT_EQ(2, flush_info_log_counter); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kPeriodSec)); }); - - ASSERT_EQ(3, dump_st_counter); - ASSERT_EQ(3, pst_st_counter); - ASSERT_EQ(3, flush_info_log_counter); - - // Disable scheduler with SetOption - ASSERT_OK(dbfull()->SetDBOptions( - {{"stats_dump_period_sec", "0"}, {"stats_persist_period_sec", "0"}})); - ASSERT_EQ(0u, dbfull()->GetDBOptions().stats_dump_period_sec); - ASSERT_EQ(0u, dbfull()->GetDBOptions().stats_persist_period_sec); - - // Info log flush should still run. - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kPeriodSec)); }); - ASSERT_EQ(3, dump_st_counter); - ASSERT_EQ(3, pst_st_counter); - ASSERT_EQ(4, flush_info_log_counter); - - ASSERT_EQ(1u, scheduler.TEST_GetValidTaskNum()); - - // Re-enable one task - ASSERT_OK(dbfull()->SetDBOptions({{"stats_dump_period_sec", "5"}})); - ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_dump_period_sec); - ASSERT_EQ(0u, dbfull()->GetDBOptions().stats_persist_period_sec); - - ASSERT_EQ(2, scheduler.TEST_GetValidTaskNum()); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kPeriodSec)); }); - ASSERT_EQ(4, dump_st_counter); - ASSERT_EQ(3, pst_st_counter); - ASSERT_EQ(5, flush_info_log_counter); - - Close(); -} - -TEST_F(PeriodicTaskSchedulerTest, MultiInstances) { - constexpr int kPeriodSec = 5; - const int kInstanceNum = 10; - - Close(); - Options options; - options.stats_dump_period_sec = kPeriodSec; - options.stats_persist_period_sec = kPeriodSec; - options.create_if_missing = true; - options.env = mock_env_.get(); - - int dump_st_counter = 0; - SyncPoint::GetInstance()->SetCallBack("DBImpl::DumpStats:2", - [&](void*) { dump_st_counter++; }); - - int pst_st_counter = 0; - SyncPoint::GetInstance()->SetCallBack("DBImpl::PersistStats:StartRunning", - [&](void*) { pst_st_counter++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - auto dbs = std::vector(kInstanceNum); - for (int i = 0; i < kInstanceNum; i++) { - ASSERT_OK( - DB::Open(options, test::PerThreadDBPath(std::to_string(i)), &(dbs[i]))); - } - - auto dbi = static_cast_with_check(dbs[kInstanceNum - 1]); - - const PeriodicTaskScheduler& scheduler = dbi->TEST_GetPeriodicTaskScheduler(); - ASSERT_EQ(kInstanceNum * 3, scheduler.TEST_GetValidTaskNum()); - - int expected_run = kInstanceNum; - dbi->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); }); - ASSERT_EQ(expected_run, dump_st_counter); - ASSERT_EQ(expected_run, pst_st_counter); - - expected_run += kInstanceNum; - dbi->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - ASSERT_EQ(expected_run, dump_st_counter); - ASSERT_EQ(expected_run, pst_st_counter); - - expected_run += kInstanceNum; - dbi->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - ASSERT_EQ(expected_run, dump_st_counter); - ASSERT_EQ(expected_run, pst_st_counter); - - int half = kInstanceNum / 2; - for (int i = 0; i < half; i++) { - delete dbs[i]; - } - - expected_run += (kInstanceNum - half) 
* 2; - - dbi->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - dbi->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - ASSERT_EQ(expected_run, dump_st_counter); - ASSERT_EQ(expected_run, pst_st_counter); - - for (int i = half; i < kInstanceNum; i++) { - ASSERT_OK(dbs[i]->Close()); - delete dbs[i]; - } -} - -TEST_F(PeriodicTaskSchedulerTest, MultiEnv) { - constexpr int kDumpPeriodSec = 5; - constexpr int kPersistPeriodSec = 10; - Close(); - Options options1; - options1.stats_dump_period_sec = kDumpPeriodSec; - options1.stats_persist_period_sec = kPersistPeriodSec; - options1.create_if_missing = true; - options1.env = mock_env_.get(); - - Reopen(options1); - - std::unique_ptr mock_env2( - new CompositeEnvWrapper(Env::Default(), mock_clock_)); - Options options2; - options2.stats_dump_period_sec = kDumpPeriodSec; - options2.stats_persist_period_sec = kPersistPeriodSec; - options2.create_if_missing = true; - options1.env = mock_env2.get(); - - std::string dbname = test::PerThreadDBPath("multi_env_test"); - DB* db; - ASSERT_OK(DB::Open(options2, dbname, &db)); - - ASSERT_OK(db->Close()); - delete db; - Close(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - - return RUN_ALL_TESTS(); -} diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc deleted file mode 100644 index 737ad4ed2..000000000 --- a/db/plain_table_db_test.cc +++ /dev/null @@ -1,1347 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- - -#include -#include - -#include "db/db_impl/db_impl.h" -#include "db/version_set.h" -#include "db/write_batch_internal.h" -#include "file/filename.h" -#include "rocksdb/cache.h" -#include "rocksdb/compaction_filter.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/table.h" -#include "table/meta_blocks.h" -#include "table/plain/plain_table_bloom.h" -#include "table/plain/plain_table_factory.h" -#include "table/plain/plain_table_key_coding.h" -#include "table/plain/plain_table_reader.h" -#include "table/table_builder.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/cast_util.h" -#include "util/hash.h" -#include "util/mutexlock.h" -#include "util/random.h" -#include "util/string_util.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { -class PlainTableKeyDecoderTest : public testing::Test {}; - -TEST_F(PlainTableKeyDecoderTest, ReadNonMmap) { - Random rnd(301); - const uint32_t kLength = 2222; - std::string tmp = rnd.RandomString(kLength); - Slice contents(tmp); - test::StringSource* string_source = - new test::StringSource(contents, 0, false); - std::unique_ptr<FSRandomAccessFile> holder(string_source); - std::unique_ptr<RandomAccessFileReader> file_reader( - new RandomAccessFileReader(std::move(holder), "test")); - std::unique_ptr<PlainTableReaderFileInfo> file_info( - new PlainTableReaderFileInfo(std::move(file_reader), EnvOptions(), - kLength)); - - { - PlainTableFileReader reader(file_info.get()); - - const uint32_t kReadSize = 77; - for (uint32_t pos = 0; pos < kLength; pos += kReadSize) { - uint32_t read_size = std::min(kLength - pos, kReadSize); - Slice out; - ASSERT_TRUE(reader.Read(pos, read_size, &out)); - ASSERT_EQ(0, out.compare(tmp.substr(pos, read_size))); - } - - ASSERT_LT(uint32_t(string_source->total_reads()), kLength / kReadSize / 2); - } - - std::vector<std::vector<std::pair<uint32_t, size_t>>> reads = { - {{600, 30}, {590, 30}, {600, 20}, {600, 40}}, - {{800, 20}, {100, 20}, {500, 20}, {1500, 20}, {100, 20}, {80, 20}}, - {{1000, 20}, {500, 20}, {1000, 50}}, - {{1000, 20}, {500, 20}, {500, 20}}, - {{1000, 20}, {500, 20}, {200, 20}, {500, 20}}, - {{1000, 20}, {500, 20}, {200, 20}, {1000, 50}}, - {{600, 500}, {610, 20}, {100, 20}}, - {{500, 100}, {490, 100}, {550, 50}}, - }; - - std::vector<int> num_file_reads = {2, 6, 2, 2, 4, 3, 2, 2}; - - for (size_t i = 0; i < reads.size(); i++) { - string_source->set_total_reads(0); - PlainTableFileReader reader(file_info.get()); - for (auto p : reads[i]) { - Slice out; - ASSERT_TRUE(reader.Read(p.first, p.second, &out)); - ASSERT_EQ(0, out.compare(tmp.substr(p.first, p.second))); - } - ASSERT_EQ(num_file_reads[i], string_source->total_reads()); - } -} - -class PlainTableDBTest : public testing::Test, - public testing::WithParamInterface<bool> { - protected: - private: - std::string dbname_; - Env* env_; - DB* db_; - - bool mmap_mode_; - Options last_options_; - - public: - PlainTableDBTest() : env_(Env::Default()) {} - - ~PlainTableDBTest() override { - delete db_; - EXPECT_OK(DestroyDB(dbname_, Options())); - } - - void SetUp() override { - mmap_mode_ = GetParam(); - dbname_ = test::PerThreadDBPath("plain_table_db_test"); - EXPECT_OK(DestroyDB(dbname_, Options())); - db_ = nullptr; - Reopen(); - } - - // Return the current option configuration. 
- Options CurrentOptions() { - Options options; - - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 0; - plain_table_options.bloom_bits_per_key = 2; - plain_table_options.hash_table_ratio = 0.8; - plain_table_options.index_sparseness = 3; - plain_table_options.huge_page_tlb_size = 0; - plain_table_options.encoding_type = kPrefix; - plain_table_options.full_scan_mode = false; - plain_table_options.store_index_in_file = false; - - options.table_factory.reset(NewPlainTableFactory(plain_table_options)); - options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true)); - - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - options.allow_mmap_reads = mmap_mode_; - options.allow_concurrent_memtable_write = false; - options.unordered_write = false; - return options; - } - - DBImpl* dbfull() { return static_cast_with_check(db_); } - - void Reopen(Options* options = nullptr) { ASSERT_OK(TryReopen(options)); } - - void Close() { - delete db_; - db_ = nullptr; - } - - bool mmap_mode() const { return mmap_mode_; } - - void DestroyAndReopen(Options* options = nullptr) { - // Destroy using last options - Destroy(&last_options_); - ASSERT_OK(TryReopen(options)); - } - - void Destroy(Options* options) { - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, *options)); - } - - Status PureReopen(Options* options, DB** db) { - return DB::Open(*options, dbname_, db); - } - - Status ReopenForReadOnly(Options* options) { - delete db_; - db_ = nullptr; - return DB::OpenForReadOnly(*options, dbname_, &db_); - } - - Status TryReopen(Options* options = nullptr) { - delete db_; - db_ = nullptr; - Options opts; - if (options != nullptr) { - opts = *options; - } else { - opts = CurrentOptions(); - opts.create_if_missing = true; - } - last_options_ = opts; - - return DB::Open(opts, dbname_, &db_); - } - - Status Put(const Slice& k, const Slice& v) { - return db_->Put(WriteOptions(), k, v); - } - - Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); } - - std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) { - ReadOptions options; - options.snapshot = snapshot; - std::string result; - Status s = db_->Get(options, k, &result); - if (s.IsNotFound()) { - result = "NOT_FOUND"; - } else if (!s.ok()) { - result = s.ToString(); - } - return result; - } - - int NumTableFilesAtLevel(int level) { - std::string property; - EXPECT_TRUE(db_->GetProperty( - "rocksdb.num-files-at-level" + std::to_string(level), &property)); - return atoi(property.c_str()); - } - - // Return spread of files per level - std::string FilesPerLevel() { - std::string result; - size_t last_non_zero_offset = 0; - for (int level = 0; level < db_->NumberLevels(); level++) { - int f = NumTableFilesAtLevel(level); - char buf[100]; - snprintf(buf, sizeof(buf), "%s%d", (level ? 
"," : ""), f); - result += buf; - if (f > 0) { - last_non_zero_offset = result.size(); - } - } - result.resize(last_non_zero_offset); - return result; - } - - std::string IterStatus(Iterator* iter) { - std::string result; - if (iter->Valid()) { - result = iter->key().ToString() + "->" + iter->value().ToString(); - } else { - result = "(invalid)"; - } - return result; - } -}; - -TEST_P(PlainTableDBTest, Empty) { - ASSERT_TRUE(dbfull() != nullptr); - ASSERT_EQ("NOT_FOUND", Get("0000000000000foo")); -} - -extern const uint64_t kPlainTableMagicNumber; - -class TestPlainTableReader : public PlainTableReader { - public: - TestPlainTableReader( - const EnvOptions& env_options, const InternalKeyComparator& icomparator, - EncodingType encoding_type, uint64_t file_size, int bloom_bits_per_key, - double hash_table_ratio, size_t index_sparseness, - std::unique_ptr&& props, - std::unique_ptr&& file, - const ImmutableOptions& ioptions, const SliceTransform* prefix_extractor, - bool* expect_bloom_not_match, bool store_index_in_file, - uint32_t column_family_id, const std::string& column_family_name) - : PlainTableReader(ioptions, std::move(file), env_options, icomparator, - encoding_type, file_size, props.get(), - prefix_extractor), - expect_bloom_not_match_(expect_bloom_not_match) { - Status s = MmapDataIfNeeded(); - EXPECT_TRUE(s.ok()); - - s = PopulateIndex(props.get(), bloom_bits_per_key, hash_table_ratio, - index_sparseness, 2 * 1024 * 1024); - EXPECT_TRUE(s.ok()); - - EXPECT_EQ(column_family_id, static_cast(props->column_family_id)); - EXPECT_EQ(column_family_name, props->column_family_name); - if (store_index_in_file) { - auto bloom_version_ptr = props->user_collected_properties.find( - PlainTablePropertyNames::kBloomVersion); - EXPECT_TRUE(bloom_version_ptr != props->user_collected_properties.end()); - EXPECT_EQ(bloom_version_ptr->second, std::string("1")); - if (ioptions.bloom_locality > 0) { - auto num_blocks_ptr = props->user_collected_properties.find( - PlainTablePropertyNames::kNumBloomBlocks); - EXPECT_TRUE(num_blocks_ptr != props->user_collected_properties.end()); - } - } - table_properties_ = std::move(props); - } - - ~TestPlainTableReader() override {} - - private: - bool MatchBloom(uint32_t hash) const override { - bool ret = PlainTableReader::MatchBloom(hash); - if (*expect_bloom_not_match_) { - EXPECT_TRUE(!ret); - } else { - EXPECT_TRUE(ret); - } - return ret; - } - bool* expect_bloom_not_match_; -}; - -extern const uint64_t kPlainTableMagicNumber; -class TestPlainTableFactory : public PlainTableFactory { - public: - explicit TestPlainTableFactory(bool* expect_bloom_not_match, - const PlainTableOptions& options, - uint32_t column_family_id, - std::string column_family_name) - : PlainTableFactory(options), - bloom_bits_per_key_(options.bloom_bits_per_key), - hash_table_ratio_(options.hash_table_ratio), - index_sparseness_(options.index_sparseness), - store_index_in_file_(options.store_index_in_file), - expect_bloom_not_match_(expect_bloom_not_match), - column_family_id_(column_family_id), - column_family_name_(std::move(column_family_name)) {} - - using PlainTableFactory::NewTableReader; - Status NewTableReader( - const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options, - std::unique_ptr&& file, uint64_t file_size, - std::unique_ptr* table, - bool /*prefetch_index_and_filter_in_cache*/) const override { - std::unique_ptr props; - auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, - table_reader_options.ioptions, &props); - 
EXPECT_TRUE(s.ok()); - - if (store_index_in_file_) { - BlockHandle bloom_block_handle; - s = FindMetaBlockInFile(file.get(), file_size, kPlainTableMagicNumber, - table_reader_options.ioptions, - BloomBlockBuilder::kBloomBlock, - &bloom_block_handle); - EXPECT_TRUE(s.ok()); - - BlockHandle index_block_handle; - s = FindMetaBlockInFile(file.get(), file_size, kPlainTableMagicNumber, - table_reader_options.ioptions, - PlainTableIndexBuilder::kPlainTableIndexBlock, - &index_block_handle); - EXPECT_TRUE(s.ok()); - } - - auto& user_props = props->user_collected_properties; - auto encoding_type_prop = - user_props.find(PlainTablePropertyNames::kEncodingType); - assert(encoding_type_prop != user_props.end()); - EncodingType encoding_type = static_cast( - DecodeFixed32(encoding_type_prop->second.c_str())); - - std::unique_ptr new_reader(new TestPlainTableReader( - table_reader_options.env_options, - table_reader_options.internal_comparator, encoding_type, file_size, - bloom_bits_per_key_, hash_table_ratio_, index_sparseness_, - std::move(props), std::move(file), table_reader_options.ioptions, - table_reader_options.prefix_extractor.get(), expect_bloom_not_match_, - store_index_in_file_, column_family_id_, column_family_name_)); - - *table = std::move(new_reader); - return s; - } - - private: - int bloom_bits_per_key_; - double hash_table_ratio_; - size_t index_sparseness_; - bool store_index_in_file_; - bool* expect_bloom_not_match_; - const uint32_t column_family_id_; - const std::string column_family_name_; -}; - -TEST_P(PlainTableDBTest, BadOptions1) { - // Build with a prefix extractor - ASSERT_OK(Put("1000000000000foo", "v1")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - // Bad attempt to re-open without a prefix extractor - Options options = CurrentOptions(); - options.prefix_extractor.reset(); - ASSERT_EQ( - "Invalid argument: Prefix extractor is missing when opening a PlainTable " - "built using a prefix extractor", - TryReopen(&options).ToString()); - - // Bad attempt to re-open with different prefix extractor - options.prefix_extractor.reset(NewFixedPrefixTransform(6)); - ASSERT_EQ( - "Invalid argument: Prefix extractor given doesn't match the one used to " - "build PlainTable", - TryReopen(&options).ToString()); - - // Correct prefix extractor - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - Reopen(&options); - ASSERT_EQ("v1", Get("1000000000000foo")); -} - -TEST_P(PlainTableDBTest, BadOptions2) { - Options options = CurrentOptions(); - options.prefix_extractor.reset(); - options.create_if_missing = true; - DestroyAndReopen(&options); - // Build without a prefix extractor - // (apparently works even if hash_table_ratio > 0) - ASSERT_OK(Put("1000000000000foo", "v1")); - // Build without a prefix extractor, this call will fail and returns the - // status for this bad attempt. - ASSERT_NOK(dbfull()->TEST_FlushMemTable()); - - // Bad attempt to re-open with hash_table_ratio > 0 and no prefix extractor - Status s = TryReopen(&options); - ASSERT_EQ( - "Not implemented: PlainTable requires a prefix extractor enable prefix " - "hash mode.", - s.ToString()); - - // OK to open with hash_table_ratio == 0 and no prefix extractor - PlainTableOptions plain_table_options; - plain_table_options.hash_table_ratio = 0; - options.table_factory.reset(NewPlainTableFactory(plain_table_options)); - Reopen(&options); - ASSERT_EQ("v1", Get("1000000000000foo")); - - // OK to open newly with a prefix_extractor and hash table; builds index - // in memory. 
- options = CurrentOptions(); - Reopen(&options); - ASSERT_EQ("v1", Get("1000000000000foo")); -} - -TEST_P(PlainTableDBTest, Flush) { - for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; - huge_page_tlb_size += 2 * 1024 * 1024) { - for (EncodingType encoding_type : {kPlain, kPrefix}) { - for (int bloom = -1; bloom <= 117; bloom += 117) { - const int bloom_bits = std::max(bloom, 0); - const bool full_scan_mode = bloom < 0; - for (int total_order = 0; total_order <= 1; total_order++) { - for (int store_index_in_file = 0; store_index_in_file <= 1; - ++store_index_in_file) { - Options options = CurrentOptions(); - options.create_if_missing = true; - // Set only one bucket to force bucket conflict. - // Test index interval for the same prefix to be 1, 2 and 4 - if (total_order) { - options.prefix_extractor.reset(); - - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 0; - plain_table_options.bloom_bits_per_key = bloom_bits; - plain_table_options.hash_table_ratio = 0; - plain_table_options.index_sparseness = 2; - plain_table_options.huge_page_tlb_size = huge_page_tlb_size; - plain_table_options.encoding_type = encoding_type; - plain_table_options.full_scan_mode = full_scan_mode; - plain_table_options.store_index_in_file = store_index_in_file; - - options.table_factory.reset( - NewPlainTableFactory(plain_table_options)); - } else { - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 0; - plain_table_options.bloom_bits_per_key = bloom_bits; - plain_table_options.hash_table_ratio = 0.75; - plain_table_options.index_sparseness = 16; - plain_table_options.huge_page_tlb_size = huge_page_tlb_size; - plain_table_options.encoding_type = encoding_type; - plain_table_options.full_scan_mode = full_scan_mode; - plain_table_options.store_index_in_file = store_index_in_file; - - options.table_factory.reset( - NewPlainTableFactory(plain_table_options)); - } - DestroyAndReopen(&options); - uint64_t int_num; - ASSERT_TRUE(dbfull()->GetIntProperty( - "rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_EQ(int_num, 0U); - - ASSERT_OK(Put("1000000000000foo", "v1")); - ASSERT_OK(Put("0000000000000bar", "v2")); - ASSERT_OK(Put("1000000000000foo", "v3")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - ASSERT_TRUE(dbfull()->GetIntProperty( - "rocksdb.estimate-table-readers-mem", &int_num)); - ASSERT_GT(int_num, 0U); - - TablePropertiesCollection ptc; - ASSERT_OK(reinterpret_cast(dbfull())->GetPropertiesOfAllTables( - &ptc)); - ASSERT_EQ(1U, ptc.size()); - auto row = ptc.begin(); - auto tp = row->second; - - if (full_scan_mode) { - // Does not support Get/Seek - std::unique_ptr iter( - dbfull()->NewIterator(ReadOptions())); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("0000000000000bar", iter->key().ToString()); - ASSERT_EQ("v2", iter->value().ToString()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000000foo", iter->key().ToString()); - ASSERT_EQ("v3", iter->value().ToString()); - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - ASSERT_TRUE(iter->status().ok()); - } else { - if (!store_index_in_file) { - ASSERT_EQ(total_order ? 
"4" : "12", - (tp->user_collected_properties) - .at("plain_table_hash_table_size")); - ASSERT_EQ("0", (tp->user_collected_properties) - .at("plain_table_sub_index_size")); - } else { - ASSERT_EQ("0", (tp->user_collected_properties) - .at("plain_table_hash_table_size")); - ASSERT_EQ("0", (tp->user_collected_properties) - .at("plain_table_sub_index_size")); - } - ASSERT_EQ("v3", Get("1000000000000foo")); - ASSERT_EQ("v2", Get("0000000000000bar")); - } - } - } - } - } - } -} - -TEST_P(PlainTableDBTest, Flush2) { - for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; - huge_page_tlb_size += 2 * 1024 * 1024) { - for (EncodingType encoding_type : {kPlain, kPrefix}) { - for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { - for (int total_order = 0; total_order <= 1; total_order++) { - for (int store_index_in_file = 0; store_index_in_file <= 1; - ++store_index_in_file) { - if (encoding_type == kPrefix && total_order) { - continue; - } - if (!bloom_bits && store_index_in_file) { - continue; - } - if (total_order && store_index_in_file) { - continue; - } - bool expect_bloom_not_match = false; - Options options = CurrentOptions(); - options.create_if_missing = true; - // Set only one bucket to force bucket conflict. - // Test index interval for the same prefix to be 1, 2 and 4 - PlainTableOptions plain_table_options; - if (total_order) { - options.prefix_extractor = nullptr; - plain_table_options.hash_table_ratio = 0; - plain_table_options.index_sparseness = 2; - } else { - plain_table_options.hash_table_ratio = 0.75; - plain_table_options.index_sparseness = 16; - } - plain_table_options.user_key_len = kPlainTableVariableLength; - plain_table_options.bloom_bits_per_key = bloom_bits; - plain_table_options.huge_page_tlb_size = huge_page_tlb_size; - plain_table_options.encoding_type = encoding_type; - plain_table_options.store_index_in_file = store_index_in_file; - options.table_factory.reset(new TestPlainTableFactory( - &expect_bloom_not_match, plain_table_options, - 0 /* column_family_id */, kDefaultColumnFamilyName)); - - DestroyAndReopen(&options); - ASSERT_OK(Put("0000000000000bar", "b")); - ASSERT_OK(Put("1000000000000foo", "v1")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - ASSERT_OK(Put("1000000000000foo", "v2")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_EQ("v2", Get("1000000000000foo")); - - ASSERT_OK(Put("0000000000000eee", "v3")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_EQ("v3", Get("0000000000000eee")); - - ASSERT_OK(Delete("0000000000000bar")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_EQ("NOT_FOUND", Get("0000000000000bar")); - - ASSERT_OK(Put("0000000000000eee", "v5")); - ASSERT_OK(Put("9000000000000eee", "v5")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_EQ("v5", Get("0000000000000eee")); - - // Test Bloom Filter - if (bloom_bits > 0) { - // Neither key nor value should exist. - expect_bloom_not_match = true; - ASSERT_EQ("NOT_FOUND", Get("5_not00000000bar")); - // Key doesn't exist any more but prefix exists. - if (total_order) { - ASSERT_EQ("NOT_FOUND", Get("1000000000000not")); - ASSERT_EQ("NOT_FOUND", Get("0000000000000not")); - } - expect_bloom_not_match = false; - } - } - } - } - } - } -} - -TEST_P(PlainTableDBTest, Immortal) { - for (EncodingType encoding_type : {kPlain, kPrefix}) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.max_open_files = -1; - // Set only one bucket to force bucket conflict. 
- // Test index interval for the same prefix to be 1, 2 and 4 - PlainTableOptions plain_table_options; - plain_table_options.hash_table_ratio = 0.75; - plain_table_options.index_sparseness = 16; - plain_table_options.user_key_len = kPlainTableVariableLength; - plain_table_options.bloom_bits_per_key = 10; - plain_table_options.encoding_type = encoding_type; - options.table_factory.reset(NewPlainTableFactory(plain_table_options)); - - DestroyAndReopen(&options); - ASSERT_OK(Put("0000000000000bar", "b")); - ASSERT_OK(Put("1000000000000foo", "v1")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - int copied = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "GetContext::SaveValue::PinSelf", [&](void* /*arg*/) { copied++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_EQ("b", Get("0000000000000bar")); - ASSERT_EQ("v1", Get("1000000000000foo")); - ASSERT_EQ(2, copied); - copied = 0; - - Close(); - ASSERT_OK(ReopenForReadOnly(&options)); - - ASSERT_EQ("b", Get("0000000000000bar")); - ASSERT_EQ("v1", Get("1000000000000foo")); - ASSERT_EQ("NOT_FOUND", Get("1000000000000bar")); - if (mmap_mode()) { - ASSERT_EQ(0, copied); - } else { - ASSERT_EQ(2, copied); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_P(PlainTableDBTest, Iterator) { - for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; - huge_page_tlb_size += 2 * 1024 * 1024) { - for (EncodingType encoding_type : {kPlain, kPrefix}) { - for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { - for (int total_order = 0; total_order <= 1; total_order++) { - if (encoding_type == kPrefix && total_order == 1) { - continue; - } - bool expect_bloom_not_match = false; - Options options = CurrentOptions(); - options.create_if_missing = true; - // Set only one bucket to force bucket conflict. 
- // Test index interval for the same prefix to be 1, 2 and 4 - if (total_order) { - options.prefix_extractor = nullptr; - - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 16; - plain_table_options.bloom_bits_per_key = bloom_bits; - plain_table_options.hash_table_ratio = 0; - plain_table_options.index_sparseness = 2; - plain_table_options.huge_page_tlb_size = huge_page_tlb_size; - plain_table_options.encoding_type = encoding_type; - - options.table_factory.reset(new TestPlainTableFactory( - &expect_bloom_not_match, plain_table_options, - 0 /* column_family_id */, kDefaultColumnFamilyName)); - } else { - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 16; - plain_table_options.bloom_bits_per_key = bloom_bits; - plain_table_options.hash_table_ratio = 0.75; - plain_table_options.index_sparseness = 16; - plain_table_options.huge_page_tlb_size = huge_page_tlb_size; - plain_table_options.encoding_type = encoding_type; - - options.table_factory.reset(new TestPlainTableFactory( - &expect_bloom_not_match, plain_table_options, - 0 /* column_family_id */, kDefaultColumnFamilyName)); - } - DestroyAndReopen(&options); - - ASSERT_OK(Put("1000000000foo002", "v_2")); - ASSERT_OK(Put("0000000000000bar", "random")); - ASSERT_OK(Put("1000000000foo001", "v1")); - ASSERT_OK(Put("3000000000000bar", "bar_v")); - ASSERT_OK(Put("1000000000foo003", "v__3")); - ASSERT_OK(Put("1000000000foo004", "v__4")); - ASSERT_OK(Put("1000000000foo005", "v__5")); - ASSERT_OK(Put("1000000000foo007", "v__7")); - ASSERT_OK(Put("1000000000foo008", "v__8")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_EQ("v1", Get("1000000000foo001")); - ASSERT_EQ("v__3", Get("1000000000foo003")); - Iterator* iter = dbfull()->NewIterator(ReadOptions()); - iter->Seek("1000000000foo000"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo001", iter->key().ToString()); - ASSERT_EQ("v1", iter->value().ToString()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo002", iter->key().ToString()); - ASSERT_EQ("v_2", iter->value().ToString()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo003", iter->key().ToString()); - ASSERT_EQ("v__3", iter->value().ToString()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo004", iter->key().ToString()); - ASSERT_EQ("v__4", iter->value().ToString()); - - iter->Seek("3000000000000bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("3000000000000bar", iter->key().ToString()); - ASSERT_EQ("bar_v", iter->value().ToString()); - - iter->Seek("1000000000foo000"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo001", iter->key().ToString()); - ASSERT_EQ("v1", iter->value().ToString()); - - iter->Seek("1000000000foo005"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo005", iter->key().ToString()); - ASSERT_EQ("v__5", iter->value().ToString()); - - iter->Seek("1000000000foo006"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo007", iter->key().ToString()); - ASSERT_EQ("v__7", iter->value().ToString()); - - iter->Seek("1000000000foo008"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo008", iter->key().ToString()); - ASSERT_EQ("v__8", iter->value().ToString()); - - if (total_order == 0) { - iter->Seek("1000000000foo009"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("3000000000000bar", iter->key().ToString()); - } - - // Test Bloom Filter - if (bloom_bits > 0) { - if (!total_order) { - // Neither key nor value should exist. 
- expect_bloom_not_match = true; - iter->Seek("2not000000000bar"); - ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ("NOT_FOUND", Get("2not000000000bar")); - expect_bloom_not_match = false; - } else { - expect_bloom_not_match = true; - ASSERT_EQ("NOT_FOUND", Get("2not000000000bar")); - expect_bloom_not_match = false; - } - } - ASSERT_OK(iter->status()); - delete iter; - } - } - } - } -} - -namespace { -std::string NthKey(size_t n, char filler) { - std::string rv(16, filler); - rv[0] = n % 10; - rv[1] = (n / 10) % 10; - rv[2] = (n / 100) % 10; - rv[3] = (n / 1000) % 10; - return rv; -} -} // anonymous namespace - -TEST_P(PlainTableDBTest, BloomSchema) { - Options options = CurrentOptions(); - options.create_if_missing = true; - for (int bloom_locality = 0; bloom_locality <= 1; bloom_locality++) { - options.bloom_locality = bloom_locality; - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 16; - plain_table_options.bloom_bits_per_key = 3; // high FP rate for test - plain_table_options.hash_table_ratio = 0.75; - plain_table_options.index_sparseness = 16; - plain_table_options.huge_page_tlb_size = 0; - plain_table_options.encoding_type = kPlain; - - bool expect_bloom_not_match = false; - options.table_factory.reset(new TestPlainTableFactory( - &expect_bloom_not_match, plain_table_options, 0 /* column_family_id */, - kDefaultColumnFamilyName)); - DestroyAndReopen(&options); - - for (unsigned i = 0; i < 2345; ++i) { - ASSERT_OK(Put(NthKey(i, 'y'), "added")); - } - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_EQ("added", Get(NthKey(42, 'y'))); - - for (unsigned i = 0; i < 32; ++i) { - // Known pattern of Bloom filter false positives can detect schema change - // with high probability. Known FPs stuffed into bits: - uint32_t pattern; - if (!bloom_locality) { - pattern = 1785868347UL; - } else if (CACHE_LINE_SIZE == 64U) { - pattern = 2421694657UL; - } else if (CACHE_LINE_SIZE == 128U) { - pattern = 788710956UL; - } else { - ASSERT_EQ(CACHE_LINE_SIZE, 256U); - pattern = 163905UL; - } - bool expect_fp = pattern & (1UL << i); - // fprintf(stderr, "expect_fp@%u: %d\n", i, (int)expect_fp); - expect_bloom_not_match = !expect_fp; - ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n'))); - } - } -} - -namespace { -std::string MakeLongKey(size_t length, char c) { - return std::string(length, c); -} -} // anonymous namespace - -TEST_P(PlainTableDBTest, IteratorLargeKeys) { - Options options = CurrentOptions(); - - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 0; - plain_table_options.bloom_bits_per_key = 0; - plain_table_options.hash_table_ratio = 0; - - options.table_factory.reset(NewPlainTableFactory(plain_table_options)); - options.create_if_missing = true; - options.prefix_extractor.reset(); - DestroyAndReopen(&options); - - std::string key_list[] = {MakeLongKey(30, '0'), MakeLongKey(16, '1'), - MakeLongKey(32, '2'), MakeLongKey(60, '3'), - MakeLongKey(90, '4'), MakeLongKey(50, '5'), - MakeLongKey(26, '6')}; - - for (size_t i = 0; i < 7; i++) { - ASSERT_OK(Put(key_list[i], std::to_string(i))); - } - - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - Iterator* iter = dbfull()->NewIterator(ReadOptions()); - iter->Seek(key_list[0]); - - for (size_t i = 0; i < 7; i++) { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(key_list[i], iter->key().ToString()); - ASSERT_EQ(std::to_string(i), iter->value().ToString()); - iter->Next(); - } - - ASSERT_TRUE(!iter->Valid()); - - delete iter; -} - -namespace { -std::string MakeLongKeyWithPrefix(size_t length, char c) { - return 
"00000000" + std::string(length - 8, c); -} -} // anonymous namespace - -TEST_P(PlainTableDBTest, IteratorLargeKeysWithPrefix) { - Options options = CurrentOptions(); - - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 16; - plain_table_options.bloom_bits_per_key = 0; - plain_table_options.hash_table_ratio = 0.8; - plain_table_options.index_sparseness = 3; - plain_table_options.huge_page_tlb_size = 0; - plain_table_options.encoding_type = kPrefix; - - options.table_factory.reset(NewPlainTableFactory(plain_table_options)); - options.create_if_missing = true; - DestroyAndReopen(&options); - - std::string key_list[] = { - MakeLongKeyWithPrefix(30, '0'), MakeLongKeyWithPrefix(16, '1'), - MakeLongKeyWithPrefix(32, '2'), MakeLongKeyWithPrefix(60, '3'), - MakeLongKeyWithPrefix(90, '4'), MakeLongKeyWithPrefix(50, '5'), - MakeLongKeyWithPrefix(26, '6')}; - - for (size_t i = 0; i < 7; i++) { - ASSERT_OK(Put(key_list[i], std::to_string(i))); - } - - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - Iterator* iter = dbfull()->NewIterator(ReadOptions()); - iter->Seek(key_list[0]); - - for (size_t i = 0; i < 7; i++) { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(key_list[i], iter->key().ToString()); - ASSERT_EQ(std::to_string(i), iter->value().ToString()); - iter->Next(); - } - - ASSERT_TRUE(!iter->Valid()); - - delete iter; -} - -TEST_P(PlainTableDBTest, IteratorReverseSuffixComparator) { - Options options = CurrentOptions(); - options.create_if_missing = true; - // Set only one bucket to force bucket conflict. - // Test index interval for the same prefix to be 1, 2 and 4 - test::SimpleSuffixReverseComparator comp; - options.comparator = ∁ - DestroyAndReopen(&options); - - ASSERT_OK(Put("1000000000foo002", "v_2")); - ASSERT_OK(Put("0000000000000bar", "random")); - ASSERT_OK(Put("1000000000foo001", "v1")); - ASSERT_OK(Put("3000000000000bar", "bar_v")); - ASSERT_OK(Put("1000000000foo003", "v__3")); - ASSERT_OK(Put("1000000000foo004", "v__4")); - ASSERT_OK(Put("1000000000foo005", "v__5")); - ASSERT_OK(Put("1000000000foo007", "v__7")); - ASSERT_OK(Put("1000000000foo008", "v__8")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_EQ("v1", Get("1000000000foo001")); - ASSERT_EQ("v__3", Get("1000000000foo003")); - Iterator* iter = dbfull()->NewIterator(ReadOptions()); - iter->Seek("1000000000foo009"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo008", iter->key().ToString()); - ASSERT_EQ("v__8", iter->value().ToString()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo007", iter->key().ToString()); - ASSERT_EQ("v__7", iter->value().ToString()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo005", iter->key().ToString()); - ASSERT_EQ("v__5", iter->value().ToString()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo004", iter->key().ToString()); - ASSERT_EQ("v__4", iter->value().ToString()); - - iter->Seek("3000000000000bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("3000000000000bar", iter->key().ToString()); - ASSERT_EQ("bar_v", iter->value().ToString()); - - iter->Seek("1000000000foo005"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo005", iter->key().ToString()); - ASSERT_EQ("v__5", iter->value().ToString()); - - iter->Seek("1000000000foo006"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo005", iter->key().ToString()); - ASSERT_EQ("v__5", iter->value().ToString()); - - iter->Seek("1000000000foo008"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("1000000000foo008", 
iter->key().ToString()); - ASSERT_EQ("v__8", iter->value().ToString()); - - iter->Seek("1000000000foo000"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("3000000000000bar", iter->key().ToString()); - - delete iter; -} - -TEST_P(PlainTableDBTest, HashBucketConflict) { - for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; - huge_page_tlb_size += 2 * 1024 * 1024) { - for (unsigned char i = 1; i <= 3; i++) { - Options options = CurrentOptions(); - options.create_if_missing = true; - // Set only one bucket to force bucket conflict. - // Test index interval for the same prefix to be 1, 2 and 4 - - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 16; - plain_table_options.bloom_bits_per_key = 0; - plain_table_options.hash_table_ratio = 0; - plain_table_options.index_sparseness = 2 ^ i; - plain_table_options.huge_page_tlb_size = huge_page_tlb_size; - - options.table_factory.reset(NewPlainTableFactory(plain_table_options)); - - DestroyAndReopen(&options); - ASSERT_OK(Put("5000000000000fo0", "v1")); - ASSERT_OK(Put("5000000000000fo1", "v2")); - ASSERT_OK(Put("5000000000000fo2", "v")); - ASSERT_OK(Put("2000000000000fo0", "v3")); - ASSERT_OK(Put("2000000000000fo1", "v4")); - ASSERT_OK(Put("2000000000000fo2", "v")); - ASSERT_OK(Put("2000000000000fo3", "v")); - - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - ASSERT_EQ("v1", Get("5000000000000fo0")); - ASSERT_EQ("v2", Get("5000000000000fo1")); - ASSERT_EQ("v3", Get("2000000000000fo0")); - ASSERT_EQ("v4", Get("2000000000000fo1")); - - ASSERT_EQ("NOT_FOUND", Get("5000000000000bar")); - ASSERT_EQ("NOT_FOUND", Get("2000000000000bar")); - ASSERT_EQ("NOT_FOUND", Get("5000000000000fo8")); - ASSERT_EQ("NOT_FOUND", Get("2000000000000fo8")); - - ReadOptions ro; - Iterator* iter = dbfull()->NewIterator(ro); - - iter->Seek("5000000000000fo0"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("5000000000000fo0", iter->key().ToString()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("5000000000000fo1", iter->key().ToString()); - - iter->Seek("5000000000000fo1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("5000000000000fo1", iter->key().ToString()); - - iter->Seek("2000000000000fo0"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("2000000000000fo0", iter->key().ToString()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("2000000000000fo1", iter->key().ToString()); - - iter->Seek("2000000000000fo1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("2000000000000fo1", iter->key().ToString()); - - iter->Seek("2000000000000bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("2000000000000fo0", iter->key().ToString()); - - iter->Seek("5000000000000bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("5000000000000fo0", iter->key().ToString()); - - iter->Seek("2000000000000fo8"); - ASSERT_TRUE(!iter->Valid() || - options.comparator->Compare(iter->key(), "20000001") > 0); - - iter->Seek("5000000000000fo8"); - ASSERT_TRUE(!iter->Valid()); - - iter->Seek("1000000000000fo2"); - ASSERT_TRUE(!iter->Valid()); - - iter->Seek("3000000000000fo2"); - ASSERT_TRUE(!iter->Valid()); - - iter->Seek("8000000000000fo2"); - ASSERT_TRUE(!iter->Valid()); - - ASSERT_OK(iter->status()); - delete iter; - } - } -} - -TEST_P(PlainTableDBTest, HashBucketConflictReverseSuffixComparator) { - for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; - huge_page_tlb_size += 2 * 1024 * 1024) { - for (unsigned char i = 1; i <= 3; i++) { - Options options = CurrentOptions(); - options.create_if_missing = true; - 
test::SimpleSuffixReverseComparator comp; - options.comparator = &comp; - // Set only one bucket to force bucket conflict. - // Test index interval for the same prefix to be 1, 2 and 4 - - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 16; - plain_table_options.bloom_bits_per_key = 0; - plain_table_options.hash_table_ratio = 0; - plain_table_options.index_sparseness = 2 ^ i; - plain_table_options.huge_page_tlb_size = huge_page_tlb_size; - - options.table_factory.reset(NewPlainTableFactory(plain_table_options)); - DestroyAndReopen(&options); - ASSERT_OK(Put("5000000000000fo0", "v1")); - ASSERT_OK(Put("5000000000000fo1", "v2")); - ASSERT_OK(Put("5000000000000fo2", "v")); - ASSERT_OK(Put("2000000000000fo0", "v3")); - ASSERT_OK(Put("2000000000000fo1", "v4")); - ASSERT_OK(Put("2000000000000fo2", "v")); - ASSERT_OK(Put("2000000000000fo3", "v")); - - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - ASSERT_EQ("v1", Get("5000000000000fo0")); - ASSERT_EQ("v2", Get("5000000000000fo1")); - ASSERT_EQ("v3", Get("2000000000000fo0")); - ASSERT_EQ("v4", Get("2000000000000fo1")); - - ASSERT_EQ("NOT_FOUND", Get("5000000000000bar")); - ASSERT_EQ("NOT_FOUND", Get("2000000000000bar")); - ASSERT_EQ("NOT_FOUND", Get("5000000000000fo8")); - ASSERT_EQ("NOT_FOUND", Get("2000000000000fo8")); - - ReadOptions ro; - Iterator* iter = dbfull()->NewIterator(ro); - - iter->Seek("5000000000000fo1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("5000000000000fo1", iter->key().ToString()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("5000000000000fo0", iter->key().ToString()); - - iter->Seek("5000000000000fo1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("5000000000000fo1", iter->key().ToString()); - - iter->Seek("2000000000000fo1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("2000000000000fo1", iter->key().ToString()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("2000000000000fo0", iter->key().ToString()); - - iter->Seek("2000000000000fo1"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("2000000000000fo1", iter->key().ToString()); - - iter->Seek("2000000000000var"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("2000000000000fo3", iter->key().ToString()); - - iter->Seek("5000000000000var"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("5000000000000fo2", iter->key().ToString()); - - std::string seek_key = "2000000000000bar"; - iter->Seek(seek_key); - ASSERT_TRUE(!iter->Valid() || - options.prefix_extractor->Transform(iter->key()) != - options.prefix_extractor->Transform(seek_key)); - - iter->Seek("1000000000000fo2"); - ASSERT_TRUE(!iter->Valid()); - - iter->Seek("3000000000000fo2"); - ASSERT_TRUE(!iter->Valid()); - - iter->Seek("8000000000000fo2"); - ASSERT_TRUE(!iter->Valid()); - - ASSERT_OK(iter->status()); - delete iter; - } - } -} - -TEST_P(PlainTableDBTest, NonExistingKeyToNonEmptyBucket) { - Options options = CurrentOptions(); - options.create_if_missing = true; - // Set only one bucket to force bucket conflict.
- // Test index interval for the same prefix to be 1, 2 and 4 - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 16; - plain_table_options.bloom_bits_per_key = 0; - plain_table_options.hash_table_ratio = 0; - plain_table_options.index_sparseness = 5; - - options.table_factory.reset(NewPlainTableFactory(plain_table_options)); - DestroyAndReopen(&options); - ASSERT_OK(Put("5000000000000fo0", "v1")); - ASSERT_OK(Put("5000000000000fo1", "v2")); - ASSERT_OK(Put("5000000000000fo2", "v3")); - - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - ASSERT_EQ("v1", Get("5000000000000fo0")); - ASSERT_EQ("v2", Get("5000000000000fo1")); - ASSERT_EQ("v3", Get("5000000000000fo2")); - - ASSERT_EQ("NOT_FOUND", Get("8000000000000bar")); - ASSERT_EQ("NOT_FOUND", Get("1000000000000bar")); - - Iterator* iter = dbfull()->NewIterator(ReadOptions()); - - iter->Seek("5000000000000bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("5000000000000fo0", iter->key().ToString()); - - iter->Seek("5000000000000fo8"); - ASSERT_TRUE(!iter->Valid()); - - iter->Seek("1000000000000fo2"); - ASSERT_TRUE(!iter->Valid()); - - iter->Seek("8000000000000fo2"); - ASSERT_TRUE(!iter->Valid()); - - ASSERT_OK(iter->status()); - delete iter; -} - -static std::string Key(int i) { - char buf[100]; - snprintf(buf, sizeof(buf), "key_______%06d", i); - return std::string(buf); -} - -TEST_P(PlainTableDBTest, CompactionTrigger) { - Options options = CurrentOptions(); - options.write_buffer_size = 120 << 10; // 120KB - options.num_levels = 3; - options.level0_file_num_compaction_trigger = 3; - Reopen(&options); - - Random rnd(301); - - for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; - num++) { - std::vector values; - // Write 120KB (10 values, each 12K) - for (int i = 0; i < 10; i++) { - values.push_back(rnd.RandomString(12 << 10)); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Put(Key(999), "")); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); - ASSERT_EQ(NumTableFilesAtLevel(0), num + 1); - } - - // generate one more file in level-0, and should trigger level-0 compaction - std::vector values; - for (int i = 0; i < 12; i++) { - values.push_back(rnd.RandomString(10000)); - ASSERT_OK(Put(Key(i), values[i])); - } - ASSERT_OK(Put(Key(999), "")); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_EQ(NumTableFilesAtLevel(1), 1); -} - -TEST_P(PlainTableDBTest, AdaptiveTable) { - Options options = CurrentOptions(); - options.create_if_missing = true; - - options.table_factory.reset(NewPlainTableFactory()); - DestroyAndReopen(&options); - - ASSERT_OK(Put("1000000000000foo", "v1")); - ASSERT_OK(Put("0000000000000bar", "v2")); - ASSERT_OK(Put("1000000000000foo", "v3")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - - options.create_if_missing = false; - std::shared_ptr block_based_factory( - NewBlockBasedTableFactory()); - std::shared_ptr plain_table_factory(NewPlainTableFactory()); - std::shared_ptr dummy_factory; - options.table_factory.reset(NewAdaptiveTableFactory( - block_based_factory, block_based_factory, plain_table_factory)); - Reopen(&options); - ASSERT_EQ("v3", Get("1000000000000foo")); - ASSERT_EQ("v2", Get("0000000000000bar")); - - ASSERT_OK(Put("2000000000000foo", "v4")); - ASSERT_OK(Put("3000000000000bar", "v5")); - ASSERT_OK(dbfull()->TEST_FlushMemTable()); - ASSERT_EQ("v4", Get("2000000000000foo")); - ASSERT_EQ("v5", Get("3000000000000bar")); - - Reopen(&options); - ASSERT_EQ("v3", Get("1000000000000foo")); - ASSERT_EQ("v2", 
Get("0000000000000bar")); - ASSERT_EQ("v4", Get("2000000000000foo")); - ASSERT_EQ("v5", Get("3000000000000bar")); - - options.paranoid_checks = false; - options.table_factory.reset(NewBlockBasedTableFactory()); - Reopen(&options); - ASSERT_NE("v3", Get("1000000000000foo")); - - options.paranoid_checks = false; - options.table_factory.reset(NewPlainTableFactory()); - Reopen(&options); - ASSERT_NE("v5", Get("3000000000000bar")); -} - -INSTANTIATE_TEST_CASE_P(PlainTableDBTest, PlainTableDBTest, ::testing::Bool()); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/db/prefix_test.cc b/db/prefix_test.cc deleted file mode 100644 index a8ae04035..000000000 --- a/db/prefix_test.cc +++ /dev/null @@ -1,894 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - - -#ifndef GFLAGS -#include -int main() { - fprintf(stderr, "Please install gflags to run this test... Skipping...\n"); - return 0; -} -#else - -#include -#include -#include - -#include "db/db_impl/db_impl.h" -#include "monitoring/histogram.h" -#include "rocksdb/comparator.h" -#include "rocksdb/db.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/system_clock.h" -#include "rocksdb/table.h" -#include "test_util/testharness.h" -#include "util/cast_util.h" -#include "util/coding.h" -#include "util/gflags_compat.h" -#include "util/random.h" -#include "util/stop_watch.h" -#include "util/string_util.h" -#include "utilities/merge_operators.h" - -using GFLAGS_NAMESPACE::ParseCommandLineFlags; - -DEFINE_bool(trigger_deadlock, false, - "issue delete in range scan to trigger PrefixHashMap deadlock"); -DEFINE_int32(bucket_count, 100000, "number of buckets"); -DEFINE_uint64(num_locks, 10001, "number of locks"); -DEFINE_bool(random_prefix, false, "randomize prefix"); -DEFINE_uint64(total_prefixes, 100000, "total number of prefixes"); -DEFINE_uint64(items_per_prefix, 1, "total number of values per prefix"); -DEFINE_int64(write_buffer_size, 33554432, ""); -DEFINE_int32(max_write_buffer_number, 2, ""); -DEFINE_int32(min_write_buffer_number_to_merge, 1, ""); -DEFINE_int32(skiplist_height, 4, ""); -DEFINE_double(memtable_prefix_bloom_size_ratio, 0.1, ""); -DEFINE_int32(memtable_huge_page_size, 2 * 1024 * 1024, ""); -DEFINE_int32(value_size, 40, ""); -DEFINE_bool(enable_print, false, "Print options generated to console."); - -// Path to the database on file system -const std::string kDbName = - ROCKSDB_NAMESPACE::test::PerThreadDBPath("prefix_test"); - -namespace ROCKSDB_NAMESPACE { - -struct TestKey { - uint64_t prefix; - uint64_t sorted; - - TestKey(uint64_t _prefix, uint64_t _sorted) - : prefix(_prefix), sorted(_sorted) {} -}; - -// return a slice backed by test_key -inline Slice TestKeyToSlice(std::string& s, const TestKey& test_key) { - s.clear(); - PutFixed64(&s, test_key.prefix); - PutFixed64(&s, test_key.sorted); - return Slice(s.c_str(), s.size()); -} - -inline const TestKey SliceToTestKey(const Slice& slice) { - return TestKey(DecodeFixed64(slice.data()), DecodeFixed64(slice.data() + 8)); -} - -class TestKeyComparator : public Comparator { - public: - // Compare needs 
to be aware of the possibility of a and/or b is - // prefix only - int Compare(const Slice& a, const Slice& b) const override { - const TestKey kkey_a = SliceToTestKey(a); - const TestKey kkey_b = SliceToTestKey(b); - const TestKey* key_a = &kkey_a; - const TestKey* key_b = &kkey_b; - if (key_a->prefix != key_b->prefix) { - if (key_a->prefix < key_b->prefix) return -1; - if (key_a->prefix > key_b->prefix) return 1; - } else { - EXPECT_TRUE(key_a->prefix == key_b->prefix); - // note, both a and b could be prefix only - if (a.size() != b.size()) { - // one of them is prefix - EXPECT_TRUE( - (a.size() == sizeof(uint64_t) && b.size() == sizeof(TestKey)) || - (b.size() == sizeof(uint64_t) && a.size() == sizeof(TestKey))); - if (a.size() < b.size()) return -1; - if (a.size() > b.size()) return 1; - } else { - // both a and b are prefix - if (a.size() == sizeof(uint64_t)) { - return 0; - } - - // both a and b are whole key - EXPECT_TRUE(a.size() == sizeof(TestKey) && b.size() == sizeof(TestKey)); - if (key_a->sorted < key_b->sorted) return -1; - if (key_a->sorted > key_b->sorted) return 1; - if (key_a->sorted == key_b->sorted) return 0; - } - } - return 0; - } - - bool operator()(const TestKey& a, const TestKey& b) const { - std::string sa, sb; - return Compare(TestKeyToSlice(sa, a), TestKeyToSlice(sb, b)) < 0; - } - - const char* Name() const override { return "TestKeyComparator"; } - - void FindShortestSeparator(std::string* /*start*/, - const Slice& /*limit*/) const override {} - - void FindShortSuccessor(std::string* /*key*/) const override {} -}; - -namespace { -void PutKey(DB* db, WriteOptions write_options, uint64_t prefix, - uint64_t suffix, const Slice& value) { - TestKey test_key(prefix, suffix); - std::string s; - Slice key = TestKeyToSlice(s, test_key); - ASSERT_OK(db->Put(write_options, key, value)); -} - -void PutKey(DB* db, WriteOptions write_options, const TestKey& test_key, - const Slice& value) { - std::string s; - Slice key = TestKeyToSlice(s, test_key); - ASSERT_OK(db->Put(write_options, key, value)); -} - -void MergeKey(DB* db, WriteOptions write_options, const TestKey& test_key, - const Slice& value) { - std::string s; - Slice key = TestKeyToSlice(s, test_key); - ASSERT_OK(db->Merge(write_options, key, value)); -} - -void DeleteKey(DB* db, WriteOptions write_options, const TestKey& test_key) { - std::string s; - Slice key = TestKeyToSlice(s, test_key); - ASSERT_OK(db->Delete(write_options, key)); -} - -void SeekIterator(Iterator* iter, uint64_t prefix, uint64_t suffix) { - TestKey test_key(prefix, suffix); - std::string s; - Slice key = TestKeyToSlice(s, test_key); - iter->Seek(key); -} - -const std::string kNotFoundResult = "NOT_FOUND"; - -std::string Get(DB* db, const ReadOptions& read_options, uint64_t prefix, - uint64_t suffix) { - TestKey test_key(prefix, suffix); - std::string s2; - Slice key = TestKeyToSlice(s2, test_key); - - std::string result; - Status s = db->Get(read_options, key, &result); - if (s.IsNotFound()) { - result = kNotFoundResult; - } else if (!s.ok()) { - result = s.ToString(); - } - return result; -} - -class SamePrefixTransform : public SliceTransform { - private: - const Slice prefix_; - std::string name_; - - public: - explicit SamePrefixTransform(const Slice& prefix) - : prefix_(prefix), name_("rocksdb.SamePrefix." 
+ prefix.ToString()) {} - - const char* Name() const override { return name_.c_str(); } - - Slice Transform(const Slice& src) const override { - assert(InDomain(src)); - return prefix_; - } - - bool InDomain(const Slice& src) const override { - if (src.size() >= prefix_.size()) { - return Slice(src.data(), prefix_.size()) == prefix_; - } - return false; - } - - bool InRange(const Slice& dst) const override { return dst == prefix_; } - - bool FullLengthEnabled(size_t* /*len*/) const override { return false; } -}; - -} // anonymous namespace - -class PrefixTest : public testing::Test { - public: - std::shared_ptr OpenDb() { - DB* db; - - options.create_if_missing = true; - options.write_buffer_size = FLAGS_write_buffer_size; - options.max_write_buffer_number = FLAGS_max_write_buffer_number; - options.min_write_buffer_number_to_merge = - FLAGS_min_write_buffer_number_to_merge; - - options.memtable_prefix_bloom_size_ratio = - FLAGS_memtable_prefix_bloom_size_ratio; - options.memtable_huge_page_size = FLAGS_memtable_huge_page_size; - - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - options.allow_concurrent_memtable_write = false; - - Status s = DB::Open(options, kDbName, &db); - EXPECT_OK(s); - return std::shared_ptr(db); - } - - void FirstOption() { option_config_ = kBegin; } - - bool NextOptions(int bucket_count) { - // skip some options - option_config_++; - if (option_config_ < kEnd) { - options.prefix_extractor.reset(NewFixedPrefixTransform(8)); - switch (option_config_) { - case kHashSkipList: - options.memtable_factory.reset( - NewHashSkipListRepFactory(bucket_count, FLAGS_skiplist_height)); - return true; - case kHashLinkList: - options.memtable_factory.reset( - NewHashLinkListRepFactory(bucket_count)); - return true; - case kHashLinkListHugePageTlb: - options.memtable_factory.reset( - NewHashLinkListRepFactory(bucket_count, 2 * 1024 * 1024)); - return true; - case kHashLinkListTriggerSkipList: - options.memtable_factory.reset( - NewHashLinkListRepFactory(bucket_count, 0, 3)); - return true; - default: - return false; - } - } - return false; - } - - PrefixTest() : option_config_(kBegin) { - options.comparator = new TestKeyComparator(); - } - ~PrefixTest() override { delete options.comparator; } - - protected: - enum OptionConfig { - kBegin, - kHashSkipList, - kHashLinkList, - kHashLinkListHugePageTlb, - kHashLinkListTriggerSkipList, - kEnd - }; - int option_config_; - Options options; -}; - -TEST(SamePrefixTest, InDomainTest) { - DB* db; - Options options; - options.create_if_missing = true; - options.prefix_extractor.reset(new SamePrefixTransform("HHKB")); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.whole_key_filtering = false; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - WriteOptions write_options; - ReadOptions read_options; - { - ASSERT_OK(DestroyDB(kDbName, Options())); - ASSERT_OK(DB::Open(options, kDbName, &db)); - ASSERT_OK(db->Put(write_options, "HHKB pro2", "Mar 24, 2006")); - ASSERT_OK(db->Put(write_options, "HHKB pro2 Type-S", "June 29, 2011")); - ASSERT_OK(db->Put(write_options, "Realforce 87u", "idk")); - ASSERT_OK(db->Flush(FlushOptions())); - std::string result; - auto db_iter = db->NewIterator(ReadOptions()); - - db_iter->Seek("Realforce 87u"); - ASSERT_TRUE(db_iter->Valid()); - 
ASSERT_OK(db_iter->status()); - ASSERT_EQ(db_iter->key(), "Realforce 87u"); - ASSERT_EQ(db_iter->value(), "idk"); - - delete db_iter; - delete db; - ASSERT_OK(DestroyDB(kDbName, Options())); - } - - { - ASSERT_OK(DB::Open(options, kDbName, &db)); - ASSERT_OK(db->Put(write_options, "pikachu", "1")); - ASSERT_OK(db->Put(write_options, "Meowth", "1")); - ASSERT_OK(db->Put(write_options, "Mewtwo", "idk")); - ASSERT_OK(db->Flush(FlushOptions())); - std::string result; - auto db_iter = db->NewIterator(ReadOptions()); - - db_iter->Seek("Mewtwo"); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_OK(db_iter->status()); - delete db_iter; - delete db; - ASSERT_OK(DestroyDB(kDbName, Options())); - } -} - -TEST_F(PrefixTest, TestResult) { - for (int num_buckets = 1; num_buckets <= 2; num_buckets++) { - FirstOption(); - while (NextOptions(num_buckets)) { - std::cout << "*** Mem table: " << options.memtable_factory->Name() - << " number of buckets: " << num_buckets << std::endl; - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - - // 1. Insert one row. - Slice v16("v16"); - PutKey(db.get(), write_options, 1, 6, v16); - std::unique_ptr iter(db->NewIterator(read_options)); - SeekIterator(iter.get(), 1, 6); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v16 == iter->value()); - SeekIterator(iter.get(), 1, 5); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v16 == iter->value()); - SeekIterator(iter.get(), 1, 5); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v16 == iter->value()); - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - - SeekIterator(iter.get(), 2, 0); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - - ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); - ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 5)); - ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 7)); - ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 0, 6)); - ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 2, 6)); - - // 2. Insert an entry for the same prefix as the last entry in the bucket. - Slice v17("v17"); - PutKey(db.get(), write_options, 1, 7, v17); - iter.reset(db->NewIterator(read_options)); - SeekIterator(iter.get(), 1, 7); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v17 == iter->value()); - - SeekIterator(iter.get(), 1, 6); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v16 == iter->value()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v17 == iter->value()); - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - - SeekIterator(iter.get(), 2, 0); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - - // 3. Insert an entry for the same prefix as the head of the bucket. 
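// After this put, prefix 1 holds suffixes {5, 6, 7}; the seeks below check that
// the new entry (1, 5) is now returned first for its prefix and that in-prefix
// forward iteration still yields v15, v16, v17 in order.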
- Slice v15("v15"); - PutKey(db.get(), write_options, 1, 5, v15); - iter.reset(db->NewIterator(read_options)); - - SeekIterator(iter.get(), 1, 7); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v17 == iter->value()); - - SeekIterator(iter.get(), 1, 5); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v15 == iter->value()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v16 == iter->value()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v17 == iter->value()); - - SeekIterator(iter.get(), 1, 5); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v15 == iter->value()); - - ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5)); - ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); - ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7)); - - // 4. Insert an entry with a larger prefix - Slice v22("v22"); - PutKey(db.get(), write_options, 2, 2, v22); - iter.reset(db->NewIterator(read_options)); - - SeekIterator(iter.get(), 2, 2); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v22 == iter->value()); - SeekIterator(iter.get(), 2, 0); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v22 == iter->value()); - - SeekIterator(iter.get(), 1, 5); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v15 == iter->value()); - - SeekIterator(iter.get(), 1, 7); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v17 == iter->value()); - - // 5. Insert an entry with a smaller prefix - Slice v02("v02"); - PutKey(db.get(), write_options, 0, 2, v02); - iter.reset(db->NewIterator(read_options)); - - SeekIterator(iter.get(), 0, 2); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v02 == iter->value()); - SeekIterator(iter.get(), 0, 0); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v02 == iter->value()); - - SeekIterator(iter.get(), 2, 0); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v22 == iter->value()); - - SeekIterator(iter.get(), 1, 5); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v15 == iter->value()); - - SeekIterator(iter.get(), 1, 7); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v17 == iter->value()); - - // 6. 
Insert to the beginning and the end of the first prefix - Slice v13("v13"); - Slice v18("v18"); - PutKey(db.get(), write_options, 1, 3, v13); - PutKey(db.get(), write_options, 1, 8, v18); - iter.reset(db->NewIterator(read_options)); - SeekIterator(iter.get(), 1, 7); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v17 == iter->value()); - - SeekIterator(iter.get(), 1, 3); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v13 == iter->value()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v15 == iter->value()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v16 == iter->value()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v17 == iter->value()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v18 == iter->value()); - - SeekIterator(iter.get(), 0, 0); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v02 == iter->value()); - - SeekIterator(iter.get(), 2, 0); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v22 == iter->value()); - - ASSERT_EQ(v22.ToString(), Get(db.get(), read_options, 2, 2)); - ASSERT_EQ(v02.ToString(), Get(db.get(), read_options, 0, 2)); - ASSERT_EQ(v13.ToString(), Get(db.get(), read_options, 1, 3)); - ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5)); - ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); - ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7)); - ASSERT_EQ(v18.ToString(), Get(db.get(), read_options, 1, 8)); - } - } -} - -// Show results in prefix -TEST_F(PrefixTest, PrefixValid) { - for (int num_buckets = 1; num_buckets <= 2; num_buckets++) { - FirstOption(); - while (NextOptions(num_buckets)) { - std::cout << "*** Mem table: " << options.memtable_factory->Name() - << " number of buckets: " << num_buckets << std::endl; - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - - // Insert keys with common prefix and one key with different - Slice v16("v16"); - Slice v17("v17"); - Slice v18("v18"); - Slice v19("v19"); - PutKey(db.get(), write_options, 12345, 6, v16); - PutKey(db.get(), write_options, 12345, 7, v17); - PutKey(db.get(), write_options, 12345, 8, v18); - PutKey(db.get(), write_options, 12345, 9, v19); - PutKey(db.get(), write_options, 12346, 8, v16); - ASSERT_OK(db->Flush(FlushOptions())); - TestKey test_key(12346, 8); - std::string s; - ASSERT_OK(db->Delete(write_options, TestKeyToSlice(s, test_key))); - ASSERT_OK(db->Flush(FlushOptions())); - read_options.prefix_same_as_start = true; - std::unique_ptr iter(db->NewIterator(read_options)); - SeekIterator(iter.get(), 12345, 6); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v16 == iter->value()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v17 == iter->value()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v18 == iter->value()); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_TRUE(v19 == iter->value()); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 12346, 8)); - - // Verify seeking past the prefix won't return a result. 
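// With read_options.prefix_same_as_start set above, the iterator is confined to
// keys sharing the seek key's prefix; no key with prefix 12345 sorts at or after
// suffix 10, so the Seek below is expected to leave the iterator invalid rather
// than advancing into the next prefix.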
- SeekIterator(iter.get(), 12345, 10); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - } - } -} - -TEST_F(PrefixTest, DynamicPrefixIterator) { - while (NextOptions(FLAGS_bucket_count)) { - std::cout << "*** Mem table: " << options.memtable_factory->Name() - << std::endl; - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - - std::vector prefixes; - for (uint64_t i = 0; i < FLAGS_total_prefixes; ++i) { - prefixes.push_back(i); - } - - if (FLAGS_random_prefix) { - RandomShuffle(prefixes.begin(), prefixes.end()); - } - - HistogramImpl hist_put_time; - HistogramImpl hist_put_comparison; - // insert x random prefix, each with y continuous element. - for (auto prefix : prefixes) { - for (uint64_t sorted = 0; sorted < FLAGS_items_per_prefix; sorted++) { - TestKey test_key(prefix, sorted); - - std::string s; - Slice key = TestKeyToSlice(s, test_key); - std::string value(FLAGS_value_size, 0); - - get_perf_context()->Reset(); - StopWatchNano timer(SystemClock::Default().get(), true); - ASSERT_OK(db->Put(write_options, key, value)); - hist_put_time.Add(timer.ElapsedNanos()); - hist_put_comparison.Add(get_perf_context()->user_key_comparison_count); - } - } - - std::cout << "Put key comparison: \n" - << hist_put_comparison.ToString() << "Put time: \n" - << hist_put_time.ToString(); - - // test seek existing keys - HistogramImpl hist_seek_time; - HistogramImpl hist_seek_comparison; - - std::unique_ptr iter(db->NewIterator(read_options)); - - for (auto prefix : prefixes) { - TestKey test_key(prefix, FLAGS_items_per_prefix / 2); - std::string s; - Slice key = TestKeyToSlice(s, test_key); - std::string value = "v" + std::to_string(0); - - get_perf_context()->Reset(); - StopWatchNano timer(SystemClock::Default().get(), true); - auto key_prefix = options.prefix_extractor->Transform(key); - uint64_t total_keys = 0; - for (iter->Seek(key); - iter->Valid() && iter->key().starts_with(key_prefix); iter->Next()) { - if (FLAGS_trigger_deadlock) { - std::cout << "Behold the deadlock!\n"; - db->Delete(write_options, iter->key()); - } - total_keys++; - } - hist_seek_time.Add(timer.ElapsedNanos()); - hist_seek_comparison.Add(get_perf_context()->user_key_comparison_count); - ASSERT_EQ(total_keys, - FLAGS_items_per_prefix - FLAGS_items_per_prefix / 2); - } - - std::cout << "Seek key comparison: \n" - << hist_seek_comparison.ToString() << "Seek time: \n" - << hist_seek_time.ToString(); - - // test non-existing keys - HistogramImpl hist_no_seek_time; - HistogramImpl hist_no_seek_comparison; - - for (auto prefix = FLAGS_total_prefixes; - prefix < FLAGS_total_prefixes + 10000; prefix++) { - TestKey test_key(prefix, 0); - std::string s; - Slice key = TestKeyToSlice(s, test_key); - - get_perf_context()->Reset(); - StopWatchNano timer(SystemClock::Default().get(), true); - iter->Seek(key); - hist_no_seek_time.Add(timer.ElapsedNanos()); - hist_no_seek_comparison.Add( - get_perf_context()->user_key_comparison_count); - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - } - - std::cout << "non-existing Seek key comparison: \n" - << hist_no_seek_comparison.ToString() - << "non-existing Seek time: \n" - << hist_no_seek_time.ToString(); - } -} - -TEST_F(PrefixTest, PrefixSeekModePrev) { - // Only for SkipListFactory - options.memtable_factory.reset(new SkipListFactory); - options.merge_operator = MergeOperators::CreatePutOperator(); - options.write_buffer_size = 1024 * 1024; - Random rnd(1); - for (size_t m = 1; m < 100; m++) { - std::cout 
<< "[" + std::to_string(m) + "]" + "*** Mem table: " - << options.memtable_factory->Name() << std::endl; - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - std::map entry_maps[3], whole_map; - for (uint64_t i = 0; i < 10; i++) { - int div = i % 3 + 1; - for (uint64_t j = 0; j < 10; j++) { - whole_map[TestKey(i, j)] = entry_maps[rnd.Uniform(div)][TestKey(i, j)] = - 'v' + std::to_string(i) + std::to_string(j); - } - } - - std::map type_map; - for (size_t i = 0; i < 3; i++) { - for (auto& kv : entry_maps[i]) { - if (rnd.OneIn(3)) { - PutKey(db.get(), write_options, kv.first, kv.second); - type_map[kv.first] = "value"; - } else { - MergeKey(db.get(), write_options, kv.first, kv.second); - type_map[kv.first] = "merge"; - } - } - if (i < 2) { - ASSERT_OK(db->Flush(FlushOptions())); - } - } - - for (size_t i = 0; i < 2; i++) { - for (auto& kv : entry_maps[i]) { - if (rnd.OneIn(10)) { - whole_map.erase(kv.first); - DeleteKey(db.get(), write_options, kv.first); - entry_maps[2][kv.first] = "delete"; - } - } - } - - if (FLAGS_enable_print) { - for (size_t i = 0; i < 3; i++) { - for (auto& kv : entry_maps[i]) { - std::cout << "[" << i << "]" << kv.first.prefix << kv.first.sorted - << " " << kv.second + " " + type_map[kv.first] << std::endl; - } - } - } - - std::unique_ptr iter(db->NewIterator(read_options)); - for (uint64_t prefix = 0; prefix < 10; prefix++) { - uint64_t start_suffix = rnd.Uniform(9); - SeekIterator(iter.get(), prefix, start_suffix); - auto it = whole_map.find(TestKey(prefix, start_suffix)); - if (it == whole_map.end()) { - continue; - } - ASSERT_NE(it, whole_map.end()); - ASSERT_TRUE(iter->Valid()); - if (FLAGS_enable_print) { - std::cout << "round " << prefix - << " iter: " << SliceToTestKey(iter->key()).prefix - << SliceToTestKey(iter->key()).sorted - << " | map: " << it->first.prefix << it->first.sorted << " | " - << iter->value().ToString() << " " << it->second << std::endl; - } - ASSERT_EQ(iter->value(), it->second); - uint64_t stored_prefix = prefix; - for (size_t k = 0; k < 9; k++) { - if (rnd.OneIn(2) || it == whole_map.begin()) { - iter->Next(); - ++it; - if (FLAGS_enable_print) { - std::cout << "Next >> "; - } - } else { - iter->Prev(); - it--; - if (FLAGS_enable_print) { - std::cout << "Prev >> "; - } - } - if (!iter->Valid() || - SliceToTestKey(iter->key()).prefix != stored_prefix) { - break; - } - ASSERT_OK(iter->status()); - stored_prefix = SliceToTestKey(iter->key()).prefix; - ASSERT_TRUE(iter->Valid()); - ASSERT_NE(it, whole_map.end()); - ASSERT_EQ(iter->value(), it->second); - if (FLAGS_enable_print) { - std::cout << "iter: " << SliceToTestKey(iter->key()).prefix - << SliceToTestKey(iter->key()).sorted - << " | map: " << it->first.prefix << it->first.sorted - << " | " << iter->value().ToString() << " " << it->second - << std::endl; - } - } - } - } -} - -TEST_F(PrefixTest, PrefixSeekModePrev2) { - // Only for SkipListFactory - // test the case - // iter1 iter2 - // | prefix | suffix | | prefix | suffix | - // | 1 | 1 | | 1 | 2 | - // | 1 | 3 | | 1 | 4 | - // | 2 | 1 | | 3 | 3 | - // | 2 | 2 | | 3 | 4 | - // after seek(15), iter1 will be at 21 and iter2 will be 33. - // Then if call Prev() in prefix mode where SeekForPrev(21) gets called, - // iter2 should turn to invalid state because of bloom filter. 
- options.memtable_factory.reset(new SkipListFactory); - options.write_buffer_size = 1024 * 1024; - std::string v13("v13"); - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - PutKey(db.get(), write_options, TestKey(1, 2), "v12"); - PutKey(db.get(), write_options, TestKey(1, 4), "v14"); - PutKey(db.get(), write_options, TestKey(3, 3), "v33"); - PutKey(db.get(), write_options, TestKey(3, 4), "v34"); - ASSERT_OK(db->Flush(FlushOptions())); - ASSERT_OK( - static_cast_with_check(db.get())->TEST_WaitForFlushMemTable()); - PutKey(db.get(), write_options, TestKey(1, 1), "v11"); - PutKey(db.get(), write_options, TestKey(1, 3), "v13"); - PutKey(db.get(), write_options, TestKey(2, 1), "v21"); - PutKey(db.get(), write_options, TestKey(2, 2), "v22"); - ASSERT_OK(db->Flush(FlushOptions())); - ASSERT_OK( - static_cast_with_check(db.get())->TEST_WaitForFlushMemTable()); - std::unique_ptr iter(db->NewIterator(read_options)); - SeekIterator(iter.get(), 1, 5); - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->value(), v13); -} - -TEST_F(PrefixTest, PrefixSeekModePrev3) { - // Only for SkipListFactory - // test SeekToLast() with iterate_upper_bound_ in prefix_seek_mode - options.memtable_factory.reset(new SkipListFactory); - options.write_buffer_size = 1024 * 1024; - std::string v14("v14"); - TestKey upper_bound_key = TestKey(1, 5); - std::string s; - Slice upper_bound = TestKeyToSlice(s, upper_bound_key); - - { - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - read_options.iterate_upper_bound = &upper_bound; - PutKey(db.get(), write_options, TestKey(1, 2), "v12"); - PutKey(db.get(), write_options, TestKey(1, 4), "v14"); - ASSERT_OK(db->Flush(FlushOptions())); - ASSERT_OK( - static_cast_with_check(db.get())->TEST_WaitForFlushMemTable()); - PutKey(db.get(), write_options, TestKey(1, 1), "v11"); - PutKey(db.get(), write_options, TestKey(1, 3), "v13"); - PutKey(db.get(), write_options, TestKey(2, 1), "v21"); - PutKey(db.get(), write_options, TestKey(2, 2), "v22"); - ASSERT_OK(db->Flush(FlushOptions())); - ASSERT_OK( - static_cast_with_check(db.get())->TEST_WaitForFlushMemTable()); - std::unique_ptr iter(db->NewIterator(read_options)); - iter->SeekToLast(); - ASSERT_EQ(iter->value(), v14); - } - { - ASSERT_OK(DestroyDB(kDbName, Options())); - auto db = OpenDb(); - WriteOptions write_options; - ReadOptions read_options; - read_options.iterate_upper_bound = &upper_bound; - PutKey(db.get(), write_options, TestKey(1, 2), "v12"); - PutKey(db.get(), write_options, TestKey(1, 4), "v14"); - PutKey(db.get(), write_options, TestKey(3, 3), "v33"); - PutKey(db.get(), write_options, TestKey(3, 4), "v34"); - ASSERT_OK(db->Flush(FlushOptions())); - ASSERT_OK( - static_cast_with_check(db.get())->TEST_WaitForFlushMemTable()); - PutKey(db.get(), write_options, TestKey(1, 1), "v11"); - PutKey(db.get(), write_options, TestKey(1, 3), "v13"); - ASSERT_OK(db->Flush(FlushOptions())); - ASSERT_OK( - static_cast_with_check(db.get())->TEST_WaitForFlushMemTable()); - std::unique_ptr iter(db->NewIterator(read_options)); - iter->SeekToLast(); - ASSERT_EQ(iter->value(), v14); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - ParseCommandLineFlags(&argc, &argv, true); - return RUN_ALL_TESTS(); -} - -#endif // GFLAGS - diff --git 
a/db/range_del_aggregator_test.cc b/db/range_del_aggregator_test.cc deleted file mode 100644 index 89391c924..000000000 --- a/db/range_del_aggregator_test.cc +++ /dev/null @@ -1,713 +0,0 @@ -// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/range_del_aggregator.h" - -#include -#include -#include - -#include "db/db_test_util.h" -#include "db/dbformat.h" -#include "db/range_tombstone_fragmenter.h" -#include "test_util/testutil.h" -#include "util/vector_iterator.h" - -namespace ROCKSDB_NAMESPACE { - -class RangeDelAggregatorTest : public testing::Test {}; - -namespace { - -static auto bytewise_icmp = InternalKeyComparator(BytewiseComparator()); - -std::unique_ptr MakeRangeDelIter( - const std::vector& range_dels) { - std::vector keys, values; - for (const auto& range_del : range_dels) { - auto key_and_value = range_del.Serialize(); - keys.push_back(key_and_value.first.Encode().ToString()); - values.push_back(key_and_value.second.ToString()); - } - return std::unique_ptr( - new VectorIterator(keys, values, &bytewise_icmp)); -} - -std::vector> -MakeFragmentedTombstoneLists( - const std::vector>& range_dels_list) { - std::vector> fragment_lists; - for (const auto& range_dels : range_dels_list) { - auto range_del_iter = MakeRangeDelIter(range_dels); - fragment_lists.emplace_back(new FragmentedRangeTombstoneList( - std::move(range_del_iter), bytewise_icmp)); - } - return fragment_lists; -} - -struct TruncatedIterScanTestCase { - ParsedInternalKey start; - ParsedInternalKey end; - SequenceNumber seq; -}; - -struct TruncatedIterSeekTestCase { - Slice target; - ParsedInternalKey start; - ParsedInternalKey end; - SequenceNumber seq; - bool invalid; -}; - -struct ShouldDeleteTestCase { - ParsedInternalKey lookup_key; - bool result; -}; - -struct IsRangeOverlappedTestCase { - Slice start; - Slice end; - bool result; -}; - -ParsedInternalKey UncutEndpoint(const Slice& s) { - return ParsedInternalKey(s, kMaxSequenceNumber, kTypeRangeDeletion); -} - -ParsedInternalKey InternalValue(const Slice& key, SequenceNumber seq, - ValueType type = kTypeValue) { - return ParsedInternalKey(key, seq, type); -} - -void VerifyIterator( - TruncatedRangeDelIterator* iter, const InternalKeyComparator& icmp, - const std::vector& expected_range_dels) { - // Test forward iteration. - iter->SeekToFirst(); - for (size_t i = 0; i < expected_range_dels.size(); i++, iter->Next()) { - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(0, icmp.Compare(iter->start_key(), expected_range_dels[i].start)); - EXPECT_EQ(0, icmp.Compare(iter->end_key(), expected_range_dels[i].end)); - EXPECT_EQ(expected_range_dels[i].seq, iter->seq()); - } - EXPECT_FALSE(iter->Valid()); - - // Test reverse iteration. 
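// The same expectations are then replayed in reverse: starting from SeekToLast(),
// Prev() should visit the fragments in the opposite order of the forward pass above.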
- iter->SeekToLast(); - std::vector reverse_expected_range_dels( - expected_range_dels.rbegin(), expected_range_dels.rend()); - for (size_t i = 0; i < reverse_expected_range_dels.size(); - i++, iter->Prev()) { - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(0, icmp.Compare(iter->start_key(), - reverse_expected_range_dels[i].start)); - EXPECT_EQ( - 0, icmp.Compare(iter->end_key(), reverse_expected_range_dels[i].end)); - EXPECT_EQ(reverse_expected_range_dels[i].seq, iter->seq()); - } - EXPECT_FALSE(iter->Valid()); -} - -void VerifySeek(TruncatedRangeDelIterator* iter, - const InternalKeyComparator& icmp, - const std::vector& test_cases) { - for (const auto& test_case : test_cases) { - iter->Seek(test_case.target); - if (test_case.invalid) { - ASSERT_FALSE(iter->Valid()); - } else { - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(0, icmp.Compare(iter->start_key(), test_case.start)); - EXPECT_EQ(0, icmp.Compare(iter->end_key(), test_case.end)); - EXPECT_EQ(test_case.seq, iter->seq()); - } - } -} - -void VerifySeekForPrev( - TruncatedRangeDelIterator* iter, const InternalKeyComparator& icmp, - const std::vector& test_cases) { - for (const auto& test_case : test_cases) { - iter->SeekForPrev(test_case.target); - if (test_case.invalid) { - ASSERT_FALSE(iter->Valid()); - } else { - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(0, icmp.Compare(iter->start_key(), test_case.start)); - EXPECT_EQ(0, icmp.Compare(iter->end_key(), test_case.end)); - EXPECT_EQ(test_case.seq, iter->seq()); - } - } -} - -void VerifyShouldDelete(RangeDelAggregator* range_del_agg, - const std::vector& test_cases) { - for (const auto& test_case : test_cases) { - EXPECT_EQ( - test_case.result, - range_del_agg->ShouldDelete( - test_case.lookup_key, RangeDelPositioningMode::kForwardTraversal)); - } - for (auto it = test_cases.rbegin(); it != test_cases.rend(); ++it) { - const auto& test_case = *it; - EXPECT_EQ( - test_case.result, - range_del_agg->ShouldDelete( - test_case.lookup_key, RangeDelPositioningMode::kBackwardTraversal)); - } -} - -void VerifyIsRangeOverlapped( - ReadRangeDelAggregator* range_del_agg, - const std::vector& test_cases) { - for (const auto& test_case : test_cases) { - EXPECT_EQ(test_case.result, - range_del_agg->IsRangeOverlapped(test_case.start, test_case.end)); - } -} - -void CheckIterPosition(const RangeTombstone& tombstone, - const FragmentedRangeTombstoneIterator* iter) { - // Test InternalIterator interface. - EXPECT_EQ(tombstone.start_key_, ExtractUserKey(iter->key())); - EXPECT_EQ(tombstone.end_key_, iter->value()); - EXPECT_EQ(tombstone.seq_, iter->seq()); - - // Test FragmentedRangeTombstoneIterator interface. 
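// Unlike the InternalIterator view checked above (start key packed into key(),
// end key carried in value()), these accessors expose the tombstone's start key,
// end key and sequence number directly.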
- EXPECT_EQ(tombstone.start_key_, iter->start_key()); - EXPECT_EQ(tombstone.end_key_, iter->end_key()); - EXPECT_EQ(tombstone.seq_, GetInternalKeySeqno(iter->key())); -} - -void VerifyFragmentedRangeDels( - FragmentedRangeTombstoneIterator* iter, - const std::vector& expected_tombstones) { - iter->SeekToFirst(); - for (size_t i = 0; i < expected_tombstones.size(); i++, iter->Next()) { - ASSERT_TRUE(iter->Valid()); - CheckIterPosition(expected_tombstones[i], iter); - } - EXPECT_FALSE(iter->Valid()); -} - -} // anonymous namespace - -TEST_F(RangeDelAggregatorTest, EmptyTruncatedIter) { - auto range_del_iter = MakeRangeDelIter({}); - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, - kMaxSequenceNumber)); - - TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, - nullptr); - - iter.SeekToFirst(); - ASSERT_FALSE(iter.Valid()); - - iter.SeekToLast(); - ASSERT_FALSE(iter.Valid()); -} - -TEST_F(RangeDelAggregatorTest, UntruncatedIter) { - auto range_del_iter = - MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}}); - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, - kMaxSequenceNumber)); - - TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, - nullptr); - - VerifyIterator( - &iter, bytewise_icmp, - {{InternalValue("a", 10, kTypeRangeDeletion), UncutEndpoint("e"), 10}, - {InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4}}); - - VerifySeek( - &iter, bytewise_icmp, - {{"d", InternalValue("a", 10, kTypeRangeDeletion), UncutEndpoint("e"), - 10}, - {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"ia", InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4}, - {"n", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0, - true /* invalid */}, - {"", InternalValue("a", 10, kTypeRangeDeletion), UncutEndpoint("e"), - 10}}); - - VerifySeekForPrev( - &iter, bytewise_icmp, - {{"d", InternalValue("a", 10, kTypeRangeDeletion), UncutEndpoint("e"), - 10}, - {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"ia", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"n", InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4}, - {"", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0, - true /* invalid */}}); -} - -TEST_F(RangeDelAggregatorTest, UntruncatedIterWithSnapshot) { - auto range_del_iter = - MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}}); - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, - 9 /* snapshot */)); - - TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, - nullptr); - - VerifyIterator( - &iter, bytewise_icmp, - {{InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4}}); - - VerifySeek( - &iter, bytewise_icmp, - {{"d", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"ia", InternalValue("j", 4, 
kTypeRangeDeletion), UncutEndpoint("n"), 4}, - {"n", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0, - true /* invalid */}, - {"", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}}); - - VerifySeekForPrev( - &iter, bytewise_icmp, - {{"d", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0, - true /* invalid */}, - {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"ia", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"n", InternalValue("j", 4, kTypeRangeDeletion), UncutEndpoint("n"), 4}, - {"", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0, - true /* invalid */}}); -} - -TEST_F(RangeDelAggregatorTest, TruncatedIterPartiallyCutTombstones) { - auto range_del_iter = - MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}}); - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, - kMaxSequenceNumber)); - - InternalKey smallest("d", 7, kTypeValue); - InternalKey largest("m", 9, kTypeValue); - TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, - &smallest, &largest); - - VerifyIterator( - &iter, bytewise_icmp, - {{InternalValue("d", 7, kTypeMaxValid), UncutEndpoint("e"), 10}, - {InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {InternalValue("j", 4, kTypeRangeDeletion), - InternalValue("m", 8, kTypeMaxValid), 4}}); - - VerifySeek( - &iter, bytewise_icmp, - {{"d", InternalValue("d", 7, kTypeMaxValid), UncutEndpoint("e"), 10}, - {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"ia", InternalValue("j", 4, kTypeRangeDeletion), - InternalValue("m", 8, kTypeMaxValid), 4, false /* invalid */}, - {"n", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0, - true /* invalid */}, - {"", InternalValue("d", 7, kTypeMaxValid), UncutEndpoint("e"), 10}}); - - VerifySeekForPrev( - &iter, bytewise_icmp, - {{"d", InternalValue("d", 7, kTypeMaxValid), UncutEndpoint("e"), 10}, - {"e", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"ia", InternalValue("e", 8, kTypeRangeDeletion), UncutEndpoint("g"), 8}, - {"n", InternalValue("j", 4, kTypeRangeDeletion), - InternalValue("m", 8, kTypeMaxValid), 4, false /* invalid */}, - {"", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0, - true /* invalid */}}); -} - -TEST_F(RangeDelAggregatorTest, TruncatedIterFullyCutTombstones) { - auto range_del_iter = - MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}}); - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, - kMaxSequenceNumber)); - - InternalKey smallest("f", 7, kTypeValue); - InternalKey largest("i", 9, kTypeValue); - TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, - &smallest, &largest); - - VerifyIterator( - &iter, bytewise_icmp, - {{InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8}}); - - VerifySeek( - &iter, bytewise_icmp, - {{"d", InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8}, - {"f", InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8}, - {"j", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0, - true /* invalid */}}); - - VerifySeekForPrev( - &iter, bytewise_icmp, - {{"d", InternalValue("", 0, kTypeRangeDeletion), UncutEndpoint(""), 0, - true 
/* invalid */}, - {"f", InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8}, - {"j", InternalValue("f", 7, kTypeMaxValid), UncutEndpoint("g"), 8}}); -} - -TEST_F(RangeDelAggregatorTest, SingleIterInAggregator) { - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}}); - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, - kMaxSequenceNumber)); - - ReadRangeDelAggregator range_del_agg(&bytewise_icmp, kMaxSequenceNumber); - range_del_agg.AddTombstones(std::move(input_iter)); - - VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), false}, - {InternalValue("b", 9), true}, - {InternalValue("d", 9), true}, - {InternalValue("e", 7), true}, - {InternalValue("g", 7), false}}); - - VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, - {"_", "a", true}, - {"a", "c", true}, - {"d", "f", true}, - {"g", "l", false}}); -} - -TEST_F(RangeDelAggregatorTest, MultipleItersInAggregator) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "e", 10}, {"c", "g", 8}}, - {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); - - ReadRangeDelAggregator range_del_agg(&bytewise_icmp, kMaxSequenceNumber); - for (const auto& fragment_list : fragment_lists) { - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, - kMaxSequenceNumber)); - range_del_agg.AddTombstones(std::move(input_iter)); - } - - VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), true}, - {InternalValue("b", 19), false}, - {InternalValue("b", 9), true}, - {InternalValue("d", 9), true}, - {InternalValue("e", 7), true}, - {InternalValue("g", 7), false}, - {InternalValue("h", 24), true}, - {InternalValue("i", 24), false}, - {InternalValue("ii", 14), true}, - {InternalValue("j", 14), false}}); - - VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, - {"_", "a", true}, - {"a", "c", true}, - {"d", "f", true}, - {"g", "l", true}, - {"x", "y", false}}); -} - -TEST_F(RangeDelAggregatorTest, MultipleItersInAggregatorWithUpperBound) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "e", 10}, {"c", "g", 8}}, - {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); - - ReadRangeDelAggregator range_del_agg(&bytewise_icmp, 19); - for (const auto& fragment_list : fragment_lists) { - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, - 19 /* snapshot */)); - range_del_agg.AddTombstones(std::move(input_iter)); - } - - VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), false}, - {InternalValue("a", 9), true}, - {InternalValue("b", 9), true}, - {InternalValue("d", 9), true}, - {InternalValue("e", 7), true}, - {InternalValue("g", 7), false}, - {InternalValue("h", 24), false}, - {InternalValue("i", 24), false}, - {InternalValue("ii", 14), true}, - {InternalValue("j", 14), false}}); - - VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, - {"_", "a", true}, - {"a", "c", true}, - {"d", "f", true}, - {"g", "l", true}, - {"x", "y", false}}); -} - -TEST_F(RangeDelAggregatorTest, MultipleTruncatedItersInAggregator) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "z", 10}}, {{"a", "z", 10}}, {{"a", "z", 10}}}); - std::vector> iter_bounds = { - {InternalKey("a", 4, kTypeValue), - InternalKey("m", kMaxSequenceNumber, kTypeRangeDeletion)}, - {InternalKey("m", 20, kTypeValue), - InternalKey("x", kMaxSequenceNumber, 
kTypeRangeDeletion)}, - {InternalKey("x", 5, kTypeValue), InternalKey("zz", 30, kTypeValue)}}; - - ReadRangeDelAggregator range_del_agg(&bytewise_icmp, 19); - for (size_t i = 0; i < fragment_lists.size(); i++) { - const auto& fragment_list = fragment_lists[i]; - const auto& bounds = iter_bounds[i]; - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, - 19 /* snapshot */)); - range_del_agg.AddTombstones(std::move(input_iter), &bounds.first, - &bounds.second); - } - - VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 10), false}, - {InternalValue("a", 9), false}, - {InternalValue("a", 4), true}, - {InternalValue("m", 10), false}, - {InternalValue("m", 9), true}, - {InternalValue("x", 10), false}, - {InternalValue("x", 9), false}, - {InternalValue("x", 5), true}, - {InternalValue("z", 9), false}}); - - VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, - {"_", "a", true}, - {"a", "n", true}, - {"l", "x", true}, - {"w", "z", true}, - {"zzz", "zz", false}, - {"zz", "zzz", false}}); -} - -TEST_F(RangeDelAggregatorTest, MultipleTruncatedItersInAggregatorSameLevel) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "z", 10}}, {{"a", "z", 10}}, {{"a", "z", 10}}}); - std::vector> iter_bounds = { - {InternalKey("a", 4, kTypeValue), - InternalKey("m", kMaxSequenceNumber, kTypeRangeDeletion)}, - {InternalKey("m", 20, kTypeValue), - InternalKey("x", kMaxSequenceNumber, kTypeRangeDeletion)}, - {InternalKey("x", 5, kTypeValue), InternalKey("zz", 30, kTypeValue)}}; - - ReadRangeDelAggregator range_del_agg(&bytewise_icmp, 19); - - auto add_iter_to_agg = [&](size_t i) { - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_lists[i].get(), - bytewise_icmp, 19 /* snapshot */)); - range_del_agg.AddTombstones(std::move(input_iter), &iter_bounds[i].first, - &iter_bounds[i].second); - }; - - add_iter_to_agg(0); - VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 10), false}, - {InternalValue("a", 9), false}, - {InternalValue("a", 4), true}}); - - add_iter_to_agg(1); - VerifyShouldDelete(&range_del_agg, {{InternalValue("m", 10), false}, - {InternalValue("m", 9), true}}); - - add_iter_to_agg(2); - VerifyShouldDelete(&range_del_agg, {{InternalValue("x", 10), false}, - {InternalValue("x", 9), false}, - {InternalValue("x", 5), true}, - {InternalValue("z", 9), false}}); - - VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, - {"_", "a", true}, - {"a", "n", true}, - {"l", "x", true}, - {"w", "z", true}, - {"zzz", "zz", false}, - {"zz", "zzz", false}}); -} - -TEST_F(RangeDelAggregatorTest, CompactionAggregatorNoSnapshots) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "e", 10}, {"c", "g", 8}}, - {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); - - std::vector snapshots; - CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); - for (const auto& fragment_list : fragment_lists) { - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, - kMaxSequenceNumber)); - range_del_agg.AddTombstones(std::move(input_iter)); - } - - VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), true}, - {InternalValue("b", 19), false}, - {InternalValue("b", 9), true}, - {InternalValue("d", 9), true}, - {InternalValue("e", 7), true}, - {InternalValue("g", 7), false}, - {InternalValue("h", 24), true}, - {InternalValue("i", 24), false}, - {InternalValue("ii", 14), true}, - {InternalValue("j", 14), false}}); - - auto 
range_del_compaction_iter = range_del_agg.NewIterator(); - VerifyFragmentedRangeDels(range_del_compaction_iter.get(), {{"a", "b", 20}, - {"b", "c", 10}, - {"c", "e", 10}, - {"e", "g", 8}, - {"h", "i", 25}, - {"ii", "j", 15}}); -} - -TEST_F(RangeDelAggregatorTest, CompactionAggregatorWithSnapshots) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "e", 10}, {"c", "g", 8}}, - {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); - - std::vector<SequenceNumber> snapshots{9, 19}; - CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); - for (const auto& fragment_list : fragment_lists) { - std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, - kMaxSequenceNumber)); - range_del_agg.AddTombstones(std::move(input_iter)); - } - - VerifyShouldDelete( - &range_del_agg, - { - {InternalValue("a", 19), false}, // [10, 19] - {InternalValue("a", 9), false}, // [0, 9] - {InternalValue("b", 9), false}, // [0, 9] - {InternalValue("d", 9), false}, // [0, 9] - {InternalValue("d", 7), true}, // [0, 9] - {InternalValue("e", 7), true}, // [0, 9] - {InternalValue("g", 7), false}, // [0, 9] - {InternalValue("h", 24), true}, // [20, kMaxSequenceNumber] - {InternalValue("i", 24), false}, // [20, kMaxSequenceNumber] - {InternalValue("ii", 14), true}, // [10, 19] - {InternalValue("j", 14), false} // [10, 19] - }); - - auto range_del_compaction_iter = range_del_agg.NewIterator(); - VerifyFragmentedRangeDels(range_del_compaction_iter.get(), {{"a", "b", 20}, - {"a", "b", 10}, - {"b", "c", 10}, - {"c", "e", 10}, - {"c", "e", 8}, - {"e", "g", 8}, - {"h", "i", 25}, - {"ii", "j", 15}}); -} - -TEST_F(RangeDelAggregatorTest, CompactionAggregatorEmptyIteratorLeft) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "e", 10}, {"c", "g", 8}}, - {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); - - std::vector<SequenceNumber> snapshots{9, 19}; - CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); - for (const auto& fragment_list : fragment_lists) { - std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, - kMaxSequenceNumber)); - range_del_agg.AddTombstones(std::move(input_iter)); - } - - Slice start("_"); - Slice end("__"); -} - -TEST_F(RangeDelAggregatorTest, CompactionAggregatorEmptyIteratorRight) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "e", 10}, {"c", "g", 8}}, - {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); - - std::vector<SequenceNumber> snapshots{9, 19}; - CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); - for (const auto& fragment_list : fragment_lists) { - std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, - kMaxSequenceNumber)); - range_del_agg.AddTombstones(std::move(input_iter)); - } - - InternalKey start_buf("p", 0, kTypeRangeDeletion); - InternalKey end_buf("q", 0, kTypeRangeDeletion); - Slice start = start_buf.Encode(); - Slice end = end_buf.Encode(); - auto range_del_compaction_iter = range_del_agg.NewIterator(&start, &end); - VerifyFragmentedRangeDels(range_del_compaction_iter.get(), {}); -} - -TEST_F(RangeDelAggregatorTest, CompactionAggregatorBoundedIterator) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "e", 10}, {"c", "g", 8}}, - {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); - - std::vector<SequenceNumber> snapshots{9, 19}; - CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); - for (const auto& fragment_list : fragment_lists) { - std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter( -
new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, - kMaxSequenceNumber)); - range_del_agg.AddTombstones(std::move(input_iter)); - } - - InternalKey start_buf("bb", 0, kTypeRangeDeletion); - InternalKey end_buf("e", 9, kTypeRangeDeletion); - Slice start = start_buf.Encode(); - Slice end = end_buf.Encode(); - auto range_del_compaction_iter = range_del_agg.NewIterator(&start, &end); - VerifyFragmentedRangeDels(range_del_compaction_iter.get(), - {{"a", "c", 10}, {"c", "e", 10}, {"c", "e", 8}}); -} - -TEST_F(RangeDelAggregatorTest, - CompactionAggregatorBoundedIteratorExtraFragments) { - auto fragment_lists = MakeFragmentedTombstoneLists( - {{{"a", "d", 10}, {"c", "g", 8}}, - {{"b", "c", 20}, {"d", "f", 30}, {"h", "i", 25}, {"ii", "j", 15}}}); - - std::vector snapshots{9, 19}; - CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); - for (const auto& fragment_list : fragment_lists) { - std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, - kMaxSequenceNumber)); - range_del_agg.AddTombstones(std::move(input_iter)); - } - - InternalKey start_buf("bb", 0, kTypeRangeDeletion); - InternalKey end_buf("e", 0, kTypeRangeDeletion); - Slice start = start_buf.Encode(); - Slice end = end_buf.Encode(); - auto range_del_compaction_iter = range_del_agg.NewIterator(&start, &end); - VerifyFragmentedRangeDels(range_del_compaction_iter.get(), {{"a", "b", 10}, - {"b", "c", 20}, - {"b", "c", 10}, - {"c", "d", 10}, - {"c", "d", 8}, - {"d", "f", 30}, - {"d", "f", 8}, - {"f", "g", 8}}); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/range_tombstone_fragmenter_test.cc b/db/range_tombstone_fragmenter_test.cc deleted file mode 100644 index eee2ca2ca..000000000 --- a/db/range_tombstone_fragmenter_test.cc +++ /dev/null @@ -1,555 +0,0 @@ -// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/range_tombstone_fragmenter.h" - -#include "db/db_test_util.h" -#include "db/dbformat.h" -#include "rocksdb/comparator.h" -#include "test_util/testutil.h" -#include "util/vector_iterator.h" - -namespace ROCKSDB_NAMESPACE { - -class RangeTombstoneFragmenterTest : public testing::Test {}; - -namespace { - -static auto bytewise_icmp = InternalKeyComparator(BytewiseComparator()); - -std::unique_ptr MakeRangeDelIter( - const std::vector& range_dels) { - std::vector keys, values; - for (const auto& range_del : range_dels) { - auto key_and_value = range_del.Serialize(); - keys.push_back(key_and_value.first.Encode().ToString()); - values.push_back(key_and_value.second.ToString()); - } - return std::unique_ptr( - new VectorIterator(keys, values, &bytewise_icmp)); -} - -void CheckIterPosition(const RangeTombstone& tombstone, - const FragmentedRangeTombstoneIterator* iter) { - // Test InternalIterator interface. - EXPECT_EQ(tombstone.start_key_, ExtractUserKey(iter->key())); - EXPECT_EQ(tombstone.end_key_, iter->value()); - EXPECT_EQ(tombstone.seq_, iter->seq()); - - // Test FragmentedRangeTombstoneIterator interface. 
- EXPECT_EQ(tombstone.start_key_, iter->start_key()); - EXPECT_EQ(tombstone.end_key_, iter->end_key()); - EXPECT_EQ(tombstone.seq_, GetInternalKeySeqno(iter->key())); -} - -void VerifyFragmentedRangeDels( - FragmentedRangeTombstoneIterator* iter, - const std::vector& expected_tombstones) { - iter->SeekToFirst(); - for (size_t i = 0; i < expected_tombstones.size(); i++, iter->Next()) { - ASSERT_TRUE(iter->Valid()); - CheckIterPosition(expected_tombstones[i], iter); - } - EXPECT_FALSE(iter->Valid()); -} - -void VerifyVisibleTombstones( - FragmentedRangeTombstoneIterator* iter, - const std::vector& expected_tombstones) { - iter->SeekToTopFirst(); - for (size_t i = 0; i < expected_tombstones.size(); i++, iter->TopNext()) { - ASSERT_TRUE(iter->Valid()); - CheckIterPosition(expected_tombstones[i], iter); - } - EXPECT_FALSE(iter->Valid()); -} - -struct SeekTestCase { - Slice seek_target; - RangeTombstone expected_position; - bool out_of_range; -}; - -void VerifySeek(FragmentedRangeTombstoneIterator* iter, - const std::vector& cases) { - for (const auto& testcase : cases) { - iter->Seek(testcase.seek_target); - if (testcase.out_of_range) { - ASSERT_FALSE(iter->Valid()); - } else { - ASSERT_TRUE(iter->Valid()); - CheckIterPosition(testcase.expected_position, iter); - } - } -} - -void VerifySeekForPrev(FragmentedRangeTombstoneIterator* iter, - const std::vector& cases) { - for (const auto& testcase : cases) { - iter->SeekForPrev(testcase.seek_target); - if (testcase.out_of_range) { - ASSERT_FALSE(iter->Valid()); - } else { - ASSERT_TRUE(iter->Valid()); - CheckIterPosition(testcase.expected_position, iter); - } - } -} - -struct MaxCoveringTombstoneSeqnumTestCase { - Slice user_key; - SequenceNumber result; -}; - -void VerifyMaxCoveringTombstoneSeqnum( - FragmentedRangeTombstoneIterator* iter, - const std::vector& cases) { - for (const auto& testcase : cases) { - EXPECT_EQ(testcase.result, - iter->MaxCoveringTombstoneSeqnum(testcase.user_key)); - } -} - -} // anonymous namespace - -TEST_F(RangeTombstoneFragmenterTest, NonOverlappingTombstones) { - auto range_del_iter = MakeRangeDelIter({{"a", "b", 10}, {"c", "d", 5}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - ASSERT_EQ(0, iter.lower_bound()); - ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); - VerifyFragmentedRangeDels(&iter, {{"a", "b", 10}, {"c", "d", 5}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, - {{"", 0}, {"a", 10}, {"b", 0}, {"c", 5}}); -} - -TEST_F(RangeTombstoneFragmenterTest, OverlappingTombstones) { - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 15}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - ASSERT_EQ(0, iter.lower_bound()); - ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); - VerifyFragmentedRangeDels( - &iter, {{"a", "c", 10}, {"c", "e", 15}, {"c", "e", 10}, {"e", "g", 15}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, - {{"a", 10}, {"c", 15}, {"e", 15}, {"g", 0}}); -} - -TEST_F(RangeTombstoneFragmenterTest, ContiguousTombstones) { - auto range_del_iter = MakeRangeDelIter( - {{"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - 
ASSERT_EQ(0, iter.lower_bound()); - ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); - VerifyFragmentedRangeDels( - &iter, {{"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, - {{"a", 10}, {"c", 20}, {"e", 15}, {"g", 0}}); -} - -TEST_F(RangeTombstoneFragmenterTest, RepeatedStartAndEndKey) { - auto range_del_iter = - MakeRangeDelIter({{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - ASSERT_EQ(0, iter.lower_bound()); - ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); - VerifyFragmentedRangeDels(&iter, - {{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"b", 10}, {"c", 0}}); -} - -TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyDifferentEndKeys) { - auto range_del_iter = - MakeRangeDelIter({{"a", "e", 10}, {"a", "g", 7}, {"a", "c", 3}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - ASSERT_EQ(0, iter.lower_bound()); - ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); - VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, - {"a", "c", 7}, - {"a", "c", 3}, - {"c", "e", 10}, - {"c", "e", 7}, - {"e", "g", 7}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, - {{"a", 10}, {"c", 10}, {"e", 7}, {"g", 0}}); -} - -TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyMixedEndKeys) { - auto range_del_iter = MakeRangeDelIter({{"a", "c", 30}, - {"a", "g", 20}, - {"a", "e", 10}, - {"a", "g", 7}, - {"a", "c", 3}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - ASSERT_EQ(0, iter.lower_bound()); - ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); - VerifyFragmentedRangeDels(&iter, {{"a", "c", 30}, - {"a", "c", 20}, - {"a", "c", 10}, - {"a", "c", 7}, - {"a", "c", 3}, - {"c", "e", 20}, - {"c", "e", 10}, - {"c", "e", 7}, - {"e", "g", 20}, - {"e", "g", 7}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, - {{"a", 30}, {"c", 20}, {"e", 20}, {"g", 0}}); -} - -TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"c", "g", 8}, - {"c", "i", 6}, - {"j", "n", 4}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, - 9 /* upper_bound */); - FragmentedRangeTombstoneIterator iter3(&fragment_list, bytewise_icmp, - 7 /* upper_bound */); - FragmentedRangeTombstoneIterator iter4(&fragment_list, bytewise_icmp, - 5 /* upper_bound */); - FragmentedRangeTombstoneIterator iter5(&fragment_list, bytewise_icmp, - 3 /* upper_bound */); - for (auto* iter : {&iter1, &iter2, &iter3, &iter4, &iter5}) { - VerifyFragmentedRangeDels(iter, {{"a", "c", 10}, - {"c", "e", 10}, - {"c", "e", 8}, - {"c", "e", 6}, - {"e", "g", 8}, - {"e", "g", 6}, - {"g", "i", 6}, - {"j", "l", 4}, - {"j", "l", 2}, - {"l", "n", 4}}); - } - - ASSERT_EQ(0, iter1.lower_bound()); - ASSERT_EQ(kMaxSequenceNumber, iter1.upper_bound()); - VerifyVisibleTombstones(&iter1, {{"a", "c", 10}, - {"c", "e", 10}, - {"e", 
"g", 8}, - {"g", "i", 6}, - {"j", "l", 4}, - {"l", "n", 4}}); - VerifyMaxCoveringTombstoneSeqnum( - &iter1, {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); - - ASSERT_EQ(0, iter2.lower_bound()); - ASSERT_EQ(9, iter2.upper_bound()); - VerifyVisibleTombstones(&iter2, {{"c", "e", 8}, - {"e", "g", 8}, - {"g", "i", 6}, - {"j", "l", 4}, - {"l", "n", 4}}); - VerifyMaxCoveringTombstoneSeqnum( - &iter2, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); - - ASSERT_EQ(0, iter3.lower_bound()); - ASSERT_EQ(7, iter3.upper_bound()); - VerifyVisibleTombstones(&iter3, {{"c", "e", 6}, - {"e", "g", 6}, - {"g", "i", 6}, - {"j", "l", 4}, - {"l", "n", 4}}); - VerifyMaxCoveringTombstoneSeqnum( - &iter3, {{"a", 0}, {"c", 6}, {"e", 6}, {"i", 0}, {"j", 4}, {"m", 4}}); - - ASSERT_EQ(0, iter4.lower_bound()); - ASSERT_EQ(5, iter4.upper_bound()); - VerifyVisibleTombstones(&iter4, {{"j", "l", 4}, {"l", "n", 4}}); - VerifyMaxCoveringTombstoneSeqnum( - &iter4, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 4}, {"m", 4}}); - - ASSERT_EQ(0, iter5.lower_bound()); - ASSERT_EQ(3, iter5.upper_bound()); - VerifyVisibleTombstones(&iter5, {{"j", "l", 2}}); - VerifyMaxCoveringTombstoneSeqnum( - &iter5, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 2}, {"m", 0}}); -} - -TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) { - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"j", "n", 4}, - {"c", "i", 6}, - {"c", "g", 8}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - 9 /* upper_bound */); - ASSERT_EQ(0, iter.lower_bound()); - ASSERT_EQ(9, iter.upper_bound()); - VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, - {"c", "e", 10}, - {"c", "e", 8}, - {"c", "e", 6}, - {"e", "g", 8}, - {"e", "g", 6}, - {"g", "i", 6}, - {"j", "l", 4}, - {"j", "l", 2}, - {"l", "n", 4}}); - VerifyMaxCoveringTombstoneSeqnum( - &iter, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); -} - -TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyForCompaction) { - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"j", "n", 4}, - {"c", "i", 6}, - {"c", "g", 8}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list( - std::move(range_del_iter), bytewise_icmp, true /* for_compaction */, - {} /* snapshots */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber /* upper_bound */); - VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, - {"c", "e", 10}, - {"e", "g", 8}, - {"g", "i", 6}, - {"j", "l", 4}, - {"l", "n", 4}}); -} - -TEST_F(RangeTombstoneFragmenterTest, - OverlapAndRepeatedStartKeyForCompactionWithSnapshot) { - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"j", "n", 4}, - {"c", "i", 6}, - {"c", "g", 8}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list( - std::move(range_del_iter), bytewise_icmp, true /* for_compaction */, - {9, 20} /* snapshots */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber /* upper_bound */); - VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, - {"c", "e", 10}, - {"c", "e", 8}, - {"e", "g", 8}, - {"g", "i", 6}, - {"j", "l", 4}, - {"l", "n", 4}}); -} - -TEST_F(RangeTombstoneFragmenterTest, IteratorSplitNoSnapshots) { - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"j", "n", 4}, - {"c", "i", 6}, - {"c", "g", 8}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList 
fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber /* upper_bound */); - - auto split_iters = iter.SplitBySnapshot({} /* snapshots */); - ASSERT_EQ(1, split_iters.size()); - - auto* split_iter = split_iters[kMaxSequenceNumber].get(); - ASSERT_EQ(0, split_iter->lower_bound()); - ASSERT_EQ(kMaxSequenceNumber, split_iter->upper_bound()); - VerifyVisibleTombstones(split_iter, {{"a", "c", 10}, - {"c", "e", 10}, - {"e", "g", 8}, - {"g", "i", 6}, - {"j", "l", 4}, - {"l", "n", 4}}); -} - -TEST_F(RangeTombstoneFragmenterTest, IteratorSplitWithSnapshots) { - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"j", "n", 4}, - {"c", "i", 6}, - {"c", "g", 8}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber /* upper_bound */); - - auto split_iters = iter.SplitBySnapshot({3, 5, 7, 9} /* snapshots */); - ASSERT_EQ(5, split_iters.size()); - - auto* split_iter1 = split_iters[3].get(); - ASSERT_EQ(0, split_iter1->lower_bound()); - ASSERT_EQ(3, split_iter1->upper_bound()); - VerifyVisibleTombstones(split_iter1, {{"j", "l", 2}}); - - auto* split_iter2 = split_iters[5].get(); - ASSERT_EQ(4, split_iter2->lower_bound()); - ASSERT_EQ(5, split_iter2->upper_bound()); - VerifyVisibleTombstones(split_iter2, {{"j", "l", 4}, {"l", "n", 4}}); - - auto* split_iter3 = split_iters[7].get(); - ASSERT_EQ(6, split_iter3->lower_bound()); - ASSERT_EQ(7, split_iter3->upper_bound()); - VerifyVisibleTombstones(split_iter3, - {{"c", "e", 6}, {"e", "g", 6}, {"g", "i", 6}}); - - auto* split_iter4 = split_iters[9].get(); - ASSERT_EQ(8, split_iter4->lower_bound()); - ASSERT_EQ(9, split_iter4->upper_bound()); - VerifyVisibleTombstones(split_iter4, {{"c", "e", 8}, {"e", "g", 8}}); - - auto* split_iter5 = split_iters[kMaxSequenceNumber].get(); - ASSERT_EQ(10, split_iter5->lower_bound()); - ASSERT_EQ(kMaxSequenceNumber, split_iter5->upper_bound()); - VerifyVisibleTombstones(split_iter5, {{"a", "c", 10}, {"c", "e", 10}}); -} - -TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) { - // Same tombstones as OverlapAndRepeatedStartKey. - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"c", "g", 8}, - {"c", "i", 6}, - {"j", "n", 4}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - - FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - VerifySeek( - &iter1, - {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); - VerifySeekForPrev( - &iter1, - {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); - - FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, - 3 /* upper_bound */); - VerifySeek(&iter2, {{"a", {"j", "l", 2}}, - {"e", {"j", "l", 2}}, - {"l", {}, true /* out of range */}}); - VerifySeekForPrev(&iter2, {{"a", {}, true /* out of range */}, - {"e", {}, true /* out of range */}, - {"l", {"j", "l", 2}}}); -} - -TEST_F(RangeTombstoneFragmenterTest, SeekCovered) { - // Same tombstones as OverlapAndRepeatedStartKey. 
- auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"c", "g", 8}, - {"c", "i", 6}, - {"j", "n", 4}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - - FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - VerifySeek( - &iter1, - {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); - VerifySeekForPrev( - &iter1, - {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); - - FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, - 3 /* upper_bound */); - VerifySeek(&iter2, {{"b", {"j", "l", 2}}, - {"f", {"j", "l", 2}}, - {"m", {}, true /* out of range */}}); - VerifySeekForPrev(&iter2, {{"b", {}, true /* out of range */}, - {"f", {}, true /* out of range */}, - {"m", {"j", "l", 2}}}); -} - -TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) { - // Same tombstones as OverlapAndRepeatedStartKey. - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"c", "g", 8}, - {"c", "i", 6}, - {"j", "n", 4}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - - FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - VerifySeek(&iter1, {{"c", {"c", "e", 10}}, - {"g", {"g", "i", 6}}, - {"i", {"j", "l", 4}}, - {"n", {}, true /* out of range */}}); - VerifySeekForPrev(&iter1, {{"c", {"c", "e", 10}}, - {"g", {"g", "i", 6}}, - {"i", {"g", "i", 6}}, - {"n", {"l", "n", 4}}}); - - FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, - 3 /* upper_bound */); - VerifySeek(&iter2, {{"c", {"j", "l", 2}}, - {"g", {"j", "l", 2}}, - {"i", {"j", "l", 2}}, - {"n", {}, true /* out of range */}}); - VerifySeekForPrev(&iter2, {{"c", {}, true /* out of range */}, - {"g", {}, true /* out of range */}, - {"i", {}, true /* out of range */}, - {"n", {"j", "l", 2}}}); -} - -TEST_F(RangeTombstoneFragmenterTest, SeekOutOfBounds) { - // Same tombstones as OverlapAndRepeatedStartKey. - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"c", "g", 8}, - {"c", "i", 6}, - {"j", "n", 4}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), - bytewise_icmp); - - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, - kMaxSequenceNumber); - VerifySeek(&iter, {{"", {"a", "c", 10}}, {"z", {}, true /* out of range */}}); - VerifySeekForPrev(&iter, - {{"", {}, true /* out of range */}, {"z", {"l", "n", 4}}}); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/repair_test.cc b/db/repair_test.cc deleted file mode 100644 index 47482699d..000000000 --- a/db/repair_test.cc +++ /dev/null @@ -1,484 +0,0 @@ -// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
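The deleted repair tests below exercise RepairDB(), which rebuilds the DB metadata (MANIFEST) by scanning the SST and WAL files it finds. A minimal sketch of that flow follows; it is illustrative only, and the "/tmp/repair_demo" path and assert-based error checks are placeholders rather than part of the deleted test code.

#include <cassert>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  const std::string dbname = "/tmp/repair_demo";  // placeholder path
  rocksdb::Options options;
  options.create_if_missing = true;

  // Write a key and flush so at least one SST file exists on disk.
  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, dbname, &db);
  assert(s.ok());
  s = db->Put(rocksdb::WriteOptions(), "key", "val");
  assert(s.ok());
  s = db->Flush(rocksdb::FlushOptions());
  assert(s.ok());
  delete db;

  // (Suppose the MANIFEST is now deleted or corrupted, as the tests arrange.)
  // RepairDB scans the data files and writes a fresh MANIFEST.
  s = rocksdb::RepairDB(dbname, options);
  assert(s.ok());

  // The repaired DB reopens and still serves the flushed data.
  s = rocksdb::DB::Open(options, dbname, &db);
  assert(s.ok());
  std::string value;
  s = db->Get(rocksdb::ReadOptions(), "key", &value);
  assert(s.ok() && value == "val");
  delete db;
  return 0;
}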
- -#include "rocksdb/options.h" - -#include -#include -#include - -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "file/file_util.h" -#include "rocksdb/comparator.h" -#include "rocksdb/db.h" -#include "rocksdb/transaction_log.h" -#include "table/unique_id_impl.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class RepairTest : public DBTestBase { - public: - RepairTest() : DBTestBase("repair_test", /*env_do_fsync=*/true) {} - - Status GetFirstSstPath(std::string* first_sst_path) { - assert(first_sst_path != nullptr); - first_sst_path->clear(); - uint64_t manifest_size; - std::vector files; - Status s = db_->GetLiveFiles(files, &manifest_size); - if (s.ok()) { - auto sst_iter = - std::find_if(files.begin(), files.end(), [](const std::string& file) { - uint64_t number; - FileType type; - bool ok = ParseFileName(file, &number, &type); - return ok && type == kTableFile; - }); - *first_sst_path = sst_iter == files.end() ? "" : dbname_ + *sst_iter; - } - return s; - } - - void ReopenWithSstIdVerify() { - std::atomic_int verify_passed{0}; - SyncPoint::GetInstance()->SetCallBack( - "BlockBasedTable::Open::PassedVerifyUniqueId", [&](void* arg) { - // override job status - auto id = static_cast(arg); - assert(*id != kNullUniqueId64x2); - verify_passed++; - }); - SyncPoint::GetInstance()->EnableProcessing(); - auto options = CurrentOptions(); - options.verify_sst_unique_id_in_manifest = true; - Reopen(options); - - ASSERT_GT(verify_passed, 0); - SyncPoint::GetInstance()->DisableProcessing(); - } - - std::vector GetLevelFileMetadatas(int level, int cf = 0) { - VersionSet* const versions = dbfull()->GetVersionSet(); - assert(versions); - ColumnFamilyData* const cfd = - versions->GetColumnFamilySet()->GetColumnFamily(cf); - assert(cfd); - Version* const current = cfd->current(); - assert(current); - VersionStorageInfo* const storage_info = current->storage_info(); - assert(storage_info); - return storage_info->LevelFiles(level); - } -}; - -TEST_F(RepairTest, SortRepairedDBL0ByEpochNumber) { - Options options = CurrentOptions(); - DestroyAndReopen(options); - - ASSERT_OK(Put("k1", "oldest")); - ASSERT_OK(Put("k1", "older")); - ASSERT_OK(Flush()); - MoveFilesToLevel(1); - - ASSERT_OK(Put("k1", "old")); - ASSERT_OK(Flush()); - - ASSERT_OK(Put("k1", "new")); - - std::vector level0_files = GetLevelFileMetadatas(0 /* level*/); - ASSERT_EQ(level0_files.size(), 1); - ASSERT_EQ(level0_files[0]->epoch_number, 2); - std::vector level1_files = GetLevelFileMetadatas(1 /* level*/); - ASSERT_EQ(level1_files.size(), 1); - ASSERT_EQ(level1_files[0]->epoch_number, 1); - - std::string manifest_path = - DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); - Close(); - ASSERT_OK(env_->FileExists(manifest_path)); - ASSERT_OK(env_->DeleteFile(manifest_path)); - - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - ReopenWithSstIdVerify(); - - EXPECT_EQ(Get("k1"), "new"); - - level0_files = GetLevelFileMetadatas(0 /* level*/); - ASSERT_EQ(level0_files.size(), 3); - EXPECT_EQ(level0_files[0]->epoch_number, 3); - EXPECT_EQ(level0_files[1]->epoch_number, 2); - EXPECT_EQ(level0_files[2]->epoch_number, 1); - level1_files = GetLevelFileMetadatas(1 /* level*/); - ASSERT_EQ(level1_files.size(), 0); -} - -TEST_F(RepairTest, LostManifest) { - // Add a couple SST files, delete the manifest, and verify RepairDB() saves - // the day. 
- ASSERT_OK(Put("key", "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("key2", "val2")); - ASSERT_OK(Flush()); - // Need to get path before Close() deletes db_, but delete it after Close() to - // ensure Close() didn't change the manifest. - std::string manifest_path = - DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); - - Close(); - ASSERT_OK(env_->FileExists(manifest_path)); - ASSERT_OK(env_->DeleteFile(manifest_path)); - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - ReopenWithSstIdVerify(); - - ASSERT_EQ(Get("key"), "val"); - ASSERT_EQ(Get("key2"), "val2"); -} - -TEST_F(RepairTest, LostManifestMoreDbFeatures) { - // Add a couple SST files, delete the manifest, and verify RepairDB() saves - // the day. - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Put("key2", "val2")); - ASSERT_OK(Put("key3", "val3")); - ASSERT_OK(Put("key4", "val4")); - ASSERT_OK(Flush()); - // Test an SST file containing only a range tombstone - ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key2", - "key3z")); - ASSERT_OK(Flush()); - // Need to get path before Close() deletes db_, but delete it after Close() to - // ensure Close() didn't change the manifest. - std::string manifest_path = - DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); - - Close(); - ASSERT_OK(env_->FileExists(manifest_path)); - ASSERT_OK(env_->DeleteFile(manifest_path)); - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - - // repair from sst should work with unique_id verification - ReopenWithSstIdVerify(); - - ASSERT_EQ(Get("key"), "val"); - ASSERT_EQ(Get("key2"), "NOT_FOUND"); - ASSERT_EQ(Get("key3"), "NOT_FOUND"); - ASSERT_EQ(Get("key4"), "val4"); -} - -TEST_F(RepairTest, CorruptManifest) { - // Manifest is in an invalid format. Expect a full recovery. - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("key2", "val2")); - ASSERT_OK(Flush()); - // Need to get path before Close() deletes db_, but overwrite it after Close() - // to ensure Close() didn't change the manifest. - std::string manifest_path = - DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); - - Close(); - ASSERT_OK(env_->FileExists(manifest_path)); - - ASSERT_OK(CreateFile(env_->GetFileSystem(), manifest_path, "blah", - false /* use_fsync */)); - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - - ReopenWithSstIdVerify(); - - ASSERT_EQ(Get("key"), "val"); - ASSERT_EQ(Get("key2"), "val2"); -} - -TEST_F(RepairTest, IncompleteManifest) { - // In this case, the manifest is valid but does not reference all of the SST - // files. Expect a full recovery. - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Flush()); - std::string orig_manifest_path = - DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); - CopyFile(orig_manifest_path, orig_manifest_path + ".tmp"); - ASSERT_OK(Put("key2", "val2")); - ASSERT_OK(Flush()); - // Need to get path before Close() deletes db_, but overwrite it after Close() - // to ensure Close() didn't change the manifest. - std::string new_manifest_path = - DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); - - Close(); - ASSERT_OK(env_->FileExists(new_manifest_path)); - // Replace the manifest with one that is only aware of the first SST file. 
- CopyFile(orig_manifest_path + ".tmp", new_manifest_path); - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - - ReopenWithSstIdVerify(); - - ASSERT_EQ(Get("key"), "val"); - ASSERT_EQ(Get("key2"), "val2"); -} - -TEST_F(RepairTest, PostRepairSstFileNumbering) { - // Verify after a DB is repaired, new files will be assigned higher numbers - // than old files. - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("key2", "val2")); - ASSERT_OK(Flush()); - uint64_t pre_repair_file_num = dbfull()->TEST_Current_Next_FileNo(); - Close(); - - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - - ReopenWithSstIdVerify(); - - uint64_t post_repair_file_num = dbfull()->TEST_Current_Next_FileNo(); - ASSERT_GE(post_repair_file_num, pre_repair_file_num); -} - -TEST_F(RepairTest, LostSst) { - // Delete one of the SST files but preserve the manifest that refers to it, - // then verify the DB is still usable for the intact SST. - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("key2", "val2")); - ASSERT_OK(Flush()); - std::string sst_path; - ASSERT_OK(GetFirstSstPath(&sst_path)); - ASSERT_FALSE(sst_path.empty()); - ASSERT_OK(env_->DeleteFile(sst_path)); - - Close(); - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - ReopenWithSstIdVerify(); - - // Exactly one of the key-value pairs should be in the DB now. - ASSERT_TRUE((Get("key") == "val") != (Get("key2") == "val2")); -} - -TEST_F(RepairTest, CorruptSst) { - // Corrupt one of the SST files but preserve the manifest that refers to it, - // then verify the DB is still usable for the intact SST. - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Flush()); - ASSERT_OK(Put("key2", "val2")); - ASSERT_OK(Flush()); - std::string sst_path; - ASSERT_OK(GetFirstSstPath(&sst_path)); - ASSERT_FALSE(sst_path.empty()); - - ASSERT_OK(CreateFile(env_->GetFileSystem(), sst_path, "blah", - false /* use_fsync */)); - - Close(); - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - ReopenWithSstIdVerify(); - - // Exactly one of the key-value pairs should be in the DB now. - ASSERT_TRUE((Get("key") == "val") != (Get("key2") == "val2")); -} - -TEST_F(RepairTest, UnflushedSst) { - // This test case invokes repair while some data is unflushed, then verifies - // that data is in the db. - ASSERT_OK(Put("key", "val")); - VectorLogPtr wal_files; - ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); - ASSERT_EQ(wal_files.size(), 1); - { - uint64_t total_ssts_size; - std::unordered_map sst_files; - ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size)); - ASSERT_EQ(total_ssts_size, 0); - } - // Need to get path before Close() deletes db_, but delete it after Close() to - // ensure Close() didn't change the manifest. 
- std::string manifest_path = - DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); - - Close(); - ASSERT_OK(env_->FileExists(manifest_path)); - ASSERT_OK(env_->DeleteFile(manifest_path)); - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - ReopenWithSstIdVerify(); - - ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); - ASSERT_EQ(wal_files.size(), 0); - { - uint64_t total_ssts_size; - std::unordered_map sst_files; - ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size)); - ASSERT_GT(total_ssts_size, 0); - } - ASSERT_EQ(Get("key"), "val"); -} - -TEST_F(RepairTest, SeparateWalDir) { - do { - Options options = CurrentOptions(); - DestroyAndReopen(options); - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Put("foo", "bar")); - VectorLogPtr wal_files; - ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); - ASSERT_EQ(wal_files.size(), 1); - { - uint64_t total_ssts_size; - std::unordered_map sst_files; - ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size)); - ASSERT_EQ(total_ssts_size, 0); - } - std::string manifest_path = - DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); - - Close(); - ASSERT_OK(env_->FileExists(manifest_path)); - ASSERT_OK(env_->DeleteFile(manifest_path)); - ASSERT_OK(RepairDB(dbname_, options)); - - // make sure that all WALs are converted to SSTables. - options.wal_dir = ""; - - ReopenWithSstIdVerify(); - ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); - ASSERT_EQ(wal_files.size(), 0); - { - uint64_t total_ssts_size; - std::unordered_map sst_files; - ASSERT_OK(GetAllDataFiles(kTableFile, &sst_files, &total_ssts_size)); - ASSERT_GT(total_ssts_size, 0); - } - ASSERT_EQ(Get("key"), "val"); - ASSERT_EQ(Get("foo"), "bar"); - - } while (ChangeWalOptions()); -} - -TEST_F(RepairTest, RepairMultipleColumnFamilies) { - // Verify repair logic associates SST files with their original column - // families. - const int kNumCfs = 3; - const int kEntriesPerCf = 2; - DestroyAndReopen(CurrentOptions()); - CreateAndReopenWithCF({"pikachu1", "pikachu2"}, CurrentOptions()); - for (int i = 0; i < kNumCfs; ++i) { - for (int j = 0; j < kEntriesPerCf; ++j) { - ASSERT_OK(Put(i, "key" + std::to_string(j), "val" + std::to_string(j))); - if (j == kEntriesPerCf - 1 && i == kNumCfs - 1) { - // Leave one unflushed so we can verify WAL entries are properly - // associated with column families. - continue; - } - ASSERT_OK(Flush(i)); - } - } - - // Need to get path before Close() deletes db_, but delete it after Close() to - // ensure Close() doesn't re-create the manifest. - std::string manifest_path = - DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); - Close(); - ASSERT_OK(env_->FileExists(manifest_path)); - ASSERT_OK(env_->DeleteFile(manifest_path)); - - ASSERT_OK(RepairDB(dbname_, CurrentOptions())); - - ReopenWithColumnFamilies({"default", "pikachu1", "pikachu2"}, - CurrentOptions()); - for (int i = 0; i < kNumCfs; ++i) { - for (int j = 0; j < kEntriesPerCf; ++j) { - ASSERT_EQ(Get(i, "key" + std::to_string(j)), "val" + std::to_string(j)); - } - } -} - -TEST_F(RepairTest, RepairColumnFamilyOptions) { - // Verify repair logic uses correct ColumnFamilyOptions when repairing a - // database with different options for column families. 
- const int kNumCfs = 2; - const int kEntriesPerCf = 2; - - Options opts(CurrentOptions()), rev_opts(CurrentOptions()); - opts.comparator = BytewiseComparator(); - rev_opts.comparator = ReverseBytewiseComparator(); - - DestroyAndReopen(opts); - CreateColumnFamilies({"reverse"}, rev_opts); - ReopenWithColumnFamilies({"default", "reverse"}, - std::vector{opts, rev_opts}); - for (int i = 0; i < kNumCfs; ++i) { - for (int j = 0; j < kEntriesPerCf; ++j) { - ASSERT_OK(Put(i, "key" + std::to_string(j), "val" + std::to_string(j))); - if (i == kNumCfs - 1 && j == kEntriesPerCf - 1) { - // Leave one unflushed so we can verify RepairDB's flush logic - continue; - } - ASSERT_OK(Flush(i)); - } - } - Close(); - - // RepairDB() records the comparator in the manifest, and DB::Open would fail - // if a different comparator were used. - ASSERT_OK(RepairDB(dbname_, opts, {{"default", opts}, {"reverse", rev_opts}}, - opts /* unknown_cf_opts */)); - ASSERT_OK(TryReopenWithColumnFamilies({"default", "reverse"}, - std::vector{opts, rev_opts})); - for (int i = 0; i < kNumCfs; ++i) { - for (int j = 0; j < kEntriesPerCf; ++j) { - ASSERT_EQ(Get(i, "key" + std::to_string(j)), "val" + std::to_string(j)); - } - } - - // Examine table properties to verify RepairDB() used the right options when - // converting WAL->SST - TablePropertiesCollection fname_to_props; - ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[1], &fname_to_props)); - ASSERT_EQ(fname_to_props.size(), 2U); - for (const auto& fname_and_props : fname_to_props) { - std::string comparator_name(rev_opts.comparator->Name()); - ASSERT_EQ(comparator_name, fname_and_props.second->comparator_name); - } - Close(); - - // Also check comparator when it's provided via "unknown" CF options - ASSERT_OK(RepairDB(dbname_, opts, {{"default", opts}}, - rev_opts /* unknown_cf_opts */)); - ASSERT_OK(TryReopenWithColumnFamilies({"default", "reverse"}, - std::vector{opts, rev_opts})); - for (int i = 0; i < kNumCfs; ++i) { - for (int j = 0; j < kEntriesPerCf; ++j) { - ASSERT_EQ(Get(i, "key" + std::to_string(j)), "val" + std::to_string(j)); - } - } -} - -TEST_F(RepairTest, DbNameContainsTrailingSlash) { - { - bool tmp; - if (env_->AreFilesSame("", "", &tmp).IsNotSupported()) { - fprintf(stderr, - "skipping RepairTest.DbNameContainsTrailingSlash due to " - "unsupported Env::AreFilesSame\n"); - return; - } - } - - ASSERT_OK(Put("key", "val")); - ASSERT_OK(Flush()); - Close(); - - ASSERT_OK(RepairDB(dbname_ + "/", CurrentOptions())); - ReopenWithSstIdVerify(); - ASSERT_EQ(Get("key"), "val"); -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/db/seqno_time_test.cc b/db/seqno_time_test.cc deleted file mode 100644 index dd93be7af..000000000 --- a/db/seqno_time_test.cc +++ /dev/null @@ -1,994 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
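The deleted seqno-time tests below drive RocksDB's sequence-number-to-time tracking and the hot/cold tiering it enables. A minimal sketch of the options they rely on, assuming a hypothetical MakeTieredOptions() helper (the helper name is a placeholder; the option names and the 10000-second window come from the tests themselves):

#include "rocksdb/options.h"

// Illustrative only: configure a DB the way these tests do, so that data
// written within roughly the last 10000 seconds stays out of the last level
// and last-level SST files are labeled cold.
rocksdb::Options MakeTieredOptions() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.compaction_style = rocksdb::kCompactionStyleUniversal;
  options.num_levels = 7;
  // Keep data newer than this window in the penultimate level.
  options.preclude_last_level_data_seconds = 10000;
  // Files compacted to the bottommost level are marked (and can be routed) as cold.
  options.bottommost_temperature = rocksdb::Temperature::kCold;
  // preserve_internal_time_seconds alone would enable the seqno->time
  // sampling without the last-level exclusion.
  return options;
}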
- -#include "db/db_test_util.h" -#include "db/periodic_task_scheduler.h" -#include "db/seqno_to_time_mapping.h" -#include "port/stack_trace.h" -#include "rocksdb/iostats_context.h" -#include "rocksdb/utilities/debug.h" -#include "test_util/mock_time_env.h" - - -namespace ROCKSDB_NAMESPACE { - -class SeqnoTimeTest : public DBTestBase { - public: - SeqnoTimeTest() : DBTestBase("seqno_time_test", /*env_do_fsync=*/false) { - mock_clock_ = std::make_shared(env_->GetSystemClock()); - mock_env_ = std::make_unique(env_, mock_clock_); - } - - protected: - std::unique_ptr mock_env_; - std::shared_ptr mock_clock_; - - void SetUp() override { - mock_clock_->InstallTimedWaitFixCallback(); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::StartPeriodicTaskScheduler:Init", [&](void* arg) { - auto periodic_task_scheduler_ptr = - reinterpret_cast(arg); - periodic_task_scheduler_ptr->TEST_OverrideTimer(mock_clock_.get()); - }); - } - - // make sure the file is not in cache, otherwise it won't have IO info - void AssertKeyTemperature(int key_id, Temperature expected_temperature) { - get_iostats_context()->Reset(); - IOStatsContext* iostats = get_iostats_context(); - std::string result = Get(Key(key_id)); - ASSERT_FALSE(result.empty()); - ASSERT_GT(iostats->bytes_read, 0); - switch (expected_temperature) { - case Temperature::kUnknown: - ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, - 0); - ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, - 0); - break; - case Temperature::kCold: - ASSERT_GT(iostats->file_io_stats_by_temperature.cold_file_read_count, - 0); - ASSERT_GT(iostats->file_io_stats_by_temperature.cold_file_bytes_read, - 0); - break; - default: - // the test only support kCold now for the bottommost temperature - FAIL(); - } - } -}; - -TEST_F(SeqnoTimeTest, TemperatureBasicUniversal) { - const int kNumTrigger = 4; - const int kNumLevels = 7; - const int kNumKeys = 100; - const int kKeyPerSec = 10; - - Options options = CurrentOptions(); - options.compaction_style = kCompactionStyleUniversal; - options.preclude_last_level_data_seconds = 10000; - options.env = mock_env_.get(); - options.bottommost_temperature = Temperature::kCold; - options.num_levels = kNumLevels; - DestroyAndReopen(options); - - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); - - int sst_num = 0; - // Write files that are overlap and enough to trigger compaction - for (; sst_num < kNumTrigger; sst_num++) { - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun([&] { - mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); - }); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(dbfull()->WaitForCompact(true)); - - // All data is hot, only output to penultimate level - ASSERT_EQ("0,0,0,0,0,1", FilesPerLevel()); - ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0); - ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0); - - // read a random key, which should be hot (kUnknown) - AssertKeyTemperature(20, Temperature::kUnknown); - - // Write more data, but still all hot until the 10th SST, as: - // write a key every 10 seconds, 100 keys per SST, each SST takes 1000 seconds - // The preclude_last_level_data_seconds is 10k - for (; sst_num < kNumTrigger * 2; sst_num++) { - for (int i = 0; i < kNumKeys; 
i++) { - ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun([&] { - mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); - }); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->WaitForCompact(true)); - ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0); - ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0); - } - - // Now we have both hot data and cold data - for (; sst_num < kNumTrigger * 3; sst_num++) { - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun([&] { - mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); - }); - } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->WaitForCompact(true)); - } - - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - uint64_t hot_data_size = GetSstSizeHelper(Temperature::kUnknown); - uint64_t cold_data_size = GetSstSizeHelper(Temperature::kCold); - ASSERT_GT(hot_data_size, 0); - ASSERT_GT(cold_data_size, 0); - // the first a few key should be cold - AssertKeyTemperature(20, Temperature::kCold); - - for (int i = 0; i < 30; i++) { - dbfull()->TEST_WaitForPeriodicTaskRun([&] { - mock_clock_->MockSleepForSeconds(static_cast(20 * kKeyPerSec)); - }); - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - // the hot/cold data cut off range should be between i * 20 + 200 -> 250 - AssertKeyTemperature(i * 20 + 250, Temperature::kUnknown); - AssertKeyTemperature(i * 20 + 200, Temperature::kCold); - } - - ASSERT_LT(GetSstSizeHelper(Temperature::kUnknown), hot_data_size); - ASSERT_GT(GetSstSizeHelper(Temperature::kCold), cold_data_size); - - // Wait again, the most of the data should be cold after that - // but it may not be all cold, because if there's no new data write to SST, - // the compaction will not get the new seqno->time sampling to decide the last - // a few data's time. - for (int i = 0; i < 5; i++) { - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(1000)); }); - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - } - - // any random data close to the end should be cold - AssertKeyTemperature(1000, Temperature::kCold); - - // close explicitly, because the env is local variable which will be released - // first. - Close(); -} - -TEST_F(SeqnoTimeTest, TemperatureBasicLevel) { - const int kNumLevels = 7; - const int kNumKeys = 100; - - Options options = CurrentOptions(); - options.preclude_last_level_data_seconds = 10000; - options.env = mock_env_.get(); - options.bottommost_temperature = Temperature::kCold; - options.num_levels = kNumLevels; - options.level_compaction_dynamic_level_bytes = true; - // TODO(zjay): for level compaction, auto-compaction may stuck in deadloop, if - // the penultimate level score > 1, but the hot is not cold enough to compact - // to last level, which will keep triggering compaction. 
- options.disable_auto_compactions = true; - DestroyAndReopen(options); - - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - - int sst_num = 0; - // Write files that are overlap - for (; sst_num < 4; sst_num++) { - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - } - ASSERT_OK(Flush()); - } - - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - // All data is hot, only output to penultimate level - ASSERT_EQ("0,0,0,0,0,1", FilesPerLevel()); - ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0); - ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0); - - // read a random key, which should be hot (kUnknown) - AssertKeyTemperature(20, Temperature::kUnknown); - - // Adding more data to have mixed hot and cold data - for (; sst_num < 14; sst_num++) { - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - } - ASSERT_OK(Flush()); - } - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0); - ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0); - - // Compact the files to the last level which should split the hot/cold data - MoveFilesToLevel(6); - uint64_t hot_data_size = GetSstSizeHelper(Temperature::kUnknown); - uint64_t cold_data_size = GetSstSizeHelper(Temperature::kCold); - ASSERT_GT(hot_data_size, 0); - ASSERT_GT(cold_data_size, 0); - // the first a few key should be cold - AssertKeyTemperature(20, Temperature::kCold); - - // Wait some time, with each wait, the cold data is increasing and hot data is - // decreasing - for (int i = 0; i < 30; i++) { - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(200)); }); - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - uint64_t pre_hot = hot_data_size; - uint64_t pre_cold = cold_data_size; - hot_data_size = GetSstSizeHelper(Temperature::kUnknown); - cold_data_size = GetSstSizeHelper(Temperature::kCold); - ASSERT_LT(hot_data_size, pre_hot); - ASSERT_GT(cold_data_size, pre_cold); - - // the hot/cold cut_off key should be around i * 20 + 400 -> 450 - AssertKeyTemperature(i * 20 + 450, Temperature::kUnknown); - AssertKeyTemperature(i * 20 + 400, Temperature::kCold); - } - - // Wait again, the most of the data should be cold after that - // hot data might not be empty, because if we don't write new data, there's - // no seqno->time sampling available to the compaction - for (int i = 0; i < 5; i++) { - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(1000)); }); - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - } - - // any random data close to the end should be cold - AssertKeyTemperature(1000, Temperature::kCold); - - Close(); -} - -enum class SeqnoTimeTestType : char { - kTrackInternalTimeSeconds = 0, - kPrecludeLastLevel = 1, - kBothSetTrackSmaller = 2, -}; - -class SeqnoTimeTablePropTest - : public SeqnoTimeTest, - public ::testing::WithParamInterface { - public: - SeqnoTimeTablePropTest() : 
SeqnoTimeTest() {} - - void SetTrackTimeDurationOptions(uint64_t track_time_duration, - Options& options) const { - // either option set will enable the time tracking feature - switch (GetParam()) { - case SeqnoTimeTestType::kTrackInternalTimeSeconds: - options.preclude_last_level_data_seconds = 0; - options.preserve_internal_time_seconds = track_time_duration; - break; - case SeqnoTimeTestType::kPrecludeLastLevel: - options.preclude_last_level_data_seconds = track_time_duration; - options.preserve_internal_time_seconds = 0; - break; - case SeqnoTimeTestType::kBothSetTrackSmaller: - options.preclude_last_level_data_seconds = track_time_duration; - options.preserve_internal_time_seconds = track_time_duration / 10; - break; - } - } -}; - -INSTANTIATE_TEST_CASE_P( - SeqnoTimeTablePropTest, SeqnoTimeTablePropTest, - ::testing::Values(SeqnoTimeTestType::kTrackInternalTimeSeconds, - SeqnoTimeTestType::kPrecludeLastLevel, - SeqnoTimeTestType::kBothSetTrackSmaller)); - -TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { - Options options = CurrentOptions(); - SetTrackTimeDurationOptions(10000, options); - - options.env = mock_env_.get(); - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - std::set checked_file_nums; - SequenceNumber start_seq = dbfull()->GetLatestSequenceNumber(); - // Write a key every 10 seconds - for (int i = 0; i < 200; i++) { - ASSERT_OK(Put(Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - } - ASSERT_OK(Flush()); - TablePropertiesCollection tables_props; - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - ASSERT_EQ(tables_props.size(), 1); - auto it = tables_props.begin(); - SeqnoToTimeMapping tp_mapping; - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - ASSERT_OK(tp_mapping.Sort()); - ASSERT_FALSE(tp_mapping.Empty()); - auto seqs = tp_mapping.TEST_GetInternalMapping(); - // about ~20 seqs->time entries, because the sample rate is 10000/100, and it - // passes 2k time. 
- ASSERT_GE(seqs.size(), 19); - ASSERT_LE(seqs.size(), 21); - SequenceNumber seq_end = dbfull()->GetLatestSequenceNumber(); - for (auto i = start_seq; i < start_seq + 10; i++) { - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i + 1) * 10); - } - start_seq += 10; - for (auto i = start_seq; i < seq_end; i++) { - // The result is within the range - ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), (i - 10) * 10); - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i + 10) * 10); - } - checked_file_nums.insert(it->second->orig_file_number); - start_seq = seq_end; - - // Write a key every 1 seconds - for (int i = 0; i < 200; i++) { - ASSERT_OK(Put(Key(i + 190), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(1)); }); - } - seq_end = dbfull()->GetLatestSequenceNumber(); - ASSERT_OK(Flush()); - tables_props.clear(); - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - ASSERT_EQ(tables_props.size(), 2); - it = tables_props.begin(); - while (it != tables_props.end()) { - if (!checked_file_nums.count(it->second->orig_file_number)) { - break; - } - it++; - } - ASSERT_TRUE(it != tables_props.end()); - - tp_mapping.Clear(); - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - ASSERT_OK(tp_mapping.Sort()); - seqs = tp_mapping.TEST_GetInternalMapping(); - // There only a few time sample - ASSERT_GE(seqs.size(), 1); - ASSERT_LE(seqs.size(), 3); - for (auto i = start_seq; i < seq_end; i++) { - // The result is not very accurate, as there is more data write within small - // range of time - ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), (i - start_seq) + 1000); - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i - start_seq) + 3000); - } - checked_file_nums.insert(it->second->orig_file_number); - start_seq = seq_end; - - // Write a key every 200 seconds - for (int i = 0; i < 200; i++) { - ASSERT_OK(Put(Key(i + 380), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(200)); }); - } - seq_end = dbfull()->GetLatestSequenceNumber(); - ASSERT_OK(Flush()); - tables_props.clear(); - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - ASSERT_EQ(tables_props.size(), 3); - it = tables_props.begin(); - while (it != tables_props.end()) { - if (!checked_file_nums.count(it->second->orig_file_number)) { - break; - } - it++; - } - ASSERT_TRUE(it != tables_props.end()); - - tp_mapping.Clear(); - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - ASSERT_OK(tp_mapping.Sort()); - seqs = tp_mapping.TEST_GetInternalMapping(); - // The sequence number -> time entries should be maxed - ASSERT_GE(seqs.size(), 99); - ASSERT_LE(seqs.size(), 101); - for (auto i = start_seq; i < seq_end - 99; i++) { - // likely the first 100 entries reports 0 - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i - start_seq) + 3000); - } - start_seq += 101; - - for (auto i = start_seq; i < seq_end; i++) { - ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), - (i - start_seq) * 200 + 22200); - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), - (i - start_seq) * 200 + 22600); - } - checked_file_nums.insert(it->second->orig_file_number); - start_seq = seq_end; - - // Write a key every 100 seconds - for (int i = 0; i < 200; i++) { - ASSERT_OK(Put(Key(i + 570), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - } - seq_end = dbfull()->GetLatestSequenceNumber(); - ASSERT_OK(Flush()); - tables_props.clear(); - 
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - ASSERT_EQ(tables_props.size(), 4); - it = tables_props.begin(); - while (it != tables_props.end()) { - if (!checked_file_nums.count(it->second->orig_file_number)) { - break; - } - it++; - } - ASSERT_TRUE(it != tables_props.end()); - tp_mapping.Clear(); - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - ASSERT_OK(tp_mapping.Sort()); - seqs = tp_mapping.TEST_GetInternalMapping(); - ASSERT_GE(seqs.size(), 99); - ASSERT_LE(seqs.size(), 101); - - checked_file_nums.insert(it->second->orig_file_number); - - // re-enable compaction - ASSERT_OK(dbfull()->SetOptions({ - {"disable_auto_compactions", "false"}, - })); - - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - - tables_props.clear(); - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - ASSERT_GE(tables_props.size(), 1); - it = tables_props.begin(); - while (it != tables_props.end()) { - if (!checked_file_nums.count(it->second->orig_file_number)) { - break; - } - it++; - } - ASSERT_TRUE(it != tables_props.end()); - tp_mapping.Clear(); - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - ASSERT_OK(tp_mapping.Sort()); - seqs = tp_mapping.TEST_GetInternalMapping(); - ASSERT_GE(seqs.size(), 99); - ASSERT_LE(seqs.size(), 101); - for (auto i = start_seq; i < seq_end - 99; i++) { - // likely the first 100 entries reports 0 - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), - (i - start_seq) * 100 + 50000); - } - start_seq += 101; - - for (auto i = start_seq; i < seq_end; i++) { - ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), - (i - start_seq) * 100 + 52200); - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), - (i - start_seq) * 100 + 52400); - } - ASSERT_OK(db_->Close()); -} - -TEST_P(SeqnoTimeTablePropTest, MultiCFs) { - Options options = CurrentOptions(); - options.preclude_last_level_data_seconds = 0; - options.preserve_internal_time_seconds = 0; - options.env = mock_env_.get(); - options.stats_dump_period_sec = 0; - options.stats_persist_period_sec = 0; - ReopenWithColumnFamilies({"default"}, options); - - const PeriodicTaskScheduler& scheduler = - dbfull()->TEST_GetPeriodicTaskScheduler(); - ASSERT_FALSE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime)); - - // Write some data and increase the current time - for (int i = 0; i < 200; i++) { - ASSERT_OK(Put(Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - } - ASSERT_OK(Flush()); - TablePropertiesCollection tables_props; - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - ASSERT_EQ(tables_props.size(), 1); - auto it = tables_props.begin(); - ASSERT_TRUE(it->second->seqno_to_time_mapping.empty()); - - ASSERT_TRUE(dbfull()->TEST_GetSeqnoToTimeMapping().Empty()); - - Options options_1 = options; - SetTrackTimeDurationOptions(10000, options_1); - CreateColumnFamilies({"one"}, options_1); - ASSERT_TRUE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime)); - - // Write some data to the default CF (without preclude_last_level feature) - for (int i = 0; i < 200; i++) { - ASSERT_OK(Put(Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - } - ASSERT_OK(Flush()); - - // Write some data to the CF one - for (int i = 0; i < 20; i++) { - ASSERT_OK(Put(1, Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - } - ASSERT_OK(Flush(1)); - tables_props.clear(); - 
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[1], &tables_props)); - ASSERT_EQ(tables_props.size(), 1); - it = tables_props.begin(); - SeqnoToTimeMapping tp_mapping; - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - ASSERT_OK(tp_mapping.Sort()); - ASSERT_FALSE(tp_mapping.Empty()); - auto seqs = tp_mapping.TEST_GetInternalMapping(); - ASSERT_GE(seqs.size(), 1); - ASSERT_LE(seqs.size(), 4); - - // Create one more CF with larger preclude_last_level time - Options options_2 = options; - SetTrackTimeDurationOptions(1000000, options_2); // 1m - CreateColumnFamilies({"two"}, options_2); - - // Add more data to CF "two" to fill the in memory mapping - for (int i = 0; i < 2000; i++) { - ASSERT_OK(Put(2, Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - } - seqs = dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping(); - ASSERT_GE(seqs.size(), 1000 - 1); - ASSERT_LE(seqs.size(), 1000 + 1); - - ASSERT_OK(Flush(2)); - tables_props.clear(); - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[2], &tables_props)); - ASSERT_EQ(tables_props.size(), 1); - it = tables_props.begin(); - tp_mapping.Clear(); - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - ASSERT_OK(tp_mapping.Sort()); - seqs = tp_mapping.TEST_GetInternalMapping(); - // the max encoded entries is 100 - ASSERT_GE(seqs.size(), 100 - 1); - ASSERT_LE(seqs.size(), 100 + 1); - - // Write some data to default CF, as all memtable with preclude_last_level - // enabled have flushed, the in-memory seqno->time mapping should be cleared - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put(0, Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - } - seqs = dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping(); - ASSERT_OK(Flush(0)); - - // trigger compaction for CF "two" and make sure the compaction output has - // seqno_to_time_mapping - for (int j = 0; j < 3; j++) { - for (int i = 0; i < 200; i++) { - ASSERT_OK(Put(2, Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - } - ASSERT_OK(Flush(2)); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - tables_props.clear(); - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[2], &tables_props)); - ASSERT_EQ(tables_props.size(), 1); - it = tables_props.begin(); - tp_mapping.Clear(); - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - ASSERT_OK(tp_mapping.Sort()); - seqs = tp_mapping.TEST_GetInternalMapping(); - ASSERT_GE(seqs.size(), 99); - ASSERT_LE(seqs.size(), 101); - - for (int j = 0; j < 2; j++) { - for (int i = 0; i < 200; i++) { - ASSERT_OK(Put(0, Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - } - ASSERT_OK(Flush(0)); - } - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - tables_props.clear(); - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[0], &tables_props)); - ASSERT_EQ(tables_props.size(), 1); - it = tables_props.begin(); - ASSERT_TRUE(it->second->seqno_to_time_mapping.empty()); - - // Write some data to CF "two", but don't flush to accumulate - for (int i = 0; i < 1000; i++) { - ASSERT_OK(Put(2, Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - } - ASSERT_GE( - dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(), - 500); - // After 
dropping CF "one", the in-memory mapping will be change to only - // follow CF "two" options. - ASSERT_OK(db_->DropColumnFamily(handles_[1])); - ASSERT_LE( - dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(), - 100 + 5); - - // After dropping CF "two", the in-memory mapping is also clear. - ASSERT_OK(db_->DropColumnFamily(handles_[2])); - ASSERT_EQ( - dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(), - 0); - - // And the timer worker is stopped - ASSERT_FALSE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime)); - Close(); -} - -TEST_P(SeqnoTimeTablePropTest, MultiInstancesBasic) { - const int kInstanceNum = 2; - - Options options = CurrentOptions(); - SetTrackTimeDurationOptions(10000, options); - options.env = mock_env_.get(); - options.stats_dump_period_sec = 0; - options.stats_persist_period_sec = 0; - - auto dbs = std::vector(kInstanceNum); - for (int i = 0; i < kInstanceNum; i++) { - ASSERT_OK( - DB::Open(options, test::PerThreadDBPath(std::to_string(i)), &(dbs[i]))); - } - - // Make sure the second instance has the worker enabled - auto dbi = static_cast_with_check(dbs[1]); - WriteOptions wo; - for (int i = 0; i < 200; i++) { - ASSERT_OK(dbi->Put(wo, Key(i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - } - SeqnoToTimeMapping seqno_to_time_mapping = dbi->TEST_GetSeqnoToTimeMapping(); - ASSERT_GT(seqno_to_time_mapping.Size(), 10); - - for (int i = 0; i < kInstanceNum; i++) { - ASSERT_OK(dbs[i]->Close()); - delete dbs[i]; - } -} - -TEST_P(SeqnoTimeTablePropTest, SeqnoToTimeMappingUniversal) { - const int kNumTrigger = 4; - const int kNumLevels = 7; - const int kNumKeys = 100; - - Options options = CurrentOptions(); - SetTrackTimeDurationOptions(10000, options); - options.compaction_style = kCompactionStyleUniversal; - options.num_levels = kNumLevels; - options.env = mock_env_.get(); - - DestroyAndReopen(options); - - std::atomic_uint64_t num_seqno_zeroing{0}; - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "CompactionIterator::PrepareOutput:ZeroingSeq", - [&](void* /*arg*/) { num_seqno_zeroing++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - int sst_num = 0; - for (; sst_num < kNumTrigger - 1; sst_num++) { - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - } - ASSERT_OK(Flush()); - } - TablePropertiesCollection tables_props; - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - ASSERT_EQ(tables_props.size(), 3); - for (const auto& props : tables_props) { - ASSERT_FALSE(props.second->seqno_to_time_mapping.empty()); - SeqnoToTimeMapping tp_mapping; - ASSERT_OK(tp_mapping.Add(props.second->seqno_to_time_mapping)); - ASSERT_OK(tp_mapping.Sort()); - ASSERT_FALSE(tp_mapping.Empty()); - auto seqs = tp_mapping.TEST_GetInternalMapping(); - ASSERT_GE(seqs.size(), 10 - 1); - ASSERT_LE(seqs.size(), 10 + 1); - } - - // Trigger a compaction - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value")); - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - } - sst_num++; - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - tables_props.clear(); - 
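Later in this universal-compaction test, GetAllKeyVersions() is used to verify which entries had their sequence numbers zeroed by compaction. That helper comes from the public debug utilities; a small self-contained sketch of the same check, assuming an already-open DB* (the function and variable names are illustrative):

#include <cstdio>
#include <limits>
#include <vector>

#include "rocksdb/utilities/debug.h"

void CountZeroedSeqnos(ROCKSDB_NAMESPACE::DB* db) {
  using namespace ROCKSDB_NAMESPACE;
  std::vector<KeyVersion> key_versions;
  // Scan every internal key version in the DB (empty slices = full key range).
  Status s = GetAllKeyVersions(db, Slice(), Slice(),
                               std::numeric_limits<size_t>::max(),
                               &key_versions);
  if (!s.ok()) {
    return;
  }
  size_t zeroed = 0;
  for (const KeyVersion& kv : key_versions) {
    if (kv.sequence == 0) {
      ++zeroed;  // entries old enough (or already bottommost) get seqno 0
    }
  }
  std::printf("%zu of %zu key versions have seqno 0\n", zeroed,
              key_versions.size());
}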
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - ASSERT_EQ(tables_props.size(), 1); - - auto it = tables_props.begin(); - SeqnoToTimeMapping tp_mapping; - ASSERT_FALSE(it->second->seqno_to_time_mapping.empty()); - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - - // compact to the last level - CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - // make sure the data is all compacted to penultimate level if the feature is - // on, otherwise, compacted to the last level. - if (options.preclude_last_level_data_seconds > 0) { - ASSERT_GT(NumTableFilesAtLevel(5), 0); - ASSERT_EQ(NumTableFilesAtLevel(6), 0); - } else { - ASSERT_EQ(NumTableFilesAtLevel(5), 0); - ASSERT_GT(NumTableFilesAtLevel(6), 0); - } - - // regardless the file is on the last level or not, it should keep the time - // information and sequence number are not set - tables_props.clear(); - tp_mapping.Clear(); - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - - ASSERT_EQ(tables_props.size(), 1); - ASSERT_EQ(num_seqno_zeroing, 0); - - it = tables_props.begin(); - ASSERT_FALSE(it->second->seqno_to_time_mapping.empty()); - ASSERT_OK(tp_mapping.Add(it->second->seqno_to_time_mapping)); - - // make half of the data expired - mock_clock_->MockSleepForSeconds(static_cast(8000)); - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - tables_props.clear(); - tp_mapping.Clear(); - ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); - - if (options.preclude_last_level_data_seconds > 0) { - ASSERT_EQ(tables_props.size(), 2); - } else { - ASSERT_EQ(tables_props.size(), 1); - } - ASSERT_GT(num_seqno_zeroing, 0); - std::vector key_versions; - ASSERT_OK(GetAllKeyVersions(db_, Slice(), Slice(), - std::numeric_limits::max(), - &key_versions)); - // make sure there're more than 300 keys and first 100 keys are having seqno - // zeroed out, the last 100 key seqno not zeroed out - ASSERT_GT(key_versions.size(), 300); - for (int i = 0; i < 100; i++) { - ASSERT_EQ(key_versions[i].sequence, 0); - } - auto rit = key_versions.rbegin(); - for (int i = 0; i < 100; i++) { - ASSERT_GT(rit->sequence, 0); - rit++; - } - - // make all data expired and compact again to push it to the last level - // regardless if the tiering feature is enabled or not - mock_clock_->MockSleepForSeconds(static_cast(20000)); - - ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); - - ASSERT_GT(num_seqno_zeroing, 0); - ASSERT_GT(NumTableFilesAtLevel(6), 0); - - Close(); -} - -TEST_F(SeqnoTimeTest, MappingAppend) { - SeqnoToTimeMapping test(/*max_time_duration=*/100, /*max_capacity=*/10); - - // ignore seqno == 0, as it may mean the seqno is zeroed out - ASSERT_FALSE(test.Append(0, 9)); - - ASSERT_TRUE(test.Append(3, 10)); - auto size = test.Size(); - // normal add - ASSERT_TRUE(test.Append(10, 11)); - size++; - ASSERT_EQ(size, test.Size()); - - // Append unsorted - ASSERT_FALSE(test.Append(8, 12)); - ASSERT_EQ(size, test.Size()); - - // Append with the same seqno, newer time will be accepted - ASSERT_TRUE(test.Append(10, 12)); - ASSERT_EQ(size, test.Size()); - // older time will be ignored - ASSERT_FALSE(test.Append(10, 9)); - ASSERT_EQ(size, test.Size()); - - // new seqno with old time will be ignored - ASSERT_FALSE(test.Append(12, 8)); - ASSERT_EQ(size, test.Size()); -} - -TEST_F(SeqnoTimeTest, GetOldestApproximateTime) { - SeqnoToTimeMapping test(/*max_time_duration=*/100, /*max_capacity=*/10); - - 
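The assertions that follow pin down the lookup rule for this mapping. In short (SeqnoToTimeMapping is an internal helper, so this is only a reading aid, not public API):

// GetOldestApproximateTime(s) returns the recorded time of the newest sample
// whose seqno is <= s, i.e. the oldest time at which data carrying seqno s
// could have been written, and kUnknownSeqnoTime when no sample covers s.
//
//   samples: (3 -> 10), (10 -> 100), (100 -> 1000)
//   query seqno 2   -> kUnknownSeqnoTime  (below the first sample)
//   query seqno 40  -> 100                (covered by the (10 -> 100) sample)
//   query seqno 111 -> 1000               (covered by the (100 -> 1000) sample)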
ASSERT_EQ(test.GetOldestApproximateTime(10), kUnknownSeqnoTime); - - test.Append(3, 10); - - ASSERT_EQ(test.GetOldestApproximateTime(2), kUnknownSeqnoTime); - ASSERT_EQ(test.GetOldestApproximateTime(3), 10); - ASSERT_EQ(test.GetOldestApproximateTime(10), 10); - - test.Append(10, 100); - - test.Append(100, 1000); - ASSERT_EQ(test.GetOldestApproximateTime(10), 100); - ASSERT_EQ(test.GetOldestApproximateTime(40), 100); - ASSERT_EQ(test.GetOldestApproximateTime(111), 1000); -} - -TEST_F(SeqnoTimeTest, Sort) { - SeqnoToTimeMapping test; - - // single entry - test.Add(10, 11); - ASSERT_OK(test.Sort()); - ASSERT_EQ(test.Size(), 1); - - // duplicate, should be removed by sort - test.Add(10, 11); - // same seqno, but older time, should be removed - test.Add(10, 9); - - // unuseful ones, should be removed by sort - test.Add(11, 9); - test.Add(9, 8); - - // Good ones - test.Add(1, 10); - test.Add(100, 100); - - ASSERT_OK(test.Sort()); - - auto seqs = test.TEST_GetInternalMapping(); - - std::deque expected; - expected.emplace_back(1, 10); - expected.emplace_back(10, 11); - expected.emplace_back(100, 100); - - ASSERT_EQ(expected, seqs); -} - -TEST_F(SeqnoTimeTest, EncodeDecodeBasic) { - SeqnoToTimeMapping test(0, 1000); - - std::string output; - test.Encode(output, 0, 1000, 100); - ASSERT_TRUE(output.empty()); - - for (int i = 1; i <= 1000; i++) { - ASSERT_TRUE(test.Append(i, i * 10)); - } - test.Encode(output, 0, 1000, 100); - - ASSERT_FALSE(output.empty()); - - SeqnoToTimeMapping decoded; - ASSERT_OK(decoded.Add(output)); - ASSERT_OK(decoded.Sort()); - ASSERT_EQ(decoded.Size(), SeqnoToTimeMapping::kMaxSeqnoTimePairsPerSST); - ASSERT_EQ(test.Size(), 1000); - - for (SequenceNumber seq = 0; seq <= 1000; seq++) { - // test has the more accurate time mapping, encode only pick - // kMaxSeqnoTimePairsPerSST number of entries, which is less accurate - uint64_t target_time = test.GetOldestApproximateTime(seq); - ASSERT_GE(decoded.GetOldestApproximateTime(seq), - target_time < 200 ? 0 : target_time - 200); - ASSERT_LE(decoded.GetOldestApproximateTime(seq), target_time); - } -} - -TEST_F(SeqnoTimeTest, EncodeDecodePerferNewTime) { - SeqnoToTimeMapping test(0, 10); - - test.Append(1, 10); - test.Append(5, 17); - test.Append(6, 25); - test.Append(8, 30); - - std::string output; - test.Encode(output, 1, 10, 0, 3); - - SeqnoToTimeMapping decoded; - ASSERT_OK(decoded.Add(output)); - ASSERT_OK(decoded.Sort()); - - ASSERT_EQ(decoded.Size(), 3); - - auto seqs = decoded.TEST_GetInternalMapping(); - std::deque expected; - expected.emplace_back(1, 10); - expected.emplace_back(6, 25); - expected.emplace_back(8, 30); - ASSERT_EQ(expected, seqs); - - // Add a few large time number - test.Append(10, 100); - test.Append(13, 200); - test.Append(16, 300); - - output.clear(); - test.Encode(output, 1, 20, 0, 4); - decoded.Clear(); - ASSERT_OK(decoded.Add(output)); - ASSERT_OK(decoded.Sort()); - ASSERT_EQ(decoded.Size(), 4); - - expected.clear(); - expected.emplace_back(1, 10); - // entry #6, #8 are skipped as they are too close to #1. - // entry #100 is also within skip range, but if it's skipped, there not enough - // number to fill 4 entries, so select it. 
- expected.emplace_back(10, 100); - expected.emplace_back(13, 200); - expected.emplace_back(16, 300); - seqs = decoded.TEST_GetInternalMapping(); - ASSERT_EQ(expected, seqs); -} - -} // namespace ROCKSDB_NAMESPACE - - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc deleted file mode 100644 index 20f37e0c9..000000000 --- a/db/table_properties_collector_test.cc +++ /dev/null @@ -1,509 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/table_properties_collector.h" - -#include -#include -#include -#include -#include - -#include "db/db_impl/db_impl.h" -#include "db/dbformat.h" -#include "file/sequence_file_reader.h" -#include "file/writable_file_writer.h" -#include "options/cf_options.h" -#include "rocksdb/flush_block_policy.h" -#include "rocksdb/table.h" -#include "table/block_based/block_based_table_factory.h" -#include "table/meta_blocks.h" -#include "table/plain/plain_table_factory.h" -#include "table/table_builder.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/coding.h" - -namespace ROCKSDB_NAMESPACE { - -class TablePropertiesTest : public testing::Test, - public testing::WithParamInterface { - public: - void SetUp() override { backward_mode_ = GetParam(); } - - bool backward_mode_; -}; - -// Utilities test functions -namespace { -static const uint32_t kTestColumnFamilyId = 66; -static const std::string kTestColumnFamilyName = "test_column_fam"; -static const int kTestLevel = 1; - -void MakeBuilder( - const Options& options, const ImmutableOptions& ioptions, - const MutableCFOptions& moptions, - const InternalKeyComparator& internal_comparator, - const IntTblPropCollectorFactories* int_tbl_prop_collector_factories, - std::unique_ptr* writable, - std::unique_ptr* builder) { - std::unique_ptr wf(new test::StringSink); - writable->reset( - new WritableFileWriter(std::move(wf), "" /* don't care */, EnvOptions())); - TableBuilderOptions tboptions( - ioptions, moptions, internal_comparator, int_tbl_prop_collector_factories, - options.compression, options.compression_opts, kTestColumnFamilyId, - kTestColumnFamilyName, kTestLevel); - builder->reset(NewTableBuilder(tboptions, writable->get())); -} -} // namespace - -// Collects keys that starts with "A" in a table. 
-class RegularKeysStartWithA : public TablePropertiesCollector { - public: - const char* Name() const override { return "RegularKeysStartWithA"; } - - Status Finish(UserCollectedProperties* properties) override { - std::string encoded; - std::string encoded_num_puts; - std::string encoded_num_deletes; - std::string encoded_num_single_deletes; - std::string encoded_num_size_changes; - PutVarint32(&encoded, count_); - PutVarint32(&encoded_num_puts, num_puts_); - PutVarint32(&encoded_num_deletes, num_deletes_); - PutVarint32(&encoded_num_single_deletes, num_single_deletes_); - PutVarint32(&encoded_num_size_changes, num_size_changes_); - *properties = UserCollectedProperties{ - {"TablePropertiesTest", message_}, - {"Count", encoded}, - {"NumPuts", encoded_num_puts}, - {"NumDeletes", encoded_num_deletes}, - {"NumSingleDeletes", encoded_num_single_deletes}, - {"NumSizeChanges", encoded_num_size_changes}, - }; - return Status::OK(); - } - - Status AddUserKey(const Slice& user_key, const Slice& /*value*/, - EntryType type, SequenceNumber /*seq*/, - uint64_t file_size) override { - // simply asssume all user keys are not empty. - if (user_key.data()[0] == 'A') { - ++count_; - } - if (type == kEntryPut) { - num_puts_++; - } else if (type == kEntryDelete) { - num_deletes_++; - } else if (type == kEntrySingleDelete) { - num_single_deletes_++; - } - if (file_size < file_size_) { - message_ = "File size should not decrease."; - } else if (file_size != file_size_) { - num_size_changes_++; - } - - return Status::OK(); - } - - UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } - - private: - std::string message_ = "Rocksdb"; - uint32_t count_ = 0; - uint32_t num_puts_ = 0; - uint32_t num_deletes_ = 0; - uint32_t num_single_deletes_ = 0; - uint32_t num_size_changes_ = 0; - uint64_t file_size_ = 0; -}; - -// Collects keys that starts with "A" in a table. Backward compatible mode -// It is also used to test internal key table property collector -class RegularKeysStartWithABackwardCompatible - : public TablePropertiesCollector { - public: - const char* Name() const override { return "RegularKeysStartWithA"; } - - Status Finish(UserCollectedProperties* properties) override { - std::string encoded; - PutVarint32(&encoded, count_); - *properties = UserCollectedProperties{{"TablePropertiesTest", "Rocksdb"}, - {"Count", encoded}}; - return Status::OK(); - } - - Status Add(const Slice& user_key, const Slice& /*value*/) override { - // simply asssume all user keys are not empty. - if (user_key.data()[0] == 'A') { - ++count_; - } - return Status::OK(); - } - - UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } - - private: - uint32_t count_ = 0; -}; - -class RegularKeysStartWithAInternal : public IntTblPropCollector { - public: - const char* Name() const override { return "RegularKeysStartWithA"; } - - Status Finish(UserCollectedProperties* properties) override { - std::string encoded; - PutVarint32(&encoded, count_); - *properties = UserCollectedProperties{{"TablePropertiesTest", "Rocksdb"}, - {"Count", encoded}}; - return Status::OK(); - } - - Status InternalAdd(const Slice& user_key, const Slice& /*value*/, - uint64_t /*file_size*/) override { - // simply asssume all user keys are not empty. 
- if (user_key.data()[0] == 'A') { - ++count_; - } - return Status::OK(); - } - - void BlockAdd(uint64_t /* block_uncomp_bytes */, - uint64_t /* block_compressed_bytes_fast */, - uint64_t /* block_compressed_bytes_slow */) override { - // Nothing to do. - return; - } - - UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } - - private: - uint32_t count_ = 0; -}; - -class RegularKeysStartWithAFactory : public IntTblPropCollectorFactory, - public TablePropertiesCollectorFactory { - public: - explicit RegularKeysStartWithAFactory(bool backward_mode) - : backward_mode_(backward_mode) {} - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context context) override { - EXPECT_EQ(kTestColumnFamilyId, context.column_family_id); - EXPECT_EQ(kTestLevel, context.level_at_creation); - if (!backward_mode_) { - return new RegularKeysStartWithA(); - } else { - return new RegularKeysStartWithABackwardCompatible(); - } - } - IntTblPropCollector* CreateIntTblPropCollector( - uint32_t /*column_family_id*/, int /* level_at_creation */) override { - return new RegularKeysStartWithAInternal(); - } - const char* Name() const override { return "RegularKeysStartWithA"; } - - bool backward_mode_; -}; - -class FlushBlockEveryThreePolicy : public FlushBlockPolicy { - public: - bool Update(const Slice& /*key*/, const Slice& /*value*/) override { - return (++count_ % 3U == 0); - } - - private: - uint64_t count_ = 0; -}; - -class FlushBlockEveryThreePolicyFactory : public FlushBlockPolicyFactory { - public: - explicit FlushBlockEveryThreePolicyFactory() {} - - const char* Name() const override { - return "FlushBlockEveryThreePolicyFactory"; - } - - FlushBlockPolicy* NewFlushBlockPolicy( - const BlockBasedTableOptions& /*table_options*/, - const BlockBuilder& /*data_block_builder*/) const override { - return new FlushBlockEveryThreePolicy; - } -}; - -extern const uint64_t kBlockBasedTableMagicNumber; -extern const uint64_t kPlainTableMagicNumber; -namespace { -void TestCustomizedTablePropertiesCollector( - bool backward_mode, uint64_t magic_number, bool test_int_tbl_prop_collector, - const Options& options, const InternalKeyComparator& internal_comparator) { - // make sure the entries will be inserted with order. 
- std::map, std::string> kvs = { - {{"About ", kTypeValue}, "val5"}, // starts with 'A' - {{"Abstract", kTypeValue}, "val2"}, // starts with 'A' - {{"Around ", kTypeValue}, "val7"}, // starts with 'A' - {{"Beyond ", kTypeValue}, "val3"}, - {{"Builder ", kTypeValue}, "val1"}, - {{"Love ", kTypeDeletion}, ""}, - {{"Cancel ", kTypeValue}, "val4"}, - {{"Find ", kTypeValue}, "val6"}, - {{"Rocks ", kTypeDeletion}, ""}, - {{"Foo ", kTypeSingleDeletion}, ""}, - }; - - // -- Step 1: build table - std::unique_ptr builder; - std::unique_ptr writer; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - if (test_int_tbl_prop_collector) { - int_tbl_prop_collector_factories.emplace_back( - new RegularKeysStartWithAFactory(backward_mode)); - } else { - GetIntTblPropCollectorFactory(ioptions, &int_tbl_prop_collector_factories); - } - MakeBuilder(options, ioptions, moptions, internal_comparator, - &int_tbl_prop_collector_factories, &writer, &builder); - - SequenceNumber seqNum = 0U; - for (const auto& kv : kvs) { - InternalKey ikey(kv.first.first, seqNum++, kv.first.second); - builder->Add(ikey.Encode(), kv.second); - } - ASSERT_OK(builder->Finish()); - ASSERT_OK(writer->Flush()); - - // -- Step 2: Read properties - test::StringSink* fwf = - static_cast(writer->writable_file()); - std::unique_ptr source( - new test::StringSource(fwf->contents())); - std::unique_ptr fake_file_reader( - new RandomAccessFileReader(std::move(source), "test")); - - std::unique_ptr props; - Status s = ReadTableProperties(fake_file_reader.get(), fwf->contents().size(), - magic_number, ioptions, &props); - ASSERT_OK(s); - - auto user_collected = props->user_collected_properties; - - ASSERT_NE(user_collected.find("TablePropertiesTest"), user_collected.end()); - ASSERT_EQ("Rocksdb", user_collected.at("TablePropertiesTest")); - - uint32_t starts_with_A = 0; - ASSERT_NE(user_collected.find("Count"), user_collected.end()); - Slice key(user_collected.at("Count")); - ASSERT_TRUE(GetVarint32(&key, &starts_with_A)); - ASSERT_EQ(3u, starts_with_A); - - if (!backward_mode && !test_int_tbl_prop_collector) { - uint32_t num_puts; - ASSERT_NE(user_collected.find("NumPuts"), user_collected.end()); - Slice key_puts(user_collected.at("NumPuts")); - ASSERT_TRUE(GetVarint32(&key_puts, &num_puts)); - ASSERT_EQ(7u, num_puts); - - uint32_t num_deletes; - ASSERT_NE(user_collected.find("NumDeletes"), user_collected.end()); - Slice key_deletes(user_collected.at("NumDeletes")); - ASSERT_TRUE(GetVarint32(&key_deletes, &num_deletes)); - ASSERT_EQ(2u, num_deletes); - - uint32_t num_single_deletes; - ASSERT_NE(user_collected.find("NumSingleDeletes"), user_collected.end()); - Slice key_single_deletes(user_collected.at("NumSingleDeletes")); - ASSERT_TRUE(GetVarint32(&key_single_deletes, &num_single_deletes)); - ASSERT_EQ(1u, num_single_deletes); - - uint32_t num_size_changes; - ASSERT_NE(user_collected.find("NumSizeChanges"), user_collected.end()); - Slice key_size_changes(user_collected.at("NumSizeChanges")); - ASSERT_TRUE(GetVarint32(&key_size_changes, &num_size_changes)); - ASSERT_GE(num_size_changes, 2u); - } -} -} // namespace - -TEST_P(TablePropertiesTest, CustomizedTablePropertiesCollector) { - // Test properties collectors with internal keys or regular keys - // for block based table - for (bool encode_as_internal : {true, false}) { - Options options; - BlockBasedTableOptions table_options; - table_options.flush_block_policy_factory = - std::make_shared(); 
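The collectors defined above are wired in through options.table_properties_collector_factories and their output is read back from user_collected_properties. A compact, public-API-only sketch of the same pattern: a collector that counts keys starting with 'A', its factory, and the one line of wiring (class and property names here are illustrative, not from the deleted test):

#include <cstdint>
#include <memory>
#include <string>

#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/table_properties.h"

namespace rdb = ROCKSDB_NAMESPACE;

// Counts user keys starting with 'A' and publishes the count as a
// user-collected property on every SST the DB writes.
class CountAKeysCollector : public rdb::TablePropertiesCollector {
 public:
  const char* Name() const override { return "CountAKeysCollector"; }

  rdb::Status AddUserKey(const rdb::Slice& key, const rdb::Slice& /*value*/,
                         rdb::EntryType /*type*/, rdb::SequenceNumber /*seq*/,
                         uint64_t /*file_size*/) override {
    if (!key.empty() && key[0] == 'A') {
      ++count_;
    }
    return rdb::Status::OK();
  }

  rdb::Status Finish(rdb::UserCollectedProperties* props) override {
    (*props)["a_key_count"] = std::to_string(count_);
    return rdb::Status::OK();
  }

  rdb::UserCollectedProperties GetReadableProperties() const override {
    return {{"a_key_count", std::to_string(count_)}};
  }

 private:
  uint64_t count_ = 0;
};

class CountAKeysFactory : public rdb::TablePropertiesCollectorFactory {
 public:
  rdb::TablePropertiesCollector* CreateTablePropertiesCollector(
      rdb::TablePropertiesCollectorFactory::Context /*context*/) override {
    return new CountAKeysCollector();
  }
  const char* Name() const override { return "CountAKeysFactory"; }
};

// Wiring: one factory instance is shared by all SST builds for this DB.
//   options.table_properties_collector_factories.emplace_back(
//       std::make_shared<CountAKeysFactory>());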
- options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - test::PlainInternalKeyComparator ikc(options.comparator); - std::shared_ptr collector_factory( - new RegularKeysStartWithAFactory(backward_mode_)); - options.table_properties_collector_factories.resize(1); - options.table_properties_collector_factories[0] = collector_factory; - - TestCustomizedTablePropertiesCollector(backward_mode_, - kBlockBasedTableMagicNumber, - encode_as_internal, options, ikc); - - // test plain table - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 8; - plain_table_options.bloom_bits_per_key = 8; - plain_table_options.hash_table_ratio = 0; - - options.table_factory = - std::make_shared(plain_table_options); - TestCustomizedTablePropertiesCollector(backward_mode_, - kPlainTableMagicNumber, - encode_as_internal, options, ikc); - } -} - -namespace { -void TestInternalKeyPropertiesCollector( - bool backward_mode, uint64_t magic_number, bool sanitized, - std::shared_ptr table_factory) { - InternalKey keys[] = { - InternalKey("A ", 0, ValueType::kTypeValue), - InternalKey("B ", 1, ValueType::kTypeValue), - InternalKey("C ", 2, ValueType::kTypeValue), - InternalKey("W ", 3, ValueType::kTypeDeletion), - InternalKey("X ", 4, ValueType::kTypeDeletion), - InternalKey("Y ", 5, ValueType::kTypeDeletion), - InternalKey("Z ", 6, ValueType::kTypeDeletion), - InternalKey("a ", 7, ValueType::kTypeSingleDeletion), - InternalKey("b ", 8, ValueType::kTypeMerge), - InternalKey("c ", 9, ValueType::kTypeMerge), - }; - - std::unique_ptr builder; - std::unique_ptr writable; - Options options; - test::PlainInternalKeyComparator pikc(options.comparator); - - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - options.table_factory = table_factory; - if (sanitized) { - options.table_properties_collector_factories.emplace_back( - new RegularKeysStartWithAFactory(backward_mode)); - // with sanitization, even regular properties collector will be able to - // handle internal keys. - auto comparator = options.comparator; - // HACK: Set options.info_log to avoid writing log in - // SanitizeOptions(). 
- options.info_log = std::make_shared(); - options = SanitizeOptions("db", // just a place holder - options); - ImmutableOptions ioptions(options); - GetIntTblPropCollectorFactory(ioptions, &int_tbl_prop_collector_factories); - options.comparator = comparator; - } - const ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - - for (int iter = 0; iter < 2; ++iter) { - MakeBuilder(options, ioptions, moptions, pikc, - &int_tbl_prop_collector_factories, &writable, &builder); - for (const auto& k : keys) { - builder->Add(k.Encode(), "val"); - } - - ASSERT_OK(builder->Finish()); - ASSERT_OK(writable->Flush()); - - test::StringSink* fwf = - static_cast(writable->writable_file()); - std::unique_ptr source( - new test::StringSource(fwf->contents())); - std::unique_ptr reader( - new RandomAccessFileReader(std::move(source), "test")); - - std::unique_ptr props; - Status s = ReadTableProperties(reader.get(), fwf->contents().size(), - magic_number, ioptions, &props); - ASSERT_OK(s); - - auto user_collected = props->user_collected_properties; - uint64_t deleted = GetDeletedKeys(user_collected); - ASSERT_EQ(5u, deleted); // deletes + single-deletes - - bool property_present; - uint64_t merges = GetMergeOperands(user_collected, &property_present); - ASSERT_TRUE(property_present); - ASSERT_EQ(2u, merges); - - if (sanitized) { - uint32_t starts_with_A = 0; - ASSERT_NE(user_collected.find("Count"), user_collected.end()); - Slice key(user_collected.at("Count")); - ASSERT_TRUE(GetVarint32(&key, &starts_with_A)); - ASSERT_EQ(1u, starts_with_A); - - if (!backward_mode) { - uint32_t num_puts; - ASSERT_NE(user_collected.find("NumPuts"), user_collected.end()); - Slice key_puts(user_collected.at("NumPuts")); - ASSERT_TRUE(GetVarint32(&key_puts, &num_puts)); - ASSERT_EQ(3u, num_puts); - - uint32_t num_deletes; - ASSERT_NE(user_collected.find("NumDeletes"), user_collected.end()); - Slice key_deletes(user_collected.at("NumDeletes")); - ASSERT_TRUE(GetVarint32(&key_deletes, &num_deletes)); - ASSERT_EQ(4u, num_deletes); - - uint32_t num_single_deletes; - ASSERT_NE(user_collected.find("NumSingleDeletes"), - user_collected.end()); - Slice key_single_deletes(user_collected.at("NumSingleDeletes")); - ASSERT_TRUE(GetVarint32(&key_single_deletes, &num_single_deletes)); - ASSERT_EQ(1u, num_single_deletes); - } - } - } -} -} // namespace - -TEST_P(TablePropertiesTest, InternalKeyPropertiesCollector) { - TestInternalKeyPropertiesCollector( - backward_mode_, kBlockBasedTableMagicNumber, true /* sanitize */, - std::make_shared()); - if (backward_mode_) { - TestInternalKeyPropertiesCollector( - backward_mode_, kBlockBasedTableMagicNumber, false /* not sanitize */, - std::make_shared()); - } - - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 8; - plain_table_options.bloom_bits_per_key = 8; - plain_table_options.hash_table_ratio = 0; - - TestInternalKeyPropertiesCollector( - backward_mode_, kPlainTableMagicNumber, false /* not sanitize */, - std::make_shared(plain_table_options)); -} - -INSTANTIATE_TEST_CASE_P(InternalKeyPropertiesCollector, TablePropertiesTest, - ::testing::Bool()); - -INSTANTIATE_TEST_CASE_P(CustomizedTablePropertiesCollector, TablePropertiesTest, - ::testing::Bool()); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc deleted file mode 100644 index 
611dee774..000000000 --- a/db/version_builder_test.cc +++ /dev/null @@ -1,1820 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include -#include -#include -#include -#include - -#include "db/version_edit.h" -#include "db/version_set.h" -#include "rocksdb/advanced_options.h" -#include "table/unique_id_impl.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class VersionBuilderTest : public testing::Test { - public: - const Comparator* ucmp_; - InternalKeyComparator icmp_; - Options options_; - ImmutableOptions ioptions_; - MutableCFOptions mutable_cf_options_; - VersionStorageInfo vstorage_; - uint32_t file_num_; - CompactionOptionsFIFO fifo_options_; - std::vector size_being_compacted_; - - VersionBuilderTest() - : ucmp_(BytewiseComparator()), - icmp_(ucmp_), - ioptions_(options_), - mutable_cf_options_(options_), - vstorage_(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, - nullptr, false), - file_num_(1) { - mutable_cf_options_.RefreshDerivedOptions(ioptions_); - size_being_compacted_.resize(options_.num_levels); - } - - ~VersionBuilderTest() override { - for (int i = 0; i < vstorage_.num_levels(); i++) { - for (auto* f : vstorage_.LevelFiles(i)) { - if (--f->refs == 0) { - delete f; - } - } - } - } - - InternalKey GetInternalKey(const char* ukey, - SequenceNumber smallest_seq = 100) { - return InternalKey(ukey, smallest_seq, kTypeValue); - } - - void Add(int level, uint64_t file_number, const char* smallest, - const char* largest, uint64_t file_size = 0, uint32_t path_id = 0, - SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100, - uint64_t num_entries = 0, uint64_t num_deletions = 0, - bool sampled = false, SequenceNumber smallest_seqno = 0, - SequenceNumber largest_seqno = 0, - uint64_t oldest_blob_file_number = kInvalidBlobFileNumber, - uint64_t epoch_number = kUnknownEpochNumber) { - assert(level < vstorage_.num_levels()); - FileMetaData* f = new FileMetaData( - file_number, path_id, file_size, GetInternalKey(smallest, smallest_seq), - GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno, - /* marked_for_compact */ false, Temperature::kUnknown, - oldest_blob_file_number, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - f->compensated_file_size = file_size; - f->num_entries = num_entries; - f->num_deletions = num_deletions; - vstorage_.AddFile(level, f); - if (sampled) { - f->init_stats_from_file = true; - vstorage_.UpdateAccumulatedStats(f); - } - } - - void AddBlob(uint64_t blob_file_number, uint64_t total_blob_count, - uint64_t total_blob_bytes, std::string checksum_method, - std::string checksum_value, - BlobFileMetaData::LinkedSsts linked_ssts, - uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) { - auto shared_meta = SharedBlobFileMetaData::Create( - blob_file_number, total_blob_count, total_blob_bytes, - std::move(checksum_method), std::move(checksum_value)); - auto meta = - BlobFileMetaData::Create(std::move(shared_meta), std::move(linked_ssts), - garbage_blob_count, garbage_blob_bytes); - - vstorage_.AddBlobFile(std::move(meta)); - } - - void AddDummyFile(uint64_t table_file_number, uint64_t blob_file_number, - uint64_t 
epoch_number) { - constexpr int level = 0; - constexpr char smallest[] = "bar"; - constexpr char largest[] = "foo"; - constexpr uint64_t file_size = 100; - constexpr uint32_t path_id = 0; - constexpr SequenceNumber smallest_seq = 0; - constexpr SequenceNumber largest_seq = 0; - constexpr uint64_t num_entries = 0; - constexpr uint64_t num_deletions = 0; - constexpr bool sampled = false; - - Add(level, table_file_number, smallest, largest, file_size, path_id, - smallest_seq, largest_seq, num_entries, num_deletions, sampled, - smallest_seq, largest_seq, blob_file_number, epoch_number); - } - - void AddDummyFileToEdit(VersionEdit* edit, uint64_t table_file_number, - uint64_t blob_file_number, uint64_t epoch_number) { - assert(edit); - - constexpr int level = 0; - constexpr uint32_t path_id = 0; - constexpr uint64_t file_size = 100; - constexpr char smallest[] = "bar"; - constexpr char largest[] = "foo"; - constexpr SequenceNumber smallest_seqno = 100; - constexpr SequenceNumber largest_seqno = 300; - constexpr bool marked_for_compaction = false; - - edit->AddFile(level, table_file_number, path_id, file_size, - GetInternalKey(smallest), GetInternalKey(largest), - smallest_seqno, largest_seqno, marked_for_compaction, - Temperature::kUnknown, blob_file_number, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - } - - void UpdateVersionStorageInfo(VersionStorageInfo* vstorage) { - assert(vstorage); - - vstorage->PrepareForVersionAppend(ioptions_, mutable_cf_options_); - vstorage->SetFinalized(); - } - - void UpdateVersionStorageInfo() { UpdateVersionStorageInfo(&vstorage_); } -}; - -void UnrefFilesInVersion(VersionStorageInfo* new_vstorage) { - for (int i = 0; i < new_vstorage->num_levels(); i++) { - for (auto* f : new_vstorage->LevelFiles(i)) { - if (--f->refs == 0) { - delete f; - } - } - } -} - -TEST_F(VersionBuilderTest, ApplyAndSaveTo) { - Add(0, 1U, "150", "200", 100U, /*path_id*/ 0, - /*smallest_seq*/ 100, /*largest_seq*/ 100, - /*num_entries*/ 0, /*num_deletions*/ 0, - /*sampled*/ false, /*smallest_seqno*/ 0, - /*largest_seqno*/ 0, - /*oldest_blob_file_number*/ kInvalidBlobFileNumber, - /*epoch_number*/ 1); - - Add(1, 66U, "150", "200", 100U); - Add(1, 88U, "201", "300", 100U); - - Add(2, 6U, "150", "179", 100U); - Add(2, 7U, "180", "220", 100U); - Add(2, 8U, "221", "300", 100U); - - Add(3, 26U, "150", "170", 100U); - Add(3, 27U, "171", "179", 100U); - Add(3, 28U, "191", "220", 100U); - Add(3, 29U, "221", "300", 100U); - - UpdateVersionStorageInfo(); - - VersionEdit version_edit; - version_edit.AddFile( - 2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.DeleteFile(3, 27U); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder version_builder(env_options, &ioptions_, table_cache, - &vstorage_, version_set); - - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); - ASSERT_OK(version_builder.Apply(&version_edit)); - ASSERT_OK(version_builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - ASSERT_EQ(400U, new_vstorage.NumLevelBytes(2)); - ASSERT_EQ(300U, new_vstorage.NumLevelBytes(3)); 
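To make the level byte counts asserted just above easier to follow: every file in ApplyAndSaveTo is added with file_size 100, and the edit folded in through VersionBuilder adds one file to level 2 while deleting one from level 3, so:

//   Level 2: base files #6, #7, #8            -> 3 * 100 = 300 bytes
//            + file #666 added by the edit    -> +100     = 400 total
//   Level 3: base files #26, #27, #28, #29    -> 4 * 100 = 400 bytes
//            - file #27 deleted by the edit   -> -100     = 300 total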
- - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) { - ioptions_.level_compaction_dynamic_level_bytes = true; - - Add(0, 1U, "150", "200", 100U, 0, 200U, 200U, 0, 0, false, 200U, 200U, - /*oldest_blob_file_number*/ kInvalidBlobFileNumber, - /*epoch_number*/ 2); - Add(0, 88U, "201", "300", 100U, 0, 100U, 100U, 0, 0, false, 100U, 100U, - /*oldest_blob_file_number*/ kInvalidBlobFileNumber, - /*epoch_number*/ 1); - - Add(4, 6U, "150", "179", 100U); - Add(4, 7U, "180", "220", 100U); - Add(4, 8U, "221", "300", 100U); - - Add(5, 26U, "150", "170", 100U); - Add(5, 27U, "171", "179", 100U); - - UpdateVersionStorageInfo(); - - VersionEdit version_edit; - version_edit.AddFile( - 3, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - version_edit.DeleteFile(0, 1U); - version_edit.DeleteFile(0, 88U); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder version_builder(env_options, &ioptions_, table_cache, - &vstorage_, version_set); - - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); - ASSERT_OK(version_builder.Apply(&version_edit)); - ASSERT_OK(version_builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - ASSERT_EQ(0U, new_vstorage.NumLevelBytes(0)); - ASSERT_EQ(100U, new_vstorage.NumLevelBytes(3)); - ASSERT_EQ(300U, new_vstorage.NumLevelBytes(4)); - ASSERT_EQ(200U, new_vstorage.NumLevelBytes(5)); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) { - ioptions_.level_compaction_dynamic_level_bytes = true; - - Add(0, 1U, "150", "200", 100U, 0, 200U, 200U, 0, 0, false, 200U, 200U, - /*oldest_blob_file_number*/ kInvalidBlobFileNumber, - /*epoch_number*/ 2); - Add(0, 88U, "201", "300", 100U, 0, 100U, 100U, 0, 0, false, 100U, 100U, - /*oldest_blob_file_number*/ kInvalidBlobFileNumber, - /*epoch_number*/ 1); - - Add(4, 6U, "150", "179", 100U); - Add(4, 7U, "180", "220", 100U); - Add(4, 8U, "221", "300", 100U); - - Add(5, 26U, "150", "170", 100U); - Add(5, 27U, "171", "179", 100U); - - UpdateVersionStorageInfo(); - - VersionEdit version_edit; - version_edit.AddFile( - 4, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.DeleteFile(0, 1U); - version_edit.DeleteFile(0, 88U); - version_edit.DeleteFile(4, 6U); - version_edit.DeleteFile(4, 7U); - version_edit.DeleteFile(4, 8U); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder version_builder(env_options, &ioptions_, table_cache, - &vstorage_, version_set); - - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); - ASSERT_OK(version_builder.Apply(&version_edit)); - ASSERT_OK(version_builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - ASSERT_EQ(0U, new_vstorage.NumLevelBytes(0)); - ASSERT_EQ(100U, new_vstorage.NumLevelBytes(4)); - ASSERT_EQ(200U, 
new_vstorage.NumLevelBytes(5)); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) { - UpdateVersionStorageInfo(); - - VersionEdit version_edit; - version_edit.AddFile( - 2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.AddFile( - 2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.AddFile( - 2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.AddFile( - 2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.AddFile( - 2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder version_builder(env_options, &ioptions_, table_cache, - &vstorage_, version_set); - - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); - ASSERT_OK(version_builder.Apply(&version_edit)); - ASSERT_OK(version_builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - ASSERT_EQ(500U, new_vstorage.NumLevelBytes(2)); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder version_builder(env_options, &ioptions_, table_cache, - &vstorage_, version_set); - - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); - - VersionEdit version_edit; - version_edit.AddFile( - 2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.AddFile( - 2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.AddFile( - 2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - 
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.AddFile( - 2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit.AddFile( - 2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - ASSERT_OK(version_builder.Apply(&version_edit)); - - VersionEdit version_edit2; - version_edit.AddFile( - 2, 808, 0, 100U, GetInternalKey("901"), GetInternalKey("950"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - version_edit2.DeleteFile(2, 616); - version_edit2.DeleteFile(2, 636); - version_edit.AddFile( - 2, 806, 0, 100U, GetInternalKey("801"), GetInternalKey("850"), 200, 200, - false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - ASSERT_OK(version_builder.Apply(&version_edit2)); - ASSERT_OK(version_builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - ASSERT_EQ(300U, new_vstorage.NumLevelBytes(2)); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyFileDeletionIncorrectLevel) { - constexpr int level = 1; - constexpr uint64_t file_number = 2345; - constexpr char smallest[] = "bar"; - constexpr char largest[] = "foo"; - constexpr uint64_t file_size = 100; - - Add(level, file_number, smallest, largest, file_size); - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - constexpr int incorrect_level = 3; - - edit.DeleteFile(incorrect_level, file_number); - - const Status s = builder.Apply(&edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(std::strstr(s.getState(), - "Cannot delete table file #2345 from level 3 since " - "it is on level 1")); -} - -TEST_F(VersionBuilderTest, ApplyFileDeletionNotInLSMTree) { - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - constexpr int level = 3; - constexpr uint64_t file_number = 1234; - - edit.DeleteFile(level, file_number); - - const Status s = builder.Apply(&edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(std::strstr(s.getState(), - "Cannot delete table file #1234 from level 3 since " - "it is not in the LSM tree")); -} - -TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) { - constexpr int level = 1; - constexpr uint64_t file_number = 2345; - constexpr char smallest[] = "bar"; - constexpr char largest[] = "foo"; - constexpr uint64_t file_size = 10000; - constexpr 
uint32_t path_id = 0; - constexpr SequenceNumber smallest_seq = 100; - constexpr SequenceNumber largest_seq = 500; - constexpr uint64_t num_entries = 0; - constexpr uint64_t num_deletions = 0; - constexpr bool sampled = false; - constexpr SequenceNumber smallest_seqno = 1; - constexpr SequenceNumber largest_seqno = 1000; - - Add(level, file_number, smallest, largest, file_size, path_id, smallest_seq, - largest_seq, num_entries, num_deletions, sampled, smallest_seqno, - largest_seqno); - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit deletion; - - deletion.DeleteFile(level, file_number); - - ASSERT_OK(builder.Apply(&deletion)); - - VersionEdit addition; - - constexpr bool marked_for_compaction = false; - - addition.AddFile( - level, file_number, path_id, file_size, - GetInternalKey(smallest, smallest_seq), - GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno, - marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - ASSERT_OK(builder.Apply(&addition)); - - constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - ASSERT_OK(builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - ASSERT_EQ(new_vstorage.GetFileLocation(file_number).GetLevel(), level); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyInBase) { - constexpr int level = 1; - constexpr uint64_t file_number = 2345; - constexpr char smallest[] = "bar"; - constexpr char largest[] = "foo"; - constexpr uint64_t file_size = 10000; - - Add(level, file_number, smallest, largest, file_size); - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - constexpr int new_level = 2; - constexpr uint32_t path_id = 0; - constexpr SequenceNumber smallest_seqno = 100; - constexpr SequenceNumber largest_seqno = 1000; - constexpr bool marked_for_compaction = false; - - edit.AddFile( - new_level, file_number, path_id, file_size, GetInternalKey(smallest), - GetInternalKey(largest), smallest_seqno, largest_seqno, - marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - const Status s = builder.Apply(&edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(std::strstr(s.getState(), - "Cannot add table file #2345 to level 2 since it is " - "already in the LSM tree on level 1")); -} - -TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) { - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - constexpr int level = 3; - constexpr uint64_t file_number = 2345; - 
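A short note on what the "already in the LSM tree" cases around this point are really checking:

// VersionBuilder validates each edit against the base version plus every edit
// applied so far, treating them as one consistent LSM snapshot. A table file
// number may live on at most one level, so re-adding an existing file (even
// on a different level) or deleting it from the wrong level is reported as
// Status::Corruption with messages like the ones matched in these tests,
// rather than being silently reconciled.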
constexpr uint32_t path_id = 0; - constexpr uint64_t file_size = 10000; - constexpr char smallest[] = "bar"; - constexpr char largest[] = "foo"; - constexpr SequenceNumber smallest_seqno = 100; - constexpr SequenceNumber largest_seqno = 1000; - constexpr bool marked_for_compaction = false; - - edit.AddFile( - level, file_number, path_id, file_size, GetInternalKey(smallest), - GetInternalKey(largest), smallest_seqno, largest_seqno, - marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - ASSERT_OK(builder.Apply(&edit)); - - VersionEdit other_edit; - - constexpr int new_level = 2; - - other_edit.AddFile( - new_level, file_number, path_id, file_size, GetInternalKey(smallest), - GetInternalKey(largest), smallest_seqno, largest_seqno, - marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - const Status s = builder.Apply(&other_edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(std::strstr(s.getState(), - "Cannot add table file #2345 to level 2 since it is " - "already in the LSM tree on level 3")); -} - -TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) { - UpdateVersionStorageInfo(); - - constexpr int level = 1; - constexpr uint64_t file_number = 2345; - constexpr uint32_t path_id = 0; - constexpr uint64_t file_size = 10000; - constexpr char smallest[] = "bar"; - constexpr char largest[] = "foo"; - constexpr SequenceNumber smallest_seqno = 100; - constexpr SequenceNumber largest_seqno = 1000; - constexpr bool marked_for_compaction = false; - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit addition; - - addition.AddFile( - level, file_number, path_id, file_size, GetInternalKey(smallest), - GetInternalKey(largest), smallest_seqno, largest_seqno, - marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - ASSERT_OK(builder.Apply(&addition)); - - VersionEdit deletion; - - deletion.DeleteFile(level, file_number); - - ASSERT_OK(builder.Apply(&deletion)); - - constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - ASSERT_OK(builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - ASSERT_FALSE(new_vstorage.GetFileLocation(file_number).IsValid()); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyBlobFileAddition) { - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - constexpr uint64_t blob_file_number = 1234; - constexpr uint64_t total_blob_count = 5678; - constexpr uint64_t total_blob_bytes = 999999; - constexpr char checksum_method[] = "SHA1"; - constexpr char checksum_value[] = - 
"\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52" - "\x5c\xbd"; - - edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, - checksum_method, checksum_value); - - // Add dummy table file to ensure the blob file is referenced. - constexpr uint64_t table_file_number = 1; - AddDummyFileToEdit(&edit, table_file_number, blob_file_number, - 1 /*epoch_number*/); - - ASSERT_OK(builder.Apply(&edit)); - - constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - ASSERT_OK(builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - const auto& new_blob_files = new_vstorage.GetBlobFiles(); - ASSERT_EQ(new_blob_files.size(), 1); - - const auto new_meta = new_vstorage.GetBlobFileMetaData(blob_file_number); - - ASSERT_NE(new_meta, nullptr); - ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number); - ASSERT_EQ(new_meta->GetTotalBlobCount(), total_blob_count); - ASSERT_EQ(new_meta->GetTotalBlobBytes(), total_blob_bytes); - ASSERT_EQ(new_meta->GetChecksumMethod(), checksum_method); - ASSERT_EQ(new_meta->GetChecksumValue(), checksum_value); - ASSERT_EQ(new_meta->GetLinkedSsts(), - BlobFileMetaData::LinkedSsts{table_file_number}); - ASSERT_EQ(new_meta->GetGarbageBlobCount(), 0); - ASSERT_EQ(new_meta->GetGarbageBlobBytes(), 0); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyBlobFileAdditionAlreadyInBase) { - // Attempt to add a blob file that is already present in the base version. - - constexpr uint64_t blob_file_number = 1234; - constexpr uint64_t total_blob_count = 5678; - constexpr uint64_t total_blob_bytes = 999999; - constexpr char checksum_method[] = "SHA1"; - constexpr char checksum_value[] = - "\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52" - "\x5c\xbd"; - constexpr uint64_t garbage_blob_count = 123; - constexpr uint64_t garbage_blob_bytes = 456789; - - AddBlob(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, - checksum_value, BlobFileMetaData::LinkedSsts(), garbage_blob_count, - garbage_blob_bytes); - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, - checksum_method, checksum_value); - - const Status s = builder.Apply(&edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(std::strstr(s.getState(), "Blob file #1234 already added")); -} - -TEST_F(VersionBuilderTest, ApplyBlobFileAdditionAlreadyApplied) { - // Attempt to add the same blob file twice using version edits. 
- - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - constexpr uint64_t blob_file_number = 1234; - constexpr uint64_t total_blob_count = 5678; - constexpr uint64_t total_blob_bytes = 999999; - constexpr char checksum_method[] = "SHA1"; - constexpr char checksum_value[] = - "\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52" - "\x5c\xbd"; - - edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, - checksum_method, checksum_value); - - ASSERT_OK(builder.Apply(&edit)); - - const Status s = builder.Apply(&edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(std::strstr(s.getState(), "Blob file #1234 already added")); -} - -TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileInBase) { - // Increase the amount of garbage for a blob file present in the base version. - - constexpr uint64_t table_file_number = 1; - constexpr uint64_t blob_file_number = 1234; - constexpr uint64_t total_blob_count = 5678; - constexpr uint64_t total_blob_bytes = 999999; - constexpr char checksum_method[] = "SHA1"; - constexpr char checksum_value[] = - "\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52" - "\x5c\xbd"; - constexpr uint64_t garbage_blob_count = 123; - constexpr uint64_t garbage_blob_bytes = 456789; - - AddBlob(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, - checksum_value, BlobFileMetaData::LinkedSsts{table_file_number}, - garbage_blob_count, garbage_blob_bytes); - - const auto meta = vstorage_.GetBlobFileMetaData(blob_file_number); - ASSERT_NE(meta, nullptr); - - // Add dummy table file to ensure the blob file is referenced. 
- AddDummyFile(table_file_number, blob_file_number, 1 /*epoch_number*/); - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - constexpr uint64_t new_garbage_blob_count = 456; - constexpr uint64_t new_garbage_blob_bytes = 111111; - - edit.AddBlobFileGarbage(blob_file_number, new_garbage_blob_count, - new_garbage_blob_bytes); - - ASSERT_OK(builder.Apply(&edit)); - - constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - ASSERT_OK(builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - const auto& new_blob_files = new_vstorage.GetBlobFiles(); - ASSERT_EQ(new_blob_files.size(), 1); - - const auto new_meta = new_vstorage.GetBlobFileMetaData(blob_file_number); - - ASSERT_NE(new_meta, nullptr); - ASSERT_EQ(new_meta->GetSharedMeta(), meta->GetSharedMeta()); - ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number); - ASSERT_EQ(new_meta->GetTotalBlobCount(), total_blob_count); - ASSERT_EQ(new_meta->GetTotalBlobBytes(), total_blob_bytes); - ASSERT_EQ(new_meta->GetChecksumMethod(), checksum_method); - ASSERT_EQ(new_meta->GetChecksumValue(), checksum_value); - ASSERT_EQ(new_meta->GetLinkedSsts(), - BlobFileMetaData::LinkedSsts{table_file_number}); - ASSERT_EQ(new_meta->GetGarbageBlobCount(), - garbage_blob_count + new_garbage_blob_count); - ASSERT_EQ(new_meta->GetGarbageBlobBytes(), - garbage_blob_bytes + new_garbage_blob_bytes); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileAdditionApplied) { - // Increase the amount of garbage for a blob file added using a version edit. - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit addition; - - constexpr uint64_t blob_file_number = 1234; - constexpr uint64_t total_blob_count = 5678; - constexpr uint64_t total_blob_bytes = 999999; - constexpr char checksum_method[] = "SHA1"; - constexpr char checksum_value[] = - "\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52" - "\x5c\xbd"; - - addition.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, - checksum_method, checksum_value); - - // Add dummy table file to ensure the blob file is referenced. 
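  // (Blob files that are not referenced by any table file do not survive a
  // SaveTo(), so the dummy SST keeps blob file #1234 alive in the new
  // version; see SaveBlobFilesTo further down.)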
- constexpr uint64_t table_file_number = 1; - AddDummyFileToEdit(&addition, table_file_number, blob_file_number, - 1 /*epoch_number*/); - - ASSERT_OK(builder.Apply(&addition)); - - constexpr uint64_t garbage_blob_count = 123; - constexpr uint64_t garbage_blob_bytes = 456789; - - VersionEdit garbage; - - garbage.AddBlobFileGarbage(blob_file_number, garbage_blob_count, - garbage_blob_bytes); - - ASSERT_OK(builder.Apply(&garbage)); - - constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - ASSERT_OK(builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - const auto& new_blob_files = new_vstorage.GetBlobFiles(); - ASSERT_EQ(new_blob_files.size(), 1); - - const auto new_meta = new_vstorage.GetBlobFileMetaData(blob_file_number); - - ASSERT_NE(new_meta, nullptr); - ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number); - ASSERT_EQ(new_meta->GetTotalBlobCount(), total_blob_count); - ASSERT_EQ(new_meta->GetTotalBlobBytes(), total_blob_bytes); - ASSERT_EQ(new_meta->GetChecksumMethod(), checksum_method); - ASSERT_EQ(new_meta->GetChecksumValue(), checksum_value); - ASSERT_EQ(new_meta->GetLinkedSsts(), - BlobFileMetaData::LinkedSsts{table_file_number}); - ASSERT_EQ(new_meta->GetGarbageBlobCount(), garbage_blob_count); - ASSERT_EQ(new_meta->GetGarbageBlobBytes(), garbage_blob_bytes); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileNotFound) { - // Attempt to increase the amount of garbage for a blob file that is - // neither in the base version, nor was it added using a version edit. - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - constexpr uint64_t blob_file_number = 1234; - constexpr uint64_t garbage_blob_count = 5678; - constexpr uint64_t garbage_blob_bytes = 999999; - - edit.AddBlobFileGarbage(blob_file_number, garbage_blob_count, - garbage_blob_bytes); - - const Status s = builder.Apply(&edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(std::strstr(s.getState(), "Blob file #1234 not found")); -} - -TEST_F(VersionBuilderTest, BlobFileGarbageOverflow) { - // Test that VersionEdits that would result in the count/total size of garbage - // exceeding the count/total size of all blobs are rejected. - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit addition; - - constexpr uint64_t blob_file_number = 1234; - constexpr uint64_t total_blob_count = 5678; - constexpr uint64_t total_blob_bytes = 999999; - constexpr char checksum_method[] = "SHA1"; - constexpr char checksum_value[] = - "\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52" - "\x5c\xbd"; - - addition.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, - checksum_method, checksum_value); - - // Add dummy table file to ensure the blob file is referenced. 
- constexpr uint64_t table_file_number = 1; - AddDummyFileToEdit(&addition, table_file_number, blob_file_number, - 1 /*epoch_number*/); - - ASSERT_OK(builder.Apply(&addition)); - - { - // Garbage blob count overflow - constexpr uint64_t garbage_blob_count = 5679; - constexpr uint64_t garbage_blob_bytes = 999999; - - VersionEdit garbage; - - garbage.AddBlobFileGarbage(blob_file_number, garbage_blob_count, - garbage_blob_bytes); - - const Status s = builder.Apply(&garbage); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE( - std::strstr(s.getState(), "Garbage overflow for blob file #1234")); - } - - { - // Garbage blob bytes overflow - constexpr uint64_t garbage_blob_count = 5678; - constexpr uint64_t garbage_blob_bytes = 1000000; - - VersionEdit garbage; - - garbage.AddBlobFileGarbage(blob_file_number, garbage_blob_count, - garbage_blob_bytes); - - const Status s = builder.Apply(&garbage); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE( - std::strstr(s.getState(), "Garbage overflow for blob file #1234")); - } -} - -TEST_F(VersionBuilderTest, SaveBlobFilesTo) { - // Add three blob files to base version. - for (uint64_t i = 1; i <= 3; ++i) { - const uint64_t table_file_number = 2 * i; - const uint64_t blob_file_number = 2 * i + 1; - const uint64_t total_blob_count = i * 1000; - const uint64_t total_blob_bytes = i * 1000000; - const uint64_t garbage_blob_count = i * 100; - const uint64_t garbage_blob_bytes = i * 20000; - - AddBlob(blob_file_number, total_blob_count, total_blob_bytes, - /* checksum_method */ std::string(), - /* checksum_value */ std::string(), - BlobFileMetaData::LinkedSsts{table_file_number}, garbage_blob_count, - garbage_blob_bytes); - } - - // Add dummy table files to ensure the blob files are referenced. - // Note: files are added to L0, so they have to be added in reverse order - // (newest first). - for (uint64_t i = 3; i >= 1; --i) { - const uint64_t table_file_number = 2 * i; - const uint64_t blob_file_number = 2 * i + 1; - - AddDummyFile(table_file_number, blob_file_number, i /*epoch_number*/); - } - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - // Add some garbage to the second and third blob files. The second blob file - // remains valid since it does not consist entirely of garbage yet. The third - // blob file is all garbage after the edit and will not be part of the new - // version. The corresponding dummy table file is also removed for - // consistency. - edit.AddBlobFileGarbage(/* blob_file_number */ 5, - /* garbage_blob_count */ 200, - /* garbage_blob_bytes */ 100000); - edit.AddBlobFileGarbage(/* blob_file_number */ 7, - /* garbage_blob_count */ 2700, - /* garbage_blob_bytes */ 2940000); - edit.DeleteFile(/* level */ 0, /* file_number */ 6); - - // Add a fourth blob file. 
- edit.AddBlobFile(/* blob_file_number */ 9, /* total_blob_count */ 4000, - /* total_blob_bytes */ 4000000, - /* checksum_method */ std::string(), - /* checksum_value */ std::string()); - - ASSERT_OK(builder.Apply(&edit)); - - constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - ASSERT_OK(builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - const auto& new_blob_files = new_vstorage.GetBlobFiles(); - ASSERT_EQ(new_blob_files.size(), 3); - - const auto meta3 = new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 3); - - ASSERT_NE(meta3, nullptr); - ASSERT_EQ(meta3->GetBlobFileNumber(), 3); - ASSERT_EQ(meta3->GetTotalBlobCount(), 1000); - ASSERT_EQ(meta3->GetTotalBlobBytes(), 1000000); - ASSERT_EQ(meta3->GetGarbageBlobCount(), 100); - ASSERT_EQ(meta3->GetGarbageBlobBytes(), 20000); - - const auto meta5 = new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 5); - - ASSERT_NE(meta5, nullptr); - ASSERT_EQ(meta5->GetBlobFileNumber(), 5); - ASSERT_EQ(meta5->GetTotalBlobCount(), 2000); - ASSERT_EQ(meta5->GetTotalBlobBytes(), 2000000); - ASSERT_EQ(meta5->GetGarbageBlobCount(), 400); - ASSERT_EQ(meta5->GetGarbageBlobBytes(), 140000); - - const auto meta9 = new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 9); - - ASSERT_NE(meta9, nullptr); - ASSERT_EQ(meta9->GetBlobFileNumber(), 9); - ASSERT_EQ(meta9->GetTotalBlobCount(), 4000); - ASSERT_EQ(meta9->GetTotalBlobBytes(), 4000000); - ASSERT_EQ(meta9->GetGarbageBlobCount(), 0); - ASSERT_EQ(meta9->GetGarbageBlobBytes(), 0); - - // Delete the first table file, which makes the first blob file obsolete - // since it's at the head and unreferenced. - VersionBuilder second_builder(env_options, &ioptions_, table_cache, - &new_vstorage, version_set); - - VersionEdit second_edit; - second_edit.DeleteFile(/* level */ 0, /* file_number */ 2); - - ASSERT_OK(second_builder.Apply(&second_edit)); - - VersionStorageInfo newer_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &new_vstorage, - force_consistency_checks); - - ASSERT_OK(second_builder.SaveTo(&newer_vstorage)); - - UpdateVersionStorageInfo(&newer_vstorage); - - const auto& newer_blob_files = newer_vstorage.GetBlobFiles(); - ASSERT_EQ(newer_blob_files.size(), 2); - - const auto newer_meta3 = - newer_vstorage.GetBlobFileMetaData(/* blob_file_number */ 3); - - ASSERT_EQ(newer_meta3, nullptr); - - UnrefFilesInVersion(&newer_vstorage); - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { - // When multiple background jobs (flushes/compactions) are executing in - // parallel, it is possible for the VersionEdit adding blob file K to be - // applied *after* the VersionEdit adding blob file N (for N > K). This test - // case makes sure this is handled correctly. - - // Add blob file #4 (referenced by table file #3) to base version. 
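  // In this test the base version already contains blob file #4, while the
  // edit below adds blob file #2, i.e. the blob file with the smaller number
  // is applied second (see the static_assert below).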
- constexpr uint64_t base_table_file_number = 3; - constexpr uint64_t base_blob_file_number = 4; - constexpr uint64_t base_total_blob_count = 100; - constexpr uint64_t base_total_blob_bytes = 1 << 20; - - constexpr char checksum_method[] = "SHA1"; - constexpr char checksum_value[] = "\xfa\xce\xb0\x0c"; - constexpr uint64_t garbage_blob_count = 0; - constexpr uint64_t garbage_blob_bytes = 0; - - AddDummyFile(base_table_file_number, base_blob_file_number, - 1 /*epoch_number*/); - AddBlob(base_blob_file_number, base_total_blob_count, base_total_blob_bytes, - checksum_method, checksum_value, - BlobFileMetaData::LinkedSsts{base_table_file_number}, - garbage_blob_count, garbage_blob_bytes); - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - // Add blob file #2 (referenced by table file #1). - constexpr int level = 0; - constexpr uint64_t table_file_number = 1; - constexpr uint32_t path_id = 0; - constexpr uint64_t file_size = 1 << 12; - constexpr char smallest[] = "key1"; - constexpr char largest[] = "key987"; - constexpr SequenceNumber smallest_seqno = 0; - constexpr SequenceNumber largest_seqno = 0; - constexpr bool marked_for_compaction = false; - - constexpr uint64_t blob_file_number = 2; - static_assert(blob_file_number < base_blob_file_number, - "Added blob file should have a smaller file number"); - - constexpr uint64_t total_blob_count = 234; - constexpr uint64_t total_blob_bytes = 1 << 22; - - edit.AddFile( - level, table_file_number, path_id, file_size, GetInternalKey(smallest), - GetInternalKey(largest), smallest_seqno, largest_seqno, - marked_for_compaction, Temperature::kUnknown, blob_file_number, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, 2 /*epoch_number*/, - checksum_value, checksum_method, kNullUniqueId64x2, 0); - edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, - checksum_method, checksum_value); - - ASSERT_OK(builder.Apply(&edit)); - - constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - ASSERT_OK(builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - const auto& new_blob_files = new_vstorage.GetBlobFiles(); - ASSERT_EQ(new_blob_files.size(), 2); - - const auto base_meta = - new_vstorage.GetBlobFileMetaData(base_blob_file_number); - - ASSERT_NE(base_meta, nullptr); - ASSERT_EQ(base_meta->GetBlobFileNumber(), base_blob_file_number); - ASSERT_EQ(base_meta->GetTotalBlobCount(), base_total_blob_count); - ASSERT_EQ(base_meta->GetTotalBlobBytes(), base_total_blob_bytes); - ASSERT_EQ(base_meta->GetGarbageBlobCount(), garbage_blob_count); - ASSERT_EQ(base_meta->GetGarbageBlobBytes(), garbage_blob_bytes); - ASSERT_EQ(base_meta->GetChecksumMethod(), checksum_method); - ASSERT_EQ(base_meta->GetChecksumValue(), checksum_value); - - const auto added_meta = new_vstorage.GetBlobFileMetaData(blob_file_number); - - ASSERT_NE(added_meta, nullptr); - ASSERT_EQ(added_meta->GetBlobFileNumber(), blob_file_number); - ASSERT_EQ(added_meta->GetTotalBlobCount(), total_blob_count); - ASSERT_EQ(added_meta->GetTotalBlobBytes(), total_blob_bytes); - ASSERT_EQ(added_meta->GetGarbageBlobCount(), garbage_blob_count); - ASSERT_EQ(added_meta->GetGarbageBlobBytes(), garbage_blob_bytes); - 
ASSERT_EQ(added_meta->GetChecksumMethod(), checksum_method); - ASSERT_EQ(added_meta->GetChecksumValue(), checksum_value); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { - // Initialize base version. The first table file points to a valid blob file - // in this version; the second one does not refer to any blob files. - - Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150", - /* largest */ "200", /* file_size */ 100, - /* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100, - /* num_entries */ 0, /* num_deletions */ 0, - /* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100, - /* oldest_blob_file_number */ 16); - Add(/* level */ 1, /* file_number */ 23, /* smallest */ "201", - /* largest */ "300", /* file_size */ 100, - /* path_id */ 0, /* smallest_seq */ 200, /* largest_seq */ 200, - /* num_entries */ 0, /* num_deletions */ 0, - /* sampled */ false, /* smallest_seqno */ 200, /* largest_seqno */ 200, - kInvalidBlobFileNumber); - - AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000, - /* total_blob_bytes */ 1000000, - /* checksum_method */ std::string(), - /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1}, - /* garbage_blob_count */ 500, /* garbage_blob_bytes */ 300000); - - UpdateVersionStorageInfo(); - - // Add a new table file that points to the existing blob file, and add a - // new table file--blob file pair. - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - edit.AddFile(/* level */ 1, /* file_number */ 606, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("701"), - /* largest */ GetInternalKey("750"), /* smallest_seqno */ 200, - /* largest_seqno */ 200, /* marked_for_compaction */ false, - Temperature::kUnknown, - /* oldest_blob_file_number */ 16, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0); - - edit.AddFile(/* level */ 1, /* file_number */ 700, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("801"), - /* largest */ GetInternalKey("850"), /* smallest_seqno */ 200, - /* largest_seqno */ 200, /* marked_for_compaction */ false, - Temperature::kUnknown, - /* oldest_blob_file_number */ 1000, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0); - edit.AddBlobFile(/* blob_file_number */ 1000, /* total_blob_count */ 2000, - /* total_blob_bytes */ 200000, - /* checksum_method */ std::string(), - /* checksum_value */ std::string()); - - ASSERT_OK(builder.Apply(&edit)); - - // Save to a new version in order to trigger consistency checks. - constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - ASSERT_OK(builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesInconsistentLinks) { - // Initialize base version. Links between the table file and the blob file - // are inconsistent. 
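  // The table file below claims oldest blob file #256, while the only blob
  // file in the version (#16) lists table file #1 as its linked SST, so the
  // consistency check performed by SaveTo() is expected to fail.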
- - Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150", - /* largest */ "200", /* file_size */ 100, - /* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100, - /* num_entries */ 0, /* num_deletions */ 0, - /* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100, - /* oldest_blob_file_number */ 256); - - AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000, - /* total_blob_bytes */ 1000000, - /* checksum_method */ std::string(), - /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1}, - /* garbage_blob_count */ 500, /* garbage_blob_bytes */ 300000); - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - // Save to a new version in order to trigger consistency checks. - constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - const Status s = builder.SaveTo(&new_vstorage); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(std::strstr( - s.getState(), - "Links are inconsistent between table files and blob file #16")); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbage) { - // Initialize base version. The table file points to a blob file that is - // all garbage. - - Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150", - /* largest */ "200", /* file_size */ 100, - /* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100, - /* num_entries */ 0, /* num_deletions */ 0, - /* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100, - /* oldest_blob_file_number */ 16); - - AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000, - /* total_blob_bytes */ 1000000, - /* checksum_method */ std::string(), - /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1}, - /* garbage_blob_count */ 1000, /* garbage_blob_bytes */ 1000000); - - UpdateVersionStorageInfo(); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - // Save to a new version in order to trigger consistency checks. - constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - const Status s = builder.SaveTo(&new_vstorage); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE( - std::strstr(s.getState(), "Blob file #16 consists entirely of garbage")); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbageLinkedSsts) { - // Initialize base version, with a table file pointing to a blob file - // that has no garbage at this point. 
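  // A version edit below then marks the entire blob file as garbage without
  // removing the linked SST, which the consistency check is expected to
  // reject when the new version is saved.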
- - Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150", - /* largest */ "200", /* file_size */ 100, - /* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100, - /* num_entries */ 0, /* num_deletions */ 0, - /* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100, - /* oldest_blob_file_number */ 16); - - AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000, - /* total_blob_bytes */ 1000000, - /* checksum_method */ std::string(), - /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1}, - /* garbage_blob_count */ 0, /* garbage_blob_bytes */ 0); - - UpdateVersionStorageInfo(); - - // Mark the entire blob file garbage but do not remove the linked SST. - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - VersionEdit edit; - - edit.AddBlobFileGarbage(/* blob_file_number */ 16, - /* garbage_blob_count */ 1000, - /* garbage_blob_bytes */ 1000000); - - ASSERT_OK(builder.Apply(&edit)); - - // Save to a new version in order to trigger consistency checks. - constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - const Status s = builder.SaveTo(&new_vstorage); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE( - std::strstr(s.getState(), "Blob file #16 consists entirely of garbage")); - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { - // Initialize base version. Table files 1..10 are linked to blob files 1..5, - // while table files 11..20 are not linked to any blob files. 
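  // Table file i references blob file ((i - 1) % 5) + 1, so each blob file j
  // starts out with two linked SSTs: {j, j + 5}.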
- - for (uint64_t i = 1; i <= 10; ++i) { - std::ostringstream oss; - oss << std::setw(2) << std::setfill('0') << i; - - const std::string key = oss.str(); - - Add(/* level */ 1, /* file_number */ i, /* smallest */ key.c_str(), - /* largest */ key.c_str(), /* file_size */ 100, - /* path_id */ 0, /* smallest_seq */ i * 100, /* largest_seq */ i * 100, - /* num_entries */ 0, /* num_deletions */ 0, - /* sampled */ false, /* smallest_seqno */ i * 100, - /* largest_seqno */ i * 100, - /* oldest_blob_file_number */ ((i - 1) % 5) + 1); - } - - for (uint64_t i = 1; i <= 5; ++i) { - AddBlob(/* blob_file_number */ i, /* total_blob_count */ 2000, - /* total_blob_bytes */ 2000000, - /* checksum_method */ std::string(), - /* checksum_value */ std::string(), - BlobFileMetaData::LinkedSsts{i, i + 5}, - /* garbage_blob_count */ 1000, /* garbage_blob_bytes */ 1000000); - } - - for (uint64_t i = 11; i <= 20; ++i) { - std::ostringstream oss; - oss << std::setw(2) << std::setfill('0') << i; - - const std::string key = oss.str(); - - Add(/* level */ 1, /* file_number */ i, /* smallest */ key.c_str(), - /* largest */ key.c_str(), /* file_size */ 100, - /* path_id */ 0, /* smallest_seq */ i * 100, /* largest_seq */ i * 100, - /* num_entries */ 0, /* num_deletions */ 0, - /* sampled */ false, /* smallest_seqno */ i * 100, - /* largest_seqno */ i * 100, kInvalidBlobFileNumber); - } - - UpdateVersionStorageInfo(); - - { - const auto& blob_files = vstorage_.GetBlobFiles(); - ASSERT_EQ(blob_files.size(), 5); - - const std::vector expected_linked_ssts{ - {1, 6}, {2, 7}, {3, 8}, {4, 9}, {5, 10}}; - - for (size_t i = 0; i < 5; ++i) { - const auto meta = - vstorage_.GetBlobFileMetaData(/* blob_file_number */ i + 1); - ASSERT_NE(meta, nullptr); - ASSERT_EQ(meta->GetLinkedSsts(), expected_linked_ssts[i]); - } - } - - VersionEdit edit; - - // Add an SST that references a blob file. - edit.AddFile( - /* level */ 1, /* file_number */ 21, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("21", 2100), - /* largest */ GetInternalKey("21", 2100), /* smallest_seqno */ 2100, - /* largest_seqno */ 2100, /* marked_for_compaction */ false, - Temperature::kUnknown, - /* oldest_blob_file_number */ 1, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - // Add an SST that does not reference any blob files. - edit.AddFile( - /* level */ 1, /* file_number */ 22, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("22", 2200), - /* largest */ GetInternalKey("22", 2200), /* smallest_seqno */ 2200, - /* largest_seqno */ 2200, /* marked_for_compaction */ false, - Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - // Delete a file that references a blob file. - edit.DeleteFile(/* level */ 1, /* file_number */ 6); - - // Delete a file that does not reference any blob files. - edit.DeleteFile(/* level */ 1, /* file_number */ 16); - - // Trivially move a file that references a blob file. Note that we save - // the original BlobFileMetaData object so we can check that no new object - // gets created. 
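  // (A trivial move re-adds the same file number on a different level, so the
  // blob file's linked-SST set should stay the same and the existing
  // BlobFileMetaData object should be reused rather than recreated.)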
- auto meta3 = vstorage_.GetBlobFileMetaData(/* blob_file_number */ 3); - - edit.DeleteFile(/* level */ 1, /* file_number */ 3); - edit.AddFile(/* level */ 2, /* file_number */ 3, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("03", 300), - /* largest */ GetInternalKey("03", 300), - /* smallest_seqno */ 300, - /* largest_seqno */ 300, /* marked_for_compaction */ false, - Temperature::kUnknown, - /* oldest_blob_file_number */ 3, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0); - - // Trivially move a file that does not reference any blob files. - edit.DeleteFile(/* level */ 1, /* file_number */ 13); - edit.AddFile(/* level */ 2, /* file_number */ 13, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("13", 1300), - /* largest */ GetInternalKey("13", 1300), - /* smallest_seqno */ 1300, - /* largest_seqno */ 1300, /* marked_for_compaction */ false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - // Add one more SST file that references a blob file, then promptly - // delete it in a second version edit before the new version gets saved. - // This file should not show up as linked to the blob file in the new version. - edit.AddFile(/* level */ 1, /* file_number */ 23, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("23", 2300), - /* largest */ GetInternalKey("23", 2300), - /* smallest_seqno */ 2300, - /* largest_seqno */ 2300, /* marked_for_compaction */ false, - Temperature::kUnknown, - /* oldest_blob_file_number */ 5, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0); - - VersionEdit edit2; - - edit2.DeleteFile(/* level */ 1, /* file_number */ 23); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, - version_set); - - ASSERT_OK(builder.Apply(&edit)); - ASSERT_OK(builder.Apply(&edit2)); - - constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); - - ASSERT_OK(builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - { - const auto& blob_files = new_vstorage.GetBlobFiles(); - ASSERT_EQ(blob_files.size(), 5); - - const std::vector expected_linked_ssts{ - {1, 21}, {2, 7}, {3, 8}, {4, 9}, {5, 10}}; - - for (size_t i = 0; i < 5; ++i) { - const auto meta = - new_vstorage.GetBlobFileMetaData(/* blob_file_number */ i + 1); - ASSERT_NE(meta, nullptr); - ASSERT_EQ(meta->GetLinkedSsts(), expected_linked_ssts[i]); - } - - // Make sure that no new BlobFileMetaData got created for the blob file - // affected by the trivial move. 
- ASSERT_EQ(new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 3), - meta3); - } - - UnrefFilesInVersion(&new_vstorage); -} - -TEST_F(VersionBuilderTest, CheckConsistencyForFileDeletedTwice) { - Add(0, 1U, "150", "200", 100, /*path_id*/ 0, - /*smallest_seq*/ 100, /*largest_seq*/ 100, - /*num_entries*/ 0, /*num_deletions*/ 0, - /*sampled*/ false, /*smallest_seqno*/ 0, - /*largest_seqno*/ 0, - /*oldest_blob_file_number*/ kInvalidBlobFileNumber, - /*epoch_number*/ 1); - - UpdateVersionStorageInfo(); - - VersionEdit version_edit; - version_edit.DeleteFile(0, 1U); - - EnvOptions env_options; - constexpr TableCache* table_cache = nullptr; - constexpr VersionSet* version_set = nullptr; - - VersionBuilder version_builder(env_options, &ioptions_, table_cache, - &vstorage_, version_set); - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, - true /* force_consistency_checks */); - ASSERT_OK(version_builder.Apply(&version_edit)); - ASSERT_OK(version_builder.SaveTo(&new_vstorage)); - - UpdateVersionStorageInfo(&new_vstorage); - - VersionBuilder version_builder2(env_options, &ioptions_, table_cache, - &new_vstorage, version_set); - VersionStorageInfo new_vstorage2(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, - true /* force_consistency_checks */); - ASSERT_NOK(version_builder2.Apply(&version_edit)); - - UnrefFilesInVersion(&new_vstorage); - UnrefFilesInVersion(&new_vstorage2); -} - -TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { - Status s; - // To verify files of same epoch number of overlapping ranges are caught as - // corrupted - VersionEdit version_edit_1; - version_edit_1.AddFile( - /* level */ 0, /* file_number */ 1U, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("a", 1), - /* largest */ GetInternalKey("c", 3), /* smallest_seqno */ 1, - /* largest_seqno */ 3, /* marked_for_compaction */ false, - Temperature::kUnknown, - /* oldest_blob_file_number */ kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0); - version_edit_1.AddFile( - /* level */ 0, /* file_number */ 2U, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("b", 2), - /* largest */ GetInternalKey("d", 4), /* smallest_seqno */ 2, - /* largest_seqno */ 4, /* marked_for_compaction */ false, - Temperature::kUnknown, - /* oldest_blob_file_number */ kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0); - - VersionBuilder version_builder_1(EnvOptions(), &ioptions_, - nullptr /* table_cache */, &vstorage_, - nullptr /* file_metadata_cache_res_mgr */); - VersionStorageInfo new_vstorage_1( - &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, - nullptr /* src_vstorage */, true /* force_consistency_checks */); - - ASSERT_OK(version_builder_1.Apply(&version_edit_1)); - s = version_builder_1.SaveTo(&new_vstorage_1); - EXPECT_TRUE(s.IsCorruption()); - EXPECT_TRUE(std::strstr( - s.getState(), "L0 files of same epoch number but overlapping range")); - UnrefFilesInVersion(&new_vstorage_1); - - // To verify L0 files not sorted by epoch_number are caught as corrupted - VersionEdit version_edit_2; - version_edit_2.AddFile( - /* level */ 0, /* file_number */ 1U, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("a", 1), - /* 
largest */ GetInternalKey("a", 1), /* smallest_seqno */ 1, - /* largest_seqno */ 1, /* marked_for_compaction */ false, - Temperature::kUnknown, - /* oldest_blob_file_number */ kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0); - version_edit_2.AddFile( - /* level */ 0, /* file_number */ 2U, /* path_id */ 0, - /* file_size */ 100, /* smallest */ GetInternalKey("b", 2), - /* largest */ GetInternalKey("b", 2), /* smallest_seqno */ 2, - /* largest_seqno */ 2, /* marked_for_compaction */ false, - Temperature::kUnknown, - /* oldest_blob_file_number */ kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 2 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, 0); - - VersionBuilder version_builder_2(EnvOptions(), &ioptions_, - nullptr /* table_cache */, &vstorage_, - nullptr /* file_metadata_cache_res_mgr */); - VersionStorageInfo new_vstorage_2( - &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, - nullptr /* src_vstorage */, true /* force_consistency_checks */); - - ASSERT_OK(version_builder_2.Apply(&version_edit_2)); - s = version_builder_2.SaveTo(&new_vstorage_2); - ASSERT_TRUE(s.ok()); - - const std::vector& l0_files = new_vstorage_2.LevelFiles(0); - ASSERT_EQ(l0_files.size(), 2); - // Manually corrupt L0 files's epoch_number - l0_files[0]->epoch_number = 1; - l0_files[1]->epoch_number = 2; - - // To surface corruption error by applying dummy version edit - VersionEdit dummy_version_edit; - VersionBuilder dummy_version_builder( - EnvOptions(), &ioptions_, nullptr /* table_cache */, &vstorage_, - nullptr /* file_metadata_cache_res_mgr */); - ASSERT_OK(dummy_version_builder.Apply(&dummy_version_edit)); - s = dummy_version_builder.SaveTo(&new_vstorage_2); - EXPECT_TRUE(s.IsCorruption()); - EXPECT_TRUE(std::strstr(s.getState(), "L0 files are not sorted properly")); - - UnrefFilesInVersion(&new_vstorage_2); -} - -TEST_F(VersionBuilderTest, EstimatedActiveKeys) { - const uint32_t kTotalSamples = 20; - const uint32_t kNumLevels = 5; - const uint32_t kFilesPerLevel = 8; - const uint32_t kNumFiles = kNumLevels * kFilesPerLevel; - const uint32_t kEntriesPerFile = 1000; - const uint32_t kDeletionsPerFile = 100; - for (uint32_t i = 0; i < kNumFiles; ++i) { - Add(static_cast(i / kFilesPerLevel), i + 1, - std::to_string((i + 100) * 1000).c_str(), - std::to_string((i + 100) * 1000 + 999).c_str(), 100U, 0, 100, 100, - kEntriesPerFile, kDeletionsPerFile, (i < kTotalSamples)); - } - // minus 2X for the number of deletion entries because: - // 1x for deletion entry does not count as a data entry. - // 1x for each deletion entry will actually remove one data entry. - ASSERT_EQ(vstorage_.GetEstimatedActiveKeys(), - (kEntriesPerFile - 2 * kDeletionsPerFile) * kNumFiles); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/version_edit_test.cc b/db/version_edit_test.cc deleted file mode 100644 index 1fa6c0054..000000000 --- a/db/version_edit_test.cc +++ /dev/null @@ -1,732 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
-// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/version_edit.h" - -#include "db/blob/blob_index.h" -#include "rocksdb/advanced_options.h" -#include "table/unique_id_impl.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/coding.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -static void TestEncodeDecode(const VersionEdit& edit) { - std::string encoded, encoded2; - edit.EncodeTo(&encoded); - VersionEdit parsed; - Status s = parsed.DecodeFrom(encoded); - ASSERT_TRUE(s.ok()) << s.ToString(); - parsed.EncodeTo(&encoded2); - ASSERT_EQ(encoded, encoded2); -} - -class VersionEditTest : public testing::Test {}; - -TEST_F(VersionEditTest, EncodeDecode) { - static const uint64_t kBig = 1ull << 50; - static const uint32_t kBig32Bit = 1ull << 30; - - VersionEdit edit; - for (int i = 0; i < 4; i++) { - TestEncodeDecode(edit); - edit.AddFile(3, kBig + 300 + i, kBig32Bit + 400 + i, 0, - InternalKey("foo", kBig + 500 + i, kTypeValue), - InternalKey("zoo", kBig + 600 + i, kTypeDeletion), - kBig + 500 + i, kBig + 600 + i, false, Temperature::kUnknown, - kInvalidBlobFileNumber, 888, 678, - kBig + 300 + i /* epoch_number */, "234", "crc32c", - kNullUniqueId64x2, 0); - edit.DeleteFile(4, kBig + 700 + i); - } - - edit.SetComparatorName("foo"); - edit.SetLogNumber(kBig + 100); - edit.SetNextFile(kBig + 200); - edit.SetLastSequence(kBig + 1000); - TestEncodeDecode(edit); -} - -TEST_F(VersionEditTest, EncodeDecodeNewFile4) { - static const uint64_t kBig = 1ull << 50; - - VersionEdit edit; - edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue), - InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, - kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), - InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, - kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 301 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue), - InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502, - kBig + 602, true, Temperature::kUnknown, kInvalidBlobFileNumber, - 666, 888, 302 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex), - InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503, - kBig + 603, true, Temperature::kUnknown, 1001, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 303 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - edit.DeleteFile(4, 700); - - edit.SetComparatorName("foo"); - edit.SetLogNumber(kBig + 100); - edit.SetNextFile(kBig + 200); - edit.SetLastSequence(kBig + 1000); - TestEncodeDecode(edit); - - std::string encoded, encoded2; - edit.EncodeTo(&encoded); - VersionEdit parsed; - Status s = parsed.DecodeFrom(encoded); - ASSERT_TRUE(s.ok()) << s.ToString(); - auto& new_files = 
parsed.GetNewFiles(); - ASSERT_TRUE(new_files[0].second.marked_for_compaction); - ASSERT_TRUE(!new_files[1].second.marked_for_compaction); - ASSERT_TRUE(new_files[2].second.marked_for_compaction); - ASSERT_TRUE(new_files[3].second.marked_for_compaction); - ASSERT_EQ(3u, new_files[0].second.fd.GetPathId()); - ASSERT_EQ(3u, new_files[1].second.fd.GetPathId()); - ASSERT_EQ(0u, new_files[2].second.fd.GetPathId()); - ASSERT_EQ(0u, new_files[3].second.fd.GetPathId()); - ASSERT_EQ(kInvalidBlobFileNumber, - new_files[0].second.oldest_blob_file_number); - ASSERT_EQ(kInvalidBlobFileNumber, - new_files[1].second.oldest_blob_file_number); - ASSERT_EQ(kInvalidBlobFileNumber, - new_files[2].second.oldest_blob_file_number); - ASSERT_EQ(1001, new_files[3].second.oldest_blob_file_number); -} - -TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { - static const uint64_t kBig = 1ull << 50; - VersionEdit edit; - edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue), - InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, - kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), - InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, - kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber, - 686, 868, 301 /* epoch_number */, "234", "crc32c", - kNullUniqueId64x2, 0); - edit.DeleteFile(4, 700); - - edit.SetComparatorName("foo"); - edit.SetLogNumber(kBig + 100); - edit.SetNextFile(kBig + 200); - edit.SetLastSequence(kBig + 1000); - - std::string encoded; - - // Call back function to add extra customized builds. - bool first = true; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionEdit::EncodeTo:NewFile4:CustomizeFields", [&](void* arg) { - std::string* str = reinterpret_cast(arg); - PutVarint32(str, 33); - const std::string str1 = "random_string"; - PutLengthPrefixedSlice(str, str1); - if (first) { - first = false; - PutVarint32(str, 22); - const std::string str2 = "s"; - PutLengthPrefixedSlice(str, str2); - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - edit.EncodeTo(&encoded); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - VersionEdit parsed; - Status s = parsed.DecodeFrom(encoded); - ASSERT_TRUE(s.ok()) << s.ToString(); - ASSERT_TRUE(!first); - auto& new_files = parsed.GetNewFiles(); - ASSERT_TRUE(new_files[0].second.marked_for_compaction); - ASSERT_TRUE(!new_files[1].second.marked_for_compaction); - ASSERT_EQ(3u, new_files[0].second.fd.GetPathId()); - ASSERT_EQ(3u, new_files[1].second.fd.GetPathId()); - ASSERT_EQ(1u, parsed.GetDeletedFiles().size()); -} - -TEST_F(VersionEditTest, NewFile4NotSupportedField) { - static const uint64_t kBig = 1ull << 50; - VersionEdit edit; - edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue), - InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, - kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - - edit.SetComparatorName("foo"); - edit.SetLogNumber(kBig + 100); - edit.SetNextFile(kBig + 200); - edit.SetLastSequence(kBig + 1000); - - std::string encoded; - - // Call back function to add extra customized builds. 
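  // Unlike ForwardCompatibleNewFile4 above, the callback below appends a
  // length-prefixed slice without the custom-field tag that normally precedes
  // it, so decoding the resulting NewFile4 entry is expected to fail.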
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionEdit::EncodeTo:NewFile4:CustomizeFields", [&](void* arg) { - std::string* str = reinterpret_cast(arg); - const std::string str1 = "s"; - PutLengthPrefixedSlice(str, str1); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - edit.EncodeTo(&encoded); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - VersionEdit parsed; - Status s = parsed.DecodeFrom(encoded); - ASSERT_NOK(s); -} - -TEST_F(VersionEditTest, EncodeEmptyFile) { - VersionEdit edit; - edit.AddFile(0, 0, 0, 0, InternalKey(), InternalKey(), 0, 0, false, - Temperature::kUnknown, kInvalidBlobFileNumber, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - 1 /*epoch_number*/, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - std::string buffer; - ASSERT_TRUE(!edit.EncodeTo(&buffer)); -} - -TEST_F(VersionEditTest, ColumnFamilyTest) { - VersionEdit edit; - edit.SetColumnFamily(2); - edit.AddColumnFamily("column_family"); - edit.SetMaxColumnFamily(5); - TestEncodeDecode(edit); - - edit.Clear(); - edit.SetColumnFamily(3); - edit.DropColumnFamily(); - TestEncodeDecode(edit); -} - -TEST_F(VersionEditTest, MinLogNumberToKeep) { - VersionEdit edit; - edit.SetMinLogNumberToKeep(13); - TestEncodeDecode(edit); - - edit.Clear(); - edit.SetMinLogNumberToKeep(23); - TestEncodeDecode(edit); -} - -TEST_F(VersionEditTest, AtomicGroupTest) { - VersionEdit edit; - edit.MarkAtomicGroup(1); - TestEncodeDecode(edit); -} - -TEST_F(VersionEditTest, IgnorableField) { - VersionEdit ve; - std::string encoded; - - // Size of ignorable field is too large - PutVarint32Varint64(&encoded, 2 /* kLogNumber */, 66); - // This is a customized ignorable tag - PutVarint32Varint64(&encoded, - 0x2710 /* A field with kTagSafeIgnoreMask set */, - 5 /* fieldlength 5 */); - encoded += "abc"; // Only fills 3 bytes, - ASSERT_NOK(ve.DecodeFrom(encoded)); - - encoded.clear(); - // Error when seeing unidentified tag that is not ignorable - PutVarint32Varint64(&encoded, 2 /* kLogNumber */, 66); - // This is a customized ignorable tag - PutVarint32Varint64(&encoded, 666 /* A field with kTagSafeIgnoreMask unset */, - 3 /* fieldlength 3 */); - encoded += "abc"; // Fill 3 bytes - PutVarint32Varint64(&encoded, 3 /* next file number */, 88); - ASSERT_NOK(ve.DecodeFrom(encoded)); - - // Safely ignore an identified but safely ignorable entry - encoded.clear(); - PutVarint32Varint64(&encoded, 2 /* kLogNumber */, 66); - // This is a customized ignorable tag - PutVarint32Varint64(&encoded, - 0x2710 /* A field with kTagSafeIgnoreMask set */, - 3 /* fieldlength 3 */); - encoded += "abc"; // Fill 3 bytes - PutVarint32Varint64(&encoded, 3 /* kNextFileNumber */, 88); - - ASSERT_OK(ve.DecodeFrom(encoded)); - - ASSERT_TRUE(ve.HasLogNumber()); - ASSERT_TRUE(ve.HasNextFile()); - ASSERT_EQ(66, ve.GetLogNumber()); - ASSERT_EQ(88, ve.GetNextFile()); -} - -TEST_F(VersionEditTest, DbId) { - VersionEdit edit; - edit.SetDBId("ab34-cd12-435f-er00"); - TestEncodeDecode(edit); - - edit.Clear(); - edit.SetDBId("34ba-cd12-435f-er01"); - TestEncodeDecode(edit); -} - -TEST_F(VersionEditTest, BlobFileAdditionAndGarbage) { - VersionEdit edit; - - const std::string checksum_method_prefix = "Hash"; - const std::string checksum_value_prefix = "Value"; - - for (uint64_t blob_file_number = 1; blob_file_number <= 10; - ++blob_file_number) { - const uint64_t total_blob_count = blob_file_number << 10; - const uint64_t total_blob_bytes = blob_file_number << 20; - - std::string 
checksum_method(checksum_method_prefix); - AppendNumberTo(&checksum_method, blob_file_number); - - std::string checksum_value(checksum_value_prefix); - AppendNumberTo(&checksum_value, blob_file_number); - - edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, - checksum_method, checksum_value); - - const uint64_t garbage_blob_count = total_blob_count >> 2; - const uint64_t garbage_blob_bytes = total_blob_bytes >> 1; - - edit.AddBlobFileGarbage(blob_file_number, garbage_blob_count, - garbage_blob_bytes); - } - - TestEncodeDecode(edit); -} - -TEST_F(VersionEditTest, AddWalEncodeDecode) { - VersionEdit edit; - for (uint64_t log_number = 1; log_number <= 20; log_number++) { - WalMetadata meta; - bool has_size = rand() % 2 == 0; - if (has_size) { - meta.SetSyncedSizeInBytes(rand() % 1000); - } - edit.AddWal(log_number, meta); - } - TestEncodeDecode(edit); -} - -static std::string PrefixEncodedWalAdditionWithLength( - const std::string& encoded) { - std::string ret; - PutVarint32(&ret, Tag::kWalAddition2); - PutLengthPrefixedSlice(&ret, encoded); - return ret; -} - -TEST_F(VersionEditTest, AddWalDecodeBadLogNumber) { - std::string encoded; - - { - // No log number. - std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded); - VersionEdit edit; - Status s = edit.DecodeFrom(encoded_edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("Error decoding WAL log number") != - std::string::npos) - << s.ToString(); - } - - { - // log number should be varint64, - // but we only encode 128 which is not a valid representation of varint64. - char c = 0; - unsigned char* ptr = reinterpret_cast(&c); - *ptr = 128; - encoded.append(1, c); - - std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded); - VersionEdit edit; - Status s = edit.DecodeFrom(encoded_edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("Error decoding WAL log number") != - std::string::npos) - << s.ToString(); - } -} - -TEST_F(VersionEditTest, AddWalDecodeBadTag) { - constexpr WalNumber kLogNumber = 100; - constexpr uint64_t kSizeInBytes = 100; - - std::string encoded; - PutVarint64(&encoded, kLogNumber); - - { - // No tag. - std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded); - VersionEdit edit; - Status s = edit.DecodeFrom(encoded_edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos) - << s.ToString(); - } - - { - // Only has size tag, no terminate tag. - std::string encoded_with_size = encoded; - PutVarint32(&encoded_with_size, - static_cast(WalAdditionTag::kSyncedSize)); - PutVarint64(&encoded_with_size, kSizeInBytes); - - std::string encoded_edit = - PrefixEncodedWalAdditionWithLength(encoded_with_size); - VersionEdit edit; - Status s = edit.DecodeFrom(encoded_edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos) - << s.ToString(); - } - - { - // Only has terminate tag. 
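    // With only the terminate tag present (and no synced-size field), the
    // decode is expected to succeed and yield a WalAddition whose metadata
    // reports no synced size.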
- std::string encoded_with_terminate = encoded; - PutVarint32(&encoded_with_terminate, - static_cast<uint32_t>(WalAdditionTag::kTerminate)); - - std::string encoded_edit = - PrefixEncodedWalAdditionWithLength(encoded_with_terminate); - VersionEdit edit; - ASSERT_OK(edit.DecodeFrom(encoded_edit)); - auto& wal_addition = edit.GetWalAdditions()[0]; - ASSERT_EQ(wal_addition.GetLogNumber(), kLogNumber); - ASSERT_FALSE(wal_addition.GetMetadata().HasSyncedSize()); - } -} - -TEST_F(VersionEditTest, AddWalDecodeNoSize) { - constexpr WalNumber kLogNumber = 100; - - std::string encoded; - PutVarint64(&encoded, kLogNumber); - PutVarint32(&encoded, static_cast<uint32_t>(WalAdditionTag::kSyncedSize)); - // No real size after the size tag. - - { - // Without terminate tag. - std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded); - VersionEdit edit; - Status s = edit.DecodeFrom(encoded_edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("Error decoding WAL file size") != - std::string::npos) - << s.ToString(); - } - - { - // With terminate tag. - PutVarint32(&encoded, static_cast<uint32_t>(WalAdditionTag::kTerminate)); - - std::string encoded_edit = PrefixEncodedWalAdditionWithLength(encoded); - VersionEdit edit; - Status s = edit.DecodeFrom(encoded_edit); - ASSERT_TRUE(s.IsCorruption()); - // The terminate tag is misunderstood as the size. - ASSERT_TRUE(s.ToString().find("Error decoding tag") != std::string::npos) - << s.ToString(); - } -} - -TEST_F(VersionEditTest, AddWalDebug) { - constexpr int n = 2; - constexpr std::array<uint64_t, n> kLogNumbers{{10, 20}}; - constexpr std::array<uint64_t, n> kSizeInBytes{{100, 200}}; - - VersionEdit edit; - for (int i = 0; i < n; i++) { - edit.AddWal(kLogNumbers[i], WalMetadata(kSizeInBytes[i])); - } - - const WalAdditions& wals = edit.GetWalAdditions(); - - ASSERT_TRUE(edit.IsWalAddition()); - ASSERT_EQ(wals.size(), n); - for (int i = 0; i < n; i++) { - const WalAddition& wal = wals[i]; - ASSERT_EQ(wal.GetLogNumber(), kLogNumbers[i]); - ASSERT_EQ(wal.GetMetadata().GetSyncedSizeInBytes(), kSizeInBytes[i]); - } - - std::string expected_str = "VersionEdit {\n"; - for (int i = 0; i < n; i++) { - std::stringstream ss; - ss << " WalAddition: log_number: " << kLogNumbers[i] - << " synced_size_in_bytes: " << kSizeInBytes[i] << "\n"; - expected_str += ss.str(); - } - expected_str += " ColumnFamily: 0\n}\n"; - ASSERT_EQ(edit.DebugString(true), expected_str); - - std::string expected_json = "{\"EditNumber\": 4, \"WalAdditions\": ["; - for (int i = 0; i < n; i++) { - std::stringstream ss; - ss << "{\"LogNumber\": " << kLogNumbers[i] << ", " - << "\"SyncedSizeInBytes\": " << kSizeInBytes[i] << "}"; - if (i < n - 1) ss << ", "; - expected_json += ss.str(); - } - expected_json += "], \"ColumnFamily\": 0}"; - ASSERT_EQ(edit.DebugJSON(4, true), expected_json); -} - -TEST_F(VersionEditTest, DeleteWalEncodeDecode) { - VersionEdit edit; - edit.DeleteWalsBefore(rand() % 100); - TestEncodeDecode(edit); -} - -TEST_F(VersionEditTest, DeleteWalDebug) { - constexpr int n = 2; - constexpr std::array<uint64_t, n> kLogNumbers{{10, 20}}; - - VersionEdit edit; - edit.DeleteWalsBefore(kLogNumbers[n - 1]); - - const WalDeletion& wal = edit.GetWalDeletion(); - - ASSERT_TRUE(edit.IsWalDeletion()); - ASSERT_EQ(wal.GetLogNumber(), kLogNumbers[n - 1]); - - std::string expected_str = "VersionEdit {\n"; - { - std::stringstream ss; - ss << " WalDeletion: log_number: " << kLogNumbers[n - 1] << "\n"; - expected_str += ss.str(); - } - expected_str += " ColumnFamily: 0\n}\n"; - ASSERT_EQ(edit.DebugString(true), expected_str); - -
std::string expected_json = "{\"EditNumber\": 4, \"WalDeletion\": "; - { - std::stringstream ss; - ss << "{\"LogNumber\": " << kLogNumbers[n - 1] << "}"; - expected_json += ss.str(); - } - expected_json += ", \"ColumnFamily\": 0}"; - ASSERT_EQ(edit.DebugJSON(4, true), expected_json); -} - -TEST_F(VersionEditTest, FullHistoryTsLow) { - VersionEdit edit; - ASSERT_FALSE(edit.HasFullHistoryTsLow()); - std::string ts = test::EncodeInt(0); - edit.SetFullHistoryTsLow(ts); - TestEncodeDecode(edit); -} - -// Tests that if RocksDB is downgraded, the new types of VersionEdits -// that have a tag larger than kTagSafeIgnoreMask can be safely ignored. -TEST_F(VersionEditTest, IgnorableTags) { - SyncPoint::GetInstance()->SetCallBack( - "VersionEdit::EncodeTo:IgnoreIgnorableTags", [&](void* arg) { - bool* ignore = static_cast(arg); - *ignore = true; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - constexpr uint64_t kPrevLogNumber = 100; - constexpr uint64_t kLogNumber = 200; - constexpr uint64_t kNextFileNumber = 300; - constexpr uint64_t kColumnFamilyId = 400; - - VersionEdit edit; - // Add some ignorable entries. - for (int i = 0; i < 2; i++) { - edit.AddWal(i + 1, WalMetadata(i + 2)); - } - edit.SetDBId("db_id"); - // Add unignorable entries. - edit.SetPrevLogNumber(kPrevLogNumber); - edit.SetLogNumber(kLogNumber); - // Add more ignorable entries. - edit.DeleteWalsBefore(100); - // Add unignorable entry. - edit.SetNextFile(kNextFileNumber); - // Add more ignorable entries. - edit.SetFullHistoryTsLow("ts"); - // Add unignorable entry. - edit.SetColumnFamily(kColumnFamilyId); - - std::string encoded; - ASSERT_TRUE(edit.EncodeTo(&encoded)); - - VersionEdit decoded; - ASSERT_OK(decoded.DecodeFrom(encoded)); - - // Check that all ignorable entries are ignored. - ASSERT_FALSE(decoded.HasDbId()); - ASSERT_FALSE(decoded.HasFullHistoryTsLow()); - ASSERT_FALSE(decoded.IsWalAddition()); - ASSERT_FALSE(decoded.IsWalDeletion()); - ASSERT_TRUE(decoded.GetWalAdditions().empty()); - ASSERT_TRUE(decoded.GetWalDeletion().IsEmpty()); - - // Check that unignorable entries are still present. 
- ASSERT_EQ(edit.GetPrevLogNumber(), kPrevLogNumber); - ASSERT_EQ(edit.GetLogNumber(), kLogNumber); - ASSERT_EQ(edit.GetNextFile(), kNextFileNumber); - ASSERT_EQ(edit.GetColumnFamily(), kColumnFamilyId); - - SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST(FileMetaDataTest, UpdateBoundariesBlobIndex) { - FileMetaData meta; - - { - constexpr uint64_t file_number = 10; - constexpr uint32_t path_id = 0; - constexpr uint64_t file_size = 0; - - meta.fd = FileDescriptor(file_number, path_id, file_size); - } - - constexpr char key[] = "foo"; - - constexpr uint64_t expected_oldest_blob_file_number = 20; - - // Plain old value (does not affect oldest_blob_file_number) - { - constexpr char value[] = "value"; - constexpr SequenceNumber seq = 200; - - ASSERT_OK(meta.UpdateBoundaries(key, value, seq, kTypeValue)); - ASSERT_EQ(meta.oldest_blob_file_number, kInvalidBlobFileNumber); - } - - // Non-inlined, non-TTL blob index (sets oldest_blob_file_number) - { - constexpr uint64_t blob_file_number = 25; - static_assert(blob_file_number > expected_oldest_blob_file_number, - "unexpected"); - - constexpr uint64_t offset = 1000; - constexpr uint64_t size = 100; - - std::string blob_index; - BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size, - kNoCompression); - - constexpr SequenceNumber seq = 201; - - ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex)); - ASSERT_EQ(meta.oldest_blob_file_number, blob_file_number); - } - - // Another one, with the oldest blob file number (updates - // oldest_blob_file_number) - { - constexpr uint64_t offset = 2000; - constexpr uint64_t size = 300; - - std::string blob_index; - BlobIndex::EncodeBlob(&blob_index, expected_oldest_blob_file_number, offset, - size, kNoCompression); - - constexpr SequenceNumber seq = 202; - - ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex)); - ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number); - } - - // Inlined TTL blob index (does not affect oldest_blob_file_number) - { - constexpr uint64_t expiration = 9876543210; - constexpr char value[] = "value"; - - std::string blob_index; - BlobIndex::EncodeInlinedTTL(&blob_index, expiration, value); - - constexpr SequenceNumber seq = 203; - - ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex)); - ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number); - } - - // Non-inlined TTL blob index (does not affect oldest_blob_file_number, even - // though file number is smaller) - { - constexpr uint64_t expiration = 9876543210; - constexpr uint64_t blob_file_number = 15; - static_assert(blob_file_number < expected_oldest_blob_file_number, - "unexpected"); - - constexpr uint64_t offset = 2000; - constexpr uint64_t size = 500; - - std::string blob_index; - BlobIndex::EncodeBlobTTL(&blob_index, expiration, blob_file_number, offset, - size, kNoCompression); - - constexpr SequenceNumber seq = 204; - - ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex)); - ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number); - } - - // Corrupt blob index - { - constexpr char corrupt_blob_index[] = "!corrupt!"; - constexpr SequenceNumber seq = 205; - - ASSERT_TRUE( - meta.UpdateBoundaries(key, corrupt_blob_index, seq, kTypeBlobIndex) - .IsCorruption()); - ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number); - } - - // Invalid blob file number - { - constexpr uint64_t offset = 10000; - constexpr uint64_t size = 1000; - - std::string blob_index; - 
BlobIndex::EncodeBlob(&blob_index, kInvalidBlobFileNumber, offset, size, - kNoCompression); - - constexpr SequenceNumber seq = 206; - - ASSERT_TRUE(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex) - .IsCorruption()); - ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/version_set_test.cc b/db/version_set_test.cc deleted file mode 100644 index a83fabcd0..000000000 --- a/db/version_set_test.cc +++ /dev/null @@ -1,3619 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/version_set.h" - -#include - -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "db/log_writer.h" -#include "db/version_edit.h" -#include "rocksdb/advanced_options.h" -#include "rocksdb/convenience.h" -#include "rocksdb/file_system.h" -#include "table/block_based/block_based_table_factory.h" -#include "table/mock_table.h" -#include "table/unique_id_impl.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class GenerateLevelFilesBriefTest : public testing::Test { - public: - std::vector files_; - LevelFilesBrief file_level_; - Arena arena_; - - GenerateLevelFilesBriefTest() {} - - ~GenerateLevelFilesBriefTest() override { - for (size_t i = 0; i < files_.size(); i++) { - delete files_[i]; - } - } - - void Add(const char* smallest, const char* largest, - SequenceNumber smallest_seq = 100, - SequenceNumber largest_seq = 100) { - FileMetaData* f = new FileMetaData( - files_.size() + 1, 0, 0, - InternalKey(smallest, smallest_seq, kTypeValue), - InternalKey(largest, largest_seq, kTypeValue), smallest_seq, - largest_seq, /* marked_for_compact */ false, Temperature::kUnknown, - kInvalidBlobFileNumber, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - files_.push_back(f); - } - - int Compare() { - int diff = 0; - for (size_t i = 0; i < files_.size(); i++) { - if (file_level_.files[i].fd.GetNumber() != files_[i]->fd.GetNumber()) { - diff++; - } - } - return diff; - } -}; - -TEST_F(GenerateLevelFilesBriefTest, Empty) { - DoGenerateLevelFilesBrief(&file_level_, files_, &arena_); - ASSERT_EQ(0u, file_level_.num_files); - ASSERT_EQ(0, Compare()); -} - -TEST_F(GenerateLevelFilesBriefTest, Single) { - Add("p", "q"); - DoGenerateLevelFilesBrief(&file_level_, files_, &arena_); - ASSERT_EQ(1u, file_level_.num_files); - ASSERT_EQ(0, Compare()); -} - -TEST_F(GenerateLevelFilesBriefTest, Multiple) { - Add("150", "200"); - Add("200", "250"); - Add("300", "350"); - Add("400", "450"); - DoGenerateLevelFilesBrief(&file_level_, files_, &arena_); - ASSERT_EQ(4u, file_level_.num_files); - ASSERT_EQ(0, Compare()); -} - -class CountingLogger : public Logger { - public: - CountingLogger() : log_count(0) {} - using Logger::Logv; - void Logv(const 
char* /*format*/, va_list /*ap*/) override { log_count++; } - int log_count; -}; - -Options GetOptionsWithNumLevels(int num_levels, - std::shared_ptr logger) { - Options opt; - opt.num_levels = num_levels; - opt.info_log = logger; - return opt; -} - -class VersionStorageInfoTestBase : public testing::Test { - public: - const Comparator* ucmp_; - InternalKeyComparator icmp_; - std::shared_ptr logger_; - Options options_; - ImmutableOptions ioptions_; - MutableCFOptions mutable_cf_options_; - VersionStorageInfo vstorage_; - - InternalKey GetInternalKey(const char* ukey, - SequenceNumber smallest_seq = 100) { - return InternalKey(ukey, smallest_seq, kTypeValue); - } - - explicit VersionStorageInfoTestBase(const Comparator* ucmp) - : ucmp_(ucmp), - icmp_(ucmp_), - logger_(new CountingLogger()), - options_(GetOptionsWithNumLevels(6, logger_)), - ioptions_(options_), - mutable_cf_options_(options_), - vstorage_(&icmp_, ucmp_, 6, kCompactionStyleLevel, - /*src_vstorage=*/nullptr, - /*_force_consistency_checks=*/false) {} - - ~VersionStorageInfoTestBase() override { - for (int i = 0; i < vstorage_.num_levels(); ++i) { - for (auto* f : vstorage_.LevelFiles(i)) { - if (--f->refs == 0) { - delete f; - } - } - } - } - - void Add(int level, uint32_t file_number, const char* smallest, - const char* largest, uint64_t file_size = 0, - uint64_t oldest_blob_file_number = kInvalidBlobFileNumber, - uint64_t compensated_range_deletion_size = 0) { - constexpr SequenceNumber dummy_seq = 0; - - Add(level, file_number, GetInternalKey(smallest, dummy_seq), - GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number, - compensated_range_deletion_size); - } - - void Add(int level, uint32_t file_number, const InternalKey& smallest, - const InternalKey& largest, uint64_t file_size = 0, - uint64_t oldest_blob_file_number = kInvalidBlobFileNumber, - uint64_t compensated_range_deletion_size = 0) { - assert(level < vstorage_.num_levels()); - FileMetaData* f = new FileMetaData( - file_number, 0, file_size, smallest, largest, /* smallest_seq */ 0, - /* largest_seq */ 0, /* marked_for_compact */ false, - Temperature::kUnknown, oldest_blob_file_number, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2, compensated_range_deletion_size); - vstorage_.AddFile(level, f); - } - - void AddBlob(uint64_t blob_file_number, uint64_t total_blob_count, - uint64_t total_blob_bytes, - BlobFileMetaData::LinkedSsts linked_ssts, - uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) { - auto shared_meta = SharedBlobFileMetaData::Create( - blob_file_number, total_blob_count, total_blob_bytes, - /* checksum_method */ std::string(), - /* checksum_value */ std::string()); - auto meta = - BlobFileMetaData::Create(std::move(shared_meta), std::move(linked_ssts), - garbage_blob_count, garbage_blob_bytes); - - vstorage_.AddBlobFile(std::move(meta)); - } - - void UpdateVersionStorageInfo() { - vstorage_.PrepareForVersionAppend(ioptions_, mutable_cf_options_); - vstorage_.SetFinalized(); - } - - std::string GetOverlappingFiles(int level, const InternalKey& begin, - const InternalKey& end) { - std::vector inputs; - vstorage_.GetOverlappingInputs(level, &begin, &end, &inputs); - - std::string result; - for (size_t i = 0; i < inputs.size(); ++i) { - if (i > 0) { - result += ","; - } - AppendNumberTo(&result, inputs[i]->fd.GetNumber()); - } - return result; - } -}; - -class VersionStorageInfoTest : public VersionStorageInfoTestBase { - 
public: - VersionStorageInfoTest() : VersionStorageInfoTestBase(BytewiseComparator()) {} - - ~VersionStorageInfoTest() override {} -}; - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelStatic) { - ioptions_.level_compaction_dynamic_level_bytes = false; - mutable_cf_options_.max_bytes_for_level_base = 10; - mutable_cf_options_.max_bytes_for_level_multiplier = 5; - - Add(4, 100U, "1", "2", 100U); - Add(5, 101U, "1", "2", 100U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 10U); - ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 50U); - ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 250U); - ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1250U); - - ASSERT_EQ(0, logger_->log_count); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_1) { - ioptions_.level_compaction_dynamic_level_bytes = true; - mutable_cf_options_.max_bytes_for_level_base = 1000; - mutable_cf_options_.max_bytes_for_level_multiplier = 5; - - Add(5, 1U, "1", "2", 500U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(0, logger_->log_count); - ASSERT_EQ(vstorage_.base_level(), 5); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_2) { - ioptions_.level_compaction_dynamic_level_bytes = true; - mutable_cf_options_.max_bytes_for_level_base = 1000; - mutable_cf_options_.max_bytes_for_level_multiplier = 5; - - Add(5, 1U, "1", "2", 500U); - Add(5, 2U, "3", "4", 550U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(0, logger_->log_count); - ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1000U); - ASSERT_EQ(vstorage_.base_level(), 4); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_3) { - ioptions_.level_compaction_dynamic_level_bytes = true; - mutable_cf_options_.max_bytes_for_level_base = 1000; - mutable_cf_options_.max_bytes_for_level_multiplier = 5; - - Add(5, 1U, "1", "2", 500U); - Add(5, 2U, "3", "4", 550U); - Add(4, 3U, "3", "4", 550U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(0, logger_->log_count); - ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1000U); - ASSERT_EQ(vstorage_.base_level(), 4); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_4) { - ioptions_.level_compaction_dynamic_level_bytes = true; - mutable_cf_options_.max_bytes_for_level_base = 1000; - mutable_cf_options_.max_bytes_for_level_multiplier = 5; - - Add(5, 1U, "1", "2", 500U); - Add(5, 2U, "3", "4", 550U); - Add(4, 3U, "3", "4", 550U); - Add(3, 4U, "3", "4", 250U); - Add(3, 5U, "5", "7", 300U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(1, logger_->log_count); - ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1005U); - ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 1000U); - ASSERT_EQ(vstorage_.base_level(), 3); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic_5) { - ioptions_.level_compaction_dynamic_level_bytes = true; - mutable_cf_options_.max_bytes_for_level_base = 1000; - mutable_cf_options_.max_bytes_for_level_multiplier = 5; - - Add(5, 1U, "1", "2", 500U); - Add(5, 2U, "3", "4", 550U); - Add(4, 3U, "3", "4", 550U); - Add(3, 4U, "3", "4", 250U); - Add(3, 5U, "5", "7", 300U); - Add(1, 6U, "3", "4", 5U); - Add(1, 7U, "8", "9", 5U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(1, logger_->log_count); - ASSERT_GT(vstorage_.MaxBytesForLevel(4), 1005U); - ASSERT_GT(vstorage_.MaxBytesForLevel(3), 1005U); - ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 1005U); - ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 1000U); - ASSERT_EQ(vstorage_.base_level(), 1); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicLotsOfData) { - ioptions_.level_compaction_dynamic_level_bytes = true; - 
mutable_cf_options_.max_bytes_for_level_base = 100; - mutable_cf_options_.max_bytes_for_level_multiplier = 2; - - Add(0, 1U, "1", "2", 50U); - Add(1, 2U, "1", "2", 50U); - Add(2, 3U, "1", "2", 500U); - Add(3, 4U, "1", "2", 500U); - Add(4, 5U, "1", "2", 1700U); - Add(5, 6U, "1", "2", 500U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 800U); - ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 400U); - ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 200U); - ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 100U); - ASSERT_EQ(vstorage_.base_level(), 1); - ASSERT_EQ(0, logger_->log_count); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicLargeLevel) { - uint64_t kOneGB = 1000U * 1000U * 1000U; - ioptions_.level_compaction_dynamic_level_bytes = true; - mutable_cf_options_.max_bytes_for_level_base = 10U * kOneGB; - mutable_cf_options_.max_bytes_for_level_multiplier = 10; - - Add(0, 1U, "1", "2", 50U); - Add(3, 4U, "1", "2", 32U * kOneGB); - Add(4, 5U, "1", "2", 500U * kOneGB); - Add(5, 6U, "1", "2", 3000U * kOneGB); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(vstorage_.MaxBytesForLevel(5), 3000U * kOneGB); - ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 300U * kOneGB); - ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 30U * kOneGB); - ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 10U * kOneGB); - ASSERT_EQ(vstorage_.base_level(), 2); - ASSERT_EQ(0, logger_->log_count); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_1) { - ioptions_.level_compaction_dynamic_level_bytes = true; - mutable_cf_options_.max_bytes_for_level_base = 40000; - mutable_cf_options_.max_bytes_for_level_multiplier = 5; - mutable_cf_options_.level0_file_num_compaction_trigger = 2; - - Add(0, 1U, "1", "2", 10000U); - Add(0, 2U, "1", "2", 10000U); - Add(0, 3U, "1", "2", 10000U); - - Add(5, 4U, "1", "2", 1286250U); - Add(4, 5U, "1", "2", 200000U); - Add(3, 6U, "1", "2", 40000U); - Add(2, 7U, "1", "2", 8000U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(0, logger_->log_count); - ASSERT_EQ(2, vstorage_.base_level()); - // level multiplier should be 3.5 - ASSERT_EQ(vstorage_.level_multiplier(), 5.0); - ASSERT_EQ(40000U, vstorage_.MaxBytesForLevel(2)); - ASSERT_EQ(51450U, vstorage_.MaxBytesForLevel(3)); - ASSERT_EQ(257250U, vstorage_.MaxBytesForLevel(4)); - - vstorage_.ComputeCompactionScore(ioptions_, mutable_cf_options_); - // Only L0 hits compaction. 
- ASSERT_EQ(vstorage_.CompactionScoreLevel(0), 0); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_2) { - ioptions_.level_compaction_dynamic_level_bytes = true; - mutable_cf_options_.max_bytes_for_level_base = 10000; - mutable_cf_options_.max_bytes_for_level_multiplier = 5; - mutable_cf_options_.level0_file_num_compaction_trigger = 4; - - Add(0, 11U, "1", "2", 10000U); - Add(0, 12U, "1", "2", 10000U); - Add(0, 13U, "1", "2", 10000U); - - // Level size should be around 10,000, 10,290, 51,450, 257,250 - Add(5, 4U, "1", "2", 1286250U); - Add(4, 5U, "1", "2", 258000U); // unadjusted score 1.003 - Add(3, 6U, "1", "2", 53000U); // unadjusted score 1.03 - Add(2, 7U, "1", "2", 20000U); // unadjusted score 1.94 - - UpdateVersionStorageInfo(); - - ASSERT_EQ(0, logger_->log_count); - ASSERT_EQ(1, vstorage_.base_level()); - ASSERT_EQ(10000U, vstorage_.MaxBytesForLevel(1)); - ASSERT_EQ(10290U, vstorage_.MaxBytesForLevel(2)); - ASSERT_EQ(51450U, vstorage_.MaxBytesForLevel(3)); - ASSERT_EQ(257250U, vstorage_.MaxBytesForLevel(4)); - - vstorage_.ComputeCompactionScore(ioptions_, mutable_cf_options_); - // Although L2 and l3 have higher unadjusted compaction score, considering - // a relatively large L0 being compacted down soon, L4 is picked up for - // compaction. - // L0 is still picked up for oversizing. - ASSERT_EQ(0, vstorage_.CompactionScoreLevel(0)); - ASSERT_EQ(4, vstorage_.CompactionScoreLevel(1)); -} - -TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_3) { - ioptions_.level_compaction_dynamic_level_bytes = true; - mutable_cf_options_.max_bytes_for_level_base = 20000; - mutable_cf_options_.max_bytes_for_level_multiplier = 5; - mutable_cf_options_.level0_file_num_compaction_trigger = 5; - - Add(0, 11U, "1", "2", 2500U); - Add(0, 12U, "1", "2", 2500U); - Add(0, 13U, "1", "2", 2500U); - Add(0, 14U, "1", "2", 2500U); - - // Level size should be around 20,000, 53000, 258000 - Add(5, 4U, "1", "2", 1286250U); - Add(4, 5U, "1", "2", 260000U); // Unadjusted score 1.01, adjusted about 4.3 - Add(3, 6U, "1", "2", 85000U); // Unadjusted score 1.42, adjusted about 11.6 - Add(2, 7U, "1", "2", 30000); // Unadjusted score 1.5, adjusted about 10.0 - - UpdateVersionStorageInfo(); - - ASSERT_EQ(0, logger_->log_count); - ASSERT_EQ(2, vstorage_.base_level()); - ASSERT_EQ(20000U, vstorage_.MaxBytesForLevel(2)); - - vstorage_.ComputeCompactionScore(ioptions_, mutable_cf_options_); - // Although L2 has higher unadjusted compaction score, considering - // a relatively large L0 being compacted down soon, L3 is picked up for - // compaction. 
- - ASSERT_EQ(3, vstorage_.CompactionScoreLevel(0)); - ASSERT_EQ(2, vstorage_.CompactionScoreLevel(1)); - ASSERT_EQ(4, vstorage_.CompactionScoreLevel(2)); -} - -TEST_F(VersionStorageInfoTest, EstimateLiveDataSize) { - // Test whether the overlaps are detected as expected - Add(1, 1U, "4", "7", 1U); // Perfect overlap with last level - Add(2, 2U, "3", "5", 1U); // Partial overlap with last level - Add(2, 3U, "6", "8", 1U); // Partial overlap with last level - Add(3, 4U, "1", "9", 1U); // Contains range of last level - Add(4, 5U, "4", "5", 1U); // Inside range of last level - Add(4, 6U, "6", "7", 1U); // Inside range of last level - Add(5, 7U, "4", "7", 10U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(10U, vstorage_.EstimateLiveDataSize()); -} - -TEST_F(VersionStorageInfoTest, EstimateLiveDataSize2) { - Add(0, 1U, "9", "9", 1U); // Level 0 is not ordered - Add(0, 2U, "5", "6", 1U); // Ignored because of [5,6] in l1 - Add(1, 3U, "1", "2", 1U); // Ignored because of [2,3] in l2 - Add(1, 4U, "3", "4", 1U); // Ignored because of [2,3] in l2 - Add(1, 5U, "5", "6", 1U); - Add(2, 6U, "2", "3", 1U); - Add(3, 7U, "7", "8", 1U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(4U, vstorage_.EstimateLiveDataSize()); -} - -TEST_F(VersionStorageInfoTest, GetOverlappingInputs) { - // Two files that overlap at the range deletion tombstone sentinel. - Add(1, 1U, {"a", 0, kTypeValue}, - {"b", kMaxSequenceNumber, kTypeRangeDeletion}, 1); - Add(1, 2U, {"b", 0, kTypeValue}, {"c", 0, kTypeValue}, 1); - // Two files that overlap at the same user key. - Add(1, 3U, {"d", 0, kTypeValue}, {"e", kMaxSequenceNumber, kTypeValue}, 1); - Add(1, 4U, {"e", 0, kTypeValue}, {"f", 0, kTypeValue}, 1); - // Two files that do not overlap. - Add(1, 5U, {"g", 0, kTypeValue}, {"h", 0, kTypeValue}, 1); - Add(1, 6U, {"i", 0, kTypeValue}, {"j", 0, kTypeValue}, 1); - - UpdateVersionStorageInfo(); - - ASSERT_EQ("1,2", - GetOverlappingFiles(1, {"a", 0, kTypeValue}, {"b", 0, kTypeValue})); - ASSERT_EQ("1", - GetOverlappingFiles(1, {"a", 0, kTypeValue}, - {"b", kMaxSequenceNumber, kTypeRangeDeletion})); - ASSERT_EQ("2", GetOverlappingFiles(1, {"b", kMaxSequenceNumber, kTypeValue}, - {"c", 0, kTypeValue})); - ASSERT_EQ("3,4", - GetOverlappingFiles(1, {"d", 0, kTypeValue}, {"e", 0, kTypeValue})); - ASSERT_EQ("3", - GetOverlappingFiles(1, {"d", 0, kTypeValue}, - {"e", kMaxSequenceNumber, kTypeRangeDeletion})); - ASSERT_EQ("3,4", GetOverlappingFiles(1, {"e", kMaxSequenceNumber, kTypeValue}, - {"f", 0, kTypeValue})); - ASSERT_EQ("3,4", - GetOverlappingFiles(1, {"e", 0, kTypeValue}, {"f", 0, kTypeValue})); - ASSERT_EQ("5", - GetOverlappingFiles(1, {"g", 0, kTypeValue}, {"h", 0, kTypeValue})); - ASSERT_EQ("6", - GetOverlappingFiles(1, {"i", 0, kTypeValue}, {"j", 0, kTypeValue})); -} - -TEST_F(VersionStorageInfoTest, FileLocationAndMetaDataByNumber) { - Add(0, 11U, "1", "2", 5000U); - Add(0, 12U, "1", "2", 5000U); - - Add(2, 7U, "1", "2", 8000U); - - UpdateVersionStorageInfo(); - - ASSERT_EQ(vstorage_.GetFileLocation(11U), - VersionStorageInfo::FileLocation(0, 0)); - ASSERT_NE(vstorage_.GetFileMetaDataByNumber(11U), nullptr); - - ASSERT_EQ(vstorage_.GetFileLocation(12U), - VersionStorageInfo::FileLocation(0, 1)); - ASSERT_NE(vstorage_.GetFileMetaDataByNumber(12U), nullptr); - - ASSERT_EQ(vstorage_.GetFileLocation(7U), - VersionStorageInfo::FileLocation(2, 0)); - ASSERT_NE(vstorage_.GetFileMetaDataByNumber(7U), nullptr); - - ASSERT_FALSE(vstorage_.GetFileLocation(999U).IsValid()); - ASSERT_EQ(vstorage_.GetFileMetaDataByNumber(999U), nullptr); -} - 
-TEST_F(VersionStorageInfoTest, ForcedBlobGCEmpty) { - // No SST or blob files in VersionStorageInfo - UpdateVersionStorageInfo(); - - constexpr double age_cutoff = 0.5; - constexpr double force_threshold = 0.75; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); -} - -TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) { - // Test the edge case when all blob files are part of the oldest batch. - // We have one L0 SST file #1, and four blob files #10, #11, #12, and #13. - // The oldest blob file used by SST #1 is blob file #10. - - constexpr int level = 0; - - constexpr uint64_t sst = 1; - - constexpr uint64_t first_blob = 10; - constexpr uint64_t second_blob = 11; - constexpr uint64_t third_blob = 12; - constexpr uint64_t fourth_blob = 13; - - { - constexpr char smallest[] = "bar1"; - constexpr char largest[] = "foo1"; - constexpr uint64_t file_size = 1000; - - Add(level, sst, smallest, largest, file_size, first_blob); - } - - { - constexpr uint64_t total_blob_count = 10; - constexpr uint64_t total_blob_bytes = 100000; - constexpr uint64_t garbage_blob_count = 2; - constexpr uint64_t garbage_blob_bytes = 15000; - - AddBlob(first_blob, total_blob_count, total_blob_bytes, - BlobFileMetaData::LinkedSsts{sst}, garbage_blob_count, - garbage_blob_bytes); - } - - { - constexpr uint64_t total_blob_count = 4; - constexpr uint64_t total_blob_bytes = 400000; - constexpr uint64_t garbage_blob_count = 3; - constexpr uint64_t garbage_blob_bytes = 235000; - - AddBlob(second_blob, total_blob_count, total_blob_bytes, - BlobFileMetaData::LinkedSsts{}, garbage_blob_count, - garbage_blob_bytes); - } - - { - constexpr uint64_t total_blob_count = 20; - constexpr uint64_t total_blob_bytes = 1000000; - constexpr uint64_t garbage_blob_count = 8; - constexpr uint64_t garbage_blob_bytes = 400000; - - AddBlob(third_blob, total_blob_count, total_blob_bytes, - BlobFileMetaData::LinkedSsts{}, garbage_blob_count, - garbage_blob_bytes); - } - - { - constexpr uint64_t total_blob_count = 128; - constexpr uint64_t total_blob_bytes = 1000000; - constexpr uint64_t garbage_blob_count = 67; - constexpr uint64_t garbage_blob_bytes = 600000; - - AddBlob(fourth_blob, total_blob_count, total_blob_bytes, - BlobFileMetaData::LinkedSsts{}, garbage_blob_count, - garbage_blob_bytes); - } - - UpdateVersionStorageInfo(); - - assert(vstorage_.num_levels() > 0); - const auto& level_files = vstorage_.LevelFiles(level); - - assert(level_files.size() == 1); - assert(level_files[0] && level_files[0]->fd.GetNumber() == sst); - - // No blob files eligible for GC due to the age cutoff - - { - constexpr double age_cutoff = 0.1; - constexpr double force_threshold = 0.0; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); - } - - // Part of the oldest batch of blob files (specifically, #12 and #13) is - // ineligible for GC due to the age cutoff - - { - constexpr double age_cutoff = 0.5; - constexpr double force_threshold = 0.0; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); - } - - // Oldest batch is eligible based on age cutoff but its overall garbage ratio - // is below threshold - - { - constexpr double age_cutoff = 1.0; - constexpr double force_threshold = 0.6; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - 
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); - } - - // Oldest batch is eligible based on age cutoff and its overall garbage ratio - // meets threshold - - { - constexpr double age_cutoff = 1.0; - constexpr double force_threshold = 0.5; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC(); - ASSERT_EQ(ssts_to_be_compacted.size(), 1); - - const autovector> - expected_ssts_to_be_compacted{{level, level_files[0]}}; - - ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]); - } -} - -TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) { - // Add three L0 SSTs (1, 2, and 3) and four blob files (10, 11, 12, and 13). - // The first two SSTs have the same oldest blob file, namely, the very oldest - // one (10), while the third SST's oldest blob file reference points to the - // third blob file (12). Thus, the oldest batch of blob files contains the - // first two blob files 10 and 11, and assuming they are eligible for GC based - // on the age cutoff, compacting away the SSTs 1 and 2 will eliminate them. - - constexpr int level = 0; - - constexpr uint64_t first_sst = 1; - constexpr uint64_t second_sst = 2; - constexpr uint64_t third_sst = 3; - - constexpr uint64_t first_blob = 10; - constexpr uint64_t second_blob = 11; - constexpr uint64_t third_blob = 12; - constexpr uint64_t fourth_blob = 13; - - { - constexpr char smallest[] = "bar1"; - constexpr char largest[] = "foo1"; - constexpr uint64_t file_size = 1000; - - Add(level, first_sst, smallest, largest, file_size, first_blob); - } - - { - constexpr char smallest[] = "bar2"; - constexpr char largest[] = "foo2"; - constexpr uint64_t file_size = 2000; - - Add(level, second_sst, smallest, largest, file_size, first_blob); - } - - { - constexpr char smallest[] = "bar3"; - constexpr char largest[] = "foo3"; - constexpr uint64_t file_size = 3000; - - Add(level, third_sst, smallest, largest, file_size, third_blob); - } - - { - constexpr uint64_t total_blob_count = 10; - constexpr uint64_t total_blob_bytes = 100000; - constexpr uint64_t garbage_blob_count = 2; - constexpr uint64_t garbage_blob_bytes = 15000; - - AddBlob(first_blob, total_blob_count, total_blob_bytes, - BlobFileMetaData::LinkedSsts{first_sst, second_sst}, - garbage_blob_count, garbage_blob_bytes); - } - - { - constexpr uint64_t total_blob_count = 4; - constexpr uint64_t total_blob_bytes = 400000; - constexpr uint64_t garbage_blob_count = 3; - constexpr uint64_t garbage_blob_bytes = 235000; - - AddBlob(second_blob, total_blob_count, total_blob_bytes, - BlobFileMetaData::LinkedSsts{}, garbage_blob_count, - garbage_blob_bytes); - } - - { - constexpr uint64_t total_blob_count = 20; - constexpr uint64_t total_blob_bytes = 1000000; - constexpr uint64_t garbage_blob_count = 8; - constexpr uint64_t garbage_blob_bytes = 123456; - - AddBlob(third_blob, total_blob_count, total_blob_bytes, - BlobFileMetaData::LinkedSsts{third_sst}, garbage_blob_count, - garbage_blob_bytes); - } - - { - constexpr uint64_t total_blob_count = 128; - constexpr uint64_t total_blob_bytes = 789012345; - constexpr uint64_t garbage_blob_count = 67; - constexpr uint64_t garbage_blob_bytes = 88888888; - - AddBlob(fourth_blob, total_blob_count, total_blob_bytes, - BlobFileMetaData::LinkedSsts{}, garbage_blob_count, - garbage_blob_bytes); - } - - UpdateVersionStorageInfo(); - - assert(vstorage_.num_levels() > 0); - const auto& level_files = vstorage_.LevelFiles(level); - - assert(level_files.size() 
== 3); - assert(level_files[0] && level_files[0]->fd.GetNumber() == first_sst); - assert(level_files[1] && level_files[1]->fd.GetNumber() == second_sst); - assert(level_files[2] && level_files[2]->fd.GetNumber() == third_sst); - - // No blob files eligible for GC due to the age cutoff - - { - constexpr double age_cutoff = 0.1; - constexpr double force_threshold = 0.0; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); - } - - // Part of the oldest batch of blob files (specifically, the second file) is - // ineligible for GC due to the age cutoff - - { - constexpr double age_cutoff = 0.25; - constexpr double force_threshold = 0.0; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); - } - - // Oldest batch is eligible based on age cutoff but its overall garbage ratio - // is below threshold - - { - constexpr double age_cutoff = 0.5; - constexpr double force_threshold = 0.6; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); - } - - // Oldest batch is eligible based on age cutoff and its overall garbage ratio - // meets threshold - - { - constexpr double age_cutoff = 0.5; - constexpr double force_threshold = 0.5; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC(); - ASSERT_EQ(ssts_to_be_compacted.size(), 2); - - std::sort(ssts_to_be_compacted.begin(), ssts_to_be_compacted.end(), - [](const std::pair& lhs, - const std::pair& rhs) { - assert(lhs.second); - assert(rhs.second); - return lhs.second->fd.GetNumber() < rhs.second->fd.GetNumber(); - }); - - const autovector> - expected_ssts_to_be_compacted{{level, level_files[0]}, - {level, level_files[1]}}; - - ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]); - ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]); - } - - // Now try the last two cases again with a greater than necessary age cutoff - - // Oldest batch is eligible based on age cutoff but its overall garbage ratio - // is below threshold - - { - constexpr double age_cutoff = 0.75; - constexpr double force_threshold = 0.6; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); - } - - // Oldest batch is eligible based on age cutoff and its overall garbage ratio - // meets threshold - - { - constexpr double age_cutoff = 0.75; - constexpr double force_threshold = 0.5; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); - - auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC(); - ASSERT_EQ(ssts_to_be_compacted.size(), 2); - - std::sort(ssts_to_be_compacted.begin(), ssts_to_be_compacted.end(), - [](const std::pair& lhs, - const std::pair& rhs) { - assert(lhs.second); - assert(rhs.second); - return lhs.second->fd.GetNumber() < rhs.second->fd.GetNumber(); - }); - - const autovector> - expected_ssts_to_be_compacted{{level, level_files[0]}, - {level, level_files[1]}}; - - ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]); - ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]); - } -} - -class VersionStorageInfoTimestampTest : public VersionStorageInfoTestBase { - public: - VersionStorageInfoTimestampTest() - : 
VersionStorageInfoTestBase(test::BytewiseComparatorWithU64TsWrapper()) { - } - ~VersionStorageInfoTimestampTest() override {} - std::string Timestamp(uint64_t ts) const { - std::string ret; - PutFixed64(&ret, ts); - return ret; - } - std::string PackUserKeyAndTimestamp(const Slice& ukey, uint64_t ts) const { - std::string ret; - ret.assign(ukey.data(), ukey.size()); - PutFixed64(&ret, ts); - return ret; - } -}; - -TEST_F(VersionStorageInfoTimestampTest, GetOverlappingInputs) { - Add(/*level=*/1, /*file_number=*/1, /*smallest=*/ - {PackUserKeyAndTimestamp("a", /*ts=*/9), /*s=*/0, kTypeValue}, - /*largest=*/ - {PackUserKeyAndTimestamp("a", /*ts=*/8), /*s=*/0, kTypeValue}, - /*file_size=*/100); - Add(/*level=*/1, /*file_number=*/2, /*smallest=*/ - {PackUserKeyAndTimestamp("a", /*ts=*/5), /*s=*/0, kTypeValue}, - /*largest=*/ - {PackUserKeyAndTimestamp("b", /*ts=*/10), /*s=*/0, kTypeValue}, - /*file_size=*/100); - Add(/*level=*/1, /*file_number=*/3, /*smallest=*/ - {PackUserKeyAndTimestamp("c", /*ts=*/12), /*s=*/0, kTypeValue}, - /*largest=*/ - {PackUserKeyAndTimestamp("d", /*ts=*/1), /*s=*/0, kTypeValue}, - /*file_size=*/100); - - UpdateVersionStorageInfo(); - - ASSERT_EQ( - "1,2", - GetOverlappingFiles( - /*level=*/1, - {PackUserKeyAndTimestamp("a", /*ts=*/12), /*s=*/0, kTypeValue}, - {PackUserKeyAndTimestamp("a", /*ts=*/11), /*s=*/0, kTypeValue})); - ASSERT_EQ("3", - GetOverlappingFiles( - /*level=*/1, - {PackUserKeyAndTimestamp("c", /*ts=*/15), /*s=*/0, kTypeValue}, - {PackUserKeyAndTimestamp("c", /*ts=*/2), /*s=*/0, kTypeValue})); -} - -class FindLevelFileTest : public testing::Test { - public: - LevelFilesBrief file_level_; - bool disjoint_sorted_files_; - Arena arena_; - - FindLevelFileTest() : disjoint_sorted_files_(true) {} - - ~FindLevelFileTest() override {} - - void LevelFileInit(size_t num = 0) { - char* mem = arena_.AllocateAligned(num * sizeof(FdWithKeyRange)); - file_level_.files = new (mem) FdWithKeyRange[num]; - file_level_.num_files = 0; - } - - void Add(const char* smallest, const char* largest, - SequenceNumber smallest_seq = 100, - SequenceNumber largest_seq = 100) { - InternalKey smallest_key = InternalKey(smallest, smallest_seq, kTypeValue); - InternalKey largest_key = InternalKey(largest, largest_seq, kTypeValue); - - Slice smallest_slice = smallest_key.Encode(); - Slice largest_slice = largest_key.Encode(); - - char* mem = - arena_.AllocateAligned(smallest_slice.size() + largest_slice.size()); - memcpy(mem, smallest_slice.data(), smallest_slice.size()); - memcpy(mem + smallest_slice.size(), largest_slice.data(), - largest_slice.size()); - - // add to file_level_ - size_t num = file_level_.num_files; - auto& file = file_level_.files[num]; - file.fd = FileDescriptor(num + 1, 0, 0); - file.smallest_key = Slice(mem, smallest_slice.size()); - file.largest_key = Slice(mem + smallest_slice.size(), largest_slice.size()); - file_level_.num_files++; - } - - int Find(const char* key) { - InternalKey target(key, 100, kTypeValue); - InternalKeyComparator cmp(BytewiseComparator()); - return FindFile(cmp, file_level_, target.Encode()); - } - - bool Overlaps(const char* smallest, const char* largest) { - InternalKeyComparator cmp(BytewiseComparator()); - Slice s(smallest != nullptr ? smallest : ""); - Slice l(largest != nullptr ? largest : ""); - return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, file_level_, - (smallest != nullptr ? &s : nullptr), - (largest != nullptr ? 
&l : nullptr)); - } -}; - -TEST_F(FindLevelFileTest, LevelEmpty) { - LevelFileInit(0); - - ASSERT_EQ(0, Find("foo")); - ASSERT_TRUE(!Overlaps("a", "z")); - ASSERT_TRUE(!Overlaps(nullptr, "z")); - ASSERT_TRUE(!Overlaps("a", nullptr)); - ASSERT_TRUE(!Overlaps(nullptr, nullptr)); -} - -TEST_F(FindLevelFileTest, LevelSingle) { - LevelFileInit(1); - - Add("p", "q"); - ASSERT_EQ(0, Find("a")); - ASSERT_EQ(0, Find("p")); - ASSERT_EQ(0, Find("p1")); - ASSERT_EQ(0, Find("q")); - ASSERT_EQ(1, Find("q1")); - ASSERT_EQ(1, Find("z")); - - ASSERT_TRUE(!Overlaps("a", "b")); - ASSERT_TRUE(!Overlaps("z1", "z2")); - ASSERT_TRUE(Overlaps("a", "p")); - ASSERT_TRUE(Overlaps("a", "q")); - ASSERT_TRUE(Overlaps("a", "z")); - ASSERT_TRUE(Overlaps("p", "p1")); - ASSERT_TRUE(Overlaps("p", "q")); - ASSERT_TRUE(Overlaps("p", "z")); - ASSERT_TRUE(Overlaps("p1", "p2")); - ASSERT_TRUE(Overlaps("p1", "z")); - ASSERT_TRUE(Overlaps("q", "q")); - ASSERT_TRUE(Overlaps("q", "q1")); - - ASSERT_TRUE(!Overlaps(nullptr, "j")); - ASSERT_TRUE(!Overlaps("r", nullptr)); - ASSERT_TRUE(Overlaps(nullptr, "p")); - ASSERT_TRUE(Overlaps(nullptr, "p1")); - ASSERT_TRUE(Overlaps("q", nullptr)); - ASSERT_TRUE(Overlaps(nullptr, nullptr)); -} - -TEST_F(FindLevelFileTest, LevelMultiple) { - LevelFileInit(4); - - Add("150", "200"); - Add("200", "250"); - Add("300", "350"); - Add("400", "450"); - ASSERT_EQ(0, Find("100")); - ASSERT_EQ(0, Find("150")); - ASSERT_EQ(0, Find("151")); - ASSERT_EQ(0, Find("199")); - ASSERT_EQ(0, Find("200")); - ASSERT_EQ(1, Find("201")); - ASSERT_EQ(1, Find("249")); - ASSERT_EQ(1, Find("250")); - ASSERT_EQ(2, Find("251")); - ASSERT_EQ(2, Find("299")); - ASSERT_EQ(2, Find("300")); - ASSERT_EQ(2, Find("349")); - ASSERT_EQ(2, Find("350")); - ASSERT_EQ(3, Find("351")); - ASSERT_EQ(3, Find("400")); - ASSERT_EQ(3, Find("450")); - ASSERT_EQ(4, Find("451")); - - ASSERT_TRUE(!Overlaps("100", "149")); - ASSERT_TRUE(!Overlaps("251", "299")); - ASSERT_TRUE(!Overlaps("451", "500")); - ASSERT_TRUE(!Overlaps("351", "399")); - - ASSERT_TRUE(Overlaps("100", "150")); - ASSERT_TRUE(Overlaps("100", "200")); - ASSERT_TRUE(Overlaps("100", "300")); - ASSERT_TRUE(Overlaps("100", "400")); - ASSERT_TRUE(Overlaps("100", "500")); - ASSERT_TRUE(Overlaps("375", "400")); - ASSERT_TRUE(Overlaps("450", "450")); - ASSERT_TRUE(Overlaps("450", "500")); -} - -TEST_F(FindLevelFileTest, LevelMultipleNullBoundaries) { - LevelFileInit(4); - - Add("150", "200"); - Add("200", "250"); - Add("300", "350"); - Add("400", "450"); - ASSERT_TRUE(!Overlaps(nullptr, "149")); - ASSERT_TRUE(!Overlaps("451", nullptr)); - ASSERT_TRUE(Overlaps(nullptr, nullptr)); - ASSERT_TRUE(Overlaps(nullptr, "150")); - ASSERT_TRUE(Overlaps(nullptr, "199")); - ASSERT_TRUE(Overlaps(nullptr, "200")); - ASSERT_TRUE(Overlaps(nullptr, "201")); - ASSERT_TRUE(Overlaps(nullptr, "400")); - ASSERT_TRUE(Overlaps(nullptr, "800")); - ASSERT_TRUE(Overlaps("100", nullptr)); - ASSERT_TRUE(Overlaps("200", nullptr)); - ASSERT_TRUE(Overlaps("449", nullptr)); - ASSERT_TRUE(Overlaps("450", nullptr)); -} - -TEST_F(FindLevelFileTest, LevelOverlapSequenceChecks) { - LevelFileInit(1); - - Add("200", "200", 5000, 3000); - ASSERT_TRUE(!Overlaps("199", "199")); - ASSERT_TRUE(!Overlaps("201", "300")); - ASSERT_TRUE(Overlaps("200", "200")); - ASSERT_TRUE(Overlaps("190", "200")); - ASSERT_TRUE(Overlaps("200", "210")); -} - -TEST_F(FindLevelFileTest, LevelOverlappingFiles) { - LevelFileInit(2); - - Add("150", "600"); - Add("400", "500"); - disjoint_sorted_files_ = false; - ASSERT_TRUE(!Overlaps("100", "149")); - 
ASSERT_TRUE(!Overlaps("601", "700")); - ASSERT_TRUE(Overlaps("100", "150")); - ASSERT_TRUE(Overlaps("100", "200")); - ASSERT_TRUE(Overlaps("100", "300")); - ASSERT_TRUE(Overlaps("100", "400")); - ASSERT_TRUE(Overlaps("100", "500")); - ASSERT_TRUE(Overlaps("375", "400")); - ASSERT_TRUE(Overlaps("450", "450")); - ASSERT_TRUE(Overlaps("450", "500")); - ASSERT_TRUE(Overlaps("450", "700")); - ASSERT_TRUE(Overlaps("600", "700")); -} - -class VersionSetTestBase { - public: - const static std::string kColumnFamilyName1; - const static std::string kColumnFamilyName2; - const static std::string kColumnFamilyName3; - int num_initial_edits_; - - explicit VersionSetTestBase(const std::string& name) - : env_(nullptr), - dbname_(test::PerThreadDBPath(name)), - options_(), - db_options_(options_), - cf_options_(options_), - immutable_options_(db_options_, cf_options_), - mutable_cf_options_(cf_options_), - table_cache_(NewLRUCache(50000, 16)), - write_buffer_manager_(db_options_.db_write_buffer_size), - shutting_down_(false), - mock_table_factory_(std::make_shared()) { - EXPECT_OK(test::CreateEnvFromSystem(ConfigOptions(), &env_, &env_guard_)); - if (env_ == Env::Default() && getenv("MEM_ENV")) { - env_guard_.reset(NewMemEnv(Env::Default())); - env_ = env_guard_.get(); - } - EXPECT_NE(nullptr, env_); - - fs_ = env_->GetFileSystem(); - EXPECT_OK(fs_->CreateDirIfMissing(dbname_, IOOptions(), nullptr)); - - options_.env = env_; - db_options_.env = env_; - db_options_.fs = fs_; - immutable_options_.env = env_; - immutable_options_.fs = fs_; - immutable_options_.clock = env_->GetSystemClock().get(); - - versions_.reset( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); - reactive_versions_ = std::make_shared( - dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, nullptr); - db_options_.db_paths.emplace_back(dbname_, - std::numeric_limits::max()); - } - - virtual ~VersionSetTestBase() { - if (getenv("KEEP_DB")) { - fprintf(stdout, "DB is still at %s\n", dbname_.c_str()); - } else { - Options options; - options.env = env_; - EXPECT_OK(DestroyDB(dbname_, options)); - } - } - - protected: - virtual void PrepareManifest( - std::vector* column_families, - SequenceNumber* last_seqno, std::unique_ptr* log_writer) { - assert(column_families != nullptr); - assert(last_seqno != nullptr); - assert(log_writer != nullptr); - VersionEdit new_db; - if (db_options_.write_dbid_to_manifest) { - DBOptions tmp_db_options; - tmp_db_options.env = env_; - std::unique_ptr impl(new DBImpl(tmp_db_options, dbname_)); - std::string db_id; - impl->GetDbIdentityFromIdentityFile(&db_id); - new_db.SetDBId(db_id); - } - new_db.SetLogNumber(0); - new_db.SetNextFile(2); - new_db.SetLastSequence(0); - - const std::vector cf_names = { - kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2, - kColumnFamilyName3}; - const int kInitialNumOfCfs = static_cast(cf_names.size()); - autovector new_cfs; - uint64_t last_seq = 1; - uint32_t cf_id = 1; - for (int i = 1; i != kInitialNumOfCfs; ++i) { - VersionEdit new_cf; - new_cf.AddColumnFamily(cf_names[i]); - new_cf.SetColumnFamily(cf_id++); - new_cf.SetLogNumber(0); - new_cf.SetNextFile(2); - new_cf.SetLastSequence(last_seq++); - new_cfs.emplace_back(new_cf); - } - *last_seqno = last_seq; - num_initial_edits_ = static_cast(new_cfs.size() + 1); - std::unique_ptr file_writer; - const 
-    const std::string manifest = DescriptorFileName(dbname_, 1);
-    const auto& fs = env_->GetFileSystem();
-    Status s = WritableFileWriter::Create(
-        fs, manifest, fs->OptimizeForManifestWrite(env_options_), &file_writer,
-        nullptr);
-    ASSERT_OK(s);
-    {
-      log_writer->reset(new log::Writer(std::move(file_writer), 0, false));
-      std::string record;
-      new_db.EncodeTo(&record);
-      s = (*log_writer)->AddRecord(record);
-      for (const auto& e : new_cfs) {
-        record.clear();
-        e.EncodeTo(&record);
-        s = (*log_writer)->AddRecord(record);
-        ASSERT_OK(s);
-      }
-    }
-    ASSERT_OK(s);
-
-    cf_options_.table_factory = mock_table_factory_;
-    for (const auto& cf_name : cf_names) {
-      column_families->emplace_back(cf_name, cf_options_);
-    }
-  }
-
-  // Create DB with 3 column families.
-  void NewDB() {
-    SequenceNumber last_seqno;
-    std::unique_ptr<log::Writer> log_writer;
-    SetIdentityFile(env_, dbname_);
-    PrepareManifest(&column_families_, &last_seqno, &log_writer);
-    log_writer.reset();
-    // Make "CURRENT" file point to the new manifest file.
-    Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
-    ASSERT_OK(s);
-
-    EXPECT_OK(versions_->Recover(column_families_, false));
-    EXPECT_EQ(column_families_.size(),
-              versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
-  }
-
-  void ReopenDB() {
-    versions_.reset(
-        new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
-                       &write_buffer_manager_, &write_controller_,
-                       /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
-                       /*db_id*/ "", /*db_session_id*/ ""));
-    EXPECT_OK(versions_->Recover(column_families_, false));
-  }
-
-  void VerifyManifest(std::string* manifest_path) const {
-    assert(manifest_path != nullptr);
-    uint64_t manifest_file_number = 0;
-    Status s = versions_->GetCurrentManifestPath(
-        dbname_, fs_.get(), manifest_path, &manifest_file_number);
-    ASSERT_OK(s);
-    ASSERT_EQ(1, manifest_file_number);
-  }
-
-  Status LogAndApplyToDefaultCF(VersionEdit& edit) {
-    mutex_.Lock();
-    Status s =
-        versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
-                               mutable_cf_options_, &edit, &mutex_, nullptr);
-    mutex_.Unlock();
-    return s;
-  }
-
-  Status LogAndApplyToDefaultCF(
-      const autovector<std::unique_ptr<VersionEdit>>& edits) {
-    autovector<VersionEdit*> vedits;
-    for (auto& e : edits) {
-      vedits.push_back(e.get());
-    }
-    mutex_.Lock();
-    Status s =
-        versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
-                               mutable_cf_options_, vedits, &mutex_, nullptr);
-    mutex_.Unlock();
-    return s;
-  }
-
-  void CreateNewManifest() {
-    constexpr FSDirectory* db_directory = nullptr;
-    constexpr bool new_descriptor_log = true;
-    mutex_.Lock();
-    VersionEdit dummy;
-    ASSERT_OK(versions_->LogAndApply(
-        versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_,
-        &dummy, &mutex_, db_directory, new_descriptor_log));
-    mutex_.Unlock();
-  }
-
-  ColumnFamilyData* CreateColumnFamily(const std::string& cf_name,
-                                       const ColumnFamilyOptions& cf_options) {
-    VersionEdit new_cf;
-    new_cf.AddColumnFamily(cf_name);
-    uint32_t new_id = versions_->GetColumnFamilySet()->GetNextColumnFamilyID();
-    new_cf.SetColumnFamily(new_id);
-    new_cf.SetLogNumber(0);
-    new_cf.SetComparatorName(cf_options.comparator->Name());
-    Status s;
-    mutex_.Lock();
-    s = versions_->LogAndApply(/*column_family_data=*/nullptr,
-                               MutableCFOptions(cf_options), &new_cf, &mutex_,
-                               /*db_directory=*/nullptr,
-                               /*new_descriptor_log=*/false, &cf_options);
-    mutex_.Unlock();
-    EXPECT_OK(s);
-    ColumnFamilyData* cfd =
-        versions_->GetColumnFamilySet()->GetColumnFamily(cf_name);
-    EXPECT_NE(nullptr, cfd);
-    return cfd;
-  }
-
-  Env* mem_env_;
-  Env* env_;
-  std::shared_ptr<Env> env_guard_;
-  std::shared_ptr<FileSystem> fs_;
-  const std::string dbname_;
-  EnvOptions env_options_;
-  Options options_;
-  ImmutableDBOptions db_options_;
-  ColumnFamilyOptions cf_options_;
-  ImmutableOptions immutable_options_;
-  MutableCFOptions mutable_cf_options_;
-  std::shared_ptr<Cache> table_cache_;
-  WriteController write_controller_;
-  WriteBufferManager write_buffer_manager_;
-  std::shared_ptr<VersionSet> versions_;
-  std::shared_ptr<ReactiveVersionSet> reactive_versions_;
-  InstrumentedMutex mutex_;
-  std::atomic<bool> shutting_down_;
-  std::shared_ptr<mock::MockTableFactory> mock_table_factory_;
-  std::vector<ColumnFamilyDescriptor> column_families_;
-};
-
-const std::string VersionSetTestBase::kColumnFamilyName1 = "alice";
-const std::string VersionSetTestBase::kColumnFamilyName2 = "bob";
-const std::string VersionSetTestBase::kColumnFamilyName3 = "charles";
-
-class VersionSetTest : public VersionSetTestBase, public testing::Test {
- public:
-  VersionSetTest() : VersionSetTestBase("version_set_test") {}
-};
-
-TEST_F(VersionSetTest, SameColumnFamilyGroupCommit) {
-  NewDB();
-  const int kGroupSize = 5;
-  autovector<VersionEdit> edits;
-  for (int i = 0; i != kGroupSize; ++i) {
-    edits.emplace_back(VersionEdit());
-  }
-  autovector<ColumnFamilyData*> cfds;
-  autovector<const MutableCFOptions*> all_mutable_cf_options;
-  autovector<autovector<VersionEdit*>> edit_lists;
-  for (int i = 0; i != kGroupSize; ++i) {
-    cfds.emplace_back(versions_->GetColumnFamilySet()->GetDefault());
-    all_mutable_cf_options.emplace_back(&mutable_cf_options_);
-    autovector<VersionEdit*> edit_list;
-    edit_list.emplace_back(&edits[i]);
-    edit_lists.emplace_back(edit_list);
-  }
-
-  SyncPoint::GetInstance()->DisableProcessing();
-  SyncPoint::GetInstance()->ClearAllCallBacks();
-  int count = 0;
-  SyncPoint::GetInstance()->SetCallBack(
-      "VersionSet::ProcessManifestWrites:SameColumnFamily", [&](void* arg) {
-        uint32_t* cf_id = reinterpret_cast<uint32_t*>(arg);
-        EXPECT_EQ(0u, *cf_id);
-        ++count;
-      });
-  SyncPoint::GetInstance()->EnableProcessing();
-  mutex_.Lock();
-  Status s = versions_->LogAndApply(cfds, all_mutable_cf_options, edit_lists,
-                                    &mutex_, nullptr);
-  mutex_.Unlock();
-  EXPECT_OK(s);
-  EXPECT_EQ(kGroupSize - 1, count);
-}
-
-TEST_F(VersionSetTest, PersistBlobFileStateInNewManifest) {
-  // Initialize the database and add a couple of blob files, one with some
-  // garbage in it, and one without any garbage.
- NewDB(); - - assert(versions_); - assert(versions_->GetColumnFamilySet()); - - ColumnFamilyData* const cfd = versions_->GetColumnFamilySet()->GetDefault(); - assert(cfd); - - Version* const version = cfd->current(); - assert(version); - - VersionStorageInfo* const storage_info = version->storage_info(); - assert(storage_info); - - { - constexpr uint64_t blob_file_number = 123; - constexpr uint64_t total_blob_count = 456; - constexpr uint64_t total_blob_bytes = 77777777; - constexpr char checksum_method[] = "SHA1"; - constexpr char checksum_value[] = - "\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c" - "\x52\x5c\xbd"; - - auto shared_meta = SharedBlobFileMetaData::Create( - blob_file_number, total_blob_count, total_blob_bytes, checksum_method, - checksum_value); - - constexpr uint64_t garbage_blob_count = 89; - constexpr uint64_t garbage_blob_bytes = 1000000; - - auto meta = BlobFileMetaData::Create( - std::move(shared_meta), BlobFileMetaData::LinkedSsts(), - garbage_blob_count, garbage_blob_bytes); - - storage_info->AddBlobFile(std::move(meta)); - } - - { - constexpr uint64_t blob_file_number = 234; - constexpr uint64_t total_blob_count = 555; - constexpr uint64_t total_blob_bytes = 66666; - constexpr char checksum_method[] = "CRC32"; - constexpr char checksum_value[] = "\x3d\x87\xff\x57"; - - auto shared_meta = SharedBlobFileMetaData::Create( - blob_file_number, total_blob_count, total_blob_bytes, checksum_method, - checksum_value); - - constexpr uint64_t garbage_blob_count = 0; - constexpr uint64_t garbage_blob_bytes = 0; - - auto meta = BlobFileMetaData::Create( - std::move(shared_meta), BlobFileMetaData::LinkedSsts(), - garbage_blob_count, garbage_blob_bytes); - - storage_info->AddBlobFile(std::move(meta)); - } - - // Force the creation of a new manifest file and make sure metadata for - // the blob files is re-persisted. - size_t addition_encoded = 0; - SyncPoint::GetInstance()->SetCallBack( - "BlobFileAddition::EncodeTo::CustomFields", - [&](void* /* arg */) { ++addition_encoded; }); - - size_t garbage_encoded = 0; - SyncPoint::GetInstance()->SetCallBack( - "BlobFileGarbage::EncodeTo::CustomFields", - [&](void* /* arg */) { ++garbage_encoded; }); - SyncPoint::GetInstance()->EnableProcessing(); - - CreateNewManifest(); - - ASSERT_EQ(addition_encoded, 2); - ASSERT_EQ(garbage_encoded, 1); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(VersionSetTest, AddLiveBlobFiles) { - // Initialize the database and add a blob file. 
- NewDB(); - - assert(versions_); - assert(versions_->GetColumnFamilySet()); - - ColumnFamilyData* const cfd = versions_->GetColumnFamilySet()->GetDefault(); - assert(cfd); - - Version* const first_version = cfd->current(); - assert(first_version); - - VersionStorageInfo* const first_storage_info = first_version->storage_info(); - assert(first_storage_info); - - constexpr uint64_t first_blob_file_number = 234; - constexpr uint64_t first_total_blob_count = 555; - constexpr uint64_t first_total_blob_bytes = 66666; - constexpr char first_checksum_method[] = "CRC32"; - constexpr char first_checksum_value[] = "\x3d\x87\xff\x57"; - - auto first_shared_meta = SharedBlobFileMetaData::Create( - first_blob_file_number, first_total_blob_count, first_total_blob_bytes, - first_checksum_method, first_checksum_value); - - constexpr uint64_t garbage_blob_count = 0; - constexpr uint64_t garbage_blob_bytes = 0; - - auto first_meta = BlobFileMetaData::Create( - std::move(first_shared_meta), BlobFileMetaData::LinkedSsts(), - garbage_blob_count, garbage_blob_bytes); - - first_storage_info->AddBlobFile(first_meta); - - // Reference the version so it stays alive even after the following version - // edit. - first_version->Ref(); - - // Get live files directly from version. - std::vector version_table_files; - std::vector version_blob_files; - - first_version->AddLiveFiles(&version_table_files, &version_blob_files); - - ASSERT_EQ(version_blob_files.size(), 1); - ASSERT_EQ(version_blob_files[0], first_blob_file_number); - - // Create a new version containing an additional blob file. - versions_->TEST_CreateAndAppendVersion(cfd); - - Version* const second_version = cfd->current(); - assert(second_version); - assert(second_version != first_version); - - VersionStorageInfo* const second_storage_info = - second_version->storage_info(); - assert(second_storage_info); - - constexpr uint64_t second_blob_file_number = 456; - constexpr uint64_t second_total_blob_count = 100; - constexpr uint64_t second_total_blob_bytes = 2000000; - constexpr char second_checksum_method[] = "CRC32B"; - constexpr char second_checksum_value[] = "\x6d\xbd\xf2\x3a"; - - auto second_shared_meta = SharedBlobFileMetaData::Create( - second_blob_file_number, second_total_blob_count, second_total_blob_bytes, - second_checksum_method, second_checksum_value); - - auto second_meta = BlobFileMetaData::Create( - std::move(second_shared_meta), BlobFileMetaData::LinkedSsts(), - garbage_blob_count, garbage_blob_bytes); - - second_storage_info->AddBlobFile(std::move(first_meta)); - second_storage_info->AddBlobFile(std::move(second_meta)); - - // Get all live files from version set. Note that the result contains - // duplicates. - std::vector all_table_files; - std::vector all_blob_files; - - versions_->AddLiveFiles(&all_table_files, &all_blob_files); - - ASSERT_EQ(all_blob_files.size(), 3); - ASSERT_EQ(all_blob_files[0], first_blob_file_number); - ASSERT_EQ(all_blob_files[1], first_blob_file_number); - ASSERT_EQ(all_blob_files[2], second_blob_file_number); - - // Clean up previous version. - first_version->Unref(); -} - -TEST_F(VersionSetTest, ObsoleteBlobFile) { - // Initialize the database and add a blob file that is entirely garbage - // and thus can immediately be marked obsolete. 
- NewDB(); - - VersionEdit edit; - - constexpr uint64_t blob_file_number = 234; - constexpr uint64_t total_blob_count = 555; - constexpr uint64_t total_blob_bytes = 66666; - constexpr char checksum_method[] = "CRC32"; - constexpr char checksum_value[] = "\x3d\x87\xff\x57"; - - edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, - checksum_method, checksum_value); - - edit.AddBlobFileGarbage(blob_file_number, total_blob_count, total_blob_bytes); - - mutex_.Lock(); - Status s = - versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), - mutable_cf_options_, &edit, &mutex_, nullptr); - mutex_.Unlock(); - - ASSERT_OK(s); - - // Make sure blob files from the pending number range are not returned - // as obsolete. - { - std::vector table_files; - std::vector blob_files; - std::vector manifest_files; - constexpr uint64_t min_pending_output = blob_file_number; - - versions_->GetObsoleteFiles(&table_files, &blob_files, &manifest_files, - min_pending_output); - - ASSERT_TRUE(blob_files.empty()); - } - - // Make sure the blob file is returned as obsolete if it's not in the pending - // range. - { - std::vector table_files; - std::vector blob_files; - std::vector manifest_files; - constexpr uint64_t min_pending_output = blob_file_number + 1; - - versions_->GetObsoleteFiles(&table_files, &blob_files, &manifest_files, - min_pending_output); - - ASSERT_EQ(blob_files.size(), 1); - ASSERT_EQ(blob_files[0].GetBlobFileNumber(), blob_file_number); - } - - // Make sure it's not returned a second time. - { - std::vector table_files; - std::vector blob_files; - std::vector manifest_files; - constexpr uint64_t min_pending_output = blob_file_number + 1; - - versions_->GetObsoleteFiles(&table_files, &blob_files, &manifest_files, - min_pending_output); - - ASSERT_TRUE(blob_files.empty()); - } -} - -TEST_F(VersionSetTest, WalEditsNotAppliedToVersion) { - NewDB(); - - constexpr uint64_t kNumWals = 5; - - autovector> edits; - // Add some WALs. - for (uint64_t i = 1; i <= kNumWals; i++) { - edits.emplace_back(new VersionEdit); - // WAL's size equals its log number. - edits.back()->AddWal(i, WalMetadata(i)); - } - // Delete the first half of the WALs. - edits.emplace_back(new VersionEdit); - edits.back()->DeleteWalsBefore(kNumWals / 2 + 1); - - autovector versions; - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::ProcessManifestWrites:NewVersion", - [&](void* arg) { versions.push_back(reinterpret_cast(arg)); }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_OK(LogAndApplyToDefaultCF(edits)); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - // Since the edits are all WAL edits, no version should be created. - ASSERT_EQ(versions.size(), 1); - ASSERT_EQ(versions[0], nullptr); -} - -// Similar to WalEditsNotAppliedToVersion, but contains a non-WAL edit. -TEST_F(VersionSetTest, NonWalEditsAppliedToVersion) { - NewDB(); - - const std::string kDBId = "db_db"; - constexpr uint64_t kNumWals = 5; - - autovector> edits; - // Add some WALs. - for (uint64_t i = 1; i <= kNumWals; i++) { - edits.emplace_back(new VersionEdit); - // WAL's size equals its log number. - edits.back()->AddWal(i, WalMetadata(i)); - } - // Delete the first half of the WALs. 
-  edits.emplace_back(new VersionEdit);
-  edits.back()->DeleteWalsBefore(kNumWals / 2 + 1);
-  edits.emplace_back(new VersionEdit);
-  edits.back()->SetDBId(kDBId);
-
-  autovector<Version*> versions;
-  SyncPoint::GetInstance()->SetCallBack(
-      "VersionSet::ProcessManifestWrites:NewVersion",
-      [&](void* arg) { versions.push_back(reinterpret_cast<Version*>(arg)); });
-  SyncPoint::GetInstance()->EnableProcessing();
-
-  ASSERT_OK(LogAndApplyToDefaultCF(edits));
-
-  SyncPoint::GetInstance()->DisableProcessing();
-  SyncPoint::GetInstance()->ClearAllCallBacks();
-
-  // Since the edits contain a non-WAL edit (the DB ID), a new version should
-  // be created.
-  ASSERT_EQ(versions.size(), 1);
-  ASSERT_NE(versions[0], nullptr);
-}
-
-TEST_F(VersionSetTest, WalAddition) {
-  NewDB();
-
-  constexpr WalNumber kLogNumber = 10;
-  constexpr uint64_t kSizeInBytes = 111;
-
-  // A WAL is just created.
-  {
-    VersionEdit edit;
-    edit.AddWal(kLogNumber);
-
-    ASSERT_OK(LogAndApplyToDefaultCF(edit));
-
-    const auto& wals = versions_->GetWalSet().GetWals();
-    ASSERT_EQ(wals.size(), 1);
-    ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
-    ASSERT_FALSE(wals.at(kLogNumber).HasSyncedSize());
-  }
-
-  // The WAL is synced several times before closing.
-  {
-    for (uint64_t size_delta = 100; size_delta > 0; size_delta /= 2) {
-      uint64_t size = kSizeInBytes - size_delta;
-      WalMetadata wal(size);
-      VersionEdit edit;
-      edit.AddWal(kLogNumber, wal);
-
-      ASSERT_OK(LogAndApplyToDefaultCF(edit));
-
-      const auto& wals = versions_->GetWalSet().GetWals();
-      ASSERT_EQ(wals.size(), 1);
-      ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
-      ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize());
-      ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), size);
-    }
-  }
-
-  // The WAL is closed.
-  {
-    WalMetadata wal(kSizeInBytes);
-    VersionEdit edit;
-    edit.AddWal(kLogNumber, wal);
-
-    ASSERT_OK(LogAndApplyToDefaultCF(edit));
-
-    const auto& wals = versions_->GetWalSet().GetWals();
-    ASSERT_EQ(wals.size(), 1);
-    ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
-    ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize());
-    ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSizeInBytes);
-  }
-
-  // Recover a new VersionSet.
-  {
-    std::unique_ptr<VersionSet> new_versions(
-        new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
-                       &write_buffer_manager_, &write_controller_,
-                       /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
-                       /*db_id*/ "", /*db_session_id*/ ""));
-    ASSERT_OK(new_versions->Recover(column_families_, /*read_only=*/false));
-    const auto& wals = new_versions->GetWalSet().GetWals();
-    ASSERT_EQ(wals.size(), 1);
-    ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
-    ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize());
-    ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSizeInBytes);
-  }
-}
-
-TEST_F(VersionSetTest, WalCloseWithoutSync) {
-  NewDB();
-
-  constexpr WalNumber kLogNumber = 10;
-  constexpr uint64_t kSizeInBytes = 111;
-  constexpr uint64_t kSyncedSizeInBytes = kSizeInBytes / 2;
-
-  // A WAL is just created.
-  {
-    VersionEdit edit;
-    edit.AddWal(kLogNumber);
-
-    ASSERT_OK(LogAndApplyToDefaultCF(edit));
-
-    const auto& wals = versions_->GetWalSet().GetWals();
-    ASSERT_EQ(wals.size(), 1);
-    ASSERT_TRUE(wals.find(kLogNumber) != wals.end());
-    ASSERT_FALSE(wals.at(kLogNumber).HasSyncedSize());
-  }
-
-  // The WAL is synced before closing.
- { - WalMetadata wal(kSyncedSizeInBytes); - VersionEdit edit; - edit.AddWal(kLogNumber, wal); - - ASSERT_OK(LogAndApplyToDefaultCF(edit)); - - const auto& wals = versions_->GetWalSet().GetWals(); - ASSERT_EQ(wals.size(), 1); - ASSERT_TRUE(wals.find(kLogNumber) != wals.end()); - ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize()); - ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSyncedSizeInBytes); - } - - // A new WAL with larger log number is created, - // implicitly marking the current WAL closed. - { - VersionEdit edit; - edit.AddWal(kLogNumber + 1); - ASSERT_OK(LogAndApplyToDefaultCF(edit)); - - const auto& wals = versions_->GetWalSet().GetWals(); - ASSERT_EQ(wals.size(), 2); - ASSERT_TRUE(wals.find(kLogNumber) != wals.end()); - ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize()); - ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSyncedSizeInBytes); - ASSERT_TRUE(wals.find(kLogNumber + 1) != wals.end()); - ASSERT_FALSE(wals.at(kLogNumber + 1).HasSyncedSize()); - } - - // Recover a new VersionSet. - { - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); - ASSERT_OK(new_versions->Recover(column_families_, false)); - const auto& wals = new_versions->GetWalSet().GetWals(); - ASSERT_EQ(wals.size(), 2); - ASSERT_TRUE(wals.find(kLogNumber) != wals.end()); - ASSERT_TRUE(wals.at(kLogNumber).HasSyncedSize()); - ASSERT_EQ(wals.at(kLogNumber).GetSyncedSizeInBytes(), kSyncedSizeInBytes); - } -} - -TEST_F(VersionSetTest, WalDeletion) { - NewDB(); - - constexpr WalNumber kClosedLogNumber = 10; - constexpr WalNumber kNonClosedLogNumber = 20; - constexpr uint64_t kSizeInBytes = 111; - - // Add a non-closed and a closed WAL. - { - VersionEdit edit; - edit.AddWal(kClosedLogNumber, WalMetadata(kSizeInBytes)); - edit.AddWal(kNonClosedLogNumber); - - ASSERT_OK(LogAndApplyToDefaultCF(edit)); - - const auto& wals = versions_->GetWalSet().GetWals(); - ASSERT_EQ(wals.size(), 2); - ASSERT_TRUE(wals.find(kNonClosedLogNumber) != wals.end()); - ASSERT_TRUE(wals.find(kClosedLogNumber) != wals.end()); - ASSERT_FALSE(wals.at(kNonClosedLogNumber).HasSyncedSize()); - ASSERT_TRUE(wals.at(kClosedLogNumber).HasSyncedSize()); - ASSERT_EQ(wals.at(kClosedLogNumber).GetSyncedSizeInBytes(), kSizeInBytes); - } - - // Delete the closed WAL. - { - VersionEdit edit; - edit.DeleteWalsBefore(kNonClosedLogNumber); - - ASSERT_OK(LogAndApplyToDefaultCF(edit)); - - const auto& wals = versions_->GetWalSet().GetWals(); - ASSERT_EQ(wals.size(), 1); - ASSERT_TRUE(wals.find(kNonClosedLogNumber) != wals.end()); - ASSERT_FALSE(wals.at(kNonClosedLogNumber).HasSyncedSize()); - } - - // Recover a new VersionSet, only the non-closed WAL should show up. - { - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); - ASSERT_OK(new_versions->Recover(column_families_, false)); - const auto& wals = new_versions->GetWalSet().GetWals(); - ASSERT_EQ(wals.size(), 1); - ASSERT_TRUE(wals.find(kNonClosedLogNumber) != wals.end()); - ASSERT_FALSE(wals.at(kNonClosedLogNumber).HasSyncedSize()); - } - - // Force the creation of a new MANIFEST file, - // only the non-closed WAL should be written to the new MANIFEST. 
- { - std::vector wal_additions; - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::WriteCurrentStateToManifest:SaveWal", [&](void* arg) { - VersionEdit* edit = reinterpret_cast(arg); - ASSERT_TRUE(edit->IsWalAddition()); - for (auto& addition : edit->GetWalAdditions()) { - wal_additions.push_back(addition); - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - - CreateNewManifest(); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - ASSERT_EQ(wal_additions.size(), 1); - ASSERT_EQ(wal_additions[0].GetLogNumber(), kNonClosedLogNumber); - ASSERT_FALSE(wal_additions[0].GetMetadata().HasSyncedSize()); - } - - // Recover from the new MANIFEST, only the non-closed WAL should show up. - { - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); - ASSERT_OK(new_versions->Recover(column_families_, false)); - const auto& wals = new_versions->GetWalSet().GetWals(); - ASSERT_EQ(wals.size(), 1); - ASSERT_TRUE(wals.find(kNonClosedLogNumber) != wals.end()); - ASSERT_FALSE(wals.at(kNonClosedLogNumber).HasSyncedSize()); - } -} - -TEST_F(VersionSetTest, WalCreateTwice) { - NewDB(); - - constexpr WalNumber kLogNumber = 10; - - VersionEdit edit; - edit.AddWal(kLogNumber); - - ASSERT_OK(LogAndApplyToDefaultCF(edit)); - - Status s = LogAndApplyToDefaultCF(edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("WAL 10 is created more than once") != - std::string::npos) - << s.ToString(); -} - -TEST_F(VersionSetTest, WalCreateAfterClose) { - NewDB(); - - constexpr WalNumber kLogNumber = 10; - constexpr uint64_t kSizeInBytes = 111; - - { - // Add a closed WAL. - VersionEdit edit; - edit.AddWal(kLogNumber); - WalMetadata wal(kSizeInBytes); - edit.AddWal(kLogNumber, wal); - - ASSERT_OK(LogAndApplyToDefaultCF(edit)); - } - - { - // Create the same WAL again. - VersionEdit edit; - edit.AddWal(kLogNumber); - - Status s = LogAndApplyToDefaultCF(edit); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("WAL 10 is created more than once") != - std::string::npos) - << s.ToString(); - } -} - -TEST_F(VersionSetTest, AddWalWithSmallerSize) { - NewDB(); - assert(versions_); - - constexpr WalNumber kLogNumber = 10; - constexpr uint64_t kSizeInBytes = 111; - - { - // Add a closed WAL. - VersionEdit edit; - WalMetadata wal(kSizeInBytes); - edit.AddWal(kLogNumber, wal); - - ASSERT_OK(LogAndApplyToDefaultCF(edit)); - } - // Copy for future comparison. - const std::map wals1 = - versions_->GetWalSet().GetWals(); - - { - // Add the same WAL with smaller synced size. - VersionEdit edit; - WalMetadata wal(kSizeInBytes / 2); - edit.AddWal(kLogNumber, wal); - - Status s = LogAndApplyToDefaultCF(edit); - ASSERT_OK(s); - } - const std::map wals2 = - versions_->GetWalSet().GetWals(); - ASSERT_EQ(wals1, wals2); -} - -TEST_F(VersionSetTest, DeleteWalsBeforeNonExistingWalNumber) { - NewDB(); - - constexpr WalNumber kLogNumber0 = 10; - constexpr WalNumber kLogNumber1 = 20; - constexpr WalNumber kNonExistingNumber = 15; - constexpr uint64_t kSizeInBytes = 111; - - { - // Add closed WALs. - VersionEdit edit; - WalMetadata wal(kSizeInBytes); - edit.AddWal(kLogNumber0, wal); - edit.AddWal(kLogNumber1, wal); - - ASSERT_OK(LogAndApplyToDefaultCF(edit)); - } - - { - // Delete WALs before a non-existing WAL. 
-    VersionEdit edit;
-    edit.DeleteWalsBefore(kNonExistingNumber);
-
-    ASSERT_OK(LogAndApplyToDefaultCF(edit));
-  }
-
-  // Recover a new VersionSet, WAL0 is deleted, WAL1 is not.
-  {
-    std::unique_ptr<VersionSet> new_versions(
-        new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
-                       &write_buffer_manager_, &write_controller_,
-                       /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
-                       /*db_id*/ "", /*db_session_id*/ ""));
-    ASSERT_OK(new_versions->Recover(column_families_, false));
-    const auto& wals = new_versions->GetWalSet().GetWals();
-    ASSERT_EQ(wals.size(), 1);
-    ASSERT_TRUE(wals.find(kLogNumber1) != wals.end());
-  }
-}
-
-TEST_F(VersionSetTest, DeleteAllWals) {
-  NewDB();
-
-  constexpr WalNumber kMaxLogNumber = 10;
-  constexpr uint64_t kSizeInBytes = 111;
-
-  {
-    // Add a closed WAL.
-    VersionEdit edit;
-    WalMetadata wal(kSizeInBytes);
-    edit.AddWal(kMaxLogNumber, wal);
-
-    ASSERT_OK(LogAndApplyToDefaultCF(edit));
-  }
-
-  {
-    VersionEdit edit;
-    edit.DeleteWalsBefore(kMaxLogNumber + 10);
-
-    ASSERT_OK(LogAndApplyToDefaultCF(edit));
-  }
-
-  // Recover a new VersionSet, all WALs are deleted.
-  {
-    std::unique_ptr<VersionSet> new_versions(
-        new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
-                       &write_buffer_manager_, &write_controller_,
-                       /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
-                       /*db_id*/ "", /*db_session_id*/ ""));
-    ASSERT_OK(new_versions->Recover(column_families_, false));
-    const auto& wals = new_versions->GetWalSet().GetWals();
-    ASSERT_EQ(wals.size(), 0);
-  }
-}
-
-TEST_F(VersionSetTest, AtomicGroupWithWalEdits) {
-  NewDB();
-
-  constexpr int kAtomicGroupSize = 7;
-  constexpr uint64_t kNumWals = 5;
-  const std::string kDBId = "db_db";
-
-  int remaining = kAtomicGroupSize;
-  autovector<std::unique_ptr<VersionEdit>> edits;
-  // Add 5 WALs.
-  for (uint64_t i = 1; i <= kNumWals; i++) {
-    edits.emplace_back(new VersionEdit);
-    // WAL's size equals its log number.
-    edits.back()->AddWal(i, WalMetadata(i));
-    edits.back()->MarkAtomicGroup(--remaining);
-  }
-  // One edit that sets the DB ID.
-  edits.emplace_back(new VersionEdit);
-  edits.back()->SetDBId(kDBId);
-  edits.back()->MarkAtomicGroup(--remaining);
-  // Delete the first added 4 WALs.
-  edits.emplace_back(new VersionEdit);
-  edits.back()->DeleteWalsBefore(kNumWals);
-  edits.back()->MarkAtomicGroup(--remaining);
-  ASSERT_EQ(remaining, 0);
-
-  ASSERT_OK(LogAndApplyToDefaultCF(edits));
-
-  // Recover a new VersionSet; the DB ID and the last WAL should be
-  // kept.
-  {
-    std::unique_ptr<VersionSet> new_versions(
-        new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
-                       &write_buffer_manager_, &write_controller_,
-                       /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
-                       /*db_id*/ "", /*db_session_id*/ ""));
-    std::string db_id;
-    ASSERT_OK(
-        new_versions->Recover(column_families_, /*read_only=*/false, &db_id));
-
-    ASSERT_EQ(db_id, kDBId);
-
-    const auto& wals = new_versions->GetWalSet().GetWals();
-    ASSERT_EQ(wals.size(), 1);
-    ASSERT_TRUE(wals.find(kNumWals) != wals.end());
-    ASSERT_TRUE(wals.at(kNumWals).HasSyncedSize());
-    ASSERT_EQ(wals.at(kNumWals).GetSyncedSizeInBytes(), kNumWals);
-  }
-}
-
-TEST_F(VersionStorageInfoTest, AddRangeDeletionCompensatedFileSize) {
-  // Tests that compensated range deletion size is added to compensated file
-  // size.
-  Add(4, 100U, "1", "2", 100U, kInvalidBlobFileNumber, 1000U);
-
-  UpdateVersionStorageInfo();
-
-  auto meta = vstorage_.GetFileMetaDataByNumber(100U);
-  ASSERT_EQ(meta->compensated_file_size, 100U + 1000U);
-}
-
-class VersionSetWithTimestampTest : public VersionSetTest {
- public:
-  static const std::string kNewCfName;
-
-  explicit VersionSetWithTimestampTest() : VersionSetTest() {}
-
-  void SetUp() override {
-    NewDB();
-    Options options;
-    options.comparator = test::BytewiseComparatorWithU64TsWrapper();
-    cfd_ = CreateColumnFamily(kNewCfName, options);
-    EXPECT_NE(nullptr, cfd_);
-    EXPECT_NE(nullptr, cfd_->GetLatestMutableCFOptions());
-    column_families_.emplace_back(kNewCfName, options);
-  }
-
-  void TearDown() override {
-    for (auto* e : edits_) {
-      delete e;
-    }
-    edits_.clear();
-  }
-
-  void GenVersionEditsToSetFullHistoryTsLow(
-      const std::vector<uint64_t>& ts_lbs) {
-    for (const auto ts_lb : ts_lbs) {
-      VersionEdit* edit = new VersionEdit;
-      edit->SetColumnFamily(cfd_->GetID());
-      std::string ts_str = test::EncodeInt(ts_lb);
-      edit->SetFullHistoryTsLow(ts_str);
-      edits_.emplace_back(edit);
-    }
-  }
-
-  void VerifyFullHistoryTsLow(uint64_t expected_ts_low) {
-    std::unique_ptr<VersionSet> vset(
-        new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
-                       &write_buffer_manager_, &write_controller_,
-                       /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
-                       /*db_id*/ "", /*db_session_id*/ ""));
-    ASSERT_OK(vset->Recover(column_families_, /*read_only=*/false,
-                            /*db_id=*/nullptr));
-    for (auto* cfd : *(vset->GetColumnFamilySet())) {
-      ASSERT_NE(nullptr, cfd);
-      if (cfd->GetName() == kNewCfName) {
-        ASSERT_EQ(test::EncodeInt(expected_ts_low), cfd->GetFullHistoryTsLow());
-      } else {
-        ASSERT_TRUE(cfd->GetFullHistoryTsLow().empty());
-      }
-    }
-  }
-
-  void DoTest(const std::vector<uint64_t>& ts_lbs) {
-    if (ts_lbs.empty()) {
-      return;
-    }
-
-    GenVersionEditsToSetFullHistoryTsLow(ts_lbs);
-
-    Status s;
-    mutex_.Lock();
-    s = versions_->LogAndApply(cfd_, *(cfd_->GetLatestMutableCFOptions()),
-                               edits_, &mutex_, nullptr);
-    mutex_.Unlock();
-    ASSERT_OK(s);
-    VerifyFullHistoryTsLow(*std::max_element(ts_lbs.begin(), ts_lbs.end()));
-  }
-
- protected:
-  ColumnFamilyData* cfd_{nullptr};
-  // edits_ must contain and own pointers to heap-allocated VersionEdit objects.
-  autovector<VersionEdit*> edits_;
-};
-
-const std::string VersionSetWithTimestampTest::kNewCfName("new_cf");
-
-TEST_F(VersionSetWithTimestampTest, SetFullHistoryTsLbOnce) {
-  constexpr uint64_t kTsLow = 100;
-  DoTest({kTsLow});
-}
-
-// Simulate the application increasing full_history_ts_low.
-TEST_F(VersionSetWithTimestampTest, IncreaseFullHistoryTsLb) {
-  const std::vector<uint64_t> ts_lbs = {100, 101, 102, 103};
-  DoTest(ts_lbs);
-}
-
-// Simulate the application trying to decrease full_history_ts_low
-// unsuccessfully. If the application calls the public API sequentially to
-// decrease the lower bound ts, RocksDB will return an InvalidArgument
-// status before involving VersionSet. Only when multiple threads try
-// to decrease the lower bound concurrently can this case ever happen. Even
-// so, the lower bound cannot be decreased. The application will be notified
-// via the return value of the API.
-TEST_F(VersionSetWithTimestampTest, TryDecreaseFullHistoryTsLb) { - const std::vector ts_lbs = {103, 102, 101, 100}; - DoTest(ts_lbs); -} - -class VersionSetAtomicGroupTest : public VersionSetTestBase, - public testing::Test { - public: - VersionSetAtomicGroupTest() - : VersionSetTestBase("version_set_atomic_group_test") {} - - void SetUp() override { - PrepareManifest(&column_families_, &last_seqno_, &log_writer_); - SetupTestSyncPoints(); - } - - void SetupValidAtomicGroup(int atomic_group_size) { - edits_.resize(atomic_group_size); - int remaining = atomic_group_size; - for (size_t i = 0; i != edits_.size(); ++i) { - edits_[i].SetLogNumber(0); - edits_[i].SetNextFile(2); - edits_[i].MarkAtomicGroup(--remaining); - edits_[i].SetLastSequence(last_seqno_++); - } - ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); - } - - void SetupIncompleteTrailingAtomicGroup(int atomic_group_size) { - edits_.resize(atomic_group_size); - int remaining = atomic_group_size; - for (size_t i = 0; i != edits_.size(); ++i) { - edits_[i].SetLogNumber(0); - edits_[i].SetNextFile(2); - edits_[i].MarkAtomicGroup(--remaining); - edits_[i].SetLastSequence(last_seqno_++); - } - ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); - } - - void SetupCorruptedAtomicGroup(int atomic_group_size) { - edits_.resize(atomic_group_size); - int remaining = atomic_group_size; - for (size_t i = 0; i != edits_.size(); ++i) { - edits_[i].SetLogNumber(0); - edits_[i].SetNextFile(2); - if (i != ((size_t)atomic_group_size / 2)) { - edits_[i].MarkAtomicGroup(--remaining); - } - edits_[i].SetLastSequence(last_seqno_++); - } - ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); - } - - void SetupIncorrectAtomicGroup(int atomic_group_size) { - edits_.resize(atomic_group_size); - int remaining = atomic_group_size; - for (size_t i = 0; i != edits_.size(); ++i) { - edits_[i].SetLogNumber(0); - edits_[i].SetNextFile(2); - if (i != 1) { - edits_[i].MarkAtomicGroup(--remaining); - } else { - edits_[i].MarkAtomicGroup(remaining--); - } - edits_[i].SetLastSequence(last_seqno_++); - } - ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); - } - - void SetupTestSyncPoints() { - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "AtomicGroupReadBuffer::AddEdit:FirstInAtomicGroup", [&](void* arg) { - VersionEdit* e = reinterpret_cast(arg); - EXPECT_EQ(edits_.front().DebugString(), - e->DebugString()); // compare based on value - first_in_atomic_group_ = true; - }); - SyncPoint::GetInstance()->SetCallBack( - "AtomicGroupReadBuffer::AddEdit:LastInAtomicGroup", [&](void* arg) { - VersionEdit* e = reinterpret_cast(arg); - EXPECT_EQ(edits_.back().DebugString(), - e->DebugString()); // compare based on value - EXPECT_TRUE(first_in_atomic_group_); - last_in_atomic_group_ = true; - }); - SyncPoint::GetInstance()->SetCallBack( - "VersionEditHandlerBase::Iterate:Finish", [&](void* arg) { - num_recovered_edits_ = *reinterpret_cast(arg); - }); - SyncPoint::GetInstance()->SetCallBack( - "AtomicGroupReadBuffer::AddEdit:AtomicGroup", - [&](void* /* arg */) { ++num_edits_in_atomic_group_; }); - SyncPoint::GetInstance()->SetCallBack( - "AtomicGroupReadBuffer::AddEdit:AtomicGroupMixedWithNormalEdits", - [&](void* arg) { - corrupted_edit_ = *reinterpret_cast(arg); - }); - SyncPoint::GetInstance()->SetCallBack( - "AtomicGroupReadBuffer::AddEdit:IncorrectAtomicGroupSize", - [&](void* arg) { - edit_with_incorrect_group_size_ = - *reinterpret_cast(arg); - }); 
- SyncPoint::GetInstance()->EnableProcessing(); - } - - void AddNewEditsToLog(int num_edits) { - for (int i = 0; i < num_edits; i++) { - std::string record; - edits_[i].EncodeTo(&record); - ASSERT_OK(log_writer_->AddRecord(record)); - } - } - - void TearDown() override { - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - log_writer_.reset(); - } - - protected: - std::vector column_families_; - SequenceNumber last_seqno_; - std::vector edits_; - bool first_in_atomic_group_ = false; - bool last_in_atomic_group_ = false; - int num_edits_in_atomic_group_ = 0; - size_t num_recovered_edits_ = 0; - VersionEdit corrupted_edit_; - VersionEdit edit_with_incorrect_group_size_; - std::unique_ptr log_writer_; -}; - -TEST_F(VersionSetAtomicGroupTest, HandleValidAtomicGroupWithVersionSetRecover) { - const int kAtomicGroupSize = 3; - SetupValidAtomicGroup(kAtomicGroupSize); - AddNewEditsToLog(kAtomicGroupSize); - EXPECT_OK(versions_->Recover(column_families_, false)); - EXPECT_EQ(column_families_.size(), - versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - EXPECT_TRUE(first_in_atomic_group_); - EXPECT_TRUE(last_in_atomic_group_); - EXPECT_EQ(num_initial_edits_ + kAtomicGroupSize, num_recovered_edits_); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleValidAtomicGroupWithReactiveVersionSetRecover) { - const int kAtomicGroupSize = 3; - SetupValidAtomicGroup(kAtomicGroupSize); - AddNewEditsToLog(kAtomicGroupSize); - std::unique_ptr manifest_reader; - std::unique_ptr manifest_reporter; - std::unique_ptr manifest_reader_status; - EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, - &manifest_reporter, - &manifest_reader_status)); - EXPECT_EQ(column_families_.size(), - reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - EXPECT_TRUE(first_in_atomic_group_); - EXPECT_TRUE(last_in_atomic_group_); - // The recover should clean up the replay buffer. - EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == 0); - EXPECT_TRUE(reactive_versions_->replay_buffer().size() == 0); - EXPECT_EQ(num_initial_edits_ + kAtomicGroupSize, num_recovered_edits_); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleValidAtomicGroupWithReactiveVersionSetReadAndApply) { - const int kAtomicGroupSize = 3; - SetupValidAtomicGroup(kAtomicGroupSize); - std::unique_ptr manifest_reader; - std::unique_ptr manifest_reporter; - std::unique_ptr manifest_reader_status; - EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, - &manifest_reporter, - &manifest_reader_status)); - EXPECT_EQ(num_initial_edits_, num_recovered_edits_); - AddNewEditsToLog(kAtomicGroupSize); - InstrumentedMutex mu; - std::unordered_set cfds_changed; - mu.Lock(); - EXPECT_OK(reactive_versions_->ReadAndApply( - &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed)); - mu.Unlock(); - EXPECT_TRUE(first_in_atomic_group_); - EXPECT_TRUE(last_in_atomic_group_); - // The recover should clean up the replay buffer. 
- EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == 0); - EXPECT_TRUE(reactive_versions_->replay_buffer().size() == 0); - EXPECT_EQ(kAtomicGroupSize, num_recovered_edits_); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleIncompleteTrailingAtomicGroupWithVersionSetRecover) { - const int kAtomicGroupSize = 4; - const int kNumberOfPersistedVersionEdits = kAtomicGroupSize - 1; - SetupIncompleteTrailingAtomicGroup(kAtomicGroupSize); - AddNewEditsToLog(kNumberOfPersistedVersionEdits); - EXPECT_OK(versions_->Recover(column_families_, false)); - EXPECT_EQ(column_families_.size(), - versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - EXPECT_TRUE(first_in_atomic_group_); - EXPECT_FALSE(last_in_atomic_group_); - EXPECT_EQ(kNumberOfPersistedVersionEdits, num_edits_in_atomic_group_); - EXPECT_EQ(num_initial_edits_, num_recovered_edits_); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleIncompleteTrailingAtomicGroupWithReactiveVersionSetRecover) { - const int kAtomicGroupSize = 4; - const int kNumberOfPersistedVersionEdits = kAtomicGroupSize - 1; - SetupIncompleteTrailingAtomicGroup(kAtomicGroupSize); - AddNewEditsToLog(kNumberOfPersistedVersionEdits); - std::unique_ptr manifest_reader; - std::unique_ptr manifest_reporter; - std::unique_ptr manifest_reader_status; - EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, - &manifest_reporter, - &manifest_reader_status)); - EXPECT_EQ(column_families_.size(), - reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - EXPECT_TRUE(first_in_atomic_group_); - EXPECT_FALSE(last_in_atomic_group_); - EXPECT_EQ(kNumberOfPersistedVersionEdits, num_edits_in_atomic_group_); - // Reactive version set should store the edits in the replay buffer. - EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == - kNumberOfPersistedVersionEdits); - EXPECT_TRUE(reactive_versions_->replay_buffer().size() == kAtomicGroupSize); - // Write the last record. The reactive version set should now apply all - // edits. - std::string last_record; - edits_[kAtomicGroupSize - 1].EncodeTo(&last_record); - EXPECT_OK(log_writer_->AddRecord(last_record)); - InstrumentedMutex mu; - std::unordered_set cfds_changed; - mu.Lock(); - EXPECT_OK(reactive_versions_->ReadAndApply( - &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed)); - mu.Unlock(); - // Reactive version set should be empty now. - EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == 0); - EXPECT_TRUE(reactive_versions_->replay_buffer().size() == 0); - EXPECT_EQ(num_initial_edits_, num_recovered_edits_); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleIncompleteTrailingAtomicGroupWithReactiveVersionSetReadAndApply) { - const int kAtomicGroupSize = 4; - const int kNumberOfPersistedVersionEdits = kAtomicGroupSize - 1; - SetupIncompleteTrailingAtomicGroup(kAtomicGroupSize); - std::unique_ptr manifest_reader; - std::unique_ptr manifest_reporter; - std::unique_ptr manifest_reader_status; - // No edits in an atomic group. - EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, - &manifest_reporter, - &manifest_reader_status)); - EXPECT_EQ(column_families_.size(), - reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - EXPECT_EQ(num_initial_edits_, num_recovered_edits_); - // Write a few edits in an atomic group. 
- AddNewEditsToLog(kNumberOfPersistedVersionEdits); - InstrumentedMutex mu; - std::unordered_set cfds_changed; - mu.Lock(); - EXPECT_OK(reactive_versions_->ReadAndApply( - &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed)); - mu.Unlock(); - EXPECT_TRUE(first_in_atomic_group_); - EXPECT_FALSE(last_in_atomic_group_); - EXPECT_EQ(kNumberOfPersistedVersionEdits, num_edits_in_atomic_group_); - // Reactive version set should store the edits in the replay buffer. - EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == - kNumberOfPersistedVersionEdits); - EXPECT_TRUE(reactive_versions_->replay_buffer().size() == kAtomicGroupSize); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleCorruptedAtomicGroupWithVersionSetRecover) { - const int kAtomicGroupSize = 4; - SetupCorruptedAtomicGroup(kAtomicGroupSize); - AddNewEditsToLog(kAtomicGroupSize); - EXPECT_NOK(versions_->Recover(column_families_, false)); - EXPECT_EQ(column_families_.size(), - versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(), - corrupted_edit_.DebugString()); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleCorruptedAtomicGroupWithReactiveVersionSetRecover) { - const int kAtomicGroupSize = 4; - SetupCorruptedAtomicGroup(kAtomicGroupSize); - AddNewEditsToLog(kAtomicGroupSize); - std::unique_ptr manifest_reader; - std::unique_ptr manifest_reporter; - std::unique_ptr manifest_reader_status; - EXPECT_NOK(reactive_versions_->Recover(column_families_, &manifest_reader, - &manifest_reporter, - &manifest_reader_status)); - EXPECT_EQ(column_families_.size(), - reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(), - corrupted_edit_.DebugString()); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleCorruptedAtomicGroupWithReactiveVersionSetReadAndApply) { - const int kAtomicGroupSize = 4; - SetupCorruptedAtomicGroup(kAtomicGroupSize); - InstrumentedMutex mu; - std::unordered_set cfds_changed; - std::unique_ptr manifest_reader; - std::unique_ptr manifest_reporter; - std::unique_ptr manifest_reader_status; - EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, - &manifest_reporter, - &manifest_reader_status)); - // Write the corrupted edits. 
- AddNewEditsToLog(kAtomicGroupSize); - mu.Lock(); - EXPECT_NOK(reactive_versions_->ReadAndApply( - &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed)); - mu.Unlock(); - EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(), - corrupted_edit_.DebugString()); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleIncorrectAtomicGroupSizeWithVersionSetRecover) { - const int kAtomicGroupSize = 4; - SetupIncorrectAtomicGroup(kAtomicGroupSize); - AddNewEditsToLog(kAtomicGroupSize); - EXPECT_NOK(versions_->Recover(column_families_, false)); - EXPECT_EQ(column_families_.size(), - versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - EXPECT_EQ(edits_[1].DebugString(), - edit_with_incorrect_group_size_.DebugString()); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleIncorrectAtomicGroupSizeWithReactiveVersionSetRecover) { - const int kAtomicGroupSize = 4; - SetupIncorrectAtomicGroup(kAtomicGroupSize); - AddNewEditsToLog(kAtomicGroupSize); - std::unique_ptr manifest_reader; - std::unique_ptr manifest_reporter; - std::unique_ptr manifest_reader_status; - EXPECT_NOK(reactive_versions_->Recover(column_families_, &manifest_reader, - &manifest_reporter, - &manifest_reader_status)); - EXPECT_EQ(column_families_.size(), - reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - EXPECT_EQ(edits_[1].DebugString(), - edit_with_incorrect_group_size_.DebugString()); -} - -TEST_F(VersionSetAtomicGroupTest, - HandleIncorrectAtomicGroupSizeWithReactiveVersionSetReadAndApply) { - const int kAtomicGroupSize = 4; - SetupIncorrectAtomicGroup(kAtomicGroupSize); - InstrumentedMutex mu; - std::unordered_set cfds_changed; - std::unique_ptr manifest_reader; - std::unique_ptr manifest_reporter; - std::unique_ptr manifest_reader_status; - EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, - &manifest_reporter, - &manifest_reader_status)); - AddNewEditsToLog(kAtomicGroupSize); - mu.Lock(); - EXPECT_NOK(reactive_versions_->ReadAndApply( - &mu, &manifest_reader, manifest_reader_status.get(), &cfds_changed)); - mu.Unlock(); - EXPECT_EQ(edits_[1].DebugString(), - edit_with_incorrect_group_size_.DebugString()); -} - -class VersionSetTestDropOneCF : public VersionSetTestBase, - public testing::TestWithParam { - public: - VersionSetTestDropOneCF() - : VersionSetTestBase("version_set_test_drop_one_cf") {} -}; - -// This test simulates the following execution sequence -// Time thread1 bg_flush_thr -// | Prepare version edits (e1,e2,e3) for atomic -// | flush cf1, cf2, cf3 -// | Enqueue e to drop cfi -// | to manifest_writers_ -// | Enqueue (e1,e2,e3) to manifest_writers_ -// | -// | Apply e, -// | cfi.IsDropped() is true -// | Apply (e1,e2,e3), -// | since cfi.IsDropped() == true, we need to -// | drop ei and write the rest to MANIFEST. -// V -// -// Repeat the test for i = 1, 2, 3 to simulate dropping the first, middle and -// last column family in an atomic group. 
-TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) { - std::vector column_families; - SequenceNumber last_seqno; - std::unique_ptr log_writer; - PrepareManifest(&column_families, &last_seqno, &log_writer); - Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); - - EXPECT_OK(versions_->Recover(column_families, false /* read_only */)); - EXPECT_EQ(column_families.size(), - versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); - - const int kAtomicGroupSize = 3; - const std::vector non_default_cf_names = { - kColumnFamilyName1, kColumnFamilyName2, kColumnFamilyName3}; - - // Drop one column family - VersionEdit drop_cf_edit; - drop_cf_edit.DropColumnFamily(); - const std::string cf_to_drop_name(GetParam()); - auto cfd_to_drop = - versions_->GetColumnFamilySet()->GetColumnFamily(cf_to_drop_name); - ASSERT_NE(nullptr, cfd_to_drop); - // Increase its refcount because cfd_to_drop is used later, and we need to - // prevent it from being deleted. - cfd_to_drop->Ref(); - drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID()); - mutex_.Lock(); - s = versions_->LogAndApply(cfd_to_drop, - *cfd_to_drop->GetLatestMutableCFOptions(), - &drop_cf_edit, &mutex_, nullptr); - mutex_.Unlock(); - ASSERT_OK(s); - - std::vector edits(kAtomicGroupSize); - uint32_t remaining = kAtomicGroupSize; - size_t i = 0; - autovector cfds; - autovector mutable_cf_options_list; - autovector> edit_lists; - for (const auto& cf_name : non_default_cf_names) { - auto cfd = (cf_name != cf_to_drop_name) - ? versions_->GetColumnFamilySet()->GetColumnFamily(cf_name) - : cfd_to_drop; - ASSERT_NE(nullptr, cfd); - cfds.push_back(cfd); - mutable_cf_options_list.emplace_back(cfd->GetLatestMutableCFOptions()); - edits[i].SetColumnFamily(cfd->GetID()); - edits[i].SetLogNumber(0); - edits[i].SetNextFile(2); - edits[i].MarkAtomicGroup(--remaining); - edits[i].SetLastSequence(last_seqno++); - autovector tmp_edits; - tmp_edits.push_back(&edits[i]); - edit_lists.emplace_back(tmp_edits); - ++i; - } - int called = 0; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->SetCallBack( - "VersionSet::ProcessManifestWrites:CheckOneAtomicGroup", [&](void* arg) { - std::vector* tmp_edits = - reinterpret_cast*>(arg); - EXPECT_EQ(kAtomicGroupSize - 1, tmp_edits->size()); - for (const auto e : *tmp_edits) { - bool found = false; - for (const auto& e2 : edits) { - if (&e2 == e) { - found = true; - break; - } - } - ASSERT_TRUE(found); - } - ++called; - }); - SyncPoint::GetInstance()->EnableProcessing(); - mutex_.Lock(); - s = versions_->LogAndApply(cfds, mutable_cf_options_list, edit_lists, &mutex_, - nullptr); - mutex_.Unlock(); - ASSERT_OK(s); - ASSERT_EQ(1, called); - cfd_to_drop->UnrefAndTryDelete(); -} - -INSTANTIATE_TEST_CASE_P( - AtomicGroup, VersionSetTestDropOneCF, - testing::Values(VersionSetTestBase::kColumnFamilyName1, - VersionSetTestBase::kColumnFamilyName2, - VersionSetTestBase::kColumnFamilyName3)); - -class EmptyDefaultCfNewManifest : public VersionSetTestBase, - public testing::Test { - public: - EmptyDefaultCfNewManifest() : VersionSetTestBase("version_set_new_db_test") {} - // Emulate DBImpl::NewDB() - void PrepareManifest(std::vector* /*column_families*/, - SequenceNumber* /*last_seqno*/, - std::unique_ptr* log_writer) override { - assert(log_writer != nullptr); - VersionEdit new_db; - new_db.SetLogNumber(0); - const std::string manifest_path = DescriptorFileName(dbname_, 1); - const auto& fs = env_->GetFileSystem(); - 
std::unique_ptr file_writer; - Status s = WritableFileWriter::Create( - fs, manifest_path, fs->OptimizeForManifestWrite(env_options_), - &file_writer, nullptr); - ASSERT_OK(s); - log_writer->reset(new log::Writer(std::move(file_writer), 0, true)); - std::string record; - ASSERT_TRUE(new_db.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); - ASSERT_OK(s); - // Create new column family - VersionEdit new_cf; - new_cf.AddColumnFamily(VersionSetTestBase::kColumnFamilyName1); - new_cf.SetColumnFamily(1); - new_cf.SetLastSequence(2); - new_cf.SetNextFile(2); - record.clear(); - ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); - ASSERT_OK(s); - } - - protected: - bool write_dbid_to_manifest_ = false; - std::unique_ptr log_writer_; -}; - -// Create db, create column family. Cf creation will switch to a new MANIFEST. -// Then reopen db, trying to recover. -TEST_F(EmptyDefaultCfNewManifest, Recover) { - PrepareManifest(nullptr, nullptr, &log_writer_); - log_writer_.reset(); - Status s = - SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); - ASSERT_OK(s); - std::string manifest_path; - VerifyManifest(&manifest_path); - std::vector column_families; - column_families.emplace_back(kDefaultColumnFamilyName, cf_options_); - column_families.emplace_back(VersionSetTestBase::kColumnFamilyName1, - cf_options_); - std::string db_id; - bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest( - manifest_path, column_families, false, &db_id, &has_missing_table_file); - ASSERT_OK(s); - ASSERT_FALSE(has_missing_table_file); -} - -class VersionSetTestEmptyDb - : public VersionSetTestBase, - public testing::TestWithParam< - std::tuple>> { - public: - static const std::string kUnknownColumnFamilyName; - VersionSetTestEmptyDb() : VersionSetTestBase("version_set_test_empty_db") {} - - protected: - void PrepareManifest(std::vector* /*column_families*/, - SequenceNumber* /*last_seqno*/, - std::unique_ptr* log_writer) override { - assert(nullptr != log_writer); - VersionEdit new_db; - if (db_options_.write_dbid_to_manifest) { - DBOptions tmp_db_options; - tmp_db_options.env = env_; - std::unique_ptr impl(new DBImpl(tmp_db_options, dbname_)); - std::string db_id; - impl->GetDbIdentityFromIdentityFile(&db_id); - new_db.SetDBId(db_id); - } - const std::string manifest_path = DescriptorFileName(dbname_, 1); - const auto& fs = env_->GetFileSystem(); - std::unique_ptr file_writer; - Status s = WritableFileWriter::Create( - fs, manifest_path, fs->OptimizeForManifestWrite(env_options_), - &file_writer, nullptr); - ASSERT_OK(s); - { - log_writer->reset(new log::Writer(std::move(file_writer), 0, false)); - std::string record; - new_db.EncodeTo(&record); - s = (*log_writer)->AddRecord(record); - ASSERT_OK(s); - } - } - - std::unique_ptr log_writer_; -}; - -const std::string VersionSetTestEmptyDb::kUnknownColumnFamilyName = "unknown"; - -TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) { - db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); - PrepareManifest(nullptr, nullptr, &log_writer_); - log_writer_.reset(); - Status s = - SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); - ASSERT_OK(s); - - std::string manifest_path; - VerifyManifest(&manifest_path); - - bool read_only = std::get<1>(GetParam()); - const std::vector cf_names = std::get<2>(GetParam()); - - std::vector column_families; - for (const auto& cf_name : cf_names) { - column_families.emplace_back(cf_name, cf_options_); - } - - std::string db_id; - 
bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, - read_only, &db_id, - &has_missing_table_file); - auto iter = - std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); - if (iter == cf_names.end()) { - ASSERT_TRUE(s.IsInvalidArgument()); - } else { - ASSERT_NE(s.ToString().find(manifest_path), std::string::npos); - ASSERT_TRUE(s.IsCorruption()); - } -} - -TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest1) { - db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); - PrepareManifest(nullptr, nullptr, &log_writer_); - // Only a subset of column families in the MANIFEST. - VersionEdit new_cf1; - new_cf1.AddColumnFamily(VersionSetTestBase::kColumnFamilyName1); - new_cf1.SetColumnFamily(1); - Status s; - { - std::string record; - new_cf1.EncodeTo(&record); - s = log_writer_->AddRecord(record); - ASSERT_OK(s); - } - log_writer_.reset(); - s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); - ASSERT_OK(s); - - std::string manifest_path; - VerifyManifest(&manifest_path); - - bool read_only = std::get<1>(GetParam()); - const std::vector& cf_names = std::get<2>(GetParam()); - std::vector column_families; - for (const auto& cf_name : cf_names) { - column_families.emplace_back(cf_name, cf_options_); - } - std::string db_id; - bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, - read_only, &db_id, - &has_missing_table_file); - auto iter = - std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); - if (iter == cf_names.end()) { - ASSERT_TRUE(s.IsInvalidArgument()); - } else { - ASSERT_NE(s.ToString().find(manifest_path), std::string::npos); - ASSERT_TRUE(s.IsCorruption()); - } -} - -TEST_P(VersionSetTestEmptyDb, OpenFromInCompleteManifest2) { - db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); - PrepareManifest(nullptr, nullptr, &log_writer_); - // Write all column families but no log_number, next_file_number and - // last_sequence. 
- const std::vector all_cf_names = { - kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2, - kColumnFamilyName3}; - uint32_t cf_id = 1; - Status s; - for (size_t i = 1; i != all_cf_names.size(); ++i) { - VersionEdit new_cf; - new_cf.AddColumnFamily(all_cf_names[i]); - new_cf.SetColumnFamily(cf_id++); - std::string record; - ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = log_writer_->AddRecord(record); - ASSERT_OK(s); - } - log_writer_.reset(); - s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); - ASSERT_OK(s); - - std::string manifest_path; - VerifyManifest(&manifest_path); - - bool read_only = std::get<1>(GetParam()); - const std::vector& cf_names = std::get<2>(GetParam()); - std::vector column_families; - for (const auto& cf_name : cf_names) { - column_families.emplace_back(cf_name, cf_options_); - } - std::string db_id; - bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, - read_only, &db_id, - &has_missing_table_file); - auto iter = - std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); - if (iter == cf_names.end()) { - ASSERT_TRUE(s.IsInvalidArgument()); - } else { - ASSERT_NE(s.ToString().find(manifest_path), std::string::npos); - ASSERT_TRUE(s.IsCorruption()); - } -} - -TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) { - db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); - PrepareManifest(nullptr, nullptr, &log_writer_); - // Write all column families but no log_number, next_file_number and - // last_sequence. - const std::vector all_cf_names = { - kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2, - kColumnFamilyName3}; - uint32_t cf_id = 1; - Status s; - for (size_t i = 1; i != all_cf_names.size(); ++i) { - VersionEdit new_cf; - new_cf.AddColumnFamily(all_cf_names[i]); - new_cf.SetColumnFamily(cf_id++); - std::string record; - ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = log_writer_->AddRecord(record); - ASSERT_OK(s); - } - { - VersionEdit tmp_edit; - tmp_edit.SetColumnFamily(4); - tmp_edit.SetLogNumber(0); - tmp_edit.SetNextFile(2); - tmp_edit.SetLastSequence(0); - std::string record; - ASSERT_TRUE(tmp_edit.EncodeTo(&record)); - s = log_writer_->AddRecord(record); - ASSERT_OK(s); - } - log_writer_.reset(); - s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); - ASSERT_OK(s); - - std::string manifest_path; - VerifyManifest(&manifest_path); - - bool read_only = std::get<1>(GetParam()); - const std::vector& cf_names = std::get<2>(GetParam()); - std::vector column_families; - for (const auto& cf_name : cf_names) { - column_families.emplace_back(cf_name, cf_options_); - } - std::string db_id; - bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, - read_only, &db_id, - &has_missing_table_file); - auto iter = - std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); - if (iter == cf_names.end()) { - ASSERT_TRUE(s.IsInvalidArgument()); - } else { - ASSERT_NE(s.ToString().find(manifest_path), std::string::npos); - ASSERT_TRUE(s.IsCorruption()); - } -} - -TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) { - db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); - PrepareManifest(nullptr, nullptr, &log_writer_); - // Write all column families but no log_number, next_file_number and - // last_sequence. 
- const std::vector all_cf_names = { - kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2, - kColumnFamilyName3}; - uint32_t cf_id = 1; - Status s; - for (size_t i = 1; i != all_cf_names.size(); ++i) { - VersionEdit new_cf; - new_cf.AddColumnFamily(all_cf_names[i]); - new_cf.SetColumnFamily(cf_id++); - std::string record; - ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = log_writer_->AddRecord(record); - ASSERT_OK(s); - } - { - VersionEdit tmp_edit; - tmp_edit.SetLogNumber(0); - tmp_edit.SetNextFile(2); - tmp_edit.SetLastSequence(0); - std::string record; - ASSERT_TRUE(tmp_edit.EncodeTo(&record)); - s = log_writer_->AddRecord(record); - ASSERT_OK(s); - } - log_writer_.reset(); - s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); - ASSERT_OK(s); - - std::string manifest_path; - VerifyManifest(&manifest_path); - - bool read_only = std::get<1>(GetParam()); - const std::vector& cf_names = std::get<2>(GetParam()); - std::vector column_families; - for (const auto& cf_name : cf_names) { - column_families.emplace_back(cf_name, cf_options_); - } - std::string db_id; - bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, - read_only, &db_id, - &has_missing_table_file); - auto iter = - std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); - if (iter == cf_names.end()) { - ASSERT_TRUE(s.IsInvalidArgument()); - } else if (read_only) { - ASSERT_OK(s); - ASSERT_FALSE(has_missing_table_file); - } else if (cf_names.size() == all_cf_names.size()) { - ASSERT_OK(s); - ASSERT_FALSE(has_missing_table_file); - } else if (cf_names.size() < all_cf_names.size()) { - ASSERT_TRUE(s.IsInvalidArgument()); - } else { - ASSERT_OK(s); - ASSERT_FALSE(has_missing_table_file); - ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetColumnFamily( - kUnknownColumnFamilyName); - ASSERT_EQ(nullptr, cfd); - } -} - -INSTANTIATE_TEST_CASE_P( - BestEffortRecovery, VersionSetTestEmptyDb, - testing::Combine( - /*write_dbid_to_manifest=*/testing::Bool(), - /*read_only=*/testing::Bool(), - /*cf_names=*/ - testing::Values( - std::vector(), - std::vector({kDefaultColumnFamilyName}), - std::vector({VersionSetTestBase::kColumnFamilyName1, - VersionSetTestBase::kColumnFamilyName2, - VersionSetTestBase::kColumnFamilyName3}), - std::vector({kDefaultColumnFamilyName, - VersionSetTestBase::kColumnFamilyName1}), - std::vector({kDefaultColumnFamilyName, - VersionSetTestBase::kColumnFamilyName1, - VersionSetTestBase::kColumnFamilyName2, - VersionSetTestBase::kColumnFamilyName3}), - std::vector( - {kDefaultColumnFamilyName, - VersionSetTestBase::kColumnFamilyName1, - VersionSetTestBase::kColumnFamilyName2, - VersionSetTestBase::kColumnFamilyName3, - VersionSetTestEmptyDb::kUnknownColumnFamilyName})))); - -class VersionSetTestMissingFiles : public VersionSetTestBase, - public testing::Test { - public: - VersionSetTestMissingFiles() - : VersionSetTestBase("version_set_test_missing_files"), - block_based_table_options_(), - table_factory_(std::make_shared( - block_based_table_options_)), - internal_comparator_( - std::make_shared(options_.comparator)) {} - - protected: - void PrepareManifest(std::vector* column_families, - SequenceNumber* last_seqno, - std::unique_ptr* log_writer) override { - assert(column_families != nullptr); - assert(last_seqno != nullptr); - assert(log_writer != nullptr); - const std::string manifest = DescriptorFileName(dbname_, 1); - const auto& fs = env_->GetFileSystem(); - std::unique_ptr file_writer; - Status s 
= WritableFileWriter::Create( - fs, manifest, fs->OptimizeForManifestWrite(env_options_), &file_writer, - nullptr); - ASSERT_OK(s); - log_writer->reset(new log::Writer(std::move(file_writer), 0, false)); - VersionEdit new_db; - if (db_options_.write_dbid_to_manifest) { - DBOptions tmp_db_options; - tmp_db_options.env = env_; - std::unique_ptr impl(new DBImpl(tmp_db_options, dbname_)); - std::string db_id; - impl->GetDbIdentityFromIdentityFile(&db_id); - new_db.SetDBId(db_id); - } - { - std::string record; - ASSERT_TRUE(new_db.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); - ASSERT_OK(s); - } - const std::vector cf_names = { - kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2, - kColumnFamilyName3}; - uint32_t cf_id = 1; // default cf id is 0 - cf_options_.table_factory = table_factory_; - for (const auto& cf_name : cf_names) { - column_families->emplace_back(cf_name, cf_options_); - if (cf_name == kDefaultColumnFamilyName) { - continue; - } - VersionEdit new_cf; - new_cf.AddColumnFamily(cf_name); - new_cf.SetColumnFamily(cf_id); - std::string record; - ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); - ASSERT_OK(s); - - VersionEdit cf_files; - cf_files.SetColumnFamily(cf_id); - cf_files.SetLogNumber(0); - record.clear(); - ASSERT_TRUE(cf_files.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); - ASSERT_OK(s); - ++cf_id; - } - SequenceNumber seq = 2; - { - VersionEdit edit; - edit.SetNextFile(7); - edit.SetLastSequence(seq); - std::string record; - ASSERT_TRUE(edit.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); - ASSERT_OK(s); - } - *last_seqno = seq + 1; - } - - struct SstInfo { - uint64_t file_number; - std::string column_family; - std::string key; // the only key - int level = 0; - uint64_t epoch_number; - SstInfo(uint64_t file_num, const std::string& cf_name, - const std::string& _key, - uint64_t _epoch_number = kUnknownEpochNumber) - : SstInfo(file_num, cf_name, _key, 0, _epoch_number) {} - SstInfo(uint64_t file_num, const std::string& cf_name, - const std::string& _key, int lvl, - uint64_t _epoch_number = kUnknownEpochNumber) - : file_number(file_num), - column_family(cf_name), - key(_key), - level(lvl), - epoch_number(_epoch_number) {} - }; - - // Create dummy sst, return their metadata. Note that only file name and size - // are used. 
- void CreateDummyTableFiles(const std::vector& file_infos, - std::vector* file_metas) { - assert(file_metas != nullptr); - for (const auto& info : file_infos) { - uint64_t file_num = info.file_number; - std::string fname = MakeTableFileName(dbname_, file_num); - std::unique_ptr file; - Status s = fs_->NewWritableFile(fname, FileOptions(), &file, nullptr); - ASSERT_OK(s); - std::unique_ptr fwriter(new WritableFileWriter( - std::move(file), fname, FileOptions(), env_->GetSystemClock().get())); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - - std::unique_ptr builder(table_factory_->NewTableBuilder( - TableBuilderOptions( - immutable_options_, mutable_cf_options_, *internal_comparator_, - &int_tbl_prop_collector_factories, kNoCompression, - CompressionOptions(), - TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - info.column_family, info.level), - fwriter.get())); - InternalKey ikey(info.key, 0, ValueType::kTypeValue); - builder->Add(ikey.Encode(), "value"); - ASSERT_OK(builder->Finish()); - ASSERT_OK(fwriter->Flush()); - uint64_t file_size = 0; - s = fs_->GetFileSize(fname, IOOptions(), &file_size, nullptr); - ASSERT_OK(s); - ASSERT_NE(0, file_size); - file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey, - ikey, 0, 0, false, Temperature::kUnknown, 0, 0, - 0, info.epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, - 0); - } - } - - // This method updates last_sequence_. - void WriteFileAdditionAndDeletionToManifest( - uint32_t cf, const std::vector>& added_files, - const std::vector>& deleted_files) { - VersionEdit edit; - edit.SetColumnFamily(cf); - for (const auto& elem : added_files) { - int level = elem.first; - edit.AddFile(level, elem.second); - } - for (const auto& elem : deleted_files) { - int level = elem.first; - edit.DeleteFile(level, elem.second); - } - edit.SetLastSequence(last_seqno_); - ++last_seqno_; - assert(log_writer_.get() != nullptr); - std::string record; - ASSERT_TRUE(edit.EncodeTo(&record)); - Status s = log_writer_->AddRecord(record); - ASSERT_OK(s); - } - - BlockBasedTableOptions block_based_table_options_; - std::shared_ptr table_factory_; - std::shared_ptr internal_comparator_; - std::vector column_families_; - SequenceNumber last_seqno_; - std::unique_ptr log_writer_; -}; - -TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) { - std::vector existing_files = { - SstInfo(100, kDefaultColumnFamilyName, "a", 100 /* epoch_number */), - SstInfo(102, kDefaultColumnFamilyName, "b", 102 /* epoch_number */), - SstInfo(103, kDefaultColumnFamilyName, "c", 103 /* epoch_number */), - SstInfo(107, kDefaultColumnFamilyName, "d", 107 /* epoch_number */), - SstInfo(110, kDefaultColumnFamilyName, "e", 110 /* epoch_number */)}; - std::vector file_metas; - CreateDummyTableFiles(existing_files, &file_metas); - - PrepareManifest(&column_families_, &last_seqno_, &log_writer_); - std::vector> added_files; - for (uint64_t file_num = 10; file_num < 15; ++file_num) { - std::string smallest_ukey = "a"; - std::string largest_ukey = "b"; - InternalKey smallest_ikey(smallest_ukey, 1, ValueType::kTypeValue); - InternalKey largest_ikey(largest_ukey, 1, ValueType::kTypeValue); - - FileMetaData meta = FileMetaData( - file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, - largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0, - file_num /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - added_files.emplace_back(0, meta); - } - 
WriteFileAdditionAndDeletionToManifest( - /*cf=*/0, added_files, std::vector>()); - std::vector> deleted_files; - deleted_files.emplace_back(0, 10); - WriteFileAdditionAndDeletionToManifest( - /*cf=*/0, std::vector>(), deleted_files); - log_writer_.reset(); - Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); - std::string manifest_path; - VerifyManifest(&manifest_path); - std::string db_id; - bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, - /*read_only=*/false, &db_id, - &has_missing_table_file); - ASSERT_OK(s); - ASSERT_TRUE(has_missing_table_file); - for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { - VersionStorageInfo* vstorage = cfd->current()->storage_info(); - const std::vector& files = vstorage->LevelFiles(0); - ASSERT_TRUE(files.empty()); - } -} - -TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) { - std::vector existing_files = { - SstInfo(100, kDefaultColumnFamilyName, "a", 0 /* level */, - 100 /* epoch_number */), - SstInfo(102, kDefaultColumnFamilyName, "b", 0 /* level */, - 102 /* epoch_number */), - SstInfo(103, kDefaultColumnFamilyName, "c", 0 /* level */, - 103 /* epoch_number */), - SstInfo(107, kDefaultColumnFamilyName, "d", 0 /* level */, - 107 /* epoch_number */), - SstInfo(110, kDefaultColumnFamilyName, "e", 0 /* level */, - 110 /* epoch_number */)}; - std::vector file_metas; - CreateDummyTableFiles(existing_files, &file_metas); - - PrepareManifest(&column_families_, &last_seqno_, &log_writer_); - std::vector> added_files; - for (size_t i = 3; i != 5; ++i) { - added_files.emplace_back(0, file_metas[i]); - } - WriteFileAdditionAndDeletionToManifest( - /*cf=*/0, added_files, std::vector>()); - - added_files.clear(); - for (uint64_t file_num = 120; file_num < 130; ++file_num) { - std::string smallest_ukey = "a"; - std::string largest_ukey = "b"; - InternalKey smallest_ikey(smallest_ukey, 1, ValueType::kTypeValue); - InternalKey largest_ikey(largest_ukey, 1, ValueType::kTypeValue); - FileMetaData meta = FileMetaData( - file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, - largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0, - file_num /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); - added_files.emplace_back(0, meta); - } - WriteFileAdditionAndDeletionToManifest( - /*cf=*/0, added_files, std::vector>()); - log_writer_.reset(); - Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); - std::string manifest_path; - VerifyManifest(&manifest_path); - std::string db_id; - bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, - /*read_only=*/false, &db_id, - &has_missing_table_file); - ASSERT_OK(s); - ASSERT_TRUE(has_missing_table_file); - for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { - VersionStorageInfo* vstorage = cfd->current()->storage_info(); - const std::vector& files = vstorage->LevelFiles(0); - if (cfd->GetName() == kDefaultColumnFamilyName) { - ASSERT_EQ(2, files.size()); - for (const auto* fmeta : files) { - if (fmeta->fd.GetNumber() != 107 && fmeta->fd.GetNumber() != 110) { - ASSERT_FALSE(true); - } - } - } else { - ASSERT_TRUE(files.empty()); - } - } -} - -TEST_F(VersionSetTestMissingFiles, NoFileMissing) { - std::vector existing_files = { - SstInfo(100, kDefaultColumnFamilyName, "a", 0 /* level */, - 100 /* epoch_number */), - SstInfo(102, kDefaultColumnFamilyName, "b", 0 /* level */, - 
102 /* epoch_number */), - SstInfo(103, kDefaultColumnFamilyName, "c", 0 /* level */, - 103 /* epoch_number */), - SstInfo(107, kDefaultColumnFamilyName, "d", 0 /* level */, - 107 /* epoch_number */), - SstInfo(110, kDefaultColumnFamilyName, "e", 0 /* level */, - 110 /* epoch_number */)}; - std::vector file_metas; - CreateDummyTableFiles(existing_files, &file_metas); - - PrepareManifest(&column_families_, &last_seqno_, &log_writer_); - std::vector> added_files; - for (const auto& meta : file_metas) { - added_files.emplace_back(0, meta); - } - WriteFileAdditionAndDeletionToManifest( - /*cf=*/0, added_files, std::vector>()); - std::vector> deleted_files; - deleted_files.emplace_back(/*level=*/0, 100); - WriteFileAdditionAndDeletionToManifest( - /*cf=*/0, std::vector>(), deleted_files); - log_writer_.reset(); - Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); - ASSERT_OK(s); - std::string manifest_path; - VerifyManifest(&manifest_path); - std::string db_id; - bool has_missing_table_file = false; - s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, - /*read_only=*/false, &db_id, - &has_missing_table_file); - ASSERT_OK(s); - ASSERT_FALSE(has_missing_table_file); - for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { - VersionStorageInfo* vstorage = cfd->current()->storage_info(); - const std::vector& files = vstorage->LevelFiles(0); - if (cfd->GetName() == kDefaultColumnFamilyName) { - ASSERT_EQ(existing_files.size() - deleted_files.size(), files.size()); - bool has_deleted_file = false; - for (const auto* fmeta : files) { - if (fmeta->fd.GetNumber() == 100) { - has_deleted_file = true; - break; - } - } - ASSERT_FALSE(has_deleted_file); - } else { - ASSERT_TRUE(files.empty()); - } - } -} - -TEST_F(VersionSetTestMissingFiles, MinLogNumberToKeep2PC) { - db_options_.allow_2pc = true; - NewDB(); - - SstInfo sst(100, kDefaultColumnFamilyName, "a", 0 /* level */, - 100 /* epoch_number */); - std::vector file_metas; - CreateDummyTableFiles({sst}, &file_metas); - - constexpr WalNumber kMinWalNumberToKeep2PC = 10; - VersionEdit edit; - edit.AddFile(0, file_metas[0]); - edit.SetMinLogNumberToKeep(kMinWalNumberToKeep2PC); - ASSERT_OK(LogAndApplyToDefaultCF(edit)); - ASSERT_EQ(versions_->min_log_number_to_keep(), kMinWalNumberToKeep2PC); - - for (int i = 0; i < 3; i++) { - CreateNewManifest(); - ReopenDB(); - ASSERT_EQ(versions_->min_log_number_to_keep(), kMinWalNumberToKeep2PC); - } -} - -class ChargeFileMetadataTest : public DBTestBase { - public: - ChargeFileMetadataTest() - : DBTestBase("charge_file_metadata_test", /*env_do_fsync=*/true) {} -}; - -class ChargeFileMetadataTestWithParam - : public ChargeFileMetadataTest, - public testing::WithParamInterface { - public: - ChargeFileMetadataTestWithParam() {} -}; - -INSTANTIATE_TEST_CASE_P( - ChargeFileMetadataTestWithParam, ChargeFileMetadataTestWithParam, - ::testing::Values(CacheEntryRoleOptions::Decision::kEnabled, - CacheEntryRoleOptions::Decision::kDisabled)); - -TEST_P(ChargeFileMetadataTestWithParam, Basic) { - Options options; - BlockBasedTableOptions table_options; - CacheEntryRoleOptions::Decision charge_file_metadata = GetParam(); - table_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kFileMetadata, {/*.charged = */ charge_file_metadata}}); - std::shared_ptr> - file_metadata_charge_only_cache = std::make_shared< - TargetCacheChargeTrackingCache>( - NewLRUCache( - 4 * CacheReservationManagerImpl< - CacheEntryRole::kFileMetadata>::GetDummyEntrySize(), - 0 /* num_shard_bits 
*/, true /* strict_capacity_limit */)); - table_options.block_cache = file_metadata_charge_only_cache; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.create_if_missing = true; - options.disable_auto_compactions = true; - DestroyAndReopen(options); - - // Create 128 file metadata, each of which is roughly 1024 bytes. - // This results in 1 * - // CacheReservationManagerImpl::GetDummyEntrySize() - // cache reservation for file metadata. - for (int i = 1; i <= 128; ++i) { - ASSERT_OK(Put(std::string(1024, 'a'), "va")); - ASSERT_OK(Put("b", "vb")); - ASSERT_OK(Flush()); - } - if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) { - EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), - 1 * CacheReservationManagerImpl< - CacheEntryRole::kFileMetadata>::GetDummyEntrySize()); - - } else { - EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), 0); - } - - // Create another 128 file metadata. - // This increases the file metadata cache reservation to 2 * - // CacheReservationManagerImpl::GetDummyEntrySize(). - for (int i = 1; i <= 128; ++i) { - ASSERT_OK(Put(std::string(1024, 'a'), "vva")); - ASSERT_OK(Put("b", "vvb")); - ASSERT_OK(Flush()); - } - if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) { - EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), - 2 * CacheReservationManagerImpl< - CacheEntryRole::kFileMetadata>::GetDummyEntrySize()); - } else { - EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), 0); - } - // Compaction will create 1 new file metadata, obsolete and delete all 256 - // file metadata above. This results in 1 * - // CacheReservationManagerImpl::GetDummyEntrySize() - // cache reservation for file metadata. - SyncPoint::GetInstance()->LoadDependency( - {{"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles", - "ChargeFileMetadataTestWithParam::" - "PreVerifyingCacheReservationRelease"}}); - SyncPoint::GetInstance()->EnableProcessing(); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - ASSERT_EQ("0,1", FilesPerLevel(0)); - TEST_SYNC_POINT( - "ChargeFileMetadataTestWithParam::PreVerifyingCacheReservationRelease"); - if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) { - EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), - 1 * CacheReservationManagerImpl< - CacheEntryRole::kFileMetadata>::GetDummyEntrySize()); - } else { - EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), 0); - } - SyncPoint::GetInstance()->DisableProcessing(); - - // Destroying the db will delete the remaining 1 new file metadata - // This results in no cache reservation for file metadata. 
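The charge values asserted above come from dummy-entry accounting: the reservation manager books cache usage in whole multiples of GetDummyEntrySize() rather than byte-for-byte, so roughly 128 KB of file metadata surfaces as one dummy entry and roughly 256 KB as two. A rough back-of-the-envelope sketch; the ceiling division here is only an approximation of the real rounding rule, which lives in CacheReservationManagerImpl:

    // Approximate view of dummy-entry accounting (illustrative only).
    const size_t dummy = CacheReservationManagerImpl<
        CacheEntryRole::kFileMetadata>::GetDummyEntrySize();
    const size_t approx_meta_bytes = 128 * 1024;  // ~128 metadata of ~1 KB each
    // Reservations grow in whole dummy entries, not byte-for-byte:
    const size_t approx_charge =
        ((approx_meta_bytes + dummy - 1) / dummy) * dummy;
    // With a dummy entry larger than 128 KB this is 1 * dummy, matching the
    // first EXPECT_EQ above; doubling the metadata moves it to 2 * dummy.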
- Destroy(options); - EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), - 0 * CacheReservationManagerImpl< - CacheEntryRole::kFileMetadata>::GetDummyEntrySize()); - - // Reopen the db with a smaller cache in order to test failure in allocating - // file metadata due to memory limit based on cache capacity - file_metadata_charge_only_cache = std::make_shared< - TargetCacheChargeTrackingCache>( - NewLRUCache(1 * CacheReservationManagerImpl< - CacheEntryRole::kFileMetadata>::GetDummyEntrySize(), - 0 /* num_shard_bits */, true /* strict_capacity_limit */)); - table_options.block_cache = file_metadata_charge_only_cache; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Reopen(options); - ASSERT_OK(Put(std::string(1024, 'a'), "va")); - ASSERT_OK(Put("b", "vb")); - Status s = Flush(); - if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) { - EXPECT_TRUE(s.IsMemoryLimit()); - EXPECT_TRUE(s.ToString().find( - kCacheEntryRoleToCamelString[static_cast( - CacheEntryRole::kFileMetadata)]) != std::string::npos); - EXPECT_TRUE(s.ToString().find("memory limit based on cache capacity") != - std::string::npos); - } else { - EXPECT_TRUE(s.ok()); - } -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/wal_edit_test.cc b/db/wal_edit_test.cc deleted file mode 100644 index 0c18fb125..000000000 --- a/db/wal_edit_test.cc +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db/wal_edit.h" - -#include "db/db_test_util.h" -#include "file/file_util.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -TEST(WalSet, AddDeleteReset) { - WalSet wals; - ASSERT_TRUE(wals.GetWals().empty()); - - // Create WAL 1 - 10. - for (WalNumber log_number = 1; log_number <= 10; log_number++) { - wals.AddWal(WalAddition(log_number)); - } - ASSERT_EQ(wals.GetWals().size(), 10); - - // Delete WAL 1 - 5. 
- wals.DeleteWalsBefore(6); - ASSERT_EQ(wals.GetWals().size(), 5); - - WalNumber expected_log_number = 6; - for (auto it : wals.GetWals()) { - WalNumber log_number = it.first; - ASSERT_EQ(log_number, expected_log_number++); - } - - wals.Reset(); - ASSERT_TRUE(wals.GetWals().empty()); -} - -TEST(WalSet, Overwrite) { - constexpr WalNumber kNumber = 100; - constexpr uint64_t kBytes = 200; - WalSet wals; - wals.AddWal(WalAddition(kNumber)); - ASSERT_FALSE(wals.GetWals().at(kNumber).HasSyncedSize()); - wals.AddWal(WalAddition(kNumber, WalMetadata(kBytes))); - ASSERT_TRUE(wals.GetWals().at(kNumber).HasSyncedSize()); - ASSERT_EQ(wals.GetWals().at(kNumber).GetSyncedSizeInBytes(), kBytes); -} - -TEST(WalSet, SmallerSyncedSize) { - constexpr WalNumber kNumber = 100; - constexpr uint64_t kBytes = 100; - WalSet wals; - ASSERT_OK(wals.AddWal(WalAddition(kNumber, WalMetadata(kBytes)))); - const auto wals1 = wals.GetWals(); - Status s = wals.AddWal(WalAddition(kNumber, WalMetadata(0))); - const auto wals2 = wals.GetWals(); - ASSERT_OK(s); - ASSERT_EQ(wals1, wals2); -} - -TEST(WalSet, CreateTwice) { - constexpr WalNumber kNumber = 100; - WalSet wals; - ASSERT_OK(wals.AddWal(WalAddition(kNumber))); - Status s = wals.AddWal(WalAddition(kNumber)); - ASSERT_TRUE(s.IsCorruption()); - ASSERT_TRUE(s.ToString().find("WAL 100 is created more than once") != - std::string::npos); -} - -TEST(WalSet, DeleteAllWals) { - constexpr WalNumber kMaxWalNumber = 10; - WalSet wals; - for (WalNumber i = 1; i <= kMaxWalNumber; i++) { - wals.AddWal(WalAddition(i)); - } - ASSERT_OK(wals.DeleteWalsBefore(kMaxWalNumber + 1)); -} - -TEST(WalSet, AddObsoleteWal) { - constexpr WalNumber kNumber = 100; - WalSet wals; - ASSERT_OK(wals.DeleteWalsBefore(kNumber + 1)); - ASSERT_OK(wals.AddWal(WalAddition(kNumber))); - ASSERT_TRUE(wals.GetWals().empty()); -} - -TEST(WalSet, MinWalNumberToKeep) { - constexpr WalNumber kNumber = 100; - WalSet wals; - ASSERT_EQ(wals.GetMinWalNumberToKeep(), 0); - ASSERT_OK(wals.DeleteWalsBefore(kNumber)); - ASSERT_EQ(wals.GetMinWalNumberToKeep(), kNumber); - ASSERT_OK(wals.DeleteWalsBefore(kNumber - 1)); - ASSERT_EQ(wals.GetMinWalNumberToKeep(), kNumber); - ASSERT_OK(wals.DeleteWalsBefore(kNumber + 1)); - ASSERT_EQ(wals.GetMinWalNumberToKeep(), kNumber + 1); -} - -class WalSetTest : public DBTestBase { - public: - WalSetTest() : DBTestBase("WalSetTest", /* env_do_fsync */ true) {} - - void SetUp() override { - test_dir_ = test::PerThreadDBPath("wal_set_test"); - ASSERT_OK(env_->CreateDir(test_dir_)); - } - - void TearDown() override { - EXPECT_OK(DestroyDir(env_, test_dir_)); - logs_on_disk_.clear(); - wals_.Reset(); - } - - void CreateWalOnDisk(WalNumber number, const std::string& fname, - uint64_t size_bytes) { - std::unique_ptr f; - std::string fpath = Path(fname); - ASSERT_OK(env_->NewWritableFile(fpath, &f, EnvOptions())); - std::string content(size_bytes, '0'); - ASSERT_OK(f->Append(content)); - ASSERT_OK(f->Close()); - - logs_on_disk_[number] = fpath; - } - - void AddWalToWalSet(WalNumber number, uint64_t size_bytes) { - // Create WAL. - ASSERT_OK(wals_.AddWal(WalAddition(number))); - // Close WAL. 
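The two AddWal() calls in AddWalToWalSet() above model a WAL's lifecycle in the MANIFEST: first a creation record with no metadata, then a later record that attaches WalMetadata carrying the synced size, which is what HasSyncedSize() keys off. A minimal sketch mirroring the Overwrite test earlier in this file; the WAL number and size are arbitrary:

    // Sketch of the create-then-attach-metadata lifecycle (illustrative).
    WalSet set;
    ASSERT_OK(set.AddWal(WalAddition(7)));          // created, size not yet known
    ASSERT_FALSE(set.GetWals().at(7).HasSyncedSize());
    ASSERT_OK(set.AddWal(WalAddition(7, WalMetadata(4096))));  // synced size known
    ASSERT_TRUE(set.GetWals().at(7).HasSyncedSize());
    ASSERT_EQ(set.GetWals().at(7).GetSyncedSizeInBytes(), 4096);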
- WalMetadata wal(size_bytes); - ASSERT_OK(wals_.AddWal(WalAddition(number, wal))); - } - - Status CheckWals() const { return wals_.CheckWals(env_, logs_on_disk_); } - - private: - std::string test_dir_; - std::unordered_map logs_on_disk_; - WalSet wals_; - - std::string Path(const std::string& fname) { return test_dir_ + "/" + fname; } -}; - -TEST_F(WalSetTest, CheckEmptyWals) { ASSERT_OK(CheckWals()); } - -TEST_F(WalSetTest, CheckWals) { - for (int number = 1; number < 10; number++) { - uint64_t size = rand() % 100; - std::stringstream ss; - ss << "log" << number; - std::string fname = ss.str(); - CreateWalOnDisk(number, fname, size); - // log 0 - 5 are obsolete. - if (number > 5) { - AddWalToWalSet(number, size); - } - } - ASSERT_OK(CheckWals()); -} - -TEST_F(WalSetTest, CheckMissingWals) { - for (int number = 1; number < 10; number++) { - uint64_t size = rand() % 100; - AddWalToWalSet(number, size); - // logs with even number are missing from disk. - if (number % 2) { - std::stringstream ss; - ss << "log" << number; - std::string fname = ss.str(); - CreateWalOnDisk(number, fname, size); - } - } - - Status s = CheckWals(); - ASSERT_TRUE(s.IsCorruption()) << s.ToString(); - // The first log with even number is missing. - std::stringstream expected_err; - expected_err << "Missing WAL with log number: " << 2; - ASSERT_TRUE(s.ToString().find(expected_err.str()) != std::string::npos) - << s.ToString(); -} - -TEST_F(WalSetTest, CheckWalsWithShrinkedSize) { - for (int number = 1; number < 10; number++) { - uint64_t size = rand() % 100 + 1; - AddWalToWalSet(number, size); - // logs with even number have shrinked size. - std::stringstream ss; - ss << "log" << number; - std::string fname = ss.str(); - CreateWalOnDisk(number, fname, (number % 2) ? size : size - 1); - } - - Status s = CheckWals(); - ASSERT_TRUE(s.IsCorruption()) << s.ToString(); - // The first log with even number has wrong size. - std::stringstream expected_err; - expected_err << "Size mismatch: WAL (log number: " << 2 << ")"; - ASSERT_TRUE(s.ToString().find(expected_err.str()) != std::string::npos) - << s.ToString(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/wal_manager_test.cc b/db/wal_manager_test.cc deleted file mode 100644 index 0144e1846..000000000 --- a/db/wal_manager_test.cc +++ /dev/null @@ -1,336 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- - -#include "db/wal_manager.h" - -#include -#include - -#include "db/column_family.h" -#include "db/db_impl/db_impl.h" -#include "db/log_writer.h" -#include "db/version_set.h" -#include "env/mock_env.h" -#include "file/writable_file_writer.h" -#include "rocksdb/cache.h" -#include "rocksdb/file_system.h" -#include "rocksdb/write_batch.h" -#include "rocksdb/write_buffer_manager.h" -#include "table/mock_table.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -// TODO(icanadi) mock out VersionSet -// TODO(icanadi) move other WalManager-specific tests from db_test here -class WalManagerTest : public testing::Test { - public: - WalManagerTest() - : dbname_(test::PerThreadDBPath("wal_manager_test")), - db_options_(), - table_cache_(NewLRUCache(50000, 16)), - write_buffer_manager_(db_options_.db_write_buffer_size), - current_log_number_(0) { - env_.reset(MockEnv::Create(Env::Default())); - EXPECT_OK(DestroyDB(dbname_, Options())); - } - - void Init() { - ASSERT_OK(env_->CreateDirIfMissing(dbname_)); - ASSERT_OK(env_->CreateDirIfMissing(ArchivalDirectory(dbname_))); - db_options_.db_paths.emplace_back(dbname_, - std::numeric_limits::max()); - db_options_.wal_dir = dbname_; - db_options_.env = env_.get(); - db_options_.fs = env_->GetFileSystem(); - db_options_.clock = env_->GetSystemClock().get(); - - versions_.reset( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); - - wal_manager_.reset( - new WalManager(db_options_, env_options_, nullptr /*IOTracer*/)); - } - - void Reopen() { - wal_manager_.reset( - new WalManager(db_options_, env_options_, nullptr /*IOTracer*/)); - } - - // NOT thread safe - void Put(const std::string& key, const std::string& value) { - assert(current_log_writer_.get() != nullptr); - uint64_t seq = versions_->LastSequence() + 1; - WriteBatch batch; - ASSERT_OK(batch.Put(key, value)); - WriteBatchInternal::SetSequence(&batch, seq); - ASSERT_OK( - current_log_writer_->AddRecord(WriteBatchInternal::Contents(&batch))); - versions_->SetLastAllocatedSequence(seq); - versions_->SetLastPublishedSequence(seq); - versions_->SetLastSequence(seq); - } - - // NOT thread safe - void RollTheLog(bool /*archived*/) { - current_log_number_++; - std::string fname = ArchivedLogFileName(dbname_, current_log_number_); - const auto& fs = env_->GetFileSystem(); - std::unique_ptr file_writer; - ASSERT_OK(WritableFileWriter::Create(fs, fname, env_options_, &file_writer, - nullptr)); - current_log_writer_.reset( - new log::Writer(std::move(file_writer), 0, false)); - } - - void CreateArchiveLogs(int num_logs, int entries_per_log) { - for (int i = 1; i <= num_logs; ++i) { - RollTheLog(true); - for (int k = 0; k < entries_per_log; ++k) { - Put(std::to_string(k), std::string(1024, 'a')); - } - } - } - - std::unique_ptr OpenTransactionLogIter( - const SequenceNumber seq) { - std::unique_ptr iter; - Status status = wal_manager_->GetUpdatesSince( - seq, &iter, TransactionLogIterator::ReadOptions(), versions_.get()); - EXPECT_OK(status); - return iter; - } - - std::unique_ptr env_; - std::string dbname_; - ImmutableDBOptions db_options_; - WriteController write_controller_; - EnvOptions env_options_; - std::shared_ptr table_cache_; - WriteBufferManager write_buffer_manager_; - std::unique_ptr versions_; - std::unique_ptr wal_manager_; - - std::unique_ptr 
current_log_writer_; - uint64_t current_log_number_; -}; - -TEST_F(WalManagerTest, ReadFirstRecordCache) { - Init(); - std::string path = dbname_ + "/000001.log"; - std::unique_ptr file; - ASSERT_OK(env_->GetFileSystem()->NewWritableFile(path, FileOptions(), &file, - nullptr)); - - SequenceNumber s; - ASSERT_OK(wal_manager_->TEST_ReadFirstLine(path, 1 /* number */, &s)); - ASSERT_EQ(s, 0U); - - ASSERT_OK( - wal_manager_->TEST_ReadFirstRecord(kAliveLogFile, 1 /* number */, &s)); - ASSERT_EQ(s, 0U); - - std::unique_ptr file_writer( - new WritableFileWriter(std::move(file), path, FileOptions())); - log::Writer writer(std::move(file_writer), 1, - db_options_.recycle_log_file_num > 0); - WriteBatch batch; - ASSERT_OK(batch.Put("foo", "bar")); - WriteBatchInternal::SetSequence(&batch, 10); - ASSERT_OK(writer.AddRecord(WriteBatchInternal::Contents(&batch))); - - // TODO(icanadi) move SpecialEnv outside of db_test, so we can reuse it here. - // Waiting for lei to finish with db_test - // env_->count_sequential_reads_ = true; - // sequential_read_counter_ sanity test - // ASSERT_EQ(env_->sequential_read_counter_.Read(), 0); - - ASSERT_OK(wal_manager_->TEST_ReadFirstRecord(kAliveLogFile, 1, &s)); - ASSERT_EQ(s, 10U); - // did a read - // TODO(icanadi) move SpecialEnv outside of db_test, so we can reuse it here - // ASSERT_EQ(env_->sequential_read_counter_.Read(), 1); - - ASSERT_OK(wal_manager_->TEST_ReadFirstRecord(kAliveLogFile, 1, &s)); - ASSERT_EQ(s, 10U); - // no new reads since the value is cached - // TODO(icanadi) move SpecialEnv outside of db_test, so we can reuse it here - // ASSERT_EQ(env_->sequential_read_counter_.Read(), 1); -} - -namespace { -uint64_t GetLogDirSize(std::string dir_path, Env* env) { - uint64_t dir_size = 0; - std::vector files; - EXPECT_OK(env->GetChildren(dir_path, &files)); - for (auto& f : files) { - uint64_t number; - FileType type; - if (ParseFileName(f, &number, &type) && type == kWalFile) { - std::string const file_path = dir_path + "/" + f; - uint64_t file_size; - EXPECT_OK(env->GetFileSize(file_path, &file_size)); - dir_size += file_size; - } - } - return dir_size; -} -std::vector ListSpecificFiles( - Env* env, const std::string& path, const FileType expected_file_type) { - std::vector files; - std::vector file_numbers; - uint64_t number; - FileType type; - EXPECT_OK(env->GetChildren(path, &files)); - for (size_t i = 0; i < files.size(); ++i) { - if (ParseFileName(files[i], &number, &type)) { - if (type == expected_file_type) { - file_numbers.push_back(number); - } - } - } - return file_numbers; -} - -int CountRecords(TransactionLogIterator* iter) { - int count = 0; - SequenceNumber lastSequence = 0; - BatchResult res; - while (iter->Valid()) { - res = iter->GetBatch(); - EXPECT_TRUE(res.sequence > lastSequence); - ++count; - lastSequence = res.sequence; - EXPECT_OK(iter->status()); - iter->Next(); - } - EXPECT_OK(iter->status()); - return count; -} -} // anonymous namespace - -TEST_F(WalManagerTest, WALArchivalSizeLimit) { - db_options_.WAL_ttl_seconds = 0; - db_options_.WAL_size_limit_MB = 1000; - Init(); - - // TEST : Create WalManager with huge size limit and no ttl. - // Create some archived files and call PurgeObsoleteWALFiles(). - // Count the archived log files that survived. - // Assert that all of them did. - // Change size limit. Re-open WalManager. - // Assert that archive is not greater than WAL_size_limit_MB after - // PurgeObsoleteWALFiles() - // Set ttl and time_to_check_ to small values. Re-open db. 
- // Assert that there are no archived logs left. - - std::string archive_dir = ArchivalDirectory(dbname_); - CreateArchiveLogs(20, 5000); - - std::vector log_files = - ListSpecificFiles(env_.get(), archive_dir, kWalFile); - ASSERT_EQ(log_files.size(), 20U); - - db_options_.WAL_size_limit_MB = 8; - Reopen(); - wal_manager_->PurgeObsoleteWALFiles(); - - uint64_t archive_size = GetLogDirSize(archive_dir, env_.get()); - ASSERT_TRUE(archive_size <= db_options_.WAL_size_limit_MB * 1024 * 1024); - - db_options_.WAL_ttl_seconds = 1; - env_->SleepForMicroseconds(2 * 1000 * 1000); - Reopen(); - wal_manager_->PurgeObsoleteWALFiles(); - - log_files = ListSpecificFiles(env_.get(), archive_dir, kWalFile); - ASSERT_TRUE(log_files.empty()); -} - -TEST_F(WalManagerTest, WALArchivalTtl) { - db_options_.WAL_ttl_seconds = 1000; - Init(); - - // TEST : Create WalManager with a ttl and no size limit. - // Create some archived log files and call PurgeObsoleteWALFiles(). - // Assert that files are not deleted - // Reopen db with small ttl. - // Assert that all archived logs was removed. - - std::string archive_dir = ArchivalDirectory(dbname_); - CreateArchiveLogs(20, 5000); - - std::vector log_files = - ListSpecificFiles(env_.get(), archive_dir, kWalFile); - ASSERT_GT(log_files.size(), 0U); - - db_options_.WAL_ttl_seconds = 1; - env_->SleepForMicroseconds(3 * 1000 * 1000); - Reopen(); - wal_manager_->PurgeObsoleteWALFiles(); - - log_files = ListSpecificFiles(env_.get(), archive_dir, kWalFile); - ASSERT_TRUE(log_files.empty()); -} - -TEST_F(WalManagerTest, TransactionLogIteratorMoveOverZeroFiles) { - Init(); - RollTheLog(false); - Put("key1", std::string(1024, 'a')); - // Create a zero record WAL file. - RollTheLog(false); - RollTheLog(false); - - Put("key2", std::string(1024, 'a')); - - auto iter = OpenTransactionLogIter(0); - ASSERT_EQ(2, CountRecords(iter.get())); -} - -TEST_F(WalManagerTest, TransactionLogIteratorJustEmptyFile) { - Init(); - RollTheLog(false); - auto iter = OpenTransactionLogIter(0); - // Check that an empty iterator is returned - ASSERT_TRUE(!iter->Valid()); -} - -TEST_F(WalManagerTest, TransactionLogIteratorNewFileWhileScanning) { - Init(); - CreateArchiveLogs(2, 100); - auto iter = OpenTransactionLogIter(0); - CreateArchiveLogs(1, 100); - int i = 0; - for (; iter->Valid(); iter->Next()) { - i++; - } - ASSERT_EQ(i, 200); - // A new log file was added after the iterator was created. - // TryAgain indicates a new iterator is needed to fetch the new data - ASSERT_TRUE(iter->status().IsTryAgain()); - - iter = OpenTransactionLogIter(0); - i = 0; - for (; iter->Valid(); iter->Next()) { - i++; - } - ASSERT_EQ(i, 300); - ASSERT_TRUE(iter->status().ok()); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc deleted file mode 100644 index 4bd74f71e..000000000 --- a/db/write_batch_test.cc +++ /dev/null @@ -1,1112 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
See the AUTHORS file for names of contributors. - -#include - -#include "db/column_family.h" -#include "db/db_test_util.h" -#include "db/memtable.h" -#include "db/write_batch_internal.h" -#include "rocksdb/comparator.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/utilities/write_batch_with_index.h" -#include "rocksdb/write_buffer_manager.h" -#include "table/scoped_arena_iterator.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -static std::string PrintContents(WriteBatch* b, - bool merge_operator_supported = true) { - InternalKeyComparator cmp(BytewiseComparator()); - auto factory = std::make_shared(); - Options options; - options.memtable_factory = factory; - if (merge_operator_supported) { - options.merge_operator.reset(new TestPutOperator()); - } - ImmutableOptions ioptions(options); - WriteBufferManager wb(options.db_write_buffer_size); - MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, - kMaxSequenceNumber, 0 /* column_family_id */); - mem->Ref(); - std::string state; - ColumnFamilyMemTablesDefault cf_mems_default(mem); - Status s = - WriteBatchInternal::InsertInto(b, &cf_mems_default, nullptr, nullptr); - uint32_t count = 0; - int put_count = 0; - int delete_count = 0; - int single_delete_count = 0; - int delete_range_count = 0; - int merge_count = 0; - for (int i = 0; i < 2; ++i) { - Arena arena; - ScopedArenaIterator arena_iter_guard; - std::unique_ptr iter_guard; - InternalIterator* iter; - if (i == 0) { - iter = mem->NewIterator(ReadOptions(), &arena); - arena_iter_guard.set(iter); - } else { - iter = mem->NewRangeTombstoneIterator(ReadOptions(), - kMaxSequenceNumber /* read_seq */, - false /* immutable_memtable */); - iter_guard.reset(iter); - } - if (iter == nullptr) { - continue; - } - EXPECT_OK(iter->status()); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ParsedInternalKey ikey; - ikey.clear(); - EXPECT_OK(ParseInternalKey(iter->key(), &ikey, true /* log_err_key */)); - switch (ikey.type) { - case kTypeValue: - state.append("Put("); - state.append(ikey.user_key.ToString()); - state.append(", "); - state.append(iter->value().ToString()); - state.append(")"); - count++; - put_count++; - break; - case kTypeDeletion: - state.append("Delete("); - state.append(ikey.user_key.ToString()); - state.append(")"); - count++; - delete_count++; - break; - case kTypeSingleDeletion: - state.append("SingleDelete("); - state.append(ikey.user_key.ToString()); - state.append(")"); - count++; - single_delete_count++; - break; - case kTypeRangeDeletion: - state.append("DeleteRange("); - state.append(ikey.user_key.ToString()); - state.append(", "); - state.append(iter->value().ToString()); - state.append(")"); - count++; - delete_range_count++; - break; - case kTypeMerge: - state.append("Merge("); - state.append(ikey.user_key.ToString()); - state.append(", "); - state.append(iter->value().ToString()); - state.append(")"); - count++; - merge_count++; - break; - default: - assert(false); - break; - } - state.append("@"); - state.append(std::to_string(ikey.sequence)); - } - EXPECT_OK(iter->status()); - } - if (s.ok()) { - EXPECT_EQ(b->HasPut(), put_count > 0); - EXPECT_EQ(b->HasDelete(), delete_count > 0); - EXPECT_EQ(b->HasSingleDelete(), single_delete_count > 0); - EXPECT_EQ(b->HasDeleteRange(), delete_range_count > 0); - EXPECT_EQ(b->HasMerge(), merge_count > 0); - if (count != WriteBatchInternal::Count(b)) { - 
state.append("CountMismatch()"); - } - } else { - state.append(s.ToString()); - } - delete mem->Unref(); - return state; -} - -class WriteBatchTest : public testing::Test {}; - -TEST_F(WriteBatchTest, Empty) { - WriteBatch batch; - ASSERT_EQ("", PrintContents(&batch)); - ASSERT_EQ(0u, WriteBatchInternal::Count(&batch)); - ASSERT_EQ(0u, batch.Count()); -} - -TEST_F(WriteBatchTest, Multiple) { - WriteBatch batch; - ASSERT_OK(batch.Put(Slice("foo"), Slice("bar"))); - ASSERT_OK(batch.Delete(Slice("box"))); - ASSERT_OK(batch.DeleteRange(Slice("bar"), Slice("foo"))); - ASSERT_OK(batch.Put(Slice("baz"), Slice("boo"))); - WriteBatchInternal::SetSequence(&batch, 100); - ASSERT_EQ(100U, WriteBatchInternal::Sequence(&batch)); - ASSERT_EQ(4u, WriteBatchInternal::Count(&batch)); - ASSERT_EQ( - "Put(baz, boo)@103" - "Delete(box)@101" - "Put(foo, bar)@100" - "DeleteRange(bar, foo)@102", - PrintContents(&batch)); - ASSERT_EQ(4u, batch.Count()); -} - -TEST_F(WriteBatchTest, Corruption) { - WriteBatch batch; - ASSERT_OK(batch.Put(Slice("foo"), Slice("bar"))); - ASSERT_OK(batch.Delete(Slice("box"))); - WriteBatchInternal::SetSequence(&batch, 200); - Slice contents = WriteBatchInternal::Contents(&batch); - ASSERT_OK(WriteBatchInternal::SetContents( - &batch, Slice(contents.data(), contents.size() - 1))); - ASSERT_EQ( - "Put(foo, bar)@200" - "Corruption: bad WriteBatch Delete", - PrintContents(&batch)); -} - -TEST_F(WriteBatchTest, Append) { - WriteBatch b1, b2; - WriteBatchInternal::SetSequence(&b1, 200); - WriteBatchInternal::SetSequence(&b2, 300); - ASSERT_OK(WriteBatchInternal::Append(&b1, &b2)); - ASSERT_EQ("", PrintContents(&b1)); - ASSERT_EQ(0u, b1.Count()); - ASSERT_OK(b2.Put("a", "va")); - ASSERT_OK(WriteBatchInternal::Append(&b1, &b2)); - ASSERT_EQ("Put(a, va)@200", PrintContents(&b1)); - ASSERT_EQ(1u, b1.Count()); - b2.Clear(); - ASSERT_OK(b2.Put("b", "vb")); - ASSERT_OK(WriteBatchInternal::Append(&b1, &b2)); - ASSERT_EQ( - "Put(a, va)@200" - "Put(b, vb)@201", - PrintContents(&b1)); - ASSERT_EQ(2u, b1.Count()); - ASSERT_OK(b2.Delete("foo")); - ASSERT_OK(WriteBatchInternal::Append(&b1, &b2)); - ASSERT_EQ( - "Put(a, va)@200" - "Put(b, vb)@202" - "Put(b, vb)@201" - "Delete(foo)@203", - PrintContents(&b1)); - ASSERT_EQ(4u, b1.Count()); - b2.Clear(); - ASSERT_OK(b2.Put("c", "cc")); - ASSERT_OK(b2.Put("d", "dd")); - b2.MarkWalTerminationPoint(); - ASSERT_OK(b2.Put("e", "ee")); - ASSERT_OK(WriteBatchInternal::Append(&b1, &b2, /*wal only*/ true)); - ASSERT_EQ( - "Put(a, va)@200" - "Put(b, vb)@202" - "Put(b, vb)@201" - "Put(c, cc)@204" - "Put(d, dd)@205" - "Delete(foo)@203", - PrintContents(&b1)); - ASSERT_EQ(6u, b1.Count()); - ASSERT_EQ( - "Put(c, cc)@0" - "Put(d, dd)@1" - "Put(e, ee)@2", - PrintContents(&b2)); - ASSERT_EQ(3u, b2.Count()); -} - -TEST_F(WriteBatchTest, SingleDeletion) { - WriteBatch batch; - WriteBatchInternal::SetSequence(&batch, 100); - ASSERT_EQ("", PrintContents(&batch)); - ASSERT_EQ(0u, batch.Count()); - ASSERT_OK(batch.Put("a", "va")); - ASSERT_EQ("Put(a, va)@100", PrintContents(&batch)); - ASSERT_EQ(1u, batch.Count()); - ASSERT_OK(batch.SingleDelete("a")); - ASSERT_EQ( - "SingleDelete(a)@101" - "Put(a, va)@100", - PrintContents(&batch)); - ASSERT_EQ(2u, batch.Count()); -} - -namespace { -struct TestHandler : public WriteBatch::Handler { - std::string seen; - Status PutCF(uint32_t column_family_id, const Slice& key, - const Slice& value) override { - if (column_family_id == 0) { - seen += "Put(" + key.ToString() + ", " + value.ToString() + ")"; - } else { - seen += "PutCF(" + 
std::to_string(column_family_id) + ", " + - key.ToString() + ", " + value.ToString() + ")"; - } - return Status::OK(); - } - Status DeleteCF(uint32_t column_family_id, const Slice& key) override { - if (column_family_id == 0) { - seen += "Delete(" + key.ToString() + ")"; - } else { - seen += "DeleteCF(" + std::to_string(column_family_id) + ", " + - key.ToString() + ")"; - } - return Status::OK(); - } - Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) override { - if (column_family_id == 0) { - seen += "SingleDelete(" + key.ToString() + ")"; - } else { - seen += "SingleDeleteCF(" + std::to_string(column_family_id) + ", " + - key.ToString() + ")"; - } - return Status::OK(); - } - Status DeleteRangeCF(uint32_t column_family_id, const Slice& begin_key, - const Slice& end_key) override { - if (column_family_id == 0) { - seen += "DeleteRange(" + begin_key.ToString() + ", " + - end_key.ToString() + ")"; - } else { - seen += "DeleteRangeCF(" + std::to_string(column_family_id) + ", " + - begin_key.ToString() + ", " + end_key.ToString() + ")"; - } - return Status::OK(); - } - Status MergeCF(uint32_t column_family_id, const Slice& key, - const Slice& value) override { - if (column_family_id == 0) { - seen += "Merge(" + key.ToString() + ", " + value.ToString() + ")"; - } else { - seen += "MergeCF(" + std::to_string(column_family_id) + ", " + - key.ToString() + ", " + value.ToString() + ")"; - } - return Status::OK(); - } - void LogData(const Slice& blob) override { - seen += "LogData(" + blob.ToString() + ")"; - } - Status MarkBeginPrepare(bool unprepare) override { - seen += - "MarkBeginPrepare(" + std::string(unprepare ? "true" : "false") + ")"; - return Status::OK(); - } - Status MarkEndPrepare(const Slice& xid) override { - seen += "MarkEndPrepare(" + xid.ToString() + ")"; - return Status::OK(); - } - Status MarkNoop(bool empty_batch) override { - seen += "MarkNoop(" + std::string(empty_batch ? 
"true" : "false") + ")"; - return Status::OK(); - } - Status MarkCommit(const Slice& xid) override { - seen += "MarkCommit(" + xid.ToString() + ")"; - return Status::OK(); - } - Status MarkCommitWithTimestamp(const Slice& xid, const Slice& ts) override { - seen += "MarkCommitWithTimestamp(" + xid.ToString() + ", " + - ts.ToString(true) + ")"; - return Status::OK(); - } - Status MarkRollback(const Slice& xid) override { - seen += "MarkRollback(" + xid.ToString() + ")"; - return Status::OK(); - } -}; -} // anonymous namespace - -TEST_F(WriteBatchTest, PutNotImplemented) { - WriteBatch batch; - ASSERT_OK(batch.Put(Slice("k1"), Slice("v1"))); - ASSERT_EQ(1u, batch.Count()); - ASSERT_EQ("Put(k1, v1)@0", PrintContents(&batch)); - - WriteBatch::Handler handler; - ASSERT_OK(batch.Iterate(&handler)); -} - -TEST_F(WriteBatchTest, DeleteNotImplemented) { - WriteBatch batch; - ASSERT_OK(batch.Delete(Slice("k2"))); - ASSERT_EQ(1u, batch.Count()); - ASSERT_EQ("Delete(k2)@0", PrintContents(&batch)); - - WriteBatch::Handler handler; - ASSERT_OK(batch.Iterate(&handler)); -} - -TEST_F(WriteBatchTest, SingleDeleteNotImplemented) { - WriteBatch batch; - ASSERT_OK(batch.SingleDelete(Slice("k2"))); - ASSERT_EQ(1u, batch.Count()); - ASSERT_EQ("SingleDelete(k2)@0", PrintContents(&batch)); - - WriteBatch::Handler handler; - ASSERT_OK(batch.Iterate(&handler)); -} - -TEST_F(WriteBatchTest, MergeNotImplemented) { - WriteBatch batch; - ASSERT_OK(batch.Merge(Slice("foo"), Slice("bar"))); - ASSERT_EQ(1u, batch.Count()); - ASSERT_EQ("Merge(foo, bar)@0", PrintContents(&batch)); - - WriteBatch::Handler handler; - ASSERT_OK(batch.Iterate(&handler)); -} - -TEST_F(WriteBatchTest, MergeWithoutOperatorInsertionFailure) { - WriteBatch batch; - ASSERT_OK(batch.Merge(Slice("foo"), Slice("bar"))); - ASSERT_EQ(1u, batch.Count()); - ASSERT_EQ( - "Invalid argument: Merge requires `ColumnFamilyOptions::merge_operator " - "!= nullptr`", - PrintContents(&batch, false /* merge_operator_supported */)); -} - -TEST_F(WriteBatchTest, Blob) { - WriteBatch batch; - ASSERT_OK(batch.Put(Slice("k1"), Slice("v1"))); - ASSERT_OK(batch.Put(Slice("k2"), Slice("v2"))); - ASSERT_OK(batch.Put(Slice("k3"), Slice("v3"))); - ASSERT_OK(batch.PutLogData(Slice("blob1"))); - ASSERT_OK(batch.Delete(Slice("k2"))); - ASSERT_OK(batch.SingleDelete(Slice("k3"))); - ASSERT_OK(batch.PutLogData(Slice("blob2"))); - ASSERT_OK(batch.Merge(Slice("foo"), Slice("bar"))); - ASSERT_EQ(6u, batch.Count()); - ASSERT_EQ( - "Merge(foo, bar)@5" - "Put(k1, v1)@0" - "Delete(k2)@3" - "Put(k2, v2)@1" - "SingleDelete(k3)@4" - "Put(k3, v3)@2", - PrintContents(&batch)); - - TestHandler handler; - ASSERT_OK(batch.Iterate(&handler)); - ASSERT_EQ( - "Put(k1, v1)" - "Put(k2, v2)" - "Put(k3, v3)" - "LogData(blob1)" - "Delete(k2)" - "SingleDelete(k3)" - "LogData(blob2)" - "Merge(foo, bar)", - handler.seen); -} - -TEST_F(WriteBatchTest, PrepareCommit) { - WriteBatch batch; - ASSERT_OK(WriteBatchInternal::InsertNoop(&batch)); - ASSERT_OK(batch.Put(Slice("k1"), Slice("v1"))); - ASSERT_OK(batch.Put(Slice("k2"), Slice("v2"))); - batch.SetSavePoint(); - ASSERT_OK(WriteBatchInternal::MarkEndPrepare(&batch, Slice("xid1"))); - Status s = batch.RollbackToSavePoint(); - ASSERT_EQ(s, Status::NotFound()); - ASSERT_OK(WriteBatchInternal::MarkCommit(&batch, Slice("xid1"))); - ASSERT_OK(WriteBatchInternal::MarkRollback(&batch, Slice("xid1"))); - ASSERT_EQ(2u, batch.Count()); - - TestHandler handler; - ASSERT_OK(batch.Iterate(&handler)); - ASSERT_EQ( - "MarkBeginPrepare(false)" - "Put(k1, v1)" - "Put(k2, v2)" - 
"MarkEndPrepare(xid1)" - "MarkCommit(xid1)" - "MarkRollback(xid1)", - handler.seen); -} - -// It requires more than 30GB of memory to run the test. With single memory -// allocation of more than 30GB. -// Not all platform can run it. Also it runs a long time. So disable it. -TEST_F(WriteBatchTest, DISABLED_ManyUpdates) { - // Insert key and value of 3GB and push total batch size to 12GB. - static const size_t kKeyValueSize = 4u; - static const uint32_t kNumUpdates = uint32_t{3} << 30; - std::string raw(kKeyValueSize, 'A'); - WriteBatch batch(kNumUpdates * (4 + kKeyValueSize * 2) + 1024u); - char c = 'A'; - for (uint32_t i = 0; i < kNumUpdates; i++) { - if (c > 'Z') { - c = 'A'; - } - raw[0] = c; - raw[raw.length() - 1] = c; - c++; - ASSERT_OK(batch.Put(raw, raw)); - } - - ASSERT_EQ(kNumUpdates, batch.Count()); - - struct NoopHandler : public WriteBatch::Handler { - uint32_t num_seen = 0; - char expected_char = 'A'; - Status PutCF(uint32_t /*column_family_id*/, const Slice& key, - const Slice& value) override { - EXPECT_EQ(kKeyValueSize, key.size()); - EXPECT_EQ(kKeyValueSize, value.size()); - EXPECT_EQ(expected_char, key[0]); - EXPECT_EQ(expected_char, value[0]); - EXPECT_EQ(expected_char, key[kKeyValueSize - 1]); - EXPECT_EQ(expected_char, value[kKeyValueSize - 1]); - expected_char++; - if (expected_char > 'Z') { - expected_char = 'A'; - } - ++num_seen; - return Status::OK(); - } - Status DeleteCF(uint32_t /*column_family_id*/, - const Slice& /*key*/) override { - ADD_FAILURE(); - return Status::OK(); - } - Status SingleDeleteCF(uint32_t /*column_family_id*/, - const Slice& /*key*/) override { - ADD_FAILURE(); - return Status::OK(); - } - Status MergeCF(uint32_t /*column_family_id*/, const Slice& /*key*/, - const Slice& /*value*/) override { - ADD_FAILURE(); - return Status::OK(); - } - void LogData(const Slice& /*blob*/) override { ADD_FAILURE(); } - bool Continue() override { return num_seen < kNumUpdates; } - } handler; - - ASSERT_OK(batch.Iterate(&handler)); - ASSERT_EQ(kNumUpdates, handler.num_seen); -} - -// The test requires more than 18GB memory to run it, with single memory -// allocation of more than 12GB. Not all the platform can run it. So disable it. -TEST_F(WriteBatchTest, DISABLED_LargeKeyValue) { - // Insert key and value of 3GB and push total batch size to 12GB. 
- static const size_t kKeyValueSize = 3221225472u; - std::string raw(kKeyValueSize, 'A'); - WriteBatch batch(size_t(12884901888ull + 1024u)); - for (char i = 0; i < 2; i++) { - raw[0] = 'A' + i; - raw[raw.length() - 1] = 'A' - i; - ASSERT_OK(batch.Put(raw, raw)); - } - - ASSERT_EQ(2u, batch.Count()); - - struct NoopHandler : public WriteBatch::Handler { - int num_seen = 0; - Status PutCF(uint32_t /*column_family_id*/, const Slice& key, - const Slice& value) override { - EXPECT_EQ(kKeyValueSize, key.size()); - EXPECT_EQ(kKeyValueSize, value.size()); - EXPECT_EQ('A' + num_seen, key[0]); - EXPECT_EQ('A' + num_seen, value[0]); - EXPECT_EQ('A' - num_seen, key[kKeyValueSize - 1]); - EXPECT_EQ('A' - num_seen, value[kKeyValueSize - 1]); - ++num_seen; - return Status::OK(); - } - Status DeleteCF(uint32_t /*column_family_id*/, - const Slice& /*key*/) override { - ADD_FAILURE(); - return Status::OK(); - } - Status SingleDeleteCF(uint32_t /*column_family_id*/, - const Slice& /*key*/) override { - ADD_FAILURE(); - return Status::OK(); - } - Status MergeCF(uint32_t /*column_family_id*/, const Slice& /*key*/, - const Slice& /*value*/) override { - ADD_FAILURE(); - return Status::OK(); - } - void LogData(const Slice& /*blob*/) override { ADD_FAILURE(); } - bool Continue() override { return num_seen < 2; } - } handler; - - ASSERT_OK(batch.Iterate(&handler)); - ASSERT_EQ(2, handler.num_seen); -} - -TEST_F(WriteBatchTest, Continue) { - WriteBatch batch; - - struct Handler : public TestHandler { - int num_seen = 0; - Status PutCF(uint32_t column_family_id, const Slice& key, - const Slice& value) override { - ++num_seen; - return TestHandler::PutCF(column_family_id, key, value); - } - Status DeleteCF(uint32_t column_family_id, const Slice& key) override { - ++num_seen; - return TestHandler::DeleteCF(column_family_id, key); - } - Status SingleDeleteCF(uint32_t column_family_id, - const Slice& key) override { - ++num_seen; - return TestHandler::SingleDeleteCF(column_family_id, key); - } - Status MergeCF(uint32_t column_family_id, const Slice& key, - const Slice& value) override { - ++num_seen; - return TestHandler::MergeCF(column_family_id, key, value); - } - void LogData(const Slice& blob) override { - ++num_seen; - TestHandler::LogData(blob); - } - bool Continue() override { return num_seen < 5; } - } handler; - - ASSERT_OK(batch.Put(Slice("k1"), Slice("v1"))); - ASSERT_OK(batch.Put(Slice("k2"), Slice("v2"))); - ASSERT_OK(batch.PutLogData(Slice("blob1"))); - ASSERT_OK(batch.Delete(Slice("k1"))); - ASSERT_OK(batch.SingleDelete(Slice("k2"))); - ASSERT_OK(batch.PutLogData(Slice("blob2"))); - ASSERT_OK(batch.Merge(Slice("foo"), Slice("bar"))); - ASSERT_OK(batch.Iterate(&handler)); - ASSERT_EQ( - "Put(k1, v1)" - "Put(k2, v2)" - "LogData(blob1)" - "Delete(k1)" - "SingleDelete(k2)", - handler.seen); -} - -TEST_F(WriteBatchTest, PutGatherSlices) { - WriteBatch batch; - ASSERT_OK(batch.Put(Slice("foo"), Slice("bar"))); - - { - // Try a write where the key is one slice but the value is two - Slice key_slice("baz"); - Slice value_slices[2] = {Slice("header"), Slice("payload")}; - ASSERT_OK( - batch.Put(SliceParts(&key_slice, 1), SliceParts(value_slices, 2))); - } - - { - // One where the key is composite but the value is a single slice - Slice key_slices[3] = {Slice("key"), Slice("part2"), Slice("part3")}; - Slice value_slice("value"); - ASSERT_OK( - batch.Put(SliceParts(key_slices, 3), SliceParts(&value_slice, 1))); - } - - WriteBatchInternal::SetSequence(&batch, 100); - ASSERT_EQ( - "Put(baz, headerpayload)@101" - 
"Put(foo, bar)@100" - "Put(keypart2part3, value)@102", - PrintContents(&batch)); - ASSERT_EQ(3u, batch.Count()); -} - -namespace { -class ColumnFamilyHandleImplDummy : public ColumnFamilyHandleImpl { - public: - explicit ColumnFamilyHandleImplDummy(int id) - : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), id_(id) {} - explicit ColumnFamilyHandleImplDummy(int id, const Comparator* ucmp) - : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), - id_(id), - ucmp_(ucmp) {} - uint32_t GetID() const override { return id_; } - const Comparator* GetComparator() const override { return ucmp_; } - - private: - uint32_t id_; - const Comparator* const ucmp_ = BytewiseComparator(); -}; -} // anonymous namespace - -TEST_F(WriteBatchTest, ColumnFamiliesBatchTest) { - WriteBatch batch; - ColumnFamilyHandleImplDummy zero(0), two(2), three(3), eight(8); - ASSERT_OK(batch.Put(&zero, Slice("foo"), Slice("bar"))); - ASSERT_OK(batch.Put(&two, Slice("twofoo"), Slice("bar2"))); - ASSERT_OK(batch.Put(&eight, Slice("eightfoo"), Slice("bar8"))); - ASSERT_OK(batch.Delete(&eight, Slice("eightfoo"))); - ASSERT_OK(batch.SingleDelete(&two, Slice("twofoo"))); - ASSERT_OK(batch.DeleteRange(&two, Slice("3foo"), Slice("4foo"))); - ASSERT_OK(batch.Merge(&three, Slice("threethree"), Slice("3three"))); - ASSERT_OK(batch.Put(&zero, Slice("foo"), Slice("bar"))); - ASSERT_OK(batch.Merge(Slice("omom"), Slice("nom"))); - - TestHandler handler; - ASSERT_OK(batch.Iterate(&handler)); - ASSERT_EQ( - "Put(foo, bar)" - "PutCF(2, twofoo, bar2)" - "PutCF(8, eightfoo, bar8)" - "DeleteCF(8, eightfoo)" - "SingleDeleteCF(2, twofoo)" - "DeleteRangeCF(2, 3foo, 4foo)" - "MergeCF(3, threethree, 3three)" - "Put(foo, bar)" - "Merge(omom, nom)", - handler.seen); -} - -TEST_F(WriteBatchTest, ColumnFamiliesBatchWithIndexTest) { - WriteBatchWithIndex batch; - ColumnFamilyHandleImplDummy zero(0), two(2), three(3), eight(8); - ASSERT_OK(batch.Put(&zero, Slice("foo"), Slice("bar"))); - ASSERT_OK(batch.Put(&two, Slice("twofoo"), Slice("bar2"))); - ASSERT_OK(batch.Put(&eight, Slice("eightfoo"), Slice("bar8"))); - ASSERT_OK(batch.Delete(&eight, Slice("eightfoo"))); - ASSERT_OK(batch.SingleDelete(&two, Slice("twofoo"))); - ASSERT_OK(batch.Merge(&three, Slice("threethree"), Slice("3three"))); - ASSERT_OK(batch.Put(&zero, Slice("foo"), Slice("bar"))); - ASSERT_OK(batch.Merge(Slice("omom"), Slice("nom"))); - - std::unique_ptr iter; - - iter.reset(batch.NewIterator(&eight)); - iter->Seek("eightfoo"); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type); - ASSERT_EQ("eightfoo", iter->Entry().key.ToString()); - ASSERT_EQ("bar8", iter->Entry().value.ToString()); - - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(WriteType::kDeleteRecord, iter->Entry().type); - ASSERT_EQ("eightfoo", iter->Entry().key.ToString()); - - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - - iter.reset(batch.NewIterator(&two)); - iter->Seek("twofoo"); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type); - ASSERT_EQ("twofoo", iter->Entry().key.ToString()); - ASSERT_EQ("bar2", iter->Entry().value.ToString()); - - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(WriteType::kSingleDeleteRecord, iter->Entry().type); - ASSERT_EQ("twofoo", iter->Entry().key.ToString()); - - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - - 
iter.reset(batch.NewIterator()); - iter->Seek("gggg"); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(WriteType::kMergeRecord, iter->Entry().type); - ASSERT_EQ("omom", iter->Entry().key.ToString()); - ASSERT_EQ("nom", iter->Entry().value.ToString()); - - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - - iter.reset(batch.NewIterator(&zero)); - iter->Seek("foo"); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type); - ASSERT_EQ("foo", iter->Entry().key.ToString()); - ASSERT_EQ("bar", iter->Entry().value.ToString()); - - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type); - ASSERT_EQ("foo", iter->Entry().key.ToString()); - ASSERT_EQ("bar", iter->Entry().value.ToString()); - - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(WriteType::kMergeRecord, iter->Entry().type); - ASSERT_EQ("omom", iter->Entry().key.ToString()); - ASSERT_EQ("nom", iter->Entry().value.ToString()); - - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - - TestHandler handler; - ASSERT_OK(batch.GetWriteBatch()->Iterate(&handler)); - ASSERT_EQ( - "Put(foo, bar)" - "PutCF(2, twofoo, bar2)" - "PutCF(8, eightfoo, bar8)" - "DeleteCF(8, eightfoo)" - "SingleDeleteCF(2, twofoo)" - "MergeCF(3, threethree, 3three)" - "Put(foo, bar)" - "Merge(omom, nom)", - handler.seen); -} - -TEST_F(WriteBatchTest, SavePointTest) { - Status s; - WriteBatch batch; - batch.SetSavePoint(); - - ASSERT_OK(batch.Put("A", "a")); - ASSERT_OK(batch.Put("B", "b")); - batch.SetSavePoint(); - - ASSERT_OK(batch.Put("C", "c")); - ASSERT_OK(batch.Delete("A")); - batch.SetSavePoint(); - batch.SetSavePoint(); - - ASSERT_OK(batch.RollbackToSavePoint()); - ASSERT_EQ( - "Delete(A)@3" - "Put(A, a)@0" - "Put(B, b)@1" - "Put(C, c)@2", - PrintContents(&batch)); - - ASSERT_OK(batch.RollbackToSavePoint()); - ASSERT_OK(batch.RollbackToSavePoint()); - ASSERT_EQ( - "Put(A, a)@0" - "Put(B, b)@1", - PrintContents(&batch)); - - ASSERT_OK(batch.Delete("A")); - ASSERT_OK(batch.Put("B", "bb")); - - ASSERT_OK(batch.RollbackToSavePoint()); - ASSERT_EQ("", PrintContents(&batch)); - - s = batch.RollbackToSavePoint(); - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ("", PrintContents(&batch)); - - ASSERT_OK(batch.Put("D", "d")); - ASSERT_OK(batch.Delete("A")); - - batch.SetSavePoint(); - - ASSERT_OK(batch.Put("A", "aaa")); - - ASSERT_OK(batch.RollbackToSavePoint()); - ASSERT_EQ( - "Delete(A)@1" - "Put(D, d)@0", - PrintContents(&batch)); - - batch.SetSavePoint(); - - ASSERT_OK(batch.Put("D", "d")); - ASSERT_OK(batch.Delete("A")); - - ASSERT_OK(batch.RollbackToSavePoint()); - ASSERT_EQ( - "Delete(A)@1" - "Put(D, d)@0", - PrintContents(&batch)); - - s = batch.RollbackToSavePoint(); - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ( - "Delete(A)@1" - "Put(D, d)@0", - PrintContents(&batch)); - - WriteBatch batch2; - - s = batch2.RollbackToSavePoint(); - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ("", PrintContents(&batch2)); - - ASSERT_OK(batch2.Delete("A")); - batch2.SetSavePoint(); - - s = batch2.RollbackToSavePoint(); - ASSERT_OK(s); - ASSERT_EQ("Delete(A)@0", PrintContents(&batch2)); - - batch2.Clear(); - ASSERT_EQ("", PrintContents(&batch2)); - - batch2.SetSavePoint(); - - ASSERT_OK(batch2.Delete("B")); - ASSERT_EQ("Delete(B)@0", PrintContents(&batch2)); - - batch2.SetSavePoint(); - s = batch2.RollbackToSavePoint(); - ASSERT_OK(s); - 
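The batch2/batch3 assertions around here all follow from the savepoint stack semantics: SetSavePoint() pushes a marker, RollbackToSavePoint() pops the most recent marker and trims the batch back to it, and both RollbackToSavePoint() and PopSavePoint() return NotFound on an empty stack while leaving the batch untouched. A minimal standalone sketch of that behavior (illustrative):

    // Minimal sketch of savepoint stack behavior (illustrative).
    WriteBatch b;
    Status st = b.RollbackToSavePoint();
    assert(st.IsNotFound());           // empty stack: nothing to roll back to
    b.SetSavePoint();
    assert(b.Put("x", "1").ok());
    assert(b.Count() == 1);
    assert(b.RollbackToSavePoint().ok());
    assert(b.Count() == 0);            // "x" was trimmed away again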
ASSERT_EQ("Delete(B)@0", PrintContents(&batch2)); - - s = batch2.RollbackToSavePoint(); - ASSERT_OK(s); - ASSERT_EQ("", PrintContents(&batch2)); - - s = batch2.RollbackToSavePoint(); - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ("", PrintContents(&batch2)); - - WriteBatch batch3; - - s = batch3.PopSavePoint(); - ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ("", PrintContents(&batch3)); - - batch3.SetSavePoint(); - ASSERT_OK(batch3.Delete("A")); - - s = batch3.PopSavePoint(); - ASSERT_OK(s); - ASSERT_EQ("Delete(A)@0", PrintContents(&batch3)); -} - -TEST_F(WriteBatchTest, MemoryLimitTest) { - Status s; - // The header size is 12 bytes. The two Puts take 8 bytes which gives total - // of 12 + 8 * 2 = 28 bytes. - WriteBatch batch(0, 28); - - ASSERT_OK(batch.Put("a", "....")); - ASSERT_OK(batch.Put("b", "....")); - s = batch.Put("c", "...."); - ASSERT_TRUE(s.IsMemoryLimit()); -} - -namespace { -class TimestampChecker : public WriteBatch::Handler { - public: - explicit TimestampChecker( - std::unordered_map cf_to_ucmps, Slice ts) - : cf_to_ucmps_(std::move(cf_to_ucmps)), timestamp_(std::move(ts)) {} - Status PutCF(uint32_t cf, const Slice& key, const Slice& /*value*/) override { - auto cf_iter = cf_to_ucmps_.find(cf); - if (cf_iter == cf_to_ucmps_.end()) { - return Status::Corruption(); - } - const Comparator* const ucmp = cf_iter->second; - assert(ucmp); - size_t ts_sz = ucmp->timestamp_size(); - if (ts_sz == 0) { - return Status::OK(); - } - if (key.size() < ts_sz) { - return Status::Corruption(); - } - Slice ts = ExtractTimestampFromUserKey(key, ts_sz); - if (ts.compare(timestamp_) != 0) { - return Status::Corruption(); - } - return Status::OK(); - } - - private: - std::unordered_map cf_to_ucmps_; - Slice timestamp_; -}; - -Status CheckTimestampsInWriteBatch( - WriteBatch& wb, Slice timestamp, - std::unordered_map cf_to_ucmps) { - TimestampChecker ts_checker(cf_to_ucmps, timestamp); - return wb.Iterate(&ts_checker); -} -} // anonymous namespace - -TEST_F(WriteBatchTest, SanityChecks) { - ColumnFamilyHandleImplDummy cf0(0, - test::BytewiseComparatorWithU64TsWrapper()); - ColumnFamilyHandleImplDummy cf4(4); - - WriteBatch wb(0, 0, 0, /*default_cf_ts_sz=*/sizeof(uint64_t)); - - // Sanity checks for the new WriteBatch APIs with extra 'ts' arg. - ASSERT_TRUE(wb.Put(nullptr, "key", "ts", "value").IsInvalidArgument()); - ASSERT_TRUE(wb.Delete(nullptr, "key", "ts").IsInvalidArgument()); - ASSERT_TRUE(wb.SingleDelete(nullptr, "key", "ts").IsInvalidArgument()); - ASSERT_TRUE(wb.Merge(nullptr, "key", "ts", "value").IsInvalidArgument()); - ASSERT_TRUE(wb.DeleteRange(nullptr, "begin_key", "end_key", "ts") - .IsInvalidArgument()); - - ASSERT_TRUE(wb.Put(&cf4, "key", "ts", "value").IsInvalidArgument()); - ASSERT_TRUE(wb.Delete(&cf4, "key", "ts").IsInvalidArgument()); - ASSERT_TRUE(wb.SingleDelete(&cf4, "key", "ts").IsInvalidArgument()); - ASSERT_TRUE(wb.Merge(&cf4, "key", "ts", "value").IsInvalidArgument()); - ASSERT_TRUE( - wb.DeleteRange(&cf4, "begin_key", "end_key", "ts").IsInvalidArgument()); - - constexpr size_t wrong_ts_sz = 1 + sizeof(uint64_t); - std::string ts(wrong_ts_sz, '\0'); - - ASSERT_TRUE(wb.Put(&cf0, "key", ts, "value").IsInvalidArgument()); - ASSERT_TRUE(wb.Delete(&cf0, "key", ts).IsInvalidArgument()); - ASSERT_TRUE(wb.SingleDelete(&cf0, "key", ts).IsInvalidArgument()); - ASSERT_TRUE(wb.Merge(&cf0, "key", ts, "value").IsInvalidArgument()); - ASSERT_TRUE( - wb.DeleteRange(&cf0, "begin_key", "end_key", ts).IsInvalidArgument()); - - // Sanity checks for the new WriteBatch APIs without extra 'ts' arg. 
- WriteBatch wb1(0, 0, 0, wrong_ts_sz); - ASSERT_TRUE(wb1.Put(&cf0, "key", "value").IsInvalidArgument()); - ASSERT_TRUE(wb1.Delete(&cf0, "key").IsInvalidArgument()); - ASSERT_TRUE(wb1.SingleDelete(&cf0, "key").IsInvalidArgument()); - ASSERT_TRUE(wb1.Merge(&cf0, "key", "value").IsInvalidArgument()); - ASSERT_TRUE( - wb1.DeleteRange(&cf0, "begin_key", "end_key").IsInvalidArgument()); -} - -TEST_F(WriteBatchTest, UpdateTimestamps) { - // We assume the last eight bytes of each key is reserved for timestamps. - // Therefore, we must make sure each key is longer than eight bytes. - constexpr size_t key_size = 16; - constexpr size_t num_of_keys = 10; - std::vector key_strs(num_of_keys, std::string(key_size, '\0')); - - ColumnFamilyHandleImplDummy cf0(0); - ColumnFamilyHandleImplDummy cf4(4, - test::BytewiseComparatorWithU64TsWrapper()); - ColumnFamilyHandleImplDummy cf5(5, - test::BytewiseComparatorWithU64TsWrapper()); - - const std::unordered_map cf_to_ucmps = { - {0, cf0.GetComparator()}, - {4, cf4.GetComparator()}, - {5, cf5.GetComparator()}}; - - static constexpr size_t timestamp_size = sizeof(uint64_t); - - { - WriteBatch wb1, wb2, wb3, wb4, wb5, wb6, wb7; - ASSERT_OK(wb1.Put(&cf0, "key", "value")); - ASSERT_FALSE(WriteBatchInternal::HasKeyWithTimestamp(wb1)); - ASSERT_OK(wb2.Put(&cf4, "key", "value")); - ASSERT_TRUE(WriteBatchInternal::HasKeyWithTimestamp(wb2)); - ASSERT_OK(wb3.Put(&cf4, "key", /*ts=*/std::string(timestamp_size, '\xfe'), - "value")); - ASSERT_TRUE(WriteBatchInternal::HasKeyWithTimestamp(wb3)); - ASSERT_OK(wb4.Delete(&cf4, "key", - /*ts=*/std::string(timestamp_size, '\xfe'))); - ASSERT_TRUE(WriteBatchInternal::HasKeyWithTimestamp(wb4)); - ASSERT_OK(wb5.Delete(&cf4, "key")); - ASSERT_TRUE(WriteBatchInternal::HasKeyWithTimestamp(wb5)); - ASSERT_OK(wb6.SingleDelete(&cf4, "key")); - ASSERT_TRUE(WriteBatchInternal::HasKeyWithTimestamp(wb6)); - ASSERT_OK(wb7.SingleDelete(&cf4, "key", - /*ts=*/std::string(timestamp_size, '\xfe'))); - ASSERT_TRUE(WriteBatchInternal::HasKeyWithTimestamp(wb7)); - } - - WriteBatch batch; - // Write to the batch. We will assign timestamps later. - for (const auto& key_str : key_strs) { - ASSERT_OK(batch.Put(&cf0, key_str, "value")); - ASSERT_OK(batch.Put(&cf4, key_str, "value")); - ASSERT_OK(batch.Put(&cf5, key_str, "value")); - } - - const auto checker1 = [](uint32_t cf) { - if (cf == 4 || cf == 5) { - return timestamp_size; - } else if (cf == 0) { - return static_cast(0); - } else { - return std::numeric_limits::max(); - } - }; - ASSERT_OK( - batch.UpdateTimestamps(std::string(timestamp_size, '\xfe'), checker1)); - ASSERT_OK(CheckTimestampsInWriteBatch( - batch, std::string(timestamp_size, '\xfe'), cf_to_ucmps)); - - // We use indexed_cf_to_ucmps, non_indexed_cfs_with_ts and timestamp_size to - // simulate the case in which a transaction enables indexing for some writes - // while disables indexing for other writes. A transaction uses a - // WriteBatchWithIndex object to buffer writes (we consider Write-committed - // policy only). If indexing is enabled, then writes go through - // WriteBatchWithIndex API populating a WBWI internal data structure, i.e. a - // mapping from cf to user comparators. If indexing is disabled, a transaction - // writes directly to the underlying raw WriteBatch. We will need to track the - // comparator information for the column families to which un-indexed writes - // are performed. 
When calling UpdateTimestamp API of WriteBatch, we need - // indexed_cf_to_ucmps, non_indexed_cfs_with_ts, and timestamp_size to perform - // checking. - std::unordered_map indexed_cf_to_ucmps = { - {0, cf0.GetComparator()}, {4, cf4.GetComparator()}}; - std::unordered_set non_indexed_cfs_with_ts = {cf5.GetID()}; - const auto checker2 = [&indexed_cf_to_ucmps, - &non_indexed_cfs_with_ts](uint32_t cf) { - if (non_indexed_cfs_with_ts.count(cf) > 0) { - return timestamp_size; - } - auto cf_iter = indexed_cf_to_ucmps.find(cf); - if (cf_iter == indexed_cf_to_ucmps.end()) { - assert(false); - return std::numeric_limits::max(); - } - const Comparator* const ucmp = cf_iter->second; - assert(ucmp); - return ucmp->timestamp_size(); - }; - ASSERT_OK( - batch.UpdateTimestamps(std::string(timestamp_size, '\xef'), checker2)); - ASSERT_OK(CheckTimestampsInWriteBatch( - batch, std::string(timestamp_size, '\xef'), cf_to_ucmps)); -} - -TEST_F(WriteBatchTest, CommitWithTimestamp) { - WriteBatch wb; - const std::string txn_name = "xid1"; - std::string ts; - constexpr uint64_t commit_ts = 23; - PutFixed64(&ts, commit_ts); - ASSERT_OK(WriteBatchInternal::MarkCommitWithTimestamp(&wb, txn_name, ts)); - TestHandler handler; - ASSERT_OK(wb.Iterate(&handler)); - ASSERT_EQ("MarkCommitWithTimestamp(" + txn_name + ", " + - Slice(ts).ToString(true) + ")", - handler.seen); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db/write_callback_test.cc b/db/write_callback_test.cc deleted file mode 100644 index 1be8593f1..000000000 --- a/db/write_callback_test.cc +++ /dev/null @@ -1,454 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
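The write_batch timestamp tests above all rely on the same two-step pattern: keys are first buffered with an empty timestamp slot, and the real commit timestamp is stamped onto every buffered key in one pass. Below is a minimal sketch of that pattern against the public WriteBatch::UpdateTimestamps API; the helper name StampCommitTimestamp, the single timestamp-enabled column family, and the 8-byte timestamp width are illustrative assumptions, not code from the deleted test.

#include <cstdint>

#include "rocksdb/slice.h"
#include "rocksdb/write_batch.h"

namespace ROCKSDB_NAMESPACE {
// Stamps `commit_ts` onto every key already buffered in `wb`. The callback
// tells UpdateTimestamps how wide the timestamp slot is for each column
// family touched by the batch; returning 0 means "this CF has no timestamp".
inline Status StampCommitTimestamp(WriteBatch& wb, uint32_t cf_with_ts_id,
                                   const Slice& commit_ts) {
  return wb.UpdateTimestamps(commit_ts, [cf_with_ts_id](uint32_t cf) {
    return cf == cf_with_ts_id ? sizeof(uint64_t) : size_t{0};
  });
}
}  // namespace ROCKSDB_NAMESPACE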
- - -#include "db/write_callback.h" - -#include -#include -#include -#include -#include - -#include "db/db_impl/db_impl.h" -#include "port/port.h" -#include "rocksdb/db.h" -#include "rocksdb/write_batch.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "util/random.h" - -using std::string; - -namespace ROCKSDB_NAMESPACE { - -class WriteCallbackTest : public testing::Test { - public: - string dbname; - - WriteCallbackTest() { - dbname = test::PerThreadDBPath("write_callback_testdb"); - } -}; - -class WriteCallbackTestWriteCallback1 : public WriteCallback { - public: - bool was_called = false; - - Status Callback(DB* db) override { - was_called = true; - - // Make sure db is a DBImpl - DBImpl* db_impl = dynamic_cast(db); - if (db_impl == nullptr) { - return Status::InvalidArgument(""); - } - - return Status::OK(); - } - - bool AllowWriteBatching() override { return true; } -}; - -class WriteCallbackTestWriteCallback2 : public WriteCallback { - public: - Status Callback(DB* /*db*/) override { return Status::Busy(); } - bool AllowWriteBatching() override { return true; } -}; - -class MockWriteCallback : public WriteCallback { - public: - bool should_fail_ = false; - bool allow_batching_ = false; - std::atomic was_called_{false}; - - MockWriteCallback() {} - - MockWriteCallback(const MockWriteCallback& other) { - should_fail_ = other.should_fail_; - allow_batching_ = other.allow_batching_; - was_called_.store(other.was_called_.load()); - } - - Status Callback(DB* /*db*/) override { - was_called_.store(true); - if (should_fail_) { - return Status::Busy(); - } else { - return Status::OK(); - } - } - - bool AllowWriteBatching() override { return allow_batching_; } -}; - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -class WriteCallbackPTest - : public WriteCallbackTest, - public ::testing::WithParamInterface< - std::tuple> { - public: - WriteCallbackPTest() { - std::tie(unordered_write_, seq_per_batch_, two_queues_, allow_parallel_, - allow_batching_, enable_WAL_, enable_pipelined_write_) = - GetParam(); - } - - protected: - bool unordered_write_; - bool seq_per_batch_; - bool two_queues_; - bool allow_parallel_; - bool allow_batching_; - bool enable_WAL_; - bool enable_pipelined_write_; -}; - -TEST_P(WriteCallbackPTest, WriteWithCallbackTest) { - struct WriteOP { - WriteOP(bool should_fail = false) { callback_.should_fail_ = should_fail; } - - void Put(const string& key, const string& val) { - kvs_.push_back(std::make_pair(key, val)); - ASSERT_OK(write_batch_.Put(key, val)); - } - - void Clear() { - kvs_.clear(); - write_batch_.Clear(); - callback_.was_called_.store(false); - } - - MockWriteCallback callback_; - WriteBatch write_batch_; - std::vector> kvs_; - }; - - // In each scenario we'll launch multiple threads to write. - // The size of each array equals to number of threads, and - // each boolean in it denote whether callback of corresponding - // thread should succeed or fail. 
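// For example, the scenario {true, false} launches two writer threads: the
// first thread's callback is set to fail, so its batch must be rejected
// (Status::Busy) and its keys must stay absent from the DB, while the second
// thread's callback succeeds and its keys must be readable afterwards.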
- std::vector> write_scenarios = { - {true}, - {false}, - {false, false}, - {true, true}, - {true, false}, - {false, true}, - {false, false, false}, - {true, true, true}, - {false, true, false}, - {true, false, true}, - {true, false, false, false, false}, - {false, false, false, false, true}, - {false, false, true, false, true}, - }; - - for (auto& write_group : write_scenarios) { - Options options; - options.create_if_missing = true; - options.unordered_write = unordered_write_; - options.allow_concurrent_memtable_write = allow_parallel_; - options.enable_pipelined_write = enable_pipelined_write_; - options.two_write_queues = two_queues_; - // Skip unsupported combinations - if (options.enable_pipelined_write && seq_per_batch_) { - continue; - } - if (options.enable_pipelined_write && options.two_write_queues) { - continue; - } - if (options.unordered_write && !options.allow_concurrent_memtable_write) { - continue; - } - if (options.unordered_write && options.enable_pipelined_write) { - continue; - } - - ReadOptions read_options; - DB* db; - DBImpl* db_impl; - - ASSERT_OK(DestroyDB(dbname, options)); - - DBOptions db_options(options); - ColumnFamilyOptions cf_options(options); - std::vector column_families; - column_families.push_back( - ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); - std::vector handles; - auto open_s = DBImpl::Open(db_options, dbname, column_families, &handles, - &db, seq_per_batch_, true /* batch_per_txn */); - ASSERT_OK(open_s); - assert(handles.size() == 1); - delete handles[0]; - - db_impl = dynamic_cast(db); - ASSERT_TRUE(db_impl); - - // Writers that have called JoinBatchGroup. - std::atomic threads_joining(0); - // Writers that have linked to the queue - std::atomic threads_linked(0); - // Writers that pass WriteThread::JoinBatchGroup:Wait sync-point. - std::atomic threads_verified(0); - - std::atomic seq(db_impl->GetLatestSequenceNumber()); - ASSERT_EQ(db_impl->GetLatestSequenceNumber(), 0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Start", [&](void*) { - uint64_t cur_threads_joining = threads_joining.fetch_add(1); - // Wait for the last joined writer to link to the queue. - // In this way the writers link to the queue one by one. - // This allows us to confidently detect the first writer - // who increases threads_linked as the leader. - while (threads_linked.load() < cur_threads_joining) { - } - }); - - // Verification once writers call JoinBatchGroup. 
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:Wait", [&](void* arg) { - uint64_t cur_threads_linked = threads_linked.fetch_add(1); - bool is_leader = false; - bool is_last = false; - - // who am i - is_leader = (cur_threads_linked == 0); - is_last = (cur_threads_linked == write_group.size() - 1); - - // check my state - auto* writer = reinterpret_cast(arg); - - if (is_leader) { - ASSERT_TRUE(writer->state == - WriteThread::State::STATE_GROUP_LEADER); - } else { - ASSERT_TRUE(writer->state == WriteThread::State::STATE_INIT); - } - - // (meta test) the first WriteOP should indeed be the first - // and the last should be the last (all others can be out of - // order) - if (is_leader) { - ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == - !write_group.front().callback_.should_fail_); - } else if (is_last) { - ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == - !write_group.back().callback_.should_fail_); - } - - threads_verified.fetch_add(1); - // Wait here until all verification in this sync-point - // callback finish for all writers. - while (threads_verified.load() < write_group.size()) { - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "WriteThread::JoinBatchGroup:DoneWaiting", [&](void* arg) { - // check my state - auto* writer = reinterpret_cast(arg); - - if (!allow_batching_) { - // no batching so everyone should be a leader - ASSERT_TRUE(writer->state == - WriteThread::State::STATE_GROUP_LEADER); - } else if (!allow_parallel_) { - ASSERT_TRUE(writer->state == WriteThread::State::STATE_COMPLETED || - (enable_pipelined_write_ && - writer->state == - WriteThread::State::STATE_MEMTABLE_WRITER_LEADER)); - } - }); - - std::atomic thread_num(0); - std::atomic dummy_key(0); - - // Each write thread create a random write batch and write to DB - // with a write callback. 
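// Ordering note: the busy-waits in the lambda below keep thread 0 out in
// front (no other thread starts its write until the first writer has passed
// the verification callback above) and hold back the highest-index thread
// until every other writer has been verified. This is what makes the
// is_leader / is_last assertions above line up with write_group.front() and
// write_group.back().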
- std::function write_with_callback_func = [&]() { - uint32_t i = thread_num.fetch_add(1); - Random rnd(i); - - // leaders gotta lead - while (i > 0 && threads_verified.load() < 1) { - } - - // loser has to lose - while (i == write_group.size() - 1 && - threads_verified.load() < write_group.size() - 1) { - } - - auto& write_op = write_group.at(i); - write_op.Clear(); - write_op.callback_.allow_batching_ = allow_batching_; - - // insert some keys - for (uint32_t j = 0; j < rnd.Next() % 50; j++) { - // grab unique key - char my_key = dummy_key.fetch_add(1); - - string skey(5, my_key); - string sval(10, my_key); - write_op.Put(skey, sval); - - if (!write_op.callback_.should_fail_ && !seq_per_batch_) { - seq.fetch_add(1); - } - } - if (!write_op.callback_.should_fail_ && seq_per_batch_) { - seq.fetch_add(1); - } - - WriteOptions woptions; - woptions.disableWAL = !enable_WAL_; - woptions.sync = enable_WAL_; - if (woptions.protection_bytes_per_key > 0) { - ASSERT_OK(WriteBatchInternal::UpdateProtectionInfo( - &write_op.write_batch_, woptions.protection_bytes_per_key)); - } - Status s; - if (seq_per_batch_) { - class PublishSeqCallback : public PreReleaseCallback { - public: - PublishSeqCallback(DBImpl* db_impl_in) : db_impl_(db_impl_in) {} - Status Callback(SequenceNumber last_seq, bool /*not used*/, uint64_t, - size_t /*index*/, size_t /*total*/) override { - db_impl_->SetLastPublishedSequence(last_seq); - return Status::OK(); - } - DBImpl* db_impl_; - } publish_seq_callback(db_impl); - // seq_per_batch_ requires a natural batch separator or Noop - ASSERT_OK(WriteBatchInternal::InsertNoop(&write_op.write_batch_)); - const size_t ONE_BATCH = 1; - s = db_impl->WriteImpl(woptions, &write_op.write_batch_, - &write_op.callback_, nullptr, 0, false, nullptr, - ONE_BATCH, - two_queues_ ? 
&publish_seq_callback : nullptr); - } else { - s = db_impl->WriteWithCallback(woptions, &write_op.write_batch_, - &write_op.callback_); - } - - if (write_op.callback_.should_fail_) { - ASSERT_TRUE(s.IsBusy()); - } else { - ASSERT_OK(s); - } - }; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // do all the writes - std::vector threads; - for (uint32_t i = 0; i < write_group.size(); i++) { - threads.emplace_back(write_with_callback_func); - } - for (auto& t : threads) { - t.join(); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - // check for keys - string value; - for (auto& w : write_group) { - ASSERT_TRUE(w.callback_.was_called_.load()); - for (auto& kvp : w.kvs_) { - if (w.callback_.should_fail_) { - ASSERT_TRUE(db->Get(read_options, kvp.first, &value).IsNotFound()); - } else { - ASSERT_OK(db->Get(read_options, kvp.first, &value)); - ASSERT_EQ(value, kvp.second); - } - } - } - - ASSERT_EQ(seq.load(), db_impl->TEST_GetLastVisibleSequence()); - - delete db; - ASSERT_OK(DestroyDB(dbname, options)); - } -} - -INSTANTIATE_TEST_CASE_P(WriteCallbackPTest, WriteCallbackPTest, - ::testing::Combine(::testing::Bool(), ::testing::Bool(), - ::testing::Bool(), ::testing::Bool(), - ::testing::Bool(), ::testing::Bool(), - ::testing::Bool())); -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -TEST_F(WriteCallbackTest, WriteCallBackTest) { - Options options; - WriteOptions write_options; - ReadOptions read_options; - string value; - DB* db; - DBImpl* db_impl; - - ASSERT_OK(DestroyDB(dbname, options)); - - options.create_if_missing = true; - Status s = DB::Open(options, dbname, &db); - ASSERT_OK(s); - - db_impl = dynamic_cast(db); - ASSERT_TRUE(db_impl); - - WriteBatch wb; - - ASSERT_OK(wb.Put("a", "value.a")); - ASSERT_OK(wb.Delete("x")); - - // Test a simple Write - s = db->Write(write_options, &wb); - ASSERT_OK(s); - - s = db->Get(read_options, "a", &value); - ASSERT_OK(s); - ASSERT_EQ("value.a", value); - - // Test WriteWithCallback - WriteCallbackTestWriteCallback1 callback1; - WriteBatch wb2; - - ASSERT_OK(wb2.Put("a", "value.a2")); - - s = db_impl->WriteWithCallback(write_options, &wb2, &callback1); - ASSERT_OK(s); - ASSERT_TRUE(callback1.was_called); - - s = db->Get(read_options, "a", &value); - ASSERT_OK(s); - ASSERT_EQ("value.a2", value); - - // Test WriteWithCallback for a callback that fails - WriteCallbackTestWriteCallback2 callback2; - WriteBatch wb3; - - ASSERT_OK(wb3.Put("a", "value.a3")); - - s = db_impl->WriteWithCallback(write_options, &wb3, &callback2); - ASSERT_NOK(s); - - s = db->Get(read_options, "a", &value); - ASSERT_OK(s); - ASSERT_EQ("value.a2", value); - - delete db; - ASSERT_OK(DestroyDB(dbname, options)); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/db/write_controller_test.cc b/db/write_controller_test.cc deleted file mode 100644 index b6321a3bc..000000000 --- a/db/write_controller_test.cc +++ /dev/null @@ -1,248 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
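The write_callback tests above exercise a small internal hook: DBImpl::WriteWithCallback runs the supplied WriteCallback inside the write path and, if it returns a non-OK status, aborts only that writer's batch. A hedged sketch of a callback in that style follows; the class name and the "expected sequence number" conflict check are illustrative stand-ins for the validation a real caller (e.g. an optimistic transaction) would perform, not code from the deleted test.

#include "db/write_callback.h"
#include "rocksdb/db.h"
#include "rocksdb/status.h"
#include "rocksdb/types.h"

namespace ROCKSDB_NAMESPACE {
class ExpectedSeqCallback : public WriteCallback {
 public:
  explicit ExpectedSeqCallback(SequenceNumber expected) : expected_(expected) {}

  Status Callback(DB* db) override {
    // Abort this write if anything was committed since `expected_` was read.
    return db->GetLatestSequenceNumber() == expected_ ? Status::OK()
                                                      : Status::Busy();
  }
  bool AllowWriteBatching() override { return false; }

 private:
  SequenceNumber expected_;
};
}  // namespace ROCKSDB_NAMESPACE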
-// -#include "db/write_controller.h" - -#include -#include - -#include "rocksdb/system_clock.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { -namespace { -class TimeSetClock : public SystemClockWrapper { - public: - explicit TimeSetClock() : SystemClockWrapper(nullptr) {} - const char* Name() const override { return "TimeSetClock"; } - uint64_t now_micros_ = 6666; - uint64_t NowNanos() override { return now_micros_ * std::milli::den; } -}; -} // anonymous namespace -class WriteControllerTest : public testing::Test { - public: - WriteControllerTest() { clock_ = std::make_shared(); } - std::shared_ptr clock_; -}; - -// Make tests easier to read -#define MILLION *1000000u -#define MB MILLION -#define MBPS MILLION -#define SECS MILLION // in microseconds - -TEST_F(WriteControllerTest, BasicAPI) { - WriteController controller(40 MBPS); // also set max delayed rate - EXPECT_EQ(controller.delayed_write_rate(), 40 MBPS); - EXPECT_FALSE(controller.IsStopped()); - EXPECT_FALSE(controller.NeedsDelay()); - EXPECT_EQ(0, controller.GetDelay(clock_.get(), 100 MB)); - - // set, get - controller.set_delayed_write_rate(20 MBPS); - EXPECT_EQ(controller.delayed_write_rate(), 20 MBPS); - EXPECT_FALSE(controller.IsStopped()); - EXPECT_FALSE(controller.NeedsDelay()); - EXPECT_EQ(0, controller.GetDelay(clock_.get(), 100 MB)); - - { - // set with token, get - auto delay_token_0 = controller.GetDelayToken(10 MBPS); - EXPECT_EQ(controller.delayed_write_rate(), 10 MBPS); - EXPECT_FALSE(controller.IsStopped()); - EXPECT_TRUE(controller.NeedsDelay()); - // test with delay - EXPECT_EQ(2 SECS, controller.GetDelay(clock_.get(), 20 MB)); - clock_->now_micros_ += 2 SECS; // pay the "debt" - - auto delay_token_1 = controller.GetDelayToken(2 MBPS); - EXPECT_EQ(10 SECS, controller.GetDelay(clock_.get(), 20 MB)); - clock_->now_micros_ += 10 SECS; // pay the "debt" - - auto delay_token_2 = controller.GetDelayToken(1 MBPS); - EXPECT_EQ(20 SECS, controller.GetDelay(clock_.get(), 20 MB)); - clock_->now_micros_ += 20 SECS; // pay the "debt" - - auto delay_token_3 = controller.GetDelayToken(20 MBPS); - EXPECT_EQ(1 SECS, controller.GetDelay(clock_.get(), 20 MB)); - clock_->now_micros_ += 1 SECS; // pay the "debt" - - // 60M is more than the max rate of 40M. Max rate will be used. 
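// In these delay checks, the expected value is essentially bytes / rate:
// 20 MB at 10 MB/s -> 2 s, at 2 MB/s -> 10 s, at 1 MB/s -> 20 s, at
// 20 MB/s -> 1 s, and (below) 20 MB at the 40 MB/s cap -> 0.5 s. The clock
// is advanced by the same amount after each call ("pay the debt") so the
// next request starts with no outstanding delay.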
- EXPECT_EQ(controller.delayed_write_rate(), 20 MBPS); - auto delay_token_4 = - controller.GetDelayToken(controller.delayed_write_rate() * 3); - EXPECT_EQ(controller.delayed_write_rate(), 40 MBPS); - EXPECT_EQ(static_cast(0.5 SECS), - controller.GetDelay(clock_.get(), 20 MB)); - - EXPECT_FALSE(controller.IsStopped()); - EXPECT_TRUE(controller.NeedsDelay()); - - // Test stop tokens - { - auto stop_token_1 = controller.GetStopToken(); - EXPECT_TRUE(controller.IsStopped()); - EXPECT_EQ(0, controller.GetDelay(clock_.get(), 100 MB)); - { - auto stop_token_2 = controller.GetStopToken(); - EXPECT_TRUE(controller.IsStopped()); - EXPECT_EQ(0, controller.GetDelay(clock_.get(), 100 MB)); - } - EXPECT_TRUE(controller.IsStopped()); - EXPECT_EQ(0, controller.GetDelay(clock_.get(), 100 MB)); - } - // Stop tokens released - EXPECT_FALSE(controller.IsStopped()); - EXPECT_TRUE(controller.NeedsDelay()); - EXPECT_EQ(controller.delayed_write_rate(), 40 MBPS); - // pay the previous "debt" - clock_->now_micros_ += static_cast(0.5 SECS); - EXPECT_EQ(1 SECS, controller.GetDelay(clock_.get(), 40 MB)); - } - - // Delay tokens released - EXPECT_FALSE(controller.NeedsDelay()); -} - -TEST_F(WriteControllerTest, StartFilled) { - WriteController controller(10 MBPS); - - // Attempt to write two things that combined would be allowed within - // a single refill interval - auto delay_token_0 = - controller.GetDelayToken(controller.delayed_write_rate()); - - // Verify no delay because write rate has not been exceeded within - // refill interval. - EXPECT_EQ(0U, controller.GetDelay(clock_.get(), 2000u /*bytes*/)); - EXPECT_EQ(0U, controller.GetDelay(clock_.get(), 2000u /*bytes*/)); - - // Allow refill (kMicrosPerRefill) - clock_->now_micros_ += 1000; - - // Again - EXPECT_EQ(0U, controller.GetDelay(clock_.get(), 2000u /*bytes*/)); - EXPECT_EQ(0U, controller.GetDelay(clock_.get(), 2000u /*bytes*/)); - - // Control: something bigger that would exceed write rate within interval - uint64_t delay = controller.GetDelay(clock_.get(), 10 MB); - EXPECT_GT(1.0 * delay, 0.999 SECS); - EXPECT_LT(1.0 * delay, 1.001 SECS); -} - -TEST_F(WriteControllerTest, DebtAccumulation) { - WriteController controller(10 MBPS); - - std::array, 10> tokens; - - // Accumulate a time delay debt with no passage of time, like many column - // families delaying writes simultaneously. (Old versions of WriteController - // would reset the debt on every GetDelayToken.) - uint64_t debt = 0; - for (unsigned i = 0; i < tokens.size(); ++i) { - tokens[i] = controller.GetDelayToken((i + 1u) MBPS); - uint64_t delay = controller.GetDelay(clock_.get(), 63 MB); - ASSERT_GT(delay, debt); - uint64_t incremental = delay - debt; - ASSERT_EQ(incremental, (63 SECS) / (i + 1u)); - debt += incremental; - } - - // Pay down the debt - clock_->now_micros_ += debt; - debt = 0; - - // Now accumulate debt with some passage of time. - for (unsigned i = 0; i < tokens.size(); ++i) { - // Debt is accumulated in time, not in bytes, so this new write - // limit is not applied to prior requested delays, even it they are - // in progress. 
- tokens[i] = controller.GetDelayToken((i + 1u) MBPS); - uint64_t delay = controller.GetDelay(clock_.get(), 63 MB); - ASSERT_GT(delay, debt); - uint64_t incremental = delay - debt; - ASSERT_EQ(incremental, (63 SECS) / (i + 1u)); - debt += incremental; - uint64_t credit = debt / 2; - clock_->now_micros_ += credit; - debt -= credit; - } - - // Pay down the debt - clock_->now_micros_ += debt; - debt = 0; // consistent state - (void)debt; // appease clang-analyze - - // Verify paid down - EXPECT_EQ(0U, controller.GetDelay(clock_.get(), 100u /*small bytes*/)); - - // Accumulate another debt, without accounting, and releasing tokens - for (unsigned i = 0; i < tokens.size(); ++i) { - // Big and small are delayed - ASSERT_LT(0U, controller.GetDelay(clock_.get(), 63 MB)); - ASSERT_LT(0U, controller.GetDelay(clock_.get(), 100u /*small bytes*/)); - tokens[i].reset(); - } - // All tokens released. - // Verify that releasing all tokens pays down debt, even with no time passage. - tokens[0] = controller.GetDelayToken(1 MBPS); - ASSERT_EQ(0U, controller.GetDelay(clock_.get(), 100u /*small bytes*/)); -} - -// This may or may not be a "good" feature, but it's an old feature -TEST_F(WriteControllerTest, CreditAccumulation) { - WriteController controller(10 MBPS); - - std::array, 10> tokens; - - // Ensure started - tokens[0] = controller.GetDelayToken(1 MBPS); - ASSERT_EQ(10 SECS, controller.GetDelay(clock_.get(), 10 MB)); - clock_->now_micros_ += 10 SECS; - - // Accumulate a credit - uint64_t credit = 1000 SECS /* see below: * 1 MB / 1 SEC */; - clock_->now_micros_ += credit; - - // Spend some credit (burst of I/O) - for (unsigned i = 0; i < tokens.size(); ++i) { - tokens[i] = controller.GetDelayToken((i + 1u) MBPS); - ASSERT_EQ(0U, controller.GetDelay(clock_.get(), 63 MB)); - // In WriteController, credit is accumulated in bytes, not in time. - // After an "unnecessary" delay, all of our time credit will be - // translated to bytes on the next operation, in this case with - // setting 1 MBPS. So regardless of the rate at delay time, we just - // account for the bytes. - credit -= 63 MB; - } - // Spend remaining credit - tokens[0] = controller.GetDelayToken(1 MBPS); - ASSERT_EQ(0U, controller.GetDelay(clock_.get(), credit)); - // Verify - ASSERT_EQ(10 SECS, controller.GetDelay(clock_.get(), 10 MB)); - clock_->now_micros_ += 10 SECS; - - // Accumulate a credit, no accounting - clock_->now_micros_ += 1000 SECS; - - // Spend a small amount, releasing tokens - for (unsigned i = 0; i < tokens.size(); ++i) { - ASSERT_EQ(0U, controller.GetDelay(clock_.get(), 3 MB)); - tokens[i].reset(); - } - - // All tokens released. - // Verify credit is wiped away on new delay. 
- tokens[0] = controller.GetDelayToken(1 MBPS); - ASSERT_EQ(10 SECS, controller.GetDelay(clock_.get(), 10 MB)); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/db_stress_tool/CMakeLists.txt b/db_stress_tool/CMakeLists.txt deleted file mode 100644 index 96d70dd0e..000000000 --- a/db_stress_tool/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -add_executable(db_stress${ARTIFACT_SUFFIX} - batched_ops_stress.cc - cf_consistency_stress.cc - db_stress.cc - db_stress_common.cc - db_stress_driver.cc - db_stress_gflags.cc - db_stress_listener.cc - db_stress_shared_state.cc - db_stress_stat.cc - db_stress_test_base.cc - db_stress_tool.cc - expected_state.cc - multi_ops_txns_stress.cc - no_batched_ops_stress.cc) -target_link_libraries(db_stress${ARTIFACT_SUFFIX} ${ROCKSDB_LIB} ${THIRDPARTY_LIBS}) -list(APPEND tool_deps db_stress) diff --git a/db_stress_tool/batched_ops_stress.cc b/db_stress_tool/batched_ops_stress.cc deleted file mode 100644 index 62a8290e9..000000000 --- a/db_stress_tool/batched_ops_stress.cc +++ /dev/null @@ -1,501 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifdef GFLAGS -#include "db_stress_tool/db_stress_common.h" - -namespace ROCKSDB_NAMESPACE { -class BatchedOpsStressTest : public StressTest { - public: - BatchedOpsStressTest() {} - virtual ~BatchedOpsStressTest() {} - - bool IsStateTracked() const override { return false; } - - // Given a key K and value V, this puts ("0"+K, V+"0"), ("1"+K, V+"1"), ..., - // ("9"+K, V+"9") in DB atomically i.e in a single batch. - // Also refer BatchedOpsStressTest::TestGet - Status TestPut(ThreadState* thread, WriteOptions& write_opts, - const ReadOptions& /* read_opts */, - const std::vector& rand_column_families, - const std::vector& rand_keys, - char (&value)[100]) override { - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - const std::string key_body = Key(rand_keys[0]); - - const uint32_t value_base = - thread->rand.Next() % thread->shared->UNKNOWN_SENTINEL; - const size_t sz = GenerateValue(value_base, value, sizeof(value)); - const std::string value_body = Slice(value, sz).ToString(); - - WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, - FLAGS_batch_protection_bytes_per_key, - FLAGS_user_timestamp_size); - - ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]]; - assert(cfh); - - for (int i = 9; i >= 0; --i) { - const std::string num = std::to_string(i); - - // Note: the digit in num is prepended to the key; however, it is appended - // to the value because we want the "value base" to be encoded uniformly - // at the beginning of the value for all types of stress tests (e.g. - // batched, non-batched, CF consistency). 
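// For example, if Key(rand_keys[0]) produced "foo" and the generated value
// body were "bar", iteration i == 3 would add ("3foo", "bar3") to the batch.
// The read paths below strip that trailing digit before comparing the ten
// values against each other, and use the leading digit of each key to know
// which suffix to expect.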
- const std::string k = num + key_body; - const std::string v = value_body + num; - - if (FLAGS_use_merge) { - batch.Merge(cfh, k, v); - } else if (FLAGS_use_put_entity_one_in > 0 && - (value_base % FLAGS_use_put_entity_one_in) == 0) { - batch.PutEntity(cfh, k, GenerateWideColumns(value_base, v)); - } else { - batch.Put(cfh, k, v); - } - } - - const Status s = db_->Write(write_opts, &batch); - - if (!s.ok()) { - fprintf(stderr, "multiput error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } else { - // we did 10 writes each of size sz + 1 - thread->stats.AddBytesForWrites(10, (sz + 1) * 10); - } - - return s; - } - - // Given a key K, this deletes ("0"+K), ("1"+K), ..., ("9"+K) - // in DB atomically i.e in a single batch. Also refer MultiGet. - Status TestDelete(ThreadState* thread, WriteOptions& writeoptions, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - std::string keys[10] = {"9", "7", "5", "3", "1", "8", "6", "4", "2", "0"}; - - WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, - FLAGS_batch_protection_bytes_per_key, - FLAGS_user_timestamp_size); - Status s; - auto cfh = column_families_[rand_column_families[0]]; - std::string key_str = Key(rand_keys[0]); - for (int i = 0; i < 10; i++) { - keys[i] += key_str; - batch.Delete(cfh, keys[i]); - } - - s = db_->Write(writeoptions, &batch); - if (!s.ok()) { - fprintf(stderr, "multidelete error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } else { - thread->stats.AddDeletes(10); - } - - return s; - } - - Status TestDeleteRange(ThreadState* /* thread */, - WriteOptions& /* write_opts */, - const std::vector& /* rand_column_families */, - const std::vector& /* rand_keys */) override { - assert(false); - return Status::NotSupported( - "BatchedOpsStressTest does not support " - "TestDeleteRange"); - } - - void TestIngestExternalFile( - ThreadState* /* thread */, - const std::vector& /* rand_column_families */, - const std::vector& /* rand_keys */) override { - assert(false); - fprintf(stderr, - "BatchedOpsStressTest does not support " - "TestIngestExternalFile\n"); - std::terminate(); - } - - // Given a key K, this gets values for "0"+K, "1"+K, ..., "9"+K - // in the same snapshot, and verifies that all the values are of the form - // V+"0", V+"1", ..., V+"9". - // ASSUMES that BatchedOpsStressTest::TestPut was used to put (K, V) into - // the DB. 
- Status TestGet(ThreadState* thread, const ReadOptions& readoptions, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - std::string keys[10] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}; - Slice key_slices[10]; - std::string values[10]; - ReadOptions readoptionscopy = readoptions; - readoptionscopy.snapshot = db_->GetSnapshot(); - std::string key_str = Key(rand_keys[0]); - Slice key = key_str; - auto cfh = column_families_[rand_column_families[0]]; - std::string from_db; - Status s; - for (int i = 0; i < 10; i++) { - keys[i] += key.ToString(); - key_slices[i] = keys[i]; - s = db_->Get(readoptionscopy, cfh, key_slices[i], &from_db); - if (!s.ok() && !s.IsNotFound()) { - fprintf(stderr, "get error: %s\n", s.ToString().c_str()); - values[i] = ""; - thread->stats.AddErrors(1); - // we continue after error rather than exiting so that we can - // find more errors if any - } else if (s.IsNotFound()) { - values[i] = ""; - thread->stats.AddGets(1, 0); - } else { - values[i] = from_db; - - assert(!keys[i].empty()); - assert(!values[i].empty()); - - const char expected = keys[i].front(); - const char actual = values[i].back(); - - if (expected != actual) { - fprintf(stderr, "get error expected = %c actual = %c\n", expected, - actual); - } - - values[i].pop_back(); // get rid of the differing character - - thread->stats.AddGets(1, 1); - } - } - db_->ReleaseSnapshot(readoptionscopy.snapshot); - - // Now that we retrieved all values, check that they all match - for (int i = 1; i < 10; i++) { - if (values[i] != values[0]) { - fprintf(stderr, "get error: inconsistent values for key %s: %s, %s\n", - key.ToString(true).c_str(), StringToHex(values[0]).c_str(), - StringToHex(values[i]).c_str()); - // we continue after error rather than exiting so that we can - // find more errors if any - } - } - - return s; - } - - std::vector TestMultiGet( - ThreadState* thread, const ReadOptions& readoptions, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - size_t num_keys = rand_keys.size(); - std::vector ret_status(num_keys); - std::array keys = { - {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}}; - size_t num_prefixes = keys.size(); - for (size_t rand_key = 0; rand_key < num_keys; ++rand_key) { - std::vector key_slices; - std::vector values(num_prefixes); - std::vector statuses(num_prefixes); - ReadOptions readoptionscopy = readoptions; - readoptionscopy.snapshot = db_->GetSnapshot(); - readoptionscopy.rate_limiter_priority = - FLAGS_rate_limit_user_ops ? 
Env::IO_USER : Env::IO_TOTAL; - std::vector key_str; - key_str.reserve(num_prefixes); - key_slices.reserve(num_prefixes); - std::string from_db; - ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]]; - - for (size_t key = 0; key < num_prefixes; ++key) { - key_str.emplace_back(keys[key] + Key(rand_keys[rand_key])); - key_slices.emplace_back(key_str.back()); - } - db_->MultiGet(readoptionscopy, cfh, num_prefixes, key_slices.data(), - values.data(), statuses.data()); - for (size_t i = 0; i < num_prefixes; i++) { - Status s = statuses[i]; - if (!s.ok() && !s.IsNotFound()) { - fprintf(stderr, "multiget error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - ret_status[rand_key] = s; - // we continue after error rather than exiting so that we can - // find more errors if any - } else if (s.IsNotFound()) { - thread->stats.AddGets(1, 0); - ret_status[rand_key] = s; - } else { - assert(!keys[i].empty()); - assert(!values[i].empty()); - - const char expected = keys[i][0]; - const char actual = values[i][values[i].size() - 1]; - - if (expected != actual) { - fprintf(stderr, "multiget error expected = %c actual = %c\n", - expected, actual); - } - - values[i].remove_suffix(1); // get rid of the differing character - - thread->stats.AddGets(1, 1); - } - } - db_->ReleaseSnapshot(readoptionscopy.snapshot); - - // Now that we retrieved all values, check that they all match - for (size_t i = 1; i < num_prefixes; i++) { - if (values[i] != values[0]) { - fprintf(stderr, - "multiget error: inconsistent values for key %s: %s, %s\n", - StringToHex(key_str[i]).c_str(), - StringToHex(values[0].ToString()).c_str(), - StringToHex(values[i].ToString()).c_str()); - // we continue after error rather than exiting so that we can - // find more errors if any - } - } - } - - return ret_status; - } - - void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - assert(thread); - - ManagedSnapshot snapshot_guard(db_); - - ReadOptions read_opts_copy(read_opts); - read_opts_copy.snapshot = snapshot_guard.snapshot(); - - assert(!rand_keys.empty()); - - const std::string key_suffix = Key(rand_keys[0]); - - assert(!rand_column_families.empty()); - assert(rand_column_families[0] >= 0); - assert(rand_column_families[0] < static_cast(column_families_.size())); - - ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]]; - assert(cfh); - - constexpr size_t num_keys = 10; - - std::array results; - - for (size_t i = 0; i < num_keys; ++i) { - const std::string key = std::to_string(i) + key_suffix; - - const Status s = db_->GetEntity(read_opts_copy, cfh, key, &results[i]); - - if (!s.ok() && !s.IsNotFound()) { - fprintf(stderr, "GetEntity error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } else if (s.IsNotFound()) { - thread->stats.AddGets(1, 0); - } else { - thread->stats.AddGets(1, 1); - } - } - - // Compare columns ignoring the last character of column values - auto compare = [](const WideColumns& lhs, const WideColumns& rhs) { - if (lhs.size() != rhs.size()) { - return false; - } - - for (size_t i = 0; i < lhs.size(); ++i) { - if (lhs[i].name() != rhs[i].name()) { - return false; - } - - if (lhs[i].value().size() != rhs[i].value().size()) { - return false; - } - - if (lhs[i].value().difference_offset(rhs[i].value()) < - lhs[i].value().size() - 1) { - return false; - } - } - - return true; - }; - - for (size_t i = 0; i < num_keys; ++i) { - const WideColumns& columns = 
results[i].columns(); - - if (!compare(results[0].columns(), columns)) { - fprintf(stderr, - "GetEntity error: inconsistent entities for key %s: %s, %s\n", - StringToHex(key_suffix).c_str(), - WideColumnsToHex(results[0].columns()).c_str(), - WideColumnsToHex(columns).c_str()); - } - - if (!columns.empty()) { - // The last character of each column value should be 'i' as a decimal - // digit - const char expected = static_cast('0' + i); - - for (const auto& column : columns) { - const Slice& value = column.value(); - - if (value.empty() || value[value.size() - 1] != expected) { - fprintf(stderr, - "GetEntity error: incorrect column value for key " - "%s, entity %s, column value %s, expected %c\n", - StringToHex(key_suffix).c_str(), - WideColumnsToHex(columns).c_str(), - value.ToString(/* hex */ true).c_str(), expected); - } - } - - if (!VerifyWideColumns(columns)) { - fprintf( - stderr, - "GetEntity error: inconsistent columns for key %s, entity %s\n", - StringToHex(key_suffix).c_str(), - WideColumnsToHex(columns).c_str()); - } - } - } - } - - // Given a key, this does prefix scans for "0"+P, "1"+P, ..., "9"+P - // in the same snapshot where P is the first FLAGS_prefix_size - 1 bytes - // of the key. Each of these 10 scans returns a series of values; - // each series should be the same length, and it is verified for each - // index i that all the i'th values are of the form V+"0", V+"1", ..., V+"9". - // ASSUMES that MultiPut was used to put (K, V) - Status TestPrefixScan(ThreadState* thread, const ReadOptions& readoptions, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - const std::string key = Key(rand_keys[0]); - - assert(FLAGS_prefix_size > 0); - const size_t prefix_to_use = static_cast(FLAGS_prefix_size); - - constexpr size_t num_prefixes = 10; - - std::array prefixes; - std::array prefix_slices; - std::array ro_copies; - std::array upper_bounds; - std::array ub_slices; - std::array, num_prefixes> iters; - - const Snapshot* const snapshot = db_->GetSnapshot(); - - ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]]; - assert(cfh); - - for (size_t i = 0; i < num_prefixes; ++i) { - prefixes[i] = std::to_string(i) + key; - prefix_slices[i] = Slice(prefixes[i].data(), prefix_to_use); - - ro_copies[i] = readoptions; - ro_copies[i].snapshot = snapshot; - if (thread->rand.OneIn(2) && - GetNextPrefix(prefix_slices[i], &(upper_bounds[i]))) { - // For half of the time, set the upper bound to the next prefix - ub_slices[i] = upper_bounds[i]; - ro_copies[i].iterate_upper_bound = &(ub_slices[i]); - } - - iters[i].reset(db_->NewIterator(ro_copies[i], cfh)); - iters[i]->Seek(prefix_slices[i]); - } - - uint64_t count = 0; - - while (iters[0]->Valid() && iters[0]->key().starts_with(prefix_slices[0])) { - ++count; - - std::array values; - - // get list of all values for this iteration - for (size_t i = 0; i < num_prefixes; ++i) { - // no iterator should finish before the first one - assert(iters[i]->Valid() && - iters[i]->key().starts_with(prefix_slices[i])); - values[i] = iters[i]->value().ToString(); - - // make sure the last character of the value is the expected digit - assert(!prefixes[i].empty()); - assert(!values[i].empty()); - - const char expected = prefixes[i].front(); - const char actual = values[i].back(); - - if (expected != actual) { - fprintf(stderr, "prefix scan error expected = %c actual = %c\n", - expected, actual); - } - - values[i].pop_back(); // get 
rid of the differing character - - // make sure all values are equivalent - if (values[i] != values[0]) { - fprintf(stderr, - "prefix scan error : %" ROCKSDB_PRIszt - ", inconsistent values for prefix %s: %s, %s\n", - i, prefix_slices[i].ToString(/* hex */ true).c_str(), - StringToHex(values[0]).c_str(), - StringToHex(values[i]).c_str()); - // we continue after error rather than exiting so that we can - // find more errors if any - } - - // make sure value() and columns() are consistent - if (!VerifyWideColumns(iters[i]->value(), iters[i]->columns())) { - fprintf(stderr, - "prefix scan error : %" ROCKSDB_PRIszt - ", value and columns inconsistent for prefix %s: value: %s, " - "columns: %s\n", - i, prefix_slices[i].ToString(/* hex */ true).c_str(), - iters[i]->value().ToString(/* hex */ true).c_str(), - WideColumnsToHex(iters[i]->columns()).c_str()); - } - - iters[i]->Next(); - } - } - - // cleanup iterators and snapshot - for (size_t i = 0; i < num_prefixes; ++i) { - // if the first iterator finished, they should have all finished - assert(!iters[i]->Valid() || - !iters[i]->key().starts_with(prefix_slices[i])); - assert(iters[i]->status().ok()); - } - - db_->ReleaseSnapshot(snapshot); - - thread->stats.AddPrefixes(1, count); - - return Status::OK(); - } - - void VerifyDb(ThreadState* /* thread */) const override {} - - void ContinuouslyVerifyDb(ThreadState* /* thread */) const override {} -}; - -StressTest* CreateBatchedOpsStressTest() { return new BatchedOpsStressTest(); } - -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/cf_consistency_stress.cc b/db_stress_tool/cf_consistency_stress.cc deleted file mode 100644 index 883a17b6e..000000000 --- a/db_stress_tool/cf_consistency_stress.cc +++ /dev/null @@ -1,769 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
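The prefix-scan test above bounds half of its iterators with ReadOptions::iterate_upper_bound set to the "next prefix", i.e. the smallest key that no longer shares the prefix. A hedged sketch of that construction follows; MakeUpperBound is a hypothetical helper written for illustration, not the GetNextPrefix function the stress tool actually uses, and the handling of an all-0xFF prefix is an assumption.

#include <string>

#include "rocksdb/slice.h"

namespace ROCKSDB_NAMESPACE {
// Builds the smallest key strictly greater than every key starting with
// `prefix` by incrementing the last incrementable byte and truncating after
// it. Returns false if the prefix is all 0xFF bytes (no finite bound).
inline bool MakeUpperBound(const Slice& prefix, std::string* upper_bound) {
  upper_bound->assign(prefix.data(), prefix.size());
  for (size_t i = upper_bound->size(); i > 0; --i) {
    unsigned char c = static_cast<unsigned char>((*upper_bound)[i - 1]);
    if (c != 0xFF) {
      (*upper_bound)[i - 1] = static_cast<char>(c + 1);
      upper_bound->resize(i);
      return true;
    }
  }
  return false;
}
}  // namespace ROCKSDB_NAMESPACE

// Usage mirrors the test: point ro.iterate_upper_bound at a Slice over the
// string built here, then Seek(prefix) and iterate while
// key().starts_with(prefix).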
- -#ifdef GFLAGS -#include "db_stress_tool/db_stress_common.h" -#include "file/file_util.h" - -namespace ROCKSDB_NAMESPACE { -class CfConsistencyStressTest : public StressTest { - public: - CfConsistencyStressTest() : batch_id_(0) {} - - ~CfConsistencyStressTest() override {} - - bool IsStateTracked() const override { return false; } - - Status TestPut(ThreadState* thread, WriteOptions& write_opts, - const ReadOptions& /* read_opts */, - const std::vector& rand_column_families, - const std::vector& rand_keys, - char (&value)[100]) override { - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - const std::string k = Key(rand_keys[0]); - - const uint32_t value_base = batch_id_.fetch_add(1); - const size_t sz = GenerateValue(value_base, value, sizeof(value)); - const Slice v(value, sz); - - WriteBatch batch; - - const bool use_put_entity = !FLAGS_use_merge && - FLAGS_use_put_entity_one_in > 0 && - (value_base % FLAGS_use_put_entity_one_in) == 0; - - for (auto cf : rand_column_families) { - ColumnFamilyHandle* const cfh = column_families_[cf]; - assert(cfh); - - if (FLAGS_use_merge) { - batch.Merge(cfh, k, v); - } else if (use_put_entity) { - batch.PutEntity(cfh, k, GenerateWideColumns(value_base, v)); - } else { - batch.Put(cfh, k, v); - } - } - - Status s = db_->Write(write_opts, &batch); - - if (!s.ok()) { - fprintf(stderr, "multi put or merge error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } else { - auto num = static_cast(rand_column_families.size()); - thread->stats.AddBytesForWrites(num, (sz + 1) * num); - } - - return s; - } - - Status TestDelete(ThreadState* thread, WriteOptions& write_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - std::string key_str = Key(rand_keys[0]); - Slice key = key_str; - WriteBatch batch; - for (auto cf : rand_column_families) { - ColumnFamilyHandle* cfh = column_families_[cf]; - batch.Delete(cfh, key); - } - Status s = db_->Write(write_opts, &batch); - if (!s.ok()) { - fprintf(stderr, "multidel error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } else { - thread->stats.AddDeletes(static_cast(rand_column_families.size())); - } - return s; - } - - Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - int64_t rand_key = rand_keys[0]; - auto shared = thread->shared; - int64_t max_key = shared->GetMaxKey(); - if (rand_key > max_key - FLAGS_range_deletion_width) { - rand_key = - thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1); - } - std::string key_str = Key(rand_key); - Slice key = key_str; - std::string end_key_str = Key(rand_key + FLAGS_range_deletion_width); - Slice end_key = end_key_str; - WriteBatch batch; - for (auto cf : rand_column_families) { - ColumnFamilyHandle* cfh = column_families_[rand_column_families[cf]]; - batch.DeleteRange(cfh, key, end_key); - } - Status s = db_->Write(write_opts, &batch); - if (!s.ok()) { - fprintf(stderr, "multi del range error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } else { - thread->stats.AddRangeDeletions( - static_cast(rand_column_families.size())); - } - return s; - } - - void TestIngestExternalFile( - ThreadState* /* thread */, - const std::vector& /* rand_column_families */, - const std::vector& /* rand_keys */) override { - assert(false); - fprintf(stderr, - "CfConsistencyStressTest does not support TestIngestExternalFile " - "because it's not possible to verify the 
result\n"); - std::terminate(); - } - - Status TestGet(ThreadState* thread, const ReadOptions& readoptions, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - std::string key_str = Key(rand_keys[0]); - Slice key = key_str; - Status s; - bool is_consistent = true; - - if (thread->rand.OneIn(2)) { - // 1/2 chance, does a random read from random CF - auto cfh = - column_families_[rand_column_families[thread->rand.Next() % - rand_column_families.size()]]; - std::string from_db; - s = db_->Get(readoptions, cfh, key, &from_db); - } else { - // 1/2 chance, comparing one key is the same across all CFs - const Snapshot* snapshot = db_->GetSnapshot(); - ReadOptions readoptionscopy = readoptions; - readoptionscopy.snapshot = snapshot; - - std::string value0; - s = db_->Get(readoptionscopy, column_families_[rand_column_families[0]], - key, &value0); - if (s.ok() || s.IsNotFound()) { - bool found = s.ok(); - for (size_t i = 1; i < rand_column_families.size(); i++) { - std::string value1; - s = db_->Get(readoptionscopy, - column_families_[rand_column_families[i]], key, &value1); - if (!s.ok() && !s.IsNotFound()) { - break; - } - if (!found && s.ok()) { - fprintf(stderr, "Get() return different results with key %s\n", - Slice(key_str).ToString(true).c_str()); - fprintf(stderr, "CF %s is not found\n", - column_family_names_[0].c_str()); - fprintf(stderr, "CF %s returns value %s\n", - column_family_names_[i].c_str(), - Slice(value1).ToString(true).c_str()); - is_consistent = false; - } else if (found && s.IsNotFound()) { - fprintf(stderr, "Get() return different results with key %s\n", - Slice(key_str).ToString(true).c_str()); - fprintf(stderr, "CF %s returns value %s\n", - column_family_names_[0].c_str(), - Slice(value0).ToString(true).c_str()); - fprintf(stderr, "CF %s is not found\n", - column_family_names_[i].c_str()); - is_consistent = false; - } else if (s.ok() && value0 != value1) { - fprintf(stderr, "Get() return different results with key %s\n", - Slice(key_str).ToString(true).c_str()); - fprintf(stderr, "CF %s returns value %s\n", - column_family_names_[0].c_str(), - Slice(value0).ToString(true).c_str()); - fprintf(stderr, "CF %s returns value %s\n", - column_family_names_[i].c_str(), - Slice(value1).ToString(true).c_str()); - is_consistent = false; - } - if (!is_consistent) { - break; - } - } - } - - db_->ReleaseSnapshot(snapshot); - } - if (!is_consistent) { - fprintf(stderr, "TestGet error: is_consistent is false\n"); - thread->stats.AddErrors(1); - // Fail fast to preserve the DB state. - thread->shared->SetVerificationFailure(); - } else if (s.ok()) { - thread->stats.AddGets(1, 1); - } else if (s.IsNotFound()) { - thread->stats.AddGets(1, 0); - } else { - fprintf(stderr, "TestGet error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } - return s; - } - - std::vector TestMultiGet( - ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - size_t num_keys = rand_keys.size(); - std::vector key_str; - std::vector keys; - keys.reserve(num_keys); - key_str.reserve(num_keys); - std::vector values(num_keys); - std::vector statuses(num_keys); - ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]]; - ReadOptions readoptionscopy = read_opts; - readoptionscopy.rate_limiter_priority = - FLAGS_rate_limit_user_ops ? 
Env::IO_USER : Env::IO_TOTAL; - - for (size_t i = 0; i < num_keys; ++i) { - key_str.emplace_back(Key(rand_keys[i])); - keys.emplace_back(key_str.back()); - } - db_->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(), - statuses.data()); - for (auto s : statuses) { - if (s.ok()) { - // found case - thread->stats.AddGets(1, 1); - } else if (s.IsNotFound()) { - // not found case - thread->stats.AddGets(1, 0); - } else { - // errors case - fprintf(stderr, "MultiGet error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } - } - return statuses; - } - - void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - assert(thread); - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - const std::string key = Key(rand_keys[0]); - - Status s; - bool is_consistent = true; - - if (thread->rand.OneIn(2)) { - // With a 1/2 chance, do a random read from a random CF - const size_t cf_id = thread->rand.Next() % rand_column_families.size(); - - assert(rand_column_families[cf_id] >= 0); - assert(rand_column_families[cf_id] < - static_cast(column_families_.size())); - - ColumnFamilyHandle* const cfh = - column_families_[rand_column_families[cf_id]]; - assert(cfh); - - PinnableWideColumns result; - s = db_->GetEntity(read_opts, cfh, key, &result); - - if (s.ok()) { - if (!VerifyWideColumns(result.columns())) { - fprintf( - stderr, - "GetEntity error: inconsistent columns for key %s, entity %s\n", - StringToHex(key).c_str(), - WideColumnsToHex(result.columns()).c_str()); - is_consistent = false; - } - } - } else { - // With a 1/2 chance, compare one key across all CFs - ManagedSnapshot snapshot_guard(db_); - - ReadOptions read_opts_copy = read_opts; - read_opts_copy.snapshot = snapshot_guard.snapshot(); - - assert(rand_column_families[0] >= 0); - assert(rand_column_families[0] < - static_cast(column_families_.size())); - - PinnableWideColumns cmp_result; - s = db_->GetEntity(read_opts_copy, - column_families_[rand_column_families[0]], key, - &cmp_result); - - if (s.ok() || s.IsNotFound()) { - const bool cmp_found = s.ok(); - - if (cmp_found) { - if (!VerifyWideColumns(cmp_result.columns())) { - fprintf(stderr, - "GetEntity error: inconsistent columns for key %s, " - "entity %s\n", - StringToHex(key).c_str(), - WideColumnsToHex(cmp_result.columns()).c_str()); - is_consistent = false; - } - } - - if (is_consistent) { - for (size_t i = 1; i < rand_column_families.size(); ++i) { - assert(rand_column_families[i] >= 0); - assert(rand_column_families[i] < - static_cast(column_families_.size())); - - PinnableWideColumns result; - s = db_->GetEntity(read_opts_copy, - column_families_[rand_column_families[i]], key, - &result); - - if (!s.ok() && !s.IsNotFound()) { - break; - } - - const bool found = s.ok(); - - assert(!column_family_names_.empty()); - assert(i < column_family_names_.size()); - - if (!cmp_found && found) { - fprintf(stderr, - "GetEntity returns different results for key %s: CF %s " - "returns not found, CF %s returns entity %s\n", - StringToHex(key).c_str(), column_family_names_[0].c_str(), - column_family_names_[i].c_str(), - WideColumnsToHex(result.columns()).c_str()); - is_consistent = false; - break; - } - - if (cmp_found && !found) { - fprintf(stderr, - "GetEntity returns different results for key %s: CF %s " - "returns entity %s, CF %s returns not found\n", - StringToHex(key).c_str(), column_family_names_[0].c_str(), - 
WideColumnsToHex(cmp_result.columns()).c_str(), - column_family_names_[i].c_str()); - is_consistent = false; - break; - } - - if (found && result != cmp_result) { - fprintf(stderr, - "GetEntity returns different results for key %s: CF %s " - "returns entity %s, CF %s returns entity %s\n", - StringToHex(key).c_str(), column_family_names_[0].c_str(), - WideColumnsToHex(cmp_result.columns()).c_str(), - column_family_names_[i].c_str(), - WideColumnsToHex(result.columns()).c_str()); - is_consistent = false; - break; - } - } - } - } - } - - if (!is_consistent) { - fprintf(stderr, "TestGetEntity error: results are not consistent\n"); - thread->stats.AddErrors(1); - // Fail fast to preserve the DB state. - thread->shared->SetVerificationFailure(); - } else if (s.ok()) { - thread->stats.AddGets(1, 1); - } else if (s.IsNotFound()) { - thread->stats.AddGets(1, 0); - } else { - fprintf(stderr, "TestGetEntity error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } - } - - Status TestPrefixScan(ThreadState* thread, const ReadOptions& readoptions, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - const std::string key = Key(rand_keys[0]); - - const size_t prefix_to_use = - (FLAGS_prefix_size < 0) ? 7 : static_cast(FLAGS_prefix_size); - - const Slice prefix(key.data(), prefix_to_use); - - std::string upper_bound; - Slice ub_slice; - - ReadOptions ro_copy = readoptions; - - // Get the next prefix first and then see if we want to set upper bound. - // We'll use the next prefix in an assertion later on - if (GetNextPrefix(prefix, &upper_bound) && thread->rand.OneIn(2)) { - ub_slice = Slice(upper_bound); - ro_copy.iterate_upper_bound = &ub_slice; - } - - ColumnFamilyHandle* const cfh = - column_families_[rand_column_families[thread->rand.Uniform( - static_cast(rand_column_families.size()))]]; - assert(cfh); - - std::unique_ptr iter(db_->NewIterator(ro_copy, cfh)); - - uint64_t count = 0; - Status s; - - for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); - iter->Next()) { - ++count; - - if (!VerifyWideColumns(iter->value(), iter->columns())) { - s = Status::Corruption("Value and columns inconsistent", - DebugString(iter->value(), iter->columns())); - break; - } - } - - assert(prefix_to_use == 0 || - count <= GetPrefixKeyCount(prefix.ToString(), upper_bound)); - - if (s.ok()) { - s = iter->status(); - } - - if (!s.ok()) { - fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - - return s; - } - - thread->stats.AddPrefixes(1, count); - - return Status::OK(); - } - - ColumnFamilyHandle* GetControlCfh(ThreadState* thread, - int /*column_family_id*/ - ) override { - // All column families should contain the same data. Randomly pick one. - return column_families_[thread->rand.Next() % column_families_.size()]; - } - - void VerifyDb(ThreadState* thread) const override { - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. - ReadOptions options(FLAGS_verify_checksum, true); - - // We must set total_order_seek to true because we are doing a SeekToFirst - // on a column family whose memtables may support (by default) prefix-based - // iterator. In this case, NewIterator with options.total_order_seek being - // false returns a prefix-based iterator. Calling SeekToFirst using this - // iterator causes the iterator to become invalid. 
That means we cannot - // iterate the memtable using this iterator any more, although the memtable - // contains the most up-to-date key-values. - options.total_order_seek = true; - - ManagedSnapshot snapshot_guard(db_); - options.snapshot = snapshot_guard.snapshot(); - - const size_t num = column_families_.size(); - - std::vector> iters; - iters.reserve(num); - - for (size_t i = 0; i < num; ++i) { - iters.emplace_back(db_->NewIterator(options, column_families_[i])); - iters.back()->SeekToFirst(); - } - - std::vector statuses(num, Status::OK()); - - assert(thread); - - auto shared = thread->shared; - assert(shared); - - do { - if (shared->HasVerificationFailedYet()) { - break; - } - - size_t valid_cnt = 0; - - for (size_t i = 0; i < num; ++i) { - const auto& iter = iters[i]; - assert(iter); - - if (iter->Valid()) { - if (!VerifyWideColumns(iter->value(), iter->columns())) { - statuses[i] = - Status::Corruption("Value and columns inconsistent", - DebugString(iter->value(), iter->columns())); - } else { - ++valid_cnt; - } - } else { - statuses[i] = iter->status(); - } - } - - if (valid_cnt == 0) { - for (size_t i = 0; i < num; ++i) { - const auto& s = statuses[i]; - if (!s.ok()) { - fprintf(stderr, "Iterator on cf %s has error: %s\n", - column_families_[i]->GetName().c_str(), - s.ToString().c_str()); - shared->SetVerificationFailure(); - } - } - - break; - } - - if (valid_cnt < num) { - shared->SetVerificationFailure(); - - for (size_t i = 0; i < num; ++i) { - assert(iters[i]); - - if (!iters[i]->Valid()) { - if (statuses[i].ok()) { - fprintf(stderr, "Finished scanning cf %s\n", - column_families_[i]->GetName().c_str()); - } else { - fprintf(stderr, "Iterator on cf %s has error: %s\n", - column_families_[i]->GetName().c_str(), - statuses[i].ToString().c_str()); - } - } else { - fprintf(stderr, "cf %s has remaining data to scan\n", - column_families_[i]->GetName().c_str()); - } - } - - break; - } - - if (shared->HasVerificationFailedYet()) { - break; - } - - // If the program reaches here, then all column families' iterators are - // still valid. 
- assert(valid_cnt == num); - - if (shared->PrintingVerificationResults()) { - continue; - } - - assert(iters[0]); - - const Slice key = iters[0]->key(); - const Slice value = iters[0]->value(); - - int num_mismatched_cfs = 0; - - for (size_t i = 1; i < num; ++i) { - assert(iters[i]); - - const int cmp = key.compare(iters[i]->key()); - - if (cmp != 0) { - ++num_mismatched_cfs; - - if (1 == num_mismatched_cfs) { - fprintf(stderr, "Verification failed\n"); - fprintf(stderr, "Latest Sequence Number: %" PRIu64 "\n", - db_->GetLatestSequenceNumber()); - fprintf(stderr, "[%s] %s => %s\n", - column_families_[0]->GetName().c_str(), - key.ToString(true /* hex */).c_str(), - value.ToString(true /* hex */).c_str()); - } - - fprintf(stderr, "[%s] %s => %s\n", - column_families_[i]->GetName().c_str(), - iters[i]->key().ToString(true /* hex */).c_str(), - iters[i]->value().ToString(true /* hex */).c_str()); - - Slice begin_key; - Slice end_key; - if (cmp < 0) { - begin_key = key; - end_key = iters[i]->key(); - } else { - begin_key = iters[i]->key(); - end_key = key; - } - - const auto print_key_versions = [&](ColumnFamilyHandle* cfh) { - constexpr size_t kMaxNumIKeys = 8; - - std::vector versions; - const Status s = GetAllKeyVersions(db_, cfh, begin_key, end_key, - kMaxNumIKeys, &versions); - if (!s.ok()) { - fprintf(stderr, "%s\n", s.ToString().c_str()); - return; - } - - assert(cfh); - - fprintf(stderr, - "Internal keys in CF '%s', [%s, %s] (max %" ROCKSDB_PRIszt - ")\n", - cfh->GetName().c_str(), - begin_key.ToString(true /* hex */).c_str(), - end_key.ToString(true /* hex */).c_str(), kMaxNumIKeys); - - for (const KeyVersion& kv : versions) { - fprintf(stderr, " key %s seq %" PRIu64 " type %d\n", - Slice(kv.user_key).ToString(true).c_str(), kv.sequence, - kv.type); - } - }; - - if (1 == num_mismatched_cfs) { - print_key_versions(column_families_[0]); - } - - print_key_versions(column_families_[i]); - - shared->SetVerificationFailure(); - } - } - - shared->FinishPrintingVerificationResults(); - - for (auto& iter : iters) { - assert(iter); - iter->Next(); - } - } while (true); - } - - void ContinuouslyVerifyDb(ThreadState* thread) const override { - assert(thread); - Status status; - - DB* db_ptr = cmp_db_ ? cmp_db_ : db_; - const auto& cfhs = cmp_db_ ? cmp_cfhs_ : column_families_; - - // Take a snapshot to preserve the state of primary db. - ManagedSnapshot snapshot_guard(db_); - - SharedState* shared = thread->shared; - assert(shared); - - if (cmp_db_) { - status = cmp_db_->TryCatchUpWithPrimary(); - if (!status.ok()) { - fprintf(stderr, "TryCatchUpWithPrimary: %s\n", - status.ToString().c_str()); - shared->SetShouldStopTest(); - assert(false); - return; - } - } - - const auto checksum_column_family = [](Iterator* iter, - uint32_t* checksum) -> Status { - assert(nullptr != checksum); - - uint32_t ret = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ret = crc32c::Extend(ret, iter->key().data(), iter->key().size()); - ret = crc32c::Extend(ret, iter->value().data(), iter->value().size()); - - for (const auto& column : iter->columns()) { - ret = crc32c::Extend(ret, column.name().data(), column.name().size()); - ret = - crc32c::Extend(ret, column.value().data(), column.value().size()); - } - } - - *checksum = ret; - return iter->status(); - }; - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. 
- ReadOptions ropts(FLAGS_verify_checksum, true); - ropts.total_order_seek = true; - if (nullptr == cmp_db_) { - ropts.snapshot = snapshot_guard.snapshot(); - } - uint32_t crc = 0; - { - // Compute crc for all key-values of default column family. - std::unique_ptr it(db_ptr->NewIterator(ropts)); - status = checksum_column_family(it.get(), &crc); - if (!status.ok()) { - fprintf(stderr, "Computing checksum of default cf: %s\n", - status.ToString().c_str()); - assert(false); - } - } - // Since we currently intentionally disallow reading from the secondary - // instance with snapshot, we cannot achieve cross-cf consistency if WAL is - // enabled because there is no guarantee that secondary instance replays - // the primary's WAL to a consistent point where all cfs have the same - // data. - if (status.ok() && FLAGS_disable_wal) { - uint32_t tmp_crc = 0; - for (ColumnFamilyHandle* cfh : cfhs) { - if (cfh == db_ptr->DefaultColumnFamily()) { - continue; - } - std::unique_ptr it(db_ptr->NewIterator(ropts, cfh)); - status = checksum_column_family(it.get(), &tmp_crc); - if (!status.ok() || tmp_crc != crc) { - break; - } - } - if (!status.ok()) { - fprintf(stderr, "status: %s\n", status.ToString().c_str()); - shared->SetShouldStopTest(); - assert(false); - } else if (tmp_crc != crc) { - fprintf(stderr, "tmp_crc=%" PRIu32 " crc=%" PRIu32 "\n", tmp_crc, crc); - shared->SetShouldStopTest(); - assert(false); - } - } - } - - std::vector GenerateColumnFamilies( - const int /* num_column_families */, - int /* rand_column_family */) const override { - std::vector ret; - int num = static_cast(column_families_.size()); - int k = 0; - std::generate_n(back_inserter(ret), num, [&k]() -> int { return k++; }); - return ret; - } - - private: - std::atomic batch_id_; -}; - -StressTest* CreateCfConsistencyStressTest() { - return new CfConsistencyStressTest(); -} - -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress.cc b/db_stress_tool/db_stress.cc deleted file mode 100644 index 2d03f5d26..000000000 --- a/db_stress_tool/db_stress.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef GFLAGS -#include - -int main() { - fprintf(stderr, "Please install gflags to run rocksdb tools\n"); - return 1; -} -#else -#include "port/stack_trace.h" -#include "rocksdb/db_stress_tool.h" - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - return ROCKSDB_NAMESPACE::db_stress_tool(argc, argv); -} -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_common.cc b/db_stress_tool/db_stress_common.cc deleted file mode 100644 index 93436d0f8..000000000 --- a/db_stress_tool/db_stress_common.cc +++ /dev/null @@ -1,491 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
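For context, a minimal standalone sketch (plain C++, not RocksDB code) of the cross-column-family check that ContinuouslyVerifyDb() performs above: fold every key and value, in key order, into one running checksum per column family and compare the results. FNV-1a stands in here for crc32c::Extend purely to keep the example self-contained; the structure, not the hash, is what matters.

// Standalone sketch: one running checksum per ordered key/value set, compared
// across "column families" the same way checksum_column_family() is used above.
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

static uint64_t Extend(uint64_t h, const std::string& data) {
  for (unsigned char c : data) {
    h ^= c;
    h *= 1099511628211ULL;  // FNV-1a prime (stand-in for crc32c::Extend)
  }
  return h;
}

static uint64_t ChecksumOrderedKVs(const std::map<std::string, std::string>& cf) {
  uint64_t h = 1469598103934665603ULL;  // FNV-1a offset basis
  for (const auto& kv : cf) {           // iteration order == key order
    h = Extend(h, kv.first);
    h = Extend(h, kv.second);
  }
  return h;
}

int main() {
  std::map<std::string, std::string> cf_default{{"a", "1"}, {"b", "2"}};
  std::map<std::string, std::string> cf_other{{"a", "1"}, {"b", "2"}};
  const uint64_t crc = ChecksumOrderedKVs(cf_default);
  const uint64_t tmp_crc = ChecksumOrderedKVs(cf_other);
  if (tmp_crc != crc) {
    std::fprintf(stderr, "tmp_crc=%llu crc=%llu\n",
                 (unsigned long long)tmp_crc, (unsigned long long)crc);
    return 1;  // the stress test flags this as a consistency failure
  }
  return 0;
}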
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// - -#ifdef GFLAGS -#include "db_stress_tool/db_stress_common.h" - -#include - -#include "util/file_checksum_helper.h" -#include "util/xxhash.h" - -ROCKSDB_NAMESPACE::Env* db_stress_listener_env = nullptr; -ROCKSDB_NAMESPACE::Env* db_stress_env = nullptr; -// If non-null, injects read error at a rate specified by the -// read_fault_one_in or write_fault_one_in flag -std::shared_ptr fault_fs_guard; -enum ROCKSDB_NAMESPACE::CompressionType compression_type_e = - ROCKSDB_NAMESPACE::kSnappyCompression; -enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e = - ROCKSDB_NAMESPACE::kSnappyCompression; -enum ROCKSDB_NAMESPACE::ChecksumType checksum_type_e = - ROCKSDB_NAMESPACE::kCRC32c; -enum RepFactory FLAGS_rep_factory = kSkipList; -std::vector sum_probs(100001); -constexpr int64_t zipf_sum_size = 100000; - -namespace ROCKSDB_NAMESPACE { - -// Zipfian distribution is generated based on a pre-calculated array. -// It should be used before start the stress test. -// First, the probability distribution function (PDF) of this Zipfian follows -// power low. P(x) = 1/(x^alpha). -// So we calculate the PDF when x is from 0 to zipf_sum_size in first for loop -// and add the PDF value togetger as c. So we get the total probability in c. -// Next, we calculate inverse CDF of Zipfian and store the value of each in -// an array (sum_probs). The rank is from 0 to zipf_sum_size. For example, for -// integer k, its Zipfian CDF value is sum_probs[k]. -// Third, when we need to get an integer whose probability follows Zipfian -// distribution, we use a rand_seed [0,1] which follows uniform distribution -// as a seed and search it in the sum_probs via binary search. When we find -// the closest sum_probs[i] of rand_seed, i is the integer that in -// [0, zipf_sum_size] following Zipfian distribution with parameter alpha. -// Finally, we can scale i to [0, max_key] scale. -// In order to avoid that hot keys are close to each other and skew towards 0, -// we use Rando64 to shuffle it. -void InitializeHotKeyGenerator(double alpha) { - double c = 0; - for (int64_t i = 1; i <= zipf_sum_size; i++) { - c = c + (1.0 / std::pow(static_cast(i), alpha)); - } - c = 1.0 / c; - - sum_probs[0] = 0; - for (int64_t i = 1; i <= zipf_sum_size; i++) { - sum_probs[i] = - sum_probs[i - 1] + c / std::pow(static_cast(i), alpha); - } -} - -// Generate one key that follows the Zipfian distribution. The skewness -// is decided by the parameter alpha. Input is the rand_seed [0,1] and -// the max of the key to be generated. If we directly return tmp_zipf_seed, -// the closer to 0, the higher probability will be. To randomly distribute -// the hot keys in [0, max_key], we use Random64 to shuffle it. 
-int64_t GetOneHotKeyID(double rand_seed, int64_t max_key) { - int64_t low = 1, mid, high = zipf_sum_size, zipf = 0; - while (low <= high) { - mid = (low + high) / 2; - if (sum_probs[mid] >= rand_seed && sum_probs[mid - 1] < rand_seed) { - zipf = mid; - break; - } else if (sum_probs[mid] >= rand_seed) { - high = mid - 1; - } else { - low = mid + 1; - } - } - int64_t tmp_zipf_seed = zipf * max_key / zipf_sum_size; - Random64 rand_local(tmp_zipf_seed); - return rand_local.Next() % max_key; -} - -void PoolSizeChangeThread(void* v) { - assert(FLAGS_compaction_thread_pool_adjust_interval > 0); - ThreadState* thread = reinterpret_cast(v); - SharedState* shared = thread->shared; - - while (true) { - { - MutexLock l(shared->GetMutex()); - if (shared->ShouldStopBgThread()) { - shared->IncBgThreadsFinished(); - if (shared->BgThreadsFinished()) { - shared->GetCondVar()->SignalAll(); - } - return; - } - } - - auto thread_pool_size_base = FLAGS_max_background_compactions; - auto thread_pool_size_var = FLAGS_compaction_thread_pool_variations; - int new_thread_pool_size = - thread_pool_size_base - thread_pool_size_var + - thread->rand.Next() % (thread_pool_size_var * 2 + 1); - if (new_thread_pool_size < 1) { - new_thread_pool_size = 1; - } - db_stress_env->SetBackgroundThreads(new_thread_pool_size, - ROCKSDB_NAMESPACE::Env::Priority::LOW); - // Sleep up to 3 seconds - db_stress_env->SleepForMicroseconds( - thread->rand.Next() % FLAGS_compaction_thread_pool_adjust_interval * - 1000 + - 1); - } -} - -void DbVerificationThread(void* v) { - assert(FLAGS_continuous_verification_interval > 0); - auto* thread = reinterpret_cast(v); - SharedState* shared = thread->shared; - StressTest* stress_test = shared->GetStressTest(); - assert(stress_test != nullptr); - while (true) { - { - MutexLock l(shared->GetMutex()); - if (shared->ShouldStopBgThread()) { - shared->IncBgThreadsFinished(); - if (shared->BgThreadsFinished()) { - shared->GetCondVar()->SignalAll(); - } - return; - } - } - if (!shared->HasVerificationFailedYet()) { - stress_test->ContinuouslyVerifyDb(thread); - } - db_stress_env->SleepForMicroseconds( - thread->rand.Next() % FLAGS_continuous_verification_interval * 1000 + - 1); - } -} - -void PrintKeyValue(int cf, uint64_t key, const char* value, size_t sz) { - if (!FLAGS_verbose) { - return; - } - std::string tmp; - tmp.reserve(sz * 2 + 16); - char buf[4]; - for (size_t i = 0; i < sz; i++) { - snprintf(buf, 4, "%X", value[i]); - tmp.append(buf); - } - auto key_str = Key(key); - Slice key_slice = key_str; - fprintf(stdout, "[CF %d] %s (%" PRIi64 ") == > (%" ROCKSDB_PRIszt ") %s\n", - cf, key_slice.ToString(true).c_str(), key, sz, tmp.c_str()); -} - -// Note that if hot_key_alpha != 0, it generates the key based on Zipfian -// distribution. Keys are randomly scattered to [0, FLAGS_max_key]. It does -// not ensure the order of the keys being generated and the keys does not have -// the active range which is related to FLAGS_active_width. 
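A self-contained sketch of the hot-key scheme implemented by InitializeHotKeyGenerator() and GetOneHotKeyID() above: precompute the Zipfian CDF once, binary-search a uniform draw in [0,1] to find the skewed rank, then scatter the rank across the key space. std::mt19937_64 stands in for RocksDB's Random64, and the smaller table size is an arbitrary choice to keep the example cheap.

// Standalone sketch of the two-step hot-key generator described above.
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <random>
#include <vector>

constexpr int64_t kZipfSumSize = 1000;
static std::vector<double> sum_probs(kZipfSumSize + 1);

void InitHotKeyGenerator(double alpha) {
  double c = 0;
  for (int64_t i = 1; i <= kZipfSumSize; i++) {
    c += 1.0 / std::pow(static_cast<double>(i), alpha);
  }
  c = 1.0 / c;  // normalization constant so the CDF ends at 1
  sum_probs[0] = 0;
  for (int64_t i = 1; i <= kZipfSumSize; i++) {
    sum_probs[i] = sum_probs[i - 1] + c / std::pow(static_cast<double>(i), alpha);
  }
}

int64_t HotKeyId(double rand_seed, int64_t max_key) {
  int64_t low = 1, high = kZipfSumSize, zipf = 0;
  while (low <= high) {  // find the rank whose CDF bracket contains rand_seed
    const int64_t mid = (low + high) / 2;
    if (sum_probs[mid] >= rand_seed && sum_probs[mid - 1] < rand_seed) {
      zipf = mid;
      break;
    } else if (sum_probs[mid] >= rand_seed) {
      high = mid - 1;
    } else {
      low = mid + 1;
    }
  }
  // Shuffle so hot keys are scattered over [0, max_key) instead of near 0.
  std::mt19937_64 shuffle(zipf * max_key / kZipfSumSize);
  return static_cast<int64_t>(shuffle() % static_cast<uint64_t>(max_key));
}

int main() {
  InitHotKeyGenerator(1.0);
  std::mt19937_64 rng(42);
  std::uniform_real_distribution<double> uni(0.0, 1.0);
  for (int i = 0; i < 5; i++) {
    std::printf("%lld\n", (long long)HotKeyId(uni(rng), 1000000));
  }
  return 0;
}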
-int64_t GenerateOneKey(ThreadState* thread, uint64_t iteration) { - const double completed_ratio = - static_cast(iteration) / FLAGS_ops_per_thread; - const int64_t base_key = static_cast( - completed_ratio * (FLAGS_max_key - FLAGS_active_width)); - int64_t rand_seed = base_key + thread->rand.Next() % FLAGS_active_width; - int64_t cur_key = rand_seed; - if (FLAGS_hot_key_alpha != 0) { - // If set the Zipfian distribution Alpha to non 0, use Zipfian - double float_rand = - (static_cast(thread->rand.Next() % FLAGS_max_key)) / - FLAGS_max_key; - cur_key = GetOneHotKeyID(float_rand, FLAGS_max_key); - } - return cur_key; -} - -// Note that if hot_key_alpha != 0, it generates the key based on Zipfian -// distribution. Keys being generated are in random order. -// If user want to generate keys based on uniform distribution, user needs to -// set hot_key_alpha == 0. It will generate the random keys in increasing -// order in the key array (ensure key[i] >= key[i+1]) and constrained in a -// range related to FLAGS_active_width. -std::vector GenerateNKeys(ThreadState* thread, int num_keys, - uint64_t iteration) { - const double completed_ratio = - static_cast(iteration) / FLAGS_ops_per_thread; - const int64_t base_key = static_cast( - completed_ratio * (FLAGS_max_key - FLAGS_active_width)); - std::vector keys; - keys.reserve(num_keys); - int64_t next_key = base_key + thread->rand.Next() % FLAGS_active_width; - keys.push_back(next_key); - for (int i = 1; i < num_keys; ++i) { - // Generate the key follows zipfian distribution - if (FLAGS_hot_key_alpha != 0) { - double float_rand = - (static_cast(thread->rand.Next() % FLAGS_max_key)) / - FLAGS_max_key; - next_key = GetOneHotKeyID(float_rand, FLAGS_max_key); - } else { - // This may result in some duplicate keys - next_key = next_key + thread->rand.Next() % - (FLAGS_active_width - (next_key - base_key)); - } - keys.push_back(next_key); - } - return keys; -} - -size_t GenerateValue(uint32_t rand, char* v, size_t max_sz) { - size_t value_sz = - ((rand % kRandomValueMaxFactor) + 1) * FLAGS_value_size_mult; - assert(value_sz <= max_sz && value_sz >= sizeof(uint32_t)); - (void)max_sz; - PutUnaligned(reinterpret_cast(v), rand); - for (size_t i = sizeof(uint32_t); i < value_sz; i++) { - v[i] = (char)(rand ^ i); - } - v[value_sz] = '\0'; - return value_sz; // the size of the value set. 
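The non-Zipfian path of GenerateOneKey() above keeps writes inside a window of FLAGS_active_width keys that slides from 0 toward FLAGS_max_key as a thread works through its operation budget. A minimal standalone illustration, with made-up parameter values:

// Standalone sketch of the sliding "active width" window used by
// GenerateOneKey(): early iterations draw keys near 0, late ones near max_key.
#include <cstdint>
#include <cstdio>
#include <random>

int64_t OneKey(uint64_t iteration, uint64_t ops_per_thread, int64_t max_key,
               int64_t active_width, std::mt19937_64& rng) {
  const double completed_ratio =
      static_cast<double>(iteration) / ops_per_thread;
  const int64_t base_key =
      static_cast<int64_t>(completed_ratio * (max_key - active_width));
  return base_key +
         static_cast<int64_t>(rng() % static_cast<uint64_t>(active_width));
}

int main() {
  std::mt19937_64 rng(7);
  const uint64_t ops = 1000;
  for (uint64_t it : {0ULL, 500ULL, 999ULL}) {
    std::printf("iter=%llu key=%lld\n", (unsigned long long)it,
                (long long)OneKey(it, ops, /*max_key=*/100000,
                                  /*active_width=*/1000, rng));
  }
  return 0;
}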
-} - -uint32_t GetValueBase(Slice s) { - assert(s.size() >= sizeof(uint32_t)); - uint32_t res; - GetUnaligned(reinterpret_cast(s.data()), &res); - return res; -} - -WideColumns GenerateWideColumns(uint32_t value_base, const Slice& slice) { - WideColumns columns; - - constexpr size_t max_columns = 4; - const size_t num_columns = (value_base % max_columns) + 1; - - columns.reserve(num_columns); - - assert(slice.size() >= num_columns); - - columns.emplace_back(kDefaultWideColumnName, slice); - - for (size_t i = 1; i < num_columns; ++i) { - const Slice name(slice.data(), i); - const Slice value(slice.data() + i, slice.size() - i); - - columns.emplace_back(name, value); - } - - return columns; -} - -WideColumns GenerateExpectedWideColumns(uint32_t value_base, - const Slice& slice) { - if (FLAGS_use_put_entity_one_in == 0 || - (value_base % FLAGS_use_put_entity_one_in) != 0) { - return WideColumns{{kDefaultWideColumnName, slice}}; - } - - WideColumns columns = GenerateWideColumns(value_base, slice); - - std::sort(columns.begin(), columns.end(), - [](const WideColumn& lhs, const WideColumn& rhs) { - return lhs.name().compare(rhs.name()) < 0; - }); - - return columns; -} - -bool VerifyWideColumns(const Slice& value, const WideColumns& columns) { - if (value.size() < sizeof(uint32_t)) { - return false; - } - - const uint32_t value_base = GetValueBase(value); - - const WideColumns expected_columns = - GenerateExpectedWideColumns(value_base, value); - - if (columns != expected_columns) { - return false; - } - - return true; -} - -bool VerifyWideColumns(const WideColumns& columns) { - if (columns.empty()) { - return false; - } - - if (columns.front().name() != kDefaultWideColumnName) { - return false; - } - - const Slice& value_of_default = columns.front().value(); - - return VerifyWideColumns(value_of_default, columns); -} - -std::string GetNowNanos() { - uint64_t t = db_stress_env->NowNanos(); - std::string ret; - PutFixed64(&ret, t); - return ret; -} - -namespace { - -class MyXXH64Checksum : public FileChecksumGenerator { - public: - explicit MyXXH64Checksum(bool big) : big_(big) { - state_ = XXH64_createState(); - XXH64_reset(state_, 0); - } - - virtual ~MyXXH64Checksum() override { XXH64_freeState(state_); } - - void Update(const char* data, size_t n) override { - XXH64_update(state_, data, n); - } - - void Finalize() override { - assert(str_.empty()); - uint64_t digest = XXH64_digest(state_); - // Store as little endian raw bytes - PutFixed64(&str_, digest); - if (big_) { - // Throw in some more data for stress testing (448 bits total) - PutFixed64(&str_, GetSliceHash64(str_)); - PutFixed64(&str_, GetSliceHash64(str_)); - PutFixed64(&str_, GetSliceHash64(str_)); - PutFixed64(&str_, GetSliceHash64(str_)); - PutFixed64(&str_, GetSliceHash64(str_)); - PutFixed64(&str_, GetSliceHash64(str_)); - } - } - - std::string GetChecksum() const override { - assert(!str_.empty()); - return str_; - } - - const char* Name() const override { - return big_ ? 
"MyBigChecksum" : "MyXXH64Checksum"; - } - - private: - bool big_; - XXH64_state_t* state_; - std::string str_; -}; - -class DbStressChecksumGenFactory : public FileChecksumGenFactory { - std::string default_func_name_; - - std::unique_ptr CreateFromFuncName( - const std::string& func_name) { - std::unique_ptr rv; - if (func_name == "FileChecksumCrc32c") { - rv.reset(new FileChecksumGenCrc32c(FileChecksumGenContext())); - } else if (func_name == "MyXXH64Checksum") { - rv.reset(new MyXXH64Checksum(false /* big */)); - } else if (func_name == "MyBigChecksum") { - rv.reset(new MyXXH64Checksum(true /* big */)); - } else { - // Should be a recognized function when we get here - assert(false); - } - return rv; - } - - public: - explicit DbStressChecksumGenFactory(const std::string& default_func_name) - : default_func_name_(default_func_name) {} - - std::unique_ptr CreateFileChecksumGenerator( - const FileChecksumGenContext& context) override { - if (context.requested_checksum_func_name.empty()) { - return CreateFromFuncName(default_func_name_); - } else { - return CreateFromFuncName(context.requested_checksum_func_name); - } - } - - const char* Name() const override { return "FileChecksumGenCrc32cFactory"; } -}; - -} // namespace - -std::shared_ptr GetFileChecksumImpl( - const std::string& name) { - // Translate from friendly names to internal names - std::string internal_name; - if (name == "crc32c") { - internal_name = "FileChecksumCrc32c"; - } else if (name == "xxh64") { - internal_name = "MyXXH64Checksum"; - } else if (name == "big") { - internal_name = "MyBigChecksum"; - } else { - assert(name.empty() || name == "none"); - return nullptr; - } - return std::make_shared(internal_name); -} - -Status DeleteFilesInDirectory(const std::string& dirname) { - std::vector filenames; - Status s = Env::Default()->GetChildren(dirname, &filenames); - for (size_t i = 0; s.ok() && i < filenames.size(); ++i) { - s = Env::Default()->DeleteFile(dirname + "/" + filenames[i]); - } - return s; -} - -Status SaveFilesInDirectory(const std::string& src_dirname, - const std::string& dst_dirname) { - std::vector filenames; - Status s = Env::Default()->GetChildren(src_dirname, &filenames); - for (size_t i = 0; s.ok() && i < filenames.size(); ++i) { - bool is_dir = false; - s = Env::Default()->IsDirectory(src_dirname + "/" + filenames[i], &is_dir); - if (s.ok()) { - if (is_dir) { - continue; - } - s = Env::Default()->LinkFile(src_dirname + "/" + filenames[i], - dst_dirname + "/" + filenames[i]); - } - } - return s; -} - -Status InitUnverifiedSubdir(const std::string& dirname) { - Status s = Env::Default()->FileExists(dirname); - if (s.IsNotFound()) { - return Status::OK(); - } - - const std::string kUnverifiedDirname = dirname + "/unverified"; - if (s.ok()) { - s = Env::Default()->CreateDirIfMissing(kUnverifiedDirname); - } - if (s.ok()) { - // It might already exist with some stale contents. Delete any such - // contents. 
- s = DeleteFilesInDirectory(kUnverifiedDirname); - } - if (s.ok()) { - s = SaveFilesInDirectory(dirname, kUnverifiedDirname); - } - return s; -} - -Status DestroyUnverifiedSubdir(const std::string& dirname) { - Status s = Env::Default()->FileExists(dirname); - if (s.IsNotFound()) { - return Status::OK(); - } - - const std::string kUnverifiedDirname = dirname + "/unverified"; - if (s.ok()) { - s = Env::Default()->FileExists(kUnverifiedDirname); - } - if (s.IsNotFound()) { - return Status::OK(); - } - - if (s.ok()) { - s = DeleteFilesInDirectory(kUnverifiedDirname); - } - if (s.ok()) { - s = Env::Default()->DeleteDir(kUnverifiedDirname); - } - return s; -} - -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h deleted file mode 100644 index 062b6b98c..000000000 --- a/db_stress_tool/db_stress_common.h +++ /dev/null @@ -1,670 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// The test uses an array to compare against values written to the database. -// Keys written to the array are in 1:1 correspondence to the actual values in -// the database according to the formula in the function GenerateValue. - -// Space is reserved in the array from 0 to FLAGS_max_key and values are -// randomly written/deleted/read from those positions. During verification we -// compare all the positions in the array. To shorten/elongate the running -// time, you could change the settings: FLAGS_max_key, FLAGS_ops_per_thread, -// (sometimes also FLAGS_threads). -// -// NOTE that if FLAGS_test_batches_snapshots is set, the test will have -// different behavior. See comment of the flag for details. 
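As a companion to the expected-state description above, here is a standalone sketch (std::string pairs instead of rocksdb::WideColumn) of the deterministic wide-column scheme from GenerateWideColumns()/VerifyWideColumns() in db_stress_common.cc: the value's first four bytes (the value base) decide how many columns exist and how the value is sliced, so a reader can rebuild and verify the expected columns from the value alone. For simplicity this sketch sorts both sides before comparing, whereas the real code only sorts the expected set.

// Standalone sketch of deterministic wide-column generation and verification.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

using Column = std::pair<std::string, std::string>;  // (name, value)

std::vector<Column> MakeColumns(uint32_t value_base, const std::string& value) {
  const size_t num_columns = (value_base % 4) + 1;
  assert(value.size() >= num_columns);
  std::vector<Column> columns;
  columns.emplace_back("", value);  // "" plays the role of the default column
  for (size_t i = 1; i < num_columns; ++i) {
    // Extra columns slice the value into a (prefix, suffix) pair of length i.
    columns.emplace_back(value.substr(0, i), value.substr(i));
  }
  return columns;
}

bool Verify(uint32_t value_base, const std::string& value,
            std::vector<Column> columns) {
  std::vector<Column> expected = MakeColumns(value_base, value);
  std::sort(expected.begin(), expected.end());
  std::sort(columns.begin(), columns.end());
  return columns == expected;
}

int main() {
  const std::string value = "abcdefgh";
  const uint32_t value_base = 3;  // 3 % 4 + 1 == 4 columns
  return Verify(value_base, value, MakeColumns(value_base, value)) ? 0 : 1;
}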
- -#ifdef GFLAGS -#pragma once -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "db/db_impl/db_impl.h" -#include "db/version_set.h" -#include "db_stress_tool/db_stress_env_wrapper.h" -#include "db_stress_tool/db_stress_listener.h" -#include "db_stress_tool/db_stress_shared_state.h" -#include "db_stress_tool/db_stress_test_base.h" -#include "logging/logging.h" -#include "monitoring/histogram.h" -#include "options/options_helper.h" -#include "port/port.h" -#include "rocksdb/cache.h" -#include "rocksdb/env.h" -#include "rocksdb/slice.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/statistics.h" -#include "rocksdb/utilities/backup_engine.h" -#include "rocksdb/utilities/checkpoint.h" -#include "rocksdb/utilities/db_ttl.h" -#include "rocksdb/utilities/debug.h" -#include "rocksdb/utilities/options_util.h" -#include "rocksdb/utilities/transaction.h" -#include "rocksdb/utilities/transaction_db.h" -#include "rocksdb/write_batch.h" -#include "test_util/testutil.h" -#include "util/coding.h" -#include "util/compression.h" -#include "util/crc32c.h" -#include "util/gflags_compat.h" -#include "util/mutexlock.h" -#include "util/random.h" -#include "util/string_util.h" -#include "utilities/blob_db/blob_db.h" -#include "utilities/fault_injection_fs.h" -#include "utilities/merge_operators.h" - -using GFLAGS_NAMESPACE::ParseCommandLineFlags; -using GFLAGS_NAMESPACE::RegisterFlagValidator; -using GFLAGS_NAMESPACE::SetUsageMessage; - -DECLARE_uint64(seed); -DECLARE_bool(read_only); -DECLARE_int64(max_key); -DECLARE_double(hot_key_alpha); -DECLARE_int32(max_key_len); -DECLARE_string(key_len_percent_dist); -DECLARE_int32(key_window_scale_factor); -DECLARE_int32(column_families); -DECLARE_string(options_file); -DECLARE_int64(active_width); -DECLARE_bool(test_batches_snapshots); -DECLARE_bool(atomic_flush); -DECLARE_int32(manual_wal_flush_one_in); -DECLARE_int32(lock_wal_one_in); -DECLARE_bool(test_cf_consistency); -DECLARE_bool(test_multi_ops_txns); -DECLARE_int32(threads); -DECLARE_int32(ttl); -DECLARE_int32(value_size_mult); -DECLARE_int32(compaction_readahead_size); -DECLARE_bool(enable_pipelined_write); -DECLARE_bool(verify_before_write); -DECLARE_bool(histogram); -DECLARE_bool(destroy_db_initially); -DECLARE_bool(verbose); -DECLARE_bool(progress_reports); -DECLARE_uint64(db_write_buffer_size); -DECLARE_int32(write_buffer_size); -DECLARE_int32(max_write_buffer_number); -DECLARE_int32(min_write_buffer_number_to_merge); -DECLARE_int32(max_write_buffer_number_to_maintain); -DECLARE_int64(max_write_buffer_size_to_maintain); -DECLARE_double(memtable_prefix_bloom_size_ratio); -DECLARE_bool(memtable_whole_key_filtering); -DECLARE_int32(open_files); -DECLARE_int64(compressed_cache_size); -DECLARE_int32(compressed_cache_numshardbits); -DECLARE_int32(compaction_style); -DECLARE_int32(compaction_pri); -DECLARE_int32(num_levels); -DECLARE_int32(level0_file_num_compaction_trigger); -DECLARE_int32(level0_slowdown_writes_trigger); -DECLARE_int32(level0_stop_writes_trigger); -DECLARE_int32(block_size); -DECLARE_int32(format_version); -DECLARE_int32(index_block_restart_interval); -DECLARE_bool(disable_auto_compactions); -DECLARE_int32(max_background_compactions); -DECLARE_int32(num_bottom_pri_threads); -DECLARE_int32(compaction_thread_pool_adjust_interval); -DECLARE_int32(compaction_thread_pool_variations); -DECLARE_int32(max_background_flushes); -DECLARE_int32(universal_size_ratio); -DECLARE_int32(universal_min_merge_width); 
-DECLARE_int32(universal_max_merge_width); -DECLARE_int32(universal_max_size_amplification_percent); -DECLARE_int32(clear_column_family_one_in); -DECLARE_int32(get_live_files_one_in); -DECLARE_int32(get_sorted_wal_files_one_in); -DECLARE_int32(get_current_wal_file_one_in); -DECLARE_int32(set_options_one_in); -DECLARE_int32(set_in_place_one_in); -DECLARE_int64(cache_size); -DECLARE_int32(cache_numshardbits); -DECLARE_bool(cache_index_and_filter_blocks); -DECLARE_bool(charge_compression_dictionary_building_buffer); -DECLARE_bool(charge_filter_construction); -DECLARE_bool(charge_table_reader); -DECLARE_bool(charge_file_metadata); -DECLARE_bool(charge_blob_cache); -DECLARE_int32(top_level_index_pinning); -DECLARE_int32(partition_pinning); -DECLARE_int32(unpartitioned_pinning); -DECLARE_string(cache_type); -DECLARE_uint64(subcompactions); -DECLARE_uint64(periodic_compaction_seconds); -DECLARE_uint64(compaction_ttl); -DECLARE_bool(fifo_allow_compaction); -DECLARE_bool(allow_concurrent_memtable_write); -DECLARE_double(experimental_mempurge_threshold); -DECLARE_bool(enable_write_thread_adaptive_yield); -DECLARE_int32(reopen); -DECLARE_double(bloom_bits); -DECLARE_int32(ribbon_starting_level); -DECLARE_bool(partition_filters); -DECLARE_bool(optimize_filters_for_memory); -DECLARE_bool(detect_filter_construct_corruption); -DECLARE_int32(index_type); -DECLARE_int32(data_block_index_type); -DECLARE_string(db); -DECLARE_string(secondaries_base); -DECLARE_bool(test_secondary); -DECLARE_string(expected_values_dir); -DECLARE_bool(verify_checksum); -DECLARE_bool(mmap_read); -DECLARE_bool(mmap_write); -DECLARE_bool(use_direct_reads); -DECLARE_bool(use_direct_io_for_flush_and_compaction); -DECLARE_bool(mock_direct_io); -DECLARE_bool(statistics); -DECLARE_bool(sync); -DECLARE_bool(use_fsync); -DECLARE_uint64(stats_dump_period_sec); -DECLARE_uint64(bytes_per_sync); -DECLARE_uint64(wal_bytes_per_sync); -DECLARE_int32(kill_random_test); -DECLARE_string(kill_exclude_prefixes); -DECLARE_bool(disable_wal); -DECLARE_uint64(recycle_log_file_num); -DECLARE_int64(target_file_size_base); -DECLARE_int32(target_file_size_multiplier); -DECLARE_uint64(max_bytes_for_level_base); -DECLARE_double(max_bytes_for_level_multiplier); -DECLARE_int32(range_deletion_width); -DECLARE_uint64(rate_limiter_bytes_per_sec); -DECLARE_bool(rate_limit_bg_reads); -DECLARE_bool(rate_limit_user_ops); -DECLARE_bool(rate_limit_auto_wal_flush); -DECLARE_uint64(sst_file_manager_bytes_per_sec); -DECLARE_uint64(sst_file_manager_bytes_per_truncate); -DECLARE_bool(use_txn); -DECLARE_uint64(txn_write_policy); -DECLARE_bool(unordered_write); -DECLARE_int32(backup_one_in); -DECLARE_uint64(backup_max_size); -DECLARE_int32(checkpoint_one_in); -DECLARE_int32(ingest_external_file_one_in); -DECLARE_int32(ingest_external_file_width); -DECLARE_int32(compact_files_one_in); -DECLARE_int32(compact_range_one_in); -DECLARE_int32(mark_for_compaction_one_file_in); -DECLARE_int32(flush_one_in); -DECLARE_int32(pause_background_one_in); -DECLARE_int32(compact_range_width); -DECLARE_int32(acquire_snapshot_one_in); -DECLARE_bool(compare_full_db_state_snapshot); -DECLARE_uint64(snapshot_hold_ops); -DECLARE_bool(long_running_snapshots); -DECLARE_bool(use_multiget); -DECLARE_bool(use_get_entity); -DECLARE_int32(readpercent); -DECLARE_int32(prefixpercent); -DECLARE_int32(writepercent); -DECLARE_int32(delpercent); -DECLARE_int32(delrangepercent); -DECLARE_int32(nooverwritepercent); -DECLARE_int32(iterpercent); -DECLARE_uint64(num_iterations); -DECLARE_int32(customopspercent); 
-DECLARE_string(compression_type); -DECLARE_string(bottommost_compression_type); -DECLARE_int32(compression_max_dict_bytes); -DECLARE_int32(compression_zstd_max_train_bytes); -DECLARE_int32(compression_parallel_threads); -DECLARE_uint64(compression_max_dict_buffer_bytes); -DECLARE_bool(compression_use_zstd_dict_trainer); -DECLARE_string(checksum_type); -DECLARE_string(env_uri); -DECLARE_string(fs_uri); -DECLARE_uint64(ops_per_thread); -DECLARE_uint64(log2_keys_per_lock); -DECLARE_uint64(max_manifest_file_size); -DECLARE_bool(in_place_update); -DECLARE_string(memtablerep); -DECLARE_int32(prefix_size); -DECLARE_bool(use_merge); -DECLARE_uint32(use_put_entity_one_in); -DECLARE_bool(use_full_merge_v1); -DECLARE_int32(sync_wal_one_in); -DECLARE_bool(avoid_unnecessary_blocking_io); -DECLARE_bool(write_dbid_to_manifest); -DECLARE_bool(avoid_flush_during_recovery); -DECLARE_uint64(max_write_batch_group_size_bytes); -DECLARE_bool(level_compaction_dynamic_level_bytes); -DECLARE_int32(verify_checksum_one_in); -DECLARE_int32(verify_db_one_in); -DECLARE_int32(continuous_verification_interval); -DECLARE_int32(get_property_one_in); -DECLARE_string(file_checksum_impl); - -// Options for StackableDB-based BlobDB -DECLARE_bool(use_blob_db); -DECLARE_uint64(blob_db_min_blob_size); -DECLARE_uint64(blob_db_bytes_per_sync); -DECLARE_uint64(blob_db_file_size); -DECLARE_bool(blob_db_enable_gc); -DECLARE_double(blob_db_gc_cutoff); - -// Options for integrated BlobDB -DECLARE_bool(allow_setting_blob_options_dynamically); -DECLARE_bool(enable_blob_files); -DECLARE_uint64(min_blob_size); -DECLARE_uint64(blob_file_size); -DECLARE_string(blob_compression_type); -DECLARE_bool(enable_blob_garbage_collection); -DECLARE_double(blob_garbage_collection_age_cutoff); -DECLARE_double(blob_garbage_collection_force_threshold); -DECLARE_uint64(blob_compaction_readahead_size); -DECLARE_int32(blob_file_starting_level); -DECLARE_bool(use_blob_cache); -DECLARE_bool(use_shared_block_and_blob_cache); -DECLARE_uint64(blob_cache_size); -DECLARE_int32(blob_cache_numshardbits); -DECLARE_int32(prepopulate_blob_cache); - -DECLARE_int32(approximate_size_one_in); -DECLARE_bool(sync_fault_injection); - -DECLARE_bool(best_efforts_recovery); -DECLARE_bool(skip_verifydb); -DECLARE_bool(enable_compaction_filter); -DECLARE_bool(paranoid_file_checks); -DECLARE_bool(fail_if_options_file_error); -DECLARE_uint64(batch_protection_bytes_per_key); -DECLARE_uint32(memtable_protection_bytes_per_key); - -DECLARE_uint64(user_timestamp_size); -DECLARE_string(secondary_cache_uri); -DECLARE_int32(secondary_cache_fault_one_in); - -DECLARE_int32(prepopulate_block_cache); - -DECLARE_bool(two_write_queues); -DECLARE_bool(use_only_the_last_commit_time_batch_for_recovery); -DECLARE_uint64(wp_snapshot_cache_bits); -DECLARE_uint64(wp_commit_cache_bits); - -DECLARE_bool(adaptive_readahead); -DECLARE_bool(async_io); -DECLARE_string(wal_compression); -DECLARE_bool(verify_sst_unique_id_in_manifest); - -DECLARE_int32(create_timestamped_snapshot_one_in); - -DECLARE_bool(allow_data_in_errors); - -// Tiered storage -DECLARE_bool(enable_tiered_storage); // set last_level_temperature -DECLARE_int64(preclude_last_level_data_seconds); -DECLARE_int64(preserve_internal_time_seconds); - -DECLARE_int32(verify_iterator_with_expected_state_one_in); -DECLARE_bool(preserve_unverified_changes); - -DECLARE_uint64(readahead_size); -DECLARE_uint64(initial_auto_readahead_size); -DECLARE_uint64(max_auto_readahead_size); -DECLARE_uint64(num_file_reads_for_auto_readahead); 
-DECLARE_bool(use_io_uring); - -constexpr long KB = 1024; -constexpr int kRandomValueMaxFactor = 3; -constexpr int kValueMaxLen = 100; - -// wrapped posix environment -extern ROCKSDB_NAMESPACE::Env* db_stress_env; -extern ROCKSDB_NAMESPACE::Env* db_stress_listener_env; -extern std::shared_ptr fault_fs_guard; - -extern enum ROCKSDB_NAMESPACE::CompressionType compression_type_e; -extern enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e; -extern enum ROCKSDB_NAMESPACE::ChecksumType checksum_type_e; - -enum RepFactory { kSkipList, kHashSkipList, kVectorRep }; - -inline enum RepFactory StringToRepFactory(const char* ctype) { - assert(ctype); - - if (!strcasecmp(ctype, "skip_list")) - return kSkipList; - else if (!strcasecmp(ctype, "prefix_hash")) - return kHashSkipList; - else if (!strcasecmp(ctype, "vector")) - return kVectorRep; - - fprintf(stdout, "Cannot parse memreptable %s\n", ctype); - return kSkipList; -} - -extern enum RepFactory FLAGS_rep_factory; - -namespace ROCKSDB_NAMESPACE { -inline enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType( - const char* ctype) { - assert(ctype); - - ROCKSDB_NAMESPACE::CompressionType ret_compression_type; - - if (!strcasecmp(ctype, "disable")) { - ret_compression_type = ROCKSDB_NAMESPACE::kDisableCompressionOption; - } else if (!strcasecmp(ctype, "none")) { - ret_compression_type = ROCKSDB_NAMESPACE::kNoCompression; - } else if (!strcasecmp(ctype, "snappy")) { - ret_compression_type = ROCKSDB_NAMESPACE::kSnappyCompression; - } else if (!strcasecmp(ctype, "zlib")) { - ret_compression_type = ROCKSDB_NAMESPACE::kZlibCompression; - } else if (!strcasecmp(ctype, "bzip2")) { - ret_compression_type = ROCKSDB_NAMESPACE::kBZip2Compression; - } else if (!strcasecmp(ctype, "lz4")) { - ret_compression_type = ROCKSDB_NAMESPACE::kLZ4Compression; - } else if (!strcasecmp(ctype, "lz4hc")) { - ret_compression_type = ROCKSDB_NAMESPACE::kLZ4HCCompression; - } else if (!strcasecmp(ctype, "xpress")) { - ret_compression_type = ROCKSDB_NAMESPACE::kXpressCompression; - } else if (!strcasecmp(ctype, "zstd")) { - ret_compression_type = ROCKSDB_NAMESPACE::kZSTD; - } else { - fprintf(stderr, "Cannot parse compression type '%s'\n", ctype); - ret_compression_type = - ROCKSDB_NAMESPACE::kSnappyCompression; // default value - } - if (ret_compression_type != ROCKSDB_NAMESPACE::kDisableCompressionOption && - !CompressionTypeSupported(ret_compression_type)) { - // Use no compression will be more portable but considering this is - // only a stress test and snappy is widely available. Use snappy here. 
- ret_compression_type = ROCKSDB_NAMESPACE::kSnappyCompression; - } - return ret_compression_type; -} - -inline enum ROCKSDB_NAMESPACE::ChecksumType StringToChecksumType( - const char* ctype) { - assert(ctype); - auto iter = ROCKSDB_NAMESPACE::checksum_type_string_map.find(ctype); - if (iter != ROCKSDB_NAMESPACE::checksum_type_string_map.end()) { - return iter->second; - } - fprintf(stderr, "Cannot parse checksum type '%s'\n", ctype); - return ROCKSDB_NAMESPACE::kCRC32c; -} - -inline std::string ChecksumTypeToString(ROCKSDB_NAMESPACE::ChecksumType ctype) { - auto iter = std::find_if( - ROCKSDB_NAMESPACE::checksum_type_string_map.begin(), - ROCKSDB_NAMESPACE::checksum_type_string_map.end(), - [&](const std::pair& - name_and_enum_val) { return name_and_enum_val.second == ctype; }); - assert(iter != ROCKSDB_NAMESPACE::checksum_type_string_map.end()); - return iter->first; -} - -inline std::vector SplitString(std::string src) { - std::vector ret; - if (src.empty()) { - return ret; - } - size_t pos = 0; - size_t pos_comma; - while ((pos_comma = src.find(',', pos)) != std::string::npos) { - ret.push_back(src.substr(pos, pos_comma - pos)); - pos = pos_comma + 1; - } - ret.push_back(src.substr(pos, src.length())); - return ret; -} - -#ifdef _MSC_VER -#pragma warning(push) -// truncation of constant value on static_cast -#pragma warning(disable : 4309) -#endif -inline bool GetNextPrefix(const ROCKSDB_NAMESPACE::Slice& src, std::string* v) { - std::string ret = src.ToString(); - for (int i = static_cast(ret.size()) - 1; i >= 0; i--) { - if (ret[i] != static_cast(255)) { - ret[i] = ret[i] + 1; - break; - } else if (i != 0) { - ret[i] = 0; - } else { - // all FF. No next prefix - return false; - } - } - *v = ret; - return true; -} -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -// Append `val` to `*key` in fixed-width big-endian format -extern inline void AppendIntToString(uint64_t val, std::string* key) { - // PutFixed64 uses little endian - PutFixed64(key, val); - // Reverse to get big endian - char* int_data = &((*key)[key->size() - sizeof(uint64_t)]); - for (size_t i = 0; i < sizeof(uint64_t) / 2; ++i) { - std::swap(int_data[i], int_data[sizeof(uint64_t) - 1 - i]); - } -} - -// A struct for maintaining the parameters for generating variable length keys -struct KeyGenContext { - // Number of adjacent keys in one cycle of key lengths - uint64_t window; - // Number of keys of each possible length in a given window - std::vector weights; -}; -extern KeyGenContext key_gen_ctx; - -// Generate a variable length key string from the given int64 val. The -// order of the keys is preserved. The key could be anywhere from 8 to -// max_key_len * 8 bytes. -// The algorithm picks the length based on the -// offset of the val within a configured window and the distribution of the -// number of keys of various lengths in that window. For example, if x, y, x are -// the weights assigned to each possible key length, the keys generated would be -// - {0}...{x-1} -// {(x-1),0}..{(x-1),(y-1)},{(x-1),(y-1),0}..{(x-1),(y-1),(z-1)} and so on. -// Additionally, a trailer of 0-7 bytes could be appended. -extern inline std::string Key(int64_t val) { - uint64_t window = key_gen_ctx.window; - size_t levels = key_gen_ctx.weights.size(); - std::string key; - // Over-reserve and for now do not bother `shrink_to_fit()` since the key - // strings are transient. 
- key.reserve(FLAGS_max_key_len * 8); - - uint64_t window_idx = static_cast(val) / window; - uint64_t offset = static_cast(val) % window; - for (size_t level = 0; level < levels; ++level) { - uint64_t weight = key_gen_ctx.weights[level]; - uint64_t pfx; - if (level == 0) { - pfx = window_idx * weight; - } else { - pfx = 0; - } - pfx += offset >= weight ? weight - 1 : offset; - AppendIntToString(pfx, &key); - if (offset < weight) { - // Use the bottom 3 bits of offset as the number of trailing 'x's in the - // key. If the next key is going to be of the next level, then skip the - // trailer as it would break ordering. If the key length is already at - // max, skip the trailer. - if (offset < weight - 1 && level < levels - 1) { - size_t trailer_len = offset & 0x7; - key.append(trailer_len, 'x'); - } - break; - } - offset -= weight; - } - - return key; -} - -// Given a string key, map it to an index into the expected values buffer -extern inline bool GetIntVal(std::string big_endian_key, uint64_t* key_p) { - size_t size_key = big_endian_key.size(); - std::vector prefixes; - - assert(size_key <= key_gen_ctx.weights.size() * sizeof(uint64_t)); - - std::string little_endian_key; - little_endian_key.resize(size_key); - for (size_t start = 0; start + sizeof(uint64_t) <= size_key; - start += sizeof(uint64_t)) { - size_t end = start + sizeof(uint64_t); - for (size_t i = 0; i < sizeof(uint64_t); ++i) { - little_endian_key[start + i] = big_endian_key[end - 1 - i]; - } - Slice little_endian_slice = - Slice(&little_endian_key[start], sizeof(uint64_t)); - uint64_t pfx; - if (!GetFixed64(&little_endian_slice, &pfx)) { - return false; - } - prefixes.emplace_back(pfx); - } - - uint64_t key = 0; - for (size_t i = 0; i < prefixes.size(); ++i) { - uint64_t pfx = prefixes[i]; - key += (pfx / key_gen_ctx.weights[i]) * key_gen_ctx.window + - pfx % key_gen_ctx.weights[i]; - if (i < prefixes.size() - 1) { - // The encoding writes a `key_gen_ctx.weights[i] - 1` that counts for - // `key_gen_ctx.weights[i]` when there are more prefixes to come. So we - // need to add back the one here as we're at a non-last prefix. - ++key; - } - } - *key_p = key; - return true; -} - -// Given a string prefix, map it to the first corresponding index in the -// expected values buffer. -inline bool GetFirstIntValInPrefix(std::string big_endian_prefix, - uint64_t* key_p) { - size_t size_key = big_endian_prefix.size(); - // Pad with zeros to make it a multiple of 8. 
This function may be called - // with a prefix, in which case we return the first index that falls - // inside or outside that prefix, dependeing on whether the prefix is - // the start of upper bound of a scan - unsigned int pad = sizeof(uint64_t) - (size_key % sizeof(uint64_t)); - if (pad < sizeof(uint64_t)) { - big_endian_prefix.append(pad, '\0'); - } - return GetIntVal(std::move(big_endian_prefix), key_p); -} - -extern inline uint64_t GetPrefixKeyCount(const std::string& prefix, - const std::string& ub) { - uint64_t start = 0; - uint64_t end = 0; - - if (!GetFirstIntValInPrefix(prefix, &start) || - !GetFirstIntValInPrefix(ub, &end)) { - return 0; - } - - return end - start; -} - -extern inline std::string StringToHex(const std::string& str) { - std::string result = "0x"; - result.append(Slice(str).ToString(true)); - return result; -} - -inline std::string WideColumnsToHex(const WideColumns& columns) { - if (columns.empty()) { - return std::string(); - } - - std::ostringstream oss; - - oss << std::hex; - - auto it = columns.begin(); - oss << *it; - for (++it; it != columns.end(); ++it) { - oss << ' ' << *it; - } - - return oss.str(); -} - -// Unified output format for double parameters -extern inline std::string FormatDoubleParam(double param) { - return std::to_string(param); -} - -// Make sure that double parameter is a value we can reproduce by -// re-inputting the value printed. -extern inline void SanitizeDoubleParam(double* param) { - *param = std::atof(FormatDoubleParam(*param).c_str()); -} - -extern void PoolSizeChangeThread(void* v); - -extern void DbVerificationThread(void* v); - -extern void TimestampedSnapshotsThread(void* v); - -extern void PrintKeyValue(int cf, uint64_t key, const char* value, size_t sz); - -extern int64_t GenerateOneKey(ThreadState* thread, uint64_t iteration); - -extern std::vector GenerateNKeys(ThreadState* thread, int num_keys, - uint64_t iteration); - -extern size_t GenerateValue(uint32_t rand, char* v, size_t max_sz); -extern uint32_t GetValueBase(Slice s); - -extern WideColumns GenerateWideColumns(uint32_t value_base, const Slice& slice); -extern WideColumns GenerateExpectedWideColumns(uint32_t value_base, - const Slice& slice); -extern bool VerifyWideColumns(const Slice& value, const WideColumns& columns); -extern bool VerifyWideColumns(const WideColumns& columns); - -extern StressTest* CreateCfConsistencyStressTest(); -extern StressTest* CreateBatchedOpsStressTest(); -extern StressTest* CreateNonBatchedOpsStressTest(); -extern StressTest* CreateMultiOpsTxnsStressTest(); -extern void CheckAndSetOptionsForMultiOpsTxnStressTest(); -extern void InitializeHotKeyGenerator(double alpha); -extern int64_t GetOneHotKeyID(double rand_seed, int64_t max_key); - -extern std::string GetNowNanos(); - -std::shared_ptr GetFileChecksumImpl( - const std::string& name); - -Status DeleteFilesInDirectory(const std::string& dirname); -Status SaveFilesInDirectory(const std::string& src_dirname, - const std::string& dst_dirname); -Status DestroyUnverifiedSubdir(const std::string& dirname); -Status InitUnverifiedSubdir(const std::string& dirname); -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_compaction_filter.h b/db_stress_tool/db_stress_compaction_filter.h deleted file mode 100644 index 408bb48f3..000000000 --- a/db_stress_tool/db_stress_compaction_filter.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
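The key helpers above (AppendIntToString(), Key(), GetIntVal()) all hinge on one property: fixed-width big-endian integers compare the same way as bytes and as numbers, which is what keeps the generated variable-length keys ordered and invertible. A tiny standalone demonstration:

// Standalone sketch: big-endian fixed-width encoding preserves numeric order
// under lexicographic (byte-wise) comparison, and it round-trips.
#include <cassert>
#include <cstdint>
#include <string>

std::string EncodeBigEndian64(uint64_t val) {
  std::string out(sizeof(uint64_t), '\0');
  for (size_t i = 0; i < sizeof(uint64_t); ++i) {
    out[i] = static_cast<char>((val >> (8 * (sizeof(uint64_t) - 1 - i))) & 0xff);
  }
  return out;
}

uint64_t DecodeBigEndian64(const std::string& s) {
  uint64_t val = 0;
  for (size_t i = 0; i < sizeof(uint64_t); ++i) {
    val = (val << 8) | static_cast<uint8_t>(s[i]);
  }
  return val;
}

int main() {
  // 255 < 256 numerically, and their encodings compare the same way byte-wise.
  assert(EncodeBigEndian64(255) < EncodeBigEndian64(256));
  // The encoding round-trips.
  assert(DecodeBigEndian64(EncodeBigEndian64(1234567890123ULL)) ==
         1234567890123ULL);
  return 0;
}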
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#pragma once - -#include "db_stress_tool/db_stress_common.h" -#include "db_stress_tool/db_stress_shared_state.h" -#include "rocksdb/compaction_filter.h" - -namespace ROCKSDB_NAMESPACE { - -// DbStressCompactionFilter is safe to use with db_stress as it does not perform -// any mutation. It only makes `kRemove` decisions for keys that are already -// non-existent according to the `SharedState`. -class DbStressCompactionFilter : public CompactionFilter { - public: - DbStressCompactionFilter(SharedState* state, int cf_id) - : state_(state), cf_id_(cf_id) {} - - Decision FilterV2(int /*level*/, const Slice& key, ValueType /*value_type*/, - const Slice& /*existing_value*/, std::string* /*new_value*/, - std::string* /*skip_until*/) const override { - if (state_ == nullptr) { - return Decision::kKeep; - } - if (key.empty() || ('0' <= key[0] && key[0] <= '9')) { - // It is likely leftover from a test_batches_snapshots run. Below this - // conditional, the test_batches_snapshots key format is not handled - // properly. Just keep it to be safe. - return Decision::kKeep; - } - uint64_t key_num = 0; - { - Slice ukey_without_ts = key; - assert(ukey_without_ts.size() >= FLAGS_user_timestamp_size); - ukey_without_ts.remove_suffix(FLAGS_user_timestamp_size); - [[maybe_unused]] bool ok = - GetIntVal(ukey_without_ts.ToString(), &key_num); - assert(ok); - } - port::Mutex* key_mutex = state_->GetMutexForKey(cf_id_, key_num); - if (!key_mutex->TryLock()) { - return Decision::kKeep; - } - // Reaching here means we acquired the lock. - - bool key_exists = state_->Exists(cf_id_, key_num); - const bool allow_overwrite = state_->AllowsOverwrite(key_num); - - key_mutex->Unlock(); - - if (!key_exists) { - return allow_overwrite ? Decision::kRemove : Decision::kPurge; - } - return Decision::kKeep; - } - - const char* Name() const override { return "DbStressCompactionFilter"; } - - private: - SharedState* const state_; - const int cf_id_; -}; - -class DbStressCompactionFilterFactory : public CompactionFilterFactory { - public: - DbStressCompactionFilterFactory() : state_(nullptr) {} - - void SetSharedState(SharedState* state) { - MutexLock state_mutex_guard(&state_mutex_); - state_ = state; - } - - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& context) override { - MutexLock state_mutex_guard(&state_mutex_); - return std::unique_ptr( - new DbStressCompactionFilter(state_, context.column_family_id)); - } - - const char* Name() const override { - return "DbStressCompactionFilterFactory"; - } - - private: - port::Mutex state_mutex_; - SharedState* state_; -}; - -} // namespace ROCKSDB_NAMESPACE diff --git a/db_stress_tool/db_stress_driver.cc b/db_stress_tool/db_stress_driver.cc deleted file mode 100644 index 2c8dcf610..000000000 --- a/db_stress_tool/db_stress_driver.cc +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
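A standalone sketch of the locking pattern DbStressCompactionFilter uses above: the filter runs on background compaction threads while foreground writers may already hold the per-key mutex, so it only tries the lock and treats a failed attempt as "keep", which is always a safe answer for a compaction filter.

// Standalone sketch of the "try the per-key lock, fall back to kKeep" pattern.
#include <mutex>

enum class Decision { kKeep, kRemove };

Decision FilterKey(std::mutex& key_mutex, bool key_exists_in_expected_state) {
  if (!key_mutex.try_lock()) {
    return Decision::kKeep;  // cannot check safely right now; keep the key
  }
  std::lock_guard<std::mutex> guard(key_mutex, std::adopt_lock);
  return key_exists_in_expected_state ? Decision::kKeep : Decision::kRemove;
}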
-// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// - -#ifdef GFLAGS -#include "db_stress_tool/db_stress_common.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { -void ThreadBody(void* v) { - ThreadState* thread = reinterpret_cast(v); - SharedState* shared = thread->shared; - - if (!FLAGS_skip_verifydb && shared->ShouldVerifyAtBeginning()) { - thread->shared->GetStressTest()->VerifyDb(thread); - } - { - MutexLock l(shared->GetMutex()); - shared->IncInitialized(); - if (shared->AllInitialized()) { - shared->GetCondVar()->SignalAll(); - } - while (!shared->Started()) { - shared->GetCondVar()->Wait(); - } - } - thread->shared->GetStressTest()->OperateDb(thread); - - { - MutexLock l(shared->GetMutex()); - shared->IncOperated(); - if (shared->AllOperated()) { - shared->GetCondVar()->SignalAll(); - } - while (!shared->VerifyStarted()) { - shared->GetCondVar()->Wait(); - } - } - - if (!FLAGS_skip_verifydb) { - thread->shared->GetStressTest()->VerifyDb(thread); - } - - { - MutexLock l(shared->GetMutex()); - shared->IncDone(); - if (shared->AllDone()) { - shared->GetCondVar()->SignalAll(); - } - } -} - -bool RunStressTest(SharedState* shared) { - SystemClock* clock = db_stress_env->GetSystemClock().get(); - StressTest* stress = shared->GetStressTest(); - - if (shared->ShouldVerifyAtBeginning() && FLAGS_preserve_unverified_changes) { - Status s = InitUnverifiedSubdir(FLAGS_db); - if (s.ok() && !FLAGS_expected_values_dir.empty()) { - s = InitUnverifiedSubdir(FLAGS_expected_values_dir); - } - if (!s.ok()) { - fprintf(stderr, "Failed to setup unverified state dir: %s\n", - s.ToString().c_str()); - exit(1); - } - } - - stress->InitDb(shared); - stress->FinishInitDb(shared); - - if (FLAGS_sync_fault_injection) { - fault_fs_guard->SetFilesystemDirectWritable(false); - } - if (FLAGS_write_fault_one_in) { - fault_fs_guard->EnableWriteErrorInjection(); - } - - uint32_t n = FLAGS_threads; - uint64_t now = clock->NowMicros(); - fprintf(stdout, "%s Initializing worker threads\n", - clock->TimeToString(now / 1000000).c_str()); - - shared->SetThreads(n); - - if (FLAGS_compaction_thread_pool_adjust_interval > 0) { - shared->IncBgThreads(); - } - - if (FLAGS_continuous_verification_interval > 0) { - shared->IncBgThreads(); - } - - std::vector threads(n); - for (uint32_t i = 0; i < n; i++) { - threads[i] = new ThreadState(i, shared); - db_stress_env->StartThread(ThreadBody, threads[i]); - } - - ThreadState bg_thread(0, shared); - if (FLAGS_compaction_thread_pool_adjust_interval > 0) { - db_stress_env->StartThread(PoolSizeChangeThread, &bg_thread); - } - - ThreadState continuous_verification_thread(0, shared); - if (FLAGS_continuous_verification_interval > 0) { - db_stress_env->StartThread(DbVerificationThread, - &continuous_verification_thread); - } - - // Each thread goes through the following states: - // initializing -> wait for others to init -> read/populate/depopulate - // wait for others to operate -> verify -> done - - { - MutexLock l(shared->GetMutex()); - while (!shared->AllInitialized()) { - shared->GetCondVar()->Wait(); - } - if (shared->ShouldVerifyAtBeginning()) { - if (shared->HasVerificationFailedYet()) { - fprintf(stderr, "Crash-recovery verification failed :(\n"); - } else { - fprintf(stdout, "Crash-recovery verification passed :)\n"); - Status s = DestroyUnverifiedSubdir(FLAGS_db); - if (s.ok() && 
!FLAGS_expected_values_dir.empty()) { - s = DestroyUnverifiedSubdir(FLAGS_expected_values_dir); - } - if (!s.ok()) { - fprintf(stderr, "Failed to cleanup unverified state dir: %s\n", - s.ToString().c_str()); - exit(1); - } - } - } - - // This is after the verification step to avoid making all those `Get()`s - // and `MultiGet()`s contend on the DB-wide trace mutex. - if (!FLAGS_expected_values_dir.empty()) { - stress->TrackExpectedState(shared); - } - - now = clock->NowMicros(); - fprintf(stdout, "%s Starting database operations\n", - clock->TimeToString(now / 1000000).c_str()); - - shared->SetStart(); - shared->GetCondVar()->SignalAll(); - while (!shared->AllOperated()) { - shared->GetCondVar()->Wait(); - } - - now = clock->NowMicros(); - if (FLAGS_test_batches_snapshots) { - fprintf(stdout, "%s Limited verification already done during gets\n", - clock->TimeToString((uint64_t)now / 1000000).c_str()); - } else if (FLAGS_skip_verifydb) { - fprintf(stdout, "%s Verification skipped\n", - clock->TimeToString((uint64_t)now / 1000000).c_str()); - } else { - fprintf(stdout, "%s Starting verification\n", - clock->TimeToString((uint64_t)now / 1000000).c_str()); - } - - shared->SetStartVerify(); - shared->GetCondVar()->SignalAll(); - while (!shared->AllDone()) { - shared->GetCondVar()->Wait(); - } - } - - for (unsigned int i = 1; i < n; i++) { - threads[0]->stats.Merge(threads[i]->stats); - } - threads[0]->stats.Report("Stress Test"); - - for (unsigned int i = 0; i < n; i++) { - delete threads[i]; - threads[i] = nullptr; - } - now = clock->NowMicros(); - if (!FLAGS_skip_verifydb && !FLAGS_test_batches_snapshots && - !shared->HasVerificationFailedYet()) { - fprintf(stdout, "%s Verification successful\n", - clock->TimeToString(now / 1000000).c_str()); - } - stress->PrintStatistics(); - - if (FLAGS_compaction_thread_pool_adjust_interval > 0 || - FLAGS_continuous_verification_interval > 0) { - MutexLock l(shared->GetMutex()); - shared->SetShouldStopBgThread(); - while (!shared->BgThreadsFinished()) { - shared->GetCondVar()->Wait(); - } - } - - if (shared->HasVerificationFailedYet()) { - fprintf(stderr, "Verification failed :(\n"); - return false; - } - return true; -} -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_driver.h b/db_stress_tool/db_stress_driver.h deleted file mode 100644 index a173470ff..000000000 --- a/db_stress_tool/db_stress_driver.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db_stress_tool/db_stress_shared_state.h" -#ifdef GFLAGS -#pragma once -#include "db_stress_tool/db_stress_test_base.h" -namespace ROCKSDB_NAMESPACE { -extern void ThreadBody(void* /*thread_state*/); -extern bool RunStressTest(SharedState*); -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_env_wrapper.h b/db_stress_tool/db_stress_env_wrapper.h deleted file mode 100644 index af60df9bc..000000000 --- a/db_stress_tool/db_stress_env_wrapper.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
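A standalone sketch of the counter-plus-condition-variable handshake that ThreadBody() and RunStressTest() above use to march the workers through the initializing -> operate -> verify -> done phases; SharedState's counters and condvar are modeled with a small struct here, and only the first phase transition is shown.

// Standalone sketch: every worker bumps a counter, the last one wakes the
// driver, and the driver releases all workers into the next phase.
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

struct Shared {
  std::mutex mu;
  std::condition_variable cv;
  int initialized = 0;
  bool started = false;
  int num_threads;
  explicit Shared(int n) : num_threads(n) {}
};

void WorkerBody(Shared* shared, int id) {
  // ... per-thread setup / initial VerifyDb would happen here ...
  {
    std::unique_lock<std::mutex> l(shared->mu);
    if (++shared->initialized == shared->num_threads) {
      shared->cv.notify_all();  // last thread to finish init wakes the driver
    }
    shared->cv.wait(l, [&] { return shared->started; });
  }
  std::printf("thread %d operating\n", id);
}

int main() {
  Shared shared(4);
  std::vector<std::thread> threads;
  for (int i = 0; i < shared.num_threads; ++i) {
    threads.emplace_back(WorkerBody, &shared, i);
  }
  {
    std::unique_lock<std::mutex> l(shared.mu);
    shared.cv.wait(l, [&] { return shared.initialized == shared.num_threads; });
    shared.started = true;  // release every worker into the operate phase
  }
  shared.cv.notify_all();
  for (auto& t : threads) {
    t.join();
  }
  return 0;
}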
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifdef GFLAGS -#pragma once -#include "db_stress_tool/db_stress_common.h" - -namespace ROCKSDB_NAMESPACE { -class DbStressFSWrapper : public FileSystemWrapper { - public: - explicit DbStressFSWrapper(const std::shared_ptr& t) - : FileSystemWrapper(t) {} - static const char* kClassName() { return "DbStressFS"; } - const char* Name() const override { return kClassName(); } - - IOStatus DeleteFile(const std::string& f, const IOOptions& opts, - IODebugContext* dbg) override { - // We determine whether it is a manifest file by searching a strong, - // so that there will be false positive if the directory path contains the - // keyword but it is unlikely. - // Checkpoint, backup, and restore directories needs to be exempted. - if (!if_preserve_all_manifests || - f.find("MANIFEST-") == std::string::npos || - f.find("checkpoint") != std::string::npos || - f.find(".backup") != std::string::npos || - f.find(".restore") != std::string::npos) { - return target()->DeleteFile(f, opts, dbg); - } - // Rename the file instead of deletion to keep the history, and - // at the same time it is not visible to RocksDB. - return target()->RenameFile(f, f + "_renamed_", opts, dbg); - } - - // If true, all manifest files will not be delted in DeleteFile(). - bool if_preserve_all_manifests = true; -}; -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc deleted file mode 100644 index d7cf8b10f..000000000 --- a/db_stress_tool/db_stress_gflags.cc +++ /dev/null @@ -1,1081 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifdef GFLAGS -#include "db_stress_tool/db_stress_common.h" - -static bool ValidateUint32Range(const char* flagname, uint64_t value) { - if (value > std::numeric_limits::max()) { - fprintf(stderr, "Invalid value for --%s: %lu, overflow\n", flagname, - (unsigned long)value); - return false; - } - return true; -} - -DEFINE_uint64(seed, 2341234, - "Seed for PRNG. When --nooverwritepercent is " - "nonzero and --expected_values_dir is nonempty, this value " - "must be fixed across invocations."); -static const bool FLAGS_seed_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_seed, &ValidateUint32Range); - -DEFINE_bool(read_only, false, "True if open DB in read-only mode during tests"); - -DEFINE_int64(max_key, 1 * KB * KB, - "Max number of key/values to place in database"); - -DEFINE_int32(max_key_len, 3, "Maximum length of a key in 8-byte units"); - -DEFINE_string(key_len_percent_dist, "", - "Percentages of keys of various lengths. For example, 1,30,69 " - "means 1% of keys are 8 bytes, 30% are 16 bytes, and 69% are " - "24 bytes. 
If not specified, it will be evenly distributed"); - -DEFINE_int32(key_window_scale_factor, 10, - "This value will be multiplied by 100 to come up with a window " - "size for varying the key length"); - -DEFINE_int32(column_families, 10, "Number of column families"); - -DEFINE_double( - hot_key_alpha, 0, - "Use Zipfian distribution to generate the key " - "distribution. If it is not specified, write path will use random " - "distribution to generate the keys. The parameter is [0, double_max]). " - "However, the larger alpha is, the more shewed will be. If alpha is " - "larger than 2, it is likely that only 1 key will be accessed. The " - "Recommended value is [0.8-1.5]. The distribution is also related to " - "max_key and total iterations of generating the hot key. "); - -DEFINE_string( - options_file, "", - "The path to a RocksDB options file. If specified, then db_stress will " - "run with the RocksDB options in the default column family of the " - "specified options file. Note that, when an options file is provided, " - "db_stress will ignore the flag values for all options that may be passed " - "via options file."); - -DEFINE_int64( - active_width, 0, - "Number of keys in active span of the key-range at any given time. The " - "span begins with its left endpoint at key 0, gradually moves rightwards, " - "and ends with its right endpoint at max_key. If set to 0, active_width " - "will be sanitized to be equal to max_key."); - -// TODO(noetzli) Add support for single deletes -DEFINE_bool(test_batches_snapshots, false, - "If set, the test uses MultiGet(), MultiPut() and MultiDelete()" - " which read/write/delete multiple keys in a batch. In this mode," - " we do not verify db content by comparing the content with the " - "pre-allocated array. Instead, we do partial verification inside" - " MultiGet() by checking various values in a batch. Benefit of" - " this mode:\n" - "\t(a) No need to acquire mutexes during writes (less cache " - "flushes in multi-core leading to speed up)\n" - "\t(b) No long validation at the end (more speed up)\n" - "\t(c) Test snapshot and atomicity of batch writes"); - -DEFINE_bool(atomic_flush, false, - "If set, enables atomic flush in the options.\n"); - -DEFINE_int32( - manual_wal_flush_one_in, 0, - "If non-zero, then `FlushWAL(bool sync)`, where `bool sync` is randomly " - "decided, will be explictly called in db stress once for every N ops " - "on average. Setting `manual_wal_flush_one_in` to be greater than 0 " - "implies `Options::manual_wal_flush = true` is set."); - -DEFINE_int32(lock_wal_one_in, 1000000, - "If non-zero, then `LockWAL()` + `UnlockWAL()` will be called in " - "db_stress once for every N ops on average."); - -DEFINE_bool(test_cf_consistency, false, - "If set, runs the stress test dedicated to verifying writes to " - "multiple column families are consistent. Setting this implies " - "`atomic_flush=true` is set true if `disable_wal=false`.\n"); - -DEFINE_bool(test_multi_ops_txns, false, - "If set, runs stress test dedicated to verifying multi-ops " - "transactions on a simple relational table with primary and " - "secondary index."); - -DEFINE_int32(threads, 32, "Number of concurrent threads to run."); - -DEFINE_int32(ttl, -1, - "Opens the db with this ttl value if this is not -1. 
" - "Carefully specify a large value such that verifications on " - "deleted values don't fail"); - -DEFINE_int32(value_size_mult, 8, - "Size of value will be this number times rand_int(1,3) bytes"); - -DEFINE_int32(compaction_readahead_size, 0, "Compaction readahead size"); - -DEFINE_bool(enable_pipelined_write, false, "Pipeline WAL/memtable writes"); - -DEFINE_bool(verify_before_write, false, "Verify before write"); - -DEFINE_bool(histogram, false, "Print histogram of operation timings"); - -DEFINE_bool(destroy_db_initially, true, - "Destroys the database dir before start if this is true"); - -DEFINE_bool(verbose, false, "Verbose"); - -DEFINE_bool(progress_reports, true, - "If true, db_stress will report number of finished operations"); - -DEFINE_uint64(db_write_buffer_size, - ROCKSDB_NAMESPACE::Options().db_write_buffer_size, - "Number of bytes to buffer in all memtables before compacting"); - -DEFINE_int32( - write_buffer_size, - static_cast(ROCKSDB_NAMESPACE::Options().write_buffer_size), - "Number of bytes to buffer in memtable before compacting"); - -DEFINE_int32(max_write_buffer_number, - ROCKSDB_NAMESPACE::Options().max_write_buffer_number, - "The number of in-memory memtables. " - "Each memtable is of size FLAGS_write_buffer_size."); - -DEFINE_int32(min_write_buffer_number_to_merge, - ROCKSDB_NAMESPACE::Options().min_write_buffer_number_to_merge, - "The minimum number of write buffers that will be merged together " - "before writing to storage. This is cheap because it is an " - "in-memory merge. If this feature is not enabled, then all these " - "write buffers are flushed to L0 as separate files and this " - "increases read amplification because a get request has to check " - "in all of these files. Also, an in-memory merge may result in " - "writing less data to storage if there are duplicate records in" - " each of these individual write buffers."); - -DEFINE_int32(max_write_buffer_number_to_maintain, - ROCKSDB_NAMESPACE::Options().max_write_buffer_number_to_maintain, - "The total maximum number of write buffers to maintain in memory " - "including copies of buffers that have already been flushed. " - "Unlike max_write_buffer_number, this parameter does not affect " - "flushing. This controls the minimum amount of write history " - "that will be available in memory for conflict checking when " - "Transactions are used. If this value is too low, some " - "transactions may fail at commit time due to not being able to " - "determine whether there were any write conflicts. Setting this " - "value to 0 will cause write buffers to be freed immediately " - "after they are flushed. If this value is set to -1, " - "'max_write_buffer_number' will be used."); - -DEFINE_int64(max_write_buffer_size_to_maintain, - ROCKSDB_NAMESPACE::Options().max_write_buffer_size_to_maintain, - "The total maximum size of write buffers to maintain in memory " - "including copies of buffers that have already been flushed. " - "Unlike max_write_buffer_number, this parameter does not affect " - "flushing. This controls the minimum amount of write history " - "that will be available in memory for conflict checking when " - "Transactions are used. If this value is too low, some " - "transactions may fail at commit time due to not being able to " - "determine whether there were any write conflicts. Setting this " - "value to 0 will cause write buffers to be freed immediately " - "after they are flushed. 
If this value is set to -1, " - "'max_write_buffer_number' will be used."); - -DEFINE_double(memtable_prefix_bloom_size_ratio, - ROCKSDB_NAMESPACE::Options().memtable_prefix_bloom_size_ratio, - "creates prefix blooms for memtables, each with size " - "`write_buffer_size * memtable_prefix_bloom_size_ratio`."); - -DEFINE_bool(memtable_whole_key_filtering, - ROCKSDB_NAMESPACE::Options().memtable_whole_key_filtering, - "Enable whole key filtering in memtables."); - -DEFINE_int32(open_files, ROCKSDB_NAMESPACE::Options().max_open_files, - "Maximum number of files to keep open at the same time " - "(use default if == 0)"); - -DEFINE_int64(compressed_cache_size, 0, - "Number of bytes to use as a cache of compressed data." - " 0 means use default settings."); - -DEFINE_int32( - compressed_cache_numshardbits, -1, - "Number of shards for the compressed block cache is 2 ** " - "compressed_cache_numshardbits. Negative value means default settings. " - "This is applied only if compressed_cache_size is greater than 0."); - -DEFINE_int32(compaction_style, ROCKSDB_NAMESPACE::Options().compaction_style, - ""); - -DEFINE_int32(compaction_pri, ROCKSDB_NAMESPACE::Options().compaction_pri, - "Which file from a level should be picked to merge to the next " - "level in level-based compaction"); - -DEFINE_int32(num_levels, ROCKSDB_NAMESPACE::Options().num_levels, - "Number of levels in the DB"); - -DEFINE_int32(level0_file_num_compaction_trigger, - ROCKSDB_NAMESPACE::Options().level0_file_num_compaction_trigger, - "Level0 compaction start trigger"); - -DEFINE_int32(level0_slowdown_writes_trigger, - ROCKSDB_NAMESPACE::Options().level0_slowdown_writes_trigger, - "Number of files in level-0 that will slow down writes"); - -DEFINE_int32(level0_stop_writes_trigger, - ROCKSDB_NAMESPACE::Options().level0_stop_writes_trigger, - "Number of files in level-0 that will trigger put stop."); - -DEFINE_int32(block_size, - static_cast( - ROCKSDB_NAMESPACE::BlockBasedTableOptions().block_size), - "Number of bytes in a block."); - -DEFINE_int32(format_version, - static_cast( - ROCKSDB_NAMESPACE::BlockBasedTableOptions().format_version), - "Format version of SST files."); - -DEFINE_int32( - index_block_restart_interval, - ROCKSDB_NAMESPACE::BlockBasedTableOptions().index_block_restart_interval, - "Number of keys between restart points " - "for delta encoding of keys in index block."); - -DEFINE_bool(disable_auto_compactions, - ROCKSDB_NAMESPACE::Options().disable_auto_compactions, - "If true, RocksDB internally will not trigger compactions."); - -DEFINE_int32(max_background_compactions, - ROCKSDB_NAMESPACE::Options().max_background_compactions, - "The maximum number of concurrent background compactions " - "that can occur in parallel."); - -DEFINE_int32(num_bottom_pri_threads, 0, - "The number of threads in the bottom-priority thread pool (used " - "by universal compaction only)."); - -DEFINE_int32(compaction_thread_pool_adjust_interval, 0, - "The interval (in milliseconds) to adjust compaction thread pool " - "size. 
Don't change it periodically if the value is 0."); - -DEFINE_int32(compaction_thread_pool_variations, 2, - "Range of background thread pool size variations when adjusted " - "periodically."); - -DEFINE_int32(max_background_flushes, - ROCKSDB_NAMESPACE::Options().max_background_flushes, - "The maximum number of concurrent background flushes " - "that can occur in parallel."); - -DEFINE_int32(universal_size_ratio, 0, - "The ratio of file sizes that trigger" - " compaction in universal style"); - -DEFINE_int32(universal_min_merge_width, 0, - "The minimum number of files to " - "compact in universal style compaction"); - -DEFINE_int32(universal_max_merge_width, 0, - "The max number of files to compact" - " in universal style compaction"); - -DEFINE_int32(universal_max_size_amplification_percent, 0, - "The max size amplification for universal style compaction"); - -DEFINE_int32(clear_column_family_one_in, 1000000, - "With a chance of 1/N, delete a column family and then recreate " - "it again. If N == 0, never drop/create column families. " - "When test_batches_snapshots is true, this flag has no effect"); - -DEFINE_int32(get_live_files_one_in, 1000000, - "With a chance of 1/N, call GetLiveFiles to verify if it returns " - "correctly. If N == 0, do not call the interface."); - -DEFINE_int32( - get_sorted_wal_files_one_in, 1000000, - "With a chance of 1/N, call GetSortedWalFiles to verify if it returns " - "correctly. (Note that this API may legitimately return an error.) If N == " - "0, do not call the interface."); - -DEFINE_int32( - get_current_wal_file_one_in, 1000000, - "With a chance of 1/N, call GetCurrentWalFile to verify if it returns " - "correctly. (Note that this API may legitimately return an error.) If N == " - "0, do not call the interface."); - -DEFINE_int32(set_options_one_in, 0, - "With a chance of 1/N, change some random options"); - -DEFINE_int32(set_in_place_one_in, 0, - "With a chance of 1/N, toggle in place support option"); - -DEFINE_int64(cache_size, 2LL * KB * KB * KB, - "Number of bytes to use as a cache of uncompressed data."); - -DEFINE_int32(cache_numshardbits, 6, - "Number of shards for the block cache" - " is 2 ** cache_numshardbits. Negative means use default settings." 
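// ---------------------------------------------------------------------------
// Editor's illustrative sketch -- not part of the deleted file. With the flag
// defaults above (--cache_size = 2 GiB, --cache_numshardbits = 6), the block
// cache has 2^6 = 64 shards of roughly 32 MiB each. Expressed against the
// public API (rocksdb/cache.h, rocksdb/table.h); the function name is
// hypothetical.
#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

rocksdb::Options OptionsWithShardedBlockCache() {
  std::shared_ptr<rocksdb::Cache> cache =
      rocksdb::NewLRUCache(2ULL << 30 /* 2 GiB */, /*num_shard_bits=*/6);
  rocksdb::BlockBasedTableOptions table_options;
  table_options.block_cache = cache;
  rocksdb::Options options;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  return options;
}
// ---------------------------------------------------------------------------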
- " This is applied only if FLAGS_cache_size is greater than 0."); - -DEFINE_bool(cache_index_and_filter_blocks, false, - "True if indexes/filters should be cached in block cache."); - -DEFINE_bool(charge_compression_dictionary_building_buffer, false, - "Setting for " - "CacheEntryRoleOptions::charged of " - "CacheEntryRole::kCompressionDictionaryBuildingBuffer"); - -DEFINE_bool(charge_filter_construction, false, - "Setting for " - "CacheEntryRoleOptions::charged of " - "CacheEntryRole::kFilterConstruction"); - -DEFINE_bool(charge_table_reader, false, - "Setting for " - "CacheEntryRoleOptions::charged of " - "CacheEntryRole::kBlockBasedTableReader"); - -DEFINE_bool(charge_file_metadata, false, - "Setting for " - "CacheEntryRoleOptions::charged of " - "kFileMetadata"); - -DEFINE_bool(charge_blob_cache, false, - "Setting for " - "CacheEntryRoleOptions::charged of " - "kBlobCache"); - -DEFINE_int32( - top_level_index_pinning, - static_cast(ROCKSDB_NAMESPACE::PinningTier::kFallback), - "Type of pinning for top-level indexes into metadata partitions (see " - "`enum PinningTier` in table.h)"); - -DEFINE_int32( - partition_pinning, - static_cast(ROCKSDB_NAMESPACE::PinningTier::kFallback), - "Type of pinning for metadata partitions (see `enum PinningTier` in " - "table.h)"); - -DEFINE_int32( - unpartitioned_pinning, - static_cast(ROCKSDB_NAMESPACE::PinningTier::kFallback), - "Type of pinning for unpartitioned metadata blocks (see `enum PinningTier` " - "in table.h)"); - -DEFINE_string(cache_type, "lru_cache", "Type of block cache."); - -DEFINE_uint64(subcompactions, 1, - "Maximum number of subcompactions to divide L0-L1 compactions " - "into."); - -DEFINE_uint64(periodic_compaction_seconds, 1000, - "Files older than this value will be picked up for compaction."); - -DEFINE_uint64(compaction_ttl, 1000, - "Files older than TTL will be compacted to the next level."); - -DEFINE_bool(fifo_allow_compaction, false, - "If true, set `Options::compaction_options_fifo.allow_compaction = " - "true`. It only take effect when FIFO compaction is used."); - -DEFINE_bool(allow_concurrent_memtable_write, false, - "Allow multi-writers to update mem tables in parallel."); - -DEFINE_double(experimental_mempurge_threshold, 0.0, - "Maximum estimated useful payload that triggers a " - "mempurge process to collect memtable garbage bytes."); - -DEFINE_bool(enable_write_thread_adaptive_yield, true, - "Use a yielding spin loop for brief writer thread waits."); - -// Options for StackableDB-based BlobDB -DEFINE_bool(use_blob_db, false, "[Stacked BlobDB] Use BlobDB."); - -DEFINE_uint64( - blob_db_min_blob_size, - ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().min_blob_size, - "[Stacked BlobDB] Smallest blob to store in a file. 
Blobs " - "smaller than this will be inlined with the key in the LSM tree."); - -DEFINE_uint64( - blob_db_bytes_per_sync, - ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().bytes_per_sync, - "[Stacked BlobDB] Sync blob files once per every N bytes written."); - -DEFINE_uint64(blob_db_file_size, - ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().blob_file_size, - "[Stacked BlobDB] Target size of each blob file."); - -DEFINE_bool( - blob_db_enable_gc, - ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().enable_garbage_collection, - "[Stacked BlobDB] Enable BlobDB garbage collection."); - -DEFINE_double( - blob_db_gc_cutoff, - ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().garbage_collection_cutoff, - "[Stacked BlobDB] Cutoff ratio for BlobDB garbage collection."); - -// Options for integrated BlobDB -DEFINE_bool(allow_setting_blob_options_dynamically, false, - "[Integrated BlobDB] Allow setting blob options dynamically."); - -DEFINE_bool( - enable_blob_files, - ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().enable_blob_files, - "[Integrated BlobDB] Enable writing large values to separate blob files."); - -DEFINE_uint64(min_blob_size, - ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().min_blob_size, - "[Integrated BlobDB] The size of the smallest value to be stored " - "separately in a blob file."); - -DEFINE_uint64(blob_file_size, - ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_size, - "[Integrated BlobDB] The size limit for blob files."); - -DEFINE_string(blob_compression_type, "none", - "[Integrated BlobDB] The compression algorithm to use for large " - "values stored in blob files."); - -DEFINE_bool(enable_blob_garbage_collection, - ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions() - .enable_blob_garbage_collection, - "[Integrated BlobDB] Enable blob garbage collection."); - -DEFINE_double(blob_garbage_collection_age_cutoff, - ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions() - .blob_garbage_collection_age_cutoff, - "[Integrated BlobDB] The cutoff in terms of blob file age for " - "garbage collection."); - -DEFINE_double(blob_garbage_collection_force_threshold, - ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions() - .blob_garbage_collection_force_threshold, - "[Integrated BlobDB] The threshold for the ratio of garbage in " - "the oldest blob files for forcing garbage collection."); - -DEFINE_uint64(blob_compaction_readahead_size, - ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions() - .blob_compaction_readahead_size, - "[Integrated BlobDB] Compaction readahead for blob files."); - -DEFINE_int32( - blob_file_starting_level, - ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_starting_level, - "[Integrated BlobDB] Enable writing blob files during flushes and " - "compactions starting from the specified level."); - -DEFINE_bool(use_blob_cache, false, "[Integrated BlobDB] Enable blob cache."); - -DEFINE_bool( - use_shared_block_and_blob_cache, true, - "[Integrated BlobDB] Use a shared backing cache for both block " - "cache and blob cache. It only takes effect if use_blob_cache is enabled."); - -DEFINE_uint64( - blob_cache_size, 2LL * KB * KB * KB, - "[Integrated BlobDB] Number of bytes to use as a cache of blobs. It only " - "takes effect if the block and blob caches are different " - "(use_shared_block_and_blob_cache = false)."); - -DEFINE_int32(blob_cache_numshardbits, 6, - "[Integrated BlobDB] Number of shards for the blob cache is 2 ** " - "blob_cache_numshardbits. Negative means use default settings. 
" - "It only takes effect if blob_cache_size is greater than 0, and " - "the block and blob caches are different " - "(use_shared_block_and_blob_cache = false)."); - -DEFINE_int32(prepopulate_blob_cache, 0, - "[Integrated BlobDB] Pre-populate hot/warm blobs in blob cache. 0 " - "to disable and 1 to insert during flush."); - -DEFINE_bool(enable_tiered_storage, false, "Set last_level_temperature"); - -DEFINE_int64(preclude_last_level_data_seconds, 0, - "Preclude data from the last level. Used with tiered storage " - "feature to preclude new data from comacting to the last level."); - -DEFINE_int64( - preserve_internal_time_seconds, 0, - "Preserve internal time information which is attached to each SST."); - -static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range); - -static bool ValidateInt32Positive(const char* flagname, int32_t value) { - if (value < 0) { - fprintf(stderr, "Invalid value for --%s: %d, must be >=0\n", flagname, - value); - return false; - } - return true; -} -DEFINE_int32(reopen, 10, "Number of times database reopens"); -static const bool FLAGS_reopen_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_reopen, &ValidateInt32Positive); - -DEFINE_double(bloom_bits, 10, - "Bloom filter bits per key. " - "Negative means use default settings."); - -DEFINE_int32( - ribbon_starting_level, 999, - "Use Bloom filter on levels below specified and Ribbon beginning on level " - "specified. Flush is considered level -1. 999 or more -> always Bloom. 0 " - "-> Ribbon except Bloom for flush. -1 -> always Ribbon."); - -DEFINE_bool(partition_filters, false, - "use partitioned filters " - "for block-based table"); - -DEFINE_bool( - optimize_filters_for_memory, - ROCKSDB_NAMESPACE::BlockBasedTableOptions().optimize_filters_for_memory, - "Minimize memory footprint of filters"); - -DEFINE_bool( - detect_filter_construct_corruption, - ROCKSDB_NAMESPACE::BlockBasedTableOptions() - .detect_filter_construct_corruption, - "Detect corruption during new Bloom Filter and Ribbon Filter construction"); - -DEFINE_int32( - index_type, - static_cast( - ROCKSDB_NAMESPACE::BlockBasedTableOptions().index_type), - "Type of block-based table index (see `enum IndexType` in table.h)"); - -DEFINE_int32( - data_block_index_type, - static_cast( - ROCKSDB_NAMESPACE::BlockBasedTableOptions().data_block_index_type), - "Index type for data blocks (see `enum DataBlockIndexType` in table.h)"); - -DEFINE_string(db, "", "Use the db with the following name."); - -DEFINE_string(secondaries_base, "", - "Use this path as the base path for secondary instances."); - -DEFINE_bool(test_secondary, false, - "If true, start an additional secondary instance which can be used " - "for verification."); - -DEFINE_string( - expected_values_dir, "", - "Dir where files containing info about the latest/historical values will " - "be stored. If provided and non-empty, the DB state will be verified " - "against values from these files after recovery. --max_key and " - "--column_family must be kept the same across invocations of this program " - "that use the same --expected_values_dir. Currently historical values are " - "only tracked when --sync_fault_injection is set. 
See --seed and " - "--nooverwritepercent for further requirements."); - -DEFINE_bool(verify_checksum, false, - "Verify checksum for every block read from storage"); - -DEFINE_bool(mmap_read, ROCKSDB_NAMESPACE::Options().allow_mmap_reads, - "Allow reads to occur via mmap-ing files"); - -DEFINE_bool(mmap_write, ROCKSDB_NAMESPACE::Options().allow_mmap_writes, - "Allow writes to occur via mmap-ing files"); - -DEFINE_bool(use_direct_reads, ROCKSDB_NAMESPACE::Options().use_direct_reads, - "Use O_DIRECT for reading data"); - -DEFINE_bool(use_direct_io_for_flush_and_compaction, - ROCKSDB_NAMESPACE::Options().use_direct_io_for_flush_and_compaction, - "Use O_DIRECT for writing data"); - -DEFINE_bool(mock_direct_io, false, - "Mock direct IO by not using O_DIRECT for direct IO read"); - -DEFINE_bool(statistics, false, "Create database statistics"); - -DEFINE_bool(sync, false, "Sync all writes to disk"); - -DEFINE_bool(use_fsync, false, "If true, issue fsync instead of fdatasync"); - -DEFINE_uint64(bytes_per_sync, ROCKSDB_NAMESPACE::Options().bytes_per_sync, - "If nonzero, sync SST file data incrementally after every " - "`bytes_per_sync` bytes are written"); - -DEFINE_uint64(wal_bytes_per_sync, - ROCKSDB_NAMESPACE::Options().wal_bytes_per_sync, - "If nonzero, sync WAL file data incrementally after every " - "`bytes_per_sync` bytes are written"); - -DEFINE_int32(kill_random_test, 0, - "If non-zero, kill at various points in source code with " - "probability 1/this"); -static const bool FLAGS_kill_random_test_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_kill_random_test, &ValidateInt32Positive); - -DEFINE_string(kill_exclude_prefixes, "", - "If non-empty, kill points with prefix in the list given will be" - " skipped. Items are comma-separated."); -extern std::vector rocksdb_kill_exclude_prefixes; - -DEFINE_bool(disable_wal, false, "If true, do not write WAL for write."); - -DEFINE_uint64(recycle_log_file_num, - ROCKSDB_NAMESPACE::Options().recycle_log_file_num, - "Number of old WAL files to keep around for later recycling"); - -DEFINE_int64(target_file_size_base, - ROCKSDB_NAMESPACE::Options().target_file_size_base, - "Target level-1 file size for compaction"); - -DEFINE_int32(target_file_size_multiplier, 1, - "A multiplier to compute target level-N file size (N >= 2)"); - -DEFINE_uint64(max_bytes_for_level_base, - ROCKSDB_NAMESPACE::Options().max_bytes_for_level_base, - "Max bytes for level-1"); - -DEFINE_double(max_bytes_for_level_multiplier, 2, - "A multiplier to compute max bytes for level-N (N >= 2)"); - -DEFINE_int32(range_deletion_width, 10, - "The width of the range deletion intervals."); - -DEFINE_uint64(rate_limiter_bytes_per_sec, 0, "Set options.rate_limiter value."); - -DEFINE_bool(rate_limit_bg_reads, false, - "Use options.rate_limiter on compaction reads"); - -DEFINE_bool(rate_limit_user_ops, false, - "When true use Env::IO_USER priority level to charge internal rate " - "limiter for reads associated with user operations."); - -DEFINE_bool(rate_limit_auto_wal_flush, false, - "When true use Env::IO_USER priority level to charge internal rate " - "limiter for automatic WAL flush (`Options::manual_wal_flush` == " - "false) after the user " - "write operation."); - -DEFINE_uint64(sst_file_manager_bytes_per_sec, 0, - "Set `Options::sst_file_manager` to delete at this rate. By " - "default the deletion rate is unbounded."); - -DEFINE_uint64(sst_file_manager_bytes_per_truncate, 0, - "Set `Options::sst_file_manager` to delete in chunks of this " - "many bytes. 
By default whole files will be deleted."); - -DEFINE_bool(use_txn, false, - "Use TransactionDB. Currently the default write policy is " - "TxnDBWritePolicy::WRITE_PREPARED"); - -DEFINE_uint64(txn_write_policy, 0, - "The transaction write policy. Default is " - "TxnDBWritePolicy::WRITE_COMMITTED. Note that this should not be " - "changed accross crashes."); - -DEFINE_bool(unordered_write, false, - "Turn on the unordered_write feature. This options is currently " - "tested only in combination with use_txn=true and " - "txn_write_policy=TxnDBWritePolicy::WRITE_PREPARED."); - -DEFINE_int32(backup_one_in, 0, - "If non-zero, then CreateNewBackup() will be called once for " - "every N operations on average. 0 indicates CreateNewBackup() " - "is disabled."); - -DEFINE_uint64(backup_max_size, 100 * 1024 * 1024, - "If non-zero, skip checking backup/restore when DB size in " - "bytes exceeds this setting."); - -DEFINE_int32(checkpoint_one_in, 0, - "If non-zero, then CreateCheckpoint() will be called once for " - "every N operations on average. 0 indicates CreateCheckpoint() " - "is disabled."); - -DEFINE_int32(ingest_external_file_one_in, 0, - "If non-zero, then IngestExternalFile() will be called once for " - "every N operations on average. 0 indicates IngestExternalFile() " - "is disabled."); - -DEFINE_int32(ingest_external_file_width, 100, - "The width of the ingested external files."); - -DEFINE_int32(compact_files_one_in, 0, - "If non-zero, then CompactFiles() will be called once for every N " - "operations on average. 0 indicates CompactFiles() is disabled."); - -DEFINE_int32(compact_range_one_in, 0, - "If non-zero, then CompactRange() will be called once for every N " - "operations on average. 0 indicates CompactRange() is disabled."); - -DEFINE_int32(mark_for_compaction_one_file_in, 0, - "A `TablePropertiesCollectorFactory` will be registered, which " - "creates a `TablePropertiesCollector` with `NeedCompact()` " - "returning true once for every N files on average. 0 or negative " - "mean `NeedCompact()` always returns false."); - -DEFINE_int32(flush_one_in, 0, - "If non-zero, then Flush() will be called once for every N ops " - "on average. 0 indicates calls to Flush() are disabled."); - -DEFINE_int32(pause_background_one_in, 0, - "If non-zero, then PauseBackgroundWork()+Continue will be called " - "once for every N ops on average. 
0 disables."); - -DEFINE_int32(compact_range_width, 10000, - "The width of the ranges passed to CompactRange()."); - -DEFINE_int32(acquire_snapshot_one_in, 0, - "If non-zero, then acquires a snapshot once every N operations on " - "average."); - -DEFINE_bool(compare_full_db_state_snapshot, false, - "If set we compare state of entire db (in one of the threads) with" - "each snapshot."); - -DEFINE_uint64(snapshot_hold_ops, 0, - "If non-zero, then releases snapshots N operations after they're " - "acquired."); - -DEFINE_bool(long_running_snapshots, false, - "If set, hold on some some snapshots for much longer time."); - -DEFINE_bool(use_multiget, false, - "If set, use the batched MultiGet API for reads"); - -DEFINE_bool(use_get_entity, false, "If set, use the GetEntity API for reads"); - -static bool ValidateInt32Percent(const char* flagname, int32_t value) { - if (value < 0 || value > 100) { - fprintf(stderr, "Invalid value for --%s: %d, 0<= pct <=100 \n", flagname, - value); - return false; - } - return true; -} - -DEFINE_int32(readpercent, 10, - "Ratio of reads to total workload (expressed as a percentage)"); -static const bool FLAGS_readpercent_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_readpercent, &ValidateInt32Percent); - -DEFINE_int32(prefixpercent, 20, - "Ratio of prefix iterators to total workload (expressed as a" - " percentage)"); -static const bool FLAGS_prefixpercent_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_prefixpercent, &ValidateInt32Percent); - -DEFINE_int32(writepercent, 45, - "Ratio of writes to total workload (expressed as a percentage)"); -static const bool FLAGS_writepercent_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_writepercent, &ValidateInt32Percent); - -DEFINE_int32(delpercent, 15, - "Ratio of deletes to total workload (expressed as a percentage)"); -static const bool FLAGS_delpercent_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_delpercent, &ValidateInt32Percent); - -DEFINE_int32(delrangepercent, 0, - "Ratio of range deletions to total workload (expressed as a " - "percentage). Cannot be used with test_batches_snapshots"); -static const bool FLAGS_delrangepercent_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_delrangepercent, &ValidateInt32Percent); - -DEFINE_int32(nooverwritepercent, 60, - "Ratio of keys without overwrite to total workload (expressed as " - "a percentage). 
When --expected_values_dir is nonempty, must " - "keep this value constant across invocations."); -static const bool FLAGS_nooverwritepercent_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_nooverwritepercent, &ValidateInt32Percent); - -DEFINE_int32(iterpercent, 10, - "Ratio of iterations to total workload" - " (expressed as a percentage)"); -static const bool FLAGS_iterpercent_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_iterpercent, &ValidateInt32Percent); - -DEFINE_uint64(num_iterations, 10, "Number of iterations per MultiIterate run"); -static const bool FLAGS_num_iterations_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_num_iterations, &ValidateUint32Range); - -DEFINE_int32( - customopspercent, 0, - "Ratio of custom operations to total workload (expressed as a percentage)"); - -DEFINE_string(compression_type, "snappy", - "Algorithm to use to compress the database"); - -DEFINE_int32(compression_max_dict_bytes, 0, - "Maximum size of dictionary used to prime the compression " - "library."); - -DEFINE_int32(compression_zstd_max_train_bytes, 0, - "Maximum size of training data passed to zstd's dictionary " - "trainer."); - -DEFINE_int32(compression_parallel_threads, 1, - "Number of threads for parallel compression."); - -DEFINE_uint64(compression_max_dict_buffer_bytes, 0, - "Buffering limit for SST file data to sample for dictionary " - "compression."); - -DEFINE_bool( - compression_use_zstd_dict_trainer, true, - "Use zstd's trainer to generate dictionary. If the options is false, " - "zstd's finalizeDictionary() API is used to generate dictionary. " - "ZSTD 1.4.5+ is required. If ZSTD 1.4.5+ is not linked with the binary, " - "this flag will have the default value true."); - -DEFINE_string(bottommost_compression_type, "disable", - "Algorithm to use to compress bottommost level of the database. " - "\"disable\" means disabling the feature"); - -DEFINE_string(checksum_type, "kCRC32c", "Algorithm to use to checksum blocks"); - -DEFINE_string(env_uri, "", - "URI for env lookup. Mutually exclusive with --fs_uri"); - -DEFINE_string(fs_uri, "", - "URI for registry Filesystem lookup. Mutually exclusive" - " with --env_uri." - " Creates a default environment with the specified filesystem."); - -DEFINE_uint64(ops_per_thread, 1200000, "Number of operations per thread."); -static const bool FLAGS_ops_per_thread_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_ops_per_thread, &ValidateUint32Range); - -DEFINE_uint64(log2_keys_per_lock, 2, "Log2 of number of keys per lock"); -static const bool FLAGS_log2_keys_per_lock_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_log2_keys_per_lock, &ValidateUint32Range); - -DEFINE_uint64(max_manifest_file_size, 16384, "Maximum size of a MANIFEST file"); - -DEFINE_bool(in_place_update, false, "On true, does inplace update in memtable"); - -DEFINE_string(memtablerep, "skip_list", ""); - -inline static bool ValidatePrefixSize(const char* flagname, int32_t value) { - if (value < -1 || value > 8) { - fprintf(stderr, "Invalid value for --%s: %d. -1 <= PrefixSize <= 8\n", - flagname, value); - return false; - } - return true; -} -DEFINE_int32(prefix_size, 7, - "Control the prefix size for HashSkipListRep. 
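// ---------------------------------------------------------------------------
// Editor's note -- not part of the deleted file. The per-operation percentage
// flags above describe a single workload mix, and their defaults add up to
// 100: readpercent 10 + prefixpercent 20 + writepercent 45 + delpercent 15 +
// delrangepercent 0 + iterpercent 10 (+ customopspercent 0) = 100. A sanity
// check in the same spirit as ValidateInt32Percent might look like this
// (hypothetical helper, not db_stress code):
#include <cstdio>

bool ValidateWorkloadMixSumsTo100(int read, int prefix, int write, int del,
                                  int delrange, int iter, int custom) {
  const int total = read + prefix + write + del + delrange + iter + custom;
  if (total != 100) {
    std::fprintf(stderr, "operation percentages sum to %d, expected 100\n",
                 total);
    return false;
  }
  return true;
}
// ---------------------------------------------------------------------------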
" - "-1 is disabled."); -static const bool FLAGS_prefix_size_dummy __attribute__((__unused__)) = - RegisterFlagValidator(&FLAGS_prefix_size, &ValidatePrefixSize); - -DEFINE_bool(use_merge, false, - "On true, replaces all writes with a Merge " - "that behaves like a Put"); - -DEFINE_uint32(use_put_entity_one_in, 0, - "If greater than zero, PutEntity will be used once per every N " - "write ops on average."); - -DEFINE_bool(use_full_merge_v1, false, - "On true, use a merge operator that implement the deprecated " - "version of FullMerge"); - -DEFINE_int32(sync_wal_one_in, 0, - "If non-zero, then SyncWAL() will be called once for every N ops " - "on average. 0 indicates that calls to SyncWAL() are disabled."); - -DEFINE_bool(avoid_unnecessary_blocking_io, - ROCKSDB_NAMESPACE::Options().avoid_unnecessary_blocking_io, - "If true, some expensive cleaning up operations will be moved from " - "user reads to high-pri background threads."); - -DEFINE_bool(write_dbid_to_manifest, - ROCKSDB_NAMESPACE::Options().write_dbid_to_manifest, - "Write DB_ID to manifest"); - -DEFINE_bool(avoid_flush_during_recovery, - ROCKSDB_NAMESPACE::Options().avoid_flush_during_recovery, - "Avoid flush during recovery"); - -DEFINE_uint64(max_write_batch_group_size_bytes, - ROCKSDB_NAMESPACE::Options().max_write_batch_group_size_bytes, - "Max write batch group size"); - -DEFINE_bool(level_compaction_dynamic_level_bytes, - ROCKSDB_NAMESPACE::Options().level_compaction_dynamic_level_bytes, - "Use dynamic level"); - -DEFINE_int32(verify_checksum_one_in, 0, - "If non-zero, then DB::VerifyChecksum() will be called to do" - " checksum verification of all the files in the database once for" - " every N ops on average. 0 indicates that calls to" - " VerifyChecksum() are disabled."); -DEFINE_int32(verify_db_one_in, 0, - "If non-zero, call VerifyDb() once for every N ops. 0 indicates " - "that VerifyDb() will not be called in OperateDb(). Note that " - "enabling this can slow down tests."); - -DEFINE_int32(continuous_verification_interval, 1000, - "While test is running, verify db every N milliseconds. 0 " - "disables continuous verification."); - -DEFINE_int32(approximate_size_one_in, 64, - "If non-zero, DB::GetApproximateSizes() will be called against" - " random key ranges."); - -DEFINE_int32(read_fault_one_in, 1000, - "On non-zero, enables fault injection on read"); - -DEFINE_int32(get_property_one_in, 1000, - "If non-zero, then DB::GetProperty() will be called to get various" - " properties for every N ops on average. 0 indicates that" - " GetProperty() will be not be called."); - -DEFINE_bool(sync_fault_injection, false, - "If true, FaultInjectionTestFS will be used for write operations, " - "and unsynced data in DB will lost after crash. 
In such a case we " - "track DB changes in a trace file (\"*.trace\") in " - "--expected_values_dir for verifying there are no holes in the " - "recovered data."); - -DEFINE_bool(best_efforts_recovery, false, - "If true, use best efforts recovery."); -DEFINE_bool(skip_verifydb, false, - "If true, skip VerifyDb() calls and Get()/Iterator verifications" - "against expected state."); - -DEFINE_bool(enable_compaction_filter, false, - "If true, configures a compaction filter that returns a kRemove " - "decision for deleted keys."); - -DEFINE_bool(paranoid_file_checks, true, - "After writing every SST file, reopen it and read all the keys " - "and validate checksums"); - -DEFINE_bool(fail_if_options_file_error, false, - "Fail operations that fail to detect or properly persist options " - "file."); - -DEFINE_uint64(batch_protection_bytes_per_key, 0, - "If nonzero, enables integrity protection in `WriteBatch` at the " - "specified number of bytes per key. Currently the only supported " - "nonzero value is eight."); - -DEFINE_uint32( - memtable_protection_bytes_per_key, 0, - "If nonzero, enables integrity protection in memtable entries at the " - "specified number of bytes per key. Currently the supported " - "nonzero values are 1, 2, 4 and 8."); - -DEFINE_string(file_checksum_impl, "none", - "Name of an implementation for file_checksum_gen_factory, or " - "\"none\" for null."); - -DEFINE_int32(write_fault_one_in, 0, - "On non-zero, enables fault injection on write"); - -DEFINE_uint64(user_timestamp_size, 0, - "Number of bytes for a user-defined timestamp. Currently, only " - "8-byte is supported"); - -DEFINE_int32(open_metadata_write_fault_one_in, 0, - "On non-zero, enables fault injection on file metadata write " - "during DB reopen."); - -DEFINE_string(secondary_cache_uri, "", - "Full URI for creating a customized secondary cache object"); -DEFINE_int32(secondary_cache_fault_one_in, 0, - "On non-zero, enables fault injection in secondary cache inserts" - " and lookups"); -DEFINE_int32(open_write_fault_one_in, 0, - "On non-zero, enables fault injection on file writes " - "during DB reopen."); -DEFINE_int32(open_read_fault_one_in, 0, - "On non-zero, enables fault injection on file reads " - "during DB reopen."); -DEFINE_int32(injest_error_severity, 1, - "The severity of the injested IO Error. 1 is soft error (e.g. " - "retryable error), 2 is fatal error, and the default is " - "retryable error."); -DEFINE_int32(prepopulate_block_cache, - static_cast(ROCKSDB_NAMESPACE::BlockBasedTableOptions:: - PrepopulateBlockCache::kDisable), - "Options related to cache warming (see `enum " - "PrepopulateBlockCache` in table.h)"); - -DEFINE_bool(two_write_queues, false, - "Set to true to enable two write queues. Default: false"); - -DEFINE_bool(use_only_the_last_commit_time_batch_for_recovery, false, - "If true, the commit-time write batch will not be immediately " - "inserted into the memtables. Default: false"); - -DEFINE_uint64( - wp_snapshot_cache_bits, 7ull, - "Number of bits to represent write-prepared transaction db's snapshot " - "cache. Default: 7 (128 entries)"); - -DEFINE_uint64(wp_commit_cache_bits, 23ull, - "Number of bits to represent write-prepared transaction db's " - "commit cache. 
Default: 23 (8M entries)"); - -DEFINE_bool(adaptive_readahead, false, - "Carry forward internal auto readahead size from one file to next " - "file at each level during iteration"); -DEFINE_bool( - async_io, false, - "Does asynchronous prefetching when internal auto readahead is enabled"); - -DEFINE_string(wal_compression, "none", - "Algorithm to use for WAL compression. none to disable."); - -DEFINE_bool( - verify_sst_unique_id_in_manifest, false, - "Enable DB options `verify_sst_unique_id_in_manifest`, if true, during " - "DB-open try verifying the SST unique id between MANIFEST and SST " - "properties."); - -DEFINE_int32( - create_timestamped_snapshot_one_in, 0, - "On non-zero, create timestamped snapshots upon transaction commits."); - -DEFINE_bool(allow_data_in_errors, - ROCKSDB_NAMESPACE::Options().allow_data_in_errors, - "If true, allow logging data, e.g. key, value in LOG files."); - -DEFINE_int32(verify_iterator_with_expected_state_one_in, 0, - "If non-zero, when TestIterate() is to be called, there is a " - "1/verify_iterator_with_expected_state_one_in " - "chance that the iterator is verified against the expected state " - "file, instead of comparing keys between two iterators."); - -DEFINE_uint64(readahead_size, 0, "Iterator readahead size"); -DEFINE_uint64(initial_auto_readahead_size, 0, - "Initial auto readahead size for prefetching during Iteration"); -DEFINE_uint64(max_auto_readahead_size, 0, - "Max auto readahead size for prefetching during Iteration"); -DEFINE_uint64( - num_file_reads_for_auto_readahead, 0, - "Num of sequential reads to enable auto prefetching during Iteration"); - -DEFINE_bool( - preserve_unverified_changes, false, - "DB files of the current run will all be preserved in `FLAGS_db`. DB files " - "from the last run will be preserved in `FLAGS_db/unverified` until the " - "first verification succeeds. Expected state files from the last run will " - "be preserved similarly under `FLAGS_expected_values_dir/unverified` when " - "`--expected_values_dir` is nonempty."); - -DEFINE_uint64(stats_dump_period_sec, - ROCKSDB_NAMESPACE::Options().stats_dump_period_sec, - "Gap between printing stats to log in seconds"); - -DEFINE_bool(use_io_uring, false, "Enable the use of IO uring on Posix"); -extern "C" bool RocksDbIOUringEnable() { return FLAGS_use_io_uring; } - -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_listener.cc b/db_stress_tool/db_stress_listener.cc deleted file mode 100644 index e2838c582..000000000 --- a/db_stress_tool/db_stress_listener.cc +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "db_stress_tool/db_stress_listener.h" - -#include - -#include "file/file_util.h" -#include "rocksdb/file_system.h" -#include "util/coding_lean.h" - -namespace ROCKSDB_NAMESPACE { - -#ifdef GFLAGS - -// TODO: consider using expected_values_dir instead, but this is more -// convenient for now. -UniqueIdVerifier::UniqueIdVerifier(const std::string& db_name, Env* env) - : path_(db_name + "/.unique_ids") { - // We expect such a small number of files generated during this test - // (thousands?), checking full 192-bit IDs for uniqueness is a very - // weak check. For a stronger check, we pick a specific 64-bit - // subsequence from the ID to check for uniqueness. 
All bits of the - // ID should be high quality, and 64 bits should be unique with - // very good probability for the quantities in this test. - offset_ = Random::GetTLSInstance()->Uniform(17); // 0 to 16 - - const std::shared_ptr fs = env->GetFileSystem(); - IOOptions opts; - - Status st = fs->CreateDirIfMissing(db_name, opts, nullptr); - if (!st.ok()) { - fprintf(stderr, "Failed to create directory %s: %s\n", db_name.c_str(), - st.ToString().c_str()); - exit(1); - } - - // Avoid relying on ReopenWritableFile which is not supported by all - // file systems. Create a new file and copy the old file contents to it. - std::string tmp_path = path_ + ".tmp"; - st = fs->FileExists(tmp_path, opts, /*dbg*/ nullptr); - if (st.IsNotFound()) { - st = fs->RenameFile(path_, tmp_path, opts, /*dbg*/ nullptr); - // Either it should succeed or fail because src path doesn't exist - assert(st.ok() || st.IsPathNotFound()); - } else { - // If path_ and tmp_path both exist, retain tmp_path as its - // guaranteed to be more complete. The order of operations are - - // 1. Rename path_ to tmp_path - // 2. Parse tmp_path contents - // 3. Create path_ - // 4. Copy tmp_path contents to path_ - // 5. Delete tmp_path - st = fs->DeleteFile(path_, opts, /*dbg*/ nullptr); - assert(st.ok() || st.IsPathNotFound()); - } - - uint64_t size = 0; - { - std::unique_ptr reader; - Status s = fs->NewSequentialFile(tmp_path, FileOptions(), &reader, - /*dbg*/ nullptr); - if (s.ok()) { - // Load from file - std::string id(24U, '\0'); - Slice result; - for (;;) { - s = reader->Read(id.size(), opts, &result, &id[0], /*dbg*/ nullptr); - if (!s.ok()) { - fprintf(stderr, "Error reading unique id file: %s\n", - s.ToString().c_str()); - assert(false); - } - if (result.size() < id.size()) { - // EOF - if (result.size() != 0) { - // Corrupt file. Not a DB bug but could happen if OS doesn't provide - // good guarantees on process crash. - fprintf(stdout, "Warning: clearing corrupt unique id file\n"); - id_set_.clear(); - reader.reset(); - s = fs->DeleteFile(tmp_path, opts, /*dbg*/ nullptr); - assert(s.ok()); - size = 0; - } - break; - } - size += 24U; - VerifyNoWrite(id); - } - } else { - // Newly created is ok. - // But FileSystem doesn't tell us whether non-existence was the cause of - // the failure. 
(Issue #9021) - Status s2 = fs->FileExists(tmp_path, opts, /*dbg*/ nullptr); - if (!s2.IsNotFound()) { - fprintf(stderr, "Error opening unique id file: %s\n", - s.ToString().c_str()); - assert(false); - } - size = 0; - } - } - fprintf(stdout, "(Re-)verified %zu unique IDs\n", id_set_.size()); - - std::unique_ptr file_writer; - st = fs->NewWritableFile(path_, FileOptions(), &file_writer, /*dbg*/ nullptr); - if (!st.ok()) { - fprintf(stderr, "Error creating the unique ids file: %s\n", - st.ToString().c_str()); - assert(false); - } - data_file_writer_.reset( - new WritableFileWriter(std::move(file_writer), path_, FileOptions())); - - if (size > 0) { - st = CopyFile(fs.get(), tmp_path, data_file_writer_, size, - /*use_fsync*/ true, /*io_tracer*/ nullptr, - /*temparature*/ Temperature::kHot); - if (!st.ok()) { - fprintf(stderr, "Error copying contents of old unique id file: %s\n", - st.ToString().c_str()); - assert(false); - } - } - st = fs->DeleteFile(tmp_path, opts, /*dbg*/ nullptr); - assert(st.ok() || st.IsPathNotFound()); -} - -UniqueIdVerifier::~UniqueIdVerifier() { - IOStatus s = data_file_writer_->Close(); - assert(s.ok()); -} - -void UniqueIdVerifier::VerifyNoWrite(const std::string& id) { - assert(id.size() == 24); - bool is_new = id_set_.insert(DecodeFixed64(&id[offset_])).second; - if (!is_new) { - fprintf(stderr, - "Duplicate partial unique ID found (offset=%zu, count=%zu)\n", - offset_, id_set_.size()); - assert(false); - } -} - -void UniqueIdVerifier::Verify(const std::string& id) { - assert(id.size() == 24); - std::lock_guard lock(mutex_); - // If we accumulate more than ~4 million IDs, there would be > 1 in 1M - // natural chance of collision. Thus, simply stop checking at that point. - if (id_set_.size() >= 4294967) { - return; - } - IOStatus s = data_file_writer_->Append(Slice(id)); - if (!s.ok()) { - fprintf(stderr, "Error writing to unique id file: %s\n", - s.ToString().c_str()); - assert(false); - } - s = data_file_writer_->Flush(); - if (!s.ok()) { - fprintf(stderr, "Error flushing unique id file: %s\n", - s.ToString().c_str()); - assert(false); - } - VerifyNoWrite(id); -} - -void DbStressListener::VerifyTableFileUniqueId( - const TableProperties& new_file_properties, const std::string& file_path) { - // Verify unique ID - std::string id; - // Unit tests verify that GetUniqueIdFromTableProperties returns just a - // substring of this, and we're only going to pull out 64 bits, so using - // GetExtendedUniqueIdFromTableProperties is arguably stronger testing here. - Status s = GetExtendedUniqueIdFromTableProperties(new_file_properties, &id); - if (!s.ok()) { - fprintf(stderr, "Error getting SST unique id for %s: %s\n", - file_path.c_str(), s.ToString().c_str()); - assert(false); - } - unique_ids_.Verify(id); -} - -#endif // GFLAGS - -} // namespace ROCKSDB_NAMESPACE diff --git a/db_stress_tool/db_stress_listener.h b/db_stress_tool/db_stress_listener.h deleted file mode 100644 index 97bbdaefa..000000000 --- a/db_stress_tool/db_stress_listener.h +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -#ifdef GFLAGS -#pragma once - -#include -#include - -#include "file/filename.h" -#include "file/writable_file_writer.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/file_system.h" -#include "rocksdb/listener.h" -#include "rocksdb/table_properties.h" -#include "rocksdb/unique_id.h" -#include "util/gflags_compat.h" -#include "util/random.h" - -DECLARE_int32(compact_files_one_in); - -namespace ROCKSDB_NAMESPACE { - -// Verify across process executions that all seen IDs are unique -class UniqueIdVerifier { - public: - explicit UniqueIdVerifier(const std::string& db_name, Env* env); - ~UniqueIdVerifier(); - - void Verify(const std::string& id); - - private: - void VerifyNoWrite(const std::string& id); - - private: - std::mutex mutex_; - // IDs persisted to a hidden file inside DB dir - std::string path_; - std::unique_ptr data_file_writer_; - // Starting byte for which 8 bytes to check in memory within 24 byte ID - size_t offset_; - // Working copy of the set of 8 byte pieces - std::unordered_set id_set_; -}; - -class DbStressListener : public EventListener { - public: - DbStressListener(const std::string& db_name, - const std::vector& db_paths, - const std::vector& column_families, - Env* env) - : db_name_(db_name), - db_paths_(db_paths), - column_families_(column_families), - num_pending_file_creations_(0), - unique_ids_(db_name, env) {} - - const char* Name() const override { return kClassName(); } - static const char* kClassName() { return "DBStressListener"; } - - ~DbStressListener() override { assert(num_pending_file_creations_ == 0); } - void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { - assert(IsValidColumnFamilyName(info.cf_name)); - VerifyFilePath(info.file_path); - // pretending doing some work here - RandomSleep(); - } - - void OnFlushBegin(DB* /*db*/, - const FlushJobInfo& /*flush_job_info*/) override { - RandomSleep(); - } - - void OnTableFileDeleted(const TableFileDeletionInfo& /*info*/) override { - RandomSleep(); - } - - void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& /*ci*/) override { - RandomSleep(); - } - - void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override { - assert(IsValidColumnFamilyName(ci.cf_name)); - assert(ci.input_files.size() + ci.output_files.size() > 0U); - for (const auto& file_path : ci.input_files) { - VerifyFilePath(file_path); - } - for (const auto& file_path : ci.output_files) { - VerifyFilePath(file_path); - } - // pretending doing some work here - RandomSleep(); - } - - void OnTableFileCreationStarted( - const TableFileCreationBriefInfo& /*info*/) override { - ++num_pending_file_creations_; - } - - void OnTableFileCreated(const TableFileCreationInfo& info) override { - assert(info.db_name == db_name_); - assert(IsValidColumnFamilyName(info.cf_name)); - assert(info.job_id > 0 || FLAGS_compact_files_one_in > 0); - if (info.status.ok()) { - assert(info.file_size > 0); - VerifyFilePath(info.file_path); - assert(info.table_properties.data_size > 0 || - info.table_properties.num_range_deletions > 0); - assert(info.table_properties.raw_key_size > 0); - assert(info.table_properties.num_entries > 0); - VerifyTableFileUniqueId(info.table_properties, info.file_path); - } - --num_pending_file_creations_; - } - - void OnMemTableSealed(const MemTableInfo& /*info*/) override { - RandomSleep(); - } - - void OnColumnFamilyHandleDeletionStarted( - ColumnFamilyHandle* /*handle*/) override { - RandomSleep(); - } - - void OnExternalFileIngested(DB* /*db*/, - const 
ExternalFileIngestionInfo& info) override { - RandomSleep(); - // Here we assume that each generated external file is ingested - // exactly once (or thrown away in case of crash) - VerifyTableFileUniqueId(info.table_properties, info.internal_file_path); - } - - void OnBackgroundError(BackgroundErrorReason /* reason */, - Status* /* bg_error */) override { - RandomSleep(); - } - - void OnStallConditionsChanged(const WriteStallInfo& /*info*/) override { - RandomSleep(); - } - - void OnFileReadFinish(const FileOperationInfo& info) override { - // Even empty callback is valuable because sometimes some locks are - // released in order to make the callback. - - // Sleep carefully here as it is a frequent operation and we don't want - // to slow down the tests. We always sleep when the read is large. - // When read is small, sleep in a small chance. - size_t length_read = info.length; - if (length_read >= 1000000 || Random::GetTLSInstance()->OneIn(1000)) { - RandomSleep(); - } - } - - void OnFileWriteFinish(const FileOperationInfo& info) override { - // Even empty callback is valuable because sometimes some locks are - // released in order to make the callback. - - // Sleep carefully here as it is a frequent operation and we don't want - // to slow down the tests. When the write is large, always sleep. - // Otherwise, sleep in a relatively small chance. - size_t length_write = info.length; - if (length_write >= 1000000 || Random::GetTLSInstance()->OneIn(64)) { - RandomSleep(); - } - } - - bool ShouldBeNotifiedOnFileIO() override { - RandomSleep(); - return static_cast(Random::GetTLSInstance()->OneIn(1)); - } - - void OnErrorRecoveryBegin(BackgroundErrorReason /* reason */, - Status /* bg_error */, - bool* /* auto_recovery */) override { - RandomSleep(); - } - - void OnErrorRecoveryCompleted(Status /* old_bg_error */) override { - RandomSleep(); - } - - protected: - bool IsValidColumnFamilyName(const std::string& cf_name) const { - if (cf_name == kDefaultColumnFamilyName) { - return true; - } - // The column family names in the stress tests are numbers. - for (size_t i = 0; i < cf_name.size(); ++i) { - if (cf_name[i] < '0' || cf_name[i] > '9') { - return false; - } - } - return true; - } - - void VerifyFileDir(const std::string& file_dir) { -#ifndef NDEBUG - if (db_name_ == file_dir) { - return; - } - for (const auto& db_path : db_paths_) { - if (db_path.path == file_dir) { - return; - } - } - for (auto& cf : column_families_) { - for (const auto& cf_path : cf.options.cf_paths) { - if (cf_path.path == file_dir) { - return; - } - } - } - assert(false); -#else - (void)file_dir; -#endif // !NDEBUG - } - - void VerifyFileName(const std::string& file_name) { -#ifndef NDEBUG - uint64_t file_number; - FileType file_type; - bool result = ParseFileName(file_name, &file_number, &file_type); - assert(result); - assert(file_type == kTableFile); -#else - (void)file_name; -#endif // !NDEBUG - } - - void VerifyFilePath(const std::string& file_path) { -#ifndef NDEBUG - size_t pos = file_path.find_last_of("/"); - if (pos == std::string::npos) { - VerifyFileName(file_path); - } else { - if (pos > 0) { - VerifyFileDir(file_path.substr(0, pos)); - } - VerifyFileName(file_path.substr(pos)); - } -#else - (void)file_path; -#endif // !NDEBUG - } - - // Unique id is verified using the TableProperties. file_path is only used - // for reporting. 
- void VerifyTableFileUniqueId(const TableProperties& new_file_properties, - const std::string& file_path); - - void RandomSleep() { - std::this_thread::sleep_for( - std::chrono::microseconds(Random::GetTLSInstance()->Uniform(5000))); - } - - private: - std::string db_name_; - std::vector db_paths_; - std::vector column_families_; - std::atomic num_pending_file_creations_; - UniqueIdVerifier unique_ids_; -}; -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_shared_state.cc b/db_stress_tool/db_stress_shared_state.cc deleted file mode 100644 index a27f6ac73..000000000 --- a/db_stress_tool/db_stress_shared_state.cc +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// - -#ifdef GFLAGS -#include "db_stress_tool/db_stress_shared_state.h" - -namespace ROCKSDB_NAMESPACE { -thread_local bool SharedState::ignore_read_error; -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_shared_state.h b/db_stress_tool/db_stress_shared_state.h deleted file mode 100644 index 5565c6221..000000000 --- a/db_stress_tool/db_stress_shared_state.h +++ /dev/null @@ -1,427 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors - -#ifdef GFLAGS -#pragma once - -#include "db_stress_tool/db_stress_stat.h" -#include "db_stress_tool/expected_state.h" -// SyncPoint is not supported in Released Windows Mode. -#if !(defined NDEBUG) || !defined(OS_WIN) -#include "test_util/sync_point.h" -#endif // !(defined NDEBUG) || !defined(OS_WIN) -#include "util/gflags_compat.h" - -DECLARE_uint64(seed); -DECLARE_int64(max_key); -DECLARE_uint64(log2_keys_per_lock); -DECLARE_int32(threads); -DECLARE_int32(column_families); -DECLARE_int32(nooverwritepercent); -DECLARE_string(expected_values_dir); -DECLARE_int32(clear_column_family_one_in); -DECLARE_bool(test_batches_snapshots); -DECLARE_int32(compaction_thread_pool_adjust_interval); -DECLARE_int32(continuous_verification_interval); -DECLARE_int32(read_fault_one_in); -DECLARE_int32(write_fault_one_in); -DECLARE_int32(open_metadata_write_fault_one_in); -DECLARE_int32(open_write_fault_one_in); -DECLARE_int32(open_read_fault_one_in); - -DECLARE_int32(injest_error_severity); - -namespace ROCKSDB_NAMESPACE { -class StressTest; - -// State shared by all concurrent executions of the same benchmark. -class SharedState { - public: - // indicates a key may have any value (or not be present) as an operation on - // it is incomplete. 
- static constexpr uint32_t UNKNOWN_SENTINEL = 0xfffffffe; - // indicates a key should definitely be deleted - static constexpr uint32_t DELETION_SENTINEL = 0xffffffff; - - // Errors when reading filter blocks are ignored, so we use a thread - // local variable updated via sync points to keep track of errors injected - // while reading filter blocks in order to ignore the Get/MultiGet result - // for those calls - static thread_local bool ignore_read_error; - - SharedState(Env* /*env*/, StressTest* stress_test) - : cv_(&mu_), - seed_(static_cast(FLAGS_seed)), - max_key_(FLAGS_max_key), - log2_keys_per_lock_(static_cast(FLAGS_log2_keys_per_lock)), - num_threads_(0), - num_initialized_(0), - num_populated_(0), - vote_reopen_(0), - num_done_(0), - start_(false), - start_verify_(false), - num_bg_threads_(0), - should_stop_bg_thread_(false), - bg_thread_finished_(0), - stress_test_(stress_test), - verification_failure_(false), - should_stop_test_(false), - no_overwrite_ids_(GenerateNoOverwriteIds()), - expected_state_manager_(nullptr), - printing_verification_results_(false), - start_timestamp_(Env::Default()->NowNanos()) { - Status status; - // TODO: We should introduce a way to explicitly disable verification - // during shutdown. When that is disabled and FLAGS_expected_values_dir - // is empty (disabling verification at startup), we can skip tracking - // expected state. Only then should we permit bypassing the below feature - // compatibility checks. - if (!FLAGS_expected_values_dir.empty()) { - if (!std::atomic{}.is_lock_free()) { - status = Status::InvalidArgument( - "Cannot use --expected_values_dir on platforms without lock-free " - "std::atomic"); - } - if (status.ok() && FLAGS_clear_column_family_one_in > 0) { - status = Status::InvalidArgument( - "Cannot use --expected_values_dir on when " - "--clear_column_family_one_in is greater than zero."); - } - } - if (status.ok()) { - if (FLAGS_expected_values_dir.empty()) { - expected_state_manager_.reset( - new AnonExpectedStateManager(FLAGS_max_key, FLAGS_column_families)); - } else { - expected_state_manager_.reset(new FileExpectedStateManager( - FLAGS_max_key, FLAGS_column_families, FLAGS_expected_values_dir)); - } - status = expected_state_manager_->Open(); - } - if (!status.ok()) { - fprintf(stderr, "Failed setting up expected state with error: %s\n", - status.ToString().c_str()); - exit(1); - } - - if (FLAGS_test_batches_snapshots) { - fprintf(stdout, "No lock creation because test_batches_snapshots set\n"); - return; - } - - long num_locks = static_cast(max_key_ >> log2_keys_per_lock_); - if (max_key_ & ((1 << log2_keys_per_lock_) - 1)) { - num_locks++; - } - fprintf(stdout, "Creating %ld locks\n", num_locks * FLAGS_column_families); - key_locks_.resize(FLAGS_column_families); - - for (int i = 0; i < FLAGS_column_families; ++i) { - key_locks_[i].reset(new port::Mutex[num_locks]); - } - if (FLAGS_read_fault_one_in) { -#ifdef NDEBUG - // Unsupported in release mode because it relies on - // `IGNORE_STATUS_IF_ERROR` to distinguish faults not expected to lead to - // failure. 
- fprintf(stderr, - "Cannot set nonzero value for --read_fault_one_in in " - "release mode."); - exit(1); -#else // NDEBUG - SyncPoint::GetInstance()->SetCallBack("FaultInjectionIgnoreError", - IgnoreReadErrorCallback); - SyncPoint::GetInstance()->EnableProcessing(); -#endif // NDEBUG - } - } - - ~SharedState() { -#ifndef NDEBUG - if (FLAGS_read_fault_one_in) { - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - } -#endif - } - - port::Mutex* GetMutex() { return &mu_; } - - port::CondVar* GetCondVar() { return &cv_; } - - StressTest* GetStressTest() const { return stress_test_; } - - int64_t GetMaxKey() const { return max_key_; } - - uint32_t GetNumThreads() const { return num_threads_; } - - void SetThreads(int num_threads) { num_threads_ = num_threads; } - - void IncInitialized() { num_initialized_++; } - - void IncOperated() { num_populated_++; } - - void IncDone() { num_done_++; } - - void IncVotedReopen() { vote_reopen_ = (vote_reopen_ + 1) % num_threads_; } - - bool AllInitialized() const { return num_initialized_ >= num_threads_; } - - bool AllOperated() const { return num_populated_ >= num_threads_; } - - bool AllDone() const { return num_done_ >= num_threads_; } - - bool AllVotedReopen() { return (vote_reopen_ == 0); } - - void SetStart() { start_ = true; } - - void SetStartVerify() { start_verify_ = true; } - - bool Started() const { return start_; } - - bool VerifyStarted() const { return start_verify_; } - - void SetVerificationFailure() { verification_failure_.store(true); } - - bool HasVerificationFailedYet() const { return verification_failure_.load(); } - - void SetShouldStopTest() { should_stop_test_.store(true); } - - bool ShouldStopTest() const { return should_stop_test_.load(); } - - // Returns a lock covering `key` in `cf`. - port::Mutex* GetMutexForKey(int cf, int64_t key) { - return &key_locks_[cf][key >> log2_keys_per_lock_]; - } - - // Acquires locks for all keys in `cf`. - void LockColumnFamily(int cf) { - for (int i = 0; i < max_key_ >> log2_keys_per_lock_; ++i) { - key_locks_[cf][i].Lock(); - } - } - - // Releases locks for all keys in `cf`. - void UnlockColumnFamily(int cf) { - for (int i = 0; i < max_key_ >> log2_keys_per_lock_; ++i) { - key_locks_[cf][i].Unlock(); - } - } - - // Returns a collection of mutex locks covering the key range [start, end) in - // `cf`. - std::vector> GetLocksForKeyRange(int cf, - int64_t start, - int64_t end) { - std::vector> range_locks; - - if (start >= end) { - return range_locks; - } - - const int64_t start_idx = start >> log2_keys_per_lock_; - - int64_t end_idx = end >> log2_keys_per_lock_; - if ((end & ((1 << log2_keys_per_lock_) - 1)) == 0) { - --end_idx; - } - - for (int64_t idx = start_idx; idx <= end_idx; ++idx) { - range_locks.emplace_back( - std::make_unique(&key_locks_[cf][idx])); - } - - return range_locks; - } - - Status SaveAtAndAfter(DB* db) { - return expected_state_manager_->SaveAtAndAfter(db); - } - - bool HasHistory() { return expected_state_manager_->HasHistory(); } - - Status Restore(DB* db) { return expected_state_manager_->Restore(db); } - - // Requires external locking covering all keys in `cf`. - void ClearColumnFamily(int cf) { - return expected_state_manager_->ClearColumnFamily(cf); - } - - // @param pending True if the update may have started but is not yet - // guaranteed finished. This is useful for crash-recovery testing when the - // process may crash before updating the expected values array. 
- // - // Requires external locking covering `key` in `cf`. - void Put(int cf, int64_t key, uint32_t value_base, bool pending) { - return expected_state_manager_->Put(cf, key, value_base, pending); - } - - // Requires external locking covering `key` in `cf`. - uint32_t Get(int cf, int64_t key) const { - return expected_state_manager_->Get(cf, key); - } - - // @param pending See comment above Put() - // Returns true if the key was not yet deleted. - // - // Requires external locking covering `key` in `cf`. - bool Delete(int cf, int64_t key, bool pending) { - return expected_state_manager_->Delete(cf, key, pending); - } - - // @param pending See comment above Put() - // Returns true if the key was not yet deleted. - // - // Requires external locking covering `key` in `cf`. - bool SingleDelete(int cf, int64_t key, bool pending) { - return expected_state_manager_->Delete(cf, key, pending); - } - - // @param pending See comment above Put() - // Returns number of keys deleted by the call. - // - // Requires external locking covering keys in `[begin_key, end_key)` in `cf`. - int DeleteRange(int cf, int64_t begin_key, int64_t end_key, bool pending) { - return expected_state_manager_->DeleteRange(cf, begin_key, end_key, - pending); - } - - bool AllowsOverwrite(int64_t key) const { - return no_overwrite_ids_.find(key) == no_overwrite_ids_.end(); - } - - // Requires external locking covering `key` in `cf`. - bool Exists(int cf, int64_t key) { - return expected_state_manager_->Exists(cf, key); - } - - uint32_t GetSeed() const { return seed_; } - - void SetShouldStopBgThread() { should_stop_bg_thread_ = true; } - - bool ShouldStopBgThread() { return should_stop_bg_thread_; } - - void IncBgThreads() { ++num_bg_threads_; } - - void IncBgThreadsFinished() { ++bg_thread_finished_; } - - bool BgThreadsFinished() const { - return bg_thread_finished_ == num_bg_threads_; - } - - bool ShouldVerifyAtBeginning() const { - return !FLAGS_expected_values_dir.empty(); - } - - bool PrintingVerificationResults() { - bool tmp = false; - return !printing_verification_results_.compare_exchange_strong( - tmp, true, std::memory_order_relaxed); - } - - void FinishPrintingVerificationResults() { - printing_verification_results_.store(false, std::memory_order_relaxed); - } - - uint64_t GetStartTimestamp() const { return start_timestamp_; } - - private: - static void IgnoreReadErrorCallback(void*) { ignore_read_error = true; } - - // Pick random keys in each column family that will not experience overwrite. - std::unordered_set GenerateNoOverwriteIds() const { - fprintf(stdout, "Choosing random keys with no overwrite\n"); - // Start with the identity permutation. 
Subsequent iterations of - // for loop below will start with perm of previous for loop - std::vector permutation(max_key_); - for (int64_t i = 0; i < max_key_; ++i) { - permutation[i] = i; - } - // Now do the Knuth shuffle - const int64_t num_no_overwrite_keys = - (max_key_ * FLAGS_nooverwritepercent) / 100; - // Only need to figure out first num_no_overwrite_keys of permutation - std::unordered_set ret; - ret.reserve(num_no_overwrite_keys); - Random64 rnd(seed_); - for (int64_t i = 0; i < num_no_overwrite_keys; i++) { - assert(i < max_key_); - int64_t rand_index = i + rnd.Next() % (max_key_ - i); - // Swap i and rand_index; - int64_t temp = permutation[i]; - permutation[i] = permutation[rand_index]; - permutation[rand_index] = temp; - // Fill no_overwrite_ids_ with the first num_no_overwrite_keys of - // permutation - ret.insert(permutation[i]); - } - return ret; - } - - port::Mutex mu_; - port::CondVar cv_; - const uint32_t seed_; - const int64_t max_key_; - const uint32_t log2_keys_per_lock_; - int num_threads_; - long num_initialized_; - long num_populated_; - long vote_reopen_; - long num_done_; - bool start_; - bool start_verify_; - int num_bg_threads_; - bool should_stop_bg_thread_; - int bg_thread_finished_; - StressTest* stress_test_; - std::atomic verification_failure_; - std::atomic should_stop_test_; - - // Keys that should not be overwritten - const std::unordered_set no_overwrite_ids_; - - std::unique_ptr expected_state_manager_; - // Cannot store `port::Mutex` directly in vector since it is not copyable - // and storing it in the container may require copying depending on the impl. - std::vector> key_locks_; - std::atomic printing_verification_results_; - const uint64_t start_timestamp_; -}; - -// Per-thread state for concurrent executions of the same benchmark. -struct ThreadState { - uint32_t tid; // 0..n-1 - Random rand; // Has different seeds for different threads - SharedState* shared; - Stats stats; - struct SnapshotState { - const Snapshot* snapshot; - // The cf from which we did a Get at this snapshot - int cf_at; - // The name of the cf at the time that we did a read - std::string cf_at_name; - // The key with which we did a Get at this snapshot - std::string key; - // The status of the Get - Status status; - // The value of the Get - std::string value; - // optional state of all keys in the db - std::vector* key_vec; - - std::string timestamp; - }; - std::queue> snapshot_queue; - - ThreadState(uint32_t index, SharedState* _shared) - : tid(index), rand(1000 + index + _shared->GetSeed()), shared(_shared) {} -}; -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_stat.cc b/db_stress_tool/db_stress_stat.cc deleted file mode 100644 index 6a7883a52..000000000 --- a/db_stress_tool/db_stress_stat.cc +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
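GenerateNoOverwriteIds() above runs only the first num_no_overwrite_keys steps of a Fisher-Yates (Knuth) shuffle, which is enough to draw a uniformly random subset of keys without materializing a full shuffle. A minimal standalone sketch of the same idea, with hypothetical names and <random> standing in for RocksDB's Random64:

    #include <cstdint>
    #include <random>
    #include <unordered_set>
    #include <vector>

    // Sketch: pick roughly `percent`% of the keys in [0, max_key) uniformly at
    // random by running only the first n steps of a Fisher-Yates shuffle.
    std::unordered_set<int64_t> PickNoOverwriteKeys(int64_t max_key, int percent,
                                                    uint64_t seed) {
      std::vector<int64_t> perm(max_key);
      for (int64_t i = 0; i < max_key; ++i) {
        perm[i] = i;
      }
      const int64_t n = max_key * percent / 100;
      std::mt19937_64 rng(seed);
      std::unordered_set<int64_t> picked;
      picked.reserve(n);
      for (int64_t i = 0; i < n; ++i) {
        // Swap slot i with a uniformly chosen slot in [i, max_key); after the
        // swap, perm[i] is a fresh uniform draw from the remaining keys.
        std::uniform_int_distribution<int64_t> dist(i, max_key - 1);
        std::swap(perm[i], perm[dist(rng)]);
        picked.insert(perm[i]);
      }
      return picked;
    }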
- -#ifdef GFLAGS - -#include "db_stress_tool/db_stress_stat.h" - -namespace ROCKSDB_NAMESPACE { - -std::shared_ptr dbstats; -std::shared_ptr dbstats_secondaries; - -} // namespace ROCKSDB_NAMESPACE - -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_stat.h b/db_stress_tool/db_stress_stat.h deleted file mode 100644 index 5b38c6e2b..000000000 --- a/db_stress_tool/db_stress_stat.h +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once -#include -#include -#include -#include - -#include "monitoring/histogram.h" -#include "port/port.h" -#include "rocksdb/snapshot.h" -#include "rocksdb/statistics.h" -#include "rocksdb/system_clock.h" -#include "util/gflags_compat.h" -#include "util/random.h" - -DECLARE_bool(histogram); -DECLARE_bool(progress_reports); - -namespace ROCKSDB_NAMESPACE { - -// Database statistics -extern std::shared_ptr dbstats; -extern std::shared_ptr dbstats_secondaries; - -class Stats { - private: - uint64_t start_; - uint64_t finish_; - double seconds_; - long done_; - long gets_; - long prefixes_; - long writes_; - long deletes_; - size_t single_deletes_; - long iterator_size_sums_; - long founds_; - long iterations_; - long range_deletions_; - long covered_by_range_deletions_; - long errors_; - long verified_errors_; - long num_compact_files_succeed_; - long num_compact_files_failed_; - int next_report_; - size_t bytes_; - uint64_t last_op_finish_; - HistogramImpl hist_; - - public: - Stats() {} - - void Start() { - next_report_ = 100; - hist_.Clear(); - done_ = 0; - gets_ = 0; - prefixes_ = 0; - writes_ = 0; - deletes_ = 0; - single_deletes_ = 0; - iterator_size_sums_ = 0; - founds_ = 0; - iterations_ = 0; - range_deletions_ = 0; - covered_by_range_deletions_ = 0; - errors_ = 0; - verified_errors_ = 0; - bytes_ = 0; - seconds_ = 0; - num_compact_files_succeed_ = 0; - num_compact_files_failed_ = 0; - start_ = SystemClock::Default()->NowMicros(); - last_op_finish_ = start_; - finish_ = start_; - } - - void Merge(const Stats& other) { - hist_.Merge(other.hist_); - done_ += other.done_; - gets_ += other.gets_; - prefixes_ += other.prefixes_; - writes_ += other.writes_; - deletes_ += other.deletes_; - single_deletes_ += other.single_deletes_; - iterator_size_sums_ += other.iterator_size_sums_; - founds_ += other.founds_; - iterations_ += other.iterations_; - range_deletions_ += other.range_deletions_; - covered_by_range_deletions_ = other.covered_by_range_deletions_; - errors_ += other.errors_; - verified_errors_ += other.verified_errors_; - bytes_ += other.bytes_; - seconds_ += other.seconds_; - num_compact_files_succeed_ += other.num_compact_files_succeed_; - num_compact_files_failed_ += other.num_compact_files_failed_; - if (other.start_ < start_) start_ = other.start_; - if (other.finish_ > finish_) finish_ = other.finish_; - } - - void Stop() { - finish_ = SystemClock::Default()->NowMicros(); - seconds_ = (finish_ - start_) * 1e-6; - } - - void FinishedSingleOp() { - if (FLAGS_histogram) { - auto now = SystemClock::Default()->NowMicros(); - auto micros = now - last_op_finish_; - hist_.Add(micros); - if (micros > 20000) { - fprintf(stdout, "long op: %" PRIu64 " micros%30s\r", micros, ""); - } - last_op_finish_ = now; - } - - done_++; - if (FLAGS_progress_reports) { - if (done_ >= next_report_) { - if (next_report_ < 1000) 
- next_report_ += 100; - else if (next_report_ < 5000) - next_report_ += 500; - else if (next_report_ < 10000) - next_report_ += 1000; - else if (next_report_ < 50000) - next_report_ += 5000; - else if (next_report_ < 100000) - next_report_ += 10000; - else if (next_report_ < 500000) - next_report_ += 50000; - else - next_report_ += 100000; - fprintf(stdout, "... finished %ld ops%30s\r", done_, ""); - } - } - } - - void AddBytesForWrites(long nwrites, size_t nbytes) { - writes_ += nwrites; - bytes_ += nbytes; - } - - void AddGets(long ngets, long nfounds) { - founds_ += nfounds; - gets_ += ngets; - } - - void AddPrefixes(long nprefixes, long count) { - prefixes_ += nprefixes; - iterator_size_sums_ += count; - } - - void AddIterations(long n) { iterations_ += n; } - - void AddDeletes(long n) { deletes_ += n; } - - void AddSingleDeletes(size_t n) { single_deletes_ += n; } - - void AddRangeDeletions(long n) { range_deletions_ += n; } - - void AddCoveredByRangeDeletions(long n) { covered_by_range_deletions_ += n; } - - void AddErrors(long n) { errors_ += n; } - - void AddVerifiedErrors(long n) { verified_errors_ += n; } - - void AddNumCompactFilesSucceed(long n) { num_compact_files_succeed_ += n; } - - void AddNumCompactFilesFailed(long n) { num_compact_files_failed_ += n; } - - void Report(const char* name) { - std::string extra; - if (bytes_ < 1 || done_ < 1) { - fprintf(stderr, "No writes or ops?\n"); - return; - } - - double elapsed = (finish_ - start_) * 1e-6; - double bytes_mb = bytes_ / 1048576.0; - double rate = bytes_mb / elapsed; - double throughput = (double)done_ / elapsed; - - fprintf(stdout, "%-12s: ", name); - fprintf(stdout, "%.3f micros/op %ld ops/sec\n", seconds_ * 1e6 / done_, - (long)throughput); - fprintf(stdout, "%-12s: Wrote %.2f MB (%.2f MB/sec) (%ld%% of %ld ops)\n", - "", bytes_mb, rate, (100 * writes_) / done_, done_); - fprintf(stdout, "%-12s: Wrote %ld times\n", "", writes_); - fprintf(stdout, "%-12s: Deleted %ld times\n", "", deletes_); - fprintf(stdout, "%-12s: Single deleted %" ROCKSDB_PRIszt " times\n", "", - single_deletes_); - fprintf(stdout, "%-12s: %ld read and %ld found the key\n", "", gets_, - founds_); - fprintf(stdout, "%-12s: Prefix scanned %ld times\n", "", prefixes_); - fprintf(stdout, "%-12s: Iterator size sum is %ld\n", "", - iterator_size_sums_); - fprintf(stdout, "%-12s: Iterated %ld times\n", "", iterations_); - fprintf(stdout, "%-12s: Deleted %ld key-ranges\n", "", range_deletions_); - fprintf(stdout, "%-12s: Range deletions covered %ld keys\n", "", - covered_by_range_deletions_); - - fprintf(stdout, "%-12s: Got errors %ld times\n", "", errors_); - fprintf(stdout, "%-12s: %ld CompactFiles() succeed\n", "", - num_compact_files_succeed_); - fprintf(stdout, "%-12s: %ld CompactFiles() did not succeed\n", "", - num_compact_files_failed_); - - if (FLAGS_histogram) { - fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str()); - } - fflush(stdout); - } -}; -} // namespace ROCKSDB_NAMESPACE diff --git a/db_stress_tool/db_stress_table_properties_collector.h b/db_stress_tool/db_stress_table_properties_collector.h deleted file mode 100644 index d1758cbb4..000000000 --- a/db_stress_tool/db_stress_table_properties_collector.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
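The throughput figures printed by Stats::Report() above come from straightforward unit conversions: microseconds per op, ops per second, and MiB per second. A small self-contained example with made-up numbers, purely for illustration:

    #include <cstdio>

    // Worked example of the Report() arithmetic above: micros/op = elapsed_us / ops,
    // ops/sec = ops / elapsed_s, MB/sec = (bytes / 2^20) / elapsed_s.
    int main() {
      const double elapsed_us = 20e6;                  // 20 seconds of wall time
      const long ops = 1000000;                        // 1M operations finished
      const double bytes = 2.0 * 1024 * 1024 * 1024;   // 2 GiB written
      const double elapsed_s = elapsed_us * 1e-6;
      std::printf("%.3f micros/op, %.0f ops/sec, %.2f MB/sec\n",
                  elapsed_us / ops, ops / elapsed_s,
                  (bytes / 1048576.0) / elapsed_s);
      // Prints: 20.000 micros/op, 50000 ops/sec, 102.40 MB/sec
      return 0;
    }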
- -#pragma once - -#include "rocksdb/table.h" -#include "util/gflags_compat.h" -#include "util/random.h" - -DECLARE_int32(mark_for_compaction_one_file_in); - -namespace ROCKSDB_NAMESPACE { - -// A `DbStressTablePropertiesCollector` ignores what keys/values were added to -// the table, adds no properties to the table, and decides at random whether the -// table will be marked for compaction according to -// `FLAGS_mark_for_compaction_one_file_in`. -class DbStressTablePropertiesCollector : public TablePropertiesCollector { - public: - DbStressTablePropertiesCollector() - : need_compact_(Random::GetTLSInstance()->OneInOpt( - FLAGS_mark_for_compaction_one_file_in)) {} - - virtual Status AddUserKey(const Slice& /* key */, const Slice& /* value */, - EntryType /*type*/, SequenceNumber /*seq*/, - uint64_t /*file_size*/) override { - return Status::OK(); - } - - virtual Status Finish(UserCollectedProperties* /* properties */) override { - return Status::OK(); - } - - virtual UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } - - virtual const char* Name() const override { - return "DbStressTablePropertiesCollector"; - } - - virtual bool NeedCompact() const override { return need_compact_; } - - private: - const bool need_compact_; -}; - -// A `DbStressTablePropertiesCollectorFactory` creates -// `DbStressTablePropertiesCollectorFactory`s. -class DbStressTablePropertiesCollectorFactory - : public TablePropertiesCollectorFactory { - public: - virtual TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context /* context */) override { - return new DbStressTablePropertiesCollector(); - } - - virtual const char* Name() const override { - return "DbStressTablePropertiesCollectorFactory"; - } -}; - -} // namespace ROCKSDB_NAMESPACE diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc deleted file mode 100644 index 610826f4b..000000000 --- a/db_stress_tool/db_stress_test_base.cc +++ /dev/null @@ -1,3282 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
-// - -#include -#include - -#include "util/compression.h" -#ifdef GFLAGS -#include "db_stress_tool/db_stress_common.h" -#include "db_stress_tool/db_stress_compaction_filter.h" -#include "db_stress_tool/db_stress_driver.h" -#include "db_stress_tool/db_stress_table_properties_collector.h" -#include "rocksdb/convenience.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/secondary_cache.h" -#include "rocksdb/sst_file_manager.h" -#include "rocksdb/types.h" -#include "rocksdb/utilities/object_registry.h" -#include "rocksdb/utilities/write_batch_with_index.h" -#include "test_util/testutil.h" -#include "util/cast_util.h" -#include "utilities/backup/backup_engine_impl.h" -#include "utilities/fault_injection_fs.h" -#include "utilities/fault_injection_secondary_cache.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { - -std::shared_ptr CreateFilterPolicy() { - if (FLAGS_bloom_bits < 0) { - return BlockBasedTableOptions().filter_policy; - } - const FilterPolicy* new_policy; - if (FLAGS_ribbon_starting_level >= 999) { - // Use Bloom API - new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, false); - } else { - new_policy = NewRibbonFilterPolicy( - FLAGS_bloom_bits, /* bloom_before_level */ FLAGS_ribbon_starting_level); - } - return std::shared_ptr(new_policy); -} - -} // namespace - -StressTest::StressTest() - : cache_(NewCache(FLAGS_cache_size, FLAGS_cache_numshardbits)), - filter_policy_(CreateFilterPolicy()), - db_(nullptr), - txn_db_(nullptr), - - db_aptr_(nullptr), - clock_(db_stress_env->GetSystemClock().get()), - new_column_family_name_(1), - num_times_reopened_(0), - db_preload_finished_(false), - cmp_db_(nullptr), - is_db_stopped_(false) { - if (FLAGS_destroy_db_initially) { - std::vector files; - db_stress_env->GetChildren(FLAGS_db, &files); - for (unsigned int i = 0; i < files.size(); i++) { - if (Slice(files[i]).starts_with("heap-")) { - db_stress_env->DeleteFile(FLAGS_db + "/" + files[i]); - } - } - - Options options; - options.env = db_stress_env; - // Remove files without preserving manfiest files - const Status s = !FLAGS_use_blob_db - ? 
DestroyDB(FLAGS_db, options) - : blob_db::DestroyBlobDB(FLAGS_db, options, - blob_db::BlobDBOptions()); - - if (!s.ok()) { - fprintf(stderr, "Cannot destroy original db: %s\n", s.ToString().c_str()); - exit(1); - } - } -} - -StressTest::~StressTest() { - for (auto cf : column_families_) { - delete cf; - } - column_families_.clear(); - delete db_; - - for (auto* cf : cmp_cfhs_) { - delete cf; - } - cmp_cfhs_.clear(); - delete cmp_db_; -} - -std::shared_ptr StressTest::NewCache(size_t capacity, - int32_t num_shard_bits) { - ConfigOptions config_options; - if (capacity <= 0) { - return nullptr; - } - - std::shared_ptr secondary_cache; - if (!FLAGS_secondary_cache_uri.empty()) { - Status s = SecondaryCache::CreateFromString( - config_options, FLAGS_secondary_cache_uri, &secondary_cache); - if (secondary_cache == nullptr) { - fprintf(stderr, - "No secondary cache registered matching string: %s status=%s\n", - FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str()); - exit(1); - } - if (FLAGS_secondary_cache_fault_one_in > 0) { - secondary_cache = std::make_shared( - secondary_cache, static_cast(FLAGS_seed), - FLAGS_secondary_cache_fault_one_in); - } - } - - if (FLAGS_cache_type == "clock_cache") { - fprintf(stderr, "Old clock cache implementation has been removed.\n"); - exit(1); - } else if (FLAGS_cache_type == "hyper_clock_cache") { - HyperClockCacheOptions opts(static_cast(capacity), - FLAGS_block_size /*estimated_entry_charge*/, - num_shard_bits); - opts.secondary_cache = std::move(secondary_cache); - return opts.MakeSharedCache(); - } else if (FLAGS_cache_type == "lru_cache") { - LRUCacheOptions opts; - opts.capacity = capacity; - opts.num_shard_bits = num_shard_bits; - opts.secondary_cache = std::move(secondary_cache); - return NewLRUCache(opts); - } else { - fprintf(stderr, "Cache type not supported."); - exit(1); - } -} - -std::vector StressTest::GetBlobCompressionTags() { - std::vector compression_tags{"kNoCompression"}; - - if (Snappy_Supported()) { - compression_tags.emplace_back("kSnappyCompression"); - } - if (LZ4_Supported()) { - compression_tags.emplace_back("kLZ4Compression"); - } - if (ZSTD_Supported()) { - compression_tags.emplace_back("kZSTD"); - } - - return compression_tags; -} - -bool StressTest::BuildOptionsTable() { - if (FLAGS_set_options_one_in <= 0) { - return true; - } - - std::unordered_map> options_tbl = { - {"write_buffer_size", - {std::to_string(options_.write_buffer_size), - std::to_string(options_.write_buffer_size * 2), - std::to_string(options_.write_buffer_size * 4)}}, - {"max_write_buffer_number", - {std::to_string(options_.max_write_buffer_number), - std::to_string(options_.max_write_buffer_number * 2), - std::to_string(options_.max_write_buffer_number * 4)}}, - {"arena_block_size", - { - std::to_string(options_.arena_block_size), - std::to_string(options_.write_buffer_size / 4), - std::to_string(options_.write_buffer_size / 8), - }}, - {"memtable_huge_page_size", {"0", std::to_string(2 * 1024 * 1024)}}, - {"max_successive_merges", {"0", "2", "4"}}, - {"inplace_update_num_locks", {"100", "200", "300"}}, - // TODO: re-enable once internal task T124324915 is fixed. 
- // {"experimental_mempurge_threshold", {"0.0", "1.0"}}, - // TODO(ljin): enable test for this option - // {"disable_auto_compactions", {"100", "200", "300"}}, - {"level0_file_num_compaction_trigger", - { - std::to_string(options_.level0_file_num_compaction_trigger), - std::to_string(options_.level0_file_num_compaction_trigger + 2), - std::to_string(options_.level0_file_num_compaction_trigger + 4), - }}, - {"level0_slowdown_writes_trigger", - { - std::to_string(options_.level0_slowdown_writes_trigger), - std::to_string(options_.level0_slowdown_writes_trigger + 2), - std::to_string(options_.level0_slowdown_writes_trigger + 4), - }}, - {"level0_stop_writes_trigger", - { - std::to_string(options_.level0_stop_writes_trigger), - std::to_string(options_.level0_stop_writes_trigger + 2), - std::to_string(options_.level0_stop_writes_trigger + 4), - }}, - {"max_compaction_bytes", - { - std::to_string(options_.target_file_size_base * 5), - std::to_string(options_.target_file_size_base * 15), - std::to_string(options_.target_file_size_base * 100), - }}, - {"target_file_size_base", - { - std::to_string(options_.target_file_size_base), - std::to_string(options_.target_file_size_base * 2), - std::to_string(options_.target_file_size_base * 4), - }}, - {"target_file_size_multiplier", - { - std::to_string(options_.target_file_size_multiplier), - "1", - "2", - }}, - {"max_bytes_for_level_base", - { - std::to_string(options_.max_bytes_for_level_base / 2), - std::to_string(options_.max_bytes_for_level_base), - std::to_string(options_.max_bytes_for_level_base * 2), - }}, - {"max_bytes_for_level_multiplier", - { - std::to_string(options_.max_bytes_for_level_multiplier), - "1", - "2", - }}, - {"max_sequential_skip_in_iterations", {"4", "8", "12"}}, - }; - - if (FLAGS_allow_setting_blob_options_dynamically) { - options_tbl.emplace("enable_blob_files", - std::vector{"false", "true"}); - options_tbl.emplace("min_blob_size", - std::vector{"0", "8", "16"}); - options_tbl.emplace("blob_file_size", - std::vector{"1M", "16M", "256M", "1G"}); - options_tbl.emplace("blob_compression_type", GetBlobCompressionTags()); - options_tbl.emplace("enable_blob_garbage_collection", - std::vector{"false", "true"}); - options_tbl.emplace( - "blob_garbage_collection_age_cutoff", - std::vector{"0.0", "0.25", "0.5", "0.75", "1.0"}); - options_tbl.emplace("blob_garbage_collection_force_threshold", - std::vector{"0.5", "0.75", "1.0"}); - options_tbl.emplace("blob_compaction_readahead_size", - std::vector{"0", "1M", "4M"}); - options_tbl.emplace("blob_file_starting_level", - std::vector{"0", "1", "2"}); - options_tbl.emplace("prepopulate_blob_cache", - std::vector{"kDisable", "kFlushOnly"}); - } - - options_table_ = std::move(options_tbl); - - for (const auto& iter : options_table_) { - options_index_.push_back(iter.first); - } - return true; -} - -void StressTest::InitDb(SharedState* shared) { - uint64_t now = clock_->NowMicros(); - fprintf(stdout, "%s Initializing db_stress\n", - clock_->TimeToString(now / 1000000).c_str()); - PrintEnv(); - Open(shared); - BuildOptionsTable(); -} - -void StressTest::FinishInitDb(SharedState* shared) { - if (FLAGS_read_only) { - uint64_t now = clock_->NowMicros(); - fprintf(stdout, "%s Preloading db with %" PRIu64 " KVs\n", - clock_->TimeToString(now / 1000000).c_str(), FLAGS_max_key); - PreloadDbAndReopenAsReadOnly(FLAGS_max_key, shared); - } - - if (shared->HasHistory()) { - // The way it works right now is, if there's any history, that means the - // previous run mutating the DB had all its operations 
traced, in which case - // we should always be able to `Restore()` the expected values to match the - // `db_`'s current seqno. - Status s = shared->Restore(db_); - if (!s.ok()) { - fprintf(stderr, "Error restoring historical expected values: %s\n", - s.ToString().c_str()); - exit(1); - } - } - if (FLAGS_use_txn) { - // It's OK here without sync because unsynced data cannot be lost at this - // point - // - even with sync_fault_injection=1 as the - // file is still directly writable until after FinishInitDb() - ProcessRecoveredPreparedTxns(shared); - } - - if (FLAGS_enable_compaction_filter) { - auto* compaction_filter_factory = - reinterpret_cast( - options_.compaction_filter_factory.get()); - assert(compaction_filter_factory); - // This must be called only after any potential `SharedState::Restore()` has - // completed in order for the `compaction_filter_factory` to operate on the - // correct latest values file. - compaction_filter_factory->SetSharedState(shared); - fprintf(stdout, "Compaction filter factory: %s\n", - compaction_filter_factory->Name()); - } -} - -void StressTest::TrackExpectedState(SharedState* shared) { - // For `FLAGS_manual_wal_flush_one_inWAL` - // data can be lost when `manual_wal_flush_one_in > 0` and `FlushWAL()` is not - // explictly called by users of RocksDB (in our case, db stress). - // Therefore recovery from such potential WAL data loss is a prefix recovery - // that requires tracing - if ((FLAGS_sync_fault_injection || FLAGS_disable_wal || - FLAGS_manual_wal_flush_one_in > 0) && - IsStateTracked()) { - Status s = shared->SaveAtAndAfter(db_); - if (!s.ok()) { - fprintf(stderr, "Error enabling history tracing: %s\n", - s.ToString().c_str()); - exit(1); - } - } -} - -Status StressTest::AssertSame(DB* db, ColumnFamilyHandle* cf, - ThreadState::SnapshotState& snap_state) { - Status s; - if (cf->GetName() != snap_state.cf_at_name) { - return s; - } - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. - ReadOptions ropt; - ropt.snapshot = snap_state.snapshot; - Slice ts; - if (!snap_state.timestamp.empty()) { - ts = snap_state.timestamp; - ropt.timestamp = &ts; - } - PinnableSlice exp_v(&snap_state.value); - exp_v.PinSelf(); - PinnableSlice v; - s = db->Get(ropt, cf, snap_state.key, &v); - if (!s.ok() && !s.IsNotFound()) { - return s; - } - if (snap_state.status != s) { - return Status::Corruption( - "The snapshot gave inconsistent results for key " + - std::to_string(Hash(snap_state.key.c_str(), snap_state.key.size(), 0)) + - " in cf " + cf->GetName() + ": (" + snap_state.status.ToString() + - ") vs. (" + s.ToString() + ")"); - } - if (s.ok()) { - if (exp_v != v) { - return Status::Corruption("The snapshot gave inconsistent values: (" + - exp_v.ToString() + ") vs. (" + v.ToString() + - ")"); - } - } - if (snap_state.key_vec != nullptr) { - // When `prefix_extractor` is set, seeking to beginning and scanning - // across prefixes are only supported with `total_order_seek` set. 
- ropt.total_order_seek = true; - std::unique_ptr iterator(db->NewIterator(ropt)); - std::unique_ptr> tmp_bitvec( - new std::vector(FLAGS_max_key)); - for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { - uint64_t key_val; - if (GetIntVal(iterator->key().ToString(), &key_val)) { - (*tmp_bitvec.get())[key_val] = true; - } - } - if (!std::equal(snap_state.key_vec->begin(), snap_state.key_vec->end(), - tmp_bitvec.get()->begin())) { - return Status::Corruption("Found inconsistent keys at this snapshot"); - } - } - return Status::OK(); -} - -void StressTest::VerificationAbort(SharedState* shared, std::string msg, - Status s) const { - fprintf(stderr, "Verification failed: %s. Status is %s\n", msg.c_str(), - s.ToString().c_str()); - shared->SetVerificationFailure(); -} - -void StressTest::VerificationAbort(SharedState* shared, std::string msg, int cf, - int64_t key) const { - auto key_str = Key(key); - Slice key_slice = key_str; - fprintf(stderr, - "Verification failed for column family %d key %s (%" PRIi64 "): %s\n", - cf, key_slice.ToString(true).c_str(), key, msg.c_str()); - shared->SetVerificationFailure(); -} - -void StressTest::VerificationAbort(SharedState* shared, std::string msg, int cf, - int64_t key, Slice value_from_db, - Slice value_from_expected) const { - auto key_str = Key(key); - fprintf(stderr, - "Verification failed for column family %d key %s (%" PRIi64 - "): value_from_db: %s, value_from_expected: %s, msg: %s\n", - cf, Slice(key_str).ToString(true).c_str(), key, - value_from_db.ToString(true).c_str(), - value_from_expected.ToString(true).c_str(), msg.c_str()); - shared->SetVerificationFailure(); -} - -void StressTest::VerificationAbort(SharedState* shared, int cf, int64_t key, - const Slice& value, - const WideColumns& columns) const { - assert(shared); - - auto key_str = Key(key); - - fprintf(stderr, - "Verification failed for column family %d key %s (%" PRIi64 - "): Value and columns inconsistent: value: %s, columns: %s\n", - cf, Slice(key_str).ToString(/* hex */ true).c_str(), key, - value.ToString(/* hex */ true).c_str(), - WideColumnsToHex(columns).c_str()); - - shared->SetVerificationFailure(); -} - -std::string StressTest::DebugString(const Slice& value, - const WideColumns& columns) { - std::ostringstream oss; - - oss << "value: " << value.ToString(/* hex */ true) - << ", columns: " << WideColumnsToHex(columns); - - return oss.str(); -} - -void StressTest::PrintStatistics() { - if (dbstats) { - fprintf(stdout, "STATISTICS:\n%s\n", dbstats->ToString().c_str()); - } - if (dbstats_secondaries) { - fprintf(stdout, "Secondary instances STATISTICS:\n%s\n", - dbstats_secondaries->ToString().c_str()); - } -} - -// Currently PreloadDb has to be single-threaded. 
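PreloadDbAndReopenAsReadOnly() below, like the other write paths in the stress test, brackets each database write with two SharedState::Put() calls: one with pending=true before the write and one with pending=false after it succeeds. A condensed sketch of why that ordering matters for crash recovery, using a hypothetical ExpectedStateSketch type and the UNKNOWN_SENTINEL value defined earlier (db->Put() stands in for any real write):

    #include <atomic>
    #include <cstdint>
    #include <vector>

    // Sketch of the two-phase "pending" protocol: mark the expected value as
    // unknown before touching the DB, and record the committed value only after
    // the write returns OK. A crash in between leaves the key in the "unknown"
    // state, so verification will not report a false mismatch.
    struct ExpectedStateSketch {
      static constexpr uint32_t kUnknownSentinel = 0xfffffffe;
      std::vector<std::atomic<uint32_t>> values;
      explicit ExpectedStateSketch(size_t n) : values(n) {}
      void Put(size_t key, uint32_t value_base, bool pending) {
        values[key].store(pending ? kUnknownSentinel : value_base,
                          std::memory_order_relaxed);
      }
    };

    // Usage pattern around any write:
    //   expected.Put(key, value_base, /*pending=*/true);
    //   Status s = db->Put(write_opts, Key(key), value);
    //   if (s.ok()) expected.Put(key, value_base, /*pending=*/false);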
-void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys, - SharedState* shared) { - WriteOptions write_opts; - write_opts.disableWAL = FLAGS_disable_wal; - if (FLAGS_sync) { - write_opts.sync = true; - } - if (FLAGS_rate_limit_auto_wal_flush) { - write_opts.rate_limiter_priority = Env::IO_USER; - } - char value[100]; - int cf_idx = 0; - Status s; - for (auto cfh : column_families_) { - for (int64_t k = 0; k != number_of_keys; ++k) { - const std::string key = Key(k); - - constexpr uint32_t value_base = 0; - const size_t sz = GenerateValue(value_base, value, sizeof(value)); - - const Slice v(value, sz); - - shared->Put(cf_idx, k, value_base, true /* pending */); - - std::string ts; - if (FLAGS_user_timestamp_size > 0) { - ts = GetNowNanos(); - } - - if (FLAGS_use_merge) { - if (!FLAGS_use_txn) { - if (FLAGS_user_timestamp_size > 0) { - s = db_->Merge(write_opts, cfh, key, ts, v); - } else { - s = db_->Merge(write_opts, cfh, key, v); - } - } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Merge(cfh, key, v); - if (s.ok()) { - s = CommitTxn(txn); - } - } - } - } else if (FLAGS_use_put_entity_one_in > 0) { - s = db_->PutEntity(write_opts, cfh, key, - GenerateWideColumns(value_base, v)); - } else { - if (!FLAGS_use_txn) { - if (FLAGS_user_timestamp_size > 0) { - s = db_->Put(write_opts, cfh, key, ts, v); - } else { - s = db_->Put(write_opts, cfh, key, v); - } - } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Put(cfh, key, v); - if (s.ok()) { - s = CommitTxn(txn); - } - } - } - } - - shared->Put(cf_idx, k, value_base, false /* pending */); - if (!s.ok()) { - break; - } - } - if (!s.ok()) { - break; - } - ++cf_idx; - } - if (s.ok()) { - s = db_->Flush(FlushOptions(), column_families_); - } - if (s.ok()) { - for (auto cf : column_families_) { - delete cf; - } - column_families_.clear(); - delete db_; - db_ = nullptr; - txn_db_ = nullptr; - - db_preload_finished_.store(true); - auto now = clock_->NowMicros(); - fprintf(stdout, "%s Reopening database in read-only\n", - clock_->TimeToString(now / 1000000).c_str()); - // Reopen as read-only, can ignore all options related to updates - Open(shared); - } else { - fprintf(stderr, "Failed to preload db"); - exit(1); - } -} - -Status StressTest::SetOptions(ThreadState* thread) { - assert(FLAGS_set_options_one_in > 0); - std::unordered_map opts; - std::string name = - options_index_[thread->rand.Next() % options_index_.size()]; - int value_idx = thread->rand.Next() % options_table_[name].size(); - if (name == "level0_file_num_compaction_trigger" || - name == "level0_slowdown_writes_trigger" || - name == "level0_stop_writes_trigger") { - opts["level0_file_num_compaction_trigger"] = - options_table_["level0_file_num_compaction_trigger"][value_idx]; - opts["level0_slowdown_writes_trigger"] = - options_table_["level0_slowdown_writes_trigger"][value_idx]; - opts["level0_stop_writes_trigger"] = - options_table_["level0_stop_writes_trigger"][value_idx]; - } else { - opts[name] = options_table_[name][value_idx]; - } - - int rand_cf_idx = thread->rand.Next() % FLAGS_column_families; - auto cfh = column_families_[rand_cf_idx]; - return db_->SetOptions(cfh, opts); -} - -void StressTest::ProcessRecoveredPreparedTxns(SharedState* shared) { - assert(txn_db_); - std::vector recovered_prepared_trans; - txn_db_->GetAllPreparedTransactions(&recovered_prepared_trans); - for (Transaction* txn : recovered_prepared_trans) { - ProcessRecoveredPreparedTxnsHelper(txn, shared); - delete txn; - 
} - recovered_prepared_trans.clear(); - txn_db_->GetAllPreparedTransactions(&recovered_prepared_trans); - assert(recovered_prepared_trans.size() == 0); -} - -void StressTest::ProcessRecoveredPreparedTxnsHelper(Transaction* txn, - SharedState* shared) { - thread_local Random rand(static_cast(FLAGS_seed)); - for (size_t i = 0; i < column_families_.size(); ++i) { - std::unique_ptr wbwi_iter( - txn->GetWriteBatch()->NewIterator(column_families_[i])); - for (wbwi_iter->SeekToFirst(); wbwi_iter->Valid(); wbwi_iter->Next()) { - uint64_t key_val; - if (GetIntVal(wbwi_iter->Entry().key.ToString(), &key_val)) { - shared->Put(static_cast(i) /* cf_idx */, key_val, - 0 /* value_base */, true /* pending */); - } - } - } - if (rand.OneIn(2)) { - Status s = txn->Commit(); - assert(s.ok()); - } else { - Status s = txn->Rollback(); - assert(s.ok()); - } -} - -Status StressTest::NewTxn(WriteOptions& write_opts, Transaction** txn) { - if (!FLAGS_use_txn) { - return Status::InvalidArgument("NewTxn when FLAGS_use_txn is not set"); - } - write_opts.disableWAL = FLAGS_disable_wal; - static std::atomic txn_id = {0}; - TransactionOptions txn_options; - txn_options.use_only_the_last_commit_time_batch_for_recovery = - FLAGS_use_only_the_last_commit_time_batch_for_recovery; - txn_options.lock_timeout = 600000; // 10 min - txn_options.deadlock_detect = true; - *txn = txn_db_->BeginTransaction(write_opts, txn_options); - auto istr = std::to_string(txn_id.fetch_add(1)); - Status s = (*txn)->SetName("xid" + istr); - return s; -} - -Status StressTest::CommitTxn(Transaction* txn, ThreadState* thread) { - if (!FLAGS_use_txn) { - return Status::InvalidArgument("CommitTxn when FLAGS_use_txn is not set"); - } - assert(txn_db_); - Status s = txn->Prepare(); - std::shared_ptr timestamped_snapshot; - if (s.ok()) { - if (thread && FLAGS_create_timestamped_snapshot_one_in && - thread->rand.OneIn(FLAGS_create_timestamped_snapshot_one_in)) { - uint64_t ts = db_stress_env->NowNanos(); - s = txn->CommitAndTryCreateSnapshot(/*notifier=*/nullptr, ts, - ×tamped_snapshot); - - std::pair> res; - if (thread->tid == 0) { - uint64_t now = db_stress_env->NowNanos(); - res = txn_db_->CreateTimestampedSnapshot(now); - if (res.first.ok()) { - assert(res.second); - assert(res.second->GetTimestamp() == now); - if (timestamped_snapshot) { - assert(res.second->GetTimestamp() > - timestamped_snapshot->GetTimestamp()); - } - } else { - assert(!res.second); - } - } - } else { - s = txn->Commit(); - } - } - if (thread && FLAGS_create_timestamped_snapshot_one_in > 0 && - thread->rand.OneInOpt(50000)) { - uint64_t now = db_stress_env->NowNanos(); - constexpr uint64_t time_diff = static_cast(1000) * 1000 * 1000; - txn_db_->ReleaseTimestampedSnapshotsOlderThan(now - time_diff); - } - delete txn; - return s; -} - -Status StressTest::RollbackTxn(Transaction* txn) { - if (!FLAGS_use_txn) { - return Status::InvalidArgument( - "RollbackTxn when FLAGS_use_txn is not" - " set"); - } - Status s = txn->Rollback(); - delete txn; - return s; -} - -void StressTest::OperateDb(ThreadState* thread) { - ReadOptions read_opts(FLAGS_verify_checksum, true); - read_opts.rate_limiter_priority = - FLAGS_rate_limit_user_ops ? 
Env::IO_USER : Env::IO_TOTAL; - read_opts.async_io = FLAGS_async_io; - read_opts.adaptive_readahead = FLAGS_adaptive_readahead; - read_opts.readahead_size = FLAGS_readahead_size; - WriteOptions write_opts; - if (FLAGS_rate_limit_auto_wal_flush) { - write_opts.rate_limiter_priority = Env::IO_USER; - } - auto shared = thread->shared; - char value[100]; - std::string from_db; - if (FLAGS_sync) { - write_opts.sync = true; - } - write_opts.disableWAL = FLAGS_disable_wal; - write_opts.protection_bytes_per_key = FLAGS_batch_protection_bytes_per_key; - const int prefix_bound = static_cast(FLAGS_readpercent) + - static_cast(FLAGS_prefixpercent); - const int write_bound = prefix_bound + static_cast(FLAGS_writepercent); - const int del_bound = write_bound + static_cast(FLAGS_delpercent); - const int delrange_bound = - del_bound + static_cast(FLAGS_delrangepercent); - const int iterate_bound = - delrange_bound + static_cast(FLAGS_iterpercent); - - const uint64_t ops_per_open = FLAGS_ops_per_thread / (FLAGS_reopen + 1); - -#ifndef NDEBUG - if (FLAGS_read_fault_one_in) { - fault_fs_guard->SetThreadLocalReadErrorContext(thread->shared->GetSeed(), - FLAGS_read_fault_one_in); - } -#endif // NDEBUG - if (FLAGS_write_fault_one_in) { - IOStatus error_msg; - if (FLAGS_injest_error_severity <= 1 || FLAGS_injest_error_severity > 2) { - error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - } else if (FLAGS_injest_error_severity == 2) { - // Ingest the fatal error - error_msg = IOStatus::IOError("Fatal IO Error"); - error_msg.SetDataLoss(true); - } - std::vector types = {FileType::kTableFile, - FileType::kDescriptorFile, - FileType::kCurrentFile}; - fault_fs_guard->SetRandomWriteError( - thread->shared->GetSeed(), FLAGS_write_fault_one_in, error_msg, - /*inject_for_all_file_types=*/false, types); - } - thread->stats.Start(); - for (int open_cnt = 0; open_cnt <= FLAGS_reopen; ++open_cnt) { - if (thread->shared->HasVerificationFailedYet() || - thread->shared->ShouldStopTest()) { - break; - } - if (open_cnt != 0) { - thread->stats.FinishedSingleOp(); - MutexLock l(thread->shared->GetMutex()); - while (!thread->snapshot_queue.empty()) { - db_->ReleaseSnapshot(thread->snapshot_queue.front().second.snapshot); - delete thread->snapshot_queue.front().second.key_vec; - thread->snapshot_queue.pop(); - } - thread->shared->IncVotedReopen(); - if (thread->shared->AllVotedReopen()) { - thread->shared->GetStressTest()->Reopen(thread); - thread->shared->GetCondVar()->SignalAll(); - } else { - thread->shared->GetCondVar()->Wait(); - } - // Commenting this out as we don't want to reset stats on each open. - // thread->stats.Start(); - } - - for (uint64_t i = 0; i < ops_per_open; i++) { - if (thread->shared->HasVerificationFailedYet()) { - break; - } - - // Change Options - if (thread->rand.OneInOpt(FLAGS_set_options_one_in)) { - SetOptions(thread); - } - - if (thread->rand.OneInOpt(FLAGS_set_in_place_one_in)) { - options_.inplace_update_support ^= options_.inplace_update_support; - } - - if (thread->tid == 0 && FLAGS_verify_db_one_in > 0 && - thread->rand.OneIn(FLAGS_verify_db_one_in)) { - ContinuouslyVerifyDb(thread); - if (thread->shared->ShouldStopTest()) { - break; - } - } - - MaybeClearOneColumnFamily(thread); - - if (thread->rand.OneInOpt(FLAGS_manual_wal_flush_one_in)) { - bool sync = thread->rand.OneIn(2) ? true : false; - Status s = db_->FlushWAL(sync); - if (!s.ok() && !(sync && s.IsNotSupported())) { - fprintf(stderr, "FlushWAL(sync=%s) failed: %s\n", - (sync ? 
"true" : "false"), s.ToString().c_str()); - } - } - - if (thread->rand.OneInOpt(FLAGS_lock_wal_one_in)) { - Status s = db_->LockWAL(); - if (!s.ok()) { - fprintf(stderr, "LockWAL() failed: %s\n", s.ToString().c_str()); - } else { - auto old_seqno = db_->GetLatestSequenceNumber(); - // Yield for a while - do { - std::this_thread::yield(); - } while (thread->rand.OneIn(2)); - // Latest seqno should not have changed - auto new_seqno = db_->GetLatestSequenceNumber(); - if (old_seqno != new_seqno) { - fprintf( - stderr, - "Failure: latest seqno changed from %u to %u with WAL locked\n", - (unsigned)old_seqno, (unsigned)new_seqno); - } - s = db_->UnlockWAL(); - if (!s.ok()) { - fprintf(stderr, "UnlockWAL() failed: %s\n", s.ToString().c_str()); - } - } - } - - if (thread->rand.OneInOpt(FLAGS_sync_wal_one_in)) { - Status s = db_->SyncWAL(); - if (!s.ok() && !s.IsNotSupported()) { - fprintf(stderr, "SyncWAL() failed: %s\n", s.ToString().c_str()); - } - } - - int rand_column_family = thread->rand.Next() % FLAGS_column_families; - ColumnFamilyHandle* column_family = column_families_[rand_column_family]; - - if (thread->rand.OneInOpt(FLAGS_compact_files_one_in)) { - TestCompactFiles(thread, column_family); - } - - int64_t rand_key = GenerateOneKey(thread, i); - std::string keystr = Key(rand_key); - Slice key = keystr; - - if (thread->rand.OneInOpt(FLAGS_compact_range_one_in)) { - TestCompactRange(thread, rand_key, key, column_family); - if (thread->shared->HasVerificationFailedYet()) { - break; - } - } - - std::vector rand_column_families = - GenerateColumnFamilies(FLAGS_column_families, rand_column_family); - - if (thread->rand.OneInOpt(FLAGS_flush_one_in)) { - Status status = TestFlush(rand_column_families); - if (!status.ok()) { - fprintf(stdout, "Unable to perform Flush(): %s\n", - status.ToString().c_str()); - } - } - - // Verify GetLiveFiles with a 1 in N chance. - if (thread->rand.OneInOpt(FLAGS_get_live_files_one_in) && - !FLAGS_write_fault_one_in) { - Status status = VerifyGetLiveFiles(); - if (!status.ok()) { - VerificationAbort(shared, "VerifyGetLiveFiles status not OK", status); - } - } - - // Verify GetSortedWalFiles with a 1 in N chance. - if (thread->rand.OneInOpt(FLAGS_get_sorted_wal_files_one_in)) { - Status status = VerifyGetSortedWalFiles(); - if (!status.ok()) { - VerificationAbort(shared, "VerifyGetSortedWalFiles status not OK", - status); - } - } - - // Verify GetCurrentWalFile with a 1 in N chance. - if (thread->rand.OneInOpt(FLAGS_get_current_wal_file_one_in)) { - Status status = VerifyGetCurrentWalFile(); - if (!status.ok()) { - VerificationAbort(shared, "VerifyGetCurrentWalFile status not OK", - status); - } - } - - if (thread->rand.OneInOpt(FLAGS_pause_background_one_in)) { - Status status = TestPauseBackground(thread); - if (!status.ok()) { - VerificationAbort( - shared, "Pause/ContinueBackgroundWork status not OK", status); - } - } - - if (thread->rand.OneInOpt(FLAGS_verify_checksum_one_in)) { - Status status = db_->VerifyChecksum(); - if (!status.ok()) { - VerificationAbort(shared, "VerifyChecksum status not OK", status); - } - } - - if (thread->rand.OneInOpt(FLAGS_get_property_one_in)) { - TestGetProperty(thread); - } - - std::vector rand_keys = GenerateKeys(rand_key); - - if (thread->rand.OneInOpt(FLAGS_ingest_external_file_one_in)) { - TestIngestExternalFile(thread, rand_column_families, rand_keys); - } - - if (thread->rand.OneInOpt(FLAGS_backup_one_in)) { - // Beyond a certain DB size threshold, this test becomes heavier than - // it's worth. 
- uint64_t total_size = 0; - if (FLAGS_backup_max_size > 0) { - std::vector files; - db_stress_env->GetChildrenFileAttributes(FLAGS_db, &files); - for (auto& file : files) { - total_size += file.size_bytes; - } - } - - if (total_size <= FLAGS_backup_max_size) { - Status s = TestBackupRestore(thread, rand_column_families, rand_keys); - if (!s.ok()) { - VerificationAbort(shared, "Backup/restore gave inconsistent state", - s); - } - } - } - - if (thread->rand.OneInOpt(FLAGS_checkpoint_one_in)) { - Status s = TestCheckpoint(thread, rand_column_families, rand_keys); - if (!s.ok()) { - VerificationAbort(shared, "Checkpoint gave inconsistent state", s); - } - } - - if (thread->rand.OneInOpt(FLAGS_approximate_size_one_in)) { - Status s = - TestApproximateSize(thread, i, rand_column_families, rand_keys); - if (!s.ok()) { - VerificationAbort(shared, "ApproximateSize Failed", s); - } - } - if (thread->rand.OneInOpt(FLAGS_acquire_snapshot_one_in)) { - TestAcquireSnapshot(thread, rand_column_family, keystr, i); - } - - /*always*/ { - Status s = MaybeReleaseSnapshots(thread, i); - if (!s.ok()) { - VerificationAbort(shared, "Snapshot gave inconsistent state", s); - } - } - - // Assign timestamps if necessary. - std::string read_ts_str; - Slice read_ts; - if (FLAGS_user_timestamp_size > 0) { - read_ts_str = GetNowNanos(); - read_ts = read_ts_str; - read_opts.timestamp = &read_ts; - } - - int prob_op = thread->rand.Uniform(100); - // Reset this in case we pick something other than a read op. We don't - // want to use a stale value when deciding at the beginning of the loop - // whether to vote to reopen - if (prob_op >= 0 && prob_op < static_cast(FLAGS_readpercent)) { - assert(0 <= prob_op); - // OPERATION read - if (FLAGS_use_get_entity) { - TestGetEntity(thread, read_opts, rand_column_families, rand_keys); - } else if (FLAGS_use_multiget) { - // Leave room for one more iteration of the loop with a single key - // batch. This is to ensure that each thread does exactly the same - // number of ops - int multiget_batch_size = static_cast( - std::min(static_cast(thread->rand.Uniform(64)), - FLAGS_ops_per_thread - i - 1)); - // If its the last iteration, ensure that multiget_batch_size is 1 - multiget_batch_size = std::max(multiget_batch_size, 1); - rand_keys = GenerateNKeys(thread, multiget_batch_size, i); - TestMultiGet(thread, read_opts, rand_column_families, rand_keys); - i += multiget_batch_size - 1; - } else { - TestGet(thread, read_opts, rand_column_families, rand_keys); - } - } else if (prob_op < prefix_bound) { - assert(static_cast(FLAGS_readpercent) <= prob_op); - // OPERATION prefix scan - // keys are 8 bytes long, prefix size is FLAGS_prefix_size. There are - // (8 - FLAGS_prefix_size) bytes besides the prefix. 
So there will - // be 2 ^ ((8 - FLAGS_prefix_size) * 8) possible keys with the same - // prefix - TestPrefixScan(thread, read_opts, rand_column_families, rand_keys); - } else if (prob_op < write_bound) { - assert(prefix_bound <= prob_op); - // OPERATION write - TestPut(thread, write_opts, read_opts, rand_column_families, rand_keys, - value); - } else if (prob_op < del_bound) { - assert(write_bound <= prob_op); - // OPERATION delete - TestDelete(thread, write_opts, rand_column_families, rand_keys); - } else if (prob_op < delrange_bound) { - assert(del_bound <= prob_op); - // OPERATION delete range - TestDeleteRange(thread, write_opts, rand_column_families, rand_keys); - } else if (prob_op < iterate_bound) { - assert(delrange_bound <= prob_op); - // OPERATION iterate - if (!FLAGS_skip_verifydb && - thread->rand.OneInOpt( - FLAGS_verify_iterator_with_expected_state_one_in)) { - TestIterateAgainstExpected(thread, read_opts, rand_column_families, - rand_keys); - } else { - int num_seeks = static_cast(std::min( - std::max(static_cast(thread->rand.Uniform(4)), - static_cast(1)), - std::max(static_cast(FLAGS_ops_per_thread - i - 1), - static_cast(1)))); - rand_keys = GenerateNKeys(thread, num_seeks, i); - i += num_seeks - 1; - TestIterate(thread, read_opts, rand_column_families, rand_keys); - } - } else { - assert(iterate_bound <= prob_op); - TestCustomOperations(thread, rand_column_families); - } - thread->stats.FinishedSingleOp(); - } - } - while (!thread->snapshot_queue.empty()) { - db_->ReleaseSnapshot(thread->snapshot_queue.front().second.snapshot); - delete thread->snapshot_queue.front().second.key_vec; - thread->snapshot_queue.pop(); - } - - thread->stats.Stop(); -} - -// Generated a list of keys that close to boundaries of SST keys. -// If there isn't any SST file in the DB, return empty list. -std::vector StressTest::GetWhiteBoxKeys(ThreadState* thread, - DB* db, - ColumnFamilyHandle* cfh, - size_t num_keys) { - ColumnFamilyMetaData cfmd; - db->GetColumnFamilyMetaData(cfh, &cfmd); - std::vector boundaries; - for (const LevelMetaData& lmd : cfmd.levels) { - for (const SstFileMetaData& sfmd : lmd.files) { - // If FLAGS_user_timestamp_size > 0, then both smallestkey and largestkey - // have timestamps. - const auto& skey = sfmd.smallestkey; - const auto& lkey = sfmd.largestkey; - assert(skey.size() >= FLAGS_user_timestamp_size); - assert(lkey.size() >= FLAGS_user_timestamp_size); - boundaries.push_back( - skey.substr(0, skey.size() - FLAGS_user_timestamp_size)); - boundaries.push_back( - lkey.substr(0, lkey.size() - FLAGS_user_timestamp_size)); - } - } - if (boundaries.empty()) { - return {}; - } - - std::vector ret; - for (size_t j = 0; j < num_keys; j++) { - std::string k = - boundaries[thread->rand.Uniform(static_cast(boundaries.size()))]; - if (thread->rand.OneIn(3)) { - // Reduce one byte from the string - for (int i = static_cast(k.length()) - 1; i >= 0; i--) { - uint8_t cur = k[i]; - if (cur > 0) { - k[i] = static_cast(cur - 1); - break; - } else if (i > 0) { - k[i] = 0xFFu; - } - } - } else if (thread->rand.OneIn(2)) { - // Add one byte to the string - for (int i = static_cast(k.length()) - 1; i >= 0; i--) { - uint8_t cur = k[i]; - if (cur < 255) { - k[i] = static_cast(cur + 1); - break; - } else if (i > 0) { - k[i] = 0x00; - } - } - } - ret.push_back(k); - } - return ret; -} - -// Given a key K, this creates an iterator which scans to K and then -// does a random sequence of Next/Prev operations. 
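GetWhiteBoxKeys() above, despite its "reduce one byte"/"add one byte" comments, effectively decrements or increments the key as a big-endian unsigned integer, propagating a borrow or carry through trailing 0x00/0xFF bytes so the generated key lands just outside an SST file's boundary. A minimal sketch of the decrement case, with a hypothetical helper name:

    #include <cstdint>
    #include <string>

    // Sketch: decrement a byte string by one, treating it as a big-endian
    // unsigned integer. Trailing zero bytes borrow from the next more
    // significant byte, mirroring the first branch in GetWhiteBoxKeys().
    std::string DecrementKey(std::string k) {
      for (int i = static_cast<int>(k.size()) - 1; i >= 0; --i) {
        uint8_t cur = static_cast<uint8_t>(k[i]);
        if (cur > 0) {
          k[i] = static_cast<char>(cur - 1);
          break;  // no more borrowing needed
        } else if (i > 0) {
          k[i] = static_cast<char>(0xFF);  // borrow: 0x00 -> 0xFF, keep going left
        }
      }
      return k;
    }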
-Status StressTest::TestIterate(ThreadState* thread, - const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) { - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - ManagedSnapshot snapshot_guard(db_); - - ReadOptions ro = read_opts; - ro.snapshot = snapshot_guard.snapshot(); - - std::string read_ts_str; - Slice read_ts_slice; - MaybeUseOlderTimestampForRangeScan(thread, read_ts_str, read_ts_slice, ro); - - bool expect_total_order = false; - if (thread->rand.OneIn(16)) { - // When prefix extractor is used, it's useful to cover total order seek. - ro.total_order_seek = true; - expect_total_order = true; - } else if (thread->rand.OneIn(4)) { - ro.total_order_seek = false; - ro.auto_prefix_mode = true; - expect_total_order = true; - } else if (options_.prefix_extractor.get() == nullptr) { - expect_total_order = true; - } - - std::string upper_bound_str; - Slice upper_bound; - if (thread->rand.OneIn(16)) { - // With a 1/16 chance, set an iterator upper bound. - // Note: upper_bound can be smaller than the seek key. - const int64_t rand_upper_key = GenerateOneKey(thread, FLAGS_ops_per_thread); - upper_bound_str = Key(rand_upper_key); - upper_bound = Slice(upper_bound_str); - ro.iterate_upper_bound = &upper_bound; - } - std::string lower_bound_str; - Slice lower_bound; - if (thread->rand.OneIn(16)) { - // With a 1/16 chance, enable iterator lower bound. - // Note: lower_bound can be greater than the seek key. - const int64_t rand_lower_key = GenerateOneKey(thread, FLAGS_ops_per_thread); - lower_bound_str = Key(rand_lower_key); - lower_bound = Slice(lower_bound_str); - ro.iterate_lower_bound = &lower_bound; - } - - ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]]; - assert(cfh); - - std::unique_ptr iter(db_->NewIterator(ro, cfh)); - - std::vector key_strs; - if (thread->rand.OneIn(16)) { - // Generate keys close to lower or upper bound of SST files. - key_strs = GetWhiteBoxKeys(thread, db_, cfh, rand_keys.size()); - } - if (key_strs.empty()) { - // Use the random keys passed in. - for (int64_t rkey : rand_keys) { - key_strs.push_back(Key(rkey)); - } - } - - std::string op_logs; - constexpr size_t kOpLogsLimit = 10000; - - for (const std::string& key_str : key_strs) { - if (op_logs.size() > kOpLogsLimit) { - // Shouldn't take too much memory for the history log. Clear it. - op_logs = "(cleared...)\n"; - } - - if (ro.iterate_upper_bound != nullptr && thread->rand.OneIn(2)) { - // With a 1/2 chance, change the upper bound. - // It is possible that it is changed before first use, but there is no - // problem with that. - const int64_t rand_upper_key = - GenerateOneKey(thread, FLAGS_ops_per_thread); - upper_bound_str = Key(rand_upper_key); - upper_bound = Slice(upper_bound_str); - } - if (ro.iterate_lower_bound != nullptr && thread->rand.OneIn(4)) { - // With a 1/4 chance, change the lower bound. - // It is possible that it is changed before first use, but there is no - // problem with that. - const int64_t rand_lower_key = - GenerateOneKey(thread, FLAGS_ops_per_thread); - lower_bound_str = Key(rand_lower_key); - lower_bound = Slice(lower_bound_str); - } - - // Record some options to op_logs - op_logs += "total_order_seek: "; - op_logs += (ro.total_order_seek ? "1 " : "0 "); - op_logs += "auto_prefix_mode: "; - op_logs += (ro.auto_prefix_mode ? 
"1 " : "0 "); - if (ro.iterate_upper_bound != nullptr) { - op_logs += "ub: " + upper_bound.ToString(true) + " "; - } - if (ro.iterate_lower_bound != nullptr) { - op_logs += "lb: " + lower_bound.ToString(true) + " "; - } - - // Set up an iterator, perform the same operations without bounds and with - // total order seek, and compare the results. This is to identify bugs - // related to bounds, prefix extractor, or reseeking. Sometimes we are - // comparing iterators with the same set-up, and it doesn't hurt to check - // them to be equal. - // - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. - ReadOptions cmp_ro; - cmp_ro.timestamp = ro.timestamp; - cmp_ro.iter_start_ts = ro.iter_start_ts; - cmp_ro.snapshot = snapshot_guard.snapshot(); - cmp_ro.total_order_seek = true; - - ColumnFamilyHandle* const cmp_cfh = - GetControlCfh(thread, rand_column_families[0]); - assert(cmp_cfh); - - std::unique_ptr cmp_iter(db_->NewIterator(cmp_ro, cmp_cfh)); - - bool diverged = false; - - Slice key(key_str); - - const bool support_seek_first_or_last = expect_total_order; - - LastIterateOp last_op; - if (support_seek_first_or_last && thread->rand.OneIn(100)) { - iter->SeekToFirst(); - cmp_iter->SeekToFirst(); - last_op = kLastOpSeekToFirst; - op_logs += "STF "; - } else if (support_seek_first_or_last && thread->rand.OneIn(100)) { - iter->SeekToLast(); - cmp_iter->SeekToLast(); - last_op = kLastOpSeekToLast; - op_logs += "STL "; - } else if (thread->rand.OneIn(8)) { - iter->SeekForPrev(key); - cmp_iter->SeekForPrev(key); - last_op = kLastOpSeekForPrev; - op_logs += "SFP " + key.ToString(true) + " "; - } else { - iter->Seek(key); - cmp_iter->Seek(key); - last_op = kLastOpSeek; - op_logs += "S " + key.ToString(true) + " "; - } - - VerifyIterator(thread, cmp_cfh, ro, iter.get(), cmp_iter.get(), last_op, - key, op_logs, &diverged); - - const bool no_reverse = - (FLAGS_memtablerep == "prefix_hash" && !expect_total_order); - for (uint64_t i = 0; i < FLAGS_num_iterations && iter->Valid(); ++i) { - if (no_reverse || thread->rand.OneIn(2)) { - iter->Next(); - if (!diverged) { - assert(cmp_iter->Valid()); - cmp_iter->Next(); - } - op_logs += "N"; - } else { - iter->Prev(); - if (!diverged) { - assert(cmp_iter->Valid()); - cmp_iter->Prev(); - } - op_logs += "P"; - } - - last_op = kLastOpNextOrPrev; - - VerifyIterator(thread, cmp_cfh, ro, iter.get(), cmp_iter.get(), last_op, - key, op_logs, &diverged); - } - - thread->stats.AddIterations(1); - - op_logs += "; "; - } - - return Status::OK(); -} - -// Test the return status of GetLiveFiles. -Status StressTest::VerifyGetLiveFiles() const { - std::vector live_file; - uint64_t manifest_size = 0; - return db_->GetLiveFiles(live_file, &manifest_size); -} - -// Test the return status of GetSortedWalFiles. -Status StressTest::VerifyGetSortedWalFiles() const { - VectorLogPtr log_ptr; - return db_->GetSortedWalFiles(log_ptr); -} - -// Test the return status of GetCurrentWalFile. -Status StressTest::VerifyGetCurrentWalFile() const { - std::unique_ptr cur_wal_file; - return db_->GetCurrentWalFile(&cur_wal_file); -} - -// Compare the two iterator, iter and cmp_iter are in the same position, -// unless iter might be made invalidate or undefined because of -// upper or lower bounds, or prefix extractor. -// Will flag failure if the verification fails. -// diverged = true if the two iterator is already diverged. -// True if verification passed, false if not. 
-void StressTest::VerifyIterator(ThreadState* thread,
-                                ColumnFamilyHandle* cmp_cfh,
-                                const ReadOptions& ro, Iterator* iter,
-                                Iterator* cmp_iter, LastIterateOp op,
-                                const Slice& seek_key,
-                                const std::string& op_logs, bool* diverged) {
-  assert(diverged);
-
-  if (*diverged) {
-    return;
-  }
-
-  if (ro.iter_start_ts != nullptr) {
-    assert(FLAGS_user_timestamp_size > 0);
-    // We currently do not verify iterator when dumping history of internal
-    // keys.
-    *diverged = true;
-    return;
-  }
-
-  if (op == kLastOpSeekToFirst && ro.iterate_lower_bound != nullptr) {
-    // SeekToFirst() with lower bound is not well defined.
-    *diverged = true;
-    return;
-  } else if (op == kLastOpSeekToLast && ro.iterate_upper_bound != nullptr) {
-    // SeekToLast() with upper bound is not well defined.
-    *diverged = true;
-    return;
-  } else if (op == kLastOpSeek && ro.iterate_lower_bound != nullptr &&
-             (options_.comparator->CompareWithoutTimestamp(
-                  *ro.iterate_lower_bound, /*a_has_ts=*/false, seek_key,
-                  /*b_has_ts=*/false) >= 0 ||
-              (ro.iterate_upper_bound != nullptr &&
-               options_.comparator->CompareWithoutTimestamp(
-                   *ro.iterate_lower_bound, /*a_has_ts=*/false,
-                   *ro.iterate_upper_bound, /*b_has_ts*/ false) >= 0))) {
-    // Lower bound behavior is not well defined if it is larger than the
-    // seek key or the upper bound. Disable the check for now.
-    *diverged = true;
-    return;
-  } else if (op == kLastOpSeekForPrev && ro.iterate_upper_bound != nullptr &&
-             (options_.comparator->CompareWithoutTimestamp(
-                  *ro.iterate_upper_bound, /*a_has_ts=*/false, seek_key,
-                  /*b_has_ts=*/false) <= 0 ||
-              (ro.iterate_lower_bound != nullptr &&
-               options_.comparator->CompareWithoutTimestamp(
-                   *ro.iterate_lower_bound, /*a_has_ts=*/false,
-                   *ro.iterate_upper_bound, /*b_has_ts=*/false) >= 0))) {
-    // Upper bound behavior is not well defined if it is smaller than the
-    // seek key or the lower bound. Disable the check for now.
-    *diverged = true;
-    return;
-  }
-
-  const SliceTransform* pe = (ro.total_order_seek || ro.auto_prefix_mode)
-                                 ? nullptr
-                                 : options_.prefix_extractor.get();
-  const Comparator* cmp = options_.comparator;
-
-  if (iter->Valid() && !cmp_iter->Valid()) {
-    if (pe != nullptr) {
-      if (!pe->InDomain(seek_key)) {
-        // Prefix seek on a non-in-domain key is undefined. Skip checking for
-        // this scenario.
-        *diverged = true;
-        return;
-      } else if (!pe->InDomain(iter->key())) {
-        // Out of range: the iterator's key is no longer in the prefix domain.
-        *diverged = true;
-        return;
-      } else if (pe->Transform(iter->key()) != pe->Transform(seek_key)) {
-        *diverged = true;
-        return;
-      }
-    }
-    fprintf(stderr,
-            "Control iterator is invalid but iterator has key %s "
-            "%s\n",
-            iter->key().ToString(true).c_str(), op_logs.c_str());
-
-    *diverged = true;
-  } else if (cmp_iter->Valid()) {
-    // Iterator is not valid. It can be legitimate if it has already gone
-    // out of the upper or lower bound, or been filtered out by the prefix
-    // iterator.
-    const Slice& total_order_key = cmp_iter->key();
-
-    if (pe != nullptr) {
-      if (!pe->InDomain(seek_key)) {
-        // Prefix seek on a non-in-domain key is undefined. Skip checking for
-        // this scenario.
-        *diverged = true;
-        return;
-      }
-
-      if (!pe->InDomain(total_order_key) ||
-          pe->Transform(total_order_key) != pe->Transform(seek_key)) {
-        // If the prefix is exhausted, the only thing left to check is that
-        // the iterator does not return a position within the prefix.
-        // Either way, checking can stop from here.
-        *diverged = true;
-        if (!iter->Valid() || !pe->InDomain(iter->key()) ||
-            pe->Transform(iter->key()) != pe->Transform(seek_key)) {
-          return;
-        }
-        fprintf(stderr,
-                "Iterator stays in prefix but control iterator doesn't:"
-                " iterator key %s control iterator key %s %s\n",
-                iter->key().ToString(true).c_str(),
-                cmp_iter->key().ToString(true).c_str(), op_logs.c_str());
-      }
-    }
-    // Check upper or lower bounds.
-    if (!*diverged) {
-      if ((iter->Valid() && iter->key() != cmp_iter->key()) ||
-          (!iter->Valid() &&
-           (ro.iterate_upper_bound == nullptr ||
-            cmp->CompareWithoutTimestamp(total_order_key, /*a_has_ts=*/false,
-                                         *ro.iterate_upper_bound,
-                                         /*b_has_ts=*/false) < 0) &&
-           (ro.iterate_lower_bound == nullptr ||
-            cmp->CompareWithoutTimestamp(total_order_key, /*a_has_ts=*/false,
-                                         *ro.iterate_lower_bound,
-                                         /*b_has_ts=*/false) > 0))) {
-        fprintf(stderr,
-                "Iterator diverged from control iterator which"
-                " has value %s %s\n",
-                total_order_key.ToString(true).c_str(), op_logs.c_str());
-        if (iter->Valid()) {
-          fprintf(stderr, "iterator has value %s\n",
-                  iter->key().ToString(true).c_str());
-        } else {
-          fprintf(stderr, "iterator is not valid\n");
-        }
-        *diverged = true;
-      }
-    }
-  }
-
-  if (!*diverged && iter->Valid()) {
-    if (!VerifyWideColumns(iter->value(), iter->columns())) {
-      fprintf(stderr,
-              "Value and columns inconsistent for iterator: value: %s, "
-              "columns: %s\n",
-              iter->value().ToString(/* hex */ true).c_str(),
-              WideColumnsToHex(iter->columns()).c_str());
-
-      *diverged = true;
-    }
-  }
-
-  if (*diverged) {
-    fprintf(stderr, "Control CF %s\n", cmp_cfh->GetName().c_str());
-    thread->stats.AddErrors(1);
-    // Fail fast to preserve the DB state.
-    thread->shared->SetVerificationFailure();
-  }
-}
-
-Status StressTest::TestBackupRestore(
-    ThreadState* thread, const std::vector<int>& rand_column_families,
-    const std::vector<int64_t>& rand_keys) {
-  std::vector<std::unique_ptr<MutexLock>> locks;
-  if (ShouldAcquireMutexOnKey()) {
-    for (int rand_column_family : rand_column_families) {
-      // `rand_keys[0]` on each chosen CF will be verified.
- locks.emplace_back(new MutexLock( - thread->shared->GetMutexForKey(rand_column_family, rand_keys[0]))); - } - } - - const std::string backup_dir = - FLAGS_db + "/.backup" + std::to_string(thread->tid); - const std::string restore_dir = - FLAGS_db + "/.restore" + std::to_string(thread->tid); - BackupEngineOptions backup_opts(backup_dir); - // For debugging, get info_log from live options - backup_opts.info_log = db_->GetDBOptions().info_log.get(); - if (thread->rand.OneIn(10)) { - backup_opts.share_table_files = false; - } else { - backup_opts.share_table_files = true; - if (thread->rand.OneIn(5)) { - backup_opts.share_files_with_checksum = false; - } else { - backup_opts.share_files_with_checksum = true; - if (thread->rand.OneIn(2)) { - // old - backup_opts.share_files_with_checksum_naming = - BackupEngineOptions::kLegacyCrc32cAndFileSize; - } else { - // new - backup_opts.share_files_with_checksum_naming = - BackupEngineOptions::kUseDbSessionId; - } - if (thread->rand.OneIn(2)) { - backup_opts.share_files_with_checksum_naming = - backup_opts.share_files_with_checksum_naming | - BackupEngineOptions::kFlagIncludeFileSize; - } - } - } - if (thread->rand.OneIn(2)) { - backup_opts.schema_version = 1; - } else { - backup_opts.schema_version = 2; - } - BackupEngine* backup_engine = nullptr; - std::string from = "a backup/restore operation"; - Status s = BackupEngine::Open(db_stress_env, backup_opts, &backup_engine); - if (!s.ok()) { - from = "BackupEngine::Open"; - } - if (s.ok()) { - if (backup_opts.schema_version >= 2 && thread->rand.OneIn(2)) { - TEST_BackupMetaSchemaOptions test_opts; - test_opts.crc32c_checksums = thread->rand.OneIn(2) == 0; - test_opts.file_sizes = thread->rand.OneIn(2) == 0; - TEST_SetBackupMetaSchemaOptions(backup_engine, test_opts); - } - CreateBackupOptions create_opts; - if (FLAGS_disable_wal) { - // The verification can only work when latest value of `key` is backed up, - // which requires flushing in case of WAL disabled. - // - // Note this triggers a flush with a key lock held. Meanwhile, operations - // like flush/compaction may attempt to grab key locks like in - // `DbStressCompactionFilter`. The philosophy around preventing deadlock - // is the background operation key lock acquisition only tries but does - // not wait for the lock. So here in the foreground it is OK to hold the - // lock and wait on a background operation (flush). - create_opts.flush_before_backup = true; - } - s = backup_engine->CreateNewBackup(create_opts, db_); - if (!s.ok()) { - from = "BackupEngine::CreateNewBackup"; - } - } - if (s.ok()) { - delete backup_engine; - backup_engine = nullptr; - s = BackupEngine::Open(db_stress_env, backup_opts, &backup_engine); - if (!s.ok()) { - from = "BackupEngine::Open (again)"; - } - } - std::vector backup_info; - // If inplace_not_restore, we verify the backup by opening it as a - // read-only DB. If !inplace_not_restore, we restore it to a temporary - // directory for verification. 
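An illustrative aside (not from the patched file): the test above drives the public BackupEngine API. A minimal standalone sketch of the same create/restore/purge sequence follows, with hard-coded directories and assert-based error handling standing in for the harness's bookkeeping; `BackupAndRestoreOnce` and the paths are assumptions.

#include <cassert>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/utilities/backup_engine.h"

void BackupAndRestoreOnce(rocksdb::DB* db, const std::string& backup_dir,
                          const std::string& restore_dir) {
  using namespace rocksdb;
  BackupEngine* backup_engine = nullptr;
  Status s = BackupEngine::Open(Env::Default(),
                                BackupEngineOptions(backup_dir),
                                &backup_engine);
  assert(s.ok());
  // Flush first so the backup captures the latest memtable contents.
  s = backup_engine->CreateNewBackup(db, /*flush_before_backup=*/true);
  assert(s.ok());
  // Restore the most recent backup into a scratch directory for verification.
  s = backup_engine->RestoreDBFromLatestBackup(restore_dir,
                                               /*wal_dir=*/restore_dir);
  assert(s.ok());
  // Keep at most two backups around, mirroring the PurgeOldBackups() calls
  // in the test.
  s = backup_engine->PurgeOldBackups(/*num_backups_to_keep=*/2);
  assert(s.ok());
  delete backup_engine;
}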
- bool inplace_not_restore = thread->rand.OneIn(3); - if (s.ok()) { - backup_engine->GetBackupInfo(&backup_info, - /*include_file_details*/ inplace_not_restore); - if (backup_info.empty()) { - s = Status::NotFound("no backups found"); - from = "BackupEngine::GetBackupInfo"; - } - } - if (s.ok() && thread->rand.OneIn(2)) { - s = backup_engine->VerifyBackup( - backup_info.front().backup_id, - thread->rand.OneIn(2) /* verify_with_checksum */); - if (!s.ok()) { - from = "BackupEngine::VerifyBackup"; - } - } - const bool allow_persistent = thread->tid == 0; // not too many - bool from_latest = false; - int count = static_cast(backup_info.size()); - if (s.ok() && !inplace_not_restore) { - if (count > 1) { - s = backup_engine->RestoreDBFromBackup( - RestoreOptions(), backup_info[thread->rand.Uniform(count)].backup_id, - restore_dir /* db_dir */, restore_dir /* wal_dir */); - if (!s.ok()) { - from = "BackupEngine::RestoreDBFromBackup"; - } - } else { - from_latest = true; - s = backup_engine->RestoreDBFromLatestBackup(RestoreOptions(), - restore_dir /* db_dir */, - restore_dir /* wal_dir */); - if (!s.ok()) { - from = "BackupEngine::RestoreDBFromLatestBackup"; - } - } - } - if (s.ok() && !inplace_not_restore) { - // Purge early if restoring, to ensure the restored directory doesn't - // have some secret dependency on the backup directory. - uint32_t to_keep = 0; - if (allow_persistent) { - // allow one thread to keep up to 2 backups - to_keep = thread->rand.Uniform(3); - } - s = backup_engine->PurgeOldBackups(to_keep); - if (!s.ok()) { - from = "BackupEngine::PurgeOldBackups"; - } - } - DB* restored_db = nullptr; - std::vector restored_cf_handles; - // Not yet implemented: opening restored BlobDB or TransactionDB - if (s.ok() && !FLAGS_use_txn && !FLAGS_use_blob_db) { - Options restore_options(options_); - restore_options.best_efforts_recovery = false; - restore_options.listeners.clear(); - // Avoid dangling/shared file descriptors, for reliable destroy - restore_options.sst_file_manager = nullptr; - std::vector cf_descriptors; - // TODO(ajkr): `column_family_names_` is not safe to access here when - // `clear_column_family_one_in != 0`. But we can't easily switch to - // `ListColumnFamilies` to get names because it won't necessarily give - // the same order as `column_family_names_`. - assert(FLAGS_clear_column_family_one_in == 0); - for (auto name : column_family_names_) { - cf_descriptors.emplace_back(name, ColumnFamilyOptions(restore_options)); - } - if (inplace_not_restore) { - BackupInfo& info = backup_info[thread->rand.Uniform(count)]; - restore_options.env = info.env_for_open.get(); - s = DB::OpenForReadOnly(DBOptions(restore_options), info.name_for_open, - cf_descriptors, &restored_cf_handles, - &restored_db); - if (!s.ok()) { - from = "DB::OpenForReadOnly in backup/restore"; - } - } else { - s = DB::Open(DBOptions(restore_options), restore_dir, cf_descriptors, - &restored_cf_handles, &restored_db); - if (!s.ok()) { - from = "DB::Open in backup/restore"; - } - } - } - // Note the column families chosen by `rand_column_families` cannot be - // dropped while the locks for `rand_keys` are held. So we should not have - // to worry about accessing those column families throughout this function. 
- // - // For simplicity, currently only verifies existence/non-existence of a - // single key - for (size_t i = 0; restored_db && s.ok() && i < rand_column_families.size(); - ++i) { - std::string key_str = Key(rand_keys[0]); - Slice key = key_str; - std::string restored_value; - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. - ReadOptions read_opts; - std::string ts_str; - Slice ts; - if (FLAGS_user_timestamp_size > 0) { - ts_str = GetNowNanos(); - ts = ts_str; - read_opts.timestamp = &ts; - } - Status get_status = restored_db->Get( - read_opts, restored_cf_handles[rand_column_families[i]], key, - &restored_value); - bool exists = thread->shared->Exists(rand_column_families[i], rand_keys[0]); - if (get_status.ok()) { - if (!exists && from_latest && ShouldAcquireMutexOnKey()) { - std::ostringstream oss; - oss << "0x" << key.ToString(true) - << " exists in restore but not in original db"; - s = Status::Corruption(oss.str()); - } - } else if (get_status.IsNotFound()) { - if (exists && from_latest && ShouldAcquireMutexOnKey()) { - std::ostringstream oss; - oss << "0x" << key.ToString(true) - << " exists in original db but not in restore"; - s = Status::Corruption(oss.str()); - } - } else { - s = get_status; - if (!s.ok()) { - from = "DB::Get in backup/restore"; - } - } - } - if (restored_db != nullptr) { - for (auto* cf_handle : restored_cf_handles) { - restored_db->DestroyColumnFamilyHandle(cf_handle); - } - delete restored_db; - restored_db = nullptr; - } - if (s.ok() && inplace_not_restore) { - // Purge late if inplace open read-only - uint32_t to_keep = 0; - if (allow_persistent) { - // allow one thread to keep up to 2 backups - to_keep = thread->rand.Uniform(3); - } - s = backup_engine->PurgeOldBackups(to_keep); - if (!s.ok()) { - from = "BackupEngine::PurgeOldBackups"; - } - } - if (backup_engine != nullptr) { - delete backup_engine; - backup_engine = nullptr; - } - if (s.ok()) { - // Preserve directories on failure, or allowed persistent backup - if (!allow_persistent) { - s = DestroyDir(db_stress_env, backup_dir); - if (!s.ok()) { - from = "Destroy backup dir"; - } - } - } - if (s.ok()) { - s = DestroyDir(db_stress_env, restore_dir); - if (!s.ok()) { - from = "Destroy restore dir"; - } - } - if (!s.ok()) { - fprintf(stderr, "Failure in %s with: %s\n", from.c_str(), - s.ToString().c_str()); - } - return s; -} - -Status StressTest::TestApproximateSize( - ThreadState* thread, uint64_t iteration, - const std::vector& rand_column_families, - const std::vector& rand_keys) { - // rand_keys likely only has one key. Just use the first one. - assert(!rand_keys.empty()); - assert(!rand_column_families.empty()); - int64_t key1 = rand_keys[0]; - int64_t key2; - if (thread->rand.OneIn(2)) { - // Two totally random keys. This tends to cover large ranges. - key2 = GenerateOneKey(thread, iteration); - if (key2 < key1) { - std::swap(key1, key2); - } - } else { - // Unless users pass a very large FLAGS_max_key, it we should not worry - // about overflow. It is for testing, so we skip the overflow checking - // for simplicity. 
- key2 = key1 + static_cast(thread->rand.Uniform(1000)); - } - std::string key1_str = Key(key1); - std::string key2_str = Key(key2); - Range range{Slice(key1_str), Slice(key2_str)}; - SizeApproximationOptions sao; - sao.include_memtables = thread->rand.OneIn(2); - if (sao.include_memtables) { - sao.include_files = thread->rand.OneIn(2); - } - if (thread->rand.OneIn(2)) { - if (thread->rand.OneIn(2)) { - sao.files_size_error_margin = 0.0; - } else { - sao.files_size_error_margin = - static_cast(thread->rand.Uniform(3)); - } - } - uint64_t result; - return db_->GetApproximateSizes( - sao, column_families_[rand_column_families[0]], &range, 1, &result); -} - -Status StressTest::TestCheckpoint(ThreadState* thread, - const std::vector& rand_column_families, - const std::vector& rand_keys) { - std::vector> locks; - if (ShouldAcquireMutexOnKey()) { - for (int rand_column_family : rand_column_families) { - // `rand_keys[0]` on each chosen CF will be verified. - locks.emplace_back(new MutexLock( - thread->shared->GetMutexForKey(rand_column_family, rand_keys[0]))); - } - } - - std::string checkpoint_dir = - FLAGS_db + "/.checkpoint" + std::to_string(thread->tid); - Options tmp_opts(options_); - tmp_opts.listeners.clear(); - tmp_opts.env = db_stress_env; - - DestroyDB(checkpoint_dir, tmp_opts); - - if (db_stress_env->FileExists(checkpoint_dir).ok()) { - // If the directory might still exist, try to delete the files one by one. - // Likely a trash file is still there. - Status my_s = DestroyDir(db_stress_env, checkpoint_dir); - if (!my_s.ok()) { - fprintf(stderr, "Fail to destory directory before checkpoint: %s", - my_s.ToString().c_str()); - } - } - - Checkpoint* checkpoint = nullptr; - Status s = Checkpoint::Create(db_, &checkpoint); - if (s.ok()) { - s = checkpoint->CreateCheckpoint(checkpoint_dir); - if (!s.ok()) { - fprintf(stderr, "Fail to create checkpoint to %s\n", - checkpoint_dir.c_str()); - std::vector files; - Status my_s = db_stress_env->GetChildren(checkpoint_dir, &files); - if (my_s.ok()) { - for (const auto& f : files) { - fprintf(stderr, " %s\n", f.c_str()); - } - } else { - fprintf(stderr, "Fail to get files under the directory to %s\n", - my_s.ToString().c_str()); - } - } - } - delete checkpoint; - checkpoint = nullptr; - std::vector cf_handles; - DB* checkpoint_db = nullptr; - if (s.ok()) { - Options options(options_); - options.best_efforts_recovery = false; - options.listeners.clear(); - // Avoid race condition in trash handling after delete checkpoint_db - options.sst_file_manager.reset(); - std::vector cf_descs; - // TODO(ajkr): `column_family_names_` is not safe to access here when - // `clear_column_family_one_in != 0`. But we can't easily switch to - // `ListColumnFamilies` to get names because it won't necessarily give - // the same order as `column_family_names_`. - assert(FLAGS_clear_column_family_one_in == 0); - if (FLAGS_clear_column_family_one_in == 0) { - for (const auto& name : column_family_names_) { - cf_descs.emplace_back(name, ColumnFamilyOptions(options)); - } - s = DB::OpenForReadOnly(DBOptions(options), checkpoint_dir, cf_descs, - &cf_handles, &checkpoint_db); - } - } - if (checkpoint_db != nullptr) { - // Note the column families chosen by `rand_column_families` cannot be - // dropped while the locks for `rand_keys` are held. So we should not have - // to worry about accessing those column families throughout this function. 
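An illustrative aside (not from the patched file): a minimal sketch of the public Checkpoint API used by the creation path above, with hypothetical paths and a read-only reopen for inspection; real usage would propagate errors instead of asserting.

#include <cassert>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/utilities/checkpoint.h"

void CheckpointAndReopen(rocksdb::DB* db, const std::string& checkpoint_dir) {
  rocksdb::Checkpoint* checkpoint = nullptr;
  rocksdb::Status s = rocksdb::Checkpoint::Create(db, &checkpoint);
  assert(s.ok());
  s = checkpoint->CreateCheckpoint(checkpoint_dir);
  assert(s.ok());
  delete checkpoint;

  // A checkpoint is a normal DB directory; open it read-only to inspect it.
  rocksdb::DB* checkpoint_db = nullptr;
  rocksdb::Options options;
  s = rocksdb::DB::OpenForReadOnly(options, checkpoint_dir, &checkpoint_db);
  assert(s.ok());
  delete checkpoint_db;
}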
- for (size_t i = 0; s.ok() && i < rand_column_families.size(); ++i) { - std::string key_str = Key(rand_keys[0]); - Slice key = key_str; - std::string ts_str; - Slice ts; - ReadOptions read_opts; - if (FLAGS_user_timestamp_size > 0) { - ts_str = GetNowNanos(); - ts = ts_str; - read_opts.timestamp = &ts; - } - std::string value; - Status get_status = checkpoint_db->Get( - read_opts, cf_handles[rand_column_families[i]], key, &value); - bool exists = - thread->shared->Exists(rand_column_families[i], rand_keys[0]); - if (get_status.ok()) { - if (!exists && ShouldAcquireMutexOnKey()) { - std::ostringstream oss; - oss << "0x" << key.ToString(true) << " exists in checkpoint " - << checkpoint_dir << " but not in original db"; - s = Status::Corruption(oss.str()); - } - } else if (get_status.IsNotFound()) { - if (exists && ShouldAcquireMutexOnKey()) { - std::ostringstream oss; - oss << "0x" << key.ToString(true) - << " exists in original db but not in checkpoint " - << checkpoint_dir; - s = Status::Corruption(oss.str()); - } - } else { - s = get_status; - } - } - for (auto cfh : cf_handles) { - delete cfh; - } - cf_handles.clear(); - delete checkpoint_db; - checkpoint_db = nullptr; - } - - if (!s.ok()) { - fprintf(stderr, "A checkpoint operation failed with: %s\n", - s.ToString().c_str()); - } else { - DestroyDB(checkpoint_dir, tmp_opts); - } - return s; -} - -void StressTest::TestGetProperty(ThreadState* thread) const { - std::unordered_set levelPropertyNames = { - DB::Properties::kAggregatedTablePropertiesAtLevel, - DB::Properties::kCompressionRatioAtLevelPrefix, - DB::Properties::kNumFilesAtLevelPrefix, - }; - std::unordered_set unknownPropertyNames = { - DB::Properties::kEstimateOldestKeyTime, - DB::Properties::kOptionsStatistics, - DB::Properties:: - kLiveSstFilesSizeAtTemperature, // similar to levelPropertyNames, it - // requires a number suffix - }; - unknownPropertyNames.insert(levelPropertyNames.begin(), - levelPropertyNames.end()); - - std::unordered_set blobCachePropertyNames = { - DB::Properties::kBlobCacheCapacity, - DB::Properties::kBlobCacheUsage, - DB::Properties::kBlobCachePinnedUsage, - }; - if (db_->GetOptions().blob_cache == nullptr) { - unknownPropertyNames.insert(blobCachePropertyNames.begin(), - blobCachePropertyNames.end()); - } - - std::string prop; - for (const auto& ppt_name_and_info : InternalStats::ppt_name_to_info) { - bool res = db_->GetProperty(ppt_name_and_info.first, &prop); - if (unknownPropertyNames.find(ppt_name_and_info.first) == - unknownPropertyNames.end()) { - if (!res) { - fprintf(stderr, "Failed to get DB property: %s\n", - ppt_name_and_info.first.c_str()); - thread->shared->SetVerificationFailure(); - } - if (ppt_name_and_info.second.handle_int != nullptr) { - uint64_t prop_int; - if (!db_->GetIntProperty(ppt_name_and_info.first, &prop_int)) { - fprintf(stderr, "Failed to get Int property: %s\n", - ppt_name_and_info.first.c_str()); - thread->shared->SetVerificationFailure(); - } - } - if (ppt_name_and_info.second.handle_map != nullptr) { - std::map prop_map; - if (!db_->GetMapProperty(ppt_name_and_info.first, &prop_map)) { - fprintf(stderr, "Failed to get Map property: %s\n", - ppt_name_and_info.first.c_str()); - thread->shared->SetVerificationFailure(); - } - } - } - } - - ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta_data; - db_->GetColumnFamilyMetaData(&cf_meta_data); - int level_size = static_cast(cf_meta_data.levels.size()); - for (int level = 0; level < level_size; level++) { - for (const auto& ppt_name : levelPropertyNames) { - bool res = 
db_->GetProperty(ppt_name + std::to_string(level), &prop); - if (!res) { - fprintf(stderr, "Failed to get DB property: %s\n", - (ppt_name + std::to_string(level)).c_str()); - thread->shared->SetVerificationFailure(); - } - } - } - - // Test for an invalid property name - if (thread->rand.OneIn(100)) { - if (db_->GetProperty("rocksdb.invalid_property_name", &prop)) { - fprintf(stderr, "Failed to return false for invalid property name\n"); - thread->shared->SetVerificationFailure(); - } - } -} - -void StressTest::TestCompactFiles(ThreadState* thread, - ColumnFamilyHandle* column_family) { - ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta_data; - db_->GetColumnFamilyMetaData(column_family, &cf_meta_data); - - if (cf_meta_data.levels.empty()) { - return; - } - - // Randomly compact up to three consecutive files from a level - const int kMaxRetry = 3; - for (int attempt = 0; attempt < kMaxRetry; ++attempt) { - size_t random_level = - thread->rand.Uniform(static_cast(cf_meta_data.levels.size())); - - const auto& files = cf_meta_data.levels[random_level].files; - if (files.size() > 0) { - size_t random_file_index = - thread->rand.Uniform(static_cast(files.size())); - if (files[random_file_index].being_compacted) { - // Retry as the selected file is currently being compacted - continue; - } - - std::vector input_files; - input_files.push_back(files[random_file_index].name); - if (random_file_index > 0 && - !files[random_file_index - 1].being_compacted) { - input_files.push_back(files[random_file_index - 1].name); - } - if (random_file_index + 1 < files.size() && - !files[random_file_index + 1].being_compacted) { - input_files.push_back(files[random_file_index + 1].name); - } - - size_t output_level = - std::min(random_level + 1, cf_meta_data.levels.size() - 1); - auto s = db_->CompactFiles(CompactionOptions(), column_family, - input_files, static_cast(output_level)); - if (!s.ok()) { - fprintf(stdout, "Unable to perform CompactFiles(): %s\n", - s.ToString().c_str()); - thread->stats.AddNumCompactFilesFailed(1); - } else { - thread->stats.AddNumCompactFilesSucceed(1); - } - break; - } - } -} - -Status StressTest::TestFlush(const std::vector& rand_column_families) { - FlushOptions flush_opts; - if (FLAGS_atomic_flush) { - return db_->Flush(flush_opts, column_families_); - } - std::vector cfhs; - std::for_each(rand_column_families.begin(), rand_column_families.end(), - [this, &cfhs](int k) { cfhs.push_back(column_families_[k]); }); - return db_->Flush(flush_opts, cfhs); -} - -Status StressTest::TestPauseBackground(ThreadState* thread) { - Status status = db_->PauseBackgroundWork(); - if (!status.ok()) { - return status; - } - // To avoid stalling/deadlocking ourself in this thread, just - // sleep here during pause and let other threads do db operations. - // Sleep up to ~16 seconds (2**24 microseconds), but very skewed - // toward short pause. (1 chance in 25 of pausing >= 1s; - // 1 chance in 625 of pausing full 16s.) - int pwr2_micros = - std::min(thread->rand.Uniform(25), thread->rand.Uniform(25)); - clock_->SleepForMicroseconds(1 << pwr2_micros); - return db_->ContinueBackgroundWork(); -} - -void StressTest::TestAcquireSnapshot(ThreadState* thread, - int rand_column_family, - const std::string& keystr, uint64_t i) { - Slice key = keystr; - ColumnFamilyHandle* column_family = column_families_[rand_column_family]; - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. 
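An illustrative aside (not from the patched file): the snapshot round trip that TestAcquireSnapshot() and MaybeReleaseSnapshots() implement, reduced to a minimal sketch against the default column family; `ReadTwiceUnderSnapshot` and its assertions are assumptions, not harness code.

#include <cassert>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"

void ReadTwiceUnderSnapshot(rocksdb::DB* db, const rocksdb::Slice& key) {
  const rocksdb::Snapshot* snapshot = db->GetSnapshot();
  rocksdb::ReadOptions ropt;
  ropt.snapshot = snapshot;

  std::string value_at, value_later;
  rocksdb::Status status_at = db->Get(ropt, key, &value_at);

  // ... other threads may overwrite or delete `key` here ...

  rocksdb::Status status_later = db->Get(ropt, key, &value_later);
  // The snapshot pins the view: both reads must agree.
  assert(status_at.code() == status_later.code());
  assert(!status_at.ok() || value_at == value_later);
  db->ReleaseSnapshot(snapshot);
}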
-  ReadOptions ropt;
-  auto db_impl = static_cast_with_check<DBImpl>(db_->GetRootDB());
-  const bool ww_snapshot = thread->rand.OneIn(10);
-  const Snapshot* snapshot =
-      ww_snapshot ? db_impl->GetSnapshotForWriteConflictBoundary()
-                  : db_->GetSnapshot();
-  ropt.snapshot = snapshot;
-
-  // Ideally, we want snapshot taking and timestamp generation to be atomic
-  // here, so that the snapshot corresponds to the timestamp. However, it is
-  // not possible with the current GetSnapshot() API.
-  std::string ts_str;
-  Slice ts;
-  if (FLAGS_user_timestamp_size > 0) {
-    ts_str = GetNowNanos();
-    ts = ts_str;
-    ropt.timestamp = &ts;
-  }
-
-  std::string value_at;
-  // When taking a snapshot, we also read a key from that snapshot. We
-  // will later read the same key before releasing the snapshot and
-  // verify that the results are the same.
-  auto status_at = db_->Get(ropt, column_family, key, &value_at);
-  std::vector<bool>* key_vec = nullptr;
-
-  if (FLAGS_compare_full_db_state_snapshot && (thread->tid == 0)) {
-    key_vec = new std::vector<bool>(FLAGS_max_key);
-    // When `prefix_extractor` is set, seeking to the beginning and scanning
-    // across prefixes are only supported with `total_order_seek` set.
-    ropt.total_order_seek = true;
-    std::unique_ptr<Iterator> iterator(db_->NewIterator(ropt));
-    for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) {
-      uint64_t key_val;
-      if (GetIntVal(iterator->key().ToString(), &key_val)) {
-        (*key_vec)[key_val] = true;
-      }
-    }
-  }
-
-  ThreadState::SnapshotState snap_state = {snapshot,
-                                           rand_column_family,
-                                           column_family->GetName(),
-                                           keystr,
-                                           status_at,
-                                           value_at,
-                                           key_vec,
-                                           ts_str};
-  uint64_t hold_for = FLAGS_snapshot_hold_ops;
-  if (FLAGS_long_running_snapshots) {
-    // Hold 10% of snapshots for 10x longer
-    if (thread->rand.OneIn(10)) {
-      assert(hold_for < std::numeric_limits<uint64_t>::max() / 10);
-      hold_for *= 10;
-      // Hold 1% of snapshots for 100x longer
-      if (thread->rand.OneIn(10)) {
-        assert(hold_for < std::numeric_limits<uint64_t>::max() / 10);
-        hold_for *= 10;
-      }
-    }
-  }
-  uint64_t release_at = std::min(FLAGS_ops_per_thread - 1, i + hold_for);
-  thread->snapshot_queue.emplace(release_at, snap_state);
-}
-
-Status StressTest::MaybeReleaseSnapshots(ThreadState* thread, uint64_t i) {
-  while (!thread->snapshot_queue.empty() &&
-         i >= thread->snapshot_queue.front().first) {
-    auto snap_state = thread->snapshot_queue.front().second;
-    assert(snap_state.snapshot);
-    // Note: this is unsafe as the cf might be dropped concurrently. But
-    // it is ok since unclean cf drop is currently not supported by
-    // write-prepared transactions.
- Status s = AssertSame(db_, column_families_[snap_state.cf_at], snap_state); - db_->ReleaseSnapshot(snap_state.snapshot); - delete snap_state.key_vec; - thread->snapshot_queue.pop(); - if (!s.ok()) { - return s; - } - } - return Status::OK(); -} - -void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key, - const Slice& start_key, - ColumnFamilyHandle* column_family) { - int64_t end_key_num; - if (std::numeric_limits::max() - rand_key < - FLAGS_compact_range_width) { - end_key_num = std::numeric_limits::max(); - } else { - end_key_num = FLAGS_compact_range_width + rand_key; - } - std::string end_key_buf = Key(end_key_num); - Slice end_key(end_key_buf); - - CompactRangeOptions cro; - cro.exclusive_manual_compaction = static_cast(thread->rand.Next() % 2); - cro.change_level = static_cast(thread->rand.Next() % 2); - std::vector bottom_level_styles = { - BottommostLevelCompaction::kSkip, - BottommostLevelCompaction::kIfHaveCompactionFilter, - BottommostLevelCompaction::kForce, - BottommostLevelCompaction::kForceOptimized}; - cro.bottommost_level_compaction = - bottom_level_styles[thread->rand.Next() % - static_cast(bottom_level_styles.size())]; - cro.allow_write_stall = static_cast(thread->rand.Next() % 2); - cro.max_subcompactions = static_cast(thread->rand.Next() % 4); - std::vector blob_gc_policies = { - BlobGarbageCollectionPolicy::kForce, - BlobGarbageCollectionPolicy::kDisable, - BlobGarbageCollectionPolicy::kUseDefault}; - cro.blob_garbage_collection_policy = - blob_gc_policies[thread->rand.Next() % - static_cast(blob_gc_policies.size())]; - cro.blob_garbage_collection_age_cutoff = - static_cast(thread->rand.Next() % 100) / 100.0; - - const Snapshot* pre_snapshot = nullptr; - uint32_t pre_hash = 0; - if (thread->rand.OneIn(2)) { - // Do some validation by declaring a snapshot and compare the data before - // and after the compaction - pre_snapshot = db_->GetSnapshot(); - pre_hash = - GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key); - } - - Status status = db_->CompactRange(cro, column_family, &start_key, &end_key); - - if (!status.ok()) { - fprintf(stdout, "Unable to perform CompactRange(): %s\n", - status.ToString().c_str()); - } - - if (pre_snapshot != nullptr) { - uint32_t post_hash = - GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key); - if (pre_hash != post_hash) { - fprintf(stderr, - "Data hash different before and after compact range " - "start_key %s end_key %s\n", - start_key.ToString(true).c_str(), end_key.ToString(true).c_str()); - thread->stats.AddErrors(1); - // Fail fast to preserve the DB state. - thread->shared->SetVerificationFailure(); - } - db_->ReleaseSnapshot(pre_snapshot); - } -} - -uint32_t StressTest::GetRangeHash(ThreadState* thread, const Snapshot* snapshot, - ColumnFamilyHandle* column_family, - const Slice& start_key, - const Slice& end_key) { - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. 
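An illustrative aside (not from the patched file): a minimal sketch of the CRC chaining GetRangeHash() performs over every key/value pair in a range. It uses the in-tree util/crc32c.h helper, as db_stress itself does; `HashRange`, `db`, and the fixed separator are assumptions for the sketch.

#include <cstdint>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "util/crc32c.h"

uint32_t HashRange(rocksdb::DB* db, const rocksdb::Slice& start_key,
                   const rocksdb::Slice& end_key) {
  const char kSeparator = ';';
  uint32_t crc = 0;
  rocksdb::ReadOptions ro;
  ro.total_order_seek = true;
  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(ro));
  for (it->Seek(start_key);
       it->Valid() && it->key().compare(end_key) <= 0; it->Next()) {
    // Feed key and value (with separators to avoid ambiguity) into one
    // running CRC so the whole range folds into a single 32-bit digest.
    crc = rocksdb::crc32c::Extend(crc, it->key().data(), it->key().size());
    crc = rocksdb::crc32c::Extend(crc, &kSeparator, sizeof(kSeparator));
    crc = rocksdb::crc32c::Extend(crc, it->value().data(), it->value().size());
    crc = rocksdb::crc32c::Extend(crc, &kSeparator, sizeof(kSeparator));
  }
  return crc;
}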
- ReadOptions ro; - ro.snapshot = snapshot; - ro.total_order_seek = true; - std::string ts_str; - Slice ts; - if (FLAGS_user_timestamp_size > 0) { - ts_str = GetNowNanos(); - ts = ts_str; - ro.timestamp = &ts; - } - - std::unique_ptr it(db_->NewIterator(ro, column_family)); - - constexpr char kCrcCalculatorSepearator = ';'; - - uint32_t crc = 0; - - for (it->Seek(start_key); - it->Valid() && options_.comparator->Compare(it->key(), end_key) <= 0; - it->Next()) { - crc = crc32c::Extend(crc, it->key().data(), it->key().size()); - crc = crc32c::Extend(crc, &kCrcCalculatorSepearator, sizeof(char)); - crc = crc32c::Extend(crc, it->value().data(), it->value().size()); - crc = crc32c::Extend(crc, &kCrcCalculatorSepearator, sizeof(char)); - - for (const auto& column : it->columns()) { - crc = crc32c::Extend(crc, column.name().data(), column.name().size()); - crc = crc32c::Extend(crc, &kCrcCalculatorSepearator, sizeof(char)); - crc = crc32c::Extend(crc, column.value().data(), column.value().size()); - crc = crc32c::Extend(crc, &kCrcCalculatorSepearator, sizeof(char)); - } - } - - if (!it->status().ok()) { - fprintf(stderr, "Iterator non-OK when calculating range CRC: %s\n", - it->status().ToString().c_str()); - thread->stats.AddErrors(1); - // Fail fast to preserve the DB state. - thread->shared->SetVerificationFailure(); - } - - return crc; -} - -void StressTest::PrintEnv() const { - fprintf(stdout, "RocksDB version : %d.%d\n", kMajorVersion, - kMinorVersion); - fprintf(stdout, "Format version : %d\n", FLAGS_format_version); - fprintf(stdout, "TransactionDB : %s\n", - FLAGS_use_txn ? "true" : "false"); - - if (FLAGS_use_txn) { - fprintf(stdout, "Two write queues: : %s\n", - FLAGS_two_write_queues ? "true" : "false"); - fprintf(stdout, "Write policy : %d\n", - static_cast(FLAGS_txn_write_policy)); - if (static_cast(TxnDBWritePolicy::WRITE_PREPARED) == - FLAGS_txn_write_policy || - static_cast(TxnDBWritePolicy::WRITE_UNPREPARED) == - FLAGS_txn_write_policy) { - fprintf(stdout, "Snapshot cache bits : %d\n", - static_cast(FLAGS_wp_snapshot_cache_bits)); - fprintf(stdout, "Commit cache bits : %d\n", - static_cast(FLAGS_wp_commit_cache_bits)); - } - fprintf(stdout, "last cwb for recovery : %s\n", - FLAGS_use_only_the_last_commit_time_batch_for_recovery ? "true" - : "false"); - } - - fprintf(stdout, "Stacked BlobDB : %s\n", - FLAGS_use_blob_db ? "true" : "false"); - fprintf(stdout, "Read only mode : %s\n", - FLAGS_read_only ? "true" : "false"); - fprintf(stdout, "Atomic flush : %s\n", - FLAGS_atomic_flush ? "true" : "false"); - fprintf(stdout, "Manual WAL flush : %s\n", - FLAGS_manual_wal_flush_one_in > 0 ? 
"true" : "false"); - fprintf(stdout, "Column families : %d\n", FLAGS_column_families); - if (!FLAGS_test_batches_snapshots) { - fprintf(stdout, "Clear CFs one in : %d\n", - FLAGS_clear_column_family_one_in); - } - fprintf(stdout, "Number of threads : %d\n", FLAGS_threads); - fprintf(stdout, "Ops per thread : %lu\n", - (unsigned long)FLAGS_ops_per_thread); - std::string ttl_state("unused"); - if (FLAGS_ttl > 0) { - ttl_state = std::to_string(FLAGS_ttl); - } - fprintf(stdout, "Time to live(sec) : %s\n", ttl_state.c_str()); - fprintf(stdout, "Read percentage : %d%%\n", FLAGS_readpercent); - fprintf(stdout, "Prefix percentage : %d%%\n", FLAGS_prefixpercent); - fprintf(stdout, "Write percentage : %d%%\n", FLAGS_writepercent); - fprintf(stdout, "Delete percentage : %d%%\n", FLAGS_delpercent); - fprintf(stdout, "Delete range percentage : %d%%\n", FLAGS_delrangepercent); - fprintf(stdout, "No overwrite percentage : %d%%\n", - FLAGS_nooverwritepercent); - fprintf(stdout, "Iterate percentage : %d%%\n", FLAGS_iterpercent); - fprintf(stdout, "Custom ops percentage : %d%%\n", FLAGS_customopspercent); - fprintf(stdout, "DB-write-buffer-size : %" PRIu64 "\n", - FLAGS_db_write_buffer_size); - fprintf(stdout, "Write-buffer-size : %d\n", FLAGS_write_buffer_size); - fprintf(stdout, "Iterations : %lu\n", - (unsigned long)FLAGS_num_iterations); - fprintf(stdout, "Max key : %lu\n", - (unsigned long)FLAGS_max_key); - fprintf(stdout, "Ratio #ops/#keys : %f\n", - (1.0 * FLAGS_ops_per_thread * FLAGS_threads) / FLAGS_max_key); - fprintf(stdout, "Num times DB reopens : %d\n", FLAGS_reopen); - fprintf(stdout, "Batches/snapshots : %d\n", - FLAGS_test_batches_snapshots); - fprintf(stdout, "Do update in place : %d\n", FLAGS_in_place_update); - fprintf(stdout, "Num keys per lock : %d\n", - 1 << FLAGS_log2_keys_per_lock); - std::string compression = CompressionTypeToString(compression_type_e); - fprintf(stdout, "Compression : %s\n", compression.c_str()); - std::string bottommost_compression = - CompressionTypeToString(bottommost_compression_type_e); - fprintf(stdout, "Bottommost Compression : %s\n", - bottommost_compression.c_str()); - std::string checksum = ChecksumTypeToString(checksum_type_e); - fprintf(stdout, "Checksum type : %s\n", checksum.c_str()); - fprintf(stdout, "File checksum impl : %s\n", - FLAGS_file_checksum_impl.c_str()); - fprintf(stdout, "Bloom bits / key : %s\n", - FormatDoubleParam(FLAGS_bloom_bits).c_str()); - fprintf(stdout, "Max subcompactions : %" PRIu64 "\n", - FLAGS_subcompactions); - fprintf(stdout, "Use MultiGet : %s\n", - FLAGS_use_multiget ? "true" : "false"); - fprintf(stdout, "Use GetEntity : %s\n", - FLAGS_use_get_entity ? 
"true" : "false"); - - const char* memtablerep = ""; - switch (FLAGS_rep_factory) { - case kSkipList: - memtablerep = "skip_list"; - break; - case kHashSkipList: - memtablerep = "prefix_hash"; - break; - case kVectorRep: - memtablerep = "vector"; - break; - } - - fprintf(stdout, "Memtablerep : %s\n", memtablerep); - -#ifndef NDEBUG - KillPoint* kp = KillPoint::GetInstance(); - fprintf(stdout, "Test kill odd : %d\n", kp->rocksdb_kill_odds); - if (!kp->rocksdb_kill_exclude_prefixes.empty()) { - fprintf(stdout, "Skipping kill points prefixes:\n"); - for (auto& p : kp->rocksdb_kill_exclude_prefixes) { - fprintf(stdout, " %s\n", p.c_str()); - } - } -#endif - fprintf(stdout, "Periodic Compaction Secs : %" PRIu64 "\n", - FLAGS_periodic_compaction_seconds); - fprintf(stdout, "Compaction TTL : %" PRIu64 "\n", - FLAGS_compaction_ttl); - const char* compaction_pri = ""; - switch (FLAGS_compaction_pri) { - case kByCompensatedSize: - compaction_pri = "kByCompensatedSize"; - break; - case kOldestLargestSeqFirst: - compaction_pri = "kOldestLargestSeqFirst"; - break; - case kOldestSmallestSeqFirst: - compaction_pri = "kOldestSmallestSeqFirst"; - break; - case kMinOverlappingRatio: - compaction_pri = "kMinOverlappingRatio"; - break; - case kRoundRobin: - compaction_pri = "kRoundRobin"; - break; - } - fprintf(stdout, "Compaction Pri : %s\n", compaction_pri); - fprintf(stdout, "Background Purge : %d\n", - static_cast(FLAGS_avoid_unnecessary_blocking_io)); - fprintf(stdout, "Write DB ID to manifest : %d\n", - static_cast(FLAGS_write_dbid_to_manifest)); - fprintf(stdout, "Max Write Batch Group Size: %" PRIu64 "\n", - FLAGS_max_write_batch_group_size_bytes); - fprintf(stdout, "Use dynamic level : %d\n", - static_cast(FLAGS_level_compaction_dynamic_level_bytes)); - fprintf(stdout, "Read fault one in : %d\n", FLAGS_read_fault_one_in); - fprintf(stdout, "Write fault one in : %d\n", FLAGS_write_fault_one_in); - fprintf(stdout, "Open metadata write fault one in:\n"); - fprintf(stdout, " %d\n", - FLAGS_open_metadata_write_fault_one_in); - fprintf(stdout, "Sync fault injection : %d\n", - FLAGS_sync_fault_injection); - fprintf(stdout, "Best efforts recovery : %d\n", - static_cast(FLAGS_best_efforts_recovery)); - fprintf(stdout, "Fail if OPTIONS file error: %d\n", - static_cast(FLAGS_fail_if_options_file_error)); - fprintf(stdout, "User timestamp size bytes : %d\n", - static_cast(FLAGS_user_timestamp_size)); - fprintf(stdout, "WAL compression : %s\n", - FLAGS_wal_compression.c_str()); - fprintf(stdout, "Try verify sst unique id : %d\n", - static_cast(FLAGS_verify_sst_unique_id_in_manifest)); - - fprintf(stdout, "------------------------------------------------\n"); -} - -void StressTest::Open(SharedState* shared) { - assert(db_ == nullptr); - assert(txn_db_ == nullptr); - if (!InitializeOptionsFromFile(options_)) { - InitializeOptionsFromFlags(cache_, filter_policy_, options_); - } - InitializeOptionsGeneral(cache_, filter_policy_, options_); - - if (FLAGS_prefix_size == 0 && FLAGS_rep_factory == kHashSkipList) { - fprintf(stderr, - "prefeix_size cannot be zero if memtablerep == prefix_hash\n"); - exit(1); - } - if (FLAGS_prefix_size != 0 && FLAGS_rep_factory != kHashSkipList) { - fprintf(stderr, - "WARNING: prefix_size is non-zero but " - "memtablerep != prefix_hash\n"); - } - - if ((options_.enable_blob_files || options_.enable_blob_garbage_collection || - FLAGS_allow_setting_blob_options_dynamically) && - FLAGS_best_efforts_recovery) { - fprintf(stderr, - "Integrated BlobDB is currently incompatible with best-effort 
" - "recovery\n"); - exit(1); - } - - fprintf(stdout, - "Integrated BlobDB: blob files enabled %d, min blob size %" PRIu64 - ", blob file size %" PRIu64 - ", blob compression type %s, blob GC enabled %d, cutoff %f, force " - "threshold %f, blob compaction readahead size %" PRIu64 - ", blob file starting level %d\n", - options_.enable_blob_files, options_.min_blob_size, - options_.blob_file_size, - CompressionTypeToString(options_.blob_compression_type).c_str(), - options_.enable_blob_garbage_collection, - options_.blob_garbage_collection_age_cutoff, - options_.blob_garbage_collection_force_threshold, - options_.blob_compaction_readahead_size, - options_.blob_file_starting_level); - - if (FLAGS_use_blob_cache) { - fprintf(stdout, - "Integrated BlobDB: blob cache enabled" - ", block and blob caches shared: %d", - FLAGS_use_shared_block_and_blob_cache); - if (!FLAGS_use_shared_block_and_blob_cache) { - fprintf(stdout, - ", blob cache size %" PRIu64 ", blob cache num shard bits: %d", - FLAGS_blob_cache_size, FLAGS_blob_cache_numshardbits); - } - fprintf(stdout, ", blob cache prepopulated: %d\n", - FLAGS_prepopulate_blob_cache); - } else { - fprintf(stdout, "Integrated BlobDB: blob cache disabled\n"); - } - - fprintf(stdout, "DB path: [%s]\n", FLAGS_db.c_str()); - - Status s; - - if (FLAGS_ttl == -1) { - std::vector existing_column_families; - s = DB::ListColumnFamilies(DBOptions(options_), FLAGS_db, - &existing_column_families); // ignore errors - if (!s.ok()) { - // DB doesn't exist - assert(existing_column_families.empty()); - assert(column_family_names_.empty()); - column_family_names_.push_back(kDefaultColumnFamilyName); - } else if (column_family_names_.empty()) { - // this is the first call to the function Open() - column_family_names_ = existing_column_families; - } else { - // this is a reopen. just assert that existing column_family_names are - // equivalent to what we remember - auto sorted_cfn = column_family_names_; - std::sort(sorted_cfn.begin(), sorted_cfn.end()); - std::sort(existing_column_families.begin(), - existing_column_families.end()); - if (sorted_cfn != existing_column_families) { - fprintf(stderr, "Expected column families differ from the existing:\n"); - fprintf(stderr, "Expected: {"); - for (auto cf : sorted_cfn) { - fprintf(stderr, "%s ", cf.c_str()); - } - fprintf(stderr, "}\n"); - fprintf(stderr, "Existing: {"); - for (auto cf : existing_column_families) { - fprintf(stderr, "%s ", cf.c_str()); - } - fprintf(stderr, "}\n"); - } - assert(sorted_cfn == existing_column_families); - } - std::vector cf_descriptors; - for (auto name : column_family_names_) { - if (name != kDefaultColumnFamilyName) { - new_column_family_name_ = - std::max(new_column_family_name_.load(), std::stoi(name) + 1); - } - cf_descriptors.emplace_back(name, ColumnFamilyOptions(options_)); - } - while (cf_descriptors.size() < (size_t)FLAGS_column_families) { - std::string name = std::to_string(new_column_family_name_.load()); - new_column_family_name_++; - cf_descriptors.emplace_back(name, ColumnFamilyOptions(options_)); - column_family_names_.push_back(name); - } - - options_.listeners.clear(); - options_.listeners.emplace_back(new DbStressListener( - FLAGS_db, options_.db_paths, cf_descriptors, db_stress_listener_env)); - RegisterAdditionalListeners(); - - if (!FLAGS_use_txn) { - // Determine whether we need to ingest file metadata write failures - // during DB reopen. If it does, enable it. 
- // Only ingest metadata error if it is reopening, as initial open - // failure doesn't need to be handled. - // TODO cover transaction DB is not covered in this fault test too. - bool ingest_meta_error = false; - bool ingest_write_error = false; - bool ingest_read_error = false; - if ((FLAGS_open_metadata_write_fault_one_in || - FLAGS_open_write_fault_one_in || FLAGS_open_read_fault_one_in) && - fault_fs_guard - ->FileExists(FLAGS_db + "/CURRENT", IOOptions(), nullptr) - .ok()) { - if (!FLAGS_sync) { - // When DB Stress is not sync mode, we expect all WAL writes to - // WAL is durable. Buffering unsynced writes will cause false - // positive in crash tests. Before we figure out a way to - // solve it, skip WAL from failure injection. - fault_fs_guard->SetSkipDirectWritableTypes({kWalFile}); - } - ingest_meta_error = FLAGS_open_metadata_write_fault_one_in; - ingest_write_error = FLAGS_open_write_fault_one_in; - ingest_read_error = FLAGS_open_read_fault_one_in; - if (ingest_meta_error) { - fault_fs_guard->EnableMetadataWriteErrorInjection(); - fault_fs_guard->SetRandomMetadataWriteError( - FLAGS_open_metadata_write_fault_one_in); - } - if (ingest_write_error) { - fault_fs_guard->SetFilesystemDirectWritable(false); - fault_fs_guard->EnableWriteErrorInjection(); - fault_fs_guard->SetRandomWriteError( - static_cast(FLAGS_seed), FLAGS_open_write_fault_one_in, - IOStatus::IOError("Injected Open Error"), - /*inject_for_all_file_types=*/true, /*types=*/{}); - } - if (ingest_read_error) { - fault_fs_guard->SetRandomReadError(FLAGS_open_read_fault_one_in); - } - } - while (true) { - // StackableDB-based BlobDB - if (FLAGS_use_blob_db) { - blob_db::BlobDBOptions blob_db_options; - blob_db_options.min_blob_size = FLAGS_blob_db_min_blob_size; - blob_db_options.bytes_per_sync = FLAGS_blob_db_bytes_per_sync; - blob_db_options.blob_file_size = FLAGS_blob_db_file_size; - blob_db_options.enable_garbage_collection = FLAGS_blob_db_enable_gc; - blob_db_options.garbage_collection_cutoff = FLAGS_blob_db_gc_cutoff; - - blob_db::BlobDB* blob_db = nullptr; - s = blob_db::BlobDB::Open(options_, blob_db_options, FLAGS_db, - cf_descriptors, &column_families_, - &blob_db); - if (s.ok()) { - db_ = blob_db; - } - } else - { - if (db_preload_finished_.load() && FLAGS_read_only) { - s = DB::OpenForReadOnly(DBOptions(options_), FLAGS_db, - cf_descriptors, &column_families_, &db_); - } else { - s = DB::Open(DBOptions(options_), FLAGS_db, cf_descriptors, - &column_families_, &db_); - } - } - - if (ingest_meta_error || ingest_write_error || ingest_read_error) { - fault_fs_guard->SetFilesystemDirectWritable(true); - fault_fs_guard->DisableMetadataWriteErrorInjection(); - fault_fs_guard->DisableWriteErrorInjection(); - fault_fs_guard->SetSkipDirectWritableTypes({}); - fault_fs_guard->SetRandomReadError(0); - if (s.ok()) { - // Ingested errors might happen in background compactions. We - // wait for all compactions to finish to make sure DB is in - // clean state before executing queries. - s = static_cast_with_check(db_->GetRootDB()) - ->WaitForCompact(true /* wait_unscheduled */); - if (!s.ok()) { - for (auto cf : column_families_) { - delete cf; - } - column_families_.clear(); - delete db_; - db_ = nullptr; - } - } - if (!s.ok()) { - // After failure to opening a DB due to IO error, retry should - // successfully open the DB with correct data if no IO error shows - // up. 
- ingest_meta_error = false; - ingest_write_error = false; - ingest_read_error = false; - - Random rand(static_cast(FLAGS_seed)); - if (rand.OneIn(2)) { - fault_fs_guard->DeleteFilesCreatedAfterLastDirSync(IOOptions(), - nullptr); - } - if (rand.OneIn(3)) { - fault_fs_guard->DropUnsyncedFileData(); - } else if (rand.OneIn(2)) { - fault_fs_guard->DropRandomUnsyncedFileData(&rand); - } - continue; - } - } - break; - } - } else { - TransactionDBOptions txn_db_options; - assert(FLAGS_txn_write_policy <= TxnDBWritePolicy::WRITE_UNPREPARED); - txn_db_options.write_policy = - static_cast(FLAGS_txn_write_policy); - if (FLAGS_unordered_write) { - assert(txn_db_options.write_policy == TxnDBWritePolicy::WRITE_PREPARED); - options_.unordered_write = true; - options_.two_write_queues = true; - txn_db_options.skip_concurrency_control = true; - } else { - options_.two_write_queues = FLAGS_two_write_queues; - } - txn_db_options.wp_snapshot_cache_bits = - static_cast(FLAGS_wp_snapshot_cache_bits); - txn_db_options.wp_commit_cache_bits = - static_cast(FLAGS_wp_commit_cache_bits); - PrepareTxnDbOptions(shared, txn_db_options); - s = TransactionDB::Open(options_, txn_db_options, FLAGS_db, - cf_descriptors, &column_families_, &txn_db_); - if (!s.ok()) { - fprintf(stderr, "Error in opening the TransactionDB [%s]\n", - s.ToString().c_str()); - fflush(stderr); - } - assert(s.ok()); - - // Do not swap the order of the following. - { - db_ = txn_db_; - db_aptr_.store(txn_db_, std::memory_order_release); - } - } - if (!s.ok()) { - fprintf(stderr, "Error in opening the DB [%s]\n", s.ToString().c_str()); - fflush(stderr); - } - assert(s.ok()); - assert(column_families_.size() == - static_cast(FLAGS_column_families)); - - // Secondary instance does not support write-prepared/write-unprepared - // transactions, thus just disable secondary instance if we use - // transaction. - if (s.ok() && FLAGS_test_secondary && !FLAGS_use_txn) { - Options tmp_opts; - // TODO(yanqin) support max_open_files != -1 for secondary instance. - tmp_opts.max_open_files = -1; - tmp_opts.env = db_stress_env; - const std::string& secondary_path = FLAGS_secondaries_base; - s = DB::OpenAsSecondary(tmp_opts, FLAGS_db, secondary_path, - cf_descriptors, &cmp_cfhs_, &cmp_db_); - assert(s.ok()); - assert(cmp_cfhs_.size() == static_cast(FLAGS_column_families)); - } - } else { - DBWithTTL* db_with_ttl; - s = DBWithTTL::Open(options_, FLAGS_db, &db_with_ttl, FLAGS_ttl); - db_ = db_with_ttl; - } - - if (FLAGS_preserve_unverified_changes) { - // Up until now, no live file should have become obsolete due to these - // options. After `DisableFileDeletions()` we can reenable auto compactions - // since, even if live files become obsolete, they won't be deleted. - assert(options_.avoid_flush_during_recovery); - assert(options_.disable_auto_compactions); - if (s.ok()) { - s = db_->DisableFileDeletions(); - } - if (s.ok()) { - s = db_->EnableAutoCompaction(column_families_); - } - } - - if (!s.ok()) { - fprintf(stderr, "open error: %s\n", s.ToString().c_str()); - exit(1); - } -} - -void StressTest::Reopen(ThreadState* thread) { - // BG jobs in WritePrepared must be canceled first because i) they can access - // the db via a callbac ii) they hold on to a snapshot and the upcoming - // ::Close would complain about it. 
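An illustrative aside (not from the patched file): a minimal sketch of the shutdown order described in the comment above, cancel background work first, then Close() and delete the DB; `ShutDownCleanly` is a hypothetical helper, not part of db_stress.

#include <cassert>

#include "rocksdb/convenience.h"
#include "rocksdb/db.h"

void ShutDownCleanly(rocksdb::DB* db) {
  // Wait for running flushes/compactions so no background callback can
  // touch the DB after this point.
  rocksdb::CancelAllBackgroundWork(db, /*wait=*/true);
  rocksdb::Status s = db->Close();
  assert(s.ok());
  delete db;
}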
- const bool write_prepared = FLAGS_use_txn && FLAGS_txn_write_policy != 0; - bool bg_canceled __attribute__((unused)) = false; - if (write_prepared || thread->rand.OneIn(2)) { - const bool wait = - write_prepared || static_cast(thread->rand.OneIn(2)); - CancelAllBackgroundWork(db_, wait); - bg_canceled = wait; - } - assert(!write_prepared || bg_canceled); - - for (auto cf : column_families_) { - delete cf; - } - column_families_.clear(); - - if (thread->rand.OneIn(2)) { - Status s = db_->Close(); - if (!s.ok()) { - fprintf(stderr, "Non-ok close status: %s\n", s.ToString().c_str()); - fflush(stderr); - } - assert(s.ok()); - } - assert(txn_db_ == nullptr || db_ == txn_db_); - delete db_; - db_ = nullptr; - txn_db_ = nullptr; - - num_times_reopened_++; - auto now = clock_->NowMicros(); - fprintf(stdout, "%s Reopening database for the %dth time\n", - clock_->TimeToString(now / 1000000).c_str(), num_times_reopened_); - Open(thread->shared); - - if ((FLAGS_sync_fault_injection || FLAGS_disable_wal || - FLAGS_manual_wal_flush_one_in > 0) && - IsStateTracked()) { - Status s = thread->shared->SaveAtAndAfter(db_); - if (!s.ok()) { - fprintf(stderr, "Error enabling history tracing: %s\n", - s.ToString().c_str()); - exit(1); - } - } -} - -bool StressTest::MaybeUseOlderTimestampForPointLookup(ThreadState* thread, - std::string& ts_str, - Slice& ts_slice, - ReadOptions& read_opts) { - if (FLAGS_user_timestamp_size == 0) { - return false; - } - - assert(thread); - if (!thread->rand.OneInOpt(3)) { - return false; - } - - const SharedState* const shared = thread->shared; - assert(shared); - const uint64_t start_ts = shared->GetStartTimestamp(); - - uint64_t now = db_stress_env->NowNanos(); - - assert(now > start_ts); - uint64_t time_diff = now - start_ts; - uint64_t ts = start_ts + (thread->rand.Next64() % time_diff); - ts_str.clear(); - PutFixed64(&ts_str, ts); - ts_slice = ts_str; - read_opts.timestamp = &ts_slice; - return true; -} - -void StressTest::MaybeUseOlderTimestampForRangeScan(ThreadState* thread, - std::string& ts_str, - Slice& ts_slice, - ReadOptions& read_opts) { - if (FLAGS_user_timestamp_size == 0) { - return; - } - - assert(thread); - if (!thread->rand.OneInOpt(3)) { - return; - } - - const Slice* const saved_ts = read_opts.timestamp; - assert(saved_ts != nullptr); - - const SharedState* const shared = thread->shared; - assert(shared); - const uint64_t start_ts = shared->GetStartTimestamp(); - - uint64_t now = db_stress_env->NowNanos(); - - assert(now > start_ts); - uint64_t time_diff = now - start_ts; - uint64_t ts = start_ts + (thread->rand.Next64() % time_diff); - ts_str.clear(); - PutFixed64(&ts_str, ts); - ts_slice = ts_str; - read_opts.timestamp = &ts_slice; - - // TODO (yanqin): support Merge with iter_start_ts - if (!thread->rand.OneInOpt(3) || FLAGS_use_merge || FLAGS_use_full_merge_v1) { - return; - } - - ts_str.clear(); - PutFixed64(&ts_str, start_ts); - ts_slice = ts_str; - read_opts.iter_start_ts = &ts_slice; - read_opts.timestamp = saved_ts; -} - -void CheckAndSetOptionsForUserTimestamp(Options& options) { - assert(FLAGS_user_timestamp_size > 0); - const Comparator* const cmp = test::BytewiseComparatorWithU64TsWrapper(); - assert(cmp); - if (FLAGS_user_timestamp_size != cmp->timestamp_size()) { - fprintf(stderr, - "Only -user_timestamp_size=%d is supported in stress test.\n", - static_cast(cmp->timestamp_size())); - exit(1); - } - if (FLAGS_use_txn) { - fprintf(stderr, "TransactionDB does not support timestamp yet.\n"); - exit(1); - } - if (FLAGS_test_cf_consistency || 
FLAGS_test_batches_snapshots) { - fprintf(stderr, - "Due to per-key ts-seq ordering constraint, only the (default) " - "non-batched test is supported with timestamp.\n"); - exit(1); - } - if (FLAGS_ingest_external_file_one_in > 0) { - fprintf(stderr, "Bulk loading may not support timestamp yet.\n"); - exit(1); - } - options.comparator = cmp; -} - -bool InitializeOptionsFromFile(Options& options) { - DBOptions db_options; - ConfigOptions config_options; - config_options.ignore_unknown_options = false; - config_options.input_strings_escaped = true; - config_options.env = db_stress_env; - std::vector cf_descriptors; - if (!FLAGS_options_file.empty()) { - Status s = LoadOptionsFromFile(config_options, FLAGS_options_file, - &db_options, &cf_descriptors); - if (!s.ok()) { - fprintf(stderr, "Unable to load options file %s --- %s\n", - FLAGS_options_file.c_str(), s.ToString().c_str()); - exit(1); - } - db_options.env = new CompositeEnvWrapper(db_stress_env); - options = Options(db_options, cf_descriptors[0].options); - return true; - } - return false; -} - -void InitializeOptionsFromFlags( - const std::shared_ptr& cache, - const std::shared_ptr& filter_policy, - Options& options) { - BlockBasedTableOptions block_based_options; - block_based_options.block_cache = cache; - block_based_options.cache_index_and_filter_blocks = - FLAGS_cache_index_and_filter_blocks; - block_based_options.metadata_cache_options.top_level_index_pinning = - static_cast(FLAGS_top_level_index_pinning); - block_based_options.metadata_cache_options.partition_pinning = - static_cast(FLAGS_partition_pinning); - block_based_options.metadata_cache_options.unpartitioned_pinning = - static_cast(FLAGS_unpartitioned_pinning); - block_based_options.checksum = checksum_type_e; - block_based_options.block_size = FLAGS_block_size; - block_based_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kCompressionDictionaryBuildingBuffer, - {/*.charged = */ FLAGS_charge_compression_dictionary_building_buffer - ? CacheEntryRoleOptions::Decision::kEnabled - : CacheEntryRoleOptions::Decision::kDisabled}}); - block_based_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kFilterConstruction, - {/*.charged = */ FLAGS_charge_filter_construction - ? CacheEntryRoleOptions::Decision::kEnabled - : CacheEntryRoleOptions::Decision::kDisabled}}); - block_based_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kBlockBasedTableReader, - {/*.charged = */ FLAGS_charge_table_reader - ? CacheEntryRoleOptions::Decision::kEnabled - : CacheEntryRoleOptions::Decision::kDisabled}}); - block_based_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kFileMetadata, - {/*.charged = */ FLAGS_charge_file_metadata - ? CacheEntryRoleOptions::Decision::kEnabled - : CacheEntryRoleOptions::Decision::kDisabled}}); - block_based_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kBlobCache, - {/*.charged = */ FLAGS_charge_blob_cache - ? 
CacheEntryRoleOptions::Decision::kEnabled - : CacheEntryRoleOptions::Decision::kDisabled}}); - block_based_options.format_version = - static_cast(FLAGS_format_version); - block_based_options.index_block_restart_interval = - static_cast(FLAGS_index_block_restart_interval); - block_based_options.filter_policy = filter_policy; - block_based_options.partition_filters = FLAGS_partition_filters; - block_based_options.optimize_filters_for_memory = - FLAGS_optimize_filters_for_memory; - block_based_options.detect_filter_construct_corruption = - FLAGS_detect_filter_construct_corruption; - block_based_options.index_type = - static_cast(FLAGS_index_type); - block_based_options.data_block_index_type = - static_cast( - FLAGS_data_block_index_type); - block_based_options.prepopulate_block_cache = - static_cast( - FLAGS_prepopulate_block_cache); - block_based_options.initial_auto_readahead_size = - FLAGS_initial_auto_readahead_size; - block_based_options.max_auto_readahead_size = FLAGS_max_auto_readahead_size; - block_based_options.num_file_reads_for_auto_readahead = - FLAGS_num_file_reads_for_auto_readahead; - options.table_factory.reset(NewBlockBasedTableFactory(block_based_options)); - options.db_write_buffer_size = FLAGS_db_write_buffer_size; - options.write_buffer_size = FLAGS_write_buffer_size; - options.max_write_buffer_number = FLAGS_max_write_buffer_number; - options.min_write_buffer_number_to_merge = - FLAGS_min_write_buffer_number_to_merge; - options.max_write_buffer_number_to_maintain = - FLAGS_max_write_buffer_number_to_maintain; - options.max_write_buffer_size_to_maintain = - FLAGS_max_write_buffer_size_to_maintain; - options.memtable_prefix_bloom_size_ratio = - FLAGS_memtable_prefix_bloom_size_ratio; - options.memtable_whole_key_filtering = FLAGS_memtable_whole_key_filtering; - options.disable_auto_compactions = FLAGS_disable_auto_compactions; - options.max_background_compactions = FLAGS_max_background_compactions; - options.max_background_flushes = FLAGS_max_background_flushes; - options.compaction_style = - static_cast(FLAGS_compaction_style); - if (options.compaction_style == - ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleFIFO) { - options.compaction_options_fifo.allow_compaction = - FLAGS_fifo_allow_compaction; - } - options.compaction_pri = - static_cast(FLAGS_compaction_pri); - options.num_levels = FLAGS_num_levels; - if (FLAGS_prefix_size >= 0) { - options.prefix_extractor.reset(NewFixedPrefixTransform(FLAGS_prefix_size)); - } - options.max_open_files = FLAGS_open_files; - options.statistics = dbstats; - options.env = db_stress_env; - options.use_fsync = FLAGS_use_fsync; - options.compaction_readahead_size = FLAGS_compaction_readahead_size; - options.allow_mmap_reads = FLAGS_mmap_read; - options.allow_mmap_writes = FLAGS_mmap_write; - options.use_direct_reads = FLAGS_use_direct_reads; - options.use_direct_io_for_flush_and_compaction = - FLAGS_use_direct_io_for_flush_and_compaction; - options.recycle_log_file_num = - static_cast(FLAGS_recycle_log_file_num); - options.target_file_size_base = FLAGS_target_file_size_base; - options.target_file_size_multiplier = FLAGS_target_file_size_multiplier; - options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base; - options.max_bytes_for_level_multiplier = FLAGS_max_bytes_for_level_multiplier; - options.level0_stop_writes_trigger = FLAGS_level0_stop_writes_trigger; - options.level0_slowdown_writes_trigger = FLAGS_level0_slowdown_writes_trigger; - options.level0_file_num_compaction_trigger = - 
FLAGS_level0_file_num_compaction_trigger; - options.compression = compression_type_e; - options.bottommost_compression = bottommost_compression_type_e; - options.compression_opts.max_dict_bytes = FLAGS_compression_max_dict_bytes; - options.compression_opts.zstd_max_train_bytes = - FLAGS_compression_zstd_max_train_bytes; - options.compression_opts.parallel_threads = - FLAGS_compression_parallel_threads; - options.compression_opts.max_dict_buffer_bytes = - FLAGS_compression_max_dict_buffer_bytes; - if (ZSTD_FinalizeDictionarySupported()) { - options.compression_opts.use_zstd_dict_trainer = - FLAGS_compression_use_zstd_dict_trainer; - } else if (!FLAGS_compression_use_zstd_dict_trainer) { - fprintf( - stderr, - "WARNING: use_zstd_dict_trainer is false but zstd finalizeDictionary " - "cannot be used because ZSTD 1.4.5+ is not linked with the binary." - " zstd dictionary trainer will be used.\n"); - } - options.max_manifest_file_size = FLAGS_max_manifest_file_size; - options.inplace_update_support = FLAGS_in_place_update; - options.max_subcompactions = static_cast(FLAGS_subcompactions); - options.allow_concurrent_memtable_write = - FLAGS_allow_concurrent_memtable_write; - options.experimental_mempurge_threshold = - FLAGS_experimental_mempurge_threshold; - options.periodic_compaction_seconds = FLAGS_periodic_compaction_seconds; - options.stats_dump_period_sec = - static_cast(FLAGS_stats_dump_period_sec); - options.ttl = FLAGS_compaction_ttl; - options.enable_pipelined_write = FLAGS_enable_pipelined_write; - options.enable_write_thread_adaptive_yield = - FLAGS_enable_write_thread_adaptive_yield; - options.compaction_options_universal.size_ratio = FLAGS_universal_size_ratio; - options.compaction_options_universal.min_merge_width = - FLAGS_universal_min_merge_width; - options.compaction_options_universal.max_merge_width = - FLAGS_universal_max_merge_width; - options.compaction_options_universal.max_size_amplification_percent = - FLAGS_universal_max_size_amplification_percent; - options.atomic_flush = FLAGS_atomic_flush; - options.manual_wal_flush = FLAGS_manual_wal_flush_one_in > 0 ? 
true : false; - options.avoid_unnecessary_blocking_io = FLAGS_avoid_unnecessary_blocking_io; - options.write_dbid_to_manifest = FLAGS_write_dbid_to_manifest; - options.avoid_flush_during_recovery = FLAGS_avoid_flush_during_recovery; - options.max_write_batch_group_size_bytes = - FLAGS_max_write_batch_group_size_bytes; - options.level_compaction_dynamic_level_bytes = - FLAGS_level_compaction_dynamic_level_bytes; - options.track_and_verify_wals_in_manifest = true; - options.verify_sst_unique_id_in_manifest = - FLAGS_verify_sst_unique_id_in_manifest; - options.memtable_protection_bytes_per_key = - FLAGS_memtable_protection_bytes_per_key; - - // Integrated BlobDB - options.enable_blob_files = FLAGS_enable_blob_files; - options.min_blob_size = FLAGS_min_blob_size; - options.blob_file_size = FLAGS_blob_file_size; - options.blob_compression_type = - StringToCompressionType(FLAGS_blob_compression_type.c_str()); - options.enable_blob_garbage_collection = FLAGS_enable_blob_garbage_collection; - options.blob_garbage_collection_age_cutoff = - FLAGS_blob_garbage_collection_age_cutoff; - options.blob_garbage_collection_force_threshold = - FLAGS_blob_garbage_collection_force_threshold; - options.blob_compaction_readahead_size = FLAGS_blob_compaction_readahead_size; - options.blob_file_starting_level = FLAGS_blob_file_starting_level; - - if (FLAGS_use_blob_cache) { - if (FLAGS_use_shared_block_and_blob_cache) { - options.blob_cache = cache; - } else { - if (FLAGS_blob_cache_size > 0) { - LRUCacheOptions co; - co.capacity = FLAGS_blob_cache_size; - co.num_shard_bits = FLAGS_blob_cache_numshardbits; - options.blob_cache = NewLRUCache(co); - } else { - fprintf(stderr, - "Unable to create a standalone blob cache if blob_cache_size " - "<= 0.\n"); - exit(1); - } - } - switch (FLAGS_prepopulate_blob_cache) { - case 0: - options.prepopulate_blob_cache = PrepopulateBlobCache::kDisable; - break; - case 1: - options.prepopulate_blob_cache = PrepopulateBlobCache::kFlushOnly; - break; - default: - fprintf(stderr, "Unknown prepopulate blob cache mode\n"); - exit(1); - } - } - - options.wal_compression = - StringToCompressionType(FLAGS_wal_compression.c_str()); - - if (FLAGS_enable_tiered_storage) { - options.bottommost_temperature = Temperature::kCold; - } - options.preclude_last_level_data_seconds = - FLAGS_preclude_last_level_data_seconds; - options.preserve_internal_time_seconds = FLAGS_preserve_internal_time_seconds; - - switch (FLAGS_rep_factory) { - case kSkipList: - // no need to do anything - break; - case kHashSkipList: - options.memtable_factory.reset(NewHashSkipListRepFactory(10000)); - break; - case kVectorRep: - options.memtable_factory.reset(new VectorRepFactory()); - break; - } - if (FLAGS_use_full_merge_v1) { - options.merge_operator = MergeOperators::CreateDeprecatedPutOperator(); - } else { - options.merge_operator = MergeOperators::CreatePutOperator(); - } - - if (FLAGS_enable_compaction_filter) { - options.compaction_filter_factory = - std::make_shared(); - } - - options.best_efforts_recovery = FLAGS_best_efforts_recovery; - options.paranoid_file_checks = FLAGS_paranoid_file_checks; - options.fail_if_options_file_error = FLAGS_fail_if_options_file_error; - - if (FLAGS_user_timestamp_size > 0) { - CheckAndSetOptionsForUserTimestamp(options); - } - - options.allow_data_in_errors = FLAGS_allow_data_in_errors; -} - -void InitializeOptionsGeneral( - const std::shared_ptr& cache, - const std::shared_ptr& filter_policy, - Options& options) { - options.create_missing_column_families = true; - 
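The blob cache branch just above either shares the block cache or builds a dedicated LRU cache from `FLAGS_blob_cache_size` and `FLAGS_blob_cache_numshardbits`. A minimal sketch of the same wiring outside the stress tool, with made-up capacity and shard values in place of the flags:

```cpp
// Illustrative only: a standalone LRU blob cache wired into Options, following
// the same shape as the FLAGS_blob_cache_size > 0 branch above. The 256 MB
// capacity and 6 shard bits are arbitrary example values, not db_stress flags.
#include "rocksdb/cache.h"
#include "rocksdb/options.h"

ROCKSDB_NAMESPACE::Options MakeBlobOptionsExample() {
  using namespace ROCKSDB_NAMESPACE;
  LRUCacheOptions co;
  co.capacity = 256 << 20;  // 256 MB, example value
  co.num_shard_bits = 6;    // example value

  Options options;
  options.enable_blob_files = true;
  options.min_blob_size = 1024;  // values >= 1 KB go to blob files
  options.blob_cache = NewLRUCache(co);
  options.prepopulate_blob_cache = PrepopulateBlobCache::kFlushOnly;
  return options;
}
```

Sharing the block cache (the other branch above) keeps blocks and blobs under one memory budget, while a dedicated cache lets blob residency be sized independently.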
options.create_if_missing = true; - - if (!options.statistics) { - options.statistics = dbstats; - } - - if (options.env == Options().env) { - options.env = db_stress_env; - } - - assert(options.table_factory); - auto table_options = - options.table_factory->GetOptions(); - if (table_options) { - if (FLAGS_cache_size > 0) { - table_options->block_cache = cache; - } - if (!table_options->filter_policy) { - table_options->filter_policy = filter_policy; - } - } - - // TODO: row_cache, thread-pool IO priority, CPU priority. - - if (!options.rate_limiter) { - if (FLAGS_rate_limiter_bytes_per_sec > 0) { - options.rate_limiter.reset(NewGenericRateLimiter( - FLAGS_rate_limiter_bytes_per_sec, 1000 /* refill_period_us */, - 10 /* fairness */, - FLAGS_rate_limit_bg_reads ? RateLimiter::Mode::kReadsOnly - : RateLimiter::Mode::kWritesOnly)); - } - } - - if (!options.file_checksum_gen_factory) { - options.file_checksum_gen_factory = - GetFileChecksumImpl(FLAGS_file_checksum_impl); - } - - if (FLAGS_sst_file_manager_bytes_per_sec > 0 || - FLAGS_sst_file_manager_bytes_per_truncate > 0) { - Status status; - options.sst_file_manager.reset(NewSstFileManager( - db_stress_env, options.info_log, "" /* trash_dir */, - static_cast(FLAGS_sst_file_manager_bytes_per_sec), - true /* delete_existing_trash */, &status, - 0.25 /* max_trash_db_ratio */, - FLAGS_sst_file_manager_bytes_per_truncate)); - if (!status.ok()) { - fprintf(stderr, "SstFileManager creation failed: %s\n", - status.ToString().c_str()); - exit(1); - } - } - - if (FLAGS_preserve_unverified_changes) { - if (!options.avoid_flush_during_recovery) { - fprintf(stderr, - "WARNING: flipping `avoid_flush_during_recovery` to true for " - "`preserve_unverified_changes` to keep all files\n"); - options.avoid_flush_during_recovery = true; - } - // Together with `avoid_flush_during_recovery == true`, this will prevent - // live files from becoming obsolete and deleted between `DB::Open()` and - // `DisableFileDeletions()` due to flush or compaction. We do not need to - // warn the user since we will reenable compaction soon. - options.disable_auto_compactions = true; - } - - options.table_properties_collector_factories.emplace_back( - std::make_shared()); -} - -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_test_base.h b/db_stress_tool/db_stress_test_base.h deleted file mode 100644 index e6de74d7b..000000000 --- a/db_stress_tool/db_stress_test_base.h +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
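InitializeOptionsGeneral() above only creates a rate limiter when none was already supplied (for example via an OPTIONS file) and the corresponding flag asks for one. A minimal sketch of that pattern, with an illustrative 32 MB/s budget standing in for `FLAGS_rate_limiter_bytes_per_sec`:

```cpp
// Illustrative only: attach a background-write rate limiter to Options,
// mirroring the InitializeOptionsGeneral() logic above. The 32 MB/s rate is an
// arbitrary example, not a db_stress default.
#include "rocksdb/options.h"
#include "rocksdb/rate_limiter.h"

void AttachRateLimiterExample(ROCKSDB_NAMESPACE::Options& options) {
  using namespace ROCKSDB_NAMESPACE;
  if (!options.rate_limiter) {  // honor a limiter already set by an OPTIONS file
    options.rate_limiter.reset(NewGenericRateLimiter(
        32 << 20 /* rate_bytes_per_sec */, 1000 /* refill_period_us */,
        10 /* fairness */, RateLimiter::Mode::kWritesOnly));
  }
}
```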
- -#ifdef GFLAGS -#pragma once - -#include "db_stress_tool/db_stress_common.h" -#include "db_stress_tool/db_stress_shared_state.h" - -namespace ROCKSDB_NAMESPACE { -class SystemClock; -class Transaction; -class TransactionDB; -struct TransactionDBOptions; - -class StressTest { - public: - StressTest(); - - virtual ~StressTest(); - - std::shared_ptr NewCache(size_t capacity, int32_t num_shard_bits); - - static std::vector GetBlobCompressionTags(); - - bool BuildOptionsTable(); - - void InitDb(SharedState*); - // The initialization work is split into two parts to avoid a circular - // dependency with `SharedState`. - virtual void FinishInitDb(SharedState*); - void TrackExpectedState(SharedState* shared); - void OperateDb(ThreadState* thread); - virtual void VerifyDb(ThreadState* thread) const = 0; - virtual void ContinuouslyVerifyDb(ThreadState* /*thread*/) const = 0; - void PrintStatistics(); - - protected: - Status AssertSame(DB* db, ColumnFamilyHandle* cf, - ThreadState::SnapshotState& snap_state); - - // Currently PreloadDb has to be single-threaded. - void PreloadDbAndReopenAsReadOnly(int64_t number_of_keys, - SharedState* shared); - - Status SetOptions(ThreadState* thread); - - // For transactionsDB, there can be txns prepared but not yet committeed - // right before previous stress run crash. - // They will be recovered and processed through - // ProcessRecoveredPreparedTxnsHelper on the start of current stress run. - void ProcessRecoveredPreparedTxns(SharedState* shared); - - // Default implementation will first update ExpectedState to be - // `SharedState::UNKNOWN` for each keys in `txn` and then randomly - // commit or rollback `txn`. - virtual void ProcessRecoveredPreparedTxnsHelper(Transaction* txn, - SharedState* shared); - - Status NewTxn(WriteOptions& write_opts, Transaction** txn); - - Status CommitTxn(Transaction* txn, ThreadState* thread = nullptr); - - Status RollbackTxn(Transaction* txn); - - virtual void MaybeClearOneColumnFamily(ThreadState* /* thread */) {} - - virtual bool ShouldAcquireMutexOnKey() const { return false; } - - // Returns true if DB state is tracked by the stress test. 
- virtual bool IsStateTracked() const = 0; - - virtual std::vector GenerateColumnFamilies( - const int /* num_column_families */, int rand_column_family) const { - return {rand_column_family}; - } - - virtual std::vector GenerateKeys(int64_t rand_key) const { - return {rand_key}; - } - - virtual Status TestGet(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) = 0; - - virtual std::vector TestMultiGet( - ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) = 0; - - virtual void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) = 0; - - virtual Status TestPrefixScan(ThreadState* thread, - const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) = 0; - - virtual Status TestPut(ThreadState* thread, WriteOptions& write_opts, - const ReadOptions& read_opts, - const std::vector& cf_ids, - const std::vector& keys, - char (&value)[100]) = 0; - - virtual Status TestDelete(ThreadState* thread, WriteOptions& write_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) = 0; - - virtual Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) = 0; - - virtual void TestIngestExternalFile( - ThreadState* thread, const std::vector& rand_column_families, - const std::vector& rand_keys) = 0; - - // Issue compact range, starting with start_key, whose integer value - // is rand_key. - virtual void TestCompactRange(ThreadState* thread, int64_t rand_key, - const Slice& start_key, - ColumnFamilyHandle* column_family); - - // Calculate a hash value for all keys in range [start_key, end_key] - // at a certain snapshot. - uint32_t GetRangeHash(ThreadState* thread, const Snapshot* snapshot, - ColumnFamilyHandle* column_family, - const Slice& start_key, const Slice& end_key); - - // Return a column family handle that mirrors what is pointed by - // `column_family_id`, which will be used to validate data to be correct. - // By default, the column family itself will be returned. - virtual ColumnFamilyHandle* GetControlCfh(ThreadState* /* thread*/, - int column_family_id) { - return column_families_[column_family_id]; - } - - // Generated a list of keys that close to boundaries of SST keys. - // If there isn't any SST file in the DB, return empty list. - std::vector GetWhiteBoxKeys(ThreadState* thread, DB* db, - ColumnFamilyHandle* cfh, - size_t num_keys); - - // Given a key K, this creates an iterator which scans to K and then - // does a random sequence of Next/Prev operations. - virtual Status TestIterate(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys); - - virtual Status TestIterateAgainstExpected( - ThreadState* /* thread */, const ReadOptions& /* read_opts */, - const std::vector& /* rand_column_families */, - const std::vector& /* rand_keys */) { - return Status::NotSupported(); - } - - // Enum used by VerifyIterator() to identify the mode to validate. 
- enum LastIterateOp { - kLastOpSeek, - kLastOpSeekForPrev, - kLastOpNextOrPrev, - kLastOpSeekToFirst, - kLastOpSeekToLast - }; - - // Compare the two iterator, iter and cmp_iter are in the same position, - // unless iter might be made invalidate or undefined because of - // upper or lower bounds, or prefix extractor. - // Will flag failure if the verification fails. - // diverged = true if the two iterator is already diverged. - // True if verification passed, false if not. - // op_logs is the information to print when validation fails. - void VerifyIterator(ThreadState* thread, ColumnFamilyHandle* cmp_cfh, - const ReadOptions& ro, Iterator* iter, Iterator* cmp_iter, - LastIterateOp op, const Slice& seek_key, - const std::string& op_logs, bool* diverged); - - virtual Status TestBackupRestore(ThreadState* thread, - const std::vector& rand_column_families, - const std::vector& rand_keys); - - virtual Status TestCheckpoint(ThreadState* thread, - const std::vector& rand_column_families, - const std::vector& rand_keys); - - void TestCompactFiles(ThreadState* thread, ColumnFamilyHandle* column_family); - - Status TestFlush(const std::vector& rand_column_families); - - Status TestPauseBackground(ThreadState* thread); - - void TestAcquireSnapshot(ThreadState* thread, int rand_column_family, - const std::string& keystr, uint64_t i); - - Status MaybeReleaseSnapshots(ThreadState* thread, uint64_t i); - Status VerifyGetLiveFiles() const; - Status VerifyGetSortedWalFiles() const; - Status VerifyGetCurrentWalFile() const; - void TestGetProperty(ThreadState* thread) const; - - virtual Status TestApproximateSize( - ThreadState* thread, uint64_t iteration, - const std::vector& rand_column_families, - const std::vector& rand_keys); - - virtual Status TestCustomOperations( - ThreadState* /*thread*/, - const std::vector& /*rand_column_families*/) { - return Status::NotSupported("TestCustomOperations() must be overridden"); - } - - void VerificationAbort(SharedState* shared, std::string msg, Status s) const; - - void VerificationAbort(SharedState* shared, std::string msg, int cf, - int64_t key) const; - - void VerificationAbort(SharedState* shared, std::string msg, int cf, - int64_t key, Slice value_from_db, - Slice value_from_expected) const; - - void VerificationAbort(SharedState* shared, int cf, int64_t key, - const Slice& value, const WideColumns& columns) const; - - static std::string DebugString(const Slice& value, - const WideColumns& columns); - - void PrintEnv() const; - - void Open(SharedState* shared); - - void Reopen(ThreadState* thread); - - virtual void RegisterAdditionalListeners() {} - - virtual void PrepareTxnDbOptions(SharedState* /*shared*/, - TransactionDBOptions& /*txn_db_opts*/) {} - - // Returns whether the timestamp of read_opts is updated. 
- bool MaybeUseOlderTimestampForPointLookup(ThreadState* thread, - std::string& ts_str, - Slice& ts_slice, - ReadOptions& read_opts); - - void MaybeUseOlderTimestampForRangeScan(ThreadState* thread, - std::string& ts_str, Slice& ts_slice, - ReadOptions& read_opts); - - std::shared_ptr cache_; - std::shared_ptr compressed_cache_; - std::shared_ptr filter_policy_; - DB* db_; - TransactionDB* txn_db_; - - // Currently only used in MultiOpsTxnsStressTest - std::atomic db_aptr_; - - Options options_; - SystemClock* clock_; - std::vector column_families_; - std::vector column_family_names_; - std::atomic new_column_family_name_; - int num_times_reopened_; - std::unordered_map> options_table_; - std::vector options_index_; - std::atomic db_preload_finished_; - - // Fields used for continuous verification from another thread - DB* cmp_db_; - std::vector cmp_cfhs_; - bool is_db_stopped_; -}; - -// Load options from OPTIONS file and populate `options`. -extern bool InitializeOptionsFromFile(Options& options); - -// Initialize `options` using command line arguments. -// When this function is called, `cache`, `block_cache_compressed`, -// `filter_policy` have all been initialized. Therefore, we just pass them as -// input arguments. -extern void InitializeOptionsFromFlags( - const std::shared_ptr& cache, - const std::shared_ptr& filter_policy, Options& options); - -// Initialize `options` on which `InitializeOptionsFromFile()` and -// `InitializeOptionsFromFlags()` have both been called already. -// There are two cases. -// Case 1: OPTIONS file is not specified. Command line arguments have been used -// to initialize `options`. InitializeOptionsGeneral() will use -// `cache` and `filter_policy` to initialize -// corresponding fields of `options`. InitializeOptionsGeneral() will -// also set up other fields of `options` so that stress test can run. -// Examples include `create_if_missing` and -// `create_missing_column_families`, etc. -// Case 2: OPTIONS file is specified. It is possible that, after loading from -// the given OPTIONS files, some shared object fields are still not -// initialized because they are not set in the OPTIONS file. In this -// case, if command line arguments indicate that the user wants to set -// up such shared objects, e.g. block cache, compressed block cache, -// row cache, filter policy, then InitializeOptionsGeneral() will honor -// the user's choice, thus passing `cache`, -// `filter_policy` as input arguments. -// -// InitializeOptionsGeneral() must not overwrite fields of `options` loaded -// from OPTIONS file. -extern void InitializeOptionsGeneral( - const std::shared_ptr& cache, - const std::shared_ptr& filter_policy, Options& options); - -// If no OPTIONS file is specified, set up `options` so that we can test -// user-defined timestamp which requires `-user_timestamp_size=8`. -// This function also checks for known (currently) incompatible features with -// user-defined timestamp. -extern void CheckAndSetOptionsForUserTimestamp(Options& options); - -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc deleted file mode 100644 index c37117921..000000000 --- a/db_stress_tool/db_stress_tool.cc +++ /dev/null @@ -1,366 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
-// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// The test uses an array to compare against values written to the database. -// Keys written to the array are in 1:1 correspondence to the actual values in -// the database according to the formula in the function GenerateValue. - -// Space is reserved in the array from 0 to FLAGS_max_key and values are -// randomly written/deleted/read from those positions. During verification we -// compare all the positions in the array. To shorten/elongate the running -// time, you could change the settings: FLAGS_max_key, FLAGS_ops_per_thread, -// (sometimes also FLAGS_threads). -// -// NOTE that if FLAGS_test_batches_snapshots is set, the test will have -// different behavior. See comment of the flag for details. - -#include "db_stress_tool/db_stress_shared_state.h" -#ifdef GFLAGS -#include "db_stress_tool/db_stress_common.h" -#include "db_stress_tool/db_stress_driver.h" -#include "rocksdb/convenience.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { -namespace { -static std::shared_ptr env_guard; -static std::shared_ptr env_wrapper_guard; -static std::shared_ptr - dbsl_env_wrapper_guard; -static std::shared_ptr fault_env_guard; -} // namespace - -KeyGenContext key_gen_ctx; - -int db_stress_tool(int argc, char** argv) { - SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + - " [OPTIONS]..."); - ParseCommandLineFlags(&argc, &argv, true); - - SanitizeDoubleParam(&FLAGS_bloom_bits); - SanitizeDoubleParam(&FLAGS_memtable_prefix_bloom_size_ratio); - SanitizeDoubleParam(&FLAGS_max_bytes_for_level_multiplier); - -#ifndef NDEBUG - if (FLAGS_mock_direct_io) { - SetupSyncPointsToMockDirectIO(); - } -#endif - if (FLAGS_statistics) { - dbstats = ROCKSDB_NAMESPACE::CreateDBStatistics(); - if (FLAGS_test_secondary) { - dbstats_secondaries = ROCKSDB_NAMESPACE::CreateDBStatistics(); - } - } - compression_type_e = StringToCompressionType(FLAGS_compression_type.c_str()); - bottommost_compression_type_e = - StringToCompressionType(FLAGS_bottommost_compression_type.c_str()); - checksum_type_e = StringToChecksumType(FLAGS_checksum_type.c_str()); - - Env* raw_env; - - int env_opts = !FLAGS_env_uri.empty() + !FLAGS_fs_uri.empty(); - if (env_opts > 1) { - fprintf(stderr, "Error: --env_uri and --fs_uri are mutually exclusive\n"); - exit(1); - } - - Status s = Env::CreateFromUri(ConfigOptions(), FLAGS_env_uri, FLAGS_fs_uri, - &raw_env, &env_guard); - if (!s.ok()) { - fprintf(stderr, "Error Creating Env URI: %s: %s\n", FLAGS_env_uri.c_str(), - s.ToString().c_str()); - exit(1); - } - dbsl_env_wrapper_guard = std::make_shared(raw_env); - db_stress_listener_env = dbsl_env_wrapper_guard.get(); - - if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection || - FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in || - FLAGS_open_write_fault_one_in || FLAGS_open_read_fault_one_in) { - FaultInjectionTestFS* fs = - new FaultInjectionTestFS(raw_env->GetFileSystem()); - fault_fs_guard.reset(fs); - if (FLAGS_write_fault_one_in) { - fault_fs_guard->SetFilesystemDirectWritable(false); - } else { - fault_fs_guard->SetFilesystemDirectWritable(true); - } - fault_env_guard = - std::make_shared(raw_env, fault_fs_guard); - raw_env = fault_env_guard.get(); - } - - env_wrapper_guard = std::make_shared( - raw_env, std::make_shared(raw_env->GetFileSystem())); - if 
(!env_opts && !FLAGS_use_io_uring) { - // If using the default Env (Posix), wrap DbStressEnvWrapper with the - // legacy EnvWrapper. This is a workaround to prevent MultiGet and scans - // from failing when IO uring is disabled. The EnvWrapper - // has a default implementation of ReadAsync that redirects to Read. - env_wrapper_guard = std::make_shared(env_wrapper_guard); - } - db_stress_env = env_wrapper_guard.get(); - - FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str()); - - // The number of background threads should be at least as much the - // max number of concurrent compactions. - db_stress_env->SetBackgroundThreads(FLAGS_max_background_compactions, - ROCKSDB_NAMESPACE::Env::Priority::LOW); - db_stress_env->SetBackgroundThreads(FLAGS_num_bottom_pri_threads, - ROCKSDB_NAMESPACE::Env::Priority::BOTTOM); - if (FLAGS_prefixpercent > 0 && FLAGS_prefix_size < 0) { - fprintf(stderr, - "Error: prefixpercent is non-zero while prefix_size is " - "not positive!\n"); - exit(1); - } - if (FLAGS_test_batches_snapshots && FLAGS_prefix_size <= 0) { - fprintf(stderr, - "Error: please specify prefix_size for " - "test_batches_snapshots test!\n"); - exit(1); - } - if (FLAGS_memtable_prefix_bloom_size_ratio > 0.0 && FLAGS_prefix_size < 0 && - !FLAGS_memtable_whole_key_filtering) { - fprintf(stderr, - "Error: please specify positive prefix_size or enable whole key " - "filtering in order to use memtable_prefix_bloom_size_ratio\n"); - exit(1); - } - if ((FLAGS_readpercent + FLAGS_prefixpercent + FLAGS_writepercent + - FLAGS_delpercent + FLAGS_delrangepercent + FLAGS_iterpercent + - FLAGS_customopspercent) != 100) { - fprintf( - stderr, - "Error: " - "Read(-readpercent=%d)+Prefix(-prefixpercent=%d)+Write(-writepercent=%" - "d)+Delete(-delpercent=%d)+DeleteRange(-delrangepercent=%d)" - "+Iterate(-iterpercent=%d)+CustomOps(-customopspercent=%d) percents != " - "100!\n", - FLAGS_readpercent, FLAGS_prefixpercent, FLAGS_writepercent, - FLAGS_delpercent, FLAGS_delrangepercent, FLAGS_iterpercent, - FLAGS_customopspercent); - exit(1); - } - if (FLAGS_disable_wal == 1 && FLAGS_reopen > 0) { - fprintf(stderr, "Error: Db cannot reopen safely with disable_wal set!\n"); - exit(1); - } - if ((unsigned)FLAGS_reopen >= FLAGS_ops_per_thread) { - fprintf(stderr, - "Error: #DB-reopens should be < ops_per_thread\n" - "Provided reopens = %d and ops_per_thread = %lu\n", - FLAGS_reopen, (unsigned long)FLAGS_ops_per_thread); - exit(1); - } - if (FLAGS_test_batches_snapshots && FLAGS_delrangepercent > 0) { - fprintf(stderr, - "Error: nonzero delrangepercent unsupported in " - "test_batches_snapshots mode\n"); - exit(1); - } - if (FLAGS_active_width > FLAGS_max_key) { - fprintf(stderr, "Error: active_width can be at most max_key\n"); - exit(1); - } else if (FLAGS_active_width == 0) { - FLAGS_active_width = FLAGS_max_key; - } - if (FLAGS_value_size_mult * kRandomValueMaxFactor > kValueMaxLen) { - fprintf(stderr, "Error: value_size_mult can be at most %d\n", - kValueMaxLen / kRandomValueMaxFactor); - exit(1); - } - if (FLAGS_use_merge && FLAGS_nooverwritepercent == 100) { - fprintf( - stderr, - "Error: nooverwritepercent must not be 100 when using merge operands"); - exit(1); - } - if (FLAGS_ingest_external_file_one_in > 0 && - FLAGS_nooverwritepercent == 100) { - fprintf( - stderr, - "Error: nooverwritepercent must not be 100 when using file ingestion"); - exit(1); - } - if (FLAGS_clear_column_family_one_in > 0 && FLAGS_backup_one_in > 0) { - fprintf(stderr, - "Error: clear_column_family_one_in must be 0 when using 
backup\n"); - exit(1); - } - if (FLAGS_test_cf_consistency && FLAGS_disable_wal) { - FLAGS_atomic_flush = true; - } - - if (FLAGS_read_only) { - if (FLAGS_writepercent != 0 || FLAGS_delpercent != 0 || - FLAGS_delrangepercent != 0) { - fprintf(stderr, "Error: updates are not supported in read only mode\n"); - exit(1); - } else if (FLAGS_checkpoint_one_in > 0 && - FLAGS_clear_column_family_one_in > 0) { - fprintf(stdout, - "Warn: checkpoint won't be validated since column families may " - "be dropped.\n"); - } - } - - // Choose a location for the test database if none given with --db= - if (FLAGS_db.empty()) { - std::string default_db_path; - db_stress_env->GetTestDirectory(&default_db_path); - default_db_path += "/dbstress"; - FLAGS_db = default_db_path; - } - - if ((FLAGS_test_secondary || FLAGS_continuous_verification_interval > 0) && - FLAGS_secondaries_base.empty()) { - std::string default_secondaries_path; - db_stress_env->GetTestDirectory(&default_secondaries_path); - default_secondaries_path += "/dbstress_secondaries"; - s = db_stress_env->CreateDirIfMissing(default_secondaries_path); - if (!s.ok()) { - fprintf(stderr, "Failed to create directory %s: %s\n", - default_secondaries_path.c_str(), s.ToString().c_str()); - exit(1); - } - FLAGS_secondaries_base = default_secondaries_path; - } - - if (FLAGS_best_efforts_recovery && !FLAGS_skip_verifydb && - !FLAGS_disable_wal) { - fprintf(stderr, - "With best-efforts recovery, either skip_verifydb or disable_wal " - "should be set to true.\n"); - exit(1); - } - if (FLAGS_skip_verifydb) { - if (FLAGS_verify_db_one_in > 0) { - fprintf(stderr, - "Must set -verify_db_one_in=0 if skip_verifydb is true.\n"); - exit(1); - } - if (FLAGS_continuous_verification_interval > 0) { - fprintf(stderr, - "Must set -continuous_verification_interval=0 if skip_verifydb " - "is true.\n"); - exit(1); - } - } - if (FLAGS_enable_compaction_filter && - (FLAGS_acquire_snapshot_one_in > 0 || FLAGS_compact_range_one_in > 0 || - FLAGS_iterpercent > 0 || FLAGS_test_batches_snapshots || - FLAGS_test_cf_consistency)) { - fprintf( - stderr, - "Error: acquire_snapshot_one_in, compact_range_one_in, iterpercent, " - "test_batches_snapshots must all be 0 when using compaction filter\n"); - exit(1); - } - if (FLAGS_test_multi_ops_txns) { - CheckAndSetOptionsForMultiOpsTxnStressTest(); - } - - if (FLAGS_create_timestamped_snapshot_one_in > 0) { - if (!FLAGS_use_txn) { - fprintf(stderr, "timestamped snapshot supported only in TransactionDB\n"); - exit(1); - } else if (FLAGS_txn_write_policy != 0) { - fprintf(stderr, - "timestamped snapshot supported only in write-committed\n"); - exit(1); - } - } - - if (FLAGS_preserve_unverified_changes && FLAGS_reopen != 0) { - fprintf(stderr, - "Reopen DB is incompatible with preserving unverified changes\n"); - exit(1); - } - - if (FLAGS_use_txn && FLAGS_sync_fault_injection && - FLAGS_txn_write_policy != 0) { - fprintf(stderr, - "For TransactionDB, correctness testing with unsync data loss is " - "currently compatible with only write committed policy\n"); - exit(1); - } - - if (FLAGS_use_put_entity_one_in > 0 && - (FLAGS_ingest_external_file_one_in > 0 || FLAGS_use_merge || - FLAGS_use_full_merge_v1 || FLAGS_use_txn || FLAGS_test_multi_ops_txns || - FLAGS_user_timestamp_size > 0)) { - fprintf(stderr, - "PutEntity is currently incompatible with SstFileWriter, Merge," - " transactions, and user-defined timestamps\n"); - exit(1); - } - -#ifndef NDEBUG - KillPoint* kp = KillPoint::GetInstance(); - kp->rocksdb_kill_odds = FLAGS_kill_random_test; - 
kp->rocksdb_kill_exclude_prefixes = SplitString(FLAGS_kill_exclude_prefixes); -#endif - - unsigned int levels = FLAGS_max_key_len; - std::vector weights; - uint64_t scale_factor = FLAGS_key_window_scale_factor; - key_gen_ctx.window = scale_factor * 100; - if (!FLAGS_key_len_percent_dist.empty()) { - weights = SplitString(FLAGS_key_len_percent_dist); - if (weights.size() != levels) { - fprintf(stderr, - "Number of weights in key_len_dist should be equal to" - " max_key_len"); - exit(1); - } - - uint64_t total_weight = 0; - for (std::string& weight : weights) { - uint64_t val = std::stoull(weight); - key_gen_ctx.weights.emplace_back(val * scale_factor); - total_weight += val; - } - if (total_weight != 100) { - fprintf(stderr, "Sum of all weights in key_len_dist should be 100"); - exit(1); - } - } else { - uint64_t keys_per_level = key_gen_ctx.window / levels; - for (unsigned int level = 0; level + 1 < levels; ++level) { - key_gen_ctx.weights.emplace_back(keys_per_level); - } - key_gen_ctx.weights.emplace_back(key_gen_ctx.window - - keys_per_level * (levels - 1)); - } - std::unique_ptr shared; - std::unique_ptr stress; - if (FLAGS_test_cf_consistency) { - stress.reset(CreateCfConsistencyStressTest()); - } else if (FLAGS_test_batches_snapshots) { - stress.reset(CreateBatchedOpsStressTest()); - } else if (FLAGS_test_multi_ops_txns) { - stress.reset(CreateMultiOpsTxnsStressTest()); - } else { - stress.reset(CreateNonBatchedOpsStressTest()); - } - // Initialize the Zipfian pre-calculated array - InitializeHotKeyGenerator(FLAGS_hot_key_alpha); - shared.reset(new SharedState(db_stress_env, stress.get())); - if (RunStressTest(shared.get())) { - return 0; - } else { - return 1; - } -} - -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/expected_state.cc b/db_stress_tool/expected_state.cc deleted file mode 100644 index 0d921c712..000000000 --- a/db_stress_tool/expected_state.cc +++ /dev/null @@ -1,745 +0,0 @@ -// Copyright (c) 2021-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifdef GFLAGS - -#include "db_stress_tool/expected_state.h" - -#include "db/wide/wide_column_serialization.h" -#include "db_stress_tool/db_stress_common.h" -#include "db_stress_tool/db_stress_shared_state.h" -#include "rocksdb/trace_reader_writer.h" -#include "rocksdb/trace_record_result.h" - -namespace ROCKSDB_NAMESPACE { - -ExpectedState::ExpectedState(size_t max_key, size_t num_column_families) - : max_key_(max_key), - num_column_families_(num_column_families), - values_(nullptr) {} - -void ExpectedState::ClearColumnFamily(int cf) { - std::fill(&Value(cf, 0 /* key */), &Value(cf + 1, 0 /* key */), - SharedState::DELETION_SENTINEL); -} - -void ExpectedState::Put(int cf, int64_t key, uint32_t value_base, - bool pending) { - if (!pending) { - // prevent expected-value update from reordering before Write - std::atomic_thread_fence(std::memory_order_release); - } - Value(cf, key).store(pending ? 
SharedState::UNKNOWN_SENTINEL : value_base, - std::memory_order_relaxed); - if (pending) { - // prevent Write from reordering before expected-value update - std::atomic_thread_fence(std::memory_order_release); - } -} - -uint32_t ExpectedState::Get(int cf, int64_t key) const { - return Value(cf, key); -} - -bool ExpectedState::Delete(int cf, int64_t key, bool pending) { - if (Value(cf, key) == SharedState::DELETION_SENTINEL) { - return false; - } - Put(cf, key, SharedState::DELETION_SENTINEL, pending); - return true; -} - -bool ExpectedState::SingleDelete(int cf, int64_t key, bool pending) { - return Delete(cf, key, pending); -} - -int ExpectedState::DeleteRange(int cf, int64_t begin_key, int64_t end_key, - bool pending) { - int covered = 0; - for (int64_t key = begin_key; key < end_key; ++key) { - if (Delete(cf, key, pending)) { - ++covered; - } - } - return covered; -} - -bool ExpectedState::Exists(int cf, int64_t key) { - // UNKNOWN_SENTINEL counts as exists. That assures a key for which overwrite - // is disallowed can't be accidentally added a second time, in which case - // SingleDelete wouldn't be able to properly delete the key. It does allow - // the case where a SingleDelete might be added which covers nothing, but - // that's not a correctness issue. - uint32_t expected_value = Value(cf, key).load(); - return expected_value != SharedState::DELETION_SENTINEL; -} - -void ExpectedState::Reset() { - for (size_t i = 0; i < num_column_families_; ++i) { - for (size_t j = 0; j < max_key_; ++j) { - Value(static_cast(i), j) - .store(SharedState::DELETION_SENTINEL, std::memory_order_relaxed); - } - } -} - -FileExpectedState::FileExpectedState(std::string expected_state_file_path, - size_t max_key, size_t num_column_families) - : ExpectedState(max_key, num_column_families), - expected_state_file_path_(expected_state_file_path) {} - -Status FileExpectedState::Open(bool create) { - size_t expected_values_size = GetValuesLen(); - - Env* default_env = Env::Default(); - - Status status; - if (create) { - std::unique_ptr wfile; - const EnvOptions soptions; - status = default_env->NewWritableFile(expected_state_file_path_, &wfile, - soptions); - if (status.ok()) { - std::string buf(expected_values_size, '\0'); - status = wfile->Append(buf); - } - } - if (status.ok()) { - status = default_env->NewMemoryMappedFileBuffer( - expected_state_file_path_, &expected_state_mmap_buffer_); - } - if (status.ok()) { - assert(expected_state_mmap_buffer_->GetLen() == expected_values_size); - values_ = static_cast*>( - expected_state_mmap_buffer_->GetBase()); - assert(values_ != nullptr); - if (create) { - Reset(); - } - } else { - assert(values_ == nullptr); - } - return status; -} - -AnonExpectedState::AnonExpectedState(size_t max_key, size_t num_column_families) - : ExpectedState(max_key, num_column_families) {} - -#ifndef NDEBUG -Status AnonExpectedState::Open(bool create) { -#else -Status AnonExpectedState::Open(bool /* create */) { -#endif - // AnonExpectedState only supports being freshly created. 
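ExpectedState::Put() above encodes a two-phase protocol: before the corresponding DB write, the key is marked with the unknown sentinel, and only afterwards is it flipped to the committed value, with release fences keeping that bookkeeping from reordering around the write. A condensed sketch of the protocol; the sentinel constant, the atomic slot, and the `do_db_write` callback are illustrative stand-ins rather than the stress tool's API:

```cpp
// Illustrative only: the two-phase expected-value update performed around every
// DB write in the stress test. "slot" stands in for one entry of an
// ExpectedState-like array; the sentinel value is a stand-in for illustration.
#include <atomic>
#include <cstdint>
#include <functional>

constexpr uint32_t kUnknownSentinel = 0xfffffffe;  // stand-in sentinel value

void WriteWithExpectedState(std::atomic<uint32_t>& slot, uint32_t new_value_base,
                            const std::function<bool()>& do_db_write) {
  // Phase 1: mark the key "pending" before touching the DB, so a crash between
  // here and the DB write leaves the verifier tolerant of either outcome.
  slot.store(kUnknownSentinel, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_release);

  if (!do_db_write()) {
    return;  // DB write failed; leave the slot UNKNOWN for the verifier
  }

  // Phase 2: only after the DB write has been issued, publish the committed
  // value, again fenced so it cannot reorder ahead of the write.
  std::atomic_thread_fence(std::memory_order_release);
  slot.store(new_value_base, std::memory_order_relaxed);
}
```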
- assert(create); - values_allocation_.reset( - new std::atomic[GetValuesLen() / - sizeof(std::atomic)]); - values_ = &values_allocation_[0]; - Reset(); - return Status::OK(); -} - -ExpectedStateManager::ExpectedStateManager(size_t max_key, - size_t num_column_families) - : max_key_(max_key), - num_column_families_(num_column_families), - latest_(nullptr) {} - -ExpectedStateManager::~ExpectedStateManager() {} - -const std::string FileExpectedStateManager::kLatestBasename = "LATEST"; -const std::string FileExpectedStateManager::kStateFilenameSuffix = ".state"; -const std::string FileExpectedStateManager::kTraceFilenameSuffix = ".trace"; -const std::string FileExpectedStateManager::kTempFilenamePrefix = "."; -const std::string FileExpectedStateManager::kTempFilenameSuffix = ".tmp"; - -FileExpectedStateManager::FileExpectedStateManager( - size_t max_key, size_t num_column_families, - std::string expected_state_dir_path) - : ExpectedStateManager(max_key, num_column_families), - expected_state_dir_path_(std::move(expected_state_dir_path)) { - assert(!expected_state_dir_path_.empty()); -} - -Status FileExpectedStateManager::Open() { - // Before doing anything, sync directory state with ours. That is, determine - // `saved_seqno_`, and create any necessary missing files. - std::vector expected_state_dir_children; - Status s = Env::Default()->GetChildren(expected_state_dir_path_, - &expected_state_dir_children); - bool found_trace = false; - if (s.ok()) { - for (size_t i = 0; i < expected_state_dir_children.size(); ++i) { - const auto& filename = expected_state_dir_children[i]; - if (filename.size() >= kStateFilenameSuffix.size() && - filename.rfind(kStateFilenameSuffix) == - filename.size() - kStateFilenameSuffix.size() && - filename.rfind(kLatestBasename, 0) == std::string::npos) { - SequenceNumber found_seqno = ParseUint64( - filename.substr(0, filename.size() - kStateFilenameSuffix.size())); - if (saved_seqno_ == kMaxSequenceNumber || found_seqno > saved_seqno_) { - saved_seqno_ = found_seqno; - } - } - } - // Check if crash happened after creating state file but before creating - // trace file. - if (saved_seqno_ != kMaxSequenceNumber) { - std::string saved_seqno_trace_path = GetPathForFilename( - std::to_string(saved_seqno_) + kTraceFilenameSuffix); - Status exists_status = Env::Default()->FileExists(saved_seqno_trace_path); - if (exists_status.ok()) { - found_trace = true; - } else if (exists_status.IsNotFound()) { - found_trace = false; - } else { - s = exists_status; - } - } - } - if (s.ok() && saved_seqno_ != kMaxSequenceNumber && !found_trace) { - // Create an empty trace file so later logic does not need to distinguish - // missing vs. empty trace file. - std::unique_ptr wfile; - const EnvOptions soptions; - std::string saved_seqno_trace_path = - GetPathForFilename(std::to_string(saved_seqno_) + kTraceFilenameSuffix); - s = Env::Default()->NewWritableFile(saved_seqno_trace_path, &wfile, - soptions); - } - - if (s.ok()) { - s = Clean(); - } - - std::string expected_state_file_path = - GetPathForFilename(kLatestBasename + kStateFilenameSuffix); - bool found = false; - if (s.ok()) { - Status exists_status = Env::Default()->FileExists(expected_state_file_path); - if (exists_status.ok()) { - found = true; - } else if (exists_status.IsNotFound()) { - found = false; - } else { - s = exists_status; - } - } - - if (!found) { - // Initialize the file in a temp path and then rename it. 
That way, in case - // this process is killed during setup, `Clean()` will take care of removing - // the incomplete expected values file. - std::string temp_expected_state_file_path = - GetTempPathForFilename(kLatestBasename + kStateFilenameSuffix); - FileExpectedState temp_expected_state(temp_expected_state_file_path, - max_key_, num_column_families_); - if (s.ok()) { - s = temp_expected_state.Open(true /* create */); - } - if (s.ok()) { - s = Env::Default()->RenameFile(temp_expected_state_file_path, - expected_state_file_path); - } - } - - if (s.ok()) { - latest_.reset(new FileExpectedState(std::move(expected_state_file_path), - max_key_, num_column_families_)); - s = latest_->Open(false /* create */); - } - return s; -} - -Status FileExpectedStateManager::SaveAtAndAfter(DB* db) { - SequenceNumber seqno = db->GetLatestSequenceNumber(); - - std::string state_filename = std::to_string(seqno) + kStateFilenameSuffix; - std::string state_file_temp_path = GetTempPathForFilename(state_filename); - std::string state_file_path = GetPathForFilename(state_filename); - - std::string latest_file_path = - GetPathForFilename(kLatestBasename + kStateFilenameSuffix); - - std::string trace_filename = std::to_string(seqno) + kTraceFilenameSuffix; - std::string trace_file_path = GetPathForFilename(trace_filename); - - // Populate a tempfile and then rename it to atomically create ".state" - // with contents from "LATEST.state" - Status s = CopyFile(FileSystem::Default(), latest_file_path, - state_file_temp_path, 0 /* size */, false /* use_fsync */, - nullptr /* io_tracer */, Temperature::kUnknown); - if (s.ok()) { - s = FileSystem::Default()->RenameFile(state_file_temp_path, state_file_path, - IOOptions(), nullptr /* dbg */); - } - SequenceNumber old_saved_seqno = 0; - if (s.ok()) { - old_saved_seqno = saved_seqno_; - saved_seqno_ = seqno; - } - - // If there is a crash now, i.e., after ".state" was created but before - // ".trace" is created, it will be treated as if ".trace" were - // present but empty. - - // Create ".trace" directly. It is initially empty so no need for - // tempfile. - std::unique_ptr trace_writer; - if (s.ok()) { - EnvOptions soptions; - // Disable buffering so traces will not get stuck in application buffer. - soptions.writable_file_max_buffer_size = 0; - s = NewFileTraceWriter(Env::Default(), soptions, trace_file_path, - &trace_writer); - } - if (s.ok()) { - TraceOptions trace_opts; - trace_opts.filter |= kTraceFilterGet; - trace_opts.filter |= kTraceFilterMultiGet; - trace_opts.filter |= kTraceFilterIteratorSeek; - trace_opts.filter |= kTraceFilterIteratorSeekForPrev; - trace_opts.preserve_write_order = true; - s = db->StartTrace(trace_opts, std::move(trace_writer)); - } - - // Delete old state/trace files. Deletion order does not matter since we only - // delete after successfully saving new files, so old files will never be used - // again, even if we crash. 
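Both Open() and SaveAtAndAfter() above publish `.state` files by filling a dot-prefixed temp file and then renaming it, so a crash can never leave a partially written file under its final name; Clean() later sweeps up anything still carrying the temp prefix. A minimal sketch of that publish-by-rename pattern using the Env API, with made-up paths and contents:

```cpp
// Illustrative only: write a temp file, then atomically publish it under its
// final name via RenameFile(), as the expected-state files above are published.
#include <memory>
#include <string>

#include "rocksdb/env.h"

ROCKSDB_NAMESPACE::Status PublishAtomically(const std::string& dir,
                                            const std::string& name,
                                            const std::string& contents) {
  using namespace ROCKSDB_NAMESPACE;
  Env* env = Env::Default();
  const std::string tmp_path = dir + "/." + name + ".tmp";
  const std::string final_path = dir + "/" + name;

  std::unique_ptr<WritableFile> wfile;
  Status s = env->NewWritableFile(tmp_path, &wfile, EnvOptions());
  if (s.ok()) s = wfile->Append(contents);
  if (s.ok()) s = wfile->Close();
  // Only once the temp file is fully written does it become visible under its
  // final name; readers never observe a half-written file.
  if (s.ok()) s = env->RenameFile(tmp_path, final_path);
  return s;
}
```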
- if (s.ok() && old_saved_seqno != kMaxSequenceNumber && - old_saved_seqno != saved_seqno_) { - s = Env::Default()->DeleteFile(GetPathForFilename( - std::to_string(old_saved_seqno) + kStateFilenameSuffix)); - } - if (s.ok() && old_saved_seqno != kMaxSequenceNumber && - old_saved_seqno != saved_seqno_) { - s = Env::Default()->DeleteFile(GetPathForFilename( - std::to_string(old_saved_seqno) + kTraceFilenameSuffix)); - } - return s; -} - -bool FileExpectedStateManager::HasHistory() { - return saved_seqno_ != kMaxSequenceNumber; -} - - -namespace { - -// An `ExpectedStateTraceRecordHandler` applies a configurable number of -// write operation trace records to the configured expected state. It is used in -// `FileExpectedStateManager::Restore()` to sync the expected state with the -// DB's post-recovery state. -class ExpectedStateTraceRecordHandler : public TraceRecord::Handler, - public WriteBatch::Handler { - public: - ExpectedStateTraceRecordHandler(uint64_t max_write_ops, ExpectedState* state) - : max_write_ops_(max_write_ops), - state_(state), - buffered_writes_(nullptr) {} - - ~ExpectedStateTraceRecordHandler() { assert(IsDone()); } - - // True if we have already reached the limit on write operations to apply. - bool IsDone() { return num_write_ops_ == max_write_ops_; } - - Status Handle(const WriteQueryTraceRecord& record, - std::unique_ptr* /* result */) override { - if (IsDone()) { - return Status::OK(); - } - WriteBatch batch(record.GetWriteBatchRep().ToString()); - return batch.Iterate(this); - } - - // Ignore reads. - Status Handle(const GetQueryTraceRecord& /* record */, - std::unique_ptr* /* result */) override { - return Status::OK(); - } - - // Ignore reads. - Status Handle(const IteratorSeekQueryTraceRecord& /* record */, - std::unique_ptr* /* result */) override { - return Status::OK(); - } - - // Ignore reads. - Status Handle(const MultiGetQueryTraceRecord& /* record */, - std::unique_ptr* /* result */) override { - return Status::OK(); - } - - // Below are the WriteBatch::Handler overrides. We could use a separate - // object, but it's convenient and works to share state with the - // `TraceRecord::Handler`. 
- - Status PutCF(uint32_t column_family_id, const Slice& key_with_ts, - const Slice& value) override { - Slice key = - StripTimestampFromUserKey(key_with_ts, FLAGS_user_timestamp_size); - uint64_t key_id; - if (!GetIntVal(key.ToString(), &key_id)) { - return Status::Corruption("unable to parse key", key.ToString()); - } - uint32_t value_id = GetValueBase(value); - - bool should_buffer_write = !(buffered_writes_ == nullptr); - if (should_buffer_write) { - return WriteBatchInternal::Put(buffered_writes_.get(), column_family_id, - key, value); - } - - state_->Put(column_family_id, static_cast(key_id), value_id, - false /* pending */); - ++num_write_ops_; - return Status::OK(); - } - - Status PutEntityCF(uint32_t column_family_id, const Slice& key_with_ts, - const Slice& entity) override { - Slice key = - StripTimestampFromUserKey(key_with_ts, FLAGS_user_timestamp_size); - - uint64_t key_id = 0; - if (!GetIntVal(key.ToString(), &key_id)) { - return Status::Corruption("Unable to parse key", key.ToString()); - } - - Slice entity_copy = entity; - WideColumns columns; - if (!WideColumnSerialization::Deserialize(entity_copy, columns).ok()) { - return Status::Corruption("Unable to deserialize entity", - entity.ToString(/* hex */ true)); - } - - if (!VerifyWideColumns(columns)) { - return Status::Corruption("Wide columns in entity inconsistent", - entity.ToString(/* hex */ true)); - } - - if (buffered_writes_) { - return WriteBatchInternal::PutEntity(buffered_writes_.get(), - column_family_id, key, columns); - } - - assert(!columns.empty()); - assert(columns.front().name() == kDefaultWideColumnName); - - const uint32_t value_base = GetValueBase(columns.front().value()); - - state_->Put(column_family_id, static_cast(key_id), value_base, - false /* pending */); - - ++num_write_ops_; - - return Status::OK(); - } - - Status DeleteCF(uint32_t column_family_id, - const Slice& key_with_ts) override { - Slice key = - StripTimestampFromUserKey(key_with_ts, FLAGS_user_timestamp_size); - uint64_t key_id; - if (!GetIntVal(key.ToString(), &key_id)) { - return Status::Corruption("unable to parse key", key.ToString()); - } - - bool should_buffer_write = !(buffered_writes_ == nullptr); - if (should_buffer_write) { - return WriteBatchInternal::Delete(buffered_writes_.get(), - column_family_id, key); - } - - state_->Delete(column_family_id, static_cast(key_id), - false /* pending */); - ++num_write_ops_; - return Status::OK(); - } - - Status SingleDeleteCF(uint32_t column_family_id, - const Slice& key_with_ts) override { - bool should_buffer_write = !(buffered_writes_ == nullptr); - if (should_buffer_write) { - Slice key = - StripTimestampFromUserKey(key_with_ts, FLAGS_user_timestamp_size); - Slice ts = - ExtractTimestampFromUserKey(key_with_ts, FLAGS_user_timestamp_size); - std::array key_with_ts_arr{{key, ts}}; - return WriteBatchInternal::SingleDelete( - buffered_writes_.get(), column_family_id, - SliceParts(key_with_ts_arr.data(), 2)); - } - - return DeleteCF(column_family_id, key_with_ts); - } - - Status DeleteRangeCF(uint32_t column_family_id, - const Slice& begin_key_with_ts, - const Slice& end_key_with_ts) override { - Slice begin_key = - StripTimestampFromUserKey(begin_key_with_ts, FLAGS_user_timestamp_size); - Slice end_key = - StripTimestampFromUserKey(end_key_with_ts, FLAGS_user_timestamp_size); - uint64_t begin_key_id, end_key_id; - if (!GetIntVal(begin_key.ToString(), &begin_key_id)) { - return Status::Corruption("unable to parse begin key", - begin_key.ToString()); - } - if 
(!GetIntVal(end_key.ToString(), &end_key_id)) { - return Status::Corruption("unable to parse end key", end_key.ToString()); - } - - bool should_buffer_write = !(buffered_writes_ == nullptr); - if (should_buffer_write) { - return WriteBatchInternal::DeleteRange( - buffered_writes_.get(), column_family_id, begin_key, end_key); - } - - state_->DeleteRange(column_family_id, static_cast(begin_key_id), - static_cast(end_key_id), false /* pending */); - ++num_write_ops_; - return Status::OK(); - } - - Status MergeCF(uint32_t column_family_id, const Slice& key_with_ts, - const Slice& value) override { - Slice key = - StripTimestampFromUserKey(key_with_ts, FLAGS_user_timestamp_size); - - bool should_buffer_write = !(buffered_writes_ == nullptr); - if (should_buffer_write) { - return WriteBatchInternal::Merge(buffered_writes_.get(), column_family_id, - key, value); - } - - return PutCF(column_family_id, key, value); - } - - Status MarkBeginPrepare(bool = false) override { - assert(!buffered_writes_); - buffered_writes_.reset(new WriteBatch()); - return Status::OK(); - } - - Status MarkEndPrepare(const Slice& xid) override { - assert(buffered_writes_); - std::string xid_str = xid.ToString(); - assert(xid_to_buffered_writes_.find(xid_str) == - xid_to_buffered_writes_.end()); - - xid_to_buffered_writes_[xid_str].swap(buffered_writes_); - - buffered_writes_.reset(); - - return Status::OK(); - } - - Status MarkCommit(const Slice& xid) override { - std::string xid_str = xid.ToString(); - assert(xid_to_buffered_writes_.find(xid_str) != - xid_to_buffered_writes_.end()); - assert(xid_to_buffered_writes_.at(xid_str)); - - Status s = xid_to_buffered_writes_.at(xid_str)->Iterate(this); - xid_to_buffered_writes_.erase(xid_str); - - return s; - } - - Status MarkRollback(const Slice& xid) override { - std::string xid_str = xid.ToString(); - assert(xid_to_buffered_writes_.find(xid_str) != - xid_to_buffered_writes_.end()); - assert(xid_to_buffered_writes_.at(xid_str)); - xid_to_buffered_writes_.erase(xid_str); - - return Status::OK(); - } - - private: - uint64_t num_write_ops_ = 0; - uint64_t max_write_ops_; - ExpectedState* state_; - std::unordered_map> - xid_to_buffered_writes_; - std::unique_ptr buffered_writes_; -}; - -} // anonymous namespace - -Status FileExpectedStateManager::Restore(DB* db) { - assert(HasHistory()); - SequenceNumber seqno = db->GetLatestSequenceNumber(); - if (seqno < saved_seqno_) { - return Status::Corruption("DB is older than any restorable expected state"); - } - - std::string state_filename = - std::to_string(saved_seqno_) + kStateFilenameSuffix; - std::string state_file_path = GetPathForFilename(state_filename); - - std::string latest_file_temp_path = - GetTempPathForFilename(kLatestBasename + kStateFilenameSuffix); - std::string latest_file_path = - GetPathForFilename(kLatestBasename + kStateFilenameSuffix); - - std::string trace_filename = - std::to_string(saved_seqno_) + kTraceFilenameSuffix; - std::string trace_file_path = GetPathForFilename(trace_filename); - - std::unique_ptr trace_reader; - Status s = NewFileTraceReader(Env::Default(), EnvOptions(), trace_file_path, - &trace_reader); - - if (s.ok()) { - // We are going to replay on top of "`seqno`.state" to create a new - // "LATEST.state". Start off by creating a tempfile so we can later make the - // new "LATEST.state" appear atomically using `RenameFile()`. 
- s = CopyFile(FileSystem::Default(), state_file_path, latest_file_temp_path, - 0 /* size */, false /* use_fsync */, nullptr /* io_tracer */, - Temperature::kUnknown); - } - - { - std::unique_ptr replayer; - std::unique_ptr state; - std::unique_ptr handler; - if (s.ok()) { - state.reset(new FileExpectedState(latest_file_temp_path, max_key_, - num_column_families_)); - s = state->Open(false /* create */); - } - if (s.ok()) { - handler.reset(new ExpectedStateTraceRecordHandler(seqno - saved_seqno_, - state.get())); - // TODO(ajkr): An API limitation requires we provide `handles` although - // they will be unused since we only use the replayer for reading records. - // Just give a default CFH for now to satisfy the requirement. - s = db->NewDefaultReplayer({db->DefaultColumnFamily()} /* handles */, - std::move(trace_reader), &replayer); - } - - if (s.ok()) { - s = replayer->Prepare(); - } - for (;;) { - std::unique_ptr record; - s = replayer->Next(&record); - if (!s.ok()) { - break; - } - std::unique_ptr res; - record->Accept(handler.get(), &res); - } - if (s.IsCorruption() && handler->IsDone()) { - // There could be a corruption reading the tail record of the trace due to - // `db_stress` crashing while writing it. It shouldn't matter as long as - // we already found all the write ops we need to catch up the expected - // state. - s = Status::OK(); - } - if (s.IsIncomplete()) { - // OK because `Status::Incomplete` is expected upon finishing all the - // trace records. - s = Status::OK(); - } - } - - if (s.ok()) { - s = FileSystem::Default()->RenameFile(latest_file_temp_path, - latest_file_path, IOOptions(), - nullptr /* dbg */); - } - if (s.ok()) { - latest_.reset(new FileExpectedState(latest_file_path, max_key_, - num_column_families_)); - s = latest_->Open(false /* create */); - } - - // Delete old state/trace files. We must delete the state file first. - // Otherwise, a crash-recovery immediately after deleting the trace file could - // lead to `Restore()` unable to replay to `seqno`. - if (s.ok()) { - s = Env::Default()->DeleteFile(state_file_path); - } - if (s.ok()) { - saved_seqno_ = kMaxSequenceNumber; - s = Env::Default()->DeleteFile(trace_file_path); - } - return s; -} - -Status FileExpectedStateManager::Clean() { - std::vector expected_state_dir_children; - Status s = Env::Default()->GetChildren(expected_state_dir_path_, - &expected_state_dir_children); - // An incomplete `Open()` or incomplete `SaveAtAndAfter()` could have left - // behind invalid temporary files. An incomplete `SaveAtAndAfter()` could have - // also left behind stale state/trace files. An incomplete `Restore()` could - // have left behind stale trace files. - for (size_t i = 0; s.ok() && i < expected_state_dir_children.size(); ++i) { - const auto& filename = expected_state_dir_children[i]; - if (filename.rfind(kTempFilenamePrefix, 0 /* pos */) == 0 && - filename.size() >= kTempFilenameSuffix.size() && - filename.rfind(kTempFilenameSuffix) == - filename.size() - kTempFilenameSuffix.size()) { - // Delete all temp files. - s = Env::Default()->DeleteFile(GetPathForFilename(filename)); - } else if (filename.size() >= kStateFilenameSuffix.size() && - filename.rfind(kStateFilenameSuffix) == - filename.size() - kStateFilenameSuffix.size() && - filename.rfind(kLatestBasename, 0) == std::string::npos && - ParseUint64(filename.substr( - 0, filename.size() - kStateFilenameSuffix.size())) < - saved_seqno_) { - assert(saved_seqno_ != kMaxSequenceNumber); - // Delete stale state files. 
- s = Env::Default()->DeleteFile(GetPathForFilename(filename)); - } else if (filename.size() >= kTraceFilenameSuffix.size() && - filename.rfind(kTraceFilenameSuffix) == - filename.size() - kTraceFilenameSuffix.size() && - ParseUint64(filename.substr( - 0, filename.size() - kTraceFilenameSuffix.size())) < - saved_seqno_) { - // Delete stale trace files. - s = Env::Default()->DeleteFile(GetPathForFilename(filename)); - } - } - return s; -} - -std::string FileExpectedStateManager::GetTempPathForFilename( - const std::string& filename) { - assert(!expected_state_dir_path_.empty()); - std::string expected_state_dir_path_slash = - expected_state_dir_path_.back() == '/' ? expected_state_dir_path_ - : expected_state_dir_path_ + "/"; - return expected_state_dir_path_slash + kTempFilenamePrefix + filename + - kTempFilenameSuffix; -} - -std::string FileExpectedStateManager::GetPathForFilename( - const std::string& filename) { - assert(!expected_state_dir_path_.empty()); - std::string expected_state_dir_path_slash = - expected_state_dir_path_.back() == '/' ? expected_state_dir_path_ - : expected_state_dir_path_ + "/"; - return expected_state_dir_path_slash + filename; -} - -AnonExpectedStateManager::AnonExpectedStateManager(size_t max_key, - size_t num_column_families) - : ExpectedStateManager(max_key, num_column_families) {} - -Status AnonExpectedStateManager::Open() { - latest_.reset(new AnonExpectedState(max_key_, num_column_families_)); - return latest_->Open(true /* create */); -} - -} // namespace ROCKSDB_NAMESPACE - -#endif // GFLAGS diff --git a/db_stress_tool/expected_state.h b/db_stress_tool/expected_state.h deleted file mode 100644 index 41d747e76..000000000 --- a/db_stress_tool/expected_state.h +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright (c) 2021-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifdef GFLAGS - -#pragma once - -#include - -#include -#include - -#include "db/dbformat.h" -#include "file/file_util.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/file_system.h" -#include "rocksdb/rocksdb_namespace.h" -#include "rocksdb/types.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -// An `ExpectedState` provides read/write access to expected values for every -// key. -class ExpectedState { - public: - explicit ExpectedState(size_t max_key, size_t num_column_families); - - virtual ~ExpectedState() {} - - // Requires external locking preventing concurrent execution with any other - // member function. - virtual Status Open(bool create) = 0; - - // Requires external locking covering all keys in `cf`. - void ClearColumnFamily(int cf); - - // @param pending True if the update may have started but is not yet - // guaranteed finished. This is useful for crash-recovery testing when the - // process may crash before updating the expected values array. - // - // Requires external locking covering `key` in `cf`. - void Put(int cf, int64_t key, uint32_t value_base, bool pending); - - // Requires external locking covering `key` in `cf`. - uint32_t Get(int cf, int64_t key) const; - - // @param pending See comment above Put() - // Returns true if the key was not yet deleted. - // - // Requires external locking covering `key` in `cf`. 
- bool Delete(int cf, int64_t key, bool pending); - - // @param pending See comment above Put() - // Returns true if the key was not yet deleted. - // - // Requires external locking covering `key` in `cf`. - bool SingleDelete(int cf, int64_t key, bool pending); - - // @param pending See comment above Put() - // Returns number of keys deleted by the call. - // - // Requires external locking covering keys in `[begin_key, end_key)` in `cf`. - int DeleteRange(int cf, int64_t begin_key, int64_t end_key, bool pending); - - // Requires external locking covering `key` in `cf`. - bool Exists(int cf, int64_t key); - - private: - // Requires external locking covering `key` in `cf`. - std::atomic& Value(int cf, int64_t key) const { - return values_[cf * max_key_ + key]; - } - - const size_t max_key_; - const size_t num_column_families_; - - protected: - size_t GetValuesLen() const { - return sizeof(std::atomic) * num_column_families_ * max_key_; - } - - // Requires external locking preventing concurrent execution with any other - // member function. - void Reset(); - - std::atomic* values_; -}; - -// A `FileExpectedState` implements `ExpectedState` backed by a file. -class FileExpectedState : public ExpectedState { - public: - explicit FileExpectedState(std::string expected_state_file_path, - size_t max_key, size_t num_column_families); - - // Requires external locking preventing concurrent execution with any other - // member function. - Status Open(bool create) override; - - private: - const std::string expected_state_file_path_; - std::unique_ptr expected_state_mmap_buffer_; -}; - -// An `AnonExpectedState` implements `ExpectedState` backed by a memory -// allocation. -class AnonExpectedState : public ExpectedState { - public: - explicit AnonExpectedState(size_t max_key, size_t num_column_families); - - // Requires external locking preventing concurrent execution with any other - // member function. - Status Open(bool create) override; - - private: - std::unique_ptr[]> values_allocation_; -}; - -// An `ExpectedStateManager` manages data about the expected state of the -// database. It exposes operations for reading and modifying the latest -// expected state. -class ExpectedStateManager { - public: - explicit ExpectedStateManager(size_t max_key, size_t num_column_families); - - virtual ~ExpectedStateManager(); - - // Requires external locking preventing concurrent execution with any other - // member function. - virtual Status Open() = 0; - - // Saves expected values for the current state of `db` and begins tracking - // changes. Following a successful `SaveAtAndAfter()`, `Restore()` can be - // called on the same DB, as long as its state does not roll back to before - // its current state. - // - // Requires external locking preventing concurrent execution with any other - // member function. Furthermore, `db` must not be mutated while this function - // is executing. - virtual Status SaveAtAndAfter(DB* db) = 0; - - // Returns true if at least one state of historical expected values can be - // restored. - // - // Requires external locking preventing concurrent execution with any other - // member function. - virtual bool HasHistory() = 0; - - // Restores expected values according to the current state of `db`. See - // `SaveAtAndAfter()` for conditions where this can be called. - // - // Requires external locking preventing concurrent execution with any other - // member function. Furthermore, `db` must not be mutated while this function - // is executing. 
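- //
- // For example (illustrative numbers): if `SaveAtAndAfter()` last ran when
- // `db->GetLatestSequenceNumber()` returned 100 and it now returns 250, the
- // file-backed implementation replays the 150 traced write operations from
- // "100.trace" onto a copy of "100.state" and installs the result as
- // "LATEST.state".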
- virtual Status Restore(DB* db) = 0; - - // Requires external locking covering all keys in `cf`. - void ClearColumnFamily(int cf) { return latest_->ClearColumnFamily(cf); } - - // @param pending True if the update may have started but is not yet - // guaranteed finished. This is useful for crash-recovery testing when the - // process may crash before updating the expected values array. - // - // Requires external locking covering `key` in `cf`. - void Put(int cf, int64_t key, uint32_t value_base, bool pending) { - return latest_->Put(cf, key, value_base, pending); - } - - // Requires external locking covering `key` in `cf`. - uint32_t Get(int cf, int64_t key) const { return latest_->Get(cf, key); } - - // @param pending See comment above Put() - // Returns true if the key was not yet deleted. - // - // Requires external locking covering `key` in `cf`. - bool Delete(int cf, int64_t key, bool pending) { - return latest_->Delete(cf, key, pending); - } - - // @param pending See comment above Put() - // Returns true if the key was not yet deleted. - // - // Requires external locking covering `key` in `cf`. - bool SingleDelete(int cf, int64_t key, bool pending) { - return latest_->SingleDelete(cf, key, pending); - } - - // @param pending See comment above Put() - // Returns number of keys deleted by the call. - // - // Requires external locking covering keys in `[begin_key, end_key)` in `cf`. - int DeleteRange(int cf, int64_t begin_key, int64_t end_key, bool pending) { - return latest_->DeleteRange(cf, begin_key, end_key, pending); - } - - // Requires external locking covering `key` in `cf`. - bool Exists(int cf, int64_t key) { return latest_->Exists(cf, key); } - - protected: - const size_t max_key_; - const size_t num_column_families_; - std::unique_ptr latest_; -}; - -// A `FileExpectedStateManager` implements an `ExpectedStateManager` backed by -// a directory of files containing data about the expected state of the -// database. -class FileExpectedStateManager : public ExpectedStateManager { - public: - explicit FileExpectedStateManager(size_t max_key, size_t num_column_families, - std::string expected_state_dir_path); - - // Requires external locking preventing concurrent execution with any other - // member function. - Status Open() override; - - // See `ExpectedStateManager::SaveAtAndAfter()` API doc. - // - // This implementation makes a copy of "LATEST.state" into - // ".state", and starts a trace in ".trace". - // Due to using external files, a following `Restore()` can happen even - // from a different process. - Status SaveAtAndAfter(DB* db) override; - - // See `ExpectedStateManager::HasHistory()` API doc. - bool HasHistory() override; - - // See `ExpectedStateManager::Restore()` API doc. - // - // Say `db->GetLatestSequenceNumber()` was `a` last time `SaveAtAndAfter()` - // was called and now it is `b`. Then this function replays `b - a` write - // operations from "`a`.trace" onto "`a`.state", and then copies the resulting - // file into "LATEST.state". - Status Restore(DB* db) override; - - private: - // Requires external locking preventing concurrent execution with any other - // member function. 
- Status Clean(); - - std::string GetTempPathForFilename(const std::string& filename); - std::string GetPathForFilename(const std::string& filename); - - static const std::string kLatestBasename; - static const std::string kStateFilenameSuffix; - static const std::string kTraceFilenameSuffix; - static const std::string kTempFilenamePrefix; - static const std::string kTempFilenameSuffix; - - const std::string expected_state_dir_path_; - SequenceNumber saved_seqno_ = kMaxSequenceNumber; -}; - -// An `AnonExpectedStateManager` implements an `ExpectedStateManager` backed by -// a memory allocation containing data about the expected state of the database. -class AnonExpectedStateManager : public ExpectedStateManager { - public: - explicit AnonExpectedStateManager(size_t max_key, size_t num_column_families); - - // See `ExpectedStateManager::SaveAtAndAfter()` API doc. - // - // This implementation returns `Status::NotSupported` since we do not - // currently have a need to keep history of expected state within a process. - Status SaveAtAndAfter(DB* /* db */) override { - return Status::NotSupported(); - } - - // See `ExpectedStateManager::HasHistory()` API doc. - bool HasHistory() override { return false; } - - // See `ExpectedStateManager::Restore()` API doc. - // - // This implementation returns `Status::NotSupported` since we do not - // currently have a need to keep history of expected state within a process. - Status Restore(DB* /* db */) override { return Status::NotSupported(); } - - // Requires external locking preventing concurrent execution with any other - // member function. - Status Open() override; -}; - -} // namespace ROCKSDB_NAMESPACE - -#endif // GFLAGS diff --git a/db_stress_tool/multi_ops_txns_stress.cc b/db_stress_tool/multi_ops_txns_stress.cc deleted file mode 100644 index b543b0246..000000000 --- a/db_stress_tool/multi_ops_txns_stress.cc +++ /dev/null @@ -1,1753 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifdef GFLAGS -#include "db_stress_tool/multi_ops_txns_stress.h" - -#include "rocksdb/utilities/write_batch_with_index.h" -#include "util/defer.h" -#include "utilities/fault_injection_fs.h" -#include "utilities/transactions/write_prepared_txn_db.h" - -namespace ROCKSDB_NAMESPACE { - -// The description of A and C can be found in multi_ops_txns_stress.h -DEFINE_int32(lb_a, 0, "(Inclusive) lower bound of A"); -DEFINE_int32(ub_a, 1000, "(Exclusive) upper bound of A"); -DEFINE_int32(lb_c, 0, "(Inclusive) lower bound of C"); -DEFINE_int32(ub_c, 1000, "(Exclusive) upper bound of C"); - -DEFINE_string(key_spaces_path, "", - "Path to file describing the lower and upper bounds of A and C"); - -DEFINE_int32(delay_snapshot_read_one_in, 0, - "With a chance of 1/N, inject a random delay between taking " - "snapshot and read."); - -DEFINE_int32(rollback_one_in, 0, - "If non-zero, rollback non-read-only transactions with a " - "probability of 1/N."); - -DEFINE_int32(clear_wp_commit_cache_one_in, 0, - "If non-zero, evict all commit entries from commit cache with a " - "probability of 1/N. 
This options applies to write-prepared and " - "write-unprepared transactions."); - -extern "C" bool rocksdb_write_prepared_TEST_ShouldClearCommitCache(void) { - static Random rand(static_cast(db_stress_env->NowMicros())); - return FLAGS_clear_wp_commit_cache_one_in > 0 && - rand.OneIn(FLAGS_clear_wp_commit_cache_one_in); -} - -// MultiOpsTxnsStressTest can either operate on a database with pre-populated -// data (possibly from previous ones), or create a new db and preload it with -// data specified via `-lb_a`, `-ub_a`, `-lb_c`, `-ub_c`, etc. Among these, we -// define the test key spaces as two key ranges: [lb_a, ub_a) and [lb_c, ub_c). -// The key spaces specification is persisted in a file whose absolute path can -// be specified via `-key_spaces_path`. -// -// Whether an existing db is used or a new one is created, key_spaces_path will -// be used. In the former case, the test reads the key spaces specification -// from `-key_spaces_path` and decodes [lb_a, ub_a) and [lb_c, ub_c). In the -// latter case, the test writes a key spaces specification to a file at the -// location, and this file will be used by future runs until a new db is -// created. -// -// Create a fresh new database (-destroy_db_initially=1 or there is no database -// in the location specified by -db). See PreloadDb(). -// -// Use an existing, non-empty database. See ScanExistingDb(). -// -// This test is multi-threaded, and thread count can be specified via -// `-threads`. For simplicity, we partition the key ranges and each thread -// operates on a subrange independently. -// Within each subrange, a KeyGenerator object is responsible for key -// generation. A KeyGenerator maintains two sets: set of existing keys within -// [low, high), set of non-existing keys within [low, high). [low, high) is the -// subrange. The test initialization makes sure there is at least one -// non-existing key, otherwise the test will return an error and exit before -// any test thread is spawned. - -void MultiOpsTxnsStressTest::KeyGenerator::FinishInit() { - assert(existing_.empty()); - assert(!existing_uniq_.empty()); - assert(low_ < high_); - for (auto v : existing_uniq_) { - assert(low_ <= v); - assert(high_ > v); - existing_.push_back(v); - } - if (non_existing_uniq_.empty()) { - fprintf( - stderr, - "Cannot allocate key in [%u, %u)\nStart with a new DB or try change " - "the number of threads for testing via -threads=<#threads>\n", - static_cast(low_), static_cast(high_)); - fflush(stdout); - fflush(stderr); - assert(false); - } - initialized_ = true; -} - -std::pair -MultiOpsTxnsStressTest::KeyGenerator::ChooseExisting() { - assert(initialized_); - const size_t N = existing_.size(); - assert(N > 0); - uint32_t rnd = rand_.Uniform(static_cast(N)); - assert(rnd < N); - return std::make_pair(existing_[rnd], rnd); -} - -uint32_t MultiOpsTxnsStressTest::KeyGenerator::Allocate() { - assert(initialized_); - auto it = non_existing_uniq_.begin(); - assert(non_existing_uniq_.end() != it); - uint32_t ret = *it; - // Remove this element from non_existing_. - // Need to call UndoAllocation() if the calling transaction does not commit. 
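- // Illustrative lifecycle (a sketch; in this test Allocate() is reached via
- // GenerateNextA(), and the Replace()/UndoAllocation() calls live inside
- // PrimaryKeyUpdateTxn()):
- //   uint32_t new_a = key_gen->Allocate();  // reserve a currently unused key
- //   ... attempt a transaction that inserts new_a ...
- //   on commit:            key_gen->Replace(old_a, old_a_pos, new_a);
- //   on failure/rollback:  key_gen->UndoAllocation(new_a);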
- non_existing_uniq_.erase(it); - return ret; -} - -void MultiOpsTxnsStressTest::KeyGenerator::Replace(uint32_t old_val, - uint32_t old_pos, - uint32_t new_val) { - assert(initialized_); - { - auto it = existing_uniq_.find(old_val); - assert(it != existing_uniq_.end()); - existing_uniq_.erase(it); - } - - { - assert(0 == existing_uniq_.count(new_val)); - existing_uniq_.insert(new_val); - existing_[old_pos] = new_val; - } - - { - assert(0 == non_existing_uniq_.count(old_val)); - non_existing_uniq_.insert(old_val); - } -} - -void MultiOpsTxnsStressTest::KeyGenerator::UndoAllocation(uint32_t new_val) { - assert(initialized_); - assert(0 == non_existing_uniq_.count(new_val)); - non_existing_uniq_.insert(new_val); -} - -std::string MultiOpsTxnsStressTest::Record::EncodePrimaryKey(uint32_t a) { - std::string ret; - PutFixed32(&ret, kPrimaryIndexId); - PutFixed32(&ret, a); - - char* const buf = &ret[0]; - std::reverse(buf, buf + sizeof(kPrimaryIndexId)); - std::reverse(buf + sizeof(kPrimaryIndexId), - buf + sizeof(kPrimaryIndexId) + sizeof(a)); - return ret; -} - -std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c) { - std::string ret; - PutFixed32(&ret, kSecondaryIndexId); - PutFixed32(&ret, c); - - char* const buf = &ret[0]; - std::reverse(buf, buf + sizeof(kSecondaryIndexId)); - std::reverse(buf + sizeof(kSecondaryIndexId), - buf + sizeof(kSecondaryIndexId) + sizeof(c)); - return ret; -} - -std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c, - uint32_t a) { - std::string ret; - PutFixed32(&ret, kSecondaryIndexId); - PutFixed32(&ret, c); - PutFixed32(&ret, a); - - char* const buf = &ret[0]; - std::reverse(buf, buf + sizeof(kSecondaryIndexId)); - std::reverse(buf + sizeof(kSecondaryIndexId), - buf + sizeof(kSecondaryIndexId) + sizeof(c)); - std::reverse(buf + sizeof(kSecondaryIndexId) + sizeof(c), - buf + sizeof(kSecondaryIndexId) + sizeof(c) + sizeof(a)); - return ret; -} - -std::tuple -MultiOpsTxnsStressTest::Record::DecodePrimaryIndexValue( - Slice primary_index_value) { - if (primary_index_value.size() != 8) { - return std::tuple{Status::Corruption(""), 0, 0}; - } - uint32_t b = 0; - uint32_t c = 0; - if (!GetFixed32(&primary_index_value, &b) || - !GetFixed32(&primary_index_value, &c)) { - assert(false); - return std::tuple{Status::Corruption(""), 0, 0}; - } - return std::tuple{Status::OK(), b, c}; -} - -std::pair -MultiOpsTxnsStressTest::Record::DecodeSecondaryIndexValue( - Slice secondary_index_value) { - if (secondary_index_value.size() != 4) { - return std::make_pair(Status::Corruption(""), 0); - } - uint32_t crc = 0; - bool result __attribute__((unused)) = - GetFixed32(&secondary_index_value, &crc); - assert(result); - return std::make_pair(Status::OK(), crc); -} - -std::pair -MultiOpsTxnsStressTest::Record::EncodePrimaryIndexEntry() const { - std::string primary_index_key = EncodePrimaryKey(); - std::string primary_index_value = EncodePrimaryIndexValue(); - return std::make_pair(primary_index_key, primary_index_value); -} - -std::string MultiOpsTxnsStressTest::Record::EncodePrimaryKey() const { - return EncodePrimaryKey(a_); -} - -std::string MultiOpsTxnsStressTest::Record::EncodePrimaryIndexValue() const { - std::string ret; - PutFixed32(&ret, b_); - PutFixed32(&ret, c_); - return ret; -} - -std::pair -MultiOpsTxnsStressTest::Record::EncodeSecondaryIndexEntry() const { - std::string secondary_index_key = EncodeSecondaryKey(c_, a_); - - // Secondary index value is always 4-byte crc32 of the secondary key - std::string secondary_index_value; 
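- // For example (illustrative values): a record with a=1, c=2 yields the
- // 12-byte secondary key | kSecondaryIndexId | M(2) | M(1) | (all fields
- // big-endian), and the value stored below is crc32c of those 12 bytes,
- // which DecodeSecondaryIndexEntry() later recomputes and verifies.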
- uint32_t crc = - crc32c::Value(secondary_index_key.data(), secondary_index_key.size()); - PutFixed32(&secondary_index_value, crc); - return std::make_pair(std::move(secondary_index_key), secondary_index_value); -} - -std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey() const { - return EncodeSecondaryKey(c_, a_); -} - -Status MultiOpsTxnsStressTest::Record::DecodePrimaryIndexEntry( - Slice primary_index_key, Slice primary_index_value) { - if (primary_index_key.size() != 8) { - assert(false); - return Status::Corruption("Primary index key length is not 8"); - } - - uint32_t index_id = 0; - - [[maybe_unused]] bool res = GetFixed32(&primary_index_key, &index_id); - assert(res); - index_id = EndianSwapValue(index_id); - - if (index_id != kPrimaryIndexId) { - std::ostringstream oss; - oss << "Unexpected primary index id: " << index_id; - return Status::Corruption(oss.str()); - } - - res = GetFixed32(&primary_index_key, &a_); - assert(res); - a_ = EndianSwapValue(a_); - assert(primary_index_key.empty()); - - if (primary_index_value.size() != 8) { - return Status::Corruption("Primary index value length is not 8"); - } - GetFixed32(&primary_index_value, &b_); - GetFixed32(&primary_index_value, &c_); - return Status::OK(); -} - -Status MultiOpsTxnsStressTest::Record::DecodeSecondaryIndexEntry( - Slice secondary_index_key, Slice secondary_index_value) { - if (secondary_index_key.size() != 12) { - return Status::Corruption("Secondary index key length is not 12"); - } - uint32_t crc = - crc32c::Value(secondary_index_key.data(), secondary_index_key.size()); - - uint32_t index_id = 0; - - [[maybe_unused]] bool res = GetFixed32(&secondary_index_key, &index_id); - assert(res); - index_id = EndianSwapValue(index_id); - - if (index_id != kSecondaryIndexId) { - std::ostringstream oss; - oss << "Unexpected secondary index id: " << index_id; - return Status::Corruption(oss.str()); - } - - assert(secondary_index_key.size() == 8); - res = GetFixed32(&secondary_index_key, &c_); - assert(res); - c_ = EndianSwapValue(c_); - - assert(secondary_index_key.size() == 4); - res = GetFixed32(&secondary_index_key, &a_); - assert(res); - a_ = EndianSwapValue(a_); - assert(secondary_index_key.empty()); - - if (secondary_index_value.size() != 4) { - return Status::Corruption("Secondary index value length is not 4"); - } - uint32_t val = 0; - GetFixed32(&secondary_index_value, &val); - if (val != crc) { - std::ostringstream oss; - oss << "Secondary index key checksum mismatch, stored: " << val - << ", recomputed: " << crc; - return Status::Corruption(oss.str()); - } - return Status::OK(); -} - -void MultiOpsTxnsStressTest::FinishInitDb(SharedState* shared) { - if (FLAGS_enable_compaction_filter) { - // TODO (yanqin) enable compaction filter - } - ProcessRecoveredPreparedTxns(shared); - - ReopenAndPreloadDbIfNeeded(shared); - // TODO (yanqin) parallelize if key space is large - for (auto& key_gen : key_gen_for_a_) { - assert(key_gen); - key_gen->FinishInit(); - } - // TODO (yanqin) parallelize if key space is large - for (auto& key_gen : key_gen_for_c_) { - assert(key_gen); - key_gen->FinishInit(); - } -} - -void MultiOpsTxnsStressTest::ReopenAndPreloadDbIfNeeded(SharedState* shared) { - (void)shared; - bool db_empty = false; - { - std::unique_ptr iter(db_->NewIterator(ReadOptions())); - iter->SeekToFirst(); - if (!iter->Valid()) { - db_empty = true; - } - } - - if (db_empty) { - PreloadDb(shared, FLAGS_threads, FLAGS_lb_a, FLAGS_ub_a, FLAGS_lb_c, - FLAGS_ub_c); - } else { - fprintf(stdout, - "Key ranges will be 
read from %s.\n-lb_a, -ub_a, -lb_c, -ub_c will " - "be ignored\n", - FLAGS_key_spaces_path.c_str()); - fflush(stdout); - ScanExistingDb(shared, FLAGS_threads); - } -} - -// Used for point-lookup transaction -Status MultiOpsTxnsStressTest::TestGet( - ThreadState* thread, const ReadOptions& read_opts, - const std::vector& /*rand_column_families*/, - const std::vector& /*rand_keys*/) { - uint32_t a = 0; - uint32_t pos = 0; - std::tie(a, pos) = ChooseExistingA(thread); - return PointLookupTxn(thread, read_opts, a); -} - -// Not used. -std::vector MultiOpsTxnsStressTest::TestMultiGet( - ThreadState* /*thread*/, const ReadOptions& /*read_opts*/, - const std::vector& /*rand_column_families*/, - const std::vector& /*rand_keys*/) { - return std::vector{Status::NotSupported()}; -} - -// Wide columns are currently not supported by transactions. -void MultiOpsTxnsStressTest::TestGetEntity( - ThreadState* /* thread */, const ReadOptions& /* read_opts */, - const std::vector& /* rand_column_families */, - const std::vector& /* rand_keys */) {} - -Status MultiOpsTxnsStressTest::TestPrefixScan( - ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) { - (void)thread; - (void)read_opts; - (void)rand_column_families; - (void)rand_keys; - return Status::OK(); -} - -// Given a key K, this creates an iterator which scans to K and then -// does a random sequence of Next/Prev operations. -Status MultiOpsTxnsStressTest::TestIterate( - ThreadState* thread, const ReadOptions& read_opts, - const std::vector& /*rand_column_families*/, - const std::vector& /*rand_keys*/) { - uint32_t c = 0; - uint32_t pos = 0; - std::tie(c, pos) = ChooseExistingC(thread); - return RangeScanTxn(thread, read_opts, c); -} - -// Not intended for use. -Status MultiOpsTxnsStressTest::TestPut(ThreadState* /*thread*/, - WriteOptions& /*write_opts*/, - const ReadOptions& /*read_opts*/, - const std::vector& /*cf_ids*/, - const std::vector& /*keys*/, - char (&value)[100]) { - (void)value; - return Status::NotSupported(); -} - -// Not intended for use. -Status MultiOpsTxnsStressTest::TestDelete( - ThreadState* /*thread*/, WriteOptions& /*write_opts*/, - const std::vector& /*rand_column_families*/, - const std::vector& /*rand_keys*/) { - return Status::NotSupported(); -} - -// Not intended for use. -Status MultiOpsTxnsStressTest::TestDeleteRange( - ThreadState* /*thread*/, WriteOptions& /*write_opts*/, - const std::vector& /*rand_column_families*/, - const std::vector& /*rand_keys*/) { - return Status::NotSupported(); -} - -void MultiOpsTxnsStressTest::TestIngestExternalFile( - ThreadState* thread, const std::vector& rand_column_families, - const std::vector& /*rand_keys*/) { - // TODO (yanqin) - (void)thread; - (void)rand_column_families; -} - -void MultiOpsTxnsStressTest::TestCompactRange( - ThreadState* thread, int64_t /*rand_key*/, const Slice& /*start_key*/, - ColumnFamilyHandle* column_family) { - // TODO (yanqin). - // May use GetRangeHash() for validation before and after DB::CompactRange() - // completes. 
- (void)thread; - (void)column_family; -} - -Status MultiOpsTxnsStressTest::TestBackupRestore( - ThreadState* thread, const std::vector& rand_column_families, - const std::vector& /*rand_keys*/) { - // TODO (yanqin) - (void)thread; - (void)rand_column_families; - return Status::OK(); -} - -Status MultiOpsTxnsStressTest::TestCheckpoint( - ThreadState* thread, const std::vector& rand_column_families, - const std::vector& /*rand_keys*/) { - // TODO (yanqin) - (void)thread; - (void)rand_column_families; - return Status::OK(); -} - -Status MultiOpsTxnsStressTest::TestApproximateSize( - ThreadState* thread, uint64_t iteration, - const std::vector& rand_column_families, - const std::vector& /*rand_keys*/) { - // TODO (yanqin) - (void)thread; - (void)iteration; - (void)rand_column_families; - return Status::OK(); -} - -Status MultiOpsTxnsStressTest::TestCustomOperations( - ThreadState* thread, const std::vector& rand_column_families) { - (void)rand_column_families; - // Randomly choose from 0, 1, and 2. - // TODO (yanqin) allow user to configure probability of each operation. - uint32_t rand = thread->rand.Uniform(3); - Status s; - if (0 == rand) { - // Update primary key. - uint32_t old_a = 0; - uint32_t pos = 0; - std::tie(old_a, pos) = ChooseExistingA(thread); - uint32_t new_a = GenerateNextA(thread); - s = PrimaryKeyUpdateTxn(thread, old_a, pos, new_a); - } else if (1 == rand) { - // Update secondary key. - uint32_t old_c = 0; - uint32_t pos = 0; - std::tie(old_c, pos) = ChooseExistingC(thread); - uint32_t new_c = GenerateNextC(thread); - s = SecondaryKeyUpdateTxn(thread, old_c, pos, new_c); - } else if (2 == rand) { - // Update primary index value. - uint32_t a = 0; - uint32_t pos = 0; - std::tie(a, pos) = ChooseExistingA(thread); - s = UpdatePrimaryIndexValueTxn(thread, a, /*b_delta=*/1); - } else { - // Should never reach here. - assert(false); - } - - return s; -} - -void MultiOpsTxnsStressTest::RegisterAdditionalListeners() { - options_.listeners.emplace_back(new MultiOpsTxnsStressListener(this)); -} - -void MultiOpsTxnsStressTest::PrepareTxnDbOptions( - SharedState* /*shared*/, TransactionDBOptions& txn_db_opts) { - // MultiOpsTxnStressTest uses SingleDelete to delete secondary keys, thus we - // register this callback to let TxnDb know that when rolling back - // a transaction, use only SingleDelete to cancel prior Put from the same - // transaction if applicable. - txn_db_opts.rollback_deletion_type_callback = - [](TransactionDB* /*db*/, ColumnFamilyHandle* /*column_family*/, - const Slice& key) { - Slice ks = key; - uint32_t index_id = 0; - [[maybe_unused]] bool res = GetFixed32(&ks, &index_id); - assert(res); - index_id = EndianSwapValue(index_id); - assert(index_id <= Record::kSecondaryIndexId); - return index_id == Record::kSecondaryIndexId; - }; -} - -Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread, - uint32_t old_a, - uint32_t old_a_pos, - uint32_t new_a) { - std::string old_pk = Record::EncodePrimaryKey(old_a); - std::string new_pk = Record::EncodePrimaryKey(new_a); - Transaction* txn = nullptr; - WriteOptions wopts; - Status s = NewTxn(wopts, &txn); - if (!s.ok()) { - assert(!txn); - thread->stats.AddErrors(1); - return s; - } - - assert(txn); - txn->SetSnapshotOnNextOperation(/*notifier=*/nullptr); - - const Defer cleanup([new_a, &s, thread, txn, this]() { - if (s.ok()) { - // Two gets, one for existing pk, one for locking potential new pk. 
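- // On the success path this transaction issued two GetForUpdate() calls
- // (only the old primary key was expected to exist), one Delete() of the old
- // primary entry, one SingleDelete() of the old secondary entry, and two
- // Put()s (the new primary and secondary entries); the counters below record
- // exactly that.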
- thread->stats.AddGets(/*ngets=*/2, /*nfounds=*/1); - thread->stats.AddDeletes(1); - thread->stats.AddBytesForWrites( - /*nwrites=*/2, - Record::kPrimaryIndexEntrySize + Record::kSecondaryIndexEntrySize); - thread->stats.AddSingleDeletes(1); - return; - } - if (s.IsNotFound()) { - thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/0); - } else if (s.IsBusy() || s.IsIncomplete()) { - // ignore. - // Incomplete also means rollback by application. See the transaction - // implementations. - } else { - thread->stats.AddErrors(1); - } - auto& key_gen = key_gen_for_a_[thread->tid]; - key_gen->UndoAllocation(new_a); - RollbackTxn(txn).PermitUncheckedError(); - }); - - ReadOptions ropts; - ropts.rate_limiter_priority = - FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL; - std::string value; - s = txn->GetForUpdate(ropts, old_pk, &value); - if (!s.ok()) { - return s; - } - std::string empty_value; - s = txn->GetForUpdate(ropts, new_pk, &empty_value); - if (s.ok()) { - assert(!empty_value.empty()); - s = Status::Busy(); - return s; - } else if (!s.IsNotFound()) { - return s; - } - - auto result = Record::DecodePrimaryIndexValue(value); - s = std::get<0>(result); - if (!s.ok()) { - return s; - } - uint32_t b = std::get<1>(result); - uint32_t c = std::get<2>(result); - - ColumnFamilyHandle* cf = db_->DefaultColumnFamily(); - s = txn->Delete(cf, old_pk, /*assume_tracked=*/true); - if (!s.ok()) { - return s; - } - s = txn->Put(cf, new_pk, value, /*assume_tracked=*/true); - if (!s.ok()) { - return s; - } - - auto* wb = txn->GetWriteBatch(); - assert(wb); - - std::string old_sk = Record::EncodeSecondaryKey(c, old_a); - s = wb->SingleDelete(old_sk); - if (!s.ok()) { - return s; - } - - Record record(new_a, b, c); - std::string new_sk; - std::string new_crc; - std::tie(new_sk, new_crc) = record.EncodeSecondaryIndexEntry(); - s = wb->Put(new_sk, new_crc); - if (!s.ok()) { - return s; - } - - s = txn->Prepare(); - - if (!s.ok()) { - return s; - } - - if (FLAGS_rollback_one_in > 0 && thread->rand.OneIn(FLAGS_rollback_one_in)) { - s = Status::Incomplete(); - return s; - } - - s = WriteToCommitTimeWriteBatch(*txn); - if (!s.ok()) { - return s; - } - - s = CommitAndCreateTimestampedSnapshotIfNeeded(thread, *txn); - - auto& key_gen = key_gen_for_a_.at(thread->tid); - if (s.ok()) { - delete txn; - key_gen->Replace(old_a, old_a_pos, new_a); - } - return s; -} - -Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread, - uint32_t old_c, - uint32_t old_c_pos, - uint32_t new_c) { - Transaction* txn = nullptr; - WriteOptions wopts; - Status s = NewTxn(wopts, &txn); - if (!s.ok()) { - assert(!txn); - thread->stats.AddErrors(1); - return s; - } - - assert(txn); - - Iterator* it = nullptr; - long iterations = 0; - const Defer cleanup([new_c, &s, thread, &it, txn, this, &iterations]() { - delete it; - if (s.ok()) { - thread->stats.AddIterations(iterations); - thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/1); - thread->stats.AddSingleDeletes(1); - thread->stats.AddBytesForWrites( - /*nwrites=*/2, - Record::kPrimaryIndexEntrySize + Record::kSecondaryIndexEntrySize); - return; - } else if (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain() || - s.IsMergeInProgress() || s.IsIncomplete()) { - // ww-conflict detected, or - // lock cannot be acquired, or - // memtable history is not large enough for conflict checking, or - // Merge operation cannot be resolved, or - // application rollback. - // TODO (yanqin) add stats for other cases? - } else if (s.IsNotFound()) { - // ignore. 
- } else { - thread->stats.AddErrors(1); - } - auto& key_gen = key_gen_for_c_[thread->tid]; - key_gen->UndoAllocation(new_c); - RollbackTxn(txn).PermitUncheckedError(); - }); - - // TODO (yanqin) try SetSnapshotOnNextOperation(). We currently need to take - // a snapshot here because we will later verify that point lookup in the - // primary index using GetForUpdate() returns the same value for 'c' as the - // iterator. The iterator does not need a snapshot though, because it will be - // assigned the current latest (published) sequence in the db, which will be - // no smaller than the snapshot created here. The GetForUpdate will perform - // ww conflict checking to ensure GetForUpdate() (using the snapshot) sees - // the same data as this iterator. - txn->SetSnapshot(); - std::string old_sk_prefix = Record::EncodeSecondaryKey(old_c); - std::string iter_ub_str = Record::EncodeSecondaryKey(old_c + 1); - Slice iter_ub = iter_ub_str; - ReadOptions ropts; - ropts.snapshot = txn->GetSnapshot(); - ropts.total_order_seek = true; - ropts.iterate_upper_bound = &iter_ub; - ropts.rate_limiter_priority = - FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL; - it = txn->GetIterator(ropts); - - assert(it); - it->Seek(old_sk_prefix); - if (!it->Valid()) { - s = Status::NotFound(); - return s; - } - auto* wb = txn->GetWriteBatch(); - assert(wb); - - do { - ++iterations; - Record record; - s = record.DecodeSecondaryIndexEntry(it->key(), it->value()); - if (!s.ok()) { - fprintf(stderr, "Cannot decode secondary key (%s => %s): %s\n", - it->key().ToString(true).c_str(), - it->value().ToString(true).c_str(), s.ToString().c_str()); - assert(false); - break; - } - // At this point, record.b is not known yet, thus we need to access - // primary index. - std::string pk = Record::EncodePrimaryKey(record.a_value()); - std::string value; - ReadOptions read_opts; - read_opts.rate_limiter_priority = - FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL; - read_opts.snapshot = txn->GetSnapshot(); - s = txn->GetForUpdate(read_opts, pk, &value); - if (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain() || - s.IsMergeInProgress()) { - // Write conflict, or cannot acquire lock, or memtable size is not large - // enough, or merge cannot be resolved. - break; - } else if (s.IsNotFound()) { - // We can also fail verification here. 
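- // A NotFound here means the secondary index entry just decoded points at a
- // primary key that is missing under this snapshot, i.e. a pk/sk
- // inconsistency; report the snapshot and last published sequence numbers
- // and abort.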
- std::ostringstream oss; - auto* dbimpl = static_cast_with_check(db_->GetRootDB()); - assert(dbimpl); - oss << "snap " << read_opts.snapshot->GetSequenceNumber() - << " (published " << dbimpl->GetLastPublishedSequence() - << "), pk should exist: " << Slice(pk).ToString(true); - fprintf(stderr, "%s\n", oss.str().c_str()); - assert(false); - break; - } - if (!s.ok()) { - std::ostringstream oss; - auto* dbimpl = static_cast_with_check(db_->GetRootDB()); - assert(dbimpl); - oss << "snap " << read_opts.snapshot->GetSequenceNumber() - << " (published " << dbimpl->GetLastPublishedSequence() << "), " - << s.ToString(); - fprintf(stderr, "%s\n", oss.str().c_str()); - assert(false); - break; - } - auto result = Record::DecodePrimaryIndexValue(value); - s = std::get<0>(result); - if (!s.ok()) { - fprintf(stderr, "Cannot decode primary index value %s: %s\n", - Slice(value).ToString(true).c_str(), s.ToString().c_str()); - assert(false); - break; - } - uint32_t b = std::get<1>(result); - uint32_t c = std::get<2>(result); - if (c != old_c) { - std::ostringstream oss; - auto* dbimpl = static_cast_with_check(db_->GetRootDB()); - assert(dbimpl); - oss << "snap " << read_opts.snapshot->GetSequenceNumber() - << " (published " << dbimpl->GetLastPublishedSequence() - << "), pk/sk mismatch. pk: (a=" << record.a_value() << ", " - << "c=" << c << "), sk: (c=" << old_c << ")"; - s = Status::Corruption(); - fprintf(stderr, "%s\n", oss.str().c_str()); - assert(false); - break; - } - Record new_rec(record.a_value(), b, new_c); - std::string new_primary_index_value = new_rec.EncodePrimaryIndexValue(); - ColumnFamilyHandle* cf = db_->DefaultColumnFamily(); - s = txn->Put(cf, pk, new_primary_index_value, /*assume_tracked=*/true); - if (!s.ok()) { - break; - } - std::string old_sk = it->key().ToString(/*hex=*/false); - std::string new_sk; - std::string new_crc; - std::tie(new_sk, new_crc) = new_rec.EncodeSecondaryIndexEntry(); - s = wb->SingleDelete(old_sk); - if (!s.ok()) { - break; - } - s = wb->Put(new_sk, new_crc); - if (!s.ok()) { - break; - } - - it->Next(); - } while (it->Valid()); - - if (!s.ok()) { - return s; - } - - s = txn->Prepare(); - - if (!s.ok()) { - return s; - } - - if (FLAGS_rollback_one_in > 0 && thread->rand.OneIn(FLAGS_rollback_one_in)) { - s = Status::Incomplete(); - return s; - } - - s = WriteToCommitTimeWriteBatch(*txn); - if (!s.ok()) { - return s; - } - - s = CommitAndCreateTimestampedSnapshotIfNeeded(thread, *txn); - - if (s.ok()) { - delete txn; - auto& key_gen = key_gen_for_c_.at(thread->tid); - key_gen->Replace(old_c, old_c_pos, new_c); - } - - return s; -} - -Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread, - uint32_t a, - uint32_t b_delta) { - std::string pk_str = Record::EncodePrimaryKey(a); - Transaction* txn = nullptr; - WriteOptions wopts; - Status s = NewTxn(wopts, &txn); - if (!s.ok()) { - assert(!txn); - thread->stats.AddErrors(1); - return s; - } - - assert(txn); - - const Defer cleanup([&s, thread, txn, this]() { - if (s.ok()) { - thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/1); - thread->stats.AddBytesForWrites( - /*nwrites=*/1, /*nbytes=*/Record::kPrimaryIndexEntrySize); - return; - } - if (s.IsNotFound()) { - thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/0); - } else if (s.IsInvalidArgument()) { - // ignored. - } else if (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain() || - s.IsMergeInProgress() || s.IsIncomplete()) { - // ignored. 
- } else { - thread->stats.AddErrors(1); - } - RollbackTxn(txn).PermitUncheckedError(); - }); - ReadOptions ropts; - ropts.rate_limiter_priority = - FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL; - std::string value; - s = txn->GetForUpdate(ropts, pk_str, &value); - if (!s.ok()) { - return s; - } - auto result = Record::DecodePrimaryIndexValue(value); - if (!std::get<0>(result).ok()) { - s = std::get<0>(result); - fprintf(stderr, "Cannot decode primary index value %s: %s\n", - Slice(value).ToString(true).c_str(), s.ToString().c_str()); - assert(false); - return s; - } - uint32_t b = std::get<1>(result) + b_delta; - uint32_t c = std::get<2>(result); - Record record(a, b, c); - std::string primary_index_value = record.EncodePrimaryIndexValue(); - ColumnFamilyHandle* cf = db_->DefaultColumnFamily(); - s = txn->Put(cf, pk_str, primary_index_value, /*assume_tracked=*/true); - if (!s.ok()) { - return s; - } - s = txn->Prepare(); - if (!s.ok()) { - return s; - } - - if (FLAGS_rollback_one_in > 0 && thread->rand.OneIn(FLAGS_rollback_one_in)) { - s = Status::Incomplete(); - return s; - } - - s = WriteToCommitTimeWriteBatch(*txn); - if (!s.ok()) { - return s; - } - - s = CommitAndCreateTimestampedSnapshotIfNeeded(thread, *txn); - - if (s.ok()) { - delete txn; - } - return s; -} - -Status MultiOpsTxnsStressTest::PointLookupTxn(ThreadState* thread, - ReadOptions ropts, uint32_t a) { - std::string pk_str = Record::EncodePrimaryKey(a); - // pk may or may not exist - PinnableSlice value; - - Transaction* txn = nullptr; - WriteOptions wopts; - Status s = NewTxn(wopts, &txn); - if (!s.ok()) { - assert(!txn); - thread->stats.AddErrors(1); - return s; - } - - assert(txn); - - const Defer cleanup([&s, thread, txn, this]() { - if (s.ok()) { - thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/1); - return; - } else if (s.IsNotFound()) { - thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/0); - } else { - thread->stats.AddErrors(1); - } - RollbackTxn(txn).PermitUncheckedError(); - }); - - std::shared_ptr snapshot; - SetupSnapshot(thread, ropts, *txn, snapshot); - - if (FLAGS_delay_snapshot_read_one_in > 0 && - thread->rand.OneIn(FLAGS_delay_snapshot_read_one_in)) { - uint64_t delay_ms = thread->rand.Uniform(100) + 1; - db_->GetDBOptions().env->SleepForMicroseconds( - static_cast(delay_ms * 1000)); - } - - s = txn->Get(ropts, db_->DefaultColumnFamily(), pk_str, &value); - if (s.ok()) { - s = txn->Commit(); - } - if (s.ok()) { - delete txn; - } - return s; -} - -Status MultiOpsTxnsStressTest::RangeScanTxn(ThreadState* thread, - ReadOptions ropts, uint32_t c) { - std::string sk = Record::EncodeSecondaryKey(c); - - Transaction* txn = nullptr; - WriteOptions wopts; - Status s = NewTxn(wopts, &txn); - if (!s.ok()) { - assert(!txn); - thread->stats.AddErrors(1); - return s; - } - - assert(txn); - - const Defer cleanup([&s, thread, txn, this]() { - if (s.ok()) { - thread->stats.AddIterations(1); - return; - } - thread->stats.AddErrors(1); - RollbackTxn(txn).PermitUncheckedError(); - }); - - std::shared_ptr snapshot; - SetupSnapshot(thread, ropts, *txn, snapshot); - - if (FLAGS_delay_snapshot_read_one_in > 0 && - thread->rand.OneIn(FLAGS_delay_snapshot_read_one_in)) { - uint64_t delay_ms = thread->rand.Uniform(100) + 1; - db_->GetDBOptions().env->SleepForMicroseconds( - static_cast(delay_ms * 1000)); - } - - std::unique_ptr iter(txn->GetIterator(ropts)); - - constexpr size_t total_nexts = 10; - size_t nexts = 0; - for (iter->Seek(sk); - iter->Valid() && nexts < total_nexts && iter->status().ok(); - iter->Next(), 
++nexts) { - } - - if (iter->status().ok()) { - s = txn->Commit(); - } else { - s = iter->status(); - } - - if (s.ok()) { - delete txn; - } - - return s; -} - -void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const { - if (thread->shared->HasVerificationFailedYet()) { - return; - } - const Snapshot* const snapshot = db_->GetSnapshot(); - assert(snapshot); - ManagedSnapshot snapshot_guard(db_, snapshot); - - std::ostringstream oss; - oss << "[snap=" << snapshot->GetSequenceNumber() << ","; - - auto* dbimpl = static_cast_with_check(db_->GetRootDB()); - assert(dbimpl); - - oss << " last_published=" << dbimpl->GetLastPublishedSequence() << "] "; - - if (FLAGS_delay_snapshot_read_one_in > 0 && - thread->rand.OneIn(FLAGS_delay_snapshot_read_one_in)) { - uint64_t delay_ms = thread->rand.Uniform(100) + 1; - db_->GetDBOptions().env->SleepForMicroseconds( - static_cast(delay_ms * 1000)); - } - - // TODO (yanqin) with a probability, we can use either forward or backward - // iterator in subsequent checks. We can also use more advanced features in - // range scan. For now, let's just use simple forward iteration with - // total_order_seek = true. - - // First, iterate primary index. - size_t primary_index_entries_count = 0; - { - std::string iter_ub_str; - PutFixed32(&iter_ub_str, Record::kPrimaryIndexId + 1); - std::reverse(iter_ub_str.begin(), iter_ub_str.end()); - Slice iter_ub = iter_ub_str; - - std::string start_key; - PutFixed32(&start_key, Record::kPrimaryIndexId); - std::reverse(start_key.begin(), start_key.end()); - - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. - ReadOptions ropts; - ropts.snapshot = snapshot; - ropts.total_order_seek = true; - ropts.iterate_upper_bound = &iter_ub; - - std::unique_ptr it(db_->NewIterator(ropts)); - for (it->Seek(start_key); it->Valid(); it->Next()) { - Record record; - Status s = record.DecodePrimaryIndexEntry(it->key(), it->value()); - if (!s.ok()) { - oss << "Cannot decode primary index entry " << it->key().ToString(true) - << "=>" << it->value().ToString(true); - VerificationAbort(thread->shared, oss.str(), s); - assert(false); - return; - } - ++primary_index_entries_count; - - // Search secondary index. - uint32_t a = record.a_value(); - uint32_t c = record.c_value(); - char sk_buf[12]; - EncodeFixed32(sk_buf, Record::kSecondaryIndexId); - std::reverse(sk_buf, sk_buf + sizeof(uint32_t)); - EncodeFixed32(sk_buf + sizeof(uint32_t), c); - std::reverse(sk_buf + sizeof(uint32_t), sk_buf + 2 * sizeof(uint32_t)); - EncodeFixed32(sk_buf + 2 * sizeof(uint32_t), a); - std::reverse(sk_buf + 2 * sizeof(uint32_t), sk_buf + sizeof(sk_buf)); - Slice sk(sk_buf, sizeof(sk_buf)); - std::string value; - s = db_->Get(ropts, sk, &value); - if (!s.ok()) { - oss << "Cannot find secondary index entry " << sk.ToString(true); - VerificationAbort(thread->shared, oss.str(), s); - assert(false); - return; - } - } - } - - // Second, iterate secondary index. - size_t secondary_index_entries_count = 0; - { - std::string start_key; - PutFixed32(&start_key, Record::kSecondaryIndexId); - std::reverse(start_key.begin(), start_key.end()); - - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. 
- ReadOptions ropts; - ropts.snapshot = snapshot; - ropts.total_order_seek = true; - - std::unique_ptr it(db_->NewIterator(ropts)); - for (it->Seek(start_key); it->Valid(); it->Next()) { - ++secondary_index_entries_count; - Record record; - Status s = record.DecodeSecondaryIndexEntry(it->key(), it->value()); - if (!s.ok()) { - oss << "Cannot decode secondary index entry " - << it->key().ToString(true) << "=>" << it->value().ToString(true); - VerificationAbort(thread->shared, oss.str(), s); - assert(false); - return; - } - // After decoding secondary index entry, we know a and c. Crc is verified - // in decoding phase. - // - // Form a primary key and search in the primary index. - std::string pk = Record::EncodePrimaryKey(record.a_value()); - std::string value; - s = db_->Get(ropts, pk, &value); - if (!s.ok()) { - oss << "Error searching pk " << Slice(pk).ToString(true) << ". " - << s.ToString() << ". sk " << it->key().ToString(true); - VerificationAbort(thread->shared, oss.str(), s); - assert(false); - return; - } - auto result = Record::DecodePrimaryIndexValue(value); - s = std::get<0>(result); - if (!s.ok()) { - oss << "Error decoding primary index value " - << Slice(value).ToString(true) << ". " << s.ToString(); - VerificationAbort(thread->shared, oss.str(), s); - assert(false); - return; - } - uint32_t c_in_primary = std::get<2>(result); - if (c_in_primary != record.c_value()) { - oss << "Pk/sk mismatch. pk: " << Slice(pk).ToString(true) << "=>" - << Slice(value).ToString(true) << " (a=" << record.a_value() - << ", c=" << c_in_primary << "), sk: " << it->key().ToString(true) - << " (c=" << record.c_value() << ")"; - VerificationAbort(thread->shared, oss.str(), s); - assert(false); - return; - } - } - } - - if (secondary_index_entries_count != primary_index_entries_count) { - oss << "Pk/sk mismatch: primary index has " << primary_index_entries_count - << " entries. Secondary index has " << secondary_index_entries_count - << " entries."; - VerificationAbort(thread->shared, oss.str(), Status::OK()); - assert(false); - return; - } -} - -// VerifyPkSkFast() can be called by MultiOpsTxnsStressListener's callbacks -// which can be called before TransactionDB::Open() returns to caller. -// Therefore, at that time, db_ and txn_db_ may still be nullptr. -// Caller has to make sure that the race condition does not happen. -void MultiOpsTxnsStressTest::VerifyPkSkFast(int job_id) { - DB* const db = db_aptr_.load(std::memory_order_acquire); - if (db == nullptr) { - return; - } - - assert(db_ == db); - assert(db_ != nullptr); - - const Snapshot* const snapshot = db_->GetSnapshot(); - assert(snapshot); - ManagedSnapshot snapshot_guard(db_, snapshot); - - std::ostringstream oss; - auto* dbimpl = static_cast_with_check(db_->GetRootDB()); - assert(dbimpl); - - oss << "Job " << job_id << ": [" << snapshot->GetSequenceNumber() << "," - << dbimpl->GetLastPublishedSequence() << "] "; - - std::string start_key; - PutFixed32(&start_key, Record::kSecondaryIndexId); - std::reverse(start_key.begin(), start_key.end()); - - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. 
- ReadOptions ropts; - ropts.snapshot = snapshot; - ropts.total_order_seek = true; - - std::unique_ptr it(db_->NewIterator(ropts)); - for (it->Seek(start_key); it->Valid(); it->Next()) { - Record record; - Status s = record.DecodeSecondaryIndexEntry(it->key(), it->value()); - if (!s.ok()) { - oss << "Cannot decode secondary index entry " << it->key().ToString(true) - << "=>" << it->value().ToString(true); - fprintf(stderr, "%s\n", oss.str().c_str()); - fflush(stderr); - assert(false); - } - // After decoding secondary index entry, we know a and c. Crc is verified - // in decoding phase. - // - // Form a primary key and search in the primary index. - std::string pk = Record::EncodePrimaryKey(record.a_value()); - std::string value; - s = db_->Get(ropts, pk, &value); - if (!s.ok()) { - oss << "Error searching pk " << Slice(pk).ToString(true) << ". " - << s.ToString() << ". sk " << it->key().ToString(true); - fprintf(stderr, "%s\n", oss.str().c_str()); - fflush(stderr); - assert(false); - } - auto result = Record::DecodePrimaryIndexValue(value); - s = std::get<0>(result); - if (!s.ok()) { - oss << "Error decoding primary index value " - << Slice(value).ToString(true) << ". " << s.ToString(); - fprintf(stderr, "%s\n", oss.str().c_str()); - fflush(stderr); - assert(false); - } - uint32_t c_in_primary = std::get<2>(result); - if (c_in_primary != record.c_value()) { - oss << "Pk/sk mismatch. pk: " << Slice(pk).ToString(true) << "=>" - << Slice(value).ToString(true) << " (a=" << record.a_value() - << ", c=" << c_in_primary << "), sk: " << it->key().ToString(true) - << " (c=" << record.c_value() << ")"; - fprintf(stderr, "%s\n", oss.str().c_str()); - fflush(stderr); - assert(false); - } - } -} - -std::pair MultiOpsTxnsStressTest::ChooseExistingA( - ThreadState* thread) { - uint32_t tid = thread->tid; - auto& key_gen = key_gen_for_a_.at(tid); - return key_gen->ChooseExisting(); -} - -uint32_t MultiOpsTxnsStressTest::GenerateNextA(ThreadState* thread) { - uint32_t tid = thread->tid; - auto& key_gen = key_gen_for_a_.at(tid); - return key_gen->Allocate(); -} - -std::pair MultiOpsTxnsStressTest::ChooseExistingC( - ThreadState* thread) { - uint32_t tid = thread->tid; - auto& key_gen = key_gen_for_c_.at(tid); - return key_gen->ChooseExisting(); -} - -uint32_t MultiOpsTxnsStressTest::GenerateNextC(ThreadState* thread) { - uint32_t tid = thread->tid; - auto& key_gen = key_gen_for_c_.at(tid); - return key_gen->Allocate(); -} - -void MultiOpsTxnsStressTest::ProcessRecoveredPreparedTxnsHelper( - Transaction* txn, SharedState*) { - thread_local Random rand(static_cast(FLAGS_seed)); - if (rand.OneIn(2)) { - Status s = txn->Commit(); - assert(s.ok()); - } else { - Status s = txn->Rollback(); - assert(s.ok()); - } -} - -Status MultiOpsTxnsStressTest::WriteToCommitTimeWriteBatch(Transaction& txn) { - WriteBatch* ctwb = txn.GetCommitTimeWriteBatch(); - assert(ctwb); - // Do not change the content in key_buf. 
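- // key_buf below is a constant key (sized by Record::kMetadataPrefix plus
- // four bytes, all zero except a trailing 0xff byte); each transaction that
- // reaches this point records a fresh counter_ value under that single key
- // through its commit-time write batch.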
- static constexpr char key_buf[sizeof(Record::kMetadataPrefix) + 4] = { - '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\xff'}; - - uint64_t counter_val = counter_.Next(); - char val_buf[sizeof(counter_val)]; - EncodeFixed64(val_buf, counter_val); - return ctwb->Put(Slice(key_buf, sizeof(key_buf)), - Slice(val_buf, sizeof(val_buf))); -} - -Status MultiOpsTxnsStressTest::CommitAndCreateTimestampedSnapshotIfNeeded( - ThreadState* thread, Transaction& txn) { - Status s; - if (FLAGS_create_timestamped_snapshot_one_in > 0 && - thread->rand.OneInOpt(FLAGS_create_timestamped_snapshot_one_in)) { - uint64_t ts = db_stress_env->NowNanos(); - std::shared_ptr snapshot; - s = txn.CommitAndTryCreateSnapshot(/*notifier=*/nullptr, ts, &snapshot); - } else { - s = txn.Commit(); - } - assert(txn_db_); - if (FLAGS_create_timestamped_snapshot_one_in > 0 && - thread->rand.OneInOpt(50000)) { - uint64_t now = db_stress_env->NowNanos(); - constexpr uint64_t time_diff = static_cast(1000) * 1000 * 1000; - txn_db_->ReleaseTimestampedSnapshotsOlderThan(now - time_diff); - } - return s; -} - -void MultiOpsTxnsStressTest::SetupSnapshot( - ThreadState* thread, ReadOptions& read_opts, Transaction& txn, - std::shared_ptr& snapshot) { - if (thread->rand.OneInOpt(2)) { - snapshot = txn_db_->GetLatestTimestampedSnapshot(); - } - - if (snapshot) { - read_opts.snapshot = snapshot.get(); - } else { - txn.SetSnapshot(); - read_opts.snapshot = txn.GetSnapshot(); - } -} - -std::string MultiOpsTxnsStressTest::KeySpaces::EncodeTo() const { - std::string result; - PutFixed32(&result, lb_a); - PutFixed32(&result, ub_a); - PutFixed32(&result, lb_c); - PutFixed32(&result, ub_c); - return result; -} - -bool MultiOpsTxnsStressTest::KeySpaces::DecodeFrom(Slice data) { - if (!GetFixed32(&data, &lb_a) || !GetFixed32(&data, &ub_a) || - !GetFixed32(&data, &lb_c) || !GetFixed32(&data, &ub_c)) { - return false; - } - return true; -} - -void MultiOpsTxnsStressTest::PersistKeySpacesDesc( - const std::string& key_spaces_path, uint32_t lb_a, uint32_t ub_a, - uint32_t lb_c, uint32_t ub_c) { - KeySpaces key_spaces(lb_a, ub_a, lb_c, ub_c); - std::string key_spaces_rep = key_spaces.EncodeTo(); - - std::unique_ptr wfile; - Status s1 = - Env::Default()->NewWritableFile(key_spaces_path, &wfile, EnvOptions()); - assert(s1.ok()); - assert(wfile); - s1 = wfile->Append(key_spaces_rep); - assert(s1.ok()); -} - -MultiOpsTxnsStressTest::KeySpaces MultiOpsTxnsStressTest::ReadKeySpacesDesc( - const std::string& key_spaces_path) { - KeySpaces key_spaces; - std::unique_ptr sfile; - Status s1 = - Env::Default()->NewSequentialFile(key_spaces_path, &sfile, EnvOptions()); - assert(s1.ok()); - assert(sfile); - char buf[16]; - Slice result; - s1 = sfile->Read(sizeof(buf), &result, buf); - assert(s1.ok()); - if (!key_spaces.DecodeFrom(result)) { - assert(false); - } - return key_spaces; -} - -// Create an empty database if necessary and preload it with initial test data. -// Key range [lb_a, ub_a), [lb_c, ub_c). The key ranges will be shared by -// 'threads' threads. -// PreloadDb() also sets up KeyGenerator objects for each sub key range -// operated on by each thread. -// Both [lb_a, ub_a) and [lb_c, ub_c) are partitioned. Each thread operates on -// one sub range, using KeyGenerators to generate keys. -// For example, we choose a from [0, 10000) and c from [0, 100). Number of -// threads is 32, their tids range from 0 to 31. -// Thread k chooses a from [312*k,312*(k+1)) and c from [3*k,3*(k+1)) if k<31. -// Thread 31 chooses a from [9672, 10000) and c from [93, 100). 
-// Within each subrange: a from [low1, high1), c from [low2, high2). -// high1 - low1 > high2 - low2 -// We reserve {high1 - 1} and {high2 - 1} as unallocated. -// The records are , , ..., -// , ... -void MultiOpsTxnsStressTest::PreloadDb(SharedState* shared, int threads, - uint32_t lb_a, uint32_t ub_a, - uint32_t lb_c, uint32_t ub_c) { - key_gen_for_a_.resize(threads); - key_gen_for_c_.resize(threads); - - assert(ub_a > lb_a && ub_a > lb_a + threads); - assert(ub_c > lb_c && ub_c > lb_c + threads); - - PersistKeySpacesDesc(FLAGS_key_spaces_path, lb_a, ub_a, lb_c, ub_c); - - fprintf(stdout, "a from [%u, %u), c from [%u, %u)\n", - static_cast(lb_a), static_cast(ub_a), - static_cast(lb_c), static_cast(ub_c)); - - const uint32_t num_c = ub_c - lb_c; - const uint32_t num_c_per_thread = num_c / threads; - const uint32_t num_a = ub_a - lb_a; - const uint32_t num_a_per_thread = num_a / threads; - - WriteOptions wopts; - wopts.disableWAL = FLAGS_disable_wal; - Random rnd(shared->GetSeed()); - assert(txn_db_); - - std::vector existing_a_uniqs(threads); - std::vector non_existing_a_uniqs(threads); - std::vector existing_c_uniqs(threads); - std::vector non_existing_c_uniqs(threads); - - for (uint32_t a = lb_a; a < ub_a; ++a) { - uint32_t tid = (a - lb_a) / num_a_per_thread; - if (tid >= static_cast(threads)) { - tid = threads - 1; - } - - uint32_t a_base = lb_a + tid * num_a_per_thread; - uint32_t a_hi = (tid < static_cast(threads - 1)) - ? (a_base + num_a_per_thread) - : ub_a; - uint32_t a_delta = a - a_base; - - if (a == a_hi - 1) { - non_existing_a_uniqs[tid].insert(a); - continue; - } - - uint32_t c_base = lb_c + tid * num_c_per_thread; - uint32_t c_hi = (tid < static_cast(threads - 1)) - ? (c_base + num_c_per_thread) - : ub_c; - uint32_t c_delta = a_delta % (c_hi - c_base - 1); - uint32_t c = c_base + c_delta; - - uint32_t b = rnd.Next(); - Record record(a, b, c); - WriteBatch wb; - const auto primary_index_entry = record.EncodePrimaryIndexEntry(); - Status s = wb.Put(primary_index_entry.first, primary_index_entry.second); - assert(s.ok()); - - const auto secondary_index_entry = record.EncodeSecondaryIndexEntry(); - s = wb.Put(secondary_index_entry.first, secondary_index_entry.second); - assert(s.ok()); - - s = txn_db_->Write(wopts, &wb); - assert(s.ok()); - - // TODO (yanqin): make the following check optional, especially when data - // size is large. - Record tmp_rec; - tmp_rec.SetB(record.b_value()); - s = tmp_rec.DecodeSecondaryIndexEntry(secondary_index_entry.first, - secondary_index_entry.second); - assert(s.ok()); - assert(tmp_rec == record); - - existing_a_uniqs[tid].insert(a); - existing_c_uniqs[tid].insert(c); - } - - for (int i = 0; i < threads; ++i) { - uint32_t my_seed = i + shared->GetSeed(); - - auto& key_gen_for_a = key_gen_for_a_[i]; - assert(!key_gen_for_a); - uint32_t low = lb_a + i * num_a_per_thread; - uint32_t high = (i < threads - 1) ? (low + num_a_per_thread) : ub_a; - assert(existing_a_uniqs[i].size() == high - low - 1); - assert(non_existing_a_uniqs[i].size() == 1); - key_gen_for_a = std::make_unique( - my_seed, low, high, std::move(existing_a_uniqs[i]), - std::move(non_existing_a_uniqs[i])); - - auto& key_gen_for_c = key_gen_for_c_[i]; - assert(!key_gen_for_c); - low = lb_c + i * num_c_per_thread; - high = (i < threads - 1) ? 
(low + num_c_per_thread) : ub_c; - non_existing_c_uniqs[i].insert(high - 1); - assert(existing_c_uniqs[i].size() == high - low - 1); - assert(non_existing_c_uniqs[i].size() == 1); - key_gen_for_c = std::make_unique( - my_seed, low, high, std::move(existing_c_uniqs[i]), - std::move(non_existing_c_uniqs[i])); - } -} - -// Scan an existing, non-empty database. -// Set up [lb_a, ub_a) and [lb_c, ub_c) as test key ranges. -// Set up KeyGenerator objects for each sub key range operated on by each -// thread. -// Scan the entire database and for each subrange, populate the existing keys -// and non-existing keys. We currently require the non-existing keys be -// non-empty after initialization. -void MultiOpsTxnsStressTest::ScanExistingDb(SharedState* shared, int threads) { - key_gen_for_a_.resize(threads); - key_gen_for_c_.resize(threads); - - KeySpaces key_spaces = ReadKeySpacesDesc(FLAGS_key_spaces_path); - - const uint32_t lb_a = key_spaces.lb_a; - const uint32_t ub_a = key_spaces.ub_a; - const uint32_t lb_c = key_spaces.lb_c; - const uint32_t ub_c = key_spaces.ub_c; - - assert(lb_a < ub_a && lb_c < ub_c); - - fprintf(stdout, "a from [%u, %u), c from [%u, %u)\n", - static_cast(lb_a), static_cast(ub_a), - static_cast(lb_c), static_cast(ub_c)); - - assert(ub_a > lb_a && ub_a > lb_a + threads); - assert(ub_c > lb_c && ub_c > lb_c + threads); - - const uint32_t num_c = ub_c - lb_c; - const uint32_t num_c_per_thread = num_c / threads; - const uint32_t num_a = ub_a - lb_a; - const uint32_t num_a_per_thread = num_a / threads; - - assert(db_); - ReadOptions ropts; - std::vector existing_a_uniqs(threads); - std::vector non_existing_a_uniqs(threads); - std::vector existing_c_uniqs(threads); - std::vector non_existing_c_uniqs(threads); - { - std::string pk_lb_str = Record::EncodePrimaryKey(0); - std::string pk_ub_str = - Record::EncodePrimaryKey(std::numeric_limits::max()); - Slice pk_lb = pk_lb_str; - Slice pk_ub = pk_ub_str; - ropts.iterate_lower_bound = &pk_lb; - ropts.iterate_upper_bound = &pk_ub; - ropts.total_order_seek = true; - std::unique_ptr it(db_->NewIterator(ropts)); - - for (it->SeekToFirst(); it->Valid(); it->Next()) { - Record record; - Status s = record.DecodePrimaryIndexEntry(it->key(), it->value()); - if (!s.ok()) { - fprintf(stderr, "Cannot decode primary index entry (%s => %s): %s\n", - it->key().ToString(true).c_str(), - it->value().ToString(true).c_str(), s.ToString().c_str()); - assert(false); - } - uint32_t a = record.a_value(); - assert(a >= lb_a); - assert(a < ub_a); - uint32_t tid = (a - lb_a) / num_a_per_thread; - if (tid >= static_cast(threads)) { - tid = threads - 1; - } - - existing_a_uniqs[tid].insert(a); - - uint32_t c = record.c_value(); - assert(c >= lb_c); - assert(c < ub_c); - tid = (c - lb_c) / num_c_per_thread; - if (tid >= static_cast(threads)) { - tid = threads - 1; - } - auto& existing_c_uniq = existing_c_uniqs[tid]; - existing_c_uniq.insert(c); - } - - for (uint32_t a = lb_a; a < ub_a; ++a) { - uint32_t tid = (a - lb_a) / num_a_per_thread; - if (tid >= static_cast(threads)) { - tid = threads - 1; - } - if (0 == existing_a_uniqs[tid].count(a)) { - non_existing_a_uniqs[tid].insert(a); - } - } - - for (uint32_t c = lb_c; c < ub_c; ++c) { - uint32_t tid = (c - lb_c) / num_c_per_thread; - if (tid >= static_cast(threads)) { - tid = threads - 1; - } - if (0 == existing_c_uniqs[tid].count(c)) { - non_existing_c_uniqs[tid].insert(c); - } - } - - for (int i = 0; i < threads; ++i) { - uint32_t my_seed = i + shared->GetSeed(); - auto& key_gen_for_a = key_gen_for_a_[i]; - 
assert(!key_gen_for_a); - uint32_t low = lb_a + i * num_a_per_thread; - uint32_t high = (i < threads - 1) ? (low + num_a_per_thread) : ub_a; - - // The following two assertions assume the test thread count and key - // space remain the same across different runs. Will need to relax. - assert(existing_a_uniqs[i].size() == high - low - 1); - assert(non_existing_a_uniqs[i].size() == 1); - - key_gen_for_a = std::make_unique( - my_seed, low, high, std::move(existing_a_uniqs[i]), - std::move(non_existing_a_uniqs[i])); - - auto& key_gen_for_c = key_gen_for_c_[i]; - assert(!key_gen_for_c); - low = lb_c + i * num_c_per_thread; - high = (i < threads - 1) ? (low + num_c_per_thread) : ub_c; - - // The following two assertions assume the test thread count and key - // space remain the same across different runs. Will need to relax. - assert(existing_c_uniqs[i].size() == high - low - 1); - assert(non_existing_c_uniqs[i].size() == 1); - - key_gen_for_c = std::make_unique( - my_seed, low, high, std::move(existing_c_uniqs[i]), - std::move(non_existing_c_uniqs[i])); - } - } -} - -StressTest* CreateMultiOpsTxnsStressTest() { - return new MultiOpsTxnsStressTest(); -} - -void CheckAndSetOptionsForMultiOpsTxnStressTest() { - if (FLAGS_test_batches_snapshots || FLAGS_test_cf_consistency) { - fprintf(stderr, - "-test_multi_ops_txns is not compatible with " - "-test_bathces_snapshots and -test_cf_consistency\n"); - exit(1); - } - if (!FLAGS_use_txn) { - fprintf(stderr, "-use_txn must be true if -test_multi_ops_txns\n"); - exit(1); - } else if (FLAGS_test_secondary > 0) { - fprintf( - stderr, - "secondary instance does not support replaying logs (MANIFEST + WAL) " - "of TransactionDB with write-prepared/write-unprepared policy\n"); - exit(1); - } - if (FLAGS_clear_column_family_one_in > 0) { - fprintf(stderr, - "-test_multi_ops_txns is not compatible with clearing column " - "families\n"); - exit(1); - } - if (FLAGS_column_families > 1) { - // TODO (yanqin) support separating primary index and secondary index in - // different column families. - fprintf(stderr, - "-test_multi_ops_txns currently does not use more than one column " - "family\n"); - exit(1); - } - if (FLAGS_writepercent > 0 || FLAGS_delpercent > 0 || - FLAGS_delrangepercent > 0) { - fprintf(stderr, - "-test_multi_ops_txns requires that -writepercent, -delpercent and " - "-delrangepercent be 0\n"); - exit(1); - } - if (FLAGS_key_spaces_path.empty()) { - fprintf(stderr, - "Must specify a file to store ranges of A and C via " - "-key_spaces_path\n"); - exit(1); - } - if (FLAGS_create_timestamped_snapshot_one_in > 0) { - if (FLAGS_txn_write_policy != - static_cast(TxnDBWritePolicy::WRITE_COMMITTED)) { - fprintf(stderr, - "Timestamped snapshot is not yet supported by " - "write-prepared/write-unprepared transactions\n"); - exit(1); - } - } - if (FLAGS_sync_fault_injection == 1) { - fprintf(stderr, - "Sync fault injection is currently not supported in " - "-test_multi_ops_txns\n"); - exit(1); - } -} -} // namespace ROCKSDB_NAMESPACE - -#endif // GFLAGS diff --git a/db_stress_tool/multi_ops_txns_stress.h b/db_stress_tool/multi_ops_txns_stress.h deleted file mode 100644 index 479344643..000000000 --- a/db_stress_tool/multi_ops_txns_stress.h +++ /dev/null @@ -1,440 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
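[Editor's note] PreloadDb and ScanExistingDb above both split the key spaces [lb_a, ub_a) and [lb_c, ub_c) into per-thread subranges, with the last thread absorbing the remainder and keys past the last even boundary clamped onto it. A minimal sketch of that arithmetic, assuming hypothetical helper names (SubrangeForThread, ThreadForKey) that are not part of the original file:

```
#include <cassert>
#include <cstdint>
#include <utility>

// Sketch: keys in [lb, ub) are split into `threads` contiguous subranges of
// width (ub - lb) / threads; the last thread absorbs any remainder.
std::pair<uint32_t, uint32_t> SubrangeForThread(uint32_t lb, uint32_t ub,
                                                int threads, int tid) {
  assert(threads > 0 && tid >= 0 && tid < threads);
  const uint32_t width = (ub - lb) / static_cast<uint32_t>(threads);
  const uint32_t low = lb + static_cast<uint32_t>(tid) * width;
  const uint32_t high = (tid + 1 < threads) ? (low + width) : ub;
  return {low, high};
}

// Sketch: map a key back to its owning thread, clamping keys that fall past
// the last evenly sized subrange onto the last thread (as the tid computation
// above does). Assumes ub > lb + threads, as asserted by the stress test.
int ThreadForKey(uint32_t key, uint32_t lb, uint32_t ub, int threads) {
  const uint32_t width = (ub - lb) / static_cast<uint32_t>(threads);
  assert(width > 0);
  const int tid = static_cast<int>((key - lb) / width);
  return tid < threads ? tid : threads - 1;
}
```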
-// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifdef GFLAGS -#include "db_stress_tool/db_stress_common.h" - -namespace ROCKSDB_NAMESPACE { - -// This file defines MultiOpsTxnsStress so that we can stress test RocksDB -// transactions on a simple, emulated relational table. -// -// The record format is similar to the example found at -// https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format. -// -// The table is created by -// ``` -// create table t1 ( -// a int primary key, -// b int, -// c int, -// key(c), -// ) -// ``` -// -// (For simplicity, we use uint32_t for int here.) -// -// For this table, there is a primary index using `a`, as well as a secondary -// index using `c` and `a`. -// -// Primary key format: -// | index id | M(a) | -// Primary index value: -// | b | c | -// M(a) represents the big-endian format of a. -// -// Secondary key format: -// | index id | M(c) | M(a) | -// Secondary index value: -// | crc32 | -// Similarly to M(a), M(c) is the big-endian format of c. -// -// The in-memory representation of a record is defined in class -// MultiOpsTxnsStress:Record that includes a number of helper methods to -// encode/decode primary index keys, primary index values, secondary index keys, -// secondary index values, etc. -// -// Sometimes primary index and secondary index reside on different column -// families, but sometimes they colocate in the same column family. Current -// implementation puts them in the same (default) column family, and this is -// subject to future change if we find it interesting to test the other case. -// -// Class MultiOpsTxnsStressTest has the following transactions for testing. -// -// 1. Primary key update -// UPDATE t1 SET a = 3 WHERE a = 2; -// ``` -// tx->GetForUpdate(primary key a=2) -// tx->GetForUpdate(primary key a=3) -// tx->Delete(primary key a=2) -// tx->Put(primary key a=3, value) -// tx->batch->SingleDelete(secondary key a=2) -// tx->batch->Put(secondary key a=3, value) -// tx->Prepare() -// Tx->Commit() -// ``` -// -// 2. Secondary key update -// UPDATE t1 SET c = 3 WHERE c = 2; -// ``` -// iter->Seek(secondary key) -// // Get corresponding primary key value(s) from iterator -// tx->GetForUpdate(primary key) -// tx->Put(primary key, value c=3) -// tx->batch->SingleDelete(secondary key c=2) -// tx->batch->Put(secondary key c=3) -// tx->Prepare() -// tx->Commit() -// ``` -// -// 3. Primary index value update -// UPDATE t1 SET b = b + 1 WHERE a = 2; -// ``` -// tx->GetForUpdate(primary key a=2) -// tx->Put(primary key a=2, value b=b+1) -// tx->Prepare() -// tx->Commit() -// ``` -// -// 4. Point lookup -// SELECT * FROM t1 WHERE a = 3; -// ``` -// tx->Get(primary key a=3) -// tx->Commit() -// ``` -// -// 5. 
Range scan -// SELECT * FROM t1 WHERE c = 2; -// ``` -// it = tx->GetIterator() -// it->Seek(secondary key c=2) -// tx->Commit() -// ``` - -class MultiOpsTxnsStressTest : public StressTest { - public: - class Record { - public: - static constexpr uint32_t kMetadataPrefix = 0; - static constexpr uint32_t kPrimaryIndexId = 1; - static constexpr uint32_t kSecondaryIndexId = 2; - - static constexpr size_t kPrimaryIndexEntrySize = 8 + 8; - static constexpr size_t kSecondaryIndexEntrySize = 12 + 4; - - static_assert(kPrimaryIndexId < kSecondaryIndexId, - "kPrimaryIndexId must be smaller than kSecondaryIndexId"); - - static_assert(sizeof(kPrimaryIndexId) == sizeof(uint32_t), - "kPrimaryIndexId must be 4 bytes"); - static_assert(sizeof(kSecondaryIndexId) == sizeof(uint32_t), - "kSecondaryIndexId must be 4 bytes"); - - // Used for generating search key to probe primary index. - static std::string EncodePrimaryKey(uint32_t a); - // Used for generating search prefix to probe secondary index. - static std::string EncodeSecondaryKey(uint32_t c); - // Used for generating search key to probe secondary index. - static std::string EncodeSecondaryKey(uint32_t c, uint32_t a); - - static std::tuple DecodePrimaryIndexValue( - Slice primary_index_value); - - static std::pair DecodeSecondaryIndexValue( - Slice secondary_index_value); - - Record() = default; - Record(uint32_t _a, uint32_t _b, uint32_t _c) : a_(_a), b_(_b), c_(_c) {} - - bool operator==(const Record& other) const { - return a_ == other.a_ && b_ == other.b_ && c_ == other.c_; - } - - bool operator!=(const Record& other) const { return !(*this == other); } - - std::pair EncodePrimaryIndexEntry() const; - - std::string EncodePrimaryKey() const; - - std::string EncodePrimaryIndexValue() const; - - std::pair EncodeSecondaryIndexEntry() const; - - std::string EncodeSecondaryKey() const; - - Status DecodePrimaryIndexEntry(Slice primary_index_key, - Slice primary_index_value); - - Status DecodeSecondaryIndexEntry(Slice secondary_index_key, - Slice secondary_index_value); - - uint32_t a_value() const { return a_; } - uint32_t b_value() const { return b_; } - uint32_t c_value() const { return c_; } - - void SetA(uint32_t _a) { a_ = _a; } - void SetB(uint32_t _b) { b_ = _b; } - void SetC(uint32_t _c) { c_ = _c; } - - std::string ToString() const { - std::string ret("("); - ret.append(std::to_string(a_)); - ret.append(","); - ret.append(std::to_string(b_)); - ret.append(","); - ret.append(std::to_string(c_)); - ret.append(")"); - return ret; - } - - private: - friend class InvariantChecker; - - uint32_t a_{0}; - uint32_t b_{0}; - uint32_t c_{0}; - }; - - MultiOpsTxnsStressTest() {} - - ~MultiOpsTxnsStressTest() override {} - - void FinishInitDb(SharedState*) override; - - void ReopenAndPreloadDbIfNeeded(SharedState* shared); - - bool IsStateTracked() const override { return false; } - - Status TestGet(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - std::vector TestMultiGet( - ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - Status TestPrefixScan(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - // Given a key K, this creates an 
iterator which scans to K and then - // does a random sequence of Next/Prev operations. - Status TestIterate(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - Status TestPut(ThreadState* thread, WriteOptions& write_opts, - const ReadOptions& read_opts, const std::vector& cf_ids, - const std::vector& keys, char (&value)[100]) override; - - Status TestDelete(ThreadState* thread, WriteOptions& write_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - void TestIngestExternalFile(ThreadState* thread, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - void TestCompactRange(ThreadState* thread, int64_t rand_key, - const Slice& start_key, - ColumnFamilyHandle* column_family) override; - - Status TestBackupRestore(ThreadState* thread, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - Status TestCheckpoint(ThreadState* thread, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - Status TestApproximateSize(ThreadState* thread, uint64_t iteration, - const std::vector& rand_column_families, - const std::vector& rand_keys) override; - - Status TestCustomOperations( - ThreadState* thread, - const std::vector& rand_column_families) override; - - void RegisterAdditionalListeners() override; - - void PrepareTxnDbOptions(SharedState* /*shared*/, - TransactionDBOptions& txn_db_opts) override; - - Status PrimaryKeyUpdateTxn(ThreadState* thread, uint32_t old_a, - uint32_t old_a_pos, uint32_t new_a); - - Status SecondaryKeyUpdateTxn(ThreadState* thread, uint32_t old_c, - uint32_t old_c_pos, uint32_t new_c); - - Status UpdatePrimaryIndexValueTxn(ThreadState* thread, uint32_t a, - uint32_t b_delta); - - Status PointLookupTxn(ThreadState* thread, ReadOptions ropts, uint32_t a); - - Status RangeScanTxn(ThreadState* thread, ReadOptions ropts, uint32_t c); - - void VerifyDb(ThreadState* thread) const override; - - void ContinuouslyVerifyDb(ThreadState* thread) const override { - VerifyDb(thread); - } - - void VerifyPkSkFast(int job_id); - - protected: - class Counter { - public: - uint64_t Next() { return value_.fetch_add(1); } - - private: - std::atomic value_ = Env::Default()->NowNanos(); - }; - - using KeySet = std::set; - class KeyGenerator { - public: - explicit KeyGenerator(uint32_t s, uint32_t low, uint32_t high, - KeySet&& existing_uniq, KeySet&& non_existing_uniq) - : rand_(s), - low_(low), - high_(high), - existing_uniq_(std::move(existing_uniq)), - non_existing_uniq_(std::move(non_existing_uniq)) {} - ~KeyGenerator() { - assert(!existing_uniq_.empty()); - assert(!non_existing_uniq_.empty()); - } - void FinishInit(); - - std::pair ChooseExisting(); - void Replace(uint32_t old_val, uint32_t old_pos, uint32_t new_val); - uint32_t Allocate(); - void UndoAllocation(uint32_t new_val); - - std::string ToString() const { - std::ostringstream oss; - oss << "[" << low_ << ", " << high_ << "): " << existing_.size() - << " elements, " << existing_uniq_.size() << " unique values, " - << non_existing_uniq_.size() << " unique non-existing values"; - return oss.str(); - } - - private: - Random rand_; - uint32_t low_ = 0; - uint32_t high_ = 0; - std::vector existing_{}; - KeySet existing_uniq_{}; - KeySet non_existing_uniq_{}; 
- bool initialized_ = false; - }; - - // Return - std::pair ChooseExistingA(ThreadState* thread); - - uint32_t GenerateNextA(ThreadState* thread); - - // Return - std::pair ChooseExistingC(ThreadState* thread); - - uint32_t GenerateNextC(ThreadState* thread); - - // Randomly commit or rollback `txn` - void ProcessRecoveredPreparedTxnsHelper(Transaction* txn, - SharedState*) override; - - // Some applications, e.g. MyRocks writes a KV pair to the database via - // commit-time-write-batch (ctwb) in additional to the transaction's regular - // write batch. The key is usually constant representing some system - // metadata, while the value is monoticailly increasing which represents the - // actual value of the metadata. Method WriteToCommitTimeWriteBatch() - // emulates this scenario. - Status WriteToCommitTimeWriteBatch(Transaction& txn); - - Status CommitAndCreateTimestampedSnapshotIfNeeded(ThreadState* thread, - Transaction& txn); - - void SetupSnapshot(ThreadState* thread, ReadOptions& read_opts, - Transaction& txn, - std::shared_ptr& snapshot); - - std::vector> key_gen_for_a_; - std::vector> key_gen_for_c_; - - Counter counter_{}; - - private: - struct KeySpaces { - uint32_t lb_a = 0; - uint32_t ub_a = 0; - uint32_t lb_c = 0; - uint32_t ub_c = 0; - - explicit KeySpaces() = default; - explicit KeySpaces(uint32_t _lb_a, uint32_t _ub_a, uint32_t _lb_c, - uint32_t _ub_c) - : lb_a(_lb_a), ub_a(_ub_a), lb_c(_lb_c), ub_c(_ub_c) {} - - std::string EncodeTo() const; - bool DecodeFrom(Slice data); - }; - - void PersistKeySpacesDesc(const std::string& key_spaces_path, uint32_t lb_a, - uint32_t ub_a, uint32_t lb_c, uint32_t ub_c); - - KeySpaces ReadKeySpacesDesc(const std::string& key_spaces_path); - - void PreloadDb(SharedState* shared, int threads, uint32_t lb_a, uint32_t ub_a, - uint32_t lb_c, uint32_t ub_c); - - void ScanExistingDb(SharedState* shared, int threads); -}; - -class InvariantChecker { - public: - static_assert(sizeof(MultiOpsTxnsStressTest::Record().a_) == sizeof(uint32_t), - "MultiOpsTxnsStressTest::Record::a_ must be 4 bytes"); - static_assert(sizeof(MultiOpsTxnsStressTest::Record().b_) == sizeof(uint32_t), - "MultiOpsTxnsStressTest::Record::b_ must be 4 bytes"); - static_assert(sizeof(MultiOpsTxnsStressTest::Record().c_) == sizeof(uint32_t), - "MultiOpsTxnsStressTest::Record::c_ must be 4 bytes"); -}; - -class MultiOpsTxnsStressListener : public EventListener { - public: - explicit MultiOpsTxnsStressListener(MultiOpsTxnsStressTest* stress_test) - : stress_test_(stress_test) { - assert(stress_test_); - } - - ~MultiOpsTxnsStressListener() override {} - - void OnFlushCompleted(DB* db, const FlushJobInfo& info) override { - assert(db); -#ifdef NDEBUG - (void)db; -#endif - assert(info.cf_id == 0); - stress_test_->VerifyPkSkFast(info.job_id); - } - - void OnCompactionCompleted(DB* db, const CompactionJobInfo& info) override { - assert(db); -#ifdef NDEBUG - (void)db; -#endif - assert(info.cf_id == 0); - stress_test_->VerifyPkSkFast(info.job_id); - } - - private: - MultiOpsTxnsStressTest* const stress_test_ = nullptr; -}; - -} // namespace ROCKSDB_NAMESPACE -#endif // GFLAGS diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc deleted file mode 100644 index 716ea3802..000000000 --- a/db_stress_tool/no_batched_ops_stress.cc +++ /dev/null @@ -1,1676 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
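[Editor's note] The index-key layouts documented in the header above ("| index id | M(a) |" for the primary index, "| index id | M(c) | M(a) |" for the secondary index) rely on M(x) being the big-endian encoding of x, so that lexicographic byte order matches numeric order. The following is only an illustrative sketch of that layout, not the actual Record::EncodePrimaryKey / EncodeSecondaryKey implementations:

```
#include <cstdint>
#include <string>

// Packs a uint32_t in big-endian order so that byte-wise comparison of the
// resulting keys matches numeric comparison of the encoded integers.
void PutBigEndian32(std::string* dst, uint32_t v) {
  dst->push_back(static_cast<char>((v >> 24) & 0xff));
  dst->push_back(static_cast<char>((v >> 16) & 0xff));
  dst->push_back(static_cast<char>((v >> 8) & 0xff));
  dst->push_back(static_cast<char>(v & 0xff));
}

// | index id (4B) | M(a) (4B) |  -- primary index key for row `a`
std::string MakePrimaryKey(uint32_t index_id, uint32_t a) {
  std::string key;
  PutBigEndian32(&key, index_id);
  PutBigEndian32(&key, a);
  return key;
}

// | index id (4B) | M(c) (4B) | M(a) (4B) |  -- secondary index key for (c, a)
std::string MakeSecondaryKey(uint32_t index_id, uint32_t c, uint32_t a) {
  std::string key;
  PutBigEndian32(&key, index_id);
  PutBigEndian32(&key, c);
  PutBigEndian32(&key, a);
  return key;
}
```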
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifdef GFLAGS -#include "db_stress_tool/db_stress_common.h" -#include "rocksdb/utilities/transaction_db.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { -class NonBatchedOpsStressTest : public StressTest { - public: - NonBatchedOpsStressTest() {} - - virtual ~NonBatchedOpsStressTest() {} - - void VerifyDb(ThreadState* thread) const override { - // This `ReadOptions` is for validation purposes. Ignore - // `FLAGS_rate_limit_user_ops` to avoid slowing any validation. - ReadOptions options(FLAGS_verify_checksum, true); - std::string ts_str; - Slice ts; - if (FLAGS_user_timestamp_size > 0) { - ts_str = GetNowNanos(); - ts = ts_str; - options.timestamp = &ts; - } - - auto shared = thread->shared; - const int64_t max_key = shared->GetMaxKey(); - const int64_t keys_per_thread = max_key / shared->GetNumThreads(); - int64_t start = keys_per_thread * thread->tid; - int64_t end = start + keys_per_thread; - uint64_t prefix_to_use = - (FLAGS_prefix_size < 0) ? 1 : static_cast(FLAGS_prefix_size); - - if (thread->tid == shared->GetNumThreads() - 1) { - end = max_key; - } - - for (size_t cf = 0; cf < column_families_.size(); ++cf) { - if (thread->shared->HasVerificationFailedYet()) { - break; - } - - enum class VerificationMethod { - kIterator, - kGet, - kGetEntity, - kMultiGet, - kMultiGetEntity, - kGetMergeOperands, - // Add any new items above kNumberOfMethods - kNumberOfMethods - }; - - constexpr int num_methods = - static_cast(VerificationMethod::kNumberOfMethods); - - const VerificationMethod method = - static_cast(thread->rand.Uniform( - (FLAGS_user_timestamp_size > 0) ? 
num_methods - 1 : num_methods)); - - if (method == VerificationMethod::kIterator) { - std::unique_ptr iter( - db_->NewIterator(options, column_families_[cf])); - - std::string seek_key = Key(start); - iter->Seek(seek_key); - - Slice prefix(seek_key.data(), prefix_to_use); - - for (int64_t i = start; i < end; ++i) { - if (thread->shared->HasVerificationFailedYet()) { - break; - } - - const std::string key = Key(i); - const Slice k(key); - const Slice pfx(key.data(), prefix_to_use); - - // Reseek when the prefix changes - if (prefix_to_use > 0 && prefix.compare(pfx) != 0) { - iter->Seek(k); - seek_key = key; - prefix = Slice(seek_key.data(), prefix_to_use); - } - - Status s = iter->status(); - - std::string from_db; - - if (iter->Valid()) { - const int diff = iter->key().compare(k); - - if (diff > 0) { - s = Status::NotFound(); - } else if (diff == 0) { - if (!VerifyWideColumns(iter->value(), iter->columns())) { - VerificationAbort(shared, static_cast(cf), i, - iter->value(), iter->columns()); - } - - from_db = iter->value().ToString(); - iter->Next(); - } else { - assert(diff < 0); - - VerificationAbort(shared, "An out of range key was found", - static_cast(cf), i); - } - } else { - // The iterator found no value for the key in question, so do not - // move to the next item in the iterator - s = Status::NotFound(); - } - - VerifyOrSyncValue(static_cast(cf), i, options, shared, from_db, - /* msg_prefix */ "Iterator verification", s, - /* strict */ true); - - if (!from_db.empty()) { - PrintKeyValue(static_cast(cf), static_cast(i), - from_db.data(), from_db.size()); - } - } - } else if (method == VerificationMethod::kGet) { - for (int64_t i = start; i < end; ++i) { - if (thread->shared->HasVerificationFailedYet()) { - break; - } - - const std::string key = Key(i); - std::string from_db; - - Status s = db_->Get(options, column_families_[cf], key, &from_db); - - VerifyOrSyncValue(static_cast(cf), i, options, shared, from_db, - /* msg_prefix */ "Get verification", s, - /* strict */ true); - - if (!from_db.empty()) { - PrintKeyValue(static_cast(cf), static_cast(i), - from_db.data(), from_db.size()); - } - } - } else if (method == VerificationMethod::kGetEntity) { - for (int64_t i = start; i < end; ++i) { - if (thread->shared->HasVerificationFailedYet()) { - break; - } - - const std::string key = Key(i); - PinnableWideColumns result; - - Status s = - db_->GetEntity(options, column_families_[cf], key, &result); - - std::string from_db; - - if (s.ok()) { - const WideColumns& columns = result.columns(); - - if (!columns.empty() && - columns.front().name() == kDefaultWideColumnName) { - from_db = columns.front().value().ToString(); - } - - if (!VerifyWideColumns(columns)) { - VerificationAbort(shared, static_cast(cf), i, from_db, - columns); - } - } - - VerifyOrSyncValue(static_cast(cf), i, options, shared, from_db, - /* msg_prefix */ "GetEntity verification", s, - /* strict */ true); - - if (!from_db.empty()) { - PrintKeyValue(static_cast(cf), static_cast(i), - from_db.data(), from_db.size()); - } - } - } else if (method == VerificationMethod::kMultiGet) { - for (int64_t i = start; i < end;) { - if (thread->shared->HasVerificationFailedYet()) { - break; - } - - // Keep the batch size to some reasonable value - size_t batch_size = thread->rand.Uniform(128) + 1; - batch_size = std::min(batch_size, end - i); - - std::vector key_strs(batch_size); - std::vector keys(batch_size); - std::vector values(batch_size); - std::vector statuses(batch_size); - - for (size_t j = 0; j < batch_size; ++j) { - key_strs[j] 
= Key(i + j); - keys[j] = Slice(key_strs[j]); - } - - db_->MultiGet(options, column_families_[cf], batch_size, keys.data(), - values.data(), statuses.data()); - - for (size_t j = 0; j < batch_size; ++j) { - const std::string from_db = values[j].ToString(); - - VerifyOrSyncValue(static_cast(cf), i + j, options, shared, - from_db, /* msg_prefix */ "MultiGet verification", - statuses[j], /* strict */ true); - - if (!from_db.empty()) { - PrintKeyValue(static_cast(cf), static_cast(i + j), - from_db.data(), from_db.size()); - } - } - - i += batch_size; - } - } else if (method == VerificationMethod::kMultiGetEntity) { - for (int64_t i = start; i < end;) { - if (thread->shared->HasVerificationFailedYet()) { - break; - } - - // Keep the batch size to some reasonable value - size_t batch_size = thread->rand.Uniform(128) + 1; - batch_size = std::min(batch_size, end - i); - - std::vector key_strs(batch_size); - std::vector keys(batch_size); - std::vector results(batch_size); - std::vector statuses(batch_size); - - for (size_t j = 0; j < batch_size; ++j) { - key_strs[j] = Key(i + j); - keys[j] = Slice(key_strs[j]); - } - - db_->MultiGetEntity(options, column_families_[cf], batch_size, - keys.data(), results.data(), statuses.data()); - - for (size_t j = 0; j < batch_size; ++j) { - std::string from_db; - - if (statuses[j].ok()) { - const WideColumns& columns = results[j].columns(); - - if (!columns.empty() && - columns.front().name() == kDefaultWideColumnName) { - from_db = columns.front().value().ToString(); - } - - if (!VerifyWideColumns(columns)) { - VerificationAbort(shared, static_cast(cf), i, from_db, - columns); - } - } - - VerifyOrSyncValue(static_cast(cf), i + j, options, shared, - from_db, - /* msg_prefix */ "MultiGetEntity verification", - statuses[j], /* strict */ true); - - if (!from_db.empty()) { - PrintKeyValue(static_cast(cf), static_cast(i + j), - from_db.data(), from_db.size()); - } - } - - i += batch_size; - } - } else { - assert(method == VerificationMethod::kGetMergeOperands); - - // Start off with small size that will be increased later if necessary - std::vector values(4); - - GetMergeOperandsOptions merge_operands_info; - merge_operands_info.expected_max_number_of_operands = - static_cast(values.size()); - - for (int64_t i = start; i < end; ++i) { - if (thread->shared->HasVerificationFailedYet()) { - break; - } - - const std::string key = Key(i); - const Slice k(key); - std::string from_db; - int number_of_operands = 0; - - Status s = db_->GetMergeOperands(options, column_families_[cf], k, - values.data(), &merge_operands_info, - &number_of_operands); - - if (s.IsIncomplete()) { - // Need to resize values as there are more than values.size() merge - // operands on this key. 
Should only happen a few times when we - // encounter a key that had more merge operands than any key seen so - // far - values.resize(number_of_operands); - merge_operands_info.expected_max_number_of_operands = - static_cast(number_of_operands); - s = db_->GetMergeOperands(options, column_families_[cf], k, - values.data(), &merge_operands_info, - &number_of_operands); - } - // Assumed here that GetMergeOperands always sets number_of_operand - if (number_of_operands) { - from_db = values[number_of_operands - 1].ToString(); - } - - VerifyOrSyncValue(static_cast(cf), i, options, shared, from_db, - /* msg_prefix */ "GetMergeOperands verification", s, - /* strict */ true); - - if (!from_db.empty()) { - PrintKeyValue(static_cast(cf), static_cast(i), - from_db.data(), from_db.size()); - } - } - } - } - } - - void ContinuouslyVerifyDb(ThreadState* thread) const override { - if (!cmp_db_) { - return; - } - assert(cmp_db_); - assert(!cmp_cfhs_.empty()); - Status s = cmp_db_->TryCatchUpWithPrimary(); - if (!s.ok()) { - assert(false); - exit(1); - } - - const auto checksum_column_family = [](Iterator* iter, - uint32_t* checksum) -> Status { - assert(nullptr != checksum); - uint32_t ret = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ret = crc32c::Extend(ret, iter->key().data(), iter->key().size()); - ret = crc32c::Extend(ret, iter->value().data(), iter->value().size()); - } - *checksum = ret; - return iter->status(); - }; - - auto* shared = thread->shared; - assert(shared); - const int64_t max_key = shared->GetMaxKey(); - ReadOptions read_opts(FLAGS_verify_checksum, true); - std::string ts_str; - Slice ts; - if (FLAGS_user_timestamp_size > 0) { - ts_str = GetNowNanos(); - ts = ts_str; - read_opts.timestamp = &ts; - } - - static Random64 rand64(shared->GetSeed()); - - { - uint32_t crc = 0; - std::unique_ptr it(cmp_db_->NewIterator(read_opts)); - s = checksum_column_family(it.get(), &crc); - if (!s.ok()) { - fprintf(stderr, "Computing checksum of default cf: %s\n", - s.ToString().c_str()); - assert(false); - } - } - - for (auto* handle : cmp_cfhs_) { - if (thread->rand.OneInOpt(3)) { - // Use Get() - uint64_t key = rand64.Uniform(static_cast(max_key)); - std::string key_str = Key(key); - std::string value; - std::string key_ts; - s = cmp_db_->Get(read_opts, handle, key_str, &value, - FLAGS_user_timestamp_size > 0 ? 
&key_ts : nullptr); - s.PermitUncheckedError(); - } else { - // Use range scan - std::unique_ptr iter(cmp_db_->NewIterator(read_opts, handle)); - uint32_t rnd = (thread->rand.Next()) % 4; - if (0 == rnd) { - // SeekToFirst() + Next()*5 - read_opts.total_order_seek = true; - iter->SeekToFirst(); - for (int i = 0; i < 5 && iter->Valid(); ++i, iter->Next()) { - } - } else if (1 == rnd) { - // SeekToLast() + Prev()*5 - read_opts.total_order_seek = true; - iter->SeekToLast(); - for (int i = 0; i < 5 && iter->Valid(); ++i, iter->Prev()) { - } - } else if (2 == rnd) { - // Seek() +Next()*5 - uint64_t key = rand64.Uniform(static_cast(max_key)); - std::string key_str = Key(key); - iter->Seek(key_str); - for (int i = 0; i < 5 && iter->Valid(); ++i, iter->Next()) { - } - } else { - // SeekForPrev() + Prev()*5 - uint64_t key = rand64.Uniform(static_cast(max_key)); - std::string key_str = Key(key); - iter->SeekForPrev(key_str); - for (int i = 0; i < 5 && iter->Valid(); ++i, iter->Prev()) { - } - } - } - } - } - - void MaybeClearOneColumnFamily(ThreadState* thread) override { - if (FLAGS_column_families > 1) { - if (thread->rand.OneInOpt(FLAGS_clear_column_family_one_in)) { - // drop column family and then create it again (can't drop default) - int cf = thread->rand.Next() % (FLAGS_column_families - 1) + 1; - std::string new_name = - std::to_string(new_column_family_name_.fetch_add(1)); - { - MutexLock l(thread->shared->GetMutex()); - fprintf( - stdout, - "[CF %d] Dropping and recreating column family. new name: %s\n", - cf, new_name.c_str()); - } - thread->shared->LockColumnFamily(cf); - Status s = db_->DropColumnFamily(column_families_[cf]); - delete column_families_[cf]; - if (!s.ok()) { - fprintf(stderr, "dropping column family error: %s\n", - s.ToString().c_str()); - std::terminate(); - } - s = db_->CreateColumnFamily(ColumnFamilyOptions(options_), new_name, - &column_families_[cf]); - column_family_names_[cf] = new_name; - thread->shared->ClearColumnFamily(cf); - if (!s.ok()) { - fprintf(stderr, "creating column family error: %s\n", - s.ToString().c_str()); - std::terminate(); - } - thread->shared->UnlockColumnFamily(cf); - } - } - } - - bool ShouldAcquireMutexOnKey() const override { return true; } - - bool IsStateTracked() const override { return true; } - - Status TestGet(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - auto cfh = column_families_[rand_column_families[0]]; - std::string key_str = Key(rand_keys[0]); - Slice key = key_str; - std::string from_db; - int error_count = 0; - - if (fault_fs_guard) { - fault_fs_guard->EnableErrorInjection(); - SharedState::ignore_read_error = false; - } - - std::unique_ptr lock(new MutexLock( - thread->shared->GetMutexForKey(rand_column_families[0], rand_keys[0]))); - - ReadOptions read_opts_copy = read_opts; - std::string read_ts_str; - Slice read_ts_slice; - if (FLAGS_user_timestamp_size > 0) { - read_ts_str = GetNowNanos(); - read_ts_slice = read_ts_str; - read_opts_copy.timestamp = &read_ts_slice; - } - bool read_older_ts = MaybeUseOlderTimestampForPointLookup( - thread, read_ts_str, read_ts_slice, read_opts_copy); - - Status s = db_->Get(read_opts_copy, cfh, key, &from_db); - if (fault_fs_guard) { - error_count = fault_fs_guard->GetAndResetErrorCount(); - } - if (s.ok()) { - if (fault_fs_guard) { - if (error_count && !SharedState::ignore_read_error) { - // Grab mutex so multiple thread don't try to print the - // stack trace at the same time - MutexLock 
l(thread->shared->GetMutex()); - fprintf(stderr, "Didn't get expected error from Get\n"); - fprintf(stderr, "Callstack that injected the fault\n"); - fault_fs_guard->PrintFaultBacktrace(); - std::terminate(); - } - } - // found case - thread->stats.AddGets(1, 1); - // we only have the latest expected state - if (!FLAGS_skip_verifydb && !read_older_ts && - thread->shared->Get(rand_column_families[0], rand_keys[0]) == - SharedState::DELETION_SENTINEL) { - thread->shared->SetVerificationFailure(); - fprintf(stderr, - "error : inconsistent values for key %s: Get returns %s, " - "expected state does not have the key.\n", - key.ToString(true).c_str(), StringToHex(from_db).c_str()); - } - } else if (s.IsNotFound()) { - // not found case - thread->stats.AddGets(1, 0); - if (!FLAGS_skip_verifydb && !read_older_ts) { - auto expected = - thread->shared->Get(rand_column_families[0], rand_keys[0]); - if (expected != SharedState::DELETION_SENTINEL && - expected != SharedState::UNKNOWN_SENTINEL) { - thread->shared->SetVerificationFailure(); - fprintf(stderr, - "error : inconsistent values for key %s: expected state has " - "the key, Get() returns NotFound.\n", - key.ToString(true).c_str()); - } - } - } else { - if (error_count == 0) { - // errors case - thread->stats.AddErrors(1); - } else { - thread->stats.AddVerifiedErrors(1); - } - } - if (fault_fs_guard) { - fault_fs_guard->DisableErrorInjection(); - } - return s; - } - - std::vector TestMultiGet( - ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - size_t num_keys = rand_keys.size(); - std::vector key_str; - std::vector keys; - key_str.reserve(num_keys); - keys.reserve(num_keys); - std::vector values(num_keys); - std::vector statuses(num_keys); - ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]]; - int error_count = 0; - // Do a consistency check between Get and MultiGet. Don't do it too - // often as it will slow db_stress down - bool do_consistency_check = thread->rand.OneIn(4); - - ReadOptions readoptionscopy = read_opts; - if (do_consistency_check) { - readoptionscopy.snapshot = db_->GetSnapshot(); - } - - std::string read_ts_str; - Slice read_ts_slice; - MaybeUseOlderTimestampForPointLookup(thread, read_ts_str, read_ts_slice, - readoptionscopy); - - readoptionscopy.rate_limiter_priority = - FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL; - - // To appease clang analyzer - const bool use_txn = FLAGS_use_txn; - - // Create a transaction in order to write some data. The purpose is to - // exercise WriteBatchWithIndex::MultiGetFromBatchAndDB. The transaction - // will be rolled back once MultiGet returns. - Transaction* txn = nullptr; - if (use_txn) { - WriteOptions wo; - if (FLAGS_rate_limit_auto_wal_flush) { - wo.rate_limiter_priority = Env::IO_USER; - } - Status s = NewTxn(wo, &txn); - if (!s.ok()) { - fprintf(stderr, "NewTxn: %s\n", s.ToString().c_str()); - std::terminate(); - } - } - for (size_t i = 0; i < num_keys; ++i) { - key_str.emplace_back(Key(rand_keys[i])); - keys.emplace_back(key_str.back()); - if (use_txn) { - // With a 1 in 10 probability, insert the just added key in the batch - // into the transaction. 
This will create an overlap with the MultiGet - // keys and exercise some corner cases in the code - if (thread->rand.OneIn(10)) { - int op = thread->rand.Uniform(2); - Status s; - switch (op) { - case 0: - case 1: { - uint32_t value_base = - thread->rand.Next() % thread->shared->UNKNOWN_SENTINEL; - char value[100]; - size_t sz = GenerateValue(value_base, value, sizeof(value)); - Slice v(value, sz); - if (op == 0) { - s = txn->Put(cfh, keys.back(), v); - } else { - s = txn->Merge(cfh, keys.back(), v); - } - break; - } - case 2: - s = txn->Delete(cfh, keys.back()); - break; - default: - assert(false); - } - if (!s.ok()) { - fprintf(stderr, "Transaction put: %s\n", s.ToString().c_str()); - std::terminate(); - } - } - } - } - - if (!use_txn) { - if (fault_fs_guard) { - fault_fs_guard->EnableErrorInjection(); - SharedState::ignore_read_error = false; - } - db_->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(), - statuses.data()); - if (fault_fs_guard) { - error_count = fault_fs_guard->GetAndResetErrorCount(); - } - } else { - txn->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(), - statuses.data()); - } - - if (fault_fs_guard && error_count && !SharedState::ignore_read_error) { - int stat_nok = 0; - for (const auto& s : statuses) { - if (!s.ok() && !s.IsNotFound()) { - stat_nok++; - } - } - - if (stat_nok < error_count) { - // Grab mutex so multiple thread don't try to print the - // stack trace at the same time - MutexLock l(thread->shared->GetMutex()); - fprintf(stderr, "Didn't get expected error from MultiGet. \n"); - fprintf(stderr, "num_keys %zu Expected %d errors, seen %d\n", num_keys, - error_count, stat_nok); - fprintf(stderr, "Callstack that injected the fault\n"); - fault_fs_guard->PrintFaultBacktrace(); - std::terminate(); - } - } - if (fault_fs_guard) { - fault_fs_guard->DisableErrorInjection(); - } - - for (size_t i = 0; i < statuses.size(); ++i) { - Status s = statuses[i]; - bool is_consistent = true; - // Only do the consistency check if no error was injected and MultiGet - // didn't return an unexpected error - if (do_consistency_check && !error_count && (s.ok() || s.IsNotFound())) { - Status tmp_s; - std::string value; - - if (use_txn) { - tmp_s = txn->Get(readoptionscopy, cfh, keys[i], &value); - } else { - tmp_s = db_->Get(readoptionscopy, cfh, keys[i], &value); - } - if (!tmp_s.ok() && !tmp_s.IsNotFound()) { - fprintf(stderr, "Get error: %s\n", s.ToString().c_str()); - is_consistent = false; - } else if (!s.ok() && tmp_s.ok()) { - fprintf(stderr, "MultiGet returned different results with key %s\n", - keys[i].ToString(true).c_str()); - fprintf(stderr, "Get returned ok, MultiGet returned not found\n"); - is_consistent = false; - } else if (s.ok() && tmp_s.IsNotFound()) { - fprintf(stderr, "MultiGet returned different results with key %s\n", - keys[i].ToString(true).c_str()); - fprintf(stderr, "MultiGet returned ok, Get returned not found\n"); - is_consistent = false; - } else if (s.ok() && value != values[i].ToString()) { - fprintf(stderr, "MultiGet returned different results with key %s\n", - keys[i].ToString(true).c_str()); - fprintf(stderr, "MultiGet returned value %s\n", - values[i].ToString(true).c_str()); - fprintf(stderr, "Get returned value %s\n", - Slice(value).ToString(true /* hex */).c_str()); - is_consistent = false; - } - } - - if (!is_consistent) { - fprintf(stderr, "TestMultiGet error: is_consistent is false\n"); - thread->stats.AddErrors(1); - // Fail fast to preserve the DB state - 
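[Editor's note] The consistency check above pins a snapshot, issues MultiGet for the batch, and then re-reads each key with Get at the same snapshot so the two code paths must agree. A condensed sketch of that cross-check, assuming plain RocksDB API calls and illustrative names (CrossCheckMultiGet) rather than the stress test's own helpers:

```
#include <string>
#include <vector>
#include "rocksdb/db.h"

// Sketch of the Get-vs-MultiGet cross-check: pin a snapshot so both reads see
// the same version of the data, then compare found/not-found status and value
// bytes per key.
bool CrossCheckMultiGet(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* cfh,
                        const std::vector<rocksdb::Slice>& keys) {
  rocksdb::ReadOptions ro;
  ro.snapshot = db->GetSnapshot();
  std::vector<rocksdb::PinnableSlice> values(keys.size());
  std::vector<rocksdb::Status> statuses(keys.size());
  db->MultiGet(ro, cfh, keys.size(), keys.data(), values.data(),
               statuses.data());
  bool consistent = true;
  for (size_t i = 0; i < keys.size(); ++i) {
    std::string v;
    rocksdb::Status s = db->Get(ro, cfh, keys[i], &v);
    const bool mg_found = statuses[i].ok();
    const bool g_found = s.ok();
    if (mg_found != g_found || (mg_found && v != values[i].ToString())) {
      consistent = false;
      break;
    }
  }
  db->ReleaseSnapshot(ro.snapshot);
  return consistent;
}
```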
thread->shared->SetVerificationFailure(); - break; - } else if (s.ok()) { - // found case - thread->stats.AddGets(1, 1); - } else if (s.IsNotFound()) { - // not found case - thread->stats.AddGets(1, 0); - } else if (s.IsMergeInProgress() && use_txn) { - // With txn this is sometimes expected. - thread->stats.AddGets(1, 1); - } else { - if (error_count == 0) { - // errors case - fprintf(stderr, "MultiGet error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - } else { - thread->stats.AddVerifiedErrors(1); - } - } - } - - if (readoptionscopy.snapshot) { - db_->ReleaseSnapshot(readoptionscopy.snapshot); - } - if (use_txn) { - RollbackTxn(txn); - } - return statuses; - } - - void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - if (fault_fs_guard) { - fault_fs_guard->EnableErrorInjection(); - SharedState::ignore_read_error = false; - } - - assert(thread); - - SharedState* const shared = thread->shared; - assert(shared); - - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - std::unique_ptr lock(new MutexLock( - shared->GetMutexForKey(rand_column_families[0], rand_keys[0]))); - - assert(rand_column_families[0] >= 0); - assert(rand_column_families[0] < static_cast(column_families_.size())); - - ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]]; - assert(cfh); - - const std::string key = Key(rand_keys[0]); - - PinnableWideColumns from_db; - - const Status s = db_->GetEntity(read_opts, cfh, key, &from_db); - - int error_count = 0; - - if (fault_fs_guard) { - error_count = fault_fs_guard->GetAndResetErrorCount(); - } - - if (s.ok()) { - if (fault_fs_guard) { - if (error_count && !SharedState::ignore_read_error) { - // Grab mutex so multiple threads don't try to print the - // stack trace at the same time - MutexLock l(shared->GetMutex()); - fprintf(stderr, "Didn't get expected error from GetEntity\n"); - fprintf(stderr, "Call stack that injected the fault\n"); - fault_fs_guard->PrintFaultBacktrace(); - std::terminate(); - } - } - - thread->stats.AddGets(1, 1); - - if (!FLAGS_skip_verifydb) { - const WideColumns& columns = from_db.columns(); - - if (!VerifyWideColumns(columns)) { - shared->SetVerificationFailure(); - fprintf(stderr, - "error : inconsistent columns returned by GetEntity for key " - "%s: %s\n", - StringToHex(key).c_str(), WideColumnsToHex(columns).c_str()); - } else if (shared->Get(rand_column_families[0], rand_keys[0]) == - SharedState::DELETION_SENTINEL) { - shared->SetVerificationFailure(); - fprintf( - stderr, - "error : inconsistent values for key %s: GetEntity returns %s, " - "expected state does not have the key.\n", - StringToHex(key).c_str(), WideColumnsToHex(columns).c_str()); - } - } - } else if (s.IsNotFound()) { - thread->stats.AddGets(1, 0); - - if (!FLAGS_skip_verifydb) { - auto expected = shared->Get(rand_column_families[0], rand_keys[0]); - if (expected != SharedState::DELETION_SENTINEL && - expected != SharedState::UNKNOWN_SENTINEL) { - shared->SetVerificationFailure(); - fprintf(stderr, - "error : inconsistent values for key %s: expected state has " - "the key, GetEntity returns NotFound.\n", - StringToHex(key).c_str()); - } - } - } else { - if (error_count == 0) { - thread->stats.AddErrors(1); - } else { - thread->stats.AddVerifiedErrors(1); - } - } - - if (fault_fs_guard) { - fault_fs_guard->DisableErrorInjection(); - } - } - - Status TestPrefixScan(ThreadState* thread, const ReadOptions& read_opts, - 
const std::vector& rand_column_families, - const std::vector& rand_keys) override { - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]]; - assert(cfh); - - const std::string key = Key(rand_keys[0]); - const Slice prefix(key.data(), FLAGS_prefix_size); - - std::string upper_bound; - Slice ub_slice; - ReadOptions ro_copy = read_opts; - - // Get the next prefix first and then see if we want to set upper bound. - // We'll use the next prefix in an assertion later on - if (GetNextPrefix(prefix, &upper_bound) && thread->rand.OneIn(2)) { - // For half of the time, set the upper bound to the next prefix - ub_slice = Slice(upper_bound); - ro_copy.iterate_upper_bound = &ub_slice; - } - - std::string read_ts_str; - Slice read_ts_slice; - MaybeUseOlderTimestampForRangeScan(thread, read_ts_str, read_ts_slice, - ro_copy); - - std::unique_ptr iter(db_->NewIterator(ro_copy, cfh)); - - uint64_t count = 0; - Status s; - - if (fault_fs_guard) { - fault_fs_guard->EnableErrorInjection(); - SharedState::ignore_read_error = false; - } - - for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); - iter->Next()) { - ++count; - - // When iter_start_ts is set, iterator exposes internal keys, including - // tombstones; however, we want to perform column validation only for - // value-like types. - if (ro_copy.iter_start_ts) { - const ValueType value_type = ExtractValueType(iter->key()); - if (value_type != kTypeValue && value_type != kTypeBlobIndex && - value_type != kTypeWideColumnEntity) { - continue; - } - } - - if (!VerifyWideColumns(iter->value(), iter->columns())) { - s = Status::Corruption("Value and columns inconsistent", - DebugString(iter->value(), iter->columns())); - break; - } - } - - if (ro_copy.iter_start_ts == nullptr) { - assert(count <= GetPrefixKeyCount(prefix.ToString(), upper_bound)); - } - - if (s.ok()) { - s = iter->status(); - } - - uint64_t error_count = 0; - if (fault_fs_guard) { - error_count = fault_fs_guard->GetAndResetErrorCount(); - } - if (!s.ok() && (!fault_fs_guard || (fault_fs_guard && !error_count))) { - fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str()); - thread->stats.AddErrors(1); - - return s; - } - - if (fault_fs_guard) { - fault_fs_guard->DisableErrorInjection(); - } - thread->stats.AddPrefixes(1, count); - - return Status::OK(); - } - - Status TestPut(ThreadState* thread, WriteOptions& write_opts, - const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys, - char (&value)[100]) override { - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - auto shared = thread->shared; - assert(shared); - - const int64_t max_key = shared->GetMaxKey(); - - int64_t rand_key = rand_keys[0]; - int rand_column_family = rand_column_families[0]; - std::string write_ts; - - std::unique_ptr lock( - new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key))); - while (!shared->AllowsOverwrite(rand_key) && - (FLAGS_use_merge || shared->Exists(rand_column_family, rand_key))) { - lock.reset(); - - rand_key = thread->rand.Next() % max_key; - rand_column_family = thread->rand.Next() % FLAGS_column_families; - - lock.reset( - new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key))); - if (FLAGS_user_timestamp_size > 0) { - write_ts = GetNowNanos(); - } - } - - if (write_ts.empty() && FLAGS_user_timestamp_size) { - write_ts = GetNowNanos(); - } - - const std::string k = Key(rand_key); - - 
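[Editor's note] TestPrefixScan above bounds half of its scans by setting iterate_upper_bound to the "next prefix". The idea is to increment the last byte of the prefix that is not 0xff and truncate there, which yields the smallest string greater than every key carrying that prefix. A self-contained sketch (a hypothetical stand-in for GetNextPrefix):

```
#include <string>

// Computes an exclusive upper bound for a prefix scan. Returns false when the
// prefix is all 0xff bytes, in which case no finite upper bound exists.
bool NextPrefix(const std::string& prefix, std::string* upper_bound) {
  *upper_bound = prefix;
  for (int i = static_cast<int>(upper_bound->size()) - 1; i >= 0; --i) {
    char& c = (*upper_bound)[i];
    if (static_cast<unsigned char>(c) != 0xff) {
      c = static_cast<char>(static_cast<unsigned char>(c) + 1);
      upper_bound->resize(i + 1);
      return true;
    }
  }
  return false;
}
```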
ColumnFamilyHandle* const cfh = column_families_[rand_column_family]; - assert(cfh); - - if (FLAGS_verify_before_write) { - std::string from_db; - Status s = db_->Get(read_opts, cfh, k, &from_db); - if (!VerifyOrSyncValue(rand_column_family, rand_key, read_opts, shared, - /* msg_prefix */ "Pre-Put Get verification", - from_db, s, /* strict */ true)) { - return s; - } - } - - const uint32_t value_base = thread->rand.Next() % shared->UNKNOWN_SENTINEL; - const size_t sz = GenerateValue(value_base, value, sizeof(value)); - const Slice v(value, sz); - - shared->Put(rand_column_family, rand_key, value_base, true /* pending */); - - Status s; - - if (FLAGS_use_merge) { - if (!FLAGS_use_txn) { - if (FLAGS_user_timestamp_size == 0) { - s = db_->Merge(write_opts, cfh, k, v); - } else { - s = db_->Merge(write_opts, cfh, k, write_ts, v); - } - } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Merge(cfh, k, v); - if (s.ok()) { - s = CommitTxn(txn, thread); - } - } - } - } else if (FLAGS_use_put_entity_one_in > 0 && - (value_base % FLAGS_use_put_entity_one_in) == 0) { - s = db_->PutEntity(write_opts, cfh, k, - GenerateWideColumns(value_base, v)); - } else { - if (!FLAGS_use_txn) { - if (FLAGS_user_timestamp_size == 0) { - s = db_->Put(write_opts, cfh, k, v); - } else { - s = db_->Put(write_opts, cfh, k, write_ts, v); - } - } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Put(cfh, k, v); - if (s.ok()) { - s = CommitTxn(txn, thread); - } - } - } - } - - shared->Put(rand_column_family, rand_key, value_base, false /* pending */); - - if (!s.ok()) { - if (FLAGS_injest_error_severity >= 2) { - if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) { - is_db_stopped_ = true; - } else if (!is_db_stopped_ || - s.severity() < Status::Severity::kFatalError) { - fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str()); - std::terminate(); - } - } else { - fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str()); - std::terminate(); - } - } - - thread->stats.AddBytesForWrites(1, sz); - PrintKeyValue(rand_column_family, static_cast(rand_key), value, - sz); - return s; - } - - Status TestDelete(ThreadState* thread, WriteOptions& write_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - int64_t rand_key = rand_keys[0]; - int rand_column_family = rand_column_families[0]; - auto shared = thread->shared; - - std::unique_ptr lock( - new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key))); - - // OPERATION delete - std::string write_ts_str = GetNowNanos(); - Slice write_ts = write_ts_str; - - std::string key_str = Key(rand_key); - Slice key = key_str; - auto cfh = column_families_[rand_column_family]; - - // Use delete if the key may be overwritten and a single deletion - // otherwise. 
- Status s; - if (shared->AllowsOverwrite(rand_key)) { - shared->Delete(rand_column_family, rand_key, true /* pending */); - if (!FLAGS_use_txn) { - if (FLAGS_user_timestamp_size == 0) { - s = db_->Delete(write_opts, cfh, key); - } else { - s = db_->Delete(write_opts, cfh, key, write_ts); - } - } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Delete(cfh, key); - if (s.ok()) { - s = CommitTxn(txn, thread); - } - } - } - shared->Delete(rand_column_family, rand_key, false /* pending */); - thread->stats.AddDeletes(1); - if (!s.ok()) { - if (FLAGS_injest_error_severity >= 2) { - if (!is_db_stopped_ && - s.severity() >= Status::Severity::kFatalError) { - is_db_stopped_ = true; - } else if (!is_db_stopped_ || - s.severity() < Status::Severity::kFatalError) { - fprintf(stderr, "delete error: %s\n", s.ToString().c_str()); - std::terminate(); - } - } else { - fprintf(stderr, "delete error: %s\n", s.ToString().c_str()); - std::terminate(); - } - } - } else { - shared->SingleDelete(rand_column_family, rand_key, true /* pending */); - if (!FLAGS_use_txn) { - if (FLAGS_user_timestamp_size == 0) { - s = db_->SingleDelete(write_opts, cfh, key); - } else { - s = db_->SingleDelete(write_opts, cfh, key, write_ts); - } - } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->SingleDelete(cfh, key); - if (s.ok()) { - s = CommitTxn(txn, thread); - } - } - } - shared->SingleDelete(rand_column_family, rand_key, false /* pending */); - thread->stats.AddSingleDeletes(1); - if (!s.ok()) { - if (FLAGS_injest_error_severity >= 2) { - if (!is_db_stopped_ && - s.severity() >= Status::Severity::kFatalError) { - is_db_stopped_ = true; - } else if (!is_db_stopped_ || - s.severity() < Status::Severity::kFatalError) { - fprintf(stderr, "single delete error: %s\n", s.ToString().c_str()); - std::terminate(); - } - } else { - fprintf(stderr, "single delete error: %s\n", s.ToString().c_str()); - std::terminate(); - } - } - } - return s; - } - - Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - // OPERATION delete range - std::vector> range_locks; - // delete range does not respect disallowed overwrites. the keys for - // which overwrites are disallowed are randomly distributed so it - // could be expensive to find a range where each key allows - // overwrites. 
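[Editor's note] The Delete / SingleDelete split in TestDelete above follows RocksDB's SingleDelete contract: SingleDelete is only safe when the key has been written at most once since its last deletion, so the stress test reserves it for keys that never allow overwrites. A hedged sketch of that decision rule (DeleteKey and allows_overwrite are illustrative names, not part of the original code):

```
#include "rocksdb/db.h"

// Sketch only: choose the deletion API the same way TestDelete does.
// Overwritable keys use Delete; keys written at most once use SingleDelete.
rocksdb::Status DeleteKey(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* cfh,
                          const rocksdb::Slice& key, bool allows_overwrite) {
  rocksdb::WriteOptions wo;
  return allows_overwrite ? db->Delete(wo, cfh, key)
                          : db->SingleDelete(wo, cfh, key);
}
```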
- int64_t rand_key = rand_keys[0]; - int rand_column_family = rand_column_families[0]; - auto shared = thread->shared; - int64_t max_key = shared->GetMaxKey(); - if (rand_key > max_key - FLAGS_range_deletion_width) { - rand_key = - thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1); - } - for (int j = 0; j < FLAGS_range_deletion_width; ++j) { - if (j == 0 || - ((rand_key + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) { - range_locks.emplace_back(new MutexLock( - shared->GetMutexForKey(rand_column_family, rand_key + j))); - } - } - shared->DeleteRange(rand_column_family, rand_key, - rand_key + FLAGS_range_deletion_width, - true /* pending */); - - std::string keystr = Key(rand_key); - Slice key = keystr; - auto cfh = column_families_[rand_column_family]; - std::string end_keystr = Key(rand_key + FLAGS_range_deletion_width); - Slice end_key = end_keystr; - std::string write_ts_str; - Slice write_ts; - Status s; - if (FLAGS_user_timestamp_size) { - write_ts_str = GetNowNanos(); - write_ts = write_ts_str; - s = db_->DeleteRange(write_opts, cfh, key, end_key, write_ts); - } else { - s = db_->DeleteRange(write_opts, cfh, key, end_key); - } - if (!s.ok()) { - if (FLAGS_injest_error_severity >= 2) { - if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) { - is_db_stopped_ = true; - } else if (!is_db_stopped_ || - s.severity() < Status::Severity::kFatalError) { - fprintf(stderr, "delete range error: %s\n", s.ToString().c_str()); - std::terminate(); - } - } else { - fprintf(stderr, "delete range error: %s\n", s.ToString().c_str()); - std::terminate(); - } - } - int covered = shared->DeleteRange(rand_column_family, rand_key, - rand_key + FLAGS_range_deletion_width, - false /* pending */); - thread->stats.AddRangeDeletions(1); - thread->stats.AddCoveredByRangeDeletions(covered); - return s; - } - - void TestIngestExternalFile(ThreadState* thread, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - const std::string sst_filename = - FLAGS_db + "/." + std::to_string(thread->tid) + ".sst"; - Status s; - if (db_stress_env->FileExists(sst_filename).ok()) { - // Maybe we terminated abnormally before, so cleanup to give this file - // ingestion a clean slate - s = db_stress_env->DeleteFile(sst_filename); - } - - SstFileWriter sst_file_writer(EnvOptions(options_), options_); - if (s.ok()) { - s = sst_file_writer.Open(sst_filename); - } - int64_t key_base = rand_keys[0]; - int column_family = rand_column_families[0]; - std::vector> range_locks; - range_locks.reserve(FLAGS_ingest_external_file_width); - std::vector keys; - keys.reserve(FLAGS_ingest_external_file_width); - std::vector values; - values.reserve(FLAGS_ingest_external_file_width); - SharedState* shared = thread->shared; - - assert(FLAGS_nooverwritepercent < 100); - // Grab locks, set pending state on expected values, and add keys - for (int64_t key = key_base; - s.ok() && key < shared->GetMaxKey() && - static_cast(keys.size()) < FLAGS_ingest_external_file_width; - ++key) { - if (key == key_base || - (key & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) { - range_locks.emplace_back( - new MutexLock(shared->GetMutexForKey(column_family, key))); - } - if (!shared->AllowsOverwrite(key)) { - // We could alternatively include `key` on the condition its current - // value is `DELETION_SENTINEL`. 
- continue; - } - keys.push_back(key); - - uint32_t value_base = thread->rand.Next() % shared->UNKNOWN_SENTINEL; - values.push_back(value_base); - shared->Put(column_family, key, value_base, true /* pending */); - - char value[100]; - size_t value_len = GenerateValue(value_base, value, sizeof(value)); - auto key_str = Key(key); - s = sst_file_writer.Put(Slice(key_str), Slice(value, value_len)); - } - - if (s.ok() && keys.empty()) { - return; - } - - if (s.ok()) { - s = sst_file_writer.Finish(); - } - if (s.ok()) { - s = db_->IngestExternalFile(column_families_[column_family], - {sst_filename}, IngestExternalFileOptions()); - } - if (!s.ok()) { - fprintf(stderr, "file ingestion error: %s\n", s.ToString().c_str()); - std::terminate(); - } - for (size_t i = 0; i < keys.size(); ++i) { - shared->Put(column_family, keys[i], values[i], false /* pending */); - } - } - - // Given a key K, this creates an iterator which scans the range - // [K, K + FLAGS_num_iterations) forward and backward. - // Then does a random sequence of Next/Prev operations. - Status TestIterateAgainstExpected( - ThreadState* thread, const ReadOptions& read_opts, - const std::vector& rand_column_families, - const std::vector& rand_keys) override { - assert(thread); - assert(!rand_column_families.empty()); - assert(!rand_keys.empty()); - - auto shared = thread->shared; - assert(shared); - - int64_t max_key = shared->GetMaxKey(); - - const int64_t num_iter = static_cast(FLAGS_num_iterations); - - int64_t lb = rand_keys[0]; - if (lb > max_key - num_iter) { - lb = thread->rand.Next() % (max_key - num_iter + 1); - } - - const int64_t ub = lb + num_iter; - - // Lock the whole range over which we might iterate to ensure it doesn't - // change under us. - const int rand_column_family = rand_column_families[0]; - std::vector> range_locks = - shared->GetLocksForKeyRange(rand_column_family, lb, ub); - - ReadOptions ro(read_opts); - ro.total_order_seek = true; - - std::string read_ts_str; - Slice read_ts; - if (FLAGS_user_timestamp_size > 0) { - read_ts_str = GetNowNanos(); - read_ts = read_ts_str; - ro.timestamp = &read_ts; - } - - std::string max_key_str; - Slice max_key_slice; - if (!FLAGS_destroy_db_initially) { - max_key_str = Key(max_key); - max_key_slice = max_key_str; - // to restrict iterator from reading keys written in batched_op_stress - // that do not have expected state updated and may not be parseable by - // GetIntVal(). 
- ro.iterate_upper_bound = &max_key_slice; - } - - ColumnFamilyHandle* const cfh = column_families_[rand_column_family]; - assert(cfh); - - std::unique_ptr iter(db_->NewIterator(ro, cfh)); - - std::string op_logs; - - auto check_columns = [&]() { - assert(iter); - assert(iter->Valid()); - - if (!VerifyWideColumns(iter->value(), iter->columns())) { - shared->SetVerificationFailure(); - - fprintf(stderr, - "Verification failed for key %s: " - "Value and columns inconsistent: value: %s, columns: %s\n", - Slice(iter->key()).ToString(/* hex */ true).c_str(), - iter->value().ToString(/* hex */ true).c_str(), - WideColumnsToHex(iter->columns()).c_str()); - fprintf(stderr, "Column family: %s, op_logs: %s\n", - cfh->GetName().c_str(), op_logs.c_str()); - - thread->stats.AddErrors(1); - - return false; - } - - return true; - }; - - auto check_no_key_in_range = [&](int64_t start, int64_t end) { - for (auto j = std::max(start, lb); j < std::min(end, ub); ++j) { - auto expected_value = - shared->Get(rand_column_family, static_cast(j)); - if (expected_value != shared->DELETION_SENTINEL && - expected_value != shared->UNKNOWN_SENTINEL) { - // Fail fast to preserve the DB state. - thread->shared->SetVerificationFailure(); - if (iter->Valid()) { - fprintf(stderr, - "Expected state has key %s, iterator is at key %s\n", - Slice(Key(j)).ToString(true).c_str(), - iter->key().ToString(true).c_str()); - } else { - fprintf(stderr, "Expected state has key %s, iterator is invalid\n", - Slice(Key(j)).ToString(true).c_str()); - } - fprintf(stderr, "Column family: %s, op_logs: %s\n", - cfh->GetName().c_str(), op_logs.c_str()); - thread->stats.AddErrors(1); - return false; - } - } - return true; - }; - - // Forward and backward scan to ensure we cover the entire range [lb, ub). - // The random sequence Next and Prev test below tends to be very short - // ranged. 
-    int64_t last_key = lb - 1;
-
-    std::string key_str = Key(lb);
-    iter->Seek(key_str);
-
-    op_logs += "S " + Slice(key_str).ToString(true) + " ";
-
-    uint64_t curr = 0;
-    while (true) {
-      if (!iter->Valid()) {
-        if (!iter->status().ok()) {
-          thread->shared->SetVerificationFailure();
-          fprintf(stderr, "TestIterate against expected state error: %s\n",
-                  iter->status().ToString().c_str());
-          fprintf(stderr, "Column family: %s, op_logs: %s\n",
-                  cfh->GetName().c_str(), op_logs.c_str());
-          thread->stats.AddErrors(1);
-          return iter->status();
-        }
-        if (!check_no_key_in_range(last_key + 1, ub)) {
-          return Status::OK();
-        }
-        break;
-      }
-
-      if (!check_columns()) {
-        return Status::OK();
-      }
-
-      // iter is valid, the range (last_key, current key) was skipped
-      GetIntVal(iter->key().ToString(), &curr);
-      if (!check_no_key_in_range(last_key + 1, static_cast<int64_t>(curr))) {
-        return Status::OK();
-      }
-
-      last_key = static_cast<int64_t>(curr);
-      if (last_key >= ub - 1) {
-        break;
-      }
-
-      iter->Next();
-
-      op_logs += "N";
-    }
-
-    // backward scan
-    key_str = Key(ub - 1);
-    iter->SeekForPrev(key_str);
-
-    op_logs += " SFP " + Slice(key_str).ToString(true) + " ";
-
-    last_key = ub;
-    while (true) {
-      if (!iter->Valid()) {
-        if (!iter->status().ok()) {
-          thread->shared->SetVerificationFailure();
-          fprintf(stderr, "TestIterate against expected state error: %s\n",
-                  iter->status().ToString().c_str());
-          fprintf(stderr, "Column family: %s, op_logs: %s\n",
-                  cfh->GetName().c_str(), op_logs.c_str());
-          thread->stats.AddErrors(1);
-          return iter->status();
-        }
-        if (!check_no_key_in_range(lb, last_key)) {
-          return Status::OK();
-        }
-        break;
-      }
-
-      if (!check_columns()) {
-        return Status::OK();
-      }
-
-      // the range (current key, last key) was skipped
-      GetIntVal(iter->key().ToString(), &curr);
-      if (!check_no_key_in_range(static_cast<int64_t>(curr + 1), last_key)) {
-        return Status::OK();
-      }
-
-      last_key = static_cast<int64_t>(curr);
-      if (last_key <= lb) {
-        break;
-      }
-
-      iter->Prev();
-
-      op_logs += "P";
-    }
-
-    if (thread->rand.OneIn(2)) {
-      // Refresh after forward/backward scan to allow higher chance of SV
-      // change. It is safe to refresh since the testing key range is locked.
-      iter->Refresh();
-    }
-
-    // start from middle of [lb, ub) otherwise it is easy to iterate out of
-    // locked range
-    const int64_t mid = lb + num_iter / 2;
-
-    key_str = Key(mid);
-    const Slice key(key_str);
-
-    if (thread->rand.OneIn(2)) {
-      iter->Seek(key);
-      op_logs += " S " + key.ToString(true) + " ";
-      if (!iter->Valid() && iter->status().ok()) {
-        if (!check_no_key_in_range(mid, ub)) {
-          return Status::OK();
-        }
-      }
-    } else {
-      iter->SeekForPrev(key);
-      op_logs += " SFP " + key.ToString(true) + " ";
-      if (!iter->Valid() && iter->status().ok()) {
-        // iterator says nothing <= mid
-        if (!check_no_key_in_range(lb, mid + 1)) {
-          return Status::OK();
-        }
-      }
-    }
-
-    for (int64_t i = 0; i < num_iter && iter->Valid(); ++i) {
-      if (!check_columns()) {
-        return Status::OK();
-      }
-
-      GetIntVal(iter->key().ToString(), &curr);
-      if (static_cast<int64_t>(curr) < lb) {
-        iter->Next();
-        op_logs += "N";
-      } else if (static_cast<int64_t>(curr) >= ub) {
-        iter->Prev();
-        op_logs += "P";
-      } else {
-        const uint32_t expected_value =
-            shared->Get(rand_column_family, static_cast<int64_t>(curr));
-        if (expected_value == shared->DELETION_SENTINEL) {
-          // Fail fast to preserve the DB state.
-          thread->shared->SetVerificationFailure();
-          fprintf(stderr, "Iterator has key %s, but expected state does not.\n",
-                  iter->key().ToString(true).c_str());
-          fprintf(stderr, "Column family: %s, op_logs: %s\n",
-                  cfh->GetName().c_str(), op_logs.c_str());
-          thread->stats.AddErrors(1);
-          break;
-        }
-
-        if (thread->rand.OneIn(2)) {
-          iter->Next();
-          op_logs += "N";
-          if (!iter->Valid()) {
-            break;
-          }
-          uint64_t next = 0;
-          GetIntVal(iter->key().ToString(), &next);
-          if (!check_no_key_in_range(static_cast<int64_t>(curr + 1),
-                                     static_cast<int64_t>(next))) {
-            return Status::OK();
-          }
-        } else {
-          iter->Prev();
-          op_logs += "P";
-          if (!iter->Valid()) {
-            break;
-          }
-          uint64_t prev = 0;
-          GetIntVal(iter->key().ToString(), &prev);
-          if (!check_no_key_in_range(static_cast<int64_t>(prev + 1),
-                                     static_cast<int64_t>(curr))) {
-            return Status::OK();
-          }
-        }
-      }
-    }
-
-    if (!iter->status().ok()) {
-      thread->shared->SetVerificationFailure();
-      fprintf(stderr, "TestIterate against expected state error: %s\n",
-              iter->status().ToString().c_str());
-      fprintf(stderr, "Column family: %s, op_logs: %s\n",
-              cfh->GetName().c_str(), op_logs.c_str());
-      thread->stats.AddErrors(1);
-      return iter->status();
-    }
-
-    thread->stats.AddIterations(1);
-
-    return Status::OK();
-  }
-
-  bool VerifyOrSyncValue(int cf, int64_t key, const ReadOptions& /*opts*/,
-                         SharedState* shared, const std::string& value_from_db,
-                         std::string msg_prefix, const Status& s,
-                         bool strict = false) const {
-    if (shared->HasVerificationFailedYet()) {
-      return false;
-    }
-
-    // compare value_from_db with the value in the shared state
-    uint32_t value_base = shared->Get(cf, key);
-    if (value_base == SharedState::UNKNOWN_SENTINEL) {
-      if (s.ok()) {
-        // Value exists in db, update state to reflect that
-        Slice slice(value_from_db);
-        value_base = GetValueBase(slice);
-        shared->Put(cf, key, value_base, false);
-      } else if (s.IsNotFound()) {
-        // Value doesn't exist in db, update state to reflect that
-        shared->SingleDelete(cf, key, false);
-      }
-      return true;
-    }
-    if (value_base == SharedState::DELETION_SENTINEL && !strict) {
-      return true;
-    }
-
-    if (s.ok()) {
-      char value[kValueMaxLen];
-      if (value_base == SharedState::DELETION_SENTINEL) {
-        VerificationAbort(shared, msg_prefix + ": Unexpected value found", cf,
-                          key, value_from_db, "");
-        return false;
-      }
-      size_t sz = GenerateValue(value_base, value, sizeof(value));
-      if (value_from_db.length() != sz) {
-        VerificationAbort(shared,
-                          msg_prefix + ": Length of value read is not equal",
-                          cf, key, value_from_db, Slice(value, sz));
-        return false;
-      }
-      if (memcmp(value_from_db.data(), value, sz) != 0) {
-        VerificationAbort(shared,
-                          msg_prefix + ": Contents of value read don't match",
-                          cf, key, value_from_db, Slice(value, sz));
-        return false;
-      }
-    } else {
-      if (value_base != SharedState::DELETION_SENTINEL) {
-        char value[kValueMaxLen];
-        size_t sz = GenerateValue(value_base, value, sizeof(value));
-        VerificationAbort(shared,
-                          msg_prefix + ": Value not found: " + s.ToString(), cf,
-                          key, "", Slice(value, sz));
-        return false;
-      }
-    }
-    return true;
-  }
-
-  void PrepareTxnDbOptions(SharedState* shared,
-                           TransactionDBOptions& txn_db_opts) override {
-    txn_db_opts.rollback_deletion_type_callback =
-        [shared](TransactionDB*, ColumnFamilyHandle*, const Slice& key) {
-          assert(shared);
-          uint64_t key_num = 0;
-          bool ok = GetIntVal(key.ToString(), &key_num);
-          assert(ok);
-          (void)ok;
-          return !shared->AllowsOverwrite(key_num);
-        };
-  }
-};
-
-StressTest* CreateNonBatchedOpsStressTest() {
-  return new NonBatchedOpsStressTest();
-}
-
-}  // namespace ROCKSDB_NAMESPACE
-#endif  // GFLAGS
diff --git a/docs/.gitignore b/docs/.gitignore
deleted file mode 100644
index 3938549cb..000000000
--- a/docs/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-.DS_STORE
-_site/
-*.swo
-*.swp
-_site
-.sass-cache
-*.psd
-*~
diff --git a/docs/CNAME b/docs/CNAME
deleted file mode 100644
index 827d1c0ed..000000000
--- a/docs/CNAME
+++ /dev/null
@@ -1 +0,0 @@
-rocksdb.org
\ No newline at end of file
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
deleted file mode 100644
index 2c5842fb4..000000000
--- a/docs/CONTRIBUTING.md
+++ /dev/null
@@ -1,115 +0,0 @@
-This provides guidance on how to contribute various content to `rocksdb.org`.
-
-## Getting started
-
-You should only have to do these steps once.
-
-- Rename this file to `CONTRIBUTING.md`.
-- Rename `EXAMPLE-README-FOR-RUNNING-DOCS.md` to `README.md` (replacing the existing `README.md` that came with the template).
-- Rename `EXAMPLE-LICENSE` to `LICENSE`.
-- Review the [template information](./TEMPLATE-INFORMATION.md).
-- Review `./_config.yml`.
-- Make sure you update `title`, `description`, `tagline` and `gacode` (Google Analytics) in `./_config.yml`.
-
-## Basic Structure
-
-Most content is written in markdown. You name the file `something.md`, then have a header that looks like this:
-
-```
----
-docid: getting-started
-title: Getting started with ProjectName
-layout: docs
-permalink: /docs/getting-started.html
---- 
-```
-
-Customize these values for each document, blog post, etc.
-
-> The filename of the `.md` file doesn't actually matter; what is important is that the `docid` is unique and that the `permalink` is correct and unique too.
-
-## Landing page
-
-Modify `index.md` with your new or updated content.
-
-If you want a `GridBlock` as part of your content, you can add one directly with HTML:
-
-```
-<div class="gridBlock">
-  <div class="blockElement twoByGridBlock alignLeft">
-    <div class="blockContent">
-      <h3>
-        Your Features
-      </h3>
-    </div>
-  </div>
-
-  <div class="blockElement twoByGridBlock alignLeft">
-    <div class="blockContent">
-      <h3>
-        More information
-      </h3>
-      <p>
-        Stuff here
-      </p>
-    </div>
-  </div>
-</div>
-```
-
-or with a combination of changing `./_data/features.yml` and adding some Liquid to `index.md`, such as:
-
-```
-{% include content/gridblocks.html data_source=site.data.features imagealign="bottom"%}
-```
-
-## Blog
-
-To modify a blog post, edit the appropriate markdown file in `./_posts/`.
-
-Adding a new blog post is a four-step process.
-
-> Some posts have a `permalink` and `comments` in the blog post YAML header. You will not need these for new blog posts. These are an artifact of migrating the blog from Wordpress to gh-pages.
-
-1. Create your blog post in `./_posts/` in markdown (file extension `.md` or `.markdown`). See current posts in that folder or `./doc-type-examples/2016-04-07-blog-post-example.md` for an example of the YAML format. **If the `./_posts` directory does not exist, create it**.
-   - You can add a `<!--truncate-->` tag in the middle of your post such that you show only the excerpt above that tag in the main `/blog` index on your page.
-1. If you have not authored a blog post before, modify the `./_data/authors.yml` file with the `author` id you used in your blog post, along with your full name and Facebook ID to get your profile picture.
-1. [Run the site locally](./README.md) to test your changes. It will be at `http://127.0.0.1/blog/your-new-blog-post-title.html`
-1. Push your changes to GitHub.
-
-## Docs
-
-To modify docs, edit the appropriate markdown file in `./_docs/`.
-
-To add docs to the site....
-
-1. Add your markdown file to the `./_docs/` folder. See `./doc-type-examples/docs-hello-world.md` for an example of the YAML header format. **If the `./_docs/` directory does not exist, create it**.
-   - You can use folders in the `./_docs/` directory to organize your content if you want.
-1. Update `_data/nav_docs.yml` to add your new document to the navigation bar. Use the `docid` you put in your doc markdown as the `id` in the `_data/nav_docs.yml` file.
-1. [Run the site locally](./README.md) to test your changes. It will be at `http://127.0.0.1/docs/your-new-doc-permalink.html`
-1. Push your changes to GitHub.
-
-## Header Bar
-
-To modify the header bar, change `./_data/nav.yml`.
-
-## Top Level Page
-
-To modify a top-level page, edit the appropriate markdown file in `./top-level/`
-
-If you want a top-level page (e.g., http://your-site.com/top-level.html) -- not in `/blog/` or `/docs/`....
-
-1. Create a markdown file in the root `./top-level/`. See `./doc-type-examples/top-level-example.md` for more information.
-1. If you want a visible link to that file, update `_data/nav.yml` to add a link to your new top-level document in the header bar.
-
-   > This is not necessary if you just want to have a page that is linked to from another page, but not exposed as a direct link to the user.
-
-1. [Run the site locally](./README.md) to test your changes. It will be at `http://127.0.0.1/your-top-level-page-permalink.html`
-1. Push your changes to GitHub.
-
-## Other Changes
-
-- CSS: `./css/main.css` or `./_sass/*.scss`.
-- Images: `./static/images/[docs | posts]/....` -- Main Blog post HTML: `./_includes/post.html` -- Main Docs HTML: `./_includes/doc.html` diff --git a/docs/Gemfile b/docs/Gemfile deleted file mode 100644 index dfb1cfdd4..000000000 --- a/docs/Gemfile +++ /dev/null @@ -1,4 +0,0 @@ -source 'https://rubygems.org' -gem 'github-pages', '~> 227' - -gem "webrick", "~> 1.7" diff --git a/docs/LICENSE-DOCUMENTATION b/docs/LICENSE-DOCUMENTATION deleted file mode 100644 index 1f255c9f3..000000000 --- a/docs/LICENSE-DOCUMENTATION +++ /dev/null @@ -1,385 +0,0 @@ -Attribution 4.0 International - -======================================================================= - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of -Creative Commons public licenses does not create a lawyer-client or -other relationship. Creative Commons makes its licenses and related -information available on an "as-is" basis. Creative Commons gives no -warranties regarding its licenses, any material licensed under their -terms and conditions, or any related information. Creative Commons -disclaims all liability for damages resulting from their use to the -fullest extent possible. - -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share -original works of authorship and other material subject to copyright -and certain other rights specified in the public license below. The -following considerations are for informational purposes only, are not -exhaustive, and do not form part of our licenses. - - Considerations for licensors: Our public licenses are - intended for use by those authorized to give the public - permission to use material in ways otherwise restricted by - copyright and certain other rights. Our licenses are - irrevocable. Licensors should read and understand the terms - and conditions of the license they choose before applying it. - Licensors should also secure all rights necessary before - applying our licenses so that the public can reuse the - material as expected. Licensors should clearly mark any - material not subject to the license. This includes other CC- - licensed material, or material used under an exception or - limitation to copyright. More considerations for licensors: - wiki.creativecommons.org/Considerations_for_licensors - - Considerations for the public: By using one of our public - licenses, a licensor grants the public permission to use the - licensed material under specified terms and conditions. If - the licensor's permission is not necessary for any reason--for - example, because of any applicable exception or limitation to - copyright--then that use is not regulated by the license. Our - licenses grant only permissions under copyright and certain - other rights that a licensor has authority to grant. Use of - the licensed material may still be restricted for other - reasons, including because others have copyright or other - rights in the material. A licensor may make special requests, - such as asking that all changes be marked or described. - Although not required by our licenses, you are encouraged to - respect those requests where reasonable. 
More_considerations - for the public: - wiki.creativecommons.org/Considerations_for_licensees - -======================================================================= - -Creative Commons Attribution 4.0 International Public License - -By exercising the Licensed Rights (defined below), You accept and agree -to be bound by the terms and conditions of this Creative Commons -Attribution 4.0 International Public License ("Public License"). To the -extent this Public License may be interpreted as a contract, You are -granted the Licensed Rights in consideration of Your acceptance of -these terms and conditions, and the Licensor grants You such rights in -consideration of benefits the Licensor receives from making the -Licensed Material available under these terms and conditions. - -Section 1 -- Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material - and in which the Licensed Material is translated, altered, - arranged, transformed, or otherwise modified in a manner requiring - permission under the Copyright and Similar Rights held by the - Licensor. For purposes of this Public License, where the Licensed - Material is a musical work, performance, or sound recording, - Adapted Material is always produced where the Licensed Material is - synched in timed relation with a moving image. - -b. Adapter's License means the license You apply to Your Copyright - and Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - -c. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - -d. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright - Treaty adopted on December 20, 1996, and/or similar international - agreements. - -e. Exceptions and Limitations means fair use, fair dealing, and/or - any other exception or limitation to Copyright and Similar Rights - that applies to Your use of the Licensed Material. - -f. Licensed Material means the artistic or literary work, database, - or other material to which the Licensor applied this Public - License. - -g. Licensed Rights means the rights granted to You subject to the - terms and conditions of this Public License, which are limited to - all Copyright and Similar Rights that apply to Your use of the - Licensed Material and that the Licensor has authority to license. - -h. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - -i. Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such - as reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the - public may access the material from a place and at a time - individually chosen by them. - -j. 
Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially - equivalent rights anywhere in the world. - -k. You means the individual or entity exercising the Licensed Rights - under this Public License. Your has a corresponding meaning. - -Section 2 -- Scope. - -a. License grant. - - 1. Subject to the terms and conditions of this Public License, - the Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - a. reproduce and Share the Licensed Material, in whole or - in part; and - - b. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with - its terms and conditions. - - 3. Term. The term of this Public License is specified in Section - 6(a). - - 4. Media and formats; technical modifications allowed. The - Licensor authorizes You to exercise the Licensed Rights in - all media and formats whether now known or hereafter created, - and to make technical modifications necessary to do so. The - Licensor waives and/or agrees not to assert any right or - authority to forbid You from making technical modifications - necessary to exercise the Licensed Rights, including - technical modifications necessary to circumvent Effective - Technological Measures. For purposes of this Public License, - simply making modifications authorized by this Section 2(a) - (4) never produces Adapted Material. - - 5. Downstream recipients. - - a. Offer from the Licensor -- Licensed Material. Every - recipient of the Licensed Material automatically - receives an offer from the Licensor to exercise the - Licensed Rights under the terms and conditions of this - Public License. - - b. No downstream restrictions. You may not offer or impose - any additional or different terms or conditions on, or - apply any Effective Technological Measures to, the - Licensed Material if doing so restricts exercise of the - Licensed Rights by any recipient of the Licensed - Material. - - 6. No endorsement. Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You - are, or that Your use of the Licensed Material is, connected - with, or sponsored, endorsed, or granted official status by, - the Licensor or others designated to receive attribution as - provided in Section 3(a)(1)(A)(i). - -b. Other rights. - - 1. Moral rights, such as the right of integrity, are not - licensed under this Public License, nor are publicity, - privacy, and/or other similar personality rights; however, to - the extent possible, the Licensor waives and/or agrees not to - assert any such rights held by the Licensor to the limited - extent necessary to allow You to exercise the Licensed - Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this - Public License. - - 3. To the extent possible, the Licensor waives any right to - collect royalties from You for the exercise of the Licensed - Rights, whether directly or through a collecting society - under any voluntary or waivable statutory or compulsory - licensing scheme. 
In all other cases the Licensor expressly - reserves any right to collect such royalties. - -Section 3 -- License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - -a. Attribution. - - 1. If You Share the Licensed Material (including in modified - form), You must: - - a. retain the following if it is supplied by the Licensor - with the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by - the Licensor (including by pseudonym if - designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of - warranties; - - v. a URI or hyperlink to the Licensed Material to the - extent reasonably practicable; - - b. indicate if You modified the Licensed Material and - retain an indication of any previous modifications; and - - c. indicate the Licensed Material is licensed under this - Public License, and include the text of, or the URI or - hyperlink to, this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required - information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. - - 4. If You Share Adapted Material You produce, the Adapter's - License You apply must not prevent recipients of the Adapted - Material from complying with this Public License. - -Section 4 -- Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that -apply to Your use of the Licensed Material: - -a. for the avoidance of doubt, Section 2(a)(1) grants You the right - to extract, reuse, reproduce, and Share all or a substantial - portion of the contents of the database; - -b. if You include all or a substantial portion of the database - contents in a database in which You have Sui Generis Database - Rights, then the database in which You have Sui Generis Database - Rights (but not its individual contents) is Adapted Material; and - -c. You must comply with the conditions in Section 3(a) if You Share - all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not -replace Your obligations under this Public License where the Licensed -Rights include other Copyright and Similar Rights. - -Section 5 -- Disclaimer of Warranties and Limitation of Liability. - -a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE - EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS - AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF - ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, - IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, - WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, - ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT - KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT - ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. - -b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE - TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, - NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, - INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, - COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR - USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR - DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR - IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. - -c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent - possible, most closely approximates an absolute disclaimer and - waiver of all liability. - -Section 6 -- Term and Termination. - -a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - -b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided - it is cured within 30 days of Your discovery of the - violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations - of this Public License. - -c. For the avoidance of doubt, the Licensor may also offer the - Licensed Material under separate terms or conditions or stop - distributing the Licensed Material at any time; however, doing so - will not terminate this Public License. - -d. Sections 1, 5, 6, 7, and 8 survive termination of this Public - License. - -Section 7 -- Other Terms and Conditions. - -a. The Licensor shall not be bound by any additional or different - terms or conditions communicated by You unless expressly agreed. - -b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - -Section 8 -- Interpretation. - -a. For the avoidance of doubt, this Public License does not, and - shall not be interpreted to, reduce, limit, restrict, or impose - conditions on any use of the Licensed Material that could lawfully - be made without permission under this Public License. - -b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - -c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - -d. Nothing in this Public License constitutes or may be interpreted - as a limitation upon, or waiver of, any privileges and immunities - that apply to the Licensor or You, including from the legal - processes of any jurisdiction or authority. - -======================================================================= - -Creative Commons is not a party to its public licenses. -Notwithstanding, Creative Commons may elect to apply one of its public -licenses to material it publishes and in those instances will be -considered the "Licensor." 
Except for the limited purpose of indicating -that material is shared under a Creative Commons public license or as -otherwise permitted by the Creative Commons policies published at -creativecommons.org/policies, Creative Commons does not authorize the -use of the trademark "Creative Commons" or any other trademark or logo -of Creative Commons without its prior written consent including, -without limitation, in connection with any unauthorized modifications -to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For -the avoidance of doubt, this paragraph does not form part of the public -licenses. - -Creative Commons may be contacted at creativecommons.org. - diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 0ae8978bc..000000000 --- a/docs/README.md +++ /dev/null @@ -1,80 +0,0 @@ -## User Documentation for rocksdb.org - -This directory will contain the user and feature documentation for RocksDB. The documentation will be hosted on GitHub pages. - -### Contributing - -See [CONTRIBUTING.md](./CONTRIBUTING.md) for details on how to add or modify content. - -### Run the Site Locally - -The requirements for running a GitHub pages site locally is described in [GitHub help](https://help.github.com/articles/setting-up-your-github-pages-site-locally-with-jekyll/#requirements). The steps below summarize these steps. - -> If you have run the site before, you can start with step 1 and then move on to step 5. - -1. Ensure that you are in the `/docs` directory in your local RocksDB clone (i.e., the same directory where this `README.md` exists). The below RubyGems commands, etc. must be run from there. - -1. Make sure you have Ruby and [RubyGems](https://rubygems.org/) installed. - - > Ruby >= 2.2 is required for the gems. On the latest versions of Mac OS X, Ruby 2.0 is the - > default. Use `brew install ruby` (or your preferred upgrade mechanism) to install a newer - > version of Ruby for your Mac OS X system. - -1. Make sure you have [Bundler](http://bundler.io/) installed. - - ``` - # may require sudo - gem install bundler - ``` -1. Install the project's dependencies - - ``` - # run this in the 'docs' directory - bundle install - ``` - - > If you get an error when installing `nokogiri`, you may be running into the problem described - > in [this nokogiri issue](https://github.com/sparklemotion/nokogiri/issues/1483). You can - > either `brew uninstall xz` (and then `brew install xz` after the bundle is installed) or - > `xcode-select --install` (although this may not work if you have already installed command - > line tools). - -1. Run Jekyll's server. - - - On first runs or for structural changes to the documentation (e.g., new sidebar menu item), do a full build. - - ``` - bundle exec jekyll serve - ``` - - - For content changes only, you can use `--incremental` for faster builds. - - ``` - bundle exec jekyll serve --incremental - ``` - - > We use `bundle exec` instead of running straight `jekyll` because `bundle exec` will always use the version of Jekyll from our `Gemfile`. Just running `jekyll` will use the system version and may not necessarily be compatible. - - - To run using an actual IP address, you can use `--host=0.0.0.0` - - ``` - bundle exec jekyll serve --host=0.0.0.0 - ``` - - This will allow you to use the IP address associated with your machine in the URL. That way you could share it with other people. 
-
-   e.g., on a Mac, you can find your IP address with something like `ifconfig | grep "inet " | grep -v 127.0.0.1`.
-
-1. Either of the commands in the previous step will serve up the site on your local device at http://127.0.0.1:4000/ or http://localhost:4000.
-
-### Updating the Bundle
-
-The site depends on GitHub Pages and the installed bundle is based on the `github-pages` gem.
-Occasionally that gem might get updated with new or changed functionality. If that is the case,
-you can run:
-
-```
-bundle update
-```
-
-to get the latest packages for the installation.
diff --git a/docs/TEMPLATE-INFORMATION.md b/docs/TEMPLATE-INFORMATION.md
deleted file mode 100644
index 9175bc0c2..000000000
--- a/docs/TEMPLATE-INFORMATION.md
+++ /dev/null
@@ -1,17 +0,0 @@
-## Template Details
-
-First, go through `_config.yml` and adjust the available settings to your project's standard. When you make changes here, you'll have to kill the `jekyll serve` instance and restart it to see those changes, but that's only the case with the config file.
-
-Next, update some image assets - you'll want to update `favicon.png`, `logo.svg`, and `og_image.png` (used for Like button stories and Shares on Facebook) in the `static` folder with your own logos.
-
-Next, if you're going to have docs on your site, keep the `_docs` and `docs` folders; if not, you can safely remove them (or you can safely leave them and not include them in your navigation - Jekyll renders all of this before a client views the site anyway, so there's no performance hit from just leaving it there for a future expansion).
-
-Same thing with a blog section, either keep or delete the `_posts` and `blog` folders.
-
-You can customize your homepage in three parts - the first is the homepage header, which is mostly automatically derived from the elements you insert into your config file. However, you can also specify a series of 'promotional' elements in `_data/promo.yml`. You can read that file for more information.
-
-The second place for your homepage is in `index.md` which contains the bulk of the main content below the header. This is all markdown if you want, but you can use HTML and Jekyll's template tags (called Liquid) in there too. Check out this folder's index.md for an example of one common template tag that we use on our sites called gridblocks.
-
-The third and last place is in the `_data/powered_by.yml` and `_data/powered_by_highlight.yml` files. Both these files combine to create a section on the homepage that is intended to show a list of companies or apps that are using your project. The `powered_by_highlight` file is a list of curated companies/apps that you want to show as a highlight at the top of this section, including their logos in whatever format you want. The `powered_by` file is a more open list that is just text links to the companies/apps and can be updated via Pull Request by the community. If you don't want these sections on your homepage, just empty out both files and leave them blank.
-
-The last thing you'll want to do is set up your top-level navigation bar. You can do this by editing `nav.yml` and keeping the existing title/href/category structure used there. Although the nav is responsive and fairly flexible design-wise, no more than 5 or 6 nav items is recommended.
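-
-As a concrete sketch, a single `nav.yml` entry follows the title/href/category structure mentioned above; the values below are placeholders rather than part of the template:
-
-```
-- title: Docs
-  href: /docs/
-  category: docs
-```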
diff --git a/docs/_config.yml b/docs/_config.yml deleted file mode 100644 index a4055fd1f..000000000 --- a/docs/_config.yml +++ /dev/null @@ -1,85 +0,0 @@ -# Site settings -permalink: /blog/:year/:month/:day/:title.html -title: RocksDB -tagline: A persistent key-value store for fast storage environments -description: > - RocksDB is an embeddable persistent key-value store for fast storage. -fbappid: "1615782811974223" -gacode: "UA-49459723-1" -# baseurl determines the subpath of your site. For example if you're using an -# organisation.github.io/reponame/ basic site URL, then baseurl would be set -# as "/reponame" but leave blank if you have a top-level domain URL as it is -# now set to "" by default as discussed in: -# http://jekyllrb.com/news/2016/10/06/jekyll-3-3-is-here/ -baseurl: "" - -# the base hostname & protocol for your site -# If baseurl is set, then the absolute url for your site would be url/baseurl -# This was also be set to the right thing automatically for local development -# https://github.com/blog/2277-what-s-new-in-github-pages-with-jekyll-3-3 -# http://jekyllrb.com/news/2016/10/06/jekyll-3-3-is-here/ -url: "http://rocksdb.org" - -# Note: There are new filters in Jekyll 3.3 to help with absolute and relative urls -# absolute_url -# relative_url -# So you will see these used throughout the Jekyll code in this template. -# no more need for | prepend: site.url | prepend: site.baseurl -# http://jekyllrb.com/news/2016/10/06/jekyll-3-3-is-here/ -#https://github.com/blog/2277-what-s-new-in-github-pages-with-jekyll-3-3 - -# The GitHub repo for your project -ghrepo: "facebook/rocksdb" - -# Use these color settings to determine your colour scheme for the site. -color: - # primary should be a vivid color that reflects the project's brand - primary: "#2a2a2a" - # secondary should be a subtle light or dark color used on page backgrounds - secondary: "#f9f9f9" - # Use the following to specify whether the previous two colours are 'light' - # or 'dark' and therefore what colors can be overlaid on them - primary-overlay: "dark" - secondary-overlay: "light" - -#Uncomment this if you want to enable Algolia doc search with your own values -#searchconfig: -# apikey: "" -# indexname: "" - -# Blog posts are builtin to Jekyll by default, with the `_posts` directory. -# Here you can specify other types of documentation. The names here are `docs` -# and `top-level`. This means their content will be in `_docs` and `_top-level`. -# The permalink format is also given. 
-# http://ben.balter.com/2015/02/20/jekyll-collections/ -collections: - docs: - output: true - permalink: /docs/:name/ - top-level: - output: true - permalink: :name.html - -# DO NOT ADJUST BELOW THIS LINE UNLESS YOU KNOW WHAT YOU ARE CHANGING - -markdown: kramdown -kramdown: - input: GFM - syntax_highlighter: rouge - - syntax_highlighter_opts: - css_class: 'rougeHighlight' - span: - line_numbers: false - block: - line_numbers: true - start_line: 1 - -sass: - style: :compressed - -redcarpet: - extensions: [with_toc_data] - -plugins: - - jekyll-redirect-from diff --git a/docs/_data/authors.yml b/docs/_data/authors.yml deleted file mode 100644 index 210987c0b..000000000 --- a/docs/_data/authors.yml +++ /dev/null @@ -1,81 +0,0 @@ -icanadi: - full_name: Igor Canadi - fbid: 706165749 - -xjin: - full_name: Xing Jin - fbid: 100000739847320 - -leijin: - full_name: Lei Jin - fbid: 634570164 - -yhciang: - full_name: Yueh-Hsuan Chiang - fbid: 1619020986 - -radheshyam: - full_name: Radheshyam Balasundaram - fbid: 800837305 - -zagfox: - full_name: Feng Zhu - fbid: 100006493823622 - -lgalanis: - full_name: Leonidas Galanis - fbid: 8649950 - -sdong: - full_name: Siying Dong - fbid: 9805119 - -dmitrism: - full_name: Dmitri Smirnov - -rven2: - full_name: Venkatesh Radhakrishnan - fbid: 100008352697325 - -yiwu: - full_name: Yi Wu - fbid: 100000476362039 - -maysamyabandeh: - full_name: Maysam Yabandeh - fbid: 100003482360101 - -IslamAbdelRahman: - full_name: Islam AbdelRahman - fbid: 642759407 - -ajkr: - full_name: Andrew Kryczka - fbid: 568694102 - -abhimadan: - full_name: Abhishek Madan - fbid: 1850247869 - -sagar0: - full_name: Sagar Vemuri - fbid: 2419111 - -lightmark: - full_name: Aaron Gao - fbid: 1351549072 - -fgwu: - full_name: Fenggang Wu - fbid: 100002297362180 - -ltamasi: - full_name: Levi Tamasi - -cbi42: - full_name: Changyu Bi - fbid: 100078474793041 - -zjay: - full_name: Jay Zhuang - fbid: 100032386042884 diff --git a/docs/_data/features.yml b/docs/_data/features.yml deleted file mode 100644 index d692c1849..000000000 --- a/docs/_data/features.yml +++ /dev/null @@ -1,19 +0,0 @@ -- title: High Performance - text: | - RocksDB uses a log structured database engine, written entirely in C++, for maximum performance. Keys and values are just arbitrarily-sized byte streams. - image: images/promo-performance.svg - -- title: Optimized for Fast Storage - text: | - RocksDB is optimized for fast, low latency storage such as flash drives and high-speed disk drives. RocksDB exploits the full potential of high read/write rates offered by flash or RAM. - image: images/promo-flash.svg - -- title: Adaptable - text: | - RocksDB is adaptable to different workloads. From database storage engines such as [MyRocks](https://github.com/facebook/mysql-5.6) to [application data caching](http://techblog.netflix.com/2016/05/application-data-caching-using-ssds.html) to embedded workloads, RocksDB can be used for a variety of data needs. - image: images/promo-adapt.svg - -- title: Basic and Advanced Database Operations - text: | - RocksDB provides basic operations such as opening and closing a database, reading and writing to more advanced operations such as merging and compaction filters. 
- image: images/promo-operations.svg diff --git a/docs/_data/nav.yml b/docs/_data/nav.yml deleted file mode 100644 index b70c65ff7..000000000 --- a/docs/_data/nav.yml +++ /dev/null @@ -1,30 +0,0 @@ -- title: Docs - href: /docs/ - category: docs - -- title: GitHub - href: https://github.com/facebook/rocksdb/ - category: external - -- title: API (C++) - href: https://github.com/facebook/rocksdb/tree/main/include/rocksdb - category: external - -- title: API (Java) - href: https://github.com/facebook/rocksdb/tree/main/java/src/main/java/org/rocksdb - category: external - -- title: Support - href: /support.html - category: support - -- title: Blog - href: /blog/ - category: blog - -- title: Facebook - href: https://www.facebook.com/groups/rocksdb.dev/ - category: external - -# Use external for external links not associated with the paths of the current site. -# If a category is external, site urls, for example, are not prepended to the href, etc.. diff --git a/docs/_data/nav_docs.yml b/docs/_data/nav_docs.yml deleted file mode 100644 index 8cdfd2d04..000000000 --- a/docs/_data/nav_docs.yml +++ /dev/null @@ -1,3 +0,0 @@ -- title: Quick Start - items: - - id: getting-started diff --git a/docs/_data/powered_by.yml b/docs/_data/powered_by.yml deleted file mode 100644 index a780cfe40..000000000 --- a/docs/_data/powered_by.yml +++ /dev/null @@ -1 +0,0 @@ -# Fill in later if desired diff --git a/docs/_data/powered_by_highlight.yml b/docs/_data/powered_by_highlight.yml deleted file mode 100644 index a780cfe40..000000000 --- a/docs/_data/powered_by_highlight.yml +++ /dev/null @@ -1 +0,0 @@ -# Fill in later if desired diff --git a/docs/_data/promo.yml b/docs/_data/promo.yml deleted file mode 100644 index 9a72aa844..000000000 --- a/docs/_data/promo.yml +++ /dev/null @@ -1,6 +0,0 @@ -# This file determines the list of promotional elements added to the header of \ -# your site's homepage. Full list of plugins are shown - -- type: button - href: docs/getting-started.html - text: Get Started diff --git a/docs/_docs/faq.md b/docs/_docs/faq.md deleted file mode 100644 index 0887a0987..000000000 --- a/docs/_docs/faq.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -docid: support-faq -title: FAQ -layout: docs -permalink: /docs/support/faq.html ---- - -Here is an ever-growing list of frequently asked questions around RocksDB - -## What is RocksDB? - -RocksDB is an embeddable persistent key-value store for fast storage. RocksDB can also be the foundation for a client-server database but our current focus is on embedded workloads. - -RocksDB builds on [LevelDB](https://code.google.com/p/leveldb/) to be scalable to run on servers with many CPU cores, to efficiently use fast storage, to support IO-bound, in-memory and write-once workloads, and to be flexible to allow for innovation. - -For the latest details, watch [Mark Callaghan’s and Igor Canadi’s talk at CMU on 10/2015](https://scs.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=f4e0eb37-ae18-468f-9248-cb73edad3e56). [Dhruba Borthakur’s introductory talk](https://github.com/facebook/rocksdb/blob/gh-pages-old/intro.pdf?raw=true) from the Data @ Scale 2013 conference provides some perspective about how RocksDB has evolved. - -## How does performance compare? - -We benchmarked LevelDB and found that it was unsuitable for our server workloads. 
The [benchmark results](http://leveldb.googlecode.com/svn/trunk/doc/benchmark.html) look awesome at first sight, but we quickly realized that those results were for a database whose size was smaller than the size of RAM on the test machine – where the entire database could fit in the OS page cache. When we performed the same benchmarks on a database that was at least 5 times larger than main memory, the performance results were dismal.
-
-By contrast, we’ve published the [RocksDB benchmark results](https://github.com/facebook/rocksdb/wiki/Performance-Benchmarks) for server side workloads on Flash. We also measured the performance of LevelDB on these server-workload benchmarks and found that RocksDB solidly outperforms LevelDB for these IO bound workloads. We found that LevelDB’s single-threaded compaction process was insufficient to drive server workloads. We saw frequent write-stalls with LevelDB that caused 99-percentile latency to be tremendously large. We found that mmap-ing a file into the OS cache introduced performance bottlenecks for reads. We could not make LevelDB consume all the IOs offered by the underlying Flash storage.
-
-## What is RocksDB suitable for?
-
-RocksDB can be used by applications that need low latency database accesses. Possibilities include:
-
-* A user-facing application that stores the viewing history and state of users of a website.
-* A spam detection application that needs fast access to big data sets.
-* A graph-search query that needs to scan a data set in realtime.
-* A cache for data from Hadoop, thereby allowing applications to query Hadoop data in realtime.
-* A message-queue that supports a high number of inserts and deletes.
-
-## How big is RocksDB adoption?
-
-RocksDB is an embedded storage engine that is used in a number of backend systems at Facebook. In the Facebook newsfeed’s backend, it replaced another internal storage engine called Centrifuge and is one of the many components used. ZippyDB, a distributed key-value store service used by Facebook products, relies on RocksDB. Details on ZippyDB are in [Muthu Annamalai’s talk at Data@Scale in Seattle](https://youtu.be/DfiN7pG0D0k). Dragon, a distributed graph query engine that is part of the social graph infrastructure, uses RocksDB to store data. Parse has been running [MongoDB on RocksDB in production](http://blog.parse.com/announcements/mongodb-rocksdb-parse/) since early 2015.
-
-RocksDB is proving to be a useful component for a lot of other groups in the industry. For a list of projects currently using RocksDB, take a look at our USERS.md list on GitHub.
-
-## How good is RocksDB as a database storage engine?
-
-Our engineering team at Facebook firmly believes that RocksDB has great potential as a storage engine for databases. It has been proven in production with MongoDB: [MongoRocks](https://github.com/mongodb-partners/mongo-rocks) is the RocksDB based storage engine for MongoDB.
-
-[MyRocks](https://code.facebook.com/posts/190251048047090/myrocks-a-space-and-write-optimized-mysql-database/) is the RocksDB based storage engine for MySQL. Using RocksDB we have managed to achieve 2x better compression and 10x less write amplification for our benchmarks compared to our existing MySQL setup. Given our current results, work is currently underway to develop MyRocks into a production-ready solution for web-scale MySQL workloads. Follow along on [GitHub](https://github.com/facebook/mysql-5.6)!
-
-## Why is RocksDB open sourced?
-
-We are open sourcing this project on [GitHub](http://github.com/facebook/rocksdb) because we think it will be useful beyond Facebook. We are hoping that software programmers and database developers will use, enhance, and customize RocksDB for their use-cases. We would also like to engage with the academic community on topics related to efficiency for modern database algorithms.
diff --git a/docs/_docs/getting-started.md b/docs/_docs/getting-started.md
deleted file mode 100644
index efd17c031..000000000
--- a/docs/_docs/getting-started.md
+++ /dev/null
@@ -1,78 +0,0 @@
----
-docid: getting-started
-title: Getting started
-layout: docs
-permalink: /docs/getting-started.html
----
-
-## Overview
-
-The RocksDB library provides a persistent key value store. Keys and values are arbitrary byte arrays. The keys are ordered within the key value store according to a user-specified comparator function.
-
-The library is maintained by the Facebook Database Engineering Team, and is based on [LevelDB](https://github.com/google/leveldb), by Sanjay Ghemawat and Jeff Dean at Google.
-
-This overview gives some simple examples of how RocksDB is used. For the story of why RocksDB was created in the first place, see [Dhruba Borthakur’s introductory talk](https://github.com/facebook/rocksdb/blob/gh-pages-old/intro.pdf?raw=true) from the Data @ Scale 2013 conference.
-
-## Opening A Database
-
-A rocksdb database has a name which corresponds to a file system directory. All of the contents of the database are stored in this directory. The following example shows how to open a database, creating it if necessary:
-
-```c++
-#include <cassert>
-#include "rocksdb/db.h"
-
-rocksdb::DB* db;
-rocksdb::Options options;
-options.create_if_missing = true;
-rocksdb::Status status =
-  rocksdb::DB::Open(options, "/tmp/testdb", &db);
-assert(status.ok());
-...
-```
-
-If you want to raise an error if the database already exists, add the following line before the rocksdb::DB::Open call:
-
-```c++
-options.error_if_exists = true;
-```
-
-## Status
-
-You may have noticed the `rocksdb::Status` type above. Values of this type are returned by most functions in RocksDB that may encounter
-an error. You can check if such a result is ok, and also print an associated error message:
-
-```c++
-rocksdb::Status s = ...;
-if (!s.ok()) std::cerr << s.ToString() << std::endl;
-```
-
-## Closing A Database
-
-When you are done with a database, just delete the database object. For example:
-
-```c++
-/* open the db as described above */
-/* do something with db */
-delete db;
-```
-
-## Reads And Writes
-
-The database provides Put, Delete, and Get methods to modify/query the database. For example, the following code moves the value stored under `key1` to `key2`.
-
-```c++
-std::string value;
-rocksdb::Status s = db->Get(rocksdb::ReadOptions(), key1, &value);
-if (s.ok()) s = db->Put(rocksdb::WriteOptions(), key2, value);
-if (s.ok()) s = db->Delete(rocksdb::WriteOptions(), key1);
-```
-
-## Further documentation
-
-These are just simple examples of how RocksDB is used. The full documentation is currently on the [GitHub wiki](https://github.com/facebook/rocksdb/wiki).
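-
-Putting the snippets above together, a minimal self-contained program might look like the following sketch (the `/tmp/testdb` path and the keys are purely illustrative):
-
-```c++
-#include <cassert>
-#include <iostream>
-#include <string>
-
-#include "rocksdb/db.h"
-
-int main() {
-  rocksdb::DB* db;
-  rocksdb::Options options;
-  options.create_if_missing = true;
-
-  // Open (and, if needed, create) the database directory.
-  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/testdb", &db);
-  assert(s.ok());
-
-  // Write a value, read it back, then delete it.
-  s = db->Put(rocksdb::WriteOptions(), "key1", "value1");
-  assert(s.ok());
-
-  std::string value;
-  s = db->Get(rocksdb::ReadOptions(), "key1", &value);
-  if (s.ok()) std::cout << "key1 => " << value << std::endl;
-
-  s = db->Delete(rocksdb::WriteOptions(), "key1");
-  assert(s.ok());
-
-  // Closing the database is just deleting the object.
-  delete db;
-  return 0;
-}
-```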
- -Here are some specific details about the RocksDB implementation: - -- [RocksDB Overview](https://github.com/facebook/rocksdb/wiki/RocksDB-Overview) -- [Immutable BlockBased Table file format](https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format) -- [Log file format](https://github.com/facebook/rocksdb/wiki/Write-Ahead-Log-File-Format) diff --git a/docs/_includes/blog_pagination.html b/docs/_includes/blog_pagination.html deleted file mode 100644 index 6a1f33436..000000000 --- a/docs/_includes/blog_pagination.html +++ /dev/null @@ -1,28 +0,0 @@ - -{% if paginator.total_pages > 1 %} -
- -
-{% endif %} diff --git a/docs/_includes/content/gridblocks.html b/docs/_includes/content/gridblocks.html deleted file mode 100644 index 49c5e5917..000000000 --- a/docs/_includes/content/gridblocks.html +++ /dev/null @@ -1,5 +0,0 @@ -
-{% for item in {{include.data_source}} %} - {% include content/items/gridblock.html item=item layout=include.layout imagealign=include.imagealign align=include.align %} -{% endfor %} -
\ No newline at end of file diff --git a/docs/_includes/content/items/gridblock.html b/docs/_includes/content/items/gridblock.html deleted file mode 100644 index 58c9e7fda..000000000 --- a/docs/_includes/content/items/gridblock.html +++ /dev/null @@ -1,37 +0,0 @@ -{% if include.layout == "fourColumn" %} - {% assign layout = "fourByGridBlock" %} -{% else %} - {% assign layout = "twoByGridBlock" %} -{% endif %} - -{% if include.imagealign == "side" %} - {% assign imagealign = "imageAlignSide" %} -{% else %} - {% if item.image %} - {% assign imagealign = "imageAlignTop" %} - {% else %} - {% assign imagealign = "" %} - {% endif %} -{% endif %} - -{% if include.align == "right" %} - {% assign align = "alignRight" %} -{% elsif include.align == "center" %} - {% assign align = "alignCenter" %} -{% else %} - {% assign align = "alignLeft" %} -{% endif %} - -
- {% if item.image %} -
- {{ item.title }} -
- {% endif %} -
-

{{ item.title }}

- {% if item.text %} - {{ item.text | markdownify }} - {% endif %} -
-
diff --git a/docs/_includes/doc.html b/docs/_includes/doc.html deleted file mode 100644 index 31e365ffe..000000000 --- a/docs/_includes/doc.html +++ /dev/null @@ -1,25 +0,0 @@ -
-
-

{% if include.truncate %}{{ page.title }}{% else %}{{ page.title }}{% endif %}

-
- -
- {% if include.truncate %} - {% if page.content contains '' %} - {{ page.content | split:'' | first }} - - {% else %} - {{ page.content }} - {% endif %} - {% else %} - {{ content }} - -

Edit on GitHub

- {% endif %} -
- {% include doc_paging.html %} -
diff --git a/docs/_includes/doc_paging.html b/docs/_includes/doc_paging.html deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/_includes/footer.html b/docs/_includes/footer.html deleted file mode 100644 index f560172d1..000000000 --- a/docs/_includes/footer.html +++ /dev/null @@ -1,34 +0,0 @@ -
- -
- diff --git a/docs/_includes/head.html b/docs/_includes/head.html deleted file mode 100644 index 10845ec1d..000000000 --- a/docs/_includes/head.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - {% if site.searchconfig %} - - {% endif %} - - {% if page.title %}{{ page.title }} | {{ site.title }}{% else %}{{ site.title }}{% endif %} - - - - - diff --git a/docs/_includes/header.html b/docs/_includes/header.html deleted file mode 100644 index 8108d222b..000000000 --- a/docs/_includes/header.html +++ /dev/null @@ -1,19 +0,0 @@ -
-
-
- -

{{ site.title }}

-

{{ site.tagline }}

- -
-

{% if page.excerpt %}{{ page.excerpt | strip_html }}{% else %}{{ site.description }}{% endif %}

-
-
- {% for promo in site.data.promo %} - {% include plugins/{{promo.type}}.html button_href=promo.href button_text=promo.text %} -
- {% endfor %} -
-
-
-
diff --git a/docs/_includes/hero.html b/docs/_includes/hero.html deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/_includes/home_header.html b/docs/_includes/home_header.html deleted file mode 100644 index 90880d17c..000000000 --- a/docs/_includes/home_header.html +++ /dev/null @@ -1,22 +0,0 @@ -
-
-
-
-

{{ site.tagline }}

-
-

{% if page.excerpt %}{{ page.excerpt | strip_html }}{% else %}{{ site.description }}{% endif %}

-
-
- {% for promo in site.data.promo %} -
- {% include plugins/{{promo.type}}.html href=promo.href text=promo.text children=promo.children %} -
- {% endfor %} -
-
- -
-
-
diff --git a/docs/_includes/katex_import.html b/docs/_includes/katex_import.html deleted file mode 100644 index 6d6b7cf44..000000000 --- a/docs/_includes/katex_import.html +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/docs/_includes/katex_render.html b/docs/_includes/katex_render.html deleted file mode 100644 index 56e2e8974..000000000 --- a/docs/_includes/katex_render.html +++ /dev/null @@ -1,210 +0,0 @@ - diff --git a/docs/_includes/nav.html b/docs/_includes/nav.html deleted file mode 100644 index 9c6fed06b..000000000 --- a/docs/_includes/nav.html +++ /dev/null @@ -1,37 +0,0 @@ -
-
-
- - -

{{ site.title }}

-
- - - -
-
-
diff --git a/docs/_includes/nav/collection_nav.html b/docs/_includes/nav/collection_nav.html deleted file mode 100644 index a3c7a2dd3..000000000 --- a/docs/_includes/nav/collection_nav.html +++ /dev/null @@ -1,64 +0,0 @@ -
- -
- diff --git a/docs/_includes/nav/collection_nav_group.html b/docs/_includes/nav/collection_nav_group.html deleted file mode 100644 index b236ac5e3..000000000 --- a/docs/_includes/nav/collection_nav_group.html +++ /dev/null @@ -1,19 +0,0 @@ - \ No newline at end of file diff --git a/docs/_includes/nav/collection_nav_group_item.html b/docs/_includes/nav/collection_nav_group_item.html deleted file mode 100644 index fbb063deb..000000000 --- a/docs/_includes/nav/collection_nav_group_item.html +++ /dev/null @@ -1 +0,0 @@ - diff --git a/docs/_includes/nav/header_nav.html b/docs/_includes/nav/header_nav.html deleted file mode 100644 index 0fe945cdc..000000000 --- a/docs/_includes/nav/header_nav.html +++ /dev/null @@ -1,30 +0,0 @@ -
- - -
- \ No newline at end of file diff --git a/docs/_includes/nav_search.html b/docs/_includes/nav_search.html deleted file mode 100644 index 84956b9f7..000000000 --- a/docs/_includes/nav_search.html +++ /dev/null @@ -1,15 +0,0 @@ - - - \ No newline at end of file diff --git a/docs/_includes/plugins/all_share.html b/docs/_includes/plugins/all_share.html deleted file mode 100644 index 59b00d615..000000000 --- a/docs/_includes/plugins/all_share.html +++ /dev/null @@ -1,3 +0,0 @@ -
- {% include plugins/like_button.html %}{% include plugins/twitter_share.html %}{% include plugins/google_share.html %} -
\ No newline at end of file diff --git a/docs/_includes/plugins/ascii_cinema.html b/docs/_includes/plugins/ascii_cinema.html deleted file mode 100644 index 7d3f97148..000000000 --- a/docs/_includes/plugins/ascii_cinema.html +++ /dev/null @@ -1,2 +0,0 @@ -
- \ No newline at end of file diff --git a/docs/_includes/plugins/button.html b/docs/_includes/plugins/button.html deleted file mode 100644 index 9e499fe3f..000000000 --- a/docs/_includes/plugins/button.html +++ /dev/null @@ -1,6 +0,0 @@ - \ No newline at end of file diff --git a/docs/_includes/plugins/github_star.html b/docs/_includes/plugins/github_star.html deleted file mode 100644 index 6aea70fc7..000000000 --- a/docs/_includes/plugins/github_star.html +++ /dev/null @@ -1,4 +0,0 @@ -
- Star -
- \ No newline at end of file diff --git a/docs/_includes/plugins/github_watch.html b/docs/_includes/plugins/github_watch.html deleted file mode 100644 index 64233b57b..000000000 --- a/docs/_includes/plugins/github_watch.html +++ /dev/null @@ -1,4 +0,0 @@ -
- Watch -
- \ No newline at end of file diff --git a/docs/_includes/plugins/google_share.html b/docs/_includes/plugins/google_share.html deleted file mode 100644 index 1b557db86..000000000 --- a/docs/_includes/plugins/google_share.html +++ /dev/null @@ -1,5 +0,0 @@ -
-
-
- - diff --git a/docs/_includes/plugins/iframe.html b/docs/_includes/plugins/iframe.html deleted file mode 100644 index 525b59f22..000000000 --- a/docs/_includes/plugins/iframe.html +++ /dev/null @@ -1,6 +0,0 @@ -
- -
-
- {% include plugins/button.html href=include.href text=include.text %} -
\ No newline at end of file diff --git a/docs/_includes/plugins/like_button.html b/docs/_includes/plugins/like_button.html deleted file mode 100644 index bcb8a7bee..000000000 --- a/docs/_includes/plugins/like_button.html +++ /dev/null @@ -1,18 +0,0 @@ -
- \ No newline at end of file diff --git a/docs/_includes/plugins/plugin_row.html b/docs/_includes/plugins/plugin_row.html deleted file mode 100644 index 800f50b82..000000000 --- a/docs/_includes/plugins/plugin_row.html +++ /dev/null @@ -1,5 +0,0 @@ -
-{% for child in include.children %} - {% include plugins/{{child.type}}.html href=child.href text=child.text %} -{% endfor %} -
\ No newline at end of file diff --git a/docs/_includes/plugins/post_social_plugins.html b/docs/_includes/plugins/post_social_plugins.html deleted file mode 100644 index a2ecb90ee..000000000 --- a/docs/_includes/plugins/post_social_plugins.html +++ /dev/null @@ -1,41 +0,0 @@ -
- -
-
- - - diff --git a/docs/_includes/plugins/slideshow.html b/docs/_includes/plugins/slideshow.html deleted file mode 100644 index 69fa2b300..000000000 --- a/docs/_includes/plugins/slideshow.html +++ /dev/null @@ -1,88 +0,0 @@ -
- - - \ No newline at end of file diff --git a/docs/_includes/plugins/twitter_follow.html b/docs/_includes/plugins/twitter_follow.html deleted file mode 100644 index b0f25dc60..000000000 --- a/docs/_includes/plugins/twitter_follow.html +++ /dev/null @@ -1,12 +0,0 @@ - - - diff --git a/docs/_includes/plugins/twitter_share.html b/docs/_includes/plugins/twitter_share.html deleted file mode 100644 index a60f2a8df..000000000 --- a/docs/_includes/plugins/twitter_share.html +++ /dev/null @@ -1,11 +0,0 @@ -
- -
- diff --git a/docs/_includes/post.html b/docs/_includes/post.html deleted file mode 100644 index 3ae0a2a80..000000000 --- a/docs/_includes/post.html +++ /dev/null @@ -1,40 +0,0 @@ -
-
-
- {% for author_idx in page.author %} -
- {% assign author = site.data.authors[author_idx] %} - {% if author.fbid %} -
- {{ author.fullname }} -
- {% endif %} - {% if author.full_name %} - - {% endif %} -
- {% endfor %} -
-

{% if include.truncate %}{{ page.title }}{% else %}{{ page.title }}{% endif %}

- -
-
- {% if include.truncate %} - {% if page.content contains '' %} - {{ page.content | split:'' | first | markdownify }} - - {% else %} - {{ page.content | markdownify }} - {% endif %} - {% else %} - {{ content }} - {% endif %} - {% unless include.truncate %} - {% include plugins/like_button.html %} - {% endunless %} -
-
diff --git a/docs/_includes/powered_by.html b/docs/_includes/powered_by.html deleted file mode 100644 index c629429cd..000000000 --- a/docs/_includes/powered_by.html +++ /dev/null @@ -1,28 +0,0 @@ -{% if site.data.powered_by.first.items or site.data.powered_by_highlight.first.items %} -
-
- {% if site.data.powered_by_highlight.first.title %} -

{{ site.data.powered_by_highlight.first.title }}

- {% else %} -

{{ site.data.powered_by.first.title }}

- {% endif %} - {% if site.data.powered_by_highlight.first.items %} -
- {% for item in site.data.powered_by_highlight.first.items %} -
- {{ item.name }} -
- {% endfor %} -
- {% endif %} -
- {% for item in site.data.powered_by.first.items %} - - {% endfor %} -
-
Does your app use {{ site.title }}? Add it to this list with a pull request!
-
-
-{% endif %} diff --git a/docs/_includes/social_plugins.html b/docs/_includes/social_plugins.html deleted file mode 100644 index 9b36580dc..000000000 --- a/docs/_includes/social_plugins.html +++ /dev/null @@ -1,31 +0,0 @@ - -
- -
- - - diff --git a/docs/_includes/ui/button.html b/docs/_includes/ui/button.html deleted file mode 100644 index 729ccc33b..000000000 --- a/docs/_includes/ui/button.html +++ /dev/null @@ -1 +0,0 @@ -{{ include.button_text }} \ No newline at end of file diff --git a/docs/_layouts/basic.html b/docs/_layouts/basic.html deleted file mode 100644 index 65bd21060..000000000 --- a/docs/_layouts/basic.html +++ /dev/null @@ -1,12 +0,0 @@ ---- -layout: doc_default ---- - -
-
-
- {{ content }} -
-
-
- diff --git a/docs/_layouts/blog.html b/docs/_layouts/blog.html deleted file mode 100644 index 1b0da4135..000000000 --- a/docs/_layouts/blog.html +++ /dev/null @@ -1,11 +0,0 @@ ---- -category: blog -layout: blog_default ---- - -
-
- {{ content }} -
-
- diff --git a/docs/_layouts/blog_default.html b/docs/_layouts/blog_default.html deleted file mode 100644 index a29d58d3d..000000000 --- a/docs/_layouts/blog_default.html +++ /dev/null @@ -1,14 +0,0 @@ - - - {% include head.html %} - - {% include nav.html alwayson=true %} - - - diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html deleted file mode 100644 index 0167d9fd9..000000000 --- a/docs/_layouts/default.html +++ /dev/null @@ -1,12 +0,0 @@ - - - {% include head.html %} - - {% include nav.html alwayson=true %} - - - - diff --git a/docs/_layouts/doc_default.html b/docs/_layouts/doc_default.html deleted file mode 100644 index 4a4139247..000000000 --- a/docs/_layouts/doc_default.html +++ /dev/null @@ -1,14 +0,0 @@ - - - {% include head.html %} - - {% include nav.html alwayson=true %} - - - diff --git a/docs/_layouts/doc_page.html b/docs/_layouts/doc_page.html deleted file mode 100644 index dba761e7d..000000000 --- a/docs/_layouts/doc_page.html +++ /dev/null @@ -1,10 +0,0 @@ ---- -layout: doc_default ---- - -
-
- {{ content }} -
-
- diff --git a/docs/_layouts/docs.html b/docs/_layouts/docs.html deleted file mode 100644 index 749dafabb..000000000 --- a/docs/_layouts/docs.html +++ /dev/null @@ -1,5 +0,0 @@ ---- -layout: doc_page ---- - -{% include doc.html %} \ No newline at end of file diff --git a/docs/_layouts/home.html b/docs/_layouts/home.html deleted file mode 100644 index b17732fa1..000000000 --- a/docs/_layouts/home.html +++ /dev/null @@ -1,26 +0,0 @@ - - - {% include head.html %} -
-
- Support Ukraine 🇺🇦 - - Help Provide Humanitarian Aid to Ukraine - - . -
-
- - {% include nav.html alwayson=true %} - - - diff --git a/docs/_layouts/page.html b/docs/_layouts/page.html deleted file mode 100644 index bec36805b..000000000 --- a/docs/_layouts/page.html +++ /dev/null @@ -1,3 +0,0 @@ ---- -layout: blog ---- diff --git a/docs/_layouts/plain.html b/docs/_layouts/plain.html deleted file mode 100644 index fccc02ce1..000000000 --- a/docs/_layouts/plain.html +++ /dev/null @@ -1,10 +0,0 @@ ---- -layout: default ---- - -
-
- {{ content }} -
-
- diff --git a/docs/_layouts/post.html b/docs/_layouts/post.html deleted file mode 100644 index 4c92cf214..000000000 --- a/docs/_layouts/post.html +++ /dev/null @@ -1,8 +0,0 @@ ---- -collection: blog -layout: blog ---- - -
-{% include post.html %} -
\ No newline at end of file diff --git a/docs/_layouts/redirect.html b/docs/_layouts/redirect.html deleted file mode 100644 index c24f81748..000000000 --- a/docs/_layouts/redirect.html +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/docs/_layouts/top-level.html b/docs/_layouts/top-level.html deleted file mode 100644 index fccc02ce1..000000000 --- a/docs/_layouts/top-level.html +++ /dev/null @@ -1,10 +0,0 @@ ---- -layout: default ---- - -
-
- {{ content }} -
-
- diff --git a/docs/_posts/2014-03-27-how-to-backup-rocksdb.markdown b/docs/_posts/2014-03-27-how-to-backup-rocksdb.markdown deleted file mode 100644 index f9e4a5444..000000000 --- a/docs/_posts/2014-03-27-how-to-backup-rocksdb.markdown +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: How to backup RocksDB? -layout: post -author: icanadi -category: blog -redirect_from: - - /blog/191/how-to-backup-rocksdb/ ---- - -In RocksDB, we have implemented an easy way to backup your DB. Here is a simple example: - - - - #include "rocksdb/db.h" - #include "utilities/backupable_db.h" - using namespace rocksdb; - - DB* db; - DB::Open(Options(), "/tmp/rocksdb", &db); - BackupableDB* backupable_db = new BackupableDB(db, BackupableDBOptions("/tmp/rocksdb_backup")); - backupable_db->Put(...); // do your thing - backupable_db->CreateNewBackup(); - delete backupable_db; // no need to also delete db - - - - -This simple example will create a backup of your DB in "/tmp/rocksdb_backup". Creating new BackupableDB consumes DB* and you should be calling all the DB methods on object `backupable_db` going forward. - -Restoring is also easy: - - - - RestoreBackupableDB* restore = new RestoreBackupableDB(Env::Default(), BackupableDBOptions("/tmp/rocksdb_backup")); - restore->RestoreDBFromLatestBackup("/tmp/rocksdb", "/tmp/rocksdb"); - delete restore; - - - - -This code will restore the backup back to "/tmp/rocksdb". The second parameter is the location of log files (In some DBs they are different from DB directory, but usually they are the same. See Options::wal_dir for more info). - -An alternative API for backups is to use BackupEngine directly: - - - - #include "rocksdb/db.h" - #include "utilities/backupable_db.h" - using namespace rocksdb; - - DB* db; - DB::Open(Options(), "/tmp/rocksdb", &db); - db->Put(...); // do your thing - BackupEngine* backup_engine = BackupEngine::NewBackupEngine(Env::Default(), BackupableDBOptions("/tmp/rocksdb_backup")); - backup_engine->CreateNewBackup(db); - delete db; - delete backup_engine; - - - - -Restoring with BackupEngine is similar to RestoreBackupableDB: - - - - BackupEngine* backup_engine = BackupEngine::NewBackupEngine(Env::Default(), BackupableDBOptions("/tmp/rocksdb_backup")); - backup_engine->RestoreDBFromLatestBackup("/tmp/rocksdb", "/tmp/rocksdb"); - delete backup_engine; - - - - -Backups are incremental. You can create a new backup with `CreateNewBackup()` and only the new data will be copied to backup directory (for more details on what gets copied, see "Under the hood"). Checksum is always calculated for any backuped file (including sst, log, and etc). It is used to make sure files are kept sound in the file system. Checksum is also verified for files from the previous backups even though they do not need to be copied. A checksum mismatch aborts the current backup (see "Under the hood" for more details). Once you have more backups saved, you can issue `GetBackupInfo()` call to get a list of all backups together with information on timestamp of the backup and the size (please note that sum of all backups' sizes is bigger than the actual size of the backup directory because some data is shared by multiple backups). Backups are identified by their always-increasing IDs. `GetBackupInfo()` is available both in `BackupableDB` and `RestoreBackupableDB`. - -You probably want to keep around only small number of backups. To delete old backups, just call `PurgeOldBackups(N)`, where N is how many backups you'd like to keep. All backups except the N newest ones will be deleted. 
You can also choose to delete arbitrary backup with call `DeleteBackup(id)`. - -`RestoreDBFromLatestBackup()` will restore the DB from the latest consistent backup. An alternative is `RestoreDBFromBackup()` which takes a backup ID and restores that particular backup. Checksum is calculated for any restored file and compared against the one stored during the backup time. If a checksum mismatch is detected, the restore process is aborted and `Status::Corruption` is returned. Very important thing to note here: Let's say you have backups 1, 2, 3, 4. If you restore from backup 2 and start writing more data to your database, newly created backup will delete old backups 3 and 4 and create new backup 3 on top of 2. - - - -## Advanced usage - - -Let's say you want to backup your DB to HDFS. There is an option in `BackupableDBOptions` to set `backup_env`, which will be used for all file I/O related to backup dir (writes when backuping, reads when restoring). If you set it to HDFS Env, all the backups will be stored in HDFS. - -`BackupableDBOptions::info_log` is a Logger object that is used to print out LOG messages if not-nullptr. - -If `BackupableDBOptions::sync` is true, we will sync data to disk after every file write, guaranteeing that backups will be consistent after a reboot or if machine crashes. Setting it to false will speed things up a bit, but some (newer) backups might be inconsistent. In most cases, everything should be fine, though. - -If you set `BackupableDBOptions::destroy_old_data` to true, creating new `BackupableDB` will delete all the old backups in the backup directory. - -`BackupableDB::CreateNewBackup()` method takes a parameter `flush_before_backup`, which is false by default. When `flush_before_backup` is true, `BackupableDB` will first issue a memtable flush and only then copy the DB files to the backup directory. Doing so will prevent log files from being copied to the backup directory (since flush will delete them). If `flush_before_backup` is false, backup will not issue flush before starting the backup. In that case, the backup will also include log files corresponding to live memtables. Backup will be consistent with current state of the database regardless of `flush_before_backup` parameter. - - - -## Under the hood - - -`BackupableDB` implements `DB` interface and adds four methods to it: `CreateNewBackup()`, `GetBackupInfo()`, `PurgeOldBackups()`, `DeleteBackup()`. Any `DB` interface calls will get forwarded to underlying `DB` object. - -When you call `BackupableDB::CreateNewBackup()`, it does the following: - - - - - - 1. Disable file deletions - - - - 2. Get live files (this includes table files, current and manifest file). - - - - 3. Copy live files to the backup directory. Since table files are immutable and filenames unique, we don't copy a table file that is already present in the backup directory. For example, if there is a file `00050.sst` already backed up and `GetLiveFiles()` returns `00050.sst`, we will not copy that file to the backup directory. However, checksum is calculated for all files regardless if a file needs to be copied or not. If a file is already present, the calculated checksum is compared against previously calculated checksum to make sure nothing crazy happened between backups. If a mismatch is detected, backup is aborted and the system is restored back to the state before `BackupableDB::CreateNewBackup()` is called. 
One thing to note is that a backup abortion could mean a corruption from a file in backup directory or the corresponding live file in current DB. Both manifest and current files are copied, since they are not immutable. - - - - 4. If `flush_before_backup` was set to false, we also need to copy log files to the backup directory. We call `GetSortedWalFiles()` and copy all live files to the backup directory. - - - - 5. Enable file deletions - - - - -Backup IDs are always increasing and we have a file `LATEST_BACKUP` that contains the ID of the latest backup. If we crash in middle of backing up, on a restart we will detect that there are newer backup files than `LATEST_BACKUP` claims there are. In that case, we will delete any backup newer than `LATEST_BACKUP` and clean up all the files since some of the table files might be corrupted. Having corrupted table files in the backup directory is dangerous because of our deduplication strategy. - - - -## Further reading - - -For the API details, see `include/utilities/backupable_db.h`. For the implementation, see `utilities/backupable/backupable_db.cc`. diff --git a/docs/_posts/2014-03-27-how-to-persist-in-memory-rocksdb-database.markdown b/docs/_posts/2014-03-27-how-to-persist-in-memory-rocksdb-database.markdown deleted file mode 100644 index 89ffb2d97..000000000 --- a/docs/_posts/2014-03-27-how-to-persist-in-memory-rocksdb-database.markdown +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: How to persist in-memory RocksDB database? -layout: post -author: icanadi -category: blog -redirect_from: - - /blog/245/how-to-persist-in-memory-rocksdb-database/ ---- - -In recent months, we have focused on optimizing RocksDB for in-memory workloads. With growing RAM sizes and strict low-latency requirements, lots of applications decide to keep their entire data in memory. Running in-memory database with RocksDB is easy -- just mount your RocksDB directory on tmpfs or ramfs [1]. Even if the process crashes, RocksDB can recover all of your data from in-memory filesystem. However, what happens if the machine reboots? - - - -In this article we will explain how you can recover your in-memory RocksDB database even after a machine reboot. - -Every update to RocksDB is written to two places - one is an in-memory data structure called memtable and second is write-ahead log. Write-ahead log can be used to completely recover the data in memtable. By default, when we flush the memtable to table file, we also delete the current log, since we don't need it anymore for recovery (the data from the log is "persisted" in the table file -- we say that the log file is obsolete). However, if your table file is stored in in-memory file system, you may need the obsolete write-ahead log to recover the data after the machine reboots. Here's how you can do that. - -Options::wal_dir is the directory where RocksDB stores write-ahead log files. If you configure this directory to be on flash or disk, you will not lose current log file on machine reboot. -Options::WAL_ttl_seconds is the timeout when we delete the archived log files. If the timeout is non-zero, obsolete log files will be moved to `archive/` directory under Options::wal_dir. Those archived log files will only be deleted after the specified timeout. - -Let's assume Options::wal_dir is a directory on persistent storage and Options::WAL_ttl_seconds is set to one day. To fully recover the DB, we also need to backup the current snapshot of the database (containing table and metadata files) with a frequency of less than one day. 
RocksDB provides an utility that enables you to easily backup the snapshot of your database. You can learn more about it here: [How to backup RocksDB?](https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F) - -You should configure the backup process to avoid backing up log files, since they are already stored in persistent storage. To do that, set BackupableDBOptions::backup_log_files to false. - -Restore process by default cleans up entire DB and WAL directory. Since we didn't include log files in the backup, we need to make sure that restoring the database doesn't delete log files in WAL directory. When restoring, configure RestoreOptions::keep_log_file to true. That option will also move any archived log files back to WAL directory, enabling RocksDB to replay all archived log files and rebuild the in-memory database state. - -To reiterate, here's what you have to do: - - - - - * Set DB directory to tmpfs or ramfs mounted drive - - - - * Set Options::wal_log to a directory on persistent storage - - - - * Set Options::WAL_ttl_seconds to T seconds - - - - * Backup RocksDB every T/2 seconds, with BackupableDBOptions::backup_log_files = false - - - - * When you lose data, restore from backup with RestoreOptions::keep_log_file = true - - - - - -[1] You might also want to consider using [PlainTable format](https://github.com/facebook/rocksdb/wiki/PlainTable-Format) for table files diff --git a/docs/_posts/2014-04-02-the-1st-rocksdb-local-meetup-held-on-march-27-2014.markdown b/docs/_posts/2014-04-02-the-1st-rocksdb-local-meetup-held-on-march-27-2014.markdown deleted file mode 100644 index 7ccbdbaad..000000000 --- a/docs/_posts/2014-04-02-the-1st-rocksdb-local-meetup-held-on-march-27-2014.markdown +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: The 1st RocksDB Local Meetup Held on March 27, 2014 -layout: post -author: xjin -category: blog -redirect_from: - - /blog/323/the-1st-rocksdb-local-meetup-held-on-march-27-2014/ ---- - -On Mar 27, 2014, RocksDB team @ Facebook held the 1st RocksDB local meetup in FB HQ (Menlo Park, California). We invited around 80 guests from 20+ local companies, including LinkedIn, Twitter, Dropbox, Square, Pinterest, MapR, Microsoft and IBM. Finally around 50 guests showed up, totaling around 60% show-up rate. - - - -[![Resize of 20140327_200754](/static/images/Resize-of-20140327_200754-300x225.jpg)](/static/images/Resize-of-20140327_200754-300x225.jpg) - -RocksDB team @ Facebook gave four talks about the latest progress and experience on RocksDB: - - - - - * [Supporting a 1PB In-Memory Workload](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Haobo-RocksDB-In-Memory.pdf) - - - - - * [Column Families in RocksDB](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Igor-Column-Families.pdf) - - - - - * ["Lockless" Get() in RocksDB?](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Lei-Lockless-Get.pdf) - - - - - * [Prefix Hashing in RocksDB](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Siying-Prefix-Hash.pdf) - - -A very interesting question asked by a massive number of guests is: does RocksDB plan to provide replication functionality? Obviously, many applications need a resilient and distributed storage solution, not just single-node storage. We are considering how to approach this issue. - -When will be the next meetup? We haven't decided yet. We will see whether the community is interested in it and how it can help RocksDB grow. 
- -If you have any questions or feedback for the meetup or RocksDB, please let us know in [our Facebook group](https://www.facebook.com/groups/rocksdb.dev/). - -### Comments - -**[Rajiv](geetasen@gmail.com)** - -Have any of these talks been recorded and if so will they be published? - -**[Igor Canadi](icanadi@fb.com)** - -Yes, I think we plan to publish them soon. diff --git a/docs/_posts/2014-04-07-rocksdb-2-8-release.markdown b/docs/_posts/2014-04-07-rocksdb-2-8-release.markdown deleted file mode 100644 index 7be7842a5..000000000 --- a/docs/_posts/2014-04-07-rocksdb-2-8-release.markdown +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: RocksDB 2.8 release -layout: post -author: icanadi -category: blog -redirect_from: - - /blog/371/rocksdb-2-8-release/ ---- - -Check out the new RocksDB 2.8 release on [Github](https://github.com/facebook/rocksdb/releases/tag/2.8.fb). - -RocksDB 2.8. is mostly focused on improving performance for in-memory workloads. We are seeing read QPS as high as 5M (we will write a separate blog post on this). - - - -Here is the summary of new features: - - * Added a new table format called PlainTable, which is optimized for RAM storage (ramfs or tmpfs). You can read more details about it on [our wiki](https://github.com/facebook/rocksdb/wiki/PlainTable-Format). - - - * New prefixed memtable format HashLinkedList, which is optimized for cases where there are only a few keys for each prefix. - - - * Merge operator supports a new function PartialMergeMulti() that allows users to do partial merges against multiple operands. This function enables big speedups for workloads that use merge operators. - - - * Added a V2 compaction filter interface. It buffers the kv-pairs sharing the same key prefix, process them in batches, and return the batched results back to DB. - - - * Geo-spatial support for locations and radial-search. - - - * Improved read performance using thread local cache for frequently accessed data. - - - * Stability improvements -- we're now ignoring partially written tailing record to MANIFEST or WAL files. - - - -We have also introduced small incompatible API changes (mostly for advanced users). You can see full release notes in our [HISTORY.my](https://github.com/facebook/rocksdb/blob/2.8.fb/HISTORY.md) file. diff --git a/docs/_posts/2014-04-21-indexing-sst-files-for-better-lookup-performance.markdown b/docs/_posts/2014-04-21-indexing-sst-files-for-better-lookup-performance.markdown deleted file mode 100644 index 368055d2c..000000000 --- a/docs/_posts/2014-04-21-indexing-sst-files-for-better-lookup-performance.markdown +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Indexing SST Files for Better Lookup Performance -layout: post -author: leijin -category: blog -redirect_from: - - /blog/431/indexing-sst-files-for-better-lookup-performance/ ---- - -For a `Get()` request, RocksDB goes through mutable memtable, list of immutable memtables, and SST files to look up the target key. SST files are organized in levels. - -On level 0, files are sorted based on the time they are flushed. Their key range (as defined by FileMetaData.smallest and FileMetaData.largest) are mostly overlapped with each other. So it needs to look up every L0 file. - - - -Compaction is scheduled periodically to pick up files from an upper level and merges them with files from lower level. As a result, key/values are moved from L0 down the LSM tree gradually. Compaction sorts key/values and split them into files. From level 1 and below, SST files are sorted based on key. 
Their key range are mutually exclusive. Instead of scanning through each SST file and checking if a key falls into its range, RocksDB performs a binary search based on FileMetaData.largest to locate a candidate file that can potentially contain the target key. This reduces complexity from O(N) to O(log(N)). However, log(N) can still be large for bottom levels. For a fan-out ratio of 10, level 3 can have 1000 files. That requires 10 comparisons to locate a candidate file. This is a significant cost for an in-memory database when you can do [several million gets per second](https://github.com/facebook/rocksdb/wiki/RocksDB-In-Memory-Workload-Performance-Benchmarks). - -One observation to this problem is that: after the LSM tree is built, an SST file's position in its level is fixed. Furthermore, its order relative to files from the next level is also fixed. Based on this idea, we can perform [fractional cascading](http://en.wikipedia.org/wiki/Fractional_cascading) kind of optimization to narrow down the binary search range. Here is an example: - -[![tree_example](/static/images/tree_example1.png)](/static/images/tree_example1.png) - -Level 1 has 2 files and level 2 has 8 files. Now, we want to look up key 80. A binary search based FileMetaData.largest tells you file 1 is the candidate. Then key 80 is compared with its FileMetaData.smallest and FileMetaData.largest to decide if it falls into the range. The comparison shows 80 is less than FileMetaData.smallest (100), so file 1 does not possibly contain key 80. We to proceed to check level 2. Usually, we need to do binary search among all 8 files on level 2. But since we already know target key 80 is less than 100 and only file 1 to file 3 can contain key less than 100, we can safely exclude other files from the search. As a result we cut down the search space from 8 files to 3 files. - -Let's look at another example. We want to get key 230. A binary search on level 1 locates to file 2 (this also implies key 230 is larger than file 1's FileMetaData.largest 200). A comparison with file 2's range shows the target key is smaller than file 2's FileMetaData.smallest 300. Even though, we couldn't find key on level 1, we have derived hints that target key is in range between 200 and 300. Any files on level 2 that cannot overlap with [200, 300] can be safely excluded. As a result, we only need to look at file 5 and file 6 on level 2. - -Inspired by this concept, we pre-build pointers at compaction time on level 1 files that point to a range of files on level 2. For example, file 1 on level 1 points to file 3 (on level 2) on the left and file 4 on the right. File 2 will point to level 2 files 6 and 7. At query time, these pointers are used to determine the actual binary search range based on comparison result. - -Our benchmark shows that this optimization improves lookup QPS by ~5% for similar setup mentioned [here](https://github.com/facebook/rocksdb/wiki/RocksDB-In-Memory-Workload-Performance-Benchmarks). diff --git a/docs/_posts/2014-05-14-lock.markdown b/docs/_posts/2014-05-14-lock.markdown deleted file mode 100644 index 12009cc88..000000000 --- a/docs/_posts/2014-05-14-lock.markdown +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: Reducing Lock Contention in RocksDB -layout: post -author: sdong -category: blog -redirect_from: - - /blog/521/lock/ ---- - -In this post, we briefly introduce the recent improvements we did to RocksDB to improve the issue of lock contention costs. 
- -RocksDB has a simple thread synchronization mechanism (See [RocksDB Architecture Guide](https://github.com/facebook/rocksdb/wiki/Rocksdb-Architecture-Guide)  to understand terms used below, like SST tables or mem tables). SST tables are immutable after being written and mem tables are lock-free data structures supporting single writer and multiple readers. There is only one single major lock, the DB mutex (DBImpl.mutex_) protecting all the meta operations, including: - - - - * Increase or decrease reference counters of mem tables and SST tables - - - * Change and check meta data structures, before and after finishing compactions, flushes and new mem table creations - - - * Coordinating writers - - -This DB mutex used to be scalability bottleneck preventing us from scaling to more than 16 threads. To address the issue, we improved RocksDB in several ways. - -1. Consolidate reference counters and introduce "super version". For every read operation, mutex was acquired, and reference counters for each mem table and each SST table were increased. One such operation is not expensive but if you are building a high throughput server with lots of reads, the lock contention will become the bottleneck. This is especially true if you store all your data in RAM. - -To solve this problem, we created a meta-meta data structure called “[super version](https://reviews.facebook.net/rROCKSDB1fdb3f7dc60e96394e3e5b69a46ede5d67fb976c)”, which holds reference counters to all those mem table and SST tables, so that readers only need to increase the reference counters for this single data structure. In RocksDB, list of live mem tables and SST tables only changes infrequently, which would happen when new mem tables are created or flush/compaction happens. Now, at those times, a new super version is created with their reference counters increased. A super version lists live mem tables and SST tables so a reader only needs acquire the lock in order to find the latest super version and increase its reference counter. From the super version, the reader can find all the mem and SST tables which are safety accessible as long as the reader holds the reference count for the super version. - -2. We replace some reference counters to stc::atomic objects, so that decreasing reference count of an object usually doesn’t need to be inside the mutex any more. - -3. Make fetching super version and reference counting lock-free in read queries. After consolidating reference counting to one single super version and removing the locking for decreasing reference counts, in read case, we only acquire mutex for one thing: fetch the latest super version and increase the reference count for that (dereference the counter is done in an atomic decrease). We designed and implemented a (mostly) lock-free approach to do it. See [details](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Lei-Lockless-Get.pdf). We will write a separate blog post for that. - -4. Avoid disk I/O inside the mutex. As we know, each disk I/O to hard drives takes several milliseconds. It can be even longer if file system journal is involved or I/Os are queued. Even occasional disk I/O within mutex can cause huge performance outliers. -We identified in two situations, we might do disk I/O inside mutex and we removed them: -(1) Opening and closing transactional log files. We moved those operations out of the mutex. -(2) Information logging. In multiple places we write to logs within mutex. 
There is a chance that a file write will wait for disk I/O to finish before returning, even if fsync() is not issued, especially on EXT file systems. We occasionally see 100+ millisecond write() latency on EXT. Instead of removing that logging, we came up with a delayed-logging scheme. While inside the mutex, instead of writing directly to the log file, we write to a log buffer together with the timing information. As soon as the mutex is released, we flush the log buffer to the log files. - -5. Reduce object creation inside the mutex. -Object creation can be slow because it involves malloc (in our case). Malloc is sometimes slow because it needs to lock some shared data structures. Allocation can also be slow because we sometimes do expensive operations in some of our classes' constructors. For these reasons, we try to reduce object creation inside the mutex. Here are two examples: - -(1) std::vector uses malloc internally. We introduced the "[autovector](https://reviews.facebook.net/rROCKSDBc01676e46d3be08c3c140361ef1f5884f47d3b3c)" data structure, in which memory for the first few elements is pre-allocated as members of the autovector class. When an autovector is used as a stack variable, no malloc is needed unless the pre-allocated buffer is used up. This autovector is quite useful for manipulating the meta data structures, whose operations are often performed under the DB mutex. - -(2) When building an iterator, we used to create an iterator for every live mem table and SST table within the mutex, plus a merging iterator on top of them. Besides malloc, some of those iterators can be quite expensive to create, for example because of sorting. Now, instead of doing that, we simply increase their reference counters and release the mutex before creating any iterator. - -6. Deal with mutexes in LRU caches. -When I said there was only one single major lock, I was lying. In RocksDB, all LRU caches had exclusive mutexes inside to protect writes to the LRU lists, which happen in both read and write operations. LRU caches are used in the block cache and the table cache. Both are accessed more frequently than the DB data structures, and lock contention on these two locks is as intense as on the DB mutex. Even though the LRU cache is sharded into ShardedLRUCache, we can still see lock contention, especially on table caches. We further address this issue in two ways: -(1) Bypassing table caches. A table cache maintains a list of SST tables' read handlers. Those handlers contain the SST files' descriptors, table metadata, and possibly data indexes, as well as bloom filters. When a table handler needs to be evicted based on LRU, that information is cleared. When an SST table needs to be read and its table handler is not in the LRU cache, the table is opened and the metadata is loaded. In some cases, users want to tune the system so that table handler evictions never happen, which is common for high-throughput, low-latency servers. We introduced a mode where the table cache is bypassed in read queries. In this mode, all table handlers are cached and accessed directly, so there is no need to query and adjust table caches when reading the database. It is the user's responsibility to reserve enough resources for this. This mode can be turned on by setting options.max_open_files=-1, as shown in the sketch below. - -(2) The [new PlainTable format](//github.com/facebook/rocksdb/wiki/PlainTable-Format) (optimized for SST files in ramfs/tmpfs) does not organize data by blocks. Data is located by memory address, so no block cache is needed.
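To make the table-cache bypass in point (1) above concrete, here is a minimal sketch of turning it on when opening a DB; `max_open_files` is the option named in the post, while the DB path and error handling are illustrative only:

```cpp
#include <cassert>

#include "rocksdb/db.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // -1 means "never evict table handlers": every SST file's handler stays
  // cached and is accessed directly, so reads no longer query or adjust the
  // table cache. The application must budget file descriptors and memory.
  options.max_open_files = -1;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/rocksdb_no_table_cache", &db);
  assert(s.ok());
  // ... serve reads without table-cache lookups ...
  delete db;
  return 0;
}
```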
- -With all of those improvements, lock contention is not a bottleneck anymore, which is shown in our [memory-only benchmark](https://github.com/facebook/rocksdb/wiki/RocksDB-In-Memory-Workload-Performance-Benchmarks) . Furthermore, lock contentions are not causing some huge (50 milliseconds+) latency outliers they used to cause. - -### Comments - -**[Lee Hounshell](lee@apsalar.com)** - -Please post an example of reading the same rocksdb concurrently. - -We are using the latest 3.0 rocksdb; however, when two separate processes -try and open the same rocksdb for reading, only one of the open requests -succeed. The other open always fails with “db/LOCK: Resource temporarily unavailable” So far we have not found an option that allows sharing the rocksdb for reads. An example would be most appreciated. - -**[Siying Dong](siying.d@fb.com)** - -Sorry for the delay. We don’t have feature support for this scenario yet. Here is an example you can work around this problem. You can build a snapshot of the DB by doing this: - -1. create a separate directory on the same host for a snapshot of the DB. -1. call `DB::DisableFileDeletions()` -1. call `DB::GetLiveFiles()` to get a full list of the files. -1. for all the files except manifest, add a hardlink file in your new directory pointing to the original file -1. copy the manifest file and truncate the size (you can read the comments of `DB::GetLiveFiles()` for more information) -1. call `DB::EnableFileDeletions()` -1. now you can open the snapshot directory in another process to access those files. Please remember to delete the directory after reading the data to allow those files to be recycled. - -By the way, the best way to ask those questions is in our [facebook group](https://www.facebook.com/groups/rocksdb.dev/). Let us know if you need any further help. - -**[Darshan](darshan.ghumare@gmail.com)** - -Will this consistency problem of RocksDB all occurs in case of single put/write? -What all ACID properties is supported by RocksDB, only durability irrespective of single or batch write? - -**[Siying Dong](siying.d@fb.com)** - -We recently [introduced optimistic transaction](https://reviews.facebook.net/D33435) which can help you ensure all of ACID. - -This blog post is mainly about optimizations in implementation. The RocksDB consistency semantic is not changed. diff --git a/docs/_posts/2014-05-19-rocksdb-3-0-release.markdown b/docs/_posts/2014-05-19-rocksdb-3-0-release.markdown deleted file mode 100644 index 61c90dc93..000000000 --- a/docs/_posts/2014-05-19-rocksdb-3-0-release.markdown +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: RocksDB 3.0 release -layout: post -author: icanadi -category: blog -redirect_from: - - /blog/557/rocksdb-3-0-release/ ---- - -Check out new RocksDB release on [Github](https://github.com/facebook/rocksdb/releases/tag/3.0.fb)! - -New features in RocksDB 3.0: - - * [Column Family support](https://github.com/facebook/rocksdb/wiki/Column-Families) - - - * [Ability to chose different checksum function](https://github.com/facebook/rocksdb/commit/0afc8bc29a5800e3212388c327c750d32e31f3d6) - - - * Deprecated ReadOptions::prefix_seek and ReadOptions::prefix - - - -Check out the full [change log](https://github.com/facebook/rocksdb/blob/3.0.fb/HISTORY.md). 
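Column Families are the headline feature of this release, so a minimal usage sketch may be useful; the DB path and the column family name are made up for the example, and error handling is reduced to asserts:

```cpp
#include <cassert>

#include "rocksdb/db.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/rocksdb_cf_example", &db);
  assert(s.ok());

  // Create a second column family and write into it. Each column family has
  // its own memtables and SST files, but all of them share the same WAL.
  rocksdb::ColumnFamilyHandle* cf = nullptr;
  s = db->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "new_cf", &cf);
  assert(s.ok());
  s = db->Put(rocksdb::WriteOptions(), cf, "key", "value");
  assert(s.ok());

  delete cf;  // release the handle before closing the DB
  delete db;
  return 0;
}
```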
diff --git a/docs/_posts/2014-05-22-rocksdb-3-1-release.markdown b/docs/_posts/2014-05-22-rocksdb-3-1-release.markdown deleted file mode 100644 index 30156742b..000000000 --- a/docs/_posts/2014-05-22-rocksdb-3-1-release.markdown +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: RocksDB 3.1 release -layout: post -author: icanadi -category: blog -redirect_from: - - /blog/575/rocksdb-3-1-release/ ---- - -Check out the new release on [Github](https://github.com/facebook/rocksdb/releases/tag/rocksdb-3.1)! - -New features in RocksDB 3.1: - - * [Materialized hash index](https://github.com/facebook/rocksdb/commit/0b3d03d026a7248e438341264b4c6df339edc1d7) - - - * [FIFO compaction style](https://github.com/facebook/rocksdb/wiki/FIFO-compaction-style) - - -We released 3.1 so fast after 3.0 because one of our internal customers needed materialized hash index. diff --git a/docs/_posts/2014-06-23-plaintable-a-new-file-format.markdown b/docs/_posts/2014-06-23-plaintable-a-new-file-format.markdown deleted file mode 100644 index 6a641f233..000000000 --- a/docs/_posts/2014-06-23-plaintable-a-new-file-format.markdown +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: PlainTable — A New File Format -layout: post -author: sdong -category: blog -redirect_from: - - /blog/599/plaintable-a-new-file-format/ ---- - -In this post, we are introducing "PlainTable" -- a file format we designed for RocksDB, initially to satisfy a production use case at Facebook. - -Design goals: - -1. All data stored in memory, in files stored in tmpfs/ramfs. Support DBs larger than 100GB (may be sharded across multiple RocksDB instance). -1. Optimize for [prefix hashing](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Siying-Prefix-Hash.pdf) -1. Less than or around 1 micro-second average latency for single Get() or Seek(). -1. Minimize memory consumption. -1. Queries efficiently return empty results - - - -Notice that our priority was not to maximize query performance, but to strike a balance between query performance and memory consumption. PlainTable query performance is not as good as you would see with a nicely-designed hash table, but they are of the same order of magnitude, while keeping memory overhead to a minimum. - -Since we are targeting micro-second latency, it is on the level of the number of CPU cache misses (if they cannot be parallellized, which are usually the case for index look-ups). On our target hardware with Intel CPUs of multiple sockets with NUMA, we can only allow 4-5 CPU cache misses (including costs of data TLB). - -To meet our requirements, given that only hash prefix iterating is needed, we made two decisions: - -1. to use a hash index, which is -1. directly addressed to rows, with no block structure. - -Having addressed our latency goal, the next task was to design a very compact hash index to minimize memory consumption. Some tricks we used to meet this goal: - -1. We only use 32-bit integers for data and index offsets.The first bit serves as a flag, so we can avoid using 8-byte pointers. -1. We never copy keys or parts of keys to index search structures. We store only offsets from which keys can be retrieved, to make comparisons with search keys. -1. Since our file is immutable, we can accurately estimate the number of hash buckets needed. - -To make sure the format works efficiently with empty queries, we added a bloom filter check before the query. This adds only one cache miss for non-empty cases [1], but avoids multiple cache misses for most empty results queries. 
This is a good trade-off for use cases with a large percentage of empty results. - -These are the design goals and basic ideas of PlainTable file format. For detailed information, see [this wiki page](https://github.com/facebook/rocksdb/wiki/PlainTable-Format). - -[1] Bloom filter checks typically require multiple memory access. However, because they are independent, they usually do not make the CPU pipeline stale. In any case, we improved the bloom filter to improve data locality - we may cover this further in a future blog post. - -### Comments - -**[Siying Dong](siying.d@fb.com)** - -Does [http://rocksdb.org/feed/](http://rocksdb.org/feed/) work? diff --git a/docs/_posts/2014-06-27-avoid-expensive-locks-in-get.markdown b/docs/_posts/2014-06-27-avoid-expensive-locks-in-get.markdown deleted file mode 100644 index 4411c7ae3..000000000 --- a/docs/_posts/2014-06-27-avoid-expensive-locks-in-get.markdown +++ /dev/null @@ -1,89 +0,0 @@ ---- -title: Avoid Expensive Locks in Get() -layout: post -author: leijin -category: blog -redirect_from: - - /blog/677/avoid-expensive-locks-in-get/ ---- - -As promised in the previous [blog post](blog/2014/05/14/lock.html)! - -RocksDB employs a multiversion concurrency control strategy. Before reading data, it needs to grab the current version, which is encapsulated in a data structure called [SuperVersion](https://reviews.facebook.net/rROCKSDB1fdb3f7dc60e96394e3e5b69a46ede5d67fb976c). - - - -At the beginning of `GetImpl()`, it used to do this: - - - mutex_.Lock(); - auto* s = super_version_->Ref(); - mutex_.Unlock(); - - -The lock is necessary because pointer super_version_ may be updated, the corresponding SuperVersion may be deleted while Ref() is in progress. - - -`Ref()` simply increases the reference counter and returns “this” pointer. However, this simple operation posed big challenges for in-memory workload and stopped RocksDB from scaling read throughput beyond 8 cores. Running 32 read threads on a 32-core CPU leads to [70% system CPU usage](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Lei-Lockless-Get.pdf). This is outrageous! - - - - -Luckily, we found a way to circumvent this problem by using [thread local storage](http://en.wikipedia.org/wiki/Thread-local_storage). Version change is a rare event comparable to millions of read requests. On the very first Get() request, each thread pays the mutex cost to acquire a reference to the new super version. Instead of releasing the reference after use, the reference is cached in thread’s local storage. An atomic variable is used to track global super version number. Subsequent reads simply compare the local super version number against the global super version number. If they are the same, the cached super version reference may be used directly, at no cost. If a version change is detected, mutex must be acquired to update the reference. The cost of mutex lock is amortized among millions of reads and becomes negligible. - - - - -The code looks something like this: - - - - - - SuperVersion* s = thread_local_->Get(); - if (s->version_number != super_version_number_.load()) { - // slow path, cleanup of current super version is omitted - mutex_.Lock(); - s = super_version_->Ref(); - mutex_.Unlock(); - } - - - - -The result is quite amazing. RocksDB can nicely [scale to 32 cores](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Lei-Lockless-Get.pdf) and most CPU time is spent in user land. 
- - - - -Daryl Grove gives a pretty good [comparison between mutex and atomic](https://blogs.oracle.com/d/entry/the_cost_of_mutexes). However, the real cost difference lies beyond what is shown in the assembly code. Mutex can keep threads spinning on CPU or even trigger thread context switches in which all readers compete to access the critical area. Our approach prevents mutual competition by directing threads to check against a global version which does not change at high frequency, and is therefore much more cache-friendly. - - - - -The new approach entails one issue: a thread can visit GetImpl() once but can never come back again. SuperVersion is referenced and cached in its thread local storage. All resources (e.g., memtables, files) which belong to that version are frozen. A “supervisor” is required to visit each thread’s local storage and free its resources without incurring a lock. We designed a lockless sweep using CAS (compare and switch instruction). Here is how it works: - - - - -(1) A reader thread uses CAS to acquire SuperVersion from its local storage and to put in a special flag (SuperVersion::kSVInUse). - - - - -(2) Upon completion of GetImpl(), the reader thread tries to return SuperVersion to local storage by CAS, expecting the special flag (SuperVersion::kSVInUse) in its local storage. If it does not see SuperVersion::kSVInUse, that means a “sweep” was done and the reader thread is responsible for cleanup (this is expensive, but does not happen often on the hot path). - - - - -(3) After any flush/compaction, the background thread performs a sweep (CAS) across all threads’ local storage and frees encountered SuperVersion. A reader thread must re-acquire a new SuperVersion reference on its next visit. - -### Comments - -**[David Barbour](dmbarbour@gmail.com)** - -Please post an example of reading the same rocksdb concurrently. - -We are using the latest 3.0 rocksdb; however, when two separate processes -try and open the same rocksdb for reading, only one of the open requests -succeed. The other open always fails with “db/LOCK: Resource temporarily unavailable” So far we have not found an option that allows sharing the rocksdb for reads. An example would be most appreciated. diff --git a/docs/_posts/2014-06-27-rocksdb-3-2-release.markdown b/docs/_posts/2014-06-27-rocksdb-3-2-release.markdown deleted file mode 100644 index e4eba6af4..000000000 --- a/docs/_posts/2014-06-27-rocksdb-3-2-release.markdown +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: RocksDB 3.2 release -layout: post -author: leijin -category: blog -redirect_from: - - /blog/647/rocksdb-3-2-release/ ---- - -Check out new RocksDB release on [GitHub](https://github.com/facebook/rocksdb/releases/tag/rocksdb-3.2)! - -New Features in RocksDB 3.2: - - * PlainTable now supports a new key encoding: for keys of the same prefix, the prefix is only written once. It can be enabled through encoding_type paramter of NewPlainTableFactory() - - - * Add AdaptiveTableFactory, which is used to convert from a DB of PlainTable to BlockBasedTabe, or vise versa. It can be created using NewAdaptiveTableFactory() - - - -Public API changes: - - - * We removed seek compaction as a concept from RocksDB - - - * Add two paramters to NewHashLinkListRepFactory() for logging on too many entries in a hash bucket when flushing - - - * Added new option BlockBasedTableOptions::hash_index_allow_collision. 
When enabled, the prefix hash index for block-based tables will not store the prefix and will allow hash collisions, reducing memory consumption diff --git a/docs/_posts/2014-07-29-rocksdb-3-3-release.markdown b/docs/_posts/2014-07-29-rocksdb-3-3-release.markdown deleted file mode 100644 index d858e4faf..000000000 --- a/docs/_posts/2014-07-29-rocksdb-3-3-release.markdown +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: RocksDB 3.3 Release -layout: post -author: yhciang -category: blog -redirect_from: - - /blog/1301/rocksdb-3-3-release/ ---- - -Check out the new RocksDB release on [GitHub](https://github.com/facebook/rocksdb/releases/tag/rocksdb-3.3)! - -New Features in RocksDB 3.3: - - * **JSON API prototype**. - - - * **Performance improvement on HashLinkList**: We addressed a performance outlier in HashLinkList caused by skewed buckets by switching the data in such buckets from a linked list to a skip list. Added the parameter threshold_use_skiplist to NewHashLinkListRepFactory(). - - - - * **More effective storage space reclamation**: RocksDB is now able to reclaim storage space more effectively during the compaction process. This is done by compensating the size of each deletion entry by 2X the average value size, which makes compaction be triggered by deletion entries more easily. - - - * **Timeout API for writes**: WriteOptions now has a variable called timeout_hint_us. When timeout_hint_us is set to a non-zero value, any write associated with it may be aborted if it runs longer than the specified timeout, and any write that completes before the timeout is guaranteed not to be aborted because of it. - - - * **rate_limiter option**: We added an option that controls the total throughput of flush and compaction. The throughput is specified in bytes/sec. Flush always has precedence over compaction when the available bandwidth is constrained. - - - -Public API changes: - - - * Removed NewTotalOrderPlainTableFactory because it was not used and its implementation was semantically incorrect.
- - - -Goals: - - * Reduce memory accesses per lookup to 1 or 2 - - - * Get an end to end point lookup rate of at least 4 Mqps - - - * Minimize database size - - -Assumptions: - - * Key length and value length are fixed - - - * The database is operated in read only mode - - -Non-goals: - - - * While optimizing the performance of Get() operation was our primary goal, compaction and build times were secondary. We may work on improving them in future. - - -Details for setting up the table format can be found in [GitHub](https://github.com/facebook/rocksdb/wiki/CuckooTable-Format). - - -## Cuckoo Hashing Algorithm - -In order to achieve high lookup speeds, we did multiple optimizations, including a cache friendly cuckoo hash algorithm. Cuckoo Hashing uses multiple hash functions, _h1, ..., __hn._ - -### Original Cuckoo Hashing - -To insert any new key _k_, we compute hashes of the key _h1(k), ..., __hn__(k)_. We insert the key in the first hash location that is free. If all the locations are blocked, we try to move one of the colliding keys to a different location by trying to re-insert it. - -Finding smallest set of keys to displace in order to accommodate the new key is naturally a shortest path problem in a directed graph where nodes are buckets of hash table and there is an edge from bucket _A_ to bucket _B_ if the element stored in bucket _A_ can be accommodated in bucket _B_ using one of the hash functions. The source nodes are the possible hash locations for the given key _k_ and destination is any one of the empty buckets. We use this algorithm to handle collision. - -To retrieve a key _k_, we compute hashes, _h1(k), ..., __hn__(k)_ and the key must be present in one of these locations. - -Our goal is to minimize average (and maximum) number of hash functions required and hence the number of memory accesses. In our experiments, with a hash utilization of 90%, we found that the average number of lookups is 1.8 and maximum is 3. Around 44% of keys are accommodated in first hash location and 33% in second location. - - -### Cache Friendly Cuckoo Hashing - -We noticed the following two sub-optimal properties in original Cuckoo implementation: - - - * If the key is not present in first hash location, we jump to second hash location which may not be in cache. This results in many cache misses. - - - * Because only 44% of keys are located in first cuckoo block, we couldn't have an optimal prefetching strategy - prefetching all hash locations for a key is wasteful. But prefetching only the first hash location helps only 44% of cases. - - - -The solution is to insert more keys near first location. In case of collision in the first hash location - _h1(k)_, we try to insert it in next few buckets, _h1(k)+1, _h1(k)+2, _..., h1(k)+t-1_. If all of these _t_ locations are occupied, we skip over to next hash function _h2_ and repeat the process. We call the set of _t_ buckets as a _Cuckoo Block_. We chose _t_ such that size of a block is not bigger than a cache line and we prefetch the first cuckoo block. - - -With the new algorithm, for 90% hash utilization, we found that 85% of keys are accommodated in first Cuckoo Block. Prefetching the first cuckoo block yields best results. For a database of 100 million keys with key length 8 and value length 4, the hash algorithm alone can achieve 9.6 Mqps and we are working on improving it further. End to end RocksDB performance results can be found [here](https://github.com/facebook/rocksdb/wiki/CuckooTable-Format). 
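To make the cuckoo-block idea concrete, below is a small, self-contained lookup sketch. It is not RocksDB's implementation (the entry layout, the hash mixing, and the block size are invented for the example), but it shows how one cache-line-sized block is probed per hash function before falling through to the next hash function:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative only: fixed-size entries, two hash functions, and cuckoo
// blocks of kBlockSize consecutive buckets, following the description above.
struct Entry {
  bool occupied = false;
  uint64_t key = 0;
  uint32_t value = 0;
};

class CuckooBlockTable {
 public:
  explicit CuckooBlockTable(size_t num_buckets) : buckets_(num_buckets) {}

  bool Lookup(uint64_t key, uint32_t* value) const {
    for (int f = 0; f < kNumHashFunctions; ++f) {
      size_t base = Hash(key, f) % buckets_.size();
      // Probe the whole cuckoo block: kBlockSize consecutive buckets that
      // are expected to share a cache line, so after the first access the
      // remaining probes are nearly free.
      for (size_t i = 0; i < kBlockSize && base + i < buckets_.size(); ++i) {
        const Entry& e = buckets_[base + i];
        if (e.occupied && e.key == key) {
          *value = e.value;
          return true;
        }
      }
    }
    return false;  // key is definitely absent
  }

 private:
  static constexpr int kNumHashFunctions = 2;
  static constexpr size_t kBlockSize = 4;  // sized so one block fits a cache line

  static size_t Hash(uint64_t key, int function_index) {
    // Toy mixing function for the example; RocksDB uses its own hash family.
    uint64_t h = key * 0x9E3779B97F4A7C15ull +
                 static_cast<uint64_t>(function_index) * 0xC2B2AE3D27D4EB4Full;
    h ^= h >> 29;
    return static_cast<size_t>(h);
  }

  std::vector<Entry> buckets_;
};
```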
diff --git a/docs/_posts/2014-09-12-new-bloom-filter-format.markdown b/docs/_posts/2014-09-12-new-bloom-filter-format.markdown deleted file mode 100644 index 96fa50a40..000000000 --- a/docs/_posts/2014-09-12-new-bloom-filter-format.markdown +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: New Bloom Filter Format -layout: post -author: zagfox -category: blog -redirect_from: - - /blog/1367/cuckoo/ ---- - -## Introduction - -In this post, we are introducing "full filter block" --- a new bloom filter format for [block based table](https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format). This could bring about 40% of improvement for key query under in-memory (all data stored in memory, files stored in tmpfs/ramfs, an [example](https://github.com/facebook/rocksdb/wiki/RocksDB-In-Memory-Workload-Performance-Benchmarks) workload. The main idea behind is to generate a big filter that covers all the keys in SST file to avoid lots of unnecessary memory look ups. - - - - -## What is Bloom Filter - -In brief, [bloom filter](https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter) is a bits array generated for a set of keys that could tell if an arbitrary key may exist in that set. - -In RocksDB, we generate such a bloom filter for each SST file. When we conduct a query for a key, we first goes to the bloom filter block of SST file. If key may exist in filter, we goes into data block in SST file to search for the key. If not, we would return directly. So it could help speed up point look up operation a lot. - -## Original Bloom Filter Format - -Original bloom filter creates filters for each individual data block in SST file. It has complex structure (ref [here](https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format#filter-meta-block)) which results in a lot of non-adjacent memory look ups. - -Here's the work flow for checking original bloom filter in block based table: - -1. Given the target key, we goes to the index block to get the "data block ID" where this key may reside. -1. Using the "data block ID", we goes to the filter block and get the correct "offset of filter". -1. Using the "offset of filter", we goes to the actual filter and do the checking. - -## New Bloom Filter Format - -New bloom filter creates filter for all keys in SST file and we name it "full filter". The data structure of full filter is very simple, there is just one big filter: - -    [ full filter ] - -In this way, the work flow of bloom filter checking is much simplified. - -(1) Given the target key, we goes directly to the filter block and conduct the filter checking. - -To be specific, there would be no checking for index block and no address jumping inside of filter block. - -Though it is a big filter, the total filter size would be the same as the original filter. - -One little draw back is that the new bloom filter introduces more memory consumption when building SST file because we need to buffer keys (or their hashes) before generating filter. Original filter just creates a bunch of small filters so it just buffer a small amount of keys. For full filter, we buffer hashes of all keys, which would take more memory when SST file size increases. - - -## Usage & Customization - -You can refer to the document here for [usage](https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#usage-of-new-bloom-filter) and [customization](https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#customize-your-own-filterpolicy). 
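-As a rough sketch of the usage described above (not code from the original post; in RocksDB releases of this era the second argument of NewBloomFilterPolicy selects between the per-block filter and the full filter, and the DB path below is made up):
-
-```c++
-#include <cassert>
-#include "rocksdb/db.h"
-#include "rocksdb/filter_policy.h"
-#include "rocksdb/table.h"
-
-int main() {
-  rocksdb::BlockBasedTableOptions table_options;
-  // false selects the "full filter" builder; true keeps the original per-block filters.
-  table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
-
-  rocksdb::Options options;
-  options.create_if_missing = true;
-  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
-
-  rocksdb::DB* db;
-  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/full_filter_db", &db);
-  assert(s.ok());
-  delete db;
-  return 0;
-}
-```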
diff --git a/docs/_posts/2014-09-15-rocksdb-3-5-release.markdown b/docs/_posts/2014-09-15-rocksdb-3-5-release.markdown deleted file mode 100644 index 1878a5a56..000000000 --- a/docs/_posts/2014-09-15-rocksdb-3-5-release.markdown +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: RocksDB 3.5 Release! -layout: post -author: leijin -category: blog -redirect_from: - - /blog/1547/rocksdb-3-5-release/ ---- - -New RocksDB release - 3.5! - - -**New Features** - - - 1. Add include/utilities/write_batch_with_index.h, providing a utility class to query data out of WriteBatch when building it. - - - 2. new ReadOptions.total_order_seek to force total order seek when block-based table is built with hash index. - - - -**Public API changes** - - - 1. The Prefix Extractor used with V2 compaction filters is now passed user key to SliceTransform::Transform instead of unparsed RocksDB key. - - - 2. Move BlockBasedTable related options to BlockBasedTableOptions from Options. Change corresponding JNI interface. Options affected include: no_block_cache, block_cache, block_cache_compressed, block_size, block_size_deviation, block_restart_interval, filter_policy, whole_key_filtering. filter_policy is changed to shared_ptr from a raw pointer. - - - 3. Remove deprecated options: disable_seek_compaction and db_stats_log_interval - - - 4. OptimizeForPointLookup() takes one parameter for block cache size. It now builds hash index, bloom filter, and block cache. - - -[https://github.com/facebook/rocksdb/releases/tag/v3.5](https://github.com/facebook/rocksdb/releases/tag/rocksdb-3.5) diff --git a/docs/_posts/2015-01-16-migrating-from-leveldb-to-rocksdb-2.markdown b/docs/_posts/2015-01-16-migrating-from-leveldb-to-rocksdb-2.markdown deleted file mode 100644 index f18de0bbc..000000000 --- a/docs/_posts/2015-01-16-migrating-from-leveldb-to-rocksdb-2.markdown +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Migrating from LevelDB to RocksDB -layout: post -author: lgalanis -category: blog -redirect_from: - - /blog/1811/migrating-from-leveldb-to-rocksdb-2/ ---- - -If you have an existing application that uses LevelDB and would like to migrate to using RocksDB, one problem you need to overcome is to map the options for LevelDB to proper options for RocksDB. As of release 3.9 this can be automatically done by using our option conversion utility found in rocksdb/utilities/leveldb_options.h. What is needed, is to first replace `leveldb::Options` with `rocksdb::LevelDBOptions`. Then, use `rocksdb::ConvertOptions( )` to convert the `LevelDBOptions` struct into appropriate RocksDB options. 
Here is an example: - - - -LevelDB code: - -```c++ -#include -#include "leveldb/db.h" - -using namespace leveldb; - -int main(int argc, char** argv) { - DB *db; - - Options opt; - opt.create_if_missing = true; - opt.max_open_files = 1000; - opt.block_size = 4096; - - Status s = DB::Open(opt, "/tmp/mydb", &db); - - delete db; -} -``` - -RocksDB code: - -```c++ -#include -#include "rocksdb/db.h" -#include "rocksdb/utilities/leveldb_options.h" - -using namespace rocksdb; - -int main(int argc, char** argv) { - DB *db; - - LevelDBOptions opt; - opt.create_if_missing = true; - opt.max_open_files = 1000; - opt.block_size = 4096; - - Options rocksdb_options = ConvertOptions(opt); - // add rocksdb specific options here - - Status s = DB::Open(rocksdb_options, "/tmp/mydb_rocks", &db); - - delete db; -} -``` - -The difference is: - -```diff --#include "leveldb/db.h" -+#include "rocksdb/db.h" -+#include "rocksdb/utilities/leveldb_options.h" - --using namespace leveldb; -+using namespace rocksdb; - -- Options opt; -+ LevelDBOptions opt; - -- Status s = DB::Open(opt, "/tmp/mydb", &db); -+ Options rocksdb_options = ConvertOptions(opt); -+ // add rockdb specific options here -+ -+ Status s = DB::Open(rocksdb_options, "/tmp/mydb_rocks", &db); -``` - -Once you get up and running with RocksDB you can then focus on tuning RocksDB further by modifying the converted options struct. - -The reason why ConvertOptions is handy is because a lot of individual options in RocksDB have moved to other structures in different components. For example, block_size is not available in struct rocksdb::Options. It resides in struct rocksdb::BlockBasedTableOptions, which is used to create a TableFactory object that RocksDB uses internally to create the proper TableBuilder objects. If you were to write your application from scratch it would look like this: - -RocksDB code from scratch: - -```c++ -#include -#include "rocksdb/db.h" -#include "rocksdb/table.h" - -using namespace rocksdb; - -int main(int argc, char** argv) { - DB *db; - - Options opt; - opt.create_if_missing = true; - opt.max_open_files = 1000; - - BlockBasedTableOptions topt; - topt.block_size = 4096; - opt.table_factory.reset(NewBlockBasedTableFactory(topt)); - - Status s = DB::Open(opt, "/tmp/mydb_rocks", &db); - - delete db; -} -``` - -The LevelDBOptions utility can ease migration to RocksDB from LevelDB and allows us to break down the various options across classes as it is needed. diff --git a/docs/_posts/2015-02-24-reading-rocksdb-options-from-a-file.markdown b/docs/_posts/2015-02-24-reading-rocksdb-options-from-a-file.markdown deleted file mode 100644 index cddc0dd01..000000000 --- a/docs/_posts/2015-02-24-reading-rocksdb-options-from-a-file.markdown +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Reading RocksDB options from a file -layout: post -author: lgalanis -category: blog -redirect_from: - - /blog/1883/reading-rocksdb-options-from-a-file/ ---- - -RocksDB options can be provided using a file or any string to RocksDB. The format is straightforward: `write_buffer_size=1024;max_write_buffer_number=2`. Any whitespace around `=` and `;` is OK. Moreover, options can be nested as necessary. For example `BlockBasedTableOptions` can be nested as follows: `write_buffer_size=1024; max_write_buffer_number=2; block_based_table_factory={block_size=4k};`. Similarly any white space around `{` or `}` is ok. 
Here is what it looks like in code: - - - -```c++ -#include -#include "rocksdb/db.h" -#include "rocksdb/table.h" -#include "rocksdb/utilities/convenience.h" - -using namespace rocksdb; - -int main(int argc, char** argv) { - DB *db; - - Options opt; - - std::string options_string = - "create_if_missing=true;max_open_files=1000;" - "block_based_table_factory={block_size=4096}"; - - Status s = GetDBOptionsFromString(opt, options_string, &opt); - - s = DB::Open(opt, "/tmp/mydb_rocks", &db); - - // use db - - delete db; -} -``` - -Using `GetDBOptionsFromString` is a convenient way of changing options for your RocksDB application without needing to resort to recompilation or tedious command line parsing. diff --git a/docs/_posts/2015-02-27-write-batch-with-index.markdown b/docs/_posts/2015-02-27-write-batch-with-index.markdown deleted file mode 100644 index 7f9f77653..000000000 --- a/docs/_posts/2015-02-27-write-batch-with-index.markdown +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: 'WriteBatchWithIndex: Utility for Implementing Read-Your-Own-Writes' -layout: post -author: sdong -category: blog -redirect_from: - - /blog/1901/write-batch-with-index/ ---- - -RocksDB can be used as a storage engine of a higher level database. In fact, we are currently plugging RocksDB into MySQL and MongoDB as one of their storage engines. RocksDB can help with guaranteeing some of the ACID properties: durability is guaranteed by RocksDB by design; while consistency and isolation need to be enforced by concurrency controls on top of RocksDB; Atomicity can be implemented by committing a transaction's writes with one write batch to RocksDB in the end. - - - -However, if we enforce atomicity by only committing all writes in the end of the transaction in one batch, you cannot get the updated value from RocksDB previously written by the same transaction (read-your-own-write). To read the updated value, the databases on top of RocksDB need to maintain an internal buffer for all the written keys, and when a read happens they need to merge the result from RocksDB and from this buffer. This is a problem we faced when building the RocksDB storage engine in MongoDB. We solved it by creating a utility class, WriteBatchWithIndex (a write batch with a searchable index) and made it part of public API so that the community can also benefit from it. - -Before talking about the index part, let me introduce write batch first. The write batch class, `WriteBatch`, is a RocksDB data structure for atomic writes of multiple keys. Users can buffer their updates to a `WriteBatch` by calling `write_batch.Put("key1", "value1")` or `write_batch.Delete("key2")`, similar as calling RocksDB's functions of the same names. In the end, they call `db->Write(write_batch)` to atomically update all those batched operations to the DB. It is how a database can guarantee atomicity, as shown above. Adding a searchable index to `WriteBatch`, we now have `WriteBatchWithIndex`. Users can put updates to WriteBatchIndex in the same way as to `WriteBatch`. In the end, users can get a `WriteBatch` object from it and issue `db->Write()`. Additionally, users can create an iterator of a WriteBatchWithIndex, seek to any key location and iterate from there. - -To implement read-your-own-write using `WriteBatchWithIndex`, every time the user creates a transaction, we create a `WriteBatchWithIndex` attached to it. All the writes of the transaction go to the `WriteBatchWithIndex` first. When we commit the transaction, we atomically write the batch to RocksDB. 
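-As a rough sketch of that flow (assuming a recent RocksDB API rather than code from this post; the GetFromBatch() convenience shown here may not exist in the version discussed, and the DB path is made up):
-
-```c++
-#include <cassert>
-#include <string>
-#include "rocksdb/db.h"
-#include "rocksdb/utilities/write_batch_with_index.h"
-
-int main() {
-  rocksdb::Options options;
-  options.create_if_missing = true;
-  rocksdb::DB* db;
-  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/wbwi_example", &db);
-  assert(s.ok());
-
-  // All writes of the "transaction" go to the indexed batch first.
-  rocksdb::WriteBatchWithIndex batch;
-  batch.Put("key1", "value1");
-  batch.Delete("key2");
-
-  // Read-your-own-write: check the batch before going to the DB.
-  std::string value;
-  s = batch.GetFromBatch(options, "key1", &value);
-  assert(s.ok() && value == "value1");
-
-  // "Super iterator" that merges the batch's writes with the DB's data.
-  rocksdb::Iterator* it = batch.NewIteratorWithBase(db->NewIterator(rocksdb::ReadOptions()));
-  for (it->SeekToFirst(); it->Valid(); it->Next()) {
-    // it->key() / it->value() reflect the uncommitted writes as well
-  }
-  delete it;  // also cleans up the base iterator it wraps
-
-  // Commit: atomically apply the underlying WriteBatch to the DB.
-  s = db->Write(rocksdb::WriteOptions(), batch.GetWriteBatch());
-  assert(s.ok());
-  delete db;
-  return 0;
-}
-```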
When the user wants to call `Get()`, we first check if the value exists in the `WriteBatchWithIndex` and return the value if existing, by seeking and reading from an iterator of the write batch, before checking data in RocksDB. For example, here is the we implement it in MongoDB's RocksDB storage engine: [link](https://github.com/mongodb/mongo/blob/a31cc114a89a3645e97645805ba77db32c433dce/src/mongo/db/storage/rocks/rocks_recovery_unit.cpp#L245-L260). If a range query comes, we pass a DB's iterator to `WriteBatchWithIndex`, which creates a super iterator which combines the results from the DB iterator with the batch's iterator. Using this super iterator, we can iterate the DB with the transaction's own writes. Here is the iterator creation codes in MongoDB's RocksDB storage engine: [link](https://github.com/mongodb/mongo/blob/a31cc114a89a3645e97645805ba77db32c433dce/src/mongo/db/storage/rocks/rocks_recovery_unit.cpp#L266-L269). In this way, the database can solve the read-your-own-write problem by using RocksDB to handle a transaction's uncommitted writes. - -Using `WriteBatchWithIndex`, we successfully implemented read-your-own-writes in the RocksDB storage engine of MongoDB. If you also have a read-your-own-write problem, `WriteBatchWithIndex` can help you implement it quickly and correctly. diff --git a/docs/_posts/2015-04-22-integrating-rocksdb-with-mongodb-2.markdown b/docs/_posts/2015-04-22-integrating-rocksdb-with-mongodb-2.markdown deleted file mode 100644 index 1ffe2c532..000000000 --- a/docs/_posts/2015-04-22-integrating-rocksdb-with-mongodb-2.markdown +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Integrating RocksDB with MongoDB -layout: post -author: icanadi -category: blog -redirect_from: - - /blog/1967/integrating-rocksdb-with-mongodb-2/ ---- - -Over the last couple of years, we have been busy integrating RocksDB with various services here at Facebook that needed to store key-value pairs locally. We have also seen other companies using RocksDB as local storage components of their distributed systems. - - - -The next big challenge for us is to bring RocksDB storage engine to general purpose databases. Today we have an exciting milestone to share with our community! We're running MongoDB with RocksDB in production and seeing great results! You can read more about it here: [http://blog.parse.com/announcements/mongodb-rocksdb-parse/](http://blog.parse.com/announcements/mongodb-rocksdb-parse/) - -Keep tuned for benchmarks and more stability and performance improvements. diff --git a/docs/_posts/2015-06-12-rocksdb-in-osquery.markdown b/docs/_posts/2015-06-12-rocksdb-in-osquery.markdown deleted file mode 100644 index f3a55faae..000000000 --- a/docs/_posts/2015-06-12-rocksdb-in-osquery.markdown +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: RocksDB in osquery -layout: post -author: icanadi -category: lgalanis -redirect_from: - - /blog/1997/rocksdb-in-osquery/ ---- - -Check out [this](https://code.facebook.com/posts/1411870269134471/how-rocksdb-is-used-in-osquery/) blog post by [Mike Arpaia](https://www.facebook.com/mike.arpaia) and [Ted Reed](https://www.facebook.com/treeded) about how osquery leverages RocksDB to build an embedded pub-sub system. This article is a great read and contains insights on how to properly use RocksDB. 
diff --git a/docs/_posts/2015-07-15-rocksdb-2015-h2-roadmap.markdown b/docs/_posts/2015-07-15-rocksdb-2015-h2-roadmap.markdown deleted file mode 100644 index b3e2703fc..000000000 --- a/docs/_posts/2015-07-15-rocksdb-2015-h2-roadmap.markdown +++ /dev/null @@ -1,92 +0,0 @@ ---- -title: RocksDB 2015 H2 roadmap -layout: post -author: icanadi -category: blog -redirect_from: - - /blog/2015/rocksdb-2015-h2-roadmap/ ---- - -Every 6 months, RocksDB team gets together to prioritize the work ahead of us. We just went through this exercise and we wanted to share the results with the community. Here's what RocksDB team will be focusing on for the next 6 months: - - - -**MyRocks** - -As you might know, we're working hard to integrate RocksDB as a storage engine for MySQL. This project is pretty important for us because we're heavy users of MySQL. We're already getting pretty good performance results, but there is more work to be done. We need to focus on both performance and stability. The most high priority items on are list are: - - - - - 1. Reduce CPU costs of RocksDB as a MySQL storage engine - - - 2. Implement pessimistic concurrency control to support repeatable read isolation level in MyRocks - - - 3. Reduce P99 read latency, which is high mostly because of lingering tombstones - - - 4. Port ZSTD compression - - -**MongoRocks** - -Another database that we're working on is MongoDB. The project of integrating MongoDB with RocksDB storage engine is called MongoRocks. It's already running in production at Parse [1] and we're seeing surprisingly few issues. Our plans for the next half: - - - - - 1. Keep improving performance and stability, possibly reuse work done on MyRocks (workloads are pretty similar). - - - 2. Increase internal and external adoption. - - - 3. Support new MongoDB 3.2. - - -**RocksDB on cheaper storage media** - -Up to now, our mission was to build the best key-value store “for fast storage” (flash and in-memory). However, there are some use-cases at Facebook that don't need expensive high-end storage. In the next six months, we plan to deploy RocksDB on cheaper storage media. We will optimize performance to RocksDB on either or both: - - - - - 1. Hard drive storage array. - - - 2. Tiered Storage. - - -**Quality of Service** - -When talking to our customers, there are couple of issues that keep reoccurring. We need to fix them to make our customers happy. We will improve RocksDB to provide better assurance of performance and resource usage. Non-exhaustive list includes: - - - - - 1. Iterate P99 can be high due to the presence of tombstones. - - - 2. Write stalls can happen during high write loads. - - - 3. Better control of memory and disk usage. - - - 4. Service quality and performance of backup engine. - - -**Operation's user experience** - -As we increase deployment of RocksDB, engineers are spending more time on debugging RocksDB issues. We plan to improve user experience when running RocksDB. The goal is to reduce TTD (time-to-debug). The work includes monitoring, visualizations and documentations. - -[1]( http://blog.parse.com/announcements/mongodb-rocksdb-parse/](http://blog.parse.com/announcements/mongodb-rocksdb-parse/) - - -### Comments - -**[Mike](allspace2012@outlook.com)** - -What’s the status of this roadmap? “RocksDB on cheaper storage media”, has this been implemented? 
diff --git a/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown b/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown deleted file mode 100644 index 53c1f5a90..000000000 --- a/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown +++ /dev/null @@ -1,78 +0,0 @@ ---- -title: Spatial indexing in RocksDB -layout: post -author: icanadi -category: blog -redirect_from: - - /blog/2039/spatial-indexing-in-rocksdb/ ---- - -About a year ago, there was a need to develop a spatial database at Facebook. We needed to store and index Earth's map data. Before building our own, we looked at the existing spatial databases. They were all very good technology, but also general purpose. We could sacrifice a general-purpose API, so we thought we could build a more performant database, since it would be specifically designed for our use-case. Furthermore, we decided to build the spatial database on top of RocksDB, because we have a lot of operational experience with running and tuning RocksDB at a large scale. - - - -When we started looking at this project, the first thing that surprised us was that our planet is not that big. Earth's entire map data can fit in memory on a reasonably high-end machine. Thus, we also decided to build a spatial database optimized for memory-resident dataset. - -The first use-case of our spatial database was an experimental map renderer. As part of our project, we successfully loaded [Open Street Maps](https://www.openstreetmap.org/) dataset and hooked it up with [Mapnik](http://mapnik.org/), a map rendering engine. - -The usual Mapnik workflow is to load the map data into a SQL-based database and then define map layers with SQL statements. To render a tile, Mapnik needs to execute a couple of SQL queries. The benefit of this approach is that you don't need to reload your database when you change your map style. You can just change your SQL query and Mapnik picks it up. In our model, we decided to precompute the features we need for each tile. We need to know the map style before we create the database. However, when rendering the map tile, we only fetch the features that we need to render. - -We haven't open sourced the RocksDB Mapnik plugin or the database loading pipeline. However, the spatial indexing is available in RocksDB under a name [SpatialDB](https://github.com/facebook/rocksdb/blob/main/include/rocksdb/utilities/spatial_db.h). The API is focused on map rendering use-case, but we hope that it can also be used for other spatial-based applications. - -Let's take a tour of the API. When you create a spatial database, you specify the spatial indexes that need to be built. Each spatial index is defined by a bounding box and granularity. For map rendering, we create a spatial index for each zoom levels. Higher zoom levels have more granularity. - - - - SpatialDB::Create( - SpatialDBOptions(), - "/data/map", { - SpatialIndexOptions("zoom10", BoundingBox(0, 0, 100, 100), 10), - SpatialIndexOptions("zoom16", BoundingBox(0, 0, 100, 100), 16) - } - ); - - - - -When you insert a feature (building, street, country border) into SpatialDB, you need to specify the list of spatial indexes that will index the feature. In the loading phase we process the map style to determine the list of zoom levels on which we'll render the feature. For example, we will not render the building on zoom level that shows an entire country. Building will only be indexed on higher zoom level's index. Country borders will be indexes on all zoom levels. 
- - - - FeatureSet feature; - feature.Set("type", "building"); - feature.Set("height", 6); - db->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), - well_known_binary_blob, feature, {"zoom16"}); - - - - -The indexing part is pretty simple. For each feature, we first find a list of index tiles that it intersects. Then, we add a link from the tile's [quad key](https://msdn.microsoft.com/en-us/library/bb259689.aspx) to the feature's primary key. Using quad keys improves data locality, i.e. features closer together geographically will have similar quad keys. Even though we're optimizing for a memory-resident dataset, data locality is still very important due to different caching effects. - -After you're done inserting all the features, you can call an API Compact() that will compact the dataset and speed up read queries. - - - - db->Compact(); - - - - -SpatialDB's query specifies: 1) bounding box we're interested in, and 2) a zoom level. We find all tiles that intersect with the query's bounding box and return all features in those tiles. - - - - - Cursor* c = db_->Query(ReadOptions(), BoundingBox(1, 1, 7, 7), "zoom16"); - for (; c->Valid(); c->Next()) { - Render(c->blob(), c->feature_set()); - } - - - - -Note: the `Render()` function is not part of RocksDB. You will need to use one of many open source map renderers, for example check out [Mapnik](http://mapnik.org/). - -TL;DR If you need an embedded spatial database, check out RocksDB's SpatialDB. [Let us know](https://www.facebook.com/groups/rocksdb.dev/) how we can make it better. - -If you're interested in learning more, check out this [talk](https://www.youtube.com/watch?v=T1jWsDMONM8). diff --git a/docs/_posts/2015-07-22-rocksdb-is-now-available-in-windows-platform.markdown b/docs/_posts/2015-07-22-rocksdb-is-now-available-in-windows-platform.markdown deleted file mode 100644 index b6bb47d53..000000000 --- a/docs/_posts/2015-07-22-rocksdb-is-now-available-in-windows-platform.markdown +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: RocksDB is now available in Windows Platform -layout: post -author: dmitrism -category: blog -redirect_from: - - /blog/2033/rocksdb-is-now-available-in-windows-platform/ ---- - -Over the past 6 months we have seen a number of use cases where RocksDB is successfully used by the community and various companies to achieve high throughput and volume in a modern server environment. - -We at Microsoft Bing could not be left behind. As a result we are happy to [announce](http://bit.ly/1OmWBT9) the availability of the Windows Port created here at Microsoft which we intend to use as a storage option for one of our key/value data stores. - - - -We are happy to make this available for the community. Keep tuned for more announcements to come. - -### Comments - -**[Siying Dong](siying.d@fb.com)** - -Appreciate your contributions to RocksDB project! I believe it will benefits many users!
- -**[empresas sevilla](oxofkx@gmail.com)** - -Magnifico artículo|, un placer leer el blog - -**[jak usunac](tomogedac@o2.pl)** - -I believe it will benefits too diff --git a/docs/_posts/2015-07-23-dynamic-level.markdown b/docs/_posts/2015-07-23-dynamic-level.markdown deleted file mode 100644 index 0ff3a0542..000000000 --- a/docs/_posts/2015-07-23-dynamic-level.markdown +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: Dynamic Level Size for Level-Based Compaction -layout: post -author: sdong -category: blog -redirect_from: - - /blog/2207/dynamic-level/ ---- - -In this article, we follow up on the first part of an answer to one of the questions in our [AMA](https://www.reddit.com/r/IAmA/comments/3de3cv/we_are_rocksdb_engineering_team_ask_us_anything/ct4a8tb), the dynamic level size in level-based compaction. - - - -Level-based compaction is the original LevelDB compaction style and one of the two major compaction styles in RocksDB (See [our wiki](https://github.com/facebook/rocksdb/wiki/RocksDB-Basics#multi-threaded-compactions)). In RocksDB we introduced parallelism and more configurable options to it but the main algorithm stayed the same, until we recently introduced the dynamic level size mode. - - -In level-based compaction, we organize data to different sorted runs, called levels. Each level has a target size.  Usually target size of levels increases by the same size multiplier. For example, you can set target size of level 1 to be 1GB, and size multiplier to be 10, and the target size of level 1, 2, 3, 4 will be 1GB, 10GB, 100GB and 1000GB. Before level 1, there will be some staging file flushed from mem tables, called Level 0 files, which will later be merged to level 1. Compactions will be triggered as soon as actual size of a level exceeds its target size. We will merge a subset of data of that level to next level, to reduce size of the level. More compactions will be triggered until sizes of all the levels are lower than their target sizes. In a steady state, the size of each level will be around the same size of the size of level targets. - - -Level-based compaction’s advantage is its good space efficiency. We usually use the metric space amplification to measure the space efficiency. In this article ignore the effects of data compression so space amplification= size_on_file_system / size_of_user_data. - - -How do we estimate space amplification of level-based compaction? We focus specifically on the databases in steady state, which means database size is stable or grows slowly over time. This means updates will add roughly the same or little more data than what is removed by deletes. Given that, if we compact all the data all to the last level, the size of level will be equal as the size of last level before the compaction. On the other hand, the size of user data will be approximately the size of DB if we compact all the levels down to the last level. So the size of the last level will be a good estimation of user data size. So total size of the DB divided by the size of the last level will be a good estimation of space amplification. - - -Applying the equation, if we have four non-zero levels, their sizes are 1GB, 10GB, 100GB, 1000GB, the size amplification will be approximately (1000GB + 100GB + 10GB + 1GB) / 1000GB = 1.111, which is a very good number. However, there is a catch here: how to make sure the last level’s size is 1000GB, the same as the level’s size target? A user has to fine tune level sizes to achieve this number and will need to re-tune if DB size changes. 
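-(As an aside that is not in the original post: with a level size multiplier $r$ and the last level exactly at its target size, the same estimate can be written as
-
-$$\text{space amplification} \approx \sum_{i=0}^{n-1} r^{-i} < \frac{r}{r-1},$$
-
-which for $r = 10$ gives the 1.111 figure used above.)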
The theoretic number 1.11 is hard to achieve in practice. In a worse case, if you have the target size of last level to be 1000GB but the user data is only 200GB, then the actual space amplification will be (200GB + 100GB + 10GB + 1GB) / 200GB = 1.555, a much worse number. - - -To solve this problem, my colleague Igor Kabiljo came up with a solution of dynamic level size target mode. You can enable it by setting options.level_compaction_dynamic_level_bytes=true. In this mode, size target of levels are changed dynamically based on size of the last level. Suppose the level size multiplier to be 10, and the DB size is 200GB. The target size of the last level is automatically set to be the actual size of the level, which is 200GB, the second to last level’s size target will be automatically set to be size_last_level / 10 = 20GB, the third last level’s will be size_last_level/100 = 2GB, and next level to be size_last_level/1000 = 200MB. We stop here because 200MB is within the range of the first level. In this way, we can achieve the 1.111 space amplification, without fine tuning of the level size targets. More details can be found in [code comments of the option](https://github.com/facebook/rocksdb/blob/v3.11/include/rocksdb/options.h#L366-L423) in the header file. diff --git a/docs/_posts/2015-10-27-getthreadlist.markdown b/docs/_posts/2015-10-27-getthreadlist.markdown deleted file mode 100644 index 92f743adc..000000000 --- a/docs/_posts/2015-10-27-getthreadlist.markdown +++ /dev/null @@ -1,193 +0,0 @@ ---- -title: GetThreadList -layout: post -author: yhciang -category: blog -redirect_from: - - /blog/2261/getthreadlist/ ---- - -We recently added a new API, called `GetThreadList()`, that exposes the RocksDB background thread activity. With this feature, developers will be able to obtain the real-time information about the currently running compactions and flushes such as the input / output size, elapsed time, the number of bytes it has written. Below is an example output of `GetThreadList`. To better illustrate the example, we have put a sample output of `GetThreadList` into a table where each column represents a thread status: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-|  | Thread 1 | Thread 2 |
-| --- | --- | --- |
-| ThreadID | 140716395198208 | 140716416169728 |
-| DB | db1 | db2 |
-| CF | default | picachu |
-| ThreadType | High Pri | Low Pri |
-| Operation | Flush | Compaction |
-| ElapsedTime | 143.459 ms | 607.538 ms |
-| Stage | FlushJob::WriteLevel0Table | CompactionJob::Install |
-| OperationProperties | BytesMemtables 4092938, BytesWritten 1050701 | BaseInputLevel 1, BytesRead 4876417, BytesWritten 4140109, IsDeletion 0, IsManual 0, IsTrivialMove 0, JobID 146, OutputLevel 2, TotalInputBytes 4883044 |
- -In the above output, we can see `GetThreadList()` reports the activity of two threads: one thread running flush job (middle column) and the other thread running a compaction job (right-most column). In each thread status, it shows basic information about the thread such as thread id, it's target db / column family, and the job it is currently doing and the current status of the job. For instance, we can see thread 140716416169728 is doing compaction on the `picachu` column family in database `db2`. In addition, we can see the compaction has been running for 600 ms, and it has read 4876417 bytes out of 4883044 bytes. This indicates the compaction is about to complete. The stage property indicates which code block the thread is currently executing. For instance, thread 140716416169728 is currently running `CompactionJob::Install`, which further indicates the compaction job is almost done. - -Below we briefly describe its API. - - -## How to Enable it? - - -To enable thread-tracking of a rocksdb instance, simply set `enable_thread_tracking` to true in its DBOptions: - -```c++ -// If true, then the status of the threads involved in this DB will -// be tracked and available via GetThreadList() API. -// -// Default: false -bool enable_thread_tracking; -``` - - - -## The API - - -The GetThreadList API is defined in [include/rocksdb/env.h](https://github.com/facebook/rocksdb/blob/main/include/rocksdb/env.h#L317-L318), which is an Env -function: - -```c++ -virtual Status GetThreadList(std::vector* thread_list) -``` - -Since an Env can be shared across multiple rocksdb instances, the output of -`GetThreadList()` include the background activity of all the rocksdb instances -that using the same Env. - -The `GetThreadList()` API simply returns a vector of `ThreadStatus`, each describes -the current status of a thread. The `ThreadStatus` structure, defined in -[include/rocksdb/thread_status.h](https://github.com/facebook/rocksdb/blob/main/include/rocksdb/thread_status.h), contains the following information: - -```c++ -// An unique ID for the thread. -const uint64_t thread_id; - -// The type of the thread, it could be HIGH_PRIORITY, -// LOW_PRIORITY, and USER -const ThreadType thread_type; - -// The name of the DB instance where the thread is currently -// involved with. It would be set to empty string if the thread -// does not involve in any DB operation. -const std::string db_name; - -// The name of the column family where the thread is currently -// It would be set to empty string if the thread does not involve -// in any column family. -const std::string cf_name; - -// The operation (high-level action) that the current thread is involved. -const OperationType operation_type; - -// The elapsed time in micros of the current thread operation. -const uint64_t op_elapsed_micros; - -// An integer showing the current stage where the thread is involved -// in the current operation. -const OperationStage operation_stage; - -// A list of properties that describe some details about the current -// operation. Same field in op_properties[] might have different -// meanings for different operations. -uint64_t op_properties[kNumOperationProperties]; - -// The state (lower-level action) that the current thread is involved. 
-const StateType state_type; -``` - -If you are interested in the background thread activity of your RocksDB application, please feel free to give `GetThreadList()` a try :) diff --git a/docs/_posts/2015-11-10-use-checkpoints-for-efficient-snapshots.markdown b/docs/_posts/2015-11-10-use-checkpoints-for-efficient-snapshots.markdown deleted file mode 100644 index 6852b8ffa..000000000 --- a/docs/_posts/2015-11-10-use-checkpoints-for-efficient-snapshots.markdown +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Use Checkpoints for Efficient Snapshots -layout: post -author: rven2 -category: blog -redirect_from: - - /blog/2609/use-checkpoints-for-efficient-snapshots/ ---- - -**Checkpoint** is a feature in RocksDB which provides the ability to take a snapshot of a running RocksDB database in a separate directory. Checkpoints can be used as a point in time snapshot, which can be opened Read-only to query rows as of the point in time or as a Writeable snapshot by opening it Read-Write. Checkpoints can be used for both full and incremental backups. - - - - -The Checkpoint feature enables RocksDB to create a consistent snapshot of a given RocksDB database in the specified directory. If the snapshot is on the same filesystem as the original database, the SST files will be hard-linked, otherwise SST files will be copied. The manifest and CURRENT files will be copied. In addition, if there are multiple column families, log files will be copied for the period covering the start and end of the checkpoint, in order to provide a consistent snapshot across column families. - - - - -A Checkpoint object needs to be created for a database before checkpoints are created. The API is as follows: - - - - -`Status Create(DB* db, Checkpoint** checkpoint_ptr);` - - - - -Given a checkpoint object and a directory, the CreateCheckpoint function creates a consistent snapshot of the database in the given directory. - - - - -`Status CreateCheckpoint(const std::string& checkpoint_dir);` - - - - -The directory should not already exist and will be created by this API. The directory will be an absolute path. The checkpoint can be used as a ​read-only copy of the DB or can be opened as a standalone DB. When opened read/write, the SST files continue to be hard links and these links are removed when the files are obsoleted. When the user is done with the snapshot, the user can delete the directory to remove the snapshot. - - - - -Checkpoints are used for online backup in ​MyRocks. which is MySQL using RocksDB as the storage engine . ([MySQL on RocksDB](https://github.com/facebook/mysql-5.6)) ​ diff --git a/docs/_posts/2015-11-16-analysis-file-read-latency-by-level.markdown b/docs/_posts/2015-11-16-analysis-file-read-latency-by-level.markdown deleted file mode 100644 index b21b04fe3..000000000 --- a/docs/_posts/2015-11-16-analysis-file-read-latency-by-level.markdown +++ /dev/null @@ -1,244 +0,0 @@ ---- -title: Analysis File Read Latency by Level -layout: post -author: sdong -category: blog -redirect_from: - - /blog/2537/analysis-file-read-latency-by-level/ ---- - -In many use cases of RocksDB, people rely on OS page cache for caching compressed data. With this approach, verifying effective of the OS page caching is challenging, because file system is a black box to users. - -As an example, a user can tune the DB as following: use level-based compaction, with L1 - L4 sizes to be 1GB, 10GB, 100GB and 1TB. 
And they reserve about 20GB memory as OS page cache, expecting level 0, 1 and 2 are mostly cached in memory, leaving only reads from level 3 and 4 requiring disk I/Os. However, in practice, it's not easy to verify whether OS page cache does exactly what we expect. For example, if we end up with doing 4 instead of 2 I/Os per query, it's not easy for users to figure out whether the it's because of efficiency of OS page cache or reading multiple blocks for a level. Analysis like it is especially important if users run RocksDB on hard drive disks, for the gap of latency between hard drives and memory is much higher than flash-based SSDs. - - - -In order to make tuning easier, we added new instrumentation to help users analysis latency distribution of file reads in different levels. If users turn DB statistics on, we always keep track of distribution of file read latency for each level. Users can retrieve the information by querying DB property “rocksdb.stats” ( [https://github.com/facebook/rocksdb/blob/v3.13.1/include/rocksdb/db.h#L315-L316](https://github.com/facebook/rocksdb/blob/v3.13.1/include/rocksdb/db.h#L315-L316) ). It will also printed out as a part of compaction summary in info logs periodically. - -The output looks like this: - - -``` -** Level 0 read latency histogram (micros): -Count: 696 Average: 489.8118 StdDev: 222.40 -Min: 3.0000 Median: 452.3077 Max: 1896.0000 -Percentiles: P50: 452.31 P75: 641.30 P99: 1068.00 P99.9: 1860.80 P99.99: 1896.00 ------------------------------------------------------- -[ 2, 3 ) 1 0.144% 0.144% -[ 18, 20 ) 1 0.144% 0.287% -[ 45, 50 ) 5 0.718% 1.006% -[ 50, 60 ) 26 3.736% 4.741% # -[ 60, 70 ) 6 0.862% 5.603% -[ 90, 100 ) 1 0.144% 5.747% -[ 120, 140 ) 2 0.287% 6.034% -[ 140, 160 ) 1 0.144% 6.178% -[ 160, 180 ) 1 0.144% 6.322% -[ 200, 250 ) 9 1.293% 7.615% -[ 250, 300 ) 45 6.466% 14.080% # -[ 300, 350 ) 88 12.644% 26.724% ### -[ 350, 400 ) 88 12.644% 39.368% ### -[ 400, 450 ) 71 10.201% 49.569% ## -[ 450, 500 ) 65 9.339% 58.908% ## -[ 500, 600 ) 74 10.632% 69.540% ## -[ 600, 700 ) 92 13.218% 82.759% ### -[ 700, 800 ) 64 9.195% 91.954% ## -[ 800, 900 ) 35 5.029% 96.983% # -[ 900, 1000 ) 12 1.724% 98.707% -[ 1000, 1200 ) 6 0.862% 99.569% -[ 1200, 1400 ) 2 0.287% 99.856% -[ 1800, 2000 ) 1 0.144% 100.000% - -** Level 1 read latency histogram (micros): -(......not pasted.....) - -** Level 2 read latency histogram (micros): -(......not pasted.....) - -** Level 3 read latency histogram (micros): -(......not pasted.....) - -** Level 4 read latency histogram (micros): -(......not pasted.....) 
- -** Level 5 read latency histogram (micros): -Count: 25583746 Average: 421.1326 StdDev: 385.11 -Min: 1.0000 Median: 376.0011 Max: 202444.0000 -Percentiles: P50: 376.00 P75: 438.00 P99: 1421.68 P99.9: 4164.43 P99.99: 9056.52 ------------------------------------------------------- -[ 0, 1 ) 2351 0.009% 0.009% -[ 1, 2 ) 6077 0.024% 0.033% -[ 2, 3 ) 8471 0.033% 0.066% -[ 3, 4 ) 788 0.003% 0.069% -[ 4, 5 ) 393 0.002% 0.071% -[ 5, 6 ) 786 0.003% 0.074% -[ 6, 7 ) 1709 0.007% 0.080% -[ 7, 8 ) 1769 0.007% 0.087% -[ 8, 9 ) 1573 0.006% 0.093% -[ 9, 10 ) 1495 0.006% 0.099% -[ 10, 12 ) 3043 0.012% 0.111% -[ 12, 14 ) 2259 0.009% 0.120% -[ 14, 16 ) 1233 0.005% 0.125% -[ 16, 18 ) 762 0.003% 0.128% -[ 18, 20 ) 451 0.002% 0.130% -[ 20, 25 ) 794 0.003% 0.133% -[ 25, 30 ) 1279 0.005% 0.138% -[ 30, 35 ) 1172 0.005% 0.142% -[ 35, 40 ) 1363 0.005% 0.148% -[ 40, 45 ) 409 0.002% 0.149% -[ 45, 50 ) 105 0.000% 0.150% -[ 50, 60 ) 80 0.000% 0.150% -[ 60, 70 ) 280 0.001% 0.151% -[ 70, 80 ) 1583 0.006% 0.157% -[ 80, 90 ) 4245 0.017% 0.174% -[ 90, 100 ) 6572 0.026% 0.200% -[ 100, 120 ) 9724 0.038% 0.238% -[ 120, 140 ) 3713 0.015% 0.252% -[ 140, 160 ) 2383 0.009% 0.261% -[ 160, 180 ) 18344 0.072% 0.333% -[ 180, 200 ) 51873 0.203% 0.536% -[ 200, 250 ) 631722 2.469% 3.005% -[ 250, 300 ) 2721970 10.639% 13.644% ## -[ 300, 350 ) 5909249 23.098% 36.742% ##### -[ 350, 400 ) 6522507 25.495% 62.237% ##### -[ 400, 450 ) 4296332 16.793% 79.030% ### -[ 450, 500 ) 2130323 8.327% 87.357% ## -[ 500, 600 ) 1553208 6.071% 93.428% # -[ 600, 700 ) 642129 2.510% 95.938% # -[ 700, 800 ) 372428 1.456% 97.394% -[ 800, 900 ) 187561 0.733% 98.127% -[ 900, 1000 ) 85858 0.336% 98.462% -[ 1000, 1200 ) 82730 0.323% 98.786% -[ 1200, 1400 ) 50691 0.198% 98.984% -[ 1400, 1600 ) 38026 0.149% 99.133% -[ 1600, 1800 ) 32991 0.129% 99.261% -[ 1800, 2000 ) 30200 0.118% 99.380% -[ 2000, 2500 ) 62195 0.243% 99.623% -[ 2500, 3000 ) 36684 0.143% 99.766% -[ 3000, 3500 ) 21317 0.083% 99.849% -[ 3500, 4000 ) 10216 0.040% 99.889% -[ 4000, 4500 ) 8351 0.033% 99.922% -[ 4500, 5000 ) 4152 0.016% 99.938% -[ 5000, 6000 ) 6328 0.025% 99.963% -[ 6000, 7000 ) 3253 0.013% 99.976% -[ 7000, 8000 ) 2082 0.008% 99.984% -[ 8000, 9000 ) 1546 0.006% 99.990% -[ 9000, 10000 ) 1055 0.004% 99.994% -[ 10000, 12000 ) 1566 0.006% 100.000% -[ 12000, 14000 ) 761 0.003% 100.003% -[ 14000, 16000 ) 462 0.002% 100.005% -[ 16000, 18000 ) 226 0.001% 100.006% -[ 18000, 20000 ) 126 0.000% 100.006% -[ 20000, 25000 ) 107 0.000% 100.007% -[ 25000, 30000 ) 43 0.000% 100.007% -[ 30000, 35000 ) 15 0.000% 100.007% -[ 35000, 40000 ) 14 0.000% 100.007% -[ 40000, 45000 ) 16 0.000% 100.007% -[ 45000, 50000 ) 1 0.000% 100.007% -[ 50000, 60000 ) 22 0.000% 100.007% -[ 60000, 70000 ) 10 0.000% 100.007% -[ 70000, 80000 ) 5 0.000% 100.007% -[ 80000, 90000 ) 14 0.000% 100.007% -[ 90000, 100000 ) 11 0.000% 100.007% -[ 100000, 120000 ) 33 0.000% 100.007% -[ 120000, 140000 ) 6 0.000% 100.007% -[ 140000, 160000 ) 3 0.000% 100.007% -[ 160000, 180000 ) 7 0.000% 100.007% -[ 200000, 250000 ) 2 0.000% 100.007% -``` - - -In this example, you can see we only issued 696 reads from level 0 while issued 25 million reads from level 5. The latency distribution is also clearly shown among those reads. This will be helpful for users to analysis OS page cache efficiency. - -Currently the read latency per level includes reads from data blocks, index blocks, as well as bloom filter blocks. We are also working on a feature to break down those three type of blocks. 
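-As a hedged sketch (not part of the original post) of how one might pull these histograms programmatically, assuming the statistics interface and property name referenced above and a made-up DB path:
-
-```c++
-#include <cassert>
-#include <cstdio>
-#include <string>
-#include "rocksdb/db.h"
-#include "rocksdb/statistics.h"
-
-int main() {
-  rocksdb::Options options;
-  options.create_if_missing = true;
-  options.statistics = rocksdb::CreateDBStatistics();  // turn DB statistics on
-
-  rocksdb::DB* db;
-  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/latency_example", &db);
-  assert(s.ok());
-
-  // ... serve reads for a while ...
-
-  std::string stats;
-  if (db->GetProperty("rocksdb.stats", &stats)) {
-    // Includes the per-level "read latency histogram (micros)" sections shown above.
-    printf("%s\n", stats.c_str());
-  }
-  delete db;
-  return 0;
-}
-```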
- -### Comments - -**[Tao Feng](fengtao04@gmail.com)** - -Is this feature also included in RocksJava? - -**[Siying Dong](siying.d@fb.com)** - -Should be. As long as you enable statistics, you should be able to get the value from `RocksDB.getProperty()` with property `rocksdb.dbstats`. Let me know if you can’t find it. - -**[chiddu](cnbscience@gmail.com)** - -> In this example, you can see we only issued 696 reads from level 0 while issued 256K reads from level 5. - -Isn’t it 2.5 M of reads instead of 256K ? . - -Also could anyone please provide more description on the histogram ? especially - -> Count: 25583746 Average: 421.1326 StdDev: 385.11 -> Min: 1.0000 Median: 376.0011 Max: 202444.0000 -> Percentiles: P50: 376.00 P75: 438.00 P99: 1421.68 P99.9: 4164.43 P99.99: 9056.52 - -and - -> [ 0, 1 ) 2351 0.009% 0.009% -> [ 1, 2 ) 6077 0.024% 0.033% -> [ 2, 3 ) 8471 0.033% 0.066% -> [ 3, 4 ) 788 0.003% 0.069%” - -thanks in advance - -**[Siying Dong](siying.d@fb.com)** - -Thank you for pointing out the mistake. I fixed it now. - -In this output, there are 2.5 million samples, average latency is 421 micro seconds, with standard deviation 385. Median is 376, max value is 202 milliseconds. 0.009% has value of 1, 0.024% has value of 1, 0.033% has value of 2. Accumulated value from 0 to 2 is 0.066%. - -Hope it helps. - -**[chiddu](cnbscience@gmail.com)** - -Thank you Siying for the quick reply, I was running couple of benchmark testing to check the performance of rocksdb on SSD. One of the test is similar to what is mentioned in the wiki, TEST 4 : Random read , except the key_size is 10 and value_size is 20. I am inserting 1 billion hashes and reading 1 billion hashes with 32 threads. The histogram shows something like this - -``` -Level 5 read latency histogram (micros): -Count: 7133903059 Average: 480.4357 StdDev: 309.18 -Min: 0.0000 Median: 551.1491 Max: 224142.0000 -Percentiles: P50: 551.15 P75: 651.44 P99: 996.52 P99.9: 2073.07 P99.99: 3196.32 -—————————————————— -[ 0, 1 ) 28587385 0.401% 0.401% -[ 1, 2 ) 686572516 9.624% 10.025% ## -[ 2, 3 ) 567317522 7.952% 17.977% ## -[ 3, 4 ) 44979472 0.631% 18.608% -[ 4, 5 ) 50379685 0.706% 19.314% -[ 5, 6 ) 64930061 0.910% 20.224% -[ 6, 7 ) 22613561 0.317% 20.541% -…………more…………. -``` - -If I understand your previous comment correctly, - -1. How is it that the count is around 7 billion when I have only inserted 1 billion hashes ? is the stat broken ? -1. What does the percentiles and the numbers signify ? -1. 0, 1 ) 28587385 0.401% 0.401% what does this “28587385” stand for in the histogram row ? - -**[Siying Dong](siying.d@fb.com)** - -If I remember correctly, with db_bench, if you specify –num=1000000000 –threads=32, it is every thread reading one billion keys, total of 32 billions. Is it the case you ran into? - -28,587,385 means that number of data points take the value [0,1) -28,587,385 / 7,133,903,058 = 0.401% provides percentage. - -**[chiddu](cnbscience@gmail.com)** - -I do have `num=1000000000` and `t=32`. The script says reading 1 billion hashes and not 32 billion hashes. 
- -this is the script on which I have used - -``` -echo “Load 1B keys sequentially into database…..” -bpl=10485760;overlap=10;mcz=2;del=300000000;levels=6;ctrig=4; delay=8; stop=12; wbn=3; mbc=20; mb=67108864;wbs=134217728; dds=1; sync=0; r=1000000000; t=1; vs=20; bs=4096; cs=1048576; of=500000; si=1000000; ./db_bench –benchmarks=fillseq –disable_seek_compaction=1 –mmap_read=0 –statistics=1 –histogram=1 –num=$r –threads=$t –value_size=$vs –block_size=$bs –cache_size=$cs –bloom_bits=10 –cache_numshardbits=6 –open_files=$of –verify_checksum=1 –db=/data/mysql/leveldb/test –sync=$sync –disable_wal=1 –compression_type=none –stats_interval=$si –compression_ratio=0.5 –disable_data_sync=$dds –write_buffer_size=$wbs –target_file_size_base=$mb –max_write_buffer_number=$wbn –max_background_compactions=$mbc –level0_file_num_compaction_trigger=$ctrig –level0_slowdown_writes_trigger=$delay –level0_stop_writes_trigger=$stop –num_levels=$levels –delete_obsolete_files_period_micros=$del –min_level_to_compress=$mcz –max_grandparent_overlap_factor=$overlap –stats_per_interval=1 –max_bytes_for_level_base=$bpl –use_existing_db=0 –key_size=10 - -echo “Reading 1B keys in database in random order….” -bpl=10485760;overlap=10;mcz=2;del=300000000;levels=6;ctrig=4; delay=8; stop=12; wbn=3; mbc=20; mb=67108864;wbs=134217728; dds=0; sync=0; r=1000000000; t=32; vs=20; bs=4096; cs=1048576; of=500000; si=1000000; ./db_bench –benchmarks=readrandom –disable_seek_compaction=1 –mmap_read=0 –statistics=1 –histogram=1 –num=$r –threads=$t –value_size=$vs –block_size=$bs –cache_size=$cs –bloom_bits=10 –cache_numshardbits=6 –open_files=$of –verify_checksum=1 –db=/some_data_base –sync=$sync –disable_wal=1 –compression_type=none –stats_interval=$si –compression_ratio=0.5 –disable_data_sync=$dds –write_buffer_size=$wbs –target_file_size_base=$mb –max_write_buffer_number=$wbn –max_background_compactions=$mbc –level0_file_num_compaction_trigger=$ctrig –level0_slowdown_writes_trigger=$delay –level0_stop_writes_trigger=$stop –num_levels=$levels –delete_obsolete_files_period_micros=$del –min_level_to_compress=$mcz –max_grandparent_overlap_factor=$overlap –stats_per_interval=1 –max_bytes_for_level_base=$bpl –use_existing_db=1 –key_size=10 -``` - -After running this script, there were no issues wrt to loading billion hashes , but when it came to reading part, its been almost 4 days and still I have only read 7 billion hashes and have read 200 million hashes in 2 and half days. Is there something which is missing in db_bench or something which I am missing ? - -**[Siying Dong](siying.d@fb.com)** - -It’s a printing error then. If you have `num=1000000000` and `t=32`, it will be 32 threads, and each reads 1 billion keys. diff --git a/docs/_posts/2016-01-29-compaction_pri.markdown b/docs/_posts/2016-01-29-compaction_pri.markdown deleted file mode 100644 index ba9ee627c..000000000 --- a/docs/_posts/2016-01-29-compaction_pri.markdown +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: Option of Compaction Priority -layout: post -author: sdong -category: blog -redirect_from: - - /blog/2921/compaction_pri/ ---- - -The most popular compaction style of RocksDB is level-based compaction, which is an improved version of LevelDB's compaction algorithm. Page 9- 16 of this [slides](https://github.com/facebook/rocksdb/blob/gh-pages/talks/2015-09-29-HPTS-Siying-RocksDB.pdf) gives an illustrated introduction of this compaction style. The basic idea that: data is organized by multiple levels with exponential increasing target size. 
Except a special level 0, every level is key-range partitioned into many files. When size of a level exceeds its target size, we pick one or more of its files, and merge the file into the next level. - - - -Which file to pick to compact is an interesting question. LevelDB only uses one thread for compaction and it always picks files in round robin manner. We implemented multi-thread compaction in RocksDB by picking multiple files from the same level and compact them in parallel. We had to move away from LevelDB's file picking approach. Recently, we created an option [options.compaction_pri](https://github.com/facebook/rocksdb/blob/d6c838f1e130d8860407bc771fa6d4ac238859ba/include/rocksdb/options.h#L83-L93), which indicated three different algorithms to pick files to compact. - -Why do we need to multiple algorithms to choose from? Because there are different factors to consider when picking the files, and we now don't yet know how to balance them automatically, so we expose it to users to choose. Here are factors to consider: - -**Write amplification** - -When we estimate write amplification, we usually simplify the problem by assuming keys are uniformly distributed inside each level. In reality, it is not the case, even if user updates are uniformly distributed across the whole key range. For instance, when we compact one file of a level to the next level, it creates a hole. Over time, incoming compaction will fill data to the hole, but the density will still be lower for a while. Picking a file with keys least densely populated is more expensive to get the file to the next level, because there will be more overlapping files in the next level so we need to rewrite more data. For example, assume a file is 100MB, if an L2 file overlaps with 8 L3 files, we need to rewrite about 800MB of data to get the file to L3. If the file overlaps with 12 L3 files, we'll need to rewrite about 1200MB to get a file of the same size out of L2. It uses 50% more writes. (This analysis ignores the key density of the next level, because the range covers N times of files in that level so one hole only impacts write amplification by 1/N) - -If all the updates are uniformly distributed, LevelDB's approach optimizes write amplification, because a file being picked covers a range whose last compaction time to the next level is the oldest, so the range will accumulated keys from incoming compactions for the longest and the density is the highest. - -We created a compaction priority **kOldestSmallestSeqFirst** for the same effect. With this mode, we always pick the file covers the oldest updates in the level, which usually is contains the densest key range. If you have a use case where writes are uniformly distributed across the key space and you want to reduce write amplification, you should set options.compaction_pri=kOldestSmallestSeqFirst. - -**Optimize for small working set** - -We are assuming updates are uniformly distributed across the whole key space in previous analysis. However, in many use cases, there are subset of keys that are frequently updated while other key ranges are very cold. In this case, keeping hot key ranges from compacting to deeper levels will benefit write amplification, as well as space amplification. For example, if in a DB only key 150-160 are updated and other keys are seldom updated. If level 1 contains 20 keys, we want to keep 150-160 all stay in level 1. 
Because when next level 0 -> 1 compaction comes, it will simply overwrite existing keys so size level 1 doesn't increase, so no need to schedule further compaction for level 1->2. On the other hand, if we compact key 150-155 to level2, when a new Level 1->2 compaction comes, it increases the size of level 1, making size of level 1 exceed target size and more compactions will be needed, which generates more writes. - -The compaction priority **kOldestLargestSeqFirst** optimizes this use case. In this mode, we will pick a file whose latest update is the oldest. It means there is no incoming data for the range for the longest. Usually it is the coldest range. By compacting coldest range first, we leave the hot ranges in the level. If your use case is to overwrite existing keys in a small range, try options.compaction_pri=kOldestLargestSeqFirst**.** - -**Drop delete marker sooner** - -If one file contains a lot of delete markers, it may slow down iterating over this area, because we still need to iterate those deleted keys just to ignore them. Furthermore, the sooner we compact delete keys into the last level, the sooner the disk space is reclaimed, so it is good for space efficiency. - -Our default compaction priority **kByCompensatedSize** considers the case. If number of deletes in a file exceeds number of inserts, it is more likely to be picked for compaction. The more number of deletes exceed inserts, the more likely it is being compacted. The optimization is added to avoid the worst performance of space efficiency and query performance when a large percentage of the DB is deleted. - -**Efficiency of compaction filter** - -Usually people use [compaction filters](https://github.com/facebook/rocksdb/blob/v4.1/include/rocksdb/options.h#L201-L226) to clean up old data to free up space. Picking files to compact may impact space efficiency. We don't yet have a a compaction priority to optimize this case. In some of our use cases, we solved the problem in a different way: we have an external service checking modify time of all SST files. If any of the files is too old, we force the single file to compaction by calling DB::CompactFiles() using the single file. In this way, we can provide a time bound of data passing through compaction filters. - - -In all, there three choices of compaction priority modes optimizing different scenarios. if you have a new use case, we suggest you start with `options.compaction_pri=kOldestSmallestSeqFirst` (note it is not the default one for backward compatible reason). If you want to further optimize your use case, you can try other two use cases if your use cases apply. - -If you have good ideas about better compaction picker approach, you are welcome to implement and benchmark it. We'll be glad to review and merge your a pull requests. - -### Comments - -**[Mark Callaghan](mdcallag@gmail.com)** - -Performance results for compaction_pri values and linkbench are explained at [http://smalldatum.blogspot.com/2016/02/compaction-priority-in-rocksdb.html](http://smalldatum.blogspot.com/2016/02/compaction-priority-in-rocksdb.html) diff --git a/docs/_posts/2016-02-24-rocksdb-4-2-release.markdown b/docs/_posts/2016-02-24-rocksdb-4-2-release.markdown deleted file mode 100644 index 409015cc8..000000000 --- a/docs/_posts/2016-02-24-rocksdb-4-2-release.markdown +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: RocksDB 4.2 Release! -layout: post -author: sdong -category: blog -redirect_from: - - /blog/3017/rocksdb-4-2-release/ ---- - -New RocksDB release - 4.2! 
- - -**New Features** - - 1. Introduce CreateLoggerFromOptions(); this function creates a Logger for the provided DBOptions. - - - 2. Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families. - - - 3. Add MemoryUtil in rocksdb/utilities/memory.h. It currently offers a way to get the memory usage by type from a list of rocksdb instances. - - - - - -**Public API changes** - - 1. CompactionFilter::Context includes the column family ID. - - - 2. The need-compaction hint given by TablePropertiesCollector::NeedCompact() will be persistent and recoverable after DB recovery. This introduces a breaking format change. If you use this experimental feature, including NewCompactOnDeletionCollectorFactory() in the new version, you may not be able to directly downgrade the DB back to version 4.0 or lower. - - - 3. TablePropertiesCollectorFactory::CreateTablePropertiesCollector() now takes a Context argument containing the column family ID of the file being written. - - - 4. Remove DefaultCompactionFilterFactory. - - -[https://github.com/facebook/rocksdb/releases/tag/v4.2](https://github.com/facebook/rocksdb/releases/tag/v4.2) diff --git a/docs/_posts/2016-02-25-rocksdb-ama.markdown b/docs/_posts/2016-02-25-rocksdb-ama.markdown deleted file mode 100644 index 2ba04f39a..000000000 --- a/docs/_posts/2016-02-25-rocksdb-ama.markdown +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: RocksDB AMA -layout: post -author: yhchiang -category: blog -redirect_from: - - /blog/3065/rocksdb-ama/ ---- - -RocksDB developers are doing a Reddit Ask-Me-Anything now at 10AM – 11AM PDT! We welcome you to stop by and ask any RocksDB related questions, including existing / upcoming features, tuning tips, or database design. - -Here are some enhancements that we'd like to focus on over the next six months: - -* 2-Phase Commit -* Lua support in some custom functions -* Backup and repair tools -* Direct I/O to bypass OS cache -* RocksDB Java API - -[https://www.reddit.com/r/IAmA/comments/47k1si/we_are_rocksdb_developers_ask_us_anything/](https://www.reddit.com/r/IAmA/comments/47k1si/we_are_rocksdb_developers_ask_us_anything/) diff --git a/docs/_posts/2016-03-07-rocksdb-options-file.markdown b/docs/_posts/2016-03-07-rocksdb-options-file.markdown deleted file mode 100644 index 703449b01..000000000 --- a/docs/_posts/2016-03-07-rocksdb-options-file.markdown +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: RocksDB Options File -layout: post -author: yhciang -category: blog -redirect_from: - - /blog/3089/rocksdb-options-file/ ---- - -In RocksDB 4.3, we added a new set of features that makes managing RocksDB options easier. Specifically: - - * **Persisting Options Automatically**: Each RocksDB database will now automatically persist its current set of options into an INI file on every successful call of DB::Open(), SetOptions(), and CreateColumnFamily() / DropColumnFamily(). - - - - * **Load Options from File**: We added [LoadLatestOptions() / LoadOptionsFromFile()](https://github.com/facebook/rocksdb/blob/4.3.fb/include/rocksdb/utilities/options_util.h#L48-L58), which enable developers to construct RocksDB options objects from an options file (see the sketch below). - - - - * **Sanity Check Options**: We added [CheckOptionsCompatibility](https://github.com/facebook/rocksdb/blob/4.3.fb/include/rocksdb/utilities/options_util.h#L64-L77), which performs a compatibility check on two sets of RocksDB options. - -
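A rough, hedged sketch of what loading the persisted options back might look like (the DB path is a placeholder and error handling is omitted):

```cpp
#include <string>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/utilities/options_util.h"

// Hypothetical DB path used only for illustration.
const std::string kDBPath = "/tmp/rocksdb_options_file_example";

rocksdb::DBOptions db_opts;
std::vector<rocksdb::ColumnFamilyDescriptor> cf_descs;

// Reconstruct the options the DB was last opened with from its OPTIONS file.
rocksdb::Status s = rocksdb::LoadLatestOptions(
    kDBPath, rocksdb::Env::Default(), &db_opts, &cf_descs);
```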
Want to know more about how to use these new features? Check out the [RocksDB Options File wiki page](https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File) and start using them today! diff --git a/docs/_posts/2016-04-26-rocksdb-4-5-1-released.markdown b/docs/_posts/2016-04-26-rocksdb-4-5-1-released.markdown deleted file mode 100644 index 247768d30..000000000 --- a/docs/_posts/2016-04-26-rocksdb-4-5-1-released.markdown +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: RocksDB 4.5.1 Released! -layout: post -author: sdong -category: blog -redirect_from: - - /blog/3179/rocksdb-4-5-1-released/ ---- - -## 4.5.1 (3/25/2016) - -### Bug Fixes - - *  Fix failures caused by the destruction order of singleton objects. - -
- -## 4.5.0 (2/5/2016) - -### Public API Changes - - * Add a new perf context level between kEnableCount and kEnableTime. Level 2 now does not include timers for mutexes. - * Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll. - * DBOptions::delete_scheduler and NewDeleteScheduler() are removed; please use DBOptions::sst_file_manager and NewSstFileManager() instead. - -### New Features - * ldb tool now supports operations on non-default column families. - * Add kPersistedTier to ReadTier. This option allows Get and MultiGet to read only the persisted data and skip mem-tables if writes were done with disableWAL = true. - * Add DBOptions::sst_file_manager. Use NewSstFileManager() in include/rocksdb/sst_file_manager.h to create a SstFileManager that can be used to track the total size of SST files and control the SST file deletion rate (see the sketch below). - -
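For flavor only, a minimal sketch of wiring up an SstFileManager is shown below; the construction call and the size query are based on the header mentioned above, and the details are assumptions rather than part of these release notes:

```cpp
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/sst_file_manager.h"

rocksdb::Options options;
// Track the SST files of this DB through a shared SstFileManager instance.
options.sst_file_manager.reset(
    rocksdb::NewSstFileManager(rocksdb::Env::Default()));

// ... open the DB with `options`; later, query the tracked total SST size:
uint64_t total_sst_bytes = options.sst_file_manager->GetTotalSize();
```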
- - - -## 4.4.0 (1/14/2016) - -### Public API Changes - - * Change names in CompactionPri and add a new one. - * Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit. - * If options.max_write_buffer_number > 3, writes will be slowed down when writing to the last write buffer to delay a full stop. - * Introduce CompactionJobInfo::compaction_reason; this field includes the reason that triggered the compaction. - * After slowdown is triggered, if the estimated pending compaction bytes keep increasing, slow down more. - * Increase default options.delayed_write_rate to 2MB/s. - * Added a new parameter --path to the ldb tool. --path accepts the name of either a MANIFEST, SST, or WAL file. Either --db or --path can be used when calling ldb. - -
- -## 4.3.0 (12/8/2015) - -### New Features - - * CompactionFilter has a new member function called IgnoreSnapshots which allows CompactionFilter to be called even if there are snapshots later than the key. - * RocksDB will now persist options under the same directory as the RocksDB database on successful DB::Open, CreateColumnFamily, DropColumnFamily, and SetOptions. - * Introduce LoadLatestOptions() in rocksdb/utilities/options_util.h. This function can construct the latest DBOptions / ColumnFamilyOptions used by the specified RocksDB instance. - * Introduce CheckOptionsCompatibility() in rocksdb/utilities/options_util.h. This function checks whether the input set of options is able to open the specified DB successfully. - -### Public API Changes - - * When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families. diff --git a/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown b/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown deleted file mode 100644 index 0db275ddf..000000000 --- a/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: RocksDB 4.8 Released! -layout: post -author: yiwu -category: blog -redirect_from: - - /blog/3239/rocksdb-4-8-released/ ---- - -## 4.8.0 (5/2/2016) - -### [](https://github.com/facebook/rocksdb/blob/main/HISTORY.md#public-api-change-1)Public API Change - - * Allow preset compression dictionary for improved compression of block-based tables. This is supported for zlib, zstd, and lz4. The compression dictionary's size is configurable via CompressionOptions::max_dict_bytes. - * Delete deprecated classes for creating backups (BackupableDB) and restoring from backups (RestoreBackupableDB). Now, BackupEngine should be used for creating backups, and BackupEngineReadOnly should be used for restorations. For more details, see [https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F](https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F) - * Expose estimate of per-level compression ratio via DB property: "rocksdb.compression-ratio-at-levelN". - * Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will also be called in the failure case; users can check the creation status via TableFileCreationInfo::status. - -### [](https://github.com/facebook/rocksdb/blob/main/HISTORY.md#new-features-2)New Features - - * Add ReadOptions::readahead_size. If non-zero, NewIterator will create a new table reader which performs reads of the given size (see the sketch below). - -
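A hedged illustration of the new read option (assuming `db` is an already-open `rocksdb::DB*`; the 2MB value is arbitrary):

```cpp
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

rocksdb::ReadOptions read_opts;
read_opts.readahead_size = 2 * 1024 * 1024;  // hint 2MB reads for this iterator

std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(read_opts));
for (it->SeekToFirst(); it->Valid(); it->Next()) {
  // ... sequential scan; larger reads can reduce the number of I/Os on
  // storage with high per-request latency ...
}
```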
- - - -## [](https://github.com/facebook/rocksdb/blob/main/HISTORY.md#470-482016)4.7.0 (4/8/2016) - -### [](https://github.com/facebook/rocksdb/blob/main/HISTORY.md#public-api-change-2)Public API Change - - * rename options compaction_measure_io_stats to report_bg_io_stats and include flush too. - * Change some default options. Now default options will optimize for server-workloads. Also enable slowdown and full stop triggers for pending compaction bytes. These changes may cause sub-optimal performance or significant increase of resource usage. To avoid these risks, users can open existing RocksDB with options extracted from RocksDB option files. See [https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File](https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File) for how to use RocksDB option files. Or you can call Options.OldDefaults() to recover old defaults. DEFAULT_OPTIONS_HISTORY.md will track change history of default options. - -
- -## [](https://github.com/facebook/rocksdb/blob/main/HISTORY.md#460-3102016)4.6.0 (3/10/2016) - -### [](https://github.com/facebook/rocksdb/blob/main/HISTORY.md#public-api-changes-1)Public API Changes - - * Change default of BlockBasedTableOptions.format_version to 2. This means a DB created with default options by 4.6 or later cannot be opened by RocksDB version 3.9 or earlier. - * Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, inserts into the cache will fail if not enough capacity can be freed. The signature of Cache::Insert() is updated accordingly. - * Tickers [NUMBER_DB_NEXT, NUMBER_DB_PREV, NUMBER_DB_NEXT_FOUND, NUMBER_DB_PREV_FOUND, ITER_BYTES_READ] are not updated immediately. They are updated when the Iterator is deleted. - * Add a monotonically increasing counter (DB property "rocksdb.current-super-version-number") that increments upon any change to the LSM tree. - -### [](https://github.com/facebook/rocksdb/blob/main/HISTORY.md#new-features-3)New Features - - * Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. - * Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned" diff --git a/docs/_posts/2016-09-28-rocksdb-4-11-2-released.markdown b/docs/_posts/2016-09-28-rocksdb-4-11-2-released.markdown deleted file mode 100644 index 87c20eb47..000000000 --- a/docs/_posts/2016-09-28-rocksdb-4-11-2-released.markdown +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: RocksDB 4.11.2 Released! -layout: post -author: sdong -category: blog ---- -We abandoned the 4.10.x release candidates and went directly from 4.9 to 4.11.2 to make sure the latest release is stable. In 4.11.2, we fixed several data corruption related bugs introduced in 4.9.0. - -## 4.11.2 (9/15/2016) - -### Bug fixes - - * Segfault when failing to open an SST file for read-ahead iterators. - * WAL without data for all CFs is not deleted after recovery. - - - -## 4.11.1 (8/30/2016) - -### Bug Fixes - - * Mitigate the regression bug of a deadlock condition during recovery when the options.max_successive_merges limit is hit. - * Fix a data race condition related to the hash index in block based table when putting indexes in the block cache. - -## 4.11.0 (8/1/2016) - -### Public API Change - - * options.memtable_prefix_bloom_huge_page_tlb_size => memtable_huge_page_size. When it is set, RocksDB will try to allocate memory from huge pages for the memtable too, rather than just the memtable bloom filter. - -### New Features - - * A tool to migrate a DB after options change. See include/rocksdb/utilities/option_change_migration.h. - * Add ReadOptions.background_purge_on_iterator_cleanup. If true, we avoid file deletion when destroying iterators. - -## 4.10.0 (7/5/2016) - -### Public API Change - - * options.memtable_prefix_bloom_bits changes to options.memtable_prefix_bloom_bits_ratio and deprecate options.memtable_prefix_bloom_probes - * enum types CompressionType and PerfLevel change from char to unsigned char. Values of all PerfLevel entries shift by one. - * Deprecate options.filter_deletes. - -### New Features - - * Add avoid_flush_during_recovery option. - * Add a read option background_purge_on_iterator_cleanup to avoid deleting files in the foreground when destroying iterators. Instead, a job is scheduled in the high priority queue and executed in a separate background thread. - * RepairDB support for column families. RepairDB now associates data with non-default column families using information embedded in the SST/WAL files (4.7 or later).
For data written by 4.6 or earlier, RepairDB associates it with the default column family. - * Add options.write_buffer_manager, which allows users to control total memtable sizes across multiple DB instances. diff --git a/docs/_posts/2017-01-06-rocksdb-5-0-1-released.markdown b/docs/_posts/2017-01-06-rocksdb-5-0-1-released.markdown deleted file mode 100644 index fb0413055..000000000 --- a/docs/_posts/2017-01-06-rocksdb-5-0-1-released.markdown +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: RocksDB 5.0.1 Released! -layout: post -author: yiwu -category: blog ---- - -### Public API Change - - * Options::max_bytes_for_level_multiplier is now a double along with all getters and setters. - * Support dynamically changing the `delayed_write_rate` and `max_total_wal_size` options via SetDBOptions(). - * Introduce DB::DeleteRange for optimized deletion of large ranges of contiguous keys (see the sketch at the end of these notes). - * Support dynamically changing the `delayed_write_rate` option via SetDBOptions(). - * Options::allow_concurrent_memtable_write and Options::enable_write_thread_adaptive_yield are now true by default. - * Remove Tickers::SEQUENCE_NUMBER to avoid confusion if the statistics object is shared among RocksDB instances. Alternatively, DB::GetLatestSequenceNumber() can be used to get the same value. - * Options.level0_stop_writes_trigger default value changes from 24 to 32. - * New compaction filter API: CompactionFilter::FilterV2(). It allows dropping ranges of keys. - * Removed flashcache support. - * DB::AddFile() is deprecated and is replaced with DB::IngestExternalFile(). DB::IngestExternalFile() removes all the restrictions that existed for DB::AddFile. - -### New Features - - * Add avoid_flush_during_shutdown option, which speeds up DB shutdown by not flushing unpersisted data (i.e. with disableWAL = true). Unpersisted data will be lost. The option is dynamically changeable via SetDBOptions(). - * Add memtable_insert_with_hint_prefix_extractor option. The option is meant to reduce CPU usage for inserting keys into the memtable if keys can be grouped by prefix and the inserts for each prefix are sequential or almost sequential. See include/rocksdb/options.h for more details. - * Add LuaCompactionFilter in utilities. This allows developers to write compaction filters in Lua. To use this feature, LUA_PATH needs to be set to the root directory of Lua. - * No longer populate the "LATEST_BACKUP" file in the backup directory, which formerly contained the number of the latest backup. The latest backup can be determined by finding the highest numbered file in the "meta/" subdirectory.
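As a brief, hedged illustration of the new DeleteRange API (assuming `db` is an already-open `rocksdb::DB*`; the key bounds are placeholders):

```cpp
#include "rocksdb/db.h"

// Deletes every key in ["user_000000", "user_999999") in a single operation,
// instead of issuing one Delete() per key.
rocksdb::Status s = db->DeleteRange(rocksdb::WriteOptions(),
                                    db->DefaultColumnFamily(),
                                    "user_000000", "user_999999");
```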
diff --git a/docs/_posts/2017-02-17-bulkoad-ingest-sst-file.markdown b/docs/_posts/2017-02-17-bulkoad-ingest-sst-file.markdown deleted file mode 100644 index 9a43a846a..000000000 --- a/docs/_posts/2017-02-17-bulkoad-ingest-sst-file.markdown +++ /dev/null @@ -1,50 +0,0 @@ ---- -title: Bulkloading by ingesting external SST files -layout: post -author: IslamAbdelRahman -category: blog ---- - -## Introduction - -One of the basic operations in RocksDB is writing to it. Writes happen when the user calls DB::Put, DB::Write, DB::Delete, etc., but what actually happens when you write to RocksDB? Here is a brief description: -- The user inserts a new key/value by calling DB::Put() (or DB::Write()). -- We create a new entry for the new key/value in our in-memory structure (memtable / SkipList by default) and we assign it a new sequence number. -- When the memtable exceeds a specific size (64 MB for example), we convert this memtable to an SST file and put this file in level 0 of our LSM-Tree. -- Later, compaction will kick in and move data from level 0 to level 1, then from level 1 to level 2, and so on. - -But what if we could skip these steps and add data to the lowest possible level directly? This is what bulk-loading does. - -## Bulkloading - -- Write all of our keys and values into an SST file outside of the DB. -- Add the SST file into the LSM directly. - -This is bulk-loading, and in specific use-cases it allows users to achieve faster data loading and better write-amplification. - -Doing it is as simple as: -```cpp -Options options; -SstFileWriter sst_file_writer(EnvOptions(), options, options.comparator); -// file_path is the path of the SST file created outside of the DB. -Status s = sst_file_writer.Open(file_path); -assert(s.ok()); - -// Insert rows into the SST file; note that inserted keys must be -// strictly increasing (based on options.comparator). -for (...) { - s = sst_file_writer.Add(key, value); - assert(s.ok()); -} - -// Ingest the external SST file into the DB (db_ is an already-open DB*). -s = db_->IngestExternalFile({file_path}, IngestExternalFileOptions()); -assert(s.ok()); -``` - -You can find more details about how to generate SST files and ingest them into RocksDB in this [wiki page](https://github.com/facebook/rocksdb/wiki/Creating-and-Ingesting-SST-files) - -## Use cases -There are multiple use cases where bulkloading could be useful, for example -- Generating SST files in offline jobs in Hadoop, then downloading and ingesting the SST files into RocksDB -- Migrating shards between machines by dumping a key-range into an SST file and loading the file on a different machine -- Migrating from a different storage engine (InnoDB to RocksDB migration in MyRocks) diff --git a/docs/_posts/2017-03-02-rocksdb-5-2-1-released.markdown b/docs/_posts/2017-03-02-rocksdb-5-2-1-released.markdown deleted file mode 100644 index c6ce27d64..000000000 --- a/docs/_posts/2017-03-02-rocksdb-5-2-1-released.markdown +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: RocksDB 5.2.1 Released! -layout: post -author: sdong -category: blog ---- - -### Public API Change -* NewLRUCache() will determine the number of shard bits automatically based on capacity if the user doesn't pass one. This also impacts the default block cache when the user doesn't explicitly provide one. -* Change the default of the delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files. - -### New Features -* Added a new overloaded function GetApproximateSizes that allows specifying whether only memtable stats should be computed, without computing SST files' stats approximations.
-* Added new function GetApproximateMemTableStats that approximates both number of records and size of memtables. -* (Experimental) Two-level indexing that partition the index and creates a 2nd level index on the partitions. The feature can be enabled by setting kTwoLevelIndexSearch as IndexType and configuring index_per_partition. - -### Bug Fixes -* RangeSync() should work if ROCKSDB_FALLOCATE_PRESENT is not set -* Fix wrong results in a data race case in Get() -* Some fixes related to 2PC. -* Fix several bugs in Direct I/O supports. -* Fix a regression bug which can cause Seek() to miss some keys if the return key has been updated many times after the snapshot which is used by the iterator. diff --git a/docs/_posts/2017-05-12-partitioned-index-filter.markdown b/docs/_posts/2017-05-12-partitioned-index-filter.markdown deleted file mode 100644 index a537feb0c..000000000 --- a/docs/_posts/2017-05-12-partitioned-index-filter.markdown +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: Partitioned Index/Filters -layout: post -author: maysamyabandeh -category: blog ---- - -As DB/mem ratio gets larger, the memory footprint of filter/index blocks becomes non-trivial. Although `cache_index_and_filter_blocks` allows storing only a subset of them in block cache, their relatively large size negatively affects the performance by i) occupying the block cache space that could otherwise be used for caching data, ii) increasing the load on the disk storage by loading them into the cache after a miss. Here we illustrate these problems in more detail and explain how partitioning index/filters alleviates the overhead. - -### How large are the index/filter blocks? - -RocksDB has by default one index/filter block per SST file. The size of the index/filter varies based on the configuration but for a SST of size 256MB the index/filter block of size 0.5/5MB is typical, which is much larger than the typical data block size of 4-32KB. That is fine when all index/filters fit perfectly into memory and hence are read once per SST lifetime, not so much when they compete with data blocks for the block cache space and are also likely to be re-read many times from the disk. - -### What is the big deal with large index/filter blocks? - -When index/filter blocks are stored in block cache they are effectively competing with data blocks (as well as with each other) on this scarce resource. A filter of size 5MB is occupying the space that could otherwise be used to cache 1000s of data blocks (of size 4KB). This would result in more cache misses for data blocks. The large index/filters also kick each other out of the block cache more often and exacerbate their own cache miss rate too. This is while only a small part of the index/filter block might have been actually used during its lifetime in the cache. - -After the cache miss of an index/filter, it has to be reloaded from the disk, and its large size is not helping in reducing the IO cost. While a simple point lookup might need at most a couple of data block reads (of size 4KB) one from each layer of LSM, it might end up also loading multiple megabytes of index/filter blocks. If that happens often then the disk is spending more time serving index/filters rather than the actual data blocks. - -## What is partitioned index/filters? - -With partitioning, the index/filter of a SST file is partitioned into smaller blocks with an additional top-level index on them. When reading an index/filter, only top-level index is loaded into memory. 
The partitioned index/filter then uses the top-level index to load on demand into the block cache the partitions that are required to perform the index/filter query. The top-level index, which has much smaller memory footprint, can be stored in heap or block cache depending on the `cache_index_and_filter_blocks` setting. - -### Success stories - -#### HDD, 100TB DB - -In this example we have a DB of size 86G on HDD and emulate the small memory that is present to a node with 100TB of data by using direct IO (skipping OS file cache) and a very small block cache of size 60MB. Partitioning improves throughput by 11x from 5 op/s to 55 op/s. - -#### SSD, Linkbench - -In this example we have a DB of size 300G on SSD and emulate the small memory that would be available in presence of other DBs on the same node by by using direct IO (skipping OS file cache) and block cache of size 6G and 2G. Without partitioning the linkbench throughput drops from 38k tps to 23k when reducing block cache size from 6G to 2G. With partitioning the throughput drops from 38k to only 30k. - -Learn more [here](https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters). diff --git a/docs/_posts/2017-05-14-core-local-stats.markdown b/docs/_posts/2017-05-14-core-local-stats.markdown deleted file mode 100644 index a806541fc..000000000 --- a/docs/_posts/2017-05-14-core-local-stats.markdown +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: Core-local Statistics -layout: post -author: ajkr -category: blog ---- - -## Origins: Global Atomics - -Until RocksDB 4.12, ticker/histogram statistics were implemented with std::atomic values shared across the entire program. A ticker consists of a single atomic, while a histogram consists of several atomics to represent things like min/max/per-bucket counters. These statistics could be updated by all user/background threads. - -For concurrent/high-throughput workloads, cache line bouncing of atomics caused high CPU utilization. For example, we have tickers that count block cache hits and misses. Almost every user read increments these tickers a few times. Many concurrent user reads would cause the cache lines containing these atomics to bounce between cores. - -### Performance - -Here are perf results for 32 reader threads where most reads (99%+) are served by uncompressed block cache. Such a scenario stresses the statistics code heavily. - -Benchmark command: `TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench -statistics -use_existing_db=true -benchmarks=readrandom -threads=32 -cache_size=1048576000 -num=1000000 -reads=1000000 && perf report -g --children` - -Perf snippet for "cycles" event: - -``` - Children Self Command Shared Object Symbol -+ 30.33% 30.17% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick -+ 3.65% 0.98% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime -``` - -Perf snippet for "cache-misses" event: - -``` - Children Self Command Shared Object Symbol -+ 19.54% 19.50% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick -+ 3.44% 0.57% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime -``` - -The high CPU overhead for updating tickers and histograms corresponds well to the high cache misses. - -## Thread-locals: Faster Updates - -Since RocksDB 4.12, ticker/histogram statistics use thread-local storage. Each thread has a local set of atomic values that no other thread can update. This prevents the cache line bouncing problem described above. 
Even though updates to a given value are always made by the same thread, atomics are still useful to synchronize with aggregations for querying statistics. - -Implementing this approach involved a couple challenges. First, each query for a statistic's global value must aggregate all threads' local values. This adds some overhead, which may pass unnoticed if statistics are queried infrequently. Second, exited threads' local values are still needed to provide accurate statistics. We handle this by merging a thread's local values into process-wide variables upon thread exit. - -### Performance - -Update benchmark setup is same as before. CPU overhead improved 7.8x compared to global atomics, corresponding to a 17.8x reduction in cache-misses overhead. - -Perf snippet for "cycles" event: - -``` - Children Self Command Shared Object Symbol -+ 2.96% 0.87% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick -+ 1.37% 0.10% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime -``` - -Perf snippet for "cache-misses" event: - -``` - Children Self Command Shared Object Symbol -+ 1.21% 0.65% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick - 0.08% 0.00% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime -``` - -To measure statistics query latency, we ran sysbench with 4K OLTP clients concurrently with one client that queries statistics repeatedly. Times shown are in milliseconds. - -``` - min: 18.45 - avg: 27.91 - max: 231.65 - 95th percentile: 55.82 -``` - -## Core-locals: Faster Querying - -The thread-local approach is working well for applications calling RocksDB from only a few threads, or polling statistics infrequently. Eventually, though, we found use cases where those assumptions do not hold. For example, one application has per-connection threads and typically runs into performance issues when connection count grows very high. For debugging such issues, they want high-frequency statistics polling to correlate issues in their application with changes in RocksDB's state. - -Once [PR #2258](https://github.com/facebook/rocksdb/pull/2258) lands, ticker/histogram statistics will be local to each CPU core. Similarly to thread-local, each core updates only its local values, thus avoiding cache line bouncing. Local values are still atomics to make aggregation possible. With this change, query work depends only on number of cores, not the number of threads. So, applications with many more threads than cores can no longer impact statistics query latency. - -### Performance - -Update benchmark setup is same as before. CPU overhead worsened ~23% compared to thread-local, while cache performance was unchanged. - -Perf snippet for "cycles" event: - -``` - Children Self Command Shared Object Symbol -+ 2.96% 0.87% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick -+ 1.37% 0.10% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime -``` - -Perf snippet for "cache-misses" event: - -``` - Children Self Command Shared Object Symbol -+ 1.21% 0.65% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick - 0.08% 0.00% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime -``` - -Query latency is measured same as before with times in milliseconds. Average latency improved by 6.3x compared to thread-local. 
- -``` - min: 2.47 - avg: 4.45 - max: 91.13 - 95th percentile: 7.56 -``` diff --git a/docs/_posts/2017-05-26-rocksdb-5-4-5-released.markdown b/docs/_posts/2017-05-26-rocksdb-5-4-5-released.markdown deleted file mode 100644 index 561dab4c2..000000000 --- a/docs/_posts/2017-05-26-rocksdb-5-4-5-released.markdown +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: RocksDB 5.4.5 Released! -layout: post -author: sagar0 -category: blog ---- - -### Public API Change -* Support dynamically changing `stats_dump_period_sec` option via SetDBOptions(). -* Added ReadOptions::max_skippable_internal_keys to set a threshold to fail a request as incomplete when too many keys are being skipped while using iterators. -* DB::Get in place of std::string accepts PinnableSlice, which avoids the extra memcpy of value to std::string in most of cases. - * PinnableSlice releases the pinned resources that contain the value when it is destructed or when ::Reset() is called on it. - * The old API that accepts std::string, although discouraged, is still supported. -* Replace Options::use_direct_writes with Options::use_direct_io_for_flush_and_compaction. See Direct IO wiki for details. - -### New Features -* Memtable flush can be avoided during checkpoint creation if total log file size is smaller than a threshold specified by the user. -* Introduce level-based L0->L0 compactions to reduce file count, so write delays are incurred less often. -* (Experimental) Partitioning filters which creates an index on the partitions. The feature can be enabled by setting partition_filters when using kFullFilter. Currently the feature also requires two-level indexing to be enabled. Number of partitions is the same as the number of partitions for indexes, which is controlled by metadata_block_size. -* DB::ResetStats() to reset internal stats. -* Added CompactionEventListener and EventListener::OnFlushBegin interfaces. -* Added DB::CreateColumnFamilie() and DB::DropColumnFamilies() to bulk create/drop column families. -* Facility for cross-building RocksJava using Docker. - -### Bug Fixes -* Fix WriteBatchWithIndex address use after scope error. -* Fix WritableFile buffer size in direct IO. -* Add prefetch to PosixRandomAccessFile in buffered io. -* Fix PinnableSlice access invalid address when row cache is enabled. -* Fix huge fallocate calls fail and make XFS unhappy. -* Fix memory alignment with logical sector size. -* Fix alignment in ReadaheadRandomAccessFile. -* Fix bias with read amplification stats (READ_AMP_ESTIMATE_USEFUL_BYTES and READ_AMP_TOTAL_READ_BYTES). -* Fix a manual / auto compaction data race. -* Fix CentOS 5 cross-building of RocksJava. -* Build and link with ZStd when creating the static RocksJava build. -* Fix snprintf's usage to be cross-platform. -* Fix build errors with blob DB. -* Fix readamp test type inconsistency. diff --git a/docs/_posts/2017-06-26-17-level-based-changes.markdown b/docs/_posts/2017-06-26-17-level-based-changes.markdown deleted file mode 100644 index 9e838eb7f..000000000 --- a/docs/_posts/2017-06-26-17-level-based-changes.markdown +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Level-based Compaction Changes -layout: post -author: ajkr -category: blog ---- - -### Introduction - -RocksDB provides an option to limit the number of L0 files, which bounds read-amplification. Since L0 files (unlike files at lower levels) can span the entire key-range, a key might be in any file, thus reads need to check them one-by-one. Users often wish to configure a low limit to improve their read latency. 
- -Although, the mechanism with which we enforce L0's file count limit may be unappealing. When the limit is reached, RocksDB intentionally delays user writes. This slows down accumulation of files in L0, and frees up resources for compacting files down to lower levels. But adding delays will significantly increase user-visible write latency jitter. - -Also, due to how L0 files can span the entire key-range, compaction parallelization is limited. Files at L0 or L1 may be locked due to involvement in pending L0->L1 or L1->L2 compactions. We can only schedule a parallel L0->L1 compaction if it does not require any of the locked files, which is typically not the case. - -To handle these constraints better, we added a new type of compaction, L0->L0. It quickly reduces file count in L0 and can be scheduled even when L1 files are locked, unlike L0->L1. We also changed the L0->L1 picking algorithm to increase opportunities for parallelism. - -### Old L0->L1 Picking Logic - -Previously, our logic for picking which L0 file to compact was the same as every other level: pick the largest file in the level. One special property of L0->L1 compaction is that files can overlap in the input level, so those overlapping files must be pulled in as well. For example, a compaction may look like this: - -![full-range.png](/static/images/compaction/full-range.png) - -This compaction pulls in every L0 and L1 file. This happens regardless of which L0 file is initially chosen as each file overlaps with every other file. - -Users may insert their data less uniformly in the key-range. For example, a database may look like this during L0->L1 compaction: - -![part-range-old.png](/static/images/compaction/part-range-old.png) - -Let's say the third file from the top is the largest, and let's say the top two files are created after the compaction started. When the compaction is picked, the fourth L0 file and six rightmost L1 files are pulled in due to overlap. Notice this leaves the database in a state where we might not be able to schedule parallel compactions. For example, if the sixth file from the top is the next largest, we can't compact it because it overlaps with the top two files, which overlap with the locked L0 files. - -We can now see the high-level problems with this approach more clearly. First, locked files in L0 or L1 prevent us from parallelizing compactions. When locked files block L0->L1 compaction, there is nothing we can do to eliminate L0 files. Second, L0->L1 compactions are relatively slow. As we saw, when keys are uniformly distributed, L0->L1 compacts two entire levels. While this is happening, new files are being flushed to L0, advancing towards the file count limit. - -### New L0->L0 Algorithm - -We introduced compaction within L0 to improve both parallelization and speed of reducing L0 file count. An L0->L0 compaction may look like this: - -![l1-l2-contend.png](/static/images/compaction/l1-l2-contend.png) - -Say the L1->L2 compaction started first. Now L0->L1 is prevented by the locked L1 file. In this case, we compact files within L0. This allows us to start the work for eliminating L0 files earlier. It also lets us do less work since we don't pull in any L1 files, whereas L0->L1 compaction would've pulled in all of them. This lets us quickly reduce L0 file count to keep read-amp low while sustaining large bursts of writes (i.e., fast accumulation of L0 files). 
- -The tradeoff is this increases total compaction work, as we're now compacting files without contributing towards our eventual goal of moving them towards lower levels. Our benchmarks, though, consistently show less compaction stalls and improved write throughput. One justification is that L0 file data is highly likely in page cache and/or block cache due to it being recently written and frequently accessed. So, this type of compaction is relatively cheap compared to compactions at lower levels. - -This feature is available since RocksDB 5.4. - -### New L0->L1 Picking Logic - -Recall how the old L0->L1 picking algorithm chose the largest L0 file for compaction. This didn't fit well with L0->L0 compaction, which operates on a span of files. That span begins at the newest L0 file, and expands towards older files as long as they're not being compacted. Since the largest file may be anywhere, the old L0->L1 picking logic could arbitrarily prevent us from getting a long span of files. See the second illustration in this post for a scenario where this would happen. - -So, we changed the L0->L1 picking algorithm to start from the oldest file and expand towards newer files as long as they're not being compacted. For example: - -![l0-l1-contend.png](/static/images/compaction/l0-l1-contend.png) - -Now, there can never be L0 files unreachable for L0->L0 due to L0->L1 selecting files in the middle. When longer spans of files are available for L0->L0, we perform less compaction work per deleted L0 file, thus improving efficiency. - -This feature will be available in RocksDB 5.7. - -### Performance Changes - -Mark Callaghan did the most extensive benchmarking of this feature's impact on MyRocks. See his results [here](http://smalldatum.blogspot.com/2017/05/innodb-myrocks-and-tokudb-on-insert.html). Note the primary change between his March 17 and April 14 builds is the latter performs L0->L0 compaction. diff --git a/docs/_posts/2017-06-29-rocksdb-5-5-1-released.markdown b/docs/_posts/2017-06-29-rocksdb-5-5-1-released.markdown deleted file mode 100644 index d7856088b..000000000 --- a/docs/_posts/2017-06-29-rocksdb-5-5-1-released.markdown +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: RocksDB 5.5.1 Released! -layout: post -author: lightmark -category: blog ---- - -### New Features -* FIFO compaction to support Intra L0 compaction too with CompactionOptionsFIFO.allow_compaction=true. -* Statistics::Reset() to reset user stats. -* ldb add option --try_load_options, which will open DB with its own option file. -* Introduce WriteBatch::PopSavePoint to pop the most recent save point explicitly. -* Support dynamically change `max_open_files` option via SetDBOptions() -* Added DB::CreateColumnFamilie() and DB::DropColumnFamilies() to bulk create/drop column families. -* Add debugging function `GetAllKeyVersions` to see internal versions of a range of keys. -* Support file ingestion with universal compaction style -* Support file ingestion behind with option `allow_ingest_behind` -* New option enable_pipelined_write which may improve write throughput in case writing from multiple threads and WAL enabled. 
- -### Bug Fixes -* Fix the bug that Direct I/O uses direct reads for non-SST file -* Fix the bug that flush doesn't respond to fsync result diff --git a/docs/_posts/2017-07-25-rocksdb-5-6-1-released.markdown b/docs/_posts/2017-07-25-rocksdb-5-6-1-released.markdown deleted file mode 100644 index 3b54ffd5a..000000000 --- a/docs/_posts/2017-07-25-rocksdb-5-6-1-released.markdown +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: RocksDB 5.6.1 Released! -layout: post -author: yiwu -category: blog ---- - -### Public API Change -* Scheduling flushes and compactions in the same thread pool is no longer supported by setting `max_background_flushes=0`. Instead, users can achieve this by configuring their high-pri thread pool to have zero threads. See https://github.com/facebook/rocksdb/wiki/Thread-Pool for more details. -* Replace `Options::max_background_flushes`, `Options::max_background_compactions`, and `Options::base_background_compactions` all with `Options::max_background_jobs`, which automatically decides how many threads to allocate towards flush/compaction. -* options.delayed_write_rate by default take the value of options.rate_limiter rate. -* Replace global variable `IOStatsContext iostats_context` with `IOStatsContext* get_iostats_context()`; replace global variable `PerfContext perf_context` with `PerfContext* get_perf_context()`. - -### New Features -* Change ticker/histogram statistics implementations to use core-local storage. This improves aggregation speed compared to our previous thread-local approach, particularly for applications with many threads. See http://rocksdb.org/blog/2017/05/14/core-local-stats.html for more details. -* Users can pass a cache object to write buffer manager, so that they can cap memory usage for memtable and block cache using one single limit. -* Flush will be triggered when 7/8 of the limit introduced by write_buffer_manager or db_write_buffer_size is triggered, so that the hard threshold is hard to hit. See https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager for more details. -* Introduce WriteOptions.low_pri. If it is true, low priority writes will be throttled if the compaction is behind. See https://github.com/facebook/rocksdb/wiki/Low-Priority-Write for more details. -* `DB::IngestExternalFile()` now supports ingesting files into a database containing range deletions. - -### Bug Fixes -* Shouldn't ignore return value of fsync() in flush. diff --git a/docs/_posts/2017-08-24-pinnableslice.markdown b/docs/_posts/2017-08-24-pinnableslice.markdown deleted file mode 100644 index 06e0bcb2f..000000000 --- a/docs/_posts/2017-08-24-pinnableslice.markdown +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: PinnableSlice; less memcpy with point lookups -layout: post -author: maysamyabandeh -category: blog ---- - -The classic API for [DB::Get](https://github.com/facebook/rocksdb/blob/9e583711144f580390ce21a49a8ceacca338fcd5/include/rocksdb/db.h#L310) receives a std::string as argument to which it will copy the value. The memcpy overhead could be non-trivial when the value is large. The [new API](https://github.com/facebook/rocksdb/blob/9e583711144f580390ce21a49a8ceacca338fcd5/include/rocksdb/db.h#L322) receives a PinnableSlice instead, which avoids memcpy in most of the cases. - -### What is PinnableSlice? - -Similarly to Slice, PinnableSlice refers to some in-memory data so it does not incur the memcpy cost. To ensure that the data will not be erased while it is being processed by the user, PinnableSlice, as its name suggests, has the data pinned in memory. 
The pinned data is released when the PinnableSlice object is destructed or when ::Reset is invoked explicitly on it. - -### How good is it? - -Here are the improvements in throughput for an [in-memory benchmark](https://github.com/facebook/rocksdb/pull/1756#issuecomment-286201693): -* 1k-byte values: 14% -* 10k-byte values: 34% - -### Any limitations? - -PinnableSlice tries to avoid memcpy as much as possible. The primary gain is when reading large values from the block cache. There are, however, cases where it still has to copy the data into its internal buffer. The reason is mainly implementation complexity; if there is enough motivation on the application side, the scope of PinnableSlice could be extended to such cases too. These include: -* Merged values -* Reads from memtables - -### How to use it? - -```cpp -PinnableSlice pinnable_val; -while (!stopped) { - auto s = db->Get(opt, cf, key, &pinnable_val); - // ... use it - pinnable_val.Reset(); // then release it immediately -} -``` - -You can also [initialize the internal buffer](https://github.com/facebook/rocksdb/blob/9e583711144f580390ce21a49a8ceacca338fcd5/include/rocksdb/db.h#L314) of PinnableSlice by passing your own string in the constructor. [simple_example.cc](https://github.com/facebook/rocksdb/blob/main/examples/simple_example.cc) demonstrates that with more examples. diff --git a/docs/_posts/2017-08-25-flushwal.markdown b/docs/_posts/2017-08-25-flushwal.markdown deleted file mode 100644 index 751fe5249..000000000 --- a/docs/_posts/2017-08-25-flushwal.markdown +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: FlushWAL; less fwrite, faster writes -layout: post -author: maysamyabandeh -category: blog ---- - -When `DB::Put` is called, the data is written to both the memtable (to be flushed to SST files later) and the WAL (write-ahead log) if it is enabled. In the case of a crash, RocksDB can recover the memtable state up to what was reflected in the WAL. By default, RocksDB automatically flushes the WAL from the application memory to the OS buffer after each `::Put`. It can, however, be configured to perform the flush manually after an explicit call to `::FlushWAL`. Not doing an fwrite syscall after each `::Put` offers a tradeoff between reliability and write latency for the general case. As we explain below, some applications such as MyRocks benefit from this API to gain higher write throughput without compromising reliability. - -### How much is the gain? - -Using the `::FlushWAL` API along with setting `DBOptions.concurrent_prepare`, MyRocks achieves 40% higher throughput in Sysbench's [update-nonindex](https://github.com/akopytov/sysbench/blob/master/src/lua/oltp_update_non_index.lua) benchmark. - -### Write, Flush, and Sync - -The write to the WAL is first written to the application memory buffer. In the next step, the buffer is "flushed" to the OS buffer by calling the fwrite syscall. The OS buffer is later "synced" to the persistent storage. The data in the OS buffer, although not persisted yet, will survive an application crash. By default, the flush occurs automatically upon each call to `DB::Put` or `DB::Write`. The user can additionally request a sync after each write by setting `WriteOptions::sync`. - -### FlushWAL API - -The user can turn off the automatic flush of the WAL by setting `DBOptions::manual_wal_flush`. In that case, the WAL buffer is flushed when it is either full or `DB::FlushWAL` is called by the user. The API also accepts a boolean argument indicating whether we want to sync right after the flush: `::FlushWAL(true)`.
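A minimal, hedged sketch of the manual mode (this example is not from the original post; the DB path is a placeholder):

```cpp
#include "rocksdb/db.h"
#include "rocksdb/options.h"

rocksdb::Options options;
options.create_if_missing = true;
options.manual_wal_flush = true;  // turn off the automatic flush after each write

rocksdb::DB* db = nullptr;
rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/flushwal_example", &db);

// ... a burst of Put()/Write() calls; their WAL entries stay in the
// application buffer until we flush explicitly ...

// Flush the buffered WAL entries to the OS and also sync them to storage.
s = db->FlushWAL(/*sync=*/true);
```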
### Success story: MyRocks - -Some applications that use RocksDB already have other mechanisms in place to provide reliability. MySQL, for example, uses 2PC (two-phase commit) to write to both the binlog and the storage engine, such as InnoDB or MyRocks. The group commit logic in MySQL allows the 1st phase (Prepare) to be run in parallel but, after a commit group is formed, performs the 2nd phase (Commit) in a serial manner. This makes low commit latency in the storage engine essential for achieving high throughput. The commit in MyRocks includes writing to the RocksDB WAL, which, as explained above, by default incurs the latency of flushing the new WAL appends to the OS buffer. - -Since the binlog helps in recovering from some failure scenarios, MySQL can provide reliability without needing a storage WAL flush after each individual commit. MyRocks benefits from this property, disables automatic WAL flush in RocksDB, and manually calls `::FlushWAL` when requested by MySQL. diff --git a/docs/_posts/2017-09-28-rocksdb-5-8-released.markdown b/docs/_posts/2017-09-28-rocksdb-5-8-released.markdown deleted file mode 100644 index a22dcaa1c..000000000 --- a/docs/_posts/2017-09-28-rocksdb-5-8-released.markdown +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: RocksDB 5.8 Released! -layout: post -author: maysamyabandeh -category: blog ---- - -### Public API Change -* Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints. -* `Slice::compare` and BytewiseComparator `Compare` no longer accept `Slice`s containing nullptr. -* `Transaction::Get` and `Transaction::GetForUpdate` variants with `PinnableSlice` added. - -### New Features -* Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators (a short sketch follows these notes). -* Replace dynamic_cast<> (except unit tests) so people can choose to build with RTTI off. With make, release mode is by default built with -fno-rtti and debug mode is built without it. Users can override it by setting USE_RTTI=0 or 1. -* Universal compactions including the bottom level can be executed in a dedicated thread pool. This alleviates head-of-line blocking in the compaction queue, which causes write stalls, particularly in multi-instance use cases. Users can enable this feature via `Env::SetBackgroundThreads(N, Env::Priority::BOTTOM)`, where `N > 0`. -* Allow the merge operator to be called even with a single merge operand during compactions, by appropriately overriding `MergeOperator::AllowSingleOperand`. -* Add `DB::VerifyChecksum()`, which verifies the checksums in all SST files in a running DB. -* Block-based table support for disabling checksums by setting `BlockBasedTableOptions::checksum = kNoChecksum`. - -### Bug Fixes -* Fix wrong latencies in `rocksdb.db.get.micros`, `rocksdb.db.write.micros`, and `rocksdb.sst.read.micros`. -* Fix incorrect dropping of deletions during intra-L0 compaction. -* Fix transient reappearance of keys covered by range deletions when the memtable prefix bloom filter is enabled. -* Fix potentially wrong file smallest key when range deletions separated by snapshot are written together.
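A hedged illustration of `Iterator::Refresh()` (assuming `db` is an already-open `rocksdb::DB*`):

```cpp
#include <memory>

#include "rocksdb/db.h"

std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(rocksdb::ReadOptions()));
for (it->SeekToFirst(); it->Valid(); it->Next()) {
  // ... first pass over the data ...
}

// Later, instead of destroying and recreating the iterator,
// refresh it so it can observe writes made since it was created.
rocksdb::Status s = it->Refresh();
```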
diff --git a/docs/_posts/2017-12-18-17-auto-tuned-rate-limiter.markdown b/docs/_posts/2017-12-18-17-auto-tuned-rate-limiter.markdown deleted file mode 100644 index d2e6204e1..000000000 --- a/docs/_posts/2017-12-18-17-auto-tuned-rate-limiter.markdown +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Auto-tuned Rate Limiter -layout: post -author: ajkr -category: blog ---- - -### Introduction - -Our rate limiter has been hard to configure since users need to pick a value that is low enough to prevent background I/O spikes, which can impact user-visible read/write latencies. Meanwhile, picking too low a value can cause memtables and L0 files to pile up, eventually leading to writes stalling. Tuning the rate limiter has been especially difficult for users whose DB instances have different workloads, or have workloads that vary over time, or commonly both. - -To address this, in RocksDB 5.9 we released a dynamic rate limiter that adjusts itself over time according to demand for background I/O. It can be enabled simply by passing `auto_tuned=true` in the `NewGenericRateLimiter()` call. In this case `rate_bytes_per_sec` will indicate the upper-bound of the window within which a rate limit will be picked dynamically. The chosen rate limit will be much lower unless absolutely necessary, so setting this to the device's maximum throughput is a reasonable choice on dedicated hosts. - -### Algorithm - -We use a simple multiplicative-increase, multiplicative-decrease algorithm. We measure demand for background I/O as the ratio of intervals where the rate limiter is drained. There are low and high watermarks for this ratio, which will trigger a change in rate limit when breached. The rate limit can move within a window bounded by the user-specified upper-bound, and a lower-bound that we derive internally. Users can expect this lower bound to be 1-2 orders of magnitude less than the provided upper-bound (so don't provide INT64_MAX as your upper-bound), although it's subject to change. - -### Benchmark Results - -Data is ingested at 10MB/s and the rate limiter was created with 1000MB/s as its upper bound. The dynamically chosen rate limit hovers around 125MB/s. The other clustering of points at 50MB/s is due to number of compaction threads being reduced to one when there's no compaction pressure. - -![](/static/images/rate-limiter/write-KBps-series.png) - -![](/static/images/rate-limiter/auto-tuned-write-KBps-series.png) - -The following graph summarizes the above two time series graphs in CDF form. In particular, notice the p90 - p100 for background write rate are significantly lower with auto-tuned rate limiter enabled. - -![](/static/images/rate-limiter/write-KBps-cdf.png) diff --git a/docs/_posts/2017-12-19-write-prepared-txn.markdown b/docs/_posts/2017-12-19-write-prepared-txn.markdown deleted file mode 100644 index 439b3f83c..000000000 --- a/docs/_posts/2017-12-19-write-prepared-txn.markdown +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: WritePrepared Transactions -layout: post -author: maysamyabandeh -category: blog ---- - -RocksDB supports both optimistic and pessimistic concurrency controls. The pessimistic transactions make use of locks to provide isolation between the transactions. The default write policy in pessimistic transactions is _WriteCommitted_, which means that the data is written to the DB, i.e., the memtable, only after the transaction is committed. 
This policy simplified the implementation but came with some limitations in throughput, transaction size, and variety in supported isolation levels. In the below, we explain these in detail and present the other write policies, _WritePrepared_ and _WriteUnprepared_. We then dive into the design of _WritePrepared_ transactions. - -### WriteCommitted, Pros and Cons - -With _WriteCommitted_ write policy, the data is written to the memtable only after the transaction commits. This greatly simplifies the read path as any data that is read by other transactions can be assumed to be committed. This write policy, however, implies that the writes are buffered in memory in the meanwhile. This makes memory a bottleneck for large transactions. The delay of the commit phase in 2PC (two-phase commit) also becomes noticeable since most of the work, i.e., writing to memtable, is done at the commit phase. When the commit of multiple transactions are done in a serial fashion, such as in 2PC implementation of MySQL, the lengthy commit latency becomes a major contributor to lower throughput. Moreover this write policy cannot provide weaker isolation levels, such as READ UNCOMMITTED, that could potentially provide higher throughput for some applications. - -### Alternatives: _WritePrepared_ and _WriteUnprepared_ - -To tackle the lengthy commit issue, we should do memtable writes at earlier phases of 2PC so that the commit phase become lightweight and fast. 2PC is composed of Write stage, where the transaction `::Put` is invoked, the prepare phase, where `::Prepare` is invoked (upon which the DB promises to commit the transaction if later is requested), and commit phase, where `::Commit` is invoked and the transaction writes become visible to all readers. To make the commit phase lightweight, the memtable write could be done at either `::Prepare` or `::Put` stages, resulting into _WritePrepared_ and _WriteUnprepared_ write policies respectively. The downside is that when another transaction is reading data, it would need a way to tell apart which data is committed, and if they are, whether they are committed before the transaction's start, i.e., in the read snapshot of the transaction. _WritePrepared_ would still have the issue of buffering the data, which makes the memory the bottleneck for large transactions. It however provides a good milestone for transitioning from _WriteCommitted_ to _WriteUnprepared_ write policy. Here we explain the design of _WritePrepared_ policy. We will cover the changes that make the design to also supported _WriteUnprepared_ in an upcoming post. - -### _WritePrepared_ in a nutshell - -These are the primary design questions that needs to be addressed: -1) How do we identify the key/values in the DB with transactions that wrote them? -2) How do we figure if a key/value written by transaction Txn_w is in the read snapshot of the reading transaction Txn_r? -3) How do we rollback the data written by aborted transactions? - -With _WritePrepared_, a transaction still buffers the writes in a write batch object in memory. When 2PC `::Prepare` is called, it writes the in-memory write batch to the WAL (write-ahead log) as well as to the memtable(s) (one memtable per column family); We reuse the existing notion of sequence numbers in RocksDB to tag all the key/values in the same write batch with the same sequence number, `prepare_seq`, which is also used as the identifier for the transaction. 
At commit time, it writes a commit marker to the WAL, whose sequence number, `commit_seq`, will be used as the commit timestamp of the transaction. Before releasing the commit sequence number to the readers, it stores a mapping from `prepare_seq` to `commit_seq` in an in-memory data structure that we call _CommitCache_. When a transaction reads a value from the DB (tagged with `prepare_seq`), it makes use of the _CommitCache_ to figure out whether the `commit_seq` of the value is in its read snapshot. To roll back an aborted transaction, we restore the state from before the transaction by making another write that cancels out the writes of the aborted transaction. - -The _CommitCache_ is a lock-free data structure that caches the recent commit entries. Looking up the entries in the cache should be enough for almost all the transactions that commit in a timely manner. When older entries are evicted from the cache, some other data structures are still maintained to cover the corner cases for transactions that take abnormally long to finish. We will cover them in the design details below. - -### Benchmark Results -Here we present the improvements observed in MyRocks with sysbench and linkbench: -| benchmark | tps | p95 latency | cpu/query | -|---|---|---|---| -| insert | 68% | | | -| update-noindex | 30% | 38% | | -| update-index | 61% | 28% | | -| read-write | 6% | 3.5% | | -| read-only | -1.2% | -1.8% | | -| linkbench | 1.9% | | 0.6% (overall) | - -Here are also the detailed results for [In-Memory Sysbench](https://gist.github.com/maysamyabandeh/bdb868091b2929a6d938615fdcf58424) and [SSD Sysbench](https://gist.github.com/maysamyabandeh/ff94f378ab48925025c34c47eff99306) courtesy of [@mdcallag](https://github.com/mdcallag). - -Learn more [here](https://github.com/facebook/rocksdb/wiki/WritePrepared-Transactions). diff --git a/docs/_posts/2018-02-05-rocksdb-5-10-2-released.markdown b/docs/_posts/2018-02-05-rocksdb-5-10-2-released.markdown deleted file mode 100644 index 9f32d3f94..000000000 --- a/docs/_posts/2018-02-05-rocksdb-5-10-2-released.markdown +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: RocksDB 5.10.2 Released! -layout: post -author: siying -category: blog ---- - -### Public API Change -* When running `make` with the environment variable `USE_SSE` set and `PORTABLE` unset, the build will use all machine features available locally. Previously this combination only compiled SSE-related features. - -### New Features -* CRC32C is now using the 3-way pipelined SSE algorithm `crc32c_3way` on supported platforms to improve performance. The system will choose to use this algorithm on supported platforms automatically whenever possible. If PCLMULQDQ is not supported, it will fall back to the old Fast_CRC32 algorithm. -* Provide lifetime hints when writing files on Linux. This reduces hardware write-amp on storage devices supporting multiple streams. -* Add a DB stat, `NUMBER_ITER_SKIP`, which returns how many internal keys were skipped during iterations (e.g., due to being tombstones or duplicate versions of a key). -* Add PerfContext counters, `key_lock_wait_count` and `key_lock_wait_time`, which measure the number of times transactions wait on key locks and the total amount of time waiting. - -### Bug Fixes -* Fix IOError on WAL write not propagating to write group followers. -* Make iterator invalid on merge error. -* Fix performance issue in `IngestExternalFile()` affecting databases with a large number of SST files. 
-* Fix possible corruption to LSM structure when `DeleteFilesInRange()` deletes a subset of files spanned by a `DeleteRange()` marker. -* Fix `DB::Flush()` continuing to wait after the flush finishes under certain conditions. diff --git a/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown b/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown deleted file mode 100644 index ff9b1e464..000000000 --- a/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: Rocksdb Tuning Advisor -layout: post -author: poojam23 -category: blog ---- - -The performance of Rocksdb is contingent on its tuning. However, because -of the complexity of its underlying technology and a large number of -configurable parameters, a good configuration is sometimes hard to obtain. The aim of -the python command-line tool, Rocksdb Advisor, is to automate the process of -suggesting improvements in the configuration based on advice from Rocksdb -experts. - -### Overview - -Experts share their wisdom as rules comprising conditions and suggestions in the INI format (refer to -[rules.ini](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/rules.ini)). -Users provide the Rocksdb configuration that they want to improve upon (as the -familiar Rocksdb OPTIONS file — -[example](https://github.com/facebook/rocksdb/blob/main/examples/rocksdb_option_file_example.ini)) -and the path of the file which contains Rocksdb logs and statistics. -The [Advisor](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/rule_parser_example.py) -creates appropriate DataSource objects (for Rocksdb -[logs](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/db_log_parser.py), -[options](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/db_options_parser.py), -[statistics](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/db_stats_fetcher.py) etc.) -and provides them to the [Rules Engine](https://github.com/facebook/rocksdb/blob/main/tools/advisor/advisor/rule_parser.py). -The Rules Engine uses the experts' rules to parse the data sources and trigger the appropriate rules. -The Advisor's output gives information about which rules were triggered, -why they were triggered and what each of them suggests. Each suggestion -provided by a triggered rule advises some action on a Rocksdb -configuration option, for example, increase CFOptions.write_buffer_size, -set bloom_bits to 2 etc. - -### Usage - -An example command to run the tool: - -```shell -cd rocksdb/tools/advisor -python3 -m advisor.rule_parser_example --rules_spec=advisor/rules.ini --rocksdb_options=test/input_files/OPTIONS-000005 --log_files_path_prefix=test/input_files/LOG-0 --stats_dump_period_sec=20 -``` - -Sample output where a Rocksdb log-based rule has been triggered: - -```shell -Rule: stall-too-many-memtables -LogCondition: stall-too-many-memtables regex: Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+ -Suggestion: inc-bg-flush option : DBOptions.max_background_flushes action : increase suggested_values : ['2'] -Suggestion: inc-write-buffer option : CFOptions.max_write_buffer_number action : increase -scope: col_fam: -{'default'} -``` - -### Read more - -For more information, refer to [advisor](https://github.com/facebook/rocksdb/tree/main/tools/advisor/README.md). 
diff --git a/docs/_posts/2018-08-23-data-block-hash-index.markdown b/docs/_posts/2018-08-23-data-block-hash-index.markdown deleted file mode 100644 index c4b24ec2a..000000000 --- a/docs/_posts/2018-08-23-data-block-hash-index.markdown +++ /dev/null @@ -1,118 +0,0 @@ ---- -title: Improving Point-Lookup Using Data Block Hash Index -layout: post -author: fgwu -category: blog ---- -We've designed and implemented a _data block hash index_ in RocksDB that has the benefit of both reducing CPU utilization and increasing throughput for point lookup queries, with a reasonable and tunable space overhead. - -Specifically, we append a compact hash table to the end of the data block for efficient indexing. It is backward compatible with databases created without this feature. After the hash index feature is turned on, existing data will be gradually converted to the hash index format. - -Benchmarks with `db_bench` show the CPU utilization of one of the main functions in the point lookup code path, `DataBlockIter::Seek()`, is reduced by 21.8%, and the overall RocksDB throughput is increased by 10% under purely cached workloads, at an overhead of 4.6% more space. Shadow testing with Facebook production traffic shows good CPU improvements too. - - -### How to use it -Two new options are added as part of this feature: `BlockBasedTableOptions::data_block_index_type` and `BlockBasedTableOptions::data_block_hash_table_util_ratio`. - -The hash index is disabled by default unless `BlockBasedTableOptions::data_block_index_type` is set to `data_block_index_type = kDataBlockBinaryAndHash`. The hash table utilization ratio is adjustable using `BlockBasedTableOptions::data_block_hash_table_util_ratio`, which is valid only if `data_block_index_type = kDataBlockBinaryAndHash`. - - -``` -// the definitions can be found in include/rocksdb/table.h - -// The index type that will be used for the data block. -enum DataBlockIndexType : char { - kDataBlockBinarySearch = 0, // traditional block type - kDataBlockBinaryAndHash = 1, // additional hash index -}; - -// Set to kDataBlockBinaryAndHash to enable hash index -DataBlockIndexType data_block_index_type = kDataBlockBinarySearch; - -// #entries/#buckets. It is valid only when data_block_hash_index_type is -// kDataBlockBinaryAndHash. -double data_block_hash_table_util_ratio = 0.75; - -``` - - -### Data Block Hash Index Design - -The current data block format groups adjacent keys together as a restart interval. One block consists of multiple restart intervals. The byte offset of the beginning of each restart interval, i.e. a restart point, is stored in an array called the restart interval index or binary seek index. RocksDB does a binary search when performing a point lookup for keys in data blocks to find the right restart interval the key may reside in. We will use binary seek and binary search interchangeably in this post. - -In order to find the right location where the key may reside using binary search, multiple keys need to be parsed and compared. Each binary search branch triggers a CPU cache miss, costing significant CPU time. We have seen that this binary search takes up considerable CPU in production use-cases. - -![](/static/images/data-block-hash-index/block-format-binary-seek.png) - -We implemented a hash map at the end of the block to index the keys and reduce the CPU overhead of the binary search. The hash index is just an array of pointers pointing into the binary seek index. 
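To make the lookup flow concrete, here is a rough, hedged sketch of the idea; the names `HashIndexLookup`, `kNoEntry`, and `kCollision` are illustrative only and are not RocksDB's actual internal identifiers, while the two special values mirror the reserved flags mentioned under Limitations below:

```cpp
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// Illustrative sketch, not the actual RocksDB implementation. Each bucket
// stores the restart-interval index of the key hashed to it, or a marker.
constexpr uint8_t kNoEntry = 255;    // no key hashed into this bucket
constexpr uint8_t kCollision = 254;  // multiple keys hashed into this bucket

// Returns true and sets *restart_index when the hash index narrows the lookup
// to a single restart interval; returns false when the caller must fall back
// to the traditional binary seek over the restart array.
bool HashIndexLookup(const std::string& user_key,
                     const std::vector<uint8_t>& buckets,
                     uint8_t* restart_index) {
  const size_t bucket = std::hash<std::string>{}(user_key) % buckets.size();
  const uint8_t entry = buckets[bucket];
  if (entry == kNoEntry || entry == kCollision) {
    return false;  // fall back to binary seek
  }
  *restart_index = entry;  // then scan only within that restart interval
  return true;
}
```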
- -![](/static/images/data-block-hash-index/block-format-hash-index.png) - - -Each array element is considered a hash bucket when storing the location of a key (or more precisely, the restart index of the restart interval where the key resides). When multiple keys happen to hash into the same bucket (hash collision), we just mark the bucket as “collision”, so that when that key is later queried, the hash table lookup knows a hash collision happened and can fall back to the traditional binary search to find the location of the key. - -We define the hash table utilization ratio as #keys/#buckets. If the utilization ratio is 0.5 and there are 100 buckets, 50 keys are stored in the buckets. The lower the util ratio, the fewer hash collisions, and the lower the chance that a point lookup falls back to binary seek (the fall-back ratio) due to a collision. So a small util ratio does more to reduce CPU time but introduces more space overhead. - -Space overhead depends on the util ratio. Each bucket is a `uint8_t` (i.e. one byte). For a util ratio of 1, the space overhead is one byte per key, and the observed fall-back ratio is ~52%. - -![](/static/images/data-block-hash-index/hash-index-data-structure.png) - -### Things that Need Attention - -**Customized Comparator** - -The hash index hashes different keys (keys with different content, or byte sequences) into different hash values. This assumes the comparator will not treat different keys as equal if they have different content. - -The default bytewise comparator orders the keys in alphabetical order and works well with the hash index, as different keys will never be regarded as equal. However, some specially crafted comparators can treat different keys as equal. For example, say a `StringToIntComparator` converts a string into an integer and uses the integer to perform the comparison. The key strings “16” and “0x10” are equal to each other as seen by this `StringToIntComparator`, but they probably hash to different values. Later queries for one form of the key will not be able to find the existing key stored in the other form. - -We add a new function member to the comparator interface: - -``` -virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; } -``` - - -Every comparator implementation should override this function and specify the behavior of the comparator. If a comparator can regard different keys as equal, the function returns true, and as a result the hash index feature will not be enabled, and vice versa. - -NOTE: to use the hash index feature, one should 1) have a comparator that can never treat different keys as equal; and 2) override the `CanKeysWithDifferentByteContentsBeEqual()` function to return `false`, so the hash index can be enabled. - - -**Util Ratio's Impact on Data Block Cache** - -Adding the hash index to the end of the data block essentially takes up data block cache space, making the effective data block cache size smaller and increasing the data block cache miss ratio. Therefore, a very small util ratio will result in a large data block cache miss ratio, and the extra I/O may drag down the throughput gain achieved by the hash index lookup. Besides, when compression is enabled, a cache miss also incurs data block decompression, which is CPU-consuming. Therefore CPU usage may even increase if too small a util ratio is used. The best util ratio depends on workloads, cache to data ratio, disk bandwidth/latency etc. 
In our experiments, we found util ratio = 0.5 ~ 1 to be a good range to explore, bringing both CPU and throughput gains. - - -### Limitations - -As we use a `uint8_t` to store the binary seek index, i.e. the restart interval index, the total number of restart intervals cannot be more than 253 (we reserved 255 and 254 as special flags). For blocks having a larger number of restart intervals, the hash index will not be created and the point lookup will be done by traditional binary seek. - -The data block hash index only supports point lookup. We do not support range lookup. Range lookup requests will fall back to binary seek. - -RocksDB supports many types of records, such as `Put`, `Delete`, `Merge`, etc (visit [here](https://github.com/facebook/rocksdb/wiki/rocksdb-basics) for more information). Currently we only support `Put` and `Delete`, but not `Merge`. Internally we have a limited set of supported record types: - - -``` -kPutRecord, <=== supported -kDeleteRecord, <=== supported -kSingleDeleteRecord, <=== supported -kTypeBlobIndex, <=== supported -``` - -For records not supported, the searching process will fall back to the traditional binary seek. - - - -### Evaluation -To evaluate the CPU utilization reduction and isolate other factors such as disk I/O and block decompression, we first evaluate the hash index in a purely cached workload. We observe that the CPU utilization of one of the main functions in the point lookup code path, DataBlockIter::Seek(), is reduced by 21.8% and the overall throughput is increased by 10% at an overhead of 4.6% more space. - -However, a general workload is not always purely cached. So we also evaluate the performance under different levels of cache space pressure. In the following test, we use `db_bench` with RocksDB deployed on SSDs. The total DB size is 5~6GB, and it is about 14GB if decompressed. Different block cache sizes are used, ranging from 14GB down to 2GB, with an increasing cache miss ratio. - -Orange bars represent our hash index performance. We use a hash util ratio of 1.0 in this test. Block size is set to 16KiB with the restart interval set to 16. - -![](/static/images/data-block-hash-index/perf-throughput.png) -![](/static/images/data-block-hash-index/perf-cache-miss.png) - -We can see that if the cache size is greater than 8GB, the hash index can bring a throughput gain. A cache size greater than 8GB translates to a cache miss ratio smaller than 40%. So if the workload has a cache miss ratio smaller than 40%, the hash index is able to increase the throughput. - -Besides, shadow testing with Facebook production traffic shows good CPU improvements too. - diff --git a/docs/_posts/2018-11-21-delete-range.markdown b/docs/_posts/2018-11-21-delete-range.markdown deleted file mode 100644 index 96fc3562d..000000000 --- a/docs/_posts/2018-11-21-delete-range.markdown +++ /dev/null @@ -1,292 +0,0 @@ ---- -title: "DeleteRange: A New Native RocksDB Operation" -layout: post -author: -- abhimadan -- ajkr -category: blog ---- -## Motivation - -### Deletion patterns in LSM - -Deleting a range of keys is a common pattern in RocksDB. Most systems built on top of -RocksDB have multi-component key schemas, where keys sharing a common prefix are -logically related. Here are some examples. - -MyRocks is a MySQL fork using RocksDB as its storage engine. Each key's first -four bytes identify the table or index to which that key belongs. Thus dropping -a table or index involves deleting all the keys with that prefix. - -Rockssandra is a Cassandra variant that uses RocksDB as its storage engine. 
One -of its admin tool commands, `nodetool cleanup`, removes key-ranges that have been migrated -to other nodes in the cluster. - -Marketplace uses RocksDB to store product data. Its key begins with product ID, -and it stores various data associated with the product in separate keys. When a -product is removed, all these keys must be deleted. - -When we decide what to improve, we try to find a use case that's common across -users, since we want to build a generally useful system, not one that has many -one-off features for individual users. The range deletion pattern is common as -illustrated above, so from this perspective it's a good target for optimization. - -### Existing mechanisms: challenges and opportunities - -The most common pattern we see is scan-and-delete, i.e., advance an iterator -through the to-be-deleted range, and issue a `Delete` for each key. This is -slow (involves read I/O) so cannot be done in any critical path. Additionally, -it creates many tombstones, which slows down iterators and doesn't offer a deadline -for space reclamation. - -Another common pattern is using a custom compaction filter that drops keys in -the deleted range(s). This deletes the range asynchronously, so cannot be used -in cases where readers must not see keys in deleted ranges. Further, it has the -disadvantage of outputting tombstones to all but the bottom level. That's -because compaction cannot detect whether dropping a key would cause an older -version at a lower level to reappear. - -If space reclamation time is important, or it is important that the deleted -range not affect iterators, the user can trigger `CompactRange` on the deleted -range. This can involve arbitrarily long waits in the compaction queue, and -increases write-amp. By the time it's finished, however, the range is completely -gone from the LSM. - -`DeleteFilesInRange` can be used prior to compacting the deleted range as long -as snapshot readers do not need to access them. It drops files that are -completely contained in the deleted range. That saves write-amp because, in -`CompactRange`, the file data would have to be rewritten several times before it -reaches the bottom of the LSM, where tombstones can finally be dropped. - -In addition to the above approaches having various drawbacks, they are quite -complicated to reason about and implement. In an ideal world, deleting a range -of keys would be (1) simple, i.e., a single API call; (2) synchronous, i.e., -when the call finishes, the keys are guaranteed to be wiped from the DB; (3) low -latency so it can be used in critical paths; and (4) a first-class operation -with all the guarantees of any other write, like atomicity, crash-recovery, etc. - -## v1: Getting it to work - -### Where to persist them? - -The first place we thought about storing them is inline with the data blocks. -We could not think of a good way to do it, however, since the start of a range -tombstone covering a key could be anywhere, making binary search impossible. -So, we decided to investigate segregated storage. - -A second solution we considered is appending to the manifest. This file is -append-only, periodically compacted, and stores metadata like the level to which -each SST belongs. This is tempting because it leverages an existing file, which -is maintained in the background and fully read when the DB is opened. However, -it conceptually violates the manifest's purpose, which is to store metadata. 
It -also has no way to detect when a range tombstone no longer covers anything and -is droppable. Further, it'd be possible for keys above a range tombstone to disappear -when they have their seqnums zeroed upon compaction to the bottommost level. - -A third candidate is using a separate column family. This has similar problems -to the manifest approach. That is, we cannot easily detect when a range -tombstone is obsolete, and seqnum zeroing can cause a key -to go from above a range tombstone to below, i.e., disappearing. The upside is -we can reuse logic for memory buffering, consistent reads/writes, etc. - -The problems with the second and third solutions indicate a need for range -tombstones to be aware of flush/compaction. An easy way to achieve this is put -them in the SST files themselves - but not in the data blocks, as explained for -the first solution. So, we introduced a separate meta-block for range tombstones. -This resolved the problem of when to obsolete range tombstones, as it's simple: -when they're compacted to the bottom level. We also reused the LSM invariants -that newer versions of a key are always in a higher level to prevent the seqnum -zeroing problem. This approach has the side benefit of constraining the range -tombstones seen during reads to ones in a similar key-range. - -![](/static/images/delrange/delrange_sst_blocks.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -*When there are range tombstones in an SST, they are segregated in a separate meta-block* -{: style="text-align: center"} - -![](/static/images/delrange/delrange_key_schema.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -*Logical range tombstones (left) and their corresponding physical key-value representation (right)* -{: style="text-align: center"} - -### Write path - -`WriteBatch` stores range tombstones in its buffer which are logged to the WAL and -then applied to a dedicated range tombstone memtable during `Write`. Later in -the background the range tombstone memtable and its corresponding data memtable -are flushed together into a single SST with a range tombstone meta-block. SSTs -periodically undergo compaction which rewrites SSTs with point data and range -tombstones dropped or merged wherever possible. - -We chose to use a dedicated memtable for range tombstones. The memtable -representation is always skiplist in order to minimize overhead in the usual -case, which is the memtable contains zero or a small number of range tombstones. -The range tombstones are segregated to a separate memtable for the same reason -we segregated range tombstones in SSTs. That is, we did not know how to -interleave the range tombstone with point data in a way that we would be able to -find it for arbitrary keys that it covers. - -![](/static/images/delrange/delrange_write_path.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 70%"} - -*Lifetime of point keys and range tombstones in RocksDB* -{: style="text-align: center"} - -During flush and compaction, we chose to write out all non-obsolete range -tombstones unsorted. Sorting by a single dimension is easy to implement, but -doesn't bring asymptotic improvement to queries over range data. Ideally, we -want to store skylines (see “Read Path” subsection below) computed over our ranges so we can binary search. 
-However, a couple of concerns cause doing this in flush and compaction to feel -unsatisfactory: (1) we need to store multiple skylines, one for each snapshot, -which further complicates the range tombstone meta-block encoding; and (2) even -if we implement this, the range tombstone memtable still needs to be linearly -scanned. Given these concerns we decided to defer collapsing work to the read -side, hoping a good caching strategy could optimize this at some future point. - - -### Read path - -In point lookups, we aggregate range tombstones in an unordered vector as we -search through live memtable, immutable memtables, and then SSTs. When a key is -found that matches the lookup key, we do a scan through the vector, checking -whether the key is deleted. - -In iterators, we aggregate range tombstones into a skyline as we visit live -memtable, immutable memtables, and SSTs. The skyline is expensive to construct but fast to determine whether a key is covered. The skyline keeps track of the most recent range tombstone found to optimize `Next` and `Prev`. - -|![](/static/images/delrange/delrange_uncollapsed.png) |![](/static/images/delrange/delrange_collapsed.png) | - -*([Image source: Leetcode](https://leetcode.com/problems/the-skyline-problem/description/)) The skyline problem involves taking building location/height data in the -unsearchable form of A and converting it to the form of B, which is -binary-searchable. With overlapping range tombstones, to achieve efficient -searching we need to solve an analogous problem, where the x-axis is the -key-space and the y-axis is the sequence number.* -{: style="text-align: center"} - -### Performance characteristics - -For the v1 implementation, writes are much faster compared to the scan and -delete (optionally within a transaction) pattern. `DeleteRange` only logs to WAL -and applies to memtable. Logging to WAL always `fflush`es, and optionally -`fsync`s or `fdatasync`s. Applying to memtable is always an in-memory operation. -Since range tombstones have a dedicated skiplist memtable, the complexity of inserting is O(log(T)), where T is the number of existing buffered range tombstones. - -Reading in the presence of v1 range tombstones, however, is much slower than reads -in a database where scan-and-delete has happened, due to the linear scan over -range tombstone memtables/meta-blocks. - -Iterating in a database with v1 range tombstones is usually slower than in a -scan-and-delete database, although the gap lessens as iterations grow longer. -When an iterator is first created and seeked, we construct a skyline over its -tombstones. This operation is O(T\*log(T)) where T is the number of tombstones -found across live memtable, immutable memtable, L0 files, and one file from each -of the L1+ levels. However, moving the iterator forwards or backwards is simply -a constant-time operation (excluding edge cases, e.g., many range tombstones -between consecutive point keys). - -## v2: Making it fast - -`DeleteRange`’s negative impact on read perf is a barrier to its adoption. The -root cause is range tombstones are not stored or cached in a format that can be -efficiently searched. We needed to design DeleteRange so that we could maintain -write performance while making read performance competitive with workarounds -used in production (e.g., scan-and-delete). 
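For reference, the operation itself is a single write-path call; here is a minimal, hedged sketch (the DB path and key prefixes are made up for illustration) of replacing the scan-and-delete pattern with `DeleteRange`:

```cpp
#include <cassert>
#include "rocksdb/db.h"

int main() {
  rocksdb::DB* db;
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/delrange_demo", &db);
  assert(s.ok());

  // Instead of iterating over ["user1|", "user2|") and issuing a Delete()
  // per key (scan-and-delete), issue one atomic, WAL-logged range deletion.
  s = db->DeleteRange(rocksdb::WriteOptions(), db->DefaultColumnFamily(),
                      "user1|", "user2|");
  assert(s.ok());

  delete db;
  return 0;
}
```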
- -### Representations - -The key idea of the redesign is that, instead of globally collapsing range tombstones, - we can locally “fragment” them for each SST file and memtable to guarantee that: - -* no range tombstones overlap; and -* range tombstones are ordered by start key. - -Combined, these properties make range tombstones binary searchable. This - fragmentation will happen on the read path, but unlike the previous design, we can - easily cache many of these range tombstone fragments on the read path. - -### Write path - -The write path remains unchanged. - -### Read path - -When an SST file is opened, its range tombstones are fragmented and cached. For point - lookups, we binary search each file's fragmented range tombstones for one that covers - the lookup key. Unlike the old design, once we find a tombstone, we no longer need to - search for the key in lower levels, since we know that any keys on those levels will be - covered (though we do still check the current level since there may be keys written after - the range tombstone). - -For range scans, we create iterators over all the fragmented range - tombstones and store them in a list, seeking each one to cover the start key of the range - scan (if possible), and query each encountered key in this structure as in the old design, - advancing range tombstone iterators as necessary. In effect, we implicitly create a skyline. - This requires significantly less work on iterator creation, but since each memtable/SST has -its own range tombstone iterator, querying range tombstones requires key comparisons (and -possibly iterator increments) for several iterators (as opposed to v1, where we had a global -collapsed representation of all range tombstones). As a result, very long range scans may become - slower than before, but short range scans are an order of magnitude faster, which are the - more common class of range scan. - -## Benchmarks - -To understand the performance of this new design, we used `db_bench` to compare point lookup, short range scan, - and long range scan performance across: - -* the v1 DeleteRange design, -* the scan-and-delete workaround, and -* the v2 DeleteRange design. - -In these benchmarks, we used a database with 5 million data keys, and 10000 range tombstones (ignoring -those dropped during compaction) that were written in regular intervals after 4.5 million data keys were written. -Writing the range tombstones ensures that most of them are not compacted away, and we have more tombstones -in higher levels that cover keys in lower levels, which allows the benchmarks to exercise more interesting behavior -when reading deleted keys. - -Point lookup benchmarks read 100000 keys from a database using `readwhilewriting`. Range scan benchmarks used -`seekrandomwhilewriting` and seeked 100000 times, and advanced up to 10 keys away from the seek position for short range scans, and advanced up to 1000 keys away from the seek position for long range scans. - -The results are summarized in the tables below, averaged over 10 runs (note the -different SHAs for v1 benchmarks are due to a new `db_bench` flag that was added in order to compare performance with databases with no tombstones; for brevity, those results are not reported here). Also note that the block cache was large enough to hold the entire db, so the large throughput is due to limited I/Os and little time spent on decompression. The range tombstone blocks are always pinned uncompressed in memory. 
We believe these setup details should not affect relative performance between versions. - -### Point Lookups - -|Name |SHA |avg micros/op |avg ops/sec | -|v1 |35cd754a6 |1.3179 |759,830.90 | -|scan-del |7528130e3 |0.6036 |1,667,237.70 | -|v2 |7528130e3 |0.6128 |1,634,633.40 | - -### Short Range Scans - -|Name |SHA |avg micros/op |avg ops/sec | -|v1 |0ed738fdd |6.23 |176,562.00 | -|scan-del |PR 4677 |2.6844 |377,313.00 | -|v2 |PR 4677 |2.8226 |361,249.70 | - -### Long Range scans - -|Name |SHA |avg micros/op |avg ops/sec | -|v1 |0ed738fdd |52.7066 |19,074.00 | -|scan-del |PR 4677 |38.0325 |26,648.60 | -|v2 |PR 4677 |41.2882 |24,714.70 | - -## Future Work - -Note that memtable range tombstones are fragmented every read; for now this is acceptable, - since we expect there to be relatively few range tombstones in memtables (and users can - enforce this by keeping track of the number of memtable range deletions and manually flushing - after it passes a threshold). In the future, a specialized data structure can be used for storing - range tombstones in memory to avoid this work. - -Another future optimization is to create a new format version that requires range tombstones to - be stored in a fragmented form. This would save time when opening SST files, and when `max_open_files` -is not -1 (i.e., files may be opened several times). - -## Acknowledgements - -Special thanks to Peter Mattis and Nikhil Benesch from Cockroach Labs, who were early users of -DeleteRange v1 in production, contributed the cleanest/most efficient v1 aggregation implementation, found and fixed bugs, and provided initial DeleteRange v2 design and continued help. - -Thanks to Huachao Huang and Jinpeng Zhang from PingCAP for early DeleteRange v1 adoption, bug reports, and fixes. diff --git a/docs/_posts/2019-03-08-format-version-4.markdown b/docs/_posts/2019-03-08-format-version-4.markdown deleted file mode 100644 index ce657696c..000000000 --- a/docs/_posts/2019-03-08-format-version-4.markdown +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: format_version 4 -layout: post -author: maysamyabandeh -category: blog ---- - -The data blocks in RocksDB consist of a sequence of key/values pairs sorted by key, where the pairs are grouped into _restart intervals_ specified by `block_restart_interval`. Up to RocksDB version 5.14, where the latest and default value of `BlockBasedTableOptions::format_version` is 2, the format of index and data blocks are the same: index blocks use the same key format of <`user_key`,`seq`> and encode pointers to data blocks, <`offset`,`size`>, to a byte string and use them as values. The only difference is that the index blocks use `index_block_restart_interval` for the size of _restart intervals_. `format_version=`3,4 offer more optimized, backward-compatible, yet forward-incompatible format for index blocks. - -### Pros - -Using `format_version`=4 significantly reduces the index block size, in some cases around 4-5x. This frees more space in block cache, which would result in higher hit rate for data and filter blocks, or offer the same performance with a smaller block cache size. - -### Cons - -Being _forward-incompatible_ means that if you enable `format_version=`4 you cannot downgrade to a RocksDB version lower than 5.16. - -### How to use it? - -- `BlockBasedTableOptions::format_version` = 4 -- `BlockBasedTableOptions::index_block_restart_interval` = 16 - -### What is format_version 3? -(Since RocksDB 5.15) In most cases, the sequence number `seq` is not necessary for keys in the index blocks. 
In such cases, `format_version`=3 skips encoding the sequence number and sets `index_key_is_user_key` in TableProperties, which is used by the reader to know how to decode the index block. - -### What is format_version 4? -(Since RocksDB 5.16) Changes the format of index blocks by delta encoding the index values, which are the block handles. This saves the encoding of `BlockHandle::offset` of the non-head index entries in each restart interval. If used, `TableProperties::index_value_is_delta_encoded` is set, which is used by the reader to know how to decode the index block. The format of each key is (shared_size, non_shared_size, shared, non_shared). The format of each value, i.e., block handle, is (offset, size) whenever the shared_size is 0, which includes the first entry in each restart interval. Otherwise the format is delta-size = block handle size - size of the last block handle. - -The index format in `format_version=4` would be as follows: - - restart_point 0: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz) - restart_point 1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz) - ... - restart_point n-1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz) - where k is a key, v is a value, and its encoding is in parentheses. - diff --git a/docs/_posts/2019-08-15-unordered-write.markdown b/docs/_posts/2019-08-15-unordered-write.markdown deleted file mode 100644 index 5f0eb2880..000000000 --- a/docs/_posts/2019-08-15-unordered-write.markdown +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: Higher write throughput with `unordered_write` feature -layout: post -author: maysamyabandeh -category: blog ---- - -Since RocksDB 6.3, the `unordered_write=`true option together with WritePrepared transactions offers 34-42% higher write throughput compared to vanilla RocksDB. If the application can handle more relaxed ordering guarantees, the gain in throughput increases to 63-131%. - -### Background - -Currently the RocksDB API delivers the following powerful guarantees: -- Atomic reads: Either all of a write batch is visible to reads or none of it. -- Read-your-own-writes: When a write thread returns to the user, a subsequent read by the same thread will be able to see its own writes. -- Immutable Snapshots: The reads visible to the snapshot are immutable in the sense that they will not be affected by any in-flight or future writes. - -### `unordered_write` - -The `unordered_write` feature, when turned on, relaxes the default guarantees of RocksDB. While it still gives the read-your-own-write property, neither the atomic reads nor the immutable snapshot properties are provided any longer. However, RocksDB users can still get read-your-own-write and immutable snapshots when using this feature in conjunction with TransactionDB configured with WritePrepared transactions and `two_write_queues`. You can read [here](https://github.com/facebook/rocksdb/wiki/unordered_write) to learn about the design of `unordered_write` and [here](https://github.com/facebook/rocksdb/wiki/WritePrepared-Transactions) to learn more about WritePrepared transactions. - -### How to use it? 
- -To get the same guarantees as vanilla RocksDB: - - Options options; - options.unordered_write = true; - options.two_write_queues = true; - DB* db; - { - TransactionDBOptions txn_db_options; - txn_db_options.write_policy = TxnDBWritePolicy::WRITE_PREPARED; - txn_db_options.skip_concurrency_control = true; - TransactionDB* txn_db; - TransactionDB::Open(options, txn_db_options, kDBPath, &txn_db); - db = txn_db; - } - db->Write(...); - -To get relaxed guarantees: - - Options options; - options.unordered_write = true; - DB* db; - DB::Open(options, kDBPath, &db); - db->Write(...); - -### Benchmarks - - TEST_TMPDIR=/dev/shm/ ~/db_bench --benchmarks=fillrandom --threads=32 --num=10000000 -max_write_buffer_number=16 --max_background_jobs=64 --batch_size=8 --writes=3000000 -level0_file_num_compaction_trigger=99999 --level0_slowdown_writes_trigger=99999 --level0_stop_writes_trigger=99999 -enable_pipelined_write=false -disable_auto_compactions --transaction_db=true --unordered_write=1 --disable_wal=0 - -Throughput with `unordered_write`=true and using WritePrepared transactions: -- WAL: +42% -- No-WAL: +34% -Throughput with `unordered_write`=true only: -- WAL: +63% -- No-WAL: +131% diff --git a/docs/_posts/2021-04-12-universal-improvements.markdown b/docs/_posts/2021-04-12-universal-improvements.markdown deleted file mode 100644 index fa4e9d463..000000000 --- a/docs/_posts/2021-04-12-universal-improvements.markdown +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: (Call For Contribution) Make Universal Compaction More Incremental -layout: post -author: sdong -category: blog ---- - -### Motivation - -Universal Compaction is an important compaction style, but few changes were made after we made the structure multi-leveled. Yet the major restriction of always compacting full sorted runs has not been relaxed. Compared to Leveled Compaction, where we usually compact only several SST files together, in universal compaction we frequently compact GBs of data. This gap causes two issues: 1. it makes it harder to unify universal and leveled compaction; 2. data is periodically fully compacted, and in the meantime space is doubled. To ease the problem, we can break the restriction and do something similar to leveled compaction, bringing it closer to a unified compaction. - -We are calling for help with making the following improvements. - - -### How Universal Compaction Works - -In universal, whole levels are compacted together to satisfy two conditions (See [wiki page](https://github.com/facebook/rocksdb/wiki/Universal-Compaction) for more details): - -1. total size / bottommost level size > a threshold, or -2. total number of sorted runs (non-0 levels + L0 files) is within a threshold - -1 is to limit extra space overhead used for dead data and 2 is for read performance. - -If 1 is triggered, a full compaction will likely be triggered. If 2 is triggered, RocksDB compacts some sorted runs to bring the number down. It does this using a simple heuristic so that fewer writes are needed for that purpose over time: it starts by compacting smaller files, but if the total size to compact is similar to or larger than the size of the next level, it will take that level too, and so on (whether it is the best heuristic is another question and we’ve never seriously looked at it). - -### How We Can Improve? - -Let’s start from condition 1. Here we do a full compaction, but it is not necessary. A simple optimization would be to compact so that just enough files are merged into the bottommost level (Lmax) to satisfy condition 1. 
It would work if we only need to pick some files from Lmax-1, or, if it is cheaper over time, we can pick some files from other levels too. - -Then condition 2. If we finish condition 1, there might be holes in some ranges in older levels. These holes might make it possible to fix the LSM-tree for condition 2 by compacting only some sub-ranges. RocksDB can take single files into consideration and apply a more sophisticated heuristic. - -This new approach makes universal compaction closer to leveled compaction. The operation for 1 is closer to how Leveled compaction triggers Lmax-1 to Lmax compaction. And 2 can potentially be implemented as something similar to level picking in Leveled Compaction. In fact, all of these file-picking strategies can co-exist in one single compaction style and there is no fundamental conflict preventing that. - -### Limitations - -There are a few limitations: - -* Periodic automatic full compaction is unpleasant but at the same time is pleasant in another way. Some users might use it to reason that everything is periodically collapsed so dead data is gone and old data is rewritten. We need to make sure periodic compaction continues to work for that purpose. -* L0 to the first non-L0 level compaction is the first time data is partitioned in the LSM-tree so that incremental compaction by range is possible. We might need to do more of these compactions in order to make incremental compaction possible, which will increase compaction work slightly. -* Compacting a subset of a level would introduce some extra overhead for unaligned files, just as in leveled compaction. More SST boundary cutting heuristics can reduce this overhead, but it will still be there. - -But I believe the benefits would outweigh the limitations. Reducing temporary space doubling and moving toward unified compaction would be important achievements. - -### Interested in Help? - -Compaction is the core of an LSM-tree, but its improvements are far overdue. If you are a user of universal compaction and would be able to benefit from those improvements, we will be happy to work with you on speeding up the project and bringing them to RocksDB sooner. Feel free to communicate with us in [this issue](https://github.com/facebook/rocksdb/issues/8181). diff --git a/docs/_posts/2021-05-26-integrated-blob-db.markdown b/docs/_posts/2021-05-26-integrated-blob-db.markdown deleted file mode 100644 index 9f3a22fa2..000000000 --- a/docs/_posts/2021-05-26-integrated-blob-db.markdown +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: Integrated BlobDB -layout: post -author: ltamasi -category: blog ---- -## Background - -BlobDB is essentially RocksDB for large-value use cases. The basic idea, which was proposed in the [WiscKey paper](https://www.usenix.org/system/files/conference/fast16/fast16-papers-lu.pdf), is key-value separation: by storing large values in dedicated blob files and storing only small pointers to them in the LSM tree, we avoid copying the values over and over again during compaction, thus reducing write amplification. Historically, BlobDB supported only FIFO and TTL based use cases that can tolerate some data loss. In addition, it was incompatible with many widely used RocksDB features, and required users to adopt a custom API. In 2020, we decided to rearchitect BlobDB from the ground up, taking the lessons learned from WiscKey and the original BlobDB but also drawing inspiration and incorporating ideas from other similar systems. 
Our goals were to eliminate the above limitations and to create a new integrated version that enables customers to use the well-known RocksDB API, has feature parity with the core of RocksDB, and offers better performance. This new implementation is now available and provides the following improvements over the original: - -* **API.** In contrast with the legacy BlobDB implementation, which had its own `StackableDB`-based interface (`rocksdb::blob_db::BlobDB`), the new version can be used via the well-known `rocksdb::DB` API, and can be configured simply by using a few column family options. -* **Consistency.** With the integrated BlobDB implementation, RocksDB’s consistency guarantees and various write options (like using the WAL or synchronous writes) now apply to blobs as well. Moreover, the new BlobDB keeps track of blob files in the RocksDB MANIFEST. -* **Write performance.** When using the old BlobDB, blobs are extracted and immediately written to blob files by the BlobDB layer *in the application thread*. This has multiple drawbacks from a performance perspective: first, it requires synchronization; second, it means that expensive operations like compression are performed in the application thread; and finally, it involves flushing the blob file after each blob. The new code takes a completely different approach by *offloading blob file building to RocksDB’s background jobs*, i.e. flushes and compactions. This means that similarly to SSTs, any given blob file is now written by a single background thread, eliminating the need for locking, flushing, or performing compression in the foreground. Note that this approach is also a better fit for network-based file systems where small writes might be expensive and opens up the possibility of file format optimizations that involve buffering (like dictionary compression). -* **Read performance.** The old code relies on each read (i.e. `Get`, `MultiGet`, or iterator) taking a snapshot and uses those snapshots when deciding which obsolete blob files can be removed. The new BlobDB improves this by generalizing RocksDB’s Version concept, which historically referred to the set of live SST files at a given point in time, to include the set of live blob files as well. This has performance benefits like [making the read path mostly lock-free by utilizing thread-local storage](https://rocksdb.org/blog/2014/06/27/avoid-expensive-locks-in-get.html). We have also introduced a blob file cache that can be utilized to keep frequently accessed blob files open. -* **Garbage collection.** Key-value separation means that if a key pointing to a blob gets overwritten or deleted, the blob becomes unreferenced garbage. To be able to reclaim this space, BlobDB now has garbage collection capabilities. GC is integrated into the compaction process and works by relocating valid blobs residing in old blob files as they are encountered during compaction. Blob files can be marked obsolete (and eventually deleted in one shot) once they contain nothing but garbage. This is more efficient than the method used by WiscKey, which involves performing a `Get` operation to find out whether a blob is still referenced followed by a `Put` to update the reference, which in turn results in garbage collection competing and potentially conflicting with the application’s writes. -* **Feature parity with the RocksDB core.** The new BlobDB supports way more features than the original and is near feature parity with vanilla RocksDB. 
In particular, we support all basic read/write APIs (with the exception of `Merge`, which is coming soon), recovery, compression, atomic flush, column families, compaction filters, checkpoints, backup/restore, transactions, per-file checksums, and the SST file manager. In addition, the new BlobDB’s options can be dynamically adjusted using the `SetOptions` interface. - -## API - -The new BlobDB can be configured (on a per-column family basis if needed) simply by using the following options: - -* `enable_blob_files`: set it to `true` to enable key-value separation. -* `min_blob_size`: values at or above this threshold will be written to blob files during flush or compaction. -* `blob_file_size`: the size limit for blob files. -* `blob_compression_type`: the compression type to use for blob files. All blobs in the same file are compressed using the same algorithm. -* `enable_blob_garbage_collection`: set this to `true` to make BlobDB actively relocate valid blobs from the oldest blob files as they are encountered during compaction. -* `blob_garbage_collection_age_cutoff`: the threshold that the GC logic uses to determine which blob files should be considered “old.” For example, the default value of 0.25 signals to RocksDB that blobs residing in the oldest 25% of blob files should be relocated by GC. This parameter can be tuned to adjust the trade-off between write amplification and space amplification. - -The above options are all dynamically adjustable via the `SetOptions` API; changing them will affect subsequent flushes and compactions but not ones that are already in progress. - -In terms of compaction styles, we recommend using leveled compaction with BlobDB. The rationale behind universal compaction in general is to provide lower write amplification at the expense of higher read amplification; however, as we will see later in the Performance section, BlobDB can provide very low write amp and good read performance with leveled compaction. Therefore, there is really no reason to take the hit in read performance that comes with universal compaction. - -In addition to the above, consider tuning the following non-BlobDB specific options: - -* `write_buffer_size`: this is the memtable size. You might want to increase it for large-value workloads to ensure that SST and blob files contain a decent number of keys. -* `target_file_size_base`: the target size of SST files. Note that even when using BlobDB, it is important to have an LSM tree with a “nice” shape and multiple levels and files per level to prevent heavy compactions. Since BlobDB extracts and writes large values to blob files, it makes sense to make this parameter significantly smaller than the memtable size. One guideline is to set `blob_file_size` to the same value as `write_buffer_size` (adjusted for compression if needed) and make `target_file_size_base` proportionally smaller based on the ratio of key size to value size. -* `max_bytes_for_level_base`: consider setting this to a multiple (e.g. 8x or 10x) of `target_file_size_base`. - -As mentioned above, the new BlobDB now also supports compaction filters. Key-value separation actually enables an optimization here: if the compaction filter of an application can make a decision about a key-value solely based on the key, it is unnecessary to read the value from the blob file. Applications can take advantage of this optimization by implementing the new `FilterBlobByKey` method of the `CompactionFilter` interface. 
This method gets called by RocksDB first whenever it encounters a key-value where the value is stored in a blob file. If this method returns a “final” decision like `kKeep`, `kRemove`, `kChangeValue`, or `kRemoveAndSkipUntil`, RocksDB will honor that decision; on the other hand, if the method returns `kUndetermined`, RocksDB will read the blob from the blob file and call `FilterV2` with the value in the usual fashion. - -## Performance - -We tested the performance of the new BlobDB for six different value sizes between 1 KB and 1 MB using a customized version of our [standard benchmark suite](https://github.com/facebook/rocksdb/wiki/Performance-Benchmarks) on a box with an 18-core Skylake DE CPU (running at 1.6 GHz, with hyperthreading enabled), 64 GB RAM, a 512 GB boot SSD, and two 1.88 TB M.2 SSDs in a RAID0 configuration for data. The RocksDB version used was equivalent to 6.18.1, with some benchmarking and statistics related enhancements. Leveled and universal compaction without key-value separation were used as reference points. Note that for simplicity, we use “leveled compaction” and “universal compaction” as shorthand for leveled and universal compaction without key-value separation, respectively, and “BlobDB” for BlobDB with leveled compaction. - -Our benchmarks cycled through six different workloads: two write-only ones (initial load and overwrite), two read/write ones (point lookup/write mix and range scan/write mix), and finally two read-only ones (point lookups and range scans). The first two phases performed a fixed amount of work (see below), while the final four were run for a fixed amount of time, namely 30 minutes each. Each phase other than the first one started with the database state left behind by the previous one. Here’s a brief description of the workloads: - -* **Initial load**: this workload has two distinct stages, a single-threaded random write stage during which compactions are disabled (so all data is flushed to L0, where it remains for the rest of the stage), followed by a full manual compaction. The random writes are performed with load-optimized settings, namely using the vector memtable implementation and with concurrent memtable writes and WAL disabled. This stage was used to populate the database with 1 TB worth of raw values, e.g. 2^30 (~1 billion) 1 KB values or 2^20 (~1 million) 1 MB values. -* **Overwrite**: this is a multi-threaded random write workload using the usual skiplist memtable, with compactions, WAL, and concurrent memtable writes enabled. In our tests, 16 writer threads were used. The total number of writes was set to the same number as in the initial load stage and split up evenly between the writer threads. For instance, for the 1 MB value size, we had 2^20 writes divided up between the 16 threads, resulting in each thread performing 2^16 write operations. At the end of this phase, a “wait for compactions” step was added to prevent this workload from exhibiting artificially low write amp or conversely, the next phase showing inflated write amp. -* **Point lookup/write mix**: a single writer thread performing random writes while N (in our case, 16) threads perform random point lookups. WAL is enabled and all writes are synced. -* **Range scan/write mix**: similar to the above, with one writer thread and N reader threads (where N was again set to 16 in our tests). The reader threads perform random range scans, with 10 `Next` calls per `Seek`. Again, WAL is enabled, and sync writes are used. 
-* **Point lookups (read-only)**: N=16 threads perform random point lookups. -* **Range scans (read-only)**: N=16 threads execute random range scans, with 10 `Next`s per `Seek` like above. - -With that out of the way, let’s see how the new BlobDB performs against traditional leveled and universal compaction. In the next few sections, we’ll be looking at write amplification as well as read and write performance. We’ll also briefly compare the write performance of the new BlobDB with the legacy implementation. - -### Write amplification - -Reducing write amp is the original motivation for key-value separation. Here, we follow RocksDB’s definition of write amplification (as used in compaction statistics and the info log). That is, we define write amp as the total amount of data written by flushes and compactions divided by the amount of data written by flushes, where “data written” includes SST files and blob files as well (if applicable). The following charts show that BlobDB significantly reduces write amplification for all of our (non-read only) workloads. - -For the initial load, where due to the nature of the workload both leveled and universal already have a low write amp factor of 1.6, BlobDB has a write amp close to the theoretical minimum of 1.0, namely in the 1.0..1.02 range, depending on value size. How is this possible? Well, the trick is that when key-value separation is used, the full compaction step only has to sort the keys but not the values. This results in a write amp that is about **36% lower** than the already low write amp you get with either leveled or universal. - -In the case of the overwrite workload, BlobDB had a write amp between 1.4 and 1.7 depending on value size. This is around **75-78% lower** than the write amp of leveled compaction (6.1 to 6.8) and **70-77% lower** than universal (5.7 to 6.2); for this workload, there wasn’t a huge difference between the performance of leveled and universal. - -When it comes to the point lookup/write mix workload, BlobDB had a write amp between 1.4 and 1.8. This is **83-88% lower** than the write amp of leveled compaction, which had values between 10.8 and 12.5. Universal fared much better than leveled under this workload, and had write amp in the 2.2..6.6 range; however, BlobDB still provided significant gains for all value sizes we tested: namely, write amp was **18-77% lower** than that of universal, depending on value size. - -As for the range scan/write mix workload, BlobDB again had a write amp between 1.4 and 1.8, while leveled had values between 13.6 and 14.9, and universal was between 2.8 and 5.0. In other words, BlobDB’s write amp was **88-90% lower** than that of leveled, and **46-70% lower** than that of universal. - -![Write amplification](/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Amp.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -### Write performance - -In terms of write performance, there are other factors to consider besides write amplification. The following charts show some interesting metrics for the two write-only workloads (initial load and overwrite). As discussed earlier, these two workloads perform a fixed amount of work; the two charts in the top row show how long it took BlobDB, leveled, and universal to complete that work. Note that each bar is broken down into two, corresponding to the two stages of each workload (random write and full compaction for initial load, and random write and waiting for compactions for overwrite). 
For initial load, note that the random write stage takes the same amount of time regardless of which algorithm is used. This is not surprising considering the fact that compactions are disabled during this stage and thus RocksDB is simply writing L0 files (and in BlobDB's case, blob files) as fast as it can. The second stage, on the other hand, is very different: as mentioned above, BlobDB essentially only needs to read, sort, and rewrite the keys during compaction, which can be done much, much faster (with 1 MB values, more than a hundred times faster) than doing the same for large key-values. Due to this, initial load completed **2.3x to 4.7x faster** overall when using BlobDB.

As for the overwrite workload, BlobDB performs much better during both stages. The two charts in the bottom row help explain why. In the case of both leveled and universal compaction, compactions can't keep up with the write rate, which eventually leads to back pressure in the form of write stalls. As shown in the chart below, both leveled and universal stall between ~40% and ~70% of the time; on the other hand, BlobDB is stall-free except for the largest value size tested (1 MB). This naturally leads to higher throughput, namely **2.1x to 3.5x higher** throughput compared to leveled, and **1.6x to 3.0x higher** throughput compared to universal. The overwrite time chart also shows that the catch-up stage that waits for all compactions to finish is much shorter (and in fact, at larger value sizes, negligible) with BlobDB.

![Write performance](/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Perf.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

### Read/write and read-only performance

The charts below show the read performance (in terms of operations per second) of BlobDB versus leveled and universal compaction under the two read/write workloads and the two read-only workloads. BlobDB meets or exceeds the read performance of leveled compaction, except for workloads involving range scans at the two smallest value sizes tested (1 KB and 4 KB). It also provides better (in some cases, much better) read performance than universal across the board. In particular, BlobDB provides up to **1.4x higher** read performance than leveled (for larger values), and up to **5.6x higher** than universal.

![Read-write and read-only performance](/static/images/integrated-blob-db/BlobDB_Benchmarks_RW_RO_Perf.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

### Comparing the two BlobDB implementations

To compare the write performance of the new BlobDB with the legacy implementation, we ran two versions of the first (single-threaded random write) stage of the initial load benchmark using 1 KB values: one with WAL disabled, and one with WAL enabled. The new implementation completed the load **4.6x faster** than the old one without WAL, and **2.3x faster** with WAL.

![Comparing the two BlobDB implementations](/static/images/integrated-blob-db/BlobDB_Benchmarks_Legacy_Vs_Integrated.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

## Future work

There are a few remaining features that are not yet supported by the new BlobDB. The most important one is `Merge` (and the related `GetMergeOperands` API); in addition, we don't currently support the `EventListener` interface, the `GetLiveFilesMetaData` and `GetColumnFamilyMetaData` APIs, secondary instances, and ingestion of blob files.
We will continue to work on closing this gap.

We also have further plans when it comes to performance. These include optimizing garbage collection, introducing a dedicated cache for blobs, improving iterator and `MultiGet` performance, and evolving the blob file format, amongst others.

diff --git a/docs/_posts/2021-05-26-online-validation.markdown b/docs/_posts/2021-05-26-online-validation.markdown
deleted file mode 100644
index 33e9dfc15..000000000
--- a/docs/_posts/2021-05-26-online-validation.markdown
+++ /dev/null
@@ -1,17 +0,0 @@
---
title: Online Validation
layout: post
author: sdong
category: blog
---
To prevent or mitigate data corruption in RocksDB when software or hardware issues happen, we keep adding online consistency checks and improving existing ones.

We improved ColumnFamilyOptions::force_consistency_checks and enabled it by default. The option performs some basic consistency checks on the LSM-tree, e.g., that files in one level are not overlapping. The DB will be frozen from new writes if a violation is detected. Previously, the feature's check was too limited and didn't always freeze the DB in a timely manner. Last year, we made the checking stricter so that it can [catch much more corrupted LSM-tree structures](https://github.com/facebook/rocksdb/pull/6901). We also fixed several issues where a checking failure was swallowed without freezing the DB. After making force_consistency_checks more reliable, we changed its default value to on.

ColumnFamilyOptions::paranoid_file_checks does some more expensive extra checking when generating a new SST file. Last year, we expanded this feature's coverage: after every SST file is generated, it is read back key by key and two things are checked: (1) the keys are in comparator order (this check is also available, and enabled by default, during file write via ColumnFamilyOptions::check_flush_compaction_key_order); (2) the hash of all the key-value pairs matches the hash calculated while adding them to the file. These checks detect certain corruptions so we can prevent the corrupt files from being applied to the DB. We suggest users turn it on at least in shadow environments, and consider running it in production too if you can afford the overhead.

A recently added feature checks the count of entries added to a memtable while flushing it into an SST file. This provides some online coverage for memtable corruption caused by either software bugs or hardware issues. The feature will ship in the coming release (6.21) and will be on by default. In the future, we will check more memtable counters, e.g., the number of puts or deletes.

We also improved the reporting of online validation errors to improve debuggability. For example, failure to parse a corrupt key now reports details about the corrupt key. Since we did not want to expose key data in logs, error messages, etc., by default, this reporting is opt-in via DBOptions::allow_data_in_errors.

More online checking features are planned, some more sophisticated, including key/value checksums and sample-based query validation.
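To make the options above concrete, here is a minimal sketch of enabling these checks when opening a DB (a sketch only; whether to pay for `paranoid_file_checks`, and whether to opt in to `allow_data_in_errors`, depends on your overhead and logging constraints):

```
#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Minimal sketch: opting in to the online validation options discussed above.
rocksdb::Status OpenWithOnlineValidation(const std::string& path,
                                         rocksdb::DB** db) {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Basic LSM-tree structure checks; on by default in recent releases.
  options.force_consistency_checks = true;
  // Read back and verify every newly generated SST file (extra read I/O and CPU).
  options.paranoid_file_checks = true;
  // Include details of corrupt keys in error messages (opt-in because it can
  // expose key data in logs).
  options.allow_data_in_errors = true;
  return rocksdb::DB::Open(options, path, db);
}
```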
diff --git a/docs/_posts/2021-05-27-rocksdb-secondary-cache.markdown b/docs/_posts/2021-05-27-rocksdb-secondary-cache.markdown
deleted file mode 100644
index 3ad1141bf..000000000
--- a/docs/_posts/2021-05-27-rocksdb-secondary-cache.markdown
+++ /dev/null
@@ -1,195 +0,0 @@
---
title: RocksDB Secondary Cache
layout: post
author: anand1976
category: blog
---
## Introduction

The RocksDB team is implementing support for a block cache on non-volatile media, such as a local flash device or NVM/SCM. It can be viewed as an extension of RocksDB's current volatile block cache (LRUCache or ClockCache). The non-volatile block cache acts as a second tier cache that contains blocks evicted from the volatile cache. Those blocks are then promoted to the volatile cache as they become hotter due to access.

This feature is meant for cases where the DB is located on remote storage or cloud storage. The non-volatile cache is officially referred to in RocksDB as the SecondaryCache. By maintaining a SecondaryCache that's an order of magnitude larger than DRAM, fewer reads would be required from remote storage, thus reducing read latency as well as network bandwidth consumption.

From the user point of view, the local flash cache will support the following requirements -

1. Provide a pointer to a secondary cache when opening a DB
2. Be able to share the secondary cache across DBs in the same process
3. Have multiple secondary caches on a host
4. Support persisting the cache across process restarts and reboots by ensuring repeatability of the cache key

![Architecture](/static/images/rocksdb-secondary-cache/arch_diagram.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

## Design

When designing the API for a SecondaryCache, we had a choice between making it visible to the RocksDB code (table reader) or hiding it behind the RocksDB block cache. There are several advantages of hiding it behind the block cache -

* Allows flexibility in insertion of blocks into the secondary cache. A block can be inserted on eviction from the RAM tier, or it could be eagerly inserted.
* It makes the rest of the RocksDB code less complex by providing a uniform interface regardless of whether a secondary cache is configured or not
* Makes parallel reads, peeking in the cache for prefetching, failure handling etc. easier
* Makes it easier to extend to compressed data if needed, and allows other persistent media, such as PM, to be added as an additional tier

We decided to make the secondary cache transparent to the rest of RocksDB code by hiding it behind the block cache. A key issue that we needed to address was the allocation and ownership of memory of the cached items - insertion into the secondary cache may require that memory be allocated by the secondary cache itself. This means that the parts of the cached object that can be transferred to the secondary cache need to be copied out (referred to as **unpacking**), and on a lookup the data stored in the secondary cache needs to be provided to the object constructor (referred to as **packing**). For RocksDB cached objects such as data blocks, index and filter blocks, and compression dictionaries, unpacking involves copying out the raw uncompressed BlockContents of the block, and packing involves constructing the corresponding block/index/filter/dictionary object using the raw uncompressed data.

Another alternative we considered was the existing PersistentCache interface. However, we decided not to pursue it and eventually deprecate it for the following reasons -

* It is exposed directly to the table reader code, which makes it more difficult to implement different policies such as inclusive/exclusive cache, as well as extending it to more sophisticated admission control policies
* The interface does not allow for custom memory allocation and object packing/unpacking, so new APIs would have to be defined anyway
* The current PersistentCache implementation is very simple and does not have any admission control policies

## API

The interface between RocksDB's block cache and the secondary cache is designed to allow pluggable implementations. For FB internal usage, we plan to use Cachelib with a wrapper to provide the plug-in implementation and use folly and other fbcode libraries, which cannot be used directly by RocksDB, to efficiently implement the cache operations. The following diagrams show the flow of insertion and lookup of a block.

![Insert flow](/static/images/rocksdb-secondary-cache/insert_flow.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

![Lookup flow](/static/images/rocksdb-secondary-cache/lookup_flow.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

An item in the secondary cache is referenced by a SecondaryCacheHandle. The handle may not be immediately ready or have a valid value. The caller can call IsReady() to determine if it's ready, and can call Wait() in order to block until it becomes ready. The caller must call Value() after it becomes ready to determine if the item was successfully read. Value() must return nullptr on failure.

```
class SecondaryCacheHandle {
 public:
  virtual ~SecondaryCacheHandle() {}

  // Returns whether the handle is ready or not
  virtual bool IsReady() = 0;

  // Block until handle becomes ready
  virtual void Wait() = 0;

  // Return the value. If nullptr, it means the lookup was unsuccessful
  virtual void* Value() = 0;

  // Return the size of value
  virtual size_t Size() = 0;
};
```

The user of the secondary cache (for example, BlockBasedTableReader indirectly through LRUCache) must implement the callbacks defined in CacheItemHelper, in order to facilitate the unpacking/packing of objects for saving to and restoring from the secondary cache. The CreateCallback must be implemented to construct a cacheable object from the raw data in secondary cache.

```
  // The SizeCallback takes a void* pointer to the object and returns the size
  // of the persistable data. It can be used by the secondary cache to allocate
  // memory if needed.
  using SizeCallback = size_t (*)(void* obj);

  // The SaveToCallback takes a void* object pointer and saves the persistable
  // data into a buffer. The secondary cache may decide to not store it in a
  // contiguous buffer, in which case this callback will be called multiple
  // times with increasing offset
  using SaveToCallback = Status (*)(void* from_obj, size_t from_offset,
                                    size_t length, void* out);

  // A function pointer type for custom destruction of an entry's
  // value. The Cache is responsible for copying and reclaiming space
  // for the key, but values are managed by the caller.
  using DeleterFn = void (*)(const Slice& key, void* value);

  // A struct with pointers to helper functions for spilling items from the
  // cache into the secondary cache. May be extended in the future. An
  // instance of this struct is expected to outlive the cache.
  struct CacheItemHelper {
    SizeCallback size_cb;
    SaveToCallback saveto_cb;
    DeleterFn del_cb;

    CacheItemHelper() : size_cb(nullptr), saveto_cb(nullptr), del_cb(nullptr) {}
    CacheItemHelper(SizeCallback _size_cb, SaveToCallback _saveto_cb,
                    DeleterFn _del_cb)
        : size_cb(_size_cb), saveto_cb(_saveto_cb), del_cb(_del_cb) {}
  };

  // The CreateCallback is passed by the block cache user to Lookup(). It
  // takes in a buffer from the NVM cache and constructs an object using
  // it. The callback doesn't have ownership of the buffer and should
  // copy the contents into its own buffer.
  // typedef std::function<Status(void* buf, size_t size, void** out_obj,
  //                              size_t* charge)>
  //     CreateCallback;
  using CreateCallback = std::function<Status(void* buf, size_t size,
                                              void** out_obj, size_t* charge)>;
```

The secondary cache provider must provide a concrete implementation of the SecondaryCache abstract class.

```
// SecondaryCache
//
// Cache interface for caching blocks on a secondary tier (which can include
// non-volatile media, or alternate forms of caching such as compressed data)
class SecondaryCache {
 public:
  virtual ~SecondaryCache() {}

  virtual std::string Name() = 0;

  static const std::string Type() { return "SecondaryCache"; }

  // Insert the given value into this cache. The value is not written
  // directly. Rather, the SaveToCallback provided by helper_cb will be
  // used to extract the persistable data in value, which will be written
  // to this tier. The implementation may or may not write it to cache
  // depending on the admission control policy, even if the return status is
  // success.
  virtual Status Insert(const Slice& key, void* value,
                        const Cache::CacheItemHelper* helper) = 0;

  // Lookup the data for the given key in this cache. The create_cb
  // will be used to create the object. The handle returned may not be
  // ready yet, unless wait=true, in which case Lookup() will block until
  // the handle is ready
  virtual std::unique_ptr<SecondaryCacheHandle> Lookup(
      const Slice& key, const Cache::CreateCallback& create_cb, bool wait) = 0;

  // At the discretion of the implementation, erase the data associated
  // with key
  virtual void Erase(const Slice& key) = 0;

  // Wait for a collection of handles to become ready. This would be used
  // by MultiGet, for example, to read multiple data blocks in parallel
  virtual void WaitAll(std::vector<SecondaryCacheHandle*> handles) = 0;

  virtual std::string GetPrintableOptions() const = 0;
};
```

A SecondaryCache is configured by the user by providing a pointer to it in LRUCacheOptions -

```
struct LRUCacheOptions {
  ...
  // A SecondaryCache instance to use as an additional cache tier
  std::shared_ptr<SecondaryCache> secondary_cache;
  ...
};
```

## Current Status

The initial RocksDB support for the secondary cache has been merged into the main branch, and will be available in the 6.21 release. This includes providing a way for the user to configure a secondary cache when instantiating RocksDB's LRU cache (volatile block cache), spilling blocks evicted from the LRU cache to the flash cache, promoting a block read from the SecondaryCache to the LRU cache, and updating tools such as cache_bench and db_bench to allow specifying a flash cache. The relevant PRs are [#8271](https://github.com/facebook/rocksdb/pull/8271), [#8191](https://github.com/facebook/rocksdb/pull/8191), and [#8312](https://github.com/facebook/rocksdb/pull/8312).

We prototyped an end-to-end solution, with the above PRs as well as a Cachelib based implementation of the SecondaryCache. We ran a mixgraph benchmark to simulate a realistic read/write workload.
The results showed a 15% gain with the local flash cache over no local cache, and a ~25-30% reduction in network reads with a corresponding decrease in cache misses. - -![Throughput](/static/images/rocksdb-secondary-cache/Mixgraph_throughput.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -![Hit Rate](/static/images/rocksdb-secondary-cache/Mixgraph_hit_rate.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -## Future Work - -In the short term, we plan to do the following in order to fully integrate the SecondaryCache with RocksDB - - -1. Use DB session ID as the cache key prefix to ensure uniqueness and repeatability -2. Optimize flash cache usage of MultiGet and iterator workloads -3. Stress testing -4. More benchmarking - -Longer term, we plan to deploy this in production at Facebook. - -## Call to Action - -We are hoping for a community contribution of a secondary cache implementation, which would make this feature usable by the broader RocksDB userbase. If you are interested in contributing, please reach out to us in [this issue](https://github.com/facebook/rocksdb/issues/8347). - diff --git a/docs/_posts/2021-05-31-dictionary-compression.markdown b/docs/_posts/2021-05-31-dictionary-compression.markdown deleted file mode 100644 index 9b0f45293..000000000 --- a/docs/_posts/2021-05-31-dictionary-compression.markdown +++ /dev/null @@ -1,157 +0,0 @@ ---- -title: Preset Dictionary Compression -layout: post -author: ajkr -category: blog ---- - -## Summary - -Compression algorithms relying on an adaptive dictionary, such as LZ4, zstd, and zlib, struggle to achieve good compression ratios on small inputs when using the basic compress API. -With the basic compress API, the compressor starts with an empty dictionary. -With small inputs, not much content gets added to the dictionary during the compression. -Combined, these factors suggest the dictionary will never have enough contents to achieve great compression ratios. - -RocksDB groups key-value pairs into data blocks before storing them in files. -For use cases that are heavy on random accesses, smaller data block size is sometimes desirable for reducing I/O and CPU spent reading blocks. -However, as explained above, smaller data block size comes with the downside of worse compression ratio when using the basic compress API. - -Fortunately, zstd and other libraries offer advanced compress APIs that preset the dictionary. -A preset dictionary makes it possible for the compressor to start from a useful state instead of from an empty one, making compression immediately effective. - -RocksDB now optionally takes advantage of these dictionary presetting APIs. -The challenges in integrating this feature into the storage engine were more substantial than apparent on the surface. -First, we need to target a preset dictionary to the relevant data. -Second, preset dictionaries need to be trained from data samples, which need to be gathered. -Third, preset dictionaries need to be persisted since they are needed at decompression time. -Fourth, overhead in accessing the preset dictionary must be minimized to prevent regression in critical code paths. -Fifth, we need easy-to-use measurement to evaluate candidate use cases and production impact. - -In production, we have deployed dictionary presetting to save space in multiple RocksDB use cases with data block size 8KB or smaller. -We have measured meaningful benefit to compression ratio in use cases with data block size up to 16KB. 
We have also measured a use case that can save both CPU and space by reducing data block size and turning on dictionary presetting at the same time.

## Feature design
#### Targeting

Over time we have considered a few possibilities for the scope of a dictionary.

- Subcompaction
- SST file
- Column family

The original choice was subcompaction scope.
This enabled an approach with minimal buffering overhead because we could collect samples while generating the first output SST file.
The dictionary could then be trained and applied to subsequent SST files in the same subcompaction.

However, we found a large use case where the proximity of data in the keyspace was more correlated with its similarity than we had predicted.
In particular, the approach of training a dictionary on an adjacent file yielded substantially worse ratios than training the dictionary on the same file it would be used to compress.
In response to this finding, we changed the preset dictionary scope to per SST file.

With this change in approach, we had to face the problem we had hoped to avoid: how can we compress all of an SST file's data blocks with the same preset dictionary while that dictionary can only be trained after many data blocks have been sampled?
The solutions we considered both involved a new overhead.
We could read the input more than once and introduce I/O overhead, or we could buffer the uncompressed output file data blocks until a dictionary is trained, introducing memory overhead.
We chose to take the hit on memory overhead.

Another approach that we considered was associating multiple dictionaries with a column family.
For example, in MyRocks there could be a dictionary trained on data from each large table.
When compressing a data block, we would look at the table to which its data belongs and pick the corresponding dictionary.
However, this approach would introduce many challenges.
RocksDB would need to be aware of the key schema to know where the table boundaries are.
RocksDB would also need to periodically update the dictionaries to account for changes in data patterns.
It would need somewhere to store dictionaries at column family scope.
Overall, we thought these challenges made the approach too difficult to pursue.

#### Training

![](/static/images/dictcmp/dictcmp_raw_sampled.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

-Raw samples mode (`zstd_max_train_bytes == 0`) -

As mentioned earlier, the approach we took is to build the dictionary from buffered uncompressed data blocks.
The first row of data blocks in these diagrams illustrates this buffering.
The second row illustrates training samples selected from the buffered blocks.
In raw samples mode (above), the final dictionary is simply the concatenation of these samples.
In zstd training mode (below), on the other hand, these samples are passed to the trainer to produce the final dictionary.

![](/static/images/dictcmp/dictcmp_zstd_trained.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

-zstd training mode (`zstd_max_train_bytes > 0`) -

- -#### Compression path - -Once the preset dictionary is generated by the above process, we apply it to the buffered data blocks and write them to the output file. -Thereafter, newly generated data blocks are immediately compressed and written out. - -One optimization here is available to zstd v0.7.0+ users. -Instead of deserializing the dictionary on each compress invocation, we can do that work once and reuse it. -A `ZSTD_CDict` holds this digested dictionary state and is passed to the compress API. - -#### Persistence - -When an SST file's data blocks are compressed using a preset dictionary, that dictionary is stored inside the file for later use in decompression. - -![](/static/images/dictcmp/dictcmp_sst_blocks.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} -

-SST file layout with the preset dictionary in its own (uncompressed) block -

- -#### Decompression path - -To decompress, we need to provide both the data block and the dictionary used to compress it. -Since dictionaries are just blocks in a file, we access them through block cache. -However this additional load on block cache can be problematic. -It can be alleviated by pinning the dictionaries to avoid going through the LRU locks. - -An optimization analogous to the digested dictionary exists for certain zstd users (see User API section for details). -When enabled, the block cache stores the digested dictionary state for decompression (`ZSTD_DDict`) instead of the block contents. -In some cases we have seen decompression CPU decrease overall when enabling dictionary thanks to this optimization. - -#### Measurement - -Typically our first step in evaluating a candidate use case is an offline analysis of the data. -This gives us a quick idea whether presetting dictionary will be beneficial without any code, config, or data changes. -Our `sst_dump` tool reports what size SST files would have been using specified compression libraries and options. -We can select random SST files and compare the size with vs. without dictionary. - -When that goes well, the next step is to see how it works in a live DB, like a production shadow or canary. -There we can observe how it affects application/system metrics. - -Even after dictionary is enabled, there is the question of how much space was finally saved. -We provide a way to A/B test size with vs. without dictionary while running in production. -This feature picks a sample of data blocks to compress in multiple ways -- one of the outputs is stored, while the other outputs are thrown away after counting their size. -Due to API limitations, the stored output always has to be the dictionary-compressed one, so this feature can only be used after enabling dictionary. -The size with and without dictionary are stored in the SST file as table properties. -These properties can be aggregated across all SST files in a DB (and across all DBs in a tier) to learn the final space saving. - -## User API - -RocksDB allows presetting compression dictionary for users of LZ4, zstd, and zlib. -The most advanced capabilities are available to zstd v1.1.4+ users who statically link (see below). -Newer versions of zstd (v1.3.6+) have internal changes to the dictionary trainer and digested dictionary management, which significantly improve memory and CPU efficiency. - -Run-time settings: - -- `CompressionOptions::max_dict_bytes`: Limit on per-SST file dictionary size. Increasing this causes dictionaries to consume more space and memory for the possibility of better data block compression. A typical value we use is 16KB. -- (**zstd only**) `CompressionOptions::zstd_max_train_bytes`: Limit on training data passed to zstd dictionary trainer. Larger values cause the training to consume more CPU (and take longer) while generating more effective dictionaries. The starting point guidance we received from zstd team is to set it to 100x `CompressionOptions::max_dict_bytes`. -- `CompressionOptions::max_dict_buffer_bytes`: Limit on data buffering from which training samples are gathered. By default we buffer up to the target file size per ongoing background job. If this amount of memory is concerning, this option can constrain the buffering with the downside that training samples will cover a smaller portion of the SST file. Work is ongoing to charge this memory usage to block cache so it will not need to be accounted for separately. 
-- `BlockBasedTableOptions::cache_index_and_filter_blocks`: Controls whether metadata blocks including dictionary are accessed through block cache or held in table reader memory (yes, its name is outdated). -- `BlockBasedTableOptions::metadata_cache_options`: Controls what metadata blocks are pinned in block cache. Pinning avoids LRU contention at the risk of cold blocks holding memory. -- `ColumnFamilyOptions::sample_for_compression`: Controls frequency of measuring extra compressions on data blocks using various libraries with default settings (i.e., without preset dictionary). - -Compile-time setting: - -- (**zstd only**) `EXTRA_CXXFLAGS=-DZSTD_STATIC_LINKING_ONLY`: Hold digested dictionaries in block cache to save repetitive deserialization overhead. This saves a lot of CPU for read-heavy workloads. This compiler flag is necessary because one of the digested dictionary APIs we use is marked as experimental. We still use it in production, however. - -Function: - -- `DB::GetPropertiesOfAllTables()`: The properties `kSlowCompressionEstimatedDataSize` and `kFastCompressionEstimatedDataSize` estimate what the data block size (`kDataSize`) would have been if the corresponding compression library had been used. These properties are only present when `ColumnFamilyOptions::sample_for_compression` causes one or more samples to be measured, and they become more accurate with higher sampling frequency. - -Tool: - -- `sst_dump --command=recompress`: Offline analysis tool that reports what the SST file size would have been using the specified compression library and options. diff --git a/docs/_posts/2021-12-29-ribbon-filter.markdown b/docs/_posts/2021-12-29-ribbon-filter.markdown deleted file mode 100644 index c6a52ce84..000000000 --- a/docs/_posts/2021-12-29-ribbon-filter.markdown +++ /dev/null @@ -1,281 +0,0 @@ ---- -title: Ribbon Filter -layout: post -author: pdillinger -category: blog ---- - -## Summary -Since version 6.15 last year, RocksDB supports Ribbon filters, a new -alternative to Bloom filters that save space, especially memory, at -the cost of more CPU usage, mostly in constructing the filters in the -background. Most applications with long-lived data (many hours or -longer) will likely benefit from adopting a Ribbon+Bloom hybrid filter -policy. Here we explain why and how. - -[Ribbon filter on RocksDB wiki](https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#ribbon-filter) - -[Ribbon filter paper](https://arxiv.org/abs/2103.02515) - -## Problem & background -Bloom filters play a critical role in optimizing point queries and -some range queries in LSM-tree storage systems like RocksDB. Very -large DBs can use 10% or more of their RAM memory for (Bloom) filters, -so that (average case) read performance can be very good despite high -(worst case) read amplification, [which is useful for lowering write -and/or space -amplification](http://smalldatum.blogspot.com/2015/11/read-write-space-amplification-pick-2_23.html). -Although the `format_version=5` Bloom filter in RocksDB is extremely -fast, all Bloom filters use around 50% more space than is -theoretically possible for a hashed structure configured for the same -false positive (FP) rate and number of keys added. What would it take -to save that significant share of “wasted” filter memory, and when -does it make sense to use such a Bloom alternative? 
A number of alternatives to Bloom filters were known, especially for static filters (not modified after construction), but all the previously known structures were unsatisfying for SSTs because of some combination of:
* Not enough space savings for CPU increase. For example, [Xor filters](https://arxiv.org/abs/1912.08258) use 3-4x more CPU than Bloom but only save 15-20% of space. [GOV](https://arxiv.org/pdf/1603.04330.pdf) can save around 30% space but requires around 10x more CPU than Bloom.
* Inconsistent space savings. [Cuckoo filters](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) and Xor+ filters offer significant space savings for very low FP rates (high bits per key) but little or no savings for higher FP rates (low bits per key). ([Higher FP rates are considered best for the largest levels of the LSM.](https://stratos.seas.harvard.edu/files/stratos/files/monkeykeyvaluestore.pdf)) [Spatially-coupled Xor filters](https://arxiv.org/pdf/2001.10500.pdf) require a very large number of keys per filter for large space savings.
* Inflexible configuration. No published alternatives offered the same continuous configurability of Bloom filters, where any FP rate and any fractional bits per key could be chosen. This flexibility improves memory efficiency with the `optimize_filters_for_memory` option that minimizes internal fragmentation on filters.

## Ribbon filter development and implementation
The Ribbon filter came about when I developed a faster, simpler, and more adaptable algorithm for constructing a little-known [Xor-based structure from Dietzfelbinger and Walzer](https://arxiv.org/pdf/1907.04750.pdf). It has very good space usage for required CPU time (~30% space savings for 3-4x CPU) and, with some engineering, Bloom-like configurability. The complications were manageable for use in RocksDB:
* Ribbon space efficiency does not naturally scale to a very large number of keys in a single filter (whole SST file or partition), but with the current 128-bit Ribbon implementation in RocksDB, even 100 million keys in one filter saves 27% space vs. Bloom rather than 30% for 100,000 keys in a filter.
* More temporary memory is required during construction, ~230 bits per key for 128-bit Ribbon vs. ~75 bits per key for Bloom filter. A quick calculation shows that if you are saving 3 bits per key on the generated filter, you only need about 50 generated filters in memory to offset this temporary memory usage. (Thousands of filters in memory is typical.) Starting in RocksDB version 6.27, this temporary memory can be accounted for under block cache using `BlockBasedTableOptions::reserve_table_builder_memory`.
* Ribbon filter queries use relatively more CPU for lower FP rates (but still O(1) relative to the number of keys added to the filter). This should be OK because lower FP rates are only appropriate when the cost of a false positive is very high (worth extra query time) or memory is not so constrained (can use Bloom instead).

Future: data in [the paper](https://arxiv.org/abs/2103.02515) suggests that 32-bit Balanced Ribbon (new name: [Bump-Once Ribbon](https://arxiv.org/pdf/2109.01892.pdf)) would improve all of these issues and be better all around (except for code complexity).

## Ribbon vs. Bloom in RocksDB configuration
Different applications and hardware configurations have different constraints, but we can use hardware costs to examine and better understand the trade-off between Bloom and Ribbon.
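For concreteness, here is a minimal sketch of how the two kinds of filter policy under comparison are selected; it assumes the two-argument `NewRibbonFilterPolicy` overload (minimum level for Ribbon), which is described under "Predictable lifetime" later in this post:

```
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

// Sketch: choosing between Bloom and Ribbon filters via the table factory.
rocksdb::Options MakeOptionsWithFilter(bool use_ribbon) {
  rocksdb::BlockBasedTableOptions table_options;
  if (use_ribbon) {
    // FP rate equivalent to a 10 bits/key Bloom filter (~7 bits/key as
    // Ribbon); levels before level 2 keep using Bloom filters.
    table_options.filter_policy.reset(rocksdb::NewRibbonFilterPolicy(
        /*bloom_equivalent_bits_per_key=*/10, /*bloom_before_level=*/2));
  } else {
    table_options.filter_policy.reset(
        rocksdb::NewBloomFilterPolicy(/*bits_per_key=*/10));
  }
  rocksdb::Options options;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  return options;
}
```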
- -### Same FP rate, RAM vs. CPU hardware cost -Under ideal conditions where we can adjust our hardware to suit the -application, in terms of dollars, how much does it cost to construct, -query, and keep in memory a Bloom filter vs. a Ribbon filter? The -Ribbon filter costs more for CPU but less for RAM. Importantly, the -RAM cost directly depends on how long the filter is kept in memory, -which in RocksDB is essentially the lifetime of the filter. -(Temporary RAM during construction is so short-lived that it is -ignored.) Using some consumer hardware and electricity prices and a -predicted balance between construction and queries, we can compute a -“break even” duration in memory. To minimize cost, filters with a -lifetime shorter than this should be Bloom and filters with a lifetime -longer than this should be Ribbon. (Python code) - -``` -# Commodity prices based roughly on consumer prices and rough guesses -# Upfront cost of a CPU per hardware thread -upfront_dollars_per_cpu_thread = 30.0 - -# CPU average power usage per hardware thread -watts_per_cpu_thread = 3.5 - -# Upfront cost of a GB of RAM -upfront_dollars_per_gb_ram = 8.0 - -# RAM average power usage per GB -# https://www.crucial.com/support/articles-faq-memory/how-much-power-does-memory-use -watts_per_gb_ram = 0.375 - -# Estimated price of power per kilowatt-hour, including overheads like conversion losses and cooling -dollars_per_kwh = 0.35 - -# Assume 3 year hardware lifetime -hours_per_lifetime = 3 * 365 * 24 -seconds_per_lifetime = hours_per_lifetime * 60 * 60 - -# Number of filter queries per key added in filter construction is heavily dependent on workload. -# When replication is in layer above RocksDB, it will be low, likely < 1. When replication is in -# storage layer below RocksDB, it will likely be > 1. Using a rough and general guesstimate. -key_query_per_construct = 1.0 - -#================================== -# Bloom & Ribbon filter performance -typical_bloom_bits_per_key = 10.0 -typical_ribbon_bits_per_key = 7.0 - -# Speeds here are sensitive to many variables, especially query speed because it -# is so dependent on memory latency. Using this benchmark here: -# for IMPL in 2 3; do -# ./filter_bench -impl=$IMPL -quick -m_keys_total_max=200 -use_full_block_reader -# done -# and "Random filter" queries. 
-nanoseconds_per_construct_bloom_key = 32.0 -nanoseconds_per_construct_ribbon_key = 140.0 - -nanoseconds_per_query_bloom_key = 500.0 -nanoseconds_per_query_ribbon_key = 600.0 - -#================================== -# Some constants -kwh_per_watt_lifetime = hours_per_lifetime / 1000.0 -bits_per_gb = 8 * 1024 * 1024 * 1024 - -#================================== -# Crunching the numbers -# on CPU for constructing filters -dollars_per_cpu_thread_lifetime = upfront_dollars_per_cpu_thread + watts_per_cpu_thread * kwh_per_watt_lifetime * dollars_per_kwh -dollars_per_cpu_thread_second = dollars_per_cpu_thread_lifetime / seconds_per_lifetime - -dollars_per_construct_bloom_key = dollars_per_cpu_thread_second * nanoseconds_per_construct_bloom_key / 10**9 -dollars_per_construct_ribbon_key = dollars_per_cpu_thread_second * nanoseconds_per_construct_ribbon_key / 10**9 - -dollars_per_query_bloom_key = dollars_per_cpu_thread_second * nanoseconds_per_query_bloom_key / 10**9 -dollars_per_query_ribbon_key = dollars_per_cpu_thread_second * nanoseconds_per_query_ribbon_key / 10**9 - -dollars_per_bloom_key_cpu = dollars_per_construct_bloom_key + key_query_per_construct * dollars_per_query_bloom_key -dollars_per_ribbon_key_cpu = dollars_per_construct_ribbon_key + key_query_per_construct * dollars_per_query_ribbon_key - -# on holding filters in RAM -dollars_per_gb_ram_lifetime = upfront_dollars_per_gb_ram + watts_per_gb_ram * kwh_per_watt_lifetime * dollars_per_kwh -dollars_per_gb_ram_second = dollars_per_gb_ram_lifetime / seconds_per_lifetime - -dollars_per_bloom_key_in_ram_second = dollars_per_gb_ram_second / bits_per_gb * typical_bloom_bits_per_key -dollars_per_ribbon_key_in_ram_second = dollars_per_gb_ram_second / bits_per_gb * typical_ribbon_bits_per_key - -#================================== -# How many seconds does it take for the added cost of constructing a ribbon filter instead -# of bloom to be offset by the added cost of holding the bloom filter in memory? -break_even_seconds = (dollars_per_ribbon_key_cpu - dollars_per_bloom_key_cpu) / (dollars_per_bloom_key_in_ram_second - dollars_per_ribbon_key_in_ram_second) -print(break_even_seconds) -# -> 3235.1647730256936 -``` - -So roughly speaking, filters that live in memory for more than an hour -should be Ribbon, and filters that live less than an hour should be -Bloom. This is very interesting, but how long do filters live in -RocksDB? - -First let's consider the average case. Write-heavy RocksDB loads are -often backed by flash storage, which has some specified write -endurance for its intended lifetime. This can be expressed as *device -writes per day* (DWPD), and supported DWPD is typically < 10.0 even -for high end devices (excluding NVRAM). Roughly speaking, the DB would -need to be writing at a rate of 20+ DWPD for data to have an average -lifetime of less than one hour. Thus, unless you are prematurely -burning out your flash or massively under-utilizing available storage, -using the Ribbon filter has the better cost profile *on average*. - -### Predictable lifetime -But we can do even better than optimizing for the average case. LSM -levels give us very strong data lifetime hints. Data in L0 might live -for minutes or a small number of hours. Data in Lmax might live for -days or weeks. So even if Ribbon filters weren't the best choice on -average for a workload, they almost certainly make sense for the -larger, longer-lived levels of the LSM. 
As of RocksDB 6.24, you can -specify a minimum LSM level for Ribbon filters with -`NewRibbonFilterPolicy`, and earlier levels will use Bloom filters. - -### Resident filter memory -The above analysis assumes that nearly all filters for all live SST -files are resident in memory. This is true if using -`cache_index_and_filter_blocks=0` and `max_open_files=-1` (defaults), -but `cache_index_and_filter_blocks=1` is popular. In that case, -if you use `optimize_filters_for_hits=1` and non-partitioned filters -(a popular MyRocks configuration), it is also likely that nearly all -live filters are in memory. However, if you don't use -`optimize_filters_for_hits` and use partitioned filters, then -cold data (by age or by key range) can lead to only a portion of -filters being resident in memory. In that case, benefit from Ribbon -filter is not as clear, though because Ribbon filters are smaller, -they are more efficient to read into memory. - -RocksDB version 6.21 and later include a rough feature to determine -block cache usage for data blocks, filter blocks, index blocks, etc. -Data like this is periodically dumped to LOG file -(`stats_dump_period_sec`): - -``` -Block cache entry stats(count,size,portion): DataBlock(441761,6.82 GB,75.765%) FilterBlock(3002,1.27 GB,14.1387%) IndexBlock(17777,887.75 MB,9.63267%) Misc(1,0.00 KB,0%) -Block cache LRUCache@0x7fdd08104290#7004432 capacity: 9.00 GB collections: 2573 last_copies: 10 last_secs: 0.143248 secs_since: 0 -``` - -This indicates that at this moment in time, the block cache object -identified by `LRUCache@0x7fdd08104290#7004432` (potentially used -by multiple DBs) uses roughly 14% of its 9GB, about 1.27 GB, on filter -blocks. This same data is available through `DB::GetMapProperty` with -`DB::Properties::kBlockCacheEntryStats`, and (with some effort) can -be compared to total size of all filters (not necessarily in memory) -using `rocksdb.filter.size` from -`DB::Properties::kAggregatedTableProperties`. - -### Sanity checking lifetime -Can we be sure that using filters even makes sense for such long-lived -data? We can apply [the current 5 minute rule for caching SSD data in -RAM](http://renata.borovica-gajic.com/data/adms2017_5minuterule.pdf). A -4KB filter page holds data for roughly 4K keys. If we assume at least -one negative (useful) filter query in its lifetime per added key, it -can satisfy the 5 minute rule with a lifetime of up to about two -weeks. Thus, the lifetime threshold for “no filter” is about 300x -higher than the lifetime threshold for Ribbon filter. - -### What to do with saved memory -The default way to improve overall RocksDB performance with more -available memory is to use more space for caching, which improves -latency, CPU load, read IOs, etc. With -`cache_index_and_filter_blocks=1`, savings in filters will -automatically make room for caching more data blocks in block -cache. With `cache_index_and_filter_blocks=0`, consider increasing -block cache size. - -Using the space savings to lower filter FP rates is also an option, -but there is less evidence for this commonly improving existing -*optimized* configurations. - -## Generic recommendation -If using `NewBloomFilterPolicy(bpk)` for a large persistent DB using -compression, try using `NewRibbonFilterPolicy(bpk)` instead, which -will generate Ribbon filters during compaction and Bloom filters -for flush, both with the same FP rate as the old setting. 
Once new SST -files are generated under the new policy, this should free up some -memory for more caching without much effect on burst or sustained -write speed. Both kinds of filters can be read under either policy, so -there's always an option to adjust settings or gracefully roll back to -using Bloom filter only (keeping in mind that SST files must be -replaced to see effect of that change). diff --git a/docs/_posts/2022-07-18-per-key-value-checksum.markdown b/docs/_posts/2022-07-18-per-key-value-checksum.markdown deleted file mode 100644 index 6b9ad801c..000000000 --- a/docs/_posts/2022-07-18-per-key-value-checksum.markdown +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: "Per Key-Value Checksum" -layout: post -author: -- cbi42 -- ajkr -category: blog ---- - -## Summary - -Silent data corruptions can severely impact RocksDB users. As a key-value library, RocksDB resides at the bottom of the user space software stack for many diverse applications. Returning wrong query results can cause unpredictable consequences for our users so must be avoided. - -To prevent and detect corruption, RocksDB has several consistency checks [1], especially focusing on the storage layer. For example, SST files contain block checksums that are verified during reads, and each SST file has a full file checksum that can be verified when files are transferred. - -Other sources of corruptions, such as those from faulty CPU/memory or heap corruptions, pose risks for which protections are relatively underdeveloped. Meanwhile, recent work [2] suggests one per thousand machines in our fleet will at some point experience a hardware error that is exposed to an application. Additionally, software bugs can increase the risk of heap corruptions at any time. - -Hardware/heap corruptions are naturally difficult to detect in the application layer since they can compromise any data or control flow. Some factors we take into account when choosing where to add protection are the volume of data, the importance of the data, the CPU instructions that operate on the data, and the duration it resides in memory. One recently added protection, `detect_filter_construct_corruption`, has proven itself useful in preventing corrupt filters from being persisted. We have seen hardware encounter machine-check exceptions a few hours after we detected a corrupt filter. - -The next way we intend to detect hardware and heap corruptions before they cause queries to return wrong results is through developing a new feature: per key-value checksum. This feature will eventually provide optional end-to-end integrity protection for every key-value pair. RocksDB 7.4 offers substantial coverage of the user write and recovery paths with per key-value checksum protection. - -## User API - -For integrity protection during recovery, no change is required. Recovery is always protected. - -For user write protection, RocksDB allows the user to specify per key-value protection through `WriteOptions::protection_bytes_per_key` or pass in `protection_bytes_per_key` to `WriteBatch` constructor when creating a `WriteBatch` directly. Currently, only 0 (default, no protection) and 8 bytes per key are supported. This should be fine for write batches as they do not usually contain a huge number of keys. We are working on supporting more settings as 8 bytes per key might cause considerable memory overhead when the protection is extended to memtable entries. 
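As a quick illustration, here is a minimal sketch of the two ways to opt in; the exact `WriteBatch` constructor arguments shown are an assumption and may vary between releases, so check `write_batch.h` for your version:

```
#include "rocksdb/db.h"
#include "rocksdb/write_batch.h"

// Sketch: enabling 8 bytes of per key-value protection on the write path.
void WriteWithProtection(rocksdb::DB* db) {
  rocksdb::WriteOptions write_options;
  write_options.protection_bytes_per_key = 8;  // 0 (off) and 8 are supported

  // Unbuffered write APIs: protection info is created internally.
  rocksdb::Status s = db->Put(write_options, "key1", "value1");

  // Directly constructed write batches: pass protection_bytes_per_key to the
  // constructor (argument order here is an assumption).
  rocksdb::WriteBatch batch(/*reserved_bytes=*/0, /*max_bytes=*/0,
                            /*protection_bytes_per_key=*/8);
  batch.Put("key2", "value2");
  s = db->Write(write_options, &batch);
}
```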
- -## Feature Design - -### Data Structures - -#### Protection info - -For protecting key-value pairs, we chose to use a hashing algorithm, xxh3 [3], for its good efficiency without relying on special hardware. While algorithms like crc32c can guarantee detection of certain patterns of bit flips, xxh3 offers no such guarantees. This is acceptable for us as we do not expect any particular error pattern [4], and even if we did, xxh3 can achieve a collision probability close enough to zero for us by tuning the number of protection bytes per key-value. - -Key-value pairs have multiple representations in RocksDB: in [WriteBatch](https://github.com/facebook/rocksdb/blob/7d0ecab570742c7280628b08ddc03cfd692f484f/db/write_batch.cc#L14-L31), in memtable [entries](https://github.com/facebook/rocksdb/blob/fc51b7f33adcba7ac725ed0e7fe8b8155aaeaee4/db/memtable.cc#L541-L545) and in [data blocks](https://github.com/facebook/rocksdb/blob/fc51b7f33adcba7ac725ed0e7fe8b8155aaeaee4/table/block_based/block_builder.cc#L21-L27). In this post we focus on key-values in write batches and memtable as in-memory data blocks are not yet protected. - -Besides user key and value, RocksDB includes internal metadata in the per key-value checksum calculation. Depending on the representation, internal metadata consists of some combination of sequence number, operation type, and column family ID. Note that since timestamp (when enabled) is part of the user key it is protected as well. - -The protection info consists of the XOR’d result of the xxh3 hash for all the protected components. This allows us to efficiently transform protection info for different representations. See below for an example converting WriteBatch protection info to memtable protection info. - -A risk of using XOR is the possibility of swapping corruptions (e.g., key becomes the value and the value becomes the key). To mitigate this risk, we use an independent seed for hashing each type of component. - -The following two figures illustrate how protection info in WriteBatch and memtable are calculated from a key-value’s components. - -![](/static/images/kv-checksum/ProtInfo-Writebatch.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -*Protection info for a key-value in a WriteBatch* -{: style="text-align: center"} - -![](/static/images/kv-checksum/ProtInfo-Memtable.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -*Protection info for a key-value in a memtable* -{: style="text-align: center"} - -The next figure illustrates how protection info for a key-value can be transformed to protect that same key-value in a different representation. Note this is done without recalculating the hash for all the key-value’s components. - -![](/static/images/kv-checksum/ProtInfo-Writebatch-to-Memtable.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -*Protection info for a key-value in a memtable derived from an existing WriteBatch protection info* -{: style="text-align: center"} - -Above, we see two (small) components are hashed: column family ID and sequence number. When a key-value is inserted from WriteBatch into memtable, it is assigned a sequence number and drops the column family ID since each memtable is associated with one column family. Recall the xxh3 of column family ID was included in the WriteBatch protection info, which is canceled out by the column family ID xxh3 included in the XOR. 
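To illustrate the idea, here is a simplified sketch (not the actual `ProtectionInfo` code; `HashWithSeed` is a stand-in for xxh3 with per-component seeds):

```
#include <cstdint>
#include <functional>
#include <string>

// Stand-in for xxh3 with a per-component seed; the real implementation uses
// XXH3 and dedicated ProtectionInfo types.
uint64_t HashWithSeed(const std::string& data, uint64_t seed) {
  return std::hash<std::string>{}(data) ^ (seed * 0x9E3779B97F4A7C15ULL);
}

// Protection info for a key-value in a WriteBatch: XOR of the hashes of the
// key, value, op type, and column family ID, each hashed with its own seed.
uint64_t WriteBatchProtInfo(const std::string& key, const std::string& value,
                            char op_type, uint32_t cf_id) {
  return HashWithSeed(key, 1) ^ HashWithSeed(value, 2) ^
         HashWithSeed(std::string(1, op_type), 3) ^
         HashWithSeed(std::to_string(cf_id), 4);
}

// Transforming it into the memtable representation: XOR out the column family
// ID component and XOR in the newly assigned sequence number, without
// rehashing the (potentially large) key and value.
uint64_t ToMemtableProtInfo(uint64_t wb_prot_info, uint32_t cf_id,
                            uint64_t seqno) {
  return wb_prot_info ^ HashWithSeed(std::to_string(cf_id), 4) ^
         HashWithSeed(std::to_string(seqno), 5);
}
```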
- -#### WAL fragment - -WAL (Write-ahead-log) persists write batches that correspond to operations in memtables and enables consistent database recovery after restart. RocksDB writes to WAL in chunks of some [fixed block size](https://github.com/facebook/rocksdb/blob/fc51b7f33adcba7ac725ed0e7fe8b8155aaeaee4/db/log_writer.h#L44) for efficiency. It is possible that some write batch does not fit into the space left in the current block and/or is larger than the fixed block size. Thus, serialized write batches (WAL records) are divided into WAL fragments before being written to WAL. The format of a WAL fragment is in the following diagram (there is another legacy format detailed in code [comments](https://github.com/facebook/rocksdb/blob/fc51b7f33adcba7ac725ed0e7fe8b8155aaeaee4/db/log_writer.h#L47-L59)). Roughly, the `Type` field indicates whether a fragment is at the beginning, middle or end of a record, and is used to group fragments. - -![](/static/images/kv-checksum/WAL-fragment.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -Note that each fragment is prefixed by a crc32c checksum that is calculated over `Type`, `Log #` and `Payload`. This ensures that RocksDB can detect corruptions that happened to the WAL in the storage layer. - -#### Write batch - -As mentioned above, a WAL record is a serialized `WriteBatch` that is split into physical fragments during writes to WAL. During DB recovery, once a WAL record is reconstructed from one or more fragments, it is [copied](https://github.com/facebook/rocksdb/blob/fc51b7f33adcba7ac725ed0e7fe8b8155aaeaee4/db/db_impl/db_impl_open.cc#L1127) into the content of a `WriteBatch`. The write batch will then be used to restore the memtable states. - -Besides the recovery path, a write batch is always constructed during user writes. Firstly, RocksDB allows users to construct a write batch directly, and pass it to DB through `DB::Write()` API for execution. Higher-level buffered write APIs like Transaction rely on a write batch to buffer writes prior to executing them. For unbuffered write APIs like `DB::Put()`, RocksDB constructs a write batch internally with the input user key and value. - -![](/static/images/kv-checksum/Write-batch.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -The above diagram shows a rough representation of a write batch in memory. `Contents` is the concatenation of serialized user operations in this write batch. Each operation consists of user key, value, op_type and optionally column family ID. With per key-value checksum protection enabled, a vector of ProtectionInfo is stored in the write batch, one for each user operation. - -#### Memtable entry - -![](/static/images/kv-checksum/Memtable-entry.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -A memtable entry is similar to write batch content, except that it captures only a single user operation and that it does not contain column family ID (since memtable is per column family). User key and value are length-prefixed, and seqno and optype are combined in a fixed 8 bytes representation. - -### Processes - -In order to protect user writes and recovery, per key-value checksum is covered in the following code paths. - -#### WriteBatch write - -Per key-value checksum coverage starts with the user buffers that contain user key and/or value. When users call DB Write APIs (e.g., `DB::Put()`), or when users add operations into write batches directly (e.g. 
`WriteBatch::Put()`), RocksDB constructs `ProtectionInfo` from the user buffer (e.g. [here](https://github.com/facebook/rocksdb/blob/96206531bc0bb56d87012921c5458c8a3047a6b3/db/write_batch.cc#L813)) and [stores](https://github.com/facebook/rocksdb/blob/96206531bc0bb56d87012921c5458c8a3047a6b3/include/rocksdb/write_batch.h#L478) the protection information within the corresponding `WriteBatch` object as diagrammed below. Then the user key and/or value are copied into the `WriteBatch`, thus starting per key-value checksum protection from the user buffer.

![](/static/images/kv-checksum/Writebatch-write.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}


#### WAL write

Before a `WriteBatch` leaves RocksDB and is persisted in a WAL file, it is verified against its `ProtectionInfo` to ensure its content is not corrupted. We added `WriteBatch::VerifyChecksum()` for this purpose. Once we verify the content of a `WriteBatch`, it is then divided into potentially multiple WAL fragments and persisted in the underlying file system. From that point on, the integrity protection is handed off to the per-fragment crc32c checksum that is persisted in the WAL too.

![](/static/images/kv-checksum/WAL-write.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

#### Memtable write

Similar to the WAL write path, `ProtectionInfo` is verified before an entry is inserted into a memtable. The difference here is that a memtable entry has its own buffer, and the content of a `WriteBatch` is copied into the memtable entry. So the `ProtectionInfo` is verified against the memtable entry buffer instead. The current per key-value checksum protection ends at this verification on the buffer containing a memtable entry, and one piece of future work is to extend the coverage to key-value pairs in memtables.

![](/static/images/kv-checksum/Memtable-write.png)
{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"}

#### WAL read

This is for the DB recovery path: WAL fragments are read into memory, concatenated together to form WAL records, and then `WriteBatch`es are constructed from WAL records and added to memtables. In RocksDB 7.4, once a `WriteBatch` copies its content from a WAL record, `ProtectionInfo` is constructed from the `WriteBatch` content and per key-value protection starts. However, this copy operation is not protected, nor is the reconstruction of a WAL record from WAL fragments. To provide protection from silent data corruption during these memory copying operations, we added a checksum handshake, detailed below, in RocksDB 7.5.

When a WAL fragment is first read into memory, its crc32c checksum is [verified](https://github.com/facebook/rocksdb/blob/2f13f5f7d09c589d5adebf0cbc42fadf0da0f00e/db/log_reader.cc#L483). The WAL fragment is then appended to the buffer containing a WAL record. RocksDB uses xxh3's streaming API to calculate the checksum of the WAL record and updates the streaming hash state with the new WAL fragment content whenever it is appended to the WAL record buffer (e.g. [here](https://github.com/facebook/rocksdb/blob/2f13f5f7d09c589d5adebf0cbc42fadf0da0f00e/db/log_reader.cc#L135)). After the WAL record is constructed, it is copied into a `WriteBatch` and `ProtectionInfo` is constructed from the write batch content.
Then, the xxh3 checksum of the WAL record is [verified](https://github.com/facebook/rocksdb/blob/2f13f5f7d09c589d5adebf0cbc42fadf0da0f00e/db/write_batch.cc#L3081-L3085) against the write batch content to complete the checksum handshake. If the checksum verification succeeds, then we are more confident that `ProtectionInfo` is calculated based on uncorrupted data, and the protection coverage continues with the newly constructed `ProtectionInfo` along the write code paths mentioned above. - -![](/static/images/kv-checksum/WAL-read.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -## Future work - -Future coverage expansion will cover memtable KVs, flush, compaction and user reads etc. - -## References - -[1] http://rocksdb.org/blog/2021/05/26/online-validation.html - -[2] H. D. Dixit, L. Boyle, G. Vunnam, S. Pendharkar, M. Beadon, and S. Sankar, ‘Detecting silent data corruptions in the wild’. arXiv, 2022. - -[3] https://github.com/Cyan4973/xxHash - -[4] https://github.com/Cyan4973/xxHash/issues/229#issuecomment-511956403 diff --git a/docs/_posts/2022-10-05-lost-buffered-write-recovery.markdown b/docs/_posts/2022-10-05-lost-buffered-write-recovery.markdown deleted file mode 100644 index fca3ea739..000000000 --- a/docs/_posts/2022-10-05-lost-buffered-write-recovery.markdown +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: "Verifying crash-recovery with lost buffered writes" -layout: post -author: -- ajkr -category: blog ---- - -## Introduction - -Writes to a RocksDB instance go through multiple layers before they are fully persisted. -Those layers may buffer writes, delaying their persistence. -Depending on the layer, buffered writes may be lost in a process or system crash. -A process crash loses writes buffered in process memory only. -A system crash additionally loses writes buffered in OS memory. - -The new test coverage introduced in this post verifies there is no hole in the recovered data in either type of crash. -A hole would exist if any recovered write were newer than any lost write, as illustrated below. -This guarantee is important for many applications, such as those that use the newest recovered write to determine the starting point for replication. - -![](/static/images/lost-buffered-write-recovery/happy-cat.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -*Valid (no hole) recovery: all recovered writes (1 and 2) are older than all lost writes (3 and 4)* -{: style="text-align: center"} - -![](/static/images/lost-buffered-write-recovery/angry-cat.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -*Invalid (hole) recovery: a recovered write (4) is newer than a lost write (3)* -{: style="text-align: center"} - -The new test coverage assumes all writes use the same options related to buffering/persistence. -For example, we do not cover the case of alternating writes with WAL disabled and WAL enabled (`WriteOptions::disableWAL`). -It also assumes the crash does not have any unexpected consequences like corrupting persisted data. - -Testing for holes in the recovery is challenging because there are many valid recovery outcomes. -Our solution involves tracing all the writes and then verifying the recovery matches a prefix of the trace. -This proves there are no holes in the recovery. -See "Extensions for lost buffered writes" subsection below for more details. - -Testing actual system crashes would be operationally difficult. 
-Our solution simulates system crash by buffering written but unsynced data in process memory such that it is lost in a process crash. -See "Simulating system crash" subsection below for more details. - -## Scenarios covered - -We began testing recovery has no hole in the following new scenarios. -This coverage is included in our internal CI that periodically runs against the latest commit on the main branch. - -1. **Process crash with WAL disabled** (`WriteOptions::disableWAL=1`), which loses writes since the last memtable flush. -2. **System crash with WAL enabled** (`WriteOptions::disableWAL=0`), which loses writes since the last memtable flush or WAL sync (`WriteOptions::sync=1`, `SyncWAL()`, or `FlushWAL(true /* sync */)`). -3. **Process crash with manual WAL flush** (`DBOptions::manual_wal_flush=1`), which loses writes since the last memtable flush or manual WAL flush (`FlushWAL()`). -4. **System crash with manual WAL flush** (`DBOptions::manual_wal_flush=1`), which loses writes since the last memtable flush or synced manual WAL flush (`FlushWAL(true /* sync */)`, or `FlushWAL(false /* sync */)` followed by WAL sync). - -## Issues found - -* [False detection of corruption after system crash due to race condition with WAL sync and `track_and_verify_wals_in_manifest](https://github.com/facebook/rocksdb/pull/10185) -* [Undetected hole in recovery after system crash due to race condition in WAL sync](https://github.com/facebook/rocksdb/pull/10560) -* [Recovery failure after system crash due to missing directory sync for critical metadata file](https://github.com/facebook/rocksdb/pull/10573) - -## Solution details - -### Basic setup - -![](/static/images/lost-buffered-write-recovery/basic-setup.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -Our correctness testing framework consists of a stress test program (`db_stress`) and a wrapper script (`db_crashtest.py`). -`db_crashtest.py` manages instances of `db_stress`, starting them and injecting crashes. -`db_stress` operates a DB and test oracle ("Latest values file"). - -At startup, `db_stress` verifies the DB using the test oracle, skipping keys that had pending writes when the last crash happened. -`db_stress` then stresses the DB with random operations, keeping the test oracle up-to-date. - -As the name "Latest values file" implies, this test oracle only tracks the latest value for each key. -As a result, this setup is unable to verify recoveries involving lost buffered writes, where recovering older values is tolerated as long as there is no hole. - -### Extensions for lost buffered writes - -To accommodate lost buffered writes, we extended the test oracle to include two new files: "`verifiedSeqno`.state" and "`verifiedSeqno`.trace". -`verifiedSeqno` is the sequence number of the last successful verification. -"`verifiedSeqno`.state" is the expected values file at that sequence number, and "`verifiedSeqno`.trace" is the trace file of all operations that happened after that sequence number. - -![](/static/images/lost-buffered-write-recovery/replay-extension.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -When buffered writes may have been lost by the previous `db_stress` instance, the current `db_stress` instance must reconstruct the latest values file before startup verification. -M is the recovery sequence number of the current `db_stress` instance and N is the recovery sequence number of the previous `db_stress` instance. 
-M is learned from the DB, while N is learned from the filesystem by parsing the "*.{trace,state}" filenames. -Then, the latest values file ("LATEST.state") can be reconstructed by replaying the first M-N traced operations (in "N.trace") on top of the last instance's starting point ("N.state"). - -![](/static/images/lost-buffered-write-recovery/trace-extension.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -When buffered writes may be lost by the current `db_stress` instance, we save the current expected values into "M.state" and begin tracing newer operations in "M.trace". - -### Simulating system crash - -When simulating system crash, we send file writes to a `TestFSWritableFile`, which buffers unsynced writes in process memory. -That way, the existing `db_stress` process crash mechanism will lose unsynced writes. - -![](/static/images/lost-buffered-write-recovery/test-fs-writable-file.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -`TestFSWritableFile` is implemented as follows. - -* `Append()` buffers the write in a local `std::string` rather than calling `write()`. -* `Sync()` transfers the local `std::string`s content to `PosixWritableFile::Append()`, which will then `write()` it to the OS page cache. - -## Next steps -An untested guarantee is that RocksDB recovers all writes that the user explicitly flushed out of the buffers lost in the crash. -We may recover more writes than these due to internal flushing of buffers, but never less. -Our test oracle needs to be further extended to track the lower bound on the sequence number that is expected to survive a crash. - -We would also like to make our system crash simulation more realistic. -Currently we only drop unsynced regular file data, but we should drop unsynced directory entries as well. - -## Acknowledgements - -Hui Xiao added the manual WAL flush coverage and compatibility with `TransactionDB`. -Zhichao Cao added the system crash simulation. -Several RocksDB team members contributed to this feature's dependencies. diff --git a/docs/_posts/2022-10-07-asynchronous-io-in-rocksdb.markdown b/docs/_posts/2022-10-07-asynchronous-io-in-rocksdb.markdown deleted file mode 100644 index 0586f1c3d..000000000 --- a/docs/_posts/2022-10-07-asynchronous-io-in-rocksdb.markdown +++ /dev/null @@ -1,133 +0,0 @@ ---- -title: Asynchronous IO in RocksDB -layout: post -author: -- akankshamahajan15 -- anand1976 -category: blog ---- -## Summary - -RocksDB provides several APIs to read KV pairs from a database, including Get and MultiGet for point lookups and Iterator for sequential scanning. These APIs may result in RocksDB reading blocks from SST files on disk storage. The types of blocks and the frequency with which they are read from storage is workload dependent. Some workloads may have a small working set and thus may be able to cache most of the data required, while others may have large working sets and have to read from disk more often. In the latter case, the latency would be much higher and throughput would be lower than the former. They would also be dependent on the characteristics of the underlying storage media, making it difficult to migrate from one medium to another, for example, local flash to disaggregated flash. - -One way to mitigate the impact of storage latency is to read asynchronously and in parallel as much as possible, in order to hide IO latency. We have implemented this in RocksDB in Iterators and MultiGet. 
In Iterators, we prefetch data asynchronously in the background for each file being iterated on, unlike the current implementation that does prefetching synchronously, thus blocking the iterator thread. In MultiGet, we determine the set of files that a given batch of keys overlaps, and read the necessary data blocks from those files in parallel using an asynchronous file system API. These optimizations have significantly decreased the overall latency of the RocksDB MultiGet and iteration APIs on slower storage compared to local flash. - -The optimizations described here are in the internal implementation of Iterator and MultiGet in RocksDB. The user API is still synchronous, so existing code can easily benefit from it. We might consider async user APIs in the future. - - -## Design - -### API - -A new flag in `ReadOptions`, `async_io`, controls the usage of async IO. This flag, when set, enables async IO in Iterators and MultiGet. For MultiGet, an additional `ReadOptions` flag, `optimize_multiget_for_io` (defaults to true), controls how aggressively to use async IO. If the flag is not set, files in the same level are read in parallel but not different levels. If the flag is set, the level restriction is removed and as many files as possible are read in parallel, regardless of level. The latter might have a higher CPU cost depending on the workload. - -At the FileSystem layer, we use the `FSRandomAccessFile::ReadAsync` API to start an async read, providing a completion callback. - -### Scan - -A RocksDB scan usually involves the allocation of a new iterator, followed by a Seek call with a target key to position the iterator, followed by multiple Next calls to iterate through the keys sequentially. Both the Seek and Next operations present opportunities to read asynchronously, thereby reducing the scan latency. - -A scan usually involves iterating through keys in multiple entities - the active memtable, sealed and unflushed memtables, every L0 file, and every non-empty non-zero level. The first two are completely in memory and thus not impacted by IO latency. The latter two involve reading from SST files. This means that an increase in IO latency has a multiplier effect, since multiple L0 files and levels have to be iterated on. - -Some factors, such as block cache and prefix bloom filters, can reduce the number of files to iterate and number of reads from the files. Nevertheless, even a few reads from disk can dominate the overall latency. RocksDB uses async IO in both Seek and Next to mitigate the latency impact, as described below. - - -#### Seek - -A RocksDB iterator maintains a collection of child iterators, one for each L0 file and for each non-empty non-zero levels. For a Seek operation every child iterator has to Seek to the target key. This is normally done serially, by doing synchronous reads from SST files when the required data blocks are not in cache. When the async_io option is enabled, RocksDB performs the Seek in 2 phases - 1) Locate the data block required for Seek in each file/level and issue an async read, and 2) in the second phase, reseek with the same key, which will wait for the async read to finish at each level and position the table iterator. Phase 1 reads multiple blocks in parallel, reducing overall Seek latency. - - -#### Next - -For the iterator Next operation, RocksDB tries to reduce the latency due to IO by prefetching data from the file. This prefetching occurs when a data block required by Next is not present in the cache. 
The reads from file and prefetching is managed by the FilePrefetchBuffer, which is an object that’s created per table iterator (BlockBasedTableIterator). The FilePrefetchBuffer reads the required data block, and an additional amount of data that varies depending on the options provided by the user in ReadOptions and BlockBasedTableOptions. The default behavior is to start prefetching on the third read from a file, with an initial prefetch size of 8KB and doubling it on every subsequent read, upto a max of 256KB. - -While the prefetching in the previous paragraph helps, it is still synchronous and contributes to the iterator latency. When the async_io option is enabled, RocksDB prefetches in the background, i.e while the iterator is scanning KV pairs. This is accomplished in FilePrefetchBuffer by maintaining two prefetch buffers. The prefetch size is calculated as usual, but its then split across the two buffers. As the iteration proceeds and data in the first buffer is consumed, the buffer is cleared and an async read is scheduled to prefetch additional data. This read continues in the background while the iterator continues to process data in the second buffer. At this point, the roles of the two buffers are reversed. This does not completely hide the IO latency, since the iterator would have to wait for an async read to complete after the data in memory has been consumed. However, it does hide some of it by overlapping CPU and IO, and async prefetch can be happening on multiple levels in parallel, further reducing the latency. - -![Scan flow](/static/images/asynchronous-io/scan_async.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -### MultiGet - -The MultiGet API accepts a batch of keys as input. Its a more efficient way of looking up multiple keys compared to a loop of Gets. One way MultiGet is more efficient is by reading multiple data blocks from an SST file in a batch, for keys in the same file. This greatly reduces the latency of the request, compared to a loop of Gets. The MultiRead FileSystem API is used to read a batch of data blocks. - -![MultiGet flow](/static/images/asynchronous-io/mget_async.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -Even with the MultiRead optimization, subset of keys that are in different files still need to be read serially. We can take this one step further and read multiple files in parallel. In order to do this, a few fundamental changes were required in the MultiGet implementation - - -1. Coroutines - A MultiGet involves determining the set of keys in a batch that overlap an SST file, and then calling TableReader::MultiGet to do the actual lookup. The TableReader probes the bloom filter, traverses the index block, looks up the block cache for the necessary, reads the missing data blocks from the SST file, and then searches for the keys in the data blocks. There is a significant amount of context that’s accumulated at each stage, and it would be rather complex to interleave data blocks reads by multiple TableReaders. In order to simplify it, we used async IO with C++ coroutines. The TableReader::MultiGet is implemented as a coroutine, and the coroutine is suspended after issuing async reads for missing data blocks. This allows the top-level MultiGet to iterate through the TableReaders for all the keys, before waiting for the reads to finish and resuming the coroutines. -2. Filtering - The downside of using coroutines is the CPU overhead, which is non-trivial. 
To minimize the overhead, its desirable to not use coroutines as much as possible. One scenario in which we can completely avoid the call to a TableReader::MultiGet coroutine is if we know that none of the overlapping keys are actually present in the SST file. This can easily determined by probing the bloom filter. In the previous implementation, the bloom filter lookup was embedded in TableReader::MultiGet. However, we could easily implement is as a separate step, before calling TableReader::MultiGet. -3. Splitting batches - The default strategy of MultiGet is to lookup keys in one level (or L0 file), before moving on to the next. This limits the amount of IO parallelism we can exploit. For example, the keys in a batch may not be clustered together, and may be scattered over multiple files. Even if they are clustered together in the key space, they may not all be in the same level. In order to optimize for these situations, we determine the subset of keys that are likely to be in a given level, and then split the MultiGet batch into 2 - the subset in that level, and the remainder. The batch containing the remainder can then be processed in parallel. The subset of keys likely to be in a level is determined by the filtering step. - -Together, these changes enabled two types of latency optimization in MultiGet using async IO - single-level and multi-level. The former reads data blocks in parallel from multiple files in the same LSM level, while the latter reads in parallel from multiple files in multiple levels. - -## Results - -Command used to generate the database: - -`buck-out/opt/gen/rocks/tools/rocks_db_bench —db=/rocks_db_team/prefix_scan —env_uri=ws://ws.flash.ftw3preprod1 -logtostderr=false -benchmarks="fillseqdeterministic" -key_size=32 -value_size=512 -num=5000000 -num_levels=4 -multiread_batched=true -use_direct_reads=false -adaptive_readahead=true -threads=1 -cache_size=10485760000 -async_io=false -multiread_stride=40000 -disable_auto_compactions=true -compaction_style=1 -bloom_bits=10` - -Structure of the database: - -`Level[0]: /000233.sst(size: 24828520 bytes)` -`Level[0]: /000232.sst(size: 49874113 bytes)` -`Level[0]: /000231.sst(size: 100243447 bytes)` -`Level[0]: /000230.sst(size: 201507232 bytes)` -`Level[1]: /000224.sst - /000229.sst(total size: 405046844 bytes)` -`Level[2]: /000211.sst - /000223.sst(total size: 814190051 bytes)` -`Level[3]: /000188.sst - /000210.sst(total size: 1515327216 bytes)` - - -### MultiGet - -MultiGet benchmark command: - -`buck-out/opt/gen/rocks/tools/rocks_db_bench -use_existing_db=true —db=/rocks_db_team/prefix_scan -benchmarks="multireadrandom" -key_size=32 -value_size=512 -num=5000000 -batch_size=8 -multiread_batched=true -use_direct_reads=false -duration=60 -ops_between_duration_checks=1 -readonly=true -threads=4 -cache_size=300000000 -async_io=true -multiread_stride=40000 -statistics —env_uri=ws://ws.flash.ftw3preprod1 -logtostderr=false -adaptive_readahead=true -bloom_bits=10` - -#### Single-file - -The default MultiGet implementation of reading from one file at a time had a latency of 1292 micros/op. - -`multireadrandom : 1291.992 micros/op 3095 ops/sec 60.007 seconds 185768 operations; 1.6 MB/s (46768 of 46768 found) ` -`rocksdb.db.multiget.micros P50 : 9664.419795 P95 : 20757.097056 P99 : 29329.444444 P100 : 46162.000000 COUNT : 23221 SUM : 239839394` - -#### Single-level - -MultiGet with async_io=true and optimize_multiget_for_io=false had a latency of 775 micros/op. 
- -`multireadrandom : 774.587 micros/op 5163 ops/sec 60.009 seconds 309864 operations; 2.7 MB/s (77816 of 77816 found)` -`rocksdb.db.multiget.micros P50 : 6029.601964 P95 : 10727.467932 P99 : 13986.683940 P100 : 47466.000000 COUNT : 38733 SUM : 239750172` - -#### Multi-level - -With all optimizations turned on, MultiGet had the lowest latency of 508 micros/op. - -`multireadrandom : 507.533 micros/op 7881 ops/sec 60.003 seconds 472896 operations; 4.1 MB/s (117536 of 117536 found)` -`rocksdb.db.multiget.micros P50 : 3923.819467 P95 : 7356.182075 P99 : 10880.728723 P100 : 28511.000000 COUNT : 59112 SUM : 239642721` - -### Scan - -Benchmark command: - -`buck-out/opt/gen/rocks/tools/rocks_db_bench -use_existing_db=true --db=/rocks_db_team/prefix_scan -benchmarks="seekrandom" -key_size=32 -value_size=512 -num=5000000 -batch_size=8 -multiread_batched=true -use_direct_reads=false -duration=60 -ops_between_duration_checks=1 -readonly=true -threads=4 -cache_size=300000000 -async_io=true -multiread_stride=40000 -statistics --env_uri=ws://ws.flash.ftw3preprod1 -logtostderr=false -adaptive_readahead=true -bloom_bits=10 -seek_nexts=65536` - -#### With async scan - -`seekrandom : 414442.303 micros/op 9 ops/sec 60.288 seconds 581 operations; 326.2 MB/s (145 of 145 found)` - -#### Without async scan - -`seekrandom : 848858.669 micros/op 4 ops/sec 60.529 seconds 284 operations; 158.1 MB/s (74 of 74 found)` - -## Known Limitations - -These optimizations apply only to block-based table SSTs. File system support for the `ReadAsync` and `Poll` interfaces is required. Currently, it is available only for `PosixFileSystem`. - -The MultiGet async IO optimization has a few additional limitations: - -1. Depends on folly, which introduces a few additional build steps -2. Higher CPU overhead due to coroutines. The CPU overhead of MultiGet may increase 6-15%, with the worst case being a single-threaded MultiGet batch of keys with 1 key/file intersection and 100% cache hit rate. A more realistic case of multiple threads with a few keys (~4) overlapping per file should see ~6% higher CPU utilization. -3. No parallelization of metadata reads. A metadata read will block the thread. -4. A few other cases, such as additional block reads for merge operands, will also be read serially. - - diff --git a/docs/_posts/2022-10-31-align-compaction-output-file.markdown b/docs/_posts/2022-10-31-align-compaction-output-file.markdown deleted file mode 100644 index a2db41bc3..000000000 --- a/docs/_posts/2022-10-31-align-compaction-output-file.markdown +++ /dev/null @@ -1,107 +0,0 @@ ---- -title: Reduce Write Amplification by Aligning Compaction Output File Boundaries -layout: post -author: -- zjay -category: blog ---- -## TL;DR -By cutting compaction output files earlier, and allowing them to exceed the target file size so that they align with the next level's files, RocksDB can **reduce WA (Write Amplification) by more than 10%**. The feature is **enabled by default** after the user upgrades RocksDB to version `7.8.0+`. - -## Background -RocksDB level compaction picks one file from the source level and compacts it into the next level, which is a typical partial merge compaction algorithm. Compared to a full merge compaction strategy, for example [universal compaction](https://github.com/facebook/rocksdb/wiki/Universal-Compaction), it has the benefits of smaller compaction size, better parallelism, etc. But it also has a larger write amplification (typically 20-30 times user data).
One of the problems is wasted compaction at the beginning and ending: - -![](/static/images/align-compaction-output/file_cut_normal.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -In the diagram above, `SST11` is selected for the compaction, it overlaps with `SST20` to `SST23`, so all these files are selected for compaction. But the beginning and ending of the SST on Level 2 are wasted, which also means it will be compacted again when `SST10` is compacting down. If the file boundaries are aligned, then the wasted compaction size could be reduced. On average, the wasted compaction is `1` file size: `0.5` at the beginning, and `0.5` at the end. Typically the average compaction fan-out is about 6 (with the default max_bytes_for_level_multiplier = 10), then `1 / (6 + 1) ~= 14%` of compaction is wasted. -## implementation -To reduce such wasted compaction, RocksDB now tries to align the compaction output file to the next level's file. So future compactions will have fewer wasted compaction. For example, the above case might be cut like this: - -![](/static/images/align-compaction-output/file_cut_align.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -The trade-off is the file won't be cut exactly after it exceeds target_file_size_base, instead, it will be more likely cut when it's aligned with the next level file's boundary, so the file size might be more varied. It could be as small as 50% of `target_file_size` or as large as `2x target_file_size`. It will only impact non-bottommost-level files, which should be only `~11%` of the data. -Internally, RocksDB tries to cut the file so its size is close to the `target_file_size` setting but also aligned with the next level boundary. When the compaction output file hit a next-level file boundary, either the beginning or ending boundary, it will cut if: -``` -current_size > ((5 * min(bounderies_num, 8) + 50) / 100) * target_file_size -``` -([details](https://github.com/facebook/rocksdb/blob/23fa5b7789d6acd0c211d6bdd41448bbf1513bb6/db/compaction/compaction_outputs.cc#L270-L290)) - -The file size is also capped at `2x target_file_size`: [details](https://github.com/facebook/rocksdb/blob/f726d29a8268ae4e2ffeec09172383cff2ab4db9/db/compaction/compaction.cc#L273-L277). -Another benefit of cutting the file earlier is having more trivial move compaction, which is moving the file from a high level to a low level without compacting anything. 
Based on a compaction simulator test, the trivial move data is increased by 30% (but still less than 1% compaction data is trivial move): - -![](/static/images/align-compaction-output/file_cut_trival_move.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -Based on the db_bench test, it can save `~12%` compaction load, here is the test command and result: -``` -TEST_TMPDIR=/data/dbbench ./db_bench --benchmarks=fillrandom,readrandom -max_background_jobs=12 -num=400000000 -target_file_size_base=33554432 - -# baseline: -Flush(GB): cumulative 25.882, interval 7.216 -Cumulative compaction: 285.90 GB write, 162.36 MB/s write, 269.68 GB read, 153.15 MB/s read, 2926.7 seconds - -# with this change: -Flush(GB): cumulative 25.882, interval 7.753 -Cumulative compaction: 249.97 GB write, 141.96 MB/s write, 233.74 GB read, 132.74 MB/s read, 2534.9 seconds -``` - -The feature is enabled by default by upgrading to RocksDB 7.8 or later versions, as the feature should have a limited impact on the file size and have great write amplification improvements. If in a rare case, it needs to opt out, set -``` -options.level_compaction_dynamic_file_size = false; -``` - -## Other Options and Benchmark -We also tested a few other options, starting with a fixed threshold: 75% of the target_file_size and 50%. Then with a dynamic threshold that is explained, but still limiting file size smaller than the target_file_size. -1. Baseline (main branch before [PR#10655](https://github.com/facebook/rocksdb/pull/10655)); -2. Fixed Threshold `75%`: after 75% of target file size, cut the file whenever it aligns with a low level file boundary; -3. Fixed Threshold `50%`: reduce the threshold to 50% of target file size; -4. Dynamic Threshold `(5*bounderies_num + 50)` percent of target file size and maxed at 90%; -5. Dynamic Threshold + allow 2x the target file size (chosen option). - -### Test Environment and Data -To speed up the benchmark, we introduced a compaction simulator within Rocksdb ([details](https://github.com/jay-zhuang/rocksdb/tree/compaction_sim)), which replaced the physical SST with in-memory data (a large bitset). Which can test compaction more consistently. As it's a simulator, it has its limitations: - -it assumes each key-value has the same size; -1. no deletion (but has override); -2. doesn't consider data compression; -3. single-threaded and finish all compactions before the next flush (so no write stall). - -We use 3 kinds of the dataset for tests: -1. Random Data, has an override, evenly distributed; -2. Zipf distribution with alpha = 1.01, moderately skewed; -3. Zipf distribution with alpha = 1.2, highly skewed. - -#### Write Amplification - -![](/static/images/align-compaction-output/write_amp_compare.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 100%"} - -As we can see, all options are better than the baseline. Option5 (brown) and option3 (green) have similar WA improvements. (The sudden WA drop during ~40G Random Dataset is because we enabled `level_compaction_dynamic_level_bytes` and the level number was increased from 3 to 4, the similar test result without enabling `level_compaction_dynamic_level_bytes`). - -#### File Size Distribution at the End of Test -This is the file size distribution at the end of the test, which loads about 100G data. 
As this change only impacts the non-bottommost file size, and the majority of the SST files are bottommost, there're no significant differences: - -![](/static/images/align-compaction-output/file_size_compare.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 100%"} - -#### All Compaction Generated File Sizes -The high-level files are much more likely to be compacted, so all compaction-generated files size has more significant change: - -![](/static/images/align-compaction-output/compaction_output_file_size_compare.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 100%"} - -Overall option5 has most of the file size close to the target file size. vs. option3 has a much smaller size. Here are more detailed stats for compaction output file size: -``` - base 50p 75p dynamic 2xdynamic -count 1.656000e+03 1.960000e+03 1.770000e+03 1.687000e+03 1.705000e+03 -mean 3.116062e+07 2.634125e+07 2.917876e+07 3.060135e+07 3.028076e+07 -std 7.145242e+06 1.065134e+07 8.800474e+06 7.612939e+06 8.046139e+06 -``` - -## Summary -Allowing more dynamic file size and aligning the compaction output file to the next level file's boundary improves the RocksDB write amplification by more than 10%, which will be enabled by default in `7.8.0` release. We picked a simple algorithm to decide when to cut the output file, which can be further improved. For example, by estimating output file size with index information. Any suggestions or PR are welcomed. - -## Acknowledgements -We thank Siying Dong for initializing the file-cutting idea and thank Andrew Kryczka, Mark Callaghan for contributing to the ideas. And Changyu Bi for the detailed code review. diff --git a/docs/_posts/2022-11-09-time-aware-tiered-storage.markdown b/docs/_posts/2022-11-09-time-aware-tiered-storage.markdown deleted file mode 100644 index 03a6b02ef..000000000 --- a/docs/_posts/2022-11-09-time-aware-tiered-storage.markdown +++ /dev/null @@ -1,121 +0,0 @@ ---- -title: Time-Aware Tiered Storage in RocksDB -layout: post -author: -- zjay -category: blog ---- -## TL:DR -Tiered storage is now natively supported in the RocksDB with the option [`last_level_temperature`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L910), time-aware Tiered storage feature guarantees the recently written data are put in the hot tier storage with the option [`preclude_last_level_data_seconds`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L927). - -## Background -RocksDB Tiered Storage assigns a data temperature when creating the new SST which [hints the file system](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/file_system.h#L162) to put the data on the corresponding storage media, so the data in a single DB instance can be placed on different storage media. Before the feature, the user typically creates multiple DB instances for different storage media, for example, one DB instance stores the recent hot data and migrates the data to another cold DB instance when the data becomes cold. Tracking and migrating the data could be challenging. With the RocksDB tiered storage feature, RocksDB compaction migrates the data from hot storage to cold storage. 
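For illustration, here is a minimal sketch (not part of the original post) of how a custom `FileSystem` could act on the temperature hint that RocksDB passes down when it creates a new SST file. Only `FileSystemWrapper`, `FileOptions::temperature`, and the `Temperature` enum are from the RocksDB public API; the class name and the routing policy are hypothetical:

```c++
#include <memory>
#include <string>

#include "rocksdb/file_system.h"

using namespace ROCKSDB_NAMESPACE;

// Hypothetical FileSystem that inspects the temperature hint on new files.
class TemperatureAwareFS : public FileSystemWrapper {
 public:
  explicit TemperatureAwareFS(const std::shared_ptr<FileSystem>& base)
      : FileSystemWrapper(base) {}

  const char* Name() const override { return "TemperatureAwareFS"; }

  IOStatus NewWritableFile(const std::string& fname, const FileOptions& opts,
                           std::unique_ptr<FSWritableFile>* result,
                           IODebugContext* dbg) override {
    if (opts.temperature == Temperature::kWarm ||
        opts.temperature == Temperature::kCold) {
      // A real implementation might map `fname` to a path or volume backed
      // by colder storage media here. This sketch just forwards the call.
    }
    return FileSystemWrapper::NewWritableFile(fname, opts, result, dbg);
  }
};
```

The temperature is only a hint: how (or whether) the file system separates the storage media is entirely up to the `FileSystem` implementation.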
- -![](/static/images/time-aware-tiered-storage/tiered_storage_overview.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -Currently, RocksDB supports assigning the last level file temperature. In an LSM tree, typically the last level data is most likely the coldest. As the most recent data is on the higher level and gradually compacted to the lower level. The higher level data is more likely to be read, because: -1. RocksDB read always queries from the higher level to the lower level until it finds the data; -2. The high-level data is much more likely to be read and written by the compactions. - -### Problem -Generally in the LSM tree, hotter data is likely on the higher levels as mentioned before, **but it is not always the case**, for example for the skewed dataset, the recent data could be compacted to the last level first. For the universal compaction, a major compaction would compact all data to the last level (the cold tier) which includes both recent data that should be cataloged as hot data. In production, **we found the majority of the compaction load is actually major compaction (more than 80%)**. - -![](/static/images/time-aware-tiered-storage/tiered_storage_problem.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -### Goal and Non-goals -It’s hard to predict the hot and cold data. The most frequently accessed data should be cataloged as hot data. But it is hard to predict which key is going to be accessed most, it is also hard to track the per-key based access history. The time-aware tiered storage feature is only **focusing on the use cases that the more recent data is more likely to be accessed**. Which is the majority of the cases, but not all. - -## User APIs -Here are the 3 main tiered storage options: -```c++ -Temperature last_level_temperature = Temperature::kUnknown; -uint64_t preclude_last_level_data_seconds = 0; -uint64_t preserve_internal_time_seconds = 0; -``` -[`last_level_temperature`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L910) defines the data temperature for the last level SST files, which is typically kCold or kWarm. RocksDB doesn’t check the option value, instead it just passes that to the file_system API with [`FileOptions.temperature`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/file_system.h#L162) when creating the last level SST files. For all the other files, non-last-level SST files, and non-SST files like manifest files, the temperature is set to kUnknown, which typically maps to hot data. 
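As a concrete sketch (not from the original post), the three options above might be combined like this when opening a DB; the 3-day cutoff, the `kCold` choice, the compaction style, and the DB path are arbitrary example values:

```c++
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  Options options;
  options.create_if_missing = true;
  // Universal compaction, as in the scenarios discussed in this post.
  options.compaction_style = kCompactionStyleUniversal;

  // Hint that last-level SST files belong on the cold tier...
  options.last_level_temperature = Temperature::kCold;
  // ...but keep data written within the last 3 days out of the last level.
  options.preclude_last_level_data_seconds = 3 * 24 * 60 * 60;
  // Sample seqno->time mappings so the age cutoff above can be estimated.
  options.preserve_internal_time_seconds = 3 * 24 * 60 * 60;

  DB* db = nullptr;
  Status s = DB::Open(options, "/tmp/tiered_db_example", &db);
  assert(s.ok());
  // ... normal reads and writes; compaction splits hot and cold data ...
  delete db;
  return 0;
}
```

Whether the `kCold` files actually land on different storage media depends on the `FileSystem` in use, since RocksDB only passes the temperature down as a hint.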
-The user can also get each SST’s temperature information through APIs: -```c++ -db.GetLiveFilesStorageInfo(); -db.GetLiveFilesMetaData(); -db.GetColumnFamilyMetaData(); -``` - -### User Metrics -Here are the tiered storage related statistics: -```c++ -HOT_FILE_READ_BYTES, -WARM_FILE_READ_BYTES, -COLD_FILE_READ_BYTES, -HOT_FILE_READ_COUNT, -WARM_FILE_READ_COUNT, -COLD_FILE_READ_COUNT, -// Last level and non-last level statistics -LAST_LEVEL_READ_BYTES, -LAST_LEVEL_READ_COUNT, -NON_LAST_LEVEL_READ_BYTES, -NON_LAST_LEVEL_READ_COUNT, -``` - -And more details from `IOStats`: -```c++ -struct FileIOByTemperature { -// the number of bytes read to Temperature::kHot file -uint64_t hot_file_bytes_read; -// the number of bytes read to Temperature::kWarm file -uint64_t warm_file_bytes_read; -// the number of bytes read to Temperature::kCold file -uint64_t cold_file_bytes_read; -// total number of reads to Temperature::kHot file -uint64_t hot_file_read_count; -// total number of reads to Temperature::kWarm file -uint64_t warm_file_read_count; -// total number of reads to Temperature::kCold file -uint64_t cold_file_read_count; -``` - -## Implementation -There are 2 main components for this feature. One is the **time-tracking**, and another is the **per-key based placement compaction**. These 2 components are relatively independent and linked together during the compaction initialization phase which gets the sequence number for splitting the hot and cold data. The time-tracking components can even be enabled independently by setting the option [`preserve_internal_time_seconds`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L950). The purpose of that is before migrating existing user cases to the tiered storage feature and avoid compacting the existing hot data to the cold tier (detailed in the migration session below). - -Unlike the user-defined timestamp feature, the time tracking feature doesn’t have accurate time information for each key. It only samples the time information and gives a rough estimation for the key write time. Here is the high-level graph for the implementation: - -![](/static/images/time-aware-tiered-storage/tiered_storage_design.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -### Time Tracking -Time tracking information is recorded by a [periodic task](https://github.com/facebook/rocksdb/blob/d9e71fb2c53726d9c5ed73b4ec962a7ed6ef15ec/db/periodic_task_scheduler.cc#L36) which gets the latest sequence number and the current time and then stores it in an in-memory data structure. The interval of the periodic task is determined by the user setting [`preserve_internal_time_seconds`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L950) and dividing that by 100. For example, if 3 days of data should be precluded from the last level, then the interval of the periodic task is about 0.7 hours (3 * 24 / 100 ~= 0.72), which also means only the latest 100 seq->time pairs needed in memory. - -Currently, the in-memory seq_time_mapping is only used during Flush() and encoded to the SST property. The data is delta encoded and again maximum 100 pairs are stored, so the extra data size is pretty small (far less than 1KB per SST) and only non-last-level SSTs need to have that information. Internally, RocksDB also uses the minimal sequence number and SST creation time from the SST metadata to improve the time accuracy. 
-**The sequence number to time information is distributed in each SST**, ranging from the min seqno to the max seqno for that SST file, so each SST has self-contained time information. This also means there can be some redundancy in the time information; for example, if 2 SSTs have overlapping sequence number ranges (which is very likely for non-L0 files), the same seq->time pair may exist in both SSTs. -In the future, the time information could also be useful for other potential features, like a better estimate of the oldest timestamp for an SST, which is critical for the RocksDB TTL feature. - -### Per-Key Placement Compaction - -![](/static/images/time-aware-tiered-storage/per_key_placement_compaction.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -Compared to normal compaction, which only outputs data to a single level, per-key placement compaction can output data to 2 different levels. Because per-key placement compaction is only used for last-level compaction, the 2 output levels are **always the penultimate level and the last level**. The compaction places each key in its corresponding tier by simply checking the key’s sequence number. - -At the beginning of the compaction, the compaction job collects the seq->time information from every input SST and merges it, then uses the current time to determine the oldest sequence number that should be placed in the non-last level (hot tier). During the last level compaction, as long as a key is newer than that oldest sequence number, it will be placed in the penultimate level (hot tier) instead of the last level (cold tier). - -Note that RocksDB also places keys that are within a user snapshot in the hot tier; there are a few reasons for that: -1. It’s reasonable to assume snapshot-protected data are hot data; -2. It avoids mixing data whose sequence numbers are not yet zeroed out with old last-level data, which is desirable to reduce the oldest obsolete data time (defined as the oldest SST time that has a non-zero sequence number). It also means tombstones are always placed in the hot tier, which is also desirable as they should be pretty small. -3. The original motivation was to avoid moving data from a lower level to a higher level in case the user increases [`preclude_last_level_data_seconds`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L927), in which case the snapshot-protected data in the last level would become hot again and would have to move to a higher level. It’s not always safe to move data from a lower level to a higher level in the LSM tree, as that could cause key conflicts. Later we added a conflict check to allow the data to move up as long as there’s no key conflict, but then the movement is not guaranteed (see Migration for details). - -### Migration -Once the user enables the feature, it enables both time tracking and per-key placement compaction **at the same time**. Existing data, however, may still be mismarked as cold data. To have a smooth migration to the feature, the user can enable the time-tracking feature first.
For example, if the user plans to set [`preclude_last_level_data_seconds`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L927) to 3 days, the user can enable time tracking 3 days earlier with [`preserve_internal_time_seconds`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L950). Then, when the tiered storage feature is enabled, RocksDB already has the time information for the last 3 days of hot data, and per-key placement compaction won’t compact that data to the last level. - -Just preserving the time information won’t prevent the data from being compacted to the last level (which at that point is still on the hot tier). Once the [`preclude_last_level_data_seconds`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L927) and [`last_level_temperature`](https://github.com/facebook/rocksdb/blob/b0d9776b704af01c2b5385e9d53754e0c8176373/include/rocksdb/advanced_options.h#L910) features are enabled, some of the last-level data might need to move up. Currently, RocksDB only does a conflict check, so the hot/cold split is not guaranteed in this case. - -![](/static/images/time-aware-tiered-storage/compaction_moving_up_conflict.png) -{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} - -## Summary -The time-aware tiered storage feature guarantees that new data is placed in the hot tier, which **is ideal for tiering use cases where the most recent data is likely the hot data**. It is done by tracking write time information and using per-key placement compaction to split the hot and cold data. - -The tiered storage feature is actively being developed; any suggestions or PRs are welcome. - -## Acknowledgements -We thank Siying Dong and Andrew Kryczka for brainstorming and reviewing the feature design and implementation. It was my good fortune to work with the RocksDB team members!
\ No newline at end of file diff --git a/docs/_sass/_base.scss b/docs/_sass/_base.scss deleted file mode 100644 index 6d26d9feb..000000000 --- a/docs/_sass/_base.scss +++ /dev/null @@ -1,492 +0,0 @@ -body { - background: $secondary-bg; - color: $text; - font: normal #{$base-font-size}/#{$base-line-height} $base-font-family; - height: 100vh; - text-align: left; - text-rendering: optimizeLegibility; -} - -img { - max-width: 100%; -} - -article { - p { - img { - max-width: 100%; - display:block; - margin-left: auto; - margin-right: auto; - } - } -} - -a { - border-bottom: 1px dotted $primary-bg; - color: $text; - text-decoration: none; - -webkit-transition: background 0.3s, color 0.3s; - transition: background 0.3s, color 0.3s; -} - -blockquote { - padding: 15px 30px 15px 15px; - margin: 20px 0 0 10px; - background-color: rgba(204, 122, 111, 0.1); - border-left: 10px solid rgba(191, 87, 73, 0.2); -} - -#fb_oss a { - border: 0; -} - -h1, h2, h3, h4 { - font-family: $header-font-family; - font-weight: 900; -} - -.navPusher { - border-top: $header-height + $header-ptop + $header-pbot solid $primary-bg; - height: 100%; - left: 0; - position: relative; - z-index: 99; -} - -.homeContainer { - background: $primary-bg; - color: $primary-overlay; - - a { - color: $primary-overlay; - } - - .homeSplashFade { - color: white; - } - - .homeWrapper { - padding: 2em 10px; - text-align: left; - - .wrapper { - margin: 0px auto; - max-width: $content-width; - padding: 0 20px; - } - - .projectLogo { - img { - height: 100px; - margin-bottom: 0px; - } - } - - h1#project_title { - font-family: $header-font-family; - font-size: 300%; - letter-spacing: -0.08em; - line-height: 1em; - margin-bottom: 80px; - } - - h2#project_tagline { - font-family: $header-font-family; - font-size: 200%; - letter-spacing: -0.04em; - line-height: 1em; - } - } -} - -.wrapper { - margin: 0px auto; - max-width: $content-width; - padding: 0 10px; -} - -.projectLogo { - display: none; - - img { - height: 100px; - margin-bottom: 0px; - } -} - -section#intro { - margin: 40px 0; -} - -.fbossFontLight { - font-family: $base-font-family; - font-weight: 300; - font-style: normal; -} - -.fb-like { - display: block; - margin-bottom: 20px; - width: 100%; -} - -.center { - display: block; - text-align: center; -} - -.mainContainer { - background: $secondary-bg; - overflow: auto; - - .mainWrapper { - padding: 4vh 10px; - text-align: left; - - .allShareBlock { - padding: 10px 0; - - .pluginBlock { - margin: 12px 0; - padding: 0; - } - } - - a { - &:hover, - &:focus { - background: $primary-bg; - color: $primary-overlay; - } - } - - em, i { - font-style: italic; - } - - strong, b { - font-weight: bold; - } - - h1 { - font-size: 300%; - line-height: 1em; - padding: 1.4em 0 1em; - text-align: left; - } - - h2 { - font-size: 250%; - line-height: 1em; - margin-bottom: 20px; - padding: 1.4em 0 20px; - text-align: left; - - & { - border-bottom: 1px solid darken($primary-bg, 10%); - color: darken($primary-bg, 10%); - font-size: 22px; - padding: 10px 0; - } - - &.blockHeader { - border-bottom: 1px solid white; - color: white; - font-size: 22px; - margin-bottom: 20px; - padding: 10px 0; - } - } - - h3 { - font-size: 150%; - line-height: 1.2em; - padding: 1em 0 0.8em; - } - - h4 { - font-size: 130%; - line-height: 1.2em; - padding: 1em 0 0.8em; - } - - p { - padding: 0.8em 0; - } - - ul { - list-style: disc; - } - - ol, ul { - padding-left: 24px; - li { - padding-bottom: 4px; - padding-left: 6px; - } - } - - strong { - font-weight: bold; - } - - .post { - 
position: relative; - - .katex { - font-weight: 700; - } - - &.basicPost { - margin-top: 30px; - } - - a { - color: $primary-bg; - - &:hover, - &:focus { - color: #fff; - } - } - - h2 { - border-bottom: 4px solid $primary-bg; - font-size: 130%; - } - - h3 { - border-bottom: 1px solid $primary-bg; - font-size: 110%; - } - - ol { - list-style: decimal outside none; - } - - .post-header { - padding: 1em 0; - - h1 { - font-size: 150%; - line-height: 1em; - padding: 0.4em 0 0; - - a { - border: none; - } - } - - .post-meta { - color: $primary-bg; - font-family: $header-font-family; - text-align: center; - } - } - - .postSocialPlugins { - padding-top: 1em; - } - - .docPagination { - background: $primary-bg; - bottom: 0px; - left: 0px; - position: absolute; - right: 0px; - - .pager { - display: inline-block; - width: 50%; - } - - .pagingNext { - float: right; - text-align: right; - } - - a { - border: none; - color: $primary-overlay; - display: block; - padding: 4px 12px; - - &:hover { - background-color: $secondary-bg; - color: $text; - } - - .pagerLabel { - display: inline; - } - - .pagerTitle { - display: none; - } - } - } - } - - .posts { - .post { - margin-bottom: 6vh; - } - } - } -} - -#integrations_title { - font-size: 250%; - margin: 80px 0; -} - -.ytVideo { - height: 0; - overflow: hidden; - padding-bottom: 53.4%; /* 16:9 */ - padding-top: 25px; - position: relative; -} - -.ytVideo iframe, -.ytVideo object, -.ytVideo embed { - height: 100%; - left: 0; - position: absolute; - top: 0; - width: 100%; -} - -@media only screen and (min-width: 480px) { - h1#project_title { - font-size: 500%; - } - - h2#project_tagline { - font-size: 250%; - } - - .projectLogo { - img { - margin-bottom: 10px; - height: 200px; - } - } - - .homeContainer .homeWrapper { - padding-left: 10px; - padding-right: 10px; - } - - .mainContainer { - .mainWrapper { - .post { - h2 { - font-size: 180%; - } - - h3 { - font-size: 120%; - } - - .docPagination { - a { - .pagerLabel { - display: none; - } - .pagerTitle { - display: inline; - } - } - } - } - } - } -} - -@media only screen and (min-width: 900px) { - .homeContainer { - .homeWrapper { - position: relative; - - #inner { - box-sizing: border-box; - max-width: 600px; - padding-right: 40px; - } - - .projectLogo { - align-items: center; - bottom: 0; - display: flex; - justify-content: flex-end; - left: 0; - padding: 2em 20px 4em; - position: absolute; - right: 20px; - top: 0; - - img { - height: 100%; - max-height: 250px; - } - } - } - } -} - -@media only screen and (min-width: 1024px) { - .mainContainer { - .mainWrapper { - .post { - box-sizing: border-box; - display: block; - - .post-header { - h1 { - font-size: 250%; - } - } - } - - .posts { - .post { - margin-bottom: 4vh; - width: 100%; - } - } - } - } -} - -@media only screen and (min-width: 1200px) { - .homeContainer { - .homeWrapper { - #inner { - max-width: 750px; - } - } - } - - .wrapper { - max-width: 1100px; - } -} - -@media only screen and (min-width: 1500px) { - .homeContainer { - .homeWrapper { - #inner { - max-width: 1100px; - padding-bottom: 40px; - padding-top: 40px; - } - } - } - - .wrapper { - max-width: 1400px; - } -} diff --git a/docs/_sass/_blog.scss b/docs/_sass/_blog.scss deleted file mode 100644 index 12a73c1fc..000000000 --- a/docs/_sass/_blog.scss +++ /dev/null @@ -1,47 +0,0 @@ -.blogContainer { - .posts { - margin-top: 60px; - - .post { - border: 1px solid $primary-bg; - border-radius: 3px; - padding: 10px 20px 20px; - } - } - - .lonePost { - margin-top: 60px; - - .post { - padding: 10px 0px 
0px; - } - } - - .post-header { - h1 { - text-align: center; - } - - .post-authorName { - color: rgba($text, 0.7); - font-size: 14px; - font-weight: 900; - margin-top: 0; - padding: 0; - text-align: center; - } - - .authorPhoto { - border-radius: 50%; - height: 50px; - left: 50%; - margin-left: auto; - margin-right: auto; - display: inline-block; - overflow: hidden; - position: static; - top: -25px; - width: 50px; - } - } -} diff --git a/docs/_sass/_buttons.scss b/docs/_sass/_buttons.scss deleted file mode 100644 index a0371618f..000000000 --- a/docs/_sass/_buttons.scss +++ /dev/null @@ -1,47 +0,0 @@ -.button { - border: 1px solid $primary-bg; - border-radius: 3px; - color: $primary-bg; - display: inline-block; - font-size: 14px; - font-weight: 900; - line-height: 1.2em; - padding: 10px; - text-transform: uppercase; - transition: background 0.3s, color 0.3s; - - &:hover { - background: $primary-bg; - color: $primary-overlay; - } -} - -.homeContainer { - .button { - border-color: $primary-overlay; - border-width: 1px; - color: $primary-overlay; - - &:hover { - background: $primary-overlay; - color: $primary-bg; - } - } -} - -.blockButton { - display: block; -} - -.edit-page-link { - float: right; - font-size: 14px; - font-weight: normal; - line-height: 20px; - opacity: 0.6; - transition: opacity 0.5s; -} - -.edit-page-link:hover { - opacity: 1; -} diff --git a/docs/_sass/_footer.scss b/docs/_sass/_footer.scss deleted file mode 100644 index 5b7439517..000000000 --- a/docs/_sass/_footer.scss +++ /dev/null @@ -1,82 +0,0 @@ -.footerContainer { - background: $secondary-bg; - color: $primary-bg; - overflow: hidden; - padding: 0 10px; - text-align: left; - - .footerWrapper { - border-top: 1px solid $primary-bg; - padding: 0; - - .footerBlocks { - align-items: center; - align-content: center; - display: flex; - flex-flow: row wrap; - margin: 0 -20px; - padding: 10px 0; - } - - .footerSection { - box-sizing: border-box; - flex: 1 1 25%; - font-size: 14px; - min-width: 275px; - padding: 0px 20px; - - a { - border: 0; - color: inherit; - display: inline-block; - line-height: 1.2em; - } - - .footerLink { - padding-right: 20px; - } - } - - .fbOpenSourceFooter { - align-items: center; - display: flex; - flex-flow: row nowrap; - max-width: 25%; - - .facebookOSSLogoSvg { - flex: 0 0 31px; - height: 30px; - margin-right: 10px; - width: 31px; - - path { - fill: $primary-bg; - } - - .middleRing { - opacity: 0.7; - } - - .innerRing { - opacity: 0.45; - } - } - - h2 { - display: block; - font-weight: 900; - line-height: 1em; - } - } - } -} - -@media only screen and (min-width: 900px) { - .footerSection { - &.rightAlign { - margin-left: auto; - max-width: 25%; - text-align: right; - } - } -} \ No newline at end of file diff --git a/docs/_sass/_gridBlock.scss b/docs/_sass/_gridBlock.scss deleted file mode 100644 index 679b31c14..000000000 --- a/docs/_sass/_gridBlock.scss +++ /dev/null @@ -1,115 +0,0 @@ -.gridBlock { - margin: -5px 0; - padding: 0; - padding-bottom: 20px; - - .blockElement { - padding: 5px 0; - - img { - max-width: 100%; - } - - h3 { - border-bottom: 1px solid rgba($primary-bg, 0.5); - color: $primary-bg; - font-size: 18px; - margin: 0; - padding: 10px 0; - } - } - - .gridClear { - clear: both; - } - -} - -.gridBlock .alignCenter { - text-align: center; -} -.gridBlock .alignRight { - text-align: right; -} -.gridBlock .imageAlignSide { - align-items: center; - display: flex; - flex-flow: row wrap; -} -.blockImage { - max-width: 150px; - width: 50%; -} -.imageAlignTop .blockImage { - margin-bottom: 
20px; -} -.imageAlignTop.alignCenter .blockImage { - margin-left: auto; - margin-right: auto; -} -.imageAlignSide .blockImage { - flex: 0 1 100px; - margin-right: 20px; -} -.imageAlignSide .blockContent { - flex: 1 1; -} - -@media only screen and (max-width: 1023px) { - .responsiveList .blockContent { - position: relative; - } - .responsiveList .blockContent > div { - padding-left: 20px; - } - .responsiveList .blockContent::before { - content: "\2022"; - position: absolute; - } -} - -@media only screen and (min-width: 1024px) { - .gridBlock { - display: flex; - flex-direction: row; - flex-wrap: wrap; - margin: -10px -10px 10px -10px; - - .twoByGridBlock { - box-sizing: border-box; - flex: 1 0 50%; - padding: 10px; - } - - .fourByGridBlock { - box-sizing: border-box; - flex: 1 0 25%; - padding: 10px; - } - } - - h2 + .gridBlock { - padding-top: 20px; - } -} - -@media only screen and (min-width: 1400px) { - .gridBlock { - display: flex; - flex-direction: row; - flex-wrap: wrap; - margin: -10px -20px 10px -20px; - - .twoByGridBlock { - box-sizing: border-box; - flex: 1 0 50%; - padding: 10px 20px; - } - - .fourByGridBlock { - box-sizing: border-box; - flex: 1 0 25%; - padding: 10px 20px; - } - } -} \ No newline at end of file diff --git a/docs/_sass/_header.scss b/docs/_sass/_header.scss deleted file mode 100644 index ac79390f4..000000000 --- a/docs/_sass/_header.scss +++ /dev/null @@ -1,139 +0,0 @@ -.fixedHeaderContainer { - background: $primary-bg; - color: $primary-overlay; - height: $header-height; - padding: $header-ptop 0 $header-pbot; - position: sticky; - top: 0; - width: 100%; - z-index: 9999; - - a { - align-items: center; - border: 0; - color: $primary-overlay; - display: flex; - flex-flow: row nowrap; - height: $header-height; - } - - header { - display: flex; - flex-flow: row nowrap; - position: relative; - text-align: left; - - img { - height: 24px; - margin-right: 10px; - } - - h2 { - display: block; - font-family: $header-font-family; - font-weight: 900; - line-height: 18px; - position: relative; - } - } -} - -.navigationFull { - height: 34px; - margin-left: auto; - - nav { - position: relative; - - ul { - display: flex; - flex-flow: row nowrap; - margin: 0 -10px; - - li { - padding: 0 10px; - display: block; - - a { - border: 0; - color: $primary-overlay-special; - font-size: 16px; - font-weight: 400; - line-height: 1.2em; - - &:hover { - border-bottom: 2px solid $primary-overlay; - color: $primary-overlay; - } - } - - &.navItemActive { - a { - color: $primary-overlay; - } - } - } - } - } -} - -/* 900px - - - .fixedHeaderContainer { - .navigationWrapper { - nav { - padding: 0 1em; - position: relative; - top: -9px; - - ul { - margin: 0 -0.4em; - li { - display: inline-block; - - a { - padding: 14px 0.4em; - border: 0; - color: $primary-overlay-special; - display: inline-block; - - &:hover { - color: $primary-overlay; - } - } - - &.navItemActive { - a { - color: $primary-overlay; - } - } - } - } - } - - &.navigationFull { - display: inline-block; - } - - &.navigationSlider { - display: none; - } - } - } - - 1200px - - .fixedHeaderContainer { - header { - max-width: 1100px; - } - } - - 1500px - .fixedHeaderContainer { - header { - max-width: 1400px; - } - } - */ diff --git a/docs/_sass/_poweredby.scss b/docs/_sass/_poweredby.scss deleted file mode 100644 index 4155b6053..000000000 --- a/docs/_sass/_poweredby.scss +++ /dev/null @@ -1,69 +0,0 @@ -.poweredByContainer { - background: $primary-bg; - color: $primary-overlay; - margin-bottom: 20px; - - a { - color: $primary-overlay; - 
} - - .poweredByWrapper { - h2 { - border-color: $primary-overlay-special; - color: $primary-overlay-special; - } - } - - .poweredByMessage { - color: $primary-overlay-special; - font-size: 14px; - padding-top: 20px; - } -} - -.poweredByItems { - display: flex; - flex-flow: row wrap; - margin: 0 -10px; -} - -.poweredByItem { - box-sizing: border-box; - flex: 1 0 50%; - line-height: 1.1em; - padding: 5px 10px; - - &.itemLarge { - flex-basis: 100%; - padding: 10px; - text-align: center; - - &:nth-child(4) { - padding-bottom: 20px; - } - - img { - max-height: 30px; - } - } -} - -@media only screen and (min-width: 480px) { - .itemLarge { - flex-basis: 50%; - max-width: 50%; - } -} - -@media only screen and (min-width: 1024px) { - .poweredByItem { - flex-basis: 25%; - max-width: 25%; - - &.itemLarge { - padding-bottom: 20px; - text-align: left; - } - } -} - diff --git a/docs/_sass/_promo.scss b/docs/_sass/_promo.scss deleted file mode 100644 index 8c9a809dc..000000000 --- a/docs/_sass/_promo.scss +++ /dev/null @@ -1,55 +0,0 @@ -.promoSection { - display: flex; - flex-flow: column wrap; - font-size: 125%; - line-height: 1.6em; - margin: -10px 0; - position: relative; - z-index: 99; - - .promoRow { - padding: 10px 0; - - .pluginWrapper { - display: block; - - &.ghWatchWrapper, &.ghStarWrapper { - height: 28px; - } - } - - .pluginRowBlock { - display: flex; - flex-flow: row wrap; - margin: 0 -2px; - - .pluginWrapper { - padding: 0 2px; - } - } - } -} - -iframe.pluginIframe { - height: 500px; - margin-top: 20px; - width: 100%; -} - -.iframeContent { - display: none; -} - -.iframePreview { - display: inline-block; - margin-top: 20px; -} - -@media only screen and (min-width: 1024px) { - .iframeContent { - display: block; - } - .iframePreview { - display: none; - } -} \ No newline at end of file diff --git a/docs/_sass/_react_docs_nav.scss b/docs/_sass/_react_docs_nav.scss deleted file mode 100644 index f0a651e7f..000000000 --- a/docs/_sass/_react_docs_nav.scss +++ /dev/null @@ -1,332 +0,0 @@ -.docsNavContainer { - background: $sidenav; - height: 35px; - left: 0; - position: fixed; - width: 100%; - z-index: 100; -} - -.docMainWrapper { - .wrapper { - &.mainWrapper { - padding-left: 0; - padding-right: 0; - padding-top: 10px; - } - } -} - -.docsSliderActive { - .docsNavContainer { - box-sizing: border-box; - height: 100%; - overflow-y: auto; - -webkit-overflow-scrolling: touch; - padding-bottom: 50px; - } - - .mainContainer { - display: none; - } -} - -.navBreadcrumb { - box-sizing: border-box; - display: flex; - flex-flow: row nowrap; - font-size: 12px; - height: 35px; - overflow: hidden; - padding: 5px 10px; - - a, span { - border: 0; - color: $sidenav-text; - } - - i { - padding: 0 3px; - } -} - -nav.toc { - position: relative; - - section { - padding: 0px; - position: relative; - - .navGroups { - display: none; - padding: 40px 10px 10px; - } - } - - .toggleNav { - background: $sidenav; - color: $sidenav-text; - position: relative; - transition: background-color 0.3s, color 0.3s; - - .navToggle { - cursor: pointer; - height: 24px; - margin-right: 10px; - position: relative; - text-align: left; - width: 18px; - - &::before, &::after { - content: ""; - position: absolute; - top: 50%; - left: 0; - left: 8px; - width: 3px; - height: 6px; - border: 5px solid $sidenav-text; - border-width: 5px 0; - margin-top: -8px; - transform: rotate(45deg); - z-index: 1; - } - - &::after { - transform: rotate(-45deg); - } - - i { - &::before, &::after { - content: ""; - position: absolute; - top: 50%; - left: 2px; - 
background: transparent; - border-width: 0 5px 5px; - border-style: solid; - border-color: transparent $sidenav-text; - height: 0; - margin-top: -7px; - opacity: 1; - width: 5px; - z-index: 10; - } - - &::after { - border-width: 5px 5px 0; - margin-top: 2px; - } - } - } - - .navGroup { - background: $sidenav-overlay; - margin: 1px 0; - - ul { - display: none; - } - - h3 { - background: $sidenav-overlay; - color: $sidenav-text; - cursor: pointer; - font-size: 14px; - font-weight: 400; - line-height: 1.2em; - padding: 10px; - transition: color 0.2s; - - i:not(:empty) { - width: 16px; - height: 16px; - display: inline-block; - box-sizing: border-box; - text-align: center; - color: rgba($sidenav-text, 0.5); - margin-right: 10px; - transition: color 0.2s; - } - - &:hover { - color: $primary-bg; - - i:not(:empty) { - color: $primary-bg; - } - } - } - - &.navGroupActive { - background: $sidenav-active; - color: $sidenav-text; - - ul { - display: block; - padding-bottom: 10px; - padding-top: 10px; - } - - h3 { - background: $primary-bg; - color: $primary-overlay; - - i { - display: none; - } - } - } - } - - ul { - padding-left: 0; - padding-right: 24px; - - li { - list-style-type: none; - padding-bottom: 0; - padding-left: 0; - - a { - border: none; - color: $sidenav-text; - display: inline-block; - font-size: 14px; - line-height: 1.1em; - margin: 2px 10px 5px; - padding: 5px 0 2px; - transition: color 0.3s; - - &:hover, - &:focus { - color: $primary-bg; - } - - &.navItemActive { - color: $primary-bg; - font-weight: 900; - } - } - } - } - } - - .toggleNavActive { - .navBreadcrumb { - background: $sidenav; - margin-bottom: 20px; - position: fixed; - width: 100%; - } - - section { - .navGroups { - display: block; - } - } - - - .navToggle { - &::before, &::after { - border-width: 6px 0; - height: 0px; - margin-top: -6px; - } - - i { - opacity: 0; - } - } - } -} - -.docsNavVisible { - .navPusher { - .mainContainer { - padding-top: 35px; - } - } -} - -@media only screen and (min-width: 900px) { - .navBreadcrumb { - padding: 5px 0; - } - - nav.toc { - section { - .navGroups { - padding: 40px 0 0; - } - } - } -} - -@media only screen and (min-width: 1024px) { - .navToggle { - display: none; - } - - .docsSliderActive { - .mainContainer { - display: block; - } - } - - .docsNavVisible { - .navPusher { - .mainContainer { - padding-top: 0; - } - } - } - - .docsNavContainer { - background: none; - box-sizing: border-box; - height: auto; - margin: 40px 40px 0 0; - overflow-y: auto; - position: relative; - width: 300px; - } - - nav.toc { - section { - .navGroups { - display: block; - padding-top: 0px; - } - } - - .toggleNavActive { - .navBreadcrumb { - margin-bottom: 0; - position: relative; - } - } - } - - .docMainWrapper { - display: flex; - flex-flow: row nowrap; - margin-bottom: 40px; - - .wrapper { - padding-left: 0; - padding-right: 0; - - &.mainWrapper { - padding-top: 0; - } - } - } - - .navBreadcrumb { - display: none; - h2 { - padding: 0 10px; - } - } -} \ No newline at end of file diff --git a/docs/_sass/_react_header_nav.scss b/docs/_sass/_react_header_nav.scss deleted file mode 100644 index 13c0e562b..000000000 --- a/docs/_sass/_react_header_nav.scss +++ /dev/null @@ -1,141 +0,0 @@ -.navigationFull { - display: none; -} - -.navigationSlider { - position: absolute; - right: 0px; - - .navSlideout { - cursor: pointer; - padding-top: 4px; - position: absolute; - right: 10px; - top: 0; - transition: top 0.3s; - z-index: 101; - } - - .slidingNav { - background: $secondary-bg; - box-sizing: border-box; - 
height: 0px; - overflow-x: hidden; - padding: 0; - position: absolute; - right: 0px; - top: 0; - transition: height 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55), width 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55); - width: 0; - - ul { - flex-flow: column nowrap; - list-style: none; - padding: 10px; - - li { - margin: 0; - padding: 2px 0; - - a { - color: $primary-bg; - display: inline; - margin: 3px 5px; - padding: 2px 0px; - transition: background-color 0.3s; - - &:focus, - &:hover { - border-bottom: 2px solid $primary-bg; - } - } - } - } - } - - .navSlideoutActive { - .slidingNav { - height: auto; - padding-top: $header-height + $header-pbot; - width: 300px; - } - - .navSlideout { - top: -2px; - .menuExpand { - span:nth-child(1) { - background-color: $text; - top: 16px; - transform: rotate(45deg); - } - span:nth-child(2) { - opacity: 0; - } - span:nth-child(3) { - background-color: $text; - transform: rotate(-45deg); - } - } - } - } -} - -.menuExpand { - display: flex; - flex-flow: column nowrap; - height: 20px; - justify-content: space-between; - - span { - background: $primary-overlay; - border-radius: 3px; - display: block; - flex: 0 0 4px; - height: 4px; - position: relative; - top: 0; - transition: background-color 0.3s, top 0.3s, opacity 0.3s, transform 0.3s; - width: 20px; - } -} - -.navPusher { - border-top: $header-height + $header-ptop + $header-pbot solid $primary-bg; - position: relative; - left: 0; - z-index: 99; - height: 100%; - - &::after { - position: absolute; - top: 0; - right: 0; - width: 0; - height: 0; - background: rgba(0,0,0,0.4); - content: ''; - opacity: 0; - -webkit-transition: opacity 0.5s, width 0.1s 0.5s, height 0.1s 0.5s; - transition: opacity 0.5s, width 0.1s 0.5s, height 0.1s 0.5s; - } - - .sliderActive &::after { - width: 100%; - height: 100%; - opacity: 1; - -webkit-transition: opacity 0.5s; - transition: opacity 0.5s; - z-index: 100; - } -} - - -@media only screen and (min-width: 1024px) { - .navigationFull { - display: block; - } - - .navigationSlider { - display: none; - } -} \ No newline at end of file diff --git a/docs/_sass/_reset.scss b/docs/_sass/_reset.scss deleted file mode 100644 index 0e5f2e0c1..000000000 --- a/docs/_sass/_reset.scss +++ /dev/null @@ -1,43 +0,0 @@ -html, body, div, span, applet, object, iframe, -h1, h2, h3, h4, h5, h6, p, blockquote, pre, -a, abbr, acronym, address, big, cite, code, -del, dfn, em, img, ins, kbd, q, s, samp, -small, strike, strong, sub, sup, tt, var, -b, u, i, center, -dl, dt, dd, ol, ul, li, -fieldset, form, label, legend, -table, caption, tbody, tfoot, thead, tr, th, td, -article, aside, canvas, details, embed, -figure, figcaption, footer, header, hgroup, -menu, nav, output, ruby, section, summary, -time, mark, audio, video { - margin: 0; - padding: 0; - border: 0; - font-size: 100%; - font: inherit; - vertical-align: baseline; -} -/* HTML5 display-role reset for older browsers */ -article, aside, details, figcaption, figure, -footer, header, hgroup, menu, nav, section { - display: block; -} -body { - line-height: 1; -} -ol, ul { - list-style: none; -} -blockquote, q { - quotes: none; -} -blockquote:before, blockquote:after, -q:before, q:after { - content: ''; - content: none; -} -table { - border-collapse: collapse; - border-spacing: 0; -} diff --git a/docs/_sass/_search.scss b/docs/_sass/_search.scss deleted file mode 100644 index eadfa11d1..000000000 --- a/docs/_sass/_search.scss +++ /dev/null @@ -1,142 +0,0 @@ -input[type="search"] { - -moz-appearance: none; - -webkit-appearance: none; -} - 
-.navSearchWrapper { - align-self: center; - position: relative; - - &::before { - border: 3px solid $primary-overlay-special; - border-radius: 50%; - content: " "; - display: block; - height: 6px; - left: 15px; - width: 6px; - position: absolute; - top: 4px; - z-index: 1; - } - - &::after { - background: $primary-overlay-special; - content: " "; - height: 7px; - left: 24px; - position: absolute; - transform: rotate(-45deg); - top: 12px; - width: 3px; - z-index: 1; - } - - .aa-dropdown-menu { - background: $secondary-bg; - border: 3px solid rgba($text, 0.25); - color: $text; - font-size: 14px; - left: auto !important; - line-height: 1.2em; - right: 0 !important; - - .algolia-docsearch-suggestion--category-header { - background: $primary-overlay-special; - color: $primary-bg; - - .algolia-docsearch-suggestion--highlight { - background-color: $primary-bg; - color: $primary-overlay; - } - } - - .algolia-docsearch-suggestion--title .algolia-docsearch-suggestion--highlight, - .algolia-docsearch-suggestion--subcategory-column .algolia-docsearch-suggestion--highlight { - color: $primary-bg; - } - - .algolia-docsearch-suggestion__secondary, - .algolia-docsearch-suggestion--subcategory-column { - border-color: rgba($text, 0.3); - } - } -} - -input#search_input { - padding-left: 25px; - font-size: 14px; - line-height: 20px; - border-radius: 20px; - background-color: rgba($primary-overlay-special, 0.25); - border: none; - color: rgba($primary-overlay-special, 0); - outline: none; - position: relative; - transition: background-color .2s cubic-bezier(0.68, -0.55, 0.265, 1.55), width .2s cubic-bezier(0.68, -0.55, 0.265, 1.55), color .2s ease; - width: 60px; - - &:focus, &:active { - background-color: $secondary-bg; - color: $text; - width: 240px; - } -} - -.navigationSlider { - .navSearchWrapper { - &::before { - left: 6px; - top: 6px; - } - - &::after { - left: 15px; - top: 14px; - } - } - - input#search_input_react { - box-sizing: border-box; - padding-left: 25px; - font-size: 14px; - line-height: 20px; - border-radius: 20px; - background-color: rgba($primary-overlay-special, 0.25); - border: none; - color: $text; - outline: none; - position: relative; - transition: background-color .2s cubic-bezier(0.68, -0.55, 0.265, 1.55), width .2s cubic-bezier(0.68, -0.55, 0.265, 1.55), color .2s ease; - width: 100%; - - &:focus, &:active { - background-color: $primary-bg; - color: $primary-overlay; - } - } - - .algolia-docsearch-suggestion--subcategory-inline { - display: none; - } - - & > span { - width: 100%; - } - - .aa-dropdown-menu { - background: $secondary-bg; - border: 0px solid $secondary-bg; - color: $text; - font-size: 12px; - line-height: 2em; - max-height: 140px; - min-width: auto; - overflow-y: scroll; - -webkit-overflow-scrolling: touch; - padding: 0; - border-radius: 0; - position: relative !important; - width: 100%; - } -} \ No newline at end of file diff --git a/docs/_sass/_slideshow.scss b/docs/_sass/_slideshow.scss deleted file mode 100644 index cd98a6cdb..000000000 --- a/docs/_sass/_slideshow.scss +++ /dev/null @@ -1,48 +0,0 @@ -.slideshow { - position: relative; - - .slide { - display: none; - - img { - display: block; - margin: 0 auto; - } - - &.slideActive { - display: block; - } - - a { - border: none; - display: block; - } - } - - .pagination { - display: block; - margin: -10px; - padding: 1em 0; - text-align: center; - width: 100%; - - .pager { - background: transparent; - border: 2px solid rgba(255, 255, 255, 0.5); - border-radius: 50%; - cursor: pointer; - display: inline-block; - 
height: 12px; - margin: 10px; - transition: background-color 0.3s, border-color 0.3s; - width: 12px; - - &.pagerActive { - background: rgba(255, 255, 255, 0.5); - border-width: 4px; - height: 8px; - width: 8px; - } - } - } -} diff --git a/docs/_sass/_syntax-highlighting.scss b/docs/_sass/_syntax-highlighting.scss deleted file mode 100644 index e55c88a2e..000000000 --- a/docs/_sass/_syntax-highlighting.scss +++ /dev/null @@ -1,129 +0,0 @@ - - -.rougeHighlight { background-color: $code-bg; color: #93a1a1 } -.rougeHighlight .c { color: #586e75 } /* Comment */ -.rougeHighlight .err { color: #93a1a1 } /* Error */ -.rougeHighlight .g { color: #93a1a1 } /* Generic */ -.rougeHighlight .k { color: #859900 } /* Keyword */ -.rougeHighlight .l { color: #93a1a1 } /* Literal */ -.rougeHighlight .n { color: #93a1a1 } /* Name */ -.rougeHighlight .o { color: #859900 } /* Operator */ -.rougeHighlight .x { color: #cb4b16 } /* Other */ -.rougeHighlight .p { color: #93a1a1 } /* Punctuation */ -.rougeHighlight .cm { color: #586e75 } /* Comment.Multiline */ -.rougeHighlight .cp { color: #859900 } /* Comment.Preproc */ -.rougeHighlight .c1 { color: #72c02c; } /* Comment.Single */ -.rougeHighlight .cs { color: #859900 } /* Comment.Special */ -.rougeHighlight .gd { color: #2aa198 } /* Generic.Deleted */ -.rougeHighlight .ge { color: #93a1a1; font-style: italic } /* Generic.Emph */ -.rougeHighlight .gr { color: #dc322f } /* Generic.Error */ -.rougeHighlight .gh { color: #cb4b16 } /* Generic.Heading */ -.rougeHighlight .gi { color: #859900 } /* Generic.Inserted */ -.rougeHighlight .go { color: #93a1a1 } /* Generic.Output */ -.rougeHighlight .gp { color: #93a1a1 } /* Generic.Prompt */ -.rougeHighlight .gs { color: #93a1a1; font-weight: bold } /* Generic.Strong */ -.rougeHighlight .gu { color: #cb4b16 } /* Generic.Subheading */ -.rougeHighlight .gt { color: #93a1a1 } /* Generic.Traceback */ -.rougeHighlight .kc { color: #cb4b16 } /* Keyword.Constant */ -.rougeHighlight .kd { color: #268bd2 } /* Keyword.Declaration */ -.rougeHighlight .kn { color: #859900 } /* Keyword.Namespace */ -.rougeHighlight .kp { color: #859900 } /* Keyword.Pseudo */ -.rougeHighlight .kr { color: #268bd2 } /* Keyword.Reserved */ -.rougeHighlight .kt { color: #dc322f } /* Keyword.Type */ -.rougeHighlight .ld { color: #93a1a1 } /* Literal.Date */ -.rougeHighlight .m { color: #2aa198 } /* Literal.Number */ -.rougeHighlight .s { color: #2aa198 } /* Literal.String */ -.rougeHighlight .na { color: #93a1a1 } /* Name.Attribute */ -.rougeHighlight .nb { color: #B58900 } /* Name.Builtin */ -.rougeHighlight .nc { color: #268bd2 } /* Name.Class */ -.rougeHighlight .no { color: #cb4b16 } /* Name.Constant */ -.rougeHighlight .nd { color: #268bd2 } /* Name.Decorator */ -.rougeHighlight .ni { color: #cb4b16 } /* Name.Entity */ -.rougeHighlight .ne { color: #cb4b16 } /* Name.Exception */ -.rougeHighlight .nf { color: #268bd2 } /* Name.Function */ -.rougeHighlight .nl { color: #93a1a1 } /* Name.Label */ -.rougeHighlight .nn { color: #93a1a1 } /* Name.Namespace */ -.rougeHighlight .nx { color: #93a1a1 } /* Name.Other */ -.rougeHighlight .py { color: #93a1a1 } /* Name.Property */ -.rougeHighlight .nt { color: #268bd2 } /* Name.Tag */ -.rougeHighlight .nv { color: #268bd2 } /* Name.Variable */ -.rougeHighlight .ow { color: #859900 } /* Operator.Word */ -.rougeHighlight .w { color: #93a1a1 } /* Text.Whitespace */ -.rougeHighlight .mf { color: #2aa198 } /* Literal.Number.Float */ -.rougeHighlight .mh { color: #2aa198 } /* Literal.Number.Hex */ -.rougeHighlight .mi { 
color: #2aa198 } /* Literal.Number.Integer */ -.rougeHighlight .mo { color: #2aa198 } /* Literal.Number.Oct */ -.rougeHighlight .sb { color: #586e75 } /* Literal.String.Backtick */ -.rougeHighlight .sc { color: #2aa198 } /* Literal.String.Char */ -.rougeHighlight .sd { color: #93a1a1 } /* Literal.String.Doc */ -.rougeHighlight .s2 { color: #2aa198 } /* Literal.String.Double */ -.rougeHighlight .se { color: #cb4b16 } /* Literal.String.Escape */ -.rougeHighlight .sh { color: #93a1a1 } /* Literal.String.Heredoc */ -.rougeHighlight .si { color: #2aa198 } /* Literal.String.Interpol */ -.rougeHighlight .sx { color: #2aa198 } /* Literal.String.Other */ -.rougeHighlight .sr { color: #dc322f } /* Literal.String.Regex */ -.rougeHighlight .s1 { color: #2aa198 } /* Literal.String.Single */ -.rougeHighlight .ss { color: #2aa198 } /* Literal.String.Symbol */ -.rougeHighlight .bp { color: #268bd2 } /* Name.Builtin.Pseudo */ -.rougeHighlight .vc { color: #268bd2 } /* Name.Variable.Class */ -.rougeHighlight .vg { color: #268bd2 } /* Name.Variable.Global */ -.rougeHighlight .vi { color: #268bd2 } /* Name.Variable.Instance */ -.rougeHighlight .il { color: #2aa198 } /* Literal.Number.Integer.Long */ - -.highlighter-rouge { - color: darken(#72c02c, 8%); - font: 800 12px/1.5em Hack, monospace; - max-width: 100%; - - .rougeHighlight { - border-radius: 3px; - margin: 20px 0; - padding: 0px; - overflow-x: scroll; - -webkit-overflow-scrolling: touch; - - table { - background: none; - border: none; - - tbody { - tr { - background: none; - display: flex; - flex-flow: row nowrap; - - td { - display: block; - flex: 1 1; - - &.gutter { - border-right: 1px solid lighten($code-bg, 10%); - color: lighten($code-bg, 15%); - margin-right: 10px; - max-width: 40px; - padding-right: 10px; - - pre { - max-width: 20px; - } - } - } - } - } - } - } -} - -p > .highlighter-rouge, -li > .highlighter-rouge, -a > .highlighter-rouge { - font-size: 16px; - font-weight: 400; - line-height: inherit; -} - -a:hover { - .highlighter-rouge { - color: white; - } -} \ No newline at end of file diff --git a/docs/_sass/_tables.scss b/docs/_sass/_tables.scss deleted file mode 100644 index f847c7013..000000000 --- a/docs/_sass/_tables.scss +++ /dev/null @@ -1,47 +0,0 @@ -table { - background: $lightergrey; - border: 1px solid $lightgrey; - border-collapse: collapse; - display:table; - margin: 20px 0; - - thead { - border-bottom: 1px solid $lightgrey; - display: table-header-group; - } - tbody { - display: table-row-group; - } - tr { - display: table-row; - &:nth-of-type(odd) { - background: $greyish; - } - - th, td { - border-right: 1px dotted $lightgrey; - display: table-cell; - font-size: 14px; - line-height: 1.3em; - padding: 10px; - text-align: left; - - &:last-of-type { - border-right: 0; - } - - code { - color: $green; - display: inline-block; - font-size: 12px; - } - } - - th { - color: #000000; - font-weight: bold; - font-family: $header-font-family; - text-transform: uppercase; - } - } -} \ No newline at end of file diff --git a/docs/_top-level/support.md b/docs/_top-level/support.md deleted file mode 100644 index 05c39befd..000000000 --- a/docs/_top-level/support.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -layout: top-level -title: Support -id: support -category: support ---- - -## Need help? - -Do not hesitate to ask questions if you are having trouble with RocksDB. - -### GitHub issues - -Use [GitHub issues](https://github.com/facebook/rocksdb/issues) to report bugs, issues and feature requests for the RocksDB codebase. 
- -### Facebook Group - -Use the [RocksDB Facebook group](https://www.facebook.com/groups/rocksdb.dev/) for general questions and discussion about RocksDB. - -### FAQ - -Check out a list of [commonly asked questions](https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ) about RocksDB. diff --git a/docs/blog/all.html b/docs/blog/all.html deleted file mode 100644 index 3be2d3bff..000000000 --- a/docs/blog/all.html +++ /dev/null @@ -1,20 +0,0 @@ ---- -id: all -layout: blog -category: blog ---- - -
-
-

All Posts

- {% for post in site.posts %} - {% assign author = site.data.authors[post.author] %} -

- - {{ post.title }} - - on {{ post.date | date: "%B %e, %Y" }} by {{ author.display_name }} -

- {% endfor %} -
-
diff --git a/docs/blog/index.html b/docs/blog/index.html deleted file mode 100644 index 9f6b25d03..000000000 --- a/docs/blog/index.html +++ /dev/null @@ -1,12 +0,0 @@ ---- -id: blog -title: Blog -layout: blog -category: blog ---- - -
- {% for page in site.posts %} - {% include post.html truncate=true %} - {% endfor %} -
diff --git a/docs/css/main.scss b/docs/css/main.scss deleted file mode 100644 index 88ab4e811..000000000 --- a/docs/css/main.scss +++ /dev/null @@ -1,159 +0,0 @@ ---- -# Only the main Sass file needs front matter (the dashes are enough) ---- -@charset "utf-8"; - -@font-face { - font-family: 'Lato'; - src: url("{{ '/static/fonts/LatoLatin-Italic.woff2' }}") format('woff2'), - url("{{ '/static/fonts/LatoLatin-Italic.woff' }}") format('woff'); - font-weight: normal; - font-style: italic; -} - -@font-face { - font-family: 'Lato'; - src: url("{{ '/static/fonts/LatoLatin-Black.woff2' }}") format('woff2'), - url("{{ '/static/fonts/LatoLatin-Black.woff' }}") format('woff'); - font-weight: 900; - font-style: normal; -} - -@font-face { - font-family: 'Lato'; - src: url("{{ '/static/fonts/LatoLatin-BlackItalic.woff2' }}") format('woff2'), - url("{{ '/static/fonts/LatoLatin-BlackItalic.woff' }}") format('woff'); - font-weight: 900; - font-style: italic; -} - -@font-face { - font-family: 'Lato'; - src: url("{{ '/static/fonts/LatoLatin-Light.woff2' }}") format('woff2'), - url("{{ '/static/fonts/LatoLatin-Light.woff' }}") format('woff'); - font-weight: 300; - font-style: normal; -} - -@font-face { - font-family: 'Lato'; - src: url("{{ '/static/fonts/LatoLatin-Regular.woff2' }}") format('woff2'), - url("{{ '/static/fonts/LatoLatin-Regular.woff' }}") format('woff'); - font-weight: normal; - font-style: normal; -} - -// Our variables -$base-font-family: 'Lato', Calibri, Arial, sans-serif; -$header-font-family: 'Lato', 'Helvetica Neue', Arial, sans-serif; -$base-font-size: 18px; -$small-font-size: $base-font-size * 0.875; -$base-line-height: 1.4em; - -$spacing-unit: 12px; - -// Two configured colors (see _config.yml) -$primary-bg: {{ site.color.primary }}; -$secondary-bg: {{ site.color.secondary }}; - -// $primary-bg overlays -{% if site.color.primary-overlay == 'light' %} -$primary-overlay: darken($primary-bg, 70%); -$primary-overlay-special: darken($primary-bg, 40%); -{% else %} -$primary-overlay: #fff; -$primary-overlay-special: lighten($primary-bg, 30%); -{% endif %} - -// $secondary-bg overlays -{% if site.color.secondary-overlay == 'light' %} -$text: #393939; -$sidenav: darken($secondary-bg, 20%); -$sidenav-text: $text; -$sidenav-overlay: darken($sidenav, 10%); -$sidenav-active: lighten($sidenav, 10%); -{% else %} -$text: #fff; -$sidenav: lighten($secondary-bg, 20%); -$sidenav-text: $text; -$sidenav-overlay: lighten($sidenav, 10%); -$sidenav-active: darken($sidenav, 10%); -{% endif %} - -$code-bg: #002b36; - -$header-height: 34px; -$header-ptop: 10px; -$header-pbot: 8px; - -// Width of the content area -$content-width: 900px; - -// Table setting variables -$lightergrey: #F8F8F8; -$greyish: #E8E8E8; -$lightgrey: #B0B0B0; -$green: #2db04b; - -// Using media queries with like this: -// @include media-query($on-palm) { -// .wrapper { -// padding-right: $spacing-unit / 2; -// padding-left: $spacing-unit / 2; -// } -// } -@mixin media-query($device) { - @media screen and (max-width: $device) { - @content; - } -} - - - -// Import partials from `sass_dir` (defaults to `_sass`) -@import - "reset", - "base", - "header", - "search", - "syntax-highlighting", - "promo", - "buttons", - "gridBlock", - "poweredby", - "footer", - "react_header_nav", - "react_docs_nav", - "tables", - "blog" -; - -// Anchor links -// http://ben.balter.com/2014/03/13/pages-anchor-links/ -.header-link { - position: absolute; - margin-left: 0.2em; - opacity: 0; - - -webkit-transition: opacity 0.2s ease-in-out 0.1s; - -moz-transition: 
opacity 0.2s ease-in-out 0.1s; - -ms-transition: opacity 0.2s ease-in-out 0.1s; -} - -h2:hover .header-link, -h3:hover .header-link, -h4:hover .header-link, -h5:hover .header-link, -h6:hover .header-link { - opacity: 1; -} - -/* Social Banner */ -.socialBanner { - font-weight: bold; - font-size: 20px; - padding: 20px; - max-width: 768px; - margin: 0 auto; - text-align: center; - } diff --git a/docs/doc-type-examples/2016-04-07-blog-post-example.md b/docs/doc-type-examples/2016-04-07-blog-post-example.md deleted file mode 100644 index ef954d63a..000000000 --- a/docs/doc-type-examples/2016-04-07-blog-post-example.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: Blog Post Example -layout: post -author: exampleauthor -category: blog ---- - -Any local blog posts would go in the `_posts` directory. - -This is an example blog post introduction, try to keep it short and about a paragraph long, to encourage people to click through to read the entire post. - - - -Everything below the `` tag will only show on the actual blog post page, not on the `/blog/` index. - -Author is defined in `_data/authors.yml` - - -## No posts? - -If you have no blog for your site, you can remove the entire `_posts` folder. Otherwise add markdown files in here. See CONTRIBUTING.md for details. diff --git a/docs/doc-type-examples/docs-hello-world.md b/docs/doc-type-examples/docs-hello-world.md deleted file mode 100644 index c7094ba5a..000000000 --- a/docs/doc-type-examples/docs-hello-world.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -docid: hello-world -title: Hello, World! -layout: docs -permalink: /docs/hello-world.html ---- - -Any local docs would go in the `_docs` directory. - -## No documentation? - -If you have no documentation for your site, you can remove the entire `_docs` folder. Otherwise add markdown files in here. See CONTRIBUTING.md for details. diff --git a/docs/doc-type-examples/top-level-example.md b/docs/doc-type-examples/top-level-example.md deleted file mode 100644 index 67b1fa711..000000000 --- a/docs/doc-type-examples/top-level-example.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -layout: top-level -title: Support Example -id: top-level-example -category: top-level ---- - -This is a static page disconnected from the blog or docs collections that can be added at a top-level (i.e., the same level as `index.md`). 
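The deleted templates and stylesheet above pull their settings from the Jekyll site config and data files rather than hard-coding them: `main.scss` interpolates `site.color.primary` / `site.color.secondary` (plus the `*-overlay` switches), and the blog templates resolve `site.data.authors[post.author]` to print `display_name`. A minimal sketch of those two data sources is shown below; the key names come from the templates in this diff, but the concrete values and the `full_name` field are assumptions, not the repository's actual settings.

```yaml
# _config.yml (excerpt) -- hypothetical values; only the key names are
# referenced by the deleted docs/css/main.scss
color:
  primary: "#303846"          # becomes $primary-bg
  secondary: "#f9f9f9"        # becomes $secondary-bg
  primary-overlay: "dark"     # 'light' switches main.scss to the darkened overlay palette
  secondary-overlay: "light"

# _data/authors.yml (excerpt) -- the key must match `author:` in a post's
# front matter (e.g. `exampleauthor` in the example post above);
# `display_name` is what docs/blog/all.html prints next to the post date
exampleauthor:
  display_name: Example Author
  full_name: Example Author    # assumed extra field, not shown in this diff
```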
diff --git a/docs/docs/index.html b/docs/docs/index.html deleted file mode 100644 index fa6ec8b5a..000000000 --- a/docs/docs/index.html +++ /dev/null @@ -1,6 +0,0 @@ ---- -id: docs -title: Docs -layout: redirect -destination: getting-started.html ---- diff --git a/docs/feed.xml b/docs/feed.xml deleted file mode 100644 index 725f00566..000000000 --- a/docs/feed.xml +++ /dev/null @@ -1,30 +0,0 @@ ---- -layout: null ---- - - - - {{ site.title | xml_escape }} - {{ site.description | xml_escape }} - https://rocksdb.org/feed.xml - - {{ site.time | date_to_rfc822 }} - {{ site.time | date_to_rfc822 }} - Jekyll v{{ jekyll.version }} - {% for post in site.posts limit:10 %} - - {{ post.title | xml_escape }} - {{ post.content | xml_escape }} - {{ post.date | date_to_rfc822 }} - {{ post.url | absolute_url }} - {{ post.url | absolute_url }} - {% for tag in post.tags %} - {{ tag | xml_escape }} - {% endfor %} - {% for cat in post.categories %} - {{ cat | xml_escape }} - {% endfor %} - - {% endfor %} - - diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 2b9570d23..000000000 --- a/docs/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -layout: home -title: RocksDB | A persistent key-value store -id: home ---- - -## Features - -{% include content/gridblocks.html data_source=site.data.features align="center" %} diff --git a/docs/static/favicon.png b/docs/static/favicon.png deleted file mode 100644 index 7f668f38f..000000000 Binary files a/docs/static/favicon.png and /dev/null differ diff --git a/docs/static/fonts/LatoLatin-Black.woff b/docs/static/fonts/LatoLatin-Black.woff deleted file mode 100644 index d1e2579bf..000000000 Binary files a/docs/static/fonts/LatoLatin-Black.woff and /dev/null differ diff --git a/docs/static/fonts/LatoLatin-Black.woff2 b/docs/static/fonts/LatoLatin-Black.woff2 deleted file mode 100644 index 4127b4d0b..000000000 Binary files a/docs/static/fonts/LatoLatin-Black.woff2 and /dev/null differ diff --git a/docs/static/fonts/LatoLatin-BlackItalic.woff b/docs/static/fonts/LatoLatin-BlackItalic.woff deleted file mode 100644 index 142c1c9c4..000000000 Binary files a/docs/static/fonts/LatoLatin-BlackItalic.woff and /dev/null differ diff --git a/docs/static/fonts/LatoLatin-BlackItalic.woff2 b/docs/static/fonts/LatoLatin-BlackItalic.woff2 deleted file mode 100644 index e9862e690..000000000 Binary files a/docs/static/fonts/LatoLatin-BlackItalic.woff2 and /dev/null differ diff --git a/docs/static/fonts/LatoLatin-Italic.woff b/docs/static/fonts/LatoLatin-Italic.woff deleted file mode 100644 index d8cf84c8b..000000000 Binary files a/docs/static/fonts/LatoLatin-Italic.woff and /dev/null differ diff --git a/docs/static/fonts/LatoLatin-Italic.woff2 b/docs/static/fonts/LatoLatin-Italic.woff2 deleted file mode 100644 index aaa5a35c3..000000000 Binary files a/docs/static/fonts/LatoLatin-Italic.woff2 and /dev/null differ diff --git a/docs/static/fonts/LatoLatin-Light.woff b/docs/static/fonts/LatoLatin-Light.woff deleted file mode 100644 index e7d4278cc..000000000 Binary files a/docs/static/fonts/LatoLatin-Light.woff and /dev/null differ diff --git a/docs/static/fonts/LatoLatin-Light.woff2 b/docs/static/fonts/LatoLatin-Light.woff2 deleted file mode 100644 index b6d028836..000000000 Binary files a/docs/static/fonts/LatoLatin-Light.woff2 and /dev/null differ diff --git a/docs/static/fonts/LatoLatin-Regular.woff b/docs/static/fonts/LatoLatin-Regular.woff deleted file mode 100644 index bf73a6d9f..000000000 Binary files a/docs/static/fonts/LatoLatin-Regular.woff and /dev/null differ diff 
--git a/docs/static/fonts/LatoLatin-Regular.woff2 b/docs/static/fonts/LatoLatin-Regular.woff2 deleted file mode 100644 index a4d084bfb..000000000 Binary files a/docs/static/fonts/LatoLatin-Regular.woff2 and /dev/null differ diff --git a/docs/static/images/Resize-of-20140327_200754-300x225.jpg b/docs/static/images/Resize-of-20140327_200754-300x225.jpg deleted file mode 100644 index 9f9315101..000000000 Binary files a/docs/static/images/Resize-of-20140327_200754-300x225.jpg and /dev/null differ diff --git a/docs/static/images/align-compaction-output/compaction_output_file_size_compare.png b/docs/static/images/align-compaction-output/compaction_output_file_size_compare.png deleted file mode 100644 index 2ce86fb28..000000000 Binary files a/docs/static/images/align-compaction-output/compaction_output_file_size_compare.png and /dev/null differ diff --git a/docs/static/images/align-compaction-output/file_cut_align.png b/docs/static/images/align-compaction-output/file_cut_align.png deleted file mode 100644 index bc3e8990e..000000000 Binary files a/docs/static/images/align-compaction-output/file_cut_align.png and /dev/null differ diff --git a/docs/static/images/align-compaction-output/file_cut_normal.png b/docs/static/images/align-compaction-output/file_cut_normal.png deleted file mode 100644 index e17133ed2..000000000 Binary files a/docs/static/images/align-compaction-output/file_cut_normal.png and /dev/null differ diff --git a/docs/static/images/align-compaction-output/file_cut_trival_move.png b/docs/static/images/align-compaction-output/file_cut_trival_move.png deleted file mode 100644 index 7aca9aeb5..000000000 Binary files a/docs/static/images/align-compaction-output/file_cut_trival_move.png and /dev/null differ diff --git a/docs/static/images/align-compaction-output/file_size_compare.png b/docs/static/images/align-compaction-output/file_size_compare.png deleted file mode 100644 index 5f39a806f..000000000 Binary files a/docs/static/images/align-compaction-output/file_size_compare.png and /dev/null differ diff --git a/docs/static/images/align-compaction-output/write_amp_compare.png b/docs/static/images/align-compaction-output/write_amp_compare.png deleted file mode 100644 index 8b20f2ae3..000000000 Binary files a/docs/static/images/align-compaction-output/write_amp_compare.png and /dev/null differ diff --git a/docs/static/images/asynchronous-io/mget_async.png b/docs/static/images/asynchronous-io/mget_async.png deleted file mode 100644 index 79d1a851f..000000000 Binary files a/docs/static/images/asynchronous-io/mget_async.png and /dev/null differ diff --git a/docs/static/images/asynchronous-io/scan_async.png b/docs/static/images/asynchronous-io/scan_async.png deleted file mode 100644 index ee84189f4..000000000 Binary files a/docs/static/images/asynchronous-io/scan_async.png and /dev/null differ diff --git a/docs/static/images/binaryseek.png b/docs/static/images/binaryseek.png deleted file mode 100644 index 0e213f048..000000000 Binary files a/docs/static/images/binaryseek.png and /dev/null differ diff --git a/docs/static/images/bloom_fp_vs_bpk.png b/docs/static/images/bloom_fp_vs_bpk.png deleted file mode 100644 index e83f4d085..000000000 Binary files a/docs/static/images/bloom_fp_vs_bpk.png and /dev/null differ diff --git a/docs/static/images/compaction/full-range.png b/docs/static/images/compaction/full-range.png deleted file mode 100644 index 5b2c9fc61..000000000 Binary files a/docs/static/images/compaction/full-range.png and /dev/null differ diff --git 
a/docs/static/images/compaction/l0-l1-contend.png b/docs/static/images/compaction/l0-l1-contend.png deleted file mode 100644 index bcf8ec73a..000000000 Binary files a/docs/static/images/compaction/l0-l1-contend.png and /dev/null differ diff --git a/docs/static/images/compaction/l1-l2-contend.png b/docs/static/images/compaction/l1-l2-contend.png deleted file mode 100644 index 6dafbbbf2..000000000 Binary files a/docs/static/images/compaction/l1-l2-contend.png and /dev/null differ diff --git a/docs/static/images/compaction/part-range-old.png b/docs/static/images/compaction/part-range-old.png deleted file mode 100644 index 1cc723d13..000000000 Binary files a/docs/static/images/compaction/part-range-old.png and /dev/null differ diff --git a/docs/static/images/data-block-hash-index/block-format-binary-seek.png b/docs/static/images/data-block-hash-index/block-format-binary-seek.png deleted file mode 100644 index 0e213f048..000000000 Binary files a/docs/static/images/data-block-hash-index/block-format-binary-seek.png and /dev/null differ diff --git a/docs/static/images/data-block-hash-index/block-format-hash-index.png b/docs/static/images/data-block-hash-index/block-format-hash-index.png deleted file mode 100644 index accb8639e..000000000 Binary files a/docs/static/images/data-block-hash-index/block-format-hash-index.png and /dev/null differ diff --git a/docs/static/images/data-block-hash-index/hash-index-data-structure.png b/docs/static/images/data-block-hash-index/hash-index-data-structure.png deleted file mode 100644 index 9acc71d8e..000000000 Binary files a/docs/static/images/data-block-hash-index/hash-index-data-structure.png and /dev/null differ diff --git a/docs/static/images/data-block-hash-index/perf-cache-miss.png b/docs/static/images/data-block-hash-index/perf-cache-miss.png deleted file mode 100644 index 71788735d..000000000 Binary files a/docs/static/images/data-block-hash-index/perf-cache-miss.png and /dev/null differ diff --git a/docs/static/images/data-block-hash-index/perf-throughput.png b/docs/static/images/data-block-hash-index/perf-throughput.png deleted file mode 100644 index 54948af2f..000000000 Binary files a/docs/static/images/data-block-hash-index/perf-throughput.png and /dev/null differ diff --git a/docs/static/images/delrange/delrange_collapsed.png b/docs/static/images/delrange/delrange_collapsed.png deleted file mode 100644 index 52246c2c1..000000000 Binary files a/docs/static/images/delrange/delrange_collapsed.png and /dev/null differ diff --git a/docs/static/images/delrange/delrange_key_schema.png b/docs/static/images/delrange/delrange_key_schema.png deleted file mode 100644 index 0a14d4a3a..000000000 Binary files a/docs/static/images/delrange/delrange_key_schema.png and /dev/null differ diff --git a/docs/static/images/delrange/delrange_sst_blocks.png b/docs/static/images/delrange/delrange_sst_blocks.png deleted file mode 100644 index 6003e42ae..000000000 Binary files a/docs/static/images/delrange/delrange_sst_blocks.png and /dev/null differ diff --git a/docs/static/images/delrange/delrange_uncollapsed.png b/docs/static/images/delrange/delrange_uncollapsed.png deleted file mode 100644 index 39c7097af..000000000 Binary files a/docs/static/images/delrange/delrange_uncollapsed.png and /dev/null differ diff --git a/docs/static/images/delrange/delrange_write_path.png b/docs/static/images/delrange/delrange_write_path.png deleted file mode 100644 index 229dfb349..000000000 Binary files a/docs/static/images/delrange/delrange_write_path.png and /dev/null differ diff --git 
a/docs/static/images/dictcmp/dictcmp_raw_sampled.png b/docs/static/images/dictcmp/dictcmp_raw_sampled.png deleted file mode 100644 index 2eb6463c2..000000000 Binary files a/docs/static/images/dictcmp/dictcmp_raw_sampled.png and /dev/null differ diff --git a/docs/static/images/dictcmp/dictcmp_sst_blocks.png b/docs/static/images/dictcmp/dictcmp_sst_blocks.png deleted file mode 100644 index 551860b2e..000000000 Binary files a/docs/static/images/dictcmp/dictcmp_sst_blocks.png and /dev/null differ diff --git a/docs/static/images/dictcmp/dictcmp_zstd_trained.png b/docs/static/images/dictcmp/dictcmp_zstd_trained.png deleted file mode 100644 index 966c7fe0f..000000000 Binary files a/docs/static/images/dictcmp/dictcmp_zstd_trained.png and /dev/null differ diff --git a/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Legacy_Vs_Integrated.png b/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Legacy_Vs_Integrated.png deleted file mode 100644 index 7215390cb..000000000 Binary files a/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Legacy_Vs_Integrated.png and /dev/null differ diff --git a/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_RW_RO_Perf.png b/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_RW_RO_Perf.png deleted file mode 100644 index f412ee60f..000000000 Binary files a/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_RW_RO_Perf.png and /dev/null differ diff --git a/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Amp.png b/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Amp.png deleted file mode 100644 index 19f40b035..000000000 Binary files a/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Amp.png and /dev/null differ diff --git a/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Perf.png b/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Perf.png deleted file mode 100644 index a1d43da0c..000000000 Binary files a/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Perf.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/Memtable-entry.png b/docs/static/images/kv-checksum/Memtable-entry.png deleted file mode 100644 index 31eb7278a..000000000 Binary files a/docs/static/images/kv-checksum/Memtable-entry.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/Memtable-write.png b/docs/static/images/kv-checksum/Memtable-write.png deleted file mode 100644 index 32f526fdf..000000000 Binary files a/docs/static/images/kv-checksum/Memtable-write.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/ProtInfo-Memtable.png b/docs/static/images/kv-checksum/ProtInfo-Memtable.png deleted file mode 100644 index c2e21bb15..000000000 Binary files a/docs/static/images/kv-checksum/ProtInfo-Memtable.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/ProtInfo-Writebatch-to-Memtable.png b/docs/static/images/kv-checksum/ProtInfo-Writebatch-to-Memtable.png deleted file mode 100644 index 91ad93b2b..000000000 Binary files a/docs/static/images/kv-checksum/ProtInfo-Writebatch-to-Memtable.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/ProtInfo-Writebatch.png b/docs/static/images/kv-checksum/ProtInfo-Writebatch.png deleted file mode 100644 index b3cd5315b..000000000 Binary files a/docs/static/images/kv-checksum/ProtInfo-Writebatch.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/WAL-fragment.png b/docs/static/images/kv-checksum/WAL-fragment.png deleted file mode 100644 index 9bbacca0d..000000000 
Binary files a/docs/static/images/kv-checksum/WAL-fragment.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/WAL-read.png b/docs/static/images/kv-checksum/WAL-read.png deleted file mode 100644 index e130733d3..000000000 Binary files a/docs/static/images/kv-checksum/WAL-read.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/WAL-write.png b/docs/static/images/kv-checksum/WAL-write.png deleted file mode 100644 index fb9fd8fd5..000000000 Binary files a/docs/static/images/kv-checksum/WAL-write.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/Write-batch.png b/docs/static/images/kv-checksum/Write-batch.png deleted file mode 100644 index 121d42555..000000000 Binary files a/docs/static/images/kv-checksum/Write-batch.png and /dev/null differ diff --git a/docs/static/images/kv-checksum/Writebatch-write.png b/docs/static/images/kv-checksum/Writebatch-write.png deleted file mode 100644 index b10ab35ef..000000000 Binary files a/docs/static/images/kv-checksum/Writebatch-write.png and /dev/null differ diff --git a/docs/static/images/lost-buffered-write-recovery/angry-cat.png b/docs/static/images/lost-buffered-write-recovery/angry-cat.png deleted file mode 100644 index e956fb6e0..000000000 Binary files a/docs/static/images/lost-buffered-write-recovery/angry-cat.png and /dev/null differ diff --git a/docs/static/images/lost-buffered-write-recovery/basic-setup.png b/docs/static/images/lost-buffered-write-recovery/basic-setup.png deleted file mode 100644 index f79831a29..000000000 Binary files a/docs/static/images/lost-buffered-write-recovery/basic-setup.png and /dev/null differ diff --git a/docs/static/images/lost-buffered-write-recovery/happy-cat.png b/docs/static/images/lost-buffered-write-recovery/happy-cat.png deleted file mode 100644 index 155b5341d..000000000 Binary files a/docs/static/images/lost-buffered-write-recovery/happy-cat.png and /dev/null differ diff --git a/docs/static/images/lost-buffered-write-recovery/replay-extension.png b/docs/static/images/lost-buffered-write-recovery/replay-extension.png deleted file mode 100644 index 5bedd949f..000000000 Binary files a/docs/static/images/lost-buffered-write-recovery/replay-extension.png and /dev/null differ diff --git a/docs/static/images/lost-buffered-write-recovery/test-fs-writable-file.png b/docs/static/images/lost-buffered-write-recovery/test-fs-writable-file.png deleted file mode 100644 index 58db8e2a8..000000000 Binary files a/docs/static/images/lost-buffered-write-recovery/test-fs-writable-file.png and /dev/null differ diff --git a/docs/static/images/lost-buffered-write-recovery/trace-extension.png b/docs/static/images/lost-buffered-write-recovery/trace-extension.png deleted file mode 100644 index f782955b6..000000000 Binary files a/docs/static/images/lost-buffered-write-recovery/trace-extension.png and /dev/null differ diff --git a/docs/static/images/pcache-blockindex.jpg b/docs/static/images/pcache-blockindex.jpg deleted file mode 100644 index 9c18bde93..000000000 Binary files a/docs/static/images/pcache-blockindex.jpg and /dev/null differ diff --git a/docs/static/images/pcache-fileindex.jpg b/docs/static/images/pcache-fileindex.jpg deleted file mode 100644 index 51f4e095c..000000000 Binary files a/docs/static/images/pcache-fileindex.jpg and /dev/null differ diff --git a/docs/static/images/pcache-filelayout.jpg b/docs/static/images/pcache-filelayout.jpg deleted file mode 100644 index 771ee60c1..000000000 Binary files a/docs/static/images/pcache-filelayout.jpg and /dev/null differ diff 
--git a/docs/static/images/pcache-readiopath.jpg b/docs/static/images/pcache-readiopath.jpg deleted file mode 100644 index 4993f0072..000000000 Binary files a/docs/static/images/pcache-readiopath.jpg and /dev/null differ diff --git a/docs/static/images/pcache-tieredstorage.jpg b/docs/static/images/pcache-tieredstorage.jpg deleted file mode 100644 index c362a2d69..000000000 Binary files a/docs/static/images/pcache-tieredstorage.jpg and /dev/null differ diff --git a/docs/static/images/pcache-writeiopath.jpg b/docs/static/images/pcache-writeiopath.jpg deleted file mode 100644 index 561b55181..000000000 Binary files a/docs/static/images/pcache-writeiopath.jpg and /dev/null differ diff --git a/docs/static/images/promo-adapt.svg b/docs/static/images/promo-adapt.svg deleted file mode 100644 index 7cd44434d..000000000 --- a/docs/static/images/promo-adapt.svg +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - diff --git a/docs/static/images/promo-flash.svg b/docs/static/images/promo-flash.svg deleted file mode 100644 index 79810c30a..000000000 --- a/docs/static/images/promo-flash.svg +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - - -]> - - - - - - - - - - - diff --git a/docs/static/images/promo-operations.svg b/docs/static/images/promo-operations.svg deleted file mode 100644 index 3036294ab..000000000 --- a/docs/static/images/promo-operations.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/docs/static/images/promo-performance.svg b/docs/static/images/promo-performance.svg deleted file mode 100644 index be8a10120..000000000 --- a/docs/static/images/promo-performance.svg +++ /dev/null @@ -1,134 +0,0 @@ - - - - - - - - - - -netalloy chequered flag - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/static/images/rate-limiter/auto-tuned-write-KBps-series.png b/docs/static/images/rate-limiter/auto-tuned-write-KBps-series.png deleted file mode 100644 index b4b24849c..000000000 Binary files a/docs/static/images/rate-limiter/auto-tuned-write-KBps-series.png and /dev/null differ diff --git a/docs/static/images/rate-limiter/write-KBps-cdf.png b/docs/static/images/rate-limiter/write-KBps-cdf.png deleted file mode 100644 index 742f985bf..000000000 Binary files a/docs/static/images/rate-limiter/write-KBps-cdf.png and /dev/null differ diff --git a/docs/static/images/rate-limiter/write-KBps-series.png b/docs/static/images/rate-limiter/write-KBps-series.png deleted file mode 100644 index c7bdcb95a..000000000 Binary files a/docs/static/images/rate-limiter/write-KBps-series.png and /dev/null differ diff --git a/docs/static/images/rocksdb-secondary-cache/Mixgraph_hit_rate.png b/docs/static/images/rocksdb-secondary-cache/Mixgraph_hit_rate.png deleted file mode 100644 index 10fa73728..000000000 Binary files a/docs/static/images/rocksdb-secondary-cache/Mixgraph_hit_rate.png and /dev/null differ diff --git a/docs/static/images/rocksdb-secondary-cache/Mixgraph_throughput.png b/docs/static/images/rocksdb-secondary-cache/Mixgraph_throughput.png deleted file mode 100644 index df2e333f9..000000000 Binary files a/docs/static/images/rocksdb-secondary-cache/Mixgraph_throughput.png and /dev/null differ diff --git a/docs/static/images/rocksdb-secondary-cache/arch_diagram.png b/docs/static/images/rocksdb-secondary-cache/arch_diagram.png deleted file mode 100644 index 696a376ed..000000000 Binary files 
a/docs/static/images/rocksdb-secondary-cache/arch_diagram.png and /dev/null differ diff --git a/docs/static/images/rocksdb-secondary-cache/insert_flow.png b/docs/static/images/rocksdb-secondary-cache/insert_flow.png deleted file mode 100644 index f02e7e4c5..000000000 Binary files a/docs/static/images/rocksdb-secondary-cache/insert_flow.png and /dev/null differ diff --git a/docs/static/images/rocksdb-secondary-cache/lookup_flow.png b/docs/static/images/rocksdb-secondary-cache/lookup_flow.png deleted file mode 100644 index 2b3c70edb..000000000 Binary files a/docs/static/images/rocksdb-secondary-cache/lookup_flow.png and /dev/null differ diff --git a/docs/static/images/time-aware-tiered-storage/compaction_moving_up_conflict.png b/docs/static/images/time-aware-tiered-storage/compaction_moving_up_conflict.png deleted file mode 100644 index 8feaef203..000000000 Binary files a/docs/static/images/time-aware-tiered-storage/compaction_moving_up_conflict.png and /dev/null differ diff --git a/docs/static/images/time-aware-tiered-storage/per_key_placement_compaction.png b/docs/static/images/time-aware-tiered-storage/per_key_placement_compaction.png deleted file mode 100644 index 0b232d1fe..000000000 Binary files a/docs/static/images/time-aware-tiered-storage/per_key_placement_compaction.png and /dev/null differ diff --git a/docs/static/images/time-aware-tiered-storage/tiered_storage_design.png b/docs/static/images/time-aware-tiered-storage/tiered_storage_design.png deleted file mode 100644 index 7e5158c18..000000000 Binary files a/docs/static/images/time-aware-tiered-storage/tiered_storage_design.png and /dev/null differ diff --git a/docs/static/images/time-aware-tiered-storage/tiered_storage_overview.png b/docs/static/images/time-aware-tiered-storage/tiered_storage_overview.png deleted file mode 100644 index 7d115e667..000000000 Binary files a/docs/static/images/time-aware-tiered-storage/tiered_storage_overview.png and /dev/null differ diff --git a/docs/static/images/time-aware-tiered-storage/tiered_storage_problem.png b/docs/static/images/time-aware-tiered-storage/tiered_storage_problem.png deleted file mode 100644 index dbe2ae532..000000000 Binary files a/docs/static/images/time-aware-tiered-storage/tiered_storage_problem.png and /dev/null differ diff --git a/docs/static/images/tree_example1.png b/docs/static/images/tree_example1.png deleted file mode 100644 index 9f725860c..000000000 Binary files a/docs/static/images/tree_example1.png and /dev/null differ diff --git a/docs/static/logo.svg b/docs/static/logo.svg deleted file mode 100644 index e6e1e8afa..000000000 --- a/docs/static/logo.svg +++ /dev/null @@ -1,76 +0,0 @@ - - - - - - - - - - - - - - - - diff --git a/docs/static/og_image.png b/docs/static/og_image.png deleted file mode 100644 index 4e2759e61..000000000 Binary files a/docs/static/og_image.png and /dev/null differ diff --git a/env/env_basic_test.cc b/env/env_basic_test.cc deleted file mode 100644 index 11b07509c..000000000 --- a/env/env_basic_test.cc +++ /dev/null @@ -1,397 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
- -#include -#include -#include -#include - -#include "env/mock_env.h" -#include "file/file_util.h" -#include "rocksdb/convenience.h" -#include "rocksdb/env.h" -#include "rocksdb/env_encryption.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { -namespace { -using CreateEnvFunc = Env*(); - -// These functions are used to create the various environments under which this -// test can execute. These functions are used to allow the test cases to be -// created without the Env being initialized, thereby eliminating a potential -// static initialization fiasco/race condition when attempting to get a -// custom/configured env prior to main being invoked. - -static Env* GetDefaultEnv() { return Env::Default(); } - -static Env* GetMockEnv() { - static std::unique_ptr mock_env(MockEnv::Create(Env::Default())); - return mock_env.get(); -} -static Env* NewTestEncryptedEnv(Env* base, const std::string& provider_id) { - ConfigOptions config_opts; - config_opts.invoke_prepare_options = false; - - std::shared_ptr provider; - EXPECT_OK(EncryptionProvider::CreateFromString(config_opts, provider_id, - &provider)); - return NewEncryptedEnv(base, provider); -} - -static Env* GetCtrEncryptedEnv() { - static std::unique_ptr ctr_encrypt_env( - NewTestEncryptedEnv(Env::Default(), "CTR://test")); - return ctr_encrypt_env.get(); -} - -static Env* GetMemoryEnv() { - static std::unique_ptr mem_env(NewMemEnv(Env::Default())); - return mem_env.get(); -} - -static Env* GetTestEnv() { - static std::shared_ptr env_guard; - static Env* custom_env = nullptr; - if (custom_env == nullptr) { - const char* uri = getenv("TEST_ENV_URI"); - if (uri != nullptr) { - EXPECT_OK(Env::CreateFromUri(ConfigOptions(), uri, "", &custom_env, - &env_guard)); - } - } - EXPECT_NE(custom_env, nullptr); - return custom_env; -} - -static Env* GetTestFS() { - static std::shared_ptr fs_env_guard; - static Env* fs_env = nullptr; - if (fs_env == nullptr) { - const char* uri = getenv("TEST_FS_URI"); - if (uri != nullptr) { - EXPECT_OK( - Env::CreateFromUri(ConfigOptions(), uri, "", &fs_env, &fs_env_guard)); - } - } - EXPECT_NE(fs_env, nullptr); - return fs_env; -} - -} // namespace -class EnvBasicTestWithParam - : public testing::Test, - public ::testing::WithParamInterface { - public: - Env* env_; - const EnvOptions soptions_; - std::string test_dir_; - - EnvBasicTestWithParam() : env_(GetParam()()) { - test_dir_ = test::PerThreadDBPath(env_, "env_basic_test"); - } - - void SetUp() override { ASSERT_OK(env_->CreateDirIfMissing(test_dir_)); } - - void TearDown() override { ASSERT_OK(DestroyDir(env_, test_dir_)); } -}; - -class EnvMoreTestWithParam : public EnvBasicTestWithParam {}; - -INSTANTIATE_TEST_CASE_P(EnvDefault, EnvBasicTestWithParam, - ::testing::Values(&GetDefaultEnv)); -INSTANTIATE_TEST_CASE_P(EnvDefault, EnvMoreTestWithParam, - ::testing::Values(&GetDefaultEnv)); - -INSTANTIATE_TEST_CASE_P(MockEnv, EnvBasicTestWithParam, - ::testing::Values(&GetMockEnv)); - -// next statements run env test against default encryption code. -INSTANTIATE_TEST_CASE_P(EncryptedEnv, EnvBasicTestWithParam, - ::testing::Values(&GetCtrEncryptedEnv)); -INSTANTIATE_TEST_CASE_P(EncryptedEnv, EnvMoreTestWithParam, - ::testing::Values(&GetCtrEncryptedEnv)); - -INSTANTIATE_TEST_CASE_P(MemEnv, EnvBasicTestWithParam, - ::testing::Values(&GetMemoryEnv)); - -namespace { - -// Returns a vector of 0 or 1 Env*, depending whether an Env is registered for -// TEST_ENV_URI. 
-// -// The purpose of returning an empty vector (instead of nullptr) is that gtest -// ValuesIn() will skip running tests when given an empty collection. -std::vector GetCustomEnvs() { - std::vector res; - const char* uri = getenv("TEST_ENV_URI"); - if (uri != nullptr) { - res.push_back(&GetTestEnv); - } - uri = getenv("TEST_FS_URI"); - if (uri != nullptr) { - res.push_back(&GetTestFS); - } - return res; -} - -} // anonymous namespace - -INSTANTIATE_TEST_CASE_P(CustomEnv, EnvBasicTestWithParam, - ::testing::ValuesIn(GetCustomEnvs())); - -INSTANTIATE_TEST_CASE_P(CustomEnv, EnvMoreTestWithParam, - ::testing::ValuesIn(GetCustomEnvs())); - -TEST_P(EnvBasicTestWithParam, Basics) { - uint64_t file_size; - std::unique_ptr writable_file; - std::vector children; - - // Check that the directory is empty. - ASSERT_EQ(Status::NotFound(), env_->FileExists(test_dir_ + "/non_existent")); - ASSERT_TRUE(!env_->GetFileSize(test_dir_ + "/non_existent", &file_size).ok()); - ASSERT_OK(env_->GetChildren(test_dir_, &children)); - ASSERT_EQ(0U, children.size()); - - // Create a file. - ASSERT_OK(env_->NewWritableFile(test_dir_ + "/f", &writable_file, soptions_)); - ASSERT_OK(writable_file->Close()); - writable_file.reset(); - - // Check that the file exists. - ASSERT_OK(env_->FileExists(test_dir_ + "/f")); - ASSERT_OK(env_->GetFileSize(test_dir_ + "/f", &file_size)); - ASSERT_EQ(0U, file_size); - ASSERT_OK(env_->GetChildren(test_dir_, &children)); - ASSERT_EQ(1U, children.size()); - ASSERT_EQ("f", children[0]); - ASSERT_OK(env_->DeleteFile(test_dir_ + "/f")); - - // Write to the file. - ASSERT_OK( - env_->NewWritableFile(test_dir_ + "/f1", &writable_file, soptions_)); - ASSERT_OK(writable_file->Append("abc")); - ASSERT_OK(writable_file->Close()); - writable_file.reset(); - ASSERT_OK( - env_->NewWritableFile(test_dir_ + "/f2", &writable_file, soptions_)); - ASSERT_OK(writable_file->Close()); - writable_file.reset(); - - // Check for expected size. - ASSERT_OK(env_->GetFileSize(test_dir_ + "/f1", &file_size)); - ASSERT_EQ(3U, file_size); - - // Check that renaming works. - ASSERT_TRUE( - !env_->RenameFile(test_dir_ + "/non_existent", test_dir_ + "/g").ok()); - ASSERT_OK(env_->RenameFile(test_dir_ + "/f1", test_dir_ + "/g")); - ASSERT_EQ(Status::NotFound(), env_->FileExists(test_dir_ + "/f1")); - ASSERT_OK(env_->FileExists(test_dir_ + "/g")); - ASSERT_OK(env_->GetFileSize(test_dir_ + "/g", &file_size)); - ASSERT_EQ(3U, file_size); - - // Check that renaming overwriting works - ASSERT_OK(env_->RenameFile(test_dir_ + "/f2", test_dir_ + "/g")); - ASSERT_OK(env_->GetFileSize(test_dir_ + "/g", &file_size)); - ASSERT_EQ(0U, file_size); - - // Check that opening non-existent file fails. - std::unique_ptr seq_file; - std::unique_ptr rand_file; - ASSERT_TRUE(!env_->NewSequentialFile(test_dir_ + "/non_existent", &seq_file, - soptions_) - .ok()); - ASSERT_TRUE(!seq_file); - ASSERT_NOK(env_->NewRandomAccessFile(test_dir_ + "/non_existent", &rand_file, - soptions_)); - ASSERT_TRUE(!rand_file); - - // Check that deleting works. 
- ASSERT_NOK(env_->DeleteFile(test_dir_ + "/non_existent")); - ASSERT_OK(env_->DeleteFile(test_dir_ + "/g")); - ASSERT_EQ(Status::NotFound(), env_->FileExists(test_dir_ + "/g")); - ASSERT_OK(env_->GetChildren(test_dir_, &children)); - ASSERT_EQ(0U, children.size()); - Status s = env_->GetChildren(test_dir_ + "/non_existent", &children); - ASSERT_TRUE(s.IsNotFound()); -} - -TEST_P(EnvBasicTestWithParam, ReadWrite) { - std::unique_ptr writable_file; - std::unique_ptr seq_file; - std::unique_ptr rand_file; - Slice result; - char scratch[100]; - - ASSERT_OK(env_->NewWritableFile(test_dir_ + "/f", &writable_file, soptions_)); - ASSERT_OK(writable_file->Append("hello ")); - ASSERT_OK(writable_file->Append("world")); - ASSERT_OK(writable_file->Close()); - writable_file.reset(); - - // Read sequentially. - ASSERT_OK(env_->NewSequentialFile(test_dir_ + "/f", &seq_file, soptions_)); - ASSERT_OK(seq_file->Read(5, &result, scratch)); // Read "hello". - ASSERT_EQ(0, result.compare("hello")); - ASSERT_OK(seq_file->Skip(1)); - ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Read "world". - ASSERT_EQ(0, result.compare("world")); - ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Try reading past EOF. - ASSERT_EQ(0U, result.size()); - ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file. - ASSERT_OK(seq_file->Read(1000, &result, scratch)); - ASSERT_EQ(0U, result.size()); - - // Random reads. - ASSERT_OK(env_->NewRandomAccessFile(test_dir_ + "/f", &rand_file, soptions_)); - ASSERT_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world". - ASSERT_EQ(0, result.compare("world")); - ASSERT_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello". - ASSERT_EQ(0, result.compare("hello")); - ASSERT_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d". - ASSERT_EQ(0, result.compare("d")); - - // Too high offset. - ASSERT_TRUE(rand_file->Read(1000, 5, &result, scratch).ok()); -} - -TEST_P(EnvBasicTestWithParam, Misc) { - std::unique_ptr writable_file; - ASSERT_OK(env_->NewWritableFile(test_dir_ + "/b", &writable_file, soptions_)); - - // These are no-ops, but we test they return success. - ASSERT_OK(writable_file->Sync()); - ASSERT_OK(writable_file->Flush()); - ASSERT_OK(writable_file->Close()); - writable_file.reset(); -} - -TEST_P(EnvBasicTestWithParam, LargeWrite) { - const size_t kWriteSize = 300 * 1024; - char* scratch = new char[kWriteSize * 2]; - - std::string write_data; - for (size_t i = 0; i < kWriteSize; ++i) { - write_data.append(1, static_cast(i)); - } - - std::unique_ptr writable_file; - ASSERT_OK(env_->NewWritableFile(test_dir_ + "/f", &writable_file, soptions_)); - ASSERT_OK(writable_file->Append("foo")); - ASSERT_OK(writable_file->Append(write_data)); - ASSERT_OK(writable_file->Close()); - writable_file.reset(); - - std::unique_ptr seq_file; - Slice result; - ASSERT_OK(env_->NewSequentialFile(test_dir_ + "/f", &seq_file, soptions_)); - ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". 
- ASSERT_EQ(0, result.compare("foo")); - - size_t read = 0; - std::string read_data; - while (read < kWriteSize) { - ASSERT_OK(seq_file->Read(kWriteSize - read, &result, scratch)); - read_data.append(result.data(), result.size()); - read += result.size(); - } - ASSERT_TRUE(write_data == read_data); - delete[] scratch; -} - -TEST_P(EnvMoreTestWithParam, GetModTime) { - ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/dir1")); - uint64_t mtime1 = 0x0; - ASSERT_OK(env_->GetFileModificationTime(test_dir_ + "/dir1", &mtime1)); -} - -TEST_P(EnvMoreTestWithParam, MakeDir) { - ASSERT_OK(env_->CreateDir(test_dir_ + "/j")); - ASSERT_OK(env_->FileExists(test_dir_ + "/j")); - std::vector children; - ASSERT_OK(env_->GetChildren(test_dir_, &children)); - ASSERT_EQ(1U, children.size()); - // fail because file already exists - ASSERT_TRUE(!env_->CreateDir(test_dir_ + "/j").ok()); - ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/j")); - ASSERT_OK(env_->DeleteDir(test_dir_ + "/j")); - ASSERT_EQ(Status::NotFound(), env_->FileExists(test_dir_ + "/j")); -} - -TEST_P(EnvMoreTestWithParam, GetChildren) { - // empty folder returns empty vector - std::vector children; - std::vector childAttr; - ASSERT_OK(env_->CreateDirIfMissing(test_dir_)); - ASSERT_OK(env_->GetChildren(test_dir_, &children)); - ASSERT_OK(env_->FileExists(test_dir_)); - ASSERT_OK(env_->GetChildrenFileAttributes(test_dir_, &childAttr)); - ASSERT_EQ(0U, children.size()); - ASSERT_EQ(0U, childAttr.size()); - - // folder with contents returns relative path to test dir - ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/niu")); - ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/you")); - ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/guo")); - ASSERT_OK(env_->GetChildren(test_dir_, &children)); - ASSERT_OK(env_->GetChildrenFileAttributes(test_dir_, &childAttr)); - ASSERT_EQ(3U, children.size()); - ASSERT_EQ(3U, childAttr.size()); - for (auto each : children) { - env_->DeleteDir(test_dir_ + "/" + each).PermitUncheckedError(); - } // necessary for default POSIX env - - // non-exist directory returns IOError - ASSERT_OK(env_->DeleteDir(test_dir_)); - ASSERT_NOK(env_->FileExists(test_dir_)); - ASSERT_NOK(env_->GetChildren(test_dir_, &children)); - ASSERT_NOK(env_->GetChildrenFileAttributes(test_dir_, &childAttr)); - - // if dir is a file, returns IOError - ASSERT_OK(env_->CreateDir(test_dir_)); - std::unique_ptr writable_file; - ASSERT_OK( - env_->NewWritableFile(test_dir_ + "/file", &writable_file, soptions_)); - ASSERT_OK(writable_file->Close()); - writable_file.reset(); - ASSERT_NOK(env_->GetChildren(test_dir_ + "/file", &children)); - ASSERT_EQ(0U, children.size()); -} - -TEST_P(EnvMoreTestWithParam, GetChildrenIgnoresDotAndDotDot) { - auto* env = Env::Default(); - ASSERT_OK(env->CreateDirIfMissing(test_dir_)); - - // Create a single file - std::string path = test_dir_; - const EnvOptions soptions; -#ifdef OS_WIN - path.append("\\test_file"); -#else - path.append("/test_file"); -#endif - std::string data("test data"); - std::unique_ptr file; - ASSERT_OK(env->NewWritableFile(path, &file, soptions)); - ASSERT_OK(file->Append("test data")); - - // get the children - std::vector result; - ASSERT_OK(env->GetChildren(test_dir_, &result)); - - // expect only one file named `test_data`, i.e. 
no `.` or `..` names - ASSERT_EQ(result.size(), 1); - ASSERT_EQ(result.at(0), "test_file"); -} - -} // namespace ROCKSDB_NAMESPACE -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/env/env_test.cc b/env/env_test.cc deleted file mode 100644 index 2f748846b..000000000 --- a/env/env_test.cc +++ /dev/null @@ -1,3546 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef OS_WIN -#include -#endif - -#if defined(ROCKSDB_IOURING_PRESENT) -#include -#include -#endif - -#include - -#include -#include -#include -#include - -#ifdef OS_LINUX -#include -#include -#include -#include -#include -#endif - -#ifdef ROCKSDB_FALLOCATE_PRESENT -#include -#endif - -#include "db/db_impl/db_impl.h" -#include "env/emulated_clock.h" -#include "env/env_chroot.h" -#include "env/env_encryption_ctr.h" -#include "env/fs_readonly.h" -#include "env/mock_env.h" -#include "env/unique_id_gen.h" -#include "logging/log_buffer.h" -#include "logging/logging.h" -#include "options/options_helper.h" -#include "port/malloc.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/convenience.h" -#include "rocksdb/env.h" -#include "rocksdb/env_encryption.h" -#include "rocksdb/file_system.h" -#include "rocksdb/system_clock.h" -#include "rocksdb/utilities/object_registry.h" -#include "test_util/mock_time_env.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/coding.h" -#include "util/crc32c.h" -#include "util/mutexlock.h" -#include "util/random.h" -#include "util/string_util.h" -#include "utilities/counted_fs.h" -#include "utilities/env_timed.h" -#include "utilities/fault_injection_env.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { - -using port::kPageSize; - -static const int kDelayMicros = 100000; - -struct Deleter { - explicit Deleter(void (*fn)(void*)) : fn_(fn) {} - - void operator()(void* ptr) { - assert(fn_); - assert(ptr); - (*fn_)(ptr); - } - - void (*fn_)(void*); -}; - -extern "C" bool RocksDbIOUringEnable() { return true; } - -std::unique_ptr NewAligned(const size_t size, const char ch) { - char* ptr = nullptr; -#ifdef OS_WIN - if (nullptr == - (ptr = reinterpret_cast(_aligned_malloc(size, kPageSize)))) { - return std::unique_ptr(nullptr, Deleter(_aligned_free)); - } - std::unique_ptr uptr(ptr, Deleter(_aligned_free)); -#else - if (posix_memalign(reinterpret_cast(&ptr), kPageSize, size) != 0) { - return std::unique_ptr(nullptr, Deleter(free)); - } - std::unique_ptr uptr(ptr, Deleter(free)); -#endif - memset(uptr.get(), ch, size); - return uptr; -} - -class EnvPosixTest : public testing::Test { - private: - port::Mutex mu_; - std::string events_; - - public: - Env* env_; - bool direct_io_; - EnvPosixTest() : env_(Env::Default()), direct_io_(false) {} - ~EnvPosixTest() { - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({}); - SyncPoint::GetInstance()->ClearAllCallBacks(); - } -}; - -class 
EnvPosixTestWithParam
-    : public EnvPosixTest,
-      public ::testing::WithParamInterface<std::pair<Env*, bool>> {
- public:
-  EnvPosixTestWithParam() {
-    std::pair<Env*, bool> param_pair = GetParam();
-    env_ = param_pair.first;
-    direct_io_ = param_pair.second;
-  }
-
-  void WaitThreadPoolsEmpty() {
-    // Wait until the thread pools are empty.
-    while (env_->GetThreadPoolQueueLen(Env::Priority::LOW) != 0) {
-      Env::Default()->SleepForMicroseconds(kDelayMicros);
-    }
-    while (env_->GetThreadPoolQueueLen(Env::Priority::HIGH) != 0) {
-      Env::Default()->SleepForMicroseconds(kDelayMicros);
-    }
-  }
-
-  ~EnvPosixTestWithParam() override { WaitThreadPoolsEmpty(); }
-};
-
-static void SetBool(void* ptr) {
-  reinterpret_cast<std::atomic<bool>*>(ptr)->store(true);
-}
-
-TEST_F(EnvPosixTest, DISABLED_RunImmediately) {
-  for (int pri = Env::BOTTOM; pri < Env::TOTAL; ++pri) {
-    std::atomic<bool> called(false);
-    env_->SetBackgroundThreads(1, static_cast<Env::Priority>(pri));
-    env_->Schedule(&SetBool, &called, static_cast<Env::Priority>(pri));
-    Env::Default()->SleepForMicroseconds(kDelayMicros);
-    ASSERT_TRUE(called.load());
-  }
-}
-
-TEST_F(EnvPosixTest, RunEventually) {
-  std::atomic<bool> called(false);
-  env_->StartThread(&SetBool, &called);
-  env_->WaitForJoin();
-  ASSERT_TRUE(called.load());
-}
-
-#ifdef OS_WIN
-TEST_F(EnvPosixTest, AreFilesSame) {
-  {
-    bool tmp;
-    if (env_->AreFilesSame("", "", &tmp).IsNotSupported()) {
-      fprintf(stderr,
-              "skipping EnvBasicTestWithParam.AreFilesSame due to "
-              "unsupported Env::AreFilesSame\n");
-      return;
-    }
-  }
-
-  const EnvOptions soptions;
-  auto* env = Env::Default();
-  std::string same_file_name = test::PerThreadDBPath(env, "same_file");
-  std::string same_file_link_name = same_file_name + "_link";
-
-  std::unique_ptr<WritableFile> same_file;
-  ASSERT_OK(env->NewWritableFile(same_file_name, &same_file, soptions));
-  same_file->Append("random_data");
-  ASSERT_OK(same_file->Flush());
-  same_file.reset();
-
-  ASSERT_OK(env->LinkFile(same_file_name, same_file_link_name));
-  bool result = false;
-  ASSERT_OK(env->AreFilesSame(same_file_name, same_file_link_name, &result));
-  ASSERT_TRUE(result);
-}
-#endif
-
-#ifdef OS_LINUX
-TEST_F(EnvPosixTest, DISABLED_FilePermission) {
-  // Only works for Linux environment
-  if (env_ == Env::Default()) {
-    EnvOptions soptions;
-    std::vector<std::string> fileNames{
-        test::PerThreadDBPath(env_, "testfile"),
-        test::PerThreadDBPath(env_, "testfile1")};
-    std::unique_ptr<WritableFile> wfile;
-    ASSERT_OK(env_->NewWritableFile(fileNames[0], &wfile, soptions));
-    ASSERT_OK(env_->NewWritableFile(fileNames[1], &wfile, soptions));
-    wfile.reset();
-    std::unique_ptr<RandomRWFile> rwfile;
-    ASSERT_OK(env_->NewRandomRWFile(fileNames[1], &rwfile, soptions));
-
-    struct stat sb;
-    for (const auto& filename : fileNames) {
-      if (::stat(filename.c_str(), &sb) == 0) {
-        ASSERT_EQ(sb.st_mode & 0777, 0644);
-      }
-      ASSERT_OK(env_->DeleteFile(filename));
-    }
-
-    env_->SetAllowNonOwnerAccess(false);
-    ASSERT_OK(env_->NewWritableFile(fileNames[0], &wfile, soptions));
-    ASSERT_OK(env_->NewWritableFile(fileNames[1], &wfile, soptions));
-    wfile.reset();
-    ASSERT_OK(env_->NewRandomRWFile(fileNames[1], &rwfile, soptions));
-
-    for (const auto& filename : fileNames) {
-      if (::stat(filename.c_str(), &sb) == 0) {
-        ASSERT_EQ(sb.st_mode & 0777, 0600);
-      }
-      ASSERT_OK(env_->DeleteFile(filename));
-    }
-  }
-}
-
-TEST_F(EnvPosixTest, LowerThreadPoolCpuPriority) {
-  std::atomic<CpuPriority> from_priority(CpuPriority::kNormal);
-  std::atomic<CpuPriority> to_priority(CpuPriority::kNormal);
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
-      "ThreadPoolImpl::BGThread::BeforeSetCpuPriority", [&](void* pri) {
from_priority.store(*reinterpret_cast(pri)); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ThreadPoolImpl::BGThread::AfterSetCpuPriority", [&](void* pri) { - to_priority.store(*reinterpret_cast(pri)); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - env_->SetBackgroundThreads(1, Env::BOTTOM); - env_->SetBackgroundThreads(1, Env::HIGH); - - auto RunTask = [&](Env::Priority pool) { - std::atomic called(false); - env_->Schedule(&SetBool, &called, pool); - for (int i = 0; i < kDelayMicros; i++) { - if (called.load()) { - break; - } - Env::Default()->SleepForMicroseconds(1); - } - ASSERT_TRUE(called.load()); - }; - - { - // Same priority, no-op. - env_->LowerThreadPoolCPUPriority(Env::Priority::BOTTOM, - CpuPriority::kNormal) - .PermitUncheckedError(); - RunTask(Env::Priority::BOTTOM); - ASSERT_EQ(from_priority, CpuPriority::kNormal); - ASSERT_EQ(to_priority, CpuPriority::kNormal); - } - - { - // Higher priority, no-op. - env_->LowerThreadPoolCPUPriority(Env::Priority::BOTTOM, CpuPriority::kHigh) - .PermitUncheckedError(); - RunTask(Env::Priority::BOTTOM); - ASSERT_EQ(from_priority, CpuPriority::kNormal); - ASSERT_EQ(to_priority, CpuPriority::kNormal); - } - - { - // Lower priority from kNormal -> kLow. - env_->LowerThreadPoolCPUPriority(Env::Priority::BOTTOM, CpuPriority::kLow) - .PermitUncheckedError(); - RunTask(Env::Priority::BOTTOM); - ASSERT_EQ(from_priority, CpuPriority::kNormal); - ASSERT_EQ(to_priority, CpuPriority::kLow); - } - - { - // Lower priority from kLow -> kIdle. - env_->LowerThreadPoolCPUPriority(Env::Priority::BOTTOM, CpuPriority::kIdle) - .PermitUncheckedError(); - RunTask(Env::Priority::BOTTOM); - ASSERT_EQ(from_priority, CpuPriority::kLow); - ASSERT_EQ(to_priority, CpuPriority::kIdle); - } - - { - // Lower priority from kNormal -> kIdle for another pool. 
- env_->LowerThreadPoolCPUPriority(Env::Priority::HIGH, CpuPriority::kIdle) - .PermitUncheckedError(); - RunTask(Env::Priority::HIGH); - ASSERT_EQ(from_priority, CpuPriority::kNormal); - ASSERT_EQ(to_priority, CpuPriority::kIdle); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} -#endif - -TEST_F(EnvPosixTest, MemoryMappedFileBuffer) { - const int kFileBytes = 1 << 15; // 32 KB - std::string expected_data; - std::string fname = test::PerThreadDBPath(env_, "testfile"); - { - std::unique_ptr wfile; - const EnvOptions soptions; - ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); - - Random rnd(301); - expected_data = rnd.RandomString(kFileBytes); - ASSERT_OK(wfile->Append(expected_data)); - } - - std::unique_ptr mmap_buffer; - Status status = env_->NewMemoryMappedFileBuffer(fname, &mmap_buffer); - // it should be supported at least on linux -#if !defined(OS_LINUX) - if (status.IsNotSupported()) { - fprintf(stderr, - "skipping EnvPosixTest.MemoryMappedFileBuffer due to " - "unsupported Env::NewMemoryMappedFileBuffer\n"); - return; - } -#endif // !defined(OS_LINUX) - - ASSERT_OK(status); - ASSERT_NE(nullptr, mmap_buffer.get()); - ASSERT_NE(nullptr, mmap_buffer->GetBase()); - ASSERT_EQ(kFileBytes, mmap_buffer->GetLen()); - std::string actual_data(reinterpret_cast(mmap_buffer->GetBase()), - mmap_buffer->GetLen()); - ASSERT_EQ(expected_data, actual_data); -} - -#ifndef ROCKSDB_NO_DYNAMIC_EXTENSION -TEST_F(EnvPosixTest, LoadRocksDBLibrary) { - std::shared_ptr library; - std::function function; - Status status = env_->LoadLibrary("no-such-library", "", &library); - ASSERT_NOK(status); - ASSERT_EQ(nullptr, library.get()); - status = env_->LoadLibrary("rocksdb", "", &library); - if (status.ok()) { // If we have can find a rocksdb shared library - ASSERT_NE(nullptr, library.get()); - ASSERT_OK(library->LoadFunction("rocksdb_create_default_env", - &function)); // from C definition - ASSERT_NE(nullptr, function); - ASSERT_NOK(library->LoadFunction("no-such-method", &function)); - ASSERT_EQ(nullptr, function); - ASSERT_OK(env_->LoadLibrary(library->Name(), "", &library)); - } else { - ASSERT_EQ(nullptr, library.get()); - } -} -#endif // !ROCKSDB_NO_DYNAMIC_EXTENSION - -#if !defined(OS_WIN) && !defined(ROCKSDB_NO_DYNAMIC_EXTENSION) -TEST_F(EnvPosixTest, LoadRocksDBLibraryWithSearchPath) { - std::shared_ptr library; - std::function function; - ASSERT_NOK(env_->LoadLibrary("no-such-library", "/tmp", &library)); - ASSERT_EQ(nullptr, library.get()); - ASSERT_NOK(env_->LoadLibrary("dl", "/tmp", &library)); - ASSERT_EQ(nullptr, library.get()); - Status status = env_->LoadLibrary("rocksdb", "/tmp:./", &library); - if (status.ok()) { - ASSERT_NE(nullptr, library.get()); - ASSERT_OK(env_->LoadLibrary(library->Name(), "", &library)); - } - char buff[1024]; - std::string cwd = getcwd(buff, sizeof(buff)); - - status = env_->LoadLibrary("rocksdb", "/tmp:" + cwd, &library); - if (status.ok()) { - ASSERT_NE(nullptr, library.get()); - ASSERT_OK(env_->LoadLibrary(library->Name(), "", &library)); - } -} -#endif // !OS_WIN && !ROCKSDB_NO_DYNAMIC_EXTENSION - -TEST_P(EnvPosixTestWithParam, UnSchedule) { - std::atomic called(false); - env_->SetBackgroundThreads(1, Env::LOW); - - /* Block the low priority queue */ - test::SleepingBackgroundTask sleeping_task, sleeping_task1; - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, - Env::Priority::LOW); - - /* Schedule another task */ - 
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task1, - Env::Priority::LOW, &sleeping_task1); - - /* Remove it with a different tag */ - ASSERT_EQ(0, env_->UnSchedule(&called, Env::Priority::LOW)); - - /* Remove it from the queue with the right tag */ - ASSERT_EQ(1, env_->UnSchedule(&sleeping_task1, Env::Priority::LOW)); - - // Unblock background thread - sleeping_task.WakeUp(); - - /* Schedule another task */ - env_->Schedule(&SetBool, &called); - for (int i = 0; i < kDelayMicros; i++) { - if (called.load()) { - break; - } - Env::Default()->SleepForMicroseconds(1); - } - ASSERT_TRUE(called.load()); - - ASSERT_TRUE(!sleeping_task.IsSleeping() && !sleeping_task1.IsSleeping()); - WaitThreadPoolsEmpty(); -} - -// This tests assumes that the last scheduled -// task will run last. In fact, in the allotted -// sleeping time nothing may actually run or they may -// run in any order. The purpose of the test is unclear. -#ifndef OS_WIN -TEST_P(EnvPosixTestWithParam, RunMany) { - env_->SetBackgroundThreads(1, Env::LOW); - std::atomic last_id(0); - - struct CB { - std::atomic* last_id_ptr; // Pointer to shared slot - int id; // Order# for the execution of this callback - - CB(std::atomic* p, int i) : last_id_ptr(p), id(i) {} - - static void Run(void* v) { - CB* cb = reinterpret_cast(v); - int cur = cb->last_id_ptr->load(); - ASSERT_EQ(cb->id - 1, cur); - cb->last_id_ptr->store(cb->id); - } - }; - - // Schedule in different order than start time - CB cb1(&last_id, 1); - CB cb2(&last_id, 2); - CB cb3(&last_id, 3); - CB cb4(&last_id, 4); - env_->Schedule(&CB::Run, &cb1); - env_->Schedule(&CB::Run, &cb2); - env_->Schedule(&CB::Run, &cb3); - env_->Schedule(&CB::Run, &cb4); - // thread-pool pops a thread function and then run the function, which may - // cause threadpool is empty but the last function is still running. Add a - // dummy function at the end, to make sure the last callback is finished - // before threadpool is empty. - struct DummyCB { - static void Run(void*) {} - }; - env_->Schedule(&DummyCB::Run, nullptr); - - WaitThreadPoolsEmpty(); - ASSERT_EQ(4, last_id.load(std::memory_order_acquire)); -} -#endif - -struct State { - port::Mutex mu; - int val; - int num_running; -}; - -static void ThreadBody(void* arg) { - State* s = reinterpret_cast(arg); - s->mu.Lock(); - s->val += 1; - s->num_running -= 1; - s->mu.Unlock(); -} - -TEST_P(EnvPosixTestWithParam, StartThread) { - State state; - state.val = 0; - state.num_running = 3; - for (int i = 0; i < 3; i++) { - env_->StartThread(&ThreadBody, &state); - } - while (true) { - state.mu.Lock(); - int num = state.num_running; - state.mu.Unlock(); - if (num == 0) { - break; - } - Env::Default()->SleepForMicroseconds(kDelayMicros); - } - ASSERT_EQ(state.val, 3); - WaitThreadPoolsEmpty(); -} - -TEST_P(EnvPosixTestWithParam, TwoPools) { - // Data structures to signal tasks to run. - port::Mutex mutex; - port::CondVar cv(&mutex); - bool should_start = false; - - class CB { - public: - CB(const std::string& pool_name, int pool_size, port::Mutex* trigger_mu, - port::CondVar* trigger_cv, bool* _should_start) - : mu_(), - num_running_(0), - num_finished_(0), - pool_size_(pool_size), - pool_name_(pool_name), - trigger_mu_(trigger_mu), - trigger_cv_(trigger_cv), - should_start_(_should_start) {} - - static void Run(void* v) { - CB* cb = reinterpret_cast(v); - cb->Run(); - } - - void Run() { - { - MutexLock l(&mu_); - num_running_++; - // make sure we don't have more than pool_size_ jobs running. 
- ASSERT_LE(num_running_, pool_size_.load()); - } - - { - MutexLock l(trigger_mu_); - while (!(*should_start_)) { - trigger_cv_->Wait(); - } - } - - { - MutexLock l(&mu_); - num_running_--; - num_finished_++; - } - } - - int NumFinished() { - MutexLock l(&mu_); - return num_finished_; - } - - void Reset(int pool_size) { - pool_size_.store(pool_size); - num_finished_ = 0; - } - - private: - port::Mutex mu_; - int num_running_; - int num_finished_; - std::atomic pool_size_; - std::string pool_name_; - port::Mutex* trigger_mu_; - port::CondVar* trigger_cv_; - bool* should_start_; - }; - - const int kLowPoolSize = 2; - const int kHighPoolSize = 4; - const int kJobs = 8; - - CB low_pool_job("low", kLowPoolSize, &mutex, &cv, &should_start); - CB high_pool_job("high", kHighPoolSize, &mutex, &cv, &should_start); - - env_->SetBackgroundThreads(kLowPoolSize); - env_->SetBackgroundThreads(kHighPoolSize, Env::Priority::HIGH); - - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - - // schedule same number of jobs in each pool - for (int i = 0; i < kJobs; i++) { - env_->Schedule(&CB::Run, &low_pool_job); - env_->Schedule(&CB::Run, &high_pool_job, Env::Priority::HIGH); - } - // Wait a short while for the jobs to be dispatched. - int sleep_count = 0; - while ((unsigned int)(kJobs - kLowPoolSize) != - env_->GetThreadPoolQueueLen(Env::Priority::LOW) || - (unsigned int)(kJobs - kHighPoolSize) != - env_->GetThreadPoolQueueLen(Env::Priority::HIGH)) { - env_->SleepForMicroseconds(kDelayMicros); - if (++sleep_count > 100) { - break; - } - } - - ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize), - env_->GetThreadPoolQueueLen()); - ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize), - env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - ASSERT_EQ((unsigned int)(kJobs - kHighPoolSize), - env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - - // Trigger jobs to run. - { - MutexLock l(&mutex); - should_start = true; - cv.SignalAll(); - } - - // wait for all jobs to finish - while (low_pool_job.NumFinished() < kJobs || - high_pool_job.NumFinished() < kJobs) { - env_->SleepForMicroseconds(kDelayMicros); - } - - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - - // Hold jobs to schedule; - should_start = false; - - // call IncBackgroundThreadsIfNeeded to two pools. One increasing and - // the other decreasing - env_->IncBackgroundThreadsIfNeeded(kLowPoolSize - 1, Env::Priority::LOW); - env_->IncBackgroundThreadsIfNeeded(kHighPoolSize + 1, Env::Priority::HIGH); - high_pool_job.Reset(kHighPoolSize + 1); - low_pool_job.Reset(kLowPoolSize); - - // schedule same number of jobs in each pool - for (int i = 0; i < kJobs; i++) { - env_->Schedule(&CB::Run, &low_pool_job); - env_->Schedule(&CB::Run, &high_pool_job, Env::Priority::HIGH); - } - // Wait a short while for the jobs to be dispatched. 
- sleep_count = 0; - while ((unsigned int)(kJobs - kLowPoolSize) != - env_->GetThreadPoolQueueLen(Env::Priority::LOW) || - (unsigned int)(kJobs - (kHighPoolSize + 1)) != - env_->GetThreadPoolQueueLen(Env::Priority::HIGH)) { - env_->SleepForMicroseconds(kDelayMicros); - if (++sleep_count > 100) { - break; - } - } - ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize), - env_->GetThreadPoolQueueLen()); - ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize), - env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - ASSERT_EQ((unsigned int)(kJobs - (kHighPoolSize + 1)), - env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - - // Trigger jobs to run. - { - MutexLock l(&mutex); - should_start = true; - cv.SignalAll(); - } - - // wait for all jobs to finish - while (low_pool_job.NumFinished() < kJobs || - high_pool_job.NumFinished() < kJobs) { - env_->SleepForMicroseconds(kDelayMicros); - } - - env_->SetBackgroundThreads(kHighPoolSize, Env::Priority::HIGH); - WaitThreadPoolsEmpty(); -} - -TEST_P(EnvPosixTestWithParam, DecreaseNumBgThreads) { - constexpr int kWaitMicros = 60000000; // 1min - - std::vector tasks(10); - - // Set number of thread to 1 first. - env_->SetBackgroundThreads(1, Env::Priority::HIGH); - - // Schedule 3 tasks. 0 running; Task 1, 2 waiting. - for (size_t i = 0; i < 3; i++) { - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &tasks[i], - Env::Priority::HIGH); - } - ASSERT_FALSE(tasks[0].TimedWaitUntilSleeping(kWaitMicros)); - ASSERT_EQ(2U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(tasks[0].IsSleeping()); - ASSERT_TRUE(!tasks[1].IsSleeping()); - ASSERT_TRUE(!tasks[2].IsSleeping()); - - // Increase to 2 threads. Task 0, 1 running; 2 waiting - env_->SetBackgroundThreads(2, Env::Priority::HIGH); - ASSERT_FALSE(tasks[1].TimedWaitUntilSleeping(kWaitMicros)); - ASSERT_EQ(1U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(tasks[0].IsSleeping()); - ASSERT_TRUE(tasks[1].IsSleeping()); - ASSERT_TRUE(!tasks[2].IsSleeping()); - - // Shrink back to 1 thread. Still task 0, 1 running, 2 waiting - env_->SetBackgroundThreads(1, Env::Priority::HIGH); - Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_EQ(1U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(tasks[0].IsSleeping()); - ASSERT_TRUE(tasks[1].IsSleeping()); - ASSERT_TRUE(!tasks[2].IsSleeping()); - - // The last task finishes. Task 0 running, 2 waiting. - tasks[1].WakeUp(); - ASSERT_FALSE(tasks[1].TimedWaitUntilDone(kWaitMicros)); - ASSERT_EQ(1U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(tasks[0].IsSleeping()); - ASSERT_TRUE(!tasks[1].IsSleeping()); - ASSERT_TRUE(!tasks[2].IsSleeping()); - - // Increase to 5 threads. Task 0 and 2 running. - env_->SetBackgroundThreads(5, Env::Priority::HIGH); - ASSERT_FALSE(tasks[2].TimedWaitUntilSleeping(kWaitMicros)); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(tasks[0].IsSleeping()); - ASSERT_TRUE(!tasks[1].IsSleeping()); - ASSERT_TRUE(tasks[2].IsSleeping()); - - // Change number of threads a couple of times while there is no sufficient - // tasks. 
- env_->SetBackgroundThreads(7, Env::Priority::HIGH); - tasks[2].WakeUp(); - ASSERT_FALSE(tasks[2].TimedWaitUntilDone(kWaitMicros)); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - env_->SetBackgroundThreads(3, Env::Priority::HIGH); - Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - env_->SetBackgroundThreads(4, Env::Priority::HIGH); - Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - env_->SetBackgroundThreads(5, Env::Priority::HIGH); - Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - env_->SetBackgroundThreads(4, Env::Priority::HIGH); - Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - - Env::Default()->SleepForMicroseconds(kDelayMicros * 50); - - // Enqueue 5 more tasks. Thread pool size now is 4. - // Task 0, 3, 4, 5 running;6, 7 waiting. - for (size_t i = 3; i < 8; i++) { - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &tasks[i], - Env::Priority::HIGH); - } - for (size_t i = 3; i <= 5; i++) { - ASSERT_FALSE(tasks[i].TimedWaitUntilSleeping(kWaitMicros)); - } - ASSERT_EQ(2U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(tasks[0].IsSleeping()); - ASSERT_TRUE(!tasks[1].IsSleeping()); - ASSERT_TRUE(!tasks[2].IsSleeping()); - ASSERT_TRUE(tasks[3].IsSleeping()); - ASSERT_TRUE(tasks[4].IsSleeping()); - ASSERT_TRUE(tasks[5].IsSleeping()); - ASSERT_TRUE(!tasks[6].IsSleeping()); - ASSERT_TRUE(!tasks[7].IsSleeping()); - - // Wake up task 0, 3 and 4. Task 5, 6, 7 running. - tasks[0].WakeUp(); - tasks[3].WakeUp(); - tasks[4].WakeUp(); - - for (size_t i = 5; i < 8; i++) { - ASSERT_FALSE(tasks[i].TimedWaitUntilSleeping(kWaitMicros)); - } - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - for (size_t i = 5; i < 8; i++) { - ASSERT_TRUE(tasks[i].IsSleeping()); - } - - // Shrink back to 1 thread. Still task 5, 6, 7 running - env_->SetBackgroundThreads(1, Env::Priority::HIGH); - Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_TRUE(tasks[5].IsSleeping()); - ASSERT_TRUE(tasks[6].IsSleeping()); - ASSERT_TRUE(tasks[7].IsSleeping()); - - // Wake up task 6. Task 5, 7 running - tasks[6].WakeUp(); - ASSERT_FALSE(tasks[6].TimedWaitUntilDone(kWaitMicros)); - ASSERT_TRUE(tasks[5].IsSleeping()); - ASSERT_TRUE(!tasks[6].IsSleeping()); - ASSERT_TRUE(tasks[7].IsSleeping()); - - // Wake up threads 7. Task 5 running - tasks[7].WakeUp(); - ASSERT_FALSE(tasks[7].TimedWaitUntilDone(kWaitMicros)); - ASSERT_TRUE(!tasks[7].IsSleeping()); - - // Enqueue thread 8 and 9. Task 5 running; one of 8, 9 might be running. - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &tasks[8], - Env::Priority::HIGH); - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &tasks[9], - Env::Priority::HIGH); - Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_GT(env_->GetThreadPoolQueueLen(Env::Priority::HIGH), (unsigned int)0); - ASSERT_TRUE(!tasks[8].IsSleeping() || !tasks[9].IsSleeping()); - - // Increase to 4 threads. Task 5, 8, 9 running. 
- env_->SetBackgroundThreads(4, Env::Priority::HIGH); - Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_EQ((unsigned int)0, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(tasks[8].IsSleeping()); - ASSERT_TRUE(tasks[9].IsSleeping()); - - // Shrink to 1 thread - env_->SetBackgroundThreads(1, Env::Priority::HIGH); - - // Wake up thread 9. - tasks[9].WakeUp(); - ASSERT_FALSE(tasks[9].TimedWaitUntilDone(kWaitMicros)); - ASSERT_TRUE(!tasks[9].IsSleeping()); - ASSERT_TRUE(tasks[8].IsSleeping()); - - // Wake up thread 8 - tasks[8].WakeUp(); - ASSERT_FALSE(tasks[8].TimedWaitUntilDone(kWaitMicros)); - ASSERT_TRUE(!tasks[8].IsSleeping()); - - // Wake up the last thread - tasks[5].WakeUp(); - ASSERT_FALSE(tasks[5].TimedWaitUntilDone(kWaitMicros)); - WaitThreadPoolsEmpty(); -} - -TEST_P(EnvPosixTestWithParam, ReserveThreads) { - // Initialize the background thread to 1 in case other threads exist - // from the last unit test - env_->SetBackgroundThreads(1, Env::Priority::HIGH); - ASSERT_EQ(env_->GetBackgroundThreads(Env::HIGH), 1); - constexpr int kWaitMicros = 10000000; // 10seconds - std::vector tasks(4); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - // Set the sync point to ensure thread 0 can terminate - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ThreadPoolImpl::BGThread::Termination:th0", - "EnvTest::ReserveThreads:0"}}); - // Empty the thread pool to ensure all the threads can start later - env_->SetBackgroundThreads(0, Env::Priority::HIGH); - TEST_SYNC_POINT("EnvTest::ReserveThreads:0"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - // Set the sync point to ensure threads start and pass the sync point - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ThreadPoolImpl::BGThread::Start:th0", "EnvTest::ReserveThreads:1"}, - {"ThreadPoolImpl::BGThread::Start:th1", "EnvTest::ReserveThreads:2"}, - {"ThreadPoolImpl::BGThread::Start:th2", "EnvTest::ReserveThreads:3"}, - {"ThreadPoolImpl::BGThread::Start:th3", "EnvTest::ReserveThreads:4"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Set number of thread to 3 first. - env_->SetBackgroundThreads(3, Env::Priority::HIGH); - ASSERT_EQ(env_->GetBackgroundThreads(Env::HIGH), 3); - // Add sync points to ensure all 3 threads start - TEST_SYNC_POINT("EnvTest::ReserveThreads:1"); - TEST_SYNC_POINT("EnvTest::ReserveThreads:2"); - TEST_SYNC_POINT("EnvTest::ReserveThreads:3"); - // Reserve 2 threads - ASSERT_EQ(2, env_->ReserveThreads(2, Env::Priority::HIGH)); - - // Schedule 3 tasks. Task 0 running (in this context, doing - // SleepingBackgroundTask); Task 1, 2 waiting; 3 reserved threads. - for (size_t i = 0; i < 3; i++) { - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &tasks[i], - Env::Priority::HIGH); - } - ASSERT_FALSE(tasks[0].TimedWaitUntilSleeping(kWaitMicros)); - ASSERT_EQ(2U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(tasks[0].IsSleeping()); - ASSERT_TRUE(!tasks[1].IsSleeping()); - ASSERT_TRUE(!tasks[2].IsSleeping()); - - // Release 2 threads. Task 0, 1, 2 running; 0 reserved thread. 
- ASSERT_EQ(2, env_->ReleaseThreads(2, Env::Priority::HIGH)); - ASSERT_FALSE(tasks[1].TimedWaitUntilSleeping(kWaitMicros)); - ASSERT_FALSE(tasks[2].TimedWaitUntilSleeping(kWaitMicros)); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(tasks[1].IsSleeping()); - ASSERT_TRUE(tasks[2].IsSleeping()); - // No more threads can be reserved - ASSERT_EQ(0, env_->ReserveThreads(3, Env::Priority::HIGH)); - // Expand the number of background threads so that the last thread - // is waiting - env_->SetBackgroundThreads(4, Env::Priority::HIGH); - // Add sync point to ensure the 4th thread starts - TEST_SYNC_POINT("EnvTest::ReserveThreads:4"); - // As the thread pool is expanded, we can reserve one more thread - ASSERT_EQ(1, env_->ReserveThreads(3, Env::Priority::HIGH)); - // No more threads can be reserved - ASSERT_EQ(0, env_->ReserveThreads(3, Env::Priority::HIGH)); - - // Reset the sync points for the next iteration in BGThread or the - // next time Submit() is called - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"ThreadPoolImpl::BGThread::WaitingThreadsInc", - "EnvTest::ReserveThreads:5"}, - {"ThreadPoolImpl::BGThread::Termination", "EnvTest::ReserveThreads:6"}, - {"ThreadPoolImpl::Submit::Enqueue", "EnvTest::ReserveThreads:7"}}); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - tasks[0].WakeUp(); - ASSERT_FALSE(tasks[0].TimedWaitUntilDone(kWaitMicros)); - // Add sync point to ensure the number of waiting threads increases - TEST_SYNC_POINT("EnvTest::ReserveThreads:5"); - // 1 more thread can be reserved - ASSERT_EQ(1, env_->ReserveThreads(3, Env::Priority::HIGH)); - // 2 reserved threads now - - // Currently, two threads are blocked since the number of waiting - // threads is equal to the number of reserved threads (i.e., 2). 
- // If we reduce the number of background thread to 1, at least one thread - // will be the last excessive thread (here we have no control over the - // number of excessive threads because thread order does not - // necessarily follows the schedule order, but we ensure that the last thread - // shall not run any task by expanding the thread pool after we schedule - // the tasks), and thus they(it) become(s) unblocked, the number of waiting - // threads decreases to 0 or 1, but the number of reserved threads is still 2 - env_->SetBackgroundThreads(1, Env::Priority::HIGH); - - // Task 1,2 running; 2 reserved threads, however, in fact, we only have - // 0 or 1 waiting thread in the thread pool, proved by the - // following test, we CANNOT reserve 2 threads even though we just - // release 2 - TEST_SYNC_POINT("EnvTest::ReserveThreads:6"); - ASSERT_EQ(2, env_->ReleaseThreads(2, Env::Priority::HIGH)); - ASSERT_GT(2, env_->ReserveThreads(2, Env::Priority::HIGH)); - - // Every new task will be put into the queue at this point - env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &tasks[3], - Env::Priority::HIGH); - TEST_SYNC_POINT("EnvTest::ReserveThreads:7"); - ASSERT_EQ(1U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - ASSERT_TRUE(!tasks[3].IsSleeping()); - - // Set the number of threads to 3 so that Task 3 can dequeue - env_->SetBackgroundThreads(3, Env::Priority::HIGH); - // Wakup Task 1 - tasks[1].WakeUp(); - ASSERT_FALSE(tasks[1].TimedWaitUntilDone(kWaitMicros)); - // Task 2, 3 running (Task 3 dequeue); 0 or 1 reserved thread - ASSERT_FALSE(tasks[3].TimedWaitUntilSleeping(kWaitMicros)); - ASSERT_TRUE(tasks[3].IsSleeping()); - ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); - - // At most 1 thread can be released - ASSERT_GT(2, env_->ReleaseThreads(3, Env::Priority::HIGH)); - tasks[2].WakeUp(); - ASSERT_FALSE(tasks[2].TimedWaitUntilDone(kWaitMicros)); - tasks[3].WakeUp(); - ASSERT_FALSE(tasks[3].TimedWaitUntilDone(kWaitMicros)); - WaitThreadPoolsEmpty(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -#if (defined OS_LINUX || defined OS_WIN) -namespace { -bool IsSingleVarint(const std::string& s) { - Slice slice(s); - - uint64_t v; - if (!GetVarint64(&slice, &v)) { - return false; - } - - return slice.size() == 0; -} - -bool IsUniqueIDValid(const std::string& s) { - return !s.empty() && !IsSingleVarint(s); -} - -const size_t MAX_ID_SIZE = 100; -char temp_id[MAX_ID_SIZE]; - -} // namespace - -// Determine whether we can use the FS_IOC_GETVERSION ioctl -// on a file in directory DIR. Create a temporary file therein, -// try to apply the ioctl (save that result), cleanup and -// return the result. Return true if it is supported, and -// false if anything fails. -// Note that this function "knows" that dir has just been created -// and is empty, so we create a simply-named test file: "f". -bool ioctl_support__FS_IOC_GETVERSION(const std::string& dir) { -#ifdef OS_WIN - return true; -#else - const std::string file = dir + "/f"; - int fd; - do { - fd = open(file.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); - } while (fd < 0 && errno == EINTR); - long int version; - bool ok = (fd >= 0 && ioctl(fd, FS_IOC_GETVERSION, &version) >= 0); - - close(fd); - unlink(file.c_str()); - - return ok; -#endif -} - -// To ensure that Env::GetUniqueId-related tests work correctly, the files -// should be stored in regular storage like "hard disk" or "flash device", -// and not on a tmpfs file system (like /dev/shm and /tmp on some systems). 
-// Otherwise we cannot get the correct id. -// -// This function serves as the replacement for test::TmpDir(), which may be -// customized to be on a file system that doesn't work with GetUniqueId(). - -class IoctlFriendlyTmpdir { - public: - explicit IoctlFriendlyTmpdir() { - char dir_buf[100]; - - const char* fmt = "%s/rocksdb.XXXXXX"; - const char* tmp = getenv("TEST_IOCTL_FRIENDLY_TMPDIR"); - -#ifdef OS_WIN -#define rmdir _rmdir - if (tmp == nullptr) { - tmp = getenv("TMP"); - } - - snprintf(dir_buf, sizeof dir_buf, fmt, tmp); - auto result = _mktemp(dir_buf); - assert(result != nullptr); - BOOL ret = CreateDirectory(dir_buf, NULL); - assert(ret == TRUE); - dir_ = dir_buf; -#else - std::list candidate_dir_list = {"/var/tmp", "/tmp"}; - - // If $TEST_IOCTL_FRIENDLY_TMPDIR/rocksdb.XXXXXX fits, use - // $TEST_IOCTL_FRIENDLY_TMPDIR; subtract 2 for the "%s", and - // add 1 for the trailing NUL byte. - if (tmp && strlen(tmp) + strlen(fmt) - 2 + 1 <= sizeof dir_buf) { - // use $TEST_IOCTL_FRIENDLY_TMPDIR value - candidate_dir_list.push_front(tmp); - } - - for (const std::string& d : candidate_dir_list) { - snprintf(dir_buf, sizeof dir_buf, fmt, d.c_str()); - if (mkdtemp(dir_buf)) { - if (ioctl_support__FS_IOC_GETVERSION(dir_buf)) { - dir_ = dir_buf; - return; - } else { - // Diagnose ioctl-related failure only if this is the - // directory specified via that envvar. - if (tmp && tmp == d) { - fprintf(stderr, - "TEST_IOCTL_FRIENDLY_TMPDIR-specified directory is " - "not suitable: %s\n", - d.c_str()); - } - rmdir(dir_buf); // ignore failure - } - } else { - // mkdtemp failed: diagnose it, but don't give up. - fprintf(stderr, "mkdtemp(%s/...) failed: %s\n", d.c_str(), - errnoStr(errno).c_str()); - } - } - - // check if it's running test within a docker container, in which case, the - // file system inside `overlayfs` may not support FS_IOC_GETVERSION - // skip the tests - struct stat buffer; - if (stat("/.dockerenv", &buffer) == 0) { - is_supported_ = false; - return; - } - - fprintf(stderr, - "failed to find an ioctl-friendly temporary directory;" - " specify one via the TEST_IOCTL_FRIENDLY_TMPDIR envvar\n"); - std::abort(); -#endif - } - - ~IoctlFriendlyTmpdir() { rmdir(dir_.c_str()); } - - const std::string& name() const { return dir_; } - - bool is_supported() const { return is_supported_; } - - private: - std::string dir_; - - bool is_supported_ = true; -}; - -TEST_F(EnvPosixTest, PositionedAppend) { - std::unique_ptr writable_file; - EnvOptions options; - options.use_direct_writes = true; - options.use_mmap_writes = false; - std::string fname = test::PerThreadDBPath(env_, "positioned_append"); - SetupSyncPointsToMockDirectIO(); - - ASSERT_OK(env_->NewWritableFile(fname, &writable_file, options)); - const size_t kBlockSize = 4096; - const size_t kDataSize = kPageSize; - // Write a page worth of 'a' - auto data_ptr = NewAligned(kDataSize, 'a'); - Slice data_a(data_ptr.get(), kDataSize); - ASSERT_OK(writable_file->PositionedAppend(data_a, 0U)); - // Write a page worth of 'b' right after the first sector - data_ptr = NewAligned(kDataSize, 'b'); - Slice data_b(data_ptr.get(), kDataSize); - ASSERT_OK(writable_file->PositionedAppend(data_b, kBlockSize)); - ASSERT_OK(writable_file->Close()); - // The file now has 1 sector worth of a followed by a page worth of b - - // Verify the above - std::unique_ptr seq_file; - ASSERT_OK(env_->NewSequentialFile(fname, &seq_file, options)); - size_t scratch_len = kPageSize * 2; - std::unique_ptr scratch(new char[scratch_len]); - Slice result; - 
ASSERT_OK(seq_file->Read(scratch_len, &result, scratch.get())); - ASSERT_EQ(kPageSize + kBlockSize, result.size()); - ASSERT_EQ('a', result[kBlockSize - 1]); - ASSERT_EQ('b', result[kBlockSize]); -} - -// `GetUniqueId()` temporarily returns zero on Windows. `BlockBasedTable` can -// handle a return value of zero but this test case cannot. -#ifndef OS_WIN -TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) { - // Create file. - if (env_ == Env::Default()) { - EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; - IoctlFriendlyTmpdir ift; - if (!ift.is_supported()) { - ROCKSDB_GTEST_BYPASS( - "FS_IOC_GETVERSION is not supported by the filesystem"); - return; - } - std::string fname = ift.name() + "/testfile"; - std::unique_ptr wfile; - ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); - - std::unique_ptr file; - - // Get Unique ID - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); - ASSERT_TRUE(id_size > 0); - std::string unique_id1(temp_id, id_size); - ASSERT_TRUE(IsUniqueIDValid(unique_id1)); - - // Get Unique ID again - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); - ASSERT_TRUE(id_size > 0); - std::string unique_id2(temp_id, id_size); - ASSERT_TRUE(IsUniqueIDValid(unique_id2)); - - // Get Unique ID again after waiting some time. - env_->SleepForMicroseconds(1000000); - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); - ASSERT_TRUE(id_size > 0); - std::string unique_id3(temp_id, id_size); - ASSERT_TRUE(IsUniqueIDValid(unique_id3)); - - // Check IDs are the same. - ASSERT_EQ(unique_id1, unique_id2); - ASSERT_EQ(unique_id2, unique_id3); - - // Delete the file - ASSERT_OK(env_->DeleteFile(fname)); - } -} -#endif // !defined(OS_WIN) - -// only works in linux platforms -#ifdef ROCKSDB_FALLOCATE_PRESENT -TEST_P(EnvPosixTestWithParam, AllocateTest) { - if (env_ == Env::Default()) { - SetupSyncPointsToMockDirectIO(); - std::string fname = test::PerThreadDBPath(env_, "preallocate_testfile"); - // Try fallocate in a file to see whether the target file system supports - // it. - // Skip the test if fallocate is not supported. 
- std::string fname_test_fallocate = - test::PerThreadDBPath(env_, "preallocate_testfile_2"); - int fd = -1; - do { - fd = open(fname_test_fallocate.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); - } while (fd < 0 && errno == EINTR); - ASSERT_GT(fd, 0); - - int alloc_status = fallocate(fd, 0, 0, 1); - - int err_number = 0; - if (alloc_status != 0) { - err_number = errno; - fprintf(stderr, "Warning: fallocate() fails, %s\n", - errnoStr(err_number).c_str()); - } - close(fd); - ASSERT_OK(env_->DeleteFile(fname_test_fallocate)); - if (alloc_status != 0 && err_number == EOPNOTSUPP) { - // The filesystem containing the file does not support fallocate - return; - } - - EnvOptions soptions; - soptions.use_mmap_writes = false; - soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; - std::unique_ptr wfile; - ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); - - // allocate 100 MB - size_t kPreallocateSize = 100 * 1024 * 1024; - size_t kBlockSize = 512; - size_t kDataSize = 1024 * 1024; - auto data_ptr = NewAligned(kDataSize, 'A'); - Slice data(data_ptr.get(), kDataSize); - wfile->SetPreallocationBlockSize(kPreallocateSize); - wfile->PrepareWrite(wfile->GetFileSize(), kDataSize); - ASSERT_OK(wfile->Append(data)); - ASSERT_OK(wfile->Flush()); - - struct stat f_stat; - ASSERT_EQ(stat(fname.c_str(), &f_stat), 0); - ASSERT_EQ((unsigned int)kDataSize, f_stat.st_size); - // verify that blocks are preallocated - // Note here that we don't check the exact number of blocks preallocated -- - // we only require that number of allocated blocks is at least what we - // expect. - // It looks like some FS give us more blocks that we asked for. That's fine. - // It might be worth investigating further. - ASSERT_LE((unsigned int)(kPreallocateSize / kBlockSize), f_stat.st_blocks); - - // close the file, should deallocate the blocks - wfile.reset(); - - stat(fname.c_str(), &f_stat); - ASSERT_EQ((unsigned int)kDataSize, f_stat.st_size); - // verify that preallocated blocks were deallocated on file close - // Because the FS might give us more blocks, we add a full page to the size - // and expect the number of blocks to be less or equal to that. - ASSERT_GE((f_stat.st_size + kPageSize + kBlockSize - 1) / kBlockSize, - (unsigned int)f_stat.st_blocks); - } -} -#endif // ROCKSDB_FALLOCATE_PRESENT - -// Returns true if any of the strings in ss are the prefix of another string. -bool HasPrefix(const std::unordered_set& ss) { - for (const std::string& s : ss) { - if (s.empty()) { - return true; - } - for (size_t i = 1; i < s.size(); ++i) { - if (ss.count(s.substr(0, i)) != 0) { - return true; - } - } - } - return false; -} - -// `GetUniqueId()` temporarily returns zero on Windows. `BlockBasedTable` can -// handle a return value of zero but this test case cannot. -#ifndef OS_WIN -TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDConcurrent) { - if (env_ == Env::Default()) { - // Check whether a bunch of concurrently existing files have unique IDs. - EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; - - // Create the files - IoctlFriendlyTmpdir ift; - if (!ift.is_supported()) { - ROCKSDB_GTEST_BYPASS( - "FS_IOC_GETVERSION is not supported by the filesystem"); - return; - } - std::vector fnames; - for (int i = 0; i < 1000; ++i) { - fnames.push_back(ift.name() + "/" + "testfile" + std::to_string(i)); - - // Create file. - std::unique_ptr wfile; - ASSERT_OK(env_->NewWritableFile(fnames[i], &wfile, soptions)); - } - - // Collect and check whether the IDs are unique. 
- std::unordered_set ids; - for (const std::string& fname : fnames) { - std::unique_ptr file; - std::string unique_id; - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); - ASSERT_TRUE(id_size > 0); - unique_id = std::string(temp_id, id_size); - ASSERT_TRUE(IsUniqueIDValid(unique_id)); - - ASSERT_TRUE(ids.count(unique_id) == 0); - ids.insert(unique_id); - } - - // Delete the files - for (const std::string& fname : fnames) { - ASSERT_OK(env_->DeleteFile(fname)); - } - - ASSERT_TRUE(!HasPrefix(ids)); - } -} - -// TODO: Disable the flaky test, it's a known issue that ext4 may return same -// key after file deletion. The issue is tracked in #7405, #7470. -TEST_P(EnvPosixTestWithParam, DISABLED_RandomAccessUniqueIDDeletes) { - if (env_ == Env::Default()) { - EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; - - IoctlFriendlyTmpdir ift; - if (!ift.is_supported()) { - ROCKSDB_GTEST_BYPASS( - "FS_IOC_GETVERSION is not supported by the filesystem"); - return; - } - std::string fname = ift.name() + "/" + "testfile"; - - // Check that after file is deleted we don't get same ID again in a new - // file. - std::unordered_set ids; - for (int i = 0; i < 1000; ++i) { - // Create file. - { - std::unique_ptr wfile; - ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); - } - - // Get Unique ID - std::string unique_id; - { - std::unique_ptr file; - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); - ASSERT_TRUE(id_size > 0); - unique_id = std::string(temp_id, id_size); - } - - ASSERT_TRUE(IsUniqueIDValid(unique_id)); - ASSERT_TRUE(ids.count(unique_id) == 0); - ids.insert(unique_id); - - // Delete the file - ASSERT_OK(env_->DeleteFile(fname)); - } - - ASSERT_TRUE(!HasPrefix(ids)); - } -} -#endif // !defined(OS_WIN) - -TEST_P(EnvPosixTestWithParam, MultiRead) { - EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; - std::string fname = test::PerThreadDBPath(env_, "testfile"); - - const size_t kSectorSize = 4096; - const size_t kNumSectors = 8; - - // Create file. - { - std::unique_ptr wfile; -#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ - !defined(OS_AIX) - if (soptions.use_direct_writes) { - soptions.use_direct_writes = false; - } -#endif - ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); - for (size_t i = 0; i < kNumSectors; ++i) { - auto data = NewAligned(kSectorSize * 8, static_cast(i + 1)); - Slice slice(data.get(), kSectorSize); - ASSERT_OK(wfile->Append(slice)); - } - ASSERT_OK(wfile->Close()); - } - - // More attempts to simulate more partial result sequences. - for (uint32_t attempt = 0; attempt < 20; attempt++) { - // Random Read - Random rnd(301 + attempt); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "UpdateResults::io_uring_result", [&](void* arg) { - if (attempt > 0) { - // No failure in the first attempt. 
- size_t& bytes_read = *static_cast(arg); - if (rnd.OneIn(4)) { - bytes_read = 0; - } else if (rnd.OneIn(3)) { - bytes_read = static_cast( - rnd.Uniform(static_cast(bytes_read))); - } - } - }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - std::unique_ptr file; - std::vector reqs(3); - std::vector> data; - uint64_t offset = 0; - for (size_t i = 0; i < reqs.size(); ++i) { - reqs[i].offset = offset; - offset += 2 * kSectorSize; - reqs[i].len = kSectorSize; - data.emplace_back(NewAligned(kSectorSize, 0)); - reqs[i].scratch = data.back().get(); - } -#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ - !defined(OS_AIX) - if (soptions.use_direct_reads) { - soptions.use_direct_reads = false; - } -#endif - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - ASSERT_OK(file->MultiRead(reqs.data(), reqs.size())); - for (size_t i = 0; i < reqs.size(); ++i) { - auto buf = NewAligned(kSectorSize * 8, static_cast(i * 2 + 1)); - ASSERT_OK(reqs[i].status); - ASSERT_EQ(memcmp(reqs[i].scratch, buf.get(), kSectorSize), 0); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(EnvPosixTest, MultiReadNonAlignedLargeNum) { - // In this test we don't do aligned read, so it doesn't work for - // direct I/O case. - EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = false; - std::string fname = test::PerThreadDBPath(env_, "testfile"); - - const size_t kTotalSize = 81920; - Random rnd(301); - std::string expected_data = rnd.RandomString(kTotalSize); - - // Create file. - { - std::unique_ptr wfile; - ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); - ASSERT_OK(wfile->Append(expected_data)); - ASSERT_OK(wfile->Close()); - } - - // More attempts to simulate more partial result sequences. - for (uint32_t attempt = 0; attempt < 25; attempt++) { - // Right now kIoUringDepth is hard coded as 256, so we need very large - // number of keys to cover the case of multiple rounds of submissions. - // Right now the test latency is still acceptable. If it ends up with - // too long, we can modify the io uring depth with SyncPoint here. - const int num_reads = rnd.Uniform(512) + 1; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "UpdateResults::io_uring_result", [&](void* arg) { - if (attempt > 5) { - // Improve partial result rates in second half of the run to - // cover the case of repeated partial results. - int odd = (attempt < 15) ? num_reads / 2 : 4; - // No failure in first several attempts. - size_t& bytes_read = *static_cast(arg); - if (rnd.OneIn(odd)) { - bytes_read = 0; - } else if (rnd.OneIn(odd / 2)) { - bytes_read = static_cast( - rnd.Uniform(static_cast(bytes_read))); - } - } - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Generate (offset, len) pairs - std::set start_offsets; - for (int i = 0; i < num_reads; i++) { - int rnd_off; - // No repeat offsets. - while (start_offsets.find(rnd_off = rnd.Uniform(81920)) != - start_offsets.end()) { - } - start_offsets.insert(rnd_off); - } - std::vector offsets; - std::vector lens; - // std::set already sorted the offsets. 
- for (int so : start_offsets) { - offsets.push_back(so); - } - for (size_t i = 0; i + 1 < offsets.size(); i++) { - lens.push_back(static_cast( - rnd.Uniform(static_cast(offsets[i + 1] - offsets[i])) + 1)); - } - lens.push_back(static_cast( - rnd.Uniform(static_cast(kTotalSize - offsets.back())) + 1)); - ASSERT_EQ(num_reads, lens.size()); - - // Create requests - std::vector scratches; - scratches.reserve(num_reads); - std::vector reqs(num_reads); - for (size_t i = 0; i < reqs.size(); ++i) { - reqs[i].offset = offsets[i]; - reqs[i].len = lens[i]; - scratches.emplace_back(reqs[i].len, ' '); - reqs[i].scratch = const_cast(scratches.back().data()); - } - - // Query the data - std::unique_ptr file; - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - ASSERT_OK(file->MultiRead(reqs.data(), reqs.size())); - - // Validate results - for (int i = 0; i < num_reads; ++i) { - ASSERT_OK(reqs[i].status); - ASSERT_EQ( - Slice(expected_data.data() + offsets[i], lens[i]).ToString(true), - reqs[i].result.ToString(true)); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(EnvPosixTest, NonAlignedDirectIOMultiReadBeyondFileSize) { - EnvOptions soptions; - soptions.use_direct_reads = true; - soptions.use_direct_writes = false; - std::string fname = test::PerThreadDBPath(env_, "testfile"); - - Random rnd(301); - std::unique_ptr wfile; - size_t alignment = 0; - // Create file. - { - ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); - auto data_ptr = NewAligned(4095, 'b'); - Slice data_b(data_ptr.get(), 4095); - ASSERT_OK(wfile->PositionedAppend(data_b, 0U)); - ASSERT_OK(wfile->Close()); - } - -#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ - !defined(OS_AIX) && !defined(OS_OPENBSD) && !defined(OS_FREEBSD) - if (soptions.use_direct_reads) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "NewRandomAccessFile:O_DIRECT", [&](void* arg) { - int* val = static_cast(arg); - *val &= ~O_DIRECT; - }); - } -#endif - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - const int num_reads = 2; - // Create requests - std::vector scratches; - scratches.reserve(num_reads); - std::vector reqs(num_reads); - - std::unique_ptr file; - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - alignment = file->GetRequiredBufferAlignment(); - ASSERT_EQ(num_reads, reqs.size()); - - std::vector> data; - - std::vector offsets = {0, 2047}; - std::vector lens = {2047, 4096 - 2047}; - - for (size_t i = 0; i < num_reads; i++) { - // Do alignment - reqs[i].offset = static_cast(TruncateToPageBoundary( - alignment, static_cast(/*offset=*/offsets[i]))); - reqs[i].len = - Roundup(static_cast(/*offset=*/offsets[i]) + /*length=*/lens[i], - alignment) - - reqs[i].offset; - - size_t new_capacity = Roundup(reqs[i].len, alignment); - data.emplace_back(NewAligned(new_capacity, 0)); - reqs[i].scratch = data.back().get(); - } - - // Query the data - ASSERT_OK(file->MultiRead(reqs.data(), reqs.size())); - - // Validate results - for (size_t i = 0; i < num_reads; ++i) { - ASSERT_OK(reqs[i].status); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -#if defined(ROCKSDB_IOURING_PRESENT) -void GenerateFilesAndRequest(Env* env, const std::string& fname, - std::vector* ret_reqs, - std::vector* scratches) { - const size_t kTotalSize = 81920; - Random rnd(301); - std::string expected_data = rnd.RandomString(kTotalSize); - - // Create file. 
- { - std::unique_ptr wfile; - ASSERT_OK(env->NewWritableFile(fname, &wfile, EnvOptions())); - ASSERT_OK(wfile->Append(expected_data)); - ASSERT_OK(wfile->Close()); - } - - // Right now kIoUringDepth is hard coded as 256, so we need very large - // number of keys to cover the case of multiple rounds of submissions. - // Right now the test latency is still acceptable. If it ends up with - // too long, we can modify the io uring depth with SyncPoint here. - const int num_reads = 3; - std::vector offsets = {10000, 20000, 30000}; - std::vector lens = {3000, 200, 100}; - - // Create requests - scratches->reserve(num_reads); - std::vector& reqs = *ret_reqs; - reqs.resize(num_reads); - for (int i = 0; i < num_reads; ++i) { - reqs[i].offset = offsets[i]; - reqs[i].len = lens[i]; - scratches->emplace_back(reqs[i].len, ' '); - reqs[i].scratch = const_cast(scratches->back().data()); - } -} - -TEST_F(EnvPosixTest, MultiReadIOUringError) { - // In this test we don't do aligned read, so we can't do direct I/O. - EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = false; - std::string fname = test::PerThreadDBPath(env_, "testfile"); - - std::vector scratches; - std::vector reqs; - GenerateFilesAndRequest(env_, fname, &reqs, &scratches); - // Query the data - std::unique_ptr file; - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - - bool io_uring_wait_cqe_called = false; - SyncPoint::GetInstance()->SetCallBack( - "PosixRandomAccessFile::MultiRead:io_uring_wait_cqe:return", - [&](void* arg) { - if (!io_uring_wait_cqe_called) { - io_uring_wait_cqe_called = true; - ssize_t& ret = *(static_cast(arg)); - ret = 1; - } - }); - SyncPoint::GetInstance()->EnableProcessing(); - - Status s = file->MultiRead(reqs.data(), reqs.size()); - if (io_uring_wait_cqe_called) { - ASSERT_NOK(s); - } else { - s.PermitUncheckedError(); - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} - -TEST_F(EnvPosixTest, MultiReadIOUringError2) { - // In this test we don't do aligned read, so we can't do direct I/O. 
- EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = false; - std::string fname = test::PerThreadDBPath(env_, "testfile"); - - std::vector scratches; - std::vector reqs; - GenerateFilesAndRequest(env_, fname, &reqs, &scratches); - // Query the data - std::unique_ptr file; - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - - bool io_uring_submit_and_wait_called = false; - SyncPoint::GetInstance()->SetCallBack( - "PosixRandomAccessFile::MultiRead:io_uring_submit_and_wait:return1", - [&](void* arg) { - io_uring_submit_and_wait_called = true; - ssize_t* ret = static_cast(arg); - (*ret)--; - }); - SyncPoint::GetInstance()->SetCallBack( - "PosixRandomAccessFile::MultiRead:io_uring_submit_and_wait:return2", - [&](void* arg) { - struct io_uring* iu = static_cast(arg); - struct io_uring_cqe* cqe; - assert(io_uring_wait_cqe(iu, &cqe) == 0); - io_uring_cqe_seen(iu, cqe); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - Status s = file->MultiRead(reqs.data(), reqs.size()); - if (io_uring_submit_and_wait_called) { - ASSERT_NOK(s); - } else { - s.PermitUncheckedError(); - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); -} -#endif // ROCKSDB_IOURING_PRESENT - -// Only works in linux platforms -#ifdef OS_WIN -TEST_P(EnvPosixTestWithParam, DISABLED_InvalidateCache) { -#else -TEST_P(EnvPosixTestWithParam, InvalidateCache) { -#endif - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; - std::string fname = test::PerThreadDBPath(env_, "testfile"); - - const size_t kSectorSize = 512; - auto data = NewAligned(kSectorSize, 0); - Slice slice(data.get(), kSectorSize); - - // Create file. 
- { - std::unique_ptr wfile; -#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ - !defined(OS_AIX) - if (soptions.use_direct_writes) { - soptions.use_direct_writes = false; - } -#endif - ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); - ASSERT_OK(wfile->Append(slice)); - ASSERT_OK(wfile->InvalidateCache(0, 0)); - ASSERT_OK(wfile->Close()); - } - - // Random Read - { - std::unique_ptr file; - auto scratch = NewAligned(kSectorSize, 0); - Slice result; -#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ - !defined(OS_AIX) - if (soptions.use_direct_reads) { - soptions.use_direct_reads = false; - } -#endif - ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); - ASSERT_OK(file->Read(0, kSectorSize, &result, scratch.get())); - ASSERT_EQ(memcmp(scratch.get(), data.get(), kSectorSize), 0); - ASSERT_OK(file->InvalidateCache(0, 11)); - ASSERT_OK(file->InvalidateCache(0, 0)); - } - - // Sequential Read - { - std::unique_ptr file; - auto scratch = NewAligned(kSectorSize, 0); - Slice result; -#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ - !defined(OS_AIX) - if (soptions.use_direct_reads) { - soptions.use_direct_reads = false; - } -#endif - ASSERT_OK(env_->NewSequentialFile(fname, &file, soptions)); - if (file->use_direct_io()) { - ASSERT_OK(file->PositionedRead(0, kSectorSize, &result, scratch.get())); - } else { - ASSERT_OK(file->Read(kSectorSize, &result, scratch.get())); - } - ASSERT_EQ(memcmp(scratch.get(), data.get(), kSectorSize), 0); - ASSERT_OK(file->InvalidateCache(0, 11)); - ASSERT_OK(file->InvalidateCache(0, 0)); - } - // Delete the file - ASSERT_OK(env_->DeleteFile(fname)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); -} -#endif // OS_LINUX || OS_WIN - -class TestLogger : public Logger { - public: - using Logger::Logv; - void Logv(const char* format, va_list ap) override { - log_count++; - - char new_format[550]; - std::fill_n(new_format, sizeof(new_format), '2'); - { - va_list backup_ap; - va_copy(backup_ap, ap); - int n = vsnprintf(new_format, sizeof(new_format) - 1, format, backup_ap); - // 48 bytes for extra information + bytes allocated - -// When we have n == -1 there is not a terminating zero expected -#ifdef OS_WIN - if (n < 0) { - char_0_count++; - } -#endif - - if (new_format[0] == '[') { - // "[DEBUG] " - ASSERT_TRUE(n <= 56 + (512 - static_cast(sizeof(port::TimeVal)))); - } else { - ASSERT_TRUE(n <= 48 + (512 - static_cast(sizeof(port::TimeVal)))); - } - va_end(backup_ap); - } - - for (size_t i = 0; i < sizeof(new_format); i++) { - if (new_format[i] == 'x') { - char_x_count++; - } else if (new_format[i] == '\0') { - char_0_count++; - } - } - } - int log_count; - int char_x_count; - int char_0_count; -}; - -TEST_P(EnvPosixTestWithParam, LogBufferTest) { - TestLogger test_logger; - test_logger.SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); - test_logger.log_count = 0; - test_logger.char_x_count = 0; - test_logger.char_0_count = 0; - LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, &test_logger); - LogBuffer log_buffer_debug(DEBUG_LEVEL, &test_logger); - - char bytes200[200]; - std::fill_n(bytes200, sizeof(bytes200), '1'); - bytes200[sizeof(bytes200) - 1] = '\0'; - char bytes600[600]; - std::fill_n(bytes600, sizeof(bytes600), '1'); - bytes600[sizeof(bytes600) - 1] = '\0'; - char bytes9000[9000]; - std::fill_n(bytes9000, sizeof(bytes9000), '1'); - bytes9000[sizeof(bytes9000) - 1] = '\0'; - - ROCKS_LOG_BUFFER(&log_buffer, "x%sx", bytes200); - ROCKS_LOG_BUFFER(&log_buffer, "x%sx", 
bytes600); - ROCKS_LOG_BUFFER(&log_buffer, "x%sx%sx%sx", bytes200, bytes200, bytes200); - ROCKS_LOG_BUFFER(&log_buffer, "x%sx%sx", bytes200, bytes600); - ROCKS_LOG_BUFFER(&log_buffer, "x%sx%sx", bytes600, bytes9000); - - ROCKS_LOG_BUFFER(&log_buffer_debug, "x%sx", bytes200); - test_logger.SetInfoLogLevel(DEBUG_LEVEL); - ROCKS_LOG_BUFFER(&log_buffer_debug, "x%sx%sx%sx", bytes600, bytes9000, - bytes200); - - ASSERT_EQ(0, test_logger.log_count); - log_buffer.FlushBufferToLog(); - log_buffer_debug.FlushBufferToLog(); - ASSERT_EQ(6, test_logger.log_count); - ASSERT_EQ(6, test_logger.char_0_count); - ASSERT_EQ(10, test_logger.char_x_count); -} - -class TestLogger2 : public Logger { - public: - explicit TestLogger2(size_t max_log_size) : max_log_size_(max_log_size) {} - using Logger::Logv; - void Logv(const char* format, va_list ap) override { - char new_format[2000]; - std::fill_n(new_format, sizeof(new_format), '2'); - { - va_list backup_ap; - va_copy(backup_ap, ap); - int n = vsnprintf(new_format, sizeof(new_format) - 1, format, backup_ap); - // 48 bytes for extra information + bytes allocated - ASSERT_TRUE(n <= - 48 + static_cast(max_log_size_ - sizeof(port::TimeVal))); - ASSERT_TRUE(n > static_cast(max_log_size_ - sizeof(port::TimeVal))); - va_end(backup_ap); - } - } - size_t max_log_size_; -}; - -TEST_P(EnvPosixTestWithParam, LogBufferMaxSizeTest) { - char bytes9000[9000]; - std::fill_n(bytes9000, sizeof(bytes9000), '1'); - bytes9000[sizeof(bytes9000) - 1] = '\0'; - - for (size_t max_log_size = 256; max_log_size <= 1024; - max_log_size += 1024 - 256) { - TestLogger2 test_logger(max_log_size); - test_logger.SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); - LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, &test_logger); - ROCKS_LOG_BUFFER_MAX_SZ(&log_buffer, max_log_size, "%s", bytes9000); - log_buffer.FlushBufferToLog(); - } -} - -TEST_P(EnvPosixTestWithParam, Preallocation) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - const std::string src = test::PerThreadDBPath(env_, "testfile"); - std::unique_ptr srcfile; - EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; -#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ - !defined(OS_AIX) && !defined(OS_OPENBSD) && !defined(OS_FREEBSD) - if (soptions.use_direct_writes) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "NewWritableFile:O_DIRECT", [&](void* arg) { - int* val = static_cast(arg); - *val &= ~O_DIRECT; - }); - } -#endif - ASSERT_OK(env_->NewWritableFile(src, &srcfile, soptions)); - srcfile->SetPreallocationBlockSize(1024 * 1024); - - // No writes should mean no preallocation - size_t block_size, last_allocated_block; - srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); - ASSERT_EQ(last_allocated_block, 0UL); - - // Small write should preallocate one block - size_t kStrSize = 4096; - auto data = NewAligned(kStrSize, 'A'); - Slice str(data.get(), kStrSize); - srcfile->PrepareWrite(srcfile->GetFileSize(), kStrSize); - ASSERT_OK(srcfile->Append(str)); - srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); - ASSERT_EQ(last_allocated_block, 1UL); - - // Write an entire preallocation block, make sure we increased by two. 
- { - auto buf_ptr = NewAligned(block_size, ' '); - Slice buf(buf_ptr.get(), block_size); - srcfile->PrepareWrite(srcfile->GetFileSize(), block_size); - ASSERT_OK(srcfile->Append(buf)); - srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); - ASSERT_EQ(last_allocated_block, 2UL); - } - - // Write five more blocks at once, ensure we're where we need to be. - { - auto buf_ptr = NewAligned(block_size * 5, ' '); - Slice buf = Slice(buf_ptr.get(), block_size * 5); - srcfile->PrepareWrite(srcfile->GetFileSize(), buf.size()); - ASSERT_OK(srcfile->Append(buf)); - srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); - ASSERT_EQ(last_allocated_block, 7UL); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); -} - -// Test that the two ways to get children file attributes (in bulk or -// individually) behave consistently. -TEST_P(EnvPosixTestWithParam, ConsistentChildrenAttributes) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - EnvOptions soptions; - soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; - const int kNumChildren = 10; - - std::string data; - std::string test_base_dir = test::PerThreadDBPath(env_, "env_test_chr_attr"); - env_->CreateDir(test_base_dir).PermitUncheckedError(); - for (int i = 0; i < kNumChildren; ++i) { - const std::string path = test_base_dir + "/testfile_" + std::to_string(i); - std::unique_ptr file; -#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ - !defined(OS_AIX) && !defined(OS_OPENBSD) && !defined(OS_FREEBSD) - if (soptions.use_direct_writes) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "NewWritableFile:O_DIRECT", [&](void* arg) { - int* val = static_cast(arg); - *val &= ~O_DIRECT; - }); - } -#endif - ASSERT_OK(env_->NewWritableFile(path, &file, soptions)); - auto buf_ptr = NewAligned(data.size(), 'T'); - Slice buf(buf_ptr.get(), data.size()); - ASSERT_OK(file->Append(buf)); - data.append(std::string(4096, 'T')); - } - - std::vector file_attrs; - ASSERT_OK(env_->GetChildrenFileAttributes(test_base_dir, &file_attrs)); - for (int i = 0; i < kNumChildren; ++i) { - const std::string name = "testfile_" + std::to_string(i); - const std::string path = test_base_dir + "/" + name; - - auto file_attrs_iter = std::find_if( - file_attrs.begin(), file_attrs.end(), - [&name](const Env::FileAttributes& fm) { return fm.name == name; }); - ASSERT_TRUE(file_attrs_iter != file_attrs.end()); - uint64_t size; - ASSERT_OK(env_->GetFileSize(path, &size)); - ASSERT_EQ(size, 4096 * i); - ASSERT_EQ(size, file_attrs_iter->size_bytes); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); -} - -// Test that all WritableFileWrapper forwards all calls to WritableFile. 
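
// A minimal sketch, with placeholder paths, of the two attribute paths the
// ConsistentChildrenAttributes test above cross-checks: the bulk call returns name and
// size for every child of a directory at once, while GetFileSize() queries each file
// individually.
void SketchAttributeLookups(Env* env) {
  std::vector<Env::FileAttributes> attrs;
  ASSERT_OK(env->GetChildrenFileAttributes("/some/dir", &attrs));  // bulk lookup
  uint64_t size = 0;
  ASSERT_OK(env->GetFileSize("/some/dir/testfile_0", &size));      // per-file lookup
  // The test asserts that the matching entry's size_bytes equals `size`.
}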
-TEST_P(EnvPosixTestWithParam, WritableFileWrapper) { - class Base : public WritableFile { - public: - mutable int* step_; - - void inc(int x) const { EXPECT_EQ(x, (*step_)++); } - - explicit Base(int* step) : step_(step) { inc(0); } - - Status Append(const Slice& /*data*/) override { - inc(1); - return Status::OK(); - } - - Status Append( - const Slice& /*data*/, - const DataVerificationInfo& /* verification_info */) override { - inc(1); - return Status::OK(); - } - - Status PositionedAppend(const Slice& /*data*/, - uint64_t /*offset*/) override { - inc(2); - return Status::OK(); - } - - Status PositionedAppend( - const Slice& /*data*/, uint64_t /*offset*/, - const DataVerificationInfo& /* verification_info */) override { - inc(2); - return Status::OK(); - } - - Status Truncate(uint64_t /*size*/) override { - inc(3); - return Status::OK(); - } - - Status Close() override { - inc(4); - return Status::OK(); - } - - Status Flush() override { - inc(5); - return Status::OK(); - } - - Status Sync() override { - inc(6); - return Status::OK(); - } - - Status Fsync() override { - inc(7); - return Status::OK(); - } - - bool IsSyncThreadSafe() const override { - inc(8); - return true; - } - - bool use_direct_io() const override { - inc(9); - return true; - } - - size_t GetRequiredBufferAlignment() const override { - inc(10); - return 0; - } - - void SetIOPriority(Env::IOPriority /*pri*/) override { inc(11); } - - Env::IOPriority GetIOPriority() override { - inc(12); - return Env::IOPriority::IO_LOW; - } - - void SetWriteLifeTimeHint(Env::WriteLifeTimeHint /*hint*/) override { - inc(13); - } - - Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { - inc(14); - return Env::WriteLifeTimeHint::WLTH_NOT_SET; - } - - uint64_t GetFileSize() override { - inc(15); - return 0; - } - - void SetPreallocationBlockSize(size_t /*size*/) override { inc(16); } - - void GetPreallocationStatus(size_t* /*block_size*/, - size_t* /*last_allocated_block*/) override { - inc(17); - } - - size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { - inc(18); - return 0; - } - - Status InvalidateCache(size_t /*offset*/, size_t /*length*/) override { - inc(19); - return Status::OK(); - } - - Status RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/) override { - inc(20); - return Status::OK(); - } - - void PrepareWrite(size_t /*offset*/, size_t /*len*/) override { inc(21); } - - Status Allocate(uint64_t /*offset*/, uint64_t /*len*/) override { - inc(22); - return Status::OK(); - } - - public: - ~Base() override { inc(23); } - }; - - class Wrapper : public WritableFileWrapper { - public: - explicit Wrapper(WritableFile* target) : WritableFileWrapper(target) {} - }; - - int step = 0; - - { - Base b(&step); - Wrapper w(&b); - ASSERT_OK(w.Append(Slice())); - ASSERT_OK(w.PositionedAppend(Slice(), 0)); - ASSERT_OK(w.Truncate(0)); - ASSERT_OK(w.Close()); - ASSERT_OK(w.Flush()); - ASSERT_OK(w.Sync()); - ASSERT_OK(w.Fsync()); - w.IsSyncThreadSafe(); - w.use_direct_io(); - w.GetRequiredBufferAlignment(); - w.SetIOPriority(Env::IOPriority::IO_HIGH); - w.GetIOPriority(); - w.SetWriteLifeTimeHint(Env::WriteLifeTimeHint::WLTH_NOT_SET); - w.GetWriteLifeTimeHint(); - w.GetFileSize(); - w.SetPreallocationBlockSize(0); - w.GetPreallocationStatus(nullptr, nullptr); - w.GetUniqueId(nullptr, 0); - ASSERT_OK(w.InvalidateCache(0, 0)); - ASSERT_OK(w.RangeSync(0, 0)); - w.PrepareWrite(0, 0); - ASSERT_OK(w.Allocate(0, 0)); - } - - EXPECT_EQ(24, step); -} - -TEST_P(EnvPosixTestWithParam, PosixRandomRWFile) { - const std::string path = 
test::PerThreadDBPath(env_, "random_rw_file"); - - env_->DeleteFile(path).PermitUncheckedError(); - - std::unique_ptr file; - - // Cannot open non-existing file. - ASSERT_NOK(env_->NewRandomRWFile(path, &file, EnvOptions())); - - // Create the file using WritableFile - { - std::unique_ptr wf; - ASSERT_OK(env_->NewWritableFile(path, &wf, EnvOptions())); - } - - ASSERT_OK(env_->NewRandomRWFile(path, &file, EnvOptions())); - - char buf[10000]; - Slice read_res; - - ASSERT_OK(file->Write(0, "ABCD")); - ASSERT_OK(file->Read(0, 10, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "ABCD"); - - ASSERT_OK(file->Write(2, "XXXX")); - ASSERT_OK(file->Read(0, 10, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "ABXXXX"); - - ASSERT_OK(file->Write(10, "ZZZ")); - ASSERT_OK(file->Read(10, 10, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "ZZZ"); - - ASSERT_OK(file->Write(11, "Y")); - ASSERT_OK(file->Read(10, 10, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "ZYZ"); - - ASSERT_OK(file->Write(200, "FFFFF")); - ASSERT_OK(file->Read(200, 10, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "FFFFF"); - - ASSERT_OK(file->Write(205, "XXXX")); - ASSERT_OK(file->Read(200, 10, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "FFFFFXXXX"); - - ASSERT_OK(file->Write(5, "QQQQ")); - ASSERT_OK(file->Read(0, 9, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "ABXXXQQQQ"); - - ASSERT_OK(file->Read(2, 4, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "XXXQ"); - - // Close file and reopen it - ASSERT_OK(file->Close()); - ASSERT_OK(env_->NewRandomRWFile(path, &file, EnvOptions())); - - ASSERT_OK(file->Read(0, 9, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "ABXXXQQQQ"); - - ASSERT_OK(file->Read(10, 3, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "ZYZ"); - - ASSERT_OK(file->Read(200, 9, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "FFFFFXXXX"); - - ASSERT_OK(file->Write(4, "TTTTTTTTTTTTTTTT")); - ASSERT_OK(file->Read(0, 10, &read_res, buf)); - ASSERT_EQ(read_res.ToString(), "ABXXTTTTTT"); - - // Clean up - ASSERT_OK(env_->DeleteFile(path)); -} - -class RandomRWFileWithMirrorString { - public: - explicit RandomRWFileWithMirrorString(RandomRWFile* _file) : file_(_file) {} - - void Write(size_t offset, const std::string& data) { - // Write to mirror string - StringWrite(offset, data); - - // Write to file - Status s = file_->Write(offset, data); - ASSERT_OK(s) << s.ToString(); - } - - void Read(size_t offset = 0, size_t n = 1000000) { - Slice str_res(nullptr, 0); - if (offset < file_mirror_.size()) { - size_t str_res_sz = std::min(file_mirror_.size() - offset, n); - str_res = Slice(file_mirror_.data() + offset, str_res_sz); - StopSliceAtNull(&str_res); - } - - Slice file_res; - Status s = file_->Read(offset, n, &file_res, buf_); - ASSERT_OK(s) << s.ToString(); - StopSliceAtNull(&file_res); - - ASSERT_EQ(str_res.ToString(), file_res.ToString()) << offset << " " << n; - } - - void SetFile(RandomRWFile* _file) { file_ = _file; } - - private: - void StringWrite(size_t offset, const std::string& src) { - if (offset + src.size() > file_mirror_.size()) { - file_mirror_.resize(offset + src.size(), '\0'); - } - - char* pos = const_cast(file_mirror_.data() + offset); - memcpy(pos, src.data(), src.size()); - } - - void StopSliceAtNull(Slice* slc) { - for (size_t i = 0; i < slc->size(); i++) { - if ((*slc)[i] == '\0') { - *slc = Slice(slc->data(), i); - break; - } - } - } - - char buf_[10000]; - RandomRWFile* file_; - std::string file_mirror_; -}; - -TEST_P(EnvPosixTestWithParam, 
PosixRandomRWFileRandomized) { - const std::string path = test::PerThreadDBPath(env_, "random_rw_file_rand"); - env_->DeleteFile(path).PermitUncheckedError(); - - std::unique_ptr file; - -#ifdef OS_LINUX - // Cannot open non-existing file. - ASSERT_NOK(env_->NewRandomRWFile(path, &file, EnvOptions())); -#endif - - // Create the file using WritableFile - { - std::unique_ptr wf; - ASSERT_OK(env_->NewWritableFile(path, &wf, EnvOptions())); - } - - ASSERT_OK(env_->NewRandomRWFile(path, &file, EnvOptions())); - RandomRWFileWithMirrorString file_with_mirror(file.get()); - - Random rnd(301); - std::string buf; - for (int i = 0; i < 10000; i++) { - // Genrate random data - buf = rnd.RandomString(10); - - // Pick random offset for write - size_t write_off = rnd.Next() % 1000; - file_with_mirror.Write(write_off, buf); - - // Pick random offset for read - size_t read_off = rnd.Next() % 1000; - size_t read_sz = rnd.Next() % 20; - file_with_mirror.Read(read_off, read_sz); - - if (i % 500 == 0) { - // Reopen the file every 500 iters - ASSERT_OK(env_->NewRandomRWFile(path, &file, EnvOptions())); - file_with_mirror.SetFile(file.get()); - } - } - - // clean up - ASSERT_OK(env_->DeleteFile(path)); -} - -class TestEnv : public EnvWrapper { - public: - explicit TestEnv() : EnvWrapper(Env::Default()), close_count(0) {} - const char* Name() const override { return "TestEnv"; } - class TestLogger : public Logger { - public: - using Logger::Logv; - explicit TestLogger(TestEnv* env_ptr) : Logger() { env = env_ptr; } - ~TestLogger() override { - if (!closed_) { - Status s = CloseHelper(); - s.PermitUncheckedError(); - } - } - void Logv(const char* /*format*/, va_list /*ap*/) override {} - - protected: - Status CloseImpl() override { return CloseHelper(); } - - private: - Status CloseHelper() { - env->CloseCountInc(); - return Status::OK(); - } - TestEnv* env; - }; - - void CloseCountInc() { close_count++; } - - int GetCloseCount() { return close_count; } - - Status NewLogger(const std::string& /*fname*/, - std::shared_ptr* result) override { - result->reset(new TestLogger(this)); - return Status::OK(); - } - - private: - int close_count; -}; - -class EnvTest : public testing::Test { - public: - EnvTest() : test_directory_(test::PerThreadDBPath("env_test")) {} - - protected: - const std::string test_directory_; -}; - -TEST_F(EnvTest, Close) { - TestEnv* env = new TestEnv(); - std::shared_ptr logger; - Status s; - - s = env->NewLogger("", &logger); - ASSERT_OK(s); - ASSERT_OK(logger.get()->Close()); - ASSERT_EQ(env->GetCloseCount(), 1); - // Call Close() again. CloseHelper() should not be called again - ASSERT_OK(logger.get()->Close()); - ASSERT_EQ(env->GetCloseCount(), 1); - logger.reset(); - ASSERT_EQ(env->GetCloseCount(), 1); - - s = env->NewLogger("", &logger); - ASSERT_OK(s); - logger.reset(); - ASSERT_EQ(env->GetCloseCount(), 2); - - delete env; -} - -class LogvWithInfoLogLevelLogger : public Logger { - public: - using Logger::Logv; - void Logv(const InfoLogLevel /* log_level */, const char* /* format */, - va_list /* ap */) override {} -}; - -TEST_F(EnvTest, LogvWithInfoLogLevel) { - // Verifies the log functions work on a `Logger` that only overrides the - // `Logv()` overload including `InfoLogLevel`. 
- const std::string kSampleMessage("sample log message"); - LogvWithInfoLogLevelLogger logger; - ROCKS_LOG_HEADER(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_DEBUG(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_INFO(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_WARN(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_ERROR(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_FATAL(&logger, "%s", kSampleMessage.c_str()); -} - -INSTANTIATE_TEST_CASE_P(DefaultEnvWithoutDirectIO, EnvPosixTestWithParam, - ::testing::Values(std::pair(Env::Default(), - false))); -INSTANTIATE_TEST_CASE_P(DefaultEnvWithDirectIO, EnvPosixTestWithParam, - ::testing::Values(std::pair(Env::Default(), - true))); - -#if !defined(OS_WIN) -static Env* GetChrootEnv() { - static std::unique_ptr chroot_env( - NewChrootEnv(Env::Default(), test::TmpDir(Env::Default()))); - return chroot_env.get(); -} -INSTANTIATE_TEST_CASE_P(ChrootEnvWithoutDirectIO, EnvPosixTestWithParam, - ::testing::Values(std::pair(GetChrootEnv(), - false))); -INSTANTIATE_TEST_CASE_P(ChrootEnvWithDirectIO, EnvPosixTestWithParam, - ::testing::Values(std::pair(GetChrootEnv(), - true))); -#endif // !defined(OS_WIN) - -class EnvFSTestWithParam - : public ::testing::Test, - public ::testing::WithParamInterface> { - public: - EnvFSTestWithParam() { - bool env_non_null = std::get<0>(GetParam()); - bool env_default = std::get<1>(GetParam()); - bool fs_default = std::get<2>(GetParam()); - - env_ = env_non_null ? (env_default ? Env::Default() : nullptr) : nullptr; - fs_ = fs_default - ? FileSystem::Default() - : std::make_shared(FileSystem::Default()); - if (env_non_null && env_default && !fs_default) { - env_ptr_ = NewCompositeEnv(fs_); - } - if (env_non_null && !env_default && fs_default) { - env_ptr_ = - std::unique_ptr(new FaultInjectionTestEnv(Env::Default())); - fs_.reset(); - } - if (env_non_null && !env_default && !fs_default) { - env_ptr_.reset(new FaultInjectionTestEnv(Env::Default())); - composite_env_ptr_.reset(new CompositeEnvWrapper(env_ptr_.get(), fs_)); - env_ = composite_env_ptr_.get(); - } else { - env_ = env_ptr_.get(); - } - - dbname1_ = test::PerThreadDBPath("env_fs_test1"); - dbname2_ = test::PerThreadDBPath("env_fs_test2"); - } - - ~EnvFSTestWithParam() = default; - - Env* env_; - std::unique_ptr env_ptr_; - std::unique_ptr composite_env_ptr_; - std::shared_ptr fs_; - std::string dbname1_; - std::string dbname2_; -}; - -TEST_P(EnvFSTestWithParam, OptionsTest) { - Options opts; - opts.env = env_; - opts.create_if_missing = true; - std::string dbname = dbname1_; - - if (env_) { - if (fs_) { - ASSERT_EQ(fs_.get(), env_->GetFileSystem().get()); - } else { - ASSERT_NE(FileSystem::Default().get(), env_->GetFileSystem().get()); - } - } - for (int i = 0; i < 2; ++i) { - DB* db; - Status s = DB::Open(opts, dbname, &db); - ASSERT_OK(s); - - WriteOptions wo; - ASSERT_OK(db->Put(wo, "a", "a")); - ASSERT_OK(db->Flush(FlushOptions())); - ASSERT_OK(db->Put(wo, "b", "b")); - ASSERT_OK(db->Flush(FlushOptions())); - ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - std::string val; - ASSERT_OK(db->Get(ReadOptions(), "a", &val)); - ASSERT_EQ("a", val); - ASSERT_OK(db->Get(ReadOptions(), "b", &val)); - ASSERT_EQ("b", val); - - ASSERT_OK(db->Close()); - delete db; - ASSERT_OK(DestroyDB(dbname, opts)); - - dbname = dbname2_; - } -} - -// The parameters are as follows - -// 1. True means Options::env is non-null, false means null -// 2. True means use Env::Default, false means custom -// 3. 
True means use FileSystem::Default, false means custom -INSTANTIATE_TEST_CASE_P(EnvFSTest, EnvFSTestWithParam, - ::testing::Combine(::testing::Bool(), ::testing::Bool(), - ::testing::Bool())); -// This test ensures that default Env and those allocated by -// NewCompositeEnv() all share the same threadpool -TEST_F(EnvTest, MultipleCompositeEnv) { - std::shared_ptr fs1 = - std::make_shared(FileSystem::Default()); - std::shared_ptr fs2 = - std::make_shared(FileSystem::Default()); - std::unique_ptr env1 = NewCompositeEnv(fs1); - std::unique_ptr env2 = NewCompositeEnv(fs2); - Env::Default()->SetBackgroundThreads(8, Env::HIGH); - Env::Default()->SetBackgroundThreads(16, Env::LOW); - ASSERT_EQ(env1->GetBackgroundThreads(Env::LOW), 16); - ASSERT_EQ(env1->GetBackgroundThreads(Env::HIGH), 8); - ASSERT_EQ(env2->GetBackgroundThreads(Env::LOW), 16); - ASSERT_EQ(env2->GetBackgroundThreads(Env::HIGH), 8); -} - -TEST_F(EnvTest, IsDirectory) { - Status s = Env::Default()->CreateDirIfMissing(test_directory_); - ASSERT_OK(s); - const std::string test_sub_dir = test_directory_ + "sub1"; - const std::string test_file_path = test_directory_ + "file1"; - ASSERT_OK(Env::Default()->CreateDirIfMissing(test_sub_dir)); - bool is_dir = false; - ASSERT_OK(Env::Default()->IsDirectory(test_sub_dir, &is_dir)); - ASSERT_TRUE(is_dir); - { - std::unique_ptr wfile; - s = Env::Default()->GetFileSystem()->NewWritableFile( - test_file_path, FileOptions(), &wfile, /*dbg=*/nullptr); - ASSERT_OK(s); - std::unique_ptr fwriter; - fwriter.reset(new WritableFileWriter(std::move(wfile), test_file_path, - FileOptions(), - SystemClock::Default().get())); - constexpr char buf[] = "test"; - s = fwriter->Append(buf); - ASSERT_OK(s); - } - ASSERT_OK(Env::Default()->IsDirectory(test_file_path, &is_dir)); - ASSERT_FALSE(is_dir); -} - -TEST_F(EnvTest, EnvWriteVerificationTest) { - Status s = Env::Default()->CreateDirIfMissing(test_directory_); - const std::string test_file_path = test_directory_ + "file1"; - ASSERT_OK(s); - std::shared_ptr fault_fs( - new FaultInjectionTestFS(FileSystem::Default())); - fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); - std::unique_ptr file; - s = fault_fs_env->NewWritableFile(test_file_path, &file, EnvOptions()); - ASSERT_OK(s); - - DataVerificationInfo v_info; - std::string test_data = "test"; - std::string checksum; - uint32_t v_crc32c = crc32c::Extend(0, test_data.c_str(), test_data.size()); - PutFixed32(&checksum, v_crc32c); - v_info.checksum = Slice(checksum); - s = file->Append(Slice(test_data), v_info); - ASSERT_OK(s); -} - -class CreateEnvTest : public testing::Test { - public: - CreateEnvTest() { - config_options_.ignore_unknown_options = false; - config_options_.ignore_unsupported_options = false; - } - ConfigOptions config_options_; -}; - -TEST_F(CreateEnvTest, LoadCTRProvider) { - config_options_.invoke_prepare_options = false; - std::string CTR = CTREncryptionProvider::kClassName(); - std::shared_ptr provider; - // Test a provider with no cipher - ASSERT_OK( - EncryptionProvider::CreateFromString(config_options_, CTR, &provider)); - ASSERT_NE(provider, nullptr); - ASSERT_EQ(provider->Name(), CTR); - ASSERT_NOK(provider->PrepareOptions(config_options_)); - ASSERT_NOK(provider->ValidateOptions(DBOptions(), ColumnFamilyOptions())); - auto cipher = provider->GetOptions>("Cipher"); - ASSERT_NE(cipher, nullptr); - ASSERT_EQ(cipher->get(), nullptr); - provider.reset(); - - ASSERT_OK(EncryptionProvider::CreateFromString(config_options_, 
- CTR + "://test", &provider)); - ASSERT_NE(provider, nullptr); - ASSERT_EQ(provider->Name(), CTR); - ASSERT_OK(provider->PrepareOptions(config_options_)); - ASSERT_OK(provider->ValidateOptions(DBOptions(), ColumnFamilyOptions())); - cipher = provider->GetOptions>("Cipher"); - ASSERT_NE(cipher, nullptr); - ASSERT_NE(cipher->get(), nullptr); - ASSERT_STREQ(cipher->get()->Name(), "ROT13"); - provider.reset(); - - ASSERT_OK(EncryptionProvider::CreateFromString(config_options_, "1://test", - &provider)); - ASSERT_NE(provider, nullptr); - ASSERT_EQ(provider->Name(), CTR); - ASSERT_OK(provider->PrepareOptions(config_options_)); - ASSERT_OK(provider->ValidateOptions(DBOptions(), ColumnFamilyOptions())); - cipher = provider->GetOptions>("Cipher"); - ASSERT_NE(cipher, nullptr); - ASSERT_NE(cipher->get(), nullptr); - ASSERT_STREQ(cipher->get()->Name(), "ROT13"); - provider.reset(); - - ASSERT_OK(EncryptionProvider::CreateFromString( - config_options_, "id=" + CTR + "; cipher=ROT13", &provider)); - ASSERT_NE(provider, nullptr); - ASSERT_EQ(provider->Name(), CTR); - cipher = provider->GetOptions>("Cipher"); - ASSERT_NE(cipher, nullptr); - ASSERT_NE(cipher->get(), nullptr); - ASSERT_STREQ(cipher->get()->Name(), "ROT13"); - provider.reset(); -} - -TEST_F(CreateEnvTest, LoadROT13Cipher) { - std::shared_ptr cipher; - // Test a provider with no cipher - ASSERT_OK(BlockCipher::CreateFromString(config_options_, "ROT13", &cipher)); - ASSERT_NE(cipher, nullptr); - ASSERT_STREQ(cipher->Name(), "ROT13"); -} - -TEST_F(CreateEnvTest, CreateDefaultSystemClock) { - std::shared_ptr clock, copy; - ASSERT_OK(SystemClock::CreateFromString(config_options_, - SystemClock::kDefaultName(), &clock)); - ASSERT_NE(clock, nullptr); - ASSERT_EQ(clock, SystemClock::Default()); - std::string opts_str = clock->ToString(config_options_); - std::string mismatch; - ASSERT_OK(SystemClock::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(clock->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - -TEST_F(CreateEnvTest, CreateMockSystemClock) { - std::shared_ptr mock, copy; - - config_options_.registry->AddLibrary("test")->AddFactory( - MockSystemClock::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MockSystemClock(nullptr)); - return guard->get(); - }); - ASSERT_OK(SystemClock::CreateFromString( - config_options_, EmulatedSystemClock::kClassName(), &mock)); - ASSERT_NE(mock, nullptr); - ASSERT_STREQ(mock->Name(), EmulatedSystemClock::kClassName()); - ASSERT_EQ(mock->Inner(), SystemClock::Default().get()); - std::string opts_str = mock->ToString(config_options_); - std::string mismatch; - ASSERT_OK(SystemClock::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(mock->AreEquivalent(config_options_, copy.get(), &mismatch)); - - std::string id = std::string("id=") + EmulatedSystemClock::kClassName() + - ";target=" + MockSystemClock::kClassName(); - - ASSERT_OK(SystemClock::CreateFromString(config_options_, id, &mock)); - ASSERT_NE(mock, nullptr); - ASSERT_STREQ(mock->Name(), EmulatedSystemClock::kClassName()); - ASSERT_NE(mock->Inner(), nullptr); - ASSERT_STREQ(mock->Inner()->Name(), MockSystemClock::kClassName()); - ASSERT_EQ(mock->Inner()->Inner(), SystemClock::Default().get()); - opts_str = mock->ToString(config_options_); - ASSERT_OK(SystemClock::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(mock->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_OK(SystemClock::CreateFromString( - config_options_, 
EmulatedSystemClock::kClassName(), &mock)); -} - -TEST_F(CreateEnvTest, CreateReadOnlyFileSystem) { - std::shared_ptr fs, copy; - - ASSERT_OK(FileSystem::CreateFromString( - config_options_, ReadOnlyFileSystem::kClassName(), &fs)); - ASSERT_NE(fs, nullptr); - ASSERT_STREQ(fs->Name(), ReadOnlyFileSystem::kClassName()); - ASSERT_EQ(fs->Inner(), FileSystem::Default().get()); - - std::string opts_str = fs->ToString(config_options_); - std::string mismatch; - - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); - - ASSERT_OK(FileSystem::CreateFromString( - config_options_, - std::string("id=") + ReadOnlyFileSystem::kClassName() + - "; target=" + TimedFileSystem::kClassName(), - &fs)); - ASSERT_NE(fs, nullptr); - opts_str = fs->ToString(config_options_); - ASSERT_STREQ(fs->Name(), ReadOnlyFileSystem::kClassName()); - ASSERT_NE(fs->Inner(), nullptr); - ASSERT_STREQ(fs->Inner()->Name(), TimedFileSystem::kClassName()); - ASSERT_EQ(fs->Inner()->Inner(), FileSystem::Default().get()); - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - -TEST_F(CreateEnvTest, CreateTimedFileSystem) { - std::shared_ptr fs, copy; - - ASSERT_OK(FileSystem::CreateFromString(config_options_, - TimedFileSystem::kClassName(), &fs)); - ASSERT_NE(fs, nullptr); - ASSERT_STREQ(fs->Name(), TimedFileSystem::kClassName()); - ASSERT_EQ(fs->Inner(), FileSystem::Default().get()); - - std::string opts_str = fs->ToString(config_options_); - std::string mismatch; - - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); - - ASSERT_OK(FileSystem::CreateFromString( - config_options_, - std::string("id=") + TimedFileSystem::kClassName() + - "; target=" + ReadOnlyFileSystem::kClassName(), - &fs)); - ASSERT_NE(fs, nullptr); - opts_str = fs->ToString(config_options_); - ASSERT_STREQ(fs->Name(), TimedFileSystem::kClassName()); - ASSERT_NE(fs->Inner(), nullptr); - ASSERT_STREQ(fs->Inner()->Name(), ReadOnlyFileSystem::kClassName()); - ASSERT_EQ(fs->Inner()->Inner(), FileSystem::Default().get()); - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - -TEST_F(CreateEnvTest, CreateCountedFileSystem) { - std::shared_ptr fs, copy; - - ASSERT_OK(FileSystem::CreateFromString(config_options_, - CountedFileSystem::kClassName(), &fs)); - ASSERT_NE(fs, nullptr); - ASSERT_STREQ(fs->Name(), CountedFileSystem::kClassName()); - ASSERT_EQ(fs->Inner(), FileSystem::Default().get()); - - std::string opts_str = fs->ToString(config_options_); - std::string mismatch; - - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); - - ASSERT_OK(FileSystem::CreateFromString( - config_options_, - std::string("id=") + CountedFileSystem::kClassName() + - "; target=" + ReadOnlyFileSystem::kClassName(), - &fs)); - ASSERT_NE(fs, nullptr); - opts_str = fs->ToString(config_options_); - ASSERT_STREQ(fs->Name(), CountedFileSystem::kClassName()); - ASSERT_NE(fs->Inner(), nullptr); - ASSERT_STREQ(fs->Inner()->Name(), ReadOnlyFileSystem::kClassName()); - ASSERT_EQ(fs->Inner()->Inner(), FileSystem::Default().get()); - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - 
ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - -#ifndef OS_WIN -TEST_F(CreateEnvTest, CreateChrootFileSystem) { - std::shared_ptr fs, copy; - auto tmp_dir = test::TmpDir(Env::Default()); - // The Chroot FileSystem has a required "chroot_dir" option. - ASSERT_NOK(FileSystem::CreateFromString(config_options_, - ChrootFileSystem::kClassName(), &fs)); - - // ChrootFileSystem fails with an invalid directory - ASSERT_NOK(FileSystem::CreateFromString( - config_options_, - std::string("chroot_dir=/No/Such/Directory; id=") + - ChrootFileSystem::kClassName(), - &fs)); - std::string chroot_opts = std::string("chroot_dir=") + tmp_dir + - std::string("; id=") + - ChrootFileSystem::kClassName(); - - // Create a valid ChrootFileSystem with an inner Default - ASSERT_OK(FileSystem::CreateFromString(config_options_, chroot_opts, &fs)); - ASSERT_NE(fs, nullptr); - ASSERT_STREQ(fs->Name(), ChrootFileSystem::kClassName()); - ASSERT_EQ(fs->Inner(), FileSystem::Default().get()); - std::string opts_str = fs->ToString(config_options_); - std::string mismatch; - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); - - // Create a valid ChrootFileSystem with an inner TimedFileSystem - ASSERT_OK(FileSystem::CreateFromString( - config_options_, - chroot_opts + "; target=" + TimedFileSystem::kClassName(), &fs)); - ASSERT_NE(fs, nullptr); - ASSERT_STREQ(fs->Name(), ChrootFileSystem::kClassName()); - ASSERT_NE(fs->Inner(), nullptr); - ASSERT_STREQ(fs->Inner()->Name(), TimedFileSystem::kClassName()); - ASSERT_EQ(fs->Inner()->Inner(), FileSystem::Default().get()); - opts_str = fs->ToString(config_options_); - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); - - // Create a TimedFileSystem with an inner ChrootFileSystem - ASSERT_OK(FileSystem::CreateFromString( - config_options_, - "target={" + chroot_opts + "}; id=" + TimedFileSystem::kClassName(), - &fs)); - ASSERT_NE(fs, nullptr); - ASSERT_STREQ(fs->Name(), TimedFileSystem::kClassName()); - ASSERT_NE(fs->Inner(), nullptr); - ASSERT_STREQ(fs->Inner()->Name(), ChrootFileSystem::kClassName()); - ASSERT_EQ(fs->Inner()->Inner(), FileSystem::Default().get()); - opts_str = fs->ToString(config_options_); - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); -} -#endif // OS_WIN - -TEST_F(CreateEnvTest, CreateEncryptedFileSystem) { - std::shared_ptr fs, copy; - - std::string base_opts = - std::string("provider=1://test; id=") + EncryptedFileSystem::kClassName(); - // The EncryptedFileSystem requires a "provider" option. 
- ASSERT_NOK(FileSystem::CreateFromString( - config_options_, EncryptedFileSystem::kClassName(), &fs)); - - ASSERT_OK(FileSystem::CreateFromString(config_options_, base_opts, &fs)); - - ASSERT_NE(fs, nullptr); - ASSERT_STREQ(fs->Name(), EncryptedFileSystem::kClassName()); - ASSERT_EQ(fs->Inner(), FileSystem::Default().get()); - std::string opts_str = fs->ToString(config_options_); - std::string mismatch; - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_OK(FileSystem::CreateFromString( - config_options_, base_opts + "; target=" + TimedFileSystem::kClassName(), - &fs)); - ASSERT_NE(fs, nullptr); - ASSERT_STREQ(fs->Name(), EncryptedFileSystem::kClassName()); - ASSERT_NE(fs->Inner(), nullptr); - ASSERT_STREQ(fs->Inner()->Name(), TimedFileSystem::kClassName()); - ASSERT_EQ(fs->Inner()->Inner(), FileSystem::Default().get()); - opts_str = fs->ToString(config_options_); - ASSERT_OK(FileSystem::CreateFromString(config_options_, opts_str, ©)); - ASSERT_TRUE(fs->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - - -namespace { - -constexpr size_t kThreads = 8; -constexpr size_t kIdsPerThread = 1000; - -// This is a mini-stress test to check for duplicates in functions like -// GenerateUniqueId() -template > -struct NoDuplicateMiniStressTest { - std::unordered_set ids; - std::mutex mutex; - Env* env; - - NoDuplicateMiniStressTest() { env = Env::Default(); } - - virtual ~NoDuplicateMiniStressTest() {} - - void Run() { - std::array threads; - for (size_t i = 0; i < kThreads; ++i) { - threads[i] = std::thread([&]() { ThreadFn(); }); - } - for (auto& thread : threads) { - thread.join(); - } - // All must be unique - ASSERT_EQ(ids.size(), kThreads * kIdsPerThread); - } - - void ThreadFn() { - std::array my_ids; - // Generate in parallel threads as fast as possible - for (size_t i = 0; i < kIdsPerThread; ++i) { - my_ids[i] = Generate(); - } - // Now collate - std::lock_guard lock(mutex); - for (auto& id : my_ids) { - ids.insert(id); - } - } - - virtual IdType Generate() = 0; -}; - -void VerifyRfcUuids(const std::unordered_set& uuids) { - if (uuids.empty()) { - return; - } -} - -using uint64_pair_t = std::pair; -struct HashUint64Pair { - std::size_t operator()( - std::pair const& u) const noexcept { - // Assume suitable distribution already - return static_cast(u.first ^ u.second); - } -}; - -} // namespace - -TEST_F(EnvTest, GenerateUniqueId) { - struct MyStressTest : public NoDuplicateMiniStressTest { - std::string Generate() override { return env->GenerateUniqueId(); } - }; - - MyStressTest t; - t.Run(); - - // Basically verify RFC-4122 format - for (auto& uuid : t.ids) { - ASSERT_EQ(36U, uuid.size()); - ASSERT_EQ('-', uuid[8]); - ASSERT_EQ('-', uuid[13]); - ASSERT_EQ('-', uuid[18]); - ASSERT_EQ('-', uuid[23]); - } -} - -TEST_F(EnvTest, GenerateDbSessionId) { - struct MyStressTest : public NoDuplicateMiniStressTest { - std::string Generate() override { return DBImpl::GenerateDbSessionId(env); } - }; - - MyStressTest t; - t.Run(); - - // Basically verify session ID - for (auto& id : t.ids) { - ASSERT_EQ(20U, id.size()); - } -} - -constexpr bool kRequirePortGenerateRfcUuid = -#if defined(OS_LINUX) || defined(OS_ANDROID) || defined(OS_WIN) - true; -#else - false; -#endif - -TEST_F(EnvTest, PortGenerateRfcUuid) { - if (!kRequirePortGenerateRfcUuid) { - ROCKSDB_GTEST_SKIP("Not supported/expected on this platform"); - return; - } - struct MyStressTest : public NoDuplicateMiniStressTest { - 
std::string Generate() override { - std::string u; - assert(port::GenerateRfcUuid(&u)); - return u; - } - }; - - MyStressTest t; - t.Run(); - - // Extra verification on versions and variants - VerifyRfcUuids(t.ids); -} - -// Test the atomic, linear generation of GenerateRawUuid -TEST_F(EnvTest, GenerateRawUniqueId) { - struct MyStressTest - : public NoDuplicateMiniStressTest { - uint64_pair_t Generate() override { - uint64_pair_t p; - GenerateRawUniqueId(&p.first, &p.second); - return p; - } - }; - - MyStressTest t; - t.Run(); -} - -// Test that each entropy source ("track") is at least adequate -TEST_F(EnvTest, GenerateRawUniqueIdTrackPortUuidOnly) { - if (!kRequirePortGenerateRfcUuid) { - ROCKSDB_GTEST_SKIP("Not supported/expected on this platform"); - return; - } - - struct MyStressTest - : public NoDuplicateMiniStressTest { - uint64_pair_t Generate() override { - uint64_pair_t p; - TEST_GenerateRawUniqueId(&p.first, &p.second, false, true, true); - return p; - } - }; - - MyStressTest t; - t.Run(); -} - -TEST_F(EnvTest, GenerateRawUniqueIdTrackEnvDetailsOnly) { - struct MyStressTest - : public NoDuplicateMiniStressTest { - uint64_pair_t Generate() override { - uint64_pair_t p; - TEST_GenerateRawUniqueId(&p.first, &p.second, true, false, true); - return p; - } - }; - - MyStressTest t; - t.Run(); -} - -TEST_F(EnvTest, GenerateRawUniqueIdTrackRandomDeviceOnly) { - struct MyStressTest - : public NoDuplicateMiniStressTest { - uint64_pair_t Generate() override { - uint64_pair_t p; - TEST_GenerateRawUniqueId(&p.first, &p.second, true, true, false); - return p; - } - }; - - MyStressTest t; - t.Run(); -} - -TEST_F(EnvTest, SemiStructuredUniqueIdGenTest) { - // Must be thread safe and usable as a static - static SemiStructuredUniqueIdGen gen; - - struct MyStressTest - : public NoDuplicateMiniStressTest { - uint64_pair_t Generate() override { - uint64_pair_t p; - gen.GenerateNext(&p.first, &p.second); - return p; - } - }; - - MyStressTest t; - t.Run(); -} - -TEST_F(EnvTest, FailureToCreateLockFile) { - auto env = Env::Default(); - auto fs = env->GetFileSystem(); - std::string dir = test::PerThreadDBPath(env, "lockdir"); - std::string file = dir + "/lockfile"; - - // Ensure directory doesn't exist - ASSERT_OK(DestroyDir(env, dir)); - - // Make sure that we can acquire a file lock after the first attempt fails - FileLock* lock = nullptr; - ASSERT_NOK(fs->LockFile(file, IOOptions(), &lock, /*dbg*/ nullptr)); - ASSERT_FALSE(lock); - - ASSERT_OK(fs->CreateDir(dir, IOOptions(), /*dbg*/ nullptr)); - ASSERT_OK(fs->LockFile(file, IOOptions(), &lock, /*dbg*/ nullptr)); - ASSERT_OK(fs->UnlockFile(lock, IOOptions(), /*dbg*/ nullptr)); - - // Clean up - ASSERT_OK(DestroyDir(env, dir)); -} - -TEST_F(CreateEnvTest, CreateDefaultEnv) { - ConfigOptions options; - options.ignore_unsupported_options = false; - - std::shared_ptr guard; - Env* env = nullptr; - ASSERT_OK(Env::CreateFromString(options, "", &env)); - ASSERT_EQ(env, Env::Default()); - - env = nullptr; - ASSERT_OK(Env::CreateFromString(options, Env::kDefaultName(), &env)); - ASSERT_EQ(env, Env::Default()); - - env = nullptr; - ASSERT_OK(Env::CreateFromString(options, "", &env, &guard)); - ASSERT_EQ(env, Env::Default()); - ASSERT_EQ(guard, nullptr); - - env = nullptr; - ASSERT_OK(Env::CreateFromString(options, Env::kDefaultName(), &env, &guard)); - ASSERT_EQ(env, Env::Default()); - ASSERT_EQ(guard, nullptr); - - std::string opt_str = env->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env)); - ASSERT_EQ(env, Env::Default()); - 
ASSERT_OK(Env::CreateFromString(options, opt_str, &env, &guard)); - ASSERT_EQ(env, Env::Default()); - ASSERT_EQ(guard, nullptr); -} - -namespace { -class WrappedEnv : public EnvWrapper { - public: - explicit WrappedEnv(Env* t) : EnvWrapper(t) {} - explicit WrappedEnv(const std::shared_ptr& t) : EnvWrapper(t) {} - static const char* kClassName() { return "WrappedEnv"; } - const char* Name() const override { return kClassName(); } - static void Register(ObjectLibrary& lib, const std::string& /*arg*/) { - lib.AddFactory( - WrappedEnv::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new WrappedEnv(nullptr)); - return guard->get(); - }); - } -}; -} // namespace -TEST_F(CreateEnvTest, CreateMockEnv) { - ConfigOptions options; - options.ignore_unsupported_options = false; - WrappedEnv::Register(*(options.registry->AddLibrary("test")), ""); - std::shared_ptr guard, copy; - std::string opt_str; - - Env* env = nullptr; - ASSERT_NOK(Env::CreateFromString(options, MockEnv::kClassName(), &env)); - ASSERT_OK( - Env::CreateFromString(options, MockEnv::kClassName(), &env, &guard)); - ASSERT_NE(env, nullptr); - ASSERT_NE(env, Env::Default()); - opt_str = env->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, ©)); - ASSERT_NE(copy, guard); - std::string mismatch; - ASSERT_TRUE(guard->AreEquivalent(options, copy.get(), &mismatch)); - guard.reset(MockEnv::Create(Env::Default(), SystemClock::Default())); - opt_str = guard->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, ©)); - std::unique_ptr wrapped_env(new WrappedEnv(Env::Default())); - guard.reset(MockEnv::Create(wrapped_env.get(), SystemClock::Default())); - opt_str = guard->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, ©)); - opt_str = copy->ToString(options); -} - -TEST_F(CreateEnvTest, CreateWrappedEnv) { - ConfigOptions options; - options.ignore_unsupported_options = false; - WrappedEnv::Register(*(options.registry->AddLibrary("test")), ""); - Env* env = nullptr; - std::shared_ptr guard, copy; - std::string opt_str; - std::string mismatch; - - ASSERT_NOK(Env::CreateFromString(options, WrappedEnv::kClassName(), &env)); - ASSERT_OK( - Env::CreateFromString(options, WrappedEnv::kClassName(), &env, &guard)); - ASSERT_NE(env, nullptr); - ASSERT_NE(env, Env::Default()); - ASSERT_FALSE(guard->AreEquivalent(options, Env::Default(), &mismatch)); - - opt_str = env->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, ©)); - ASSERT_NE(copy, guard); - ASSERT_TRUE(guard->AreEquivalent(options, copy.get(), &mismatch)); - - guard.reset(new WrappedEnv(std::make_shared(Env::Default()))); - ASSERT_NE(guard.get(), env); - opt_str = guard->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, ©)); - ASSERT_NE(copy, guard); - ASSERT_TRUE(guard->AreEquivalent(options, copy.get(), &mismatch)); - - guard.reset(new WrappedEnv(std::make_shared( - std::make_shared(Env::Default())))); - ASSERT_NE(guard.get(), env); - opt_str = guard->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, ©)); - ASSERT_NE(copy, guard); - ASSERT_TRUE(guard->AreEquivalent(options, copy.get(), &mismatch)); -} - -TEST_F(CreateEnvTest, CreateCompositeEnv) { - ConfigOptions options; - options.ignore_unsupported_options = false; - std::shared_ptr guard, copy; - Env* env = nullptr; - std::string mismatch, opt_str; - - WrappedEnv::Register(*(options.registry->AddLibrary("test")), ""); 
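
// A hedged sketch of the create -> serialize -> recreate -> compare round trip the
// Create*Env tests above rely on. WrappedEnv is the class registered locally above;
// CreateFromString, ToString, and AreEquivalent are the APIs those tests already
// exercise, and the helper below only restates that flow.
void SketchCustomizableRoundTrip() {
  ConfigOptions cfg;
  WrappedEnv::Register(*(cfg.registry->AddLibrary("test")), "");   // make it loadable
  std::shared_ptr<Env> guard, copy;
  Env* env = nullptr;
  ASSERT_OK(Env::CreateFromString(cfg, WrappedEnv::kClassName(), &env, &guard));
  std::string serialized = env->ToString(cfg);                     // serialize options
  ASSERT_OK(Env::CreateFromString(cfg, serialized, &env, &copy));  // rebuild from string
  std::string mismatch;
  ASSERT_TRUE(guard->AreEquivalent(cfg, copy.get(), &mismatch));   // compare instances
}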
- std::unique_ptr base(NewCompositeEnv(FileSystem::Default())); - std::unique_ptr wrapped(new WrappedEnv(Env::Default())); - std::shared_ptr timed_fs = - std::make_shared(FileSystem::Default()); - std::shared_ptr clock = - std::make_shared(SystemClock::Default()); - - opt_str = base->ToString(options); - ASSERT_NOK(Env::CreateFromString(options, opt_str, &env)); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, &guard)); - ASSERT_NE(env, nullptr); - ASSERT_NE(env, Env::Default()); - ASSERT_EQ(env->GetFileSystem(), FileSystem::Default()); - ASSERT_EQ(env->GetSystemClock(), SystemClock::Default()); - - base = NewCompositeEnv(timed_fs); - opt_str = base->ToString(options); - ASSERT_NOK(Env::CreateFromString(options, opt_str, &env)); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, &guard)); - ASSERT_NE(env, nullptr); - ASSERT_NE(env, Env::Default()); - ASSERT_NE(env->GetFileSystem(), FileSystem::Default()); - ASSERT_EQ(env->GetSystemClock(), SystemClock::Default()); - - env = nullptr; - guard.reset(new CompositeEnvWrapper(wrapped.get(), timed_fs)); - opt_str = guard->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, ©)); - ASSERT_NE(env, nullptr); - ASSERT_NE(env, Env::Default()); - ASSERT_TRUE(guard->AreEquivalent(options, copy.get(), &mismatch)); - - env = nullptr; - guard.reset(new CompositeEnvWrapper(wrapped.get(), clock)); - opt_str = guard->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, ©)); - ASSERT_NE(env, nullptr); - ASSERT_NE(env, Env::Default()); - ASSERT_TRUE(guard->AreEquivalent(options, copy.get(), &mismatch)); - - env = nullptr; - guard.reset(new CompositeEnvWrapper(wrapped.get(), timed_fs, clock)); - opt_str = guard->ToString(options); - ASSERT_OK(Env::CreateFromString(options, opt_str, &env, ©)); - ASSERT_NE(env, nullptr); - ASSERT_NE(env, Env::Default()); - ASSERT_TRUE(guard->AreEquivalent(options, copy.get(), &mismatch)); - - guard.reset(new CompositeEnvWrapper(nullptr, timed_fs, clock)); - ColumnFamilyOptions cf_opts; - DBOptions db_opts; - db_opts.env = guard.get(); - auto comp = db_opts.env->CheckedCast(); - ASSERT_NE(comp, nullptr); - ASSERT_EQ(comp->Inner(), nullptr); - ASSERT_NOK(ValidateOptions(db_opts, cf_opts)); - ASSERT_OK(db_opts.env->PrepareOptions(options)); - ASSERT_NE(comp->Inner(), nullptr); - ASSERT_OK(ValidateOptions(db_opts, cf_opts)); -} - -// Forward declaration -class ReadAsyncFS; - -struct MockIOHandle { - std::function cb; - void* cb_arg; - bool create_io_error; -}; - -// ReadAsyncFS and ReadAsyncRandomAccessFile mocks the FS doing asynchronous -// reads by creating threads that submit read requests and then calling Poll API -// to obtain those results. 
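
// A hedged sketch of the caller-side contract that ReadAsyncFS mocks: submit
// ReadAsync() with a completion callback, then Poll() the returned handles to drain
// results. `file`, `fs`, and the scratch buffer are placeholders; the ReadAsync and
// Poll signatures are the ones shown in the test below.
void SketchAsyncRead(FSRandomAccessFile* file, FileSystem* fs, char* scratch) {
  FSReadRequest req;
  req.offset = 0;
  req.len = 4096;
  req.scratch = scratch;  // caller-owned buffer of at least req.len bytes

  void* io_handle = nullptr;
  IOHandleDeleter del_fn;
  IOStatus s = file->ReadAsync(
      req, IOOptions(),
      [](const FSReadRequest& done, void* /*cb_arg*/) {
        // Completion callback: done.result and done.status are now valid.
      },
      /*cb_arg=*/nullptr, &io_handle, &del_fn, /*dbg=*/nullptr);
  ASSERT_OK(s);

  std::vector<void*> handles{io_handle};
  ASSERT_OK(fs->Poll(handles, /*min_completions=*/1));  // waits for callbacks to run
  del_fn(io_handle);                                     // release the handle
}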
-class ReadAsyncRandomAccessFile : public FSRandomAccessFileOwnerWrapper { - public: - ReadAsyncRandomAccessFile(ReadAsyncFS& fs, - std::unique_ptr& file) - : FSRandomAccessFileOwnerWrapper(std::move(file)), fs_(fs) {} - - IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts, - std::function cb, - void* cb_arg, void** io_handle, IOHandleDeleter* del_fn, - IODebugContext* dbg) override; - - private: - ReadAsyncFS& fs_; - std::unique_ptr file_; - int counter = 0; -}; - -class ReadAsyncFS : public FileSystemWrapper { - public: - explicit ReadAsyncFS(const std::shared_ptr& wrapped) - : FileSystemWrapper(wrapped) {} - - static const char* kClassName() { return "ReadAsyncFS"; } - const char* Name() const override { return kClassName(); } - - IOStatus NewRandomAccessFile(const std::string& fname, - const FileOptions& opts, - std::unique_ptr* result, - IODebugContext* dbg) override { - std::unique_ptr file; - IOStatus s = target()->NewRandomAccessFile(fname, opts, &file, dbg); - EXPECT_OK(s); - result->reset(new ReadAsyncRandomAccessFile(*this, file)); - return s; - } - - IOStatus Poll(std::vector& io_handles, - size_t /*min_completions*/) override { - // Wait for the threads completion. - for (auto& t : workers) { - t.join(); - } - - for (size_t i = 0; i < io_handles.size(); i++) { - MockIOHandle* handle = static_cast(io_handles[i]); - if (handle->create_io_error) { - FSReadRequest req; - req.status = IOStatus::IOError(); - handle->cb(req, handle->cb_arg); - } - } - return IOStatus::OK(); - } - - std::vector workers; -}; - -IOStatus ReadAsyncRandomAccessFile::ReadAsync( - FSReadRequest& req, const IOOptions& opts, - std::function cb, void* cb_arg, - void** io_handle, IOHandleDeleter* del_fn, IODebugContext* dbg) { - IOHandleDeleter deletefn = [](void* args) -> void { - delete (static_cast(args)); - args = nullptr; - }; - *del_fn = deletefn; - - // Allocate and populate io_handle. - MockIOHandle* mock_handle = new MockIOHandle(); - bool create_io_error = false; - if (counter % 2) { - create_io_error = true; - } - mock_handle->create_io_error = create_io_error; - mock_handle->cb = cb; - mock_handle->cb_arg = cb_arg; - *io_handle = static_cast(mock_handle); - counter++; - - // Submit read request asynchronously. - std::function submit_request = - [&opts, cb, cb_arg, dbg, create_io_error, this](FSReadRequest _req) { - if (!create_io_error) { - _req.status = target()->Read(_req.offset, _req.len, opts, - &(_req.result), _req.scratch, dbg); - cb(_req, cb_arg); - } - }; - - fs_.workers.emplace_back(submit_request, req); - return IOStatus::OK(); -} - -class TestAsyncRead : public testing::Test { - public: - TestAsyncRead() { env_ = Env::Default(); } - Env* env_; -}; - -// Tests the default implementation of ReadAsync API. -TEST_F(TestAsyncRead, ReadAsync) { - EnvOptions soptions; - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem()); - - std::string fname = test::PerThreadDBPath(env_, "testfile"); - - const size_t kSectorSize = 4096; - const size_t kNumSectors = 8; - - // 1. create & write to a file. - { - std::unique_ptr wfile; - ASSERT_OK( - fs->NewWritableFile(fname, FileOptions(), &wfile, nullptr /*dbg*/)); - - for (size_t i = 0; i < kNumSectors; ++i) { - auto data = NewAligned(kSectorSize * 8, static_cast(i + 1)); - Slice slice(data.get(), kSectorSize); - ASSERT_OK(wfile->Append(slice, IOOptions(), nullptr)); - } - ASSERT_OK(wfile->Close(IOOptions(), nullptr)); - } - // 2. 
Read file - { - std::unique_ptr file; - ASSERT_OK(fs->NewRandomAccessFile(fname, FileOptions(), &file, nullptr)); - - IOOptions opts; - std::vector io_handles(kNumSectors); - std::vector reqs(kNumSectors); - std::vector> data; - std::vector vals; - IOHandleDeleter del_fn; - uint64_t offset = 0; - - // Initialize read requests - for (size_t i = 0; i < kNumSectors; i++) { - reqs[i].offset = offset; - reqs[i].len = kSectorSize; - data.emplace_back(NewAligned(kSectorSize, 0)); - reqs[i].scratch = data.back().get(); - vals.push_back(i); - offset += kSectorSize; - } - - // callback function passed to async read. - std::function callback = - [&](const FSReadRequest& req, void* cb_arg) { - assert(cb_arg != nullptr); - size_t i = *(reinterpret_cast(cb_arg)); - reqs[i].offset = req.offset; - reqs[i].result = req.result; - reqs[i].status = req.status; - }; - - // Submit asynchronous read requests. - for (size_t i = 0; i < kNumSectors; i++) { - void* cb_arg = static_cast(&(vals[i])); - ASSERT_OK(file->ReadAsync(reqs[i], opts, callback, cb_arg, - &(io_handles[i]), &del_fn, nullptr)); - } - - // Poll for the submitted requests. - fs->Poll(io_handles, kNumSectors); - - // Check the status of read requests. - for (size_t i = 0; i < kNumSectors; i++) { - if (i % 2) { - ASSERT_EQ(reqs[i].status, IOStatus::IOError()); - } else { - auto buf = NewAligned(kSectorSize * 8, static_cast(i + 1)); - Slice expected_data(buf.get(), kSectorSize); - - ASSERT_EQ(reqs[i].offset, i * kSectorSize); - ASSERT_OK(reqs[i].status); - ASSERT_EQ(expected_data.ToString(), reqs[i].result.ToString()); - } - } - - // Delete io_handles. - for (size_t i = 0; i < io_handles.size(); i++) { - del_fn(io_handles[i]); - } - } -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/env/io_posix_test.cc b/env/io_posix_test.cc deleted file mode 100644 index 81ce50587..000000000 --- a/env/io_posix_test.cc +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) 2020-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "test_util/testharness.h" - -#ifdef ROCKSDB_LIB_IO_POSIX -#include "env/io_posix.h" - -namespace ROCKSDB_NAMESPACE { - -#ifdef OS_LINUX -class LogicalBlockSizeCacheTest : public testing::Test {}; - -// Tests the caching behavior. 
-TEST_F(LogicalBlockSizeCacheTest, Cache) { - int ncall = 0; - auto get_fd_block_size = [&](int fd) { - ncall++; - return fd; - }; - std::map dir_fds{ - {"/", 0}, - {"/db", 1}, - {"/db1", 2}, - {"/db2", 3}, - }; - auto get_dir_block_size = [&](const std::string& dir, size_t* size) { - ncall++; - *size = dir_fds[dir]; - return Status::OK(); - }; - LogicalBlockSizeCache cache(get_fd_block_size, get_dir_block_size); - ASSERT_EQ(0, ncall); - ASSERT_EQ(0, cache.Size()); - - ASSERT_EQ(6, cache.GetLogicalBlockSize("/sst", 6)); - ASSERT_EQ(1, ncall); - ASSERT_EQ(7, cache.GetLogicalBlockSize("/db/sst1", 7)); - ASSERT_EQ(2, ncall); - ASSERT_EQ(8, cache.GetLogicalBlockSize("/db/sst2", 8)); - ASSERT_EQ(3, ncall); - - ASSERT_OK(cache.RefAndCacheLogicalBlockSize({"/", "/db1/", "/db2"})); - ASSERT_EQ(3, cache.Size()); - ASSERT_TRUE(cache.Contains("/")); - ASSERT_TRUE(cache.Contains("/db1")); - ASSERT_TRUE(cache.Contains("/db2")); - ASSERT_EQ(6, ncall); - // Block size for / is cached. - ASSERT_EQ(0, cache.GetLogicalBlockSize("/sst", 6)); - ASSERT_EQ(6, ncall); - // No cached size for /db. - ASSERT_EQ(7, cache.GetLogicalBlockSize("/db/sst1", 7)); - ASSERT_EQ(7, ncall); - ASSERT_EQ(8, cache.GetLogicalBlockSize("/db/sst2", 8)); - ASSERT_EQ(8, ncall); - // Block size for /db1 is cached. - ASSERT_EQ(2, cache.GetLogicalBlockSize("/db1/sst1", 4)); - ASSERT_EQ(8, ncall); - ASSERT_EQ(2, cache.GetLogicalBlockSize("/db1/sst2", 5)); - ASSERT_EQ(8, ncall); - // Block size for /db2 is cached. - ASSERT_EQ(3, cache.GetLogicalBlockSize("/db2/sst1", 6)); - ASSERT_EQ(8, ncall); - ASSERT_EQ(3, cache.GetLogicalBlockSize("/db2/sst2", 7)); - ASSERT_EQ(8, ncall); - - ASSERT_OK(cache.RefAndCacheLogicalBlockSize({"/db"})); - ASSERT_EQ(4, cache.Size()); - ASSERT_TRUE(cache.Contains("/")); - ASSERT_TRUE(cache.Contains("/db1")); - ASSERT_TRUE(cache.Contains("/db2")); - ASSERT_TRUE(cache.Contains("/db")); - - ASSERT_EQ(9, ncall); - // Block size for /db is cached. - ASSERT_EQ(1, cache.GetLogicalBlockSize("/db/sst1", 7)); - ASSERT_EQ(9, ncall); - ASSERT_EQ(1, cache.GetLogicalBlockSize("/db/sst2", 8)); - ASSERT_EQ(9, ncall); -} - -// Tests the reference counting behavior. -TEST_F(LogicalBlockSizeCacheTest, Ref) { - int ncall = 0; - auto get_fd_block_size = [&](int fd) { - ncall++; - return fd; - }; - std::map dir_fds{ - {"/db", 0}, - }; - auto get_dir_block_size = [&](const std::string& dir, size_t* size) { - ncall++; - *size = dir_fds[dir]; - return Status::OK(); - }; - LogicalBlockSizeCache cache(get_fd_block_size, get_dir_block_size); - - ASSERT_EQ(0, ncall); - - ASSERT_EQ(1, cache.GetLogicalBlockSize("/db/sst0", 1)); - ASSERT_EQ(1, ncall); - - ASSERT_OK(cache.RefAndCacheLogicalBlockSize({"/db"})); - ASSERT_EQ(2, ncall); - ASSERT_EQ(1, cache.GetRefCount("/db")); - // Block size for /db is cached. Ref count = 1. - ASSERT_EQ(0, cache.GetLogicalBlockSize("/db/sst1", 1)); - ASSERT_EQ(2, ncall); - - // Ref count = 2, but won't recompute the cached buffer size. - ASSERT_OK(cache.RefAndCacheLogicalBlockSize({"/db"})); - ASSERT_EQ(2, cache.GetRefCount("/db")); - ASSERT_EQ(2, ncall); - - // Ref count = 1. - cache.UnrefAndTryRemoveCachedLogicalBlockSize({"/db"}); - ASSERT_EQ(1, cache.GetRefCount("/db")); - // Block size for /db is still cached. - ASSERT_EQ(0, cache.GetLogicalBlockSize("/db/sst2", 1)); - ASSERT_EQ(2, ncall); - - // Ref count = 0 and cached buffer size for /db is removed. 
- cache.UnrefAndTryRemoveCachedLogicalBlockSize({"/db"}); - ASSERT_EQ(0, cache.Size()); - ASSERT_EQ(1, cache.GetLogicalBlockSize("/db/sst0", 1)); - ASSERT_EQ(3, ncall); -} -#endif - -} // namespace ROCKSDB_NAMESPACE -#endif - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/env/mock_env_test.cc b/env/mock_env_test.cc deleted file mode 100644 index be174bd73..000000000 --- a/env/mock_env_test.cc +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -#include "env/mock_env.h" - -#include -#include - -#include "rocksdb/env.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -class MockEnvTest : public testing::Test { - public: - MockEnv* env_; - const EnvOptions soptions_; - - MockEnvTest() : env_(MockEnv::Create(Env::Default())) {} - ~MockEnvTest() override { delete env_; } -}; - -TEST_F(MockEnvTest, Corrupt) { - const std::string kGood = "this is a good string, synced to disk"; - const std::string kCorrupted = "this part may be corrupted"; - const std::string kFileName = "/dir/f"; - std::unique_ptr writable_file; - ASSERT_OK(env_->NewWritableFile(kFileName, &writable_file, soptions_)); - ASSERT_OK(writable_file->Append(kGood)); - ASSERT_TRUE(writable_file->GetFileSize() == kGood.size()); - - std::string scratch; - scratch.resize(kGood.size() + kCorrupted.size() + 16); - Slice result; - std::unique_ptr rand_file; - ASSERT_OK(env_->NewRandomAccessFile(kFileName, &rand_file, soptions_)); - ASSERT_OK(rand_file->Read(0, kGood.size(), &result, &(scratch[0]))); - ASSERT_EQ(result.compare(kGood), 0); - - // Sync + corrupt => no change - ASSERT_OK(writable_file->Fsync()); - ASSERT_OK(dynamic_cast(env_)->CorruptBuffer(kFileName)); - result.clear(); - ASSERT_OK(rand_file->Read(0, kGood.size(), &result, &(scratch[0]))); - ASSERT_EQ(result.compare(kGood), 0); - - // Add new data and corrupt it - ASSERT_OK(writable_file->Append(kCorrupted)); - ASSERT_TRUE(writable_file->GetFileSize() == kGood.size() + kCorrupted.size()); - result.clear(); - ASSERT_OK( - rand_file->Read(kGood.size(), kCorrupted.size(), &result, &(scratch[0]))); - ASSERT_EQ(result.compare(kCorrupted), 0); - // Corrupted - ASSERT_OK(dynamic_cast(env_)->CorruptBuffer(kFileName)); - result.clear(); - ASSERT_OK( - rand_file->Read(kGood.size(), kCorrupted.size(), &result, &(scratch[0]))); - ASSERT_NE(result.compare(kCorrupted), 0); -} - -TEST_F(MockEnvTest, FakeSleeping) { - int64_t now = 0; - auto s = env_->GetCurrentTime(&now); - ASSERT_OK(s); - env_->SleepForMicroseconds(3 * 1000 * 1000); - int64_t after_sleep = 0; - s = env_->GetCurrentTime(&after_sleep); - ASSERT_OK(s); - auto delta = after_sleep - now; - // this will be true unless test runs for 2 seconds - ASSERT_TRUE(delta == 3 || delta == 4); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/examples/.gitignore b/examples/.gitignore deleted file mode 100644 index 39da06a85..000000000 --- a/examples/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -c_simple_example -column_families_example -compact_files_example 
-compaction_filter_example -multi_processes_example -optimistic_transaction_example -options_file_example -rocksdb_backup_restore_example -simple_example -transaction_example diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index 0b93a6d8d..000000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1,45 +0,0 @@ -add_executable(simple_example - simple_example.cc) -target_link_libraries(simple_example - ${ROCKSDB_LIB}) - -add_executable(column_families_example - column_families_example.cc) -target_link_libraries(column_families_example - ${ROCKSDB_LIB}) - -add_executable(compact_files_example - compact_files_example.cc) -target_link_libraries(compact_files_example - ${ROCKSDB_LIB}) - -add_executable(c_simple_example - c_simple_example.c) -target_link_libraries(c_simple_example - ${ROCKSDB_LIB}) - -add_executable(optimistic_transaction_example - optimistic_transaction_example.cc) -target_link_libraries(optimistic_transaction_example - ${ROCKSDB_LIB}) - -add_executable(transaction_example - transaction_example.cc) -target_link_libraries(transaction_example - ${ROCKSDB_LIB}) - -add_executable(compaction_filter_example - compaction_filter_example.cc) -target_link_libraries(compaction_filter_example - ${ROCKSDB_LIB}) - -add_executable(options_file_example - options_file_example.cc) -target_link_libraries(options_file_example - ${ROCKSDB_LIB}) - -add_executable(multi_processes_example - EXCLUDE_FROM_ALL - multi_processes_example.cc) -target_link_libraries(multi_processes_example - ${ROCKSDB_LIB}) diff --git a/examples/Makefile b/examples/Makefile deleted file mode 100644 index b056508a6..000000000 --- a/examples/Makefile +++ /dev/null @@ -1,58 +0,0 @@ -include ../make_config.mk - -ifndef DISABLE_JEMALLOC - ifdef JEMALLOC - PLATFORM_CXXFLAGS += -DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE - endif - EXEC_LDFLAGS := $(JEMALLOC_LIB) $(EXEC_LDFLAGS) -lpthread - PLATFORM_CXXFLAGS += $(JEMALLOC_INCLUDE) -endif - -ifneq ($(USE_RTTI), 1) - CXXFLAGS += -fno-rtti -endif - -CFLAGS += -Wstrict-prototypes - -.PHONY: clean librocksdb - -all: simple_example column_families_example compact_files_example c_simple_example optimistic_transaction_example transaction_example compaction_filter_example options_file_example rocksdb_backup_restore_example - -simple_example: librocksdb simple_example.cc - $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++17 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) - -column_families_example: librocksdb column_families_example.cc - $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++17 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) - -compaction_filter_example: librocksdb compaction_filter_example.cc - $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++17 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) - -compact_files_example: librocksdb compact_files_example.cc - $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++17 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) - -.c.o: - $(CC) $(CFLAGS) -c $< -o $@ -I../include - -c_simple_example: librocksdb c_simple_example.o - $(CXX) $@.o -o$@ ../librocksdb.a $(PLATFORM_LDFLAGS) $(EXEC_LDFLAGS) - -optimistic_transaction_example: librocksdb optimistic_transaction_example.cc - $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++17 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) - -transaction_example: librocksdb transaction_example.cc - $(CXX) $(CXXFLAGS) $@.cc -o$@ 
../librocksdb.a -I../include -O2 -std=c++17 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) - -options_file_example: librocksdb options_file_example.cc - $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++17 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) - -multi_processes_example: librocksdb multi_processes_example.cc - $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++17 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) - -rocksdb_backup_restore_example: librocksdb rocksdb_backup_restore_example.cc - $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++17 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) - -clean: - rm -rf ./simple_example ./column_families_example ./compact_files_example ./compaction_filter_example ./c_simple_example c_simple_example.o ./optimistic_transaction_example ./transaction_example ./options_file_example ./multi_processes_example ./rocksdb_backup_restore_example - -librocksdb: - cd .. && $(MAKE) static_lib diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index f4ba2384b..000000000 --- a/examples/README.md +++ /dev/null @@ -1,2 +0,0 @@ -1. Compile RocksDB first by executing `make static_lib` in parent dir -2. Compile all examples: `cd examples/; make all` diff --git a/examples/c_simple_example.c b/examples/c_simple_example.c deleted file mode 100644 index fe2f917b4..000000000 --- a/examples/c_simple_example.c +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include -#include -#include -#include - -#include "rocksdb/c.h" - -#if defined(OS_WIN) -#include -#else -#include // sysconf() - get CPU count -#endif - -#if defined(OS_WIN) -const char DBPath[] = "C:\\Windows\\TEMP\\rocksdb_c_simple_example"; -const char DBBackupPath[] = - "C:\\Windows\\TEMP\\rocksdb_c_simple_example_backup"; -#else -const char DBPath[] = "/tmp/rocksdb_c_simple_example"; -const char DBBackupPath[] = "/tmp/rocksdb_c_simple_example_backup"; -#endif - -int main(int argc, char **argv) { - rocksdb_t *db; - rocksdb_backup_engine_t *be; - rocksdb_options_t *options = rocksdb_options_create(); - // Optimize RocksDB. This is the easiest way to - // get RocksDB to perform well. 
-#if defined(OS_WIN) - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - long cpus = system_info.dwNumberOfProcessors; -#else - long cpus = sysconf(_SC_NPROCESSORS_ONLN); -#endif - // Set # of online cores - rocksdb_options_increase_parallelism(options, (int)(cpus)); - rocksdb_options_optimize_level_style_compaction(options, 0); - // create the DB if it's not already present - rocksdb_options_set_create_if_missing(options, 1); - - // open DB - char *err = NULL; - db = rocksdb_open(options, DBPath, &err); - assert(!err); - - // open Backup Engine that we will use for backing up our database - be = rocksdb_backup_engine_open(options, DBBackupPath, &err); - assert(!err); - - // Put key-value - rocksdb_writeoptions_t *writeoptions = rocksdb_writeoptions_create(); - const char key[] = "key"; - const char *value = "value"; - rocksdb_put(db, writeoptions, key, strlen(key), value, strlen(value) + 1, - &err); - assert(!err); - // Get value - rocksdb_readoptions_t *readoptions = rocksdb_readoptions_create(); - size_t len; - char *returned_value = - rocksdb_get(db, readoptions, key, strlen(key), &len, &err); - assert(!err); - assert(strcmp(returned_value, "value") == 0); - free(returned_value); - - // create new backup in a directory specified by DBBackupPath - rocksdb_backup_engine_create_new_backup(be, db, &err); - assert(!err); - - rocksdb_close(db); - - // If something is wrong, you might want to restore data from last backup - rocksdb_restore_options_t *restore_options = rocksdb_restore_options_create(); - rocksdb_backup_engine_restore_db_from_latest_backup(be, DBPath, DBPath, - restore_options, &err); - assert(!err); - rocksdb_restore_options_destroy(restore_options); - - db = rocksdb_open(options, DBPath, &err); - assert(!err); - - // cleanup - rocksdb_writeoptions_destroy(writeoptions); - rocksdb_readoptions_destroy(readoptions); - rocksdb_options_destroy(options); - rocksdb_backup_engine_close(be); - rocksdb_close(db); - - return 0; -} diff --git a/examples/column_families_example.cc b/examples/column_families_example.cc deleted file mode 100644 index 3828d3fb3..000000000 --- a/examples/column_families_example.cc +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
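// The C-API calls in c_simple_example above report failure through a char**
// err out-parameter rather than a Status object, and the example only
// assert()s on it. A hedged sketch of a small helper that surfaces and
// releases the message even when asserts are compiled out; check_no_error is
// an illustrative helper, not part of the RocksDB C API.
#include <cstdio>
#include <cstdlib>

#include "rocksdb/c.h"

// Print and clear an error returned through a rocksdb_* errptr argument.
// The message is heap-allocated by the C API, so it is released here.
static void check_no_error(char** errptr, const char* what) {
  if (*errptr != nullptr) {
    fprintf(stderr, "%s failed: %s\n", what, *errptr);
    free(*errptr);
    *errptr = nullptr;
    abort();
  }
}

// Usage (illustrative):
//   db = rocksdb_open(options, DBPath, &err);
//   check_no_error(&err, "rocksdb_open");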
-#include -#include -#include - -#include "rocksdb/db.h" -#include "rocksdb/options.h" -#include "rocksdb/slice.h" - -#if defined(OS_WIN) -std::string kDBPath = "C:\\Windows\\TEMP\\rocksdb_column_families_example"; -#else -std::string kDBPath = "/tmp/rocksdb_column_families_example"; -#endif - -using ROCKSDB_NAMESPACE::ColumnFamilyDescriptor; -using ROCKSDB_NAMESPACE::ColumnFamilyHandle; -using ROCKSDB_NAMESPACE::ColumnFamilyOptions; -using ROCKSDB_NAMESPACE::DB; -using ROCKSDB_NAMESPACE::DBOptions; -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::ReadOptions; -using ROCKSDB_NAMESPACE::Slice; -using ROCKSDB_NAMESPACE::Status; -using ROCKSDB_NAMESPACE::WriteBatch; -using ROCKSDB_NAMESPACE::WriteOptions; - -int main() { - // open DB - Options options; - options.create_if_missing = true; - DB* db; - Status s = DB::Open(options, kDBPath, &db); - assert(s.ok()); - - // create column family - ColumnFamilyHandle* cf; - s = db->CreateColumnFamily(ColumnFamilyOptions(), "new_cf", &cf); - assert(s.ok()); - - // close DB - s = db->DestroyColumnFamilyHandle(cf); - assert(s.ok()); - delete db; - - // open DB with two column families - std::vector column_families; - // have to open default column family - column_families.push_back(ColumnFamilyDescriptor( - ROCKSDB_NAMESPACE::kDefaultColumnFamilyName, ColumnFamilyOptions())); - // open the new one, too - column_families.push_back( - ColumnFamilyDescriptor("new_cf", ColumnFamilyOptions())); - std::vector handles; - s = DB::Open(DBOptions(), kDBPath, column_families, &handles, &db); - assert(s.ok()); - - // put and get from non-default column family - s = db->Put(WriteOptions(), handles[1], Slice("key"), Slice("value")); - assert(s.ok()); - std::string value; - s = db->Get(ReadOptions(), handles[1], Slice("key"), &value); - assert(s.ok()); - - // atomic write - WriteBatch batch; - batch.Put(handles[0], Slice("key2"), Slice("value2")); - batch.Put(handles[1], Slice("key3"), Slice("value3")); - batch.Delete(handles[0], Slice("key")); - s = db->Write(WriteOptions(), &batch); - assert(s.ok()); - - // drop column family - s = db->DropColumnFamily(handles[1]); - assert(s.ok()); - - // close db - for (auto handle : handles) { - s = db->DestroyColumnFamilyHandle(handle); - assert(s.ok()); - } - delete db; - - return 0; -} diff --git a/examples/compact_files_example.cc b/examples/compact_files_example.cc deleted file mode 100644 index 1ecf8c794..000000000 --- a/examples/compact_files_example.cc +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// An example code demonstrating how to use CompactFiles, EventListener, -// and GetColumnFamilyMetaData APIs to implement custom compaction algorithm. 
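// column_families_example above hard-codes the descriptor list used to reopen
// the database. When the set of column families is not known in advance, it
// can be queried from the DB itself with DB::ListColumnFamilies. A minimal
// sketch; OpenAllColumnFamilies and db_path are illustrative names.
#include <cassert>
#include <string>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

void OpenAllColumnFamilies(const std::string& db_path) {
  using ROCKSDB_NAMESPACE::ColumnFamilyDescriptor;
  using ROCKSDB_NAMESPACE::ColumnFamilyHandle;
  using ROCKSDB_NAMESPACE::ColumnFamilyOptions;
  using ROCKSDB_NAMESPACE::DB;
  using ROCKSDB_NAMESPACE::DBOptions;
  using ROCKSDB_NAMESPACE::Status;

  // Ask RocksDB which column families exist in the on-disk MANIFEST.
  std::vector<std::string> cf_names;
  Status s = DB::ListColumnFamilies(DBOptions(), db_path, &cf_names);
  assert(s.ok());

  // Every listed column family must be opened, including "default".
  std::vector<ColumnFamilyDescriptor> descriptors;
  for (const auto& name : cf_names) {
    descriptors.emplace_back(name, ColumnFamilyOptions());
  }

  std::vector<ColumnFamilyHandle*> handles;
  DB* db = nullptr;
  s = DB::Open(DBOptions(), db_path, descriptors, &handles, &db);
  assert(s.ok());

  for (auto* handle : handles) {
    s = db->DestroyColumnFamilyHandle(handle);
    assert(s.ok());
  }
  delete db;
}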
- -#include -#include - -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/options.h" - -using ROCKSDB_NAMESPACE::ColumnFamilyMetaData; -using ROCKSDB_NAMESPACE::CompactionOptions; -using ROCKSDB_NAMESPACE::DB; -using ROCKSDB_NAMESPACE::EventListener; -using ROCKSDB_NAMESPACE::FlushJobInfo; -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::ReadOptions; -using ROCKSDB_NAMESPACE::Status; -using ROCKSDB_NAMESPACE::WriteOptions; - -#if defined(OS_WIN) -std::string kDBPath = "C:\\Windows\\TEMP\\rocksdb_compact_files_example"; -#else -std::string kDBPath = "/tmp/rocksdb_compact_files_example"; -#endif - -struct CompactionTask; - -// This is an example interface of external-compaction algorithm. -// Compaction algorithm can be implemented outside the core-RocksDB -// code by using the pluggable compaction APIs that RocksDb provides. -class Compactor : public EventListener { - public: - // Picks and returns a compaction task given the specified DB - // and column family. It is the caller's responsibility to - // destroy the returned CompactionTask. Returns "nullptr" - // if it cannot find a proper compaction task. - virtual CompactionTask* PickCompaction(DB* db, - const std::string& cf_name) = 0; - - // Schedule and run the specified compaction task in background. - virtual void ScheduleCompaction(CompactionTask* task) = 0; -}; - -// Example structure that describes a compaction task. -struct CompactionTask { - CompactionTask(DB* _db, Compactor* _compactor, - const std::string& _column_family_name, - const std::vector& _input_file_names, - const int _output_level, - const CompactionOptions& _compact_options, bool _retry_on_fail) - : db(_db), - compactor(_compactor), - column_family_name(_column_family_name), - input_file_names(_input_file_names), - output_level(_output_level), - compact_options(_compact_options), - retry_on_fail(_retry_on_fail) {} - DB* db; - Compactor* compactor; - const std::string& column_family_name; - std::vector input_file_names; - int output_level; - CompactionOptions compact_options; - bool retry_on_fail; -}; - -// A simple compaction algorithm that always compacts everything -// to the highest level whenever possible. -class FullCompactor : public Compactor { - public: - explicit FullCompactor(const Options options) : options_(options) { - compact_options_.compression = options_.compression; - compact_options_.output_file_size_limit = options_.target_file_size_base; - } - - // When flush happens, it determines whether to trigger compaction. If - // triggered_writes_stop is true, it will also set the retry flag of - // compaction-task to true. - void OnFlushCompleted(DB* db, const FlushJobInfo& info) override { - CompactionTask* task = PickCompaction(db, info.cf_name); - if (task != nullptr) { - if (info.triggered_writes_stop) { - task->retry_on_fail = true; - } - // Schedule compaction in a different thread. - ScheduleCompaction(task); - } - } - - // Always pick a compaction which includes all files whenever possible. 
- CompactionTask* PickCompaction(DB* db, const std::string& cf_name) override { - ColumnFamilyMetaData cf_meta; - db->GetColumnFamilyMetaData(&cf_meta); - - std::vector input_file_names; - for (auto level : cf_meta.levels) { - for (auto file : level.files) { - if (file.being_compacted) { - return nullptr; - } - input_file_names.push_back(file.name); - } - } - return new CompactionTask(db, this, cf_name, input_file_names, - options_.num_levels - 1, compact_options_, false); - } - - // Schedule the specified compaction task in background. - void ScheduleCompaction(CompactionTask* task) override { - options_.env->Schedule(&FullCompactor::CompactFiles, task); - } - - static void CompactFiles(void* arg) { - std::unique_ptr task( - reinterpret_cast(arg)); - assert(task); - assert(task->db); - Status s = task->db->CompactFiles( - task->compact_options, task->input_file_names, task->output_level); - printf("CompactFiles() finished with status %s\n", s.ToString().c_str()); - if (!s.ok() && !s.IsIOError() && task->retry_on_fail) { - // If a compaction task with its retry_on_fail=true failed, - // try to schedule another compaction in case the reason - // is not an IO error. - CompactionTask* new_task = - task->compactor->PickCompaction(task->db, task->column_family_name); - task->compactor->ScheduleCompaction(new_task); - } - } - - private: - Options options_; - CompactionOptions compact_options_; -}; - -int main() { - Options options; - options.create_if_missing = true; - // Disable RocksDB background compaction. - options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleNone; - // Small slowdown and stop trigger for experimental purpose. - options.level0_slowdown_writes_trigger = 3; - options.level0_stop_writes_trigger = 5; - options.IncreaseParallelism(5); - options.listeners.emplace_back(new FullCompactor(options)); - - DB* db = nullptr; - ROCKSDB_NAMESPACE::DestroyDB(kDBPath, options); - Status s = DB::Open(options, kDBPath, &db); - assert(s.ok()); - assert(db); - - // if background compaction is not working, write will stall - // because of options.level0_stop_writes_trigger - for (int i = 1000; i < 99999; ++i) { - db->Put(WriteOptions(), std::to_string(i), - std::string(500, 'a' + (i % 26))); - } - - // verify the values are still there - std::string value; - for (int i = 1000; i < 99999; ++i) { - db->Get(ReadOptions(), std::to_string(i), &value); - assert(value == std::string(500, 'a' + (i % 26))); - } - - // close the db. - delete db; - - return 0; -} diff --git a/examples/compaction_filter_example.cc b/examples/compaction_filter_example.cc deleted file mode 100644 index ed1ada823..000000000 --- a/examples/compaction_filter_example.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
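// compact_files_example above drives compaction file-by-file through a flush
// listener. For the common case of simply compacting the whole key range, the
// higher-level CompactRange API is usually sufficient. A minimal sketch,
// assuming an illustrative path /tmp/rocksdb_manual_compaction_example.
#include <cassert>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int RunManualFullCompaction() {
  using ROCKSDB_NAMESPACE::CompactRangeOptions;
  using ROCKSDB_NAMESPACE::DB;
  using ROCKSDB_NAMESPACE::Options;
  using ROCKSDB_NAMESPACE::Status;
  using ROCKSDB_NAMESPACE::WriteOptions;

  Options options;
  options.create_if_missing = true;
  // Keep background compactions out of the way; compaction is triggered
  // explicitly below rather than by a listener.
  options.disable_auto_compactions = true;

  DB* db = nullptr;
  Status s = DB::Open(options, "/tmp/rocksdb_manual_compaction_example", &db);
  assert(s.ok());

  for (int i = 0; i < 1000; ++i) {
    s = db->Put(WriteOptions(), std::to_string(i), std::string(100, 'v'));
    assert(s.ok());
  }

  // nullptr begin/end keys mean "the whole key range".
  s = db->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  assert(s.ok());

  delete db;
  return 0;
}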
- -#include "rocksdb/compaction_filter.h" -#include "rocksdb/db.h" -#include "rocksdb/merge_operator.h" -#include "rocksdb/options.h" - -class MyMerge : public ROCKSDB_NAMESPACE::MergeOperator { - public: - virtual bool FullMergeV2(const MergeOperationInput& merge_in, - MergeOperationOutput* merge_out) const override { - merge_out->new_value.clear(); - if (merge_in.existing_value != nullptr) { - merge_out->new_value.assign(merge_in.existing_value->data(), - merge_in.existing_value->size()); - } - for (const ROCKSDB_NAMESPACE::Slice& m : merge_in.operand_list) { - fprintf(stderr, "Merge(%s)\n", m.ToString().c_str()); - // the compaction filter filters out bad values - assert(m.ToString() != "bad"); - merge_out->new_value.assign(m.data(), m.size()); - } - return true; - } - - const char* Name() const override { return "MyMerge"; } -}; - -class MyFilter : public ROCKSDB_NAMESPACE::CompactionFilter { - public: - bool Filter(int level, const ROCKSDB_NAMESPACE::Slice& key, - const ROCKSDB_NAMESPACE::Slice& existing_value, - std::string* new_value, bool* value_changed) const override { - fprintf(stderr, "Filter(%s)\n", key.ToString().c_str()); - ++count_; - assert(*value_changed == false); - return false; - } - - bool FilterMergeOperand( - int level, const ROCKSDB_NAMESPACE::Slice& key, - const ROCKSDB_NAMESPACE::Slice& existing_value) const override { - fprintf(stderr, "FilterMerge(%s)\n", key.ToString().c_str()); - ++merge_count_; - return existing_value == "bad"; - } - - const char* Name() const override { return "MyFilter"; } - - mutable int count_ = 0; - mutable int merge_count_ = 0; -}; - -#if defined(OS_WIN) -std::string kDBPath = "C:\\Windows\\TEMP\\rocksmergetest"; -std::string kRemoveDirCommand = "rmdir /Q /S "; -#else -std::string kDBPath = "/tmp/rocksmergetest"; -std::string kRemoveDirCommand = "rm -rf "; -#endif - -int main() { - ROCKSDB_NAMESPACE::DB* raw_db; - ROCKSDB_NAMESPACE::Status status; - - MyFilter filter; - - std::string rm_cmd = kRemoveDirCommand + kDBPath; - int ret = system(rm_cmd.c_str()); - if (ret != 0) { - fprintf(stderr, "Error deleting %s, code: %d\n", kDBPath.c_str(), ret); - } - ROCKSDB_NAMESPACE::Options options; - options.create_if_missing = true; - options.merge_operator.reset(new MyMerge); - options.compaction_filter = &filter; - status = ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &raw_db); - assert(status.ok()); - std::unique_ptr db(raw_db); - - ROCKSDB_NAMESPACE::WriteOptions wopts; - db->Merge(wopts, "0", "bad"); // This is filtered out - db->Merge(wopts, "1", "data1"); - db->Merge(wopts, "1", "bad"); - db->Merge(wopts, "1", "data2"); - db->Merge(wopts, "1", "bad"); - db->Merge(wopts, "3", "data3"); - db->CompactRange(ROCKSDB_NAMESPACE::CompactRangeOptions(), nullptr, nullptr); - fprintf(stderr, "filter.count_ = %d\n", filter.count_); - assert(filter.count_ == 0); - fprintf(stderr, "filter.merge_count_ = %d\n", filter.merge_count_); - assert(filter.merge_count_ == 6); -} diff --git a/examples/multi_processes_example.cc b/examples/multi_processes_example.cc deleted file mode 100644 index 93c54d755..000000000 --- a/examples/multi_processes_example.cc +++ /dev/null @@ -1,393 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -// How to use this example -// Open two terminals, in one of them, run `./multi_processes_example 0` to -// start a process running the primary instance. This will create a new DB in -// kDBPath. The process will run for a while inserting keys to the normal -// RocksDB database. -// Next, go to the other terminal and run `./multi_processes_example 1` to -// start a process running the secondary instance. This will create a secondary -// instance following the aforementioned primary instance. This process will -// run for a while, tailing the logs of the primary. After process with primary -// instance exits, this process will keep running until you hit 'CTRL+C'. - -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO: port this example to other systems. It should be straightforward for -// POSIX-compliant systems. -#if defined(OS_LINUX) -#include -#include -#include -#include -#include -#include - -#include "rocksdb/db.h" -#include "rocksdb/options.h" -#include "rocksdb/slice.h" - -using ROCKSDB_NAMESPACE::ColumnFamilyDescriptor; -using ROCKSDB_NAMESPACE::ColumnFamilyHandle; -using ROCKSDB_NAMESPACE::ColumnFamilyOptions; -using ROCKSDB_NAMESPACE::DB; -using ROCKSDB_NAMESPACE::FlushOptions; -using ROCKSDB_NAMESPACE::Iterator; -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::ReadOptions; -using ROCKSDB_NAMESPACE::Slice; -using ROCKSDB_NAMESPACE::Status; -using ROCKSDB_NAMESPACE::WriteOptions; - -const std::string kDBPath = "/tmp/rocksdb_multi_processes_example"; -const std::string kPrimaryStatusFile = - "/tmp/rocksdb_multi_processes_example_primary_status"; -const uint64_t kMaxKey = 600000; -const size_t kMaxValueLength = 256; -const size_t kNumKeysPerFlush = 1000; - -const std::vector& GetColumnFamilyNames() { - static std::vector column_family_names = { - ROCKSDB_NAMESPACE::kDefaultColumnFamilyName, "pikachu"}; - return column_family_names; -} - -inline bool IsLittleEndian() { - uint32_t x = 1; - return *reinterpret_cast(&x) != 0; -} - -static std::atomic& ShouldSecondaryWait() { - static std::atomic should_secondary_wait{1}; - return should_secondary_wait; -} - -static std::string Key(uint64_t k) { - std::string ret; - if (IsLittleEndian()) { - ret.append(reinterpret_cast(&k), sizeof(k)); - } else { - char buf[sizeof(k)]; - buf[0] = k & 0xff; - buf[1] = (k >> 8) & 0xff; - buf[2] = (k >> 16) & 0xff; - buf[3] = (k >> 24) & 0xff; - buf[4] = (k >> 32) & 0xff; - buf[5] = (k >> 40) & 0xff; - buf[6] = (k >> 48) & 0xff; - buf[7] = (k >> 56) & 0xff; - ret.append(buf, sizeof(k)); - } - size_t i = 0, j = ret.size() - 1; - while (i < j) { - char tmp = ret[i]; - ret[i] = ret[j]; - ret[j] = tmp; - ++i; - --j; - } - return ret; -} - -static uint64_t Key(std::string key) { - assert(key.size() == sizeof(uint64_t)); - size_t i = 0, j = key.size() - 1; - while (i < j) { - char tmp = key[i]; - key[i] = key[j]; - key[j] = tmp; - ++i; - --j; - } - uint64_t ret = 0; - if (IsLittleEndian()) { - memcpy(&ret, key.c_str(), sizeof(uint64_t)); - } else { - const char* buf = key.c_str(); - ret |= static_cast(buf[0]); - ret |= (static_cast(buf[1]) << 8); - ret |= (static_cast(buf[2]) << 16); - ret |= (static_cast(buf[3]) << 24); - ret |= (static_cast(buf[4]) << 32); - ret |= (static_cast(buf[5]) << 40); - ret |= (static_cast(buf[6]) << 48); - ret |= (static_cast(buf[7]) << 56); - } - return ret; -} - -static Slice GenerateRandomValue(const size_t max_length, char scratch[]) { - size_t sz = 1 + (std::rand() % max_length); - int rnd = std::rand(); - for (size_t i = 0; i 
!= sz; ++i) { - scratch[i] = static_cast(rnd ^ i); - } - return Slice(scratch, sz); -} - -static bool ShouldCloseDB() { return true; } - -void CreateDB() { - long my_pid = static_cast(getpid()); - Options options; - Status s = ROCKSDB_NAMESPACE::DestroyDB(kDBPath, options); - if (!s.ok()) { - fprintf(stderr, "[process %ld] Failed to destroy DB: %s\n", my_pid, - s.ToString().c_str()); - assert(false); - } - options.create_if_missing = true; - DB* db = nullptr; - s = DB::Open(options, kDBPath, &db); - if (!s.ok()) { - fprintf(stderr, "[process %ld] Failed to open DB: %s\n", my_pid, - s.ToString().c_str()); - assert(false); - } - std::vector handles; - ColumnFamilyOptions cf_opts(options); - for (const auto& cf_name : GetColumnFamilyNames()) { - if (ROCKSDB_NAMESPACE::kDefaultColumnFamilyName != cf_name) { - ColumnFamilyHandle* handle = nullptr; - s = db->CreateColumnFamily(cf_opts, cf_name, &handle); - if (!s.ok()) { - fprintf(stderr, "[process %ld] Failed to create CF %s: %s\n", my_pid, - cf_name.c_str(), s.ToString().c_str()); - assert(false); - } - handles.push_back(handle); - } - } - fprintf(stdout, "[process %ld] Column families created\n", my_pid); - for (auto h : handles) { - delete h; - } - handles.clear(); - delete db; -} - -void RunPrimary() { - long my_pid = static_cast(getpid()); - fprintf(stdout, "[process %ld] Primary instance starts\n", my_pid); - CreateDB(); - std::srand(time(nullptr)); - DB* db = nullptr; - Options options; - options.create_if_missing = false; - std::vector column_families; - for (const auto& cf_name : GetColumnFamilyNames()) { - column_families.push_back(ColumnFamilyDescriptor(cf_name, options)); - } - std::vector handles; - WriteOptions write_opts; - char val_buf[kMaxValueLength] = {0}; - uint64_t curr_key = 0; - while (curr_key < kMaxKey) { - Status s; - if (nullptr == db) { - s = DB::Open(options, kDBPath, column_families, &handles, &db); - if (!s.ok()) { - fprintf(stderr, "[process %ld] Failed to open DB: %s\n", my_pid, - s.ToString().c_str()); - assert(false); - } - } - assert(nullptr != db); - assert(handles.size() == GetColumnFamilyNames().size()); - for (auto h : handles) { - assert(nullptr != h); - for (size_t i = 0; i != kNumKeysPerFlush; ++i) { - Slice key = Key(curr_key + static_cast(i)); - Slice value = GenerateRandomValue(kMaxValueLength, val_buf); - s = db->Put(write_opts, h, key, value); - if (!s.ok()) { - fprintf(stderr, "[process %ld] Failed to insert\n", my_pid); - assert(false); - } - } - s = db->Flush(FlushOptions(), h); - if (!s.ok()) { - fprintf(stderr, "[process %ld] Failed to flush\n", my_pid); - assert(false); - } - } - curr_key += static_cast(kNumKeysPerFlush); - if (ShouldCloseDB()) { - for (auto h : handles) { - delete h; - } - handles.clear(); - delete db; - db = nullptr; - } - } - if (nullptr != db) { - for (auto h : handles) { - delete h; - } - handles.clear(); - delete db; - db = nullptr; - } - fprintf(stdout, "[process %ld] Finished adding keys\n", my_pid); -} - -void secondary_instance_sigint_handler(int signal) { - ShouldSecondaryWait().store(0, std::memory_order_relaxed); - fprintf(stdout, "\n"); - fflush(stdout); -}; - -void RunSecondary() { - ::signal(SIGINT, secondary_instance_sigint_handler); - long my_pid = static_cast(getpid()); - const std::string kSecondaryPath = - "/tmp/rocksdb_multi_processes_example_secondary"; - // Create directory if necessary - if (nullptr == opendir(kSecondaryPath.c_str())) { - int ret = - mkdir(kSecondaryPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); - if (ret < 0) { - 
perror("failed to create directory for secondary instance"); - exit(0); - } - } - DB* db = nullptr; - Options options; - options.create_if_missing = false; - options.max_open_files = -1; - Status s = DB::OpenAsSecondary(options, kDBPath, kSecondaryPath, &db); - if (!s.ok()) { - fprintf(stderr, "[process %ld] Failed to open in secondary mode: %s\n", - my_pid, s.ToString().c_str()); - assert(false); - } else { - fprintf(stdout, "[process %ld] Secondary instance starts\n", my_pid); - } - - ReadOptions ropts; - ropts.verify_checksums = true; - ropts.total_order_seek = true; - - std::vector test_threads; - test_threads.emplace_back([&]() { - while (1 == ShouldSecondaryWait().load(std::memory_order_relaxed)) { - std::unique_ptr iter(db->NewIterator(ropts)); - iter->SeekToFirst(); - size_t count = 0; - for (; iter->Valid(); iter->Next()) { - ++count; - } - } - fprintf(stdout, "[process %ld] Range_scan thread finished\n", my_pid); - }); - - test_threads.emplace_back([&]() { - std::srand(time(nullptr)); - while (1 == ShouldSecondaryWait().load(std::memory_order_relaxed)) { - Slice key = Key(std::rand() % kMaxKey); - std::string value; - db->Get(ropts, key, &value); - } - fprintf(stdout, "[process %ld] Point lookup thread finished\n", my_pid); - }); - - uint64_t curr_key = 0; - while (1 == ShouldSecondaryWait().load(std::memory_order_relaxed)) { - s = db->TryCatchUpWithPrimary(); - if (!s.ok()) { - fprintf(stderr, - "[process %ld] error while trying to catch up with " - "primary %s\n", - my_pid, s.ToString().c_str()); - assert(false); - } - { - std::unique_ptr iter(db->NewIterator(ropts)); - if (!iter) { - fprintf(stderr, "[process %ld] Failed to create iterator\n", my_pid); - assert(false); - } - iter->SeekToLast(); - if (iter->Valid()) { - uint64_t curr_max_key = Key(iter->key().ToString()); - if (curr_max_key != curr_key) { - fprintf(stdout, "[process %ld] Observed key %" PRIu64 "\n", my_pid, - curr_key); - curr_key = curr_max_key; - } - } - } - std::this_thread::sleep_for(std::chrono::seconds(1)); - } - s = db->TryCatchUpWithPrimary(); - if (!s.ok()) { - fprintf(stderr, - "[process %ld] error while trying to catch up with " - "primary %s\n", - my_pid, s.ToString().c_str()); - assert(false); - } - - std::vector column_families; - for (const auto& cf_name : GetColumnFamilyNames()) { - column_families.push_back(ColumnFamilyDescriptor(cf_name, options)); - } - std::vector handles; - DB* verification_db = nullptr; - s = DB::OpenForReadOnly(options, kDBPath, column_families, &handles, - &verification_db); - assert(s.ok()); - Iterator* iter1 = verification_db->NewIterator(ropts); - iter1->SeekToFirst(); - - Iterator* iter = db->NewIterator(ropts); - iter->SeekToFirst(); - for (; iter->Valid() && iter1->Valid(); iter->Next(), iter1->Next()) { - if (iter->key().ToString() != iter1->key().ToString()) { - fprintf(stderr, "%" PRIu64 "!= %" PRIu64 "\n", - Key(iter->key().ToString()), Key(iter1->key().ToString())); - assert(false); - } else if (iter->value().ToString() != iter1->value().ToString()) { - fprintf(stderr, "Value mismatch\n"); - assert(false); - } - } - fprintf(stdout, "[process %ld] Verification succeeded\n", my_pid); - for (auto& thr : test_threads) { - thr.join(); - } - delete iter; - delete iter1; - delete db; - delete verification_db; -} - -int main(int argc, char** argv) { - if (argc < 2) { - fprintf(stderr, "%s <0 for primary, 1 for secondary>\n", argv[0]); - return 0; - } - if (atoi(argv[1]) == 0) { - RunPrimary(); - } else { - RunSecondary(); - } - return 0; -} -#else // OS_LINUX -int 
main() { - fprintf(stderr, "Not implemented.\n"); - return 0; -} -#endif // !OS_LINUX diff --git a/examples/optimistic_transaction_example.cc b/examples/optimistic_transaction_example.cc deleted file mode 100644 index 079572737..000000000 --- a/examples/optimistic_transaction_example.cc +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - - -#include "rocksdb/db.h" -#include "rocksdb/options.h" -#include "rocksdb/slice.h" -#include "rocksdb/utilities/optimistic_transaction_db.h" -#include "rocksdb/utilities/transaction.h" - -using ROCKSDB_NAMESPACE::DB; -using ROCKSDB_NAMESPACE::OptimisticTransactionDB; -using ROCKSDB_NAMESPACE::OptimisticTransactionOptions; -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::ReadOptions; -using ROCKSDB_NAMESPACE::Snapshot; -using ROCKSDB_NAMESPACE::Status; -using ROCKSDB_NAMESPACE::Transaction; -using ROCKSDB_NAMESPACE::WriteOptions; - -#if defined(OS_WIN) -std::string kDBPath = "C:\\Windows\\TEMP\\rocksdb_transaction_example"; -#else -std::string kDBPath = "/tmp/rocksdb_transaction_example"; -#endif - -int main() { - // open DB - Options options; - options.create_if_missing = true; - DB* db; - OptimisticTransactionDB* txn_db; - - Status s = OptimisticTransactionDB::Open(options, kDBPath, &txn_db); - assert(s.ok()); - db = txn_db->GetBaseDB(); - - WriteOptions write_options; - ReadOptions read_options; - OptimisticTransactionOptions txn_options; - std::string value; - - //////////////////////////////////////////////////////// - // - // Simple OptimisticTransaction Example ("Read Committed") - // - //////////////////////////////////////////////////////// - - // Start a transaction - Transaction* txn = txn_db->BeginTransaction(write_options); - assert(txn); - - // Read a key in this transaction - s = txn->Get(read_options, "abc", &value); - assert(s.IsNotFound()); - - // Write a key in this transaction - s = txn->Put("abc", "xyz"); - assert(s.ok()); - - // Read a key OUTSIDE this transaction. Does not affect txn. - s = db->Get(read_options, "abc", &value); - assert(s.IsNotFound()); - - // Write a key OUTSIDE of this transaction. - // Does not affect txn since this is an unrelated key. If we wrote key 'abc' - // here, the transaction would fail to commit. 
- s = db->Put(write_options, "xyz", "zzz"); - assert(s.ok()); - s = db->Put(write_options, "abc", "def"); - assert(s.ok()); - - // Commit transaction - s = txn->Commit(); - assert(s.IsBusy()); - delete txn; - - s = db->Get(read_options, "xyz", &value); - assert(s.ok()); - assert(value == "zzz"); - - s = db->Get(read_options, "abc", &value); - assert(s.ok()); - assert(value == "def"); - - //////////////////////////////////////////////////////// - // - // "Repeatable Read" (Snapshot Isolation) Example - // -- Using a single Snapshot - // - //////////////////////////////////////////////////////// - - // Set a snapshot at start of transaction by setting set_snapshot=true - txn_options.set_snapshot = true; - txn = txn_db->BeginTransaction(write_options, txn_options); - - const Snapshot* snapshot = txn->GetSnapshot(); - - // Write a key OUTSIDE of transaction - s = db->Put(write_options, "abc", "xyz"); - assert(s.ok()); - - // Read a key using the snapshot - read_options.snapshot = snapshot; - s = txn->GetForUpdate(read_options, "abc", &value); - assert(s.ok()); - assert(value == "def"); - - // Attempt to commit transaction - s = txn->Commit(); - - // Transaction could not commit since the write outside of the txn conflicted - // with the read! - assert(s.IsBusy()); - - delete txn; - // Clear snapshot from read options since it is no longer valid - read_options.snapshot = nullptr; - snapshot = nullptr; - - s = db->Get(read_options, "abc", &value); - assert(s.ok()); - assert(value == "xyz"); - - //////////////////////////////////////////////////////// - // - // "Read Committed" (Monotonic Atomic Views) Example - // --Using multiple Snapshots - // - //////////////////////////////////////////////////////// - - // In this example, we set the snapshot multiple times. This is probably - // only necessary if you have very strict isolation requirements to - // implement. - - // Set a snapshot at start of transaction - txn_options.set_snapshot = true; - txn = txn_db->BeginTransaction(write_options, txn_options); - - // Do some reads and writes to key "x" - read_options.snapshot = db->GetSnapshot(); - s = txn->Get(read_options, "x", &value); - assert(s.IsNotFound()); - s = txn->Put("x", "x"); - assert(s.ok()); - - // The transaction hasn't committed, so the write is not visible - // outside of txn. - s = db->Get(read_options, "x", &value); - assert(s.IsNotFound()); - - // Do a write outside of the transaction to key "y" - s = db->Put(write_options, "y", "z"); - assert(s.ok()); - - // Set a new snapshot in the transaction - txn->SetSnapshot(); - read_options.snapshot = db->GetSnapshot(); - - // Do some reads and writes to key "y" - s = txn->GetForUpdate(read_options, "y", &value); - assert(s.ok()); - assert(value == "z"); - txn->Put("y", "y"); - - // Commit. Since the snapshot was advanced, the write done outside of the - // transaction does not prevent this transaction from Committing. - s = txn->Commit(); - assert(s.ok()); - delete txn; - // Clear snapshot from read options since it is no longer valid - read_options.snapshot = nullptr; - - // txn is committed, read the latest values. 
- s = db->Get(read_options, "x", &value); - assert(s.ok()); - assert(value == "x"); - - s = db->Get(read_options, "y", &value); - assert(s.ok()); - assert(value == "y"); - - // Cleanup - delete txn_db; - DestroyDB(kDBPath, options); - return 0; -} - diff --git a/examples/options_file_example.cc b/examples/options_file_example.cc deleted file mode 100644 index 00632f391..000000000 --- a/examples/options_file_example.cc +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file demonstrates how to use the utility functions defined in -// rocksdb/utilities/options_util.h to open a rocksdb database without -// remembering all the rocksdb options. -#include -#include -#include - -#include "rocksdb/cache.h" -#include "rocksdb/compaction_filter.h" -#include "rocksdb/db.h" -#include "rocksdb/options.h" -#include "rocksdb/slice.h" -#include "rocksdb/table.h" -#include "rocksdb/utilities/options_util.h" - -using ROCKSDB_NAMESPACE::BlockBasedTableOptions; -using ROCKSDB_NAMESPACE::ColumnFamilyDescriptor; -using ROCKSDB_NAMESPACE::ColumnFamilyHandle; -using ROCKSDB_NAMESPACE::ColumnFamilyOptions; -using ROCKSDB_NAMESPACE::CompactionFilter; -using ROCKSDB_NAMESPACE::ConfigOptions; -using ROCKSDB_NAMESPACE::DB; -using ROCKSDB_NAMESPACE::DBOptions; -using ROCKSDB_NAMESPACE::NewLRUCache; -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::Slice; -using ROCKSDB_NAMESPACE::Status; - -#if defined(OS_WIN) -std::string kDBPath = "C:\\Windows\\TEMP\\rocksdb_options_file_example"; -#else -std::string kDBPath = "/tmp/rocksdb_options_file_example"; -#endif - -namespace { -// A dummy compaction filter -class DummyCompactionFilter : public CompactionFilter { - public: - virtual ~DummyCompactionFilter() {} - virtual bool Filter(int level, const Slice& key, const Slice& existing_value, - std::string* new_value, bool* value_changed) const { - return false; - } - virtual const char* Name() const { return "DummyCompactionFilter"; } -}; - -} // namespace - -int main() { - DBOptions db_opt; - db_opt.create_if_missing = true; - - std::vector cf_descs; - cf_descs.push_back( - {ROCKSDB_NAMESPACE::kDefaultColumnFamilyName, ColumnFamilyOptions()}); - cf_descs.push_back({"new_cf", ColumnFamilyOptions()}); - - // initialize BlockBasedTableOptions - auto cache = NewLRUCache(1 * 1024 * 1024 * 1024); - BlockBasedTableOptions bbt_opts; - bbt_opts.block_size = 32 * 1024; - bbt_opts.block_cache = cache; - - // initialize column families options - std::unique_ptr compaction_filter; - compaction_filter.reset(new DummyCompactionFilter()); - cf_descs[0].options.table_factory.reset(NewBlockBasedTableFactory(bbt_opts)); - cf_descs[0].options.compaction_filter = compaction_filter.get(); - cf_descs[1].options.table_factory.reset(NewBlockBasedTableFactory(bbt_opts)); - - // destroy and open DB - DB* db; - Status s = ROCKSDB_NAMESPACE::DestroyDB(kDBPath, - Options(db_opt, cf_descs[0].options)); - assert(s.ok()); - s = DB::Open(Options(db_opt, cf_descs[0].options), kDBPath, &db); - assert(s.ok()); - - // Create column family, and rocksdb will persist the options. 
- ColumnFamilyHandle* cf; - s = db->CreateColumnFamily(ColumnFamilyOptions(), "new_cf", &cf); - assert(s.ok()); - - // close DB - delete cf; - delete db; - - // In the following code, we will reopen the rocksdb instance using - // the options file stored in the db directory. - - // Load the options file. - DBOptions loaded_db_opt; - std::vector loaded_cf_descs; - ConfigOptions config_options; - s = LoadLatestOptions(config_options, kDBPath, &loaded_db_opt, - &loaded_cf_descs); - assert(s.ok()); - assert(loaded_db_opt.create_if_missing == db_opt.create_if_missing); - - // Initialize pointer options for each column family - for (size_t i = 0; i < loaded_cf_descs.size(); ++i) { - auto* loaded_bbt_opt = - loaded_cf_descs[0] - .options.table_factory->GetOptions(); - // Expect the same as BlockBasedTableOptions will be loaded form file. - assert(loaded_bbt_opt->block_size == bbt_opts.block_size); - // However, block_cache needs to be manually initialized as documented - // in rocksdb/utilities/options_util.h. - loaded_bbt_opt->block_cache = cache; - } - // In addition, as pointer options are initialized with default value, - // we need to properly initialized all the pointer options if non-defalut - // values are used before calling DB::Open(). - assert(loaded_cf_descs[0].options.compaction_filter == nullptr); - loaded_cf_descs[0].options.compaction_filter = compaction_filter.get(); - - // reopen the db using the loaded options. - std::vector handles; - s = DB::Open(loaded_db_opt, kDBPath, loaded_cf_descs, &handles, &db); - assert(s.ok()); - - // close DB - for (auto* handle : handles) { - delete handle; - } - delete db; -} diff --git a/examples/rocksdb_backup_restore_example.cc b/examples/rocksdb_backup_restore_example.cc deleted file mode 100644 index c833ed1c2..000000000 --- a/examples/rocksdb_backup_restore_example.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include -#include -#include - -#include "rocksdb/db.h" -#include "rocksdb/options.h" -#include "rocksdb/utilities/backup_engine.h" - -using ROCKSDB_NAMESPACE::BackupEngine; -using ROCKSDB_NAMESPACE::BackupEngineOptions; -using ROCKSDB_NAMESPACE::BackupEngineReadOnly; -using ROCKSDB_NAMESPACE::BackupInfo; -using ROCKSDB_NAMESPACE::DB; -using ROCKSDB_NAMESPACE::Env; -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::ReadOptions; -using ROCKSDB_NAMESPACE::Status; -using ROCKSDB_NAMESPACE::WriteOptions; - -#if defined(OS_WIN) -std::string kDBPath = "C:\\Windows\\TEMP\\rocksdb_example"; -#else -std::string kDBPath = "/tmp/rocksdb_example"; -#endif - -int main() { - DB* db; - Options options; - // Optimize RocksDB. 
This is the easiest way to get RocksDB to perform well - options.IncreaseParallelism(); - options.OptimizeLevelStyleCompaction(); - // create the DB if it's not already present - options.create_if_missing = true; - - // open DB - Status s = DB::Open(options, kDBPath, &db); - assert(s.ok()); - - // Put key-value - db->Put(WriteOptions(), "key1", "value1"); - assert(s.ok()); - - // create backup - BackupEngine* backup_engine; - s = BackupEngine::Open(Env::Default(), - BackupEngineOptions("/tmp/rocksdb_example_backup"), - &backup_engine); - assert(s.ok()); - - backup_engine->CreateNewBackup(db); - assert(s.ok()); - - std::vector backup_info; - backup_engine->GetBackupInfo(&backup_info); - - s = backup_engine->VerifyBackup(1); - assert(s.ok()); - - // Put key-value - db->Put(WriteOptions(), "key2", "value2"); - assert(s.ok()); - - db->Close(); - delete db; - db = nullptr; - - // restore db to backup 1 - BackupEngineReadOnly* backup_engine_ro; - s = BackupEngineReadOnly::Open( - Env::Default(), BackupEngineOptions("/tmp/rocksdb_example_backup"), - &backup_engine_ro); - assert(s.ok()); - - s = backup_engine_ro->RestoreDBFromBackup(1, "/tmp/rocksdb_example", - "/tmp/rocksdb_example"); - assert(s.ok()); - - // open db again - s = DB::Open(options, kDBPath, &db); - assert(s.ok()); - - std::string value; - s = db->Get(ReadOptions(), "key1", &value); - assert(!s.IsNotFound()); - - s = db->Get(ReadOptions(), "key2", &value); - assert(s.IsNotFound()); - - delete backup_engine; - delete backup_engine_ro; - delete db; - - return 0; -} diff --git a/examples/rocksdb_option_file_example.ini b/examples/rocksdb_option_file_example.ini deleted file mode 100644 index 351890e51..000000000 --- a/examples/rocksdb_option_file_example.ini +++ /dev/null @@ -1,142 +0,0 @@ -# This is a RocksDB option file. -# -# A typical RocksDB options file has four sections, which are -# Version section, DBOptions section, at least one CFOptions -# section, and one TableOptions section for each column family. -# The RocksDB options file in general follows the basic INI -# file format with the following extensions / modifications: -# -# * Escaped characters -# We escaped the following characters: -# - \n -- line feed - new line -# - \r -- carriage return -# - \\ -- backslash \ -# - \: -- colon symbol : -# - \# -- hash tag # -# * Comments -# We support # style comments. Comments can appear at the ending -# part of a line. -# * Statements -# A statement is of the form option_name = value. -# Each statement contains a '=', where extra white-spaces -# are supported. However, we don't support multi-lined statement. -# Furthermore, each line can only contain at most one statement. -# * Sections -# Sections are of the form [SecitonTitle "SectionArgument"], -# where section argument is optional. -# * List -# We use colon-separated string to represent a list. -# For instance, n1:n2:n3:n4 is a list containing four values. 
-# -# Below is an example of a RocksDB options file: -[Version] - rocksdb_version=4.3.0 - options_file_version=1.1 - -[DBOptions] - stats_dump_period_sec=600 - max_manifest_file_size=18446744073709551615 - bytes_per_sync=8388608 - delayed_write_rate=2097152 - WAL_ttl_seconds=0 - WAL_size_limit_MB=0 - max_subcompactions=1 - wal_dir= - wal_bytes_per_sync=0 - db_write_buffer_size=0 - keep_log_file_num=1000 - table_cache_numshardbits=4 - max_file_opening_threads=1 - writable_file_max_buffer_size=1048576 - random_access_max_buffer_size=1048576 - use_fsync=false - max_total_wal_size=0 - max_open_files=-1 - skip_stats_update_on_db_open=false - max_background_compactions=16 - manifest_preallocation_size=4194304 - max_background_flushes=7 - is_fd_close_on_exec=true - max_log_file_size=0 - advise_random_on_open=true - create_missing_column_families=false - paranoid_checks=true - delete_obsolete_files_period_micros=21600000000 - log_file_time_to_roll=0 - compaction_readahead_size=0 - create_if_missing=false - use_adaptive_mutex=false - enable_thread_tracking=false - allow_fallocate=true - error_if_exists=false - recycle_log_file_num=0 - db_log_dir= - skip_log_error_on_recovery=false - new_table_reader_for_compaction_inputs=true - allow_mmap_reads=false - allow_mmap_writes=false - use_direct_reads=false - use_direct_writes=false - - -[CFOptions "default"] - compaction_style=kCompactionStyleLevel - compaction_filter=nullptr - num_levels=6 - table_factory=BlockBasedTable - comparator=leveldb.BytewiseComparator - max_sequential_skip_in_iterations=8 - max_bytes_for_level_base=1073741824 - memtable_prefix_bloom_probes=6 - memtable_prefix_bloom_bits=0 - memtable_prefix_bloom_huge_page_tlb_size=0 - max_successive_merges=0 - arena_block_size=16777216 - min_write_buffer_number_to_merge=1 - target_file_size_multiplier=1 - source_compaction_factor=1 - max_bytes_for_level_multiplier=8 - max_bytes_for_level_multiplier_additional=2:3:5 - compaction_filter_factory=nullptr - max_write_buffer_number=8 - level0_stop_writes_trigger=20 - compression=kSnappyCompression - level0_file_num_compaction_trigger=4 - purge_redundant_kvs_while_flush=true - max_write_buffer_size_to_maintain=0 - memtable_factory=SkipListFactory - max_grandparent_overlap_factor=8 - expanded_compaction_factor=25 - hard_pending_compaction_bytes_limit=137438953472 - inplace_update_num_locks=10000 - level_compaction_dynamic_level_bytes=true - level0_slowdown_writes_trigger=12 - filter_deletes=false - verify_checksums_in_compaction=true - min_partial_merge_operands=2 - paranoid_file_checks=false - target_file_size_base=134217728 - optimize_filters_for_hits=false - merge_operator=PutOperator - compression_per_level=kNoCompression:kNoCompression:kNoCompression:kSnappyCompression:kSnappyCompression:kSnappyCompression - compaction_measure_io_stats=false - prefix_extractor=nullptr - bloom_locality=0 - write_buffer_size=134217728 - disable_auto_compactions=false - inplace_update_support=false - -[TableOptions/BlockBasedTable "default"] - format_version=2 - whole_key_filtering=true - no_block_cache=false - checksum=kCRC32c - filter_policy=rocksdb.BuiltinBloomFilter - block_size_deviation=10 - block_size=8192 - block_restart_interval=16 - cache_index_and_filter_blocks=false - pin_l0_filter_and_index_blocks_in_cache=false - pin_top_level_index_and_filter=false - index_type=kBinarySearch - flush_block_policy_factory=FlushBlockBySizePolicyFactory diff --git a/examples/simple_example.cc b/examples/simple_example.cc deleted file mode 100644 index 2d49c4d14..000000000 
--- a/examples/simple_example.cc +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include -#include - -#include "rocksdb/db.h" -#include "rocksdb/options.h" -#include "rocksdb/slice.h" - -using ROCKSDB_NAMESPACE::DB; -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::PinnableSlice; -using ROCKSDB_NAMESPACE::ReadOptions; -using ROCKSDB_NAMESPACE::Status; -using ROCKSDB_NAMESPACE::WriteBatch; -using ROCKSDB_NAMESPACE::WriteOptions; - -#if defined(OS_WIN) -std::string kDBPath = "C:\\Windows\\TEMP\\rocksdb_simple_example"; -#else -std::string kDBPath = "/tmp/rocksdb_simple_example"; -#endif - -int main() { - DB* db; - Options options; - // Optimize RocksDB. This is the easiest way to get RocksDB to perform well - options.IncreaseParallelism(); - options.OptimizeLevelStyleCompaction(); - // create the DB if it's not already present - options.create_if_missing = true; - - // open DB - Status s = DB::Open(options, kDBPath, &db); - assert(s.ok()); - - // Put key-value - s = db->Put(WriteOptions(), "key1", "value"); - assert(s.ok()); - std::string value; - // get value - s = db->Get(ReadOptions(), "key1", &value); - assert(s.ok()); - assert(value == "value"); - - // atomically apply a set of updates - { - WriteBatch batch; - batch.Delete("key1"); - batch.Put("key2", value); - s = db->Write(WriteOptions(), &batch); - } - - s = db->Get(ReadOptions(), "key1", &value); - assert(s.IsNotFound()); - - db->Get(ReadOptions(), "key2", &value); - assert(value == "value"); - - { - PinnableSlice pinnable_val; - db->Get(ReadOptions(), db->DefaultColumnFamily(), "key2", &pinnable_val); - assert(pinnable_val == "value"); - } - - { - std::string string_val; - // If it cannot pin the value, it copies the value to its internal buffer. - // The intenral buffer could be set during construction. - PinnableSlice pinnable_val(&string_val); - db->Get(ReadOptions(), db->DefaultColumnFamily(), "key2", &pinnable_val); - assert(pinnable_val == "value"); - // If the value is not pinned, the internal buffer must have the value. - assert(pinnable_val.IsPinned() || string_val == "value"); - } - - PinnableSlice pinnable_val; - s = db->Get(ReadOptions(), db->DefaultColumnFamily(), "key1", &pinnable_val); - assert(s.IsNotFound()); - // Reset PinnableSlice after each use and before each reuse - pinnable_val.Reset(); - db->Get(ReadOptions(), db->DefaultColumnFamily(), "key2", &pinnable_val); - assert(pinnable_val == "value"); - pinnable_val.Reset(); - // The Slice pointed by pinnable_val is not valid after this point - - delete db; - - return 0; -} diff --git a/examples/transaction_example.cc b/examples/transaction_example.cc deleted file mode 100644 index 541b13f79..000000000 --- a/examples/transaction_example.cc +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
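[Editor's note] A small addendum to the PinnableSlice usage shown in simple_example.cc above (a sketch, not taken from the deleted file): when one PinnableSlice is reused across several lookups, it must be Reset() between uses, since the slice may still be pinning a block from the previous Get(). It assumes `db` is the open DB from the example and the key names are the ones used there.

// Sketch: reuse a single PinnableSlice across several point lookups.
ROCKSDB_NAMESPACE::PinnableSlice pinned;
for (const char* key : {"key2", "key1"}) {
  pinned.Reset();  // release whatever the previous Get() pinned
  ROCKSDB_NAMESPACE::Status st =
      db->Get(ROCKSDB_NAMESPACE::ReadOptions(), db->DefaultColumnFamily(),
              key, &pinned);
  if (st.ok()) {
    // pinned.data()/pinned.size() stay valid until the next Reset() or Get().
  } else if (!st.IsNotFound()) {
    // A real error; production code should handle it rather than assert.
  }
}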
- - -#include "rocksdb/db.h" -#include "rocksdb/options.h" -#include "rocksdb/slice.h" -#include "rocksdb/utilities/transaction.h" -#include "rocksdb/utilities/transaction_db.h" - -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::ReadOptions; -using ROCKSDB_NAMESPACE::Snapshot; -using ROCKSDB_NAMESPACE::Status; -using ROCKSDB_NAMESPACE::Transaction; -using ROCKSDB_NAMESPACE::TransactionDB; -using ROCKSDB_NAMESPACE::TransactionDBOptions; -using ROCKSDB_NAMESPACE::TransactionOptions; -using ROCKSDB_NAMESPACE::WriteOptions; - -#if defined(OS_WIN) -std::string kDBPath = "C:\\Windows\\TEMP\\rocksdb_transaction_example"; -#else -std::string kDBPath = "/tmp/rocksdb_transaction_example"; -#endif - -int main() { - // open DB - Options options; - TransactionDBOptions txn_db_options; - options.create_if_missing = true; - TransactionDB* txn_db; - - Status s = TransactionDB::Open(options, txn_db_options, kDBPath, &txn_db); - assert(s.ok()); - - WriteOptions write_options; - ReadOptions read_options; - TransactionOptions txn_options; - std::string value; - - //////////////////////////////////////////////////////// - // - // Simple Transaction Example ("Read Committed") - // - //////////////////////////////////////////////////////// - - // Start a transaction - Transaction* txn = txn_db->BeginTransaction(write_options); - assert(txn); - - // Read a key in this transaction - s = txn->Get(read_options, "abc", &value); - assert(s.IsNotFound()); - - // Write a key in this transaction - s = txn->Put("abc", "def"); - assert(s.ok()); - - // Read a key OUTSIDE this transaction. Does not affect txn. - s = txn_db->Get(read_options, "abc", &value); - assert(s.IsNotFound()); - - // Write a key OUTSIDE of this transaction. - // Does not affect txn since this is an unrelated key. - s = txn_db->Put(write_options, "xyz", "zzz"); - assert(s.ok()); - - // Write a key OUTSIDE of this transaction. - // Fail because the key conflicts with the key written in txn. - s = txn_db->Put(write_options, "abc", "def"); - assert(s.subcode() == Status::kLockTimeout); - - // Value for key "xyz" has been committed, can be read in txn. - s = txn->Get(read_options, "xyz", &value); - assert(s.ok()); - assert(value == "zzz"); - - // Commit transaction - s = txn->Commit(); - assert(s.ok()); - delete txn; - - // Value is committed, can be read now. - s = txn_db->Get(read_options, "abc", &value); - assert(s.ok()); - assert(value == "def"); - - //////////////////////////////////////////////////////// - // - // "Repeatable Read" (Snapshot Isolation) Example - // -- Using a single Snapshot - // - //////////////////////////////////////////////////////// - - // Set a snapshot at start of transaction by setting set_snapshot=true - txn_options.set_snapshot = true; - txn = txn_db->BeginTransaction(write_options, txn_options); - - const Snapshot* snapshot = txn->GetSnapshot(); - - // Write a key OUTSIDE of transaction - s = txn_db->Put(write_options, "abc", "xyz"); - assert(s.ok()); - - // Read the latest committed value. - s = txn->Get(read_options, "abc", &value); - assert(s.ok()); - assert(value == "xyz"); - - // Read the snapshotted value. - read_options.snapshot = snapshot; - s = txn->Get(read_options, "abc", &value); - assert(s.ok()); - assert(value == "def"); - - // Attempt to read a key using the snapshot. This will fail since - // the previous write outside this txn conflicts with this read. 
- s = txn->GetForUpdate(read_options, "abc", &value); - assert(s.IsBusy()); - - txn->Rollback(); - - // Snapshot will be released upon deleting the transaction. - delete txn; - // Clear snapshot from read options since it is no longer valid - read_options.snapshot = nullptr; - snapshot = nullptr; - - //////////////////////////////////////////////////////// - // - // "Read Committed" (Monotonic Atomic Views) Example - // --Using multiple Snapshots - // - //////////////////////////////////////////////////////// - - // In this example, we set the snapshot multiple times. This is probably - // only necessary if you have very strict isolation requirements to - // implement. - - // Set a snapshot at start of transaction - txn_options.set_snapshot = true; - txn = txn_db->BeginTransaction(write_options, txn_options); - - // Do some reads and writes to key "x" - read_options.snapshot = txn_db->GetSnapshot(); - s = txn->Get(read_options, "x", &value); - assert(s.IsNotFound()); - s = txn->Put("x", "x"); - assert(s.ok()); - - // Do a write outside of the transaction to key "y" - s = txn_db->Put(write_options, "y", "y1"); - assert(s.ok()); - - // Set a new snapshot in the transaction - txn->SetSnapshot(); - txn->SetSavePoint(); - read_options.snapshot = txn_db->GetSnapshot(); - - // Do some reads and writes to key "y" - // Since the snapshot was advanced, the write done outside of the - // transaction does not conflict. - s = txn->GetForUpdate(read_options, "y", &value); - assert(s.ok()); - assert(value == "y1"); - s = txn->Put("y", "y2"); - assert(s.ok()); - - // Decide we want to revert the last write from this transaction. - txn->RollbackToSavePoint(); - - // Commit. - s = txn->Commit(); - assert(s.ok()); - delete txn; - // Clear snapshot from read options since it is no longer valid - read_options.snapshot = nullptr; - - // db state is at the save point. - s = txn_db->Get(read_options, "x", &value); - assert(s.ok()); - assert(value == "x"); - - s = txn_db->Get(read_options, "y", &value); - assert(s.ok()); - assert(value == "y1"); - - // Cleanup - delete txn_db; - ROCKSDB_NAMESPACE::DestroyDB(kDBPath, options); - return 0; -} - diff --git a/file/delete_scheduler_test.cc b/file/delete_scheduler_test.cc deleted file mode 100644 index 74982dbee..000000000 --- a/file/delete_scheduler_test.cc +++ /dev/null @@ -1,717 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
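[Editor's note] One pattern the transaction example (transaction_example.cc) above does not show, added here as a hedged sketch rather than content from the deleted file: a pessimistic transaction whose GetForUpdate() or Commit() hits a lock timeout or a conflict can be rolled back and retried. It assumes `txn_db`, `write_options`, `read_options`, and `txn_options` are set up as in the example; the key name and retry count are placeholders.

// Sketch: retry loop around a read-modify-write transaction.
bool committed = false;
for (int attempt = 0; attempt < 3 && !committed; attempt++) {
  ROCKSDB_NAMESPACE::Transaction* t =
      txn_db->BeginTransaction(write_options, txn_options);
  std::string v;
  ROCKSDB_NAMESPACE::Status st = t->GetForUpdate(read_options, "counter", &v);
  if (st.ok() || st.IsNotFound()) {
    st = t->Put("counter", v + "x");  // placeholder "modify" step
  }
  if (st.ok()) {
    st = t->Commit();
  }
  if (st.ok()) {
    committed = true;
  } else if (st.IsBusy() || st.IsTimedOut()) {
    // Conflict or lock timeout: drop this attempt's locks and try again.
    t->Rollback();
  }
  delete t;
  // Non-retryable errors in `st` should be surfaced to the caller.
}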
- -#include "file/delete_scheduler.h" - -#include -#include -#include -#include - -#include "file/file_util.h" -#include "file/sst_file_manager_impl.h" -#include "rocksdb/env.h" -#include "rocksdb/options.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "util/string_util.h" - - -namespace ROCKSDB_NAMESPACE { - -class DeleteSchedulerTest : public testing::Test { - public: - DeleteSchedulerTest() : env_(Env::Default()) { - const int kNumDataDirs = 3; - dummy_files_dirs_.reserve(kNumDataDirs); - for (size_t i = 0; i < kNumDataDirs; ++i) { - dummy_files_dirs_.emplace_back( - test::PerThreadDBPath(env_, "delete_scheduler_dummy_data_dir") + - std::to_string(i)); - DestroyAndCreateDir(dummy_files_dirs_.back()); - } - stats_ = ROCKSDB_NAMESPACE::CreateDBStatistics(); - } - - ~DeleteSchedulerTest() override { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - for (const auto& dummy_files_dir : dummy_files_dirs_) { - DestroyDir(env_, dummy_files_dir); - } - } - - void DestroyAndCreateDir(const std::string& dir) { - ASSERT_OK(DestroyDir(env_, dir)); - EXPECT_OK(env_->CreateDir(dir)); - } - - int CountNormalFiles(size_t dummy_files_dirs_idx = 0) { - std::vector files_in_dir; - EXPECT_OK(env_->GetChildren(dummy_files_dirs_[dummy_files_dirs_idx], - &files_in_dir)); - - int normal_cnt = 0; - for (auto& f : files_in_dir) { - if (!DeleteScheduler::IsTrashFile(f)) { - normal_cnt++; - } - } - return normal_cnt; - } - - int CountTrashFiles(size_t dummy_files_dirs_idx = 0) { - std::vector files_in_dir; - EXPECT_OK(env_->GetChildren(dummy_files_dirs_[dummy_files_dirs_idx], - &files_in_dir)); - - int trash_cnt = 0; - for (auto& f : files_in_dir) { - if (DeleteScheduler::IsTrashFile(f)) { - trash_cnt++; - } - } - return trash_cnt; - } - - std::string NewDummyFile(const std::string& file_name, uint64_t size = 1024, - size_t dummy_files_dirs_idx = 0) { - std::string file_path = - dummy_files_dirs_[dummy_files_dirs_idx] + "/" + file_name; - std::unique_ptr f; - env_->NewWritableFile(file_path, &f, EnvOptions()); - std::string data(size, 'A'); - EXPECT_OK(f->Append(data)); - EXPECT_OK(f->Close()); - sst_file_mgr_->OnAddFile(file_path); - return file_path; - } - - void NewDeleteScheduler() { - // Tests in this file are for DeleteScheduler component and don't create any - // DBs, so we need to set max_trash_db_ratio to 100% (instead of default - // 25%) - sst_file_mgr_.reset( - new SstFileManagerImpl(env_->GetSystemClock(), env_->GetFileSystem(), - nullptr, rate_bytes_per_sec_, - /* max_trash_db_ratio= */ 1.1, 128 * 1024)); - delete_scheduler_ = sst_file_mgr_->delete_scheduler(); - sst_file_mgr_->SetStatisticsPtr(stats_); - } - - Env* env_; - std::vector dummy_files_dirs_; - int64_t rate_bytes_per_sec_; - DeleteScheduler* delete_scheduler_; - std::unique_ptr sst_file_mgr_; - std::shared_ptr stats_; -}; - -// Test the basic functionality of DeleteScheduler (Rate Limiting). 
-// 1- Create 100 dummy files -// 2- Delete the 100 dummy files using DeleteScheduler -// --- Hold DeleteScheduler::BackgroundEmptyTrash --- -// 3- Wait for DeleteScheduler to delete all files in trash -// 4- Verify that BackgroundEmptyTrash used to correct penlties for the files -// 5- Make sure that all created files were completely deleted -TEST_F(DeleteSchedulerTest, BasicRateLimiting) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DeleteSchedulerTest::BasicRateLimiting:1", - "DeleteScheduler::BackgroundEmptyTrash"}, - }); - - std::vector penalties; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::BackgroundEmptyTrash:Wait", - [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); - int dir_synced = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile::AfterSyncDir", [&](void* arg) { - dir_synced++; - std::string* dir = reinterpret_cast(arg); - EXPECT_EQ(dummy_files_dirs_[0], *dir); - }); - - int num_files = 100; // 100 files - uint64_t file_size = 1024; // every file is 1 kb - std::vector delete_kbs_per_sec = {512, 200, 100, 50, 25}; - - for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { - penalties.clear(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndCreateDir(dummy_files_dirs_[0]); - rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; - NewDeleteScheduler(); - - dir_synced = 0; - // Create 100 dummy files, every file is 1 Kb - std::vector generated_files; - for (int i = 0; i < num_files; i++) { - std::string file_name = "file" + std::to_string(i) + ".data"; - generated_files.push_back(NewDummyFile(file_name, file_size)); - } - - // Delete dummy files and measure time spent to empty trash - for (int i = 0; i < num_files; i++) { - ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i], - dummy_files_dirs_[0])); - } - ASSERT_EQ(CountNormalFiles(), 0); - - uint64_t delete_start_time = env_->NowMicros(); - TEST_SYNC_POINT("DeleteSchedulerTest::BasicRateLimiting:1"); - delete_scheduler_->WaitForEmptyTrash(); - uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; - - auto bg_errors = delete_scheduler_->GetBackgroundErrors(); - ASSERT_EQ(bg_errors.size(), 0); - - uint64_t total_files_size = 0; - uint64_t expected_penlty = 0; - ASSERT_EQ(penalties.size(), num_files); - for (int i = 0; i < num_files; i++) { - total_files_size += file_size; - expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec_); - ASSERT_EQ(expected_penlty, penalties[i]); - } - ASSERT_GT(time_spent_deleting, expected_penlty * 0.9); - - ASSERT_EQ(num_files, dir_synced); - - ASSERT_EQ(CountTrashFiles(), 0); - ASSERT_EQ(num_files, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); - ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(DeleteSchedulerTest, MultiDirectoryDeletionsScheduled) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DeleteSchedulerTest::MultiDbPathDeletionsScheduled:1", - "DeleteScheduler::BackgroundEmptyTrash"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - rate_bytes_per_sec_ = 1 << 20; // 1MB - NewDeleteScheduler(); - - // Generate dummy files in multiple directories - const size_t kNumFiles = dummy_files_dirs_.size(); - const size_t kFileSize = 1 << 10; // 1KB - std::vector generated_files; - for (size_t 
i = 0; i < kNumFiles; i++) { - generated_files.push_back(NewDummyFile("file", kFileSize, i)); - ASSERT_EQ(1, CountNormalFiles(i)); - } - - // Mark dummy files as trash - for (size_t i = 0; i < kNumFiles; i++) { - ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i], "")); - ASSERT_EQ(0, CountNormalFiles(i)); - ASSERT_EQ(1, CountTrashFiles(i)); - } - TEST_SYNC_POINT("DeleteSchedulerTest::MultiDbPathDeletionsScheduled:1"); - delete_scheduler_->WaitForEmptyTrash(); - - // Verify dummy files eventually got deleted - for (size_t i = 0; i < kNumFiles; i++) { - ASSERT_EQ(0, CountNormalFiles(i)); - ASSERT_EQ(0, CountTrashFiles(i)); - } - - ASSERT_EQ(kNumFiles, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); - ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -// Same as the BasicRateLimiting test but delete files in multiple threads. -// 1- Create 100 dummy files -// 2- Delete the 100 dummy files using DeleteScheduler using 10 threads -// --- Hold DeleteScheduler::BackgroundEmptyTrash --- -// 3- Wait for DeleteScheduler to delete all files in queue -// 4- Verify that BackgroundEmptyTrash used to correct penlties for the files -// 5- Make sure that all created files were completely deleted -TEST_F(DeleteSchedulerTest, RateLimitingMultiThreaded) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DeleteSchedulerTest::RateLimitingMultiThreaded:1", - "DeleteScheduler::BackgroundEmptyTrash"}, - }); - - std::vector penalties; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::BackgroundEmptyTrash:Wait", - [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); - - int thread_cnt = 10; - int num_files = 10; // 10 files per thread - uint64_t file_size = 1024; // every file is 1 kb - - std::vector delete_kbs_per_sec = {512, 200, 100, 50, 25}; - for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { - penalties.clear(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndCreateDir(dummy_files_dirs_[0]); - rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; - NewDeleteScheduler(); - - // Create 100 dummy files, every file is 1 Kb - std::vector generated_files; - for (int i = 0; i < num_files * thread_cnt; i++) { - std::string file_name = "file" + std::to_string(i) + ".data"; - generated_files.push_back(NewDummyFile(file_name, file_size)); - } - - // Delete dummy files using 10 threads and measure time spent to empty trash - std::atomic thread_num(0); - std::vector threads; - std::function delete_thread = [&]() { - int idx = thread_num.fetch_add(1); - int range_start = idx * num_files; - int range_end = range_start + num_files; - for (int j = range_start; j < range_end; j++) { - ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[j], "")); - } - }; - - for (int i = 0; i < thread_cnt; i++) { - threads.emplace_back(delete_thread); - } - - for (size_t i = 0; i < threads.size(); i++) { - threads[i].join(); - } - - uint64_t delete_start_time = env_->NowMicros(); - TEST_SYNC_POINT("DeleteSchedulerTest::RateLimitingMultiThreaded:1"); - delete_scheduler_->WaitForEmptyTrash(); - uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; - - auto bg_errors = delete_scheduler_->GetBackgroundErrors(); - ASSERT_EQ(bg_errors.size(), 0); - - uint64_t total_files_size = 0; - uint64_t expected_penlty = 0; - ASSERT_EQ(penalties.size(), num_files * thread_cnt); - 
for (int i = 0; i < num_files * thread_cnt; i++) { - total_files_size += file_size; - expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec_); - ASSERT_EQ(expected_penlty, penalties[i]); - } - ASSERT_GT(time_spent_deleting, expected_penlty * 0.9); - - ASSERT_EQ(CountNormalFiles(), 0); - ASSERT_EQ(CountTrashFiles(), 0); - ASSERT_EQ(num_files * thread_cnt, - stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); - ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -// Disable rate limiting by setting rate_bytes_per_sec_ to 0 and make sure -// that when DeleteScheduler delete a file it delete it immediately and don't -// move it to trash -TEST_F(DeleteSchedulerTest, DisableRateLimiting) { - int bg_delete_file = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void* /*arg*/) { bg_delete_file++; }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - rate_bytes_per_sec_ = 0; - NewDeleteScheduler(); - constexpr int num_files = 10; - - for (int i = 0; i < num_files; i++) { - // Every file we delete will be deleted immediately - std::string dummy_file = NewDummyFile("dummy.data"); - ASSERT_OK(delete_scheduler_->DeleteFile(dummy_file, "")); - ASSERT_TRUE(env_->FileExists(dummy_file).IsNotFound()); - ASSERT_EQ(CountNormalFiles(), 0); - ASSERT_EQ(CountTrashFiles(), 0); - } - - ASSERT_EQ(bg_delete_file, 0); - ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); - ASSERT_EQ(num_files, - stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -// Testing that moving files to trash with the same name is not a problem -// 1- Create 10 files with the same name "conflict.data" -// 2- Delete the 10 files using DeleteScheduler -// 3- Make sure that trash directory contain 10 files ("conflict.data" x 10) -// --- Hold DeleteScheduler::BackgroundEmptyTrash --- -// 4- Make sure that files are deleted from trash -TEST_F(DeleteSchedulerTest, ConflictNames) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DeleteSchedulerTest::ConflictNames:1", - "DeleteScheduler::BackgroundEmptyTrash"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec - NewDeleteScheduler(); - - // Create "conflict.data" and move it to trash 10 times - for (int i = 0; i < 10; i++) { - std::string dummy_file = NewDummyFile("conflict.data"); - ASSERT_OK(delete_scheduler_->DeleteFile(dummy_file, "")); - } - ASSERT_EQ(CountNormalFiles(), 0); - // 10 files ("conflict.data" x 10) in trash - ASSERT_EQ(CountTrashFiles(), 10); - - // Hold BackgroundEmptyTrash - TEST_SYNC_POINT("DeleteSchedulerTest::ConflictNames:1"); - delete_scheduler_->WaitForEmptyTrash(); - ASSERT_EQ(CountTrashFiles(), 0); - - auto bg_errors = delete_scheduler_->GetBackgroundErrors(); - ASSERT_EQ(bg_errors.size(), 0); - ASSERT_EQ(10, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); - ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -// 1- Create 10 dummy files -// 2- Delete the 10 files using DeleteScheduler (move them to trsah) -// 3- Delete the 10 files directly (using env_->DeleteFile) -// --- Hold DeleteScheduler::BackgroundEmptyTrash --- -// 4- Make sure that DeleteScheduler failed to delete the 10 
files and -// reported 10 background errors -TEST_F(DeleteSchedulerTest, BackgroundError) { - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DeleteSchedulerTest::BackgroundError:1", - "DeleteScheduler::BackgroundEmptyTrash"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec - NewDeleteScheduler(); - - // Generate 10 dummy files and move them to trash - for (int i = 0; i < 10; i++) { - std::string file_name = "data_" + std::to_string(i) + ".data"; - ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name), "")); - } - ASSERT_EQ(CountNormalFiles(), 0); - ASSERT_EQ(CountTrashFiles(), 10); - - // Delete 10 files from trash, this will cause background errors in - // BackgroundEmptyTrash since we already deleted the files it was - // goind to delete - for (int i = 0; i < 10; i++) { - std::string file_name = "data_" + std::to_string(i) + ".data.trash"; - ASSERT_OK(env_->DeleteFile(dummy_files_dirs_[0] + "/" + file_name)); - } - - // Hold BackgroundEmptyTrash - TEST_SYNC_POINT("DeleteSchedulerTest::BackgroundError:1"); - delete_scheduler_->WaitForEmptyTrash(); - auto bg_errors = delete_scheduler_->GetBackgroundErrors(); - ASSERT_EQ(bg_errors.size(), 10); - for (const auto& it : bg_errors) { - ASSERT_TRUE(it.second.IsPathNotFound()); - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -// 1- Create kTestFileNum dummy files -// 2- Delete kTestFileNum dummy files using DeleteScheduler -// 3- Wait for DeleteScheduler to delete all files in queue -// 4- Make sure all files in trash directory were deleted -// 5- Repeat previous steps 5 times -TEST_F(DeleteSchedulerTest, StartBGEmptyTrashMultipleTimes) { - constexpr int kTestFileNum = 10; - std::atomic_int bg_delete_file = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void* /*arg*/) { bg_delete_file++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec - NewDeleteScheduler(); - - // If trash file is generated faster than deleting, delete_scheduler will - // delete it directly instead of waiting for background trash empty thread to - // clean it. Set the ratio higher to avoid that. 
- sst_file_mgr_->SetMaxTrashDBRatio(kTestFileNum + 1); - - // Move files to trash, wait for empty trash, start again - for (int run = 1; run <= 5; run++) { - // Generate kTestFileNum dummy files and move them to trash - for (int i = 0; i < kTestFileNum; i++) { - std::string file_name = "data_" + std::to_string(i) + ".data"; - ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name), "")); - } - ASSERT_EQ(CountNormalFiles(), 0); - delete_scheduler_->WaitForEmptyTrash(); - ASSERT_EQ(bg_delete_file, kTestFileNum * run); - ASSERT_EQ(CountTrashFiles(), 0); - - auto bg_errors = delete_scheduler_->GetBackgroundErrors(); - ASSERT_EQ(bg_errors.size(), 0); - ASSERT_EQ(kTestFileNum, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); - ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); - } - - ASSERT_EQ(bg_delete_file, 5 * kTestFileNum); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); -} - -TEST_F(DeleteSchedulerTest, DeletePartialFile) { - int bg_delete_file = 0; - int bg_fsync = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void*) { bg_delete_file++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:Fsync", [&](void*) { bg_fsync++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec - NewDeleteScheduler(); - - // Should delete in 4 batch - ASSERT_OK( - delete_scheduler_->DeleteFile(NewDummyFile("data_1", 500 * 1024), "")); - ASSERT_OK( - delete_scheduler_->DeleteFile(NewDummyFile("data_2", 100 * 1024), "")); - // Should delete in 2 batch - ASSERT_OK( - delete_scheduler_->DeleteFile(NewDummyFile("data_2", 200 * 1024), "")); - - delete_scheduler_->WaitForEmptyTrash(); - - auto bg_errors = delete_scheduler_->GetBackgroundErrors(); - ASSERT_EQ(bg_errors.size(), 0); - ASSERT_EQ(7, bg_delete_file); - ASSERT_EQ(4, bg_fsync); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); -} - -#ifdef OS_LINUX -TEST_F(DeleteSchedulerTest, NoPartialDeleteWithLink) { - int bg_delete_file = 0; - int bg_fsync = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void*) { bg_delete_file++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:Fsync", [&](void*) { bg_fsync++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec - NewDeleteScheduler(); - - std::string file1 = NewDummyFile("data_1", 500 * 1024); - std::string file2 = NewDummyFile("data_2", 100 * 1024); - - ASSERT_OK(env_->LinkFile(file1, dummy_files_dirs_[0] + "/data_1b")); - ASSERT_OK(env_->LinkFile(file2, dummy_files_dirs_[0] + "/data_2b")); - - // Should delete in 4 batch if there is no hardlink - ASSERT_OK(delete_scheduler_->DeleteFile(file1, "")); - ASSERT_OK(delete_scheduler_->DeleteFile(file2, "")); - - delete_scheduler_->WaitForEmptyTrash(); - - auto bg_errors = delete_scheduler_->GetBackgroundErrors(); - ASSERT_EQ(bg_errors.size(), 0); - ASSERT_EQ(2, bg_delete_file); - ASSERT_EQ(0, bg_fsync); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); -} -#endif - -// 1- Create a DeleteScheduler with very slow rate limit (1 Byte / sec) -// 2- Delete 100 files using DeleteScheduler -// 3- Delete the DeleteScheduler (call the destructor while queue is not empty) -// 4- Make sure that not all files were deleted 
from trash and that -// DeleteScheduler background thread did not delete all files -TEST_F(DeleteSchedulerTest, DestructorWithNonEmptyQueue) { - int bg_delete_file = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void* /*arg*/) { bg_delete_file++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - rate_bytes_per_sec_ = 1; // 1 Byte / sec - NewDeleteScheduler(); - - for (int i = 0; i < 100; i++) { - std::string file_name = "data_" + std::to_string(i) + ".data"; - ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name), "")); - } - - // Deleting 100 files will need >28 hours to delete - // we will delete the DeleteScheduler while delete queue is not empty - sst_file_mgr_.reset(); - - ASSERT_LT(bg_delete_file, 100); - ASSERT_GT(CountTrashFiles(), 0); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DeleteSchedulerTest, DISABLED_DynamicRateLimiting1) { - std::vector penalties; - int bg_delete_file = 0; - int fg_delete_file = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void* /*arg*/) { bg_delete_file++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { fg_delete_file++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::BackgroundEmptyTrash:Wait", - [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ - {"DeleteSchedulerTest::DynamicRateLimiting1:1", - "DeleteScheduler::BackgroundEmptyTrash"}, - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - rate_bytes_per_sec_ = 0; // Disable rate limiting initially - NewDeleteScheduler(); - - int num_files = 10; // 10 files - uint64_t file_size = 1024; // every file is 1 kb - - std::vector delete_kbs_per_sec = {512, 200, 0, 100, 50, -2, 25}; - for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { - penalties.clear(); - bg_delete_file = 0; - fg_delete_file = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - DestroyAndCreateDir(dummy_files_dirs_[0]); - rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; - delete_scheduler_->SetRateBytesPerSecond(rate_bytes_per_sec_); - - // Create 100 dummy files, every file is 1 Kb - std::vector generated_files; - for (int i = 0; i < num_files; i++) { - std::string file_name = "file" + std::to_string(i) + ".data"; - generated_files.push_back(NewDummyFile(file_name, file_size)); - } - - // Delete dummy files and measure time spent to empty trash - for (int i = 0; i < num_files; i++) { - ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i], "")); - } - ASSERT_EQ(CountNormalFiles(), 0); - - if (rate_bytes_per_sec_ > 0) { - uint64_t delete_start_time = env_->NowMicros(); - TEST_SYNC_POINT("DeleteSchedulerTest::DynamicRateLimiting1:1"); - delete_scheduler_->WaitForEmptyTrash(); - uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; - - auto bg_errors = delete_scheduler_->GetBackgroundErrors(); - ASSERT_EQ(bg_errors.size(), 0); - - uint64_t total_files_size = 0; - uint64_t expected_penlty = 0; - ASSERT_EQ(penalties.size(), num_files); - for (int i = 0; i < num_files; i++) { - total_files_size += file_size; - expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec_); - ASSERT_EQ(expected_penlty, 
penalties[i]); - } - ASSERT_GT(time_spent_deleting, expected_penlty * 0.9); - ASSERT_EQ(bg_delete_file, num_files); - ASSERT_EQ(fg_delete_file, 0); - } else { - ASSERT_EQ(penalties.size(), 0); - ASSERT_EQ(bg_delete_file, 0); - ASSERT_EQ(fg_delete_file, num_files); - } - - ASSERT_EQ(CountTrashFiles(), 0); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(DeleteSchedulerTest, ImmediateDeleteOn25PercDBSize) { - int bg_delete_file = 0; - int fg_delete_file = 0; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteTrashFile:DeleteFile", - [&](void* /*arg*/) { bg_delete_file++; }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { fg_delete_file++; }); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - int num_files = 100; // 100 files - uint64_t file_size = 1024 * 10; // 100 KB as a file size - rate_bytes_per_sec_ = 1; // 1 byte per sec (very slow trash delete) - - NewDeleteScheduler(); - delete_scheduler_->SetMaxTrashDBRatio(0.25); - - std::vector generated_files; - for (int i = 0; i < num_files; i++) { - std::string file_name = "file" + std::to_string(i) + ".data"; - generated_files.push_back(NewDummyFile(file_name, file_size)); - } - - for (std::string& file_name : generated_files) { - ASSERT_OK(delete_scheduler_->DeleteFile(file_name, "")); - } - - // When we end up with 26 files in trash we will start - // deleting new files immediately - ASSERT_EQ(fg_delete_file, 74); - ASSERT_EQ(26, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); - ASSERT_EQ(74, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(DeleteSchedulerTest, IsTrashCheck) { - // Trash files - ASSERT_TRUE(DeleteScheduler::IsTrashFile("x.trash")); - ASSERT_TRUE(DeleteScheduler::IsTrashFile(".trash")); - ASSERT_TRUE(DeleteScheduler::IsTrashFile("abc.sst.trash")); - ASSERT_TRUE(DeleteScheduler::IsTrashFile("/a/b/c/abc..sst.trash")); - ASSERT_TRUE(DeleteScheduler::IsTrashFile("log.trash")); - ASSERT_TRUE(DeleteScheduler::IsTrashFile("^^^^^.log.trash")); - ASSERT_TRUE(DeleteScheduler::IsTrashFile("abc.t.trash")); - - // Not trash files - ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.sst")); - ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.txt")); - ASSERT_FALSE(DeleteScheduler::IsTrashFile("/a/b/c/abc.sst")); - ASSERT_FALSE(DeleteScheduler::IsTrashFile("/a/b/c/abc.sstrash")); - ASSERT_FALSE(DeleteScheduler::IsTrashFile("^^^^^.trashh")); - ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.ttrash")); - ASSERT_FALSE(DeleteScheduler::IsTrashFile(".ttrash")); - ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.trashx")); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc deleted file mode 100644 index 488e037ff..000000000 --- a/file/prefetch_test.cc +++ /dev/null @@ -1,2285 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
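[Editor's note] The delete scheduler tests above drive SstFileManagerImpl directly; in an application, the same rate-limited "move to trash, then trickle-delete" behavior is normally enabled through the public SstFileManager API on DBOptions. A minimal sketch follows; the 1 MB/s rate and the DB path are illustrative values, not taken from this repository.

#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/sst_file_manager.h"

int main() {
  ROCKSDB_NAMESPACE::Options options;
  options.create_if_missing = true;
  // Obsolete SST files are renamed to *.trash and unlinked in the background
  // at roughly rate_bytes_per_sec instead of being deleted immediately.
  options.sst_file_manager.reset(ROCKSDB_NAMESPACE::NewSstFileManager(
      ROCKSDB_NAMESPACE::Env::Default(), /*info_log=*/nullptr,
      /*trash_dir=*/"", /*rate_bytes_per_sec=*/1024 * 1024));

  ROCKSDB_NAMESPACE::DB* db = nullptr;
  ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DB::Open(
      options, "/tmp/rocksdb_rate_limited_delete", &db);
  if (s.ok()) {
    delete db;
  }
  return 0;
}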
- -#include "db/db_test_util.h" -#include "file/file_prefetch_buffer.h" -#include "file/file_util.h" -#include "rocksdb/file_system.h" -#include "test_util/sync_point.h" -#ifdef GFLAGS -#include "tools/io_tracer_parser_tool.h" -#endif -#include "util/random.h" - -namespace { -static bool enable_io_uring = true; -extern "C" bool RocksDbIOUringEnable() { return enable_io_uring; } -} // namespace - -namespace ROCKSDB_NAMESPACE { - -class MockFS; - -class MockRandomAccessFile : public FSRandomAccessFileOwnerWrapper { - public: - MockRandomAccessFile(std::unique_ptr& file, - bool support_prefetch, std::atomic_int& prefetch_count) - : FSRandomAccessFileOwnerWrapper(std::move(file)), - support_prefetch_(support_prefetch), - prefetch_count_(prefetch_count) {} - - IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options, - IODebugContext* dbg) override { - if (support_prefetch_) { - prefetch_count_.fetch_add(1); - return target()->Prefetch(offset, n, options, dbg); - } else { - return IOStatus::NotSupported("Prefetch not supported"); - } - } - - private: - const bool support_prefetch_; - std::atomic_int& prefetch_count_; -}; - -class MockFS : public FileSystemWrapper { - public: - explicit MockFS(const std::shared_ptr& wrapped, - bool support_prefetch) - : FileSystemWrapper(wrapped), support_prefetch_(support_prefetch) {} - - static const char* kClassName() { return "MockFS"; } - const char* Name() const override { return kClassName(); } - - IOStatus NewRandomAccessFile(const std::string& fname, - const FileOptions& opts, - std::unique_ptr* result, - IODebugContext* dbg) override { - std::unique_ptr file; - IOStatus s; - s = target()->NewRandomAccessFile(fname, opts, &file, dbg); - result->reset( - new MockRandomAccessFile(file, support_prefetch_, prefetch_count_)); - return s; - } - - void ClearPrefetchCount() { prefetch_count_ = 0; } - - bool IsPrefetchCalled() { return prefetch_count_ > 0; } - - int GetPrefetchCount() { - return prefetch_count_.load(std::memory_order_relaxed); - } - - private: - const bool support_prefetch_; - std::atomic_int prefetch_count_{0}; -}; - -class PrefetchTest - : public DBTestBase, - public ::testing::WithParamInterface> { - public: - PrefetchTest() : DBTestBase("prefetch_test", true) {} - - void SetGenericOptions(Env* env, bool use_direct_io, Options& options) { - options = CurrentOptions(); - options.write_buffer_size = 1024; - options.create_if_missing = true; - options.compression = kNoCompression; - options.env = env; - options.disable_auto_compactions = true; - if (use_direct_io) { - options.use_direct_reads = true; - options.use_direct_io_for_flush_and_compaction = true; - } - } - - void SetBlockBasedTableOptions(BlockBasedTableOptions& table_options) { - table_options.no_block_cache = true; - table_options.cache_index_and_filter_blocks = false; - table_options.metadata_block_size = 1024; - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } -}; - -INSTANTIATE_TEST_CASE_P(PrefetchTest, PrefetchTest, - ::testing::Combine(::testing::Bool(), - ::testing::Bool())); - -std::string BuildKey(int num, std::string postfix = "") { - return "my_key_" + std::to_string(num) + postfix; -} - -// This test verifies the basic functionality of prefetching. 
-TEST_P(PrefetchTest, Basic) { - // First param is if the mockFS support_prefetch or not - bool support_prefetch = - std::get<0>(GetParam()) && - test::IsPrefetchSupported(env_->GetFileSystem(), dbname_); - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), support_prefetch); - - // Second param is if directIO is enabled or not - bool use_direct_io = std::get<1>(GetParam()); - - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - options.statistics = CreateDBStatistics(); - - const int kNumKeys = 1100; - int buff_prefetch_count = 0; - SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - // create first key range - WriteBatch batch; - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), "value for range 1 key")); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - // create second key range - batch.Clear(); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i, "key2"), "value for range 2 key")); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - // delete second key range - batch.Clear(); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Delete(BuildKey(i, "key2"))); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - // compact database - std::string start_key = BuildKey(0); - std::string end_key = BuildKey(kNumKeys - 1); - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - HistogramData prev_table_open_prefetch_tail_read; - options.statistics->histogramData(TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, - &prev_table_open_prefetch_tail_read); - const uint64_t prev_table_open_prefetch_tail_miss = - options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_MISS); - const uint64_t prev_table_open_prefetch_tail_hit = - options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_HIT); - - // commenting out the line below causes the example to work correctly - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); - - HistogramData cur_table_open_prefetch_tail_read; - options.statistics->histogramData(TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, - &cur_table_open_prefetch_tail_read); - const uint64_t cur_table_open_prefetch_tail_miss = - options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_MISS); - const uint64_t cur_table_open_prefetch_tail_hit = - options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_HIT); - - if (support_prefetch && !use_direct_io) { - // If underline file system supports prefetch, and directIO is not enabled - // make sure prefetch() is called and FilePrefetchBuffer is not used. - ASSERT_TRUE(fs->IsPrefetchCalled()); - fs->ClearPrefetchCount(); - ASSERT_EQ(0, buff_prefetch_count); - } else { - // If underline file system doesn't support prefetch, or directIO is - // enabled, make sure prefetch() is not called and FilePrefetchBuffer is - // used. 
- ASSERT_FALSE(fs->IsPrefetchCalled()); - ASSERT_GT(buff_prefetch_count, 0); - ASSERT_GT(cur_table_open_prefetch_tail_read.count, - prev_table_open_prefetch_tail_read.count); - ASSERT_GT(cur_table_open_prefetch_tail_hit, - prev_table_open_prefetch_tail_hit); - ASSERT_GE(cur_table_open_prefetch_tail_miss, - prev_table_open_prefetch_tail_miss); - buff_prefetch_count = 0; - } - - // count the keys - { - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - int num_keys = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - num_keys++; - } - } - - // Make sure prefetch is called only if file system support prefetch. - if (support_prefetch && !use_direct_io) { - ASSERT_TRUE(fs->IsPrefetchCalled()); - fs->ClearPrefetchCount(); - ASSERT_EQ(0, buff_prefetch_count); - } else { - ASSERT_FALSE(fs->IsPrefetchCalled()); - ASSERT_GT(buff_prefetch_count, 0); - buff_prefetch_count = 0; - } - Close(); -} - -// This test verifies BlockBasedTableOptions.max_auto_readahead_size is -// configured dynamically. -TEST_P(PrefetchTest, ConfigureAutoMaxReadaheadSize) { - // First param is if the mockFS support_prefetch or not - bool support_prefetch = - std::get<0>(GetParam()) && - test::IsPrefetchSupported(env_->GetFileSystem(), dbname_); - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), support_prefetch); - - // Second param is if directIO is enabled or not - bool use_direct_io = std::get<1>(GetParam()); - - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - table_options.max_auto_readahead_size = 0; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - int buff_prefetch_count = 0; - SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - - // DB open will create table readers unless we reduce the table cache - // capacity. SanitizeOptions will set max_open_files to minimum of 20. Table - // cache is allocated with max_open_files - 10 as capacity. So override - // max_open_files to 10 so table cache capacity will become 0. This will - // prevent file open during DB open and force the file to be opened during - // Iteration. - SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = (int*)arg; - *max_open_files = 11; - }); - - SyncPoint::GetInstance()->EnableProcessing(); - - Status s = TryReopen(options); - - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - Random rnd(309); - int key_count = 0; - const int num_keys_per_level = 100; - // Level 0 : Keys in range [0, 99], Level 1:[100, 199], Level 2:[200, 299]. 
- for (int level = 2; level >= 0; level--) { - key_count = level * num_keys_per_level; - for (int i = 0; i < num_keys_per_level; ++i) { - ASSERT_OK(Put(Key(key_count++), rnd.RandomString(500))); - } - ASSERT_OK(Flush()); - MoveFilesToLevel(level); - } - Close(); - std::vector buff_prefectch_level_count = {0, 0, 0}; - TryReopen(options); - { - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - fs->ClearPrefetchCount(); - buff_prefetch_count = 0; - - for (int level = 2; level >= 0; level--) { - key_count = level * num_keys_per_level; - switch (level) { - case 0: - // max_auto_readahead_size is set 0 so data and index blocks are not - // prefetched. - ASSERT_OK(db_->SetOptions( - {{"block_based_table_factory", "{max_auto_readahead_size=0;}"}})); - break; - case 1: - // max_auto_readahead_size is set less than - // initial_auto_readahead_size. So readahead_size remains equal to - // max_auto_readahead_size. - ASSERT_OK(db_->SetOptions({{"block_based_table_factory", - "{max_auto_readahead_size=4096;}"}})); - break; - case 2: - ASSERT_OK(db_->SetOptions({{"block_based_table_factory", - "{max_auto_readahead_size=65536;}"}})); - break; - default: - assert(false); - } - - for (int i = 0; i < num_keys_per_level; ++i) { - iter->Seek(Key(key_count++)); - iter->Next(); - } - - buff_prefectch_level_count[level] = buff_prefetch_count; - if (support_prefetch && !use_direct_io) { - if (level == 0) { - ASSERT_FALSE(fs->IsPrefetchCalled()); - } else { - ASSERT_TRUE(fs->IsPrefetchCalled()); - } - fs->ClearPrefetchCount(); - } else { - ASSERT_FALSE(fs->IsPrefetchCalled()); - if (level == 0) { - ASSERT_EQ(buff_prefetch_count, 0); - } else { - ASSERT_GT(buff_prefetch_count, 0); - } - buff_prefetch_count = 0; - } - } - } - - if (!support_prefetch) { - ASSERT_GT(buff_prefectch_level_count[1], buff_prefectch_level_count[2]); - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - Close(); -} - -// This test verifies BlockBasedTableOptions.initial_auto_readahead_size is -// configured dynamically. -TEST_P(PrefetchTest, ConfigureInternalAutoReadaheadSize) { - // First param is if the mockFS support_prefetch or not - bool support_prefetch = - std::get<0>(GetParam()) && - test::IsPrefetchSupported(env_->GetFileSystem(), dbname_); - - // Second param is if directIO is enabled or not - bool use_direct_io = std::get<1>(GetParam()); - - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), support_prefetch); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - table_options.initial_auto_readahead_size = 0; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - int buff_prefetch_count = 0; - // DB open will create table readers unless we reduce the table cache - // capacity. SanitizeOptions will set max_open_files to minimum of 20. - // Table cache is allocated with max_open_files - 10 as capacity. So - // override max_open_files to 10 so table cache capacity will become 0. - // This will prevent file open during DB open and force the file to be - // opened during Iteration. 
- SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { - int* max_open_files = (int*)arg; - *max_open_files = 11; - }); - - SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - - SyncPoint::GetInstance()->EnableProcessing(); - - SyncPoint::GetInstance()->EnableProcessing(); - - Status s = TryReopen(options); - - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - Random rnd(309); - int key_count = 0; - const int num_keys_per_level = 100; - // Level 0 : Keys in range [0, 99], Level 1:[100, 199], Level 2:[200, 299]. - for (int level = 2; level >= 0; level--) { - key_count = level * num_keys_per_level; - for (int i = 0; i < num_keys_per_level; ++i) { - ASSERT_OK(Put(Key(key_count++), rnd.RandomString(500))); - } - ASSERT_OK(Flush()); - MoveFilesToLevel(level); - } - Close(); - - TryReopen(options); - { - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - fs->ClearPrefetchCount(); - buff_prefetch_count = 0; - std::vector buff_prefetch_level_count = {0, 0, 0}; - - for (int level = 2; level >= 0; level--) { - key_count = level * num_keys_per_level; - switch (level) { - case 0: - // initial_auto_readahead_size is set 0 so data and index blocks are - // not prefetched. - ASSERT_OK(db_->SetOptions({{"block_based_table_factory", - "{initial_auto_readahead_size=0;}"}})); - break; - case 1: - // intial_auto_readahead_size and max_auto_readahead_size are set same - // so readahead_size remains same. - ASSERT_OK(db_->SetOptions({{"block_based_table_factory", - "{initial_auto_readahead_size=4096;max_" - "auto_readahead_size=4096;}"}})); - break; - case 2: - ASSERT_OK( - db_->SetOptions({{"block_based_table_factory", - "{initial_auto_readahead_size=65536;}"}})); - break; - default: - assert(false); - } - - for (int i = 0; i < num_keys_per_level; ++i) { - iter->Seek(Key(key_count++)); - iter->Next(); - } - - buff_prefetch_level_count[level] = buff_prefetch_count; - if (support_prefetch && !use_direct_io) { - if (level == 0) { - ASSERT_FALSE(fs->IsPrefetchCalled()); - } else { - ASSERT_TRUE(fs->IsPrefetchCalled()); - } - fs->ClearPrefetchCount(); - } else { - ASSERT_FALSE(fs->IsPrefetchCalled()); - if (level == 0) { - ASSERT_EQ(buff_prefetch_count, 0); - } else { - ASSERT_GT(buff_prefetch_count, 0); - } - buff_prefetch_count = 0; - } - } - if (!support_prefetch) { - ASSERT_GT(buff_prefetch_level_count[1], buff_prefetch_level_count[2]); - } - } - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - Close(); -} - -// This test verifies BlockBasedTableOptions.num_file_reads_for_auto_readahead -// is configured dynamically. 
-TEST_P(PrefetchTest, ConfigureNumFilesReadsForReadaheadSize) { - // First param is if the mockFS support_prefetch or not - bool support_prefetch = - std::get<0>(GetParam()) && - test::IsPrefetchSupported(env_->GetFileSystem(), dbname_); - - const int kNumKeys = 2000; - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), support_prefetch); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - // Second param is if directIO is enabled or not - bool use_direct_io = std::get<1>(GetParam()); - - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - table_options.num_file_reads_for_auto_readahead = 0; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - int buff_prefetch_count = 0; - SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - std::string start_key = BuildKey(0); - std::string end_key = BuildKey(kNumKeys - 1); - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); - - Close(); - TryReopen(options); - - fs->ClearPrefetchCount(); - buff_prefetch_count = 0; - - { - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - /* - * Reseek keys from sequential Data Blocks within same partitioned - * index. It will prefetch the data block at the first seek since - * num_file_reads_for_auto_readahead = 0. Data Block size is nearly 4076 so - * readahead will fetch 8 * 1024 data more initially (2 more data blocks). - */ - iter->Seek(BuildKey(0)); // Prefetch data + index block since - // num_file_reads_for_auto_readahead = 0. - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1000)); // In buffer - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1004)); // In buffer - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1008)); // Prefetch Data - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1011)); // In buffer - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1015)); // In buffer - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1019)); // In buffer - ASSERT_TRUE(iter->Valid()); - // Missed 2 blocks but they are already in buffer so no reset. - iter->Seek(BuildKey(103)); // Already in buffer. - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1033)); // Prefetch Data. 
- ASSERT_TRUE(iter->Valid()); - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 4); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 4); - buff_prefetch_count = 0; - } - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - Close(); -} - -// This test verifies the basic functionality of implicit autoreadahead: -// - Enable implicit autoreadahead and prefetch only if sequential blocks are -// read, -// - If data is already in buffer and few blocks are not requested to read, -// don't reset, -// - If data blocks are sequential during read after enabling implicit -// autoreadahead, reset readahead parameters. -TEST_P(PrefetchTest, PrefetchWhenReseek) { - // First param is if the mockFS support_prefetch or not - bool support_prefetch = - std::get<0>(GetParam()) && - test::IsPrefetchSupported(env_->GetFileSystem(), dbname_); - - const int kNumKeys = 2000; - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), support_prefetch); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - // Second param is if directIO is enabled or not - bool use_direct_io = std::get<1>(GetParam()); - - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - int buff_prefetch_count = 0; - SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - std::string start_key = BuildKey(0); - std::string end_key = BuildKey(kNumKeys - 1); - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); - - fs->ClearPrefetchCount(); - buff_prefetch_count = 0; - - { - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - /* - * Reseek keys from sequential Data Blocks within same partitioned - * index. After 2 sequential reads it will prefetch the data block. - * Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data more - * initially (2 more data blocks). - */ - iter->Seek(BuildKey(0)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1000)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1004)); // Prefetch Data - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1008)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1011)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1015)); // Prefetch Data - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1019)); - ASSERT_TRUE(iter->Valid()); - // Missed 2 blocks but they are already in buffer so no reset. - iter->Seek(BuildKey(103)); // Already in buffer. 
- ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1033)); // Prefetch Data - ASSERT_TRUE(iter->Valid()); - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 3); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 3); - buff_prefetch_count = 0; - } - } - { - /* - * Reseek keys from non sequential data blocks within same partitioned - * index. buff_prefetch_count will be 0 in that case. - */ - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - iter->Seek(BuildKey(0)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1008)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1019)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1033)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1048)); - ASSERT_TRUE(iter->Valid()); - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 0); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 0); - buff_prefetch_count = 0; - } - } - { - /* - * Reesek keys from Single Data Block. - */ - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - iter->Seek(BuildKey(0)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(10)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(100)); - ASSERT_TRUE(iter->Valid()); - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 0); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 0); - buff_prefetch_count = 0; - } - } - { - /* - * Reseek keys from sequential data blocks to set implicit auto readahead - * and prefetch data but after that iterate over different (non sequential) - * data blocks which won't prefetch any data further. So buff_prefetch_count - * will be 1 for the first one. - */ - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - iter->Seek(BuildKey(0)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1000)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1004)); // This iteration will prefetch buffer - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1008)); - ASSERT_TRUE(iter->Valid()); - iter->Seek( - BuildKey(996)); // Reseek won't prefetch any data and - // readahead_size will be initiallized to 8*1024. - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(992)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(989)); - ASSERT_TRUE(iter->Valid()); - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 1); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 1); - buff_prefetch_count = 0; - } - - // Read sequentially to confirm readahead_size is reset to initial value (2 - // more data blocks) - iter->Seek(BuildKey(1011)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1015)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1019)); // Prefetch Data - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1022)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1026)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(103)); // Prefetch Data - ASSERT_TRUE(iter->Valid()); - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 2); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 2); - buff_prefetch_count = 0; - } - } - { - /* Reseek keys from sequential partitioned index block. Since partitioned - * index fetch are sequential, buff_prefetch_count will be 1. 
- */ - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - iter->Seek(BuildKey(0)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1167)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1334)); // This iteration will prefetch buffer - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1499)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1667)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1847)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1999)); - ASSERT_TRUE(iter->Valid()); - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 1); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 1); - buff_prefetch_count = 0; - } - } - { - /* - * Reseek over different keys from different blocks. buff_prefetch_count is - * set 0. - */ - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - int i = 0; - int j = 1000; - do { - iter->Seek(BuildKey(i)); - if (!iter->Valid()) { - break; - } - i = i + 100; - iter->Seek(BuildKey(j)); - j = j + 100; - } while (i < 1000 && j < kNumKeys && iter->Valid()); - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 0); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 0); - buff_prefetch_count = 0; - } - } - { - /* Iterates sequentially over all keys. It will prefetch the buffer.*/ - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - } - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 13); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 13); - buff_prefetch_count = 0; - } - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - Close(); -} - -// This test verifies the functionality of implicit autoreadahead when caching -// is enabled: -// - If data is already in buffer and few blocks are not requested to read, -// don't reset, -// - If block was eligible for prefetching/in buffer but found in cache, don't -// prefetch and reset. 
-TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) { - // First param is if the mockFS support_prefetch or not - bool support_prefetch = - std::get<0>(GetParam()) && - test::IsPrefetchSupported(env_->GetFileSystem(), dbname_); - - const int kNumKeys = 2000; - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), support_prefetch); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - // Second param is if directIO is enabled or not - bool use_direct_io = std::get<1>(GetParam()); - - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); // 8MB - table_options.block_cache = cache; - table_options.no_block_cache = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - int buff_prefetch_count = 0; - SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - std::string start_key = BuildKey(0); - std::string end_key = BuildKey(kNumKeys - 1); - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); - - fs->ClearPrefetchCount(); - buff_prefetch_count = 0; - - { - /* - * Reseek keys from sequential Data Blocks within same partitioned - * index. After 2 sequential reads it will prefetch the data block. - * Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data more - * initially (2 more data blocks). - */ - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - // Warm up the cache - iter->Seek(BuildKey(1011)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1015)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1019)); - ASSERT_TRUE(iter->Valid()); - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 1); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 1); - buff_prefetch_count = 0; - } - } - { - // After caching, blocks will be read from cache (Sequential blocks) - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - iter->Seek(BuildKey(0)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1000)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1004)); // Prefetch data (not in cache). - ASSERT_TRUE(iter->Valid()); - // Missed one sequential block but next is in already in buffer so readahead - // will not be reset. - iter->Seek(BuildKey(1011)); - ASSERT_TRUE(iter->Valid()); - // Prefetch data but blocks are in cache so no prefetch and reset. - iter->Seek(BuildKey(1015)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1019)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1022)); - ASSERT_TRUE(iter->Valid()); - // Prefetch data with readahead_size = 4 blocks. 
- iter->Seek(BuildKey(1026)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(103)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1033)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1037)); - ASSERT_TRUE(iter->Valid()); - - if (support_prefetch && !use_direct_io) { - ASSERT_EQ(fs->GetPrefetchCount(), 3); - fs->ClearPrefetchCount(); - } else { - ASSERT_EQ(buff_prefetch_count, 2); - buff_prefetch_count = 0; - } - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - Close(); -} - -// This test verifies the functionality of ReadOptions.adaptive_readahead. -TEST_P(PrefetchTest, DBIterLevelReadAhead) { - const int kNumKeys = 1000; - // Set options - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - bool use_direct_io = std::get<0>(GetParam()); - bool is_adaptive_readahead = std::get<1>(GetParam()); - - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - int total_keys = 0; - for (int j = 0; j < 5; j++) { - for (int i = j * kNumKeys; i < (j + 1) * kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - total_keys++; - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - int buff_prefetch_count = 0; - int readahead_carry_over_count = 0; - int num_sst_files = NumTableFilesAtLevel(2); - size_t current_readahead_size = 0; - - // Test - Iterate over the keys sequentially. - { - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - - // The callback checks, since reads are sequential, readahead_size doesn't - // start from 8KB when iterator moves to next file and its called - // num_sst_files-1 times (excluding for first file). - SyncPoint::GetInstance()->SetCallBack( - "BlockPrefetcher::SetReadaheadState", [&](void* arg) { - readahead_carry_over_count++; - size_t readahead_size = *reinterpret_cast(arg); - if (readahead_carry_over_count) { - ASSERT_GT(readahead_size, 8 * 1024); - } - }); - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::TryReadFromCache", [&](void* arg) { - current_readahead_size = *reinterpret_cast(arg); - ASSERT_GT(current_readahead_size, 0); - }); - - SyncPoint::GetInstance()->EnableProcessing(); - - ReadOptions ro; - if (is_adaptive_readahead) { - ro.adaptive_readahead = true; - } - - ASSERT_OK(options.statistics->Reset()); - - auto iter = std::unique_ptr(db_->NewIterator(ro)); - int num_keys = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - num_keys++; - } - ASSERT_EQ(num_keys, total_keys); - - // For index and data blocks. 
- if (is_adaptive_readahead) { - ASSERT_EQ(readahead_carry_over_count, 2 * (num_sst_files - 1)); - } else { - ASSERT_GT(buff_prefetch_count, 0); - ASSERT_EQ(readahead_carry_over_count, 0); - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - } - Close(); -} - -// This test verifies the functionality of ReadOptions.adaptive_readahead when -// async_io is enabled. -TEST_P(PrefetchTest, DBIterLevelReadAheadWithAsyncIO) { - const int kNumKeys = 1000; - // Set options - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - bool use_direct_io = std::get<0>(GetParam()); - bool is_adaptive_readahead = std::get<1>(GetParam()); - - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - int total_keys = 0; - for (int j = 0; j < 5; j++) { - for (int i = j * kNumKeys; i < (j + 1) * kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - total_keys++; - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - int buff_async_prefetch_count = 0; - int readahead_carry_over_count = 0; - int num_sst_files = NumTableFilesAtLevel(2); - size_t current_readahead_size = 0; - - // Test - Iterate over the keys sequentially. - { - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::PrefetchAsyncInternal:Start", - [&](void*) { buff_async_prefetch_count++; }); - - // The callback checks, since reads are sequential, readahead_size doesn't - // start from 8KB when iterator moves to next file and its called - // num_sst_files-1 times (excluding for first file). - SyncPoint::GetInstance()->SetCallBack( - "BlockPrefetcher::SetReadaheadState", [&](void* arg) { - readahead_carry_over_count++; - size_t readahead_size = *reinterpret_cast(arg); - if (readahead_carry_over_count) { - ASSERT_GT(readahead_size, 8 * 1024); - } - }); - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::TryReadFromCache", [&](void* arg) { - current_readahead_size = *reinterpret_cast(arg); - ASSERT_GT(current_readahead_size, 0); - }); - - SyncPoint::GetInstance()->EnableProcessing(); - - ReadOptions ro; - if (is_adaptive_readahead) { - ro.adaptive_readahead = true; - } - ro.async_io = true; - - ASSERT_OK(options.statistics->Reset()); - - auto iter = std::unique_ptr(db_->NewIterator(ro)); - int num_keys = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - num_keys++; - } - ASSERT_EQ(num_keys, total_keys); - - // For index and data blocks. - if (is_adaptive_readahead) { - ASSERT_EQ(readahead_carry_over_count, 2 * (num_sst_files - 1)); - } else { - ASSERT_EQ(readahead_carry_over_count, 0); - } - ASSERT_GT(buff_async_prefetch_count, 0); - - // Check stats to make sure async prefetch is done. 
- { - HistogramData async_read_bytes; - options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - if (ro.async_io) { - ASSERT_GT(async_read_bytes.count, 0); - } else { - ASSERT_EQ(async_read_bytes.count, 0); - } - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - } - Close(); -} - -TEST_P(PrefetchTest, DBIterAsyncIONoIOUring) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - - const int kNumKeys = 1000; - // Set options - bool use_direct_io = std::get<0>(GetParam()); - bool is_adaptive_readahead = std::get<1>(GetParam()); - - Options options; - SetGenericOptions(Env::Default(), use_direct_io, options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - enable_io_uring = false; - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - int total_keys = 0; - for (int j = 0; j < 5; j++) { - for (int i = j * kNumKeys; i < (j + 1) * kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - total_keys++; - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - - // Test - Iterate over the keys sequentially. - { - ReadOptions ro; - if (is_adaptive_readahead) { - ro.adaptive_readahead = true; - } - ro.async_io = true; - - ASSERT_OK(options.statistics->Reset()); - - auto iter = std::unique_ptr(db_->NewIterator(ro)); - int num_keys = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - num_keys++; - } - ASSERT_EQ(num_keys, total_keys); - - // Check stats to make sure async prefetch is done. - { - HistogramData async_read_bytes; - options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - ASSERT_EQ(async_read_bytes.count, 0); - ASSERT_EQ(options.statistics->getTickerCount(READ_ASYNC_MICROS), 0); - } - } - - { - ReadOptions ro; - if (is_adaptive_readahead) { - ro.adaptive_readahead = true; - } - ro.async_io = true; - ro.tailing = true; - - ASSERT_OK(options.statistics->Reset()); - - auto iter = std::unique_ptr(db_->NewIterator(ro)); - int num_keys = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - num_keys++; - } - ASSERT_EQ(num_keys, total_keys); - - // Check stats to make sure async prefetch is done. 
- { - HistogramData async_read_bytes; - options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - ASSERT_EQ(async_read_bytes.count, 0); - ASSERT_EQ(options.statistics->getTickerCount(READ_ASYNC_MICROS), 0); - } - } - Close(); - - enable_io_uring = true; -} - -class PrefetchTest1 : public DBTestBase, - public ::testing::WithParamInterface { - public: - PrefetchTest1() : DBTestBase("prefetch_test1", true) {} - - void SetGenericOptions(Env* env, bool use_direct_io, Options& options) { - options = CurrentOptions(); - options.write_buffer_size = 1024; - options.create_if_missing = true; - options.compression = kNoCompression; - options.env = env; - options.disable_auto_compactions = true; - if (use_direct_io) { - options.use_direct_reads = true; - options.use_direct_io_for_flush_and_compaction = true; - } - } - - void SetBlockBasedTableOptions(BlockBasedTableOptions& table_options) { - table_options.no_block_cache = true; - table_options.cache_index_and_filter_blocks = false; - table_options.metadata_block_size = 1024; - table_options.index_type = - BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; - } -}; - -INSTANTIATE_TEST_CASE_P(PrefetchTest1, PrefetchTest1, ::testing::Bool()); - -// This test verifies the functionality of ReadOptions.adaptive_readahead when -// reads are not sequential. -TEST_P(PrefetchTest1, NonSequentialReadsWithAdaptiveReadahead) { - const int kNumKeys = 1000; - // Set options - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - Options options; - SetGenericOptions(env.get(), GetParam(), options); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (GetParam() && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - for (int j = 0; j < 5; j++) { - for (int i = j * kNumKeys; i < (j + 1) * kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - - int buff_prefetch_count = 0; - int set_readahead = 0; - size_t readahead_size = 0; - - SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - SyncPoint::GetInstance()->SetCallBack( - "BlockPrefetcher::SetReadaheadState", - [&](void* /*arg*/) { set_readahead++; }); - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::TryReadFromCache", - [&](void* arg) { readahead_size = *reinterpret_cast(arg); }); - - SyncPoint::GetInstance()->EnableProcessing(); - - { - // Iterate until prefetch is done. - ReadOptions ro; - ro.adaptive_readahead = true; - auto iter = std::unique_ptr(db_->NewIterator(ro)); - - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - - while (iter->Valid() && buff_prefetch_count == 0) { - iter->Next(); - } - - ASSERT_EQ(readahead_size, 8 * 1024); - ASSERT_EQ(buff_prefetch_count, 1); - ASSERT_EQ(set_readahead, 0); - buff_prefetch_count = 0; - - // Move to last file and check readahead size fallbacks to 8KB. 
So next - // readahead size after prefetch should be 8 * 1024; - iter->Seek(BuildKey(4004)); - ASSERT_TRUE(iter->Valid()); - - while (iter->Valid() && buff_prefetch_count == 0) { - iter->Next(); - } - - ASSERT_EQ(readahead_size, 8 * 1024); - ASSERT_EQ(set_readahead, 0); - ASSERT_EQ(buff_prefetch_count, 1); - } - Close(); -} - -// This test verifies the functionality of adaptive_readaheadsize with cache and -// if block is found in cache, decrease the readahead_size if -// - its enabled internally by RocksDB (implicit_auto_readahead_) and, -// - readahead_size is greater than 0 and, -// - the block would have called prefetch API if not found in cache for -// which conditions are: -// - few/no bytes are in buffer and, -// - block is sequential with the previous read and, -// - num_file_reads_ + 1 (including this read) > -// num_file_reads_for_auto_readahead_ -TEST_P(PrefetchTest1, DecreaseReadAheadIfInCache) { - const int kNumKeys = 2000; - // Set options - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - Options options; - SetGenericOptions(env.get(), GetParam(), options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); // 8MB - table_options.block_cache = cache; - table_options.no_block_cache = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (GetParam() && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - std::string start_key = BuildKey(0); - std::string end_key = BuildKey(kNumKeys - 1); - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); - - int buff_prefetch_count = 0; - size_t current_readahead_size = 0; - size_t expected_current_readahead_size = 8 * 1024; - size_t decrease_readahead_size = 8 * 1024; - - SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::TryReadFromCache", [&](void* arg) { - current_readahead_size = *reinterpret_cast(arg); - }); - - SyncPoint::GetInstance()->EnableProcessing(); - ReadOptions ro; - ro.adaptive_readahead = true; - { - /* - * Reseek keys from sequential Data Blocks within same partitioned - * index. After 2 sequential reads it will prefetch the data block. - * Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data - * more initially (2 more data blocks). - */ - auto iter = std::unique_ptr(db_->NewIterator(ro)); - // Warm up the cache - iter->Seek(BuildKey(1011)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1015)); - ASSERT_TRUE(iter->Valid()); - iter->Seek(BuildKey(1019)); - ASSERT_TRUE(iter->Valid()); - buff_prefetch_count = 0; - } - - { - ASSERT_OK(options.statistics->Reset()); - // After caching, blocks will be read from cache (Sequential blocks) - auto iter = std::unique_ptr(db_->NewIterator(ro)); - iter->Seek( - BuildKey(0)); // In cache so it will decrease the readahead_size. 
- ASSERT_TRUE(iter->Valid()); - expected_current_readahead_size = std::max( - decrease_readahead_size, - (expected_current_readahead_size >= decrease_readahead_size - ? (expected_current_readahead_size - decrease_readahead_size) - : 0)); - - iter->Seek(BuildKey(1000)); // Won't prefetch the block. - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(current_readahead_size, expected_current_readahead_size); - - iter->Seek(BuildKey(1004)); // Prefetch the block. - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(current_readahead_size, expected_current_readahead_size); - expected_current_readahead_size *= 2; - - iter->Seek(BuildKey(1011)); - ASSERT_TRUE(iter->Valid()); - - // Eligible to Prefetch data (not in buffer) but block is in cache so no - // prefetch will happen and will result in decrease in readahead_size. - // readahead_size will be 8 * 1024 - iter->Seek(BuildKey(1015)); - ASSERT_TRUE(iter->Valid()); - expected_current_readahead_size = std::max( - decrease_readahead_size, - (expected_current_readahead_size >= decrease_readahead_size - ? (expected_current_readahead_size - decrease_readahead_size) - : 0)); - - // 1016 is the same block as 1015. So no change in readahead_size. - iter->Seek(BuildKey(1016)); - ASSERT_TRUE(iter->Valid()); - - // Prefetch data (not in buffer) but found in cache. So decrease - // readahead_size. Since it will 0 after decrementing so readahead_size will - // be set to initial value. - iter->Seek(BuildKey(1019)); - ASSERT_TRUE(iter->Valid()); - expected_current_readahead_size = std::max( - decrease_readahead_size, - (expected_current_readahead_size >= decrease_readahead_size - ? (expected_current_readahead_size - decrease_readahead_size) - : 0)); - - // Prefetch next sequential data. - iter->Seek(BuildKey(1022)); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(current_readahead_size, expected_current_readahead_size); - ASSERT_EQ(buff_prefetch_count, 2); - - buff_prefetch_count = 0; - } - Close(); -} - -// This test verifies the basic functionality of seek parallelization for -// async_io. 
-TEST_P(PrefetchTest1, SeekParallelizationTest) { - const int kNumKeys = 2000; - // Set options - std::shared_ptr fs = - std::make_shared(env_->GetFileSystem(), false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - Options options; - SetGenericOptions(env.get(), GetParam(), options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (GetParam() && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - std::string start_key = BuildKey(0); - std::string end_key = BuildKey(kNumKeys - 1); - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); - - int buff_prefetch_count = 0; - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::PrefetchAsyncInternal:Start", - [&](void*) { buff_prefetch_count++; }); - - SyncPoint::GetInstance()->EnableProcessing(); - ReadOptions ro; - ro.adaptive_readahead = true; - ro.async_io = true; - - { - ASSERT_OK(options.statistics->Reset()); - // Each block contains around 4 keys. - auto iter = std::unique_ptr(db_->NewIterator(ro)); - iter->Seek(BuildKey(0)); // Prefetch data because of seek parallelization. - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - - // New data block. Since num_file_reads in FilePrefetch after this read is - // 2, it won't go for prefetching. - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - - // Prefetch data. - iter->Next(); - ASSERT_TRUE(iter->Valid()); - - ASSERT_EQ(buff_prefetch_count, 2); - - // Check stats to make sure async prefetch is done. - { - HistogramData async_read_bytes; - options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - ASSERT_GT(async_read_bytes.count, 0); - ASSERT_GT(get_perf_context()->number_async_seek, 0); - } - - buff_prefetch_count = 0; - } - Close(); -} - -namespace { -#ifdef GFLAGS -const int kMaxArgCount = 100; -const size_t kArgBufferSize = 100000; - -void RunIOTracerParserTool(std::string trace_file) { - std::vector params = {"./io_tracer_parser", - "-io_trace_file=" + trace_file}; - - char arg_buffer[kArgBufferSize]; - char* argv[kMaxArgCount]; - int argc = 0; - int cursor = 0; - for (const auto& arg : params) { - ASSERT_LE(cursor + arg.size() + 1, kArgBufferSize); - ASSERT_LE(argc + 1, kMaxArgCount); - - snprintf(arg_buffer + cursor, arg.size() + 1, "%s", arg.c_str()); - - argv[argc++] = arg_buffer + cursor; - cursor += static_cast(arg.size()) + 1; - } - ASSERT_EQ(0, ROCKSDB_NAMESPACE::io_tracer_parser(argc, argv)); -} -#endif // GFLAGS -} // namespace - -// Tests the default implementation of ReadAsync API with PosixFileSystem during -// prefetching. 
-TEST_P(PrefetchTest, ReadAsyncWithPosixFS) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - - const int kNumKeys = 1000; - std::shared_ptr fs = std::make_shared( - FileSystem::Default(), /*support_prefetch=*/false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - bool use_direct_io = std::get<0>(GetParam()); - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - int total_keys = 0; - // Write the keys. - { - WriteBatch batch; - Random rnd(309); - for (int j = 0; j < 5; j++) { - for (int i = j * kNumKeys; i < (j + 1) * kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - total_keys++; - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - } - - int buff_prefetch_count = 0; - bool read_async_called = false; - ReadOptions ro; - ro.adaptive_readahead = true; - ro.async_io = true; - - if (std::get<1>(GetParam())) { - ro.readahead_size = 16 * 1024; - } - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::PrefetchAsyncInternal:Start", - [&](void*) { buff_prefetch_count++; }); - - SyncPoint::GetInstance()->SetCallBack( - "UpdateResults::io_uring_result", - [&](void* /*arg*/) { read_async_called = true; }); - SyncPoint::GetInstance()->EnableProcessing(); - - // Read the keys. - { - ASSERT_OK(options.statistics->Reset()); - get_perf_context()->Reset(); - - auto iter = std::unique_ptr(db_->NewIterator(ro)); - int num_keys = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - num_keys++; - } - - if (read_async_called) { - ASSERT_EQ(num_keys, total_keys); - ASSERT_GT(buff_prefetch_count, 0); - // Check stats to make sure async prefetch is done. - HistogramData async_read_bytes; - options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - HistogramData prefetched_bytes_discarded; - options.statistics->histogramData(PREFETCHED_BYTES_DISCARDED, - &prefetched_bytes_discarded); - ASSERT_GT(async_read_bytes.count, 0); - ASSERT_GT(prefetched_bytes_discarded.count, 0); - ASSERT_EQ(get_perf_context()->number_async_seek, 0); - } else { - // Not all platforms support iouring. In that case, ReadAsync in posix - // won't submit async requests. - ASSERT_EQ(num_keys, total_keys); - ASSERT_EQ(buff_prefetch_count, 0); - } - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - - Close(); -} - -// This test verifies implementation of seek parallelization with -// PosixFileSystem during prefetching. 
-TEST_P(PrefetchTest, MultipleSeekWithPosixFS) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - - const int kNumKeys = 1000; - std::shared_ptr fs = std::make_shared( - FileSystem::Default(), /*support_prefetch=*/false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - bool use_direct_io = std::get<0>(GetParam()); - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - int total_keys = 0; - // Write the keys. - { - WriteBatch batch; - Random rnd(309); - for (int j = 0; j < 5; j++) { - for (int i = j * kNumKeys; i < (j + 1) * kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - total_keys++; - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - } - - int num_keys_first_batch = 0; - int num_keys_second_batch = 0; - // Calculate number of keys without async_io for correctness validation. - { - auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); - // First Seek. - iter->Seek(BuildKey(450)); - while (iter->Valid() && num_keys_first_batch < 100) { - ASSERT_OK(iter->status()); - num_keys_first_batch++; - iter->Next(); - } - ASSERT_OK(iter->status()); - - iter->Seek(BuildKey(942)); - while (iter->Valid()) { - ASSERT_OK(iter->status()); - num_keys_second_batch++; - iter->Next(); - } - ASSERT_OK(iter->status()); - } - - int buff_prefetch_count = 0; - bool read_async_called = false; - ReadOptions ro; - ro.adaptive_readahead = true; - ro.async_io = true; - - if (std::get<1>(GetParam())) { - ro.readahead_size = 16 * 1024; - } - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::PrefetchAsyncInternal:Start", - [&](void*) { buff_prefetch_count++; }); - - SyncPoint::GetInstance()->SetCallBack( - "UpdateResults::io_uring_result", - [&](void* /*arg*/) { read_async_called = true; }); - SyncPoint::GetInstance()->EnableProcessing(); - - // Read the keys using seek. - { - ASSERT_OK(options.statistics->Reset()); - get_perf_context()->Reset(); - - auto iter = std::unique_ptr(db_->NewIterator(ro)); - int num_keys = 0; - // First Seek. - { - iter->Seek(BuildKey(450)); - while (iter->Valid() && num_keys < 100) { - ASSERT_OK(iter->status()); - num_keys++; - iter->Next(); - } - - ASSERT_OK(iter->status()); - ASSERT_EQ(num_keys, num_keys_first_batch); - // Check stats to make sure async prefetch is done. - HistogramData async_read_bytes; - options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - if (read_async_called) { - ASSERT_GT(async_read_bytes.count, 0); - ASSERT_GT(get_perf_context()->number_async_seek, 0); - } else { - // Not all platforms support iouring. In that case, ReadAsync in posix - // won't submit async requests. - ASSERT_EQ(async_read_bytes.count, 0); - ASSERT_EQ(get_perf_context()->number_async_seek, 0); - } - } - - // Second Seek. 
- { - num_keys = 0; - ASSERT_OK(options.statistics->Reset()); - get_perf_context()->Reset(); - - iter->Seek(BuildKey(942)); - while (iter->Valid()) { - ASSERT_OK(iter->status()); - num_keys++; - iter->Next(); - } - - ASSERT_OK(iter->status()); - ASSERT_EQ(num_keys, num_keys_second_batch); - HistogramData async_read_bytes; - options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - HistogramData prefetched_bytes_discarded; - options.statistics->histogramData(PREFETCHED_BYTES_DISCARDED, - &prefetched_bytes_discarded); - ASSERT_GT(prefetched_bytes_discarded.count, 0); - - if (read_async_called) { - ASSERT_GT(buff_prefetch_count, 0); - - // Check stats to make sure async prefetch is done. - ASSERT_GT(async_read_bytes.count, 0); - ASSERT_GT(get_perf_context()->number_async_seek, 0); - } else { - // Not all platforms support iouring. In that case, ReadAsync in posix - // won't submit async requests. - ASSERT_EQ(async_read_bytes.count, 0); - ASSERT_EQ(get_perf_context()->number_async_seek, 0); - } - } - } - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - Close(); -} - -// This test verifies implementation of seek parallelization with -// PosixFileSystem during prefetching. -TEST_P(PrefetchTest, SeekParallelizationTestWithPosix) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - const int kNumKeys = 2000; - // Set options - std::shared_ptr fs = std::make_shared( - FileSystem::Default(), /*support_prefetch=*/false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - bool use_direct_io = std::get<0>(GetParam()); - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - WriteBatch batch; - Random rnd(309); - for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - std::string start_key = BuildKey(0); - std::string end_key = BuildKey(kNumKeys - 1); - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); - - int buff_prefetch_count = 0; - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::PrefetchAsyncInternal:Start", - [&](void*) { buff_prefetch_count++; }); - - bool read_async_called = false; - SyncPoint::GetInstance()->SetCallBack( - "UpdateResults::io_uring_result", - [&](void* /*arg*/) { read_async_called = true; }); - SyncPoint::GetInstance()->EnableProcessing(); - - SyncPoint::GetInstance()->EnableProcessing(); - ReadOptions ro; - ro.adaptive_readahead = true; - ro.async_io = true; - - if (std::get<1>(GetParam())) { - ro.readahead_size = 16 * 1024; - } - - { - ASSERT_OK(options.statistics->Reset()); - // Each block contains around 4 keys. - auto iter = std::unique_ptr(db_->NewIterator(ro)); - iter->Seek(BuildKey(0)); // Prefetch data because of seek parallelization. 
- ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - - // New data block. Since num_file_reads in FilePrefetch after this read is - // 2, it won't go for prefetching. - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - - // Prefetch data. - iter->Next(); - - ASSERT_TRUE(iter->Valid()); - HistogramData async_read_bytes; - options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - if (read_async_called) { - ASSERT_GT(async_read_bytes.count, 0); - ASSERT_GT(get_perf_context()->number_async_seek, 0); - if (std::get<1>(GetParam())) { - ASSERT_EQ(buff_prefetch_count, 1); - } else { - ASSERT_EQ(buff_prefetch_count, 2); - } - } else { - // Not all platforms support iouring. In that case, ReadAsync in posix - // won't submit async requests. - ASSERT_EQ(async_read_bytes.count, 0); - ASSERT_EQ(get_perf_context()->number_async_seek, 0); - } - } - Close(); -} - -#ifdef GFLAGS -// This test verifies io_tracing with PosixFileSystem during prefetching. -TEST_P(PrefetchTest, TraceReadAsyncWithCallbackWrapper) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - - const int kNumKeys = 1000; - std::shared_ptr fs = std::make_shared( - FileSystem::Default(), /*support_prefetch=*/false); - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - - bool use_direct_io = std::get<0>(GetParam()); - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - int total_keys = 0; - // Write the keys. - { - WriteBatch batch; - Random rnd(309); - for (int j = 0; j < 5; j++) { - for (int i = j * kNumKeys; i < (j + 1) * kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); - total_keys++; - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(Flush()); - } - MoveFilesToLevel(2); - } - - int buff_prefetch_count = 0; - bool read_async_called = false; - ReadOptions ro; - ro.adaptive_readahead = true; - ro.async_io = true; - - if (std::get<1>(GetParam())) { - ro.readahead_size = 16 * 1024; - } - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::PrefetchAsyncInternal:Start", - [&](void*) { buff_prefetch_count++; }); - - SyncPoint::GetInstance()->SetCallBack( - "UpdateResults::io_uring_result", - [&](void* /*arg*/) { read_async_called = true; }); - SyncPoint::GetInstance()->EnableProcessing(); - - // Read the keys. - { - // Start io_tracing. 
-    WriteOptions write_opt;
-    TraceOptions trace_opt;
-    std::unique_ptr<TraceWriter> trace_writer;
-    std::string trace_file_path = dbname_ + "/io_trace_file";
-
-    ASSERT_OK(
-        NewFileTraceWriter(env_, EnvOptions(), trace_file_path, &trace_writer));
-    ASSERT_OK(db_->StartIOTrace(trace_opt, std::move(trace_writer)));
-    ASSERT_OK(options.statistics->Reset());
-
-    auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
-    int num_keys = 0;
-    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-      ASSERT_OK(iter->status());
-      num_keys++;
-    }
-
-    // End the tracing.
-    ASSERT_OK(db_->EndIOTrace());
-    ASSERT_OK(env_->FileExists(trace_file_path));
-
-    ASSERT_EQ(num_keys, total_keys);
-    HistogramData async_read_bytes;
-    options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes);
-    if (read_async_called) {
-      ASSERT_GT(buff_prefetch_count, 0);
-      // Check stats to make sure async prefetch is done.
-      ASSERT_GT(async_read_bytes.count, 0);
-    } else {
-      // Not all platforms support iouring. In that case, ReadAsync in posix
-      // won't submit async requests.
-      ASSERT_EQ(async_read_bytes.count, 0);
-    }
-
-    // Check the file to see if ReadAsync is logged.
-    RunIOTracerParserTool(trace_file_path);
-  }
-
-  SyncPoint::GetInstance()->DisableProcessing();
-  SyncPoint::GetInstance()->ClearAllCallBacks();
-
-  Close();
-}
-#endif // GFLAGS
-
-class FilePrefetchBufferTest : public testing::Test {
- public:
-  void SetUp() override {
-    SetupSyncPointsToMockDirectIO();
-    env_ = Env::Default();
-    fs_ = FileSystem::Default();
-    test_dir_ = test::PerThreadDBPath("file_prefetch_buffer_test");
-    ASSERT_OK(fs_->CreateDir(test_dir_, IOOptions(), nullptr));
-    stats_ = CreateDBStatistics();
-  }
-
-  void TearDown() override { EXPECT_OK(DestroyDir(env_, test_dir_)); }
-
-  void Write(const std::string& fname, const std::string& content) {
-    std::unique_ptr<FSWritableFile> f;
-    ASSERT_OK(fs_->NewWritableFile(Path(fname), FileOptions(), &f, nullptr));
-    ASSERT_OK(f->Append(content, IOOptions(), nullptr));
-    ASSERT_OK(f->Close(IOOptions(), nullptr));
-  }
-
-  void Read(const std::string& fname, const FileOptions& opts,
-            std::unique_ptr<RandomAccessFileReader>* reader) {
-    std::string fpath = Path(fname);
-    std::unique_ptr<FSRandomAccessFile> f;
-    ASSERT_OK(fs_->NewRandomAccessFile(fpath, opts, &f, nullptr));
-    reader->reset(new RandomAccessFileReader(
-        std::move(f), fpath, env_->GetSystemClock().get(),
-        /*io_tracer=*/nullptr, stats_.get()));
-  }
-
-  void AssertResult(const std::string& content,
-                    const std::vector<FSReadRequest>& reqs) {
-    for (const auto& r : reqs) {
-      ASSERT_OK(r.status);
-      ASSERT_EQ(r.len, r.result.size());
-      ASSERT_EQ(content.substr(r.offset, r.len), r.result.ToString());
-    }
-  }
-
-  FileSystem* fs() { return fs_.get(); }
-  Statistics* stats() { return stats_.get(); }
-
- private:
-  Env* env_;
-  std::shared_ptr<FileSystem> fs_;
-  std::string test_dir_;
-  std::shared_ptr<Statistics> stats_;
-
-  std::string Path(const std::string& fname) { return test_dir_ + "/" + fname; }
-};
-
-TEST_F(FilePrefetchBufferTest, SeekWithBlockCacheHit) {
-  std::string fname = "seek-with-block-cache-hit";
-  Random rand(0);
-  std::string content = rand.RandomString(32768);
-  Write(fname, content);
-
-  FileOptions opts;
-  std::unique_ptr<RandomAccessFileReader> r;
-  Read(fname, opts, &r);
-
-  FilePrefetchBuffer fpb(16384, 16384, true, false, false, 0, 0, fs());
-  Slice result;
-  // Simulate a seek of 4096 bytes at offset 0. Due to the readahead settings,
-  // it will do two reads of 4096+8192 and 8192.
-  Status s = fpb.PrefetchAsync(IOOptions(), r.get(), 0, 4096, &result);
-
-  // Platforms that don't have IO uring may not support async IO.
- if (s.IsNotSupported()) { - return; - } - - ASSERT_TRUE(s.IsTryAgain()); - // Simulate a block cache hit - fpb.UpdateReadPattern(0, 4096, false); - // Now read some data that straddles the two prefetch buffers - offset 8192 to - // 16384 - ASSERT_TRUE(fpb.TryReadFromCacheAsync(IOOptions(), r.get(), 8192, 8192, - &result, &s, Env::IOPriority::IO_LOW)); -} - -TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) { - std::string fname = "seek-with-block-cache-hit"; - Random rand(0); - std::string content = rand.RandomString(32768); - Write(fname, content); - - FileOptions opts; - std::unique_ptr r; - Read(fname, opts, &r); - - FilePrefetchBuffer fpb( - /*readahead_size=*/8192, /*max_readahead_size=*/16384, /*enable=*/true, - /*track_min_offset=*/false, /*implicit_auto_readahead=*/false, - /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, fs()); - - int read_async_called = 0; - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::ReadAsync", - [&](void* /*arg*/) { read_async_called++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - Slice async_result; - // Simulate a seek of 4000 bytes at offset 3000. Due to the readahead - // settings, it will do two reads of 4000+4096 and 4096 - Status s = fpb.PrefetchAsync(IOOptions(), r.get(), 3000, 4000, &async_result); - - // Platforms that don't have IO uring may not support async IO - if (s.IsNotSupported()) { - return; - } - - ASSERT_TRUE(s.IsTryAgain()); - ASSERT_TRUE(fpb.TryReadFromCacheAsync(IOOptions(), r.get(), /*offset=*/3000, - /*length=*/4000, &async_result, &s, - Env::IOPriority::IO_LOW)); - // No sync call should be made. - HistogramData sst_read_micros; - stats()->histogramData(SST_READ_MICROS, &sst_read_micros); - ASSERT_EQ(sst_read_micros.count, 0); - - // Number of async calls should be. - ASSERT_EQ(read_async_called, 2); - // Length should be 4000. - ASSERT_EQ(async_result.size(), 4000); - // Data correctness. - Slice result(content.c_str() + 3000, 4000); - ASSERT_EQ(result.size(), 4000); - ASSERT_EQ(result, async_result); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - - return RUN_ALL_TESTS(); -} diff --git a/file/random_access_file_reader_test.cc b/file/random_access_file_reader_test.cc deleted file mode 100644 index 22e950c78..000000000 --- a/file/random_access_file_reader_test.cc +++ /dev/null @@ -1,479 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -#include "file/random_access_file_reader.h" - -#include - -#include "file/file_util.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/file_system.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -class RandomAccessFileReaderTest : public testing::Test { - public: - void SetUp() override { - SetupSyncPointsToMockDirectIO(); - env_ = Env::Default(); - fs_ = FileSystem::Default(); - test_dir_ = test::PerThreadDBPath("random_access_file_reader_test"); - ASSERT_OK(fs_->CreateDir(test_dir_, IOOptions(), nullptr)); - } - - void TearDown() override { EXPECT_OK(DestroyDir(env_, test_dir_)); } - - void Write(const std::string& fname, const std::string& content) { - std::unique_ptr f; - ASSERT_OK(fs_->NewWritableFile(Path(fname), FileOptions(), &f, nullptr)); - ASSERT_OK(f->Append(content, IOOptions(), nullptr)); - ASSERT_OK(f->Close(IOOptions(), nullptr)); - } - - void Read(const std::string& fname, const FileOptions& opts, - std::unique_ptr* reader) { - std::string fpath = Path(fname); - std::unique_ptr f; - ASSERT_OK(fs_->NewRandomAccessFile(fpath, opts, &f, nullptr)); - reader->reset(new RandomAccessFileReader(std::move(f), fpath, - env_->GetSystemClock().get())); - } - - void AssertResult(const std::string& content, - const std::vector& reqs) { - for (const auto& r : reqs) { - ASSERT_OK(r.status); - ASSERT_EQ(r.len, r.result.size()); - ASSERT_EQ(content.substr(r.offset, r.len), r.result.ToString()); - } - } - - private: - Env* env_; - std::shared_ptr fs_; - std::string test_dir_; - - std::string Path(const std::string& fname) { return test_dir_ + "/" + fname; } -}; - -// Skip the following tests in lite mode since direct I/O is unsupported. - -TEST_F(RandomAccessFileReaderTest, ReadDirectIO) { - std::string fname = "read-direct-io"; - Random rand(0); - std::string content = rand.RandomString(kDefaultPageSize); - Write(fname, content); - - FileOptions opts; - opts.use_direct_reads = true; - std::unique_ptr r; - Read(fname, opts, &r); - ASSERT_TRUE(r->use_direct_io()); - - const size_t page_size = r->file()->GetRequiredBufferAlignment(); - size_t offset = page_size / 2; - size_t len = page_size / 3; - Slice result; - AlignedBuf buf; - for (Env::IOPriority rate_limiter_priority : {Env::IO_LOW, Env::IO_TOTAL}) { - ASSERT_OK(r->Read(IOOptions(), offset, len, &result, nullptr, &buf, - rate_limiter_priority)); - ASSERT_EQ(result.ToString(), content.substr(offset, len)); - } -} - -TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) { - std::vector aligned_reqs; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "RandomAccessFileReader::MultiRead:AlignedReqs", [&](void* reqs) { - // Copy reqs, since it's allocated on stack inside MultiRead, which will - // be deallocated after MultiRead returns. - aligned_reqs = *reinterpret_cast*>(reqs); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // Creates a file with 3 pages. - std::string fname = "multi-read-direct-io"; - Random rand(0); - std::string content = rand.RandomString(3 * kDefaultPageSize); - Write(fname, content); - - FileOptions opts; - opts.use_direct_reads = true; - std::unique_ptr r; - Read(fname, opts, &r); - ASSERT_TRUE(r->use_direct_io()); - - const size_t page_size = r->file()->GetRequiredBufferAlignment(); - - { - // Reads 2 blocks in the 1st page. - // The results should be SharedSlices of the same underlying buffer. 
- // - // Illustration (each x is a 1/4 page) - // First page: xxxx - // 1st block: x - // 2nd block: xx - FSReadRequest r0; - r0.offset = 0; - r0.len = page_size / 4; - r0.scratch = nullptr; - - FSReadRequest r1; - r1.offset = page_size / 2; - r1.len = page_size / 2; - r1.scratch = nullptr; - - std::vector reqs; - reqs.push_back(std::move(r0)); - reqs.push_back(std::move(r1)); - AlignedBuf aligned_buf; - ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf, - Env::IO_TOTAL /* rate_limiter_priority */)); - - AssertResult(content, reqs); - - // Reads the first page internally. - ASSERT_EQ(aligned_reqs.size(), 1); - const FSReadRequest& aligned_r = aligned_reqs[0]; - ASSERT_OK(aligned_r.status); - ASSERT_EQ(aligned_r.offset, 0); - ASSERT_EQ(aligned_r.len, page_size); - } - - { - // Reads 3 blocks: - // 1st block in the 1st page; - // 2nd block from the middle of the 1st page to the middle of the 2nd page; - // 3rd block in the 2nd page. - // The results should be SharedSlices of the same underlying buffer. - // - // Illustration (each x is a 1/4 page) - // 2 pages: xxxxxxxx - // 1st block: x - // 2nd block: xxxx - // 3rd block: x - FSReadRequest r0; - r0.offset = 0; - r0.len = page_size / 4; - r0.scratch = nullptr; - - FSReadRequest r1; - r1.offset = page_size / 2; - r1.len = page_size; - r1.scratch = nullptr; - - FSReadRequest r2; - r2.offset = 2 * page_size - page_size / 4; - r2.len = page_size / 4; - r2.scratch = nullptr; - - std::vector reqs; - reqs.push_back(std::move(r0)); - reqs.push_back(std::move(r1)); - reqs.push_back(std::move(r2)); - AlignedBuf aligned_buf; - ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf, - Env::IO_TOTAL /* rate_limiter_priority */)); - - AssertResult(content, reqs); - - // Reads the first two pages in one request internally. - ASSERT_EQ(aligned_reqs.size(), 1); - const FSReadRequest& aligned_r = aligned_reqs[0]; - ASSERT_OK(aligned_r.status); - ASSERT_EQ(aligned_r.offset, 0); - ASSERT_EQ(aligned_r.len, 2 * page_size); - } - - { - // Reads 3 blocks: - // 1st block in the middle of the 1st page; - // 2nd block in the middle of the 2nd page; - // 3rd block in the middle of the 3rd page. - // The results should be SharedSlices of the same underlying buffer. - // - // Illustration (each x is a 1/4 page) - // 3 pages: xxxxxxxxxxxx - // 1st block: xx - // 2nd block: xx - // 3rd block: xx - FSReadRequest r0; - r0.offset = page_size / 4; - r0.len = page_size / 2; - r0.scratch = nullptr; - - FSReadRequest r1; - r1.offset = page_size + page_size / 4; - r1.len = page_size / 2; - r1.scratch = nullptr; - - FSReadRequest r2; - r2.offset = 2 * page_size + page_size / 4; - r2.len = page_size / 2; - r2.scratch = nullptr; - - std::vector reqs; - reqs.push_back(std::move(r0)); - reqs.push_back(std::move(r1)); - reqs.push_back(std::move(r2)); - AlignedBuf aligned_buf; - ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf, - Env::IO_TOTAL /* rate_limiter_priority */)); - - AssertResult(content, reqs); - - // Reads the first 3 pages in one request internally. - ASSERT_EQ(aligned_reqs.size(), 1); - const FSReadRequest& aligned_r = aligned_reqs[0]; - ASSERT_OK(aligned_r.status); - ASSERT_EQ(aligned_r.offset, 0); - ASSERT_EQ(aligned_r.len, 3 * page_size); - } - - { - // Reads 2 blocks: - // 1st block in the middle of the 1st page; - // 2nd block in the middle of the 3rd page. - // The results are two different buffers. 
- // - // Illustration (each x is a 1/4 page) - // 3 pages: xxxxxxxxxxxx - // 1st block: xx - // 2nd block: xx - FSReadRequest r0; - r0.offset = page_size / 4; - r0.len = page_size / 2; - r0.scratch = nullptr; - - FSReadRequest r1; - r1.offset = 2 * page_size + page_size / 4; - r1.len = page_size / 2; - r1.scratch = nullptr; - - std::vector reqs; - reqs.push_back(std::move(r0)); - reqs.push_back(std::move(r1)); - AlignedBuf aligned_buf; - ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf, - Env::IO_TOTAL /* rate_limiter_priority */)); - - AssertResult(content, reqs); - - // Reads the 1st and 3rd pages in two requests internally. - ASSERT_EQ(aligned_reqs.size(), 2); - const FSReadRequest& aligned_r0 = aligned_reqs[0]; - const FSReadRequest& aligned_r1 = aligned_reqs[1]; - ASSERT_OK(aligned_r0.status); - ASSERT_EQ(aligned_r0.offset, 0); - ASSERT_EQ(aligned_r0.len, page_size); - ASSERT_OK(aligned_r1.status); - ASSERT_EQ(aligned_r1.offset, 2 * page_size); - ASSERT_EQ(aligned_r1.len, page_size); - } - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); -} - - -TEST(FSReadRequest, Align) { - FSReadRequest r; - r.offset = 2000; - r.len = 2000; - r.scratch = nullptr; - ASSERT_OK(r.status); - - FSReadRequest aligned_r = Align(r, 1024); - ASSERT_OK(r.status); - ASSERT_OK(aligned_r.status); - ASSERT_EQ(aligned_r.offset, 1024); - ASSERT_EQ(aligned_r.len, 3072); -} - -TEST(FSReadRequest, TryMerge) { - // reverse means merging dest into src. - for (bool reverse : {true, false}) { - { - // dest: [ ] - // src: [ ] - FSReadRequest dest; - dest.offset = 0; - dest.len = 10; - dest.scratch = nullptr; - ASSERT_OK(dest.status); - - FSReadRequest src; - src.offset = 15; - src.len = 10; - src.scratch = nullptr; - ASSERT_OK(src.status); - - if (reverse) { - std::swap(dest, src); - } - ASSERT_FALSE(TryMerge(&dest, src)); - ASSERT_OK(dest.status); - ASSERT_OK(src.status); - } - - { - // dest: [ ] - // src: [ ] - FSReadRequest dest; - dest.offset = 0; - dest.len = 10; - dest.scratch = nullptr; - ASSERT_OK(dest.status); - - FSReadRequest src; - src.offset = 10; - src.len = 10; - src.scratch = nullptr; - ASSERT_OK(src.status); - - if (reverse) { - std::swap(dest, src); - } - ASSERT_TRUE(TryMerge(&dest, src)); - ASSERT_EQ(dest.offset, 0); - ASSERT_EQ(dest.len, 20); - ASSERT_OK(dest.status); - ASSERT_OK(src.status); - } - - { - // dest: [ ] - // src: [ ] - FSReadRequest dest; - dest.offset = 0; - dest.len = 10; - dest.scratch = nullptr; - ASSERT_OK(dest.status); - - FSReadRequest src; - src.offset = 5; - src.len = 10; - src.scratch = nullptr; - ASSERT_OK(src.status); - - if (reverse) { - std::swap(dest, src); - } - ASSERT_TRUE(TryMerge(&dest, src)); - ASSERT_EQ(dest.offset, 0); - ASSERT_EQ(dest.len, 15); - ASSERT_OK(dest.status); - ASSERT_OK(src.status); - } - - { - // dest: [ ] - // src: [ ] - FSReadRequest dest; - dest.offset = 0; - dest.len = 10; - dest.scratch = nullptr; - ASSERT_OK(dest.status); - - FSReadRequest src; - src.offset = 5; - src.len = 5; - src.scratch = nullptr; - ASSERT_OK(src.status); - - if (reverse) { - std::swap(dest, src); - } - ASSERT_TRUE(TryMerge(&dest, src)); - ASSERT_EQ(dest.offset, 0); - ASSERT_EQ(dest.len, 10); - ASSERT_OK(dest.status); - ASSERT_OK(src.status); - } - - { - // dest: [ ] - // src: [ ] - FSReadRequest dest; - dest.offset = 0; - dest.len = 10; - dest.scratch = nullptr; - ASSERT_OK(dest.status); - - FSReadRequest src; - src.offset = 5; - src.len = 1; - src.scratch = 
nullptr; - ASSERT_OK(src.status); - - if (reverse) std::swap(dest, src); - ASSERT_TRUE(TryMerge(&dest, src)); - ASSERT_EQ(dest.offset, 0); - ASSERT_EQ(dest.len, 10); - ASSERT_OK(dest.status); - ASSERT_OK(src.status); - } - - { - // dest: [ ] - // src: [ ] - FSReadRequest dest; - dest.offset = 0; - dest.len = 10; - dest.scratch = nullptr; - ASSERT_OK(dest.status); - - FSReadRequest src; - src.offset = 0; - src.len = 10; - src.scratch = nullptr; - ASSERT_OK(src.status); - - if (reverse) std::swap(dest, src); - ASSERT_TRUE(TryMerge(&dest, src)); - ASSERT_EQ(dest.offset, 0); - ASSERT_EQ(dest.len, 10); - ASSERT_OK(dest.status); - ASSERT_OK(src.status); - } - - { - // dest: [ ] - // src: [ ] - FSReadRequest dest; - dest.offset = 0; - dest.len = 10; - dest.scratch = nullptr; - ASSERT_OK(dest.status); - - FSReadRequest src; - src.offset = 0; - src.len = 5; - src.scratch = nullptr; - ASSERT_OK(src.status); - - if (reverse) std::swap(dest, src); - ASSERT_TRUE(TryMerge(&dest, src)); - ASSERT_EQ(dest.offset, 0); - ASSERT_EQ(dest.len, 10); - ASSERT_OK(dest.status); - ASSERT_OK(src.status); - } - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/fuzz/.gitignore b/fuzz/.gitignore deleted file mode 100644 index 9dab42105..000000000 --- a/fuzz/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -db_fuzzer -db_map_fuzzer -sst_file_writer_fuzzer - -proto/gen/* diff --git a/fuzz/Makefile b/fuzz/Makefile deleted file mode 100644 index b83040504..000000000 --- a/fuzz/Makefile +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -# This source code is licensed under both the GPLv2 (found in the -# COPYING file in the root directory) and Apache 2.0 License -# (found in the LICENSE.Apache file in the root directory). - -ROOT_DIR = $(abspath $(shell pwd)/../) - -include $(ROOT_DIR)/make_config.mk - -PROTOBUF_CFLAGS = `pkg-config --cflags protobuf` -PROTOBUF_LDFLAGS = `pkg-config --libs protobuf` - -PROTOBUF_MUTATOR_CFLAGS = `pkg-config --cflags libprotobuf-mutator` -PROTOBUF_MUTATOR_LDFLAGS = `pkg-config --libs libprotobuf-mutator` - -ROCKSDB_INCLUDE_DIR = $(ROOT_DIR)/include -ROCKSDB_LIB_DIR = $(ROOT_DIR) - -PROTO_IN = $(ROOT_DIR)/fuzz/proto -PROTO_OUT = $(ROOT_DIR)/fuzz/proto/gen - -ifneq ($(FUZZ_ENV), ossfuzz) -CC = $(CXX) -CCFLAGS += -Wall -fsanitize=address,fuzzer -CFLAGS += $(PLATFORM_CXXFLAGS) $(PROTOBUF_CFLAGS) $(PROTOBUF_MUTATOR_CFLAGS) -I$(PROTO_OUT) -I$(ROCKSDB_INCLUDE_DIR) -I$(ROCKSDB_LIB_DIR) -LDFLAGS += $(PLATFORM_LDFLAGS) $(PROTOBUF_MUTATOR_LDFLAGS) $(PROTOBUF_LDFLAGS) -L$(ROCKSDB_LIB_DIR) -lrocksdb -else -# OSS-Fuzz sets various environment flags that are used for compilation. 
-# These environment flags depend on which type of sanitizer build is being -# used, however, an ASan build would set the environment flags as follows: -# CFLAGS="-O1 -fno-omit-frame-pointer -gline-tables-only \ - -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=address \ - -fsanitize-address-use-after-scope -fsanitize=fuzzer-no-link" -# CXXFLAGS="-O1 -fno-omit-frame-pointer -gline-tables-only \ - -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=address \ - -fsanitize-address-use-after-scope -fsanitize=fuzzer-no-link \ - -stdlib=libc++" -# LIB_FUZZING_ENGINE="-fsanitize=fuzzer" -CC = $(CXX) -CCFLAGS = $(CXXFLAGS) -CFLAGS += $(PROTOBUF_CFLAGS) $(PROTOBUF_MUTATOR_CFLAGS) -I$(PROTO_OUT) -I$(ROCKSDB_INCLUDE_DIR) -I$(ROCKSDB_LIB_DIR) -LDFLAGS += $(PLATFORM_LDFLAGS) $(LIB_FUZZING_ENGINE) $(PROTOBUF_MUTATOR_LDFLAGS) $(PROTOBUF_LDFLAGS) -L$(ROCKSDB_LIB_DIR) -lrocksdb -endif - -.PHONY: gen_proto clean - -# Set PROTOC_BIN when invoking `make` if a custom protoc is required. -PROTOC_BIN ?= protoc - -gen_proto: - mkdir -p $(PROTO_OUT) - $(PROTOC_BIN) \ - --proto_path=$(PROTO_IN) \ - --cpp_out=$(PROTO_OUT) \ - $(PROTO_IN)/*.proto - -clean: - rm -rf db_fuzzer db_map_fuzzer sst_file_writer_fuzzer $(PROTO_OUT) - -db_fuzzer: db_fuzzer.cc - $(CC) $(CCFLAGS) -o db_fuzzer db_fuzzer.cc $(CFLAGS) $(LDFLAGS) - -db_map_fuzzer: gen_proto db_map_fuzzer.cc proto/gen/db_operation.pb.cc - $(CC) $(CCFLAGS) -o db_map_fuzzer db_map_fuzzer.cc proto/gen/db_operation.pb.cc $(CFLAGS) $(LDFLAGS) - -sst_file_writer_fuzzer: gen_proto sst_file_writer_fuzzer.cc proto/gen/db_operation.pb.cc - $(CC) $(CCFLAGS) -o sst_file_writer_fuzzer sst_file_writer_fuzzer.cc proto/gen/db_operation.pb.cc $(CFLAGS) $(LDFLAGS) diff --git a/fuzz/README.md b/fuzz/README.md deleted file mode 100644 index 238b283a2..000000000 --- a/fuzz/README.md +++ /dev/null @@ -1,165 +0,0 @@ -# Fuzzing RocksDB - -## Overview - -This directory contains [fuzz tests](https://en.wikipedia.org/wiki/Fuzzing) for RocksDB. -RocksDB testing infrastructure currently includes unit tests and [stress tests](https://github.com/facebook/rocksdb/wiki/Stress-test), -we hope fuzz testing can catch more bugs. - -## Prerequisite - -We use [LLVM libFuzzer](http://llvm.org/docs/LibFuzzer.html) as the fuzzying engine, -so make sure you have [clang](https://clang.llvm.org/get_started.html) as your compiler. - -Some tests rely on [structure aware fuzzing](https://github.com/google/fuzzing/blob/master/docs/structure-aware-fuzzing.md). -We use [protobuf](https://developers.google.com/protocol-buffers) to define structured input to the fuzzer, -and use [libprotobuf-mutator](https://github.com/google/libprotobuf-mutator) as the custom libFuzzer mutator. -So make sure you have protobuf and libprotobuf-mutator installed, and make sure `pkg-config` can find them. -On some systems, there are both protobuf2 and protobuf3 in the package management system, -make sure protobuf3 is installed. - -If you do not want to install protobuf library yourself, you can rely on libprotobuf-mutator to download protobuf -for you. For details about installation, please refer to [libprotobuf-mutator README](https://github.com/google/libprotobuf-mutator#readme) - -## Example - -This example shows you how to do structure aware fuzzing to `rocksdb::SstFileWriter`. - -After walking through the steps to create the fuzzer, we'll introduce a bug into `rocksdb::SstFileWriter::Put`, -then show that the fuzzer can catch the bug. 
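
For orientation before the walkthrough, here is a minimal non-fuzzing sketch of the API being exercised. It is only an illustration: the file path is a placeholder, error handling is reduced to asserts, and it is not part of the fuzzer itself; the fuzzer drives the same `SstFileWriter`/`SstFileReader` calls from generated input.

```
#include <cassert>
#include <string>

#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/sst_file_reader.h"
#include "rocksdb/sst_file_writer.h"

int main() {
  rocksdb::Options options;
  const std::string path = "/tmp/example.sst";  // placeholder path

  // Write a couple of unique keys in ascending order, the same contract
  // the fuzzer has to enforce on its generated operations.
  rocksdb::SstFileWriter writer(rocksdb::EnvOptions(), options);
  assert(writer.Open(path).ok());
  assert(writer.Put("k1", "v1").ok());
  assert(writer.Put("k2", "v2").ok());
  assert(writer.Finish().ok());

  // Read the file back and verify its checksum, as the fuzzer does.
  rocksdb::SstFileReader reader(options);
  assert(reader.Open(path).ok());
  assert(reader.VerifyChecksum(rocksdb::ReadOptions()).ok());
  return 0;
}
```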
- -### Design the test - -We want the fuzzing engine to automatically generate a list of database operations, -then we apply these operations to `SstFileWriter` in sequence, -finally, after the SST file is generated, we use `SstFileReader` to check the file's checksum. - -### Define input - -We define the database operations in protobuf, each operation has a type of operation and a key value pair, -see [proto/db_operation.proto](proto/db_operation.proto) for details. - -### Define tests with the input - -In [sst_file_writer_fuzzer.cc](sst_file_writer_fuzzer.cc), -we define the tests to be run on the generated input: - -``` -DEFINE_PROTO_FUZZER(DBOperations& input) { - // apply the operations to SstFileWriter and use SstFileReader to verify checksum. - // ... -} -``` - -`SstFileWriter` requires the keys of the operations to be unique and be in ascending order, -but the fuzzing engine generates the input randomly, so we need to process the generated input before -passing it to `DEFINE_PROTO_FUZZER`, this is accomplished by registering a post processor: - -``` -protobuf_mutator::libfuzzer::PostProcessorRegistration -``` - -### Compile and link the fuzzer - -In the rocksdb root directory, compile rocksdb library by `make static_lib`. - -Go to the `fuzz` directory, -run `make sst_file_writer_fuzzer` to generate the fuzzer, -it will compile rocksdb static library, generate protobuf, then compile and link `sst_file_writer_fuzzer`. - -### Introduce a bug - -Manually introduce a bug to `SstFileWriter::Put`: - -``` -diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc -index ab1ee7c4e..c7da9ffa0 100644 ---- a/table/sst_file_writer.cc -+++ b/table/sst_file_writer.cc -@@ -277,6 +277,11 @@ Status SstFileWriter::Add(const Slice& user_key, const Slice& value) { - } - - Status SstFileWriter::Put(const Slice& user_key, const Slice& value) { -+ if (user_key.starts_with("!")) { -+ if (value.ends_with("!")) { -+ return Status::Corruption("bomb"); -+ } -+ } - return rep_->Add(user_key, value, ValueType::kTypeValue); - } -``` - -The bug is that for `Put`, if `user_key` starts with `!` and `value` ends with `!`, then corrupt. - -### Run fuzz testing to catch the bug - -Run the fuzzer by `time ./sst_file_writer_fuzzer`. 
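
For reference, the post processor mentioned above can be sketched roughly as below. It mirrors the registration in `sst_file_writer_fuzzer.cc` (sort the operations by key with `BytewiseComparator`, then drop duplicate keys); `DBOperations` and `DBOperation` come from the generated protobuf code, and this is an outline rather than the complete fuzzer.

```
#include <algorithm>

#include "proto/gen/db_operation.pb.h"
#include "rocksdb/comparator.h"
#include "src/libfuzzer/libfuzzer_macro.h"

// Sort the generated operations by key and drop duplicates, so that the
// fuzz target can feed them to SstFileWriter in the required order.
protobuf_mutator::libfuzzer::PostProcessorRegistration<DBOperations> reg = {
    [](DBOperations* input, unsigned int /* seed */) {
      const rocksdb::Comparator* cmp = rocksdb::BytewiseComparator();
      auto* ops = input->mutable_operations();
      std::sort(ops->begin(), ops->end(),
                [&cmp](const DBOperation& a, const DBOperation& b) {
                  return cmp->Compare(a.key(), b.key()) < 0;
                });
      auto last = std::unique(
          ops->begin(), ops->end(),
          [&cmp](const DBOperation& a, const DBOperation& b) {
            return cmp->Compare(a.key(), b.key()) == 0;
          });
      ops->erase(last, ops->end());
    }};
```

With that registration in place, running `time ./sst_file_writer_fuzzer` as above quickly hits the injected corruption.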
- -Here is the output on my machine: - -``` -Corruption: bomb -==59680== ERROR: libFuzzer: deadly signal - #0 0x109487315 in __sanitizer_print_stack_trace+0x35 (libclang_rt.asan_osx_dynamic.dylib:x86_64+0x4d315) - #1 0x108d63f18 in fuzzer::PrintStackTrace() FuzzerUtil.cpp:205 - #2 0x108d47613 in fuzzer::Fuzzer::CrashCallback() FuzzerLoop.cpp:232 - #3 0x7fff6af535fc in _sigtramp+0x1c (libsystem_platform.dylib:x86_64+0x35fc) - #4 0x7ffee720f3ef () - #5 0x7fff6ae29807 in abort+0x77 (libsystem_c.dylib:x86_64+0x7f807) - #6 0x108cf1c4c in TestOneProtoInput(DBOperations&)+0x113c (sst_file_writer_fuzzer:x86_64+0x100302c4c) - #7 0x108cf09be in LLVMFuzzerTestOneInput+0x16e (sst_file_writer_fuzzer:x86_64+0x1003019be) - #8 0x108d48ce0 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) FuzzerLoop.cpp:556 - #9 0x108d48425 in fuzzer::Fuzzer::RunOne(unsigned char const*, unsigned long, bool, fuzzer::InputInfo*, bool*) FuzzerLoop.cpp:470 - #10 0x108d4a626 in fuzzer::Fuzzer::MutateAndTestOne() FuzzerLoop.cpp:698 - #11 0x108d4b325 in fuzzer::Fuzzer::Loop(std::__1::vector >&) FuzzerLoop.cpp:830 - #12 0x108d37fcd in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) FuzzerDriver.cpp:829 - #13 0x108d652b2 in main FuzzerMain.cpp:19 - #14 0x7fff6ad5acc8 in start+0x0 (libdyld.dylib:x86_64+0x1acc8) - -NOTE: libFuzzer has rudimentary signal handlers. - Combine libFuzzer with AddressSanitizer or similar for better crash reports. -SUMMARY: libFuzzer: deadly signal -MS: 7 Custom-CustomCrossOver-InsertByte-Custom-ChangeBit-Custom-CustomCrossOver-; base unit: 90863b4d83c3f994bba0a417d0c2ee3b68f9e795 -0x6f,0x70,0x65,0x72,0x61,0x74,0x69,0x6f,0x6e,0x73,0x20,0x7b,0xa,0x20,0x20,0x6b,0x65,0x79,0x3a,0x20,0x22,0x21,0x22,0xa,0x20,0x20,0x76,0x61,0x6c,0x75,0x65,0x3a,0x20,0x22,0x21,0x22,0xa,0x20,0x20,0x74,0x79,0x70,0x65,0x3a,0x20,0x50,0x55,0x54,0xa,0x7d,0xa,0x6f,0x70,0x65,0x72,0x61,0x74,0x69,0x6f,0x6e,0x73,0x20,0x7b,0xa,0x20,0x20,0x6b,0x65,0x79,0x3a,0x20,0x22,0x2b,0x22,0xa,0x20,0x20,0x74,0x79,0x70,0x65,0x3a,0x20,0x50,0x55,0x54,0xa,0x7d,0xa,0x6f,0x70,0x65,0x72,0x61,0x74,0x69,0x6f,0x6e,0x73,0x20,0x7b,0xa,0x20,0x20,0x6b,0x65,0x79,0x3a,0x20,0x22,0x2e,0x22,0xa,0x20,0x20,0x74,0x79,0x70,0x65,0x3a,0x20,0x50,0x55,0x54,0xa,0x7d,0xa,0x6f,0x70,0x65,0x72,0x61,0x74,0x69,0x6f,0x6e,0x73,0x20,0x7b,0xa,0x20,0x20,0x6b,0x65,0x79,0x3a,0x20,0x22,0x5c,0x32,0x35,0x33,0x22,0xa,0x20,0x20,0x74,0x79,0x70,0x65,0x3a,0x20,0x50,0x55,0x54,0xa,0x7d,0xa, -operations {\x0a key: \"!\"\x0a value: \"!\"\x0a type: PUT\x0a}\x0aoperations {\x0a key: \"+\"\x0a type: PUT\x0a}\x0aoperations {\x0a key: \".\"\x0a type: PUT\x0a}\x0aoperations {\x0a key: \"\\253\"\x0a type: PUT\x0a}\x0a -artifact_prefix='./'; Test unit written to ./crash-a1460be302d09b548e61787178d9edaa40aea467 -Base64: b3BlcmF0aW9ucyB7CiAga2V5OiAiISIKICB2YWx1ZTogIiEiCiAgdHlwZTogUFVUCn0Kb3BlcmF0aW9ucyB7CiAga2V5OiAiKyIKICB0eXBlOiBQVVQKfQpvcGVyYXRpb25zIHsKICBrZXk6ICIuIgogIHR5cGU6IFBVVAp9Cm9wZXJhdGlvbnMgewogIGtleTogIlwyNTMiCiAgdHlwZTogUFVUCn0K -./sst_file_writer_fuzzer 5.97s user 4.40s system 64% cpu 16.195 total -``` - -Within 6 seconds, it catches the bug. - -The input that triggers the bug is persisted in `./crash-a1460be302d09b548e61787178d9edaa40aea467`: - -``` -$ cat ./crash-a1460be302d09b548e61787178d9edaa40aea467 -operations { - key: "!" - value: "!" - type: PUT -} -operations { - key: "+" - type: PUT -} -operations { - key: "." 
- type: PUT -} -operations { - key: "\253" - type: PUT -} -``` - -### Reproduce the crash to debug - -The above crash can be reproduced by `./sst_file_writer_fuzzer ./crash-a1460be302d09b548e61787178d9edaa40aea467`, -so you can debug the crash. - -## Future Work - -According to [OSS-Fuzz](https://github.com/google/oss-fuzz), -`as of June 2020, OSS-Fuzz has found over 20,000 bugs in 300 open source projects.` - -RocksDB can join OSS-Fuzz together with other open source projects such as sqlite. diff --git a/fuzz/db_fuzzer.cc b/fuzz/db_fuzzer.cc deleted file mode 100644 index e6d5bb63c..000000000 --- a/fuzz/db_fuzzer.cc +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include - -#include "rocksdb/db.h" - -enum OperationType { - kPut, - kGet, - kDelete, - kGetProperty, - kIterator, - kSnapshot, - kOpenClose, - kColumn, - kCompactRange, - kSeekForPrev, - OP_COUNT -}; - -constexpr char db_path[] = "/tmp/testdb"; - -// Fuzzes DB operations by doing interpretations on the data. Both the -// sequence of API calls to be called on the DB as well as the arguments -// to each of these APIs are interpreted by way of the data buffer. -// The operations that the fuzzer supports are given by the OperationType -// enum. The goal is to capture sanitizer bugs, so the code should be -// compiled with a given sanitizer (ASan, UBSan, MSan). -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - ROCKSDB_NAMESPACE::DB* db; - ROCKSDB_NAMESPACE::Options options; - options.create_if_missing = true; - ROCKSDB_NAMESPACE::Status status = - ROCKSDB_NAMESPACE::DB::Open(options, db_path, &db); - if (!status.ok()) { - return 0; - } - FuzzedDataProvider fuzzed_data(data, size); - - // perform a sequence of calls on our db instance - int max_iter = static_cast(data[0]); - for (int i = 0; i < max_iter && i < size; i++) { - OperationType op = static_cast(data[i] % OP_COUNT); - - switch (op) { - case kPut: { - std::string key = fuzzed_data.ConsumeRandomLengthString(); - std::string val = fuzzed_data.ConsumeRandomLengthString(); - db->Put(ROCKSDB_NAMESPACE::WriteOptions(), key, val); - break; - } - case kGet: { - std::string key = fuzzed_data.ConsumeRandomLengthString(); - std::string value; - db->Get(ROCKSDB_NAMESPACE::ReadOptions(), key, &value); - break; - } - case kDelete: { - std::string key = fuzzed_data.ConsumeRandomLengthString(); - db->Delete(ROCKSDB_NAMESPACE::WriteOptions(), key); - break; - } - case kGetProperty: { - std::string prop; - std::string property_name = fuzzed_data.ConsumeRandomLengthString(); - db->GetProperty(property_name, &prop); - break; - } - case kIterator: { - ROCKSDB_NAMESPACE::Iterator* it = - db->NewIterator(ROCKSDB_NAMESPACE::ReadOptions()); - for (it->SeekToFirst(); it->Valid(); it->Next()) { - } - delete it; - break; - } - case kSnapshot: { - ROCKSDB_NAMESPACE::ReadOptions snapshot_options; - snapshot_options.snapshot = db->GetSnapshot(); - ROCKSDB_NAMESPACE::Iterator* it = db->NewIterator(snapshot_options); - db->ReleaseSnapshot(snapshot_options.snapshot); - delete it; - break; - } - case kOpenClose: { - db->Close(); - delete db; - status = ROCKSDB_NAMESPACE::DB::Open(options, db_path, &db); - if (!status.ok()) { - ROCKSDB_NAMESPACE::DestroyDB(db_path, options); - return 0; - } - - break; - } - case kColumn: { - 
ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf; - ROCKSDB_NAMESPACE::Status s; - s = db->CreateColumnFamily(ROCKSDB_NAMESPACE::ColumnFamilyOptions(), - "new_cf", &cf); - s = db->DestroyColumnFamilyHandle(cf); - db->Close(); - delete db; - - // open DB with two column families - std::vector column_families; - // have to open default column family - column_families.push_back(ROCKSDB_NAMESPACE::ColumnFamilyDescriptor( - ROCKSDB_NAMESPACE::kDefaultColumnFamilyName, - ROCKSDB_NAMESPACE::ColumnFamilyOptions())); - // open the new one, too - column_families.push_back(ROCKSDB_NAMESPACE::ColumnFamilyDescriptor( - "new_cf", ROCKSDB_NAMESPACE::ColumnFamilyOptions())); - std::vector handles; - s = ROCKSDB_NAMESPACE::DB::Open(ROCKSDB_NAMESPACE::DBOptions(), db_path, - column_families, &handles, &db); - - if (s.ok()) { - std::string key1 = fuzzed_data.ConsumeRandomLengthString(); - std::string val1 = fuzzed_data.ConsumeRandomLengthString(); - std::string key2 = fuzzed_data.ConsumeRandomLengthString(); - s = db->Put(ROCKSDB_NAMESPACE::WriteOptions(), handles[1], key1, - val1); - std::string value; - s = db->Get(ROCKSDB_NAMESPACE::ReadOptions(), handles[1], key2, - &value); - s = db->DropColumnFamily(handles[1]); - for (auto handle : handles) { - s = db->DestroyColumnFamilyHandle(handle); - } - } else { - status = ROCKSDB_NAMESPACE::DB::Open(options, db_path, &db); - if (!status.ok()) { - // At this point there is no saving to do. So we exit - ROCKSDB_NAMESPACE::DestroyDB(db_path, ROCKSDB_NAMESPACE::Options()); - return 0; - } - } - break; - } - case kCompactRange: { - std::string slice_start = fuzzed_data.ConsumeRandomLengthString(); - std::string slice_end = fuzzed_data.ConsumeRandomLengthString(); - - ROCKSDB_NAMESPACE::Slice begin(slice_start); - ROCKSDB_NAMESPACE::Slice end(slice_end); - ROCKSDB_NAMESPACE::CompactRangeOptions options; - ROCKSDB_NAMESPACE::Status s = db->CompactRange(options, &begin, &end); - break; - } - case kSeekForPrev: { - std::string key = fuzzed_data.ConsumeRandomLengthString(); - auto iter = db->NewIterator(ROCKSDB_NAMESPACE::ReadOptions()); - iter->SeekForPrev(key); - delete iter; - break; - } - case OP_COUNT: - break; - } - } - - // Cleanup DB - db->Close(); - delete db; - ROCKSDB_NAMESPACE::DestroyDB(db_path, options); - return 0; -} diff --git a/fuzz/db_map_fuzzer.cc b/fuzz/db_map_fuzzer.cc deleted file mode 100644 index ed9df8f84..000000000 --- a/fuzz/db_map_fuzzer.cc +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include -#include -#include -#include - -#include "proto/gen/db_operation.pb.h" -#include "rocksdb/db.h" -#include "rocksdb/file_system.h" -#include "src/libfuzzer/libfuzzer_macro.h" -#include "util.h" - -protobuf_mutator::libfuzzer::PostProcessorRegistration reg = { - [](DBOperations* input, unsigned int /* seed */) { - const ROCKSDB_NAMESPACE::Comparator* comparator = - ROCKSDB_NAMESPACE::BytewiseComparator(); - auto ops = input->mutable_operations(); - // Make sure begin <= end for DELETE_RANGE. 
- for (DBOperation& op : *ops) { - if (op.type() == OpType::DELETE_RANGE) { - auto begin = op.key(); - auto end = op.value(); - if (comparator->Compare(begin, end) > 0) { - std::swap(begin, end); - op.set_key(begin); - op.set_value(end); - } - } - } - }}; - -// Execute randomly generated operations on both a DB and a std::map, -// then reopen the DB and make sure that iterating the DB produces the -// same key-value pairs as iterating through the std::map. -DEFINE_PROTO_FUZZER(DBOperations& input) { - if (input.operations().empty()) { - return; - } - - const std::string kDbPath = "/tmp/db_map_fuzzer_test"; - auto fs = ROCKSDB_NAMESPACE::FileSystem::Default(); - if (fs->FileExists(kDbPath, ROCKSDB_NAMESPACE::IOOptions(), /*dbg=*/nullptr) - .ok()) { - std::cerr << "db path " << kDbPath << " already exists" << std::endl; - abort(); - } - - std::map kv; - ROCKSDB_NAMESPACE::DB* db = nullptr; - ROCKSDB_NAMESPACE::Options options; - options.create_if_missing = true; - CHECK_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDbPath, &db)); - - for (const DBOperation& op : input.operations()) { - switch (op.type()) { - case OpType::PUT: { - CHECK_OK( - db->Put(ROCKSDB_NAMESPACE::WriteOptions(), op.key(), op.value())); - kv[op.key()] = op.value(); - break; - } - case OpType::MERGE: { - break; - } - case OpType::DELETE: { - CHECK_OK(db->Delete(ROCKSDB_NAMESPACE::WriteOptions(), op.key())); - kv.erase(op.key()); - break; - } - case OpType::DELETE_RANGE: { - // [op.key(), op.value()) corresponds to [begin, end). - CHECK_OK(db->DeleteRange(ROCKSDB_NAMESPACE::WriteOptions(), - db->DefaultColumnFamily(), op.key(), - op.value())); - kv.erase(kv.lower_bound(op.key()), kv.lower_bound(op.value())); - break; - } - default: { - std::cerr << "Unsupported operation" << static_cast(op.type()); - return; - } - } - } - CHECK_OK(db->Close()); - delete db; - db = nullptr; - - CHECK_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDbPath, &db)); - auto kv_it = kv.begin(); - ROCKSDB_NAMESPACE::Iterator* it = - db->NewIterator(ROCKSDB_NAMESPACE::ReadOptions()); - for (it->SeekToFirst(); it->Valid(); it->Next(), kv_it++) { - CHECK_TRUE(kv_it != kv.end()); - CHECK_EQ(it->key().ToString(), kv_it->first); - CHECK_EQ(it->value().ToString(), kv_it->second); - } - CHECK_TRUE(kv_it == kv.end()); - delete it; - - CHECK_OK(db->Close()); - delete db; - CHECK_OK(ROCKSDB_NAMESPACE::DestroyDB(kDbPath, options)); -} diff --git a/fuzz/proto/db_operation.proto b/fuzz/proto/db_operation.proto deleted file mode 100644 index 20a55eaa5..000000000 --- a/fuzz/proto/db_operation.proto +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -// Defines database operations. -// Each operation is a key-value pair and an operation type. - -syntax = "proto2"; - -enum OpType { - PUT = 0; - MERGE = 1; - DELETE = 2; - DELETE_RANGE = 3; -} - -message DBOperation { - required string key = 1; - // value is ignored for DELETE. - // [key, value] is the range for DELETE_RANGE. 
- optional string value = 2; - required OpType type = 3; -} - -message DBOperations { - repeated DBOperation operations = 1; -} diff --git a/fuzz/sst_file_writer_fuzzer.cc b/fuzz/sst_file_writer_fuzzer.cc deleted file mode 100644 index e93b9a3f5..000000000 --- a/fuzz/sst_file_writer_fuzzer.cc +++ /dev/null @@ -1,209 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include -#include -#include -#include - -#include "proto/gen/db_operation.pb.h" -#include "rocksdb/file_system.h" -#include "rocksdb/sst_file_writer.h" -#include "src/libfuzzer/libfuzzer_macro.h" -#include "table/table_builder.h" -#include "table/table_reader.h" -#include "util.h" - -using ROCKSDB_NAMESPACE::BytewiseComparator; -using ROCKSDB_NAMESPACE::Comparator; -using ROCKSDB_NAMESPACE::EnvOptions; -using ROCKSDB_NAMESPACE::ExternalSstFileInfo; -using ROCKSDB_NAMESPACE::FileOptions; -using ROCKSDB_NAMESPACE::FileSystem; -using ROCKSDB_NAMESPACE::ImmutableCFOptions; -using ROCKSDB_NAMESPACE::ImmutableOptions; -using ROCKSDB_NAMESPACE::InternalIterator; -using ROCKSDB_NAMESPACE::IOOptions; -using ROCKSDB_NAMESPACE::kMaxSequenceNumber; -using ROCKSDB_NAMESPACE::Options; -using ROCKSDB_NAMESPACE::ParsedInternalKey; -using ROCKSDB_NAMESPACE::ParseInternalKey; -using ROCKSDB_NAMESPACE::RandomAccessFileReader; -using ROCKSDB_NAMESPACE::ReadOptions; -using ROCKSDB_NAMESPACE::SstFileWriter; -using ROCKSDB_NAMESPACE::Status; -using ROCKSDB_NAMESPACE::TableReader; -using ROCKSDB_NAMESPACE::TableReaderCaller; -using ROCKSDB_NAMESPACE::TableReaderOptions; -using ROCKSDB_NAMESPACE::ValueType; - -// Keys in SST file writer operations must be unique and in ascending order. -// For each DBOperation generated by the fuzzer, this function is called on -// it to deduplicate and sort the keys in the DBOperations. -protobuf_mutator::libfuzzer::PostProcessorRegistration reg = { - [](DBOperations* input, unsigned int /* seed */) { - const Comparator* comparator = BytewiseComparator(); - auto ops = input->mutable_operations(); - - // Make sure begin <= end for DELETE_RANGE. - for (DBOperation& op : *ops) { - if (op.type() == OpType::DELETE_RANGE) { - auto begin = op.key(); - auto end = op.value(); - if (comparator->Compare(begin, end) > 0) { - std::swap(begin, end); - op.set_key(begin); - op.set_value(end); - } - } - } - - std::sort(ops->begin(), ops->end(), - [&comparator](const DBOperation& a, const DBOperation& b) { - return comparator->Compare(a.key(), b.key()) < 0; - }); - - auto last = std::unique( - ops->begin(), ops->end(), - [&comparator](const DBOperation& a, const DBOperation& b) { - return comparator->Compare(a.key(), b.key()) == 0; - }); - ops->erase(last, ops->end()); - }}; - -TableReader* NewTableReader(const std::string& sst_file_path, - const Options& options, - const EnvOptions& env_options, - const ImmutableCFOptions& cf_ioptions) { - // This code block is similar to SstFileReader::Open. 
- - uint64_t file_size = 0; - std::unique_ptr file_reader; - std::unique_ptr table_reader; - const auto& fs = options.env->GetFileSystem(); - FileOptions fopts(env_options); - Status s = options.env->GetFileSize(sst_file_path, &file_size); - if (s.ok()) { - s = RandomAccessFileReader::Create(fs, sst_file_path, fopts, &file_reader, - nullptr); - } - if (s.ok()) { - ImmutableOptions iopts(options, cf_ioptions); - TableReaderOptions t_opt(iopts, /*prefix_extractor=*/nullptr, env_options, - cf_ioptions.internal_comparator); - t_opt.largest_seqno = kMaxSequenceNumber; - s = options.table_factory->NewTableReader(t_opt, std::move(file_reader), - file_size, &table_reader, - /*prefetch=*/false); - } - if (!s.ok()) { - std::cerr << "Failed to create TableReader for " << sst_file_path << ": " - << s.ToString() << std::endl; - abort(); - } - return table_reader.release(); -} - -ValueType ToValueType(OpType op_type) { - switch (op_type) { - case OpType::PUT: - return ValueType::kTypeValue; - case OpType::MERGE: - return ValueType::kTypeMerge; - case OpType::DELETE: - return ValueType::kTypeDeletion; - case OpType::DELETE_RANGE: - return ValueType::kTypeRangeDeletion; - default: - std::cerr << "Unknown operation type " << static_cast(op_type) - << std::endl; - abort(); - } -} - -// Fuzzes DB operations as input, let SstFileWriter generate a SST file -// according to the operations, then let TableReader read and check all the -// key-value pairs from the generated SST file. -DEFINE_PROTO_FUZZER(DBOperations& input) { - if (input.operations().empty()) { - return; - } - - std::string sstfile; - { - auto fs = FileSystem::Default(); - std::string dir; - IOOptions opt; - CHECK_OK(fs->GetTestDirectory(opt, &dir, nullptr)); - sstfile = dir + "/SstFileWriterFuzzer.sst"; - } - - Options options; - EnvOptions env_options(options); - ImmutableCFOptions cf_ioptions(options); - - // Generate sst file. - SstFileWriter writer(env_options, options); - CHECK_OK(writer.Open(sstfile)); - for (const DBOperation& op : input.operations()) { - switch (op.type()) { - case OpType::PUT: { - CHECK_OK(writer.Put(op.key(), op.value())); - break; - } - case OpType::MERGE: { - CHECK_OK(writer.Merge(op.key(), op.value())); - break; - } - case OpType::DELETE: { - CHECK_OK(writer.Delete(op.key())); - break; - } - case OpType::DELETE_RANGE: { - CHECK_OK(writer.DeleteRange(op.key(), op.value())); - break; - } - default: { - std::cerr << "Unsupported operation" << static_cast(op.type()) - << std::endl; - abort(); - } - } - } - ExternalSstFileInfo info; - CHECK_OK(writer.Finish(&info)); - - // Iterate and verify key-value pairs. - std::unique_ptr table_reader( - ::NewTableReader(sstfile, options, env_options, cf_ioptions)); - ReadOptions roptions; - CHECK_OK(table_reader->VerifyChecksum(roptions, - TableReaderCaller::kUncategorized)); - std::unique_ptr it( - table_reader->NewIterator(roptions, /*prefix_extractor=*/nullptr, - /*arena=*/nullptr, /*skip_filters=*/true, - TableReaderCaller::kUncategorized)); - it->SeekToFirst(); - for (const DBOperation& op : input.operations()) { - if (op.type() == OpType::DELETE_RANGE) { - // InternalIterator cannot iterate over DELETE_RANGE entries. 
- continue; - } - CHECK_TRUE(it->Valid()); - ParsedInternalKey ikey; - CHECK_OK(ParseInternalKey(it->key(), &ikey, /*log_err_key=*/true)); - CHECK_EQ(ikey.user_key.ToString(), op.key()); - CHECK_EQ(ikey.sequence, 0); - CHECK_EQ(ikey.type, ToValueType(op.type())); - if (op.type() != OpType::DELETE) { - CHECK_EQ(op.value(), it->value().ToString()); - } - it->Next(); - } - CHECK_TRUE(!it->Valid()); - - // Delete sst file. - remove(sstfile.c_str()); -} diff --git a/fuzz/util.h b/fuzz/util.h deleted file mode 100644 index 97011823a..000000000 --- a/fuzz/util.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once - -#define CHECK_OK(expression) \ - do { \ - auto status = (expression); \ - if (!status.ok()) { \ - std::cerr << status.ToString() << std::endl; \ - abort(); \ - } \ - } while (0) - -#define CHECK_EQ(a, b) \ - if (a != b) { \ - std::cerr << "(" << #a << "=" << a << ") != (" << #b << "=" << b << ")" \ - << std::endl; \ - abort(); \ - } - -#define CHECK_TRUE(cond) \ - if (!(cond)) { \ - std::cerr << "\"" << #cond << "\" is false" << std::endl; \ - abort(); \ - } diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt deleted file mode 100644 index 5d62630fd..000000000 --- a/java/CMakeLists.txt +++ /dev/null @@ -1,549 +0,0 @@ -cmake_minimum_required(VERSION 3.4) - -if(${CMAKE_VERSION} VERSION_LESS "3.11.4") - message("Please consider switching to CMake 3.11.4 or newer") -endif() - -set(CMAKE_JAVA_COMPILE_FLAGS -source 7) - -set(JNI_NATIVE_SOURCES - rocksjni/backup_engine_options.cc - rocksjni/backupenginejni.cc - rocksjni/cassandra_compactionfilterjni.cc - rocksjni/cassandra_value_operator.cc - rocksjni/checkpoint.cc - rocksjni/clock_cache.cc - rocksjni/cache.cc - rocksjni/columnfamilyhandle.cc - rocksjni/compaction_filter.cc - rocksjni/compaction_filter_factory.cc - rocksjni/compaction_filter_factory_jnicallback.cc - rocksjni/compaction_job_info.cc - rocksjni/compaction_job_stats.cc - rocksjni/compaction_options.cc - rocksjni/compaction_options_fifo.cc - rocksjni/compaction_options_universal.cc - rocksjni/compact_range_options.cc - rocksjni/comparator.cc - rocksjni/comparatorjnicallback.cc - rocksjni/compression_options.cc - rocksjni/concurrent_task_limiter.cc - rocksjni/config_options.cc - rocksjni/env.cc - rocksjni/env_options.cc - rocksjni/event_listener.cc - rocksjni/event_listener_jnicallback.cc - rocksjni/filter.cc - rocksjni/ingest_external_file_options.cc - rocksjni/iterator.cc - rocksjni/jnicallback.cc - rocksjni/loggerjnicallback.cc - rocksjni/lru_cache.cc - rocksjni/memory_util.cc - rocksjni/memtablejni.cc - rocksjni/merge_operator.cc - rocksjni/native_comparator_wrapper_test.cc - rocksjni/optimistic_transaction_db.cc - rocksjni/optimistic_transaction_options.cc - rocksjni/options.cc - rocksjni/options_util.cc - rocksjni/persistent_cache.cc - rocksjni/ratelimiterjni.cc - rocksjni/remove_emptyvalue_compactionfilterjni.cc - rocksjni/restorejni.cc - rocksjni/rocks_callback_object.cc - rocksjni/rocksdb_exception_test.cc - rocksjni/rocksjni.cc - rocksjni/slice.cc - rocksjni/snapshot.cc - rocksjni/sst_file_manager.cc - rocksjni/sst_file_writerjni.cc - rocksjni/sst_file_readerjni.cc - rocksjni/sst_file_reader_iterator.cc - rocksjni/sst_partitioner.cc - rocksjni/statistics.cc - rocksjni/statisticsjni.cc - rocksjni/table.cc - 
rocksjni/table_filter.cc - rocksjni/table_filter_jnicallback.cc - rocksjni/testable_event_listener.cc - rocksjni/thread_status.cc - rocksjni/trace_writer.cc - rocksjni/trace_writer_jnicallback.cc - rocksjni/transaction.cc - rocksjni/transaction_db.cc - rocksjni/transaction_db_options.cc - rocksjni/transaction_log.cc - rocksjni/transaction_notifier.cc - rocksjni/transaction_notifier_jnicallback.cc - rocksjni/transaction_options.cc - rocksjni/ttl.cc - rocksjni/wal_filter.cc - rocksjni/wal_filter_jnicallback.cc - rocksjni/write_batch.cc - rocksjni/writebatchhandlerjnicallback.cc - rocksjni/write_batch_test.cc - rocksjni/write_batch_with_index.cc - rocksjni/write_buffer_manager.cc -) - -set(JAVA_MAIN_CLASSES - src/main/java/org/rocksdb/AbstractCompactionFilter.java - src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java - src/main/java/org/rocksdb/AbstractComparator.java - src/main/java/org/rocksdb/AbstractEventListener.java - src/main/java/org/rocksdb/AbstractImmutableNativeReference.java - src/main/java/org/rocksdb/AbstractMutableOptions.java - src/main/java/org/rocksdb/AbstractNativeReference.java - src/main/java/org/rocksdb/AbstractRocksIterator.java - src/main/java/org/rocksdb/AbstractSlice.java - src/main/java/org/rocksdb/AbstractTableFilter.java - src/main/java/org/rocksdb/AbstractTraceWriter.java - src/main/java/org/rocksdb/AbstractTransactionNotifier.java - src/main/java/org/rocksdb/AbstractWalFilter.java - src/main/java/org/rocksdb/AbstractWriteBatch.java - src/main/java/org/rocksdb/AccessHint.java - src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java - src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java - src/main/java/org/rocksdb/BackgroundErrorReason.java - src/main/java/org/rocksdb/BackupEngineOptions.java - src/main/java/org/rocksdb/BackupEngine.java - src/main/java/org/rocksdb/BackupInfo.java - src/main/java/org/rocksdb/BlockBasedTableConfig.java - src/main/java/org/rocksdb/BloomFilter.java - src/main/java/org/rocksdb/BuiltinComparator.java - src/main/java/org/rocksdb/ByteBufferGetStatus.java - src/main/java/org/rocksdb/Cache.java - src/main/java/org/rocksdb/CassandraCompactionFilter.java - src/main/java/org/rocksdb/CassandraValueMergeOperator.java - src/main/java/org/rocksdb/Checkpoint.java - src/main/java/org/rocksdb/ChecksumType.java - src/main/java/org/rocksdb/ClockCache.java - src/main/java/org/rocksdb/ColumnFamilyDescriptor.java - src/main/java/org/rocksdb/ColumnFamilyHandle.java - src/main/java/org/rocksdb/ColumnFamilyMetaData.java - src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java - src/main/java/org/rocksdb/ColumnFamilyOptions.java - src/main/java/org/rocksdb/CompactionJobInfo.java - src/main/java/org/rocksdb/CompactionJobStats.java - src/main/java/org/rocksdb/CompactionOptions.java - src/main/java/org/rocksdb/CompactionOptionsFIFO.java - src/main/java/org/rocksdb/CompactionOptionsUniversal.java - src/main/java/org/rocksdb/CompactionPriority.java - src/main/java/org/rocksdb/CompactionReason.java - src/main/java/org/rocksdb/CompactRangeOptions.java - src/main/java/org/rocksdb/CompactionStopStyle.java - src/main/java/org/rocksdb/CompactionStyle.java - src/main/java/org/rocksdb/ComparatorOptions.java - src/main/java/org/rocksdb/ComparatorType.java - src/main/java/org/rocksdb/CompressionOptions.java - src/main/java/org/rocksdb/CompressionType.java - src/main/java/org/rocksdb/ConfigOptions.java - src/main/java/org/rocksdb/DataBlockIndexType.java - src/main/java/org/rocksdb/DBOptionsInterface.java - 
src/main/java/org/rocksdb/DBOptions.java - src/main/java/org/rocksdb/DbPath.java - src/main/java/org/rocksdb/DirectSlice.java - src/main/java/org/rocksdb/EncodingType.java - src/main/java/org/rocksdb/Env.java - src/main/java/org/rocksdb/EnvOptions.java - src/main/java/org/rocksdb/EventListener.java - src/main/java/org/rocksdb/Experimental.java - src/main/java/org/rocksdb/ExternalFileIngestionInfo.java - src/main/java/org/rocksdb/Filter.java - src/main/java/org/rocksdb/FileOperationInfo.java - src/main/java/org/rocksdb/FlushJobInfo.java - src/main/java/org/rocksdb/FlushReason.java - src/main/java/org/rocksdb/FlushOptions.java - src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java - src/main/java/org/rocksdb/HashSkipListMemTableConfig.java - src/main/java/org/rocksdb/HistogramData.java - src/main/java/org/rocksdb/HistogramType.java - src/main/java/org/rocksdb/Holder.java - src/main/java/org/rocksdb/IndexShorteningMode.java - src/main/java/org/rocksdb/IndexType.java - src/main/java/org/rocksdb/InfoLogLevel.java - src/main/java/org/rocksdb/IngestExternalFileOptions.java - src/main/java/org/rocksdb/LevelMetaData.java - src/main/java/org/rocksdb/ConcurrentTaskLimiter.java - src/main/java/org/rocksdb/ConcurrentTaskLimiterImpl.java - src/main/java/org/rocksdb/KeyMayExist.java - src/main/java/org/rocksdb/LiveFileMetaData.java - src/main/java/org/rocksdb/LogFile.java - src/main/java/org/rocksdb/Logger.java - src/main/java/org/rocksdb/LRUCache.java - src/main/java/org/rocksdb/MemoryUsageType.java - src/main/java/org/rocksdb/MemoryUtil.java - src/main/java/org/rocksdb/MemTableConfig.java - src/main/java/org/rocksdb/MemTableInfo.java - src/main/java/org/rocksdb/MergeOperator.java - src/main/java/org/rocksdb/MutableColumnFamilyOptions.java - src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java - src/main/java/org/rocksdb/MutableDBOptions.java - src/main/java/org/rocksdb/MutableDBOptionsInterface.java - src/main/java/org/rocksdb/MutableOptionKey.java - src/main/java/org/rocksdb/MutableOptionValue.java - src/main/java/org/rocksdb/NativeComparatorWrapper.java - src/main/java/org/rocksdb/NativeLibraryLoader.java - src/main/java/org/rocksdb/OperationStage.java - src/main/java/org/rocksdb/OperationType.java - src/main/java/org/rocksdb/OptimisticTransactionDB.java - src/main/java/org/rocksdb/OptimisticTransactionOptions.java - src/main/java/org/rocksdb/Options.java - src/main/java/org/rocksdb/OptionString.java - src/main/java/org/rocksdb/OptionsUtil.java - src/main/java/org/rocksdb/PersistentCache.java - src/main/java/org/rocksdb/PlainTableConfig.java - src/main/java/org/rocksdb/PrepopulateBlobCache.java - src/main/java/org/rocksdb/Priority.java - src/main/java/org/rocksdb/Range.java - src/main/java/org/rocksdb/RateLimiter.java - src/main/java/org/rocksdb/RateLimiterMode.java - src/main/java/org/rocksdb/ReadOptions.java - src/main/java/org/rocksdb/ReadTier.java - src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java - src/main/java/org/rocksdb/RestoreOptions.java - src/main/java/org/rocksdb/ReusedSynchronisationType.java - src/main/java/org/rocksdb/RocksCallbackObject.java - src/main/java/org/rocksdb/RocksDBException.java - src/main/java/org/rocksdb/RocksDB.java - src/main/java/org/rocksdb/RocksEnv.java - src/main/java/org/rocksdb/RocksIteratorInterface.java - src/main/java/org/rocksdb/RocksIterator.java - src/main/java/org/rocksdb/RocksMemEnv.java - src/main/java/org/rocksdb/RocksMutableObject.java - src/main/java/org/rocksdb/RocksObject.java - 
src/main/java/org/rocksdb/SanityLevel.java - src/main/java/org/rocksdb/SizeApproximationFlag.java - src/main/java/org/rocksdb/SkipListMemTableConfig.java - src/main/java/org/rocksdb/Slice.java - src/main/java/org/rocksdb/Snapshot.java - src/main/java/org/rocksdb/SstFileManager.java - src/main/java/org/rocksdb/SstFileMetaData.java - src/main/java/org/rocksdb/SstFileReader.java - src/main/java/org/rocksdb/SstFileReaderIterator.java - src/main/java/org/rocksdb/SstFileWriter.java - src/main/java/org/rocksdb/SstPartitionerFactory.java - src/main/java/org/rocksdb/SstPartitionerFixedPrefixFactory.java - src/main/java/org/rocksdb/StateType.java - src/main/java/org/rocksdb/StatisticsCollectorCallback.java - src/main/java/org/rocksdb/StatisticsCollector.java - src/main/java/org/rocksdb/Statistics.java - src/main/java/org/rocksdb/StatsCollectorInput.java - src/main/java/org/rocksdb/StatsLevel.java - src/main/java/org/rocksdb/Status.java - src/main/java/org/rocksdb/StringAppendOperator.java - src/main/java/org/rocksdb/TableFileCreationBriefInfo.java - src/main/java/org/rocksdb/TableFileCreationInfo.java - src/main/java/org/rocksdb/TableFileCreationReason.java - src/main/java/org/rocksdb/TableFileDeletionInfo.java - src/main/java/org/rocksdb/TableFilter.java - src/main/java/org/rocksdb/TableProperties.java - src/main/java/org/rocksdb/TableFormatConfig.java - src/main/java/org/rocksdb/ThreadType.java - src/main/java/org/rocksdb/ThreadStatus.java - src/main/java/org/rocksdb/TickerType.java - src/main/java/org/rocksdb/TimedEnv.java - src/main/java/org/rocksdb/TraceOptions.java - src/main/java/org/rocksdb/TraceWriter.java - src/main/java/org/rocksdb/TransactionalDB.java - src/main/java/org/rocksdb/TransactionalOptions.java - src/main/java/org/rocksdb/TransactionDB.java - src/main/java/org/rocksdb/TransactionDBOptions.java - src/main/java/org/rocksdb/Transaction.java - src/main/java/org/rocksdb/TransactionLogIterator.java - src/main/java/org/rocksdb/TransactionOptions.java - src/main/java/org/rocksdb/TtlDB.java - src/main/java/org/rocksdb/TxnDBWritePolicy.java - src/main/java/org/rocksdb/VectorMemTableConfig.java - src/main/java/org/rocksdb/WalFileType.java - src/main/java/org/rocksdb/WalFilter.java - src/main/java/org/rocksdb/WalProcessingOption.java - src/main/java/org/rocksdb/WALRecoveryMode.java - src/main/java/org/rocksdb/WBWIRocksIterator.java - src/main/java/org/rocksdb/WriteBatch.java - src/main/java/org/rocksdb/WriteBatchInterface.java - src/main/java/org/rocksdb/WriteBatchWithIndex.java - src/main/java/org/rocksdb/WriteOptions.java - src/main/java/org/rocksdb/WriteBufferManager.java - src/main/java/org/rocksdb/WriteStallCondition.java - src/main/java/org/rocksdb/WriteStallInfo.java - src/main/java/org/rocksdb/util/ByteUtil.java - src/main/java/org/rocksdb/util/BytewiseComparator.java - src/main/java/org/rocksdb/util/Environment.java - src/main/java/org/rocksdb/util/IntComparator.java - src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java - src/main/java/org/rocksdb/util/SizeUnit.java - src/main/java/org/rocksdb/UInt64AddOperator.java -) - -set(JAVA_TEST_CLASSES - src/test/java/org/rocksdb/BackupEngineTest.java - src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java - src/test/java/org/rocksdb/NativeComparatorWrapperTest.java - src/test/java/org/rocksdb/PlatformRandomHelper.java - src/test/java/org/rocksdb/RocksDBExceptionTest.java - src/test/java/org/rocksdb/RocksNativeLibraryResource.java - src/test/java/org/rocksdb/SnapshotTest.java - src/test/java/org/rocksdb/WriteBatchTest.java 
- src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java - src/test/java/org/rocksdb/util/WriteBatchGetter.java - src/test/java/org/rocksdb/test/TestableEventListener.java -) - -include(FindJava) -include(UseJava) -find_package(JNI) - -include_directories(${JNI_INCLUDE_DIRS}) -include_directories(${PROJECT_SOURCE_DIR}/java) - -set(JAVA_TEST_LIBDIR ${PROJECT_SOURCE_DIR}/java/test-libs) -set(JAVA_TMP_JAR ${JAVA_TEST_LIBDIR}/tmp.jar) -set(JAVA_JUNIT_JAR ${JAVA_TEST_LIBDIR}/junit-4.12.jar) -set(JAVA_HAMCR_JAR ${JAVA_TEST_LIBDIR}/hamcrest-core-1.3.jar) -set(JAVA_MOCKITO_JAR ${JAVA_TEST_LIBDIR}/mockito-all-1.10.19.jar) -set(JAVA_CGLIB_JAR ${JAVA_TEST_LIBDIR}/cglib-2.2.2.jar) -set(JAVA_ASSERTJ_JAR ${JAVA_TEST_LIBDIR}/assertj-core-1.7.1.jar) -set(JAVA_TESTCLASSPATH ${JAVA_JUNIT_JAR} ${JAVA_HAMCR_JAR} ${JAVA_MOCKITO_JAR} ${JAVA_CGLIB_JAR} ${JAVA_ASSERTJ_JAR}) - -set(JNI_OUTPUT_DIR ${PROJECT_SOURCE_DIR}/java/include) -file(MAKE_DIRECTORY ${JNI_OUTPUT_DIR}) - -if(${Java_VERSION_MINOR} VERSION_LESS_EQUAL "7" AND ${Java_VERSION_MAJOR} STREQUAL "1") - message(FATAL_ERROR "Detected Java 7 or older (${Java_VERSION_STRING}), minimum required version in now Java 8") -endif() - -if(${Java_VERSION_MAJOR} VERSION_GREATER_EQUAL "10" AND ${CMAKE_VERSION} VERSION_LESS "3.11.4") - # Java 10 and newer don't have javah, but the alternative GENERATE_NATIVE_HEADERS requires CMake 3.11.4 or newer - message(FATAL_ERROR "Detected Java 10 or newer (${Java_VERSION_STRING}), to build with CMake please upgrade CMake to 3.11.4 or newer") - -elseif(${CMAKE_VERSION} VERSION_LESS "3.11.4") - # Old CMake - message("Using an old CMAKE (${CMAKE_VERSION}) - JNI headers generated in separate step") - add_jar( - rocksdbjni_classes - SOURCES - ${JAVA_MAIN_CLASSES} - ${JAVA_TEST_CLASSES} - INCLUDE_JARS ${JAVA_TESTCLASSPATH} - ) - -else () - # Java 1.8 or newer prepare the JAR... - message("Preparing Jar for JDK ${Java_VERSION_STRING}") - add_jar( - rocksdbjni_classes - SOURCES - ${JAVA_MAIN_CLASSES} - ${JAVA_TEST_CLASSES} - INCLUDE_JARS ${JAVA_TESTCLASSPATH} - GENERATE_NATIVE_HEADERS rocksdbjni_headers DESTINATION ${JNI_OUTPUT_DIR} - ) - -endif() - -if(NOT EXISTS ${PROJECT_SOURCE_DIR}/java/classes) - file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/java/classes) -endif() - -if(NOT EXISTS ${JAVA_TEST_LIBDIR}) - file(MAKE_DIRECTORY mkdir ${JAVA_TEST_LIBDIR}) -endif() - -if (DEFINED CUSTOM_DEPS_URL) - set(DEPS_URL ${CUSTOM_DEPS_URL}/) -else () - # Using a Facebook AWS account for S3 storage. (maven.org has a history - # of failing in Travis builds.) 
- set(DEPS_URL "https://rocksdb-deps.s3-us-west-2.amazonaws.com/jars") -endif() - -if(NOT EXISTS ${JAVA_JUNIT_JAR}) - message("Downloading ${JAVA_JUNIT_JAR}") - file(DOWNLOAD ${DEPS_URL}/junit-4.12.jar ${JAVA_TMP_JAR} STATUS downloadStatus) - list(GET downloadStatus 0 error_code) - list(GET downloadStatus 1 error_message) - if(NOT error_code EQUAL 0) - message(FATAL_ERROR "Failed downloading ${JAVA_JUNIT_JAR}: ${error_message}") - endif() - file(RENAME ${JAVA_TMP_JAR} ${JAVA_JUNIT_JAR}) -endif() -if(NOT EXISTS ${JAVA_HAMCR_JAR}) - message("Downloading ${JAVA_HAMCR_JAR}") - file(DOWNLOAD ${DEPS_URL}/hamcrest-core-1.3.jar ${JAVA_TMP_JAR} STATUS downloadStatus) - list(GET downloadStatus 0 error_code) - list(GET downloadStatus 1 error_message) - if(NOT error_code EQUAL 0) - message(FATAL_ERROR "Failed downloading ${JAVA_HAMCR_JAR}: ${error_message}") - endif() - file(RENAME ${JAVA_TMP_JAR} ${JAVA_HAMCR_JAR}) -endif() -if(NOT EXISTS ${JAVA_MOCKITO_JAR}) - message("Downloading ${JAVA_MOCKITO_JAR}") - file(DOWNLOAD ${DEPS_URL}/mockito-all-1.10.19.jar ${JAVA_TMP_JAR} STATUS downloadStatus) - list(GET downloadStatus 0 error_code) - list(GET downloadStatus 1 error_message) - if(NOT error_code EQUAL 0) - message(FATAL_ERROR "Failed downloading ${JAVA_MOCKITO_JAR}: ${error_message}") - endif() - file(RENAME ${JAVA_TMP_JAR} ${JAVA_MOCKITO_JAR}) -endif() -if(NOT EXISTS ${JAVA_CGLIB_JAR}) - message("Downloading ${JAVA_CGLIB_JAR}") - file(DOWNLOAD ${DEPS_URL}/cglib-2.2.2.jar ${JAVA_TMP_JAR} STATUS downloadStatus) - list(GET downloadStatus 0 error_code) - list(GET downloadStatus 1 error_message) - if(NOT error_code EQUAL 0) - message(FATAL_ERROR "Failed downloading ${JAVA_CGLIB_JAR}: ${error_message}") - endif() - file(RENAME ${JAVA_TMP_JAR} ${JAVA_CGLIB_JAR}) -endif() -if(NOT EXISTS ${JAVA_ASSERTJ_JAR}) - message("Downloading ${JAVA_ASSERTJ_JAR}") - file(DOWNLOAD ${DEPS_URL}/assertj-core-1.7.1.jar ${JAVA_TMP_JAR} STATUS downloadStatus) - list(GET downloadStatus 0 error_code) - list(GET downloadStatus 1 error_message) - if(NOT error_code EQUAL 0) - message(FATAL_ERROR "Failed downloading ${JAVA_ASSERTJ_JAR}: ${error_message}") - endif() - file(RENAME ${JAVA_TMP_JAR} ${JAVA_ASSERTJ_JAR}) -endif() - -if(${CMAKE_VERSION} VERSION_LESS "3.11.4") - # Old CMake ONLY generate JNI headers, otherwise JNI is handled in add_jar step above - message("Preparing JNI headers for old CMake (${CMAKE_VERSION})") - set(NATIVE_JAVA_CLASSES - org.rocksdb.AbstractCompactionFilter - org.rocksdb.AbstractCompactionFilterFactory - org.rocksdb.AbstractComparator - org.rocksdb.AbstractEventListener - org.rocksdb.AbstractImmutableNativeReference - org.rocksdb.AbstractNativeReference - org.rocksdb.AbstractRocksIterator - org.rocksdb.AbstractSlice - org.rocksdb.AbstractTableFilter - org.rocksdb.AbstractTraceWriter - org.rocksdb.AbstractTransactionNotifier - org.rocksdb.AbstractWalFilter - org.rocksdb.BackupEngineOptions - org.rocksdb.BackupEngine - org.rocksdb.BlockBasedTableConfig - org.rocksdb.BloomFilter - org.rocksdb.CassandraCompactionFilter - org.rocksdb.CassandraValueMergeOperator - org.rocksdb.Checkpoint - org.rocksdb.ClockCache - org.rocksdb.Cache - org.rocksdb.ColumnFamilyHandle - org.rocksdb.ColumnFamilyOptions - org.rocksdb.CompactionJobInfo - org.rocksdb.CompactionJobStats - org.rocksdb.CompactionOptions - org.rocksdb.CompactionOptionsFIFO - org.rocksdb.CompactionOptionsUniversal - org.rocksdb.CompactRangeOptions - org.rocksdb.ComparatorOptions - org.rocksdb.CompressionOptions - org.rocksdb.ConcurrentTaskLimiterImpl - 
org.rocksdb.ConfigOptions - org.rocksdb.DBOptions - org.rocksdb.DirectSlice - org.rocksdb.Env - org.rocksdb.EnvOptions - org.rocksdb.Filter - org.rocksdb.FlushOptions - org.rocksdb.HashLinkedListMemTableConfig - org.rocksdb.HashSkipListMemTableConfig - org.rocksdb.IngestExternalFileOptions - org.rocksdb.Logger - org.rocksdb.LRUCache - org.rocksdb.MemoryUtil - org.rocksdb.MemTableConfig - org.rocksdb.NativeComparatorWrapper - org.rocksdb.NativeLibraryLoader - org.rocksdb.OptimisticTransactionDB - org.rocksdb.OptimisticTransactionOptions - org.rocksdb.Options - org.rocksdb.OptionsUtil - org.rocksdb.PersistentCache - org.rocksdb.PlainTableConfig - org.rocksdb.RateLimiter - org.rocksdb.ReadOptions - org.rocksdb.RemoveEmptyValueCompactionFilter - org.rocksdb.RestoreOptions - org.rocksdb.RocksCallbackObject - org.rocksdb.RocksDB - org.rocksdb.RocksEnv - org.rocksdb.RocksIterator - org.rocksdb.RocksIteratorInterface - org.rocksdb.RocksMemEnv - org.rocksdb.RocksMutableObject - org.rocksdb.RocksObject - org.rocksdb.SkipListMemTableConfig - org.rocksdb.Slice - org.rocksdb.Snapshot - org.rocksdb.SstFileManager - org.rocksdb.SstFileWriter - org.rocksdb.SstFileReader - org.rocksdb.SstFileReaderIterator - org.rocksdb.SstPartitionerFactory - org.rocksdb.SstPartitionerFixedPrefixFactory - org.rocksdb.Statistics - org.rocksdb.StringAppendOperator - org.rocksdb.TableFormatConfig - org.rocksdb.ThreadStatus - org.rocksdb.TimedEnv - org.rocksdb.Transaction - org.rocksdb.TransactionDB - org.rocksdb.TransactionDBOptions - org.rocksdb.TransactionLogIterator - org.rocksdb.TransactionOptions - org.rocksdb.TtlDB - org.rocksdb.UInt64AddOperator - org.rocksdb.VectorMemTableConfig - org.rocksdb.WBWIRocksIterator - org.rocksdb.WriteBatch - org.rocksdb.WriteBatch.Handler - org.rocksdb.WriteBatchInterface - org.rocksdb.WriteBatchWithIndex - org.rocksdb.WriteOptions - org.rocksdb.NativeComparatorWrapperTest - org.rocksdb.RocksDBExceptionTest - org.rocksdb.SnapshotTest - org.rocksdb.WriteBatchTest - org.rocksdb.WriteBatchTestInternalHelper - org.rocksdb.WriteBufferManager - org.rocksdb.test.TestableEventListener - ) - - create_javah( - TARGET rocksdbjni_headers - CLASSES ${NATIVE_JAVA_CLASSES} - CLASSPATH rocksdbjni_classes ${JAVA_TESTCLASSPATH} - OUTPUT_DIR ${JNI_OUTPUT_DIR} - ) -endif() - -if(NOT MSVC) - set_property(TARGET ${ROCKSDB_STATIC_LIB} PROPERTY POSITION_INDEPENDENT_CODE ON) -endif() - -set(ROCKSDBJNI_STATIC_LIB rocksdbjni${ARTIFACT_SUFFIX}) -add_library(${ROCKSDBJNI_STATIC_LIB} ${JNI_NATIVE_SOURCES}) -add_dependencies(${ROCKSDBJNI_STATIC_LIB} rocksdbjni_headers) -target_link_libraries(${ROCKSDBJNI_STATIC_LIB} ${ROCKSDB_STATIC_LIB} ${ROCKSDB_LIB}) - -if(NOT MINGW) - set(ROCKSDBJNI_SHARED_LIB rocksdbjni-shared${ARTIFACT_SUFFIX}) - add_library(${ROCKSDBJNI_SHARED_LIB} SHARED ${JNI_NATIVE_SOURCES}) - add_dependencies(${ROCKSDBJNI_SHARED_LIB} rocksdbjni_headers) - target_link_libraries(${ROCKSDBJNI_SHARED_LIB} ${ROCKSDB_STATIC_LIB} ${ROCKSDB_LIB}) - - set_target_properties( - ${ROCKSDBJNI_SHARED_LIB} - PROPERTIES - COMPILE_PDB_OUTPUT_DIRECTORY ${CMAKE_CFG_INTDIR} - COMPILE_PDB_NAME ${ROCKSDBJNI_STATIC_LIB}.pdb - ) -endif() diff --git a/java/GetBenchmarks.md b/java/GetBenchmarks.md deleted file mode 100644 index b66a897e2..000000000 --- a/java/GetBenchmarks.md +++ /dev/null @@ -1,161 +0,0 @@ -# RocksDB Get Performance Benchmarks - -Results associated with [Improve Java API `get()` performance by reducing copies](https://github.com/facebook/rocksdb/pull/10970) - -## Build/Run - -Mac -``` -make clean jclean 
-DEBUG_LEVEL=0 make -j12 rocksdbjava -(cd java/target; cp rocksdbjni-7.9.0-osx.jar rocksdbjni-7.9.0-SNAPSHOT-osx.jar) -mvn install:install-file -Dfile=./java/target/rocksdbjni-7.9.0-SNAPSHOT-osx.jar -DgroupId=org.rocksdb -DartifactId=rocksdbjni -Dversion=7.9.0-SNAPSHOT -Dpackaging=jar -``` - -Linux -``` -make clean jclean -DEBUG_LEVEL=0 make -j12 rocksdbjava -(cd java/target; cp rocksdbjni-7.9.0-linux64.jar rocksdbjni-7.9.0-SNAPSHOT-linux64.jar) -mvn install:install-file -Dfile=./java/target/rocksdbjni-7.9.0-SNAPSHOT-linux64.jar -DgroupId=org.rocksdb -DartifactId=rocksdbjni -Dversion=7.9.0-SNAPSHOT -Dpackaging=jar -``` - -Build jmh test package, on either platform -``` -pushd java/jmh -mvn clean package -``` - -A quick test run, just as a sanity check, using a small number of keys, would be -``` -java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000 -p keySize=128 -p valueSize=32768 -p columnFamilyTestType="no_column_family" GetBenchmarks -``` -The long performance run (as big as we can make it on our Ubuntu box without filling the disk) -``` -java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000,50000 -p keySize=128 -p valueSize=1024,16384 -p columnFamilyTestType="1_column_family","20_column_families" GetBenchmarks.get GetBenchmarks.preallocatedByteBufferGet GetBenchmarks.preallocatedGet -``` - -## Results (small runs, Mac) - -These are run on a 10-core M1 with 64GB of memory and 2TB of SSD. -They probably reflect the absolute best case for this optimization, hitting in-memory buffers and completely eliminating a buffer copy. - -### Before -Benchmark (columnFamilyTestType) (keyCount) (keySize) (multiGetSize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get no_column_family 1000 128 N/A 32768 thrpt 25 43496.578 ± 5743.090 ops/s -GetBenchmarks.preallocatedByteBufferGet no_column_family 1000 128 N/A 32768 thrpt 25 70765.578 ± 697.548 ops/s -GetBenchmarks.preallocatedGet no_column_family 1000 128 N/A 32768 thrpt 25 69883.554 ± 944.184 ops/s - -### After fixing byte[] (.get and .preallocatedGet) - -Benchmark (columnFamilyTestType) (keyCount) (keySize) (multiGetSize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get no_column_family 1000 128 N/A 32768 thrpt 25 149207.681 ± 2261.671 ops/s -GetBenchmarks.preallocatedByteBufferGet no_column_family 1000 128 N/A 32768 thrpt 25 68920.489 ± 1574.664 ops/s -GetBenchmarks.preallocatedGet no_column_family 1000 128 N/A 32768 thrpt 25 177399.022 ± 2107.375 ops/s - -### After fixing ByteBuffer (.preallocatedByteBufferGet) - -Benchmark (columnFamilyTestType) (keyCount) (keySize) (multiGetSize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get no_column_family 1000 128 N/A 32768 thrpt 25 150389.259 ± 1371.473 ops/s -GetBenchmarks.preallocatedByteBufferGet no_column_family 1000 128 N/A 32768 thrpt 25 179919.468 ± 1670.714 ops/s -GetBenchmarks.preallocatedGet no_column_family 1000 128 N/A 32768 thrpt 25 178261.938 ± 2630.571 ops/s -## Results (Ubuntu, big runs) -These take 3-4 hours -``` -java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000,50000 -p keySize=128 -p valueSize=1024,16384 -p columnFamilyTestType="1_column_family","20_column_families" GetBenchmarks.get GetBenchmarks.preallocatedByteBufferGet GetBenchmarks.preallocatedGet -``` -It's clear that all `get()` variants have noticeably improved performance, though not the spectacular gains of the M1. 
-### With fixes for all of the `get()` instances - -Benchmark (columnFamilyTestType) (keyCount) (keySize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get 1_column_family 1000 128 1024 thrpt 25 935648.793 ± 22879.910 ops/s -GetBenchmarks.get 1_column_family 1000 128 16384 thrpt 25 204366.301 ± 1326.570 ops/s -GetBenchmarks.get 1_column_family 50000 128 1024 thrpt 25 693451.990 ± 19822.720 ops/s -GetBenchmarks.get 1_column_family 50000 128 16384 thrpt 25 50473.768 ± 497.335 ops/s -GetBenchmarks.get 20_column_families 1000 128 1024 thrpt 25 550118.874 ± 14289.009 ops/s -GetBenchmarks.get 20_column_families 1000 128 16384 thrpt 25 120545.549 ± 648.280 ops/s -GetBenchmarks.get 20_column_families 50000 128 1024 thrpt 25 235671.353 ± 2231.195 ops/s -GetBenchmarks.get 20_column_families 50000 128 16384 thrpt 25 12463.887 ± 1950.746 ops/s -GetBenchmarks.preallocatedByteBufferGet 1_column_family 1000 128 1024 thrpt 25 1196026.040 ± 35435.729 ops/s -GetBenchmarks.preallocatedByteBufferGet 1_column_family 1000 128 16384 thrpt 25 403252.655 ± 3287.054 ops/s -GetBenchmarks.preallocatedByteBufferGet 1_column_family 50000 128 1024 thrpt 25 829965.448 ± 16945.452 ops/s -GetBenchmarks.preallocatedByteBufferGet 1_column_family 50000 128 16384 thrpt 25 63798.042 ± 1292.858 ops/s -GetBenchmarks.preallocatedByteBufferGet 20_column_families 1000 128 1024 thrpt 25 724557.253 ± 12710.828 ops/s -GetBenchmarks.preallocatedByteBufferGet 20_column_families 1000 128 16384 thrpt 25 176846.615 ± 1121.644 ops/s -GetBenchmarks.preallocatedByteBufferGet 20_column_families 50000 128 1024 thrpt 25 263553.764 ± 1304.243 ops/s -GetBenchmarks.preallocatedByteBufferGet 20_column_families 50000 128 16384 thrpt 25 14721.693 ± 2574.240 ops/s -GetBenchmarks.preallocatedGet 1_column_family 1000 128 1024 thrpt 25 1093947.765 ± 42846.276 ops/s -GetBenchmarks.preallocatedGet 1_column_family 1000 128 16384 thrpt 25 391629.913 ± 4039.965 ops/s -GetBenchmarks.preallocatedGet 1_column_family 50000 128 1024 thrpt 25 769332.958 ± 24180.749 ops/s -GetBenchmarks.preallocatedGet 1_column_family 50000 128 16384 thrpt 25 61712.038 ± 423.494 ops/s -GetBenchmarks.preallocatedGet 20_column_families 1000 128 1024 thrpt 25 694684.465 ± 5484.205 ops/s -GetBenchmarks.preallocatedGet 20_column_families 1000 128 16384 thrpt 25 172383.593 ± 841.679 ops/s -GetBenchmarks.preallocatedGet 20_column_families 50000 128 1024 thrpt 25 257447.351 ± 1388.667 ops/s -GetBenchmarks.preallocatedGet 20_column_families 50000 128 16384 thrpt 25 13418.522 ± 2418.619 ops/s - -### Baseline (no fixes) - -Benchmark (columnFamilyTestType) (keyCount) (keySize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get 1_column_family 1000 128 1024 thrpt 25 866745.224 ± 8834.629 ops/s -GetBenchmarks.get 1_column_family 1000 128 16384 thrpt 25 184332.195 ± 2304.217 ops/s -GetBenchmarks.get 1_column_family 50000 128 1024 thrpt 25 666794.288 ± 16150.684 ops/s -GetBenchmarks.get 1_column_family 50000 128 16384 thrpt 25 47221.788 ± 433.165 ops/s -GetBenchmarks.get 20_column_families 1000 128 1024 thrpt 25 551513.636 ± 7763.681 ops/s -GetBenchmarks.get 20_column_families 1000 128 16384 thrpt 25 113117.720 ± 580.738 ops/s -GetBenchmarks.get 20_column_families 50000 128 1024 thrpt 25 238675.555 ± 1758.978 ops/s -GetBenchmarks.get 20_column_families 50000 128 16384 thrpt 25 11639.390 ± 1459.765 ops/s -GetBenchmarks.preallocatedByteBufferGet 1_column_family 1000 128 1024 thrpt 25 1153617.917 ± 26350.028 ops/s -GetBenchmarks.preallocatedByteBufferGet 1_column_family 1000 128 16384 
thrpt 25 401710.334 ± 4324.539 ops/s -GetBenchmarks.preallocatedByteBufferGet 1_column_family 50000 128 1024 thrpt 25 809384.073 ± 13833.871 ops/s -GetBenchmarks.preallocatedByteBufferGet 1_column_family 50000 128 16384 thrpt 25 59279.005 ± 443.207 ops/s -GetBenchmarks.preallocatedByteBufferGet 20_column_families 1000 128 1024 thrpt 25 715466.403 ± 6591.375 ops/s -GetBenchmarks.preallocatedByteBufferGet 20_column_families 1000 128 16384 thrpt 25 175279.163 ± 910.923 ops/s -GetBenchmarks.preallocatedByteBufferGet 20_column_families 50000 128 1024 thrpt 25 263295.180 ± 856.456 ops/s -GetBenchmarks.preallocatedByteBufferGet 20_column_families 50000 128 16384 thrpt 25 14001.928 ± 2462.067 ops/s -GetBenchmarks.preallocatedGet 1_column_family 1000 128 1024 thrpt 25 1072866.854 ± 27030.592 ops/s -GetBenchmarks.preallocatedGet 1_column_family 1000 128 16384 thrpt 25 383950.853 ± 4510.654 ops/s -GetBenchmarks.preallocatedGet 1_column_family 50000 128 1024 thrpt 25 764395.469 ± 10097.417 ops/s -GetBenchmarks.preallocatedGet 1_column_family 50000 128 16384 thrpt 25 56851.330 ± 388.029 ops/s -GetBenchmarks.preallocatedGet 20_column_families 1000 128 1024 thrpt 25 668518.593 ± 9764.117 ops/s -GetBenchmarks.preallocatedGet 20_column_families 1000 128 16384 thrpt 25 171309.695 ± 875.895 ops/s -GetBenchmarks.preallocatedGet 20_column_families 50000 128 1024 thrpt 25 256057.801 ± 954.621 ops/s -GetBenchmarks.preallocatedGet 20_column_families 50000 128 16384 thrpt 25 13319.380 ± 2126.654 ops/s - -### Comparison - -It does at least look best when the data is cached. That is to say, smallest number of column families, and least keys. - -GetBenchmarks.get 1_column_family 1000 128 16384 thrpt 25 204366.301 ± 1326.570 ops/s -GetBenchmarks.get 1_column_family 1000 128 16384 thrpt 25 184332.195 ± 2304.217 ops/s - -GetBenchmarks.get 1_column_family 50000 128 16384 thrpt 25 50473.768 ± 497.335 ops/s -GetBenchmarks.get 1_column_family 50000 128 16384 thrpt 25 47221.788 ± 433.165 ops/s - -GetBenchmarks.get 20_column_families 1000 128 16384 thrpt 25 120545.549 ± 648.280 ops/s -GetBenchmarks.get 20_column_families 1000 128 16384 thrpt 25 113117.720 ± 580.738 ops/s - -GetBenchmarks.get 20_column_families 50000 128 16384 thrpt 25 12463.887 ± 1950.746 ops/s -GetBenchmarks.get 20_column_families 50000 128 16384 thrpt 25 11639.390 ± 1459.765 ops/s - -### Baseline -25 minute run, small number of keys -``` -java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000 -p keySize=128 -p valueSize=32768 -p columnFamilyTestType="no_column_families" GetBenchmarks.get GetBenchmarks.preallocatedByteBufferGet GetBenchmarks.preallocatedGet -``` - -Benchmark (columnFamilyTestType) (keyCount) (keySize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get no_column_families 1000 128 32768 thrpt 25 32344.908 ± 296.651 ops/s -GetBenchmarks.preallocatedByteBufferGet no_column_families 1000 128 32768 thrpt 25 45266.968 ± 424.514 ops/s -GetBenchmarks.preallocatedGet no_column_families 1000 128 32768 thrpt 25 43531.088 ± 291.785 ops/s - -### Optimized - -Benchmark (columnFamilyTestType) (keyCount) (keySize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get no_column_families 1000 128 32768 thrpt 25 37463.716 ± 235.744 ops/s -GetBenchmarks.preallocatedByteBufferGet no_column_families 1000 128 32768 thrpt 25 48946.105 ± 466.463 ops/s -GetBenchmarks.preallocatedGet no_column_families 1000 128 32768 thrpt 25 47143.624 ± 576.763 ops/s - -## Conclusion - -The performance improvement is real. 
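For reference, the three variants compared throughout correspond to three ways of reading a value through the RocksJava API. The sketch below shows the call shapes; it is a hedged illustration rather than the benchmark source: the database path and buffer sizes are made up, and the ByteBuffer case assumes the direct-buffer `get(ReadOptions, ByteBuffer, ByteBuffer)` overload available in recent RocksJava releases.

```
import java.nio.ByteBuffer;

import org.rocksdb.*;

public class GetVariantsSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/get-variants-sketch")) {
      final byte[] key = "key-0000".getBytes();
      db.put(key, new byte[32 * 1024]);

      // GetBenchmarks.get: a fresh byte[] is allocated and returned on every call.
      final byte[] allocated = db.get(key);

      // GetBenchmarks.preallocatedGet: the caller supplies the output buffer, so
      // the value is copied straight into it; the return value is the value
      // length, or RocksDB.NOT_FOUND if the key is absent.
      final byte[] reused = new byte[32 * 1024];
      final int len = db.get(key, reused);

      // GetBenchmarks.preallocatedByteBufferGet: with direct ByteBuffers the
      // native side fills off-heap memory, avoiding an extra on-heap copy.
      final ByteBuffer keyBuf = ByteBuffer.allocateDirect(key.length);
      final ByteBuffer valBuf = ByteBuffer.allocateDirect(32 * 1024);
      keyBuf.put(key).flip();
      try (final ReadOptions readOptions = new ReadOptions()) {
        final int byteBufferLen = db.get(readOptions, keyBuf, valBuf);
      }
    }
  }
}
```

The before/after tables above measure exactly these three read paths, with and without the copy-reduction fixes.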
- diff --git a/java/HISTORY-JAVA.md b/java/HISTORY-JAVA.md deleted file mode 100644 index 731886a61..000000000 --- a/java/HISTORY-JAVA.md +++ /dev/null @@ -1,86 +0,0 @@ -# RocksJava Change Log - -## 3.13 (8/4/2015) -### New Features -* Exposed BackupEngine API. -* Added CappedPrefixExtractor support. To use such an extractor, simply call useCappedPrefixExtractor in either Options or ColumnFamilyOptions. -* Added RemoveEmptyValueCompactionFilter. - -## 3.10.0 (3/24/2015) -### New Features -* Added compression per level API. -* MemEnv is now available in RocksJava via the RocksMemEnv class. -* lz4 compression is now included in the rocksjava static library when running `make rocksdbjavastatic`. - -### Public API Changes -* Overflowing a size_t when setting rocksdb options now throws an IllegalArgumentException, which removes the need for a developer to catch these exceptions explicitly. -* The set and get functions for tableCacheRemoveScanCountLimit are deprecated. - - -## By 01/31/2015 -### New Features -* WriteBatchWithIndex support. -* Iterator support for WriteBatch and WriteBatchWithIndex. -* GetUpdatesSince support. -* Snapshots now carry information about the related sequence number. -* TTL DB support. - -## By 11/14/2014 -### New Features -* Full support for Column Family. -* Slice and Comparator support. -* Default merge operator support. -* RateLimiter support. - -## By 06/15/2014 -### New Features -* Added basic Java binding for rocksdb::Env such that multiple RocksDB instances can share the same thread pool and environment. -* Added RestoreBackupableDB. - -## By 05/30/2014 -### Internal Framework Improvement -* Added disOwnNativeHandle to RocksObject, which allows a RocksObject to give up ownership of its native handle. This method is useful when sharing and transferring the ownership of RocksDB C++ resources. - -## By 05/15/2014 -### New Features -* Added RocksObject --- the base class of all RocksDB classes which holds some RocksDB resources on the C++ side. -* Use the environment variable JAVA_HOME in the Makefile for RocksJava. -### Public API changes -* Renamed org.rocksdb.Iterator to org.rocksdb.RocksIterator to avoid a potential conflict with the Java built-in Iterator. - -## By 04/30/2014 -### New Features -* Added Java binding for MultiGet. -* Added static method RocksDB.loadLibrary(), which loads necessary library files. -* Added Java bindings for 60+ rocksdb::Options. -* Added Java binding for BloomFilter. -* Added Java binding for ReadOptions. -* Added Java binding for memtables. -* Added Java binding for sst formats. -* Added Java binding for RocksDB Iterator which enables sequential scan operation. -* Added Java binding for Statistics. -* Added Java binding for BackupableDB. - -### DB Benchmark -* Added filluniquerandom, readseq benchmark. -* 70+ command-line options. -* Enabled BloomFilter configuration. - -## By 04/15/2014 -### New Features -* Added Java binding for WriteOptions. -* Added Java binding for WriteBatch, which enables batch-write. -* Added Java binding for rocksdb::Options. -* Added Java binding for block cache. -* Added Java version DB Benchmark. - -### DB Benchmark -* Added readwhilewriting benchmark. - -### Internal Framework Improvement -* Avoid a potential byte-array copy between C++ and Java in RocksDB.get. -* Added SizeUnit in org.rocksdb.util to store constants like KB and GB. - -### 03/28/2014 -* RocksJava project started. -* Added Java binding for RocksDB, which supports Open, Close, Get and Put.
diff --git a/java/Makefile b/java/Makefile deleted file mode 100644 index 7d2695af8..000000000 --- a/java/Makefile +++ /dev/null @@ -1,453 +0,0 @@ -NATIVE_JAVA_CLASSES = \ - org.rocksdb.AbstractCompactionFilter\ - org.rocksdb.AbstractCompactionFilterFactory\ - org.rocksdb.AbstractComparator\ - org.rocksdb.AbstractEventListener\ - org.rocksdb.AbstractSlice\ - org.rocksdb.AbstractTableFilter\ - org.rocksdb.AbstractTraceWriter\ - org.rocksdb.AbstractTransactionNotifier\ - org.rocksdb.AbstractWalFilter\ - org.rocksdb.BackupEngine\ - org.rocksdb.BackupEngineOptions\ - org.rocksdb.BlockBasedTableConfig\ - org.rocksdb.BloomFilter\ - org.rocksdb.Checkpoint\ - org.rocksdb.ClockCache\ - org.rocksdb.Cache\ - org.rocksdb.CassandraCompactionFilter\ - org.rocksdb.CassandraValueMergeOperator\ - org.rocksdb.ColumnFamilyHandle\ - org.rocksdb.ColumnFamilyOptions\ - org.rocksdb.CompactionJobInfo\ - org.rocksdb.CompactionJobStats\ - org.rocksdb.CompactionOptions\ - org.rocksdb.CompactionOptionsFIFO\ - org.rocksdb.CompactionOptionsUniversal\ - org.rocksdb.CompactRangeOptions\ - org.rocksdb.ComparatorOptions\ - org.rocksdb.CompressionOptions\ - org.rocksdb.ConfigOptions\ - org.rocksdb.DBOptions\ - org.rocksdb.DirectSlice\ - org.rocksdb.Env\ - org.rocksdb.EnvOptions\ - org.rocksdb.FlushOptions\ - org.rocksdb.Filter\ - org.rocksdb.IngestExternalFileOptions\ - org.rocksdb.HashLinkedListMemTableConfig\ - org.rocksdb.HashSkipListMemTableConfig\ - org.rocksdb.ConcurrentTaskLimiter\ - org.rocksdb.ConcurrentTaskLimiterImpl\ - org.rocksdb.KeyMayExist\ - org.rocksdb.Logger\ - org.rocksdb.LRUCache\ - org.rocksdb.MemoryUsageType\ - org.rocksdb.MemoryUtil\ - org.rocksdb.MergeOperator\ - org.rocksdb.NativeComparatorWrapper\ - org.rocksdb.OptimisticTransactionDB\ - org.rocksdb.OptimisticTransactionOptions\ - org.rocksdb.Options\ - org.rocksdb.OptionsUtil\ - org.rocksdb.PersistentCache\ - org.rocksdb.PlainTableConfig\ - org.rocksdb.RateLimiter\ - org.rocksdb.ReadOptions\ - org.rocksdb.RemoveEmptyValueCompactionFilter\ - org.rocksdb.RestoreOptions\ - org.rocksdb.RocksCallbackObject\ - org.rocksdb.RocksDB\ - org.rocksdb.RocksEnv\ - org.rocksdb.RocksIterator\ - org.rocksdb.RocksMemEnv\ - org.rocksdb.SkipListMemTableConfig\ - org.rocksdb.Slice\ - org.rocksdb.SstFileManager\ - org.rocksdb.SstFileWriter\ - org.rocksdb.SstFileReader\ - org.rocksdb.SstFileReaderIterator\ - org.rocksdb.SstPartitionerFactory\ - org.rocksdb.SstPartitionerFixedPrefixFactory\ - org.rocksdb.Statistics\ - org.rocksdb.ThreadStatus\ - org.rocksdb.TimedEnv\ - org.rocksdb.Transaction\ - org.rocksdb.TransactionDB\ - org.rocksdb.TransactionDBOptions\ - org.rocksdb.TransactionOptions\ - org.rocksdb.TransactionLogIterator\ - org.rocksdb.TtlDB\ - org.rocksdb.VectorMemTableConfig\ - org.rocksdb.Snapshot\ - org.rocksdb.StringAppendOperator\ - org.rocksdb.UInt64AddOperator\ - org.rocksdb.WriteBatch\ - org.rocksdb.WriteBatch.Handler\ - org.rocksdb.WriteOptions\ - org.rocksdb.WriteBatchWithIndex\ - org.rocksdb.WriteBufferManager\ - org.rocksdb.WBWIRocksIterator - -NATIVE_JAVA_TEST_CLASSES = \ - org.rocksdb.RocksDBExceptionTest\ - org.rocksdb.test.TestableEventListener\ - org.rocksdb.NativeComparatorWrapperTest.NativeStringComparatorWrapper\ - org.rocksdb.WriteBatchTest\ - org.rocksdb.WriteBatchTestInternalHelper - -ROCKSDB_MAJOR = $(shell grep -E "ROCKSDB_MAJOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3) -ROCKSDB_MINOR = $(shell grep -E "ROCKSDB_MINOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3) -ROCKSDB_PATCH = $(shell grep -E "ROCKSDB_PATCH.[0-9]" 
../include/rocksdb/version.h | cut -d ' ' -f 3) - -NATIVE_INCLUDE = ./include -ARCH := $(shell getconf LONG_BIT) -SHA256_CMD ?= sha256sum - -JAVA_TESTS = \ - org.rocksdb.BackupEngineOptionsTest\ - org.rocksdb.BackupEngineTest\ - org.rocksdb.BlobOptionsTest\ - org.rocksdb.BlockBasedTableConfigTest\ - org.rocksdb.BuiltinComparatorTest\ - org.rocksdb.ByteBufferUnsupportedOperationTest\ - org.rocksdb.BytewiseComparatorRegressionTest\ - org.rocksdb.util.BytewiseComparatorTest\ - org.rocksdb.util.BytewiseComparatorIntTest\ - org.rocksdb.CheckPointTest\ - org.rocksdb.ClockCacheTest\ - org.rocksdb.ColumnFamilyOptionsTest\ - org.rocksdb.ColumnFamilyTest\ - org.rocksdb.CompactionFilterFactoryTest\ - org.rocksdb.CompactionJobInfoTest\ - org.rocksdb.CompactionJobStatsTest\ - org.rocksdb.CompactionOptionsTest\ - org.rocksdb.CompactionOptionsFIFOTest\ - org.rocksdb.CompactionOptionsUniversalTest\ - org.rocksdb.CompactionPriorityTest\ - org.rocksdb.CompactionStopStyleTest\ - org.rocksdb.ComparatorOptionsTest\ - org.rocksdb.CompressionOptionsTest\ - org.rocksdb.CompressionTypesTest\ - org.rocksdb.DBOptionsTest\ - org.rocksdb.DirectSliceTest\ - org.rocksdb.util.EnvironmentTest\ - org.rocksdb.EnvOptionsTest\ - org.rocksdb.EventListenerTest\ - org.rocksdb.IngestExternalFileOptionsTest\ - org.rocksdb.util.IntComparatorTest\ - org.rocksdb.util.JNIComparatorTest\ - org.rocksdb.FilterTest\ - org.rocksdb.FlushTest\ - org.rocksdb.InfoLogLevelTest\ - org.rocksdb.KeyMayExistTest\ - org.rocksdb.ConcurrentTaskLimiterTest\ - org.rocksdb.LoggerTest\ - org.rocksdb.LRUCacheTest\ - org.rocksdb.MemoryUtilTest\ - org.rocksdb.MemTableTest\ - org.rocksdb.MergeTest\ - org.rocksdb.MultiColumnRegressionTest \ - org.rocksdb.MultiGetManyKeysTest\ - org.rocksdb.MultiGetTest\ - org.rocksdb.MixedOptionsTest\ - org.rocksdb.MutableColumnFamilyOptionsTest\ - org.rocksdb.MutableDBOptionsTest\ - org.rocksdb.MutableOptionsGetSetTest \ - org.rocksdb.NativeComparatorWrapperTest\ - org.rocksdb.NativeLibraryLoaderTest\ - org.rocksdb.OptimisticTransactionTest\ - org.rocksdb.OptimisticTransactionDBTest\ - org.rocksdb.OptimisticTransactionOptionsTest\ - org.rocksdb.OptionsUtilTest\ - org.rocksdb.OptionsTest\ - org.rocksdb.PlainTableConfigTest\ - org.rocksdb.RateLimiterTest\ - org.rocksdb.ReadOnlyTest\ - org.rocksdb.ReadOptionsTest\ - org.rocksdb.util.ReverseBytewiseComparatorIntTest\ - org.rocksdb.RocksDBTest\ - org.rocksdb.RocksDBExceptionTest\ - org.rocksdb.DefaultEnvTest\ - org.rocksdb.RocksIteratorTest\ - org.rocksdb.RocksMemEnvTest\ - org.rocksdb.util.SizeUnitTest\ - org.rocksdb.SecondaryDBTest\ - org.rocksdb.SliceTest\ - org.rocksdb.SnapshotTest\ - org.rocksdb.SstFileManagerTest\ - org.rocksdb.SstFileWriterTest\ - org.rocksdb.SstFileReaderTest\ - org.rocksdb.SstPartitionerTest\ - org.rocksdb.TableFilterTest\ - org.rocksdb.TimedEnvTest\ - org.rocksdb.TransactionTest\ - org.rocksdb.TransactionDBTest\ - org.rocksdb.TransactionOptionsTest\ - org.rocksdb.TransactionDBOptionsTest\ - org.rocksdb.TransactionLogIteratorTest\ - org.rocksdb.TtlDBTest\ - org.rocksdb.StatisticsTest\ - org.rocksdb.StatisticsCollectorTest\ - org.rocksdb.VerifyChecksumsTest\ - org.rocksdb.WalFilterTest\ - org.rocksdb.WALRecoveryModeTest\ - org.rocksdb.WriteBatchHandlerTest\ - org.rocksdb.WriteBatchTest\ - org.rocksdb.WriteBatchThreadedTest\ - org.rocksdb.WriteOptionsTest\ - org.rocksdb.WriteBatchWithIndexTest - -MAIN_SRC = src/main/java -TEST_SRC = src/test/java -OUTPUT = target -MAIN_CLASSES = $(OUTPUT)/classes -TEST_CLASSES = $(OUTPUT)/test-classes -JAVADOC = 
$(OUTPUT)/apidocs - -BENCHMARK_MAIN_SRC = benchmark/src/main/java -BENCHMARK_OUTPUT = benchmark/target -BENCHMARK_MAIN_CLASSES = $(BENCHMARK_OUTPUT)/classes - -SAMPLES_MAIN_SRC = samples/src/main/java -SAMPLES_OUTPUT = samples/target -SAMPLES_MAIN_CLASSES = $(SAMPLES_OUTPUT)/classes - -JAVA_TEST_LIBDIR = test-libs -JAVA_JUNIT_VER = 4.13.1 -JAVA_JUNIT_SHA256 = c30719db974d6452793fe191b3638a5777005485bae145924044530ffa5f6122 -JAVA_JUNIT_JAR = junit-$(JAVA_JUNIT_VER).jar -JAVA_JUNIT_JAR_PATH = $(JAVA_TEST_LIBDIR)/$(JAVA_JUNIT_JAR) -JAVA_HAMCREST_VER = 2.2 -JAVA_HAMCREST_SHA256 = 5e62846a89f05cd78cd9c1a553f340d002458380c320455dd1f8fc5497a8a1c1 -JAVA_HAMCREST_JAR = hamcrest-$(JAVA_HAMCREST_VER).jar -JAVA_HAMCREST_JAR_PATH = $(JAVA_TEST_LIBDIR)/$(JAVA_HAMCREST_JAR) -JAVA_MOCKITO_VER = 1.10.19 -JAVA_MOCKITO_SHA256 = d1a7a7ef14b3db5c0fc3e0a63a81b374b510afe85add9f7984b97911f4c70605 -JAVA_MOCKITO_JAR = mockito-all-$(JAVA_MOCKITO_VER).jar -JAVA_MOCKITO_JAR_PATH = $(JAVA_TEST_LIBDIR)/$(JAVA_MOCKITO_JAR) -JAVA_CGLIB_VER = 3.3.0 -JAVA_CGLIB_SHA256 = 9fe0c26d7464140ccdfe019ac687be1fb906122b508ab54beb810db0f09a9212 -JAVA_CGLIB_JAR = cglib-$(JAVA_CGLIB_VER).jar -JAVA_CGLIB_JAR_PATH = $(JAVA_TEST_LIBDIR)/$(JAVA_CGLIB_JAR) -JAVA_ASSERTJ_VER = 2.9.0 -JAVA_ASSERTJ_SHA256 = 5e88ea3ecbe3c48aa1346fec76c84979fa9c8d22499f11479011691230e8babf -JAVA_ASSERTJ_JAR = assertj-core-$(JAVA_ASSERTJ_VER).jar -JAVA_ASSERTJ_JAR_PATH = $(JAVA_TEST_LIBDIR)/$(JAVA_ASSERTJ_JAR) -JAVA_TESTCLASSPATH = $(JAVA_JUNIT_JAR_PATH):$(JAVA_HAMCREST_JAR_PATH):$(JAVA_MOCKITO_JAR_PATH):$(JAVA_CGLIB_JAR_PATH):$(JAVA_ASSERTJ_JAR_PATH) - -MVN_LOCAL = ~/.m2/repository - -# Set the path of the java commands -ifeq ($(JAVA_CMD),) -ifneq ($(JAVA_HOME),) -JAVA_CMD := $(JAVA_HOME)/bin/java -else -JAVA_CMD := java -endif -endif - -ifeq ($(JAVAC_CMD),) -ifneq ($(JAVA_HOME),) -JAVAC_CMD := $(JAVA_HOME)/bin/javac -else -JAVAC_CMD := javac -endif -endif - -ifeq ($(JAVADOC_CMD),) -ifneq ($(JAVA_HOME),) -JAVADOC_CMD := $(JAVA_HOME)/bin/javadoc -else -JAVADOC_CMD := javadoc -endif -endif - -# Look for the Java version (1.6->6, 1.7->7, 1.8->8, 11.0->11, 13.0->13, 15.0->15 etc..) 
-JAVAC_VERSION := $(shell $(JAVAC_CMD) -version 2>&1) -JAVAC_MAJOR_VERSION := $(word 2,$(subst ., ,$(JAVAC_VERSION))) -ifeq ($(JAVAC_MAJOR_VERSION),1) -JAVAC_MAJOR_VERSION := $(word 3,$(subst ., ,$(JAVAC_VERSION))) -endif - -# Test whether the version we see meets our minimum -MIN_JAVAC_MAJOR_VERSION := 8 -JAVAC_VERSION_GE_MIN := $(shell [ $(JAVAC_MAJOR_VERSION) -ge $(MIN_JAVAC_MAJOR_VERSION) ] > /dev/null 2>&1 && echo true) - -# Set the default JAVA_ARGS to "" for DEBUG_LEVEL=0 -JAVA_ARGS ?= - -JAVAC_ARGS ?= - -# Read plugin configuration -PLUGIN_PATH = ../plugin -ROCKSDB_PLUGIN_MKS = $(foreach plugin, $(ROCKSDB_PLUGINS), $(PLUGIN_PATH)/$(plugin)/*.mk) -include $(ROCKSDB_PLUGIN_MKS) - -# Add paths to Java sources in plugins -ROCKSDB_PLUGIN_JAVA_ROOTS = $(foreach plugin, $(ROCKSDB_PLUGINS), $(PLUGIN_PATH)/$(plugin)/java) -PLUGIN_SOURCES = $(foreach root, $(ROCKSDB_PLUGIN_JAVA_ROOTS), $(foreach pkg, org/rocksdb/util org/rocksdb, $(root)/$(MAIN_SRC)/$(pkg)/*.java)) -CORE_SOURCES = $(foreach pkg, org/rocksdb/util org/rocksdb, $(MAIN_SRC)/$(pkg)/*.java) -SOURCES = $(wildcard $(CORE_SOURCES) $(PLUGIN_SOURCES)) -PLUGIN_TEST_SOURCES = $(foreach root, $(ROCKSDB_PLUGIN_JAVA_ROOTS), $(foreach pkg, org/rocksdb/test org/rocksdb/util org/rocksdb, $(root)/$(TEST_SRC)/$(pkg)/*.java)) -CORE_TEST_SOURCES = $(foreach pkg, org/rocksdb/test org/rocksdb/util org/rocksdb, $(TEST_SRC)/$(pkg)/*.java) -TEST_SOURCES = $(wildcard $(CORE_TEST_SOURCES) $(PLUGIN_TEST_SOURCES)) - -# Configure the plugin tests and java classes -ROCKSDB_PLUGIN_NATIVE_JAVA_CLASSES = $(foreach plugin, $(ROCKSDB_PLUGINS), $(foreach class, $($(plugin)_NATIVE_JAVA_CLASSES), $(class))) -NATIVE_JAVA_CLASSES = $(NATIVE_JAVA_CLASSES) $(ROCKSDB_PLUGIN_NATIVE_JAVA_CLASSES) -ROCKSDB_PLUGIN_JAVA_TESTS = $(foreach plugin, $(ROCKSDB_PLUGINS), $(foreach testclass, $($(plugin)_JAVA_TESTS), $(testclass))) -ALL_JAVA_TESTS = $(JAVA_TESTS) $(ROCKSDB_PLUGIN_JAVA_TESTS) - -# When debugging add -Xcheck:jni to the java args -ifneq ($(DEBUG_LEVEL),0) - JAVA_ARGS += -ea -Xcheck:jni - JAVAC_ARGS += -Xlint:deprecation -Xlint:unchecked -endif - -# Using a Facebook AWS account for S3 storage. (maven.org has a history -# of failing in Travis builds.) 
-DEPS_URL?=https://rocksdb-deps.s3-us-west-2.amazonaws.com/jars - -java-version: -ifneq ($(JAVAC_VERSION_GE_MIN),true) - echo 'Java version is $(JAVAC_VERSION), minimum required version is $(MIN_JAVAC_MAJOR_VERSION)' - exit 1 -endif - -clean: clean-not-downloaded clean-downloaded - -clean-not-downloaded: - $(AM_V_at)rm -rf $(NATIVE_INCLUDE) - $(AM_V_at)rm -rf $(OUTPUT) - $(AM_V_at)rm -rf $(BENCHMARK_OUTPUT) - $(AM_V_at)rm -rf $(SAMPLES_OUTPUT) - -clean-downloaded: - $(AM_V_at)rm -rf $(JAVA_TEST_LIBDIR) - - -javadocs: java - $(AM_V_GEN)mkdir -p $(JAVADOC) - $(AM_V_at)$(JAVADOC_CMD) -d $(JAVADOC) -sourcepath $(MAIN_SRC) -subpackages org - -javalib: java java_test javadocs - -java: java-version - $(AM_V_GEN)mkdir -p $(MAIN_CLASSES) - $(AM_V_at) $(JAVAC_CMD) $(JAVAC_ARGS) -h $(NATIVE_INCLUDE) -d $(MAIN_CLASSES) $(SOURCES) - $(AM_V_at)@cp ../HISTORY.md ./HISTORY-CPP.md - $(AM_V_at)@rm -f ./HISTORY-CPP.md - -sample: java - $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) - $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/RocksDBSample.java - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni_not_found - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBSample /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni_not_found - -column_family_sample: java - $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) - $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/RocksDBColumnFamilySample.java - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBColumnFamilySample /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni - -transaction_sample: java - $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) - $(AM_V_at)$(JAVAC_CMD) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/TransactionSample.java - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(JAVA_CMD) -ea -Xcheck:jni -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) TransactionSample /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni - -optimistic_transaction_sample: java - $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) - $(AM_V_at)$(JAVAC_CMD) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/OptimisticTransactionSample.java - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(JAVA_CMD) -ea -Xcheck:jni -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) OptimisticTransactionSample /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni - -$(JAVA_TEST_LIBDIR): - mkdir -p "$(JAVA_TEST_LIBDIR)" - -$(JAVA_JUNIT_JAR_PATH): $(JAVA_TEST_LIBDIR) -ifneq (,$(wildcard $(MVN_LOCAL)/junit/junit/$(JAVA_JUNIT_VER)/$(JAVA_JUNIT_JAR))) - cp -v $(MVN_LOCAL)/junit/junit/$(JAVA_JUNIT_VER)/$(JAVA_JUNIT_JAR) $(JAVA_TEST_LIBDIR) -else - curl --fail --insecure --output $(JAVA_JUNIT_JAR_PATH) --location $(DEPS_URL)/$(JAVA_JUNIT_JAR) - JAVA_JUNIT_SHA256_ACTUAL=`$(SHA256_CMD) $(JAVA_JUNIT_JAR_PATH) | cut -d ' ' -f 1`; \ - if [ "$(JAVA_JUNIT_SHA256)" != "$$JAVA_JUNIT_SHA256_ACTUAL" ]; then \ - echo $(JAVA_JUNIT_JAR_PATH) checksum mismatch, expected=\"$(JAVA_JUNIT_SHA256)\" actual=\"$$JAVA_JUNIT_SHA256_ACTUAL\"; \ - exit 1; \ - fi -endif - -$(JAVA_HAMCREST_JAR_PATH): $(JAVA_TEST_LIBDIR) -ifneq (,$(wildcard $(MVN_LOCAL)/org/hamcrest/hamcrest/$(JAVA_HAMCREST_VER)/$(JAVA_HAMCREST_JAR))) - cp -v 
$(MVN_LOCAL)/org/hamcrest/hamcrest/$(JAVA_HAMCREST_VER)/$(JAVA_HAMCREST_JAR) $(JAVA_TEST_LIBDIR) -else - curl --fail --insecure --output $(JAVA_HAMCREST_JAR_PATH) --location $(DEPS_URL)/$(JAVA_HAMCREST_JAR) - JAVA_HAMCREST_SHA256_ACTUAL=`$(SHA256_CMD) $(JAVA_HAMCREST_JAR_PATH) | cut -d ' ' -f 1`; \ - if [ "$(JAVA_HAMCREST_SHA256)" != "$$JAVA_HAMCREST_SHA256_ACTUAL" ]; then \ - echo $(JAVA_HAMCREST_JAR_PATH) checksum mismatch, expected=\"$(JAVA_HAMCREST_SHA256)\" actual=\"$$JAVA_HAMCREST_SHA256_ACTUAL\"; \ - exit 1; \ - fi -endif - -$(JAVA_MOCKITO_JAR_PATH): $(JAVA_TEST_LIBDIR) -ifneq (,$(wildcard $(MVN_LOCAL)/org/mockito/mockito-all/$(JAVA_MOCKITO_VER)/$(JAVA_MOCKITO_JAR))) - cp -v $(MVN_LOCAL)/org/mockito/mockito-all/$(JAVA_MOCKITO_VER)/$(JAVA_MOCKITO_JAR) $(JAVA_TEST_LIBDIR) -else - curl --fail --insecure --output "$(JAVA_MOCKITO_JAR_PATH)" --location $(DEPS_URL)/$(JAVA_MOCKITO_JAR) - JAVA_MOCKITO_SHA256_ACTUAL=`$(SHA256_CMD) $(JAVA_MOCKITO_JAR_PATH) | cut -d ' ' -f 1`; \ - if [ "$(JAVA_MOCKITO_SHA256)" != "$$JAVA_MOCKITO_SHA256_ACTUAL" ]; then \ - echo $(JAVA_MOCKITO_JAR_PATH) checksum mismatch, expected=\"$(JAVA_MOCKITO_SHA256)\" actual=\"$$JAVA_MOCKITO_SHA256_ACTUAL\"; \ - exit 1; \ - fi -endif - -$(JAVA_CGLIB_JAR_PATH): $(JAVA_TEST_LIBDIR) -ifneq (,$(wildcard $(MVN_LOCAL)/cglib/cglib/$(JAVA_CGLIB_VER)/$(JAVA_CGLIB_JAR))) - cp -v $(MVN_LOCAL)/cglib/cglib/$(JAVA_CGLIB_VER)/$(JAVA_CGLIB_JAR) $(JAVA_TEST_LIBDIR) -else - curl --fail --insecure --output "$(JAVA_CGLIB_JAR_PATH)" --location $(DEPS_URL)/$(JAVA_CGLIB_JAR) - JAVA_CGLIB_SHA256_ACTUAL=`$(SHA256_CMD) $(JAVA_CGLIB_JAR_PATH) | cut -d ' ' -f 1`; \ - if [ "$(JAVA_CGLIB_SHA256)" != "$$JAVA_CGLIB_SHA256_ACTUAL" ]; then \ - echo $(JAVA_CGLIB_JAR_PATH) checksum mismatch, expected=\"$(JAVA_CGLIB_SHA256)\" actual=\"$$JAVA_CGLIB_SHA256_ACTUAL\"; \ - exit 1; \ - fi -endif - -$(JAVA_ASSERTJ_JAR_PATH): $(JAVA_TEST_LIBDIR) -ifneq (,$(wildcard $(MVN_LOCAL)/org/assertj/assertj-core/$(JAVA_ASSERTJ_VER)/$(JAVA_ASSERTJ_JAR))) - cp -v $(MVN_LOCAL)/org/assertj/assertj-core/$(JAVA_ASSERTJ_VER)/$(JAVA_ASSERTJ_JAR) $(JAVA_TEST_LIBDIR) -else - curl --fail --insecure --output "$(JAVA_ASSERTJ_JAR_PATH)" --location $(DEPS_URL)/$(JAVA_ASSERTJ_JAR) - JAVA_ASSERTJ_SHA256_ACTUAL=`$(SHA256_CMD) $(JAVA_ASSERTJ_JAR_PATH) | cut -d ' ' -f 1`; \ - if [ "$(JAVA_ASSERTJ_SHA256)" != "$$JAVA_ASSERTJ_SHA256_ACTUAL" ]; then \ - echo $(JAVA_ASSERTJ_JAR_PATH) checksum mismatch, expected=\"$(JAVA_ASSERTJ_SHA256)\" actual=\"$$JAVA_ASSERTJ_SHA256_ACTUAL\"; \ - exit 1; \ - fi -endif - -resolve_test_deps: $(JAVA_JUNIT_JAR_PATH) $(JAVA_HAMCREST_JAR_PATH) $(JAVA_MOCKITO_JAR_PATH) $(JAVA_CGLIB_JAR_PATH) $(JAVA_ASSERTJ_JAR_PATH) - -java_test: java resolve_test_deps - $(AM_V_GEN)mkdir -p $(TEST_CLASSES) - $(AM_V_at) $(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES):$(JAVA_TESTCLASSPATH) -h $(NATIVE_INCLUDE) -d $(TEST_CLASSES)\ - $(TEST_SOURCES) - -test: java java_test - $(MAKE) run_test - -run_test: - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner $(ALL_JAVA_TESTS) - -run_plugin_test: - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner $(ROCKSDB_PLUGIN_JAVA_TESTS) - -db_bench: java - $(AM_V_GEN)mkdir -p $(BENCHMARK_MAIN_CLASSES) - $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(BENCHMARK_MAIN_CLASSES) $(BENCHMARK_MAIN_SRC)/org/rocksdb/benchmark/*.java diff --git 
a/java/RELEASE.md b/java/RELEASE.md deleted file mode 100644 index dda19455f..000000000 --- a/java/RELEASE.md +++ /dev/null @@ -1,59 +0,0 @@ -## Cross-building - -RocksDB can be built as a single self-contained cross-platform JAR. The cross-platform jar can be used on any 64-bit OSX system, 32-bit Linux system, or 64-bit Linux system. - -Building a cross-platform JAR requires: - - * [Docker](https://www.docker.com/docker-community) - * A Mac OSX machine that can compile RocksDB. - * Java 7 set as JAVA_HOME. - -Once you have these items, run this make command from RocksDB's root source directory: - - make jclean clean rocksdbjavastaticreleasedocker - -This command will build RocksDB natively on OSX, and will then spin up docker containers to build RocksDB for 32-bit and 64-bit Linux with glibc, and 32-bit and 64-bit Linux with musl libc. - -You can find all native binaries and JARs in the java/target directory upon completion: - - librocksdbjni-linux32.so - librocksdbjni-linux64.so - librocksdbjni-linux64-musl.so - librocksdbjni-linux32-musl.so - librocksdbjni-osx.jnilib - rocksdbjni-x.y.z-javadoc.jar - rocksdbjni-x.y.z-linux32.jar - rocksdbjni-x.y.z-linux64.jar - rocksdbjni-x.y.z-linux64-musl.jar - rocksdbjni-x.y.z-linux32-musl.jar - rocksdbjni-x.y.z-osx.jar - rocksdbjni-x.y.z-sources.jar - rocksdbjni-x.y.z.jar - -Where x.y.z is the built version number of RocksDB. - -## Maven publication - -Set ~/.m2/settings.xml to contain: - - <settings> - <servers> - <server> - <id>sonatype-nexus-staging</id> - <username>your-sonatype-jira-username</username> - <password>your-sonatype-jira-password</password> - </server> - </servers> - </settings> - -From RocksDB's root directory, first build the Java static JARs: - - make jclean clean rocksdbjavastaticpublish - -This command will [stage the JAR artifacts on the Sonatype staging repository](http://central.sonatype.org/pages/manual-staging-bundle-creation-and-deployment.html). To release the staged artifacts: - -1. Go to [https://oss.sonatype.org/#stagingRepositories](https://oss.sonatype.org/#stagingRepositories) and search for "rocksdb" in the upper right hand search box. -2. Select the rocksdb staging repository, and inspect its contents. -3. If all is well, follow [these steps](https://oss.sonatype.org/#stagingRepositories) to close the repository and release it. - -After the release has occurred, the artifacts will be synced to Maven central within 24-48 hours. diff --git a/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java b/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java deleted file mode 100644 index 070f0fe75..000000000 --- a/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java +++ /dev/null @@ -1,1640 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -/** - * Copyright (C) 2011 the original author or authors. - * See the notice.md file distributed with this work for additional - * information regarding copyright ownership. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.rocksdb.benchmark; - -import java.io.IOException; -import java.lang.Runnable; -import java.lang.Math; -import java.io.File; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; -import java.nio.ByteBuffer; -import java.nio.file.Files; -import java.util.Collection; -import java.util.Date; -import java.util.EnumMap; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.concurrent.TimeUnit; -import java.util.Arrays; -import java.util.ArrayList; -import java.util.concurrent.Callable; -import java.util.concurrent.Executors; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import org.rocksdb.*; -import org.rocksdb.RocksMemEnv; -import org.rocksdb.util.SizeUnit; - -class Stats { - int id_; - long start_; - long finish_; - double seconds_; - long done_; - long found_; - long lastOpTime_; - long nextReport_; - long bytes_; - StringBuilder message_; - boolean excludeFromMerge_; - - // TODO(yhchiang): use the following arguments: - // (Long)Flag.stats_interval - // (Integer)Flag.stats_per_interval - - Stats(int id) { - id_ = id; - nextReport_ = 100; - done_ = 0; - bytes_ = 0; - seconds_ = 0; - start_ = System.nanoTime(); - lastOpTime_ = start_; - finish_ = start_; - found_ = 0; - message_ = new StringBuilder(""); - excludeFromMerge_ = false; - } - - void merge(final Stats other) { - if (other.excludeFromMerge_) { - return; - } - - done_ += other.done_; - found_ += other.found_; - bytes_ += other.bytes_; - seconds_ += other.seconds_; - if (other.start_ < start_) start_ = other.start_; - if (other.finish_ > finish_) finish_ = other.finish_; - - // Just keep the messages from one thread - if (message_.length() == 0) { - message_ = other.message_; - } - } - - void stop() { - finish_ = System.nanoTime(); - seconds_ = (double) (finish_ - start_) * 1e-9; - } - - void addMessage(String msg) { - if (message_.length() > 0) { - message_.append(" "); - } - message_.append(msg); - } - - void setId(int id) { id_ = id; } - void setExcludeFromMerge() { excludeFromMerge_ = true; } - - void finishedSingleOp(int bytes) { - done_++; - lastOpTime_ = System.nanoTime(); - bytes_ += bytes; - if (done_ >= nextReport_) { - if (nextReport_ < 1000) { - nextReport_ += 100; - } else if (nextReport_ < 5000) { - nextReport_ += 500; - } else if (nextReport_ < 10000) { - nextReport_ += 1000; - } else if (nextReport_ < 50000) { - nextReport_ += 5000; - } else if (nextReport_ < 100000) { - nextReport_ += 10000; - } else if (nextReport_ < 500000) { - nextReport_ += 50000; - } else { - nextReport_ += 100000; - } - System.err.printf("... Task %s finished %d ops%30s\r", id_, done_, ""); - } - } - - void report(String name) { - // Pretend at least one op was done in case we are running a benchmark - // that does not call FinishedSingleOp(). - if (done_ < 1) done_ = 1; - - StringBuilder extra = new StringBuilder(""); - if (bytes_ > 0) { - // Rate is computed on actual elapsed time, not the sum of per-thread - // elapsed times. 
- double elapsed = (finish_ - start_) * 1e-9; - extra.append(String.format("%6.1f MB/s", (bytes_ / 1048576.0) / elapsed)); - } - extra.append(message_.toString()); - double elapsed = (finish_ - start_); - double throughput = (double) done_ / (elapsed * 1e-9); - - System.out.format("%-12s : %11.3f micros/op %d ops/sec;%s%s\n", - name, (elapsed * 1e-6) / done_, - (long) throughput, (extra.length() == 0 ? "" : " "), extra.toString()); - } -} - -public class DbBenchmark { - enum Order { - SEQUENTIAL, - RANDOM - } - - enum DBState { - FRESH, - EXISTING - } - - static { - RocksDB.loadLibrary(); - } - - abstract class BenchmarkTask implements Callable { - // TODO(yhchiang): use (Integer)Flag.perf_level. - public BenchmarkTask( - int tid, long randSeed, long numEntries, long keyRange) { - tid_ = tid; - rand_ = new Random(randSeed + tid * 1000); - numEntries_ = numEntries; - keyRange_ = keyRange; - stats_ = new Stats(tid); - } - - @Override public Stats call() throws RocksDBException { - stats_.start_ = System.nanoTime(); - runTask(); - stats_.finish_ = System.nanoTime(); - return stats_; - } - - abstract protected void runTask() throws RocksDBException; - - protected int tid_; - protected Random rand_; - protected long numEntries_; - protected long keyRange_; - protected Stats stats_; - - protected void getFixedKey(byte[] key, long sn) { - generateKeyFromLong(key, sn); - } - - protected void getRandomKey(byte[] key, long range) { - generateKeyFromLong(key, Math.abs(rand_.nextLong() % range)); - } - } - - abstract class WriteTask extends BenchmarkTask { - public WriteTask( - int tid, long randSeed, long numEntries, long keyRange, - WriteOptions writeOpt, long entriesPerBatch) { - super(tid, randSeed, numEntries, keyRange); - writeOpt_ = writeOpt; - entriesPerBatch_ = entriesPerBatch; - maxWritesPerSecond_ = -1; - } - - public WriteTask( - int tid, long randSeed, long numEntries, long keyRange, - WriteOptions writeOpt, long entriesPerBatch, long maxWritesPerSecond) { - super(tid, randSeed, numEntries, keyRange); - writeOpt_ = writeOpt; - entriesPerBatch_ = entriesPerBatch; - maxWritesPerSecond_ = maxWritesPerSecond; - } - - @Override public void runTask() throws RocksDBException { - if (numEntries_ != DbBenchmark.this.num_) { - stats_.message_.append(String.format(" (%d ops)", numEntries_)); - } - byte[] key = new byte[keySize_]; - byte[] value = new byte[valueSize_]; - - try { - if (entriesPerBatch_ == 1) { - for (long i = 0; i < numEntries_; ++i) { - getKey(key, i, keyRange_); - DbBenchmark.this.gen_.generate(value); - db_.put(writeOpt_, key, value); - stats_.finishedSingleOp(keySize_ + valueSize_); - writeRateControl(i); - if (isFinished()) { - return; - } - } - } else { - for (long i = 0; i < numEntries_; i += entriesPerBatch_) { - WriteBatch batch = new WriteBatch(); - for (long j = 0; j < entriesPerBatch_; j++) { - getKey(key, i + j, keyRange_); - DbBenchmark.this.gen_.generate(value); - batch.put(key, value); - stats_.finishedSingleOp(keySize_ + valueSize_); - } - db_.write(writeOpt_, batch); - batch.dispose(); - writeRateControl(i); - if (isFinished()) { - return; - } - } - } - } catch (InterruptedException e) { - // thread has been terminated. 
- } - } - - protected void writeRateControl(long writeCount) - throws InterruptedException { - if (maxWritesPerSecond_ <= 0) return; - long minInterval = - writeCount * TimeUnit.SECONDS.toNanos(1) / maxWritesPerSecond_; - long interval = System.nanoTime() - stats_.start_; - if (minInterval - interval > TimeUnit.MILLISECONDS.toNanos(1)) { - TimeUnit.NANOSECONDS.sleep(minInterval - interval); - } - } - - abstract protected void getKey(byte[] key, long id, long range); - protected WriteOptions writeOpt_; - protected long entriesPerBatch_; - protected long maxWritesPerSecond_; - } - - class WriteSequentialTask extends WriteTask { - public WriteSequentialTask( - int tid, long randSeed, long numEntries, long keyRange, - WriteOptions writeOpt, long entriesPerBatch) { - super(tid, randSeed, numEntries, keyRange, - writeOpt, entriesPerBatch); - } - public WriteSequentialTask( - int tid, long randSeed, long numEntries, long keyRange, - WriteOptions writeOpt, long entriesPerBatch, - long maxWritesPerSecond) { - super(tid, randSeed, numEntries, keyRange, - writeOpt, entriesPerBatch, - maxWritesPerSecond); - } - @Override protected void getKey(byte[] key, long id, long range) { - getFixedKey(key, id); - } - } - - class WriteRandomTask extends WriteTask { - public WriteRandomTask( - int tid, long randSeed, long numEntries, long keyRange, - WriteOptions writeOpt, long entriesPerBatch) { - super(tid, randSeed, numEntries, keyRange, - writeOpt, entriesPerBatch); - } - public WriteRandomTask( - int tid, long randSeed, long numEntries, long keyRange, - WriteOptions writeOpt, long entriesPerBatch, - long maxWritesPerSecond) { - super(tid, randSeed, numEntries, keyRange, - writeOpt, entriesPerBatch, - maxWritesPerSecond); - } - @Override protected void getKey(byte[] key, long id, long range) { - getRandomKey(key, range); - } - } - - class WriteUniqueRandomTask extends WriteTask { - static final int MAX_BUFFER_SIZE = 10000000; - public WriteUniqueRandomTask( - int tid, long randSeed, long numEntries, long keyRange, - WriteOptions writeOpt, long entriesPerBatch) { - super(tid, randSeed, numEntries, keyRange, - writeOpt, entriesPerBatch); - initRandomKeySequence(); - } - public WriteUniqueRandomTask( - int tid, long randSeed, long numEntries, long keyRange, - WriteOptions writeOpt, long entriesPerBatch, - long maxWritesPerSecond) { - super(tid, randSeed, numEntries, keyRange, - writeOpt, entriesPerBatch, - maxWritesPerSecond); - initRandomKeySequence(); - } - @Override protected void getKey(byte[] key, long id, long range) { - generateKeyFromLong(key, nextUniqueRandom()); - } - - protected void initRandomKeySequence() { - bufferSize_ = MAX_BUFFER_SIZE; - if (bufferSize_ > keyRange_) { - bufferSize_ = (int) keyRange_; - } - currentKeyCount_ = bufferSize_; - keyBuffer_ = new long[MAX_BUFFER_SIZE]; - for (int k = 0; k < bufferSize_; ++k) { - keyBuffer_[k] = k; - } - } - - /** - * Semi-randomly return the next unique key. It is guaranteed to be - * fully random if keyRange_ <= MAX_BUFFER_SIZE. - */ - long nextUniqueRandom() { - if (bufferSize_ == 0) { - System.err.println("bufferSize_ == 0."); - return 0; - } - int r = rand_.nextInt(bufferSize_); - // randomly pick one from the keyBuffer - long randKey = keyBuffer_[r]; - if (currentKeyCount_ < keyRange_) { - // if we have not yet inserted all keys, insert next new key to [r]. - keyBuffer_[r] = currentKeyCount_++; - } else { - // move the last element to [r] and decrease the size by 1. 
- keyBuffer_[r] = keyBuffer_[--bufferSize_]; - } - return randKey; - } - - int bufferSize_; - long currentKeyCount_; - long[] keyBuffer_; - } - - class ReadRandomTask extends BenchmarkTask { - public ReadRandomTask( - int tid, long randSeed, long numEntries, long keyRange) { - super(tid, randSeed, numEntries, keyRange); - } - @Override public void runTask() throws RocksDBException { - byte[] key = new byte[keySize_]; - byte[] value = new byte[valueSize_]; - for (long i = 0; i < numEntries_; i++) { - getRandomKey(key, keyRange_); - int len = db_.get(key, value); - if (len != RocksDB.NOT_FOUND) { - stats_.found_++; - stats_.finishedSingleOp(keySize_ + valueSize_); - } else { - stats_.finishedSingleOp(keySize_); - } - if (isFinished()) { - return; - } - } - } - } - - class ReadSequentialTask extends BenchmarkTask { - public ReadSequentialTask( - int tid, long randSeed, long numEntries, long keyRange) { - super(tid, randSeed, numEntries, keyRange); - } - @Override public void runTask() throws RocksDBException { - RocksIterator iter = db_.newIterator(); - long i; - for (iter.seekToFirst(), i = 0; - iter.isValid() && i < numEntries_; - iter.next(), ++i) { - stats_.found_++; - stats_.finishedSingleOp(iter.key().length + iter.value().length); - if (isFinished()) { - iter.dispose(); - return; - } - } - iter.dispose(); - } - } - - public DbBenchmark(Map flags) throws Exception { - benchmarks_ = (List) flags.get(Flag.benchmarks); - num_ = (Integer) flags.get(Flag.num); - threadNum_ = (Integer) flags.get(Flag.threads); - reads_ = (Integer) (flags.get(Flag.reads) == null ? - flags.get(Flag.num) : flags.get(Flag.reads)); - keySize_ = (Integer) flags.get(Flag.key_size); - valueSize_ = (Integer) flags.get(Flag.value_size); - compressionRatio_ = (Double) flags.get(Flag.compression_ratio); - useExisting_ = (Boolean) flags.get(Flag.use_existing_db); - randSeed_ = (Long) flags.get(Flag.seed); - databaseDir_ = (String) flags.get(Flag.db); - writesPerSeconds_ = (Integer) flags.get(Flag.writes_per_second); - memtable_ = (String) flags.get(Flag.memtablerep); - maxWriteBufferNumber_ = (Integer) flags.get(Flag.max_write_buffer_number); - prefixSize_ = (Integer) flags.get(Flag.prefix_size); - keysPerPrefix_ = (Integer) flags.get(Flag.keys_per_prefix); - hashBucketCount_ = (Long) flags.get(Flag.hash_bucket_count); - usePlainTable_ = (Boolean) flags.get(Flag.use_plain_table); - useMemenv_ = (Boolean) flags.get(Flag.use_mem_env); - flags_ = flags; - finishLock_ = new Object(); - // options.setPrefixSize((Integer)flags_.get(Flag.prefix_size)); - // options.setKeysPerPrefix((Long)flags_.get(Flag.keys_per_prefix)); - compressionType_ = (String) flags.get(Flag.compression_type); - compression_ = CompressionType.NO_COMPRESSION; - try { - if (compressionType_!=null) { - final CompressionType compressionType = - CompressionType.getCompressionType(compressionType_); - if (compressionType != null && - compressionType != CompressionType.NO_COMPRESSION) { - System.loadLibrary(compressionType.getLibraryName()); - } - - } - } catch (UnsatisfiedLinkError e) { - System.err.format("Unable to load %s library:%s%n" + - "No compression is used.%n", - compressionType_, e.toString()); - compressionType_ = "none"; - } - gen_ = new RandomGenerator(randSeed_, compressionRatio_); - } - - private void prepareReadOptions(ReadOptions options) { - options.setVerifyChecksums((Boolean)flags_.get(Flag.verify_checksum)); - options.setTailing((Boolean)flags_.get(Flag.use_tailing_iterator)); - } - - private void prepareWriteOptions(WriteOptions options) 
{ - options.setSync((Boolean)flags_.get(Flag.sync)); - options.setDisableWAL((Boolean)flags_.get(Flag.disable_wal)); - } - - private void prepareOptions(Options options) throws RocksDBException { - if (!useExisting_) { - options.setCreateIfMissing(true); - } else { - options.setCreateIfMissing(false); - } - if (useMemenv_) { - options.setEnv(new RocksMemEnv(Env.getDefault())); - } - switch (memtable_) { - case "skip_list": - options.setMemTableConfig(new SkipListMemTableConfig()); - break; - case "vector": - options.setMemTableConfig(new VectorMemTableConfig()); - break; - case "hash_linkedlist": - options.setMemTableConfig( - new HashLinkedListMemTableConfig() - .setBucketCount(hashBucketCount_)); - options.useFixedLengthPrefixExtractor(prefixSize_); - break; - case "hash_skiplist": - case "prefix_hash": - options.setMemTableConfig( - new HashSkipListMemTableConfig() - .setBucketCount(hashBucketCount_)); - options.useFixedLengthPrefixExtractor(prefixSize_); - break; - default: - System.err.format( - "unable to detect the specified memtable, " + - "use the default memtable factory %s%n", - options.memTableFactoryName()); - break; - } - if (usePlainTable_) { - options.setTableFormatConfig( - new PlainTableConfig().setKeySize(keySize_)); - } else { - BlockBasedTableConfig table_options = new BlockBasedTableConfig(); - table_options.setBlockSize((Long)flags_.get(Flag.block_size)) - .setBlockCacheSize((Long)flags_.get(Flag.cache_size)) - .setCacheNumShardBits( - (Integer)flags_.get(Flag.cache_numshardbits)); - options.setTableFormatConfig(table_options); - } - options.setWriteBufferSize( - (Long)flags_.get(Flag.write_buffer_size)); - options.setMaxWriteBufferNumber( - (Integer)flags_.get(Flag.max_write_buffer_number)); - options.setMaxBackgroundCompactions( - (Integer)flags_.get(Flag.max_background_compactions)); - options.getEnv().setBackgroundThreads( - (Integer)flags_.get(Flag.max_background_compactions)); - options.setMaxBackgroundFlushes( - (Integer)flags_.get(Flag.max_background_flushes)); - options.setMaxBackgroundJobs((Integer) flags_.get(Flag.max_background_jobs)); - options.setMaxOpenFiles( - (Integer)flags_.get(Flag.open_files)); - options.setUseFsync( - (Boolean)flags_.get(Flag.use_fsync)); - options.setWalDir( - (String)flags_.get(Flag.wal_dir)); - options.setDeleteObsoleteFilesPeriodMicros( - (Integer)flags_.get(Flag.delete_obsolete_files_period_micros)); - options.setTableCacheNumshardbits( - (Integer)flags_.get(Flag.table_cache_numshardbits)); - options.setAllowMmapReads( - (Boolean)flags_.get(Flag.mmap_read)); - options.setAllowMmapWrites( - (Boolean)flags_.get(Flag.mmap_write)); - options.setAdviseRandomOnOpen( - (Boolean)flags_.get(Flag.advise_random_on_open)); - options.setUseAdaptiveMutex( - (Boolean)flags_.get(Flag.use_adaptive_mutex)); - options.setBytesPerSync( - (Long)flags_.get(Flag.bytes_per_sync)); - options.setBloomLocality( - (Integer)flags_.get(Flag.bloom_locality)); - options.setMinWriteBufferNumberToMerge( - (Integer)flags_.get(Flag.min_write_buffer_number_to_merge)); - options.setMemtablePrefixBloomSizeRatio((Double) flags_.get(Flag.memtable_bloom_size_ratio)); - options.setMemtableWholeKeyFiltering((Boolean) flags_.get(Flag.memtable_whole_key_filtering)); - options.setNumLevels( - (Integer)flags_.get(Flag.num_levels)); - options.setTargetFileSizeBase( - (Integer)flags_.get(Flag.target_file_size_base)); - options.setTargetFileSizeMultiplier((Integer)flags_.get(Flag.target_file_size_multiplier)); - options.setMaxBytesForLevelBase( - 
(Integer)flags_.get(Flag.max_bytes_for_level_base)); - options.setMaxBytesForLevelMultiplier((Double) flags_.get(Flag.max_bytes_for_level_multiplier)); - options.setLevelZeroStopWritesTrigger( - (Integer)flags_.get(Flag.level0_stop_writes_trigger)); - options.setLevelZeroSlowdownWritesTrigger( - (Integer)flags_.get(Flag.level0_slowdown_writes_trigger)); - options.setLevelZeroFileNumCompactionTrigger( - (Integer)flags_.get(Flag.level0_file_num_compaction_trigger)); - options.setMaxCompactionBytes( - (Long) flags_.get(Flag.max_compaction_bytes)); - options.setDisableAutoCompactions( - (Boolean)flags_.get(Flag.disable_auto_compactions)); - options.setMaxSuccessiveMerges( - (Integer)flags_.get(Flag.max_successive_merges)); - options.setWalTtlSeconds((Long)flags_.get(Flag.wal_ttl_seconds)); - options.setWalSizeLimitMB((Long)flags_.get(Flag.wal_size_limit_MB)); - if(flags_.get(Flag.java_comparator) != null) { - options.setComparator( - (AbstractComparator)flags_.get(Flag.java_comparator)); - } - - /* TODO(yhchiang): enable the following parameters - options.setCompressionType((String)flags_.get(Flag.compression_type)); - options.setCompressionLevel((Integer)flags_.get(Flag.compression_level)); - options.setMinLevelToCompress((Integer)flags_.get(Flag.min_level_to_compress)); - options.setStatistics((Boolean)flags_.get(Flag.statistics)); - options.setUniversalSizeRatio( - (Integer)flags_.get(Flag.universal_size_ratio)); - options.setUniversalMinMergeWidth( - (Integer)flags_.get(Flag.universal_min_merge_width)); - options.setUniversalMaxMergeWidth( - (Integer)flags_.get(Flag.universal_max_merge_width)); - options.setUniversalMaxSizeAmplificationPercent( - (Integer)flags_.get(Flag.universal_max_size_amplification_percent)); - options.setUniversalCompressionSizePercent( - (Integer)flags_.get(Flag.universal_compression_size_percent)); - // TODO(yhchiang): add RocksDB.openForReadOnly() to enable Flag.readonly - // TODO(yhchiang): enable Flag.merge_operator by switch - options.setAccessHintOnCompactionStart( - (String)flags_.get(Flag.compaction_fadvice)); - // available values of fadvice are "NONE", "NORMAL", "SEQUENTIAL", "WILLNEED" for fadvice - */ - } - - private void run() throws RocksDBException { - if (!useExisting_) { - destroyDb(); - } - Options options = new Options(); - prepareOptions(options); - open(options); - - printHeader(options); - - for (String benchmark : benchmarks_) { - List> tasks = new ArrayList>(); - List> bgTasks = new ArrayList>(); - WriteOptions writeOpt = new WriteOptions(); - prepareWriteOptions(writeOpt); - ReadOptions readOpt = new ReadOptions(); - prepareReadOptions(readOpt); - int currentTaskId = 0; - boolean known = true; - - switch (benchmark) { - case "fillseq": - tasks.add(new WriteSequentialTask( - currentTaskId++, randSeed_, num_, num_, writeOpt, 1)); - break; - case "fillbatch": - tasks.add( - new WriteSequentialTask(currentTaskId++, randSeed_, num_, num_, writeOpt, 1000)); - break; - case "fillrandom": - tasks.add(new WriteRandomTask( - currentTaskId++, randSeed_, num_, num_, writeOpt, 1)); - break; - case "filluniquerandom": - tasks.add(new WriteUniqueRandomTask( - currentTaskId++, randSeed_, num_, num_, writeOpt, 1)); - break; - case "fillsync": - writeOpt.setSync(true); - tasks.add(new WriteRandomTask( - currentTaskId++, randSeed_, num_ / 1000, num_ / 1000, - writeOpt, 1)); - break; - case "readseq": - for (int t = 0; t < threadNum_; ++t) { - tasks.add(new ReadSequentialTask( - currentTaskId++, randSeed_, reads_ / threadNum_, num_)); - } - break; - case 
"readrandom": - for (int t = 0; t < threadNum_; ++t) { - tasks.add(new ReadRandomTask( - currentTaskId++, randSeed_, reads_ / threadNum_, num_)); - } - break; - case "readwhilewriting": - WriteTask writeTask = new WriteRandomTask( - -1, randSeed_, Long.MAX_VALUE, num_, writeOpt, 1, writesPerSeconds_); - writeTask.stats_.setExcludeFromMerge(); - bgTasks.add(writeTask); - for (int t = 0; t < threadNum_; ++t) { - tasks.add(new ReadRandomTask( - currentTaskId++, randSeed_, reads_ / threadNum_, num_)); - } - break; - case "readhot": - for (int t = 0; t < threadNum_; ++t) { - tasks.add(new ReadRandomTask( - currentTaskId++, randSeed_, reads_ / threadNum_, num_ / 100)); - } - break; - case "delete": - destroyDb(); - open(options); - break; - default: - known = false; - System.err.println("Unknown benchmark: " + benchmark); - break; - } - if (known) { - ExecutorService executor = Executors.newCachedThreadPool(); - ExecutorService bgExecutor = Executors.newCachedThreadPool(); - try { - // measure only the main executor time - List> bgResults = new ArrayList>(); - for (Callable bgTask : bgTasks) { - bgResults.add(bgExecutor.submit(bgTask)); - } - start(); - List> results = executor.invokeAll(tasks); - executor.shutdown(); - boolean finished = executor.awaitTermination(10, TimeUnit.SECONDS); - if (!finished) { - System.out.format( - "Benchmark %s was not finished before timeout.", - benchmark); - executor.shutdownNow(); - } - setFinished(true); - bgExecutor.shutdown(); - finished = bgExecutor.awaitTermination(10, TimeUnit.SECONDS); - if (!finished) { - System.out.format( - "Benchmark %s was not finished before timeout.", - benchmark); - bgExecutor.shutdownNow(); - } - - stop(benchmark, results, currentTaskId); - } catch (InterruptedException e) { - System.err.println(e); - } - } - writeOpt.dispose(); - readOpt.dispose(); - } - options.dispose(); - db_.close(); - } - - private void printHeader(Options options) { - int kKeySize = 16; - System.out.printf("Keys: %d bytes each\n", kKeySize); - System.out.printf("Values: %d bytes each (%d bytes after compression)\n", - valueSize_, - (int) (valueSize_ * compressionRatio_ + 0.5)); - System.out.printf("Entries: %d\n", num_); - System.out.printf("RawSize: %.1f MB (estimated)\n", - ((double)(kKeySize + valueSize_) * num_) / SizeUnit.MB); - System.out.printf("FileSize: %.1f MB (estimated)\n", - (((kKeySize + valueSize_ * compressionRatio_) * num_) / SizeUnit.MB)); - System.out.format("Memtable Factory: %s%n", options.memTableFactoryName()); - System.out.format("Prefix: %d bytes%n", prefixSize_); - System.out.format("Compression: %s%n", compressionType_); - printWarnings(); - System.out.printf("------------------------------------------------\n"); - } - - void printWarnings() { - boolean assertsEnabled = false; - assert assertsEnabled = true; // Intentional side effect!!! 
- if (assertsEnabled) { - System.out.printf( - "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); - } - } - - private void open(Options options) throws RocksDBException { - System.out.println("Using database directory: " + databaseDir_); - db_ = RocksDB.open(options, databaseDir_); - } - - private void start() { - setFinished(false); - startTime_ = System.nanoTime(); - } - - private void stop( - String benchmark, List> results, int concurrentThreads) { - long endTime = System.nanoTime(); - double elapsedSeconds = - 1.0d * (endTime - startTime_) / TimeUnit.SECONDS.toNanos(1); - - Stats stats = new Stats(-1); - int taskFinishedCount = 0; - for (Future result : results) { - if (result.isDone()) { - try { - Stats taskStats = result.get(3, TimeUnit.SECONDS); - if (!result.isCancelled()) { - taskFinishedCount++; - } - stats.merge(taskStats); - } catch (Exception e) { - // then it's not successful, the output will indicate this - } - } - } - String extra = ""; - if (benchmark.indexOf("read") >= 0) { - extra = String.format(" %d / %d found; ", stats.found_, stats.done_); - } else { - extra = String.format(" %d ops done; ", stats.done_); - } - - System.out.printf( - "%-16s : %11.5f micros/op; %6.1f MB/s;%s %d / %d task(s) finished.\n", - benchmark, elapsedSeconds / stats.done_ * 1e6, - (stats.bytes_ / 1048576.0) / elapsedSeconds, extra, - taskFinishedCount, concurrentThreads); - } - - public void generateKeyFromLong(byte[] slice, long n) { - assert(n >= 0); - int startPos = 0; - - if (keysPerPrefix_ > 0) { - long numPrefix = (num_ + keysPerPrefix_ - 1) / keysPerPrefix_; - long prefix = n % numPrefix; - int bytesToFill = Math.min(prefixSize_, 8); - for (int i = 0; i < bytesToFill; ++i) { - slice[i] = (byte) (prefix % 256); - prefix /= 256; - } - for (int i = 8; i < bytesToFill; ++i) { - slice[i] = '0'; - } - startPos = bytesToFill; - } - - for (int i = slice.length - 1; i >= startPos; --i) { - slice[i] = (byte) ('0' + (n % 10)); - n /= 10; - } - } - - private void destroyDb() { - if (db_ != null) { - db_.close(); - } - // TODO(yhchiang): develop our own FileUtil - // FileUtil.deleteDir(databaseDir_); - } - - private void printStats() { - } - - static void printHelp() { - System.out.println("usage:"); - for (Flag flag : Flag.values()) { - System.out.format(" --%s%n\t%s%n", - flag.name(), - flag.desc()); - if (flag.getDefaultValue() != null) { - System.out.format("\tDEFAULT: %s%n", - flag.getDefaultValue().toString()); - } - } - } - - public static void main(String[] args) throws Exception { - Map flags = new EnumMap(Flag.class); - for (Flag flag : Flag.values()) { - if (flag.getDefaultValue() != null) { - flags.put(flag, flag.getDefaultValue()); - } - } - for (String arg : args) { - boolean valid = false; - if (arg.equals("--help") || arg.equals("-h")) { - printHelp(); - System.exit(0); - } - if (arg.startsWith("--")) { - try { - String[] parts = arg.substring(2).split("="); - if (parts.length >= 1) { - Flag key = Flag.valueOf(parts[0]); - if (key != null) { - Object value = null; - if (parts.length >= 2) { - value = key.parseValue(parts[1]); - } - flags.put(key, value); - valid = true; - } - } - } - catch (Exception e) { - } - } - if (!valid) { - System.err.println("Invalid argument " + arg); - System.exit(1); - } - } - new DbBenchmark(flags).run(); - } - - private enum Flag { - benchmarks(Arrays.asList("fillseq", "readrandom", "fillrandom"), - "Comma-separated list of operations to run in the specified order\n" - + "\tActual benchmarks:\n" - + "\t\tfillseq -- write N values in 
sequential key order in async mode.\n" - + "\t\tfillrandom -- write N values in random key order in async mode.\n" - + "\t\tfillbatch -- write N/1000 batch where each batch has 1000 values\n" - + "\t\t in sequential key order in sync mode.\n" - + "\t\tfillsync -- write N/100 values in random key order in sync mode.\n" - + "\t\tfill100K -- write N/1000 100K values in random order in async mode.\n" - + "\t\treadseq -- read N times sequentially.\n" - + "\t\treadrandom -- read N times in random order.\n" - + "\t\treadhot -- read N times in random order from 1% section of DB.\n" - + "\t\treadwhilewriting -- measure the read performance of multiple readers\n" - + "\t\t with a bg single writer. The write rate of the bg\n" - + "\t\t is capped by --writes_per_second.\n" - + "\tMeta Operations:\n" - + "\t\tdelete -- delete DB") { - @Override public Object parseValue(String value) { - return new ArrayList(Arrays.asList(value.split(","))); - } - }, - compression_ratio(0.5d, - "Arrange to generate values that shrink to this fraction of\n" + - "\ttheir original size after compression.") { - @Override public Object parseValue(String value) { - return Double.parseDouble(value); - } - }, - use_existing_db(false, - "If true, do not destroy the existing database. If you set this\n" + - "\tflag and also specify a benchmark that wants a fresh database,\n" + - "\tthat benchmark will fail.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - num(1000000, - "Number of key/values to place in database.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - threads(1, - "Number of concurrent threads to run.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - reads(null, - "Number of read operations to do. If negative, do --nums reads.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - key_size(16, - "The size of each key in bytes.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - value_size(100, - "The size of each value in bytes.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - write_buffer_size(4L * SizeUnit.MB, - "Number of bytes to buffer in memtable before compacting\n" + - "\t(initialized to default value by 'main'.)") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - max_write_buffer_number(2, - "The number of in-memory memtables. Each memtable is of size\n" + - "\twrite_buffer_size.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - prefix_size(0, "Controls the prefix size for HashSkipList, HashLinkedList,\n" + - "\tand plain table.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - keys_per_prefix(0, "Controls the average number of keys generated\n" + - "\tper prefix, 0 means no special handling of the prefix,\n" + - "\ti.e. use the prefix comes with the generated random number.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - memtablerep("skip_list", - "The memtable format. 
Available options are\n" + - "\tskip_list,\n" + - "\tvector,\n" + - "\thash_linkedlist,\n" + - "\thash_skiplist (prefix_hash.)") { - @Override public Object parseValue(String value) { - return value; - } - }, - hash_bucket_count(SizeUnit.MB, - "The number of hash buckets used in the hash-bucket-based\n" + - "\tmemtables. Memtables that currently support this argument are\n" + - "\thash_linkedlist and hash_skiplist.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - writes_per_second(10000, - "The write-rate of the background writer used in the\n" + - "\t`readwhilewriting` benchmark. Non-positive number indicates\n" + - "\tusing an unbounded write-rate in `readwhilewriting` benchmark.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - use_plain_table(false, - "Use plain-table sst format.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - cache_size(-1L, - "Number of bytes to use as a cache of uncompressed data.\n" + - "\tNegative means use default settings.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - seed(0L, - "Seed base for random number generators.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - num_levels(7, - "The total number of levels.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - numdistinct(1000L, - "Number of distinct keys to use. Used in RandomWithVerify to\n" + - "\tread/write on fewer keys so that gets are more likely to find the\n" + - "\tkey and puts are more likely to update the same key.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - merge_keys(-1L, - "Number of distinct keys to use for MergeRandom and\n" + - "\tReadRandomMergeRandom.\n" + - "\tIf negative, there will be FLAGS_num keys.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - bloom_locality(0,"Control bloom filter probes locality.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - duration(0,"Time in seconds for the random-ops tests to run.\n" + - "\tWhen 0 then num & reads determine the test duration.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - num_multi_db(0, - "Number of DBs used in the benchmark. 0 means single DB.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - histogram(false,"Print histogram of operation timings.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - min_write_buffer_number_to_merge( - defaultOptions_.minWriteBufferNumberToMerge(), - "The minimum number of write buffers that will be merged together\n" + - "\tbefore writing to storage. This is cheap because it is an\n" + - "\tin-memory merge. If this feature is not enabled, then all these\n" + - "\twrite buffers are flushed to L0 as separate files and this\n" + - "\tincreases read amplification because a get request has to check\n" + - "\tin all of these files. 
Also, an in-memory merge may result in\n" + - "\twriting less data to storage if there are duplicate records\n" + - "\tin each of these individual write buffers.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - max_background_compactions( - defaultOptions_.maxBackgroundCompactions(), - "The maximum number of concurrent background compactions\n" + - "\tthat can occur in parallel.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - max_background_flushes( - defaultOptions_.maxBackgroundFlushes(), - "The maximum number of concurrent background flushes\n" + - "\tthat can occur in parallel.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - max_background_jobs(defaultOptions_.maxBackgroundJobs(), - "The maximum number of concurrent background jobs\n" - + "\tthat can occur in parallel.") { - @Override - public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - /* TODO(yhchiang): enable the following - compaction_style((int32_t) defaultOptions_.compactionStyle(), - "style of compaction: level-based vs universal.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - },*/ - universal_size_ratio(0, - "Percentage flexibility while comparing file size\n" + - "\t(for universal compaction only).") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - universal_min_merge_width(0,"The minimum number of files in a\n" + - "\tsingle compaction run (for universal compaction only).") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - universal_max_merge_width(0,"The max number of files to compact\n" + - "\tin universal style compaction.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - universal_max_size_amplification_percent(0, - "The max size amplification for universal style compaction.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - universal_compression_size_percent(-1, - "The percentage of the database to compress for universal\n" + - "\tcompaction. -1 means compress everything.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - block_size(defaultBlockBasedTableOptions_.blockSize(), - "Number of bytes in a block.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - compressed_cache_size(-1L, - "Number of bytes to use as a cache of compressed data.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - open_files(defaultOptions_.maxOpenFiles(), - "Maximum number of files to keep open at the same time\n" + - "\t(use default if == 0)") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - bloom_bits(-1,"Bloom filter bits per key. 
Negative means\n" + - "\tuse default settings.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - memtable_bloom_size_ratio(0.0d, "Ratio of memtable used by the bloom filter.\n" - + "\t0 means no bloom filter.") { - @Override public Object parseValue(String value) { - return Double.parseDouble(value); - } - }, - memtable_whole_key_filtering(false, "Enable whole key bloom filter in memtable.") { - @Override - public Object parseValue(String value) { - return parseBoolean(value); - } - }, - cache_numshardbits(-1,"Number of shards for the block cache\n" + - "\tis 2 ** cache_numshardbits. Negative means use default settings.\n" + - "\tThis is applied only if FLAGS_cache_size is non-negative.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - verify_checksum(false,"Verify checksum for every block read\n" + - "\tfrom storage.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - statistics(false,"Database statistics.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - writes(-1L, "Number of write operations to do. If negative, do\n" + - "\t--num reads.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - sync(false,"Sync all writes to disk.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - use_fsync(false,"If true, issue fsync instead of fdatasync.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - disable_wal(false,"If true, do not write WAL for write.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - wal_dir("", "If not empty, use the given dir for WAL.") { - @Override public Object parseValue(String value) { - return value; - } - }, - target_file_size_base(2 * 1048576,"Target file size at level-1") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - target_file_size_multiplier(1, - "A multiplier to compute target level-N file size (N >= 2)") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - max_bytes_for_level_base(10 * 1048576, - "Max bytes for level-1") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - max_bytes_for_level_multiplier(10.0d, - "A multiplier to compute max bytes for level-N (N >= 2)") { - @Override public Object parseValue(String value) { - return Double.parseDouble(value); - } - }, - level0_stop_writes_trigger(12,"Number of files in level-0\n" + - "\tthat will trigger put stop.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - level0_slowdown_writes_trigger(8,"Number of files in level-0\n" + - "\tthat will slow down writes.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - level0_file_num_compaction_trigger(4,"Number of files in level-0\n" + - "\twhen compactions start.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - readwritepercent(90,"Ratio of reads to reads/writes (expressed\n" + - "\tas percentage) for the ReadRandomWriteRandom workload. The\n" + - "\tdefault value 90 means 90% operations out of all reads and writes\n" + - "\toperations are reads. 
In other words, 9 gets for every 1 put.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - mergereadpercent(70,"Ratio of merges to merges&reads (expressed\n" + - "\tas percentage) for the ReadRandomMergeRandom workload. The\n" + - "\tdefault value 70 means 70% out of all read and merge operations\n" + - "\tare merges. In other words, 7 merges for every 3 gets.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - deletepercent(2,"Percentage of deletes out of reads/writes/\n" + - "\tdeletes (used in RandomWithVerify only). RandomWithVerify\n" + - "\tcalculates writepercent as (100 - FLAGS_readwritepercent -\n" + - "\tdeletepercent), so deletepercent must be smaller than (100 -\n" + - "\tFLAGS_readwritepercent)") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - delete_obsolete_files_period_micros(0,"Option to delete\n" + - "\tobsolete files periodically. 0 means that obsolete files are\n" + - "\tdeleted after every compaction run.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - compression_type("snappy", - "Algorithm used to compress the database.") { - @Override public Object parseValue(String value) { - return value; - } - }, - compression_level(-1, - "Compression level. For zlib this should be -1 for the\n" + - "\tdefault level, or between 0 and 9.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - min_level_to_compress(-1,"If non-negative, compression starts\n" + - "\tfrom this level. Levels with number < min_level_to_compress are\n" + - "\tnot compressed. Otherwise, apply compression_type to\n" + - "\tall levels.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - table_cache_numshardbits(4,"") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - stats_interval(0L, "Stats are reported every N operations when\n" + - "\tthis is greater than zero. When 0 the interval grows over time.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - stats_per_interval(0,"Reports additional stats per interval when\n" + - "\tthis is greater than 0.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - perf_level(0,"Level of perf collection.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - max_compaction_bytes(0L, "Limit number of bytes in one compaction to be lower than this\n" + - "\threshold. 
But it's not guaranteed.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - readonly(false,"Run read only benchmarks.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - disable_auto_compactions(false,"Do not auto trigger compactions.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - wal_ttl_seconds(0L,"Set the TTL for the WAL Files in seconds.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - wal_size_limit_MB(0L,"Set the size limit for the WAL Files\n" + - "\tin MB.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - /* TODO(yhchiang): enable the following - direct_reads(rocksdb::EnvOptions().use_direct_reads, - "Allow direct I/O reads.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - direct_writes(rocksdb::EnvOptions().use_direct_reads, - "Allow direct I/O reads.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - */ - mmap_read(false, - "Allow reads to occur via mmap-ing files.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - mmap_write(false, - "Allow writes to occur via mmap-ing files.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - advise_random_on_open(defaultOptions_.adviseRandomOnOpen(), - "Advise random access on table file open.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - compaction_fadvice("NORMAL", - "Access pattern advice when a file is compacted.") { - @Override public Object parseValue(String value) { - return value; - } - }, - use_tailing_iterator(false, - "Use tailing iterator to access a series of keys instead of get.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - use_adaptive_mutex(defaultOptions_.useAdaptiveMutex(), - "Use adaptive mutex.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - bytes_per_sync(defaultOptions_.bytesPerSync(), - "Allows OS to incrementally sync files to disk while they are\n" + - "\tbeing written, in the background. Issue one request for every\n" + - "\tbytes_per_sync written. 0 turns it off.") { - @Override public Object parseValue(String value) { - return Long.parseLong(value); - } - }, - filter_deletes(false," On true, deletes use bloom-filter and drop\n" + - "\tthe delete if key not present.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - max_successive_merges(0,"Maximum number of successive merge\n" + - "\toperations on a key in the memtable.") { - @Override public Object parseValue(String value) { - return Integer.parseInt(value); - } - }, - db(getTempDir("rocksdb-jni"), - "Use the db with the following name.") { - @Override public Object parseValue(String value) { - return value; - } - }, - use_mem_env(false, "Use RocksMemEnv instead of default filesystem based\n" + - "environment.") { - @Override public Object parseValue(String value) { - return parseBoolean(value); - } - }, - java_comparator(null, "Class name of a Java Comparator to use instead\n" + - "\tof the default C++ ByteWiseComparatorImpl. 
Must be available on\n" + - "\tthe classpath") { - @Override - protected Object parseValue(final String value) { - try { - final ComparatorOptions copt = new ComparatorOptions(); - final Class clsComparator = - (Class)Class.forName(value); - final Constructor cstr = - clsComparator.getConstructor(ComparatorOptions.class); - return cstr.newInstance(copt); - } catch(final ClassNotFoundException cnfe) { - throw new IllegalArgumentException("Java Comparator '" + value + "'" + - " not found on the classpath", cnfe); - } catch(final NoSuchMethodException nsme) { - throw new IllegalArgumentException("Java Comparator '" + value + "'" + - " does not have a public ComparatorOptions constructor", nsme); - } catch(final IllegalAccessException | InstantiationException - | InvocationTargetException ie) { - throw new IllegalArgumentException("Unable to construct Java" + - " Comparator '" + value + "'", ie); - } - } - }; - - private Flag(Object defaultValue, String desc) { - defaultValue_ = defaultValue; - desc_ = desc; - } - - public Object getDefaultValue() { - return defaultValue_; - } - - public String desc() { - return desc_; - } - - public boolean parseBoolean(String value) { - if (value.equals("1")) { - return true; - } else if (value.equals("0")) { - return false; - } - return Boolean.parseBoolean(value); - } - - protected abstract Object parseValue(String value); - - private final Object defaultValue_; - private final String desc_; - } - - private final static String DEFAULT_TEMP_DIR = "/tmp"; - - private static String getTempDir(final String dirName) { - try { - return Files.createTempDirectory(dirName).toAbsolutePath().toString(); - } catch(final IOException ioe) { - System.err.println("Unable to create temp directory, defaulting to: " + - DEFAULT_TEMP_DIR); - return DEFAULT_TEMP_DIR + File.pathSeparator + dirName; - } - } - - private static class RandomGenerator { - private final byte[] data_; - private int dataLength_; - private int position_; - private double compressionRatio_; - Random rand_; - - private RandomGenerator(long seed, double compressionRatio) { - // We use a limited amount of data over and over again and ensure - // that it is larger than the compression window (32KB), and also - byte[] value = new byte[100]; - // large enough to serve all typical value sizes we want to write. - rand_ = new Random(seed); - dataLength_ = value.length * 10000; - data_ = new byte[dataLength_]; - compressionRatio_ = compressionRatio; - int pos = 0; - while (pos < dataLength_) { - compressibleBytes(value); - System.arraycopy(value, 0, data_, pos, - Math.min(value.length, dataLength_ - pos)); - pos += value.length; - } - } - - private void compressibleBytes(byte[] value) { - int baseLength = value.length; - if (compressionRatio_ < 1.0d) { - baseLength = (int) (compressionRatio_ * value.length + 0.5); - } - if (baseLength <= 0) { - baseLength = 1; - } - int pos; - for (pos = 0; pos < baseLength; ++pos) { - value[pos] = (byte) (' ' + rand_.nextInt(95)); // ' ' .. 
'~' - } - while (pos < value.length) { - System.arraycopy(value, 0, value, pos, - Math.min(baseLength, value.length - pos)); - pos += baseLength; - } - } - - private void generate(byte[] value) { - if (position_ + value.length > data_.length) { - position_ = 0; - assert(value.length <= data_.length); - } - position_ += value.length; - System.arraycopy(data_, position_ - value.length, - value, 0, value.length); - } - } - - boolean isFinished() { - synchronized(finishLock_) { - return isFinished_; - } - } - - void setFinished(boolean flag) { - synchronized(finishLock_) { - isFinished_ = flag; - } - } - - RocksDB db_; - final List benchmarks_; - final int num_; - final int reads_; - final int keySize_; - final int valueSize_; - final int threadNum_; - final int writesPerSeconds_; - final long randSeed_; - final boolean useExisting_; - final String databaseDir_; - double compressionRatio_; - RandomGenerator gen_; - long startTime_; - - // env - boolean useMemenv_; - - // memtable related - final int maxWriteBufferNumber_; - final int prefixSize_; - final int keysPerPrefix_; - final String memtable_; - final long hashBucketCount_; - - // sst format related - boolean usePlainTable_; - - Object finishLock_; - boolean isFinished_; - Map flags_; - // as the scope of a static member equals to the scope of the problem, - // we let its c++ pointer to be disposed in its finalizer. - static Options defaultOptions_ = new Options(); - static BlockBasedTableConfig defaultBlockBasedTableOptions_ = - new BlockBasedTableConfig(); - String compressionType_; - CompressionType compression_; -} diff --git a/java/crossbuild/Vagrantfile b/java/crossbuild/Vagrantfile deleted file mode 100644 index 0ee50de2c..000000000 --- a/java/crossbuild/Vagrantfile +++ /dev/null @@ -1,51 +0,0 @@ -# -*- mode: ruby -*- -# vi: set ft=ruby : - -# Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 
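The DbBenchmark driver above resolves each `--name=value` argument by looking the name up in the `Flag` enum and handing the value to that flag's `parseValue` override; anything that does not match a known flag aborts with "Invalid argument". The `jdb_bench.sh` wrapper deleted later in this diff simply forwards its arguments to this main method, so a run exercising a few of the flags defined above might look like the following (flag values are purely illustrative):

```bash
# From the java/ directory, after building the rocksdbjni jar and native library
# under java/target; jdb_bench.sh forwards every --flag=value to DbBenchmark.main.
./jdb_bench.sh \
  --benchmarks=fillseq,readrandom \
  --num=1000000 \
  --key_size=16 \
  --value_size=100 \
  --threads=4 \
  --compression_type=snappy \
  --histogram=1
```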
-VAGRANTFILE_API_VERSION = "2" - -Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| - - config.vm.define "linux32" do |linux32| - linux32.vm.box = "bento/centos-6.10-i386" - linux32.vm.provision :shell, path: "build-linux-centos.sh" - end - - config.vm.define "linux64" do |linux64| - linux64.vm.box = "bento/centos-6.10" - linux64.vm.provision :shell, path: "build-linux-centos.sh" - end - - config.vm.define "linux32-musl" do |musl32| - musl32.vm.box = "alpine/alpine32" - musl32.vm.box_version = "3.6.0" - musl32.vm.provision :shell, path: "build-linux-alpine.sh" - end - - config.vm.define "linux64-musl" do |musl64| - musl64.vm.box = "generic/alpine36" - - ## Should use the alpine/alpine64 box, but this issue needs to be fixed first - https://github.com/hashicorp/vagrant/issues/11218 - # musl64.vm.box = "alpine/alpine64" - # musl64.vm.box_version = "3.6.0" - - musl64.vm.provision :shell, path: "build-linux-alpine.sh" - end - - config.vm.provider "virtualbox" do |v| - v.memory = 2048 - v.cpus = 4 - v.customize ["modifyvm", :id, "--nictype1", "virtio" ] - end - - if Vagrant.has_plugin?("vagrant-cachier") - config.cache.scope = :box - end - if Vagrant.has_plugin?("vagrant-vbguest") - config.vbguest.no_install = true - end - - config.vm.synced_folder "../target", "/rocksdb-build" - config.vm.synced_folder "../..", "/rocksdb", type: "rsync" - config.vm.boot_timeout = 1200 -end diff --git a/java/crossbuild/build-linux-alpine.sh b/java/crossbuild/build-linux-alpine.sh deleted file mode 100755 index 561d34141..000000000 --- a/java/crossbuild/build-linux-alpine.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -set -e - -# update Alpine with latest versions -echo '@edge http://nl.alpinelinux.org/alpine/edge/main' >> /etc/apk/repositories -echo '@community http://nl.alpinelinux.org/alpine/edge/community' >> /etc/apk/repositories -apk update -apk upgrade - -# install CA certificates -apk add ca-certificates - -# install build tools -apk add \ - build-base \ - coreutils \ - file \ - git \ - perl \ - automake \ - autoconf \ - cmake - -# install tool dependencies for building RocksDB static library -apk add \ - curl \ - bash \ - wget \ - tar \ - openssl - -# install RocksDB dependencies -apk add \ - snappy snappy-dev \ - zlib zlib-dev \ - bzip2 bzip2-dev \ - lz4 lz4-dev \ - zstd zstd-dev \ - linux-headers \ - jemalloc jemalloc-dev - -# install OpenJDK7 -apk add openjdk7 \ - && apk add java-cacerts \ - && rm /usr/lib/jvm/java-1.7-openjdk/jre/lib/security/cacerts \ - && ln -s /etc/ssl/certs/java/cacerts /usr/lib/jvm/java-1.7-openjdk/jre/lib/security/cacerts - -# cleanup -rm -rf /var/cache/apk/* - -# puts javac in the PATH -export JAVA_HOME=/usr/lib/jvm/java-1.7-openjdk -export PATH=/usr/lib/jvm/java-1.7-openjdk/bin:$PATH - -# gflags from source -cd /tmp &&\ - git clone -b v2.0 --single-branch https://github.com/gflags/gflags.git &&\ - cd gflags &&\ - ./configure --prefix=/usr && make && make install &&\ - rm -rf /tmp/* - - -# build rocksdb -cd /rocksdb -make jclean clean -PORTABLE=1 make -j8 rocksdbjavastatic -cp /rocksdb/java/target/librocksdbjni-* /rocksdb-build -cp /rocksdb/java/target/rocksdbjni-* /rocksdb-build diff --git a/java/crossbuild/build-linux-centos.sh b/java/crossbuild/build-linux-centos.sh deleted file mode 100755 index 176e3456c..000000000 --- a/java/crossbuild/build-linux-centos.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
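Each machine in the Vagrantfile above provisions itself with one of the build-linux-*.sh scripts and copies the resulting librocksdbjni-* library and rocksdbjni-* jar into /rocksdb-build, which is synced back to java/target on the host. Assuming Vagrant and VirtualBox are installed, a single target is typically driven like this (standard Vagrant commands, not taken from this diff):

```bash
cd java/crossbuild

# Bring the box up; provisioning runs the matching build-linux-*.sh script.
vagrant up linux64

# Re-run the build on an already-created box.
vagrant provision linux64

# The artifacts appear on the host via the ../target synced folder.
ls ../target/librocksdbjni-* ../target/rocksdbjni-*

# Tear the box down when finished.
vagrant destroy -f linux64
```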
- -set -e - -# remove fixed relesever variable present in the hanscode boxes -sudo rm -f /etc/yum/vars/releasever - -# enable EPEL -sudo yum -y install epel-release - -# install all required packages for rocksdb that are available through yum -sudo yum -y install openssl java-1.7.0-openjdk-devel zlib-devel bzip2-devel lz4-devel snappy-devel libzstd-devel jemalloc-devel cmake3 - -# set up cmake3 as cmake binary -sudo alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake 10 --slave /usr/local/bin/ctest ctest /usr/bin/ctest --slave /usr/local/bin/cpack cpack /usr/bin/cpack --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake -sudo alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake3 20 --slave /usr/local/bin/ctest ctest /usr/bin/ctest3 --slave /usr/local/bin/cpack cpack /usr/bin/cpack3 --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake3 - -# install gcc/g++ 4.8.2 from tru/devtools-2 -sudo wget -O /etc/yum.repos.d/devtools-2.repo https://people.centos.org/tru/devtools-2/devtools-2.repo -sudo yum -y install devtoolset-2-binutils devtoolset-2-gcc devtoolset-2-gcc-c++ - -# install gflags -wget https://github.com/gflags/gflags/archive/v2.0.tar.gz -O gflags-2.0.tar.gz -tar xvfz gflags-2.0.tar.gz; cd gflags-2.0; scl enable devtoolset-2 ./configure; scl enable devtoolset-2 make; sudo make install -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib - -# set java home so we can build rocksdb jars -export JAVA_HOME=/usr/lib/jvm/java-1.7.0 - -export PATH=$JAVA_HOME:/usr/local/bin:$PATH - -# build rocksdb -cd /rocksdb -scl enable devtoolset-2 'make clean-not-downloaded' -scl enable devtoolset-2 'PORTABLE=1 make -j8 rocksdbjavastatic' -cp /rocksdb/java/target/librocksdbjni-* /rocksdb-build -cp /rocksdb/java/target/rocksdbjni-* /rocksdb-build diff --git a/java/crossbuild/build-linux.sh b/java/crossbuild/build-linux.sh deleted file mode 100755 index 74178adb5..000000000 --- a/java/crossbuild/build-linux.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -# install all required packages for rocksdb -sudo apt-get update -sudo apt-get -y install git make gcc g++ libgflags-dev libsnappy-dev zlib1g-dev libbz2-dev default-jdk - -# set java home so we can build rocksdb jars -export JAVA_HOME=$(echo /usr/lib/jvm/java-7-openjdk*) -cd /rocksdb -make jclean clean -make -j 4 rocksdbjavastatic -cp /rocksdb/java/target/librocksdbjni-* /rocksdb-build -cp /rocksdb/java/target/rocksdbjni-* /rocksdb-build -sudo shutdown -h now - diff --git a/java/crossbuild/docker-build-linux-alpine.sh b/java/crossbuild/docker-build-linux-alpine.sh deleted file mode 100755 index e3e852efe..000000000 --- a/java/crossbuild/docker-build-linux-alpine.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
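In build-linux-centos.sh above, the two `alternatives --install` calls register /usr/bin/cmake at priority 10 and /usr/bin/cmake3 at priority 20 under the same /usr/local/bin/cmake link (with ctest, cpack and ccmake as slave links), so in automatic mode the higher-priority cmake3 binary is the one that gets picked up. A quick way to verify which alternative is active (ordinary alternatives usage, not part of the script):

```bash
alternatives --display cmake   # lists both registered binaries and their priorities
cmake --version                # should report the CMake 3.x build behind /usr/bin/cmake3
```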
- -set -e -#set -x - -# just in-case this is run outside Docker -mkdir -p /rocksdb-local-build - -rm -rf /rocksdb-local-build/* -cp -r /rocksdb-host/* /rocksdb-local-build -cd /rocksdb-local-build - -make clean-not-downloaded -PORTABLE=1 make -j2 rocksdbjavastatic - -cp java/target/librocksdbjni-linux*.so java/target/rocksdbjni-*-linux*.jar java/target/rocksdbjni-*-linux*.jar.sha1 /rocksdb-java-target diff --git a/java/crossbuild/docker-build-linux-centos.sh b/java/crossbuild/docker-build-linux-centos.sh deleted file mode 100755 index 16581dec7..000000000 --- a/java/crossbuild/docker-build-linux-centos.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -set -e -#set -x - -# just in-case this is run outside Docker -mkdir -p /rocksdb-local-build - -rm -rf /rocksdb-local-build/* -cp -r /rocksdb-host/* /rocksdb-local-build -cd /rocksdb-local-build - -# Use scl devtoolset if available -if hash scl 2>/dev/null; then - if scl --list | grep -q 'devtoolset-8'; then - # CentOS 6+ - scl enable devtoolset-8 'make clean-not-downloaded' - scl enable devtoolset-8 'PORTABLE=1 make -j2 rocksdbjavastatic' - elif scl --list | grep -q 'devtoolset-7'; then - # CentOS 6+ - scl enable devtoolset-7 'make clean-not-downloaded' - scl enable devtoolset-7 'PORTABLE=1 make -j2 rocksdbjavastatic' - elif scl --list | grep -q 'devtoolset-2'; then - # CentOS 5 or 6 - scl enable devtoolset-2 'make clean-not-downloaded' - scl enable devtoolset-2 'PORTABLE=1 make -j2 rocksdbjavastatic' - else - echo "Could not find devtoolset" - exit 1; - fi -else - make clean-not-downloaded - PORTABLE=1 make -j2 rocksdbjavastatic -fi - -cp java/target/librocksdbjni-linux*.so java/target/rocksdbjni-*-linux*.jar java/target/rocksdbjni-*-linux*.jar.sha1 /rocksdb-java-target - diff --git a/java/jdb_bench.sh b/java/jdb_bench.sh deleted file mode 100755 index 5dfc385e3..000000000 --- a/java/jdb_bench.sh +++ /dev/null @@ -1,13 +0,0 @@ -# shellcheck disable=SC2148 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -PLATFORM=64 -if [ `getconf LONG_BIT` != "64" ] -then - PLATFORM=32 -fi - -ROCKS_JAR=`find target -name rocksdbjni*.jar` - -echo "Running benchmark in $PLATFORM-Bit mode." -# shellcheck disable=SC2068 -java -server -d$PLATFORM -XX:NewSize=4m -XX:+AggressiveOpts -Djava.library.path=target -cp "${ROCKS_JAR}:benchmark/target/classes" org.rocksdb.benchmark.DbBenchmark $@ diff --git a/java/jmh/LICENSE-HEADER.txt b/java/jmh/LICENSE-HEADER.txt deleted file mode 100644 index 365ee653b..000000000 --- a/java/jmh/LICENSE-HEADER.txt +++ /dev/null @@ -1,5 +0,0 @@ -Copyright (c) 2011-present, Facebook, Inc. All rights reserved. - This source code is licensed under both the GPLv2 (found in the - COPYING file in the root directory) and Apache 2.0 License - (found in the LICENSE.Apache file in the root directory). - diff --git a/java/jmh/README.md b/java/jmh/README.md deleted file mode 100644 index 1575ab517..000000000 --- a/java/jmh/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# JMH Benchmarks for RocksJava - -These are micro-benchmarks for RocksJava functionality, using [JMH (Java Microbenchmark Harness)](https://openjdk.java.net/projects/code-tools/jmh/). - -## Compiling - -**Note**: This uses a specific build of RocksDB that is set in the `` element of the `dependencies` section of the `pom.xml` file. 
If you are testing local changes you should build and install a SNAPSHOT version of rocksdbjni, and update the `pom.xml` of rocksdbjni-jmh file to test with this. - -For instance, this is how to install the OSX jar you just built for 6.26.0 - -```bash -$ mvn install:install-file -Dfile=./java/target/rocksdbjni-6.26.0-SNAPSHOT-osx.jar -DgroupId=org.rocksdb -DartifactId=rocksdbjni -Dversion=6.26.0-SNAPSHOT -Dpackaging=jar -``` - -```bash -$ mvn package -``` - -## Running -```bash -$ java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -``` - -NOTE: you can append `-help` to the command above to see all of the JMH runtime options. diff --git a/java/jmh/pom.xml b/java/jmh/pom.xml deleted file mode 100644 index 3016aefa7..000000000 --- a/java/jmh/pom.xml +++ /dev/null @@ -1,138 +0,0 @@ - - - 4.0.0 - - org.rocksdb - rocksdbjni-jmh - 1.0-SNAPSHOT - - http://rocksdb.org/ - - rocksdbjni-jmh - JMH Benchmarks for RocksDB Java API - - - Facebook, Inc. - https://www.facebook.com - - - - - Apache License 2.0 - http://www.apache.org/licenses/LICENSE-2.0.html - repo - - - GNU General Public License, version 2 - http://www.gnu.org/licenses/gpl-2.0.html - repo - - - - - scm:git:git://github.com/facebook/rocksdb.git - scm:git:git@github.com:facebook/rocksdb.git - http://github.com/facebook/rocksdb/ - - - - 1.7 - 1.7 - UTF-8 - - 1.22 - benchmarks - - - - - org.rocksdb - rocksdbjni - 7.9.0-SNAPSHOT - - - - org.openjdk.jmh - jmh-core - ${jmh.version} - - - org.openjdk.jmh - jmh-generator-annprocess - ${jmh.version} - provided - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.8.1 - - ${project.build.source} - ${project.build.target} - ${project.build.sourceEncoding} - - - - - com.mycila - license-maven-plugin - 3.0 - true - -
LICENSE-HEADER.txt
- true - true - true - - pom.xml - - ${project.build.sourceEncoding} -
-
- - - org.apache.maven.plugins - maven-shade-plugin - 3.2.1 - - - package - - shade - - - ${project.artifactId}-${project.version}-${uberjar.name} - - - org.openjdk.jmh.Main - - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - -
-
- -
\ No newline at end of file diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/ComparatorBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/ComparatorBenchmarks.java deleted file mode 100644 index 1973b5487..000000000 --- a/java/jmh/src/main/java/org/rocksdb/jmh/ComparatorBenchmarks.java +++ /dev/null @@ -1,139 +0,0 @@ -/** - * Copyright (c) 2011-present, Facebook, Inc. All rights reserved. - * This source code is licensed under both the GPLv2 (found in the - * COPYING file in the root directory) and Apache 2.0 License - * (found in the LICENSE.Apache file in the root directory). - */ -package org.rocksdb.jmh; - -import org.openjdk.jmh.annotations.*; -import org.rocksdb.*; -import org.rocksdb.util.BytewiseComparator; -import org.rocksdb.util.FileUtils; -import org.rocksdb.util.ReverseBytewiseComparator; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.rocksdb.util.KVUtils.ba; - -@State(Scope.Benchmark) -public class ComparatorBenchmarks { - - @Param({ - "native_bytewise", - "native_reverse_bytewise", - - "java_bytewise_non-direct_reused-64_adaptive-mutex", - "java_bytewise_non-direct_reused-64_non-adaptive-mutex", - "java_bytewise_non-direct_reused-64_thread-local", - "java_bytewise_direct_reused-64_adaptive-mutex", - "java_bytewise_direct_reused-64_non-adaptive-mutex", - "java_bytewise_direct_reused-64_thread-local", - "java_bytewise_non-direct_no-reuse", - "java_bytewise_direct_no-reuse", - - "java_reverse_bytewise_non-direct_reused-64_adaptive-mutex", - "java_reverse_bytewise_non-direct_reused-64_non-adaptive-mutex", - "java_reverse_bytewise_non-direct_reused-64_thread-local", - "java_reverse_bytewise_direct_reused-64_adaptive-mutex", - "java_reverse_bytewise_direct_reused-64_non-adaptive-mutex", - "java_reverse_bytewise_direct_reused-64_thread-local", - "java_reverse_bytewise_non-direct_no-reuse", - "java_reverse_bytewise_direct_no-reuse" - }) - public String comparatorName; - - Path dbDir; - ComparatorOptions comparatorOptions; - AbstractComparator comparator; - Options options; - RocksDB db; - - @Setup(Level.Trial) - public void setup() throws IOException, RocksDBException { - RocksDB.loadLibrary(); - - dbDir = Files.createTempDirectory("rocksjava-comparator-benchmarks"); - - options = new Options() - .setCreateIfMissing(true); - - if ("native_bytewise".equals(comparatorName)) { - options.setComparator(BuiltinComparator.BYTEWISE_COMPARATOR); - - } else if ("native_reverse_bytewise".equals(comparatorName)) { - options.setComparator(BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR); - - } else if (comparatorName.startsWith("java_")) { - comparatorOptions = new ComparatorOptions(); - - if (comparatorName.indexOf("non-direct") > -1) { - comparatorOptions.setUseDirectBuffer(false); - } else if (comparatorName.indexOf("direct") > -1) { - comparatorOptions.setUseDirectBuffer(true); - } - - if (comparatorName.indexOf("no-reuse") > -1) { - comparatorOptions.setMaxReusedBufferSize(-1); - } else if (comparatorName.indexOf("_reused-") > -1) { - final int idx = comparatorName.indexOf("_reused-"); - String s = comparatorName.substring(idx + 8); - s = s.substring(0, s.indexOf('_')); - comparatorOptions.setMaxReusedBufferSize(Integer.parseInt(s)); - } - - if (comparatorName.indexOf("non-adaptive-mutex") > -1) { - comparatorOptions.setReusedSynchronisationType(ReusedSynchronisationType.MUTEX); - } else if (comparatorName.indexOf("adaptive-mutex") > -1) { - 
comparatorOptions.setReusedSynchronisationType(ReusedSynchronisationType.ADAPTIVE_MUTEX); - } else if (comparatorName.indexOf("thread-local") > -1) { - comparatorOptions.setReusedSynchronisationType(ReusedSynchronisationType.THREAD_LOCAL); - } - - if (comparatorName.startsWith("java_bytewise")) { - comparator = new BytewiseComparator(comparatorOptions); - } else if (comparatorName.startsWith("java_reverse_bytewise")) { - comparator = new ReverseBytewiseComparator(comparatorOptions); - } - - options.setComparator(comparator); - - } else { - throw new IllegalArgumentException("Unknown comparatorName: " + comparatorName); - } - - db = RocksDB.open(options, dbDir.toAbsolutePath().toString()); - } - - @TearDown(Level.Trial) - public void cleanup() throws IOException { - db.close(); - if (comparator != null) { - comparator.close(); - } - if (comparatorOptions != null) { - comparatorOptions.close(); - } - options.close(); - FileUtils.delete(dbDir); - } - - @State(Scope.Benchmark) - public static class Counter { - private final AtomicInteger count = new AtomicInteger(); - - public int next() { - return count.getAndIncrement(); - } - } - - - @Benchmark - public void put(final Counter counter) throws RocksDBException { - final int i = counter.next(); - db.put(ba("key" + i), ba("value" + i)); - } -} diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/GetBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/GetBenchmarks.java deleted file mode 100644 index 1c4329b3a..000000000 --- a/java/jmh/src/main/java/org/rocksdb/jmh/GetBenchmarks.java +++ /dev/null @@ -1,215 +0,0 @@ -/** - * Copyright (c) 2011-present, Facebook, Inc. All rights reserved. - * This source code is licensed under both the GPLv2 (found in the - * COPYING file in the root directory) and Apache 2.0 License - * (found in the LICENSE.Apache file in the root directory). 
- */ -package org.rocksdb.jmh; - -import static org.rocksdb.util.KVUtils.ba; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; -import org.openjdk.jmh.annotations.*; -import org.rocksdb.*; -import org.rocksdb.util.FileUtils; - -@State(Scope.Benchmark) -public class GetBenchmarks { - - @Param({ - "no_column_family", - "1_column_family", - "20_column_families", - "100_column_families" - }) - String columnFamilyTestType; - - @Param({"1000", "100000"}) int keyCount; - - @Param({"12", "64", "128"}) int keySize; - - @Param({"64", "1024", "65536"}) int valueSize; - - Path dbDir; - DBOptions options; - ReadOptions readOptions; - int cfs = 0; // number of column families - private AtomicInteger cfHandlesIdx; - ColumnFamilyHandle[] cfHandles; - RocksDB db; - private final AtomicInteger keyIndex = new AtomicInteger(); - private ByteBuffer keyBuf; - private ByteBuffer valueBuf; - private byte[] keyArr; - private byte[] valueArr; - - @Setup(Level.Trial) - public void setup() throws IOException, RocksDBException { - RocksDB.loadLibrary(); - - dbDir = Files.createTempDirectory("rocksjava-get-benchmarks"); - - options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - readOptions = new ReadOptions(); - - final List cfDescriptors = new ArrayList<>(); - cfDescriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY)); - - if ("1_column_family".equals(columnFamilyTestType)) { - cfs = 1; - } else if ("20_column_families".equals(columnFamilyTestType)) { - cfs = 20; - } else if ("100_column_families".equals(columnFamilyTestType)) { - cfs = 100; - } - - if (cfs > 0) { - cfHandlesIdx = new AtomicInteger(1); - for (int i = 1; i <= cfs; i++) { - cfDescriptors.add(new ColumnFamilyDescriptor(ba("cf" + i))); - } - } - - final List cfHandlesList = new ArrayList<>(cfDescriptors.size()); - db = RocksDB.open(options, dbDir.toAbsolutePath().toString(), cfDescriptors, cfHandlesList); - cfHandles = cfHandlesList.toArray(new ColumnFamilyHandle[0]); - - // store initial data for retrieving via get - keyArr = new byte[keySize]; - valueArr = new byte[valueSize]; - Arrays.fill(keyArr, (byte) 0x30); - Arrays.fill(valueArr, (byte) 0x30); - for (int i = 0; i <= cfs; i++) { - for (int j = 0; j < keyCount; j++) { - final byte[] keyPrefix = ba("key" + j); - final byte[] valuePrefix = ba("value" + j); - System.arraycopy(keyPrefix, 0, keyArr, 0, keyPrefix.length); - System.arraycopy(valuePrefix, 0, valueArr, 0, valuePrefix.length); - db.put(cfHandles[i], keyArr, valueArr); - } - } - - try (final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) { - db.flush(flushOptions); - } - - keyBuf = ByteBuffer.allocateDirect(keySize); - valueBuf = ByteBuffer.allocateDirect(valueSize); - Arrays.fill(keyArr, (byte) 0x30); - Arrays.fill(valueArr, (byte) 0x30); - keyBuf.put(keyArr); - keyBuf.flip(); - valueBuf.put(valueArr); - valueBuf.flip(); - } - - @TearDown(Level.Trial) - public void cleanup() throws IOException { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - db.close(); - options.close(); - readOptions.close(); - FileUtils.delete(dbDir); - } - - private ColumnFamilyHandle getColumnFamily() { - if (cfs == 0) { - return cfHandles[0]; - } else if (cfs == 1) { - return cfHandles[1]; - } else { - int idx = cfHandlesIdx.getAndIncrement(); - if (idx > cfs) 
{ - cfHandlesIdx.set(1); // doesn't ensure a perfect distribution, but it's ok - idx = 0; - } - return cfHandles[idx]; - } - } - - /** - * Takes the next position in the index. - */ - private int next() { - int idx; - int nextIdx; - while (true) { - idx = keyIndex.get(); - nextIdx = idx + 1; - if (nextIdx >= keyCount) { - nextIdx = 0; - } - - if (keyIndex.compareAndSet(idx, nextIdx)) { - break; - } - } - return idx; - } - - // String -> byte[] - private byte[] getKeyArr() { - final int MAX_LEN = 9; // key100000 - final int keyIdx = next(); - final byte[] keyPrefix = ba("key" + keyIdx); - System.arraycopy(keyPrefix, 0, keyArr, 0, keyPrefix.length); - Arrays.fill(keyArr, keyPrefix.length, MAX_LEN, (byte) 0x30); - return keyArr; - } - - // String -> ByteBuffer - private ByteBuffer getKeyBuf() { - final int MAX_LEN = 9; // key100000 - final int keyIdx = next(); - final String keyStr = "key" + keyIdx; - for (int i = 0; i < keyStr.length(); ++i) { - keyBuf.put(i, (byte) keyStr.charAt(i)); - } - for (int i = keyStr.length(); i < MAX_LEN; ++i) { - keyBuf.put(i, (byte) 0x30); - } - // Reset position for future reading - keyBuf.position(0); - return keyBuf; - } - - private byte[] getValueArr() { - return valueArr; - } - - private ByteBuffer getValueBuf() { - return valueBuf; - } - - @Benchmark - public void get() throws RocksDBException { - db.get(getColumnFamily(), getKeyArr()); - } - - @Benchmark - public void preallocatedGet() throws RocksDBException { - db.get(getColumnFamily(), getKeyArr(), getValueArr()); - } - - @Benchmark - public void preallocatedByteBufferGet() throws RocksDBException { - int res = db.get(getColumnFamily(), readOptions, getKeyBuf(), getValueBuf()); - // For testing correctness: - // assert res > 0; - // final byte[] ret = new byte[valueSize]; - // valueBuf.get(ret); - // System.out.println(str(ret)); - // valueBuf.flip(); - } -} \ No newline at end of file diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/MultiGetBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/MultiGetBenchmarks.java deleted file mode 100644 index d37447716..000000000 --- a/java/jmh/src/main/java/org/rocksdb/jmh/MultiGetBenchmarks.java +++ /dev/null @@ -1,214 +0,0 @@ -/** - * Copyright (c) 2011-present, Facebook, Inc. All rights reserved. - * This source code is licensed under both the GPLv2 (found in the - * COPYING file in the root directory) and Apache 2.0 License - * (found in the LICENSE.Apache file in the root directory). 
- */ -package org.rocksdb.jmh; - -import static org.rocksdb.util.KVUtils.ba; -import static org.rocksdb.util.KVUtils.keys; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; -import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.OptionsBuilder; -import org.rocksdb.*; -import org.rocksdb.util.FileUtils; - -@State(Scope.Thread) -public class MultiGetBenchmarks { - @Param({ - "no_column_family", - "1_column_family", - "20_column_families", - "100_column_families" - }) - String columnFamilyTestType; - - @Param({"10000", "25000", "100000"}) int keyCount; - - @Param({ - "10", - "100", - "1000", - "10000", - }) - int multiGetSize; - - @Param({"16", "64", "250", "1000", "4000", "16000"}) int valueSize; - @Param({"16"}) int keySize; // big enough - - Path dbDir; - DBOptions options; - int cfs = 0; // number of column families - private AtomicInteger cfHandlesIdx; - ColumnFamilyHandle[] cfHandles; - RocksDB db; - private final AtomicInteger keyIndex = new AtomicInteger(); - - @Setup(Level.Trial) - public void setup() throws IOException, RocksDBException { - RocksDB.loadLibrary(); - - dbDir = Files.createTempDirectory("rocksjava-multiget-benchmarks"); - - options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - - final List cfDescriptors = new ArrayList<>(); - cfDescriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY)); - - if ("1_column_family".equals(columnFamilyTestType)) { - cfs = 1; - } else if ("20_column_families".equals(columnFamilyTestType)) { - cfs = 20; - } else if ("100_column_families".equals(columnFamilyTestType)) { - cfs = 100; - } - - if (cfs > 0) { - cfHandlesIdx = new AtomicInteger(1); - for (int i = 1; i <= cfs; i++) { - cfDescriptors.add(new ColumnFamilyDescriptor(ba("cf" + i))); - } - } - - final List cfHandlesList = new ArrayList<>(cfDescriptors.size()); - db = RocksDB.open(options, dbDir.toAbsolutePath().toString(), cfDescriptors, cfHandlesList); - cfHandles = cfHandlesList.toArray(new ColumnFamilyHandle[0]); - - // store initial data for retrieving via get - for (int i = 0; i < cfs; i++) { - for (int j = 0; j < keyCount; j++) { - final byte[] paddedValue = Arrays.copyOf(ba("value" + j), valueSize); - db.put(cfHandles[i], ba("key" + j), paddedValue); - } - } - - try (final FlushOptions flushOptions = new FlushOptions() - .setWaitForFlush(true)) { - db.flush(flushOptions); - } - } - - @TearDown(Level.Trial) - public void cleanup() throws IOException { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - db.close(); - options.close(); - FileUtils.delete(dbDir); - } - - private ColumnFamilyHandle getColumnFamily() { - if (cfs == 0) { - return cfHandles[0]; - } else if (cfs == 1) { - return cfHandles[1]; - } else { - int idx = cfHandlesIdx.getAndIncrement(); - if (idx > cfs) { - cfHandlesIdx.set(1); // doesn't ensure a perfect distribution, but it's ok - idx = 0; - } - return cfHandles[idx]; - } - } - - /** - * Reserves the next {@inc} positions in the index. 
- * - * @param inc the number by which to increment the index - * @param limit the limit for the index - * @return the index before {@code inc} is added - */ - private int next(final int inc, final int limit) { - int idx; - int nextIdx; - while (true) { - idx = keyIndex.get(); - nextIdx = idx + inc; - if (nextIdx >= limit) { - nextIdx = inc; - } - - if (keyIndex.compareAndSet(idx, nextIdx)) { - break; - } - } - - if (nextIdx >= limit) { - return -1; - } else { - return idx; - } - } - - ByteBuffer keysBuffer; - ByteBuffer valuesBuffer; - - List valueBuffersList; - List keyBuffersList; - - @Setup - public void allocateSliceBuffers() { - keysBuffer = ByteBuffer.allocateDirect(keyCount * valueSize); - valuesBuffer = ByteBuffer.allocateDirect(keyCount * valueSize); - valueBuffersList = new ArrayList<>(); - keyBuffersList = new ArrayList<>(); - for (int i = 0; i < keyCount; i++) { - valueBuffersList.add(valuesBuffer.slice()); - valuesBuffer.position(i * valueSize); - keyBuffersList.add(keysBuffer.slice()); - keysBuffer.position(i * keySize); - } - } - - @TearDown - public void freeSliceBuffers() { - valueBuffersList.clear(); - } - - @Benchmark - public List multiGet10() throws RocksDBException { - final int fromKeyIdx = next(multiGetSize, keyCount); - if (fromKeyIdx >= 0) { - final List keys = keys(fromKeyIdx, fromKeyIdx + multiGetSize); - final List valueResults = db.multiGetAsList(keys); - for (final byte[] result : valueResults) { - if (result.length != valueSize) - throw new RuntimeException("Test valueSize assumption wrong"); - } - } - return new ArrayList<>(); - } - - public static void main(final String[] args) throws RunnerException { - final org.openjdk.jmh.runner.options.Options opt = - new OptionsBuilder() - .include(MultiGetBenchmarks.class.getSimpleName()) - .forks(1) - .jvmArgs("-ea") - .warmupIterations(1) - .measurementIterations(2) - .forks(2) - .param("columnFamilyTestType=", "1_column_family") - .param("multiGetSize=", "10", "1000") - .param("keyCount=", "1000") - .output("jmh_output") - .build(); - - new Runner(opt).run(); - } -} diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java deleted file mode 100644 index 5aae21cb9..000000000 --- a/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java +++ /dev/null @@ -1,112 +0,0 @@ -/** - * Copyright (c) 2011-present, Facebook, Inc. All rights reserved. - * This source code is licensed under both the GPLv2 (found in the - * COPYING file in the root directory) and Apache 2.0 License - * (found in the LICENSE.Apache file in the root directory). 
- */ -package org.rocksdb.jmh; - -import org.openjdk.jmh.annotations.*; -import org.rocksdb.*; -import org.rocksdb.util.FileUtils; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.rocksdb.util.KVUtils.ba; - -@State(Scope.Benchmark) -public class PutBenchmarks { - - @Param({ - "no_column_family", - "1_column_family", - "20_column_families", - "100_column_families" - }) - String columnFamilyTestType; - - Path dbDir; - DBOptions options; - int cfs = 0; // number of column families - private AtomicInteger cfHandlesIdx; - ColumnFamilyHandle[] cfHandles; - RocksDB db; - - @Setup(Level.Trial) - public void setup() throws IOException, RocksDBException { - RocksDB.loadLibrary(); - - dbDir = Files.createTempDirectory("rocksjava-put-benchmarks"); - - options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - - final List cfDescriptors = new ArrayList<>(); - cfDescriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY)); - - if ("1_column_family".equals(columnFamilyTestType)) { - cfs = 1; - } else if ("20_column_families".equals(columnFamilyTestType)) { - cfs = 20; - } else if ("100_column_families".equals(columnFamilyTestType)) { - cfs = 100; - } - - if (cfs > 0) { - cfHandlesIdx = new AtomicInteger(1); - for (int i = 1; i <= cfs; i++) { - cfDescriptors.add(new ColumnFamilyDescriptor(ba("cf" + i))); - } - } - - final List cfHandlesList = new ArrayList<>(cfDescriptors.size()); - db = RocksDB.open(options, dbDir.toAbsolutePath().toString(), cfDescriptors, cfHandlesList); - cfHandles = cfHandlesList.toArray(new ColumnFamilyHandle[0]); - } - - @TearDown(Level.Trial) - public void cleanup() throws IOException { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - db.close(); - options.close(); - FileUtils.delete(dbDir); - } - - private ColumnFamilyHandle getColumnFamily() { - if (cfs == 0) { - return cfHandles[0]; - } else if (cfs == 1) { - return cfHandles[1]; - } else { - int idx = cfHandlesIdx.getAndIncrement(); - if (idx > cfs) { - cfHandlesIdx.set(1); // doesn't ensure a perfect distribution, but it's ok - idx = 0; - } - return cfHandles[idx]; - } - } - - @State(Scope.Benchmark) - public static class Counter { - private final AtomicInteger count = new AtomicInteger(); - - public int next() { - return count.getAndIncrement(); - } - } - - @Benchmark - public void put(final ComparatorBenchmarks.Counter counter) throws RocksDBException { - final int i = counter.next(); - db.put(getColumnFamily(), ba("key" + i), ba("value" + i)); - } -} diff --git a/java/jmh/src/main/java/org/rocksdb/util/FileUtils.java b/java/jmh/src/main/java/org/rocksdb/util/FileUtils.java deleted file mode 100644 index 63744a14f..000000000 --- a/java/jmh/src/main/java/org/rocksdb/util/FileUtils.java +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Copyright (c) 2011-present, Facebook, Inc. All rights reserved. - * This source code is licensed under both the GPLv2 (found in the - * COPYING file in the root directory) and Apache 2.0 License - * (found in the LICENSE.Apache file in the root directory). 
- */ -package org.rocksdb.util; - -import java.io.IOException; -import java.nio.file.FileVisitResult; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.SimpleFileVisitor; -import java.nio.file.attribute.BasicFileAttributes; - -public final class FileUtils { - private static final SimpleFileVisitor DELETE_DIR_VISITOR = new DeleteDirVisitor(); - - /** - * Deletes a path from the filesystem - * - * If the path is a directory its contents - * will be recursively deleted before it itself - * is deleted. - * - * Note that removal of a directory is not an atomic-operation - * and so if an error occurs during removal, some of the directories - * descendants may have already been removed - * - * @param path the path to delete. - * - * @throws IOException if an error occurs whilst removing a file or directory - */ - public static void delete(final Path path) throws IOException { - if (!Files.isDirectory(path)) { - Files.deleteIfExists(path); - } else { - Files.walkFileTree(path, DELETE_DIR_VISITOR); - } - } - - private static class DeleteDirVisitor extends SimpleFileVisitor { - @Override - public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException { - Files.deleteIfExists(file); - return FileVisitResult.CONTINUE; - } - - @Override - public FileVisitResult postVisitDirectory(final Path dir, final IOException exc) throws IOException { - if (exc != null) { - throw exc; - } - - Files.deleteIfExists(dir); - return FileVisitResult.CONTINUE; - } - } -} diff --git a/java/jmh/src/main/java/org/rocksdb/util/KVUtils.java b/java/jmh/src/main/java/org/rocksdb/util/KVUtils.java deleted file mode 100644 index 5077291c8..000000000 --- a/java/jmh/src/main/java/org/rocksdb/util/KVUtils.java +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Copyright (c) 2011-present, Facebook, Inc. All rights reserved. - * This source code is licensed under both the GPLv2 (found in the - * COPYING file in the root directory) and Apache 2.0 License - * (found in the LICENSE.Apache file in the root directory). - */ -package org.rocksdb.util; - -import static java.nio.charset.StandardCharsets.UTF_8; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; - -public final class KVUtils { - - /** - * Get a byte array from a string. - * - * Assumes UTF-8 encoding - * - * @param string the string - * - * @return the bytes. - */ - public static byte[] ba(final String string) { - return string.getBytes(UTF_8); - } - - /** - * Get a string from a byte array. - * - * Assumes UTF-8 encoding - * - * @param bytes the bytes - * - * @return the string. - */ - public static String str(final byte[] bytes) { - return new String(bytes, UTF_8); - } - - /** - * Get a list of keys where the keys are named key1..key1+N - * in the range of {@code from} to {@code to} i.e. keyFrom..keyTo. 
- * - * @param from the first key - * @param to the last key - * - * @return the array of keys - */ - public static List keys(final int from, final int to) { - final List keys = new ArrayList<>(to - from); - for (int i = from; i < to; i++) { - keys.add(ba("key" + i)); - } - return keys; - } - - public static List keys( - final List keyBuffers, final int from, final int to) { - final List keys = new ArrayList<>(to - from); - for (int i = from; i < to; i++) { - final ByteBuffer key = keyBuffers.get(i); - key.clear(); - key.put(ba("key" + i)); - key.flip(); - keys.add(key); - } - return keys; - } -} diff --git a/java/pom.xml.template b/java/pom.xml.template deleted file mode 100644 index 8a1981c66..000000000 --- a/java/pom.xml.template +++ /dev/null @@ -1,178 +0,0 @@ - - - 4.0.0 - - org.rocksdb - rocksdbjni - ${ROCKSDB_JAVA_VERSION} - - RocksDB JNI - RocksDB fat jar that contains .so files for linux32 and linux64 (glibc and musl-libc), jnilib files - for Mac OSX, and a .dll for Windows x64. - - https://rocksdb.org - 2012 - - - - Apache License 2.0 - http://www.apache.org/licenses/LICENSE-2.0.html - repo - - - GNU General Public License, version 2 - http://www.gnu.org/licenses/gpl-2.0.html - repo - - - - - scm:git:https://github.com/facebook/rocksdb.git - scm:git:https://github.com/facebook/rocksdb.git - scm:git:https://github.com/facebook/rocksdb.git - - - - Facebook - https://www.facebook.com - - - - - Facebook - help@facebook.com - America/New_York - - architect - - - - - - - rocksdb - Google Groups - rocksdb-subscribe@googlegroups.com - rocksdb-unsubscribe@googlegroups.com - rocksdb@googlegroups.com - https://groups.google.com/forum/#!forum/rocksdb - - - - - 1.8 - 1.8 - UTF-8 - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.2 - - ${project.build.source} - ${project.build.target} - ${project.build.sourceEncoding} - - - - org.apache.maven.plugins - maven-surefire-plugin - 2.18.1 - - ${argLine} -ea -Xcheck:jni -Djava.library.path=${project.build.directory} - false - false - - ${project.build.directory}/* - - - - - org.jacoco - jacoco-maven-plugin - 0.7.2.201409121644 - - - - prepare-agent - - - - report - prepare-package - - report - - - - - - org.codehaus.gmaven - groovy-maven-plugin - 2.0 - - - process-classes - - execute - - - - Xenu - - - String fileContents = new File(project.basedir.absolutePath + '/../include/rocksdb/version.h').getText('UTF-8') - matcher = (fileContents =~ /(?s).*ROCKSDB_MAJOR ([0-9]+).*?/) - String major_version = matcher.getAt(0).getAt(1) - matcher = (fileContents =~ /(?s).*ROCKSDB_MINOR ([0-9]+).*?/) - String minor_version = matcher.getAt(0).getAt(1) - matcher = (fileContents =~ /(?s).*ROCKSDB_PATCH ([0-9]+).*?/) - String patch_version = matcher.getAt(0).getAt(1) - String version = String.format('%s.%s.%s', major_version, minor_version, patch_version) - // Set version to be used in pom.properties - project.version = version - // Set version to be set as jar name - project.build.finalName = project.artifactId + "-" + version - - - - - - - - - - - junit - junit - 4.13.1 - test - - - org.hamcrest - hamcrest - 2.2 - test - - - cglib - cglib - 3.3.0 - test - - - org.assertj - assertj-core - 2.9.0 - test - - - org.mockito - mockito-all - 1.10.19 - test - - - diff --git a/java/rocksjni/backup_engine_options.cc b/java/rocksjni/backup_engine_options.cc deleted file mode 100644 index 25bfb6720..000000000 --- a/java/rocksjni/backup_engine_options.cc +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
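The groovy-maven-plugin execution in pom.xml.template above derives the artifact version at build time by scraping the ROCKSDB_MAJOR, ROCKSDB_MINOR and ROCKSDB_PATCH values out of include/rocksdb/version.h. Assuming the usual #define form of those macros, the same extraction can be sanity-checked from the repository root with a shell one-liner (an illustrative equivalent, not part of the build):

```bash
awk '/define ROCKSDB_MAJOR/ {maj=$3}
     /define ROCKSDB_MINOR/ {min=$3}
     /define ROCKSDB_PATCH/ {pat=$3}
     END {printf "%s.%s.%s\n", maj, min, pat}' include/rocksdb/version.h
```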
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::BackupEnginge and -// ROCKSDB_NAMESPACE::BackupEngineOptions methods from Java side. - -#include -#include -#include - -#include -#include - -#include "include/org_rocksdb_BackupEngineOptions.h" -#include "rocksdb/utilities/backup_engine.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/////////////////////////////////////////////////////////////////////////// -// BackupDBOptions - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: newBackupEngineOptions - * Signature: (Ljava/lang/String;)J - */ -jlong Java_org_rocksdb_BackupEngineOptions_newBackupEngineOptions( - JNIEnv* env, jclass /*jcls*/, jstring jpath) { - const char* cpath = env->GetStringUTFChars(jpath, nullptr); - if (cpath == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - auto* bopt = new ROCKSDB_NAMESPACE::BackupEngineOptions(cpath); - env->ReleaseStringUTFChars(jpath, cpath); - return GET_CPLUSPLUS_POINTER(bopt); -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: backupDir - * Signature: (J)Ljava/lang/String; - */ -jstring Java_org_rocksdb_BackupEngineOptions_backupDir(JNIEnv* env, - jobject /*jopt*/, - jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return env->NewStringUTF(bopt->backup_dir.c_str()); -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setBackupEnv - * Signature: (JJ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setBackupEnv( - JNIEnv* /*env*/, jobject /*jopt*/, jlong jhandle, jlong jrocks_env_handle) { - auto* bopt = - reinterpret_cast(jhandle); - auto* rocks_env = - reinterpret_cast(jrocks_env_handle); - bopt->backup_env = rocks_env; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setShareTableFiles - * Signature: (JZ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setShareTableFiles(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jboolean flag) { - auto* bopt = - reinterpret_cast(jhandle); - bopt->share_table_files = flag; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: shareTableFiles - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_BackupEngineOptions_shareTableFiles(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return bopt->share_table_files; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setInfoLog - * Signature: (JJ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setInfoLog(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jlong /*jlogger_handle*/) { - auto* bopt = - reinterpret_cast(jhandle); - auto* sptr_logger = - reinterpret_cast*>( - jhandle); - bopt->info_log = sptr_logger->get(); -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setSync - * Signature: (JZ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setSync(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jboolean flag) { - auto* bopt = - reinterpret_cast(jhandle); - bopt->sync = flag; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: sync - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_BackupEngineOptions_sync(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return bopt->sync; -} - -/* - * Class: 
org_rocksdb_BackupEngineOptions - * Method: setDestroyOldData - * Signature: (JZ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setDestroyOldData(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jboolean flag) { - auto* bopt = - reinterpret_cast(jhandle); - bopt->destroy_old_data = flag; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: destroyOldData - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_BackupEngineOptions_destroyOldData(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return bopt->destroy_old_data; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setBackupLogFiles - * Signature: (JZ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setBackupLogFiles(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jboolean flag) { - auto* bopt = - reinterpret_cast(jhandle); - bopt->backup_log_files = flag; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: backupLogFiles - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_BackupEngineOptions_backupLogFiles(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return bopt->backup_log_files; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setBackupRateLimit - * Signature: (JJ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setBackupRateLimit( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jbackup_rate_limit) { - auto* bopt = - reinterpret_cast(jhandle); - bopt->backup_rate_limit = jbackup_rate_limit; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: backupRateLimit - * Signature: (J)J - */ -jlong Java_org_rocksdb_BackupEngineOptions_backupRateLimit(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return bopt->backup_rate_limit; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setBackupRateLimiter - * Signature: (JJ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setBackupRateLimiter( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jrate_limiter_handle) { - auto* bopt = - reinterpret_cast(jhandle); - auto* sptr_rate_limiter = - reinterpret_cast*>( - jrate_limiter_handle); - bopt->backup_rate_limiter = *sptr_rate_limiter; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setRestoreRateLimit - * Signature: (JJ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setRestoreRateLimit( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jrestore_rate_limit) { - auto* bopt = - reinterpret_cast(jhandle); - bopt->restore_rate_limit = jrestore_rate_limit; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: restoreRateLimit - * Signature: (J)J - */ -jlong Java_org_rocksdb_BackupEngineOptions_restoreRateLimit(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return bopt->restore_rate_limit; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setRestoreRateLimiter - * Signature: (JJ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setRestoreRateLimiter( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jrate_limiter_handle) { - auto* bopt = - reinterpret_cast(jhandle); - auto* sptr_rate_limiter = - reinterpret_cast*>( - jrate_limiter_handle); - bopt->restore_rate_limiter = *sptr_rate_limiter; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setShareFilesWithChecksum - * Signature: (JZ)V - */ -void 
Java_org_rocksdb_BackupEngineOptions_setShareFilesWithChecksum( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { - auto* bopt = - reinterpret_cast(jhandle); - bopt->share_files_with_checksum = flag; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: shareFilesWithChecksum - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_BackupEngineOptions_shareFilesWithChecksum( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return bopt->share_files_with_checksum; -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setMaxBackgroundOperations - * Signature: (JI)V - */ -void Java_org_rocksdb_BackupEngineOptions_setMaxBackgroundOperations( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jint max_background_operations) { - auto* bopt = - reinterpret_cast(jhandle); - bopt->max_background_operations = static_cast(max_background_operations); -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: maxBackgroundOperations - * Signature: (J)I - */ -jint Java_org_rocksdb_BackupEngineOptions_maxBackgroundOperations( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return static_cast(bopt->max_background_operations); -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: setCallbackTriggerIntervalSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_BackupEngineOptions_setCallbackTriggerIntervalSize( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jcallback_trigger_interval_size) { - auto* bopt = - reinterpret_cast(jhandle); - bopt->callback_trigger_interval_size = - static_cast(jcallback_trigger_interval_size); -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: callbackTriggerIntervalSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_BackupEngineOptions_callbackTriggerIntervalSize( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - return static_cast(bopt->callback_trigger_interval_size); -} - -/* - * Class: org_rocksdb_BackupEngineOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_BackupEngineOptions_disposeInternal(JNIEnv* /*env*/, - jobject /*jopt*/, - jlong jhandle) { - auto* bopt = - reinterpret_cast(jhandle); - assert(bopt != nullptr); - delete bopt; -} diff --git a/java/rocksjni/backupenginejni.cc b/java/rocksjni/backupenginejni.cc deleted file mode 100644 index 1ba7ea286..000000000 --- a/java/rocksjni/backupenginejni.cc +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling C++ ROCKSDB_NAMESPACE::BackupEngine methods from the Java side. 
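Editor's note: the functions above back the setters and getters of the Java org.rocksdb.BackupEngineOptions class, each one forwarding to a field of the native ROCKSDB_NAMESPACE::BackupEngineOptions behind the jlong handle. A rough usage sketch follows; the Java method names are inferred from the JNI symbol names, and the backup directory is a placeholder.

```java
import org.rocksdb.BackupEngineOptions;
import org.rocksdb.RocksDB;

public class BackupOptionsExample {
  public static void main(final String[] args) {
    RocksDB.loadLibrary();
    // Each call below maps onto one JNI function in the deleted backup_engine_options.cc,
    // e.g. setSync -> Java_org_rocksdb_BackupEngineOptions_setSync.
    try (final BackupEngineOptions options =
             new BackupEngineOptions("/tmp/rocksdb-backups")) { // hypothetical backup dir
      options.setSync(true);                    // fsync backup files as they are written
      options.setShareFilesWithChecksum(true);  // de-duplicate SST files across backups
      options.setMaxBackgroundOperations(2);    // parallelism for backup/restore work
      options.setDestroyOldData(false);         // keep any backups already present
      System.out.println("backup dir: " + options.backupDir());
    }
  }
}
```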
- -#include - -#include - -#include "include/org_rocksdb_BackupEngine.h" -#include "rocksdb/utilities/backup_engine.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_BackupEngine - * Method: open - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_BackupEngine_open(JNIEnv* env, jclass /*jcls*/, - jlong env_handle, - jlong backup_engine_options_handle) { - auto* rocks_env = reinterpret_cast(env_handle); - auto* backup_engine_options = - reinterpret_cast( - backup_engine_options_handle); - ROCKSDB_NAMESPACE::BackupEngine* backup_engine; - auto status = ROCKSDB_NAMESPACE::BackupEngine::Open( - rocks_env, *backup_engine_options, &backup_engine); - - if (status.ok()) { - return GET_CPLUSPLUS_POINTER(backup_engine); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - return 0; - } -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: createNewBackup - * Signature: (JJZ)V - */ -void Java_org_rocksdb_BackupEngine_createNewBackup( - JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jlong db_handle, - jboolean jflush_before_backup) { - auto* db = reinterpret_cast(db_handle); - auto* backup_engine = - reinterpret_cast(jbe_handle); - auto status = backup_engine->CreateNewBackup( - db, static_cast(jflush_before_backup)); - - if (status.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: createNewBackupWithMetadata - * Signature: (JJLjava/lang/String;Z)V - */ -void Java_org_rocksdb_BackupEngine_createNewBackupWithMetadata( - JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jlong db_handle, - jstring japp_metadata, jboolean jflush_before_backup) { - auto* db = reinterpret_cast(db_handle); - auto* backup_engine = - reinterpret_cast(jbe_handle); - - jboolean has_exception = JNI_FALSE; - std::string app_metadata = ROCKSDB_NAMESPACE::JniUtil::copyStdString( - env, japp_metadata, &has_exception); - if (has_exception == JNI_TRUE) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "Could not copy jstring to std::string"); - return; - } - - auto status = backup_engine->CreateNewBackupWithMetadata( - db, app_metadata, static_cast(jflush_before_backup)); - - if (status.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: getBackupInfo - * Signature: (J)Ljava/util/List; - */ -jobject Java_org_rocksdb_BackupEngine_getBackupInfo(JNIEnv* env, - jobject /*jbe*/, - jlong jbe_handle) { - auto* backup_engine = - reinterpret_cast(jbe_handle); - std::vector backup_infos; - backup_engine->GetBackupInfo(&backup_infos); - return ROCKSDB_NAMESPACE::BackupInfoListJni::getBackupInfo(env, backup_infos); -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: getCorruptedBackups - * Signature: (J)[I - */ -jintArray Java_org_rocksdb_BackupEngine_getCorruptedBackups(JNIEnv* env, - jobject /*jbe*/, - jlong jbe_handle) { - auto* backup_engine = - reinterpret_cast(jbe_handle); - std::vector backup_ids; - backup_engine->GetCorruptedBackups(&backup_ids); - // store backupids in int array - std::vector int_backup_ids(backup_ids.begin(), backup_ids.end()); - - // Store ints in java array - // Its ok to loose precision here (64->32) - jsize ret_backup_ids_size = static_cast(backup_ids.size()); - jintArray ret_backup_ids = env->NewIntArray(ret_backup_ids_size); - if (ret_backup_ids == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - 
env->SetIntArrayRegion(ret_backup_ids, 0, ret_backup_ids_size, - int_backup_ids.data()); - return ret_backup_ids; -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: garbageCollect - * Signature: (J)V - */ -void Java_org_rocksdb_BackupEngine_garbageCollect(JNIEnv* env, jobject /*jbe*/, - jlong jbe_handle) { - auto* backup_engine = - reinterpret_cast(jbe_handle); - auto status = backup_engine->GarbageCollect(); - - if (status.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: purgeOldBackups - * Signature: (JI)V - */ -void Java_org_rocksdb_BackupEngine_purgeOldBackups(JNIEnv* env, jobject /*jbe*/, - jlong jbe_handle, - jint jnum_backups_to_keep) { - auto* backup_engine = - reinterpret_cast(jbe_handle); - auto status = backup_engine->PurgeOldBackups( - static_cast(jnum_backups_to_keep)); - - if (status.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: deleteBackup - * Signature: (JI)V - */ -void Java_org_rocksdb_BackupEngine_deleteBackup(JNIEnv* env, jobject /*jbe*/, - jlong jbe_handle, - jint jbackup_id) { - auto* backup_engine = - reinterpret_cast(jbe_handle); - auto status = backup_engine->DeleteBackup( - static_cast(jbackup_id)); - - if (status.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: restoreDbFromBackup - * Signature: (JILjava/lang/String;Ljava/lang/String;J)V - */ -void Java_org_rocksdb_BackupEngine_restoreDbFromBackup( - JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jint jbackup_id, - jstring jdb_dir, jstring jwal_dir, jlong jrestore_options_handle) { - auto* backup_engine = - reinterpret_cast(jbe_handle); - const char* db_dir = env->GetStringUTFChars(jdb_dir, nullptr); - if (db_dir == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - const char* wal_dir = env->GetStringUTFChars(jwal_dir, nullptr); - if (wal_dir == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseStringUTFChars(jdb_dir, db_dir); - return; - } - auto* restore_options = reinterpret_cast( - jrestore_options_handle); - auto status = backup_engine->RestoreDBFromBackup( - static_cast(jbackup_id), db_dir, wal_dir, - *restore_options); - - env->ReleaseStringUTFChars(jwal_dir, wal_dir); - env->ReleaseStringUTFChars(jdb_dir, db_dir); - - if (status.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: restoreDbFromLatestBackup - * Signature: (JLjava/lang/String;Ljava/lang/String;J)V - */ -void Java_org_rocksdb_BackupEngine_restoreDbFromLatestBackup( - JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jstring jdb_dir, - jstring jwal_dir, jlong jrestore_options_handle) { - auto* backup_engine = - reinterpret_cast(jbe_handle); - const char* db_dir = env->GetStringUTFChars(jdb_dir, nullptr); - if (db_dir == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - const char* wal_dir = env->GetStringUTFChars(jwal_dir, nullptr); - if (wal_dir == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseStringUTFChars(jdb_dir, db_dir); - return; - } - auto* restore_options = reinterpret_cast( - jrestore_options_handle); - auto status = backup_engine->RestoreDBFromLatestBackup(db_dir, wal_dir, - *restore_options); - - env->ReleaseStringUTFChars(jwal_dir, wal_dir); - env->ReleaseStringUTFChars(jdb_dir, 
db_dir); - - if (status.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); -} - -/* - * Class: org_rocksdb_BackupEngine - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_BackupEngine_disposeInternal(JNIEnv* /*env*/, - jobject /*jbe*/, - jlong jbe_handle) { - auto* be = reinterpret_cast(jbe_handle); - assert(be != nullptr); - delete be; -} diff --git a/java/rocksjni/cache.cc b/java/rocksjni/cache.cc deleted file mode 100644 index 5ca1d5175..000000000 --- a/java/rocksjni/cache.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::Cache. - -#include - -#include "include/org_rocksdb_Cache.h" -#include "rocksdb/advanced_cache.h" - -/* - * Class: org_rocksdb_Cache - * Method: getUsage - * Signature: (J)J - */ -jlong Java_org_rocksdb_Cache_getUsage(JNIEnv*, jclass, jlong jhandle) { - auto* sptr_cache = - reinterpret_cast*>(jhandle); - return static_cast(sptr_cache->get()->GetUsage()); -} - -/* - * Class: org_rocksdb_Cache - * Method: getPinnedUsage - * Signature: (J)J - */ -jlong Java_org_rocksdb_Cache_getPinnedUsage(JNIEnv*, jclass, jlong jhandle) { - auto* sptr_cache = - reinterpret_cast*>(jhandle); - return static_cast(sptr_cache->get()->GetPinnedUsage()); -} diff --git a/java/rocksjni/cassandra_compactionfilterjni.cc b/java/rocksjni/cassandra_compactionfilterjni.cc deleted file mode 100644 index 25817aeca..000000000 --- a/java/rocksjni/cassandra_compactionfilterjni.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include - -#include "include/org_rocksdb_CassandraCompactionFilter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "utilities/cassandra/cassandra_compaction_filter.h" - -/* - * Class: org_rocksdb_CassandraCompactionFilter - * Method: createNewCassandraCompactionFilter0 - * Signature: (ZI)J - */ -jlong Java_org_rocksdb_CassandraCompactionFilter_createNewCassandraCompactionFilter0( - JNIEnv* /*env*/, jclass /*jcls*/, jboolean purge_ttl_on_expiration, - jint gc_grace_period_in_seconds) { - auto* compaction_filter = - new ROCKSDB_NAMESPACE::cassandra::CassandraCompactionFilter( - purge_ttl_on_expiration, gc_grace_period_in_seconds); - // set the native handle to our native compaction filter - return GET_CPLUSPLUS_POINTER(compaction_filter); -} diff --git a/java/rocksjni/cassandra_value_operator.cc b/java/rocksjni/cassandra_value_operator.cc deleted file mode 100644 index 6de28c1b1..000000000 --- a/java/rocksjni/cassandra_value_operator.cc +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
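Editor's note: backupenginejni.cc (above) implements the native half of org.rocksdb.BackupEngine. The following is a hedged end-to-end sketch of the Java API it supports; the BackupInfo accessors and the RestoreOptions constructor are assumptions based on the JNI signatures, and all paths are placeholders.

```java
import org.rocksdb.BackupEngine;
import org.rocksdb.BackupEngineOptions;
import org.rocksdb.BackupInfo;
import org.rocksdb.Env;
import org.rocksdb.Options;
import org.rocksdb.RestoreOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class BackupRestoreExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/rocksdb-data");
         final BackupEngineOptions backupOptions =
             new BackupEngineOptions("/tmp/rocksdb-backups");
         final BackupEngine backupEngine =
             BackupEngine.open(Env.getDefault(), backupOptions)) {
      db.put("key1".getBytes(), "value1".getBytes());

      // Maps to Java_org_rocksdb_BackupEngine_createNewBackup (flush memtables first).
      backupEngine.createNewBackup(db, true);

      for (final BackupInfo info : backupEngine.getBackupInfo()) {
        System.out.println("backup " + info.backupId() + ", size=" + info.size());
      }

      // Restore into a fresh directory; RestoreOptions(false) = do not keep log files.
      try (final RestoreOptions restoreOptions = new RestoreOptions(false)) {
        backupEngine.restoreDbFromLatestBackup(
            "/tmp/rocksdb-restored", "/tmp/rocksdb-restored", restoreOptions);
      }
    }
  }
}
```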
- -#include -#include -#include - -#include -#include - -#include "include/org_rocksdb_CassandraValueMergeOperator.h" -#include "rocksdb/db.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/merge_operator.h" -#include "rocksdb/options.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/statistics.h" -#include "rocksdb/table.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -#include "utilities/cassandra/merge_operator.h" - -/* - * Class: org_rocksdb_CassandraValueMergeOperator - * Method: newSharedCassandraValueMergeOperator - * Signature: (II)J - */ -jlong Java_org_rocksdb_CassandraValueMergeOperator_newSharedCassandraValueMergeOperator( - JNIEnv* /*env*/, jclass /*jclazz*/, jint gcGracePeriodInSeconds, - jint operands_limit) { - auto* op = new std::shared_ptr( - new ROCKSDB_NAMESPACE::cassandra::CassandraValueMergeOperator( - gcGracePeriodInSeconds, operands_limit)); - return GET_CPLUSPLUS_POINTER(op); -} - -/* - * Class: org_rocksdb_CassandraValueMergeOperator - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_CassandraValueMergeOperator_disposeInternal( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* op = - reinterpret_cast*>( - jhandle); - delete op; -} diff --git a/java/rocksjni/checkpoint.cc b/java/rocksjni/checkpoint.cc deleted file mode 100644 index d7cfd813b..000000000 --- a/java/rocksjni/checkpoint.cc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::Checkpoint methods from Java side. 
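Editor's note: newSharedCassandraValueMergeOperator above wraps the C++ Cassandra merge operator in a std::shared_ptr whose address becomes the Java object's native handle. A hedged sketch of plugging it into Options from Java follows; the two int constructor arguments mirror the (II)J JNI signature, and the path is a placeholder.

```java
import org.rocksdb.CassandraValueMergeOperator;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class CassandraMergeExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    // Arguments: gcGracePeriodInSeconds and an operands limit, as in the JNI signature above.
    try (final CassandraValueMergeOperator mergeOperator =
             new CassandraValueMergeOperator(864000, 32);
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setMergeOperator(mergeOperator);
         final RocksDB db = RocksDB.open(options, "/tmp/rocksdb-cassandra")) { // hypothetical
      // merge() values must be Cassandra-format rows for this operator to combine them.
      System.out.println("opened with Cassandra merge operator");
    }
  }
}
```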
- -#include "rocksdb/utilities/checkpoint.h" - -#include -#include -#include - -#include - -#include "include/org_rocksdb_Checkpoint.h" -#include "rocksdb/db.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -/* - * Class: org_rocksdb_Checkpoint - * Method: newCheckpoint - * Signature: (J)J - */ -jlong Java_org_rocksdb_Checkpoint_newCheckpoint(JNIEnv* /*env*/, - jclass /*jclazz*/, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::Checkpoint* checkpoint; - ROCKSDB_NAMESPACE::Checkpoint::Create(db, &checkpoint); - return GET_CPLUSPLUS_POINTER(checkpoint); -} - -/* - * Class: org_rocksdb_Checkpoint - * Method: dispose - * Signature: (J)V - */ -void Java_org_rocksdb_Checkpoint_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* checkpoint = reinterpret_cast(jhandle); - assert(checkpoint != nullptr); - delete checkpoint; -} - -/* - * Class: org_rocksdb_Checkpoint - * Method: createCheckpoint - * Signature: (JLjava/lang/String;)V - */ -void Java_org_rocksdb_Checkpoint_createCheckpoint(JNIEnv* env, jobject /*jobj*/, - jlong jcheckpoint_handle, - jstring jcheckpoint_path) { - const char* checkpoint_path = env->GetStringUTFChars(jcheckpoint_path, 0); - if (checkpoint_path == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - auto* checkpoint = - reinterpret_cast(jcheckpoint_handle); - ROCKSDB_NAMESPACE::Status s = checkpoint->CreateCheckpoint(checkpoint_path); - - env->ReleaseStringUTFChars(jcheckpoint_path, checkpoint_path); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} diff --git a/java/rocksjni/clock_cache.cc b/java/rocksjni/clock_cache.cc deleted file mode 100644 index e04991aa9..000000000 --- a/java/rocksjni/clock_cache.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::ClockCache. - -#include "cache/clock_cache.h" - -#include - -#include "include/org_rocksdb_ClockCache.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_ClockCache - * Method: newClockCache - * Signature: (JIZ)J - */ -jlong Java_org_rocksdb_ClockCache_newClockCache( - JNIEnv* /*env*/, jclass /*jcls*/, jlong jcapacity, jint jnum_shard_bits, - jboolean jstrict_capacity_limit) { - auto* sptr_clock_cache = new std::shared_ptr( - ROCKSDB_NAMESPACE::NewClockCache( - static_cast(jcapacity), static_cast(jnum_shard_bits), - static_cast(jstrict_capacity_limit))); - return GET_CPLUSPLUS_POINTER(sptr_clock_cache); -} - -/* - * Class: org_rocksdb_ClockCache - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_ClockCache_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* sptr_clock_cache = - reinterpret_cast*>(jhandle); - delete sptr_clock_cache; // delete std::shared_ptr -} diff --git a/java/rocksjni/columnfamilyhandle.cc b/java/rocksjni/columnfamilyhandle.cc deleted file mode 100644 index 4140580f0..000000000 --- a/java/rocksjni/columnfamilyhandle.cc +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::ColumnFamilyHandle. - -#include -#include -#include - -#include "include/org_rocksdb_ColumnFamilyHandle.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_ColumnFamilyHandle - * Method: getName - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_ColumnFamilyHandle_getName(JNIEnv* env, - jobject /*jobj*/, - jlong jhandle) { - auto* cfh = reinterpret_cast(jhandle); - std::string cf_name = cfh->GetName(); - return ROCKSDB_NAMESPACE::JniUtil::copyBytes(env, cf_name); -} - -/* - * Class: org_rocksdb_ColumnFamilyHandle - * Method: getID - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyHandle_getID(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* cfh = reinterpret_cast(jhandle); - const int32_t id = cfh->GetID(); - return static_cast(id); -} - -/* - * Class: org_rocksdb_ColumnFamilyHandle - * Method: getDescriptor - * Signature: (J)Lorg/rocksdb/ColumnFamilyDescriptor; - */ -jobject Java_org_rocksdb_ColumnFamilyHandle_getDescriptor(JNIEnv* env, - jobject /*jobj*/, - jlong jhandle) { - auto* cfh = reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::ColumnFamilyDescriptor desc; - ROCKSDB_NAMESPACE::Status s = cfh->GetDescriptor(&desc); - if (s.ok()) { - return ROCKSDB_NAMESPACE::ColumnFamilyDescriptorJni::construct(env, &desc); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } -} - -/* - * Class: org_rocksdb_ColumnFamilyHandle - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_ColumnFamilyHandle_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* cfh = reinterpret_cast(jhandle); - assert(cfh != nullptr); - delete cfh; -} diff --git a/java/rocksjni/compact_range_options.cc b/java/rocksjni/compact_range_options.cc deleted file mode 100644 index 77fbb8890..000000000 --- a/java/rocksjni/compact_range_options.cc +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactRangeOptions. 
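Editor's note: columnfamilyhandle.cc above simply forwards the ColumnFamilyHandle accessors to the native object behind the jlong handle. A hedged Java-side sketch follows; the createColumnFamily call and the path are illustrative.

```java
import java.nio.charset.StandardCharsets;
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class ColumnFamilyHandleExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/rocksdb-data"); // hypothetical path
         final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()) {
      final ColumnFamilyHandle cfHandle = db.createColumnFamily(
          new ColumnFamilyDescriptor("users".getBytes(StandardCharsets.UTF_8), cfOptions));

      // These accessors are implemented by the JNI functions in the deleted
      // columnfamilyhandle.cc: getName -> GetName, getID -> GetID, getDescriptor -> GetDescriptor.
      System.out.println("name: "
          + new String(cfHandle.getName(), StandardCharsets.UTF_8));
      System.out.println("id:   " + cfHandle.getID());
      final ColumnFamilyDescriptor descriptor = cfHandle.getDescriptor();
      System.out.println("descriptor retrieved for CF id " + cfHandle.getID());

      cfHandle.close();
    }
  }
}
```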
- -#include - -#include "include/org_rocksdb_CompactRangeOptions.h" -#include "rocksdb/options.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: newCompactRangeOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_CompactRangeOptions_newCompactRangeOptions( - JNIEnv* /*env*/, jclass /*jclazz*/) { - auto* options = new ROCKSDB_NAMESPACE::CompactRangeOptions(); - return GET_CPLUSPLUS_POINTER(options); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: exclusiveManualCompaction - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_CompactRangeOptions_exclusiveManualCompaction( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->exclusive_manual_compaction); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: setExclusiveManualCompaction - * Signature: (JZ)V - */ -void Java_org_rocksdb_CompactRangeOptions_setExclusiveManualCompaction( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jboolean exclusive_manual_compaction) { - auto* options = - reinterpret_cast(jhandle); - options->exclusive_manual_compaction = - static_cast(exclusive_manual_compaction); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: bottommostLevelCompaction - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactRangeOptions_bottommostLevelCompaction( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::BottommostLevelCompactionJni:: - toJavaBottommostLevelCompaction(options->bottommost_level_compaction); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: setBottommostLevelCompaction - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactRangeOptions_setBottommostLevelCompaction( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jint bottommost_level_compaction) { - auto* options = - reinterpret_cast(jhandle); - options->bottommost_level_compaction = - ROCKSDB_NAMESPACE::BottommostLevelCompactionJni:: - toCppBottommostLevelCompaction(bottommost_level_compaction); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: changeLevel - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_CompactRangeOptions_changeLevel(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->change_level); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: setChangeLevel - * Signature: (JZ)V - */ -void Java_org_rocksdb_CompactRangeOptions_setChangeLevel( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean change_level) { - auto* options = - reinterpret_cast(jhandle); - options->change_level = static_cast(change_level); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: targetLevel - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactRangeOptions_targetLevel(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->target_level); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: setTargetLevel - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactRangeOptions_setTargetLevel(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jint target_level) { - auto* options = - reinterpret_cast(jhandle); - options->target_level = static_cast(target_level); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: targetPathId - * Signature: 
(J)I - */ -jint Java_org_rocksdb_CompactRangeOptions_targetPathId(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->target_path_id); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: setTargetPathId - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactRangeOptions_setTargetPathId(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jint target_path_id) { - auto* options = - reinterpret_cast(jhandle); - options->target_path_id = static_cast(target_path_id); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: allowWriteStall - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_CompactRangeOptions_allowWriteStall(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->allow_write_stall); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: setAllowWriteStall - * Signature: (JZ)V - */ -void Java_org_rocksdb_CompactRangeOptions_setAllowWriteStall( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jboolean allow_write_stall) { - auto* options = - reinterpret_cast(jhandle); - options->allow_write_stall = static_cast(allow_write_stall); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: maxSubcompactions - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactRangeOptions_maxSubcompactions(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->max_subcompactions); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: setMaxSubcompactions - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactRangeOptions_setMaxSubcompactions( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint max_subcompactions) { - auto* options = - reinterpret_cast(jhandle); - options->max_subcompactions = static_cast(max_subcompactions); -} - -/* - * Class: org_rocksdb_CompactRangeOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_CompactRangeOptions_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - delete options; -} diff --git a/java/rocksjni/compaction_filter.cc b/java/rocksjni/compaction_filter.cc deleted file mode 100644 index ea04996ac..000000000 --- a/java/rocksjni/compaction_filter.cc +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionFilter. - -#include "rocksdb/compaction_filter.h" - -#include - -#include "include/org_rocksdb_AbstractCompactionFilter.h" - -// - -/* - * Class: org_rocksdb_AbstractCompactionFilter - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_AbstractCompactionFilter_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - auto* cf = reinterpret_cast(handle); - assert(cf != nullptr); - delete cf; -} -// diff --git a/java/rocksjni/compaction_filter_factory.cc b/java/rocksjni/compaction_filter_factory.cc deleted file mode 100644 index 16fbdbbdd..000000000 --- a/java/rocksjni/compaction_filter_factory.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
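Editor's note: each pair of functions above reads or writes one field of ROCKSDB_NAMESPACE::CompactRangeOptions. A sketch of driving them from Java follows; the compactRange overload and the enum constant used here are assumptions based on the JNI signatures, and the path is a placeholder.

```java
import org.rocksdb.CompactRangeOptions;
import org.rocksdb.CompactRangeOptions.BottommostLevelCompaction;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class CompactRangeExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/rocksdb-data"); // hypothetical path
         final CompactRangeOptions compactRangeOptions = new CompactRangeOptions()) {
      // Each setter corresponds to one JNI function in the deleted compact_range_options.cc.
      compactRangeOptions.setChangeLevel(true);   // move output down to the target level
      compactRangeOptions.setTargetLevel(2);
      compactRangeOptions.setMaxSubcompactions(4);
      compactRangeOptions.setBottommostLevelCompaction(BottommostLevelCompaction.kForce);

      // Compact the whole key space of the default column family (null begin/end).
      db.compactRange(db.getDefaultColumnFamily(), null, null, compactRangeOptions);
    }
  }
}
```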
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionFilterFactory. - -#include - -#include - -#include "include/org_rocksdb_AbstractCompactionFilterFactory.h" -#include "rocksjni/compaction_filter_factory_jnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_AbstractCompactionFilterFactory - * Method: createNewCompactionFilterFactory0 - * Signature: ()J - */ -jlong Java_org_rocksdb_AbstractCompactionFilterFactory_createNewCompactionFilterFactory0( - JNIEnv* env, jobject jobj) { - auto* cff = - new ROCKSDB_NAMESPACE::CompactionFilterFactoryJniCallback(env, jobj); - auto* ptr_sptr_cff = new std::shared_ptr< - ROCKSDB_NAMESPACE::CompactionFilterFactoryJniCallback>(cff); - return GET_CPLUSPLUS_POINTER(ptr_sptr_cff); -} - -/* - * Class: org_rocksdb_AbstractCompactionFilterFactory - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_AbstractCompactionFilterFactory_disposeInternal( - JNIEnv*, jobject, jlong jhandle) { - auto* ptr_sptr_cff = reinterpret_cast< - std::shared_ptr*>( - jhandle); - delete ptr_sptr_cff; -} diff --git a/java/rocksjni/compaction_filter_factory_jnicallback.cc b/java/rocksjni/compaction_filter_factory_jnicallback.cc deleted file mode 100644 index 14285526f..000000000 --- a/java/rocksjni/compaction_filter_factory_jnicallback.cc +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionFilterFactory. 
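Editor's note: createNewCompactionFilterFactory0 above wraps a Java factory object in a CompactionFilterFactoryJniCallback so native compactions can call back into Java. A hedged sketch of such a factory on the Java side follows; RemoveEmptyValueCompactionFilter and the exact Context type are assumed RocksJava classes, and such a factory would typically be installed via ColumnFamilyOptions.setCompactionFilterFactory(...).

```java
import org.rocksdb.AbstractCompactionFilter;
import org.rocksdb.AbstractCompactionFilterFactory;
import org.rocksdb.RemoveEmptyValueCompactionFilter;

public class SkipEmptyValuesFilterFactory
    extends AbstractCompactionFilterFactory<RemoveEmptyValueCompactionFilter> {

  @Override
  public RemoveEmptyValueCompactionFilter createCompactionFilter(
      final AbstractCompactionFilter.Context context) {
    // The context carries the two booleans the JNI callback passes through above
    // (whether this is a full and/or manual compaction).
    return new RemoveEmptyValueCompactionFilter();
  }

  @Override
  public String name() {
    // Cached once on the native side, as the JNI callback comments above note.
    return "SkipEmptyValuesFilterFactory";
  }
}
```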
- -#include "rocksjni/compaction_filter_factory_jnicallback.h" - -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { -CompactionFilterFactoryJniCallback::CompactionFilterFactoryJniCallback( - JNIEnv* env, jobject jcompaction_filter_factory) - : JniCallback(env, jcompaction_filter_factory) { - // Note: The name of a CompactionFilterFactory will not change during - // it's lifetime, so we cache it in a global var - jmethodID jname_method_id = - AbstractCompactionFilterFactoryJni::getNameMethodId(env); - if (jname_method_id == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - - jstring jname = - (jstring)env->CallObjectMethod(m_jcallback_obj, jname_method_id); - if (env->ExceptionCheck()) { - // exception thrown - return; - } - jboolean has_exception = JNI_FALSE; - m_name = - JniUtil::copyString(env, jname, &has_exception); // also releases jname - if (has_exception == JNI_TRUE) { - // exception thrown - return; - } - - m_jcreate_compaction_filter_methodid = - AbstractCompactionFilterFactoryJni::getCreateCompactionFilterMethodId( - env); - if (m_jcreate_compaction_filter_methodid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } -} - -const char* CompactionFilterFactoryJniCallback::Name() const { - return m_name.get(); -} - -std::unique_ptr -CompactionFilterFactoryJniCallback::CreateCompactionFilter( - const CompactionFilter::Context& context) { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - jlong addr_compaction_filter = - env->CallLongMethod(m_jcallback_obj, m_jcreate_compaction_filter_methodid, - static_cast(context.is_full_compaction), - static_cast(context.is_manual_compaction)); - - if (env->ExceptionCheck()) { - // exception thrown from CallLongMethod - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return nullptr; - } - - auto* cff = reinterpret_cast(addr_compaction_filter); - - releaseJniEnv(attached_thread); - - return std::unique_ptr(cff); -} - -} // namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/compaction_filter_factory_jnicallback.h b/java/rocksjni/compaction_filter_factory_jnicallback.h deleted file mode 100644 index 2f26f8dbe..000000000 --- a/java/rocksjni/compaction_filter_factory_jnicallback.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionFilterFactory. 
- -#ifndef JAVA_ROCKSJNI_COMPACTION_FILTER_FACTORY_JNICALLBACK_H_ -#define JAVA_ROCKSJNI_COMPACTION_FILTER_FACTORY_JNICALLBACK_H_ - -#include - -#include - -#include "rocksdb/compaction_filter.h" -#include "rocksjni/jnicallback.h" - -namespace ROCKSDB_NAMESPACE { - -class CompactionFilterFactoryJniCallback : public JniCallback, - public CompactionFilterFactory { - public: - CompactionFilterFactoryJniCallback(JNIEnv* env, - jobject jcompaction_filter_factory); - virtual std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& context); - virtual const char* Name() const; - - private: - std::unique_ptr m_name; - jmethodID m_jcreate_compaction_filter_methodid; -}; - -} // namespace ROCKSDB_NAMESPACE - -#endif // JAVA_ROCKSJNI_COMPACTION_FILTER_FACTORY_JNICALLBACK_H_ diff --git a/java/rocksjni/compaction_job_info.cc b/java/rocksjni/compaction_job_info.cc deleted file mode 100644 index fb292f59c..000000000 --- a/java/rocksjni/compaction_job_info.cc +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionJobInfo. - -#include - -#include "include/org_rocksdb_CompactionJobInfo.h" -#include "rocksdb/listener.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: newCompactionJobInfo - * Signature: ()J - */ -jlong Java_org_rocksdb_CompactionJobInfo_newCompactionJobInfo(JNIEnv*, jclass) { - auto* compact_job_info = new ROCKSDB_NAMESPACE::CompactionJobInfo(); - return GET_CPLUSPLUS_POINTER(compact_job_info); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_CompactionJobInfo_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - delete compact_job_info; -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: columnFamilyName - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_CompactionJobInfo_columnFamilyName(JNIEnv* env, - jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::JniUtil::copyBytes(env, compact_job_info->cf_name); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: status - * Signature: (J)Lorg/rocksdb/Status; - */ -jobject Java_org_rocksdb_CompactionJobInfo_status(JNIEnv* env, jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::StatusJni::construct(env, compact_job_info->status); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: threadId - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobInfo_threadId(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return static_cast(compact_job_info->thread_id); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: jobId - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactionJobInfo_jobId(JNIEnv*, jclass, jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return static_cast(compact_job_info->job_id); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: baseInputLevel - * Signature: (J)I - */ -jint 
Java_org_rocksdb_CompactionJobInfo_baseInputLevel(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return static_cast(compact_job_info->base_input_level); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: outputLevel - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactionJobInfo_outputLevel(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return static_cast(compact_job_info->output_level); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: inputFiles - * Signature: (J)[Ljava/lang/String; - */ -jobjectArray Java_org_rocksdb_CompactionJobInfo_inputFiles(JNIEnv* env, jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::JniUtil::toJavaStrings( - env, &compact_job_info->input_files); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: outputFiles - * Signature: (J)[Ljava/lang/String; - */ -jobjectArray Java_org_rocksdb_CompactionJobInfo_outputFiles(JNIEnv* env, jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::JniUtil::toJavaStrings( - env, &compact_job_info->output_files); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: tableProperties - * Signature: (J)Ljava/util/Map; - */ -jobject Java_org_rocksdb_CompactionJobInfo_tableProperties(JNIEnv* env, jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - auto* map = &compact_job_info->table_properties; - - jobject jhash_map = ROCKSDB_NAMESPACE::HashMapJni::construct( - env, static_cast(map->size())); - if (jhash_map == nullptr) { - // exception occurred - return nullptr; - } - - const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< - const std::string, - std::shared_ptr, jobject, - jobject> - fn_map_kv = - [env](const std::pair< - const std::string, - std::shared_ptr>& - kv) { - jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &(kv.first), false); - if (env->ExceptionCheck()) { - // an error occurred - return std::unique_ptr>(nullptr); - } - - jobject jtable_properties = - ROCKSDB_NAMESPACE::TablePropertiesJni::fromCppTableProperties( - env, *(kv.second.get())); - if (env->ExceptionCheck()) { - // an error occurred - env->DeleteLocalRef(jkey); - return std::unique_ptr>(nullptr); - } - - return std::unique_ptr>( - new std::pair(static_cast(jkey), - jtable_properties)); - }; - - if (!ROCKSDB_NAMESPACE::HashMapJni::putAll(env, jhash_map, map->begin(), - map->end(), fn_map_kv)) { - // exception occurred - return nullptr; - } - - return jhash_map; -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: compactionReason - * Signature: (J)B - */ -jbyte Java_org_rocksdb_CompactionJobInfo_compactionReason(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompactionReasonJni::toJavaCompactionReason( - compact_job_info->compaction_reason); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: compression - * Signature: (J)B - */ -jbyte Java_org_rocksdb_CompactionJobInfo_compression(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_info = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( - compact_job_info->compression); -} - -/* - * Class: org_rocksdb_CompactionJobInfo - * Method: stats - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobInfo_stats(JNIEnv*, jclass, jlong jhandle) { - auto* compact_job_info = - 
reinterpret_cast(jhandle); - auto* stats = new ROCKSDB_NAMESPACE::CompactionJobStats(); - stats->Add(compact_job_info->stats); - return GET_CPLUSPLUS_POINTER(stats); -} diff --git a/java/rocksjni/compaction_job_stats.cc b/java/rocksjni/compaction_job_stats.cc deleted file mode 100644 index a2599c132..000000000 --- a/java/rocksjni/compaction_job_stats.cc +++ /dev/null @@ -1,345 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionJobStats. - -#include "rocksdb/compaction_job_stats.h" - -#include - -#include "include/org_rocksdb_CompactionJobStats.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: newCompactionJobStats - * Signature: ()J - */ -jlong Java_org_rocksdb_CompactionJobStats_newCompactionJobStats(JNIEnv*, - jclass) { - auto* compact_job_stats = new ROCKSDB_NAMESPACE::CompactionJobStats(); - return GET_CPLUSPLUS_POINTER(compact_job_stats); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_CompactionJobStats_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - delete compact_job_stats; -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: reset - * Signature: (J)V - */ -void Java_org_rocksdb_CompactionJobStats_reset(JNIEnv*, jclass, jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - compact_job_stats->Reset(); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: add - * Signature: (JJ)V - */ -void Java_org_rocksdb_CompactionJobStats_add(JNIEnv*, jclass, jlong jhandle, - jlong jother_handle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - auto* other_compact_job_stats = - reinterpret_cast(jother_handle); - compact_job_stats->Add(*other_compact_job_stats); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: elapsedMicros - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_elapsedMicros(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->elapsed_micros); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numInputRecords - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numInputRecords(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_input_records); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numInputFiles - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numInputFiles(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_input_files); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numInputFilesAtOutputLevel - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numInputFilesAtOutputLevel( - JNIEnv*, jclass, jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_input_files_at_output_level); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numOutputRecords 
- * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numOutputRecords(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_output_records); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numOutputFiles - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numOutputFiles(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_output_files); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: isManualCompaction - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_CompactionJobStats_isManualCompaction(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - if (compact_job_stats->is_manual_compaction) { - return JNI_TRUE; - } else { - return JNI_FALSE; - } -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: totalInputBytes - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_totalInputBytes(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->total_input_bytes); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: totalOutputBytes - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_totalOutputBytes(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->total_output_bytes); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numRecordsReplaced - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numRecordsReplaced(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_records_replaced); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: totalInputRawKeyBytes - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_totalInputRawKeyBytes(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->total_input_raw_key_bytes); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: totalInputRawValueBytes - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_totalInputRawValueBytes( - JNIEnv*, jclass, jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->total_input_raw_value_bytes); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numInputDeletionRecords - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numInputDeletionRecords( - JNIEnv*, jclass, jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_input_deletion_records); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numExpiredDeletionRecords - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numExpiredDeletionRecords( - JNIEnv*, jclass, jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_expired_deletion_records); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numCorruptKeys - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numCorruptKeys(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return 
static_cast(compact_job_stats->num_corrupt_keys); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: fileWriteNanos - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_fileWriteNanos(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->file_write_nanos); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: fileRangeSyncNanos - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_fileRangeSyncNanos(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->file_range_sync_nanos); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: fileFsyncNanos - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_fileFsyncNanos(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->file_fsync_nanos); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: filePrepareWriteNanos - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_filePrepareWriteNanos(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->file_prepare_write_nanos); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: smallestOutputKeyPrefix - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_CompactionJobStats_smallestOutputKeyPrefix( - JNIEnv* env, jclass, jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::JniUtil::copyBytes( - env, compact_job_stats->smallest_output_key_prefix); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: largestOutputKeyPrefix - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_CompactionJobStats_largestOutputKeyPrefix( - JNIEnv* env, jclass, jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::JniUtil::copyBytes( - env, compact_job_stats->largest_output_key_prefix); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numSingleDelFallthru - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numSingleDelFallthru(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_single_del_fallthru); -} - -/* - * Class: org_rocksdb_CompactionJobStats - * Method: numSingleDelMismatch - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionJobStats_numSingleDelMismatch(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_job_stats = - reinterpret_cast(jhandle); - return static_cast(compact_job_stats->num_single_del_mismatch); -} diff --git a/java/rocksjni/compaction_options.cc b/java/rocksjni/compaction_options.cc deleted file mode 100644 index bbbde0313..000000000 --- a/java/rocksjni/compaction_options.cc +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionOptions. 
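Editor's note: the CompactionJobInfo and CompactionJobStats getters above are usually consumed from an event listener after a compaction finishes. A hedged sketch follows; the AbstractEventListener callback name, the setListeners usage, and the CompactionJobInfo accessor names are assumptions based on the JNI symbols, and the path is a placeholder.

```java
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.rocksdb.AbstractEventListener;
import org.rocksdb.CompactionJobInfo;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class CompactionLoggingExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    final AbstractEventListener listener = new AbstractEventListener() {
      @Override
      public void onCompactionCompleted(final RocksDB db, final CompactionJobInfo info) {
        // Accessors map to the JNI getters in the deleted compaction_job_info.cc.
        System.out.println("compacted CF "
            + new String(info.columnFamilyName(), StandardCharsets.UTF_8)
            + " to level " + info.outputLevel()
            + " (reason: " + info.compactionReason() + ")");
      }
    };
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             .setListeners(Arrays.asList(listener));
         final RocksDB db = RocksDB.open(options, "/tmp/rocksdb-data")) { // hypothetical path
      db.put("k".getBytes(), "v".getBytes());
      db.compactRange(); // trigger a manual compaction so the listener fires
    }
  }
}
```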
- -#include - -#include "include/org_rocksdb_CompactionOptions.h" -#include "rocksdb/options.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_CompactionOptions - * Method: newCompactionOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_CompactionOptions_newCompactionOptions(JNIEnv*, jclass) { - auto* compact_opts = new ROCKSDB_NAMESPACE::CompactionOptions(); - return GET_CPLUSPLUS_POINTER(compact_opts); -} - -/* - * Class: org_rocksdb_CompactionOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_CompactionOptions_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* compact_opts = - reinterpret_cast(jhandle); - delete compact_opts; -} - -/* - * Class: org_rocksdb_CompactionOptions - * Method: compression - * Signature: (J)B - */ -jbyte Java_org_rocksdb_CompactionOptions_compression(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_opts = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( - compact_opts->compression); -} - -/* - * Class: org_rocksdb_CompactionOptions - * Method: setCompression - * Signature: (JB)V - */ -void Java_org_rocksdb_CompactionOptions_setCompression( - JNIEnv*, jclass, jlong jhandle, jbyte jcompression_type_value) { - auto* compact_opts = - reinterpret_cast(jhandle); - compact_opts->compression = - ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( - jcompression_type_value); -} - -/* - * Class: org_rocksdb_CompactionOptions - * Method: outputFileSizeLimit - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionOptions_outputFileSizeLimit(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_opts = - reinterpret_cast(jhandle); - return static_cast(compact_opts->output_file_size_limit); -} - -/* - * Class: org_rocksdb_CompactionOptions - * Method: setOutputFileSizeLimit - * Signature: (JJ)V - */ -void Java_org_rocksdb_CompactionOptions_setOutputFileSizeLimit( - JNIEnv*, jclass, jlong jhandle, jlong joutput_file_size_limit) { - auto* compact_opts = - reinterpret_cast(jhandle); - compact_opts->output_file_size_limit = - static_cast(joutput_file_size_limit); -} - -/* - * Class: org_rocksdb_CompactionOptions - * Method: maxSubcompactions - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactionOptions_maxSubcompactions(JNIEnv*, jclass, - jlong jhandle) { - auto* compact_opts = - reinterpret_cast(jhandle); - return static_cast(compact_opts->max_subcompactions); -} - -/* - * Class: org_rocksdb_CompactionOptions - * Method: setMaxSubcompactions - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactionOptions_setMaxSubcompactions( - JNIEnv*, jclass, jlong jhandle, jint jmax_subcompactions) { - auto* compact_opts = - reinterpret_cast(jhandle); - compact_opts->max_subcompactions = static_cast(jmax_subcompactions); -} diff --git a/java/rocksjni/compaction_options_fifo.cc b/java/rocksjni/compaction_options_fifo.cc deleted file mode 100644 index f6a47fec5..000000000 --- a/java/rocksjni/compaction_options_fifo.cc +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionOptionsFIFO. 
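The natives above are the full surface of org.rocksdb.CompactionOptions (compression, output file size limit, max subcompactions). A minimal Java-side sketch; the fluent chaining follows the usual RocksJava builder convention, and the compactFiles consumer mentioned in the comment is an assumption about where the object is typically passed:

```java
import org.rocksdb.CompactionOptions;
import org.rocksdb.CompressionType;

public class CompactionOptionsExample {
  public static CompactionOptions manualCompactionOptions() {
    // Options object consumed by a manual file compaction,
    // e.g. RocksDB#compactFiles(...) (assumed consumer).
    return new CompactionOptions()
        .setCompression(CompressionType.ZSTD_COMPRESSION)
        .setOutputFileSizeLimit(64L * 1024 * 1024)  // 64 MiB per output file
        .setMaxSubcompactions(4);
  }
}
```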
- -#include - -#include "include/org_rocksdb_CompactionOptionsFIFO.h" -#include "rocksdb/advanced_options.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_CompactionOptionsFIFO - * Method: newCompactionOptionsFIFO - * Signature: ()J - */ -jlong Java_org_rocksdb_CompactionOptionsFIFO_newCompactionOptionsFIFO(JNIEnv*, - jclass) { - const auto* opt = new ROCKSDB_NAMESPACE::CompactionOptionsFIFO(); - return GET_CPLUSPLUS_POINTER(opt); -} - -/* - * Class: org_rocksdb_CompactionOptionsFIFO - * Method: setMaxTableFilesSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_CompactionOptionsFIFO_setMaxTableFilesSize( - JNIEnv*, jobject, jlong jhandle, jlong jmax_table_files_size) { - auto* opt = - reinterpret_cast(jhandle); - opt->max_table_files_size = static_cast(jmax_table_files_size); -} - -/* - * Class: org_rocksdb_CompactionOptionsFIFO - * Method: maxTableFilesSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompactionOptionsFIFO_maxTableFilesSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return static_cast(opt->max_table_files_size); -} - -/* - * Class: org_rocksdb_CompactionOptionsFIFO - * Method: setAllowCompaction - * Signature: (JZ)V - */ -void Java_org_rocksdb_CompactionOptionsFIFO_setAllowCompaction( - JNIEnv*, jobject, jlong jhandle, jboolean allow_compaction) { - auto* opt = - reinterpret_cast(jhandle); - opt->allow_compaction = static_cast(allow_compaction); -} - -/* - * Class: org_rocksdb_CompactionOptionsFIFO - * Method: allowCompaction - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_CompactionOptionsFIFO_allowCompaction(JNIEnv*, - jobject, - jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return static_cast(opt->allow_compaction); -} - -/* - * Class: org_rocksdb_CompactionOptionsFIFO - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_CompactionOptionsFIFO_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - delete reinterpret_cast(jhandle); -} diff --git a/java/rocksjni/compaction_options_universal.cc b/java/rocksjni/compaction_options_universal.cc deleted file mode 100644 index 9fc6f3158..000000000 --- a/java/rocksjni/compaction_options_universal.cc +++ /dev/null @@ -1,209 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionOptionsUniversal. 
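The FIFO natives above back org.rocksdb.CompactionOptionsFIFO. A hedged sketch of wiring them into an Options object (the Options setters and fluent chaining assume the usual RocksJava conventions):

```java
import org.rocksdb.CompactionOptionsFIFO;
import org.rocksdb.CompactionStyle;
import org.rocksdb.Options;

public class FifoCompactionExample {
  public static Options fifoOptions() {
    // Cap the total SST size at ~1 GiB and let FIFO compaction merge files.
    final CompactionOptionsFIFO fifo = new CompactionOptionsFIFO()
        .setMaxTableFilesSize(1024L * 1024 * 1024)
        .setAllowCompaction(true);
    return new Options()
        .setCreateIfMissing(true)
        .setCompactionStyle(CompactionStyle.FIFO)
        .setCompactionOptionsFIFO(fifo);
  }
}
```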
- -#include - -#include "include/org_rocksdb_CompactionOptionsUniversal.h" -#include "rocksdb/advanced_options.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: newCompactionOptionsUniversal - * Signature: ()J - */ -jlong Java_org_rocksdb_CompactionOptionsUniversal_newCompactionOptionsUniversal( - JNIEnv*, jclass) { - const auto* opt = new ROCKSDB_NAMESPACE::CompactionOptionsUniversal(); - return GET_CPLUSPLUS_POINTER(opt); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: setSizeRatio - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactionOptionsUniversal_setSizeRatio( - JNIEnv*, jobject, jlong jhandle, jint jsize_ratio) { - auto* opt = - reinterpret_cast(jhandle); - opt->size_ratio = static_cast(jsize_ratio); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: sizeRatio - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactionOptionsUniversal_sizeRatio(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return static_cast(opt->size_ratio); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: setMinMergeWidth - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactionOptionsUniversal_setMinMergeWidth( - JNIEnv*, jobject, jlong jhandle, jint jmin_merge_width) { - auto* opt = - reinterpret_cast(jhandle); - opt->min_merge_width = static_cast(jmin_merge_width); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: minMergeWidth - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactionOptionsUniversal_minMergeWidth(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return static_cast(opt->min_merge_width); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: setMaxMergeWidth - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactionOptionsUniversal_setMaxMergeWidth( - JNIEnv*, jobject, jlong jhandle, jint jmax_merge_width) { - auto* opt = - reinterpret_cast(jhandle); - opt->max_merge_width = static_cast(jmax_merge_width); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: maxMergeWidth - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactionOptionsUniversal_maxMergeWidth(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return static_cast(opt->max_merge_width); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: setMaxSizeAmplificationPercent - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactionOptionsUniversal_setMaxSizeAmplificationPercent( - JNIEnv*, jobject, jlong jhandle, jint jmax_size_amplification_percent) { - auto* opt = - reinterpret_cast(jhandle); - opt->max_size_amplification_percent = - static_cast(jmax_size_amplification_percent); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: maxSizeAmplificationPercent - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactionOptionsUniversal_maxSizeAmplificationPercent( - JNIEnv*, jobject, jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return static_cast(opt->max_size_amplification_percent); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: setCompressionSizePercent - * Signature: (JI)V - */ -void Java_org_rocksdb_CompactionOptionsUniversal_setCompressionSizePercent( - JNIEnv*, jobject, jlong jhandle, jint jcompression_size_percent) { - auto* opt = - reinterpret_cast(jhandle); - opt->compression_size_percent = - static_cast(jcompression_size_percent); 
-} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: compressionSizePercent - * Signature: (J)I - */ -jint Java_org_rocksdb_CompactionOptionsUniversal_compressionSizePercent( - JNIEnv*, jobject, jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return static_cast(opt->compression_size_percent); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: setStopStyle - * Signature: (JB)V - */ -void Java_org_rocksdb_CompactionOptionsUniversal_setStopStyle( - JNIEnv*, jobject, jlong jhandle, jbyte jstop_style_value) { - auto* opt = - reinterpret_cast(jhandle); - opt->stop_style = - ROCKSDB_NAMESPACE::CompactionStopStyleJni::toCppCompactionStopStyle( - jstop_style_value); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: stopStyle - * Signature: (J)B - */ -jbyte Java_org_rocksdb_CompactionOptionsUniversal_stopStyle(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompactionStopStyleJni::toJavaCompactionStopStyle( - opt->stop_style); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: setAllowTrivialMove - * Signature: (JZ)V - */ -void Java_org_rocksdb_CompactionOptionsUniversal_setAllowTrivialMove( - JNIEnv*, jobject, jlong jhandle, jboolean jallow_trivial_move) { - auto* opt = - reinterpret_cast(jhandle); - opt->allow_trivial_move = static_cast(jallow_trivial_move); -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: allowTrivialMove - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_CompactionOptionsUniversal_allowTrivialMove( - JNIEnv*, jobject, jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return opt->allow_trivial_move; -} - -/* - * Class: org_rocksdb_CompactionOptionsUniversal - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_CompactionOptionsUniversal_disposeInternal( - JNIEnv*, jobject, jlong jhandle) { - delete reinterpret_cast( - jhandle); -} diff --git a/java/rocksjni/comparator.cc b/java/rocksjni/comparator.cc deleted file mode 100644 index 11279c4ce..000000000 --- a/java/rocksjni/comparator.cc +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::Comparator. 
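Together with the setters above, these natives cover org.rocksdb.CompactionOptionsUniversal. A sketch under the same assumptions (fluent setters, Options#setCompactionOptionsUniversal as the consumer); the numeric values mirror the usual RocksDB defaults:

```java
import org.rocksdb.CompactionOptionsUniversal;
import org.rocksdb.CompactionStopStyle;
import org.rocksdb.CompactionStyle;
import org.rocksdb.Options;

public class UniversalCompactionExample {
  public static Options universalOptions() {
    final CompactionOptionsUniversal universal = new CompactionOptionsUniversal()
        .setSizeRatio(1)
        .setMinMergeWidth(2)
        .setMaxSizeAmplificationPercent(200)
        .setCompressionSizePercent(-1)  // -1: fall back to the default compression policy
        .setStopStyle(CompactionStopStyle.CompactionStopStyleTotalSize)
        .setAllowTrivialMove(false);
    return new Options()
        .setCompactionStyle(CompactionStyle.UNIVERSAL)
        .setCompactionOptionsUniversal(universal);
  }
}
```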
- -#include -#include -#include - -#include -#include - -#include "include/org_rocksdb_AbstractComparator.h" -#include "include/org_rocksdb_NativeComparatorWrapper.h" -#include "rocksjni/comparatorjnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_AbstractComparator - * Method: createNewComparator - * Signature: (J)J - */ -jlong Java_org_rocksdb_AbstractComparator_createNewComparator( - JNIEnv* env, jobject jcomparator, jlong copt_handle) { - auto* copt = - reinterpret_cast( - copt_handle); - auto* c = - new ROCKSDB_NAMESPACE::ComparatorJniCallback(env, jcomparator, copt); - return GET_CPLUSPLUS_POINTER(c); -} - -/* - * Class: org_rocksdb_AbstractComparator - * Method: usingDirectBuffers - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_AbstractComparator_usingDirectBuffers(JNIEnv*, - jobject, - jlong jhandle) { - auto* c = - reinterpret_cast(jhandle); - return static_cast(c->m_options->direct_buffer); -} - -/* - * Class: org_rocksdb_NativeComparatorWrapper - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_NativeComparatorWrapper_disposeInternal( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jcomparator_handle) { - auto* comparator = - reinterpret_cast(jcomparator_handle); - delete comparator; -} diff --git a/java/rocksjni/comparatorjnicallback.cc b/java/rocksjni/comparatorjnicallback.cc deleted file mode 100644 index d354b40b8..000000000 --- a/java/rocksjni/comparatorjnicallback.cc +++ /dev/null @@ -1,647 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::Comparator. 
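createNewComparator wraps a Java org.rocksdb.AbstractComparator in the ComparatorJniCallback defined in the next file. For orientation, a minimal Java comparator that those callbacks would invoke; it assumes the ByteBuffer-based AbstractComparator API implied by the direct/non-direct buffer handling below, and it deliberately ignores signed/unsigned byte ordering:

```java
import java.nio.ByteBuffer;
import org.rocksdb.AbstractComparator;
import org.rocksdb.ComparatorOptions;

// The native bridge calls back into name() and compare() on this object.
public class ExampleJavaComparator extends AbstractComparator {
  public ExampleJavaComparator(final ComparatorOptions comparatorOptions) {
    super(comparatorOptions);
  }

  @Override
  public String name() {
    return "example.ExampleJavaComparator";
  }

  @Override
  public int compare(final ByteBuffer a, final ByteBuffer b) {
    // Signed lexicographic comparison; a production comparator would
    // compare bytes as unsigned values.
    return a.compareTo(b);
  }
}
```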
- -#include "rocksjni/comparatorjnicallback.h" - -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { -ComparatorJniCallback::ComparatorJniCallback( - JNIEnv* env, jobject jcomparator, - const ComparatorJniCallbackOptions* options) - : JniCallback(env, jcomparator), - m_options(std::make_unique(*options)) { - // cache the AbstractComparatorJniBridge class as we will reuse it many times - // for each callback - m_abstract_comparator_jni_bridge_clazz = static_cast( - env->NewGlobalRef(AbstractComparatorJniBridge::getJClass(env))); - - // Note: The name of a Comparator will not change during it's lifetime, - // so we cache it in a global var - jmethodID jname_mid = AbstractComparatorJni::getNameMethodId(env); - if (jname_mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - jstring js_name = (jstring)env->CallObjectMethod(m_jcallback_obj, jname_mid); - if (env->ExceptionCheck()) { - // exception thrown - return; - } - jboolean has_exception = JNI_FALSE; - m_name = JniUtil::copyString(env, js_name, - &has_exception); // also releases jsName - if (has_exception == JNI_TRUE) { - // exception thrown - return; - } - - // cache the ByteBuffer class as we will reuse it many times for each callback - m_jbytebuffer_clazz = - static_cast(env->NewGlobalRef(ByteBufferJni::getJClass(env))); - - m_jcompare_mid = AbstractComparatorJniBridge::getCompareInternalMethodId( - env, m_abstract_comparator_jni_bridge_clazz); - if (m_jcompare_mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - - m_jshortest_mid = - AbstractComparatorJniBridge::getFindShortestSeparatorInternalMethodId( - env, m_abstract_comparator_jni_bridge_clazz); - if (m_jshortest_mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - - m_jshort_mid = - AbstractComparatorJniBridge::getFindShortSuccessorInternalMethodId( - env, m_abstract_comparator_jni_bridge_clazz); - if (m_jshort_mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - - // do we need reusable buffers? 
- if (m_options->max_reused_buffer_size > -1) { - if (m_options->reused_synchronisation_type == - ReusedSynchronisationType::THREAD_LOCAL) { - // buffers reused per thread - UnrefHandler unref = [](void* ptr) { - ThreadLocalBuf* tlb = reinterpret_cast(ptr); - jboolean attached_thread = JNI_FALSE; - JNIEnv* _env = JniUtil::getJniEnv(tlb->jvm, &attached_thread); - if (_env != nullptr) { - if (tlb->direct_buffer) { - void* buf = _env->GetDirectBufferAddress(tlb->jbuf); - delete[] static_cast(buf); - } - _env->DeleteGlobalRef(tlb->jbuf); - JniUtil::releaseJniEnv(tlb->jvm, attached_thread); - } - }; - - m_tl_buf_a = new ThreadLocalPtr(unref); - m_tl_buf_b = new ThreadLocalPtr(unref); - - m_jcompare_buf_a = nullptr; - m_jcompare_buf_b = nullptr; - m_jshortest_buf_start = nullptr; - m_jshortest_buf_limit = nullptr; - m_jshort_buf_key = nullptr; - - } else { - // buffers reused and shared across threads - const bool adaptive = m_options->reused_synchronisation_type == - ReusedSynchronisationType::ADAPTIVE_MUTEX; - mtx_compare = std::unique_ptr(new port::Mutex(adaptive)); - mtx_shortest = std::unique_ptr(new port::Mutex(adaptive)); - mtx_short = std::unique_ptr(new port::Mutex(adaptive)); - - m_jcompare_buf_a = env->NewGlobalRef(ByteBufferJni::construct( - env, m_options->direct_buffer, m_options->max_reused_buffer_size, - m_jbytebuffer_clazz)); - if (m_jcompare_buf_a == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - m_jcompare_buf_b = env->NewGlobalRef(ByteBufferJni::construct( - env, m_options->direct_buffer, m_options->max_reused_buffer_size, - m_jbytebuffer_clazz)); - if (m_jcompare_buf_b == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - m_jshortest_buf_start = env->NewGlobalRef(ByteBufferJni::construct( - env, m_options->direct_buffer, m_options->max_reused_buffer_size, - m_jbytebuffer_clazz)); - if (m_jshortest_buf_start == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - m_jshortest_buf_limit = env->NewGlobalRef(ByteBufferJni::construct( - env, m_options->direct_buffer, m_options->max_reused_buffer_size, - m_jbytebuffer_clazz)); - if (m_jshortest_buf_limit == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - m_jshort_buf_key = env->NewGlobalRef(ByteBufferJni::construct( - env, m_options->direct_buffer, m_options->max_reused_buffer_size, - m_jbytebuffer_clazz)); - if (m_jshort_buf_key == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - m_tl_buf_a = nullptr; - m_tl_buf_b = nullptr; - } - - } else { - m_jcompare_buf_a = nullptr; - m_jcompare_buf_b = nullptr; - m_jshortest_buf_start = nullptr; - m_jshortest_buf_limit = nullptr; - m_jshort_buf_key = nullptr; - - m_tl_buf_a = nullptr; - m_tl_buf_b = nullptr; - } -} - -ComparatorJniCallback::~ComparatorJniCallback() { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - env->DeleteGlobalRef(m_abstract_comparator_jni_bridge_clazz); - - env->DeleteGlobalRef(m_jbytebuffer_clazz); - - if (m_jcompare_buf_a != nullptr) { - if (m_options->direct_buffer) { - void* buf = env->GetDirectBufferAddress(m_jcompare_buf_a); - delete[] static_cast(buf); - } - env->DeleteGlobalRef(m_jcompare_buf_a); - } - - if (m_jcompare_buf_b != nullptr) { - if (m_options->direct_buffer) { - void* buf = env->GetDirectBufferAddress(m_jcompare_buf_b); - delete[] static_cast(buf); - } - env->DeleteGlobalRef(m_jcompare_buf_b); - } - - if (m_jshortest_buf_start != nullptr) { - if (m_options->direct_buffer) { - void* buf = 
env->GetDirectBufferAddress(m_jshortest_buf_start); - delete[] static_cast(buf); - } - env->DeleteGlobalRef(m_jshortest_buf_start); - } - - if (m_jshortest_buf_limit != nullptr) { - if (m_options->direct_buffer) { - void* buf = env->GetDirectBufferAddress(m_jshortest_buf_limit); - delete[] static_cast(buf); - } - env->DeleteGlobalRef(m_jshortest_buf_limit); - } - - if (m_jshort_buf_key != nullptr) { - if (m_options->direct_buffer) { - void* buf = env->GetDirectBufferAddress(m_jshort_buf_key); - delete[] static_cast(buf); - } - env->DeleteGlobalRef(m_jshort_buf_key); - } - - if (m_tl_buf_a != nullptr) { - delete m_tl_buf_a; - } - - if (m_tl_buf_b != nullptr) { - delete m_tl_buf_b; - } - - releaseJniEnv(attached_thread); -} - -const char* ComparatorJniCallback::Name() const { return m_name.get(); } - -int ComparatorJniCallback::Compare(const Slice& a, const Slice& b) const { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - const bool reuse_jbuf_a = - static_cast(a.size()) <= m_options->max_reused_buffer_size; - const bool reuse_jbuf_b = - static_cast(b.size()) <= m_options->max_reused_buffer_size; - - MaybeLockForReuse(mtx_compare, reuse_jbuf_a || reuse_jbuf_b); - - jobject jcompare_buf_a = - GetBuffer(env, a, reuse_jbuf_a, m_tl_buf_a, m_jcompare_buf_a); - if (jcompare_buf_a == nullptr) { - // exception occurred - MaybeUnlockForReuse(mtx_compare, reuse_jbuf_a || reuse_jbuf_b); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return 0; - } - - jobject jcompare_buf_b = - GetBuffer(env, b, reuse_jbuf_b, m_tl_buf_b, m_jcompare_buf_b); - if (jcompare_buf_b == nullptr) { - // exception occurred - if (!reuse_jbuf_a) { - DeleteBuffer(env, jcompare_buf_a); - } - MaybeUnlockForReuse(mtx_compare, reuse_jbuf_a || reuse_jbuf_b); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return 0; - } - - jint result = env->CallStaticIntMethod( - m_abstract_comparator_jni_bridge_clazz, m_jcompare_mid, m_jcallback_obj, - jcompare_buf_a, reuse_jbuf_a ? a.size() : -1, jcompare_buf_b, - reuse_jbuf_b ? 
b.size() : -1); - - if (env->ExceptionCheck()) { - // exception thrown from CallIntMethod - env->ExceptionDescribe(); // print out exception to stderr - result = 0; // we could not get a result from java callback so use 0 - } - - if (!reuse_jbuf_a) { - DeleteBuffer(env, jcompare_buf_a); - } - if (!reuse_jbuf_b) { - DeleteBuffer(env, jcompare_buf_b); - } - - MaybeUnlockForReuse(mtx_compare, reuse_jbuf_a || reuse_jbuf_b); - - releaseJniEnv(attached_thread); - - return result; -} - -void ComparatorJniCallback::FindShortestSeparator(std::string* start, - const Slice& limit) const { - if (start == nullptr) { - return; - } - - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - const bool reuse_jbuf_start = static_cast(start->length()) <= - m_options->max_reused_buffer_size; - const bool reuse_jbuf_limit = - static_cast(limit.size()) <= m_options->max_reused_buffer_size; - - MaybeLockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); - - Slice sstart(start->data(), start->length()); - jobject j_start_buf = GetBuffer(env, sstart, reuse_jbuf_start, m_tl_buf_a, - m_jshortest_buf_start); - if (j_start_buf == nullptr) { - // exception occurred - MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - - jobject j_limit_buf = GetBuffer(env, limit, reuse_jbuf_limit, m_tl_buf_b, - m_jshortest_buf_limit); - if (j_limit_buf == nullptr) { - // exception occurred - if (!reuse_jbuf_start) { - DeleteBuffer(env, j_start_buf); - } - MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - - jint jstart_len = env->CallStaticIntMethod( - m_abstract_comparator_jni_bridge_clazz, m_jshortest_mid, m_jcallback_obj, - j_start_buf, reuse_jbuf_start ? start->length() : -1, j_limit_buf, - reuse_jbuf_limit ? limit.size() : -1); - - if (env->ExceptionCheck()) { - // exception thrown from CallIntMethod - env->ExceptionDescribe(); // print out exception to stderr - - } else if (static_cast(jstart_len) != start->length()) { - // start buffer has changed in Java, so update `start` with the result - bool copy_from_non_direct = false; - if (reuse_jbuf_start) { - // reused a buffer - if (m_options->direct_buffer) { - // reused direct buffer - void* start_buf = env->GetDirectBufferAddress(j_start_buf); - if (start_buf == nullptr) { - if (!reuse_jbuf_start) { - DeleteBuffer(env, j_start_buf); - } - if (!reuse_jbuf_limit) { - DeleteBuffer(env, j_limit_buf); - } - MaybeUnlockForReuse(mtx_shortest, - reuse_jbuf_start || reuse_jbuf_limit); - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "Unable to get Direct Buffer Address"); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - start->assign(static_cast(start_buf), jstart_len); - - } else { - // reused non-direct buffer - copy_from_non_direct = true; - } - } else { - // there was a new buffer - if (m_options->direct_buffer) { - // it was direct... 
don't forget to potentially truncate the `start` - // string - start->resize(jstart_len); - } else { - // it was non-direct - copy_from_non_direct = true; - } - } - - if (copy_from_non_direct) { - jbyteArray jarray = - ByteBufferJni::array(env, j_start_buf, m_jbytebuffer_clazz); - if (jarray == nullptr) { - if (!reuse_jbuf_start) { - DeleteBuffer(env, j_start_buf); - } - if (!reuse_jbuf_limit) { - DeleteBuffer(env, j_limit_buf); - } - MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - jboolean has_exception = JNI_FALSE; - JniUtil::byteString( - env, jarray, - [start, jstart_len](const char* data, const size_t) { - return start->assign(data, static_cast(jstart_len)); - }, - &has_exception); - env->DeleteLocalRef(jarray); - if (has_exception == JNI_TRUE) { - if (!reuse_jbuf_start) { - DeleteBuffer(env, j_start_buf); - } - if (!reuse_jbuf_limit) { - DeleteBuffer(env, j_limit_buf); - } - env->ExceptionDescribe(); // print out exception to stderr - MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); - releaseJniEnv(attached_thread); - return; - } - } - } - - if (!reuse_jbuf_start) { - DeleteBuffer(env, j_start_buf); - } - if (!reuse_jbuf_limit) { - DeleteBuffer(env, j_limit_buf); - } - - MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); - - releaseJniEnv(attached_thread); -} - -void ComparatorJniCallback::FindShortSuccessor(std::string* key) const { - if (key == nullptr) { - return; - } - - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - const bool reuse_jbuf_key = - static_cast(key->length()) <= m_options->max_reused_buffer_size; - - MaybeLockForReuse(mtx_short, reuse_jbuf_key); - - Slice skey(key->data(), key->length()); - jobject j_key_buf = - GetBuffer(env, skey, reuse_jbuf_key, m_tl_buf_a, m_jshort_buf_key); - if (j_key_buf == nullptr) { - // exception occurred - MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - - jint jkey_len = env->CallStaticIntMethod( - m_abstract_comparator_jni_bridge_clazz, m_jshort_mid, m_jcallback_obj, - j_key_buf, reuse_jbuf_key ? key->length() : -1); - - if (env->ExceptionCheck()) { - // exception thrown from CallObjectMethod - if (!reuse_jbuf_key) { - DeleteBuffer(env, j_key_buf); - } - MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - - if (static_cast(jkey_len) != key->length()) { - // key buffer has changed in Java, so update `key` with the result - bool copy_from_non_direct = false; - if (reuse_jbuf_key) { - // reused a buffer - if (m_options->direct_buffer) { - // reused direct buffer - void* key_buf = env->GetDirectBufferAddress(j_key_buf); - if (key_buf == nullptr) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "Unable to get Direct Buffer Address"); - if (!reuse_jbuf_key) { - DeleteBuffer(env, j_key_buf); - } - MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - key->assign(static_cast(key_buf), jkey_len); - } else { - // reused non-direct buffer - copy_from_non_direct = true; - } - } else { - // there was a new buffer - if (m_options->direct_buffer) { - // it was direct... 
don't forget to potentially truncate the `key` - // string - key->resize(jkey_len); - } else { - // it was non-direct - copy_from_non_direct = true; - } - } - - if (copy_from_non_direct) { - jbyteArray jarray = - ByteBufferJni::array(env, j_key_buf, m_jbytebuffer_clazz); - if (jarray == nullptr) { - if (!reuse_jbuf_key) { - DeleteBuffer(env, j_key_buf); - } - MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - jboolean has_exception = JNI_FALSE; - JniUtil::byteString( - env, jarray, - [key, jkey_len](const char* data, const size_t) { - return key->assign(data, static_cast(jkey_len)); - }, - &has_exception); - env->DeleteLocalRef(jarray); - if (has_exception == JNI_TRUE) { - if (!reuse_jbuf_key) { - DeleteBuffer(env, j_key_buf); - } - MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - } - } - - if (!reuse_jbuf_key) { - DeleteBuffer(env, j_key_buf); - } - - MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); - - releaseJniEnv(attached_thread); -} - -inline void ComparatorJniCallback::MaybeLockForReuse( - const std::unique_ptr& mutex, const bool cond) const { - // no need to lock if using thread_local - if (m_options->reused_synchronisation_type != - ReusedSynchronisationType::THREAD_LOCAL && - cond) { - mutex.get()->Lock(); - } -} - -inline void ComparatorJniCallback::MaybeUnlockForReuse( - const std::unique_ptr& mutex, const bool cond) const { - // no need to unlock if using thread_local - if (m_options->reused_synchronisation_type != - ReusedSynchronisationType::THREAD_LOCAL && - cond) { - mutex.get()->Unlock(); - } -} - -jobject ComparatorJniCallback::GetBuffer(JNIEnv* env, const Slice& src, - bool reuse_buffer, - ThreadLocalPtr* tl_buf, - jobject jreuse_buffer) const { - if (reuse_buffer) { - if (m_options->reused_synchronisation_type == - ReusedSynchronisationType::THREAD_LOCAL) { - // reuse thread-local bufffer - ThreadLocalBuf* tlb = reinterpret_cast(tl_buf->Get()); - if (tlb == nullptr) { - // thread-local buffer has not yet been created, so create it - jobject jtl_buf = env->NewGlobalRef(ByteBufferJni::construct( - env, m_options->direct_buffer, m_options->max_reused_buffer_size, - m_jbytebuffer_clazz)); - if (jtl_buf == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - tlb = new ThreadLocalBuf(m_jvm, m_options->direct_buffer, jtl_buf); - tl_buf->Reset(tlb); - } - return ReuseBuffer(env, src, tlb->jbuf); - } else { - // reuse class member buffer - return ReuseBuffer(env, src, jreuse_buffer); - } - } else { - // new buffer - return NewBuffer(env, src); - } -} - -jobject ComparatorJniCallback::ReuseBuffer(JNIEnv* env, const Slice& src, - jobject jreuse_buffer) const { - // we can reuse the buffer - if (m_options->direct_buffer) { - // copy into direct buffer - void* buf = env->GetDirectBufferAddress(jreuse_buffer); - if (buf == nullptr) { - // either memory region is undefined, given object is not a direct - // java.nio.Buffer, or JNI access to direct buffers is not supported by - // this virtual machine. 
- ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "Unable to get Direct Buffer Address"); - return nullptr; - } - memcpy(buf, src.data(), src.size()); - } else { - // copy into non-direct buffer - const jbyteArray jarray = - ByteBufferJni::array(env, jreuse_buffer, m_jbytebuffer_clazz); - if (jarray == nullptr) { - // exception occurred - return nullptr; - } - env->SetByteArrayRegion( - jarray, 0, static_cast(src.size()), - const_cast(reinterpret_cast(src.data()))); - if (env->ExceptionCheck()) { - // exception occurred - env->DeleteLocalRef(jarray); - return nullptr; - } - env->DeleteLocalRef(jarray); - } - return jreuse_buffer; -} - -jobject ComparatorJniCallback::NewBuffer(JNIEnv* env, const Slice& src) const { - // we need a new buffer - jobject jbuf = - ByteBufferJni::constructWith(env, m_options->direct_buffer, src.data(), - src.size(), m_jbytebuffer_clazz); - if (jbuf == nullptr) { - // exception occurred - return nullptr; - } - return jbuf; -} - -void ComparatorJniCallback::DeleteBuffer(JNIEnv* env, jobject jbuffer) const { - env->DeleteLocalRef(jbuffer); -} - -} // namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/comparatorjnicallback.h b/java/rocksjni/comparatorjnicallback.h deleted file mode 100644 index 034c0d5d7..000000000 --- a/java/rocksjni/comparatorjnicallback.h +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::Comparator - -#ifndef JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ -#define JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ - -#include - -#include -#include - -#include "port/port.h" -#include "rocksdb/comparator.h" -#include "rocksdb/slice.h" -#include "rocksjni/jnicallback.h" -#include "util/thread_local.h" - -namespace ROCKSDB_NAMESPACE { - -enum ReusedSynchronisationType { - /** - * Standard mutex. - */ - MUTEX, - - /** - * Use adaptive mutex, which spins in the user space before resorting - * to kernel. This could reduce context switch when the mutex is not - * heavily contended. However, if the mutex is hot, we could end up - * wasting spin time. - */ - ADAPTIVE_MUTEX, - - /** - * There is a reused buffer per-thread. - */ - THREAD_LOCAL -}; - -struct ComparatorJniCallbackOptions { - // Set the synchronisation type used to guard the reused buffers. - // Only used if max_reused_buffer_size > 0. - ReusedSynchronisationType reused_synchronisation_type = ADAPTIVE_MUTEX; - - // Indicates if a direct byte buffer (i.e. outside of the normal - // garbage-collected heap) is used for the callbacks to Java, - // as opposed to a non-direct byte buffer which is a wrapper around - // an on-heap byte[]. - bool direct_buffer = true; - - // Maximum size of a buffer (in bytes) that will be reused. - // Comparators will use 5 of these buffers, - // so the retained memory size will be 5 * max_reused_buffer_size. - // When a buffer is needed for transferring data to a callback, - // if it requires less than max_reused_buffer_size, then an - // existing buffer will be reused, else a new buffer will be - // allocated just for that callback. -1 to disable. - int32_t max_reused_buffer_size = 64; -}; - -/** - * This class acts as a bridge between C++ - * and Java. 
The methods in this class will be - * called back from the RocksDB storage engine (C++) - * we then callback to the appropriate Java method - * this enables Comparators to be implemented in Java. - * - * The design of this Comparator caches the Java Slice - * objects that are used in the compare and findShortestSeparator - * method callbacks. Instead of creating new objects for each callback - * of those functions, by reuse via setHandle we are a lot - * faster; Unfortunately this means that we have to - * introduce independent locking in regions of each of those methods - * via the mutexs mtx_compare and mtx_findShortestSeparator respectively - */ -class ComparatorJniCallback : public JniCallback, public Comparator { - public: - ComparatorJniCallback(JNIEnv* env, jobject jcomparator, - const ComparatorJniCallbackOptions* options); - ~ComparatorJniCallback(); - virtual const char* Name() const; - virtual int Compare(const Slice& a, const Slice& b) const; - virtual void FindShortestSeparator(std::string* start, - const Slice& limit) const; - virtual void FindShortSuccessor(std::string* key) const; - const std::unique_ptr m_options; - - private: - struct ThreadLocalBuf { - ThreadLocalBuf(JavaVM* _jvm, bool _direct_buffer, jobject _jbuf) - : jvm(_jvm), direct_buffer(_direct_buffer), jbuf(_jbuf) {} - JavaVM* jvm; - bool direct_buffer; - jobject jbuf; - }; - inline void MaybeLockForReuse(const std::unique_ptr& mutex, - const bool cond) const; - inline void MaybeUnlockForReuse(const std::unique_ptr& mutex, - const bool cond) const; - jobject GetBuffer(JNIEnv* env, const Slice& src, bool reuse_buffer, - ThreadLocalPtr* tl_buf, jobject jreuse_buffer) const; - jobject ReuseBuffer(JNIEnv* env, const Slice& src, - jobject jreuse_buffer) const; - jobject NewBuffer(JNIEnv* env, const Slice& src) const; - void DeleteBuffer(JNIEnv* env, jobject jbuffer) const; - // used for synchronisation in compare method - std::unique_ptr mtx_compare; - // used for synchronisation in findShortestSeparator method - std::unique_ptr mtx_shortest; - // used for synchronisation in findShortSuccessor method - std::unique_ptr mtx_short; - std::unique_ptr m_name; - jclass m_abstract_comparator_jni_bridge_clazz; // TODO(AR) could we make this - // static somehow? - jclass m_jbytebuffer_clazz; // TODO(AR) we could cache this globally for the - // entire VM if we switch more APIs to use - // ByteBuffer // TODO(AR) could we make this - // static somehow? - jmethodID m_jcompare_mid; // TODO(AR) could we make this static somehow? - jmethodID m_jshortest_mid; // TODO(AR) could we make this static somehow? - jmethodID m_jshort_mid; // TODO(AR) could we make this static somehow? - jobject m_jcompare_buf_a; - jobject m_jcompare_buf_b; - jobject m_jshortest_buf_start; - jobject m_jshortest_buf_limit; - jobject m_jshort_buf_key; - ThreadLocalPtr* m_tl_buf_a; - ThreadLocalPtr* m_tl_buf_b; -}; -} // namespace ROCKSDB_NAMESPACE - -#endif // JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ diff --git a/java/rocksjni/compression_options.cc b/java/rocksjni/compression_options.cc deleted file mode 100644 index 53f240560..000000000 --- a/java/rocksjni/compression_options.cc +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
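The reused_synchronisation_type, direct_buffer and max_reused_buffer_size fields documented in the struct above are populated from the Java side through org.rocksdb.ComparatorOptions. A hedged sketch, assuming the ComparatorOptions setters mirror those field names:

```java
import org.rocksdb.ComparatorOptions;
import org.rocksdb.ReusedSynchronisationType;

public class ComparatorBridgeOptionsExample {
  public static ComparatorOptions bridgeOptions() {
    // Direct ByteBuffers, reuse callback buffers up to 4 KiB per slot,
    // and guard the shared reused buffers with an adaptive mutex.
    return new ComparatorOptions()
        .setUseDirectBuffer(true)
        .setMaxReusedBufferSize(4096)
        .setReusedSynchronisationType(ReusedSynchronisationType.ADAPTIVE_MUTEX);
  }
}
```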
-// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompressionOptions. - -#include - -#include "include/org_rocksdb_CompressionOptions.h" -#include "rocksdb/advanced_options.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_CompressionOptions - * Method: newCompressionOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_CompressionOptions_newCompressionOptions(JNIEnv*, - jclass) { - const auto* opt = new ROCKSDB_NAMESPACE::CompressionOptions(); - return GET_CPLUSPLUS_POINTER(opt); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: setWindowBits - * Signature: (JI)V - */ -void Java_org_rocksdb_CompressionOptions_setWindowBits(JNIEnv*, jobject, - jlong jhandle, - jint jwindow_bits) { - auto* opt = reinterpret_cast(jhandle); - opt->window_bits = static_cast(jwindow_bits); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: windowBits - * Signature: (J)I - */ -jint Java_org_rocksdb_CompressionOptions_windowBits(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->window_bits); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: setLevel - * Signature: (JI)V - */ -void Java_org_rocksdb_CompressionOptions_setLevel(JNIEnv*, jobject, - jlong jhandle, jint jlevel) { - auto* opt = reinterpret_cast(jhandle); - opt->level = static_cast(jlevel); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: level - * Signature: (J)I - */ -jint Java_org_rocksdb_CompressionOptions_level(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->level); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: setStrategy - * Signature: (JI)V - */ -void Java_org_rocksdb_CompressionOptions_setStrategy(JNIEnv*, jobject, - jlong jhandle, - jint jstrategy) { - auto* opt = reinterpret_cast(jhandle); - opt->strategy = static_cast(jstrategy); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: strategy - * Signature: (J)I - */ -jint Java_org_rocksdb_CompressionOptions_strategy(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->strategy); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: setMaxDictBytes - * Signature: (JI)V - */ -void Java_org_rocksdb_CompressionOptions_setMaxDictBytes(JNIEnv*, jobject, - jlong jhandle, - jint jmax_dict_bytes) { - auto* opt = reinterpret_cast(jhandle); - opt->max_dict_bytes = static_cast(jmax_dict_bytes); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: maxDictBytes - * Signature: (J)I - */ -jint Java_org_rocksdb_CompressionOptions_maxDictBytes(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->max_dict_bytes); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: setZstdMaxTrainBytes - * Signature: (JI)V - */ -void Java_org_rocksdb_CompressionOptions_setZstdMaxTrainBytes( - JNIEnv*, jobject, jlong jhandle, jint jzstd_max_train_bytes) { - auto* opt = reinterpret_cast(jhandle); - opt->zstd_max_train_bytes = static_cast(jzstd_max_train_bytes); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: zstdMaxTrainBytes - * Signature: (J)I - */ -jint Java_org_rocksdb_CompressionOptions_zstdMaxTrainBytes(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->zstd_max_train_bytes); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: setMaxDictBufferBytes - * 
Signature: (JJ)V - */ -void Java_org_rocksdb_CompressionOptions_setMaxDictBufferBytes( - JNIEnv*, jobject, jlong jhandle, jlong jmax_dict_buffer_bytes) { - auto* opt = reinterpret_cast(jhandle); - opt->max_dict_buffer_bytes = static_cast(jmax_dict_buffer_bytes); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: maxDictBufferBytes - * Signature: (J)J - */ -jlong Java_org_rocksdb_CompressionOptions_maxDictBufferBytes(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->max_dict_buffer_bytes); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: setZstdMaxTrainBytes - * Signature: (JZ)V - */ -void Java_org_rocksdb_CompressionOptions_setUseZstdDictTrainer( - JNIEnv*, jobject, jlong jhandle, jboolean juse_zstd_dict_trainer) { - auto* opt = reinterpret_cast(jhandle); - opt->use_zstd_dict_trainer = juse_zstd_dict_trainer == JNI_TRUE; -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: zstdMaxTrainBytes - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_CompressionOptions_useZstdDictTrainer(JNIEnv*, - jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->use_zstd_dict_trainer); -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: setEnabled - * Signature: (JZ)V - */ -void Java_org_rocksdb_CompressionOptions_setEnabled(JNIEnv*, jobject, - jlong jhandle, - jboolean jenabled) { - auto* opt = reinterpret_cast(jhandle); - opt->enabled = jenabled == JNI_TRUE; -} - -/* - * Class: org_rocksdb_CompressionOptions - * Method: enabled - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_CompressionOptions_enabled(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->enabled); -} -/* - * Class: org_rocksdb_CompressionOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_CompressionOptions_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - delete reinterpret_cast(jhandle); -} diff --git a/java/rocksjni/concurrent_task_limiter.cc b/java/rocksjni/concurrent_task_limiter.cc deleted file mode 100644 index 0b0b2d271..000000000 --- a/java/rocksjni/concurrent_task_limiter.cc +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
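These natives map one-to-one onto the setters of org.rocksdb.CompressionOptions. A short sketch of configuring ZSTD with a trained dictionary; the fluent chaining and the Options#setCompressionOptions consumer assume the usual RocksJava conventions:

```java
import org.rocksdb.CompressionOptions;
import org.rocksdb.CompressionType;
import org.rocksdb.Options;

public class ZstdCompressionExample {
  public static Options zstdWithDictionary() {
    final CompressionOptions compression = new CompressionOptions()
        .setEnabled(true)
        .setLevel(3)                             // ZSTD compression level
        .setMaxDictBytes(16 * 1024)              // 16 KiB dictionary
        .setZstdMaxTrainBytes(100 * 16 * 1024);  // training-sample budget
    return new Options()
        .setCompressionType(CompressionType.ZSTD_COMPRESSION)
        .setCompressionOptions(compression);
  }
}
```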
- -#include "rocksdb/concurrent_task_limiter.h" - -#include - -#include -#include - -#include "include/org_rocksdb_ConcurrentTaskLimiterImpl.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl - * Method: newConcurrentTaskLimiterImpl0 - * Signature: (Ljava/lang/String;I)J - */ -jlong Java_org_rocksdb_ConcurrentTaskLimiterImpl_newConcurrentTaskLimiterImpl0( - JNIEnv* env, jclass, jstring jname, jint limit) { - jboolean has_exception = JNI_FALSE; - std::string name = - ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jname, &has_exception); - if (JNI_TRUE == has_exception) { - return 0; - } - - auto* ptr = new std::shared_ptr( - ROCKSDB_NAMESPACE::NewConcurrentTaskLimiter(name, limit)); - - return GET_CPLUSPLUS_POINTER(ptr); -} - -/* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl - * Method: name - * Signature: (J)Ljava/lang/String; - */ -jstring Java_org_rocksdb_ConcurrentTaskLimiterImpl_name(JNIEnv* env, jclass, - jlong handle) { - const auto& limiter = *reinterpret_cast< - std::shared_ptr*>(handle); - return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &limiter->GetName()); -} - -/* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl - * Method: setMaxOutstandingTask - * Signature: (JI)V - */ -void Java_org_rocksdb_ConcurrentTaskLimiterImpl_setMaxOutstandingTask( - JNIEnv*, jclass, jlong handle, jint max_outstanding_task) { - const auto& limiter = *reinterpret_cast< - std::shared_ptr*>(handle); - limiter->SetMaxOutstandingTask(static_cast(max_outstanding_task)); -} - -/* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl - * Method: resetMaxOutstandingTask - * Signature: (J)V - */ -void Java_org_rocksdb_ConcurrentTaskLimiterImpl_resetMaxOutstandingTask( - JNIEnv*, jclass, jlong handle) { - const auto& limiter = *reinterpret_cast< - std::shared_ptr*>(handle); - limiter->ResetMaxOutstandingTask(); -} - -/* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl - * Method: outstandingTask - * Signature: (J)I - */ -jint Java_org_rocksdb_ConcurrentTaskLimiterImpl_outstandingTask(JNIEnv*, jclass, - jlong handle) { - const auto& limiter = *reinterpret_cast< - std::shared_ptr*>(handle); - return static_cast(limiter->GetOutstandingTask()); -} - -/* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_ConcurrentTaskLimiterImpl_disposeInternal(JNIEnv*, - jobject, - jlong jhandle) { - auto* ptr = reinterpret_cast< - std::shared_ptr*>(jhandle); - delete ptr; // delete std::shared_ptr -} diff --git a/java/rocksjni/config_options.cc b/java/rocksjni/config_options.cc deleted file mode 100644 index 55a9cbb66..000000000 --- a/java/rocksjni/config_options.cc +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling C++ ROCKSDB_NAMESPACE::ConfigOptions methods -// from Java side. 
- -#include - -#include "include/org_rocksdb_ConfigOptions.h" -#include "rocksdb/convenience.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_ConfigOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_ConfigOptions_disposeInternal(JNIEnv *, jobject, - jlong jhandle) { - auto *co = reinterpret_cast(jhandle); - assert(co != nullptr); - delete co; -} - -/* - * Class: org_rocksdb_ConfigOptions - * Method: newConfigOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_ConfigOptions_newConfigOptions(JNIEnv *, jclass) { - auto *cfg_opt = new ROCKSDB_NAMESPACE::ConfigOptions(); - return GET_CPLUSPLUS_POINTER(cfg_opt); -} - -/* - * Class: org_rocksdb_ConfigOptions - * Method: setEnv - * Signature: (JJ;)V - */ -void Java_org_rocksdb_ConfigOptions_setEnv(JNIEnv *, jclass, jlong handle, - jlong rocksdb_env_handle) { - auto *cfg_opt = reinterpret_cast(handle); - auto *rocksdb_env = - reinterpret_cast(rocksdb_env_handle); - cfg_opt->env = rocksdb_env; -} - -/* - * Class: org_rocksdb_ConfigOptions - * Method: setDelimiter - * Signature: (JLjava/lang/String;)V - */ -void Java_org_rocksdb_ConfigOptions_setDelimiter(JNIEnv *env, jclass, - jlong handle, jstring s) { - auto *cfg_opt = reinterpret_cast(handle); - const char *delim = env->GetStringUTFChars(s, nullptr); - if (delim == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - cfg_opt->delimiter = delim; - env->ReleaseStringUTFChars(s, delim); -} - -/* - * Class: org_rocksdb_ConfigOptions - * Method: setIgnoreUnknownOptions - * Signature: (JZ)V - */ -void Java_org_rocksdb_ConfigOptions_setIgnoreUnknownOptions(JNIEnv *, jclass, - jlong handle, - jboolean b) { - auto *cfg_opt = reinterpret_cast(handle); - cfg_opt->ignore_unknown_options = static_cast(b); -} - -/* - * Class: org_rocksdb_ConfigOptions - * Method: setInputStringsEscaped - * Signature: (JZ)V - */ -void Java_org_rocksdb_ConfigOptions_setInputStringsEscaped(JNIEnv *, jclass, - jlong handle, - jboolean b) { - auto *cfg_opt = reinterpret_cast(handle); - cfg_opt->input_strings_escaped = static_cast(b); -} - -/* - * Class: org_rocksdb_ConfigOptions - * Method: setSanityLevel - * Signature: (JI)V - */ -void Java_org_rocksdb_ConfigOptions_setSanityLevel(JNIEnv *, jclass, - jlong handle, jbyte level) { - auto *cfg_opt = reinterpret_cast(handle); - cfg_opt->sanity_level = - ROCKSDB_NAMESPACE::SanityLevelJni::toCppSanityLevel(level); -} diff --git a/java/rocksjni/cplusplus_to_java_convert.h b/java/rocksjni/cplusplus_to_java_convert.h deleted file mode 100644 index 0eea6fa2c..000000000 --- a/java/rocksjni/cplusplus_to_java_convert.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#pragma once - -/* - * This macro is used for 32 bit OS. In 32 bit OS, the result number is a - negative number if we use reinterpret_cast(pointer). - * For example, jlong ptr = reinterpret_cast(pointer), ptr is a negative - number in 32 bit OS. - * If we check ptr using ptr > 0, it fails. For example, the following code is - not correct. 
- * if (jblock_cache_handle > 0) { - std::shared_ptr *pCache = - reinterpret_cast *>( - jblock_cache_handle); - options.block_cache = *pCache; - } - * But the result number is positive number if we do - reinterpret_cast(pointer) first and then cast it to jlong. size_t is 4 - bytes long in 32 bit OS and 8 bytes long in 64 bit OS. - static_cast(reinterpret_cast(_pointer)) is also working in 64 - bit OS. - * - * We don't need an opposite cast because it works from jlong to c++ pointer in - both 32 bit and 64 bit OS. - * For example, the following code is working in both 32 bit and 64 bit OS. - jblock_cache_handle is jlong. - * std::shared_ptr *pCache = - reinterpret_cast *>( - jblock_cache_handle); -*/ - -#define GET_CPLUSPLUS_POINTER(_pointer) \ - static_cast(reinterpret_cast(_pointer)) diff --git a/java/rocksjni/env.cc b/java/rocksjni/env.cc deleted file mode 100644 index bb739fe2b..000000000 --- a/java/rocksjni/env.cc +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::Env methods from Java side. - -#include "rocksdb/env.h" - -#include - -#include - -#include "include/org_rocksdb_Env.h" -#include "include/org_rocksdb_RocksEnv.h" -#include "include/org_rocksdb_RocksMemEnv.h" -#include "include/org_rocksdb_TimedEnv.h" -#include "portal.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_Env - * Method: getDefaultEnvInternal - * Signature: ()J - */ -jlong Java_org_rocksdb_Env_getDefaultEnvInternal(JNIEnv*, jclass) { - return GET_CPLUSPLUS_POINTER(ROCKSDB_NAMESPACE::Env::Default()); -} - -/* - * Class: org_rocksdb_RocksEnv - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_RocksEnv_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* e = reinterpret_cast(jhandle); - assert(e != nullptr); - delete e; -} - -/* - * Class: org_rocksdb_Env - * Method: setBackgroundThreads - * Signature: (JIB)V - */ -void Java_org_rocksdb_Env_setBackgroundThreads(JNIEnv*, jobject, jlong jhandle, - jint jnum, - jbyte jpriority_value) { - auto* rocks_env = reinterpret_cast(jhandle); - rocks_env->SetBackgroundThreads( - static_cast(jnum), - ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); -} - -/* - * Class: org_rocksdb_Env - * Method: getBackgroundThreads - * Signature: (JB)I - */ -jint Java_org_rocksdb_Env_getBackgroundThreads(JNIEnv*, jobject, jlong jhandle, - jbyte jpriority_value) { - auto* rocks_env = reinterpret_cast(jhandle); - const int num = rocks_env->GetBackgroundThreads( - ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); - return static_cast(num); -} - -/* - * Class: org_rocksdb_Env - * Method: getThreadPoolQueueLen - * Signature: (JB)I - */ -jint Java_org_rocksdb_Env_getThreadPoolQueueLen(JNIEnv*, jobject, jlong jhandle, - jbyte jpriority_value) { - auto* rocks_env = reinterpret_cast(jhandle); - const int queue_len = rocks_env->GetThreadPoolQueueLen( - ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); - return static_cast(queue_len); -} - -/* - * Class: org_rocksdb_Env - * Method: incBackgroundThreadsIfNeeded - * Signature: (JIB)V - */ -void Java_org_rocksdb_Env_incBackgroundThreadsIfNeeded(JNIEnv*, jobject, - jlong jhandle, jint jnum, - jbyte 
jpriority_value) { - auto* rocks_env = reinterpret_cast(jhandle); - rocks_env->IncBackgroundThreadsIfNeeded( - static_cast(jnum), - ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); -} - -/* - * Class: org_rocksdb_Env - * Method: lowerThreadPoolIOPriority - * Signature: (JB)V - */ -void Java_org_rocksdb_Env_lowerThreadPoolIOPriority(JNIEnv*, jobject, - jlong jhandle, - jbyte jpriority_value) { - auto* rocks_env = reinterpret_cast(jhandle); - rocks_env->LowerThreadPoolIOPriority( - ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); -} - -/* - * Class: org_rocksdb_Env - * Method: lowerThreadPoolCPUPriority - * Signature: (JB)V - */ -void Java_org_rocksdb_Env_lowerThreadPoolCPUPriority(JNIEnv*, jobject, - jlong jhandle, - jbyte jpriority_value) { - auto* rocks_env = reinterpret_cast(jhandle); - rocks_env->LowerThreadPoolCPUPriority( - ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); -} - -/* - * Class: org_rocksdb_Env - * Method: getThreadList - * Signature: (J)[Lorg/rocksdb/ThreadStatus; - */ -jobjectArray Java_org_rocksdb_Env_getThreadList(JNIEnv* env, jobject, - jlong jhandle) { - auto* rocks_env = reinterpret_cast(jhandle); - std::vector thread_status; - ROCKSDB_NAMESPACE::Status s = rocks_env->GetThreadList(&thread_status); - if (!s.ok()) { - // error, throw exception - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - // object[] - const jsize len = static_cast(thread_status.size()); - jobjectArray jthread_status = env->NewObjectArray( - len, ROCKSDB_NAMESPACE::ThreadStatusJni::getJClass(env), nullptr); - if (jthread_status == nullptr) { - // an exception occurred - return nullptr; - } - for (jsize i = 0; i < len; ++i) { - jobject jts = - ROCKSDB_NAMESPACE::ThreadStatusJni::construct(env, &(thread_status[i])); - env->SetObjectArrayElement(jthread_status, i, jts); - if (env->ExceptionCheck()) { - // exception occurred - env->DeleteLocalRef(jthread_status); - return nullptr; - } - } - - return jthread_status; -} - -/* - * Class: org_rocksdb_RocksMemEnv - * Method: createMemEnv - * Signature: (J)J - */ -jlong Java_org_rocksdb_RocksMemEnv_createMemEnv(JNIEnv*, jclass, - jlong jbase_env_handle) { - auto* base_env = reinterpret_cast(jbase_env_handle); - return GET_CPLUSPLUS_POINTER(ROCKSDB_NAMESPACE::NewMemEnv(base_env)); -} - -/* - * Class: org_rocksdb_RocksMemEnv - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_RocksMemEnv_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* e = reinterpret_cast(jhandle); - assert(e != nullptr); - delete e; -} - -/* - * Class: org_rocksdb_TimedEnv - * Method: createTimedEnv - * Signature: (J)J - */ -jlong Java_org_rocksdb_TimedEnv_createTimedEnv(JNIEnv*, jclass, - jlong jbase_env_handle) { - auto* base_env = reinterpret_cast(jbase_env_handle); - return GET_CPLUSPLUS_POINTER(ROCKSDB_NAMESPACE::NewTimedEnv(base_env)); -} - -/* - * Class: org_rocksdb_TimedEnv - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_TimedEnv_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* e = reinterpret_cast(jhandle); - assert(e != nullptr); - delete e; -} diff --git a/java/rocksjni/env_options.cc b/java/rocksjni/env_options.cc deleted file mode 100644 index 3237e2775..000000000 --- a/java/rocksjni/env_options.cc +++ /dev/null @@ -1,305 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
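The env.cc bindings above expose the default Env and its thread pools to Java. A short sketch of the Java side; the Priority constants and Options#setEnv follow the names used by the JNI helpers above, and the exact enum values are an assumption:

```java
import org.rocksdb.Env;
import org.rocksdb.Options;
import org.rocksdb.Priority;

public class EnvTuningExample {
  public static Options tunedEnv() {
    // HIGH pool serves flushes, LOW pool serves compactions.
    final Env env = Env.getDefault();
    env.setBackgroundThreads(2, Priority.HIGH);
    env.setBackgroundThreads(6, Priority.LOW);
    return new Options().setEnv(env);
  }
}
```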
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling C++ ROCKSDB_NAMESPACE::EnvOptions methods -// from Java side. - -#include - -#include "include/org_rocksdb_EnvOptions.h" -#include "rocksdb/env.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -#define ENV_OPTIONS_SET_BOOL(_jhandle, _opt) \ - reinterpret_cast(_jhandle)->_opt = \ - static_cast(_opt) - -#define ENV_OPTIONS_SET_SIZE_T(_jhandle, _opt) \ - reinterpret_cast(_jhandle)->_opt = \ - static_cast(_opt) - -#define ENV_OPTIONS_SET_UINT64_T(_jhandle, _opt) \ - reinterpret_cast(_jhandle)->_opt = \ - static_cast(_opt) - -#define ENV_OPTIONS_GET(_jhandle, _opt) \ - reinterpret_cast(_jhandle)->_opt - -/* - * Class: org_rocksdb_EnvOptions - * Method: newEnvOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_EnvOptions_newEnvOptions__(JNIEnv *, jclass) { - auto *env_opt = new ROCKSDB_NAMESPACE::EnvOptions(); - return GET_CPLUSPLUS_POINTER(env_opt); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: newEnvOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_EnvOptions_newEnvOptions__J(JNIEnv *, jclass, - jlong jdboptions_handle) { - auto *db_options = - reinterpret_cast(jdboptions_handle); - auto *env_opt = new ROCKSDB_NAMESPACE::EnvOptions(*db_options); - return GET_CPLUSPLUS_POINTER(env_opt); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_EnvOptions_disposeInternal(JNIEnv *, jobject, - jlong jhandle) { - auto *eo = reinterpret_cast(jhandle); - assert(eo != nullptr); - delete eo; -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setUseMmapReads - * Signature: (JZ)V - */ -void Java_org_rocksdb_EnvOptions_setUseMmapReads(JNIEnv *, jobject, - jlong jhandle, - jboolean use_mmap_reads) { - ENV_OPTIONS_SET_BOOL(jhandle, use_mmap_reads); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: useMmapReads - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_EnvOptions_useMmapReads(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, use_mmap_reads); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setUseMmapWrites - * Signature: (JZ)V - */ -void Java_org_rocksdb_EnvOptions_setUseMmapWrites(JNIEnv *, jobject, - jlong jhandle, - jboolean use_mmap_writes) { - ENV_OPTIONS_SET_BOOL(jhandle, use_mmap_writes); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: useMmapWrites - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_EnvOptions_useMmapWrites(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, use_mmap_writes); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setUseDirectReads - * Signature: (JZ)V - */ -void Java_org_rocksdb_EnvOptions_setUseDirectReads(JNIEnv *, jobject, - jlong jhandle, - jboolean use_direct_reads) { - ENV_OPTIONS_SET_BOOL(jhandle, use_direct_reads); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: useDirectReads - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_EnvOptions_useDirectReads(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, use_direct_reads); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setUseDirectWrites - * Signature: (JZ)V - */ -void Java_org_rocksdb_EnvOptions_setUseDirectWrites( - JNIEnv *, jobject, jlong jhandle, jboolean use_direct_writes) { - ENV_OPTIONS_SET_BOOL(jhandle, use_direct_writes); -} - -/* - 
* Class: org_rocksdb_EnvOptions - * Method: useDirectWrites - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_EnvOptions_useDirectWrites(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, use_direct_writes); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setAllowFallocate - * Signature: (JZ)V - */ -void Java_org_rocksdb_EnvOptions_setAllowFallocate(JNIEnv *, jobject, - jlong jhandle, - jboolean allow_fallocate) { - ENV_OPTIONS_SET_BOOL(jhandle, allow_fallocate); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: allowFallocate - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_EnvOptions_allowFallocate(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, allow_fallocate); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setSetFdCloexec - * Signature: (JZ)V - */ -void Java_org_rocksdb_EnvOptions_setSetFdCloexec(JNIEnv *, jobject, - jlong jhandle, - jboolean set_fd_cloexec) { - ENV_OPTIONS_SET_BOOL(jhandle, set_fd_cloexec); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setFdCloexec - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_EnvOptions_setFdCloexec(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, set_fd_cloexec); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setBytesPerSync - * Signature: (JJ)V - */ -void Java_org_rocksdb_EnvOptions_setBytesPerSync(JNIEnv *, jobject, - jlong jhandle, - jlong bytes_per_sync) { - ENV_OPTIONS_SET_UINT64_T(jhandle, bytes_per_sync); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: bytesPerSync - * Signature: (J)J - */ -jlong Java_org_rocksdb_EnvOptions_bytesPerSync(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, bytes_per_sync); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setFallocateWithKeepSize - * Signature: (JZ)V - */ -void Java_org_rocksdb_EnvOptions_setFallocateWithKeepSize( - JNIEnv *, jobject, jlong jhandle, jboolean fallocate_with_keep_size) { - ENV_OPTIONS_SET_BOOL(jhandle, fallocate_with_keep_size); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: fallocateWithKeepSize - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_EnvOptions_fallocateWithKeepSize(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, fallocate_with_keep_size); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setCompactionReadaheadSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_EnvOptions_setCompactionReadaheadSize( - JNIEnv *, jobject, jlong jhandle, jlong compaction_readahead_size) { - ENV_OPTIONS_SET_SIZE_T(jhandle, compaction_readahead_size); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: compactionReadaheadSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_EnvOptions_compactionReadaheadSize(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, compaction_readahead_size); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setRandomAccessMaxBufferSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_EnvOptions_setRandomAccessMaxBufferSize( - JNIEnv *, jobject, jlong jhandle, jlong random_access_max_buffer_size) { - ENV_OPTIONS_SET_SIZE_T(jhandle, random_access_max_buffer_size); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: randomAccessMaxBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_EnvOptions_randomAccessMaxBufferSize(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, random_access_max_buffer_size); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setWritableFileMaxBufferSize - * Signature: (JJ)V - */ -void 
Java_org_rocksdb_EnvOptions_setWritableFileMaxBufferSize( - JNIEnv *, jobject, jlong jhandle, jlong writable_file_max_buffer_size) { - ENV_OPTIONS_SET_SIZE_T(jhandle, writable_file_max_buffer_size); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: writableFileMaxBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_EnvOptions_writableFileMaxBufferSize(JNIEnv *, jobject, - jlong jhandle) { - return ENV_OPTIONS_GET(jhandle, writable_file_max_buffer_size); -} - -/* - * Class: org_rocksdb_EnvOptions - * Method: setRateLimiter - * Signature: (JJ)V - */ -void Java_org_rocksdb_EnvOptions_setRateLimiter(JNIEnv *, jobject, - jlong jhandle, - jlong rl_handle) { - auto *sptr_rate_limiter = - reinterpret_cast *>( - rl_handle); - auto *env_opt = reinterpret_cast(jhandle); - env_opt->rate_limiter = sptr_rate_limiter->get(); -} diff --git a/java/rocksjni/event_listener.cc b/java/rocksjni/event_listener.cc deleted file mode 100644 index 965932c9c..000000000 --- a/java/rocksjni/event_listener.cc +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::EventListener. - -#include - -#include - -#include "include/org_rocksdb_AbstractEventListener.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/event_listener_jnicallback.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_AbstractEventListener - * Method: createNewEventListener - * Signature: (J)J - */ -jlong Java_org_rocksdb_AbstractEventListener_createNewEventListener( - JNIEnv* env, jobject jobj, jlong jenabled_event_callback_values) { - auto enabled_event_callbacks = - ROCKSDB_NAMESPACE::EnabledEventCallbackJni::toCppEnabledEventCallbacks( - jenabled_event_callback_values); - auto* sptr_event_listener = - new std::shared_ptr( - new ROCKSDB_NAMESPACE::EventListenerJniCallback( - env, jobj, enabled_event_callbacks)); - return GET_CPLUSPLUS_POINTER(sptr_event_listener); -} - -/* - * Class: org_rocksdb_AbstractEventListener - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_AbstractEventListener_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - delete reinterpret_cast*>( - jhandle); -} diff --git a/java/rocksjni/event_listener_jnicallback.cc b/java/rocksjni/event_listener_jnicallback.cc deleted file mode 100644 index 342d938b4..000000000 --- a/java/rocksjni/event_listener_jnicallback.cc +++ /dev/null @@ -1,502 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::EventListener. 
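The ENV_OPTIONS_SET_* and ENV_OPTIONS_GET macros above also lost their template arguments; the pattern they implement is simply "treat the jlong handle as an EnvOptions pointer, then assign or read the named field". A hand-expanded sketch of one setter/getter pair, assuming the reconstructed casts are EnvOptions* and bool; the function names are illustrative.

// What a setter such as setUseMmapReads boils down to once the macro is
// expanded. The EnvOptions* and bool casts are reconstructed assumptions,
// since the angle-bracket text was stripped from the deleted source.
#include <jni.h>

#include "rocksdb/env.h"

void SetUseMmapReadsExpanded(jlong jhandle, jboolean use_mmap_reads) {
  // The jlong handle is the address of a heap-allocated EnvOptions.
  reinterpret_cast<ROCKSDB_NAMESPACE::EnvOptions*>(jhandle)->use_mmap_reads =
      static_cast<bool>(use_mmap_reads);
}

jboolean GetUseMmapReadsExpanded(jlong jhandle) {
  // The GET macro simply dereferences the same handle and reads the field.
  return reinterpret_cast<ROCKSDB_NAMESPACE::EnvOptions*>(jhandle)
      ->use_mmap_reads;
}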
- -#include "rocksjni/event_listener_jnicallback.h" - -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { -EventListenerJniCallback::EventListenerJniCallback( - JNIEnv* env, jobject jevent_listener, - const std::set& enabled_event_callbacks) - : JniCallback(env, jevent_listener), - m_enabled_event_callbacks(enabled_event_callbacks) { - InitCallbackMethodId( - m_on_flush_completed_proxy_mid, EnabledEventCallback::ON_FLUSH_COMPLETED, - env, AbstractEventListenerJni::getOnFlushCompletedProxyMethodId); - - InitCallbackMethodId(m_on_flush_begin_proxy_mid, - EnabledEventCallback::ON_FLUSH_BEGIN, env, - AbstractEventListenerJni::getOnFlushBeginProxyMethodId); - - InitCallbackMethodId(m_on_table_file_deleted_mid, - EnabledEventCallback::ON_TABLE_FILE_DELETED, env, - AbstractEventListenerJni::getOnTableFileDeletedMethodId); - - InitCallbackMethodId( - m_on_compaction_begin_proxy_mid, - EnabledEventCallback::ON_COMPACTION_BEGIN, env, - AbstractEventListenerJni::getOnCompactionBeginProxyMethodId); - - InitCallbackMethodId( - m_on_compaction_completed_proxy_mid, - EnabledEventCallback::ON_COMPACTION_COMPLETED, env, - AbstractEventListenerJni::getOnCompactionCompletedProxyMethodId); - - InitCallbackMethodId(m_on_table_file_created_mid, - EnabledEventCallback::ON_TABLE_FILE_CREATED, env, - AbstractEventListenerJni::getOnTableFileCreatedMethodId); - - InitCallbackMethodId( - m_on_table_file_creation_started_mid, - EnabledEventCallback::ON_TABLE_FILE_CREATION_STARTED, env, - AbstractEventListenerJni::getOnTableFileCreationStartedMethodId); - - InitCallbackMethodId(m_on_mem_table_sealed_mid, - EnabledEventCallback::ON_MEMTABLE_SEALED, env, - AbstractEventListenerJni::getOnMemTableSealedMethodId); - - InitCallbackMethodId( - m_on_column_family_handle_deletion_started_mid, - EnabledEventCallback::ON_COLUMN_FAMILY_HANDLE_DELETION_STARTED, env, - AbstractEventListenerJni::getOnColumnFamilyHandleDeletionStartedMethodId); - - InitCallbackMethodId( - m_on_external_file_ingested_proxy_mid, - EnabledEventCallback::ON_EXTERNAL_FILE_INGESTED, env, - AbstractEventListenerJni::getOnExternalFileIngestedProxyMethodId); - - InitCallbackMethodId( - m_on_background_error_proxy_mid, - EnabledEventCallback::ON_BACKGROUND_ERROR, env, - AbstractEventListenerJni::getOnBackgroundErrorProxyMethodId); - - InitCallbackMethodId( - m_on_stall_conditions_changed_mid, - EnabledEventCallback::ON_STALL_CONDITIONS_CHANGED, env, - AbstractEventListenerJni::getOnStallConditionsChangedMethodId); - - InitCallbackMethodId(m_on_file_read_finish_mid, - EnabledEventCallback::ON_FILE_READ_FINISH, env, - AbstractEventListenerJni::getOnFileReadFinishMethodId); - - InitCallbackMethodId(m_on_file_write_finish_mid, - EnabledEventCallback::ON_FILE_WRITE_FINISH, env, - AbstractEventListenerJni::getOnFileWriteFinishMethodId); - - InitCallbackMethodId(m_on_file_flush_finish_mid, - EnabledEventCallback::ON_FILE_FLUSH_FINISH, env, - AbstractEventListenerJni::getOnFileFlushFinishMethodId); - - InitCallbackMethodId(m_on_file_sync_finish_mid, - EnabledEventCallback::ON_FILE_SYNC_FINISH, env, - AbstractEventListenerJni::getOnFileSyncFinishMethodId); - - InitCallbackMethodId( - m_on_file_range_sync_finish_mid, - EnabledEventCallback::ON_FILE_RANGE_SYNC_FINISH, env, - AbstractEventListenerJni::getOnFileRangeSyncFinishMethodId); - - InitCallbackMethodId( - m_on_file_truncate_finish_mid, - EnabledEventCallback::ON_FILE_TRUNCATE_FINISH, env, - AbstractEventListenerJni::getOnFileTruncateFinishMethodId); - - InitCallbackMethodId(m_on_file_close_finish_mid, - 
EnabledEventCallback::ON_FILE_CLOSE_FINISH, env, - AbstractEventListenerJni::getOnFileCloseFinishMethodId); - - InitCallbackMethodId( - m_should_be_notified_on_file_io, - EnabledEventCallback::SHOULD_BE_NOTIFIED_ON_FILE_IO, env, - AbstractEventListenerJni::getShouldBeNotifiedOnFileIOMethodId); - - InitCallbackMethodId( - m_on_error_recovery_begin_proxy_mid, - EnabledEventCallback::ON_ERROR_RECOVERY_BEGIN, env, - AbstractEventListenerJni::getOnErrorRecoveryBeginProxyMethodId); - - InitCallbackMethodId( - m_on_error_recovery_completed_mid, - EnabledEventCallback::ON_ERROR_RECOVERY_COMPLETED, env, - AbstractEventListenerJni::getOnErrorRecoveryCompletedMethodId); -} - -EventListenerJniCallback::~EventListenerJniCallback() {} - -void EventListenerJniCallback::OnFlushCompleted( - DB* db, const FlushJobInfo& flush_job_info) { - if (m_on_flush_completed_proxy_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jflush_job_info = SetupCallbackInvocation( - env, attached_thread, flush_job_info, - FlushJobInfoJni::fromCppFlushJobInfo); - - if (jflush_job_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_flush_completed_proxy_mid, - reinterpret_cast(db), jflush_job_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jflush_job_info}); -} - -void EventListenerJniCallback::OnFlushBegin( - DB* db, const FlushJobInfo& flush_job_info) { - if (m_on_flush_begin_proxy_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jflush_job_info = SetupCallbackInvocation( - env, attached_thread, flush_job_info, - FlushJobInfoJni::fromCppFlushJobInfo); - - if (jflush_job_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_flush_begin_proxy_mid, - reinterpret_cast(db), jflush_job_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jflush_job_info}); -} - -void EventListenerJniCallback::OnTableFileDeleted( - const TableFileDeletionInfo& info) { - if (m_on_table_file_deleted_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jdeletion_info = SetupCallbackInvocation( - env, attached_thread, info, - TableFileDeletionInfoJni::fromCppTableFileDeletionInfo); - - if (jdeletion_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_table_file_deleted_mid, - jdeletion_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jdeletion_info}); -} - -void EventListenerJniCallback::OnCompactionBegin(DB* db, - const CompactionJobInfo& ci) { - if (m_on_compaction_begin_proxy_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jcompaction_job_info = SetupCallbackInvocation( - env, attached_thread, ci, CompactionJobInfoJni::fromCppCompactionJobInfo); - - if (jcompaction_job_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_compaction_begin_proxy_mid, - reinterpret_cast(db), jcompaction_job_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jcompaction_job_info}); -} - -void EventListenerJniCallback::OnCompactionCompleted( - DB* db, const CompactionJobInfo& ci) { - if (m_on_compaction_completed_proxy_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jcompaction_job_info = SetupCallbackInvocation( - env, attached_thread, ci, CompactionJobInfoJni::fromCppCompactionJobInfo); - - if (jcompaction_job_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_compaction_completed_proxy_mid, - reinterpret_cast(db), jcompaction_job_info); - } - - CleanupCallbackInvocation(env, 
attached_thread, {&jcompaction_job_info}); -} - -void EventListenerJniCallback::OnTableFileCreated( - const TableFileCreationInfo& info) { - if (m_on_table_file_created_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jfile_creation_info = SetupCallbackInvocation( - env, attached_thread, info, - TableFileCreationInfoJni::fromCppTableFileCreationInfo); - - if (jfile_creation_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_table_file_created_mid, - jfile_creation_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jfile_creation_info}); -} - -void EventListenerJniCallback::OnTableFileCreationStarted( - const TableFileCreationBriefInfo& info) { - if (m_on_table_file_creation_started_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jcreation_brief_info = - SetupCallbackInvocation( - env, attached_thread, info, - TableFileCreationBriefInfoJni::fromCppTableFileCreationBriefInfo); - - if (jcreation_brief_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_table_file_creation_started_mid, - jcreation_brief_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jcreation_brief_info}); -} - -void EventListenerJniCallback::OnMemTableSealed(const MemTableInfo& info) { - if (m_on_mem_table_sealed_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jmem_table_info = SetupCallbackInvocation( - env, attached_thread, info, MemTableInfoJni::fromCppMemTableInfo); - - if (jmem_table_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_mem_table_sealed_mid, - jmem_table_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jmem_table_info}); -} - -void EventListenerJniCallback::OnColumnFamilyHandleDeletionStarted( - ColumnFamilyHandle* handle) { - if (m_on_column_family_handle_deletion_started_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jcf_handle = SetupCallbackInvocation( - env, attached_thread, *handle, - ColumnFamilyHandleJni::fromCppColumnFamilyHandle); - - if (jcf_handle != nullptr) { - env->CallVoidMethod(m_jcallback_obj, - m_on_column_family_handle_deletion_started_mid, - jcf_handle); - } - - CleanupCallbackInvocation(env, attached_thread, {&jcf_handle}); -} - -void EventListenerJniCallback::OnExternalFileIngested( - DB* db, const ExternalFileIngestionInfo& info) { - if (m_on_external_file_ingested_proxy_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jingestion_info = SetupCallbackInvocation( - env, attached_thread, info, - ExternalFileIngestionInfoJni::fromCppExternalFileIngestionInfo); - - if (jingestion_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_external_file_ingested_proxy_mid, - reinterpret_cast(db), jingestion_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jingestion_info}); -} - -void EventListenerJniCallback::OnBackgroundError(BackgroundErrorReason reason, - Status* bg_error) { - if (m_on_background_error_proxy_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jstatus = SetupCallbackInvocation( - env, attached_thread, *bg_error, StatusJni::construct); - - if (jstatus != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_background_error_proxy_mid, - static_cast(reason), jstatus); - } - - CleanupCallbackInvocation(env, attached_thread, {&jstatus}); -} - -void EventListenerJniCallback::OnStallConditionsChanged( - const WriteStallInfo& info) { - if 
(m_on_stall_conditions_changed_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jwrite_stall_info = SetupCallbackInvocation( - env, attached_thread, info, WriteStallInfoJni::fromCppWriteStallInfo); - - if (jwrite_stall_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_stall_conditions_changed_mid, - jwrite_stall_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jwrite_stall_info}); -} - -void EventListenerJniCallback::OnFileReadFinish(const FileOperationInfo& info) { - OnFileOperation(m_on_file_read_finish_mid, info); -} - -void EventListenerJniCallback::OnFileWriteFinish( - const FileOperationInfo& info) { - OnFileOperation(m_on_file_write_finish_mid, info); -} - -void EventListenerJniCallback::OnFileFlushFinish( - const FileOperationInfo& info) { - OnFileOperation(m_on_file_flush_finish_mid, info); -} - -void EventListenerJniCallback::OnFileSyncFinish(const FileOperationInfo& info) { - OnFileOperation(m_on_file_sync_finish_mid, info); -} - -void EventListenerJniCallback::OnFileRangeSyncFinish( - const FileOperationInfo& info) { - OnFileOperation(m_on_file_range_sync_finish_mid, info); -} - -void EventListenerJniCallback::OnFileTruncateFinish( - const FileOperationInfo& info) { - OnFileOperation(m_on_file_truncate_finish_mid, info); -} - -void EventListenerJniCallback::OnFileCloseFinish( - const FileOperationInfo& info) { - OnFileOperation(m_on_file_close_finish_mid, info); -} - -bool EventListenerJniCallback::ShouldBeNotifiedOnFileIO() { - if (m_should_be_notified_on_file_io == nullptr) { - return false; - } - - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - jboolean jshould_be_notified = - env->CallBooleanMethod(m_jcallback_obj, m_should_be_notified_on_file_io); - - CleanupCallbackInvocation(env, attached_thread, {}); - - return static_cast(jshould_be_notified); -} - -void EventListenerJniCallback::OnErrorRecoveryBegin( - BackgroundErrorReason reason, Status bg_error, bool* auto_recovery) { - if (m_on_error_recovery_begin_proxy_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jbg_error = SetupCallbackInvocation( - env, attached_thread, bg_error, StatusJni::construct); - - if (jbg_error != nullptr) { - jboolean jauto_recovery = env->CallBooleanMethod( - m_jcallback_obj, m_on_error_recovery_begin_proxy_mid, - static_cast(reason), jbg_error); - *auto_recovery = jauto_recovery == JNI_TRUE; - } - - CleanupCallbackInvocation(env, attached_thread, {&jbg_error}); -} - -void EventListenerJniCallback::OnErrorRecoveryCompleted(Status old_bg_error) { - if (m_on_error_recovery_completed_mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jold_bg_error = SetupCallbackInvocation( - env, attached_thread, old_bg_error, StatusJni::construct); - - if (jold_bg_error != nullptr) { - env->CallVoidMethod(m_jcallback_obj, m_on_error_recovery_completed_mid, - jold_bg_error); - } - - CleanupCallbackInvocation(env, attached_thread, {&jold_bg_error}); -} - -void EventListenerJniCallback::InitCallbackMethodId( - jmethodID& mid, EnabledEventCallback eec, JNIEnv* env, - jmethodID (*get_id)(JNIEnv* env)) { - if (m_enabled_event_callbacks.count(eec) == 1) { - mid = get_id(env); - } else { - mid = nullptr; - } -} - -template -jobject EventListenerJniCallback::SetupCallbackInvocation( - JNIEnv*& env, jboolean& attached_thread, const T& cpp_obj, - jobject (*convert)(JNIEnv* env, const T* cpp_obj)) { - attached_thread = 
JNI_FALSE; - env = getJniEnv(&attached_thread); - assert(env != nullptr); - - return convert(env, &cpp_obj); -} - -void EventListenerJniCallback::CleanupCallbackInvocation( - JNIEnv* env, jboolean attached_thread, - std::initializer_list refs) { - for (auto* ref : refs) { - if (*ref == nullptr) continue; - env->DeleteLocalRef(*ref); - } - - if (env->ExceptionCheck()) { - // exception thrown from CallVoidMethod - env->ExceptionDescribe(); // print out exception to stderr - } - - releaseJniEnv(attached_thread); -} - -void EventListenerJniCallback::OnFileOperation(const jmethodID& mid, - const FileOperationInfo& info) { - if (mid == nullptr) { - return; - } - - JNIEnv* env; - jboolean attached_thread; - jobject jop_info = SetupCallbackInvocation( - env, attached_thread, info, - FileOperationInfoJni::fromCppFileOperationInfo); - - if (jop_info != nullptr) { - env->CallVoidMethod(m_jcallback_obj, mid, jop_info); - } - - CleanupCallbackInvocation(env, attached_thread, {&jop_info}); -} -} // namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/event_listener_jnicallback.h b/java/rocksjni/event_listener_jnicallback.h deleted file mode 100644 index f4a235a23..000000000 --- a/java/rocksjni/event_listener_jnicallback.h +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::EventListener. - -#ifndef JAVA_ROCKSJNI_EVENT_LISTENER_JNICALLBACK_H_ -#define JAVA_ROCKSJNI_EVENT_LISTENER_JNICALLBACK_H_ - -#include - -#include -#include - -#include "rocksdb/listener.h" -#include "rocksjni/jnicallback.h" - -namespace ROCKSDB_NAMESPACE { - -enum EnabledEventCallback { - ON_FLUSH_COMPLETED = 0x0, - ON_FLUSH_BEGIN = 0x1, - ON_TABLE_FILE_DELETED = 0x2, - ON_COMPACTION_BEGIN = 0x3, - ON_COMPACTION_COMPLETED = 0x4, - ON_TABLE_FILE_CREATED = 0x5, - ON_TABLE_FILE_CREATION_STARTED = 0x6, - ON_MEMTABLE_SEALED = 0x7, - ON_COLUMN_FAMILY_HANDLE_DELETION_STARTED = 0x8, - ON_EXTERNAL_FILE_INGESTED = 0x9, - ON_BACKGROUND_ERROR = 0xA, - ON_STALL_CONDITIONS_CHANGED = 0xB, - ON_FILE_READ_FINISH = 0xC, - ON_FILE_WRITE_FINISH = 0xD, - ON_FILE_FLUSH_FINISH = 0xE, - ON_FILE_SYNC_FINISH = 0xF, - ON_FILE_RANGE_SYNC_FINISH = 0x10, - ON_FILE_TRUNCATE_FINISH = 0x11, - ON_FILE_CLOSE_FINISH = 0x12, - SHOULD_BE_NOTIFIED_ON_FILE_IO = 0x13, - ON_ERROR_RECOVERY_BEGIN = 0x14, - ON_ERROR_RECOVERY_COMPLETED = 0x15, - - NUM_ENABLED_EVENT_CALLBACK = 0x16, -}; - -class EventListenerJniCallback : public JniCallback, public EventListener { - public: - EventListenerJniCallback( - JNIEnv* env, jobject jevent_listener, - const std::set& enabled_event_callbacks); - virtual ~EventListenerJniCallback(); - virtual void OnFlushCompleted(DB* db, const FlushJobInfo& flush_job_info); - virtual void OnFlushBegin(DB* db, const FlushJobInfo& flush_job_info); - virtual void OnTableFileDeleted(const TableFileDeletionInfo& info); - virtual void OnCompactionBegin(DB* db, const CompactionJobInfo& ci); - virtual void OnCompactionCompleted(DB* db, const CompactionJobInfo& ci); - virtual void OnTableFileCreated(const TableFileCreationInfo& info); - virtual void OnTableFileCreationStarted( - const TableFileCreationBriefInfo& info); - virtual void OnMemTableSealed(const MemTableInfo& info); - virtual void 
OnColumnFamilyHandleDeletionStarted(ColumnFamilyHandle* handle); - virtual void OnExternalFileIngested(DB* db, - const ExternalFileIngestionInfo& info); - virtual void OnBackgroundError(BackgroundErrorReason reason, - Status* bg_error); - virtual void OnStallConditionsChanged(const WriteStallInfo& info); - virtual void OnFileReadFinish(const FileOperationInfo& info); - virtual void OnFileWriteFinish(const FileOperationInfo& info); - virtual void OnFileFlushFinish(const FileOperationInfo& info); - virtual void OnFileSyncFinish(const FileOperationInfo& info); - virtual void OnFileRangeSyncFinish(const FileOperationInfo& info); - virtual void OnFileTruncateFinish(const FileOperationInfo& info); - virtual void OnFileCloseFinish(const FileOperationInfo& info); - virtual bool ShouldBeNotifiedOnFileIO(); - virtual void OnErrorRecoveryBegin(BackgroundErrorReason reason, - Status bg_error, bool* auto_recovery); - virtual void OnErrorRecoveryCompleted(Status old_bg_error); - - private: - inline void InitCallbackMethodId(jmethodID& mid, EnabledEventCallback eec, - JNIEnv* env, - jmethodID (*get_id)(JNIEnv* env)); - template - inline jobject SetupCallbackInvocation( - JNIEnv*& env, jboolean& attached_thread, const T& cpp_obj, - jobject (*convert)(JNIEnv* env, const T* cpp_obj)); - inline void CleanupCallbackInvocation(JNIEnv* env, jboolean attached_thread, - std::initializer_list refs); - inline void OnFileOperation(const jmethodID& mid, - const FileOperationInfo& info); - - const std::set m_enabled_event_callbacks; - jmethodID m_on_flush_completed_proxy_mid; - jmethodID m_on_flush_begin_proxy_mid; - jmethodID m_on_table_file_deleted_mid; - jmethodID m_on_compaction_begin_proxy_mid; - jmethodID m_on_compaction_completed_proxy_mid; - jmethodID m_on_table_file_created_mid; - jmethodID m_on_table_file_creation_started_mid; - jmethodID m_on_mem_table_sealed_mid; - jmethodID m_on_column_family_handle_deletion_started_mid; - jmethodID m_on_external_file_ingested_proxy_mid; - jmethodID m_on_background_error_proxy_mid; - jmethodID m_on_stall_conditions_changed_mid; - jmethodID m_on_file_read_finish_mid; - jmethodID m_on_file_write_finish_mid; - jmethodID m_on_file_flush_finish_mid; - jmethodID m_on_file_sync_finish_mid; - jmethodID m_on_file_range_sync_finish_mid; - jmethodID m_on_file_truncate_finish_mid; - jmethodID m_on_file_close_finish_mid; - jmethodID m_should_be_notified_on_file_io; - jmethodID m_on_error_recovery_begin_proxy_mid; - jmethodID m_on_error_recovery_completed_mid; -}; - -} // namespace ROCKSDB_NAMESPACE - -#endif // JAVA_ROCKSJNI_EVENT_LISTENER_JNICALLBACK_H_ diff --git a/java/rocksjni/filter.cc b/java/rocksjni/filter.cc deleted file mode 100644 index ed22016d2..000000000 --- a/java/rocksjni/filter.cc +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::FilterPolicy. 
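Every callback in the listener bridge above follows the same dispatch shape: bail out when the Java side left the method id null, convert the C++ info object into a Java object, invoke the listener, then drop the local reference and log (but do not rethrow) any pending exception. A condensed sketch of that shape, with NewStringUTF standing in for the portal.h converters (fromCppFlushJobInfo and friends) and env assumed to be already attached for the current thread, as SetupCallbackInvocation arranges.

// Condensed sketch of the shared callback dispatch pattern; names other
// than the JNI calls themselves are illustrative.
#include <jni.h>

void NotifyListener(JNIEnv* env, jobject jlistener, jmethodID mid,
                    const char* what) {
  if (mid == nullptr) {
    return;  // event not enabled on the Java side; one pointer check and out
  }
  jstring jarg = env->NewStringUTF(what);  // stand-in for the real converter
  if (jarg != nullptr) {
    env->CallVoidMethod(jlistener, mid, jarg);
    env->DeleteLocalRef(jarg);
  }
  if (env->ExceptionCheck()) {
    env->ExceptionDescribe();  // mirrors CleanupCallbackInvocation: log, move on
  }
}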
- -#include -#include -#include - -#include - -#include "include/org_rocksdb_BloomFilter.h" -#include "include/org_rocksdb_Filter.h" -#include "rocksdb/filter_policy.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_BloomFilter - * Method: createBloomFilter - * Signature: (DZ)J - */ -jlong Java_org_rocksdb_BloomFilter_createNewBloomFilter(JNIEnv* /*env*/, - jclass /*jcls*/, - jdouble bits_per_key) { - auto* sptr_filter = - new std::shared_ptr( - ROCKSDB_NAMESPACE::NewBloomFilterPolicy(bits_per_key)); - return GET_CPLUSPLUS_POINTER(sptr_filter); -} - -/* - * Class: org_rocksdb_Filter - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_Filter_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle) { - auto* handle = - reinterpret_cast*>( - jhandle); - delete handle; // delete std::shared_ptr -} diff --git a/java/rocksjni/ingest_external_file_options.cc b/java/rocksjni/ingest_external_file_options.cc deleted file mode 100644 index 052cf3325..000000000 --- a/java/rocksjni/ingest_external_file_options.cc +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::FilterPolicy. - -#include - -#include "include/org_rocksdb_IngestExternalFileOptions.h" -#include "rocksdb/options.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: newIngestExternalFileOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_IngestExternalFileOptions_newIngestExternalFileOptions__( - JNIEnv*, jclass) { - auto* options = new ROCKSDB_NAMESPACE::IngestExternalFileOptions(); - return GET_CPLUSPLUS_POINTER(options); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: newIngestExternalFileOptions - * Signature: (ZZZZ)J - */ -jlong Java_org_rocksdb_IngestExternalFileOptions_newIngestExternalFileOptions__ZZZZ( - JNIEnv*, jclass, jboolean jmove_files, jboolean jsnapshot_consistency, - jboolean jallow_global_seqno, jboolean jallow_blocking_flush) { - auto* options = new ROCKSDB_NAMESPACE::IngestExternalFileOptions(); - options->move_files = static_cast(jmove_files); - options->snapshot_consistency = static_cast(jsnapshot_consistency); - options->allow_global_seqno = static_cast(jallow_global_seqno); - options->allow_blocking_flush = static_cast(jallow_blocking_flush); - return GET_CPLUSPLUS_POINTER(options); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: moveFiles - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_moveFiles(JNIEnv*, jobject, - jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->move_files); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: setMoveFiles - * Signature: (JZ)V - */ -void Java_org_rocksdb_IngestExternalFileOptions_setMoveFiles( - JNIEnv*, jobject, jlong jhandle, jboolean jmove_files) { - auto* options = - reinterpret_cast(jhandle); - options->move_files = static_cast(jmove_files); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: snapshotConsistency - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_snapshotConsistency( - JNIEnv*, jobject, jlong 
jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->snapshot_consistency); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: setSnapshotConsistency - * Signature: (JZ)V - */ -void Java_org_rocksdb_IngestExternalFileOptions_setSnapshotConsistency( - JNIEnv*, jobject, jlong jhandle, jboolean jsnapshot_consistency) { - auto* options = - reinterpret_cast(jhandle); - options->snapshot_consistency = static_cast(jsnapshot_consistency); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: allowGlobalSeqNo - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_allowGlobalSeqNo( - JNIEnv*, jobject, jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->allow_global_seqno); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: setAllowGlobalSeqNo - * Signature: (JZ)V - */ -void Java_org_rocksdb_IngestExternalFileOptions_setAllowGlobalSeqNo( - JNIEnv*, jobject, jlong jhandle, jboolean jallow_global_seqno) { - auto* options = - reinterpret_cast(jhandle); - options->allow_global_seqno = static_cast(jallow_global_seqno); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: allowBlockingFlush - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_allowBlockingFlush( - JNIEnv*, jobject, jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return static_cast(options->allow_blocking_flush); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: setAllowBlockingFlush - * Signature: (JZ)V - */ -void Java_org_rocksdb_IngestExternalFileOptions_setAllowBlockingFlush( - JNIEnv*, jobject, jlong jhandle, jboolean jallow_blocking_flush) { - auto* options = - reinterpret_cast(jhandle); - options->allow_blocking_flush = static_cast(jallow_blocking_flush); -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: ingestBehind - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_ingestBehind( - JNIEnv*, jobject, jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return options->ingest_behind == JNI_TRUE; -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: setIngestBehind - * Signature: (JZ)V - */ -void Java_org_rocksdb_IngestExternalFileOptions_setIngestBehind( - JNIEnv*, jobject, jlong jhandle, jboolean jingest_behind) { - auto* options = - reinterpret_cast(jhandle); - options->ingest_behind = jingest_behind == JNI_TRUE; -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: writeGlobalSeqno - * Signature: (J)Z - */ -JNIEXPORT jboolean JNICALL -Java_org_rocksdb_IngestExternalFileOptions_writeGlobalSeqno(JNIEnv*, jobject, - jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - return options->write_global_seqno == JNI_TRUE; -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: setWriteGlobalSeqno - * Signature: (JZ)V - */ -JNIEXPORT void JNICALL -Java_org_rocksdb_IngestExternalFileOptions_setWriteGlobalSeqno( - JNIEnv*, jobject, jlong jhandle, jboolean jwrite_global_seqno) { - auto* options = - reinterpret_cast(jhandle); - options->write_global_seqno = jwrite_global_seqno == JNI_TRUE; -} - -/* - * Class: org_rocksdb_IngestExternalFileOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_IngestExternalFileOptions_disposeInternal(JNIEnv*, - jobject, - jlong jhandle) { - auto* options = - reinterpret_cast(jhandle); - delete options; -} diff --git 
a/java/rocksjni/iterator.cc b/java/rocksjni/iterator.cc deleted file mode 100644 index 3ddb9778b..000000000 --- a/java/rocksjni/iterator.cc +++ /dev/null @@ -1,340 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::Iterator methods from Java side. - -#include "rocksdb/iterator.h" - -#include -#include -#include - -#include - -#include "include/org_rocksdb_RocksIterator.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_RocksIterator - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_RocksIterator_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - assert(it != nullptr); - delete it; -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: isValid0 - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_RocksIterator_isValid0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - return reinterpret_cast(handle)->Valid(); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: seekToFirst0 - * Signature: (J)V - */ -void Java_org_rocksdb_RocksIterator_seekToFirst0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->SeekToFirst(); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: seekToLast0 - * Signature: (J)V - */ -void Java_org_rocksdb_RocksIterator_seekToLast0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->SeekToLast(); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: next0 - * Signature: (J)V - */ -void Java_org_rocksdb_RocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->Next(); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: prev0 - * Signature: (J)V - */ -void Java_org_rocksdb_RocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->Prev(); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: refresh0 - * Signature: (J)V - */ -void Java_org_rocksdb_RocksIterator_refresh0(JNIEnv* env, jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Status s = it->Refresh(); - - if (s.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: seek0 - * Signature: (J[BI)V - */ -void Java_org_rocksdb_RocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, - jlong handle, jbyteArray jtarget, - jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seek = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->Seek(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_region(seek, env, jtarget, 0, jtarget_len); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. - * In this case, the buffer offset of the key may be non-zero. 
- * - * Class: org_rocksdb_RocksIterator - * Method: seek0 - * Signature: (J[BII)V - */ -void Java_org_rocksdb_RocksIterator_seekByteArray0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, - jint jtarget_off, jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seek = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->Seek(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_region(seek, env, jtarget, jtarget_off, - jtarget_len); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: seekDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)V - */ -void Java_org_rocksdb_RocksIterator_seekDirect0(JNIEnv* env, jobject /*jobj*/, - jlong handle, jobject jtarget, - jint jtarget_off, - jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seek = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->Seek(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seek, env, jtarget, jtarget_off, - jtarget_len); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: seekForPrevDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)V - */ -void Java_org_rocksdb_RocksIterator_seekForPrevDirect0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, - jint jtarget_off, jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seekPrev = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->SeekForPrev(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seekPrev, env, jtarget, jtarget_off, - jtarget_len); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: seekForPrev0 - * Signature: (J[BI)V - */ -void Java_org_rocksdb_RocksIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, - jlong handle, - jbyteArray jtarget, - jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seek = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->SeekForPrev(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_region(seek, env, jtarget, 0, jtarget_len); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. - * In this case, the buffer offset of the key may be non-zero. 
- * - * Class: org_rocksdb_RocksIterator - * Method: seek0 - * Signature: (J[BII)V - */ -void Java_org_rocksdb_RocksIterator_seekForPrevByteArray0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, - jint jtarget_off, jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seek = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->SeekForPrev(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_region(seek, env, jtarget, jtarget_off, - jtarget_len); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: status0 - * Signature: (J)V - */ -void Java_org_rocksdb_RocksIterator_status0(JNIEnv* env, jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Status s = it->status(); - - if (s.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: key0 - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_RocksIterator_key0(JNIEnv* env, jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice key_slice = it->key(); - - jbyteArray jkey = env->NewByteArray(static_cast(key_slice.size())); - if (jkey == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetByteArrayRegion( - jkey, 0, static_cast(key_slice.size()), - const_cast(reinterpret_cast(key_slice.data()))); - return jkey; -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: keyDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)I - */ -jint Java_org_rocksdb_RocksIterator_keyDirect0(JNIEnv* env, jobject /*jobj*/, - jlong handle, jobject jtarget, - jint jtarget_off, - jint jtarget_len) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice key_slice = it->key(); - return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, key_slice, jtarget, - jtarget_off, jtarget_len); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. 
- * - * Class: org_rocksdb_RocksIterator - * Method: keyByteArray0 - * Signature: (J[BII)I - */ -jint Java_org_rocksdb_RocksIterator_keyByteArray0(JNIEnv* env, jobject /*jobj*/, - jlong handle, jbyteArray jkey, - jint jkey_off, - jint jkey_len) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice key_slice = it->key(); - jsize copy_size = std::min(static_cast(key_slice.size()), - static_cast(jkey_len)); - env->SetByteArrayRegion( - jkey, jkey_off, copy_size, - const_cast(reinterpret_cast(key_slice.data()))); - - return static_cast(key_slice.size()); -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: value0 - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_RocksIterator_value0(JNIEnv* env, jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice value_slice = it->value(); - - jbyteArray jkeyValue = - env->NewByteArray(static_cast(value_slice.size())); - if (jkeyValue == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetByteArrayRegion( - jkeyValue, 0, static_cast(value_slice.size()), - const_cast(reinterpret_cast(value_slice.data()))); - return jkeyValue; -} - -/* - * Class: org_rocksdb_RocksIterator - * Method: valueDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)I - */ -jint Java_org_rocksdb_RocksIterator_valueDirect0(JNIEnv* env, jobject /*jobj*/, - jlong handle, jobject jtarget, - jint jtarget_off, - jint jtarget_len) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice value_slice = it->value(); - return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, value_slice, jtarget, - jtarget_off, jtarget_len); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. - * - * Class: org_rocksdb_RocksIterator - * Method: valueByteArray0 - * Signature: (J[BII)I - */ -jint Java_org_rocksdb_RocksIterator_valueByteArray0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jvalue_target, - jint jvalue_off, jint jvalue_len) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice value_slice = it->value(); - jsize copy_size = std::min(static_cast(value_slice.size()), - static_cast(jvalue_len)); - env->SetByteArrayRegion( - jvalue_target, jvalue_off, copy_size, - const_cast(reinterpret_cast(value_slice.data()))); - - return static_cast(value_slice.size()); -} diff --git a/java/rocksjni/jnicallback.cc b/java/rocksjni/jnicallback.cc deleted file mode 100644 index f2742cd88..000000000 --- a/java/rocksjni/jnicallback.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
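keyByteArray0 and valueByteArray0 above share a small convention worth calling out: copy only as much as the caller's array can hold, but return the slice's full length so the Java wrapper can detect truncation and retry with a larger buffer. A sketch of that convention as a standalone helper (the function name is illustrative):

// Copy-out convention used by the indirect-buffer key/value accessors.
#include <jni.h>

#include <algorithm>

#include "rocksdb/slice.h"

jint CopySliceToJavaArray(JNIEnv* env, const ROCKSDB_NAMESPACE::Slice& slice,
                          jbyteArray jtarget, jint jtarget_off,
                          jint jtarget_len) {
  const jsize copy_size = std::min(static_cast<jsize>(slice.size()),
                                   static_cast<jsize>(jtarget_len));
  env->SetByteArrayRegion(
      jtarget, jtarget_off, copy_size,
      const_cast<jbyte*>(reinterpret_cast<const jbyte*>(slice.data())));
  // Returning the full size (not copy_size) signals truncation to the caller.
  return static_cast<jint>(slice.size());
}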
-// -// This file implements the callback "bridge" between Java and C++ for -// JNI Callbacks from C++ to sub-classes or org.rocksdb.RocksCallbackObject - -#include "rocksjni/jnicallback.h" - -#include - -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { -JniCallback::JniCallback(JNIEnv* env, jobject jcallback_obj) { - // Note: jcallback_obj may be accessed by multiple threads, - // so we ref the jvm not the env - const jint rs = env->GetJavaVM(&m_jvm); - if (rs != JNI_OK) { - // exception thrown - return; - } - - // Note: we may want to access the Java callback object instance - // across multiple method calls, so we create a global ref - assert(jcallback_obj != nullptr); - m_jcallback_obj = env->NewGlobalRef(jcallback_obj); - if (jcallback_obj == nullptr) { - // exception thrown: OutOfMemoryError - return; - } -} - -JNIEnv* JniCallback::getJniEnv(jboolean* attached) const { - return JniUtil::getJniEnv(m_jvm, attached); -} - -void JniCallback::releaseJniEnv(jboolean& attached) const { - JniUtil::releaseJniEnv(m_jvm, attached); -} - -JniCallback::~JniCallback() { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - if (m_jcallback_obj != nullptr) { - env->DeleteGlobalRef(m_jcallback_obj); - } - - releaseJniEnv(attached_thread); -} -} // namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/jnicallback.h b/java/rocksjni/jnicallback.h deleted file mode 100644 index a03a04128..000000000 --- a/java/rocksjni/jnicallback.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// JNI Callbacks from C++ to sub-classes or org.rocksdb.RocksCallbackObject - -#ifndef JAVA_ROCKSJNI_JNICALLBACK_H_ -#define JAVA_ROCKSJNI_JNICALLBACK_H_ - -#include - -#include "rocksdb/rocksdb_namespace.h" - -namespace ROCKSDB_NAMESPACE { -class JniCallback { - public: - JniCallback(JNIEnv* env, jobject jcallback_obj); - virtual ~JniCallback(); - - const jobject& GetJavaObject() const { return m_jcallback_obj; } - - protected: - JavaVM* m_jvm; - jobject m_jcallback_obj; - JNIEnv* getJniEnv(jboolean* attached) const; - void releaseJniEnv(jboolean& attached) const; -}; -} // namespace ROCKSDB_NAMESPACE - -#endif // JAVA_ROCKSJNI_JNICALLBACK_H_ diff --git a/java/rocksjni/loggerjnicallback.cc b/java/rocksjni/loggerjnicallback.cc deleted file mode 100644 index aa9f95cd4..000000000 --- a/java/rocksjni/loggerjnicallback.cc +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::Logger. 
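JniCallback above encodes two JNI ownership rules: cache the JavaVM* rather than the JNIEnv* (an env is only valid on the thread that produced it), and promote the callback object to a global reference so it outlives the native call that delivered it. A self-contained sketch of those rules; the class name and the attach/detach handling are illustrative stand-ins for JniUtil::getJniEnv and releaseJniEnv.

// Sketch of the JniCallback ownership rules; CallbackHolder is illustrative.
#include <jni.h>

#include <cassert>

class CallbackHolder {
 public:
  CallbackHolder(JNIEnv* env, jobject jcallback)
      : m_jvm(nullptr), m_obj(nullptr) {
    if (env->GetJavaVM(&m_jvm) != JNI_OK) {
      return;  // a Java exception is already pending
    }
    assert(jcallback != nullptr);
    m_obj = env->NewGlobalRef(jcallback);  // may be null on OutOfMemoryError
  }

  ~CallbackHolder() {
    if (m_jvm == nullptr) {
      return;
    }
    // Destruction may happen on any thread, so re-acquire an env from the VM
    // (attaching if needed) before releasing the global reference.
    JNIEnv* env = nullptr;
    bool attached = false;
    if (m_jvm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) ==
        JNI_EDETACHED) {
      attached = m_jvm->AttachCurrentThread(reinterpret_cast<void**>(&env),
                                            nullptr) == JNI_OK;
    }
    if (env != nullptr && m_obj != nullptr) {
      env->DeleteGlobalRef(m_obj);
    }
    if (attached) {
      m_jvm->DetachCurrentThread();
    }
  }

 private:
  JavaVM* m_jvm;
  jobject m_obj;
};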
- -#include "rocksjni/loggerjnicallback.h" - -#include -#include - -#include "include/org_rocksdb_Logger.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { - -LoggerJniCallback::LoggerJniCallback(JNIEnv* env, jobject jlogger) - : JniCallback(env, jlogger) { - m_jLogMethodId = LoggerJni::getLogMethodId(env); - if (m_jLogMethodId == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - - jobject jdebug_level = InfoLogLevelJni::DEBUG_LEVEL(env); - if (jdebug_level == nullptr) { - // exception thrown: NoSuchFieldError, ExceptionInInitializerError - // or OutOfMemoryError - return; - } - m_jdebug_level = env->NewGlobalRef(jdebug_level); - if (m_jdebug_level == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - jobject jinfo_level = InfoLogLevelJni::INFO_LEVEL(env); - if (jinfo_level == nullptr) { - // exception thrown: NoSuchFieldError, ExceptionInInitializerError - // or OutOfMemoryError - return; - } - m_jinfo_level = env->NewGlobalRef(jinfo_level); - if (m_jinfo_level == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - jobject jwarn_level = InfoLogLevelJni::WARN_LEVEL(env); - if (jwarn_level == nullptr) { - // exception thrown: NoSuchFieldError, ExceptionInInitializerError - // or OutOfMemoryError - return; - } - m_jwarn_level = env->NewGlobalRef(jwarn_level); - if (m_jwarn_level == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - jobject jerror_level = InfoLogLevelJni::ERROR_LEVEL(env); - if (jerror_level == nullptr) { - // exception thrown: NoSuchFieldError, ExceptionInInitializerError - // or OutOfMemoryError - return; - } - m_jerror_level = env->NewGlobalRef(jerror_level); - if (m_jerror_level == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - jobject jfatal_level = InfoLogLevelJni::FATAL_LEVEL(env); - if (jfatal_level == nullptr) { - // exception thrown: NoSuchFieldError, ExceptionInInitializerError - // or OutOfMemoryError - return; - } - m_jfatal_level = env->NewGlobalRef(jfatal_level); - if (m_jfatal_level == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - jobject jheader_level = InfoLogLevelJni::HEADER_LEVEL(env); - if (jheader_level == nullptr) { - // exception thrown: NoSuchFieldError, ExceptionInInitializerError - // or OutOfMemoryError - return; - } - m_jheader_level = env->NewGlobalRef(jheader_level); - if (m_jheader_level == nullptr) { - // exception thrown: OutOfMemoryError - return; - } -} - -void LoggerJniCallback::Logv(const char* /*format*/, va_list /*ap*/) { - // We implement this method because it is virtual but we don't - // use it because we need to know about the log level. 
-} - -void LoggerJniCallback::Logv(const InfoLogLevel log_level, const char* format, - va_list ap) { - if (GetInfoLogLevel() <= log_level) { - // determine InfoLogLevel java enum instance - jobject jlog_level; - switch (log_level) { - case ROCKSDB_NAMESPACE::InfoLogLevel::DEBUG_LEVEL: - jlog_level = m_jdebug_level; - break; - case ROCKSDB_NAMESPACE::InfoLogLevel::INFO_LEVEL: - jlog_level = m_jinfo_level; - break; - case ROCKSDB_NAMESPACE::InfoLogLevel::WARN_LEVEL: - jlog_level = m_jwarn_level; - break; - case ROCKSDB_NAMESPACE::InfoLogLevel::ERROR_LEVEL: - jlog_level = m_jerror_level; - break; - case ROCKSDB_NAMESPACE::InfoLogLevel::FATAL_LEVEL: - jlog_level = m_jfatal_level; - break; - case ROCKSDB_NAMESPACE::InfoLogLevel::HEADER_LEVEL: - jlog_level = m_jheader_level; - break; - default: - jlog_level = m_jfatal_level; - break; - } - - assert(format != nullptr); - const std::unique_ptr msg = format_str(format, ap); - - // pass msg to java callback handler - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - jstring jmsg = env->NewStringUTF(msg.get()); - if (jmsg == nullptr) { - // unable to construct string - if (env->ExceptionCheck()) { - env->ExceptionDescribe(); // print out exception to stderr - } - releaseJniEnv(attached_thread); - return; - } - if (env->ExceptionCheck()) { - // exception thrown: OutOfMemoryError - env->ExceptionDescribe(); // print out exception to stderr - env->DeleteLocalRef(jmsg); - releaseJniEnv(attached_thread); - return; - } - - env->CallVoidMethod(m_jcallback_obj, m_jLogMethodId, jlog_level, jmsg); - if (env->ExceptionCheck()) { - // exception thrown - env->ExceptionDescribe(); // print out exception to stderr - env->DeleteLocalRef(jmsg); - releaseJniEnv(attached_thread); - return; - } - - env->DeleteLocalRef(jmsg); - releaseJniEnv(attached_thread); - } -} - -std::unique_ptr LoggerJniCallback::format_str(const char* format, - va_list ap) const { - va_list ap_copy; - - va_copy(ap_copy, ap); - const size_t required = - vsnprintf(nullptr, 0, format, ap_copy) + 1; // Extra space for '\0' - va_end(ap_copy); - - std::unique_ptr buf(new char[required]); - - va_copy(ap_copy, ap); - vsnprintf(buf.get(), required, format, ap_copy); - va_end(ap_copy); - - return buf; -} -LoggerJniCallback::~LoggerJniCallback() { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - if (m_jdebug_level != nullptr) { - env->DeleteGlobalRef(m_jdebug_level); - } - - if (m_jinfo_level != nullptr) { - env->DeleteGlobalRef(m_jinfo_level); - } - - if (m_jwarn_level != nullptr) { - env->DeleteGlobalRef(m_jwarn_level); - } - - if (m_jerror_level != nullptr) { - env->DeleteGlobalRef(m_jerror_level); - } - - if (m_jfatal_level != nullptr) { - env->DeleteGlobalRef(m_jfatal_level); - } - - if (m_jheader_level != nullptr) { - env->DeleteGlobalRef(m_jheader_level); - } - - releaseJniEnv(attached_thread); -} - -} // namespace ROCKSDB_NAMESPACE - -/* - * Class: org_rocksdb_Logger - * Method: createNewLoggerOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_Logger_createNewLoggerOptions(JNIEnv* env, jobject jobj, - jlong joptions) { - auto* sptr_logger = new std::shared_ptr( - new ROCKSDB_NAMESPACE::LoggerJniCallback(env, jobj)); - - // set log level - auto* options = reinterpret_cast(joptions); - sptr_logger->get()->SetInfoLogLevel(options->info_log_level); - - return GET_CPLUSPLUS_POINTER(sptr_logger); -} - -/* - * Class: org_rocksdb_Logger - * Method: createNewLoggerDbOptions - 
* Signature: (J)J - */ -jlong Java_org_rocksdb_Logger_createNewLoggerDbOptions(JNIEnv* env, - jobject jobj, - jlong jdb_options) { - auto* sptr_logger = new std::shared_ptr( - new ROCKSDB_NAMESPACE::LoggerJniCallback(env, jobj)); - - // set log level - auto* db_options = - reinterpret_cast(jdb_options); - sptr_logger->get()->SetInfoLogLevel(db_options->info_log_level); - - return GET_CPLUSPLUS_POINTER(sptr_logger); -} - -/* - * Class: org_rocksdb_Logger - * Method: setInfoLogLevel - * Signature: (JB)V - */ -void Java_org_rocksdb_Logger_setInfoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle, jbyte jlog_level) { - auto* handle = - reinterpret_cast*>( - jhandle); - handle->get()->SetInfoLogLevel( - static_cast(jlog_level)); -} - -/* - * Class: org_rocksdb_Logger - * Method: infoLogLevel - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Logger_infoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle) { - auto* handle = - reinterpret_cast*>( - jhandle); - return static_cast(handle->get()->GetInfoLogLevel()); -} - -/* - * Class: org_rocksdb_Logger - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_Logger_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle) { - auto* handle = - reinterpret_cast*>( - jhandle); - delete handle; // delete std::shared_ptr -} diff --git a/java/rocksjni/loggerjnicallback.h b/java/rocksjni/loggerjnicallback.h deleted file mode 100644 index 57774988c..000000000 --- a/java/rocksjni/loggerjnicallback.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::Logger - -#ifndef JAVA_ROCKSJNI_LOGGERJNICALLBACK_H_ -#define JAVA_ROCKSJNI_LOGGERJNICALLBACK_H_ - -#include - -#include -#include - -#include "port/port.h" -#include "rocksdb/env.h" -#include "rocksjni/jnicallback.h" - -namespace ROCKSDB_NAMESPACE { - -class LoggerJniCallback : public JniCallback, public Logger { - public: - LoggerJniCallback(JNIEnv* env, jobject jLogger); - ~LoggerJniCallback(); - - using Logger::GetInfoLogLevel; - using Logger::SetInfoLogLevel; - // Write an entry to the log file with the specified format. - virtual void Logv(const char* format, va_list ap); - // Write an entry to the log file with the specified log level - // and format. Any log with level under the internal log level - // of *this (see @SetInfoLogLevel and @GetInfoLogLevel) will not be - // printed. - virtual void Logv(const InfoLogLevel log_level, const char* format, - va_list ap); - - private: - jmethodID m_jLogMethodId; - jobject m_jdebug_level; - jobject m_jinfo_level; - jobject m_jwarn_level; - jobject m_jerror_level; - jobject m_jfatal_level; - jobject m_jheader_level; - std::unique_ptr format_str(const char* format, va_list ap) const; -}; -} // namespace ROCKSDB_NAMESPACE - -#endif // JAVA_ROCKSJNI_LOGGERJNICALLBACK_H_ diff --git a/java/rocksjni/lru_cache.cc b/java/rocksjni/lru_cache.cc deleted file mode 100644 index 56dffa2f0..000000000 --- a/java/rocksjni/lru_cache.cc +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
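// Note on the Logv()/format_str() pair above: format_str() uses the classic
// two-pass vsnprintf idiom. The first pass (null buffer, size 0) only measures
// the required length; the second pass writes into an exactly-sized buffer.
// A minimal, self-contained sketch of the same idiom, independent of the JNI
// plumbing (the name format_message is illustrative, not part of RocksDB):
#include <cstdarg>
#include <cstdio>
#include <memory>

static std::unique_ptr<char[]> format_message(const char* format, ...) {
  va_list ap;
  va_start(ap, format);

  // Pass 1: measure. vsnprintf returns the number of characters that would
  // have been written, excluding the terminating '\0'.
  va_list ap_copy;
  va_copy(ap_copy, ap);
  const int needed = vsnprintf(nullptr, 0, format, ap_copy);
  va_end(ap_copy);
  if (needed < 0) {
    va_end(ap);
    return nullptr;  // encoding error in the format string
  }

  // Pass 2: format for real. The original va_list is still usable because
  // only the copy was consumed above.
  std::unique_ptr<char[]> buf(new char[needed + 1]);
  vsnprintf(buf.get(), needed + 1, format, ap);
  va_end(ap);
  return buf;
}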
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::LRUCache. - -#include "cache/lru_cache.h" - -#include - -#include "include/org_rocksdb_LRUCache.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_LRUCache - * Method: newLRUCache - * Signature: (JIZD)J - */ -jlong Java_org_rocksdb_LRUCache_newLRUCache(JNIEnv* /*env*/, jclass /*jcls*/, - jlong jcapacity, - jint jnum_shard_bits, - jboolean jstrict_capacity_limit, - jdouble jhigh_pri_pool_ratio, - jdouble jlow_pri_pool_ratio) { - auto* sptr_lru_cache = new std::shared_ptr( - ROCKSDB_NAMESPACE::NewLRUCache( - static_cast(jcapacity), static_cast(jnum_shard_bits), - static_cast(jstrict_capacity_limit), - static_cast(jhigh_pri_pool_ratio), - nullptr /* memory_allocator */, rocksdb::kDefaultToAdaptiveMutex, - rocksdb::kDefaultCacheMetadataChargePolicy, - static_cast(jlow_pri_pool_ratio))); - return GET_CPLUSPLUS_POINTER(sptr_lru_cache); -} - -/* - * Class: org_rocksdb_LRUCache - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_LRUCache_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* sptr_lru_cache = - reinterpret_cast*>(jhandle); - delete sptr_lru_cache; // delete std::shared_ptr -} diff --git a/java/rocksjni/memory_util.cc b/java/rocksjni/memory_util.cc deleted file mode 100644 index c87c4f403..000000000 --- a/java/rocksjni/memory_util.cc +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
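// The newLRUCache() bridge above is a thin veneer over
// ROCKSDB_NAMESPACE::NewLRUCache(); the jlong handed back to Java points at a
// heap-allocated std::shared_ptr<Cache>, which is why disposeInternal()
// deletes the shared_ptr rather than the cache itself. Roughly equivalent
// native usage, assuming the public rocksdb headers (all values are arbitrary
// examples, not recommendations):
#include <memory>

#include <rocksdb/cache.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>

rocksdb::Options MakeOptionsWithLruCache() {
  std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(
      64 << 20 /* capacity: 64 MiB */, 6 /* num_shard_bits */,
      false /* strict_capacity_limit */, 0.5 /* high_pri_pool_ratio */);

  rocksdb::BlockBasedTableOptions table_options;
  table_options.block_cache = cache;  // shared by every CF using this factory

  rocksdb::Options options;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  return options;
}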
- -#include "rocksdb/utilities/memory_util.h" - -#include - -#include -#include -#include -#include - -#include "include/org_rocksdb_MemoryUtil.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_MemoryUtil - * Method: getApproximateMemoryUsageByType - * Signature: ([J[J)Ljava/util/Map; - */ -jobject Java_org_rocksdb_MemoryUtil_getApproximateMemoryUsageByType( - JNIEnv *env, jclass, jlongArray jdb_handles, jlongArray jcache_handles) { - jboolean has_exception = JNI_FALSE; - std::vector dbs = - ROCKSDB_NAMESPACE::JniUtil::fromJPointers( - env, jdb_handles, &has_exception); - if (has_exception == JNI_TRUE) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - std::unordered_set cache_set; - jsize cache_handle_count = env->GetArrayLength(jcache_handles); - if (cache_handle_count > 0) { - jlong *ptr_jcache_handles = - env->GetLongArrayElements(jcache_handles, nullptr); - if (ptr_jcache_handles == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - for (jsize i = 0; i < cache_handle_count; i++) { - auto *cache_ptr = - reinterpret_cast *>( - ptr_jcache_handles[i]); - cache_set.insert(cache_ptr->get()); - } - env->ReleaseLongArrayElements(jcache_handles, ptr_jcache_handles, - JNI_ABORT); - } - - std::map usage_by_type; - if (ROCKSDB_NAMESPACE::MemoryUtil::GetApproximateMemoryUsageByType( - dbs, cache_set, &usage_by_type) != ROCKSDB_NAMESPACE::Status::OK()) { - // Non-OK status - return nullptr; - } - - jobject jusage_by_type = ROCKSDB_NAMESPACE::HashMapJni::construct( - env, static_cast(usage_by_type.size())); - if (jusage_by_type == nullptr) { - // exception occurred - return nullptr; - } - const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< - const ROCKSDB_NAMESPACE::MemoryUtil::UsageType, const uint64_t, jobject, - jobject> - fn_map_kv = [env]( - const std::pair &pair) { - // Construct key - const jobject jusage_type = ROCKSDB_NAMESPACE::ByteJni::valueOf( - env, ROCKSDB_NAMESPACE::MemoryUsageTypeJni::toJavaMemoryUsageType( - pair.first)); - if (jusage_type == nullptr) { - // an error occurred - return std::unique_ptr>(nullptr); - } - // Construct value - const jobject jusage_value = - ROCKSDB_NAMESPACE::LongJni::valueOf(env, pair.second); - if (jusage_value == nullptr) { - // an error occurred - return std::unique_ptr>(nullptr); - } - // Construct and return pointer to pair of jobjects - return std::unique_ptr>( - new std::pair(jusage_type, jusage_value)); - }; - - if (!ROCKSDB_NAMESPACE::HashMapJni::putAll(env, jusage_by_type, - usage_by_type.begin(), - usage_by_type.end(), fn_map_kv)) { - // exception occcurred - jusage_by_type = nullptr; - } - - return jusage_by_type; -} diff --git a/java/rocksjni/memtablejni.cc b/java/rocksjni/memtablejni.cc deleted file mode 100644 index a4d02f354..000000000 --- a/java/rocksjni/memtablejni.cc +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for MemTables. 
- -#include "include/org_rocksdb_HashLinkedListMemTableConfig.h" -#include "include/org_rocksdb_HashSkipListMemTableConfig.h" -#include "include/org_rocksdb_SkipListMemTableConfig.h" -#include "include/org_rocksdb_VectorMemTableConfig.h" -#include "rocksdb/memtablerep.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_HashSkipListMemTableConfig - * Method: newMemTableFactoryHandle - * Signature: (JII)J - */ -jlong Java_org_rocksdb_HashSkipListMemTableConfig_newMemTableFactoryHandle( - JNIEnv* env, jobject /*jobj*/, jlong jbucket_count, jint jheight, - jint jbranching_factor) { - ROCKSDB_NAMESPACE::Status s = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jbucket_count); - if (s.ok()) { - return GET_CPLUSPLUS_POINTER(ROCKSDB_NAMESPACE::NewHashSkipListRepFactory( - static_cast(jbucket_count), static_cast(jheight), - static_cast(jbranching_factor))); - } - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - return 0; -} - -/* - * Class: org_rocksdb_HashLinkedListMemTableConfig - * Method: newMemTableFactoryHandle - * Signature: (JJIZI)J - */ -jlong Java_org_rocksdb_HashLinkedListMemTableConfig_newMemTableFactoryHandle( - JNIEnv* env, jobject /*jobj*/, jlong jbucket_count, - jlong jhuge_page_tlb_size, jint jbucket_entries_logging_threshold, - jboolean jif_log_bucket_dist_when_flash, jint jthreshold_use_skiplist) { - ROCKSDB_NAMESPACE::Status statusBucketCount = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jbucket_count); - ROCKSDB_NAMESPACE::Status statusHugePageTlb = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - jhuge_page_tlb_size); - if (statusBucketCount.ok() && statusHugePageTlb.ok()) { - return GET_CPLUSPLUS_POINTER(ROCKSDB_NAMESPACE::NewHashLinkListRepFactory( - static_cast(jbucket_count), - static_cast(jhuge_page_tlb_size), - static_cast(jbucket_entries_logging_threshold), - static_cast(jif_log_bucket_dist_when_flash), - static_cast(jthreshold_use_skiplist))); - } - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew( - env, !statusBucketCount.ok() ? statusBucketCount : statusHugePageTlb); - return 0; -} - -/* - * Class: org_rocksdb_VectorMemTableConfig - * Method: newMemTableFactoryHandle - * Signature: (J)J - */ -jlong Java_org_rocksdb_VectorMemTableConfig_newMemTableFactoryHandle( - JNIEnv* env, jobject /*jobj*/, jlong jreserved_size) { - ROCKSDB_NAMESPACE::Status s = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jreserved_size); - if (s.ok()) { - return GET_CPLUSPLUS_POINTER(new ROCKSDB_NAMESPACE::VectorRepFactory( - static_cast(jreserved_size))); - } - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - return 0; -} - -/* - * Class: org_rocksdb_SkipListMemTableConfig - * Method: newMemTableFactoryHandle0 - * Signature: (J)J - */ -jlong Java_org_rocksdb_SkipListMemTableConfig_newMemTableFactoryHandle0( - JNIEnv* env, jobject /*jobj*/, jlong jlookahead) { - ROCKSDB_NAMESPACE::Status s = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jlookahead); - if (s.ok()) { - return GET_CPLUSPLUS_POINTER(new ROCKSDB_NAMESPACE::SkipListFactory( - static_cast(jlookahead))); - } - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - return 0; -} diff --git a/java/rocksjni/merge_operator.cc b/java/rocksjni/merge_operator.cc deleted file mode 100644 index ce3c5df56..000000000 --- a/java/rocksjni/merge_operator.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-// Copyright (c) 2014, Vlad Balan (vlad.gm@gmail.com). All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ -// for ROCKSDB_NAMESPACE::MergeOperator. - -#include "rocksdb/merge_operator.h" - -#include -#include -#include - -#include -#include - -#include "include/org_rocksdb_StringAppendOperator.h" -#include "include/org_rocksdb_UInt64AddOperator.h" -#include "rocksdb/db.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/options.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/statistics.h" -#include "rocksdb/table.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -#include "utilities/merge_operators.h" - -/* - * Class: org_rocksdb_StringAppendOperator - * Method: newSharedStringAppendOperator - * Signature: (C)J - */ -jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__C( - JNIEnv* /*env*/, jclass /*jclazz*/, jchar jdelim) { - auto* sptr_string_append_op = - new std::shared_ptr( - ROCKSDB_NAMESPACE::MergeOperators::CreateStringAppendOperator( - (char)jdelim)); - return GET_CPLUSPLUS_POINTER(sptr_string_append_op); -} - -jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__Ljava_lang_String_2( - JNIEnv* env, jclass /*jclass*/, jstring jdelim) { - jboolean has_exception = JNI_FALSE; - auto delim = - ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jdelim, &has_exception); - if (has_exception == JNI_TRUE) { - return 0; - } - auto* sptr_string_append_op = - new std::shared_ptr( - ROCKSDB_NAMESPACE::MergeOperators::CreateStringAppendOperator(delim)); - return GET_CPLUSPLUS_POINTER(sptr_string_append_op); -} - -/* - * Class: org_rocksdb_StringAppendOperator - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_StringAppendOperator_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* sptr_string_append_op = - reinterpret_cast*>( - jhandle); - delete sptr_string_append_op; // delete std::shared_ptr -} - -/* - * Class: org_rocksdb_UInt64AddOperator - * Method: newSharedUInt64AddOperator - * Signature: ()J - */ -jlong Java_org_rocksdb_UInt64AddOperator_newSharedUInt64AddOperator( - JNIEnv* /*env*/, jclass /*jclazz*/) { - auto* sptr_uint64_add_op = - new std::shared_ptr( - ROCKSDB_NAMESPACE::MergeOperators::CreateUInt64AddOperator()); - return GET_CPLUSPLUS_POINTER(sptr_uint64_add_op); -} - -/* - * Class: org_rocksdb_UInt64AddOperator - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_UInt64AddOperator_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* sptr_uint64_add_op = - reinterpret_cast*>( - jhandle); - delete sptr_uint64_add_op; // delete std::shared_ptr -} diff --git a/java/rocksjni/native_comparator_wrapper_test.cc b/java/rocksjni/native_comparator_wrapper_test.cc deleted file mode 100644 index ac33ca22d..000000000 --- a/java/rocksjni/native_comparator_wrapper_test.cc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
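// The StringAppendOperator and UInt64AddOperator bridges above wrap the
// factory functions from utilities/merge_operators.h in heap-allocated
// shared_ptrs and hand the pointer to Java. Rough native usage of the
// string-append operator; this assumes building against the RocksDB source
// tree (as the JNI file itself does), the path is an arbitrary example, and
// error handling is elided:
#include <rocksdb/db.h>
#include <rocksdb/options.h>

#include "utilities/merge_operators.h"

void StringAppendExample() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.merge_operator =
      ROCKSDB_NAMESPACE::MergeOperators::CreateStringAppendOperator(',');

  rocksdb::DB* db = nullptr;
  if (!rocksdb::DB::Open(options, "/tmp/merge_example", &db).ok()) {
    return;
  }

  // Successive Merge() calls append to the stored value using the delimiter,
  // so after these two calls the key "tags" holds "red,blue".
  db->Merge(rocksdb::WriteOptions(), "tags", "red");
  db->Merge(rocksdb::WriteOptions(), "tags", "blue");
  delete db;
}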
- -#include - -#include - -#include "include/org_rocksdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper.h" -#include "rocksdb/comparator.h" -#include "rocksdb/slice.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -namespace ROCKSDB_NAMESPACE { - -class NativeComparatorWrapperTestStringComparator : public Comparator { - const char* Name() const { - return "NativeComparatorWrapperTestStringComparator"; - } - - int Compare(const Slice& a, const Slice& b) const { - return a.ToString().compare(b.ToString()); - } - - void FindShortestSeparator(std::string* /*start*/, - const Slice& /*limit*/) const { - return; - } - - void FindShortSuccessor(std::string* /*key*/) const { return; } -}; -} // namespace ROCKSDB_NAMESPACE - -/* - * Class: org_rocksdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper - * Method: newStringComparator - * Signature: ()J - */ -jlong Java_org_rocksdb_NativeComparatorWrapperTest_00024NativeStringComparatorWrapper_newStringComparator( - JNIEnv* /*env*/, jobject /*jobj*/) { - auto* comparator = - new ROCKSDB_NAMESPACE::NativeComparatorWrapperTestStringComparator(); - return GET_CPLUSPLUS_POINTER(comparator); -} diff --git a/java/rocksjni/optimistic_transaction_db.cc b/java/rocksjni/optimistic_transaction_db.cc deleted file mode 100644 index 238224f58..000000000 --- a/java/rocksjni/optimistic_transaction_db.cc +++ /dev/null @@ -1,270 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ -// for ROCKSDB_NAMESPACE::TransactionDB. - -#include "rocksdb/utilities/optimistic_transaction_db.h" - -#include - -#include "include/org_rocksdb_OptimisticTransactionDB.h" -#include "rocksdb/options.h" -#include "rocksdb/utilities/transaction.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_OptimisticTransactionDB - * Method: open - * Signature: (JLjava/lang/String;)J - */ -jlong Java_org_rocksdb_OptimisticTransactionDB_open__JLjava_lang_String_2( - JNIEnv* env, jclass, jlong joptions_handle, jstring jdb_path) { - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - - auto* options = - reinterpret_cast(joptions_handle); - ROCKSDB_NAMESPACE::OptimisticTransactionDB* otdb = nullptr; - ROCKSDB_NAMESPACE::Status s = - ROCKSDB_NAMESPACE::OptimisticTransactionDB::Open(*options, db_path, - &otdb); - env->ReleaseStringUTFChars(jdb_path, db_path); - - if (s.ok()) { - return GET_CPLUSPLUS_POINTER(otdb); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return 0; - } -} - -/* - * Class: org_rocksdb_OptimisticTransactionDB - * Method: open - * Signature: (JLjava/lang/String;[[B[J)[J - */ -jlongArray -Java_org_rocksdb_OptimisticTransactionDB_open__JLjava_lang_String_2_3_3B_3J( - JNIEnv* env, jclass, jlong jdb_options_handle, jstring jdb_path, - jobjectArray jcolumn_names, jlongArray jcolumn_options_handles) { - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - std::vector column_families; - const jsize len_cols = env->GetArrayLength(jcolumn_names); - if (len_cols > 0) { - jlong* jco = 
env->GetLongArrayElements(jcolumn_options_handles, nullptr); - if (jco == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - - for (int i = 0; i < len_cols; i++) { - const jobject jcn = env->GetObjectArrayElement(jcolumn_names, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - - const jbyteArray jcn_ba = reinterpret_cast(jcn); - const jsize jcf_name_len = env->GetArrayLength(jcn_ba); - jbyte* jcf_name = env->GetByteArrayElements(jcn_ba, nullptr); - if (jcf_name == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jcn); - env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - - const std::string cf_name(reinterpret_cast(jcf_name), - jcf_name_len); - const ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = - reinterpret_cast(jco[i]); - column_families.push_back( - ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); - - env->ReleaseByteArrayElements(jcn_ba, jcf_name, JNI_ABORT); - env->DeleteLocalRef(jcn); - } - env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); - } - - auto* db_options = - reinterpret_cast(jdb_options_handle); - std::vector handles; - ROCKSDB_NAMESPACE::OptimisticTransactionDB* otdb = nullptr; - const ROCKSDB_NAMESPACE::Status s = - ROCKSDB_NAMESPACE::OptimisticTransactionDB::Open( - *db_options, db_path, column_families, &handles, &otdb); - - env->ReleaseStringUTFChars(jdb_path, db_path); - - // check if open operation was successful - if (s.ok()) { - const jsize resultsLen = 1 + len_cols; // db handle + column family handles - std::unique_ptr results = - std::unique_ptr(new jlong[resultsLen]); - results[0] = reinterpret_cast(otdb); - for (int i = 1; i <= len_cols; i++) { - results[i] = reinterpret_cast(handles[i - 1]); - } - - jlongArray jresults = env->NewLongArray(resultsLen); - if (jresults == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetLongArrayRegion(jresults, 0, resultsLen, results.get()); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - return nullptr; - } - return jresults; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; -} - -/* - * Class: org_rocksdb_OptimisticTransactionDB - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_OptimisticTransactionDB_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* optimistic_txn_db = - reinterpret_cast(jhandle); - assert(optimistic_txn_db != nullptr); - delete optimistic_txn_db; -} - -/* - * Class: org_rocksdb_OptimisticTransactionDB - * Method: closeDatabase - * Signature: (J)V - */ -void Java_org_rocksdb_OptimisticTransactionDB_closeDatabase(JNIEnv* env, jclass, - jlong jhandle) { - auto* optimistic_txn_db = - reinterpret_cast(jhandle); - assert(optimistic_txn_db != nullptr); - ROCKSDB_NAMESPACE::Status s = optimistic_txn_db->Close(); - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_OptimisticTransactionDB - * Method: beginTransaction - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction__JJ( - JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle) { - auto* optimistic_txn_db = - 
reinterpret_cast(jhandle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - ROCKSDB_NAMESPACE::Transaction* txn = - optimistic_txn_db->BeginTransaction(*write_options); - return GET_CPLUSPLUS_POINTER(txn); -} - -/* - * Class: org_rocksdb_OptimisticTransactionDB - * Method: beginTransaction - * Signature: (JJJ)J - */ -jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction__JJJ( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jwrite_options_handle, jlong joptimistic_txn_options_handle) { - auto* optimistic_txn_db = - reinterpret_cast(jhandle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* optimistic_txn_options = - reinterpret_cast( - joptimistic_txn_options_handle); - ROCKSDB_NAMESPACE::Transaction* txn = optimistic_txn_db->BeginTransaction( - *write_options, *optimistic_txn_options); - return GET_CPLUSPLUS_POINTER(txn); -} - -/* - * Class: org_rocksdb_OptimisticTransactionDB - * Method: beginTransaction_withOld - * Signature: (JJJ)J - */ -jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJ( - JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, - jlong jold_txn_handle) { - auto* optimistic_txn_db = - reinterpret_cast(jhandle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* old_txn = - reinterpret_cast(jold_txn_handle); - ROCKSDB_NAMESPACE::OptimisticTransactionOptions optimistic_txn_options; - ROCKSDB_NAMESPACE::Transaction* txn = optimistic_txn_db->BeginTransaction( - *write_options, optimistic_txn_options, old_txn); - - // RocksJava relies on the assumption that - // we do not allocate a new Transaction object - // when providing an old_optimistic_txn - assert(txn == old_txn); - - return GET_CPLUSPLUS_POINTER(txn); -} - -/* - * Class: org_rocksdb_OptimisticTransactionDB - * Method: beginTransaction_withOld - * Signature: (JJJJ)J - */ -jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJJ( - JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, - jlong joptimistic_txn_options_handle, jlong jold_txn_handle) { - auto* optimistic_txn_db = - reinterpret_cast(jhandle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* optimistic_txn_options = - reinterpret_cast( - joptimistic_txn_options_handle); - auto* old_txn = - reinterpret_cast(jold_txn_handle); - ROCKSDB_NAMESPACE::Transaction* txn = optimistic_txn_db->BeginTransaction( - *write_options, *optimistic_txn_options, old_txn); - - // RocksJava relies on the assumption that - // we do not allocate a new Transaction object - // when providing an old_optimisic_txn - assert(txn == old_txn); - - return GET_CPLUSPLUS_POINTER(txn); -} - -/* - * Class: org_rocksdb_OptimisticTransactionDB - * Method: getBaseDB - * Signature: (J)J - */ -jlong Java_org_rocksdb_OptimisticTransactionDB_getBaseDB(JNIEnv*, jobject, - jlong jhandle) { - auto* optimistic_txn_db = - reinterpret_cast(jhandle); - return GET_CPLUSPLUS_POINTER(optimistic_txn_db->GetBaseDB()); -} diff --git a/java/rocksjni/optimistic_transaction_options.cc b/java/rocksjni/optimistic_transaction_options.cc deleted file mode 100644 index 501c6c4fb..000000000 --- a/java/rocksjni/optimistic_transaction_options.cc +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
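// beginTransaction_withOld above depends on BeginTransaction() reusing and
// re-initialising the Transaction passed as old_txn instead of allocating a
// new object, which is what makes the assert(txn == old_txn) hold. A rough
// native equivalent, assuming the public rocksdb headers (error handling is
// elided):
#include <rocksdb/utilities/optimistic_transaction_db.h>
#include <rocksdb/utilities/transaction.h>

void ReuseTransaction(rocksdb::OptimisticTransactionDB* otdb) {
  rocksdb::WriteOptions write_options;
  rocksdb::OptimisticTransactionOptions txn_options;

  rocksdb::Transaction* txn =
      otdb->BeginTransaction(write_options, txn_options);
  txn->Put("k1", "v1");
  txn->Commit();

  // Passing the finished transaction back recycles the same object, so there
  // is no second allocation and only one delete at the end.
  rocksdb::Transaction* reused =
      otdb->BeginTransaction(write_options, txn_options, txn);
  reused->Put("k2", "v2");
  reused->Commit();
  delete reused;  // same pointer as txn
}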
-// -// This file implements the "bridge" between Java and C++ -// for ROCKSDB_NAMESPACE::OptimisticTransactionOptions. - -#include - -#include "include/org_rocksdb_OptimisticTransactionOptions.h" -#include "rocksdb/comparator.h" -#include "rocksdb/utilities/optimistic_transaction_db.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_OptimisticTransactionOptions - * Method: newOptimisticTransactionOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_OptimisticTransactionOptions_newOptimisticTransactionOptions( - JNIEnv* /*env*/, jclass /*jcls*/) { - ROCKSDB_NAMESPACE::OptimisticTransactionOptions* opts = - new ROCKSDB_NAMESPACE::OptimisticTransactionOptions(); - return GET_CPLUSPLUS_POINTER(opts); -} - -/* - * Class: org_rocksdb_OptimisticTransactionOptions - * Method: isSetSnapshot - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_OptimisticTransactionOptions_isSetSnapshot( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* opts = - reinterpret_cast( - jhandle); - return opts->set_snapshot; -} - -/* - * Class: org_rocksdb_OptimisticTransactionOptions - * Method: setSetSnapshot - * Signature: (JZ)V - */ -void Java_org_rocksdb_OptimisticTransactionOptions_setSetSnapshot( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean jset_snapshot) { - auto* opts = - reinterpret_cast( - jhandle); - opts->set_snapshot = jset_snapshot; -} - -/* - * Class: org_rocksdb_OptimisticTransactionOptions - * Method: setComparator - * Signature: (JJ)V - */ -void Java_org_rocksdb_OptimisticTransactionOptions_setComparator( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jcomparator_handle) { - auto* opts = - reinterpret_cast( - jhandle); - opts->cmp = - reinterpret_cast(jcomparator_handle); -} - -/* - * Class: org_rocksdb_OptimisticTransactionOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_OptimisticTransactionOptions_disposeInternal( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - delete reinterpret_cast( - jhandle); -} diff --git a/java/rocksjni/options.cc b/java/rocksjni/options.cc deleted file mode 100644 index 724d298e7..000000000 --- a/java/rocksjni/options.cc +++ /dev/null @@ -1,8695 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::Options. 
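// The isSetSnapshot/setSetSnapshot/setComparator bridges above map directly
// onto the public OptimisticTransactionOptions fields. Typical native usage,
// assuming the public rocksdb headers (the function name BeginSnapshotTxn is
// illustrative):
#include <rocksdb/utilities/optimistic_transaction_db.h>
#include <rocksdb/utilities/transaction.h>

rocksdb::Transaction* BeginSnapshotTxn(
    rocksdb::OptimisticTransactionDB* otdb) {
  rocksdb::OptimisticTransactionOptions txn_options;
  // Take a snapshot when the transaction begins, so commit-time conflict
  // checking validates against that snapshot rather than against the point
  // where each key was first read or written.
  txn_options.set_snapshot = true;
  // txn_options.cmp defaults to BytewiseComparator(); it only needs to be
  // overridden (what setComparator does) when the column family itself was
  // opened with a custom comparator.
  return otdb->BeginTransaction(rocksdb::WriteOptions(), txn_options);
}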
- -#include "rocksdb/options.h" - -#include -#include -#include - -#include -#include - -#include "include/org_rocksdb_ColumnFamilyOptions.h" -#include "include/org_rocksdb_ComparatorOptions.h" -#include "include/org_rocksdb_DBOptions.h" -#include "include/org_rocksdb_FlushOptions.h" -#include "include/org_rocksdb_Options.h" -#include "include/org_rocksdb_ReadOptions.h" -#include "include/org_rocksdb_WriteOptions.h" -#include "rocksdb/comparator.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/merge_operator.h" -#include "rocksdb/rate_limiter.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/sst_partitioner.h" -#include "rocksdb/statistics.h" -#include "rocksdb/table.h" -#include "rocksjni/comparatorjnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -#include "rocksjni/statisticsjni.h" -#include "rocksjni/table_filter_jnicallback.h" -#include "utilities/merge_operators.h" - -/* - * Class: org_rocksdb_Options - * Method: newOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_Options_newOptions__(JNIEnv*, jclass) { - auto* op = new ROCKSDB_NAMESPACE::Options(); - return GET_CPLUSPLUS_POINTER(op); -} - -/* - * Class: org_rocksdb_Options - * Method: newOptions - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_Options_newOptions__JJ(JNIEnv*, jclass, jlong jdboptions, - jlong jcfoptions) { - auto* dbOpt = - reinterpret_cast(jdboptions); - auto* cfOpt = reinterpret_cast( - jcfoptions); - auto* op = new ROCKSDB_NAMESPACE::Options(*dbOpt, *cfOpt); - return GET_CPLUSPLUS_POINTER(op); -} - -/* - * Class: org_rocksdb_Options - * Method: copyOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_copyOptions(JNIEnv*, jclass, jlong jhandle) { - auto new_opt = new ROCKSDB_NAMESPACE::Options( - *(reinterpret_cast(jhandle))); - return GET_CPLUSPLUS_POINTER(new_opt); -} - -/* - * Class: org_rocksdb_Options - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_Options_disposeInternal(JNIEnv*, jobject, jlong handle) { - auto* op = reinterpret_cast(handle); - assert(op != nullptr); - delete op; -} - -/* - * Class: org_rocksdb_Options - * Method: setIncreaseParallelism - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setIncreaseParallelism(JNIEnv*, jobject, - jlong jhandle, - jint totalThreads) { - reinterpret_cast(jhandle)->IncreaseParallelism( - static_cast(totalThreads)); -} - -/* - * Class: org_rocksdb_Options - * Method: setCreateIfMissing - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setCreateIfMissing(JNIEnv*, jobject, - jlong jhandle, jboolean flag) { - reinterpret_cast(jhandle)->create_if_missing = - flag; -} - -/* - * Class: org_rocksdb_Options - * Method: createIfMissing - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_createIfMissing(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->create_if_missing; -} - -/* - * Class: org_rocksdb_Options - * Method: setCreateMissingColumnFamilies - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setCreateMissingColumnFamilies(JNIEnv*, jobject, - jlong jhandle, - jboolean flag) { - reinterpret_cast(jhandle) - ->create_missing_column_families = flag; -} - -/* - * Class: org_rocksdb_Options - * Method: createMissingColumnFamilies - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_createMissingColumnFamilies(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->create_missing_column_families; -} - -/* - * Class: 
org_rocksdb_Options - * Method: setComparatorHandle - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setComparatorHandle__JI(JNIEnv*, jobject, - jlong jhandle, - jint builtinComparator) { - switch (builtinComparator) { - case 1: - reinterpret_cast(jhandle)->comparator = - ROCKSDB_NAMESPACE::ReverseBytewiseComparator(); - break; - default: - reinterpret_cast(jhandle)->comparator = - ROCKSDB_NAMESPACE::BytewiseComparator(); - break; - } -} - -/* - * Class: org_rocksdb_Options - * Method: setComparatorHandle - * Signature: (JJB)V - */ -void Java_org_rocksdb_Options_setComparatorHandle__JJB(JNIEnv*, jobject, - jlong jopt_handle, - jlong jcomparator_handle, - jbyte jcomparator_type) { - ROCKSDB_NAMESPACE::Comparator* comparator = nullptr; - switch (jcomparator_type) { - // JAVA_COMPARATOR - case 0x0: - comparator = reinterpret_cast( - jcomparator_handle); - break; - - // JAVA_NATIVE_COMPARATOR_WRAPPER - case 0x1: - comparator = - reinterpret_cast(jcomparator_handle); - break; - } - auto* opt = reinterpret_cast(jopt_handle); - opt->comparator = comparator; -} - -/* - * Class: org_rocksdb_Options - * Method: setMergeOperatorName - * Signature: (JJjava/lang/String)V - */ -void Java_org_rocksdb_Options_setMergeOperatorName(JNIEnv* env, jobject, - jlong jhandle, - jstring jop_name) { - const char* op_name = env->GetStringUTFChars(jop_name, nullptr); - if (op_name == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - auto* options = reinterpret_cast(jhandle); - options->merge_operator = - ROCKSDB_NAMESPACE::MergeOperators::CreateFromStringId(op_name); - - env->ReleaseStringUTFChars(jop_name, op_name); -} - -/* - * Class: org_rocksdb_Options - * Method: setMergeOperator - * Signature: (JJjava/lang/String)V - */ -void Java_org_rocksdb_Options_setMergeOperator(JNIEnv*, jobject, jlong jhandle, - jlong mergeOperatorHandle) { - reinterpret_cast(jhandle)->merge_operator = - *(reinterpret_cast*>( - mergeOperatorHandle)); -} - -/* - * Class: org_rocksdb_Options - * Method: setCompactionFilterHandle - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setCompactionFilterHandle( - JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilter_handle) { - reinterpret_cast(jopt_handle) - ->compaction_filter = - reinterpret_cast( - jcompactionfilter_handle); -} - -/* - * Class: org_rocksdb_Options - * Method: setCompactionFilterFactoryHandle - * Signature: (JJ)V - */ -void JNICALL Java_org_rocksdb_Options_setCompactionFilterFactoryHandle( - JNIEnv*, jobject, jlong jopt_handle, - jlong jcompactionfilterfactory_handle) { - auto* cff_factory = reinterpret_cast< - std::shared_ptr*>( - jcompactionfilterfactory_handle); - reinterpret_cast(jopt_handle) - ->compaction_filter_factory = *cff_factory; -} - -/* - * Class: org_rocksdb_Options - * Method: setWriteBufferSize - * Signature: (JJ)I - */ -void Java_org_rocksdb_Options_setWriteBufferSize(JNIEnv* env, jobject, - jlong jhandle, - jlong jwrite_buffer_size) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - jwrite_buffer_size); - if (s.ok()) { - reinterpret_cast(jhandle)->write_buffer_size = - jwrite_buffer_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Options - * Method: setWriteBufferManager - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setWriteBufferManager( - JNIEnv*, jobject, jlong joptions_handle, - jlong jwrite_buffer_manager_handle) { - auto* write_buffer_manager = - reinterpret_cast*>( - jwrite_buffer_manager_handle); - 
reinterpret_cast(joptions_handle) - ->write_buffer_manager = *write_buffer_manager; -} - -/* - * Class: org_rocksdb_Options - * Method: writeBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_writeBufferSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->write_buffer_size; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxWriteBufferNumber - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMaxWriteBufferNumber( - JNIEnv*, jobject, jlong jhandle, jint jmax_write_buffer_number) { - reinterpret_cast(jhandle) - ->max_write_buffer_number = jmax_write_buffer_number; -} - -/* - * Class: org_rocksdb_Options - * Method: setStatistics - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setStatistics(JNIEnv*, jobject, jlong jhandle, - jlong jstatistics_handle) { - auto* opt = reinterpret_cast(jhandle); - auto* pSptr = - reinterpret_cast*>( - jstatistics_handle); - opt->statistics = *pSptr; -} - -/* - * Class: org_rocksdb_Options - * Method: statistics - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_statistics(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - std::shared_ptr sptr = opt->statistics; - if (sptr == nullptr) { - return 0; - } else { - std::shared_ptr* pSptr = - new std::shared_ptr(sptr); - return GET_CPLUSPLUS_POINTER(pSptr); - } -} - -/* - * Class: org_rocksdb_Options - * Method: maxWriteBufferNumber - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_maxWriteBufferNumber(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_write_buffer_number; -} - -/* - * Class: org_rocksdb_Options - * Method: errorIfExists - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_errorIfExists(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->error_if_exists; -} - -/* - * Class: org_rocksdb_Options - * Method: setErrorIfExists - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setErrorIfExists(JNIEnv*, jobject, jlong jhandle, - jboolean error_if_exists) { - reinterpret_cast(jhandle)->error_if_exists = - static_cast(error_if_exists); -} - -/* - * Class: org_rocksdb_Options - * Method: paranoidChecks - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_paranoidChecks(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->paranoid_checks; -} - -/* - * Class: org_rocksdb_Options - * Method: setParanoidChecks - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setParanoidChecks(JNIEnv*, jobject, jlong jhandle, - jboolean paranoid_checks) { - reinterpret_cast(jhandle)->paranoid_checks = - static_cast(paranoid_checks); -} - -/* - * Class: org_rocksdb_Options - * Method: setEnv - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setEnv(JNIEnv*, jobject, jlong jhandle, - jlong jenv) { - reinterpret_cast(jhandle)->env = - reinterpret_cast(jenv); -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxTotalWalSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMaxTotalWalSize(JNIEnv*, jobject, - jlong jhandle, - jlong jmax_total_wal_size) { - reinterpret_cast(jhandle)->max_total_wal_size = - static_cast(jmax_total_wal_size); -} - -/* - * Class: org_rocksdb_Options - * Method: maxTotalWalSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_maxTotalWalSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_total_wal_size; -} - -/* - * Class: org_rocksdb_Options - * Method: maxOpenFiles - * Signature: (J)I - */ -jint 
Java_org_rocksdb_Options_maxOpenFiles(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->max_open_files; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxOpenFiles - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMaxOpenFiles(JNIEnv*, jobject, jlong jhandle, - jint max_open_files) { - reinterpret_cast(jhandle)->max_open_files = - static_cast(max_open_files); -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxFileOpeningThreads - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMaxFileOpeningThreads( - JNIEnv*, jobject, jlong jhandle, jint jmax_file_opening_threads) { - reinterpret_cast(jhandle) - ->max_file_opening_threads = static_cast(jmax_file_opening_threads); -} - -/* - * Class: org_rocksdb_Options - * Method: maxFileOpeningThreads - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_maxFileOpeningThreads(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->max_file_opening_threads); -} - -/* - * Class: org_rocksdb_Options - * Method: useFsync - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_useFsync(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->use_fsync; -} - -/* - * Class: org_rocksdb_Options - * Method: setUseFsync - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setUseFsync(JNIEnv*, jobject, jlong jhandle, - jboolean use_fsync) { - reinterpret_cast(jhandle)->use_fsync = - static_cast(use_fsync); -} - -/* - * Class: org_rocksdb_Options - * Method: setDbPaths - * Signature: (J[Ljava/lang/String;[J)V - */ -void Java_org_rocksdb_Options_setDbPaths(JNIEnv* env, jobject, jlong jhandle, - jobjectArray jpaths, - jlongArray jtarget_sizes) { - std::vector db_paths; - jlong* ptr_jtarget_size = env->GetLongArrayElements(jtarget_sizes, nullptr); - if (ptr_jtarget_size == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - jboolean has_exception = JNI_FALSE; - const jsize len = env->GetArrayLength(jpaths); - for (jsize i = 0; i < len; i++) { - jobject jpath = - reinterpret_cast(env->GetObjectArrayElement(jpaths, i)); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - std::string path = ROCKSDB_NAMESPACE::JniUtil::copyStdString( - env, static_cast(jpath), &has_exception); - env->DeleteLocalRef(jpath); - - if (has_exception == JNI_TRUE) { - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - - jlong jtarget_size = ptr_jtarget_size[i]; - - db_paths.push_back( - ROCKSDB_NAMESPACE::DbPath(path, static_cast(jtarget_size))); - } - - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - - auto* opt = reinterpret_cast(jhandle); - opt->db_paths = db_paths; -} - -/* - * Class: org_rocksdb_Options - * Method: dbPathsLen - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_dbPathsLen(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->db_paths.size()); -} - -/* - * Class: org_rocksdb_Options - * Method: dbPaths - * Signature: (J[Ljava/lang/String;[J)V - */ -void Java_org_rocksdb_Options_dbPaths(JNIEnv* env, jobject, jlong jhandle, - jobjectArray jpaths, - jlongArray jtarget_sizes) { - jboolean is_copy; - jlong* ptr_jtarget_size = env->GetLongArrayElements(jtarget_sizes, &is_copy); - if (ptr_jtarget_size == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - auto* opt = 
reinterpret_cast(jhandle); - const jsize len = env->GetArrayLength(jpaths); - for (jsize i = 0; i < len; i++) { - ROCKSDB_NAMESPACE::DbPath db_path = opt->db_paths[i]; - - jstring jpath = env->NewStringUTF(db_path.path.c_str()); - if (jpath == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - env->SetObjectArrayElement(jpaths, i, jpath); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jpath); - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - - ptr_jtarget_size[i] = static_cast(db_path.target_size); - } - - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, - is_copy == JNI_TRUE ? 0 : JNI_ABORT); -} - -/* - * Class: org_rocksdb_Options - * Method: dbLogDir - * Signature: (J)Ljava/lang/String - */ -jstring Java_org_rocksdb_Options_dbLogDir(JNIEnv* env, jobject, jlong jhandle) { - return env->NewStringUTF( - reinterpret_cast(jhandle) - ->db_log_dir.c_str()); -} - -/* - * Class: org_rocksdb_Options - * Method: setDbLogDir - * Signature: (JLjava/lang/String)V - */ -void Java_org_rocksdb_Options_setDbLogDir(JNIEnv* env, jobject, jlong jhandle, - jstring jdb_log_dir) { - const char* log_dir = env->GetStringUTFChars(jdb_log_dir, nullptr); - if (log_dir == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - reinterpret_cast(jhandle)->db_log_dir.assign( - log_dir); - env->ReleaseStringUTFChars(jdb_log_dir, log_dir); -} - -/* - * Class: org_rocksdb_Options - * Method: walDir - * Signature: (J)Ljava/lang/String - */ -jstring Java_org_rocksdb_Options_walDir(JNIEnv* env, jobject, jlong jhandle) { - return env->NewStringUTF( - reinterpret_cast(jhandle)->wal_dir.c_str()); -} - -/* - * Class: org_rocksdb_Options - * Method: setWalDir - * Signature: (JLjava/lang/String)V - */ -void Java_org_rocksdb_Options_setWalDir(JNIEnv* env, jobject, jlong jhandle, - jstring jwal_dir) { - const char* wal_dir = env->GetStringUTFChars(jwal_dir, nullptr); - if (wal_dir == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - reinterpret_cast(jhandle)->wal_dir.assign( - wal_dir); - env->ReleaseStringUTFChars(jwal_dir, wal_dir); -} - -/* - * Class: org_rocksdb_Options - * Method: deleteObsoleteFilesPeriodMicros - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_deleteObsoleteFilesPeriodMicros(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->delete_obsolete_files_period_micros; -} - -/* - * Class: org_rocksdb_Options - * Method: setDeleteObsoleteFilesPeriodMicros - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setDeleteObsoleteFilesPeriodMicros(JNIEnv*, - jobject, - jlong jhandle, - jlong micros) { - reinterpret_cast(jhandle) - ->delete_obsolete_files_period_micros = static_cast(micros); -} - -/* - * Class: org_rocksdb_Options - * Method: maxBackgroundCompactions - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_maxBackgroundCompactions(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_background_compactions; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxBackgroundCompactions - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMaxBackgroundCompactions(JNIEnv*, jobject, - jlong jhandle, - jint max) { - reinterpret_cast(jhandle) - ->max_background_compactions = static_cast(max); -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxSubcompactions - * Signature: (JI)V - */ -void 
Java_org_rocksdb_Options_setMaxSubcompactions(JNIEnv*, jobject, - jlong jhandle, jint max) { - reinterpret_cast(jhandle)->max_subcompactions = - static_cast(max); -} - -/* - * Class: org_rocksdb_Options - * Method: maxSubcompactions - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_maxSubcompactions(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_subcompactions; -} - -/* - * Class: org_rocksdb_Options - * Method: maxBackgroundFlushes - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_maxBackgroundFlushes(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_background_flushes; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxBackgroundFlushes - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMaxBackgroundFlushes( - JNIEnv*, jobject, jlong jhandle, jint max_background_flushes) { - reinterpret_cast(jhandle) - ->max_background_flushes = static_cast(max_background_flushes); -} - -/* - * Class: org_rocksdb_Options - * Method: maxBackgroundJobs - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_maxBackgroundJobs(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_background_jobs; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxBackgroundJobs - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMaxBackgroundJobs(JNIEnv*, jobject, - jlong jhandle, - jint max_background_jobs) { - reinterpret_cast(jhandle)->max_background_jobs = - static_cast(max_background_jobs); -} - -/* - * Class: org_rocksdb_Options - * Method: maxLogFileSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_maxLogFileSize(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_log_file_size; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxLogFileSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMaxLogFileSize(JNIEnv* env, jobject, - jlong jhandle, - jlong max_log_file_size) { - auto s = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(max_log_file_size); - if (s.ok()) { - reinterpret_cast(jhandle)->max_log_file_size = - max_log_file_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Options - * Method: logFileTimeToRoll - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_logFileTimeToRoll(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->log_file_time_to_roll; -} - -/* - * Class: org_rocksdb_Options - * Method: setLogFileTimeToRoll - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setLogFileTimeToRoll( - JNIEnv* env, jobject, jlong jhandle, jlong log_file_time_to_roll) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - log_file_time_to_roll); - if (s.ok()) { - reinterpret_cast(jhandle) - ->log_file_time_to_roll = log_file_time_to_roll; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Options - * Method: keepLogFileNum - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_keepLogFileNum(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->keep_log_file_num; -} - -/* - * Class: org_rocksdb_Options - * Method: setKeepLogFileNum - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setKeepLogFileNum(JNIEnv* env, jobject, - jlong jhandle, - jlong keep_log_file_num) { - auto s = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(keep_log_file_num); - if (s.ok()) { - reinterpret_cast(jhandle)->keep_log_file_num = - 
keep_log_file_num; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Options - * Method: recycleLogFileNum - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_recycleLogFileNum(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->recycle_log_file_num; -} - -/* - * Class: org_rocksdb_Options - * Method: setRecycleLogFileNum - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setRecycleLogFileNum(JNIEnv* env, jobject, - jlong jhandle, - jlong recycle_log_file_num) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - recycle_log_file_num); - if (s.ok()) { - reinterpret_cast(jhandle) - ->recycle_log_file_num = recycle_log_file_num; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Options - * Method: maxManifestFileSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_maxManifestFileSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_manifest_file_size; -} - -/* - * Method: memTableFactoryName - * Signature: (J)Ljava/lang/String - */ -jstring Java_org_rocksdb_Options_memTableFactoryName(JNIEnv* env, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::MemTableRepFactory* tf = opt->memtable_factory.get(); - - // Should never be nullptr. - // Default memtable factory is SkipListFactory - assert(tf); - - // temporarly fix for the historical typo - if (strcmp(tf->Name(), "HashLinkListRepFactory") == 0) { - return env->NewStringUTF("HashLinkedListRepFactory"); - } - - return env->NewStringUTF(tf->Name()); -} - -static std::vector -rocksdb_convert_cf_paths_from_java_helper(JNIEnv* env, jobjectArray path_array, - jlongArray size_array, - jboolean* has_exception) { - jboolean copy_str_has_exception; - std::vector paths = ROCKSDB_NAMESPACE::JniUtil::copyStrings( - env, path_array, ©_str_has_exception); - if (JNI_TRUE == copy_str_has_exception) { - // Exception thrown - *has_exception = JNI_TRUE; - return {}; - } - - if (static_cast(env->GetArrayLength(size_array)) != paths.size()) { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew( - env, - ROCKSDB_NAMESPACE::Status::InvalidArgument( - ROCKSDB_NAMESPACE::Slice("There should be a corresponding target " - "size for every path and vice versa."))); - *has_exception = JNI_TRUE; - return {}; - } - - jlong* size_array_ptr = env->GetLongArrayElements(size_array, nullptr); - if (nullptr == size_array_ptr) { - // exception thrown: OutOfMemoryError - *has_exception = JNI_TRUE; - return {}; - } - std::vector cf_paths; - for (size_t i = 0; i < paths.size(); ++i) { - jlong target_size = size_array_ptr[i]; - if (target_size < 0) { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew( - env, - ROCKSDB_NAMESPACE::Status::InvalidArgument(ROCKSDB_NAMESPACE::Slice( - "Path target size has to be positive."))); - *has_exception = JNI_TRUE; - env->ReleaseLongArrayElements(size_array, size_array_ptr, JNI_ABORT); - return {}; - } - cf_paths.push_back(ROCKSDB_NAMESPACE::DbPath( - paths[i], static_cast(target_size))); - } - - env->ReleaseLongArrayElements(size_array, size_array_ptr, JNI_ABORT); - - return cf_paths; -} - -/* - * Class: org_rocksdb_Options - * Method: setCfPaths - * Signature: (J[Ljava/lang/String;[J)V - */ -void Java_org_rocksdb_Options_setCfPaths(JNIEnv* env, jclass, jlong jhandle, - jobjectArray path_array, - jlongArray size_array) { - auto* options = reinterpret_cast(jhandle); - jboolean 
has_exception = JNI_FALSE; - std::vector cf_paths = - rocksdb_convert_cf_paths_from_java_helper(env, path_array, size_array, - &has_exception); - if (JNI_FALSE == has_exception) { - options->cf_paths = std::move(cf_paths); - } -} - -/* - * Class: org_rocksdb_Options - * Method: cfPathsLen - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_cfPathsLen(JNIEnv*, jclass, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->cf_paths.size()); -} - -template -static void rocksdb_convert_cf_paths_to_java_helper(JNIEnv* env, jlong jhandle, - jobjectArray jpaths, - jlongArray jtarget_sizes) { - jboolean is_copy; - jlong* ptr_jtarget_size = env->GetLongArrayElements(jtarget_sizes, &is_copy); - if (ptr_jtarget_size == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - auto* opt = reinterpret_cast(jhandle); - const jsize len = env->GetArrayLength(jpaths); - for (jsize i = 0; i < len; i++) { - ROCKSDB_NAMESPACE::DbPath cf_path = opt->cf_paths[i]; - - jstring jpath = env->NewStringUTF(cf_path.path.c_str()); - if (jpath == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - env->SetObjectArrayElement(jpaths, i, jpath); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jpath); - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - - ptr_jtarget_size[i] = static_cast(cf_path.target_size); - - env->DeleteLocalRef(jpath); - } - - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, - is_copy ? 0 : JNI_ABORT); -} - -/* - * Class: org_rocksdb_Options - * Method: cfPaths - * Signature: (J[Ljava/lang/String;[J)V - */ -void Java_org_rocksdb_Options_cfPaths(JNIEnv* env, jclass, jlong jhandle, - jobjectArray jpaths, - jlongArray jtarget_sizes) { - rocksdb_convert_cf_paths_to_java_helper( - env, jhandle, jpaths, jtarget_sizes); -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxManifestFileSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMaxManifestFileSize( - JNIEnv*, jobject, jlong jhandle, jlong max_manifest_file_size) { - reinterpret_cast(jhandle) - ->max_manifest_file_size = static_cast(max_manifest_file_size); -} - -/* - * Method: setMemTableFactory - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMemTableFactory(JNIEnv*, jobject, - jlong jhandle, - jlong jfactory_handle) { - reinterpret_cast(jhandle) - ->memtable_factory.reset( - reinterpret_cast( - jfactory_handle)); -} - -/* - * Class: org_rocksdb_Options - * Method: setRateLimiter - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setRateLimiter(JNIEnv*, jobject, jlong jhandle, - jlong jrate_limiter_handle) { - std::shared_ptr* pRateLimiter = - reinterpret_cast*>( - jrate_limiter_handle); - reinterpret_cast(jhandle)->rate_limiter = - *pRateLimiter; -} - -/* - * Class: org_rocksdb_Options - * Method: setSstFileManager - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setSstFileManager( - JNIEnv*, jobject, jlong jhandle, jlong jsst_file_manager_handle) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jsst_file_manager_handle); - reinterpret_cast(jhandle)->sst_file_manager = - *sptr_sst_file_manager; -} - -/* - * Class: org_rocksdb_Options - * Method: setLogger - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setLogger(JNIEnv*, jobject, jlong jhandle, - jlong jlogger_handle) { - std::shared_ptr* pLogger = - reinterpret_cast*>( - jlogger_handle); - 
reinterpret_cast(jhandle)->info_log = *pLogger; -} - -/* - * Class: org_rocksdb_Options - * Method: setInfoLogLevel - * Signature: (JB)V - */ -void Java_org_rocksdb_Options_setInfoLogLevel(JNIEnv*, jobject, jlong jhandle, - jbyte jlog_level) { - reinterpret_cast(jhandle)->info_log_level = - static_cast(jlog_level); -} - -/* - * Class: org_rocksdb_Options - * Method: infoLogLevel - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Options_infoLogLevel(JNIEnv*, jobject, jlong jhandle) { - return static_cast( - reinterpret_cast(jhandle)->info_log_level); -} - -/* - * Class: org_rocksdb_Options - * Method: tableCacheNumshardbits - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_tableCacheNumshardbits(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->table_cache_numshardbits; -} - -/* - * Class: org_rocksdb_Options - * Method: setTableCacheNumshardbits - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setTableCacheNumshardbits( - JNIEnv*, jobject, jlong jhandle, jint table_cache_numshardbits) { - reinterpret_cast(jhandle) - ->table_cache_numshardbits = static_cast(table_cache_numshardbits); -} - -/* - * Method: useFixedLengthPrefixExtractor - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_useFixedLengthPrefixExtractor( - JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { - reinterpret_cast(jhandle) - ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewFixedPrefixTransform( - static_cast(jprefix_length))); -} - -/* - * Method: useCappedPrefixExtractor - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_useCappedPrefixExtractor(JNIEnv*, jobject, - jlong jhandle, - jint jprefix_length) { - reinterpret_cast(jhandle) - ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewCappedPrefixTransform( - static_cast(jprefix_length))); -} - -/* - * Class: org_rocksdb_Options - * Method: walTtlSeconds - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_walTtlSeconds(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->WAL_ttl_seconds; -} - -/* - * Class: org_rocksdb_Options - * Method: setWalTtlSeconds - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setWalTtlSeconds(JNIEnv*, jobject, jlong jhandle, - jlong WAL_ttl_seconds) { - reinterpret_cast(jhandle)->WAL_ttl_seconds = - static_cast(WAL_ttl_seconds); -} - -/* - * Class: org_rocksdb_Options - * Method: walTtlSeconds - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_walSizeLimitMB(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->WAL_size_limit_MB; -} - -/* - * Class: org_rocksdb_Options - * Method: setWalSizeLimitMB - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setWalSizeLimitMB(JNIEnv*, jobject, jlong jhandle, - jlong WAL_size_limit_MB) { - reinterpret_cast(jhandle)->WAL_size_limit_MB = - static_cast(WAL_size_limit_MB); -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxWriteBatchGroupSizeBytes - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMaxWriteBatchGroupSizeBytes( - JNIEnv*, jclass, jlong jhandle, jlong jmax_write_batch_group_size_bytes) { - auto* opt = reinterpret_cast(jhandle); - opt->max_write_batch_group_size_bytes = - static_cast(jmax_write_batch_group_size_bytes); -} - -/* - * Class: org_rocksdb_Options - * Method: maxWriteBatchGroupSizeBytes - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_maxWriteBatchGroupSizeBytes(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->max_write_batch_group_size_bytes); -} - -/* - * Class: org_rocksdb_Options - * 
Method: manifestPreallocationSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_manifestPreallocationSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->manifest_preallocation_size; -} - -/* - * Class: org_rocksdb_Options - * Method: setManifestPreallocationSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setManifestPreallocationSize( - JNIEnv* env, jobject, jlong jhandle, jlong preallocation_size) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - preallocation_size); - if (s.ok()) { - reinterpret_cast(jhandle) - ->manifest_preallocation_size = preallocation_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Method: setTableFactory - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setTableFactory(JNIEnv*, jobject, jlong jhandle, - jlong jtable_factory_handle) { - auto* options = reinterpret_cast(jhandle); - auto* table_factory = - reinterpret_cast(jtable_factory_handle); - options->table_factory.reset(table_factory); -} - -/* - * Method: setSstPartitionerFactory - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setSstPartitionerFactory(JNIEnv*, jobject, - jlong jhandle, - jlong factory_handle) { - auto* options = reinterpret_cast(jhandle); - auto factory = reinterpret_cast< - std::shared_ptr*>( - factory_handle); - options->sst_partitioner_factory = *factory; -} - -/* - * Class: org_rocksdb_Options - * Method: setCompactionThreadLimiter - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setCompactionThreadLimiter( - JNIEnv*, jclass, jlong jhandle, jlong jlimiter_handle) { - auto* options = reinterpret_cast(jhandle); - auto* limiter = reinterpret_cast< - std::shared_ptr*>( - jlimiter_handle); - options->compaction_thread_limiter = *limiter; -} - -/* - * Class: org_rocksdb_Options - * Method: allowMmapReads - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_allowMmapReads(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->allow_mmap_reads; -} - -/* - * Class: org_rocksdb_Options - * Method: setAllowMmapReads - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAllowMmapReads(JNIEnv*, jobject, jlong jhandle, - jboolean allow_mmap_reads) { - reinterpret_cast(jhandle)->allow_mmap_reads = - static_cast(allow_mmap_reads); -} - -/* - * Class: org_rocksdb_Options - * Method: allowMmapWrites - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_allowMmapWrites(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->allow_mmap_writes; -} - -/* - * Class: org_rocksdb_Options - * Method: setAllowMmapWrites - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAllowMmapWrites(JNIEnv*, jobject, - jlong jhandle, - jboolean allow_mmap_writes) { - reinterpret_cast(jhandle)->allow_mmap_writes = - static_cast(allow_mmap_writes); -} - -/* - * Class: org_rocksdb_Options - * Method: useDirectReads - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_useDirectReads(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->use_direct_reads; -} - -/* - * Class: org_rocksdb_Options - * Method: setUseDirectReads - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setUseDirectReads(JNIEnv*, jobject, jlong jhandle, - jboolean use_direct_reads) { - reinterpret_cast(jhandle)->use_direct_reads = - static_cast(use_direct_reads); -} - -/* - * Class: org_rocksdb_Options - * Method: useDirectIoForFlushAndCompaction - * Signature: (J)Z - */ -jboolean 
Java_org_rocksdb_Options_useDirectIoForFlushAndCompaction( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->use_direct_io_for_flush_and_compaction; -} - -/* - * Class: org_rocksdb_Options - * Method: setUseDirectIoForFlushAndCompaction - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setUseDirectIoForFlushAndCompaction( - JNIEnv*, jobject, jlong jhandle, - jboolean use_direct_io_for_flush_and_compaction) { - reinterpret_cast(jhandle) - ->use_direct_io_for_flush_and_compaction = - static_cast(use_direct_io_for_flush_and_compaction); -} - -/* - * Class: org_rocksdb_Options - * Method: setAllowFAllocate - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAllowFAllocate(JNIEnv*, jobject, jlong jhandle, - jboolean jallow_fallocate) { - reinterpret_cast(jhandle)->allow_fallocate = - static_cast(jallow_fallocate); -} - -/* - * Class: org_rocksdb_Options - * Method: allowFAllocate - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_allowFAllocate(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->allow_fallocate); -} - -/* - * Class: org_rocksdb_Options - * Method: isFdCloseOnExec - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_isFdCloseOnExec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->is_fd_close_on_exec; -} - -/* - * Class: org_rocksdb_Options - * Method: setIsFdCloseOnExec - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setIsFdCloseOnExec(JNIEnv*, jobject, - jlong jhandle, - jboolean is_fd_close_on_exec) { - reinterpret_cast(jhandle)->is_fd_close_on_exec = - static_cast(is_fd_close_on_exec); -} - -/* - * Class: org_rocksdb_Options - * Method: statsDumpPeriodSec - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_statsDumpPeriodSec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->stats_dump_period_sec; -} - -/* - * Class: org_rocksdb_Options - * Method: setStatsDumpPeriodSec - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setStatsDumpPeriodSec( - JNIEnv*, jobject, jlong jhandle, jint jstats_dump_period_sec) { - reinterpret_cast(jhandle) - ->stats_dump_period_sec = - static_cast(jstats_dump_period_sec); -} - -/* - * Class: org_rocksdb_Options - * Method: statsPersistPeriodSec - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_statsPersistPeriodSec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->stats_persist_period_sec; -} - -/* - * Class: org_rocksdb_Options - * Method: setStatsPersistPeriodSec - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setStatsPersistPeriodSec( - JNIEnv*, jobject, jlong jhandle, jint jstats_persist_period_sec) { - reinterpret_cast(jhandle) - ->stats_persist_period_sec = - static_cast(jstats_persist_period_sec); -} - -/* - * Class: org_rocksdb_Options - * Method: statsHistoryBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_statsHistoryBufferSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->stats_history_buffer_size; -} - -/* - * Class: org_rocksdb_Options - * Method: setStatsHistoryBufferSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setStatsHistoryBufferSize( - JNIEnv*, jobject, jlong jhandle, jlong jstats_history_buffer_size) { - reinterpret_cast(jhandle) - ->stats_history_buffer_size = - static_cast(jstats_history_buffer_size); -} - -/* - * Class: org_rocksdb_Options - * Method: adviseRandomOnOpen - * Signature: (J)Z - */ -jboolean 
Java_org_rocksdb_Options_adviseRandomOnOpen(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->advise_random_on_open; -} - -/* - * Class: org_rocksdb_Options - * Method: setAdviseRandomOnOpen - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAdviseRandomOnOpen( - JNIEnv*, jobject, jlong jhandle, jboolean advise_random_on_open) { - reinterpret_cast(jhandle) - ->advise_random_on_open = static_cast(advise_random_on_open); -} - -/* - * Class: org_rocksdb_Options - * Method: setDbWriteBufferSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setDbWriteBufferSize( - JNIEnv*, jobject, jlong jhandle, jlong jdb_write_buffer_size) { - auto* opt = reinterpret_cast(jhandle); - opt->db_write_buffer_size = static_cast(jdb_write_buffer_size); -} - -/* - * Class: org_rocksdb_Options - * Method: dbWriteBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_dbWriteBufferSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->db_write_buffer_size); -} - -/* - * Class: org_rocksdb_Options - * Method: setAccessHintOnCompactionStart - * Signature: (JB)V - */ -void Java_org_rocksdb_Options_setAccessHintOnCompactionStart( - JNIEnv*, jobject, jlong jhandle, jbyte jaccess_hint_value) { - auto* opt = reinterpret_cast(jhandle); - opt->access_hint_on_compaction_start = - ROCKSDB_NAMESPACE::AccessHintJni::toCppAccessHint(jaccess_hint_value); -} - -/* - * Class: org_rocksdb_Options - * Method: accessHintOnCompactionStart - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Options_accessHintOnCompactionStart(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::AccessHintJni::toJavaAccessHint( - opt->access_hint_on_compaction_start); -} - -/* - * Class: org_rocksdb_Options - * Method: setCompactionReadaheadSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setCompactionReadaheadSize( - JNIEnv*, jobject, jlong jhandle, jlong jcompaction_readahead_size) { - auto* opt = reinterpret_cast(jhandle); - opt->compaction_readahead_size = - static_cast(jcompaction_readahead_size); -} - -/* - * Class: org_rocksdb_Options - * Method: compactionReadaheadSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_compactionReadaheadSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->compaction_readahead_size); -} - -/* - * Class: org_rocksdb_Options - * Method: setRandomAccessMaxBufferSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setRandomAccessMaxBufferSize( - JNIEnv*, jobject, jlong jhandle, jlong jrandom_access_max_buffer_size) { - auto* opt = reinterpret_cast(jhandle); - opt->random_access_max_buffer_size = - static_cast(jrandom_access_max_buffer_size); -} - -/* - * Class: org_rocksdb_Options - * Method: randomAccessMaxBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_randomAccessMaxBufferSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->random_access_max_buffer_size); -} - -/* - * Class: org_rocksdb_Options - * Method: setWritableFileMaxBufferSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setWritableFileMaxBufferSize( - JNIEnv*, jobject, jlong jhandle, jlong jwritable_file_max_buffer_size) { - auto* opt = reinterpret_cast(jhandle); - opt->writable_file_max_buffer_size = - static_cast(jwritable_file_max_buffer_size); -} - -/* - * Class: org_rocksdb_Options - * Method: writableFileMaxBufferSize - * 
Signature: (J)J - */ -jlong Java_org_rocksdb_Options_writableFileMaxBufferSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->writable_file_max_buffer_size); -} - -/* - * Class: org_rocksdb_Options - * Method: useAdaptiveMutex - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_useAdaptiveMutex(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->use_adaptive_mutex; -} - -/* - * Class: org_rocksdb_Options - * Method: setUseAdaptiveMutex - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setUseAdaptiveMutex(JNIEnv*, jobject, - jlong jhandle, - jboolean use_adaptive_mutex) { - reinterpret_cast(jhandle)->use_adaptive_mutex = - static_cast(use_adaptive_mutex); -} - -/* - * Class: org_rocksdb_Options - * Method: bytesPerSync - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_bytesPerSync(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->bytes_per_sync; -} - -/* - * Class: org_rocksdb_Options - * Method: setBytesPerSync - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setBytesPerSync(JNIEnv*, jobject, jlong jhandle, - jlong bytes_per_sync) { - reinterpret_cast(jhandle)->bytes_per_sync = - static_cast(bytes_per_sync); -} - -/* - * Class: org_rocksdb_Options - * Method: setWalBytesPerSync - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setWalBytesPerSync(JNIEnv*, jobject, - jlong jhandle, - jlong jwal_bytes_per_sync) { - reinterpret_cast(jhandle)->wal_bytes_per_sync = - static_cast(jwal_bytes_per_sync); -} - -/* - * Class: org_rocksdb_Options - * Method: walBytesPerSync - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_walBytesPerSync(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->wal_bytes_per_sync); -} - -/* - * Class: org_rocksdb_Options - * Method: setStrictBytesPerSync - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setStrictBytesPerSync( - JNIEnv*, jobject, jlong jhandle, jboolean jstrict_bytes_per_sync) { - reinterpret_cast(jhandle) - ->strict_bytes_per_sync = jstrict_bytes_per_sync == JNI_TRUE; -} - -/* - * Class: org_rocksdb_Options - * Method: strictBytesPerSync - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_strictBytesPerSync(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->strict_bytes_per_sync); -} - -// Note: the RocksJava API currently only supports EventListeners implemented in -// Java. It could be extended in future to also support adding/removing -// EventListeners implemented in C++. 
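For context on the note above: the listener bindings that follow expect event listeners written in Java and registered through the Options object; C++-implemented listeners cannot be attached through this path. A minimal Java-side sketch, assuming the public entry point is org.rocksdb.Options#setListeners and the callback base class is org.rocksdb.AbstractEventListener (both live outside this diff, so treat the exact names and signatures as assumptions rather than confirmed API):

    import java.util.Collections;
    import org.rocksdb.AbstractEventListener;
    import org.rocksdb.FlushJobInfo;
    import org.rocksdb.Options;
    import org.rocksdb.RocksDB;

    public class ListenerSketch {
      public static void main(String[] args) {
        RocksDB.loadLibrary();
        // Listener implemented in Java, as the note above requires.
        final AbstractEventListener listener = new AbstractEventListener() {
          @Override
          public void onFlushCompleted(final RocksDB db, final FlushJobInfo info) {
            System.out.println("flush completed: " + info);
          }
        };
        try (Options options = new Options().setCreateIfMissing(true)) {
          // setListeners(...) is assumed to hand the listeners' native handles to the
          // Java_org_rocksdb_Options_setEventListeners binding shown in this file.
          options.setListeners(Collections.singletonList(listener));
        }
      }
    }

The corresponding getter binding (eventListeners) reconstructs the Java objects from the stored callback pointers, which is why only Java-implemented listeners round-trip through this API.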
-static void rocksdb_set_event_listeners_helper( - JNIEnv* env, jlongArray jlistener_array, - std::vector>& - listener_sptr_vec) { - jlong* ptr_jlistener_array = - env->GetLongArrayElements(jlistener_array, nullptr); - if (ptr_jlistener_array == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - const jsize array_size = env->GetArrayLength(jlistener_array); - listener_sptr_vec.clear(); - for (jsize i = 0; i < array_size; ++i) { - const auto& listener_sptr = - *reinterpret_cast*>( - ptr_jlistener_array[i]); - listener_sptr_vec.push_back(listener_sptr); - } -} - -/* - * Class: org_rocksdb_Options - * Method: setEventListeners - * Signature: (J[J)V - */ -void Java_org_rocksdb_Options_setEventListeners(JNIEnv* env, jclass, - jlong jhandle, - jlongArray jlistener_array) { - auto* opt = reinterpret_cast(jhandle); - rocksdb_set_event_listeners_helper(env, jlistener_array, opt->listeners); -} - -// Note: the RocksJava API currently only supports EventListeners implemented in -// Java. It could be extended in future to also support adding/removing -// EventListeners implemented in C++. -static jobjectArray rocksdb_get_event_listeners_helper( - JNIEnv* env, - const std::vector>& - listener_sptr_vec) { - jsize sz = static_cast(listener_sptr_vec.size()); - jclass jlistener_clazz = - ROCKSDB_NAMESPACE::AbstractEventListenerJni::getJClass(env); - jobjectArray jlisteners = env->NewObjectArray(sz, jlistener_clazz, nullptr); - if (jlisteners == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - for (jsize i = 0; i < sz; ++i) { - const auto* jni_cb = - static_cast( - listener_sptr_vec[i].get()); - env->SetObjectArrayElement(jlisteners, i, jni_cb->GetJavaObject()); - } - return jlisteners; -} - -/* - * Class: org_rocksdb_Options - * Method: eventListeners - * Signature: (J)[Lorg/rocksdb/AbstractEventListener; - */ -jobjectArray Java_org_rocksdb_Options_eventListeners(JNIEnv* env, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return rocksdb_get_event_listeners_helper(env, opt->listeners); -} - -/* - * Class: org_rocksdb_Options - * Method: setEnableThreadTracking - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setEnableThreadTracking( - JNIEnv*, jobject, jlong jhandle, jboolean jenable_thread_tracking) { - auto* opt = reinterpret_cast(jhandle); - opt->enable_thread_tracking = static_cast(jenable_thread_tracking); -} - -/* - * Class: org_rocksdb_Options - * Method: enableThreadTracking - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_enableThreadTracking(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->enable_thread_tracking); -} - -/* - * Class: org_rocksdb_Options - * Method: setDelayedWriteRate - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setDelayedWriteRate(JNIEnv*, jobject, - jlong jhandle, - jlong jdelayed_write_rate) { - auto* opt = reinterpret_cast(jhandle); - opt->delayed_write_rate = static_cast(jdelayed_write_rate); -} - -/* - * Class: org_rocksdb_Options - * Method: delayedWriteRate - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_delayedWriteRate(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->delayed_write_rate); -} - -/* - * Class: org_rocksdb_Options - * Method: setEnablePipelinedWrite - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setEnablePipelinedWrite( - JNIEnv*, jobject, jlong jhandle, jboolean jenable_pipelined_write) { - auto* opt = reinterpret_cast(jhandle); - 
opt->enable_pipelined_write = jenable_pipelined_write == JNI_TRUE; -} - -/* - * Class: org_rocksdb_Options - * Method: enablePipelinedWrite - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_enablePipelinedWrite(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->enable_pipelined_write); -} - -/* - * Class: org_rocksdb_Options - * Method: setUnorderedWrite - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setUnorderedWrite(JNIEnv*, jobject, jlong jhandle, - jboolean unordered_write) { - reinterpret_cast(jhandle)->unordered_write = - static_cast(unordered_write); -} - -/* - * Class: org_rocksdb_Options - * Method: unorderedWrite - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_unorderedWrite(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->unordered_write; -} - -/* - * Class: org_rocksdb_Options - * Method: setAllowConcurrentMemtableWrite - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAllowConcurrentMemtableWrite(JNIEnv*, jobject, - jlong jhandle, - jboolean allow) { - reinterpret_cast(jhandle) - ->allow_concurrent_memtable_write = static_cast(allow); -} - -/* - * Class: org_rocksdb_Options - * Method: allowConcurrentMemtableWrite - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_allowConcurrentMemtableWrite(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->allow_concurrent_memtable_write; -} - -/* - * Class: org_rocksdb_Options - * Method: setEnableWriteThreadAdaptiveYield - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setEnableWriteThreadAdaptiveYield( - JNIEnv*, jobject, jlong jhandle, jboolean yield) { - reinterpret_cast(jhandle) - ->enable_write_thread_adaptive_yield = static_cast(yield); -} - -/* - * Class: org_rocksdb_Options - * Method: enableWriteThreadAdaptiveYield - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_enableWriteThreadAdaptiveYield( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->enable_write_thread_adaptive_yield; -} - -/* - * Class: org_rocksdb_Options - * Method: setWriteThreadMaxYieldUsec - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setWriteThreadMaxYieldUsec(JNIEnv*, jobject, - jlong jhandle, - jlong max) { - reinterpret_cast(jhandle) - ->write_thread_max_yield_usec = static_cast(max); -} - -/* - * Class: org_rocksdb_Options - * Method: writeThreadMaxYieldUsec - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_writeThreadMaxYieldUsec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->write_thread_max_yield_usec; -} - -/* - * Class: org_rocksdb_Options - * Method: setWriteThreadSlowYieldUsec - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setWriteThreadSlowYieldUsec(JNIEnv*, jobject, - jlong jhandle, - jlong slow) { - reinterpret_cast(jhandle) - ->write_thread_slow_yield_usec = static_cast(slow); -} - -/* - * Class: org_rocksdb_Options - * Method: writeThreadSlowYieldUsec - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_writeThreadSlowYieldUsec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->write_thread_slow_yield_usec; -} - -/* - * Class: org_rocksdb_Options - * Method: setSkipStatsUpdateOnDbOpen - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setSkipStatsUpdateOnDbOpen( - JNIEnv*, jobject, jlong jhandle, jboolean jskip_stats_update_on_db_open) { - auto* opt = reinterpret_cast(jhandle); - opt->skip_stats_update_on_db_open = - 
static_cast(jskip_stats_update_on_db_open); -} - -/* - * Class: org_rocksdb_Options - * Method: skipStatsUpdateOnDbOpen - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_skipStatsUpdateOnDbOpen(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->skip_stats_update_on_db_open); -} - -/* - * Class: org_rocksdb_Options - * Method: setSkipCheckingSstFileSizesOnDbOpen - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setSkipCheckingSstFileSizesOnDbOpen( - JNIEnv*, jclass, jlong jhandle, - jboolean jskip_checking_sst_file_sizes_on_db_open) { - auto* opt = reinterpret_cast(jhandle); - opt->skip_checking_sst_file_sizes_on_db_open = - static_cast(jskip_checking_sst_file_sizes_on_db_open); -} - -/* - * Class: org_rocksdb_Options - * Method: skipCheckingSstFileSizesOnDbOpen - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_skipCheckingSstFileSizesOnDbOpen( - JNIEnv*, jclass, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->skip_checking_sst_file_sizes_on_db_open); -} - -/* - * Class: org_rocksdb_Options - * Method: setWalRecoveryMode - * Signature: (JB)V - */ -void Java_org_rocksdb_Options_setWalRecoveryMode( - JNIEnv*, jobject, jlong jhandle, jbyte jwal_recovery_mode_value) { - auto* opt = reinterpret_cast(jhandle); - opt->wal_recovery_mode = - ROCKSDB_NAMESPACE::WALRecoveryModeJni::toCppWALRecoveryMode( - jwal_recovery_mode_value); -} - -/* - * Class: org_rocksdb_Options - * Method: walRecoveryMode - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Options_walRecoveryMode(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::WALRecoveryModeJni::toJavaWALRecoveryMode( - opt->wal_recovery_mode); -} - -/* - * Class: org_rocksdb_Options - * Method: setAllow2pc - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAllow2pc(JNIEnv*, jobject, jlong jhandle, - jboolean jallow_2pc) { - auto* opt = reinterpret_cast(jhandle); - opt->allow_2pc = static_cast(jallow_2pc); -} - -/* - * Class: org_rocksdb_Options - * Method: allow2pc - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_allow2pc(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->allow_2pc); -} - -/* - * Class: org_rocksdb_Options - * Method: setRowCache - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setRowCache(JNIEnv*, jobject, jlong jhandle, - jlong jrow_cache_handle) { - auto* opt = reinterpret_cast(jhandle); - auto* row_cache = - reinterpret_cast*>( - jrow_cache_handle); - opt->row_cache = *row_cache; -} - -/* - * Class: org_rocksdb_Options - * Method: setWalFilter - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setWalFilter(JNIEnv*, jobject, jlong jhandle, - jlong jwal_filter_handle) { - auto* opt = reinterpret_cast(jhandle); - auto* wal_filter = reinterpret_cast( - jwal_filter_handle); - opt->wal_filter = wal_filter; -} - -/* - * Class: org_rocksdb_Options - * Method: setFailIfOptionsFileError - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setFailIfOptionsFileError( - JNIEnv*, jobject, jlong jhandle, jboolean jfail_if_options_file_error) { - auto* opt = reinterpret_cast(jhandle); - opt->fail_if_options_file_error = - static_cast(jfail_if_options_file_error); -} - -/* - * Class: org_rocksdb_Options - * Method: failIfOptionsFileError - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_failIfOptionsFileError(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - 
return static_cast(opt->fail_if_options_file_error); -} - -/* - * Class: org_rocksdb_Options - * Method: setDumpMallocStats - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setDumpMallocStats(JNIEnv*, jobject, - jlong jhandle, - jboolean jdump_malloc_stats) { - auto* opt = reinterpret_cast(jhandle); - opt->dump_malloc_stats = static_cast(jdump_malloc_stats); -} - -/* - * Class: org_rocksdb_Options - * Method: dumpMallocStats - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_dumpMallocStats(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->dump_malloc_stats); -} - -/* - * Class: org_rocksdb_Options - * Method: setAvoidFlushDuringRecovery - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAvoidFlushDuringRecovery( - JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_recovery) { - auto* opt = reinterpret_cast(jhandle); - opt->avoid_flush_during_recovery = - static_cast(javoid_flush_during_recovery); -} - -/* - * Class: org_rocksdb_Options - * Method: avoidFlushDuringRecovery - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_avoidFlushDuringRecovery(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->avoid_flush_during_recovery); -} - -/* - * Class: org_rocksdb_Options - * Method: setAvoidUnnecessaryBlockingIO - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAvoidUnnecessaryBlockingIO( - JNIEnv*, jclass, jlong jhandle, jboolean avoid_blocking_io) { - auto* opt = reinterpret_cast(jhandle); - opt->avoid_unnecessary_blocking_io = static_cast(avoid_blocking_io); -} - -/* - * Class: org_rocksdb_Options - * Method: avoidUnnecessaryBlockingIO - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_avoidUnnecessaryBlockingIO(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->avoid_unnecessary_blocking_io); -} - -/* - * Class: org_rocksdb_Options - * Method: setPersistStatsToDisk - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setPersistStatsToDisk( - JNIEnv*, jclass, jlong jhandle, jboolean persist_stats_to_disk) { - auto* opt = reinterpret_cast(jhandle); - opt->persist_stats_to_disk = static_cast(persist_stats_to_disk); -} - -/* - * Class: org_rocksdb_Options - * Method: persistStatsToDisk - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_persistStatsToDisk(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->persist_stats_to_disk); -} - -/* - * Class: org_rocksdb_Options - * Method: setWriteDbidToManifest - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setWriteDbidToManifest( - JNIEnv*, jclass, jlong jhandle, jboolean jwrite_dbid_to_manifest) { - auto* opt = reinterpret_cast(jhandle); - opt->write_dbid_to_manifest = static_cast(jwrite_dbid_to_manifest); -} - -/* - * Class: org_rocksdb_Options - * Method: writeDbidToManifest - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_writeDbidToManifest(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->write_dbid_to_manifest); -} - -/* - * Class: org_rocksdb_Options - * Method: setLogReadaheadSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setLogReadaheadSize(JNIEnv*, jclass, - jlong jhandle, - jlong jlog_readahead_size) { - auto* opt = reinterpret_cast(jhandle); - opt->log_readahead_size = static_cast(jlog_readahead_size); -} - -/* - * Class: org_rocksdb_Options - * Method: logReasaheadSize 
- * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_logReadaheadSize(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->log_readahead_size); -} - -/* - * Class: org_rocksdb_Options - * Method: setBestEffortsRecovery - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setBestEffortsRecovery( - JNIEnv*, jclass, jlong jhandle, jboolean jbest_efforts_recovery) { - auto* opt = reinterpret_cast(jhandle); - opt->best_efforts_recovery = static_cast(jbest_efforts_recovery); -} - -/* - * Class: org_rocksdb_Options - * Method: bestEffortsRecovery - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_bestEffortsRecovery(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->best_efforts_recovery); -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxBgErrorResumeCount - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMaxBgErrorResumeCount( - JNIEnv*, jclass, jlong jhandle, jint jmax_bgerror_resume_count) { - auto* opt = reinterpret_cast(jhandle); - opt->max_bgerror_resume_count = static_cast(jmax_bgerror_resume_count); -} - -/* - * Class: org_rocksdb_Options - * Method: maxBgerrorResumeCount - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_maxBgerrorResumeCount(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->max_bgerror_resume_count); -} - -/* - * Class: org_rocksdb_Options - * Method: setBgerrorResumeRetryInterval - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setBgerrorResumeRetryInterval( - JNIEnv*, jclass, jlong jhandle, jlong jbgerror_resume_retry_interval) { - auto* opt = reinterpret_cast(jhandle); - opt->bgerror_resume_retry_interval = - static_cast(jbgerror_resume_retry_interval); -} - -/* - * Class: org_rocksdb_Options - * Method: bgerrorResumeRetryInterval - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_bgerrorResumeRetryInterval(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->bgerror_resume_retry_interval); -} - -/* - * Class: org_rocksdb_Options - * Method: setAvoidFlushDuringShutdown - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAvoidFlushDuringShutdown( - JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_shutdown) { - auto* opt = reinterpret_cast(jhandle); - opt->avoid_flush_during_shutdown = - static_cast(javoid_flush_during_shutdown); -} - -/* - * Class: org_rocksdb_Options - * Method: avoidFlushDuringShutdown - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_avoidFlushDuringShutdown(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->avoid_flush_during_shutdown); -} - -/* - * Class: org_rocksdb_Options - * Method: setAllowIngestBehind - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAllowIngestBehind( - JNIEnv*, jobject, jlong jhandle, jboolean jallow_ingest_behind) { - auto* opt = reinterpret_cast(jhandle); - opt->allow_ingest_behind = jallow_ingest_behind == JNI_TRUE; -} - -/* - * Class: org_rocksdb_Options - * Method: allowIngestBehind - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_allowIngestBehind(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->allow_ingest_behind); -} - -/* - * Class: org_rocksdb_Options - * Method: setTwoWriteQueues - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setTwoWriteQueues(JNIEnv*, jobject, jlong jhandle, - jboolean 
jtwo_write_queues) { - auto* opt = reinterpret_cast(jhandle); - opt->two_write_queues = jtwo_write_queues == JNI_TRUE; -} - -/* - * Class: org_rocksdb_Options - * Method: twoWriteQueues - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_twoWriteQueues(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->two_write_queues); -} - -/* - * Class: org_rocksdb_Options - * Method: setManualWalFlush - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setManualWalFlush(JNIEnv*, jobject, jlong jhandle, - jboolean jmanual_wal_flush) { - auto* opt = reinterpret_cast(jhandle); - opt->manual_wal_flush = jmanual_wal_flush == JNI_TRUE; -} - -/* - * Class: org_rocksdb_Options - * Method: manualWalFlush - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_manualWalFlush(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->manual_wal_flush); -} - -/* - * Class: org_rocksdb_Options - * Method: setAtomicFlush - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setAtomicFlush(JNIEnv*, jobject, jlong jhandle, - jboolean jatomic_flush) { - auto* opt = reinterpret_cast(jhandle); - opt->atomic_flush = jatomic_flush == JNI_TRUE; -} - -/* - * Class: org_rocksdb_Options - * Method: atomicFlush - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_atomicFlush(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->atomic_flush); -} - -/* - * Method: tableFactoryName - * Signature: (J)Ljava/lang/String - */ -jstring Java_org_rocksdb_Options_tableFactoryName(JNIEnv* env, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::TableFactory* tf = opt->table_factory.get(); - - // Should never be nullptr. 
- // Default memtable factory is SkipListFactory - assert(tf); - - return env->NewStringUTF(tf->Name()); -} - -/* - * Class: org_rocksdb_Options - * Method: minWriteBufferNumberToMerge - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_minWriteBufferNumberToMerge(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->min_write_buffer_number_to_merge; -} - -/* - * Class: org_rocksdb_Options - * Method: setMinWriteBufferNumberToMerge - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMinWriteBufferNumberToMerge( - JNIEnv*, jobject, jlong jhandle, jint jmin_write_buffer_number_to_merge) { - reinterpret_cast(jhandle) - ->min_write_buffer_number_to_merge = - static_cast(jmin_write_buffer_number_to_merge); -} -/* - * Class: org_rocksdb_Options - * Method: maxWriteBufferNumberToMaintain - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_maxWriteBufferNumberToMaintain(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_write_buffer_number_to_maintain; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxWriteBufferNumberToMaintain - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMaxWriteBufferNumberToMaintain( - JNIEnv*, jobject, jlong jhandle, - jint jmax_write_buffer_number_to_maintain) { - reinterpret_cast(jhandle) - ->max_write_buffer_number_to_maintain = - static_cast(jmax_write_buffer_number_to_maintain); -} - -/* - * Class: org_rocksdb_Options - * Method: setCompressionType - * Signature: (JB)V - */ -void Java_org_rocksdb_Options_setCompressionType( - JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { - auto* opts = reinterpret_cast(jhandle); - opts->compression = - ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( - jcompression_type_value); -} - -/* - * Class: org_rocksdb_Options - * Method: compressionType - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Options_compressionType(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( - opts->compression); -} - -/** - * Helper method to convert a Java byte array of compression levels - * to a C++ vector of ROCKSDB_NAMESPACE::CompressionType - * - * @param env A pointer to the Java environment - * @param jcompression_levels A reference to a java byte array - * where each byte indicates a compression level - * - * @return A std::unique_ptr to the vector, or std::unique_ptr(nullptr) if a JNI - * exception occurs - */ -std::unique_ptr> -rocksdb_compression_vector_helper(JNIEnv* env, jbyteArray jcompression_levels) { - jsize len = env->GetArrayLength(jcompression_levels); - jbyte* jcompression_level = - env->GetByteArrayElements(jcompression_levels, nullptr); - if (jcompression_level == nullptr) { - // exception thrown: OutOfMemoryError - return std::unique_ptr>(); - } - - auto* compression_levels = - new std::vector(); - std::unique_ptr> - uptr_compression_levels(compression_levels); - - for (jsize i = 0; i < len; i++) { - jbyte jcl = jcompression_level[i]; - compression_levels->push_back( - static_cast(jcl)); - } - - env->ReleaseByteArrayElements(jcompression_levels, jcompression_level, - JNI_ABORT); - - return uptr_compression_levels; -} - -/** - * Helper method to convert a C++ vector of ROCKSDB_NAMESPACE::CompressionType - * to a Java byte array of compression levels - * - * @param env A pointer to the Java environment - * @param jcompression_levels A reference to a java byte array - * where each byte indicates a compression level - * - * 
@return A jbytearray or nullptr if an exception occurs - */ -jbyteArray rocksdb_compression_list_helper( - JNIEnv* env, - std::vector compression_levels) { - const size_t len = compression_levels.size(); - jbyte* jbuf = new jbyte[len]; - - for (size_t i = 0; i < len; i++) { - jbuf[i] = compression_levels[i]; - } - - // insert in java array - jbyteArray jcompression_levels = env->NewByteArray(static_cast(len)); - if (jcompression_levels == nullptr) { - // exception thrown: OutOfMemoryError - delete[] jbuf; - return nullptr; - } - env->SetByteArrayRegion(jcompression_levels, 0, static_cast(len), - jbuf); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jcompression_levels); - delete[] jbuf; - return nullptr; - } - - delete[] jbuf; - - return jcompression_levels; -} - -/* - * Class: org_rocksdb_Options - * Method: setCompressionPerLevel - * Signature: (J[B)V - */ -void Java_org_rocksdb_Options_setCompressionPerLevel( - JNIEnv* env, jobject, jlong jhandle, jbyteArray jcompressionLevels) { - auto uptr_compression_levels = - rocksdb_compression_vector_helper(env, jcompressionLevels); - if (!uptr_compression_levels) { - // exception occurred - return; - } - auto* options = reinterpret_cast(jhandle); - options->compression_per_level = *(uptr_compression_levels.get()); -} - -/* - * Class: org_rocksdb_Options - * Method: compressionPerLevel - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_Options_compressionPerLevel(JNIEnv* env, jobject, - jlong jhandle) { - auto* options = reinterpret_cast(jhandle); - return rocksdb_compression_list_helper(env, options->compression_per_level); -} - -/* - * Class: org_rocksdb_Options - * Method: setBottommostCompressionType - * Signature: (JB)V - */ -void Java_org_rocksdb_Options_setBottommostCompressionType( - JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { - auto* options = reinterpret_cast(jhandle); - options->bottommost_compression = - ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( - jcompression_type_value); -} - -/* - * Class: org_rocksdb_Options - * Method: bottommostCompressionType - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Options_bottommostCompressionType(JNIEnv*, jobject, - jlong jhandle) { - auto* options = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( - options->bottommost_compression); -} - -/* - * Class: org_rocksdb_Options - * Method: setBottommostCompressionOptions - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setBottommostCompressionOptions( - JNIEnv*, jobject, jlong jhandle, - jlong jbottommost_compression_options_handle) { - auto* options = reinterpret_cast(jhandle); - auto* bottommost_compression_options = - reinterpret_cast( - jbottommost_compression_options_handle); - options->bottommost_compression_opts = *bottommost_compression_options; -} - -/* - * Class: org_rocksdb_Options - * Method: setCompressionOptions - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setCompressionOptions( - JNIEnv*, jobject, jlong jhandle, jlong jcompression_options_handle) { - auto* options = reinterpret_cast(jhandle); - auto* compression_options = - reinterpret_cast( - jcompression_options_handle); - options->compression_opts = *compression_options; -} - -/* - * Class: org_rocksdb_Options - * Method: setCompactionStyle - * Signature: (JB)V - */ -void Java_org_rocksdb_Options_setCompactionStyle(JNIEnv*, jobject, - jlong jhandle, - jbyte jcompaction_style) { - auto* options = 
reinterpret_cast(jhandle); - options->compaction_style = - ROCKSDB_NAMESPACE::CompactionStyleJni::toCppCompactionStyle( - jcompaction_style); -} - -/* - * Class: org_rocksdb_Options - * Method: compactionStyle - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Options_compactionStyle(JNIEnv*, jobject, - jlong jhandle) { - auto* options = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompactionStyleJni::toJavaCompactionStyle( - options->compaction_style); -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxTableFilesSizeFIFO - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMaxTableFilesSizeFIFO( - JNIEnv*, jobject, jlong jhandle, jlong jmax_table_files_size) { - reinterpret_cast(jhandle) - ->compaction_options_fifo.max_table_files_size = - static_cast(jmax_table_files_size); -} - -/* - * Class: org_rocksdb_Options - * Method: maxTableFilesSizeFIFO - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_maxTableFilesSizeFIFO(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->compaction_options_fifo.max_table_files_size; -} - -/* - * Class: org_rocksdb_Options - * Method: numLevels - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_numLevels(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->num_levels; -} - -/* - * Class: org_rocksdb_Options - * Method: setNumLevels - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setNumLevels(JNIEnv*, jobject, jlong jhandle, - jint jnum_levels) { - reinterpret_cast(jhandle)->num_levels = - static_cast(jnum_levels); -} - -/* - * Class: org_rocksdb_Options - * Method: levelZeroFileNumCompactionTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_levelZeroFileNumCompactionTrigger(JNIEnv*, - jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_file_num_compaction_trigger; -} - -/* - * Class: org_rocksdb_Options - * Method: setLevelZeroFileNumCompactionTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setLevelZeroFileNumCompactionTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { - reinterpret_cast(jhandle) - ->level0_file_num_compaction_trigger = - static_cast(jlevel0_file_num_compaction_trigger); -} - -/* - * Class: org_rocksdb_Options - * Method: levelZeroSlowdownWritesTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_levelZeroSlowdownWritesTrigger(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_slowdown_writes_trigger; -} - -/* - * Class: org_rocksdb_Options - * Method: setLevelSlowdownWritesTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setLevelZeroSlowdownWritesTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { - reinterpret_cast(jhandle) - ->level0_slowdown_writes_trigger = - static_cast(jlevel0_slowdown_writes_trigger); -} - -/* - * Class: org_rocksdb_Options - * Method: levelZeroStopWritesTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_levelZeroStopWritesTrigger(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_stop_writes_trigger; -} - -/* - * Class: org_rocksdb_Options - * Method: setLevelStopWritesTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setLevelZeroStopWritesTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { - reinterpret_cast(jhandle) - ->level0_stop_writes_trigger = - static_cast(jlevel0_stop_writes_trigger); -} - -/* - * Class: org_rocksdb_Options - * Method: targetFileSizeBase - * 
Signature: (J)J - */ -jlong Java_org_rocksdb_Options_targetFileSizeBase(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->target_file_size_base; -} - -/* - * Class: org_rocksdb_Options - * Method: setTargetFileSizeBase - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setTargetFileSizeBase( - JNIEnv*, jobject, jlong jhandle, jlong jtarget_file_size_base) { - reinterpret_cast(jhandle) - ->target_file_size_base = static_cast(jtarget_file_size_base); -} - -/* - * Class: org_rocksdb_Options - * Method: targetFileSizeMultiplier - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_targetFileSizeMultiplier(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->target_file_size_multiplier; -} - -/* - * Class: org_rocksdb_Options - * Method: setTargetFileSizeMultiplier - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setTargetFileSizeMultiplier( - JNIEnv*, jobject, jlong jhandle, jint jtarget_file_size_multiplier) { - reinterpret_cast(jhandle) - ->target_file_size_multiplier = - static_cast(jtarget_file_size_multiplier); -} - -/* - * Class: org_rocksdb_Options - * Method: maxBytesForLevelBase - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_maxBytesForLevelBase(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_bytes_for_level_base; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxBytesForLevelBase - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMaxBytesForLevelBase( - JNIEnv*, jobject, jlong jhandle, jlong jmax_bytes_for_level_base) { - reinterpret_cast(jhandle) - ->max_bytes_for_level_base = - static_cast(jmax_bytes_for_level_base); -} - -/* - * Class: org_rocksdb_Options - * Method: levelCompactionDynamicLevelBytes - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_levelCompactionDynamicLevelBytes( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->level_compaction_dynamic_level_bytes; -} - -/* - * Class: org_rocksdb_Options - * Method: setLevelCompactionDynamicLevelBytes - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setLevelCompactionDynamicLevelBytes( - JNIEnv*, jobject, jlong jhandle, jboolean jenable_dynamic_level_bytes) { - reinterpret_cast(jhandle) - ->level_compaction_dynamic_level_bytes = (jenable_dynamic_level_bytes); -} - -/* - * Class: org_rocksdb_Options - * Method: maxBytesForLevelMultiplier - * Signature: (J)D - */ -jdouble Java_org_rocksdb_Options_maxBytesForLevelMultiplier(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_bytes_for_level_multiplier; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxBytesForLevelMultiplier - * Signature: (JD)V - */ -void Java_org_rocksdb_Options_setMaxBytesForLevelMultiplier( - JNIEnv*, jobject, jlong jhandle, jdouble jmax_bytes_for_level_multiplier) { - reinterpret_cast(jhandle) - ->max_bytes_for_level_multiplier = - static_cast(jmax_bytes_for_level_multiplier); -} - -/* - * Class: org_rocksdb_Options - * Method: maxCompactionBytes - * Signature: (J)I - */ -jlong Java_org_rocksdb_Options_maxCompactionBytes(JNIEnv*, jobject, - jlong jhandle) { - return static_cast( - reinterpret_cast(jhandle) - ->max_compaction_bytes); -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxCompactionBytes - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMaxCompactionBytes( - JNIEnv*, jobject, jlong jhandle, jlong jmax_compaction_bytes) { - reinterpret_cast(jhandle)->max_compaction_bytes = - static_cast(jmax_compaction_bytes); -} - -/* - * Class: 
org_rocksdb_Options - * Method: arenaBlockSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_arenaBlockSize(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->arena_block_size; -} - -/* - * Class: org_rocksdb_Options - * Method: setArenaBlockSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setArenaBlockSize(JNIEnv* env, jobject, - jlong jhandle, - jlong jarena_block_size) { - auto s = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jarena_block_size); - if (s.ok()) { - reinterpret_cast(jhandle)->arena_block_size = - jarena_block_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Options - * Method: disableAutoCompactions - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_disableAutoCompactions(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->disable_auto_compactions; -} - -/* - * Class: org_rocksdb_Options - * Method: setDisableAutoCompactions - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setDisableAutoCompactions( - JNIEnv*, jobject, jlong jhandle, jboolean jdisable_auto_compactions) { - reinterpret_cast(jhandle) - ->disable_auto_compactions = static_cast(jdisable_auto_compactions); -} - -/* - * Class: org_rocksdb_Options - * Method: maxSequentialSkipInIterations - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_maxSequentialSkipInIterations(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_sequential_skip_in_iterations; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxSequentialSkipInIterations - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMaxSequentialSkipInIterations( - JNIEnv*, jobject, jlong jhandle, jlong jmax_sequential_skip_in_iterations) { - reinterpret_cast(jhandle) - ->max_sequential_skip_in_iterations = - static_cast(jmax_sequential_skip_in_iterations); -} - -/* - * Class: org_rocksdb_Options - * Method: inplaceUpdateSupport - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_inplaceUpdateSupport(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->inplace_update_support; -} - -/* - * Class: org_rocksdb_Options - * Method: setInplaceUpdateSupport - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setInplaceUpdateSupport( - JNIEnv*, jobject, jlong jhandle, jboolean jinplace_update_support) { - reinterpret_cast(jhandle) - ->inplace_update_support = static_cast(jinplace_update_support); -} - -/* - * Class: org_rocksdb_Options - * Method: inplaceUpdateNumLocks - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_inplaceUpdateNumLocks(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->inplace_update_num_locks; -} - -/* - * Class: org_rocksdb_Options - * Method: setInplaceUpdateNumLocks - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setInplaceUpdateNumLocks( - JNIEnv* env, jobject, jlong jhandle, jlong jinplace_update_num_locks) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - jinplace_update_num_locks); - if (s.ok()) { - reinterpret_cast(jhandle) - ->inplace_update_num_locks = jinplace_update_num_locks; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Options - * Method: memtablePrefixBloomSizeRatio - * Signature: (J)I - */ -jdouble Java_org_rocksdb_Options_memtablePrefixBloomSizeRatio(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - 
->memtable_prefix_bloom_size_ratio; -} - -/* - * Class: org_rocksdb_Options - * Method: setMemtablePrefixBloomSizeRatio - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setMemtablePrefixBloomSizeRatio( - JNIEnv*, jobject, jlong jhandle, - jdouble jmemtable_prefix_bloom_size_ratio) { - reinterpret_cast(jhandle) - ->memtable_prefix_bloom_size_ratio = - static_cast(jmemtable_prefix_bloom_size_ratio); -} - -/* - * Class: org_rocksdb_Options - * Method: experimentalMempurgeThreshold - * Signature: (J)I - */ -jdouble Java_org_rocksdb_Options_experimentalMempurgeThreshold(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->experimental_mempurge_threshold; -} - -/* - * Class: org_rocksdb_Options - * Method: setExperimentalMempurgeThreshold - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setExperimentalMempurgeThreshold( - JNIEnv*, jobject, jlong jhandle, jdouble jexperimental_mempurge_threshold) { - reinterpret_cast(jhandle) - ->experimental_mempurge_threshold = - static_cast(jexperimental_mempurge_threshold); -} - -/* - * Class: org_rocksdb_Options - * Method: memtableWholeKeyFiltering - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_memtableWholeKeyFiltering(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->memtable_whole_key_filtering; -} - -/* - * Class: org_rocksdb_Options - * Method: setMemtableWholeKeyFiltering - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setMemtableWholeKeyFiltering( - JNIEnv*, jobject, jlong jhandle, jboolean jmemtable_whole_key_filtering) { - reinterpret_cast(jhandle) - ->memtable_whole_key_filtering = - static_cast(jmemtable_whole_key_filtering); -} - -/* - * Class: org_rocksdb_Options - * Method: bloomLocality - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_bloomLocality(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->bloom_locality; -} - -/* - * Class: org_rocksdb_Options - * Method: setBloomLocality - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setBloomLocality(JNIEnv*, jobject, jlong jhandle, - jint jbloom_locality) { - reinterpret_cast(jhandle)->bloom_locality = - static_cast(jbloom_locality); -} - -/* - * Class: org_rocksdb_Options - * Method: maxSuccessiveMerges - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_maxSuccessiveMerges(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_successive_merges; -} - -/* - * Class: org_rocksdb_Options - * Method: setMaxSuccessiveMerges - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMaxSuccessiveMerges( - JNIEnv* env, jobject, jlong jhandle, jlong jmax_successive_merges) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - jmax_successive_merges); - if (s.ok()) { - reinterpret_cast(jhandle) - ->max_successive_merges = jmax_successive_merges; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Options - * Method: optimizeFiltersForHits - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_optimizeFiltersForHits(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->optimize_filters_for_hits; -} - -/* - * Class: org_rocksdb_Options - * Method: setOptimizeFiltersForHits - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setOptimizeFiltersForHits( - JNIEnv*, jobject, jlong jhandle, jboolean joptimize_filters_for_hits) { - reinterpret_cast(jhandle) - ->optimize_filters_for_hits = - static_cast(joptimize_filters_for_hits); -} - -/* - * Class: 
org_rocksdb_Options - * Method: oldDefaults - * Signature: (JII)V - */ -void Java_org_rocksdb_Options_oldDefaults(JNIEnv*, jclass, jlong jhandle, - jint major_version, - jint minor_version) { - reinterpret_cast(jhandle)->OldDefaults( - major_version, minor_version); -} - -/* - * Class: org_rocksdb_Options - * Method: optimizeForSmallDb - * Signature: (J)V - */ -void Java_org_rocksdb_Options_optimizeForSmallDb__J(JNIEnv*, jobject, - jlong jhandle) { - reinterpret_cast(jhandle)->OptimizeForSmallDb(); -} - -/* - * Class: org_rocksdb_Options - * Method: optimizeForSmallDb - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_optimizeForSmallDb__JJ(JNIEnv*, jclass, - jlong jhandle, - jlong cache_handle) { - auto* cache_sptr_ptr = - reinterpret_cast*>( - cache_handle); - auto* options_ptr = reinterpret_cast(jhandle); - auto* cf_options_ptr = - static_cast(options_ptr); - cf_options_ptr->OptimizeForSmallDb(cache_sptr_ptr); -} - -/* - * Class: org_rocksdb_Options - * Method: optimizeForPointLookup - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_optimizeForPointLookup( - JNIEnv*, jobject, jlong jhandle, jlong block_cache_size_mb) { - reinterpret_cast(jhandle) - ->OptimizeForPointLookup(block_cache_size_mb); -} - -/* - * Class: org_rocksdb_Options - * Method: optimizeLevelStyleCompaction - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_optimizeLevelStyleCompaction( - JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { - reinterpret_cast(jhandle) - ->OptimizeLevelStyleCompaction(memtable_memory_budget); -} - -/* - * Class: org_rocksdb_Options - * Method: optimizeUniversalStyleCompaction - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_optimizeUniversalStyleCompaction( - JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { - reinterpret_cast(jhandle) - ->OptimizeUniversalStyleCompaction(memtable_memory_budget); -} - -/* - * Class: org_rocksdb_Options - * Method: prepareForBulkLoad - * Signature: (J)V - */ -void Java_org_rocksdb_Options_prepareForBulkLoad(JNIEnv*, jobject, - jlong jhandle) { - reinterpret_cast(jhandle)->PrepareForBulkLoad(); -} - -/* - * Class: org_rocksdb_Options - * Method: memtableHugePageSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_memtableHugePageSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->memtable_huge_page_size; -} - -/* - * Class: org_rocksdb_Options - * Method: setMemtableHugePageSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMemtableHugePageSize( - JNIEnv* env, jobject, jlong jhandle, jlong jmemtable_huge_page_size) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - jmemtable_huge_page_size); - if (s.ok()) { - reinterpret_cast(jhandle) - ->memtable_huge_page_size = jmemtable_huge_page_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Options - * Method: softPendingCompactionBytesLimit - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_softPendingCompactionBytesLimit(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->soft_pending_compaction_bytes_limit; -} - -/* - * Class: org_rocksdb_Options - * Method: setSoftPendingCompactionBytesLimit - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setSoftPendingCompactionBytesLimit( - JNIEnv*, jobject, jlong jhandle, - jlong jsoft_pending_compaction_bytes_limit) { - reinterpret_cast(jhandle) - ->soft_pending_compaction_bytes_limit = - static_cast(jsoft_pending_compaction_bytes_limit); -} 
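For context, the Options presets removed above (OptimizeForSmallDb, OptimizeForPointLookup, OptimizeLevelStyleCompaction, OptimizeUniversalStyleCompaction, PrepareForBulkLoad) are what back the corresponding org.rocksdb.Options methods on the Java side. A minimal usage sketch, assuming a current RocksJava release where these methods exist under these names; the class name OptionsPresetsSketch is illustrative and nothing below is part of this diff:

    import org.rocksdb.Options;
    import org.rocksdb.RocksDB;

    public class OptionsPresetsSketch {
      public static void main(String[] args) {
        RocksDB.loadLibrary();
        // try-with-resources releases the native Options object that the JNI
        // bindings above address through the jlong "jhandle" parameter.
        try (Options options = new Options()) {
          // Pick one preset for the workload; each mirrors a C++ helper wrapped above.
          options.optimizeLevelStyleCompaction(128L << 20);  // memtable memory budget in bytes
          // options.optimizeForPointLookup(64);             // alternative: 64 MB block cache, point reads
          // options.prepareForBulkLoad();                    // alternative: one-off bulk ingest
          options.setSoftPendingCompactionBytesLimit(64L << 30);
        }
      }
    }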
- -/* - * Class: org_rocksdb_Options - * Method: softHardCompactionBytesLimit - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_hardPendingCompactionBytesLimit(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->hard_pending_compaction_bytes_limit; -} - -/* - * Class: org_rocksdb_Options - * Method: setHardPendingCompactionBytesLimit - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setHardPendingCompactionBytesLimit( - JNIEnv*, jobject, jlong jhandle, - jlong jhard_pending_compaction_bytes_limit) { - reinterpret_cast(jhandle) - ->hard_pending_compaction_bytes_limit = - static_cast(jhard_pending_compaction_bytes_limit); -} - -/* - * Class: org_rocksdb_Options - * Method: level0FileNumCompactionTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_level0FileNumCompactionTrigger(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_file_num_compaction_trigger; -} - -/* - * Class: org_rocksdb_Options - * Method: setLevel0FileNumCompactionTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setLevel0FileNumCompactionTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { - reinterpret_cast(jhandle) - ->level0_file_num_compaction_trigger = - static_cast(jlevel0_file_num_compaction_trigger); -} - -/* - * Class: org_rocksdb_Options - * Method: level0SlowdownWritesTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_level0SlowdownWritesTrigger(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_slowdown_writes_trigger; -} - -/* - * Class: org_rocksdb_Options - * Method: setLevel0SlowdownWritesTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setLevel0SlowdownWritesTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { - reinterpret_cast(jhandle) - ->level0_slowdown_writes_trigger = - static_cast(jlevel0_slowdown_writes_trigger); -} - -/* - * Class: org_rocksdb_Options - * Method: level0StopWritesTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_level0StopWritesTrigger(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_stop_writes_trigger; -} - -/* - * Class: org_rocksdb_Options - * Method: setLevel0StopWritesTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setLevel0StopWritesTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { - reinterpret_cast(jhandle) - ->level0_stop_writes_trigger = - static_cast(jlevel0_stop_writes_trigger); -} - -/* - * Class: org_rocksdb_Options - * Method: maxBytesForLevelMultiplierAdditional - * Signature: (J)[I - */ -jintArray Java_org_rocksdb_Options_maxBytesForLevelMultiplierAdditional( - JNIEnv* env, jobject, jlong jhandle) { - auto mbflma = reinterpret_cast(jhandle) - ->max_bytes_for_level_multiplier_additional; - - const size_t size = mbflma.size(); - - jint* additionals = new jint[size]; - for (size_t i = 0; i < size; i++) { - additionals[i] = static_cast(mbflma[i]); - } - - jsize jlen = static_cast(size); - jintArray result = env->NewIntArray(jlen); - if (result == nullptr) { - // exception thrown: OutOfMemoryError - delete[] additionals; - return nullptr; - } - - env->SetIntArrayRegion(result, 0, jlen, additionals); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(result); - delete[] additionals; - return nullptr; - } - - delete[] additionals; - - return result; -} - -/* - * Class: org_rocksdb_Options - * Method: 
setMaxBytesForLevelMultiplierAdditional - * Signature: (J[I)V - */ -void Java_org_rocksdb_Options_setMaxBytesForLevelMultiplierAdditional( - JNIEnv* env, jobject, jlong jhandle, - jintArray jmax_bytes_for_level_multiplier_additional) { - jsize len = env->GetArrayLength(jmax_bytes_for_level_multiplier_additional); - jint* additionals = env->GetIntArrayElements( - jmax_bytes_for_level_multiplier_additional, nullptr); - if (additionals == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - auto* opt = reinterpret_cast(jhandle); - opt->max_bytes_for_level_multiplier_additional.clear(); - for (jsize i = 0; i < len; i++) { - opt->max_bytes_for_level_multiplier_additional.push_back( - static_cast(additionals[i])); - } - - env->ReleaseIntArrayElements(jmax_bytes_for_level_multiplier_additional, - additionals, JNI_ABORT); -} - -/* - * Class: org_rocksdb_Options - * Method: paranoidFileChecks - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_paranoidFileChecks(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->paranoid_file_checks; -} - -/* - * Class: org_rocksdb_Options - * Method: setParanoidFileChecks - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setParanoidFileChecks( - JNIEnv*, jobject, jlong jhandle, jboolean jparanoid_file_checks) { - reinterpret_cast(jhandle)->paranoid_file_checks = - static_cast(jparanoid_file_checks); -} - -/* - * Class: org_rocksdb_Options - * Method: setCompactionPriority - * Signature: (JB)V - */ -void Java_org_rocksdb_Options_setCompactionPriority( - JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_priority_value) { - auto* opts = reinterpret_cast(jhandle); - opts->compaction_pri = - ROCKSDB_NAMESPACE::CompactionPriorityJni::toCppCompactionPriority( - jcompaction_priority_value); -} - -/* - * Class: org_rocksdb_Options - * Method: compactionPriority - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Options_compactionPriority(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompactionPriorityJni::toJavaCompactionPriority( - opts->compaction_pri); -} - -/* - * Class: org_rocksdb_Options - * Method: setReportBgIoStats - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setReportBgIoStats(JNIEnv*, jobject, - jlong jhandle, - jboolean jreport_bg_io_stats) { - auto* opts = reinterpret_cast(jhandle); - opts->report_bg_io_stats = static_cast(jreport_bg_io_stats); -} - -/* - * Class: org_rocksdb_Options - * Method: reportBgIoStats - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_reportBgIoStats(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->report_bg_io_stats); -} - -/* - * Class: org_rocksdb_Options - * Method: setTtl - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setTtl(JNIEnv*, jobject, jlong jhandle, - jlong jttl) { - auto* opts = reinterpret_cast(jhandle); - opts->ttl = static_cast(jttl); -} - -/* - * Class: org_rocksdb_Options - * Method: ttl - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_ttl(JNIEnv*, jobject, jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->ttl); -} - -/* - * Class: org_rocksdb_Options - * Method: setPeriodicCompactionSeconds - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setPeriodicCompactionSeconds( - JNIEnv*, jobject, jlong jhandle, jlong jperiodicCompactionSeconds) { - auto* opts = reinterpret_cast(jhandle); - opts->periodic_compaction_seconds = - static_cast(jperiodicCompactionSeconds); -} 
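The getter/setter pairs above are thin pass-throughs to the corresponding org.rocksdb.Options properties. A hedged sketch of driving the compaction-related knobs from Java; method and enum names assume the current RocksJava API and are not part of this diff:

    import org.rocksdb.CompactionPriority;
    import org.rocksdb.Options;
    import org.rocksdb.RocksDB;

    public class CompactionTuningSketch {
      public static void main(String[] args) {
        RocksDB.loadLibrary();
        try (Options options = new Options()) {
          options.setCompactionPriority(CompactionPriority.MinOverlappingRatio);
          options.setReportBgIoStats(true);   // measure bg compaction/flush IO, reported via statistics
          options.setParanoidFileChecks(true);
          // ttl / periodic_compaction_seconds arrive as jlong and are cast to uint64_t above.
          options.setTtl(30L * 24 * 60 * 60);                      // 30 days
          options.setPeriodicCompactionSeconds(7L * 24 * 60 * 60); // 7 days
        }
      }
    }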
- -/* - * Class: org_rocksdb_Options - * Method: periodicCompactionSeconds - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_periodicCompactionSeconds(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->periodic_compaction_seconds); -} - -/* - * Class: org_rocksdb_Options - * Method: setCompactionOptionsUniversal - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setCompactionOptionsUniversal( - JNIEnv*, jobject, jlong jhandle, - jlong jcompaction_options_universal_handle) { - auto* opts = reinterpret_cast(jhandle); - auto* opts_uni = - reinterpret_cast( - jcompaction_options_universal_handle); - opts->compaction_options_universal = *opts_uni; -} - -/* - * Class: org_rocksdb_Options - * Method: setCompactionOptionsFIFO - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setCompactionOptionsFIFO( - JNIEnv*, jobject, jlong jhandle, jlong jcompaction_options_fifo_handle) { - auto* opts = reinterpret_cast(jhandle); - auto* opts_fifo = reinterpret_cast( - jcompaction_options_fifo_handle); - opts->compaction_options_fifo = *opts_fifo; -} - -/* - * Class: org_rocksdb_Options - * Method: setForceConsistencyChecks - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setForceConsistencyChecks( - JNIEnv*, jobject, jlong jhandle, jboolean jforce_consistency_checks) { - auto* opts = reinterpret_cast(jhandle); - opts->force_consistency_checks = static_cast(jforce_consistency_checks); -} - -/* - * Class: org_rocksdb_Options - * Method: forceConsistencyChecks - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_forceConsistencyChecks(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->force_consistency_checks); -} - -/// BLOB options - -/* - * Class: org_rocksdb_Options - * Method: setEnableBlobFiles - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setEnableBlobFiles(JNIEnv*, jobject, - jlong jhandle, - jboolean jenable_blob_files) { - auto* opts = reinterpret_cast(jhandle); - opts->enable_blob_files = static_cast(jenable_blob_files); -} - -/* - * Class: org_rocksdb_Options - * Method: enableBlobFiles - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_enableBlobFiles(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->enable_blob_files); -} - -/* - * Class: org_rocksdb_Options - * Method: setMinBlobSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setMinBlobSize(JNIEnv*, jobject, jlong jhandle, - jlong jmin_blob_size) { - auto* opts = reinterpret_cast(jhandle); - opts->min_blob_size = static_cast(jmin_blob_size); -} - -/* - * Class: org_rocksdb_Options - * Method: minBlobSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_minBlobSize(JNIEnv*, jobject, jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->min_blob_size); -} - -/* - * Class: org_rocksdb_Options - * Method: setBlobFileSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setBlobFileSize(JNIEnv*, jobject, jlong jhandle, - jlong jblob_file_size) { - auto* opts = reinterpret_cast(jhandle); - opts->blob_file_size = static_cast(jblob_file_size); -} - -/* - * Class: org_rocksdb_Options - * Method: blobFileSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_blobFileSize(JNIEnv*, jobject, jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->blob_file_size); -} - -/* - * Class: org_rocksdb_Options - * Method: setBlobCompressionType - * 
Signature: (JB)V - */ -void Java_org_rocksdb_Options_setBlobCompressionType( - JNIEnv*, jobject, jlong jhandle, jbyte jblob_compression_type_value) { - auto* opts = reinterpret_cast(jhandle); - opts->blob_compression_type = - ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( - jblob_compression_type_value); -} - -/* - * Class: org_rocksdb_Options - * Method: blobCompressionType - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Options_blobCompressionType(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( - opts->blob_compression_type); -} - -/* - * Class: org_rocksdb_Options - * Method: setEnableBlobGarbageCollection - * Signature: (JZ)V - */ -void Java_org_rocksdb_Options_setEnableBlobGarbageCollection( - JNIEnv*, jobject, jlong jhandle, jboolean jenable_blob_garbage_collection) { - auto* opts = reinterpret_cast(jhandle); - opts->enable_blob_garbage_collection = - static_cast(jenable_blob_garbage_collection); -} - -/* - * Class: org_rocksdb_Options - * Method: enableBlobGarbageCollection - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Options_enableBlobGarbageCollection(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->enable_blob_garbage_collection); -} - -/* - * Class: org_rocksdb_Options - * Method: setBlobGarbageCollectionAgeCutoff - * Signature: (JD)V - */ -void Java_org_rocksdb_Options_setBlobGarbageCollectionAgeCutoff( - JNIEnv*, jobject, jlong jhandle, - jdouble jblob_garbage_collection_age_cutoff) { - auto* opts = reinterpret_cast(jhandle); - opts->blob_garbage_collection_age_cutoff = - static_cast(jblob_garbage_collection_age_cutoff); -} - -/* - * Class: org_rocksdb_Options - * Method: blobGarbageCollectionAgeCutoff - * Signature: (J)D - */ -jdouble Java_org_rocksdb_Options_blobGarbageCollectionAgeCutoff(JNIEnv*, - jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->blob_garbage_collection_age_cutoff); -} - -/* - * Class: org_rocksdb_Options - * Method: setBlobGarbageCollectionForceThreshold - * Signature: (JD)V - */ -void Java_org_rocksdb_Options_setBlobGarbageCollectionForceThreshold( - JNIEnv*, jobject, jlong jhandle, - jdouble jblob_garbage_collection_force_threshold) { - auto* opts = reinterpret_cast(jhandle); - opts->blob_garbage_collection_force_threshold = - static_cast(jblob_garbage_collection_force_threshold); -} - -/* - * Class: org_rocksdb_Options - * Method: blobGarbageCollectionForceThreshold - * Signature: (J)D - */ -jdouble Java_org_rocksdb_Options_blobGarbageCollectionForceThreshold( - JNIEnv*, jobject, jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->blob_garbage_collection_force_threshold); -} - -/* - * Class: org_rocksdb_Options - * Method: setBlobCompactionReadaheadSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_Options_setBlobCompactionReadaheadSize( - JNIEnv*, jobject, jlong jhandle, jlong jblob_compaction_readahead_size) { - auto* opts = reinterpret_cast(jhandle); - opts->blob_compaction_readahead_size = - static_cast(jblob_compaction_readahead_size); -} - -/* - * Class: org_rocksdb_Options - * Method: blobCompactionReadaheadSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_Options_blobCompactionReadaheadSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->blob_compaction_readahead_size); -} - -/* - * Class: org_rocksdb_Options - * Method: 
setBlobFileStartingLevel - * Signature: (JI)V - */ -void Java_org_rocksdb_Options_setBlobFileStartingLevel( - JNIEnv*, jobject, jlong jhandle, jint jblob_file_starting_level) { - auto* opts = reinterpret_cast(jhandle); - opts->blob_file_starting_level = jblob_file_starting_level; -} - -/* - * Class: org_rocksdb_Options - * Method: blobFileStartingLevel - * Signature: (J)I - */ -jint Java_org_rocksdb_Options_blobFileStartingLevel(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return static_cast(opts->blob_file_starting_level); -} - -/* - * Class: org_rocksdb_Options - * Method: setPrepopulateBlobCache - * Signature: (JB)V - */ -void Java_org_rocksdb_Options_setPrepopulateBlobCache( - JNIEnv*, jobject, jlong jhandle, jbyte jprepopulate_blob_cache_value) { - auto* opts = reinterpret_cast(jhandle); - opts->prepopulate_blob_cache = - ROCKSDB_NAMESPACE::PrepopulateBlobCacheJni::toCppPrepopulateBlobCache( - jprepopulate_blob_cache_value); -} - -/* - * Class: org_rocksdb_Options - * Method: prepopulateBlobCache - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Options_prepopulateBlobCache(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::PrepopulateBlobCacheJni::toJavaPrepopulateBlobCache( - opts->prepopulate_blob_cache); -} - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::ColumnFamilyOptions - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: newColumnFamilyOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_newColumnFamilyOptions(JNIEnv*, - jclass) { - auto* op = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(); - return GET_CPLUSPLUS_POINTER(op); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: copyColumnFamilyOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_copyColumnFamilyOptions( - JNIEnv*, jclass, jlong jhandle) { - auto new_opt = new ROCKSDB_NAMESPACE::ColumnFamilyOptions( - *(reinterpret_cast(jhandle))); - return GET_CPLUSPLUS_POINTER(new_opt); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: newColumnFamilyOptionsFromOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_newColumnFamilyOptionsFromOptions( - JNIEnv*, jclass, jlong joptions_handle) { - auto new_opt = new ROCKSDB_NAMESPACE::ColumnFamilyOptions( - *reinterpret_cast(joptions_handle)); - return GET_CPLUSPLUS_POINTER(new_opt); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: getColumnFamilyOptionsFromProps - * Signature: (JLjava/lang/String;)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__JLjava_lang_String_2( - JNIEnv* env, jclass, jlong cfg_handle, jstring jopt_string) { - const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); - if (opt_string == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - auto* config_options = - reinterpret_cast(cfg_handle); - auto* cf_options = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(); - ROCKSDB_NAMESPACE::Status status = - ROCKSDB_NAMESPACE::GetColumnFamilyOptionsFromString( - *config_options, ROCKSDB_NAMESPACE::ColumnFamilyOptions(), opt_string, - cf_options); - - env->ReleaseStringUTFChars(jopt_string, opt_string); - - // Check if ColumnFamilyOptions creation was possible. 
- jlong ret_value = 0; - if (status.ok()) { - ret_value = GET_CPLUSPLUS_POINTER(cf_options); - } else { - // if operation failed the ColumnFamilyOptions need to be deleted - // again to prevent a memory leak. - delete cf_options; - } - return ret_value; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: getColumnFamilyOptionsFromProps - * Signature: (Ljava/util/String;)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__Ljava_lang_String_2( - JNIEnv* env, jclass, jstring jopt_string) { - const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); - if (opt_string == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - - auto* cf_options = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(); - ROCKSDB_NAMESPACE::ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - ROCKSDB_NAMESPACE::Status status = - ROCKSDB_NAMESPACE::GetColumnFamilyOptionsFromString( - config_options, ROCKSDB_NAMESPACE::ColumnFamilyOptions(), opt_string, - cf_options); - - env->ReleaseStringUTFChars(jopt_string, opt_string); - - // Check if ColumnFamilyOptions creation was possible. - jlong ret_value = 0; - if (status.ok()) { - ret_value = GET_CPLUSPLUS_POINTER(cf_options); - } else { - // if operation failed the ColumnFamilyOptions need to be deleted - // again to prevent a memory leak. - delete cf_options; - } - return ret_value; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_disposeInternal(JNIEnv*, jobject, - jlong handle) { - auto* cfo = reinterpret_cast(handle); - assert(cfo != nullptr); - delete cfo; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: oldDefaults - * Signature: (JII)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_oldDefaults(JNIEnv*, jclass, - jlong jhandle, - jint major_version, - jint minor_version) { - reinterpret_cast(jhandle) - ->OldDefaults(major_version, minor_version); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: optimizeForSmallDb - * Signature: (J)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeForSmallDb__J(JNIEnv*, - jobject, - jlong jhandle) { - reinterpret_cast(jhandle) - ->OptimizeForSmallDb(); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: optimizeForSmallDb - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeForSmallDb__JJ( - JNIEnv*, jclass, jlong jhandle, jlong cache_handle) { - auto* cache_sptr_ptr = - reinterpret_cast*>( - cache_handle); - reinterpret_cast(jhandle) - ->OptimizeForSmallDb(cache_sptr_ptr); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: optimizeForPointLookup - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeForPointLookup( - JNIEnv*, jobject, jlong jhandle, jlong block_cache_size_mb) { - reinterpret_cast(jhandle) - ->OptimizeForPointLookup(block_cache_size_mb); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: optimizeLevelStyleCompaction - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeLevelStyleCompaction( - JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { - reinterpret_cast(jhandle) - ->OptimizeLevelStyleCompaction(memtable_memory_budget); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: optimizeUniversalStyleCompaction - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeUniversalStyleCompaction( - 
JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { - reinterpret_cast(jhandle) - ->OptimizeUniversalStyleCompaction(memtable_memory_budget); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setComparatorHandle - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setComparatorHandle__JI( - JNIEnv*, jobject, jlong jhandle, jint builtinComparator) { - switch (builtinComparator) { - case 1: - reinterpret_cast(jhandle) - ->comparator = ROCKSDB_NAMESPACE::ReverseBytewiseComparator(); - break; - default: - reinterpret_cast(jhandle) - ->comparator = ROCKSDB_NAMESPACE::BytewiseComparator(); - break; - } -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setComparatorHandle - * Signature: (JJB)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setComparatorHandle__JJB( - JNIEnv*, jobject, jlong jopt_handle, jlong jcomparator_handle, - jbyte jcomparator_type) { - ROCKSDB_NAMESPACE::Comparator* comparator = nullptr; - switch (jcomparator_type) { - // JAVA_COMPARATOR - case 0x0: - comparator = reinterpret_cast( - jcomparator_handle); - break; - - // JAVA_NATIVE_COMPARATOR_WRAPPER - case 0x1: - comparator = - reinterpret_cast(jcomparator_handle); - break; - } - auto* opt = - reinterpret_cast(jopt_handle); - opt->comparator = comparator; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMergeOperatorName - * Signature: (JJjava/lang/String)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMergeOperatorName( - JNIEnv* env, jobject, jlong jhandle, jstring jop_name) { - auto* options = - reinterpret_cast(jhandle); - const char* op_name = env->GetStringUTFChars(jop_name, nullptr); - if (op_name == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - options->merge_operator = - ROCKSDB_NAMESPACE::MergeOperators::CreateFromStringId(op_name); - env->ReleaseStringUTFChars(jop_name, op_name); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMergeOperator - * Signature: (JJjava/lang/String)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMergeOperator( - JNIEnv*, jobject, jlong jhandle, jlong mergeOperatorHandle) { - reinterpret_cast(jhandle) - ->merge_operator = - *(reinterpret_cast*>( - mergeOperatorHandle)); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompactionFilterHandle - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionFilterHandle( - JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilter_handle) { - reinterpret_cast(jopt_handle) - ->compaction_filter = - reinterpret_cast( - jcompactionfilter_handle); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompactionFilterFactoryHandle - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionFilterFactoryHandle( - JNIEnv*, jobject, jlong jopt_handle, - jlong jcompactionfilterfactory_handle) { - auto* cff_factory = reinterpret_cast< - std::shared_ptr*>( - jcompactionfilterfactory_handle); - reinterpret_cast(jopt_handle) - ->compaction_filter_factory = *cff_factory; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setWriteBufferSize - * Signature: (JJ)I - */ -void Java_org_rocksdb_ColumnFamilyOptions_setWriteBufferSize( - JNIEnv* env, jobject, jlong jhandle, jlong jwrite_buffer_size) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - jwrite_buffer_size); - if (s.ok()) { - reinterpret_cast(jhandle) - ->write_buffer_size = jwrite_buffer_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); 
- } -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: writeBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_writeBufferSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->write_buffer_size; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMaxWriteBufferNumber - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxWriteBufferNumber( - JNIEnv*, jobject, jlong jhandle, jint jmax_write_buffer_number) { - reinterpret_cast(jhandle) - ->max_write_buffer_number = jmax_write_buffer_number; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: maxWriteBufferNumber - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_maxWriteBufferNumber(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_write_buffer_number; -} - -/* - * Method: setMemTableFactory - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMemTableFactory( - JNIEnv*, jobject, jlong jhandle, jlong jfactory_handle) { - reinterpret_cast(jhandle) - ->memtable_factory.reset( - reinterpret_cast( - jfactory_handle)); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: memTableFactoryName - * Signature: (J)Ljava/lang/String - */ -jstring Java_org_rocksdb_ColumnFamilyOptions_memTableFactoryName( - JNIEnv* env, jobject, jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::MemTableRepFactory* tf = opt->memtable_factory.get(); - - // Should never be nullptr. - // Default memtable factory is SkipListFactory - assert(tf); - - // temporarly fix for the historical typo - if (strcmp(tf->Name(), "HashLinkListRepFactory") == 0) { - return env->NewStringUTF("HashLinkedListRepFactory"); - } - - return env->NewStringUTF(tf->Name()); -} - -/* - * Method: useFixedLengthPrefixExtractor - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_useFixedLengthPrefixExtractor( - JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { - reinterpret_cast(jhandle) - ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewFixedPrefixTransform( - static_cast(jprefix_length))); -} - -/* - * Method: useCappedPrefixExtractor - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_useCappedPrefixExtractor( - JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { - reinterpret_cast(jhandle) - ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewCappedPrefixTransform( - static_cast(jprefix_length))); -} - -/* - * Method: setTableFactory - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setTableFactory( - JNIEnv*, jobject, jlong jhandle, jlong jfactory_handle) { - reinterpret_cast(jhandle) - ->table_factory.reset( - reinterpret_cast(jfactory_handle)); -} - -/* - * Method: setSstPartitionerFactory - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setSstPartitionerFactory( - JNIEnv*, jobject, jlong jhandle, jlong factory_handle) { - auto* options = - reinterpret_cast(jhandle); - auto factory = reinterpret_cast< - std::shared_ptr*>( - factory_handle); - options->sst_partitioner_factory = *factory; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompactionThreadLimiter - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionThreadLimiter( - JNIEnv*, jclass, jlong jhandle, jlong jlimiter_handle) { - auto* options = - reinterpret_cast(jhandle); - auto* limiter = reinterpret_cast< - std::shared_ptr*>( - jlimiter_handle); - options->compaction_thread_limiter = *limiter; -} - 
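On the Java side, these ColumnFamilyOptions bindings back methods such as setWriteBufferSize, useFixedLengthPrefixExtractor, setMemTableConfig and setTableFormatConfig. A minimal sketch, assuming the stock RocksJava config classes (SkipListMemTableConfig, BlockBasedTableConfig); the wrapper class name is illustrative and nothing here is part of the diff itself:

    import org.rocksdb.BlockBasedTableConfig;
    import org.rocksdb.ColumnFamilyOptions;
    import org.rocksdb.RocksDB;
    import org.rocksdb.SkipListMemTableConfig;

    public class ColumnFamilyOptionsSketch {
      public static void main(String[] args) {
        RocksDB.loadLibrary();
        try (ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()) {
          cfOptions.setWriteBufferSize(64L << 20);     // goes through the size_t range check above
          cfOptions.setMaxWriteBufferNumber(4);
          cfOptions.useFixedLengthPrefixExtractor(8);  // installs NewFixedPrefixTransform(8)
          cfOptions.setMemTableConfig(new SkipListMemTableConfig());
          cfOptions.setTableFormatConfig(new BlockBasedTableConfig());
        }
      }
    }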
-/* - * Method: tableFactoryName - * Signature: (J)Ljava/lang/String - */ -jstring Java_org_rocksdb_ColumnFamilyOptions_tableFactoryName(JNIEnv* env, - jobject, - jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::TableFactory* tf = opt->table_factory.get(); - - // Should never be nullptr. - // Default memtable factory is SkipListFactory - assert(tf); - - return env->NewStringUTF(tf->Name()); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCfPaths - * Signature: (J[Ljava/lang/String;[J)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCfPaths(JNIEnv* env, jclass, - jlong jhandle, - jobjectArray path_array, - jlongArray size_array) { - auto* options = - reinterpret_cast(jhandle); - jboolean has_exception = JNI_FALSE; - std::vector cf_paths = - rocksdb_convert_cf_paths_from_java_helper(env, path_array, size_array, - &has_exception); - if (JNI_FALSE == has_exception) { - options->cf_paths = std::move(cf_paths); - } -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: cfPathsLen - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_cfPathsLen(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = - reinterpret_cast(jhandle); - return static_cast(opt->cf_paths.size()); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: cfPaths - * Signature: (J[Ljava/lang/String;[J)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_cfPaths(JNIEnv* env, jclass, - jlong jhandle, - jobjectArray jpaths, - jlongArray jtarget_sizes) { - rocksdb_convert_cf_paths_to_java_helper< - ROCKSDB_NAMESPACE::ColumnFamilyOptions>(env, jhandle, jpaths, - jtarget_sizes); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: minWriteBufferNumberToMerge - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_minWriteBufferNumberToMerge( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->min_write_buffer_number_to_merge; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMinWriteBufferNumberToMerge - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMinWriteBufferNumberToMerge( - JNIEnv*, jobject, jlong jhandle, jint jmin_write_buffer_number_to_merge) { - reinterpret_cast(jhandle) - ->min_write_buffer_number_to_merge = - static_cast(jmin_write_buffer_number_to_merge); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: maxWriteBufferNumberToMaintain - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_maxWriteBufferNumberToMaintain( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_write_buffer_number_to_maintain; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMaxWriteBufferNumberToMaintain - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxWriteBufferNumberToMaintain( - JNIEnv*, jobject, jlong jhandle, - jint jmax_write_buffer_number_to_maintain) { - reinterpret_cast(jhandle) - ->max_write_buffer_number_to_maintain = - static_cast(jmax_write_buffer_number_to_maintain); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompressionType - * Signature: (JB)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompressionType( - JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { - auto* cf_opts = - reinterpret_cast(jhandle); - cf_opts->compression = - ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( - jcompression_type_value); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: compressionType - * Signature: (J)B - */ 
-jbyte Java_org_rocksdb_ColumnFamilyOptions_compressionType(JNIEnv*, jobject, - jlong jhandle) { - auto* cf_opts = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( - cf_opts->compression); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompressionPerLevel - * Signature: (J[B)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompressionPerLevel( - JNIEnv* env, jobject, jlong jhandle, jbyteArray jcompressionLevels) { - auto* options = - reinterpret_cast(jhandle); - auto uptr_compression_levels = - rocksdb_compression_vector_helper(env, jcompressionLevels); - if (!uptr_compression_levels) { - // exception occurred - return; - } - options->compression_per_level = *(uptr_compression_levels.get()); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: compressionPerLevel - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_ColumnFamilyOptions_compressionPerLevel( - JNIEnv* env, jobject, jlong jhandle) { - auto* cf_options = - reinterpret_cast(jhandle); - return rocksdb_compression_list_helper(env, - cf_options->compression_per_level); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setBottommostCompressionType - * Signature: (JB)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setBottommostCompressionType( - JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { - auto* cf_options = - reinterpret_cast(jhandle); - cf_options->bottommost_compression = - ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( - jcompression_type_value); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: bottommostCompressionType - * Signature: (J)B - */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_bottommostCompressionType( - JNIEnv*, jobject, jlong jhandle) { - auto* cf_options = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( - cf_options->bottommost_compression); -} -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setBottommostCompressionOptions - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setBottommostCompressionOptions( - JNIEnv*, jobject, jlong jhandle, - jlong jbottommost_compression_options_handle) { - auto* cf_options = - reinterpret_cast(jhandle); - auto* bottommost_compression_options = - reinterpret_cast( - jbottommost_compression_options_handle); - cf_options->bottommost_compression_opts = *bottommost_compression_options; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompressionOptions - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompressionOptions( - JNIEnv*, jobject, jlong jhandle, jlong jcompression_options_handle) { - auto* cf_options = - reinterpret_cast(jhandle); - auto* compression_options = - reinterpret_cast( - jcompression_options_handle); - cf_options->compression_opts = *compression_options; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompactionStyle - * Signature: (JB)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionStyle( - JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_style) { - auto* cf_options = - reinterpret_cast(jhandle); - cf_options->compaction_style = - ROCKSDB_NAMESPACE::CompactionStyleJni::toCppCompactionStyle( - jcompaction_style); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: compactionStyle - * Signature: (J)B - */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_compactionStyle(JNIEnv*, jobject, - jlong jhandle) { - auto* cf_options = - 
reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompactionStyleJni::toJavaCompactionStyle( - cf_options->compaction_style); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMaxTableFilesSizeFIFO - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxTableFilesSizeFIFO( - JNIEnv*, jobject, jlong jhandle, jlong jmax_table_files_size) { - reinterpret_cast(jhandle) - ->compaction_options_fifo.max_table_files_size = - static_cast(jmax_table_files_size); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: maxTableFilesSizeFIFO - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxTableFilesSizeFIFO( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->compaction_options_fifo.max_table_files_size; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: numLevels - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_numLevels(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->num_levels; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setNumLevels - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setNumLevels(JNIEnv*, jobject, - jlong jhandle, - jint jnum_levels) { - reinterpret_cast(jhandle) - ->num_levels = static_cast(jnum_levels); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: levelZeroFileNumCompactionTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_levelZeroFileNumCompactionTrigger( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_file_num_compaction_trigger; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setLevelZeroFileNumCompactionTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroFileNumCompactionTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { - reinterpret_cast(jhandle) - ->level0_file_num_compaction_trigger = - static_cast(jlevel0_file_num_compaction_trigger); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: levelZeroSlowdownWritesTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_levelZeroSlowdownWritesTrigger( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_slowdown_writes_trigger; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setLevelSlowdownWritesTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroSlowdownWritesTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { - reinterpret_cast(jhandle) - ->level0_slowdown_writes_trigger = - static_cast(jlevel0_slowdown_writes_trigger); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: levelZeroStopWritesTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_levelZeroStopWritesTrigger( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_stop_writes_trigger; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setLevelStopWritesTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroStopWritesTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { - reinterpret_cast(jhandle) - ->level0_stop_writes_trigger = - static_cast(jlevel0_stop_writes_trigger); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: targetFileSizeBase - * Signature: (J)J - */ -jlong 
Java_org_rocksdb_ColumnFamilyOptions_targetFileSizeBase(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->target_file_size_base; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setTargetFileSizeBase - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setTargetFileSizeBase( - JNIEnv*, jobject, jlong jhandle, jlong jtarget_file_size_base) { - reinterpret_cast(jhandle) - ->target_file_size_base = static_cast(jtarget_file_size_base); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: targetFileSizeMultiplier - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_targetFileSizeMultiplier( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->target_file_size_multiplier; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setTargetFileSizeMultiplier - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setTargetFileSizeMultiplier( - JNIEnv*, jobject, jlong jhandle, jint jtarget_file_size_multiplier) { - reinterpret_cast(jhandle) - ->target_file_size_multiplier = - static_cast(jtarget_file_size_multiplier); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: maxBytesForLevelBase - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelBase(JNIEnv*, - jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_bytes_for_level_base; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMaxBytesForLevelBase - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelBase( - JNIEnv*, jobject, jlong jhandle, jlong jmax_bytes_for_level_base) { - reinterpret_cast(jhandle) - ->max_bytes_for_level_base = - static_cast(jmax_bytes_for_level_base); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: levelCompactionDynamicLevelBytes - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_levelCompactionDynamicLevelBytes( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->level_compaction_dynamic_level_bytes; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setLevelCompactionDynamicLevelBytes - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevelCompactionDynamicLevelBytes( - JNIEnv*, jobject, jlong jhandle, jboolean jenable_dynamic_level_bytes) { - reinterpret_cast(jhandle) - ->level_compaction_dynamic_level_bytes = (jenable_dynamic_level_bytes); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: maxBytesForLevelMultiplier - * Signature: (J)D - */ -jdouble Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelMultiplier( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_bytes_for_level_multiplier; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMaxBytesForLevelMultiplier - * Signature: (JD)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplier( - JNIEnv*, jobject, jlong jhandle, jdouble jmax_bytes_for_level_multiplier) { - reinterpret_cast(jhandle) - ->max_bytes_for_level_multiplier = - static_cast(jmax_bytes_for_level_multiplier); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: maxCompactionBytes - * Signature: (J)I - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxCompactionBytes(JNIEnv*, jobject, - jlong jhandle) { - return static_cast( - reinterpret_cast(jhandle) - ->max_compaction_bytes); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMaxCompactionBytes - * Signature: 
(JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxCompactionBytes( - JNIEnv*, jobject, jlong jhandle, jlong jmax_compaction_bytes) { - reinterpret_cast(jhandle) - ->max_compaction_bytes = static_cast(jmax_compaction_bytes); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: arenaBlockSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_arenaBlockSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->arena_block_size; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setArenaBlockSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setArenaBlockSize( - JNIEnv* env, jobject, jlong jhandle, jlong jarena_block_size) { - auto s = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jarena_block_size); - if (s.ok()) { - reinterpret_cast(jhandle) - ->arena_block_size = jarena_block_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: disableAutoCompactions - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_disableAutoCompactions( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->disable_auto_compactions; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setDisableAutoCompactions - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setDisableAutoCompactions( - JNIEnv*, jobject, jlong jhandle, jboolean jdisable_auto_compactions) { - reinterpret_cast(jhandle) - ->disable_auto_compactions = static_cast(jdisable_auto_compactions); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: maxSequentialSkipInIterations - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxSequentialSkipInIterations( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_sequential_skip_in_iterations; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMaxSequentialSkipInIterations - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxSequentialSkipInIterations( - JNIEnv*, jobject, jlong jhandle, jlong jmax_sequential_skip_in_iterations) { - reinterpret_cast(jhandle) - ->max_sequential_skip_in_iterations = - static_cast(jmax_sequential_skip_in_iterations); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: inplaceUpdateSupport - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_inplaceUpdateSupport( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->inplace_update_support; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setInplaceUpdateSupport - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setInplaceUpdateSupport( - JNIEnv*, jobject, jlong jhandle, jboolean jinplace_update_support) { - reinterpret_cast(jhandle) - ->inplace_update_support = static_cast(jinplace_update_support); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: inplaceUpdateNumLocks - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_inplaceUpdateNumLocks( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->inplace_update_num_locks; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setInplaceUpdateNumLocks - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setInplaceUpdateNumLocks( - JNIEnv* env, jobject, jlong jhandle, jlong jinplace_update_num_locks) { - auto s = 
ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - jinplace_update_num_locks); - if (s.ok()) { - reinterpret_cast(jhandle) - ->inplace_update_num_locks = jinplace_update_num_locks; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: memtablePrefixBloomSizeRatio - * Signature: (J)I - */ -jdouble Java_org_rocksdb_ColumnFamilyOptions_memtablePrefixBloomSizeRatio( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->memtable_prefix_bloom_size_ratio; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMemtablePrefixBloomSizeRatio - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMemtablePrefixBloomSizeRatio( - JNIEnv*, jobject, jlong jhandle, - jdouble jmemtable_prefix_bloom_size_ratio) { - reinterpret_cast(jhandle) - ->memtable_prefix_bloom_size_ratio = - static_cast(jmemtable_prefix_bloom_size_ratio); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: experimentalMempurgeThreshold - * Signature: (J)I - */ -jdouble Java_org_rocksdb_ColumnFamilyOptions_experimentalMempurgeThreshold( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->experimental_mempurge_threshold; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setExperimentalMempurgeThreshold - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setExperimentalMempurgeThreshold( - JNIEnv*, jobject, jlong jhandle, jdouble jexperimental_mempurge_threshold) { - reinterpret_cast(jhandle) - ->experimental_mempurge_threshold = - static_cast(jexperimental_mempurge_threshold); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: memtableWholeKeyFiltering - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_memtableWholeKeyFiltering( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->memtable_whole_key_filtering; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMemtableWholeKeyFiltering - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMemtableWholeKeyFiltering( - JNIEnv*, jobject, jlong jhandle, jboolean jmemtable_whole_key_filtering) { - reinterpret_cast(jhandle) - ->memtable_whole_key_filtering = - static_cast(jmemtable_whole_key_filtering); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: bloomLocality - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_bloomLocality(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->bloom_locality; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setBloomLocality - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setBloomLocality( - JNIEnv*, jobject, jlong jhandle, jint jbloom_locality) { - reinterpret_cast(jhandle) - ->bloom_locality = static_cast(jbloom_locality); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: maxSuccessiveMerges - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxSuccessiveMerges(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_successive_merges; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMaxSuccessiveMerges - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxSuccessiveMerges( - JNIEnv* env, jobject, jlong jhandle, jlong jmax_successive_merges) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - jmax_successive_merges); - if (s.ok()) { - 
reinterpret_cast(jhandle) - ->max_successive_merges = jmax_successive_merges; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: optimizeFiltersForHits - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_optimizeFiltersForHits( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->optimize_filters_for_hits; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setOptimizeFiltersForHits - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setOptimizeFiltersForHits( - JNIEnv*, jobject, jlong jhandle, jboolean joptimize_filters_for_hits) { - reinterpret_cast(jhandle) - ->optimize_filters_for_hits = - static_cast(joptimize_filters_for_hits); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: memtableHugePageSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_memtableHugePageSize(JNIEnv*, - jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->memtable_huge_page_size; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMemtableHugePageSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMemtableHugePageSize( - JNIEnv* env, jobject, jlong jhandle, jlong jmemtable_huge_page_size) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - jmemtable_huge_page_size); - if (s.ok()) { - reinterpret_cast(jhandle) - ->memtable_huge_page_size = jmemtable_huge_page_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: softPendingCompactionBytesLimit - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_softPendingCompactionBytesLimit( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->soft_pending_compaction_bytes_limit; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setSoftPendingCompactionBytesLimit - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setSoftPendingCompactionBytesLimit( - JNIEnv*, jobject, jlong jhandle, - jlong jsoft_pending_compaction_bytes_limit) { - reinterpret_cast(jhandle) - ->soft_pending_compaction_bytes_limit = - static_cast(jsoft_pending_compaction_bytes_limit); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: softHardCompactionBytesLimit - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_hardPendingCompactionBytesLimit( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->hard_pending_compaction_bytes_limit; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setHardPendingCompactionBytesLimit - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setHardPendingCompactionBytesLimit( - JNIEnv*, jobject, jlong jhandle, - jlong jhard_pending_compaction_bytes_limit) { - reinterpret_cast(jhandle) - ->hard_pending_compaction_bytes_limit = - static_cast(jhard_pending_compaction_bytes_limit); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: level0FileNumCompactionTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_level0FileNumCompactionTrigger( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_file_num_compaction_trigger; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setLevel0FileNumCompactionTrigger - * Signature: (JI)V - */ -void 
Java_org_rocksdb_ColumnFamilyOptions_setLevel0FileNumCompactionTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { - reinterpret_cast(jhandle) - ->level0_file_num_compaction_trigger = - static_cast(jlevel0_file_num_compaction_trigger); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: level0SlowdownWritesTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_level0SlowdownWritesTrigger( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_slowdown_writes_trigger; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setLevel0SlowdownWritesTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevel0SlowdownWritesTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { - reinterpret_cast(jhandle) - ->level0_slowdown_writes_trigger = - static_cast(jlevel0_slowdown_writes_trigger); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: level0StopWritesTrigger - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_level0StopWritesTrigger( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->level0_stop_writes_trigger; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setLevel0StopWritesTrigger - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevel0StopWritesTrigger( - JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { - reinterpret_cast(jhandle) - ->level0_stop_writes_trigger = - static_cast(jlevel0_stop_writes_trigger); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: maxBytesForLevelMultiplierAdditional - * Signature: (J)[I - */ -jintArray -Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelMultiplierAdditional( - JNIEnv* env, jobject, jlong jhandle) { - auto mbflma = - reinterpret_cast(jhandle) - ->max_bytes_for_level_multiplier_additional; - - const size_t size = mbflma.size(); - - jint* additionals = new jint[size]; - for (size_t i = 0; i < size; i++) { - additionals[i] = static_cast(mbflma[i]); - } - - jsize jlen = static_cast(size); - jintArray result = env->NewIntArray(jlen); - if (result == nullptr) { - // exception thrown: OutOfMemoryError - delete[] additionals; - return nullptr; - } - env->SetIntArrayRegion(result, 0, jlen, additionals); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(result); - delete[] additionals; - return nullptr; - } - - delete[] additionals; - - return result; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMaxBytesForLevelMultiplierAdditional - * Signature: (J[I)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplierAdditional( - JNIEnv* env, jobject, jlong jhandle, - jintArray jmax_bytes_for_level_multiplier_additional) { - jsize len = env->GetArrayLength(jmax_bytes_for_level_multiplier_additional); - jint* additionals = env->GetIntArrayElements( - jmax_bytes_for_level_multiplier_additional, nullptr); - if (additionals == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - auto* cf_opt = - reinterpret_cast(jhandle); - cf_opt->max_bytes_for_level_multiplier_additional.clear(); - for (jsize i = 0; i < len; i++) { - cf_opt->max_bytes_for_level_multiplier_additional.push_back( - static_cast(additionals[i])); - } - - env->ReleaseIntArrayElements(jmax_bytes_for_level_multiplier_additional, - additionals, JNI_ABORT); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * 
Method: paranoidFileChecks - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_paranoidFileChecks( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->paranoid_file_checks; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setParanoidFileChecks - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setParanoidFileChecks( - JNIEnv*, jobject, jlong jhandle, jboolean jparanoid_file_checks) { - reinterpret_cast(jhandle) - ->paranoid_file_checks = static_cast(jparanoid_file_checks); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompactionPriority - * Signature: (JB)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionPriority( - JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_priority_value) { - auto* cf_opts = - reinterpret_cast(jhandle); - cf_opts->compaction_pri = - ROCKSDB_NAMESPACE::CompactionPriorityJni::toCppCompactionPriority( - jcompaction_priority_value); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: compactionPriority - * Signature: (J)B - */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_compactionPriority(JNIEnv*, jobject, - jlong jhandle) { - auto* cf_opts = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompactionPriorityJni::toJavaCompactionPriority( - cf_opts->compaction_pri); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setReportBgIoStats - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setReportBgIoStats( - JNIEnv*, jobject, jlong jhandle, jboolean jreport_bg_io_stats) { - auto* cf_opts = - reinterpret_cast(jhandle); - cf_opts->report_bg_io_stats = static_cast(jreport_bg_io_stats); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: reportBgIoStats - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_reportBgIoStats(JNIEnv*, jobject, - jlong jhandle) { - auto* cf_opts = - reinterpret_cast(jhandle); - return static_cast(cf_opts->report_bg_io_stats); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setTtl - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setTtl(JNIEnv*, jobject, - jlong jhandle, jlong jttl) { - auto* cf_opts = - reinterpret_cast(jhandle); - cf_opts->ttl = static_cast(jttl); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: ttl - * Signature: (J)J - */ -JNIEXPORT jlong JNICALL -Java_org_rocksdb_ColumnFamilyOptions_ttl(JNIEnv*, jobject, jlong jhandle) { - auto* cf_opts = - reinterpret_cast(jhandle); - return static_cast(cf_opts->ttl); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setPeriodicCompactionSeconds - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setPeriodicCompactionSeconds( - JNIEnv*, jobject, jlong jhandle, jlong jperiodicCompactionSeconds) { - auto* cf_opts = - reinterpret_cast(jhandle); - cf_opts->periodic_compaction_seconds = - static_cast(jperiodicCompactionSeconds); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: periodicCompactionSeconds - * Signature: (J)J - */ -JNIEXPORT jlong JNICALL -Java_org_rocksdb_ColumnFamilyOptions_periodicCompactionSeconds(JNIEnv*, jobject, - jlong jhandle) { - auto* cf_opts = - reinterpret_cast(jhandle); - return static_cast(cf_opts->periodic_compaction_seconds); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompactionOptionsUniversal - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionOptionsUniversal( - JNIEnv*, jobject, jlong jhandle, - jlong 
jcompaction_options_universal_handle) { - auto* cf_opts = - reinterpret_cast(jhandle); - auto* opts_uni = - reinterpret_cast( - jcompaction_options_universal_handle); - cf_opts->compaction_options_universal = *opts_uni; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setCompactionOptionsFIFO - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionOptionsFIFO( - JNIEnv*, jobject, jlong jhandle, jlong jcompaction_options_fifo_handle) { - auto* cf_opts = - reinterpret_cast(jhandle); - auto* opts_fifo = reinterpret_cast( - jcompaction_options_fifo_handle); - cf_opts->compaction_options_fifo = *opts_fifo; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setForceConsistencyChecks - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setForceConsistencyChecks( - JNIEnv*, jobject, jlong jhandle, jboolean jforce_consistency_checks) { - auto* cf_opts = - reinterpret_cast(jhandle); - cf_opts->force_consistency_checks = - static_cast(jforce_consistency_checks); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: forceConsistencyChecks - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_forceConsistencyChecks( - JNIEnv*, jobject, jlong jhandle) { - auto* cf_opts = - reinterpret_cast(jhandle); - return static_cast(cf_opts->force_consistency_checks); -} - -/// BLOB options - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setEnableBlobFiles - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setEnableBlobFiles( - JNIEnv*, jobject, jlong jhandle, jboolean jenable_blob_files) { - auto* opts = - reinterpret_cast(jhandle); - opts->enable_blob_files = static_cast(jenable_blob_files); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: enableBlobFiles - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_enableBlobFiles(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return static_cast(opts->enable_blob_files); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setMinBlobSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setMinBlobSize(JNIEnv*, jobject, - jlong jhandle, - jlong jmin_blob_size) { - auto* opts = - reinterpret_cast(jhandle); - opts->min_blob_size = static_cast(jmin_blob_size); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: minBlobSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_minBlobSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return static_cast(opts->min_blob_size); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setBlobFileSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobFileSize( - JNIEnv*, jobject, jlong jhandle, jlong jblob_file_size) { - auto* opts = - reinterpret_cast(jhandle); - opts->blob_file_size = static_cast(jblob_file_size); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: blobFileSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_blobFileSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return static_cast(opts->blob_file_size); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setBlobCompressionType - * Signature: (JB)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobCompressionType( - JNIEnv*, jobject, jlong jhandle, jbyte jblob_compression_type_value) { - auto* opts = - reinterpret_cast(jhandle); - 
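Enum-valued options such as the blob compression type here cross the JNI boundary as a single jbyte; the assignment just below hands that byte to CompressionTypeJni::toCppCompressionType, and sibling helpers (CompactionPriorityJni, PrepopulateBlobCacheJni, WALRecoveryModeJni) do the same for their enums. A minimal sketch of what such a byte-to-enum translation amounts to, using a made-up FakeCompressionType and ToCpp/ToJava helpers rather than the real RocksDB converters, which cover the full set of values:

#include <cstdint>

// Illustrative stand-ins; not the actual CompressionTypeJni implementation.
enum class FakeCompressionType : char { kNoCompression = 0x0, kSnappyCompression = 0x1 };

inline FakeCompressionType ToCpp(int8_t jvalue) {
  switch (jvalue) {
    case 0x1:
      return FakeCompressionType::kSnappyCompression;
    default:
      return FakeCompressionType::kNoCompression;
  }
}

inline int8_t ToJava(FakeCompressionType type) { return static_cast<int8_t>(type); }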
opts->blob_compression_type = - ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( - jblob_compression_type_value); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: blobCompressionType - * Signature: (J)B - */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_blobCompressionType(JNIEnv*, jobject, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( - opts->blob_compression_type); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setEnableBlobGarbageCollection - * Signature: (JZ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setEnableBlobGarbageCollection( - JNIEnv*, jobject, jlong jhandle, jboolean jenable_blob_garbage_collection) { - auto* opts = - reinterpret_cast(jhandle); - opts->enable_blob_garbage_collection = - static_cast(jenable_blob_garbage_collection); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: enableBlobGarbageCollection - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_enableBlobGarbageCollection( - JNIEnv*, jobject, jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return static_cast(opts->enable_blob_garbage_collection); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setBlobGarbageCollectionAgeCutoff - * Signature: (JD)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobGarbageCollectionAgeCutoff( - JNIEnv*, jobject, jlong jhandle, - jdouble jblob_garbage_collection_age_cutoff) { - auto* opts = - reinterpret_cast(jhandle); - opts->blob_garbage_collection_age_cutoff = - static_cast(jblob_garbage_collection_age_cutoff); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: blobGarbageCollectionAgeCutoff - * Signature: (J)D - */ -jdouble Java_org_rocksdb_ColumnFamilyOptions_blobGarbageCollectionAgeCutoff( - JNIEnv*, jobject, jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return static_cast(opts->blob_garbage_collection_age_cutoff); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setBlobGarbageCollectionForceThreshold - * Signature: (JD)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobGarbageCollectionForceThreshold( - JNIEnv*, jobject, jlong jhandle, - jdouble jblob_garbage_collection_force_threshold) { - auto* opts = - reinterpret_cast(jhandle); - opts->blob_garbage_collection_force_threshold = - static_cast(jblob_garbage_collection_force_threshold); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: blobGarbageCollectionForceThreshold - * Signature: (J)D - */ -jdouble -Java_org_rocksdb_ColumnFamilyOptions_blobGarbageCollectionForceThreshold( - JNIEnv*, jobject, jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return static_cast(opts->blob_garbage_collection_force_threshold); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setBlobCompactionReadaheadSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobCompactionReadaheadSize( - JNIEnv*, jobject, jlong jhandle, jlong jblob_compaction_readahead_size) { - auto* opts = - reinterpret_cast(jhandle); - opts->blob_compaction_readahead_size = - static_cast(jblob_compaction_readahead_size); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: blobCompactionReadaheadSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_ColumnFamilyOptions_blobCompactionReadaheadSize( - JNIEnv*, jobject, jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return static_cast(opts->blob_compaction_readahead_size); -} - -/* - * 
Class: org_rocksdb_ColumnFamilyOptions - * Method: setBlobFileStartingLevel - * Signature: (JI)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobFileStartingLevel( - JNIEnv*, jobject, jlong jhandle, jint jblob_file_starting_level) { - auto* opts = - reinterpret_cast(jhandle); - opts->blob_file_starting_level = jblob_file_starting_level; -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: blobFileStartingLevel - * Signature: (J)I - */ -jint Java_org_rocksdb_ColumnFamilyOptions_blobFileStartingLevel(JNIEnv*, - jobject, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return static_cast(opts->blob_file_starting_level); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: setPrepopulateBlobCache - * Signature: (JB)V - */ -void Java_org_rocksdb_ColumnFamilyOptions_setPrepopulateBlobCache( - JNIEnv*, jobject, jlong jhandle, jbyte jprepopulate_blob_cache_value) { - auto* opts = - reinterpret_cast(jhandle); - opts->prepopulate_blob_cache = - ROCKSDB_NAMESPACE::PrepopulateBlobCacheJni::toCppPrepopulateBlobCache( - jprepopulate_blob_cache_value); -} - -/* - * Class: org_rocksdb_ColumnFamilyOptions - * Method: prepopulateBlobCache - * Signature: (J)B - */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_prepopulateBlobCache(JNIEnv*, - jobject, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::PrepopulateBlobCacheJni::toJavaPrepopulateBlobCache( - opts->prepopulate_blob_cache); -} - -///////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::DBOptions - -/* - * Class: org_rocksdb_DBOptions - * Method: newDBOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_DBOptions_newDBOptions(JNIEnv*, jclass) { - auto* dbop = new ROCKSDB_NAMESPACE::DBOptions(); - return GET_CPLUSPLUS_POINTER(dbop); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: copyDBOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_copyDBOptions(JNIEnv*, jclass, jlong jhandle) { - auto new_opt = new ROCKSDB_NAMESPACE::DBOptions( - *(reinterpret_cast(jhandle))); - return GET_CPLUSPLUS_POINTER(new_opt); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: newDBOptionsFromOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_newDBOptionsFromOptions( - JNIEnv*, jclass, jlong joptions_handle) { - auto new_opt = new ROCKSDB_NAMESPACE::DBOptions( - *reinterpret_cast(joptions_handle)); - return GET_CPLUSPLUS_POINTER(new_opt); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: getDBOptionsFromProps - * Signature: (JLjava/lang/String;)J - */ -jlong Java_org_rocksdb_DBOptions_getDBOptionsFromProps__JLjava_lang_String_2( - JNIEnv* env, jclass, jlong config_handle, jstring jopt_string) { - const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); - if (opt_string == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - - auto* config_options = - reinterpret_cast(config_handle); - auto* db_options = new ROCKSDB_NAMESPACE::DBOptions(); - ROCKSDB_NAMESPACE::Status status = ROCKSDB_NAMESPACE::GetDBOptionsFromString( - *config_options, ROCKSDB_NAMESPACE::DBOptions(), opt_string, db_options); - - env->ReleaseStringUTFChars(jopt_string, opt_string); - - // Check if DBOptions creation was possible. - jlong ret_value = 0; - if (status.ok()) { - ret_value = GET_CPLUSPLUS_POINTER(db_options); - } else { - // if operation failed the DBOptions need to be deleted - // again to prevent a memory leak. 
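Every options object created in this file is handed back to Java as a jlong holding the address of a heap allocation; the Java wrapper keeps that handle and frees it later through disposeInternal, which is why the failure branch just below deletes the freshly built DBOptions before returning 0. A minimal sketch of that handle round trip, using a stand-in FakeOptions struct and hypothetical helper names instead of the real ROCKSDB_NAMESPACE::DBOptions natives:

#include <cstdint>

namespace sketch {

// Stand-in for ROCKSDB_NAMESPACE::DBOptions; illustrative only.
struct FakeOptions {
  bool create_if_missing = false;
};

// What the newDBOptions-style natives do: allocate on the heap and hand the
// address back to Java as a jlong.
int64_t NewHandle() { return reinterpret_cast<int64_t>(new FakeOptions()); }

// What the setters and getters do: turn the handle back into a pointer.
void SetCreateIfMissing(int64_t handle, bool flag) {
  reinterpret_cast<FakeOptions*>(handle)->create_if_missing = flag;
}

// What disposeInternal does: free the native object exactly once.
void Dispose(int64_t handle) { delete reinterpret_cast<FakeOptions*>(handle); }

}  // namespace sketch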
- delete db_options; - } - return ret_value; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: getDBOptionsFromProps - * Signature: (Ljava/util/String;)J - */ -jlong Java_org_rocksdb_DBOptions_getDBOptionsFromProps__Ljava_lang_String_2( - JNIEnv* env, jclass, jstring jopt_string) { - const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); - if (opt_string == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - - const ROCKSDB_NAMESPACE::DBOptions base_options; - auto* db_options = new ROCKSDB_NAMESPACE::DBOptions(); - ROCKSDB_NAMESPACE::ConfigOptions config_options(base_options); - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - ROCKSDB_NAMESPACE::Status status = ROCKSDB_NAMESPACE::GetDBOptionsFromString( - config_options, base_options, opt_string, db_options); - - env->ReleaseStringUTFChars(jopt_string, opt_string); - - // Check if DBOptions creation was possible. - jlong ret_value = 0; - if (status.ok()) { - ret_value = GET_CPLUSPLUS_POINTER(db_options); - } else { - // if operation failed the DBOptions need to be deleted - // again to prevent a memory leak. - delete db_options; - } - return ret_value; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_DBOptions_disposeInternal(JNIEnv*, jobject, - jlong handle) { - auto* dbo = reinterpret_cast(handle); - assert(dbo != nullptr); - delete dbo; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: optimizeForSmallDb - * Signature: (J)V - */ -void Java_org_rocksdb_DBOptions_optimizeForSmallDb(JNIEnv*, jobject, - jlong jhandle) { - reinterpret_cast(jhandle) - ->OptimizeForSmallDb(); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setEnv - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setEnv(JNIEnv*, jobject, jlong jhandle, - jlong jenv_handle) { - reinterpret_cast(jhandle)->env = - reinterpret_cast(jenv_handle); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setIncreaseParallelism - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setIncreaseParallelism(JNIEnv*, jobject, - jlong jhandle, - jint totalThreads) { - reinterpret_cast(jhandle)->IncreaseParallelism( - static_cast(totalThreads)); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setCreateIfMissing - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setCreateIfMissing(JNIEnv*, jobject, - jlong jhandle, - jboolean flag) { - reinterpret_cast(jhandle)->create_if_missing = - flag; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: createIfMissing - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_createIfMissing(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->create_if_missing; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setCreateMissingColumnFamilies - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setCreateMissingColumnFamilies(JNIEnv*, jobject, - jlong jhandle, - jboolean flag) { - reinterpret_cast(jhandle) - ->create_missing_column_families = flag; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: createMissingColumnFamilies - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_createMissingColumnFamilies(JNIEnv*, - jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->create_missing_column_families; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setErrorIfExists - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setErrorIfExists(JNIEnv*, jobject, - jlong jhandle, - jboolean error_if_exists) { 
- reinterpret_cast(jhandle)->error_if_exists = - static_cast(error_if_exists); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: errorIfExists - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_errorIfExists(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->error_if_exists; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setParanoidChecks - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setParanoidChecks(JNIEnv*, jobject, - jlong jhandle, - jboolean paranoid_checks) { - reinterpret_cast(jhandle)->paranoid_checks = - static_cast(paranoid_checks); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: paranoidChecks - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_paranoidChecks(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->paranoid_checks; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setRateLimiter - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setRateLimiter(JNIEnv*, jobject, jlong jhandle, - jlong jrate_limiter_handle) { - std::shared_ptr* pRateLimiter = - reinterpret_cast*>( - jrate_limiter_handle); - reinterpret_cast(jhandle)->rate_limiter = - *pRateLimiter; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setSstFileManager - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setSstFileManager( - JNIEnv*, jobject, jlong jhandle, jlong jsst_file_manager_handle) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jsst_file_manager_handle); - reinterpret_cast(jhandle)->sst_file_manager = - *sptr_sst_file_manager; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setLogger - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setLogger(JNIEnv*, jobject, jlong jhandle, - jlong jlogger_handle) { - std::shared_ptr* pLogger = - reinterpret_cast*>( - jlogger_handle); - reinterpret_cast(jhandle)->info_log = *pLogger; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setInfoLogLevel - * Signature: (JB)V - */ -void Java_org_rocksdb_DBOptions_setInfoLogLevel(JNIEnv*, jobject, jlong jhandle, - jbyte jlog_level) { - reinterpret_cast(jhandle)->info_log_level = - static_cast(jlog_level); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: infoLogLevel - * Signature: (J)B - */ -jbyte Java_org_rocksdb_DBOptions_infoLogLevel(JNIEnv*, jobject, jlong jhandle) { - return static_cast( - reinterpret_cast(jhandle)->info_log_level); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxTotalWalSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setMaxTotalWalSize(JNIEnv*, jobject, - jlong jhandle, - jlong jmax_total_wal_size) { - reinterpret_cast(jhandle)->max_total_wal_size = - static_cast(jmax_total_wal_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxTotalWalSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_maxTotalWalSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_total_wal_size; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxOpenFiles - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setMaxOpenFiles(JNIEnv*, jobject, jlong jhandle, - jint max_open_files) { - reinterpret_cast(jhandle)->max_open_files = - static_cast(max_open_files); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxOpenFiles - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_maxOpenFiles(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_open_files; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxFileOpeningThreads - * Signature: (JI)V - 
*/ -void Java_org_rocksdb_DBOptions_setMaxFileOpeningThreads( - JNIEnv*, jobject, jlong jhandle, jint jmax_file_opening_threads) { - reinterpret_cast(jhandle) - ->max_file_opening_threads = static_cast(jmax_file_opening_threads); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxFileOpeningThreads - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_maxFileOpeningThreads(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->max_file_opening_threads); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setStatistics - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setStatistics(JNIEnv*, jobject, jlong jhandle, - jlong jstatistics_handle) { - auto* opt = reinterpret_cast(jhandle); - auto* pSptr = - reinterpret_cast*>( - jstatistics_handle); - opt->statistics = *pSptr; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: statistics - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_statistics(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - std::shared_ptr sptr = opt->statistics; - if (sptr == nullptr) { - return 0; - } else { - std::shared_ptr* pSptr = - new std::shared_ptr(sptr); - return GET_CPLUSPLUS_POINTER(pSptr); - } -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setUseFsync - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setUseFsync(JNIEnv*, jobject, jlong jhandle, - jboolean use_fsync) { - reinterpret_cast(jhandle)->use_fsync = - static_cast(use_fsync); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: useFsync - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_useFsync(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->use_fsync; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setDbPaths - * Signature: (J[Ljava/lang/String;[J)V - */ -void Java_org_rocksdb_DBOptions_setDbPaths(JNIEnv* env, jobject, jlong jhandle, - jobjectArray jpaths, - jlongArray jtarget_sizes) { - std::vector db_paths; - jlong* ptr_jtarget_size = env->GetLongArrayElements(jtarget_sizes, nullptr); - if (ptr_jtarget_size == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - jboolean has_exception = JNI_FALSE; - const jsize len = env->GetArrayLength(jpaths); - for (jsize i = 0; i < len; i++) { - jobject jpath = - reinterpret_cast(env->GetObjectArrayElement(jpaths, i)); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - std::string path = ROCKSDB_NAMESPACE::JniUtil::copyStdString( - env, static_cast(jpath), &has_exception); - env->DeleteLocalRef(jpath); - - if (has_exception == JNI_TRUE) { - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - - jlong jtarget_size = ptr_jtarget_size[i]; - - db_paths.push_back( - ROCKSDB_NAMESPACE::DbPath(path, static_cast(jtarget_size))); - } - - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - - auto* opt = reinterpret_cast(jhandle); - opt->db_paths = db_paths; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: dbPathsLen - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_dbPathsLen(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->db_paths.size()); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: dbPaths - * Signature: (J[Ljava/lang/String;[J)V - */ -void Java_org_rocksdb_DBOptions_dbPaths(JNIEnv* env, jobject, jlong jhandle, - jobjectArray 
jpaths, - jlongArray jtarget_sizes) { - jboolean is_copy; - jlong* ptr_jtarget_size = env->GetLongArrayElements(jtarget_sizes, &is_copy); - if (ptr_jtarget_size == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - auto* opt = reinterpret_cast(jhandle); - const jsize len = env->GetArrayLength(jpaths); - for (jsize i = 0; i < len; i++) { - ROCKSDB_NAMESPACE::DbPath db_path = opt->db_paths[i]; - - jstring jpath = env->NewStringUTF(db_path.path.c_str()); - if (jpath == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - env->SetObjectArrayElement(jpaths, i, jpath); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jpath); - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); - return; - } - - ptr_jtarget_size[i] = static_cast(db_path.target_size); - } - - env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, - is_copy == JNI_TRUE ? 0 : JNI_ABORT); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setDbLogDir - * Signature: (JLjava/lang/String)V - */ -void Java_org_rocksdb_DBOptions_setDbLogDir(JNIEnv* env, jobject, jlong jhandle, - jstring jdb_log_dir) { - const char* log_dir = env->GetStringUTFChars(jdb_log_dir, nullptr); - if (log_dir == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - reinterpret_cast(jhandle)->db_log_dir.assign( - log_dir); - env->ReleaseStringUTFChars(jdb_log_dir, log_dir); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: dbLogDir - * Signature: (J)Ljava/lang/String - */ -jstring Java_org_rocksdb_DBOptions_dbLogDir(JNIEnv* env, jobject, - jlong jhandle) { - return env->NewStringUTF( - reinterpret_cast(jhandle) - ->db_log_dir.c_str()); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWalDir - * Signature: (JLjava/lang/String)V - */ -void Java_org_rocksdb_DBOptions_setWalDir(JNIEnv* env, jobject, jlong jhandle, - jstring jwal_dir) { - const char* wal_dir = env->GetStringUTFChars(jwal_dir, 0); - reinterpret_cast(jhandle)->wal_dir.assign( - wal_dir); - env->ReleaseStringUTFChars(jwal_dir, wal_dir); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: walDir - * Signature: (J)Ljava/lang/String - */ -jstring Java_org_rocksdb_DBOptions_walDir(JNIEnv* env, jobject, jlong jhandle) { - return env->NewStringUTF( - reinterpret_cast(jhandle) - ->wal_dir.c_str()); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setDeleteObsoleteFilesPeriodMicros - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setDeleteObsoleteFilesPeriodMicros( - JNIEnv*, jobject, jlong jhandle, jlong micros) { - reinterpret_cast(jhandle) - ->delete_obsolete_files_period_micros = static_cast(micros); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: deleteObsoleteFilesPeriodMicros - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_deleteObsoleteFilesPeriodMicros( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->delete_obsolete_files_period_micros; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxBackgroundCompactions - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setMaxBackgroundCompactions(JNIEnv*, jobject, - jlong jhandle, - jint max) { - reinterpret_cast(jhandle) - ->max_background_compactions = static_cast(max); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxBackgroundCompactions - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_maxBackgroundCompactions(JNIEnv*, jobject, - jlong 
jhandle) { - return reinterpret_cast(jhandle) - ->max_background_compactions; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxSubcompactions - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setMaxSubcompactions(JNIEnv*, jobject, - jlong jhandle, jint max) { - reinterpret_cast(jhandle)->max_subcompactions = - static_cast(max); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxSubcompactions - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_maxSubcompactions(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_subcompactions; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxBackgroundFlushes - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setMaxBackgroundFlushes( - JNIEnv*, jobject, jlong jhandle, jint max_background_flushes) { - reinterpret_cast(jhandle) - ->max_background_flushes = static_cast(max_background_flushes); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxBackgroundFlushes - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_maxBackgroundFlushes(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_background_flushes; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxBackgroundJobs - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setMaxBackgroundJobs(JNIEnv*, jobject, - jlong jhandle, - jint max_background_jobs) { - reinterpret_cast(jhandle) - ->max_background_jobs = static_cast(max_background_jobs); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxBackgroundJobs - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_maxBackgroundJobs(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_background_jobs; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxLogFileSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setMaxLogFileSize(JNIEnv* env, jobject, - jlong jhandle, - jlong max_log_file_size) { - auto s = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(max_log_file_size); - if (s.ok()) { - reinterpret_cast(jhandle) - ->max_log_file_size = max_log_file_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxLogFileSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_maxLogFileSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_log_file_size; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setLogFileTimeToRoll - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setLogFileTimeToRoll( - JNIEnv* env, jobject, jlong jhandle, jlong log_file_time_to_roll) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - log_file_time_to_roll); - if (s.ok()) { - reinterpret_cast(jhandle) - ->log_file_time_to_roll = log_file_time_to_roll; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_DBOptions - * Method: logFileTimeToRoll - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_logFileTimeToRoll(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->log_file_time_to_roll; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setKeepLogFileNum - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setKeepLogFileNum(JNIEnv* env, jobject, - jlong jhandle, - jlong keep_log_file_num) { - auto s = - ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(keep_log_file_num); - if (s.ok()) { - reinterpret_cast(jhandle) - ->keep_log_file_num = 
keep_log_file_num; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_DBOptions - * Method: keepLogFileNum - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_keepLogFileNum(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->keep_log_file_num; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setRecycleLogFileNum - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setRecycleLogFileNum( - JNIEnv* env, jobject, jlong jhandle, jlong recycle_log_file_num) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - recycle_log_file_num); - if (s.ok()) { - reinterpret_cast(jhandle) - ->recycle_log_file_num = recycle_log_file_num; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_DBOptions - * Method: recycleLogFileNum - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_recycleLogFileNum(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->recycle_log_file_num; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxManifestFileSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setMaxManifestFileSize( - JNIEnv*, jobject, jlong jhandle, jlong max_manifest_file_size) { - reinterpret_cast(jhandle) - ->max_manifest_file_size = static_cast(max_manifest_file_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxManifestFileSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_maxManifestFileSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->max_manifest_file_size; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setTableCacheNumshardbits - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setTableCacheNumshardbits( - JNIEnv*, jobject, jlong jhandle, jint table_cache_numshardbits) { - reinterpret_cast(jhandle) - ->table_cache_numshardbits = static_cast(table_cache_numshardbits); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: tableCacheNumshardbits - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_tableCacheNumshardbits(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->table_cache_numshardbits; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWalTtlSeconds - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setWalTtlSeconds(JNIEnv*, jobject, - jlong jhandle, - jlong WAL_ttl_seconds) { - reinterpret_cast(jhandle)->WAL_ttl_seconds = - static_cast(WAL_ttl_seconds); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: walTtlSeconds - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_walTtlSeconds(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->WAL_ttl_seconds; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWalSizeLimitMB - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setWalSizeLimitMB(JNIEnv*, jobject, - jlong jhandle, - jlong WAL_size_limit_MB) { - reinterpret_cast(jhandle)->WAL_size_limit_MB = - static_cast(WAL_size_limit_MB); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: walTtlSeconds - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_walSizeLimitMB(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->WAL_size_limit_MB; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxWriteBatchGroupSizeBytes - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setMaxWriteBatchGroupSizeBytes( - JNIEnv*, jclass, jlong jhandle, jlong jmax_write_batch_group_size_bytes) { 
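The size_t-typed options above (max_log_file_size, log_file_time_to_roll, keep_log_file_num, recycle_log_file_num, and manifest_preallocation_size further down) are routed through JniUtil::check_if_jlong_fits_size_t and raise an IllegalArgumentException when the value does not fit, because a Java long is always 64 bits while size_t can be 32 bits on some platforms. A rough sketch of what such a guard amounts to; FitsInSizeT is illustrative, not the actual JniUtil helper:

#include <cstddef>
#include <cstdint>
#include <limits>

// Illustrative only: true when a non-negative Java long is representable as
// the platform's size_t (always true on 64-bit, not on 32-bit builds).
inline bool FitsInSizeT(int64_t jvalue) {
  if (jvalue < 0) {
    return false;
  }
  return static_cast<uint64_t>(jvalue) <=
         static_cast<uint64_t>(std::numeric_limits<std::size_t>::max());
}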
- auto* opt = reinterpret_cast(jhandle); - opt->max_write_batch_group_size_bytes = - static_cast(jmax_write_batch_group_size_bytes); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxWriteBatchGroupSizeBytes - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_maxWriteBatchGroupSizeBytes(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->max_write_batch_group_size_bytes); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setManifestPreallocationSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setManifestPreallocationSize( - JNIEnv* env, jobject, jlong jhandle, jlong preallocation_size) { - auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( - preallocation_size); - if (s.ok()) { - reinterpret_cast(jhandle) - ->manifest_preallocation_size = preallocation_size; - } else { - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_DBOptions - * Method: manifestPreallocationSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_manifestPreallocationSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->manifest_preallocation_size; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: useDirectReads - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_useDirectReads(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->use_direct_reads; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setUseDirectReads - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setUseDirectReads(JNIEnv*, jobject, - jlong jhandle, - jboolean use_direct_reads) { - reinterpret_cast(jhandle)->use_direct_reads = - static_cast(use_direct_reads); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: useDirectIoForFlushAndCompaction - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_useDirectIoForFlushAndCompaction( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->use_direct_io_for_flush_and_compaction; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setUseDirectReads - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setUseDirectIoForFlushAndCompaction( - JNIEnv*, jobject, jlong jhandle, - jboolean use_direct_io_for_flush_and_compaction) { - reinterpret_cast(jhandle) - ->use_direct_io_for_flush_and_compaction = - static_cast(use_direct_io_for_flush_and_compaction); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAllowFAllocate - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAllowFAllocate(JNIEnv*, jobject, - jlong jhandle, - jboolean jallow_fallocate) { - reinterpret_cast(jhandle)->allow_fallocate = - static_cast(jallow_fallocate); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: allowFAllocate - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_allowFAllocate(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->allow_fallocate); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAllowMmapReads - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAllowMmapReads(JNIEnv*, jobject, - jlong jhandle, - jboolean allow_mmap_reads) { - reinterpret_cast(jhandle)->allow_mmap_reads = - static_cast(allow_mmap_reads); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: allowMmapReads - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_allowMmapReads(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->allow_mmap_reads; -} - -/* - * 
Class: org_rocksdb_DBOptions - * Method: setAllowMmapWrites - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAllowMmapWrites(JNIEnv*, jobject, - jlong jhandle, - jboolean allow_mmap_writes) { - reinterpret_cast(jhandle)->allow_mmap_writes = - static_cast(allow_mmap_writes); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: allowMmapWrites - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_allowMmapWrites(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->allow_mmap_writes; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setIsFdCloseOnExec - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setIsFdCloseOnExec( - JNIEnv*, jobject, jlong jhandle, jboolean is_fd_close_on_exec) { - reinterpret_cast(jhandle) - ->is_fd_close_on_exec = static_cast(is_fd_close_on_exec); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: isFdCloseOnExec - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_isFdCloseOnExec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->is_fd_close_on_exec; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setStatsDumpPeriodSec - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setStatsDumpPeriodSec( - JNIEnv*, jobject, jlong jhandle, jint jstats_dump_period_sec) { - reinterpret_cast(jhandle) - ->stats_dump_period_sec = - static_cast(jstats_dump_period_sec); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: statsDumpPeriodSec - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_statsDumpPeriodSec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->stats_dump_period_sec; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setStatsPersistPeriodSec - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setStatsPersistPeriodSec( - JNIEnv*, jobject, jlong jhandle, jint jstats_persist_period_sec) { - reinterpret_cast(jhandle) - ->stats_persist_period_sec = - static_cast(jstats_persist_period_sec); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: statsPersistPeriodSec - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_statsPersistPeriodSec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->stats_persist_period_sec; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setStatsHistoryBufferSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setStatsHistoryBufferSize( - JNIEnv*, jobject, jlong jhandle, jlong jstats_history_buffer_size) { - reinterpret_cast(jhandle) - ->stats_history_buffer_size = - static_cast(jstats_history_buffer_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: statsHistoryBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_statsHistoryBufferSize(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->stats_history_buffer_size; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAdviseRandomOnOpen - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAdviseRandomOnOpen( - JNIEnv*, jobject, jlong jhandle, jboolean advise_random_on_open) { - reinterpret_cast(jhandle) - ->advise_random_on_open = static_cast(advise_random_on_open); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: adviseRandomOnOpen - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_adviseRandomOnOpen(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->advise_random_on_open; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setDbWriteBufferSize - * Signature: (JJ)V - */ -void 
Java_org_rocksdb_DBOptions_setDbWriteBufferSize( - JNIEnv*, jobject, jlong jhandle, jlong jdb_write_buffer_size) { - auto* opt = reinterpret_cast(jhandle); - opt->db_write_buffer_size = static_cast(jdb_write_buffer_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWriteBufferManager - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setWriteBufferManager( - JNIEnv*, jobject, jlong jdb_options_handle, - jlong jwrite_buffer_manager_handle) { - auto* write_buffer_manager = - reinterpret_cast*>( - jwrite_buffer_manager_handle); - reinterpret_cast(jdb_options_handle) - ->write_buffer_manager = *write_buffer_manager; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: dbWriteBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_dbWriteBufferSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->db_write_buffer_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAccessHintOnCompactionStart - * Signature: (JB)V - */ -void Java_org_rocksdb_DBOptions_setAccessHintOnCompactionStart( - JNIEnv*, jobject, jlong jhandle, jbyte jaccess_hint_value) { - auto* opt = reinterpret_cast(jhandle); - opt->access_hint_on_compaction_start = - ROCKSDB_NAMESPACE::AccessHintJni::toCppAccessHint(jaccess_hint_value); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: accessHintOnCompactionStart - * Signature: (J)B - */ -jbyte Java_org_rocksdb_DBOptions_accessHintOnCompactionStart(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::AccessHintJni::toJavaAccessHint( - opt->access_hint_on_compaction_start); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setCompactionReadaheadSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setCompactionReadaheadSize( - JNIEnv*, jobject, jlong jhandle, jlong jcompaction_readahead_size) { - auto* opt = reinterpret_cast(jhandle); - opt->compaction_readahead_size = - static_cast(jcompaction_readahead_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: compactionReadaheadSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_compactionReadaheadSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->compaction_readahead_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setRandomAccessMaxBufferSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setRandomAccessMaxBufferSize( - JNIEnv*, jobject, jlong jhandle, jlong jrandom_access_max_buffer_size) { - auto* opt = reinterpret_cast(jhandle); - opt->random_access_max_buffer_size = - static_cast(jrandom_access_max_buffer_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: randomAccessMaxBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_randomAccessMaxBufferSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->random_access_max_buffer_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWritableFileMaxBufferSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setWritableFileMaxBufferSize( - JNIEnv*, jobject, jlong jhandle, jlong jwritable_file_max_buffer_size) { - auto* opt = reinterpret_cast(jhandle); - opt->writable_file_max_buffer_size = - static_cast(jwritable_file_max_buffer_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: writableFileMaxBufferSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_writableFileMaxBufferSize(JNIEnv*, jobject, - jlong 
jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->writable_file_max_buffer_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setUseAdaptiveMutex - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setUseAdaptiveMutex( - JNIEnv*, jobject, jlong jhandle, jboolean use_adaptive_mutex) { - reinterpret_cast(jhandle)->use_adaptive_mutex = - static_cast(use_adaptive_mutex); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: useAdaptiveMutex - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_useAdaptiveMutex(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->use_adaptive_mutex; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setBytesPerSync - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setBytesPerSync(JNIEnv*, jobject, jlong jhandle, - jlong bytes_per_sync) { - reinterpret_cast(jhandle)->bytes_per_sync = - static_cast(bytes_per_sync); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: bytesPerSync - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_bytesPerSync(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->bytes_per_sync; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWalBytesPerSync - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setWalBytesPerSync(JNIEnv*, jobject, - jlong jhandle, - jlong jwal_bytes_per_sync) { - reinterpret_cast(jhandle)->wal_bytes_per_sync = - static_cast(jwal_bytes_per_sync); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: walBytesPerSync - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_walBytesPerSync(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->wal_bytes_per_sync); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setStrictBytesPerSync - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setStrictBytesPerSync( - JNIEnv*, jobject, jlong jhandle, jboolean jstrict_bytes_per_sync) { - reinterpret_cast(jhandle) - ->strict_bytes_per_sync = jstrict_bytes_per_sync == JNI_TRUE; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: strictBytesPerSync - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_strictBytesPerSync(JNIEnv*, jobject, - jlong jhandle) { - return static_cast( - reinterpret_cast(jhandle) - ->strict_bytes_per_sync); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setEventListeners - * Signature: (J[J)V - */ -void Java_org_rocksdb_DBOptions_setEventListeners(JNIEnv* env, jclass, - jlong jhandle, - jlongArray jlistener_array) { - auto* opt = reinterpret_cast(jhandle); - rocksdb_set_event_listeners_helper(env, jlistener_array, opt->listeners); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: eventListeners - * Signature: (J)[Lorg/rocksdb/AbstractEventListener; - */ -jobjectArray Java_org_rocksdb_DBOptions_eventListeners(JNIEnv* env, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return rocksdb_get_event_listeners_helper(env, opt->listeners); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setDelayedWriteRate - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setDelayedWriteRate(JNIEnv*, jobject, - jlong jhandle, - jlong jdelayed_write_rate) { - auto* opt = reinterpret_cast(jhandle); - opt->delayed_write_rate = static_cast(jdelayed_write_rate); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: delayedWriteRate - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_delayedWriteRate(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - 
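The pointer-valued members set earlier in this file (rate_limiter, sst_file_manager, info_log, statistics, write_buffer_manager, and row_cache further down) follow a different handle convention from the plain scalar setters: the Java wrapper's handle addresses a heap-allocated std::shared_ptr, and the setter copy-assigns it, so the wrapper and the options object share ownership of the underlying native object. A small sketch of that pattern with stand-in types; FakeStatistics, FakeDBOptions, and SetStatistics are illustrative, not the RocksDB API:

#include <cstdint>
#include <memory>

// Illustrative stand-ins: the handle addresses a heap-allocated shared_ptr,
// and assignment only bumps the reference count.
struct FakeStatistics {};
struct FakeDBOptions {
  std::shared_ptr<FakeStatistics> statistics;
};

inline void SetStatistics(int64_t opt_handle, int64_t stats_handle) {
  auto* opt = reinterpret_cast<FakeDBOptions*>(opt_handle);
  auto* stats = reinterpret_cast<std::shared_ptr<FakeStatistics>*>(stats_handle);
  opt->statistics = *stats;  // copies the shared_ptr, not the Statistics object
}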
return static_cast(opt->delayed_write_rate); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setEnablePipelinedWrite - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setEnablePipelinedWrite( - JNIEnv*, jobject, jlong jhandle, jboolean jenable_pipelined_write) { - auto* opt = reinterpret_cast(jhandle); - opt->enable_pipelined_write = jenable_pipelined_write == JNI_TRUE; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: enablePipelinedWrite - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_enablePipelinedWrite(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->enable_pipelined_write); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setUnorderedWrite - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setUnorderedWrite(JNIEnv*, jobject, - jlong jhandle, - jboolean junordered_write) { - auto* opt = reinterpret_cast(jhandle); - opt->unordered_write = junordered_write == JNI_TRUE; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: unorderedWrite - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_unorderedWrite(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->unordered_write); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setEnableThreadTracking - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setEnableThreadTracking( - JNIEnv*, jobject, jlong jhandle, jboolean jenable_thread_tracking) { - auto* opt = reinterpret_cast(jhandle); - opt->enable_thread_tracking = jenable_thread_tracking == JNI_TRUE; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: enableThreadTracking - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_enableThreadTracking(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->enable_thread_tracking); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAllowConcurrentMemtableWrite - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAllowConcurrentMemtableWrite( - JNIEnv*, jobject, jlong jhandle, jboolean allow) { - reinterpret_cast(jhandle) - ->allow_concurrent_memtable_write = static_cast(allow); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: allowConcurrentMemtableWrite - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_allowConcurrentMemtableWrite( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->allow_concurrent_memtable_write; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setEnableWriteThreadAdaptiveYield - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setEnableWriteThreadAdaptiveYield( - JNIEnv*, jobject, jlong jhandle, jboolean yield) { - reinterpret_cast(jhandle) - ->enable_write_thread_adaptive_yield = static_cast(yield); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: enableWriteThreadAdaptiveYield - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_enableWriteThreadAdaptiveYield( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->enable_write_thread_adaptive_yield; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWriteThreadMaxYieldUsec - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setWriteThreadMaxYieldUsec(JNIEnv*, jobject, - jlong jhandle, - jlong max) { - reinterpret_cast(jhandle) - ->write_thread_max_yield_usec = static_cast(max); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: writeThreadMaxYieldUsec - * Signature: (J)J - */ -jlong 
Java_org_rocksdb_DBOptions_writeThreadMaxYieldUsec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->write_thread_max_yield_usec; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWriteThreadSlowYieldUsec - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setWriteThreadSlowYieldUsec(JNIEnv*, jobject, - jlong jhandle, - jlong slow) { - reinterpret_cast(jhandle) - ->write_thread_slow_yield_usec = static_cast(slow); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: writeThreadSlowYieldUsec - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_writeThreadSlowYieldUsec(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->write_thread_slow_yield_usec; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setSkipStatsUpdateOnDbOpen - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setSkipStatsUpdateOnDbOpen( - JNIEnv*, jobject, jlong jhandle, jboolean jskip_stats_update_on_db_open) { - auto* opt = reinterpret_cast(jhandle); - opt->skip_stats_update_on_db_open = - static_cast(jskip_stats_update_on_db_open); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: skipStatsUpdateOnDbOpen - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_skipStatsUpdateOnDbOpen(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->skip_stats_update_on_db_open); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setSkipCheckingSstFileSizesOnDbOpen - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setSkipCheckingSstFileSizesOnDbOpen( - JNIEnv*, jclass, jlong jhandle, - jboolean jskip_checking_sst_file_sizes_on_db_open) { - auto* opt = reinterpret_cast(jhandle); - opt->skip_checking_sst_file_sizes_on_db_open = - static_cast(jskip_checking_sst_file_sizes_on_db_open); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: skipCheckingSstFileSizesOnDbOpen - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_skipCheckingSstFileSizesOnDbOpen( - JNIEnv*, jclass, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->skip_checking_sst_file_sizes_on_db_open); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWalRecoveryMode - * Signature: (JB)V - */ -void Java_org_rocksdb_DBOptions_setWalRecoveryMode( - JNIEnv*, jobject, jlong jhandle, jbyte jwal_recovery_mode_value) { - auto* opt = reinterpret_cast(jhandle); - opt->wal_recovery_mode = - ROCKSDB_NAMESPACE::WALRecoveryModeJni::toCppWALRecoveryMode( - jwal_recovery_mode_value); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: walRecoveryMode - * Signature: (J)B - */ -jbyte Java_org_rocksdb_DBOptions_walRecoveryMode(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::WALRecoveryModeJni::toJavaWALRecoveryMode( - opt->wal_recovery_mode); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAllow2pc - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAllow2pc(JNIEnv*, jobject, jlong jhandle, - jboolean jallow_2pc) { - auto* opt = reinterpret_cast(jhandle); - opt->allow_2pc = static_cast(jallow_2pc); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: allow2pc - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_allow2pc(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->allow_2pc); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setRowCache - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setRowCache(JNIEnv*, jobject, jlong jhandle, - jlong 
jrow_cache_handle) { - auto* opt = reinterpret_cast(jhandle); - auto* row_cache = - reinterpret_cast*>( - jrow_cache_handle); - opt->row_cache = *row_cache; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWalFilter - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setWalFilter(JNIEnv*, jobject, jlong jhandle, - jlong jwal_filter_handle) { - auto* opt = reinterpret_cast(jhandle); - auto* wal_filter = reinterpret_cast( - jwal_filter_handle); - opt->wal_filter = wal_filter; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setFailIfOptionsFileError - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setFailIfOptionsFileError( - JNIEnv*, jobject, jlong jhandle, jboolean jfail_if_options_file_error) { - auto* opt = reinterpret_cast(jhandle); - opt->fail_if_options_file_error = - static_cast(jfail_if_options_file_error); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: failIfOptionsFileError - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_failIfOptionsFileError(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->fail_if_options_file_error); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setDumpMallocStats - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setDumpMallocStats( - JNIEnv*, jobject, jlong jhandle, jboolean jdump_malloc_stats) { - auto* opt = reinterpret_cast(jhandle); - opt->dump_malloc_stats = static_cast(jdump_malloc_stats); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: dumpMallocStats - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_dumpMallocStats(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->dump_malloc_stats); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAvoidFlushDuringRecovery - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAvoidFlushDuringRecovery( - JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_recovery) { - auto* opt = reinterpret_cast(jhandle); - opt->avoid_flush_during_recovery = - static_cast(javoid_flush_during_recovery); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: avoidFlushDuringRecovery - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_avoidFlushDuringRecovery(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->avoid_flush_during_recovery); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAllowIngestBehind - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAllowIngestBehind( - JNIEnv*, jobject, jlong jhandle, jboolean jallow_ingest_behind) { - auto* opt = reinterpret_cast(jhandle); - opt->allow_ingest_behind = jallow_ingest_behind == JNI_TRUE; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: allowIngestBehind - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_allowIngestBehind(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->allow_ingest_behind); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setTwoWriteQueues - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setTwoWriteQueues(JNIEnv*, jobject, - jlong jhandle, - jboolean jtwo_write_queues) { - auto* opt = reinterpret_cast(jhandle); - opt->two_write_queues = jtwo_write_queues == JNI_TRUE; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: twoWriteQueues - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_twoWriteQueues(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); 
- return static_cast(opt->two_write_queues); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setManualWalFlush - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setManualWalFlush(JNIEnv*, jobject, - jlong jhandle, - jboolean jmanual_wal_flush) { - auto* opt = reinterpret_cast(jhandle); - opt->manual_wal_flush = jmanual_wal_flush == JNI_TRUE; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: manualWalFlush - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_manualWalFlush(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->manual_wal_flush); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAtomicFlush - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAtomicFlush(JNIEnv*, jobject, jlong jhandle, - jboolean jatomic_flush) { - auto* opt = reinterpret_cast(jhandle); - opt->atomic_flush = jatomic_flush == JNI_TRUE; -} - -/* - * Class: org_rocksdb_DBOptions - * Method: atomicFlush - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_atomicFlush(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->atomic_flush); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAvoidFlushDuringShutdown - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAvoidFlushDuringShutdown( - JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_shutdown) { - auto* opt = reinterpret_cast(jhandle); - opt->avoid_flush_during_shutdown = - static_cast(javoid_flush_during_shutdown); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: avoidFlushDuringShutdown - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_avoidFlushDuringShutdown(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->avoid_flush_during_shutdown); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setAvoidUnnecessaryBlockingIO - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setAvoidUnnecessaryBlockingIO( - JNIEnv*, jclass, jlong jhandle, jboolean avoid_blocking_io) { - auto* opt = reinterpret_cast(jhandle); - opt->avoid_unnecessary_blocking_io = static_cast(avoid_blocking_io); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: avoidUnnecessaryBlockingIO - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_avoidUnnecessaryBlockingIO(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->avoid_unnecessary_blocking_io); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setPersistStatsToDisk - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setPersistStatsToDisk( - JNIEnv*, jclass, jlong jhandle, jboolean persist_stats_to_disk) { - auto* opt = reinterpret_cast(jhandle); - opt->persist_stats_to_disk = static_cast(persist_stats_to_disk); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: persistStatsToDisk - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_persistStatsToDisk(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->persist_stats_to_disk); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setWriteDbidToManifest - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setWriteDbidToManifest( - JNIEnv*, jclass, jlong jhandle, jboolean jwrite_dbid_to_manifest) { - auto* opt = reinterpret_cast(jhandle); - opt->write_dbid_to_manifest = static_cast(jwrite_dbid_to_manifest); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: writeDbidToManifest - * Signature: (J)Z 
- */ -jboolean Java_org_rocksdb_DBOptions_writeDbidToManifest(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->write_dbid_to_manifest); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setLogReadaheadSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setLogReadaheadSize(JNIEnv*, jclass, - jlong jhandle, - jlong jlog_readahead_size) { - auto* opt = reinterpret_cast(jhandle); - opt->log_readahead_size = static_cast(jlog_readahead_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: logReasaheadSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_logReadaheadSize(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->log_readahead_size); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setBestEffortsRecovery - * Signature: (JZ)V - */ -void Java_org_rocksdb_DBOptions_setBestEffortsRecovery( - JNIEnv*, jclass, jlong jhandle, jboolean jbest_efforts_recovery) { - auto* opt = reinterpret_cast(jhandle); - opt->best_efforts_recovery = static_cast(jbest_efforts_recovery); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: bestEffortsRecovery - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_DBOptions_bestEffortsRecovery(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->best_efforts_recovery); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setMaxBgErrorResumeCount - * Signature: (JI)V - */ -void Java_org_rocksdb_DBOptions_setMaxBgErrorResumeCount( - JNIEnv*, jclass, jlong jhandle, jint jmax_bgerror_resume_count) { - auto* opt = reinterpret_cast(jhandle); - opt->max_bgerror_resume_count = static_cast(jmax_bgerror_resume_count); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: maxBgerrorResumeCount - * Signature: (J)I - */ -jint Java_org_rocksdb_DBOptions_maxBgerrorResumeCount(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->max_bgerror_resume_count); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: setBgerrorResumeRetryInterval - * Signature: (JJ)V - */ -void Java_org_rocksdb_DBOptions_setBgerrorResumeRetryInterval( - JNIEnv*, jclass, jlong jhandle, jlong jbgerror_resume_retry_interval) { - auto* opt = reinterpret_cast(jhandle); - opt->bgerror_resume_retry_interval = - static_cast(jbgerror_resume_retry_interval); -} - -/* - * Class: org_rocksdb_DBOptions - * Method: bgerrorResumeRetryInterval - * Signature: (J)J - */ -jlong Java_org_rocksdb_DBOptions_bgerrorResumeRetryInterval(JNIEnv*, jclass, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->bgerror_resume_retry_interval); -} - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::WriteOptions - -/* - * Class: org_rocksdb_WriteOptions - * Method: newWriteOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_WriteOptions_newWriteOptions(JNIEnv*, jclass) { - auto* op = new ROCKSDB_NAMESPACE::WriteOptions(); - return GET_CPLUSPLUS_POINTER(op); -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: copyWriteOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_WriteOptions_copyWriteOptions(JNIEnv*, jclass, - jlong jhandle) { - auto new_opt = new ROCKSDB_NAMESPACE::WriteOptions( - *(reinterpret_cast(jhandle))); - return GET_CPLUSPLUS_POINTER(new_opt); -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: disposeInternal - * Signature: ()V - */ -void 
Java_org_rocksdb_WriteOptions_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* write_options = - reinterpret_cast(jhandle); - assert(write_options != nullptr); - delete write_options; -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: setSync - * Signature: (JZ)V - */ -void Java_org_rocksdb_WriteOptions_setSync(JNIEnv*, jobject, jlong jhandle, - jboolean jflag) { - reinterpret_cast(jhandle)->sync = jflag; -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: sync - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_WriteOptions_sync(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->sync; -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: setDisableWAL - * Signature: (JZ)V - */ -void Java_org_rocksdb_WriteOptions_setDisableWAL(JNIEnv*, jobject, - jlong jhandle, - jboolean jflag) { - reinterpret_cast(jhandle)->disableWAL = - jflag; -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: disableWAL - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_WriteOptions_disableWAL(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->disableWAL; -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: setIgnoreMissingColumnFamilies - * Signature: (JZ)V - */ -void Java_org_rocksdb_WriteOptions_setIgnoreMissingColumnFamilies( - JNIEnv*, jobject, jlong jhandle, jboolean jignore_missing_column_families) { - reinterpret_cast(jhandle) - ->ignore_missing_column_families = - static_cast(jignore_missing_column_families); -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: ignoreMissingColumnFamilies - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_WriteOptions_ignoreMissingColumnFamilies( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->ignore_missing_column_families; -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: setNoSlowdown - * Signature: (JZ)V - */ -void Java_org_rocksdb_WriteOptions_setNoSlowdown(JNIEnv*, jobject, - jlong jhandle, - jboolean jno_slowdown) { - reinterpret_cast(jhandle)->no_slowdown = - static_cast(jno_slowdown); -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: noSlowdown - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_WriteOptions_noSlowdown(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->no_slowdown; -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: setLowPri - * Signature: (JZ)V - */ -void Java_org_rocksdb_WriteOptions_setLowPri(JNIEnv*, jobject, jlong jhandle, - jboolean jlow_pri) { - reinterpret_cast(jhandle)->low_pri = - static_cast(jlow_pri); -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: lowPri - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_WriteOptions_lowPri(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->low_pri; -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: memtableInsertHintPerBatch - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_WriteOptions_memtableInsertHintPerBatch( - JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle) - ->memtable_insert_hint_per_batch; -} - -/* - * Class: org_rocksdb_WriteOptions - * Method: setMemtableInsertHintPerBatch - * Signature: (JZ)V - */ -void Java_org_rocksdb_WriteOptions_setMemtableInsertHintPerBatch( - JNIEnv*, jobject, jlong jhandle, jboolean jmemtable_insert_hint_per_batch) { - reinterpret_cast(jhandle) - ->memtable_insert_hint_per_batch = - static_cast(jmemtable_insert_hint_per_batch); -} - -///////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::ReadOptions - -/* - * 
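
Every setter/getter pair in the WriteOptions block above (and in the DBOptions block before it) follows the same handle-based shape: the Java object holds the address of a heap-allocated native options struct as a jlong, and each native method casts that handle back before reading or writing a single field. As an illustration, here is the setSync/sync pair from above, lightly adapted into a self-contained sketch: the cast target is written out in full, and the extern "C" linkage that the generated JNI header normally provides is added inline.

#include <jni.h>

#include "rocksdb/options.h"

// The jlong handle is the address of the ROCKSDB_NAMESPACE::WriteOptions that
// newWriteOptions() allocated and that disposeInternal() later deletes.
extern "C" JNIEXPORT void JNICALL Java_org_rocksdb_WriteOptions_setSync(
    JNIEnv*, jobject, jlong jhandle, jboolean jflag) {
  reinterpret_cast<ROCKSDB_NAMESPACE::WriteOptions*>(jhandle)->sync =
      (jflag == JNI_TRUE);
}

extern "C" JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteOptions_sync(
    JNIEnv*, jobject, jlong jhandle) {
  return static_cast<jboolean>(
      reinterpret_cast<ROCKSDB_NAMESPACE::WriteOptions*>(jhandle)->sync);
}
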
Class: org_rocksdb_ReadOptions - * Method: newReadOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_ReadOptions_newReadOptions__(JNIEnv*, jclass) { - auto* read_options = new ROCKSDB_NAMESPACE::ReadOptions(); - return GET_CPLUSPLUS_POINTER(read_options); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: newReadOptions - * Signature: (ZZ)J - */ -jlong Java_org_rocksdb_ReadOptions_newReadOptions__ZZ( - JNIEnv*, jclass, jboolean jverify_checksums, jboolean jfill_cache) { - auto* read_options = new ROCKSDB_NAMESPACE::ReadOptions( - static_cast(jverify_checksums), static_cast(jfill_cache)); - return GET_CPLUSPLUS_POINTER(read_options); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: copyReadOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_copyReadOptions(JNIEnv*, jclass, - jlong jhandle) { - auto new_opt = new ROCKSDB_NAMESPACE::ReadOptions( - *(reinterpret_cast(jhandle))); - return GET_CPLUSPLUS_POINTER(new_opt); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_ReadOptions_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* read_options = - reinterpret_cast(jhandle); - assert(read_options != nullptr); - delete read_options; -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setVerifyChecksums - * Signature: (JZ)V - */ -void Java_org_rocksdb_ReadOptions_setVerifyChecksums( - JNIEnv*, jobject, jlong jhandle, jboolean jverify_checksums) { - reinterpret_cast(jhandle)->verify_checksums = - static_cast(jverify_checksums); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: verifyChecksums - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_verifyChecksums(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->verify_checksums; -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setFillCache - * Signature: (JZ)V - */ -void Java_org_rocksdb_ReadOptions_setFillCache(JNIEnv*, jobject, jlong jhandle, - jboolean jfill_cache) { - reinterpret_cast(jhandle)->fill_cache = - static_cast(jfill_cache); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: fillCache - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_fillCache(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle)->fill_cache; -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setTailing - * Signature: (JZ)V - */ -void Java_org_rocksdb_ReadOptions_setTailing(JNIEnv*, jobject, jlong jhandle, - jboolean jtailing) { - reinterpret_cast(jhandle)->tailing = - static_cast(jtailing); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: tailing - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_tailing(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->tailing; -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: managed - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_managed(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->managed; -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setManaged - * Signature: (JZ)V - */ -void Java_org_rocksdb_ReadOptions_setManaged(JNIEnv*, jobject, jlong jhandle, - jboolean jmanaged) { - reinterpret_cast(jhandle)->managed = - static_cast(jmanaged); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: totalOrderSeek - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_totalOrderSeek(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->total_order_seek; -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: 
setTotalOrderSeek - * Signature: (JZ)V - */ -void Java_org_rocksdb_ReadOptions_setTotalOrderSeek( - JNIEnv*, jobject, jlong jhandle, jboolean jtotal_order_seek) { - reinterpret_cast(jhandle)->total_order_seek = - static_cast(jtotal_order_seek); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: prefixSameAsStart - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_prefixSameAsStart(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle) - ->prefix_same_as_start; -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setPrefixSameAsStart - * Signature: (JZ)V - */ -void Java_org_rocksdb_ReadOptions_setPrefixSameAsStart( - JNIEnv*, jobject, jlong jhandle, jboolean jprefix_same_as_start) { - reinterpret_cast(jhandle) - ->prefix_same_as_start = static_cast(jprefix_same_as_start); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: pinData - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_pinData(JNIEnv*, jobject, jlong jhandle) { - return reinterpret_cast(jhandle)->pin_data; -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setPinData - * Signature: (JZ)V - */ -void Java_org_rocksdb_ReadOptions_setPinData(JNIEnv*, jobject, jlong jhandle, - jboolean jpin_data) { - reinterpret_cast(jhandle)->pin_data = - static_cast(jpin_data); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: backgroundPurgeOnIteratorCleanup - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_backgroundPurgeOnIteratorCleanup( - JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->background_purge_on_iterator_cleanup); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setBackgroundPurgeOnIteratorCleanup - * Signature: (JZ)V - */ -void Java_org_rocksdb_ReadOptions_setBackgroundPurgeOnIteratorCleanup( - JNIEnv*, jobject, jlong jhandle, - jboolean jbackground_purge_on_iterator_cleanup) { - auto* opt = reinterpret_cast(jhandle); - opt->background_purge_on_iterator_cleanup = - static_cast(jbackground_purge_on_iterator_cleanup); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: readaheadSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_readaheadSize(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->readahead_size); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setReadaheadSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_ReadOptions_setReadaheadSize(JNIEnv*, jobject, - jlong jhandle, - jlong jreadahead_size) { - auto* opt = reinterpret_cast(jhandle); - opt->readahead_size = static_cast(jreadahead_size); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: maxSkippableInternalKeys - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_maxSkippableInternalKeys(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->max_skippable_internal_keys); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setMaxSkippableInternalKeys - * Signature: (JJ)V - */ -void Java_org_rocksdb_ReadOptions_setMaxSkippableInternalKeys( - JNIEnv*, jobject, jlong jhandle, jlong jmax_skippable_internal_keys) { - auto* opt = reinterpret_cast(jhandle); - opt->max_skippable_internal_keys = - static_cast(jmax_skippable_internal_keys); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: ignoreRangeDeletions - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_ignoreRangeDeletions(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return 
static_cast(opt->ignore_range_deletions); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setIgnoreRangeDeletions - * Signature: (JZ)V - */ -void Java_org_rocksdb_ReadOptions_setIgnoreRangeDeletions( - JNIEnv*, jobject, jlong jhandle, jboolean jignore_range_deletions) { - auto* opt = reinterpret_cast(jhandle); - opt->ignore_range_deletions = static_cast(jignore_range_deletions); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setSnapshot - * Signature: (JJ)V - */ -void Java_org_rocksdb_ReadOptions_setSnapshot(JNIEnv*, jobject, jlong jhandle, - jlong jsnapshot) { - reinterpret_cast(jhandle)->snapshot = - reinterpret_cast(jsnapshot); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: snapshot - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_snapshot(JNIEnv*, jobject, jlong jhandle) { - auto& snapshot = - reinterpret_cast(jhandle)->snapshot; - return GET_CPLUSPLUS_POINTER(snapshot); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: readTier - * Signature: (J)B - */ -jbyte Java_org_rocksdb_ReadOptions_readTier(JNIEnv*, jobject, jlong jhandle) { - return static_cast( - reinterpret_cast(jhandle)->read_tier); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setReadTier - * Signature: (JB)V - */ -void Java_org_rocksdb_ReadOptions_setReadTier(JNIEnv*, jobject, jlong jhandle, - jbyte jread_tier) { - reinterpret_cast(jhandle)->read_tier = - static_cast(jread_tier); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setIterateUpperBound - * Signature: (JJ)I - */ -void Java_org_rocksdb_ReadOptions_setIterateUpperBound( - JNIEnv*, jobject, jlong jhandle, jlong jupper_bound_slice_handle) { - reinterpret_cast(jhandle) - ->iterate_upper_bound = - reinterpret_cast(jupper_bound_slice_handle); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: iterateUpperBound - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_iterateUpperBound(JNIEnv*, jobject, - jlong jhandle) { - auto& upper_bound_slice_handle = - reinterpret_cast(jhandle) - ->iterate_upper_bound; - return GET_CPLUSPLUS_POINTER(upper_bound_slice_handle); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setIterateLowerBound - * Signature: (JJ)I - */ -void Java_org_rocksdb_ReadOptions_setIterateLowerBound( - JNIEnv*, jobject, jlong jhandle, jlong jlower_bound_slice_handle) { - reinterpret_cast(jhandle) - ->iterate_lower_bound = - reinterpret_cast(jlower_bound_slice_handle); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: iterateLowerBound - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_iterateLowerBound(JNIEnv*, jobject, - jlong jhandle) { - auto& lower_bound_slice_handle = - reinterpret_cast(jhandle) - ->iterate_lower_bound; - return GET_CPLUSPLUS_POINTER(lower_bound_slice_handle); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setTableFilter - * Signature: (JJ)V - */ -void Java_org_rocksdb_ReadOptions_setTableFilter( - JNIEnv*, jobject, jlong jhandle, jlong jjni_table_filter_handle) { - auto* opt = reinterpret_cast(jhandle); - auto* jni_table_filter = - reinterpret_cast( - jjni_table_filter_handle); - opt->table_filter = jni_table_filter->GetTableFilterFunction(); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: autoPrefixMode - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ReadOptions_autoPrefixMode(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->auto_prefix_mode); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setAutoPrefixMode - * Signature: (JZ)V - */ -void 
Java_org_rocksdb_ReadOptions_setAutoPrefixMode( - JNIEnv*, jobject, jlong jhandle, jboolean jauto_prefix_mode) { - auto* opt = reinterpret_cast(jhandle); - opt->auto_prefix_mode = static_cast(jauto_prefix_mode); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: timestamp - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_timestamp(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - auto& timestamp_slice_handle = opt->timestamp; - return reinterpret_cast(timestamp_slice_handle); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setTimestamp - * Signature: (JJ)V - */ -void Java_org_rocksdb_ReadOptions_setTimestamp(JNIEnv*, jobject, jlong jhandle, - jlong jtimestamp_slice_handle) { - auto* opt = reinterpret_cast(jhandle); - opt->timestamp = - reinterpret_cast(jtimestamp_slice_handle); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: iterStartTs - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_iterStartTs(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - auto& iter_start_ts_handle = opt->iter_start_ts; - return reinterpret_cast(iter_start_ts_handle); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setIterStartTs - * Signature: (JJ)V - */ -void Java_org_rocksdb_ReadOptions_setIterStartTs(JNIEnv*, jobject, - jlong jhandle, - jlong jiter_start_ts_handle) { - auto* opt = reinterpret_cast(jhandle); - opt->iter_start_ts = - reinterpret_cast(jiter_start_ts_handle); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: deadline - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_deadline(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->deadline.count()); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setDeadline - * Signature: (JJ)V - */ -void Java_org_rocksdb_ReadOptions_setDeadline(JNIEnv*, jobject, jlong jhandle, - jlong jdeadline) { - auto* opt = reinterpret_cast(jhandle); - opt->deadline = std::chrono::microseconds(static_cast(jdeadline)); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: ioTimeout - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_ioTimeout(JNIEnv*, jobject, jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->io_timeout.count()); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setIoTimeout - * Signature: (JJ)V - */ -void Java_org_rocksdb_ReadOptions_setIoTimeout(JNIEnv*, jobject, jlong jhandle, - jlong jio_timeout) { - auto* opt = reinterpret_cast(jhandle); - opt->io_timeout = - std::chrono::microseconds(static_cast(jio_timeout)); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: valueSizeSofLimit - * Signature: (J)J - */ -jlong Java_org_rocksdb_ReadOptions_valueSizeSoftLimit(JNIEnv*, jobject, - jlong jhandle) { - auto* opt = reinterpret_cast(jhandle); - return static_cast(opt->value_size_soft_limit); -} - -/* - * Class: org_rocksdb_ReadOptions - * Method: setValueSizeSofLimit - * Signature: (JJ)V - */ -void Java_org_rocksdb_ReadOptions_setValueSizeSoftLimit( - JNIEnv*, jobject, jlong jhandle, jlong jvalue_size_soft_limit) { - auto* opt = reinterpret_cast(jhandle); - opt->value_size_soft_limit = static_cast(jvalue_size_soft_limit); -} - -///////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::ComparatorOptions - -/* - * Class: org_rocksdb_ComparatorOptions - * Method: newComparatorOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_ComparatorOptions_newComparatorOptions(JNIEnv*, jclass) { - auto* 
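
The deadline and ioTimeout accessors near the end of the ReadOptions block above stand out because they convert units rather than just casting a handle: Java sends a microsecond count as a jlong, the C++ side stores it as std::chrono::microseconds, and the getter hands the count back via .count(). A tiny standalone sketch of that round trip (the helper names are illustrative only, not RocksDB API):

#include <cassert>
#include <chrono>
#include <cstdint>

// Illustrative conversion helpers mirroring setDeadline()/deadline() above.
std::chrono::microseconds micros_from_java(int64_t jmicros) {
  return std::chrono::microseconds(jmicros);
}

int64_t micros_to_java(std::chrono::microseconds d) {
  return static_cast<int64_t>(d.count());
}

int main() {
  // The round trip preserves the count exactly.
  assert(micros_to_java(micros_from_java(1500)) == 1500);
  return 0;
}
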
comparator_opt = new ROCKSDB_NAMESPACE::ComparatorJniCallbackOptions(); - return GET_CPLUSPLUS_POINTER(comparator_opt); -} - -/* - * Class: org_rocksdb_ComparatorOptions - * Method: reusedSynchronisationType - * Signature: (J)B - */ -jbyte Java_org_rocksdb_ComparatorOptions_reusedSynchronisationType( - JNIEnv*, jobject, jlong jhandle) { - auto* comparator_opt = - reinterpret_cast( - jhandle); - return ROCKSDB_NAMESPACE::ReusedSynchronisationTypeJni:: - toJavaReusedSynchronisationType( - comparator_opt->reused_synchronisation_type); -} - -/* - * Class: org_rocksdb_ComparatorOptions - * Method: setReusedSynchronisationType - * Signature: (JB)V - */ -void Java_org_rocksdb_ComparatorOptions_setReusedSynchronisationType( - JNIEnv*, jobject, jlong jhandle, jbyte jreused_synhcronisation_type) { - auto* comparator_opt = - reinterpret_cast( - jhandle); - comparator_opt->reused_synchronisation_type = - ROCKSDB_NAMESPACE::ReusedSynchronisationTypeJni:: - toCppReusedSynchronisationType(jreused_synhcronisation_type); -} - -/* - * Class: org_rocksdb_ComparatorOptions - * Method: useDirectBuffer - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_ComparatorOptions_useDirectBuffer(JNIEnv*, jobject, - jlong jhandle) { - return static_cast( - reinterpret_cast( - jhandle) - ->direct_buffer); -} - -/* - * Class: org_rocksdb_ComparatorOptions - * Method: setUseDirectBuffer - * Signature: (JZ)V - */ -void Java_org_rocksdb_ComparatorOptions_setUseDirectBuffer( - JNIEnv*, jobject, jlong jhandle, jboolean jdirect_buffer) { - reinterpret_cast(jhandle) - ->direct_buffer = jdirect_buffer == JNI_TRUE; -} - -/* - * Class: org_rocksdb_ComparatorOptions - * Method: maxReusedBufferSize - * Signature: (J)I - */ -jint Java_org_rocksdb_ComparatorOptions_maxReusedBufferSize(JNIEnv*, jobject, - jlong jhandle) { - return static_cast( - reinterpret_cast( - jhandle) - ->max_reused_buffer_size); -} - -/* - * Class: org_rocksdb_ComparatorOptions - * Method: setMaxReusedBufferSize - * Signature: (JI)V - */ -void Java_org_rocksdb_ComparatorOptions_setMaxReusedBufferSize( - JNIEnv*, jobject, jlong jhandle, jint jmax_reused_buffer_size) { - reinterpret_cast(jhandle) - ->max_reused_buffer_size = static_cast(jmax_reused_buffer_size); -} - -/* - * Class: org_rocksdb_ComparatorOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_ComparatorOptions_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* comparator_opt = - reinterpret_cast( - jhandle); - assert(comparator_opt != nullptr); - delete comparator_opt; -} - -///////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::FlushOptions - -/* - * Class: org_rocksdb_FlushOptions - * Method: newFlushOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_FlushOptions_newFlushOptions(JNIEnv*, jclass) { - auto* flush_opt = new ROCKSDB_NAMESPACE::FlushOptions(); - return GET_CPLUSPLUS_POINTER(flush_opt); -} - -/* - * Class: org_rocksdb_FlushOptions - * Method: setWaitForFlush - * Signature: (JZ)V - */ -void Java_org_rocksdb_FlushOptions_setWaitForFlush(JNIEnv*, jobject, - jlong jhandle, - jboolean jwait) { - reinterpret_cast(jhandle)->wait = - static_cast(jwait); -} - -/* - * Class: org_rocksdb_FlushOptions - * Method: waitForFlush - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_FlushOptions_waitForFlush(JNIEnv*, jobject, - jlong jhandle) { - return reinterpret_cast(jhandle)->wait; -} - -/* - * Class: org_rocksdb_FlushOptions - * Method: setAllowWriteStall - * Signature: (JZ)V - */ -void 
Java_org_rocksdb_FlushOptions_setAllowWriteStall( - JNIEnv*, jobject, jlong jhandle, jboolean jallow_write_stall) { - auto* flush_options = - reinterpret_cast(jhandle); - flush_options->allow_write_stall = jallow_write_stall == JNI_TRUE; -} - -/* - * Class: org_rocksdb_FlushOptions - * Method: allowWriteStall - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_FlushOptions_allowWriteStall(JNIEnv*, jobject, - jlong jhandle) { - auto* flush_options = - reinterpret_cast(jhandle); - return static_cast(flush_options->allow_write_stall); -} - -/* - * Class: org_rocksdb_FlushOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_FlushOptions_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* flush_opt = reinterpret_cast(jhandle); - assert(flush_opt != nullptr); - delete flush_opt; -} diff --git a/java/rocksjni/options_util.cc b/java/rocksjni/options_util.cc deleted file mode 100644 index c3d7fcef6..000000000 --- a/java/rocksjni/options_util.cc +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling C++ ROCKSDB_NAMESPACE::OptionsUtil methods from Java side. - -#include "rocksdb/utilities/options_util.h" - -#include - -#include - -#include "include/org_rocksdb_OptionsUtil.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksjni/portal.h" - -void build_column_family_descriptor_list( - JNIEnv* env, jobject jcfds, - std::vector& cf_descs) { - jmethodID add_mid = ROCKSDB_NAMESPACE::ListJni::getListAddMethodId(env); - if (add_mid == nullptr) { - // exception occurred accessing method - return; - } - - // Column family descriptor - for (ROCKSDB_NAMESPACE::ColumnFamilyDescriptor& cfd : cf_descs) { - // Construct a ColumnFamilyDescriptor java object - jobject jcfd = - ROCKSDB_NAMESPACE::ColumnFamilyDescriptorJni::construct(env, &cfd); - if (env->ExceptionCheck()) { - // exception occurred constructing object - if (jcfd != nullptr) { - env->DeleteLocalRef(jcfd); - } - return; - } - - // Add the object to java list. 
- jboolean rs = env->CallBooleanMethod(jcfds, add_mid, jcfd); - if (env->ExceptionCheck() || rs == JNI_FALSE) { - // exception occurred calling method, or could not add - if (jcfd != nullptr) { - env->DeleteLocalRef(jcfd); - } - return; - } - } -} - -/* - * Class: org_rocksdb_OptionsUtil - * Method: loadLatestOptions - * Signature: (JLjava/lang/String;JLjava/util/List;)V - */ -void Java_org_rocksdb_OptionsUtil_loadLatestOptions( - JNIEnv* env, jclass /*jcls*/, jlong cfg_handle, jstring jdbpath, - jlong jdb_opts_handle, jobject jcfds) { - jboolean has_exception = JNI_FALSE; - auto db_path = - ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jdbpath, &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return; - } - std::vector cf_descs; - auto* config_options = - reinterpret_cast(cfg_handle); - auto* db_options = - reinterpret_cast(jdb_opts_handle); - ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::LoadLatestOptions( - *config_options, db_path, db_options, &cf_descs); - if (!s.ok()) { - // error, raise an exception - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } else { - build_column_family_descriptor_list(env, jcfds, cf_descs); - } -} - -/* - * Class: org_rocksdb_OptionsUtil - * Method: loadOptionsFromFile - * Signature: (JLjava/lang/String;JLjava/util/List;)V - */ -void Java_org_rocksdb_OptionsUtil_loadOptionsFromFile( - JNIEnv* env, jclass /*jcls*/, jlong cfg_handle, jstring jopts_file_name, - jlong jdb_opts_handle, jobject jcfds) { - jboolean has_exception = JNI_FALSE; - auto opts_file_name = ROCKSDB_NAMESPACE::JniUtil::copyStdString( - env, jopts_file_name, &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return; - } - std::vector cf_descs; - auto* config_options = - reinterpret_cast(cfg_handle); - auto* db_options = - reinterpret_cast(jdb_opts_handle); - ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::LoadOptionsFromFile( - *config_options, opts_file_name, db_options, &cf_descs); - if (!s.ok()) { - // error, raise an exception - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } else { - build_column_family_descriptor_list(env, jcfds, cf_descs); - } -} - -/* - * Class: org_rocksdb_OptionsUtil - * Method: getLatestOptionsFileName - * Signature: (Ljava/lang/String;J)Ljava/lang/String; - */ -jstring Java_org_rocksdb_OptionsUtil_getLatestOptionsFileName( - JNIEnv* env, jclass /*jcls*/, jstring jdbpath, jlong jenv_handle) { - jboolean has_exception = JNI_FALSE; - auto db_path = - ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jdbpath, &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return nullptr; - } - std::string options_file_name; - ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::GetLatestOptionsFileName( - db_path, reinterpret_cast(jenv_handle), - &options_file_name); - if (!s.ok()) { - // error, raise an exception - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } else { - return env->NewStringUTF(options_file_name.c_str()); - } -} diff --git a/java/rocksjni/persistent_cache.cc b/java/rocksjni/persistent_cache.cc deleted file mode 100644 index 295d91798..000000000 --- a/java/rocksjni/persistent_cache.cc +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
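
The three OptionsUtil bridges above (loadLatestOptions, loadOptionsFromFile, getLatestOptionsFileName) all follow one error-handling convention: copy the incoming Java string with JniUtil::copyStdString, return immediately if that already left a Java exception pending, call the underlying RocksDB API, and turn a non-OK Status into a thrown org.rocksdb.RocksDBException. A compressed sketch of that convention, using a hypothetical native method and a placeholder in place of the real API call (neither is part of RocksDB):

#include <jni.h>

#include <string>

#include "rocksdb/status.h"
#include "rocksjni/portal.h"

// Hypothetical bridge showing the shared error-handling shape; the method
// name and the work done with `path` are placeholders.
void Java_org_rocksdb_Example_doWork(JNIEnv* env, jclass, jstring jpath) {
  jboolean has_exception = JNI_FALSE;
  std::string path =
      ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jpath, &has_exception);
  if (has_exception == JNI_TRUE) {
    // copyStdString already raised a Java exception; just unwind.
    return;
  }
  ROCKSDB_NAMESPACE::Status s =
      ROCKSDB_NAMESPACE::Status::OK();  // placeholder for the real API call
  if (!s.ok()) {
    // Surfaces the failure to Java as an org.rocksdb.RocksDBException.
    ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s);
  }
}
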
-// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::PersistentCache. - -#include "rocksdb/persistent_cache.h" - -#include - -#include - -#include "include/org_rocksdb_PersistentCache.h" -#include "loggerjnicallback.h" -#include "portal.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_PersistentCache - * Method: newPersistentCache - * Signature: (JLjava/lang/String;JJZ)J - */ -jlong Java_org_rocksdb_PersistentCache_newPersistentCache( - JNIEnv* env, jclass, jlong jenv_handle, jstring jpath, jlong jsz, - jlong jlogger_handle, jboolean joptimized_for_nvm) { - auto* rocks_env = reinterpret_cast(jenv_handle); - jboolean has_exception = JNI_FALSE; - std::string path = - ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jpath, &has_exception); - if (has_exception == JNI_TRUE) { - return 0; - } - auto* logger = - reinterpret_cast*>( - jlogger_handle); - auto* cache = - new std::shared_ptr(nullptr); - ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::NewPersistentCache( - rocks_env, path, static_cast(jsz), *logger, - static_cast(joptimized_for_nvm), cache); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } - return GET_CPLUSPLUS_POINTER(cache); -} - -/* - * Class: org_rocksdb_PersistentCache - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_PersistentCache_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* cache = - reinterpret_cast*>( - jhandle); - delete cache; // delete std::shared_ptr -} diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h deleted file mode 100644 index ee87f8947..000000000 --- a/java/rocksjni/portal.h +++ /dev/null @@ -1,8686 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -// This file is designed for caching those frequently used IDs and provide -// efficient portal (i.e, a set of static functions) to access java code -// from c++. - -#ifndef JAVA_ROCKSJNI_PORTAL_H_ -#define JAVA_ROCKSJNI_PORTAL_H_ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/rate_limiter.h" -#include "rocksdb/status.h" -#include "rocksdb/table.h" -#include "rocksdb/utilities/backup_engine.h" -#include "rocksdb/utilities/memory_util.h" -#include "rocksdb/utilities/transaction_db.h" -#include "rocksdb/utilities/write_batch_with_index.h" -#include "rocksjni/compaction_filter_factory_jnicallback.h" -#include "rocksjni/comparatorjnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/event_listener_jnicallback.h" -#include "rocksjni/loggerjnicallback.h" -#include "rocksjni/table_filter_jnicallback.h" -#include "rocksjni/trace_writer_jnicallback.h" -#include "rocksjni/transaction_notifier_jnicallback.h" -#include "rocksjni/wal_filter_jnicallback.h" -#include "rocksjni/writebatchhandlerjnicallback.h" - -// Remove macro on windows -#ifdef DELETE -#undef DELETE -#endif - -namespace ROCKSDB_NAMESPACE { - -class JavaClass { - public: - /** - * Gets and initializes a Java Class - * - * @param env A pointer to the Java environment - * @param jclazz_name The fully qualified JNI name of the Java Class - * e.g. 
"java/lang/String" - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env, const char* jclazz_name) { - jclass jclazz = env->FindClass(jclazz_name); - assert(jclazz != nullptr); - return jclazz; - } -}; - -// Native class template -template -class RocksDBNativeClass : public JavaClass {}; - -// Native class template for sub-classes of RocksMutableObject -template -class NativeRocksMutableObject : public RocksDBNativeClass { - public: - /** - * Gets the Java Method ID for the - * RocksMutableObject#setNativeHandle(long, boolean) method - * - * @param env A pointer to the Java environment - * @return The Java Method ID or nullptr the RocksMutableObject class cannot - * be accessed, or if one of the NoSuchMethodError, - * ExceptionInInitializerError or OutOfMemoryError exceptions is thrown - */ - static jmethodID getSetNativeHandleMethod(JNIEnv* env) { - static jclass jclazz = DERIVED::getJClass(env); - if (jclazz == nullptr) { - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "setNativeHandle", "(JZ)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Sets the C++ object pointer handle in the Java object - * - * @param env A pointer to the Java environment - * @param jobj The Java object on which to set the pointer handle - * @param ptr The C++ object pointer - * @param java_owns_handle JNI_TRUE if ownership of the C++ object is - * managed by the Java object - * - * @return true if a Java exception is pending, false otherwise - */ - static bool setHandle(JNIEnv* env, jobject jobj, PTR ptr, - jboolean java_owns_handle) { - assert(jobj != nullptr); - static jmethodID mid = getSetNativeHandleMethod(env); - if (mid == nullptr) { - return true; // signal exception - } - - env->CallVoidMethod(jobj, mid, GET_CPLUSPLUS_POINTER(ptr), - java_owns_handle); - if (env->ExceptionCheck()) { - return true; // signal exception - } - - return false; - } -}; - -// Java Exception template -template -class JavaException : public JavaClass { - public: - /** - * Create and throw a java exception with the provided message - * - * @param env A pointer to the Java environment - * @param msg The message for the exception - * - * @return true if an exception was thrown, false otherwise - */ - static bool ThrowNew(JNIEnv* env, const std::string& msg) { - jclass jclazz = DERIVED::getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - std::cerr << "JavaException::ThrowNew - Error: unexpected exception!" - << std::endl; - return env->ExceptionCheck(); - } - - const jint rs = env->ThrowNew(jclazz, msg.c_str()); - if (rs != JNI_OK) { - // exception could not be thrown - std::cerr << "JavaException::ThrowNew - Fatal: could not throw exception!" 
- << std::endl; - return env->ExceptionCheck(); - } - - return true; - } -}; - -// The portal class for java.lang.IllegalArgumentException -class IllegalArgumentExceptionJni - : public JavaException { - public: - /** - * Get the Java Class java.lang.IllegalArgumentException - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaException::getJClass(env, "java/lang/IllegalArgumentException"); - } - - /** - * Create and throw a Java IllegalArgumentException with the provided status - * - * If s.ok() == true, then this function will not throw any exception. - * - * @param env A pointer to the Java environment - * @param s The status for the exception - * - * @return true if an exception was thrown, false otherwise - */ - static bool ThrowNew(JNIEnv* env, const Status& s) { - assert(!s.ok()); - if (s.ok()) { - return false; - } - - // get the IllegalArgumentException class - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - std::cerr << "IllegalArgumentExceptionJni::ThrowNew/class - Error: " - "unexpected exception!" - << std::endl; - return env->ExceptionCheck(); - } - - return JavaException::ThrowNew(env, s.ToString()); - } -}; - -// The portal class for org.rocksdb.Status.Code -class CodeJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.Status.Code - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/Status$Code"); - } - - /** - * Get the Java Method: Status.Code#getValue - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getValueMethod(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "getValue", "()b"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.Status.SubCode -class SubCodeJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.Status.SubCode - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/Status$SubCode"); - } - - /** - * Get the Java Method: Status.SubCode#getValue - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getValueMethod(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "getValue", "()b"); - assert(mid != nullptr); - return mid; - } - - static ROCKSDB_NAMESPACE::Status::SubCode toCppSubCode( - 
const jbyte jsub_code) { - switch (jsub_code) { - case 0x0: - return ROCKSDB_NAMESPACE::Status::SubCode::kNone; - case 0x1: - return ROCKSDB_NAMESPACE::Status::SubCode::kMutexTimeout; - case 0x2: - return ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout; - case 0x3: - return ROCKSDB_NAMESPACE::Status::SubCode::kLockLimit; - case 0x4: - return ROCKSDB_NAMESPACE::Status::SubCode::kNoSpace; - case 0x5: - return ROCKSDB_NAMESPACE::Status::SubCode::kDeadlock; - case 0x6: - return ROCKSDB_NAMESPACE::Status::SubCode::kStaleFile; - case 0x7: - return ROCKSDB_NAMESPACE::Status::SubCode::kMemoryLimit; - - case 0x7F: - default: - return ROCKSDB_NAMESPACE::Status::SubCode::kNone; - } - } -}; - -// The portal class for org.rocksdb.Status -class StatusJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.Status - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Status"); - } - - /** - * Get the Java Method: Status#getCode - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getCodeMethod(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "getCode", "()Lorg/rocksdb/Status$Code;"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: Status#getSubCode - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getSubCodeMethod(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "getSubCode", - "()Lorg/rocksdb/Status$SubCode;"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: Status#getState - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getStateMethod(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "getState", "()Ljava/lang/String;"); - assert(mid != nullptr); - return mid; - } - - /** - * Create a new Java org.rocksdb.Status object with the same properties as - * the provided C++ ROCKSDB_NAMESPACE::Status object - * - * @param env A pointer to the Java environment - * @param status The ROCKSDB_NAMESPACE::Status object - * - * @return A reference to a Java org.rocksdb.Status object, or nullptr - * if an an exception occurs - */ - static jobject construct(JNIEnv* env, const Status& status) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = - env->GetMethodID(jclazz, "", "(BBLjava/lang/String;)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - // convert the Status state for Java - jstring 
jstate = nullptr; - if (status.getState() != nullptr) { - const char* const state = status.getState(); - jstate = env->NewStringUTF(state); - if (env->ExceptionCheck()) { - if (jstate != nullptr) { - env->DeleteLocalRef(jstate); - } - return nullptr; - } - } - - jobject jstatus = - env->NewObject(jclazz, mid, toJavaStatusCode(status.code()), - toJavaStatusSubCode(status.subcode()), jstate); - if (env->ExceptionCheck()) { - // exception occurred - if (jstate != nullptr) { - env->DeleteLocalRef(jstate); - } - return nullptr; - } - - if (jstate != nullptr) { - env->DeleteLocalRef(jstate); - } - - return jstatus; - } - - static jobject construct(JNIEnv* env, const Status* status) { - return construct(env, *status); - } - - // Returns the equivalent org.rocksdb.Status.Code for the provided - // C++ ROCKSDB_NAMESPACE::Status::Code enum - static jbyte toJavaStatusCode(const ROCKSDB_NAMESPACE::Status::Code& code) { - switch (code) { - case ROCKSDB_NAMESPACE::Status::Code::kOk: - return 0x0; - case ROCKSDB_NAMESPACE::Status::Code::kNotFound: - return 0x1; - case ROCKSDB_NAMESPACE::Status::Code::kCorruption: - return 0x2; - case ROCKSDB_NAMESPACE::Status::Code::kNotSupported: - return 0x3; - case ROCKSDB_NAMESPACE::Status::Code::kInvalidArgument: - return 0x4; - case ROCKSDB_NAMESPACE::Status::Code::kIOError: - return 0x5; - case ROCKSDB_NAMESPACE::Status::Code::kMergeInProgress: - return 0x6; - case ROCKSDB_NAMESPACE::Status::Code::kIncomplete: - return 0x7; - case ROCKSDB_NAMESPACE::Status::Code::kShutdownInProgress: - return 0x8; - case ROCKSDB_NAMESPACE::Status::Code::kTimedOut: - return 0x9; - case ROCKSDB_NAMESPACE::Status::Code::kAborted: - return 0xA; - case ROCKSDB_NAMESPACE::Status::Code::kBusy: - return 0xB; - case ROCKSDB_NAMESPACE::Status::Code::kExpired: - return 0xC; - case ROCKSDB_NAMESPACE::Status::Code::kTryAgain: - return 0xD; - case ROCKSDB_NAMESPACE::Status::Code::kColumnFamilyDropped: - return 0xE; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent org.rocksdb.Status.SubCode for the provided - // C++ ROCKSDB_NAMESPACE::Status::SubCode enum - static jbyte toJavaStatusSubCode( - const ROCKSDB_NAMESPACE::Status::SubCode& subCode) { - switch (subCode) { - case ROCKSDB_NAMESPACE::Status::SubCode::kNone: - return 0x0; - case ROCKSDB_NAMESPACE::Status::SubCode::kMutexTimeout: - return 0x1; - case ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout: - return 0x2; - case ROCKSDB_NAMESPACE::Status::SubCode::kLockLimit: - return 0x3; - case ROCKSDB_NAMESPACE::Status::SubCode::kNoSpace: - return 0x4; - case ROCKSDB_NAMESPACE::Status::SubCode::kDeadlock: - return 0x5; - case ROCKSDB_NAMESPACE::Status::SubCode::kStaleFile: - return 0x6; - case ROCKSDB_NAMESPACE::Status::SubCode::kMemoryLimit: - return 0x7; - default: - return 0x7F; // undefined - } - } - - static std::unique_ptr toCppStatus( - const jbyte jcode_value, const jbyte jsub_code_value) { - std::unique_ptr status; - switch (jcode_value) { - case 0x0: - // Ok - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::OK())); - break; - case 0x1: - // NotFound - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::NotFound( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); - break; - case 0x2: - // Corruption - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Corruption( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); - break; - case 0x3: - // NotSupported - status = 
std::unique_ptr( - new ROCKSDB_NAMESPACE::Status( - ROCKSDB_NAMESPACE::Status::NotSupported( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( - jsub_code_value)))); - break; - case 0x4: - // InvalidArgument - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status( - ROCKSDB_NAMESPACE::Status::InvalidArgument( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( - jsub_code_value)))); - break; - case 0x5: - // IOError - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::IOError( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); - break; - case 0x6: - // MergeInProgress - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status( - ROCKSDB_NAMESPACE::Status::MergeInProgress( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( - jsub_code_value)))); - break; - case 0x7: - // Incomplete - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Incomplete( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); - break; - case 0x8: - // ShutdownInProgress - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status( - ROCKSDB_NAMESPACE::Status::ShutdownInProgress( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( - jsub_code_value)))); - break; - case 0x9: - // TimedOut - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::TimedOut( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); - break; - case 0xA: - // Aborted - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Aborted( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); - break; - case 0xB: - // Busy - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Busy( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); - break; - case 0xC: - // Expired - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Expired( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); - break; - case 0xD: - // TryAgain - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::TryAgain( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); - break; - case 0xE: - // ColumnFamilyDropped - status = std::unique_ptr( - new ROCKSDB_NAMESPACE::Status( - ROCKSDB_NAMESPACE::Status::ColumnFamilyDropped( - ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( - jsub_code_value)))); - break; - case 0x7F: - default: - return nullptr; - } - return status; - } - - // Returns the equivalent ROCKSDB_NAMESPACE::Status for the Java - // org.rocksdb.Status - static std::unique_ptr toCppStatus( - JNIEnv* env, const jobject jstatus) { - jmethodID mid_code = getCodeMethod(env); - if (mid_code == nullptr) { - // exception occurred - return nullptr; - } - jobject jcode = env->CallObjectMethod(jstatus, mid_code); - if (env->ExceptionCheck()) { - // exception occurred - return nullptr; - } - - jmethodID mid_code_value = ROCKSDB_NAMESPACE::CodeJni::getValueMethod(env); - if (mid_code_value == nullptr) { - // exception occurred - return nullptr; - } - jbyte jcode_value = env->CallByteMethod(jcode, mid_code_value); - if (env->ExceptionCheck()) { - // exception occurred - if (jcode != nullptr) { - env->DeleteLocalRef(jcode); - } - return nullptr; - } - - jmethodID mid_subCode = getSubCodeMethod(env); - if (mid_subCode == nullptr) { - // exception occurred - return nullptr; - } - jobject jsubCode = env->CallObjectMethod(jstatus, mid_subCode); - if (env->ExceptionCheck()) { - // exception occurred - 
if (jcode != nullptr) { - env->DeleteLocalRef(jcode); - } - return nullptr; - } - - jbyte jsub_code_value = 0x0; // None - if (jsubCode != nullptr) { - jmethodID mid_subCode_value = - ROCKSDB_NAMESPACE::SubCodeJni::getValueMethod(env); - if (mid_subCode_value == nullptr) { - // exception occurred - return nullptr; - } - jsub_code_value = env->CallByteMethod(jsubCode, mid_subCode_value); - if (env->ExceptionCheck()) { - // exception occurred - if (jcode != nullptr) { - env->DeleteLocalRef(jcode); - } - return nullptr; - } - } - - jmethodID mid_state = getStateMethod(env); - if (mid_state == nullptr) { - // exception occurred - return nullptr; - } - jobject jstate = env->CallObjectMethod(jstatus, mid_state); - if (env->ExceptionCheck()) { - // exception occurred - if (jsubCode != nullptr) { - env->DeleteLocalRef(jsubCode); - } - if (jcode != nullptr) { - env->DeleteLocalRef(jcode); - } - return nullptr; - } - - std::unique_ptr status = - toCppStatus(jcode_value, jsub_code_value); - - // delete all local refs - if (jstate != nullptr) { - env->DeleteLocalRef(jstate); - } - if (jsubCode != nullptr) { - env->DeleteLocalRef(jsubCode); - } - if (jcode != nullptr) { - env->DeleteLocalRef(jcode); - } - - return status; - } -}; - -// The portal class for org.rocksdb.RocksDBException -class RocksDBExceptionJni : public JavaException { - public: - /** - * Get the Java Class org.rocksdb.RocksDBException - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaException::getJClass(env, "org/rocksdb/RocksDBException"); - } - - /** - * Create and throw a Java RocksDBException with the provided message - * - * @param env A pointer to the Java environment - * @param msg The message for the exception - * - * @return true if an exception was thrown, false otherwise - */ - static bool ThrowNew(JNIEnv* env, const std::string& msg) { - return JavaException::ThrowNew(env, msg); - } - - /** - * Create and throw a Java RocksDBException with the provided status - * - * If s->ok() == true, then this function will not throw any exception. - * - * @param env A pointer to the Java environment - * @param s The status for the exception - * - * @return true if an exception was thrown, false otherwise - */ - static bool ThrowNew(JNIEnv* env, std::unique_ptr& s) { - return ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, *(s.get())); - } - - /** - * Create and throw a Java RocksDBException with the provided status - * - * If s.ok() == true, then this function will not throw any exception. - * - * @param env A pointer to the Java environment - * @param s The status for the exception - * - * @return true if an exception was thrown, false otherwise - */ - static bool ThrowNew(JNIEnv* env, const Status& s) { - if (s.ok()) { - return false; - } - - // get the RocksDBException class - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - std::cerr << "RocksDBExceptionJni::ThrowNew/class - Error: unexpected " - "exception!" 
- << std::endl; - return env->ExceptionCheck(); - } - - // get the constructor of org.rocksdb.RocksDBException - jmethodID mid = - env->GetMethodID(jclazz, "", "(Lorg/rocksdb/Status;)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - std::cerr - << "RocksDBExceptionJni::ThrowNew/cstr - Error: unexpected exception!" - << std::endl; - return env->ExceptionCheck(); - } - - // get the Java status object - jobject jstatus = StatusJni::construct(env, s); - if (jstatus == nullptr) { - // exception occcurred - std::cerr << "RocksDBExceptionJni::ThrowNew/StatusJni - Error: " - "unexpected exception!" - << std::endl; - return env->ExceptionCheck(); - } - - // construct the RocksDBException - jthrowable rocksdb_exception = - reinterpret_cast(env->NewObject(jclazz, mid, jstatus)); - if (env->ExceptionCheck()) { - if (jstatus != nullptr) { - env->DeleteLocalRef(jstatus); - } - if (rocksdb_exception != nullptr) { - env->DeleteLocalRef(rocksdb_exception); - } - std::cerr << "RocksDBExceptionJni::ThrowNew/NewObject - Error: " - "unexpected exception!" - << std::endl; - return true; - } - - // throw the RocksDBException - const jint rs = env->Throw(rocksdb_exception); - if (rs != JNI_OK) { - // exception could not be thrown - std::cerr - << "RocksDBExceptionJni::ThrowNew - Fatal: could not throw exception!" - << std::endl; - if (jstatus != nullptr) { - env->DeleteLocalRef(jstatus); - } - if (rocksdb_exception != nullptr) { - env->DeleteLocalRef(rocksdb_exception); - } - return env->ExceptionCheck(); - } - - if (jstatus != nullptr) { - env->DeleteLocalRef(jstatus); - } - if (rocksdb_exception != nullptr) { - env->DeleteLocalRef(rocksdb_exception); - } - - return true; - } - - /** - * Create and throw a Java RocksDBException with the provided message - * and status - * - * If s.ok() == true, then this function will not throw any exception. - * - * @param env A pointer to the Java environment - * @param msg The message for the exception - * @param s The status for the exception - * - * @return true if an exception was thrown, false otherwise - */ - static bool ThrowNew(JNIEnv* env, const std::string& msg, const Status& s) { - assert(!s.ok()); - if (s.ok()) { - return false; - } - - // get the RocksDBException class - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - std::cerr << "RocksDBExceptionJni::ThrowNew/class - Error: unexpected " - "exception!" - << std::endl; - return env->ExceptionCheck(); - } - - // get the constructor of org.rocksdb.RocksDBException - jmethodID mid = env->GetMethodID( - jclazz, "", "(Ljava/lang/String;Lorg/rocksdb/Status;)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - std::cerr - << "RocksDBExceptionJni::ThrowNew/cstr - Error: unexpected exception!" - << std::endl; - return env->ExceptionCheck(); - } - - jstring jmsg = env->NewStringUTF(msg.c_str()); - if (jmsg == nullptr) { - // exception thrown: OutOfMemoryError - std::cerr - << "RocksDBExceptionJni::ThrowNew/msg - Error: unexpected exception!" - << std::endl; - return env->ExceptionCheck(); - } - - // get the Java status object - jobject jstatus = StatusJni::construct(env, s); - if (jstatus == nullptr) { - // exception occcurred - std::cerr << "RocksDBExceptionJni::ThrowNew/StatusJni - Error: " - "unexpected exception!" 
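// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// Typical call pattern for the ThrowNew(JNIEnv*, const Status&) overload
// above: a JNI entry point converts a non-OK C++ status into a pending Java
// org.rocksdb.RocksDBException and returns immediately. The function name and
// status value below are invented for the example.
#include <jni.h>
#include "rocksdb/status.h"
inline void exampleNativeCall(JNIEnv* env) {
  ROCKSDB_NAMESPACE::Status s =
      ROCKSDB_NAMESPACE::Status::IOError("example failure");
  if (!s.ok()) {
    // Builds a Java Status via StatusJni::construct and throws the exception;
    // the JVM surfaces it once the native frame returns.
    ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s);
    return;
  }
}
// ---------------------------------------------------------------------------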
- << std::endl; - if (jmsg != nullptr) { - env->DeleteLocalRef(jmsg); - } - return env->ExceptionCheck(); - } - - // construct the RocksDBException - jthrowable rocksdb_exception = reinterpret_cast( - env->NewObject(jclazz, mid, jmsg, jstatus)); - if (env->ExceptionCheck()) { - if (jstatus != nullptr) { - env->DeleteLocalRef(jstatus); - } - if (jmsg != nullptr) { - env->DeleteLocalRef(jmsg); - } - if (rocksdb_exception != nullptr) { - env->DeleteLocalRef(rocksdb_exception); - } - std::cerr << "RocksDBExceptionJni::ThrowNew/NewObject - Error: " - "unexpected exception!" - << std::endl; - return true; - } - - // throw the RocksDBException - const jint rs = env->Throw(rocksdb_exception); - if (rs != JNI_OK) { - // exception could not be thrown - std::cerr - << "RocksDBExceptionJni::ThrowNew - Fatal: could not throw exception!" - << std::endl; - if (jstatus != nullptr) { - env->DeleteLocalRef(jstatus); - } - if (jmsg != nullptr) { - env->DeleteLocalRef(jmsg); - } - if (rocksdb_exception != nullptr) { - env->DeleteLocalRef(rocksdb_exception); - } - return env->ExceptionCheck(); - } - - if (jstatus != nullptr) { - env->DeleteLocalRef(jstatus); - } - if (jmsg != nullptr) { - env->DeleteLocalRef(jmsg); - } - if (rocksdb_exception != nullptr) { - env->DeleteLocalRef(rocksdb_exception); - } - - return true; - } - - /** - * Get the Java Method: RocksDBException#getStatus - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getStatusMethod(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "getStatus", "()Lorg/rocksdb/Status;"); - assert(mid != nullptr); - return mid; - } - - static std::unique_ptr toCppStatus( - JNIEnv* env, jthrowable jrocksdb_exception) { - if (!env->IsInstanceOf(jrocksdb_exception, getJClass(env))) { - // not an instance of RocksDBException - return nullptr; - } - - // get the java status object - jmethodID mid = getStatusMethod(env); - if (mid == nullptr) { - // exception occurred accessing class or method - return nullptr; - } - - jobject jstatus = env->CallObjectMethod(jrocksdb_exception, mid); - if (env->ExceptionCheck()) { - // exception occurred - return nullptr; - } - - if (jstatus == nullptr) { - return nullptr; // no status available - } - - return ROCKSDB_NAMESPACE::StatusJni::toCppStatus(env, jstatus); - } -}; - -// The portal class for java.util.List -class ListJni : public JavaClass { - public: - /** - * Get the Java Class java.util.List - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getListClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/util/List"); - } - - /** - * Get the Java Class java.util.ArrayList - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getArrayListClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/util/ArrayList"); - } - - /** - * Get the Java Class java.util.Iterator - * - * @param env A pointer to the Java environment - * - * 
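// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// RocksDBExceptionJni::toCppStatus above goes the other way: when a Java
// callback has thrown, native code can recover the wrapped C++ status. A
// sketch of that pattern (the fallback status is this example's choice, not
// something the header mandates):
#include <jni.h>
#include <memory>
#include "rocksdb/status.h"
inline ROCKSDB_NAMESPACE::Status statusFromPendingException(JNIEnv* env) {
  jthrowable jthr = env->ExceptionOccurred();
  if (jthr == nullptr) {
    return ROCKSDB_NAMESPACE::Status::OK();
  }
  env->ExceptionClear();
  std::unique_ptr<ROCKSDB_NAMESPACE::Status> s =
      ROCKSDB_NAMESPACE::RocksDBExceptionJni::toCppStatus(env, jthr);
  env->DeleteLocalRef(jthr);
  // nullptr means the throwable was not a RocksDBException or carried no
  // Status; fall back to a generic error for the example.
  return s != nullptr ? *s
                      : ROCKSDB_NAMESPACE::Status::Aborted("java exception");
}
// ---------------------------------------------------------------------------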
@return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getIteratorClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/util/Iterator"); - } - - /** - * Get the Java Method: List#iterator - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getIteratorMethod(JNIEnv* env) { - jclass jlist_clazz = getListClass(env); - if (jlist_clazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jlist_clazz, "iterator", "()Ljava/util/Iterator;"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: Iterator#hasNext - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getHasNextMethod(JNIEnv* env) { - jclass jiterator_clazz = getIteratorClass(env); - if (jiterator_clazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jiterator_clazz, "hasNext", "()Z"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: Iterator#next - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getNextMethod(JNIEnv* env) { - jclass jiterator_clazz = getIteratorClass(env); - if (jiterator_clazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jiterator_clazz, "next", "()Ljava/lang/Object;"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: ArrayList constructor - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getArrayListConstructorMethodId(JNIEnv* env) { - jclass jarray_list_clazz = getArrayListClass(env); - if (jarray_list_clazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - static jmethodID mid = - env->GetMethodID(jarray_list_clazz, "", "(I)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: List#add - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getListAddMethodId(JNIEnv* env) { - jclass jlist_clazz = getListClass(env); - if (jlist_clazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jlist_clazz, "add", "(Ljava/lang/Object;)Z"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for java.lang.Byte -class ByteJni : public JavaClass { - public: - /** - * Get the Java Class java.lang.Byte - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/lang/Byte"); - } - - /** - * Get the Java Class byte[] - * - * @param env A pointer to the Java 
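// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// The ListJni method IDs above (iterator/hasNext/next) are normally combined
// into the classic JNI iteration loop. Error handling is trimmed to keep the
// sketch short; real code should also cope with nullptr method IDs.
#include <jni.h>
inline void forEachListElement(JNIEnv* env, jobject jlist) {
  jmethodID mid_iter = ROCKSDB_NAMESPACE::ListJni::getIteratorMethod(env);
  jmethodID mid_has_next = ROCKSDB_NAMESPACE::ListJni::getHasNextMethod(env);
  jmethodID mid_next = ROCKSDB_NAMESPACE::ListJni::getNextMethod(env);
  if (mid_iter == nullptr || mid_has_next == nullptr || mid_next == nullptr) {
    return;  // a Java exception is already pending
  }
  jobject jiterator = env->CallObjectMethod(jlist, mid_iter);
  if (env->ExceptionCheck()) {
    return;
  }
  while (env->CallBooleanMethod(jiterator, mid_has_next) == JNI_TRUE) {
    jobject jelement = env->CallObjectMethod(jiterator, mid_next);
    if (env->ExceptionCheck()) {
      break;
    }
    // ... use jelement here ...
    env->DeleteLocalRef(jelement);
  }
  env->DeleteLocalRef(jiterator);
}
// ---------------------------------------------------------------------------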
environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getArrayJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "[B"); - } - - /** - * Creates a new 2-dimensional Java Byte Array byte[][] - * - * @param env A pointer to the Java environment - * @param len The size of the first dimension - * - * @return A reference to the Java byte[][] or nullptr if an exception occurs - */ - static jobjectArray new2dByteArray(JNIEnv* env, const jsize len) { - jclass clazz = getArrayJClass(env); - if (clazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - return env->NewObjectArray(len, clazz, nullptr); - } - - /** - * Get the Java Method: Byte#byteValue - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getByteValueMethod(JNIEnv* env) { - jclass clazz = getJClass(env); - if (clazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(clazz, "byteValue", "()B"); - assert(mid != nullptr); - return mid; - } - - /** - * Calls the Java Method: Byte#valueOf, returning a constructed Byte jobject - * - * @param env A pointer to the Java environment - * - * @return A constructing Byte object or nullptr if the class or method id - * could not be retrieved, or an exception occurred - */ - static jobject valueOf(JNIEnv* env, jbyte jprimitive_byte) { - jclass clazz = getJClass(env); - if (clazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetStaticMethodID(clazz, "valueOf", "(B)Ljava/lang/Byte;"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - const jobject jbyte_obj = - env->CallStaticObjectMethod(clazz, mid, jprimitive_byte); - if (env->ExceptionCheck()) { - // exception occurred - return nullptr; - } - - return jbyte_obj; - } -}; - -// The portal class for java.nio.ByteBuffer -class ByteBufferJni : public JavaClass { - public: - /** - * Get the Java Class java.nio.ByteBuffer - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/nio/ByteBuffer"); - } - - /** - * Get the Java Method: ByteBuffer#allocate - * - * @param env A pointer to the Java environment - * @param jbytebuffer_clazz if you have a reference to a ByteBuffer class, or - * nullptr - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getAllocateMethodId(JNIEnv* env, - jclass jbytebuffer_clazz = nullptr) { - const jclass jclazz = - jbytebuffer_clazz == nullptr ? 
getJClass(env) : jbytebuffer_clazz; - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetStaticMethodID(jclazz, "allocate", "(I)Ljava/nio/ByteBuffer;"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: ByteBuffer#array - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getArrayMethodId(JNIEnv* env, - jclass jbytebuffer_clazz = nullptr) { - const jclass jclazz = - jbytebuffer_clazz == nullptr ? getJClass(env) : jbytebuffer_clazz; - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "array", "()[B"); - assert(mid != nullptr); - return mid; - } - - static jobject construct(JNIEnv* env, const bool direct, - const size_t capacity, - jclass jbytebuffer_clazz = nullptr) { - return constructWith(env, direct, nullptr, capacity, jbytebuffer_clazz); - } - - static jobject constructWith(JNIEnv* env, const bool direct, const char* buf, - const size_t capacity, - jclass jbytebuffer_clazz = nullptr) { - if (direct) { - bool allocated = false; - if (buf == nullptr) { - buf = new char[capacity]; - allocated = true; - } - jobject jbuf = env->NewDirectByteBuffer(const_cast(buf), - static_cast(capacity)); - if (jbuf == nullptr) { - // exception occurred - if (allocated) { - delete[] static_cast(buf); - } - return nullptr; - } - return jbuf; - } else { - const jclass jclazz = - jbytebuffer_clazz == nullptr ? getJClass(env) : jbytebuffer_clazz; - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - const jmethodID jmid_allocate = - getAllocateMethodId(env, jbytebuffer_clazz); - if (jmid_allocate == nullptr) { - // exception occurred accessing class, or NoSuchMethodException or - // OutOfMemoryError - return nullptr; - } - const jobject jbuf = env->CallStaticObjectMethod( - jclazz, jmid_allocate, static_cast(capacity)); - if (env->ExceptionCheck()) { - // exception occurred - return nullptr; - } - - // set buffer data? 
- if (buf != nullptr) { - jbyteArray jarray = array(env, jbuf, jbytebuffer_clazz); - if (jarray == nullptr) { - // exception occurred - env->DeleteLocalRef(jbuf); - return nullptr; - } - - jboolean is_copy = JNI_FALSE; - jbyte* ja = reinterpret_cast( - env->GetPrimitiveArrayCritical(jarray, &is_copy)); - if (ja == nullptr) { - // exception occurred - env->DeleteLocalRef(jarray); - env->DeleteLocalRef(jbuf); - return nullptr; - } - - memcpy(ja, const_cast(buf), capacity); - - env->ReleasePrimitiveArrayCritical(jarray, ja, 0); - - env->DeleteLocalRef(jarray); - } - - return jbuf; - } - } - - static jbyteArray array(JNIEnv* env, const jobject& jbyte_buffer, - jclass jbytebuffer_clazz = nullptr) { - const jmethodID mid = getArrayMethodId(env, jbytebuffer_clazz); - if (mid == nullptr) { - // exception occurred accessing class, or NoSuchMethodException or - // OutOfMemoryError - return nullptr; - } - const jobject jarray = env->CallObjectMethod(jbyte_buffer, mid); - if (env->ExceptionCheck()) { - // exception occurred - return nullptr; - } - return static_cast(jarray); - } -}; - -// The portal class for java.lang.Integer -class IntegerJni : public JavaClass { - public: - /** - * Get the Java Class java.lang.Integer - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/lang/Integer"); - } - - static jobject valueOf(JNIEnv* env, jint jprimitive_int) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = - env->GetStaticMethodID(jclazz, "valueOf", "(I)Ljava/lang/Integer;"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - const jobject jinteger_obj = - env->CallStaticObjectMethod(jclazz, mid, jprimitive_int); - if (env->ExceptionCheck()) { - // exception occurred - return nullptr; - } - - return jinteger_obj; - } -}; - -// The portal class for java.lang.Long -class LongJni : public JavaClass { - public: - /** - * Get the Java Class java.lang.Long - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/lang/Long"); - } - - static jobject valueOf(JNIEnv* env, jlong jprimitive_long) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = - env->GetStaticMethodID(jclazz, "valueOf", "(J)Ljava/lang/Long;"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - const jobject jlong_obj = - env->CallStaticObjectMethod(jclazz, mid, jprimitive_long); - if (env->ExceptionCheck()) { - // exception occurred - return nullptr; - } - - return jlong_obj; - } -}; - -// The portal class for java.lang.StringBuilder -class StringBuilderJni : public JavaClass { - public: - /** - * Get the Java Class java.lang.StringBuilder - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, 
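// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// ByteBufferJni::constructWith above either wraps memory in a direct
// java.nio.ByteBuffer (NewDirectByteBuffer) or calls ByteBuffer.allocate and
// copies the bytes into its backing array. A small usage sketch; note that in
// the direct case the caller must keep `value` alive for as long as the
// returned buffer is in use, since the bytes are not copied:
#include <jni.h>
#include <string>
inline jobject makeValueBuffer(JNIEnv* env, const std::string& value,
                               bool direct) {
  return ROCKSDB_NAMESPACE::ByteBufferJni::constructWith(
      env, direct, value.data(), value.size());
}
// ---------------------------------------------------------------------------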
ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/lang/StringBuilder"); - } - - /** - * Get the Java Method: StringBuilder#append - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getListAddMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID( - jclazz, "append", "(Ljava/lang/String;)Ljava/lang/StringBuilder;"); - assert(mid != nullptr); - return mid; - } - - /** - * Appends a C-style string to a StringBuilder - * - * @param env A pointer to the Java environment - * @param jstring_builder Reference to a java.lang.StringBuilder - * @param c_str A C-style string to append to the StringBuilder - * - * @return A reference to the updated StringBuilder, or a nullptr if - * an exception occurs - */ - static jobject append(JNIEnv* env, jobject jstring_builder, - const char* c_str) { - jmethodID mid = getListAddMethodId(env); - if (mid == nullptr) { - // exception occurred accessing class or method - return nullptr; - } - - jstring new_value_str = env->NewStringUTF(c_str); - if (new_value_str == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - jobject jresult_string_builder = - env->CallObjectMethod(jstring_builder, mid, new_value_str); - if (env->ExceptionCheck()) { - // exception occurred - env->DeleteLocalRef(new_value_str); - return nullptr; - } - - return jresult_string_builder; - } -}; - -// various utility functions for working with RocksDB and JNI -class JniUtil { - public: - /** - * Detect if jlong overflows size_t - * - * @param jvalue the jlong value - * - * @return - */ - inline static Status check_if_jlong_fits_size_t(const jlong& jvalue) { - Status s = Status::OK(); - if (static_cast(jvalue) > std::numeric_limits::max()) { - s = Status::InvalidArgument(Slice("jlong overflows 32 bit value.")); - } - return s; - } - - /** - * Obtains a reference to the JNIEnv from - * the JVM - * - * If the current thread is not attached to the JavaVM - * then it will be attached so as to retrieve the JNIEnv - * - * If a thread is attached, it must later be manually - * released by calling JavaVM::DetachCurrentThread. 
- * This can be handled by always matching calls to this - * function with calls to {@link JniUtil::releaseJniEnv(JavaVM*, jboolean)} - * - * @param jvm (IN) A pointer to the JavaVM instance - * @param attached (OUT) A pointer to a boolean which - * will be set to JNI_TRUE if we had to attach the thread - * - * @return A pointer to the JNIEnv or nullptr if a fatal error - * occurs and the JNIEnv cannot be retrieved - */ - static JNIEnv* getJniEnv(JavaVM* jvm, jboolean* attached) { - assert(jvm != nullptr); - - JNIEnv* env; - const jint env_rs = - jvm->GetEnv(reinterpret_cast(&env), JNI_VERSION_1_6); - - if (env_rs == JNI_OK) { - // current thread is already attached, return the JNIEnv - *attached = JNI_FALSE; - return env; - } else if (env_rs == JNI_EDETACHED) { - // current thread is not attached, attempt to attach - const jint rs_attach = - jvm->AttachCurrentThread(reinterpret_cast(&env), NULL); - if (rs_attach == JNI_OK) { - *attached = JNI_TRUE; - return env; - } else { - // error, could not attach the thread - std::cerr << "JniUtil::getJniEnv - Fatal: could not attach current " - "thread to JVM!" - << std::endl; - return nullptr; - } - } else if (env_rs == JNI_EVERSION) { - // error, JDK does not support JNI_VERSION_1_6+ - std::cerr - << "JniUtil::getJniEnv - Fatal: JDK does not support JNI_VERSION_1_6" - << std::endl; - return nullptr; - } else { - std::cerr << "JniUtil::getJniEnv - Fatal: Unknown error: env_rs=" - << env_rs << std::endl; - return nullptr; - } - } - - /** - * Counterpart to {@link JniUtil::getJniEnv(JavaVM*, jboolean*)} - * - * Detachess the current thread from the JVM if it was previously - * attached - * - * @param jvm (IN) A pointer to the JavaVM instance - * @param attached (IN) JNI_TRUE if we previously had to attach the thread - * to the JavaVM to get the JNIEnv - */ - static void releaseJniEnv(JavaVM* jvm, jboolean& attached) { - assert(jvm != nullptr); - if (attached == JNI_TRUE) { - const jint rs_detach = jvm->DetachCurrentThread(); - assert(rs_detach == JNI_OK); - if (rs_detach != JNI_OK) { - std::cerr << "JniUtil::getJniEnv - Warn: Unable to detach current " - "thread from JVM!" 
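// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// The comment above describes the intended pairing: every getJniEnv call is
// matched by releaseJniEnv so background threads that had to be attached are
// detached again. The usual shape of that pattern in a native callback:
#include <jni.h>
inline void callbackFromNativeThread(JavaVM* jvm) {
  jboolean attached_thread = JNI_FALSE;
  JNIEnv* env = ROCKSDB_NAMESPACE::JniUtil::getJniEnv(jvm, &attached_thread);
  if (env == nullptr) {
    return;  // fatal: could not obtain a JNIEnv
  }
  // ... perform JNI calls with env ...
  ROCKSDB_NAMESPACE::JniUtil::releaseJniEnv(jvm, attached_thread);
}
// ---------------------------------------------------------------------------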
- << std::endl; - } - } - } - - /** - * Copies a Java String[] to a C++ std::vector - * - * @param env (IN) A pointer to the java environment - * @param jss (IN) The Java String array to copy - * @param has_exception (OUT) will be set to JNI_TRUE - * if an OutOfMemoryError or ArrayIndexOutOfBoundsException - * exception occurs - * - * @return A std::vector containing copies of the Java strings - */ - static std::vector copyStrings(JNIEnv* env, jobjectArray jss, - jboolean* has_exception) { - return ROCKSDB_NAMESPACE::JniUtil::copyStrings( - env, jss, env->GetArrayLength(jss), has_exception); - } - - /** - * Copies a Java String[] to a C++ std::vector - * - * @param env (IN) A pointer to the java environment - * @param jss (IN) The Java String array to copy - * @param jss_len (IN) The length of the Java String array to copy - * @param has_exception (OUT) will be set to JNI_TRUE - * if an OutOfMemoryError or ArrayIndexOutOfBoundsException - * exception occurs - * - * @return A std::vector containing copies of the Java strings - */ - static std::vector copyStrings(JNIEnv* env, jobjectArray jss, - const jsize jss_len, - jboolean* has_exception) { - std::vector strs; - strs.reserve(jss_len); - for (jsize i = 0; i < jss_len; i++) { - jobject js = env->GetObjectArrayElement(jss, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - *has_exception = JNI_TRUE; - return strs; - } - - jstring jstr = static_cast(js); - const char* str = env->GetStringUTFChars(jstr, nullptr); - if (str == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(js); - *has_exception = JNI_TRUE; - return strs; - } - - strs.push_back(std::string(str)); - - env->ReleaseStringUTFChars(jstr, str); - env->DeleteLocalRef(js); - } - - *has_exception = JNI_FALSE; - return strs; - } - - /** - * Copies a jstring to a C-style null-terminated byte string - * and releases the original jstring - * - * The jstring is copied as UTF-8 - * - * If an exception occurs, then JNIEnv::ExceptionCheck() - * will have been called - * - * @param env (IN) A pointer to the java environment - * @param js (IN) The java string to copy - * @param has_exception (OUT) will be set to JNI_TRUE - * if an OutOfMemoryError exception occurs - * - * @return A pointer to the copied string, or a - * nullptr if has_exception == JNI_TRUE - */ - static std::unique_ptr copyString(JNIEnv* env, jstring js, - jboolean* has_exception) { - const char* utf = env->GetStringUTFChars(js, nullptr); - if (utf == nullptr) { - // exception thrown: OutOfMemoryError - env->ExceptionCheck(); - *has_exception = JNI_TRUE; - return nullptr; - } else if (env->ExceptionCheck()) { - // exception thrown - env->ReleaseStringUTFChars(js, utf); - *has_exception = JNI_TRUE; - return nullptr; - } - - const jsize utf_len = env->GetStringUTFLength(js); - std::unique_ptr str( - new char[utf_len + - 1]); // Note: + 1 is needed for the c_str null terminator - std::strcpy(str.get(), utf); - env->ReleaseStringUTFChars(js, utf); - *has_exception = JNI_FALSE; - return str; - } - - /** - * Copies a jstring to a std::string - * and releases the original jstring - * - * If an exception occurs, then JNIEnv::ExceptionCheck() - * will have been called - * - * @param env (IN) A pointer to the java environment - * @param js (IN) The java string to copy - * @param has_exception (OUT) will be set to JNI_TRUE - * if an OutOfMemoryError exception occurs - * - * @return A std:string copy of the jstring, or an - * empty std::string if has_exception == JNI_TRUE - */ 
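// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// copyStrings/copyString above follow the same convention: the caller passes a
// jboolean flag and must check it (or ExceptionCheck) before using the result,
// because a partially filled vector may be returned on failure.
#include <jni.h>
#include <string>
#include <vector>
inline std::vector<std::string> namesFromJava(JNIEnv* env,
                                              jobjectArray jnames) {
  jboolean has_exception = JNI_FALSE;
  std::vector<std::string> names =
      ROCKSDB_NAMESPACE::JniUtil::copyStrings(env, jnames, &has_exception);
  if (has_exception == JNI_TRUE) {
    names.clear();  // a Java exception is pending; discard the partial copy
  }
  return names;
}
// ---------------------------------------------------------------------------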
- static std::string copyStdString(JNIEnv* env, jstring js, - jboolean* has_exception) { - const char* utf = env->GetStringUTFChars(js, nullptr); - if (utf == nullptr) { - // exception thrown: OutOfMemoryError - env->ExceptionCheck(); - *has_exception = JNI_TRUE; - return std::string(); - } else if (env->ExceptionCheck()) { - // exception thrown - env->ReleaseStringUTFChars(js, utf); - *has_exception = JNI_TRUE; - return std::string(); - } - - std::string name(utf); - env->ReleaseStringUTFChars(js, utf); - *has_exception = JNI_FALSE; - return name; - } - - /** - * Copies bytes from a std::string to a jByteArray - * - * @param env A pointer to the java environment - * @param bytes The bytes to copy - * - * @return the Java byte[], or nullptr if an exception occurs - * - * @throws RocksDBException thrown - * if memory size to copy exceeds general java specific array size - * limitation. - */ - static jbyteArray copyBytes(JNIEnv* env, std::string bytes) { - return createJavaByteArrayWithSizeCheck(env, bytes.c_str(), bytes.size()); - } - - /** - * Given a Java byte[][] which is an array of java.lang.Strings - * where each String is a byte[], the passed function `string_fn` - * will be called on each String, the result is the collected by - * calling the passed function `collector_fn` - * - * @param env (IN) A pointer to the java environment - * @param jbyte_strings (IN) A Java array of Strings expressed as bytes - * @param string_fn (IN) A transform function to call for each String - * @param collector_fn (IN) A collector which is called for the result - * of each `string_fn` - * @param has_exception (OUT) will be set to JNI_TRUE - * if an ArrayIndexOutOfBoundsException or OutOfMemoryError - * exception occurs - */ - template - static void byteStrings(JNIEnv* env, jobjectArray jbyte_strings, - std::function string_fn, - std::function collector_fn, - jboolean* has_exception) { - const jsize jlen = env->GetArrayLength(jbyte_strings); - - for (jsize i = 0; i < jlen; i++) { - jobject jbyte_string_obj = env->GetObjectArrayElement(jbyte_strings, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - *has_exception = JNI_TRUE; // signal error - return; - } - - jbyteArray jbyte_string_ary = - reinterpret_cast(jbyte_string_obj); - T result = byteString(env, jbyte_string_ary, string_fn, has_exception); - - env->DeleteLocalRef(jbyte_string_obj); - - if (*has_exception == JNI_TRUE) { - // exception thrown: OutOfMemoryError - return; - } - - collector_fn(i, result); - } - - *has_exception = JNI_FALSE; - } - - /** - * Given a Java String which is expressed as a Java Byte Array byte[], - * the passed function `string_fn` will be called on the String - * and the result returned - * - * @param env (IN) A pointer to the java environment - * @param jbyte_string_ary (IN) A Java String expressed in bytes - * @param string_fn (IN) A transform function to call on the String - * @param has_exception (OUT) will be set to JNI_TRUE - * if an OutOfMemoryError exception occurs - */ - template - static T byteString(JNIEnv* env, jbyteArray jbyte_string_ary, - std::function string_fn, - jboolean* has_exception) { - const jsize jbyte_string_len = env->GetArrayLength(jbyte_string_ary); - return byteString(env, jbyte_string_ary, jbyte_string_len, string_fn, - has_exception); - } - - /** - * Given a Java String which is expressed as a Java Byte Array byte[], - * the passed function `string_fn` will be called on the String - * and the result returned - * - * @param env (IN) A pointer to the 
java environment - * @param jbyte_string_ary (IN) A Java String expressed in bytes - * @param jbyte_string_len (IN) The length of the Java String - * expressed in bytes - * @param string_fn (IN) A transform function to call on the String - * @param has_exception (OUT) will be set to JNI_TRUE - * if an OutOfMemoryError exception occurs - */ - template - static T byteString(JNIEnv* env, jbyteArray jbyte_string_ary, - const jsize jbyte_string_len, - std::function string_fn, - jboolean* has_exception) { - jbyte* jbyte_string = env->GetByteArrayElements(jbyte_string_ary, nullptr); - if (jbyte_string == nullptr) { - // exception thrown: OutOfMemoryError - *has_exception = JNI_TRUE; - return nullptr; // signal error - } - - T result = - string_fn(reinterpret_cast(jbyte_string), jbyte_string_len); - - env->ReleaseByteArrayElements(jbyte_string_ary, jbyte_string, JNI_ABORT); - - *has_exception = JNI_FALSE; - return result; - } - - /** - * Converts a std::vector to a Java byte[][] where each Java String - * is expressed as a Java Byte Array byte[]. - * - * @param env A pointer to the java environment - * @param strings A vector of Strings - * - * @return A Java array of Strings expressed as bytes, - * or nullptr if an exception is thrown - */ - static jobjectArray stringsBytes(JNIEnv* env, - std::vector strings) { - jclass jcls_ba = ByteJni::getArrayJClass(env); - if (jcls_ba == nullptr) { - // exception occurred - return nullptr; - } - - const jsize len = static_cast(strings.size()); - - jobjectArray jbyte_strings = env->NewObjectArray(len, jcls_ba, nullptr); - if (jbyte_strings == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - for (jsize i = 0; i < len; i++) { - std::string* str = &strings[i]; - const jsize str_len = static_cast(str->size()); - - jbyteArray jbyte_string_ary = env->NewByteArray(str_len); - if (jbyte_string_ary == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jbyte_strings); - return nullptr; - } - - env->SetByteArrayRegion( - jbyte_string_ary, 0, str_len, - const_cast(reinterpret_cast(str->c_str()))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jbyte_string_ary); - env->DeleteLocalRef(jbyte_strings); - return nullptr; - } - - env->SetObjectArrayElement(jbyte_strings, i, jbyte_string_ary); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - // or ArrayStoreException - env->DeleteLocalRef(jbyte_string_ary); - env->DeleteLocalRef(jbyte_strings); - return nullptr; - } - - env->DeleteLocalRef(jbyte_string_ary); - } - - return jbyte_strings; - } - - /** - * Converts a std::vector to a Java String[]. 
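// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// byteString<T> above hands the raw bytes of a Java byte[] to a caller-supplied
// transform and propagates failure through has_exception. For example,
// materialising the array as a std::string:
#include <jni.h>
#include <string>
inline std::string stdStringFromJavaBytes(JNIEnv* env, jbyteArray jbytes,
                                          jboolean* has_exception) {
  return ROCKSDB_NAMESPACE::JniUtil::byteString<std::string>(
      env, jbytes,
      [](const char* str, const size_t len) { return std::string(str, len); },
      has_exception);
}
// ---------------------------------------------------------------------------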
- * - * @param env A pointer to the java environment - * @param strings A vector of Strings - * - * @return A Java array of Strings, - * or nullptr if an exception is thrown - */ - static jobjectArray toJavaStrings(JNIEnv* env, - const std::vector* strings) { - jclass jcls_str = env->FindClass("java/lang/String"); - if (jcls_str == nullptr) { - // exception occurred - return nullptr; - } - - const jsize len = static_cast(strings->size()); - - jobjectArray jstrings = env->NewObjectArray(len, jcls_str, nullptr); - if (jstrings == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - for (jsize i = 0; i < len; i++) { - const std::string* str = &((*strings)[i]); - jstring js = ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, str); - if (js == nullptr) { - env->DeleteLocalRef(jstrings); - return nullptr; - } - - env->SetObjectArrayElement(jstrings, i, js); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - // or ArrayStoreException - env->DeleteLocalRef(js); - env->DeleteLocalRef(jstrings); - return nullptr; - } - } - - return jstrings; - } - - /** - * Creates a Java UTF String from a C++ std::string - * - * @param env A pointer to the java environment - * @param string the C++ std::string - * @param treat_empty_as_null true if empty strings should be treated as null - * - * @return the Java UTF string, or nullptr if the provided string - * is null (or empty and treat_empty_as_null is set), or if an - * exception occurs allocating the Java String. - */ - static jstring toJavaString(JNIEnv* env, const std::string* string, - const bool treat_empty_as_null = false) { - if (string == nullptr) { - return nullptr; - } - - if (treat_empty_as_null && string->empty()) { - return nullptr; - } - - return env->NewStringUTF(string->c_str()); - } - - /** - * Copies bytes to a new jByteArray with the check of java array size - * limitation. - * - * @param bytes pointer to memory to copy to a new jByteArray - * @param size number of bytes to copy - * - * @return the Java byte[], or nullptr if an exception occurs - * - * @throws RocksDBException thrown - * if memory size to copy exceeds general java array size limitation to - * avoid overflow. 
- */ - static jbyteArray createJavaByteArrayWithSizeCheck(JNIEnv* env, - const char* bytes, - const size_t size) { - // Limitation for java array size is vm specific - // In general it cannot exceed Integer.MAX_VALUE (2^31 - 1) - // Current HotSpot VM limitation for array size is Integer.MAX_VALUE - 5 - // (2^31 - 1 - 5) It means that the next call to env->NewByteArray can still - // end with OutOfMemoryError("Requested array size exceeds VM limit") coming - // from VM - static const size_t MAX_JARRAY_SIZE = (static_cast(1)) << 31; - if (size > MAX_JARRAY_SIZE) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "Requested array size exceeds VM limit"); - return nullptr; - } - - const jsize jlen = static_cast(size); - jbyteArray jbytes = env->NewByteArray(jlen); - if (jbytes == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetByteArrayRegion( - jbytes, 0, jlen, - const_cast(reinterpret_cast(bytes))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jbytes); - return nullptr; - } - - return jbytes; - } - - /** - * Copies bytes from a ROCKSDB_NAMESPACE::Slice to a jByteArray - * - * @param env A pointer to the java environment - * @param bytes The bytes to copy - * - * @return the Java byte[] or nullptr if an exception occurs - * - * @throws RocksDBException thrown - * if memory size to copy exceeds general java specific array size - * limitation. - */ - static jbyteArray copyBytes(JNIEnv* env, const Slice& bytes) { - return createJavaByteArrayWithSizeCheck(env, bytes.data(), bytes.size()); - } - - /* - * Helper for operations on a key and value - * for example WriteBatch->Put - * - * TODO(AR) could be used for RocksDB->Put etc. - */ - static std::unique_ptr kv_op( - std::function - op, - JNIEnv* env, jobject /*jobj*/, jbyteArray jkey, jint jkey_len, - jbyteArray jvalue, jint jvalue_len) { - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (env->ExceptionCheck()) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - jbyte* value = env->GetByteArrayElements(jvalue, nullptr); - if (env->ExceptionCheck()) { - // exception thrown: OutOfMemoryError - if (key != nullptr) { - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - } - return nullptr; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), - jvalue_len); - - auto status = op(key_slice, value_slice); - - if (value != nullptr) { - env->ReleaseByteArrayElements(jvalue, value, JNI_ABORT); - } - if (key != nullptr) { - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - } - - return std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(status)); - } - - /* - * Helper for operations on a key - * for example WriteBatch->Delete - * - * TODO(AR) could be used for RocksDB->Delete etc. 
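// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// kv_op above factors out the GetByteArrayElements / ReleaseByteArrayElements
// bookkeeping for key+value operations. A WriteBatch::Put-style JNI body then
// reduces to a lambda plus an error check (the surrounding names are invented
// for the example):
#include <jni.h>
#include <memory>
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "rocksdb/write_batch.h"
inline void exampleWriteBatchPut(JNIEnv* env, jobject jobj,
                                 ROCKSDB_NAMESPACE::WriteBatch* wb,
                                 jbyteArray jkey, jint jkey_len,
                                 jbyteArray jval, jint jval_len) {
  auto put = [&wb](ROCKSDB_NAMESPACE::Slice key,
                   ROCKSDB_NAMESPACE::Slice value) {
    return wb->Put(key, value);
  };
  std::unique_ptr<ROCKSDB_NAMESPACE::Status> status =
      ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len, jval,
                                        jval_len);
  if (status != nullptr && !status->ok()) {
    ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status);
  }
}
// ---------------------------------------------------------------------------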
- */ - static std::unique_ptr k_op( - std::function op, - JNIEnv* env, jobject /*jobj*/, jbyteArray jkey, jint jkey_len) { - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (env->ExceptionCheck()) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - - auto status = op(key_slice); - - if (key != nullptr) { - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - } - - return std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(status)); - } - - /* - * Helper for operations on a key which is a region of an array - * Used to extract the common code from seek/seekForPrev. - * Possible that it can be generalised from that. - * - * We use GetByteArrayRegion to copy the key region of the whole array into - * a char[] We suspect this is not much slower than GetByteArrayElements, - * which probably copies anyway. - */ - static void k_op_region(std::function op, - JNIEnv* env, jbyteArray jkey, jint jkey_off, - jint jkey_len) { - const std::unique_ptr key(new char[jkey_len]); - if (key == nullptr) { - jclass oom_class = env->FindClass("/lang/java/OutOfMemoryError"); - env->ThrowNew(oom_class, - "Memory allocation failed in RocksDB JNI function"); - return; - } - env->GetByteArrayRegion(jkey, jkey_off, jkey_len, - reinterpret_cast(key.get())); - if (env->ExceptionCheck()) { - // exception thrown: OutOfMemoryError - return; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key.get()), - jkey_len); - op(key_slice); - } - - /* - * Helper for operations on a value - * for example WriteBatchWithIndex->GetFromBatch - */ - static jbyteArray v_op(std::function - op, - JNIEnv* env, jbyteArray jkey, jint jkey_len) { - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (env->ExceptionCheck()) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - - std::string value; - ROCKSDB_NAMESPACE::Status s = op(key_slice, &value); - - if (key != nullptr) { - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - } - - if (s.IsNotFound()) { - return nullptr; - } - - if (s.ok()) { - jbyteArray jret_value = - env->NewByteArray(static_cast(value.size())); - if (jret_value == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetByteArrayRegion( - jret_value, 0, static_cast(value.size()), - const_cast(reinterpret_cast(value.c_str()))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - if (jret_value != nullptr) { - env->DeleteLocalRef(jret_value); - } - return nullptr; - } - - return jret_value; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - /** - * Creates a vector of C++ pointers from - * a Java array of C++ pointer addresses. - * - * @param env (IN) A pointer to the java environment - * @param pointers (IN) A Java array of C++ pointer addresses - * @param has_exception (OUT) will be set to JNI_TRUE - * if an ArrayIndexOutOfBoundsException or OutOfMemoryError - * exception occurs. - * - * @return A vector of C++ pointers. 
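// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// v_op above wraps the "look up a key, return the value as byte[] or null"
// shape: it returns nullptr for Status::NotFound and throws RocksDBException
// for any other non-OK status. Sketch of a GetFromBatch-style caller (the
// lookup itself is stubbed out for the example):
#include <jni.h>
#include <string>
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
inline jbyteArray exampleGet(JNIEnv* env, jbyteArray jkey, jint jkey_len) {
  auto getter = [](ROCKSDB_NAMESPACE::Slice key, std::string* value) {
    // A real implementation would consult a DB / WriteBatchWithIndex here.
    *value = "stub-value-for-" + key.ToString();
    return ROCKSDB_NAMESPACE::Status::OK();
  };
  return ROCKSDB_NAMESPACE::JniUtil::v_op(getter, env, jkey, jkey_len);
}
// ---------------------------------------------------------------------------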
- */ - template - static std::vector fromJPointers(JNIEnv* env, jlongArray jptrs, - jboolean* has_exception) { - const jsize jptrs_len = env->GetArrayLength(jptrs); - std::vector ptrs; - jlong* jptr = env->GetLongArrayElements(jptrs, nullptr); - if (jptr == nullptr) { - // exception thrown: OutOfMemoryError - *has_exception = JNI_TRUE; - return ptrs; - } - ptrs.reserve(jptrs_len); - for (jsize i = 0; i < jptrs_len; i++) { - ptrs.push_back(reinterpret_cast(jptr[i])); - } - env->ReleaseLongArrayElements(jptrs, jptr, JNI_ABORT); - return ptrs; - } - - /** - * Creates a Java array of C++ pointer addresses - * from a vector of C++ pointers. - * - * @param env (IN) A pointer to the java environment - * @param pointers (IN) A vector of C++ pointers - * @param has_exception (OUT) will be set to JNI_TRUE - * if an ArrayIndexOutOfBoundsException or OutOfMemoryError - * exception occurs - * - * @return Java array of C++ pointer addresses. - */ - template - static jlongArray toJPointers(JNIEnv* env, const std::vector& pointers, - jboolean* has_exception) { - const jsize len = static_cast(pointers.size()); - std::unique_ptr results(new jlong[len]); - std::transform( - pointers.begin(), pointers.end(), results.get(), - [](T* pointer) -> jlong { return GET_CPLUSPLUS_POINTER(pointer); }); - - jlongArray jpointers = env->NewLongArray(len); - if (jpointers == nullptr) { - // exception thrown: OutOfMemoryError - *has_exception = JNI_TRUE; - return nullptr; - } - - env->SetLongArrayRegion(jpointers, 0, len, results.get()); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - *has_exception = JNI_TRUE; - env->DeleteLocalRef(jpointers); - return nullptr; - } - - *has_exception = JNI_FALSE; - - return jpointers; - } - - /* - * Helper for operations on a key and value - * for example WriteBatch->Put - * - * TODO(AR) could be extended to cover returning ROCKSDB_NAMESPACE::Status - * from `op` and used for RocksDB->Put etc. - */ - static void kv_op_direct( - std::function - op, - JNIEnv* env, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, - jint jval_off, jint jval_len) { - char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); - if (key == nullptr || - env->GetDirectBufferCapacity(jkey) < (jkey_off + jkey_len)) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, - "Invalid key argument"); - return; - } - - char* value = reinterpret_cast(env->GetDirectBufferAddress(jval)); - if (value == nullptr || - env->GetDirectBufferCapacity(jval) < (jval_off + jval_len)) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "Invalid value argument"); - return; - } - - key += jkey_off; - value += jval_off; - - ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); - ROCKSDB_NAMESPACE::Slice value_slice(value, jval_len); - - op(key_slice, value_slice); - } - - /* - * Helper for operations on a key and value - * for example WriteBatch->Delete - * - * TODO(AR) could be extended to cover returning ROCKSDB_NAMESPACE::Status - * from `op` and used for RocksDB->Delete etc. 
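// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// toJPointers/fromJPointers above shuttle C++ object addresses across the JNI
// boundary as a Java long[]. Example of the outbound direction, using
// ColumnFamilyHandle pointers purely as an illustration; any T* works:
#include <jni.h>
#include <vector>
#include "rocksdb/db.h"
inline jlongArray handlesToJava(
    JNIEnv* env,
    const std::vector<ROCKSDB_NAMESPACE::ColumnFamilyHandle*>& handles) {
  jboolean has_exception = JNI_FALSE;
  jlongArray jhandles =
      ROCKSDB_NAMESPACE::JniUtil::toJPointers(env, handles, &has_exception);
  return has_exception == JNI_TRUE ? nullptr : jhandles;
}
// ---------------------------------------------------------------------------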
- */ - static void k_op_direct(std::function op, - JNIEnv* env, jobject jkey, jint jkey_off, - jint jkey_len) { - char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); - if (key == nullptr || - env->GetDirectBufferCapacity(jkey) < (jkey_off + jkey_len)) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, - "Invalid key argument"); - return; - } - - key += jkey_off; - - ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); - - return op(key_slice); - } - - template - static jint copyToDirect(JNIEnv* env, T& source, jobject jtarget, - jint jtarget_off, jint jtarget_len) { - char* target = - reinterpret_cast(env->GetDirectBufferAddress(jtarget)); - if (target == nullptr || - env->GetDirectBufferCapacity(jtarget) < (jtarget_off + jtarget_len)) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "Invalid target argument"); - return 0; - } - - target += jtarget_off; - - const jint cvalue_len = static_cast(source.size()); - const jint length = std::min(jtarget_len, cvalue_len); - - memcpy(target, source.data(), length); - - return cvalue_len; - } -}; - -class MapJni : public JavaClass { - public: - /** - * Get the Java Class java.util.Map - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/util/Map"); - } - - /** - * Get the Java Method: Map#put - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getMapPutMethodId(JNIEnv* env) { - jclass jlist_clazz = getJClass(env); - if (jlist_clazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID( - jlist_clazz, "put", - "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"); - assert(mid != nullptr); - return mid; - } -}; - -class HashMapJni : public JavaClass { - public: - /** - * Get the Java Class java.util.HashMap - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "java/util/HashMap"); - } - - /** - * Create a new Java java.util.HashMap object. 
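// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// copyToDirect above copies as much of `source` as fits into the direct
// ByteBuffer region but returns the full source size, so the Java side can
// detect truncation by comparing the return value with the buffer length.
#include <jni.h>
#include <string>
inline jint copyValueOut(JNIEnv* env, const std::string& value,
                         jobject jtarget, jint jtarget_off, jint jtarget_len) {
  std::string copy = value;  // copyToDirect takes a non-const reference
  // The return value may exceed jtarget_len; that signals a truncated copy.
  return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, copy, jtarget,
                                                  jtarget_off, jtarget_len);
}
// ---------------------------------------------------------------------------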
- * - * @param env A pointer to the Java environment - * - * @return A reference to a Java java.util.HashMap object, or - * nullptr if an an exception occurs - */ - static jobject construct(JNIEnv* env, const uint32_t initial_capacity = 16) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID(jclazz, "", "(I)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jobject jhash_map = - env->NewObject(jclazz, mid, static_cast(initial_capacity)); - if (env->ExceptionCheck()) { - return nullptr; - } - - return jhash_map; - } - - /** - * A function which maps a std::pair to a std::pair - * - * @return Either a pointer to a std::pair, or nullptr - * if an error occurs during the mapping - */ - template - using FnMapKV = - std::function>(const std::pair&)>; - - // template ::value_type, std::pair>::value, - // int32_t>::type = 0> static void putAll(JNIEnv* env, const jobject - // jhash_map, I iterator, const FnMapKV &fn_map_kv) { - /** - * Returns true if it succeeds, false if an error occurs - */ - template - static bool putAll(JNIEnv* env, const jobject jhash_map, - iterator_type iterator, iterator_type end, - const FnMapKV& fn_map_kv) { - const jmethodID jmid_put = - ROCKSDB_NAMESPACE::MapJni::getMapPutMethodId(env); - if (jmid_put == nullptr) { - return false; - } - - for (auto it = iterator; it != end; ++it) { - const std::unique_ptr> result = - fn_map_kv(*it); - if (result == nullptr) { - // an error occurred during fn_map_kv - return false; - } - env->CallObjectMethod(jhash_map, jmid_put, result->first, result->second); - if (env->ExceptionCheck()) { - // exception occurred - env->DeleteLocalRef(result->second); - env->DeleteLocalRef(result->first); - return false; - } - - // release local references - env->DeleteLocalRef(result->second); - env->DeleteLocalRef(result->first); - } - - return true; - } - - /** - * Creates a java.util.Map from a std::map - * - * @param env A pointer to the Java environment - * @param map the Cpp map - * - * @return a reference to the Java java.util.Map object, or nullptr if an - * exception occcurred - */ - static jobject fromCppMap(JNIEnv* env, - const std::map* map) { - if (map == nullptr) { - return nullptr; - } - - jobject jhash_map = construct(env, static_cast(map->size())); - if (jhash_map == nullptr) { - // exception occurred - return nullptr; - } - - const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< - const std::string, const std::string, jobject, jobject> - fn_map_kv = - [env](const std::pair& kv) { - jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &(kv.first), false); - if (env->ExceptionCheck()) { - // an error occurred - return std::unique_ptr>(nullptr); - } - - jstring jvalue = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &(kv.second), true); - if (env->ExceptionCheck()) { - // an error occurred - env->DeleteLocalRef(jkey); - return std::unique_ptr>(nullptr); - } - - return std::unique_ptr>( - new std::pair( - static_cast(jkey), - static_cast(jvalue))); - }; - - if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) { - // exception occurred - return nullptr; - } - - return jhash_map; - } - - /** - * Creates a java.util.Map from a std::map - * - * @param env A pointer to the Java environment - * @param map the Cpp map - * - * @return a reference to the Java java.util.Map object, or nullptr if an - * exception occcurred - */ - static jobject 
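// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// fromCppMap above builds a java.util.HashMap sized to the C++ map and fills
// it via Map#put for every entry. Typical use when returning string-keyed
// properties to Java:
#include <jni.h>
#include <map>
#include <string>
inline jobject propertiesToJava(
    JNIEnv* env, const std::map<std::string, std::string>& props) {
  // Returns nullptr (with a pending Java exception) if construction of the
  // HashMap or any Map#put call fails.
  return ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, &props);
}
// ---------------------------------------------------------------------------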
fromCppMap(JNIEnv* env, - const std::map* map) { - if (map == nullptr) { - return nullptr; - } - - if (map == nullptr) { - return nullptr; - } - - jobject jhash_map = construct(env, static_cast(map->size())); - if (jhash_map == nullptr) { - // exception occurred - return nullptr; - } - - const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< - const std::string, const uint32_t, jobject, jobject> - fn_map_kv = - [env](const std::pair& kv) { - jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &(kv.first), false); - if (env->ExceptionCheck()) { - // an error occurred - return std::unique_ptr>(nullptr); - } - - jobject jvalue = ROCKSDB_NAMESPACE::IntegerJni::valueOf( - env, static_cast(kv.second)); - if (env->ExceptionCheck()) { - // an error occurred - env->DeleteLocalRef(jkey); - return std::unique_ptr>(nullptr); - } - - return std::unique_ptr>( - new std::pair(static_cast(jkey), - jvalue)); - }; - - if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) { - // exception occurred - return nullptr; - } - - return jhash_map; - } - - /** - * Creates a java.util.Map from a std::map - * - * @param env A pointer to the Java environment - * @param map the Cpp map - * - * @return a reference to the Java java.util.Map object, or nullptr if an - * exception occcurred - */ - static jobject fromCppMap(JNIEnv* env, - const std::map* map) { - if (map == nullptr) { - return nullptr; - } - - jobject jhash_map = construct(env, static_cast(map->size())); - if (jhash_map == nullptr) { - // exception occurred - return nullptr; - } - - const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< - const std::string, const uint64_t, jobject, jobject> - fn_map_kv = - [env](const std::pair& kv) { - jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &(kv.first), false); - if (env->ExceptionCheck()) { - // an error occurred - return std::unique_ptr>(nullptr); - } - - jobject jvalue = ROCKSDB_NAMESPACE::LongJni::valueOf( - env, static_cast(kv.second)); - if (env->ExceptionCheck()) { - // an error occurred - env->DeleteLocalRef(jkey); - return std::unique_ptr>(nullptr); - } - - return std::unique_ptr>( - new std::pair(static_cast(jkey), - jvalue)); - }; - - if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) { - // exception occurred - return nullptr; - } - - return jhash_map; - } - - /** - * Creates a java.util.Map from a std::map - * - * @param env A pointer to the Java environment - * @param map the Cpp map - * - * @return a reference to the Java java.util.Map object, or nullptr if an - * exception occcurred - */ - static jobject fromCppMap(JNIEnv* env, - const std::map* map) { - if (map == nullptr) { - return nullptr; - } - - jobject jhash_map = construct(env, static_cast(map->size())); - if (jhash_map == nullptr) { - // exception occurred - return nullptr; - } - - const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV - fn_map_kv = [env](const std::pair& kv) { - jobject jkey = ROCKSDB_NAMESPACE::IntegerJni::valueOf( - env, static_cast(kv.first)); - if (env->ExceptionCheck()) { - // an error occurred - return std::unique_ptr>(nullptr); - } - - jobject jvalue = ROCKSDB_NAMESPACE::LongJni::valueOf( - env, static_cast(kv.second)); - if (env->ExceptionCheck()) { - // an error occurred - env->DeleteLocalRef(jkey); - return std::unique_ptr>(nullptr); - } - - return std::unique_ptr>( - new std::pair(static_cast(jkey), - jvalue)); - }; - - if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) { - // exception occurred - return nullptr; - } - - return jhash_map; - } -}; - -// The portal class 
for org.rocksdb.RocksDB -class RocksDBJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.RocksDB - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/RocksDB"); - } -}; - -// The portal class for org.rocksdb.Options -class OptionsJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.Options - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Options"); - } -}; - -// The portal class for org.rocksdb.DBOptions -class DBOptionsJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.DBOptions - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/DBOptions"); - } -}; - -// The portal class for org.rocksdb.ColumnFamilyOptions -class ColumnFamilyOptionsJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.ColumnFamilyOptions - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, - "org/rocksdb/ColumnFamilyOptions"); - } - - /** - * Create a new Java org.rocksdb.ColumnFamilyOptions object with the same - * properties as the provided C++ ROCKSDB_NAMESPACE::ColumnFamilyOptions - * object - * - * @param env A pointer to the Java environment - * @param cfoptions A pointer to ROCKSDB_NAMESPACE::ColumnFamilyOptions object - * - * @return A reference to a Java org.rocksdb.ColumnFamilyOptions object, or - * nullptr if an an exception occurs - */ - static jobject construct(JNIEnv* env, const ColumnFamilyOptions* cfoptions) { - auto* cfo = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(*cfoptions); - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID(jclazz, "", "(J)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jobject jcfd = env->NewObject(jclazz, mid, GET_CPLUSPLUS_POINTER(cfo)); - if (env->ExceptionCheck()) { - return nullptr; - } - - return jcfd; - } -}; - -// The portal class for org.rocksdb.WriteOptions -class WriteOptionsJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.WriteOptions - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or 
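// ---------------------------------------------------------------------------
// [Annotation: illustrative sketch, not part of the file being removed.]
// ColumnFamilyOptionsJni::construct above copies the given C++ options onto
// the heap and passes the address to the org.rocksdb.ColumnFamilyOptions(long)
// constructor, so the resulting Java object is expected to take ownership of
// that copy and dispose it later. Sketch of handing options back to Java:
#include <jni.h>
#include "rocksdb/options.h"
inline jobject cfOptionsToJava(
    JNIEnv* env, const ROCKSDB_NAMESPACE::ColumnFamilyOptions& cf_opts) {
  // nullptr means a Java exception is pending (missing class/ctor or OOM).
  return ROCKSDB_NAMESPACE::ColumnFamilyOptionsJni::construct(env, &cf_opts);
}
// ---------------------------------------------------------------------------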
ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteOptions"); - } -}; - -// The portal class for org.rocksdb.ReadOptions -class ReadOptionsJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.ReadOptions - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/ReadOptions"); - } -}; - -// The portal class for org.rocksdb.WriteBatch -class WriteBatchJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.WriteBatch - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteBatch"); - } - - /** - * Create a new Java org.rocksdb.WriteBatch object - * - * @param env A pointer to the Java environment - * @param wb A pointer to ROCKSDB_NAMESPACE::WriteBatch object - * - * @return A reference to a Java org.rocksdb.WriteBatch object, or - * nullptr if an an exception occurs - */ - static jobject construct(JNIEnv* env, const WriteBatch* wb) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID(jclazz, "", "(J)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jobject jwb = env->NewObject(jclazz, mid, GET_CPLUSPLUS_POINTER(wb)); - if (env->ExceptionCheck()) { - return nullptr; - } - - return jwb; - } -}; - -// The portal class for org.rocksdb.WriteBatch.Handler -class WriteBatchHandlerJni - : public RocksDBNativeClass< - const ROCKSDB_NAMESPACE::WriteBatchHandlerJniCallback*, - WriteBatchHandlerJni> { - public: - /** - * Get the Java Class org.rocksdb.WriteBatch.Handler - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteBatch$Handler"); - } - - /** - * Get the Java Method: WriteBatch.Handler#put - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getPutCfMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "put", "(I[B[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#put - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getPutMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred 
accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "put", "([B[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#merge - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getMergeCfMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "merge", "(I[B[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#merge - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getMergeMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "merge", "([B[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#delete - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getDeleteCfMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "delete", "(I[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#delete - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getDeleteMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "delete", "([B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#singleDelete - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getSingleDeleteCfMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "singleDelete", "(I[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#singleDelete - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getSingleDeleteMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "singleDelete", "([B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#deleteRange - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getDeleteRangeCfMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - 
if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "deleteRange", "(I[B[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#deleteRange - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getDeleteRangeMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "deleteRange", "([B[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#logData - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getLogDataMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "logData", "([B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#putBlobIndex - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getPutBlobIndexCfMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "putBlobIndex", "(I[B[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#markBeginPrepare - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getMarkBeginPrepareMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "markBeginPrepare", "()V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#markEndPrepare - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getMarkEndPrepareMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "markEndPrepare", "([B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#markNoop - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getMarkNoopMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "markNoop", "(Z)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#markRollback - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - 
* be retrieved - */ - static jmethodID getMarkRollbackMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "markRollback", "([B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#markCommit - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getMarkCommitMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "markCommit", "([B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#markCommitWithTimestamp - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getMarkCommitWithTimestampMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "markCommitWithTimestamp", "([B[B)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: WriteBatch.Handler#shouldContinue - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getContinueMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "shouldContinue", "()Z"); - assert(mid != nullptr); - return mid; - } -}; - -class WriteBatchSavePointJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.WriteBatch.SavePoint - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/WriteBatch$SavePoint"); - } - - /** - * Get the Java Method: HistogramData constructor - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getConstructorMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "", "(JJJ)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Create a new Java org.rocksdb.WriteBatch.SavePoint object - * - * @param env A pointer to the Java environment - * @param savePoint A pointer to ROCKSDB_NAMESPACE::WriteBatch::SavePoint - * object - * - * @return A reference to a Java org.rocksdb.WriteBatch.SavePoint object, or - * nullptr if an an exception occurs - */ - static jobject construct(JNIEnv* env, const SavePoint& save_point) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = getConstructorMethodId(env); - if (mid == nullptr) { - // exception 
thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jobject jsave_point = - env->NewObject(jclazz, mid, static_cast(save_point.size), - static_cast(save_point.count), - static_cast(save_point.content_flags)); - if (env->ExceptionCheck()) { - return nullptr; - } - - return jsave_point; - } -}; - -// The portal class for org.rocksdb.WriteBatchWithIndex -class WriteBatchWithIndexJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.WriteBatchWithIndex - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, - "org/rocksdb/WriteBatchWithIndex"); - } -}; - -// The portal class for org.rocksdb.HistogramData -class HistogramDataJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.HistogramData - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/HistogramData"); - } - - /** - * Get the Java Method: HistogramData constructor - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getConstructorMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "", "(DDDDDDJJD)V"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.BackupEngineOptions -class BackupEngineOptionsJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.BackupEngineOptions - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, - "org/rocksdb/BackupEngineOptions"); - } -}; - -// The portal class for org.rocksdb.BackupEngine -class BackupEngineJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.BackupEngine - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/BackupEngine"); - } -}; - -// The portal class for org.rocksdb.RocksIterator -class IteratorJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.RocksIterator - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return 
RocksDBNativeClass::getJClass(env, "org/rocksdb/RocksIterator"); - } -}; - -// The portal class for org.rocksdb.Filter -class FilterJni - : public RocksDBNativeClass< - std::shared_ptr*, FilterJni> { - public: - /** - * Get the Java Class org.rocksdb.Filter - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Filter"); - } -}; - -// The portal class for org.rocksdb.ColumnFamilyHandle -class ColumnFamilyHandleJni - : public RocksDBNativeClass { - public: - static jobject fromCppColumnFamilyHandle( - JNIEnv* env, const ROCKSDB_NAMESPACE::ColumnFamilyHandle* info) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - return env->NewObject(jclazz, ctor, GET_CPLUSPLUS_POINTER(info)); - } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID(clazz, "", "(J)V"); - } - - /** - * Get the Java Class org.rocksdb.ColumnFamilyHandle - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/ColumnFamilyHandle"); - } -}; - -// The portal class for org.rocksdb.FlushOptions -class FlushOptionsJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.FlushOptions - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/FlushOptions"); - } -}; - -// The portal class for org.rocksdb.ComparatorOptions -class ComparatorOptionsJni - : public RocksDBNativeClass< - ROCKSDB_NAMESPACE::ComparatorJniCallbackOptions*, - ComparatorOptionsJni> { - public: - /** - * Get the Java Class org.rocksdb.ComparatorOptions - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/ComparatorOptions"); - } -}; - -// The portal class for org.rocksdb.AbstractCompactionFilterFactory -class AbstractCompactionFilterFactoryJni - : public RocksDBNativeClass< - const ROCKSDB_NAMESPACE::CompactionFilterFactoryJniCallback*, - AbstractCompactionFilterFactoryJni> { - public: - /** - * Get the Java Class org.rocksdb.AbstractCompactionFilterFactory - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass( - env, 
"org/rocksdb/AbstractCompactionFilterFactory"); - } - - /** - * Get the Java Method: AbstractCompactionFilterFactory#name - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getNameMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "name", "()Ljava/lang/String;"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractCompactionFilterFactory#createCompactionFilter - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getCreateCompactionFilterMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "createCompactionFilter", "(ZZ)J"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.AbstractTransactionNotifier -class AbstractTransactionNotifierJni - : public RocksDBNativeClass< - const ROCKSDB_NAMESPACE::TransactionNotifierJniCallback*, - AbstractTransactionNotifierJni> { - public: - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass( - env, "org/rocksdb/AbstractTransactionNotifier"); - } - - // Get the java method `snapshotCreated` - // of org.rocksdb.AbstractTransactionNotifier. - static jmethodID getSnapshotCreatedMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "snapshotCreated", "(J)V"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.AbstractComparatorJniBridge -class AbstractComparatorJniBridge : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.AbstractComparatorJniBridge - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/AbstractComparatorJniBridge"); - } - - /** - * Get the Java Method: Comparator#compareInternal - * - * @param env A pointer to the Java environment - * @param jclazz the AbstractComparatorJniBridge class - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getCompareInternalMethodId(JNIEnv* env, jclass jclazz) { - static jmethodID mid = - env->GetStaticMethodID(jclazz, "compareInternal", - "(Lorg/rocksdb/AbstractComparator;Ljava/nio/" - "ByteBuffer;ILjava/nio/ByteBuffer;I)I"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: Comparator#findShortestSeparatorInternal - * - * @param env A pointer to the Java environment - * @param jclazz the AbstractComparatorJniBridge class - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getFindShortestSeparatorInternalMethodId(JNIEnv* env, - jclass jclazz) { - static jmethodID mid = - env->GetStaticMethodID(jclazz, "findShortestSeparatorInternal", - 
"(Lorg/rocksdb/AbstractComparator;Ljava/nio/" - "ByteBuffer;ILjava/nio/ByteBuffer;I)I"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: Comparator#findShortSuccessorInternal - * - * @param env A pointer to the Java environment - * @param jclazz the AbstractComparatorJniBridge class - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getFindShortSuccessorInternalMethodId(JNIEnv* env, - jclass jclazz) { - static jmethodID mid = env->GetStaticMethodID( - jclazz, "findShortSuccessorInternal", - "(Lorg/rocksdb/AbstractComparator;Ljava/nio/ByteBuffer;I)I"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.AbstractComparator -class AbstractComparatorJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.AbstractComparator - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractComparator"); - } - - /** - * Get the Java Method: Comparator#name - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getNameMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "name", "()Ljava/lang/String;"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.AbstractSlice -class AbstractSliceJni - : public NativeRocksMutableObject { - public: - /** - * Get the Java Class org.rocksdb.AbstractSlice - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractSlice"); - } -}; - -// The portal class for org.rocksdb.Slice -class SliceJni - : public NativeRocksMutableObject { - public: - /** - * Get the Java Class org.rocksdb.Slice - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Slice"); - } - - /** - * Constructs a Slice object - * - * @param env A pointer to the Java environment - * - * @return A reference to a Java Slice object, or a nullptr if an - * exception occurs - */ - static jobject construct0(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "", "()V"); - if (mid == nullptr) { - // exception occurred accessing method - return nullptr; - } - - jobject jslice = env->NewObject(jclazz, mid); - if (env->ExceptionCheck()) { - return nullptr; - } - - return jslice; - } -}; - -// The portal class for org.rocksdb.DirectSlice 
-class DirectSliceJni - : public NativeRocksMutableObject { - public: - /** - * Get the Java Class org.rocksdb.DirectSlice - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/DirectSlice"); - } - - /** - * Constructs a DirectSlice object - * - * @param env A pointer to the Java environment - * - * @return A reference to a Java DirectSlice object, or a nullptr if an - * exception occurs - */ - static jobject construct0(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "", "()V"); - if (mid == nullptr) { - // exception occurred accessing method - return nullptr; - } - - jobject jdirect_slice = env->NewObject(jclazz, mid); - if (env->ExceptionCheck()) { - return nullptr; - } - - return jdirect_slice; - } -}; - -// The portal class for org.rocksdb.BackupInfo -class BackupInfoJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.BackupInfo - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/BackupInfo"); - } - - /** - * Constructs a BackupInfo object - * - * @param env A pointer to the Java environment - * @param backup_id id of the backup - * @param timestamp timestamp of the backup - * @param size size of the backup - * @param number_files number of files related to the backup - * @param app_metadata application specific metadata - * - * @return A reference to a Java BackupInfo object, or a nullptr if an - * exception occurs - */ - static jobject construct0(JNIEnv* env, uint32_t backup_id, int64_t timestamp, - uint64_t size, uint32_t number_files, - const std::string& app_metadata) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "", "(IJJILjava/lang/String;)V"); - if (mid == nullptr) { - // exception occurred accessing method - return nullptr; - } - - jstring japp_metadata = nullptr; - if (app_metadata != nullptr) { - japp_metadata = env->NewStringUTF(app_metadata.c_str()); - if (japp_metadata == nullptr) { - // exception occurred creating java string - return nullptr; - } - } - - jobject jbackup_info = env->NewObject(jclazz, mid, backup_id, timestamp, - size, number_files, japp_metadata); - if (env->ExceptionCheck()) { - env->DeleteLocalRef(japp_metadata); - return nullptr; - } - - return jbackup_info; - } -}; - -class BackupInfoListJni { - public: - /** - * Converts a C++ std::vector object to - * a Java ArrayList object - * - * @param env A pointer to the Java environment - * @param backup_infos A vector of BackupInfo - * - * @return Either a reference to a Java ArrayList object, or a nullptr - * if an exception occurs - */ - static jobject getBackupInfo(JNIEnv* env, - std::vector backup_infos) { - jclass jarray_list_clazz = - ROCKSDB_NAMESPACE::ListJni::getArrayListClass(env); - if (jarray_list_clazz == nullptr) { - // 
exception occurred accessing class - return nullptr; - } - - jmethodID cstr_mid = - ROCKSDB_NAMESPACE::ListJni::getArrayListConstructorMethodId(env); - if (cstr_mid == nullptr) { - // exception occurred accessing method - return nullptr; - } - - jmethodID add_mid = ROCKSDB_NAMESPACE::ListJni::getListAddMethodId(env); - if (add_mid == nullptr) { - // exception occurred accessing method - return nullptr; - } - - // create java list - jobject jbackup_info_handle_list = - env->NewObject(jarray_list_clazz, cstr_mid, backup_infos.size()); - if (env->ExceptionCheck()) { - // exception occurred constructing object - return nullptr; - } - - // insert in java list - auto end = backup_infos.end(); - for (auto it = backup_infos.begin(); it != end; ++it) { - auto backup_info = *it; - - jobject obj = ROCKSDB_NAMESPACE::BackupInfoJni::construct0( - env, backup_info.backup_id, backup_info.timestamp, backup_info.size, - backup_info.number_files, backup_info.app_metadata); - if (env->ExceptionCheck()) { - // exception occurred constructing object - if (obj != nullptr) { - env->DeleteLocalRef(obj); - } - if (jbackup_info_handle_list != nullptr) { - env->DeleteLocalRef(jbackup_info_handle_list); - } - return nullptr; - } - - jboolean rs = - env->CallBooleanMethod(jbackup_info_handle_list, add_mid, obj); - if (env->ExceptionCheck() || rs == JNI_FALSE) { - // exception occurred calling method, or could not add - if (obj != nullptr) { - env->DeleteLocalRef(obj); - } - if (jbackup_info_handle_list != nullptr) { - env->DeleteLocalRef(jbackup_info_handle_list); - } - return nullptr; - } - } - - return jbackup_info_handle_list; - } -}; - -// The portal class for org.rocksdb.WBWIRocksIterator -class WBWIRocksIteratorJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.WBWIRocksIterator - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/WBWIRocksIterator"); - } - - /** - * Get the Java Field: WBWIRocksIterator#entry - * - * @param env A pointer to the Java environment - * - * @return The Java Field ID or nullptr if the class or field id could not - * be retrieved - */ - static jfieldID getWriteEntryField(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jfieldID fid = env->GetFieldID( - jclazz, "entry", "Lorg/rocksdb/WBWIRocksIterator$WriteEntry;"); - assert(fid != nullptr); - return fid; - } - - /** - * Gets the value of the WBWIRocksIterator#entry - * - * @param env A pointer to the Java environment - * @param jwbwi_rocks_iterator A reference to a WBWIIterator - * - * @return A reference to a Java WBWIRocksIterator.WriteEntry object, or - * a nullptr if an exception occurs - */ - static jobject getWriteEntry(JNIEnv* env, jobject jwbwi_rocks_iterator) { - assert(jwbwi_rocks_iterator != nullptr); - - jfieldID jwrite_entry_field = getWriteEntryField(env); - if (jwrite_entry_field == nullptr) { - // exception occurred accessing the field - return nullptr; - } - - jobject jwe = env->GetObjectField(jwbwi_rocks_iterator, jwrite_entry_field); - assert(jwe != nullptr); - return jwe; - } -}; - -// The portal class for org.rocksdb.WBWIRocksIterator.WriteType -class WriteTypeJni : public JavaClass { - public: - /** - * 
Get the PUT enum field value of WBWIRocksIterator.WriteType - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject PUT(JNIEnv* env) { return getEnum(env, "PUT"); } - - /** - * Get the MERGE enum field value of WBWIRocksIterator.WriteType - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject MERGE(JNIEnv* env) { return getEnum(env, "MERGE"); } - - /** - * Get the DELETE enum field value of WBWIRocksIterator.WriteType - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject DELETE(JNIEnv* env) { return getEnum(env, "DELETE"); } - - /** - * Get the LOG enum field value of WBWIRocksIterator.WriteType - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject LOG(JNIEnv* env) { return getEnum(env, "LOG"); } - - // Returns the equivalent org.rocksdb.WBWIRocksIterator.WriteType for the - // provided C++ ROCKSDB_NAMESPACE::WriteType enum - static jbyte toJavaWriteType(const ROCKSDB_NAMESPACE::WriteType& writeType) { - switch (writeType) { - case ROCKSDB_NAMESPACE::WriteType::kPutRecord: - return 0x0; - case ROCKSDB_NAMESPACE::WriteType::kMergeRecord: - return 0x1; - case ROCKSDB_NAMESPACE::WriteType::kDeleteRecord: - return 0x2; - case ROCKSDB_NAMESPACE::WriteType::kSingleDeleteRecord: - return 0x3; - case ROCKSDB_NAMESPACE::WriteType::kDeleteRangeRecord: - return 0x4; - case ROCKSDB_NAMESPACE::WriteType::kLogDataRecord: - return 0x5; - case ROCKSDB_NAMESPACE::WriteType::kXIDRecord: - return 0x6; - default: - return 0x7F; // undefined - } - } - - private: - /** - * Get the Java Class org.rocksdb.WBWIRocksIterator.WriteType - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/WBWIRocksIterator$WriteType"); - } - - /** - * Get an enum field of org.rocksdb.WBWIRocksIterator.WriteType - * - * @param env A pointer to the Java environment - * @param name The name of the enum field - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject getEnum(JNIEnv* env, const char name[]) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jfieldID jfid = env->GetStaticFieldID( - jclazz, name, "Lorg/rocksdb/WBWIRocksIterator$WriteType;"); - if (env->ExceptionCheck()) { - // exception occurred while getting field - return nullptr; - } else if (jfid == nullptr) { - return nullptr; - } - - jobject jwrite_type = env->GetStaticObjectField(jclazz, jfid); - assert(jwrite_type != nullptr); - return jwrite_type; - } -}; - -// The portal class for org.rocksdb.WBWIRocksIterator.WriteEntry -class WriteEntryJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.WBWIRocksIterator.WriteEntry - * - * @param env A pointer to the Java 
environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, - "org/rocksdb/WBWIRocksIterator$WriteEntry"); - } -}; - -// The portal class for org.rocksdb.InfoLogLevel -class InfoLogLevelJni : public JavaClass { - public: - /** - * Get the DEBUG_LEVEL enum field value of InfoLogLevel - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject DEBUG_LEVEL(JNIEnv* env) { - return getEnum(env, "DEBUG_LEVEL"); - } - - /** - * Get the INFO_LEVEL enum field value of InfoLogLevel - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject INFO_LEVEL(JNIEnv* env) { return getEnum(env, "INFO_LEVEL"); } - - /** - * Get the WARN_LEVEL enum field value of InfoLogLevel - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject WARN_LEVEL(JNIEnv* env) { return getEnum(env, "WARN_LEVEL"); } - - /** - * Get the ERROR_LEVEL enum field value of InfoLogLevel - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject ERROR_LEVEL(JNIEnv* env) { - return getEnum(env, "ERROR_LEVEL"); - } - - /** - * Get the FATAL_LEVEL enum field value of InfoLogLevel - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject FATAL_LEVEL(JNIEnv* env) { - return getEnum(env, "FATAL_LEVEL"); - } - - /** - * Get the HEADER_LEVEL enum field value of InfoLogLevel - * - * @param env A pointer to the Java environment - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject HEADER_LEVEL(JNIEnv* env) { - return getEnum(env, "HEADER_LEVEL"); - } - - private: - /** - * Get the Java Class org.rocksdb.InfoLogLevel - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/InfoLogLevel"); - } - - /** - * Get an enum field of org.rocksdb.InfoLogLevel - * - * @param env A pointer to the Java environment - * @param name The name of the enum field - * - * @return A reference to the enum field value or a nullptr if - * the enum field value could not be retrieved - */ - static jobject getEnum(JNIEnv* env, const char name[]) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jfieldID jfid = - env->GetStaticFieldID(jclazz, name, "Lorg/rocksdb/InfoLogLevel;"); - if (env->ExceptionCheck()) { - // exception occurred while getting field - return nullptr; - } else if (jfid == nullptr) { - return nullptr; - } - - jobject jinfo_log_level = 
env->GetStaticObjectField(jclazz, jfid); - assert(jinfo_log_level != nullptr); - return jinfo_log_level; - } -}; - -// The portal class for org.rocksdb.Logger -class LoggerJni - : public RocksDBNativeClass< - std::shared_ptr*, LoggerJni> { - public: - /** - * Get the Java Class org/rocksdb/Logger - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Logger"); - } - - /** - * Get the Java Method: Logger#log - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getLogMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID( - jclazz, "log", "(Lorg/rocksdb/InfoLogLevel;Ljava/lang/String;)V"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.TransactionLogIterator.BatchResult -class BatchResultJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.TransactionLogIterator.BatchResult - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass( - env, "org/rocksdb/TransactionLogIterator$BatchResult"); - } - - /** - * Create a new Java org.rocksdb.TransactionLogIterator.BatchResult object - * with the same properties as the provided C++ ROCKSDB_NAMESPACE::BatchResult - * object - * - * @param env A pointer to the Java environment - * @param batch_result The ROCKSDB_NAMESPACE::BatchResult object - * - * @return A reference to a Java - * org.rocksdb.TransactionLogIterator.BatchResult object, - * or nullptr if an an exception occurs - */ - static jobject construct(JNIEnv* env, - ROCKSDB_NAMESPACE::BatchResult& batch_result) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID(jclazz, "", "(JJ)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jobject jbatch_result = env->NewObject(jclazz, mid, batch_result.sequence, - batch_result.writeBatchPtr.get()); - if (jbatch_result == nullptr) { - // exception thrown: InstantiationException or OutOfMemoryError - return nullptr; - } - - batch_result.writeBatchPtr.release(); - return jbatch_result; - } -}; - -// The portal class for org.rocksdb.BottommostLevelCompaction -class BottommostLevelCompactionJni { - public: - // Returns the equivalent org.rocksdb.BottommostLevelCompaction for the - // provided C++ ROCKSDB_NAMESPACE::BottommostLevelCompaction enum - static jint toJavaBottommostLevelCompaction( - const ROCKSDB_NAMESPACE::BottommostLevelCompaction& - bottommost_level_compaction) { - switch (bottommost_level_compaction) { - case ROCKSDB_NAMESPACE::BottommostLevelCompaction::kSkip: - return 0x0; - case ROCKSDB_NAMESPACE::BottommostLevelCompaction:: - kIfHaveCompactionFilter: - return 0x1; - case 
ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForce: - return 0x2; - case ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForceOptimized: - return 0x3; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::BottommostLevelCompaction - // enum for the provided Java org.rocksdb.BottommostLevelCompaction - static ROCKSDB_NAMESPACE::BottommostLevelCompaction - toCppBottommostLevelCompaction(jint bottommost_level_compaction) { - switch (bottommost_level_compaction) { - case 0x0: - return ROCKSDB_NAMESPACE::BottommostLevelCompaction::kSkip; - case 0x1: - return ROCKSDB_NAMESPACE::BottommostLevelCompaction:: - kIfHaveCompactionFilter; - case 0x2: - return ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForce; - case 0x3: - return ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForceOptimized; - default: - // undefined/default - return ROCKSDB_NAMESPACE::BottommostLevelCompaction:: - kIfHaveCompactionFilter; - } - } -}; - -// The portal class for org.rocksdb.CompactionStopStyle -class CompactionStopStyleJni { - public: - // Returns the equivalent org.rocksdb.CompactionStopStyle for the provided - // C++ ROCKSDB_NAMESPACE::CompactionStopStyle enum - static jbyte toJavaCompactionStopStyle( - const ROCKSDB_NAMESPACE::CompactionStopStyle& compaction_stop_style) { - switch (compaction_stop_style) { - case ROCKSDB_NAMESPACE::CompactionStopStyle:: - kCompactionStopStyleSimilarSize: - return 0x0; - case ROCKSDB_NAMESPACE::CompactionStopStyle:: - kCompactionStopStyleTotalSize: - return 0x1; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionStopStyle enum for - // the provided Java org.rocksdb.CompactionStopStyle - static ROCKSDB_NAMESPACE::CompactionStopStyle toCppCompactionStopStyle( - jbyte jcompaction_stop_style) { - switch (jcompaction_stop_style) { - case 0x0: - return ROCKSDB_NAMESPACE::CompactionStopStyle:: - kCompactionStopStyleSimilarSize; - case 0x1: - return ROCKSDB_NAMESPACE::CompactionStopStyle:: - kCompactionStopStyleTotalSize; - default: - // undefined/default - return ROCKSDB_NAMESPACE::CompactionStopStyle:: - kCompactionStopStyleSimilarSize; - } - } -}; - -// The portal class for org.rocksdb.CompressionType -class CompressionTypeJni { - public: - // Returns the equivalent org.rocksdb.CompressionType for the provided - // C++ ROCKSDB_NAMESPACE::CompressionType enum - static jbyte toJavaCompressionType( - const ROCKSDB_NAMESPACE::CompressionType& compression_type) { - switch (compression_type) { - case ROCKSDB_NAMESPACE::CompressionType::kNoCompression: - return 0x0; - case ROCKSDB_NAMESPACE::CompressionType::kSnappyCompression: - return 0x1; - case ROCKSDB_NAMESPACE::CompressionType::kZlibCompression: - return 0x2; - case ROCKSDB_NAMESPACE::CompressionType::kBZip2Compression: - return 0x3; - case ROCKSDB_NAMESPACE::CompressionType::kLZ4Compression: - return 0x4; - case ROCKSDB_NAMESPACE::CompressionType::kLZ4HCCompression: - return 0x5; - case ROCKSDB_NAMESPACE::CompressionType::kXpressCompression: - return 0x6; - case ROCKSDB_NAMESPACE::CompressionType::kZSTD: - return 0x7; - case ROCKSDB_NAMESPACE::CompressionType::kDisableCompressionOption: - default: - return 0x7F; - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompressionType enum for the - // provided Java org.rocksdb.CompressionType - static ROCKSDB_NAMESPACE::CompressionType toCppCompressionType( - jbyte jcompression_type) { - switch (jcompression_type) { - case 0x0: - return 
ROCKSDB_NAMESPACE::CompressionType::kNoCompression; - case 0x1: - return ROCKSDB_NAMESPACE::CompressionType::kSnappyCompression; - case 0x2: - return ROCKSDB_NAMESPACE::CompressionType::kZlibCompression; - case 0x3: - return ROCKSDB_NAMESPACE::CompressionType::kBZip2Compression; - case 0x4: - return ROCKSDB_NAMESPACE::CompressionType::kLZ4Compression; - case 0x5: - return ROCKSDB_NAMESPACE::CompressionType::kLZ4HCCompression; - case 0x6: - return ROCKSDB_NAMESPACE::CompressionType::kXpressCompression; - case 0x7: - return ROCKSDB_NAMESPACE::CompressionType::kZSTD; - case 0x7F: - default: - return ROCKSDB_NAMESPACE::CompressionType::kDisableCompressionOption; - } - } -}; - -// The portal class for org.rocksdb.CompactionPriority -class CompactionPriorityJni { - public: - // Returns the equivalent org.rocksdb.CompactionPriority for the provided - // C++ ROCKSDB_NAMESPACE::CompactionPri enum - static jbyte toJavaCompactionPriority( - const ROCKSDB_NAMESPACE::CompactionPri& compaction_priority) { - switch (compaction_priority) { - case ROCKSDB_NAMESPACE::CompactionPri::kByCompensatedSize: - return 0x0; - case ROCKSDB_NAMESPACE::CompactionPri::kOldestLargestSeqFirst: - return 0x1; - case ROCKSDB_NAMESPACE::CompactionPri::kOldestSmallestSeqFirst: - return 0x2; - case ROCKSDB_NAMESPACE::CompactionPri::kMinOverlappingRatio: - return 0x3; - case ROCKSDB_NAMESPACE::CompactionPri::kRoundRobin: - return 0x4; - default: - return 0x0; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionPri enum for the - // provided Java org.rocksdb.CompactionPriority - static ROCKSDB_NAMESPACE::CompactionPri toCppCompactionPriority( - jbyte jcompaction_priority) { - switch (jcompaction_priority) { - case 0x0: - return ROCKSDB_NAMESPACE::CompactionPri::kByCompensatedSize; - case 0x1: - return ROCKSDB_NAMESPACE::CompactionPri::kOldestLargestSeqFirst; - case 0x2: - return ROCKSDB_NAMESPACE::CompactionPri::kOldestSmallestSeqFirst; - case 0x3: - return ROCKSDB_NAMESPACE::CompactionPri::kMinOverlappingRatio; - case 0x4: - return ROCKSDB_NAMESPACE::CompactionPri::kRoundRobin; - default: - // undefined/default - return ROCKSDB_NAMESPACE::CompactionPri::kByCompensatedSize; - } - } -}; - -// The portal class for org.rocksdb.AccessHint -class AccessHintJni { - public: - // Returns the equivalent org.rocksdb.AccessHint for the provided - // C++ ROCKSDB_NAMESPACE::DBOptions::AccessHint enum - static jbyte toJavaAccessHint( - const ROCKSDB_NAMESPACE::DBOptions::AccessHint& access_hint) { - switch (access_hint) { - case ROCKSDB_NAMESPACE::DBOptions::AccessHint::NONE: - return 0x0; - case ROCKSDB_NAMESPACE::DBOptions::AccessHint::NORMAL: - return 0x1; - case ROCKSDB_NAMESPACE::DBOptions::AccessHint::SEQUENTIAL: - return 0x2; - case ROCKSDB_NAMESPACE::DBOptions::AccessHint::WILLNEED: - return 0x3; - default: - // undefined/default - return 0x1; - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::DBOptions::AccessHint enum - // for the provided Java org.rocksdb.AccessHint - static ROCKSDB_NAMESPACE::DBOptions::AccessHint toCppAccessHint( - jbyte jaccess_hint) { - switch (jaccess_hint) { - case 0x0: - return ROCKSDB_NAMESPACE::DBOptions::AccessHint::NONE; - case 0x1: - return ROCKSDB_NAMESPACE::DBOptions::AccessHint::NORMAL; - case 0x2: - return ROCKSDB_NAMESPACE::DBOptions::AccessHint::SEQUENTIAL; - case 0x3: - return ROCKSDB_NAMESPACE::DBOptions::AccessHint::WILLNEED; - default: - // undefined/default - return ROCKSDB_NAMESPACE::DBOptions::AccessHint::NORMAL; - } - } -}; - -// The portal class 
for org.rocksdb.WALRecoveryMode -class WALRecoveryModeJni { - public: - // Returns the equivalent org.rocksdb.WALRecoveryMode for the provided - // C++ ROCKSDB_NAMESPACE::WALRecoveryMode enum - static jbyte toJavaWALRecoveryMode( - const ROCKSDB_NAMESPACE::WALRecoveryMode& wal_recovery_mode) { - switch (wal_recovery_mode) { - case ROCKSDB_NAMESPACE::WALRecoveryMode::kTolerateCorruptedTailRecords: - return 0x0; - case ROCKSDB_NAMESPACE::WALRecoveryMode::kAbsoluteConsistency: - return 0x1; - case ROCKSDB_NAMESPACE::WALRecoveryMode::kPointInTimeRecovery: - return 0x2; - case ROCKSDB_NAMESPACE::WALRecoveryMode::kSkipAnyCorruptedRecords: - return 0x3; - default: - // undefined/default - return 0x2; - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::WALRecoveryMode enum for the - // provided Java org.rocksdb.WALRecoveryMode - static ROCKSDB_NAMESPACE::WALRecoveryMode toCppWALRecoveryMode( - jbyte jwal_recovery_mode) { - switch (jwal_recovery_mode) { - case 0x0: - return ROCKSDB_NAMESPACE::WALRecoveryMode:: - kTolerateCorruptedTailRecords; - case 0x1: - return ROCKSDB_NAMESPACE::WALRecoveryMode::kAbsoluteConsistency; - case 0x2: - return ROCKSDB_NAMESPACE::WALRecoveryMode::kPointInTimeRecovery; - case 0x3: - return ROCKSDB_NAMESPACE::WALRecoveryMode::kSkipAnyCorruptedRecords; - default: - // undefined/default - return ROCKSDB_NAMESPACE::WALRecoveryMode::kPointInTimeRecovery; - } - } -}; - -// The portal class for org.rocksdb.TickerType -class TickerTypeJni { - public: - // Returns the equivalent org.rocksdb.TickerType for the provided - // C++ ROCKSDB_NAMESPACE::Tickers enum - static jbyte toJavaTickerType(const ROCKSDB_NAMESPACE::Tickers& tickers) { - switch (tickers) { - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_MISS: - return 0x0; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_HIT: - return 0x1; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD: - return 0x2; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD_FAILURES: - return 0x3; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_MISS: - return 0x4; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_HIT: - return 0x5; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_ADD: - return 0x6; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_BYTES_INSERT: - return 0x7; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_MISS: - return 0x9; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_HIT: - return 0xA; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_ADD: - return 0xB; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_BYTES_INSERT: - return 0xC; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_MISS: - return 0xE; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_HIT: - return 0xF; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_ADD: - return 0x10; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_BYTES_INSERT: - return 0x11; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_READ: - return 0x12; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_WRITE: - return 0x13; - case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_USEFUL: - return 0x14; - case ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_HIT: - return 0x15; - case ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_MISS: - return 0x16; - case ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_HIT: - return 0x17; - case ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_MISS: - return 0x18; - case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_HIT: - return 0x19; - case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_MISS: - return 0x1A; - case 
ROCKSDB_NAMESPACE::Tickers::GET_HIT_L0: - return 0x1B; - case ROCKSDB_NAMESPACE::Tickers::GET_HIT_L1: - return 0x1C; - case ROCKSDB_NAMESPACE::Tickers::GET_HIT_L2_AND_UP: - return 0x1D; - case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_NEWER_ENTRY: - return 0x1E; - case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_OBSOLETE: - return 0x1F; - case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_RANGE_DEL: - return 0x20; - case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_USER: - return 0x21; - case ROCKSDB_NAMESPACE::Tickers::COMPACTION_RANGE_DEL_DROP_OBSOLETE: - return 0x22; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_WRITTEN: - return 0x23; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_READ: - return 0x24; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_UPDATED: - return 0x25; - case ROCKSDB_NAMESPACE::Tickers::BYTES_WRITTEN: - return 0x26; - case ROCKSDB_NAMESPACE::Tickers::BYTES_READ: - return 0x27; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK: - return 0x28; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT: - return 0x29; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV: - return 0x2A; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK_FOUND: - return 0x2B; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT_FOUND: - return 0x2C; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV_FOUND: - return 0x2D; - case ROCKSDB_NAMESPACE::Tickers::ITER_BYTES_READ: - return 0x2E; - case ROCKSDB_NAMESPACE::Tickers::NO_FILE_OPENS: - return 0x30; - case ROCKSDB_NAMESPACE::Tickers::NO_FILE_ERRORS: - return 0x31; - case ROCKSDB_NAMESPACE::Tickers::STALL_MICROS: - return 0x35; - case ROCKSDB_NAMESPACE::Tickers::DB_MUTEX_WAIT_MICROS: - return 0x36; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_CALLS: - return 0x39; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_READ: - return 0x3A; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_BYTES_READ: - return 0x3B; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_MERGE_FAILURES: - return 0x3D; - case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_CHECKED: - return 0x3E; - case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_USEFUL: - return 0x3F; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_OF_RESEEKS_IN_ITERATION: - return 0x40; - case ROCKSDB_NAMESPACE::Tickers::GET_UPDATES_SINCE_CALLS: - return 0x41; - case ROCKSDB_NAMESPACE::Tickers::WAL_FILE_SYNCED: - return 0x46; - case ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES: - return 0x47; - case ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_SELF: - return 0x48; - case ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_OTHER: - return 0x49; - case ROCKSDB_NAMESPACE::Tickers::WRITE_WITH_WAL: - return 0x4B; - case ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES: - return 0x4C; - case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES: - return 0x4D; - case ROCKSDB_NAMESPACE::Tickers::FLUSH_WRITE_BYTES: - return 0x4E; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_DIRECT_LOAD_TABLE_PROPERTIES: - return 0x4F; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_ACQUIRES: - return 0x50; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_RELEASES: - return 0x51; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_CLEANUPS: - return 0x52; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_COMPRESSED: - return 0x53; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_DECOMPRESSED: - return 0x54; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_NOT_COMPRESSED: - return 0x55; - case ROCKSDB_NAMESPACE::Tickers::MERGE_OPERATION_TOTAL_TIME: - return 0x56; - case ROCKSDB_NAMESPACE::Tickers::FILTER_OPERATION_TOTAL_TIME: - return 
0x57; - case ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_HIT: - return 0x58; - case ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_MISS: - return 0x59; - case ROCKSDB_NAMESPACE::Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES: - return 0x5A; - case ROCKSDB_NAMESPACE::Tickers::READ_AMP_TOTAL_READ_BYTES: - return 0x5B; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_RATE_LIMITER_DRAINS: - return 0x5C; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_ITER_SKIP: - return 0x5D; - case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND: - return 0x5E; - case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED: - // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX). - return -0x01; - case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED: - return 0x60; - case ROCKSDB_NAMESPACE::Tickers::COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE: - return 0x61; - case ROCKSDB_NAMESPACE::Tickers::COMPACTION_CANCELLED: - return 0x62; - case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_POSITIVE: - return 0x63; - case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_TRUE_POSITIVE: - return 0x64; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PUT: - return 0x65; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_WRITE: - return 0x66; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_GET: - return 0x67; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_MULTIGET: - return 0x68; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_SEEK: - return 0x69; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_NEXT: - return 0x6A; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PREV: - return 0x6B; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_WRITTEN: - return 0x6C; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_READ: - return 0x6D; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_WRITTEN: - return 0x6E; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_READ: - return 0x6F; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED: - return 0x70; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED_TTL: - return 0x71; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB: - return 0x72; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB_TTL: - return 0x73; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_WRITTEN: - return 0x74; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_READ: - return 0x75; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_SYNCED: - return 0x76; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_COUNT: - return 0x77; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_SIZE: - return 0x78; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_COUNT: - return 0x79; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_SIZE: - return 0x7A; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_FILES: - return 0x7B; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_NEW_FILES: - return 0x7C; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_FAILURES: - return 0x7D; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_RELOCATED: - return -0x02; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_RELOCATED: - return -0x05; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_FILES_EVICTED: - return -0x06; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_KEYS_EVICTED: - return -0x07; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_BYTES_EVICTED: - return -0x08; - case ROCKSDB_NAMESPACE::Tickers::TXN_PREPARE_MUTEX_OVERHEAD: - return -0x09; - case ROCKSDB_NAMESPACE::Tickers::TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD: - return -0x0A; - case ROCKSDB_NAMESPACE::Tickers::TXN_DUPLICATE_KEY_OVERHEAD: - 
return -0x0B; - case ROCKSDB_NAMESPACE::Tickers::TXN_SNAPSHOT_MUTEX_OVERHEAD: - return -0x0C; - case ROCKSDB_NAMESPACE::Tickers::TXN_GET_TRY_AGAIN: - return -0x0D; - case ROCKSDB_NAMESPACE::Tickers::FILES_MARKED_TRASH: - return -0x0E; - case ROCKSDB_NAMESPACE::Tickers::FILES_DELETED_IMMEDIATELY: - return -0X0F; - case ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_MARKED: - return -0x10; - case ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_PERIODIC: - return -0x11; - case ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_TTL: - return -0x12; - case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_MARKED: - return -0x13; - case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_PERIODIC: - return -0x14; - case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_TTL: - return -0x15; - case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_ERROR_COUNT: - return -0x16; - case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_IO_ERROR_COUNT: - return -0x17; - case ROCKSDB_NAMESPACE::Tickers:: - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT: - return -0x18; - case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_COUNT: - return -0x19; - case ROCKSDB_NAMESPACE::Tickers:: - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT: - return -0x1A; - case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT: - return -0x1B; - case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_PAYLOAD_BYTES_AT_FLUSH: - return -0x1C; - case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_GARBAGE_BYTES_AT_FLUSH: - return -0x1D; - case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS: - return -0x1E; - case ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES: - return -0x1F; - case ROCKSDB_NAMESPACE::Tickers::BACKUP_READ_BYTES: - return -0x20; - case ROCKSDB_NAMESPACE::Tickers::BACKUP_WRITE_BYTES: - return -0x21; - case ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_READ_BYTES: - return -0x22; - case ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES: - return -0x23; - case ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_BYTES: - return -0x24; - case ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_BYTES: - return -0x25; - case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_BYTES: - return -0x26; - case ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_COUNT: - return -0x27; - case ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT: - return -0x28; - case ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT: - return -0x29; - case ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_BYTES: - return -0x2A; - case ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_COUNT: - return -0x2B; - case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES: - return -0x2C; - case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT: - return -0x2D; - case ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT: - return -0x2E; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_MISS: - return -0x2F; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_HIT: - return -0x30; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD: - return -0x31; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD_FAILURES: - return -0x32; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_READ: - return -0x33; - case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_WRITE: - return -0x34; - case ROCKSDB_NAMESPACE::Tickers::READ_ASYNC_MICROS: - return -0x35; - case ROCKSDB_NAMESPACE::Tickers::ASYNC_READ_ERROR_COUNT: - return -0x36; - case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_FILTER_HITS: - return -0x37; - case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_INDEX_HITS: - return -0x38; - case 
ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_DATA_HITS: - return -0x39; - case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS: - return -0x3A; - case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT: - return -0x3B; - case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: - // 0x5F was the max value in the initial copy of tickers to Java. - // Since these values are exposed directly to Java clients, we keep - // the value the same forever. - // - // TODO: This particular case seems confusing and unnecessary to pin the - // value since it's meant to be the number of tickers, not an actual - // ticker value. But we aren't yet in a position to fix it since the - // number of tickers doesn't fit in the Java representation (jbyte). - return 0x5F; - default: - // undefined/default - return 0x0; - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::Tickers enum for the - // provided Java org.rocksdb.TickerType - static ROCKSDB_NAMESPACE::Tickers toCppTickers(jbyte jticker_type) { - switch (jticker_type) { - case 0x0: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_MISS; - case 0x1: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_HIT; - case 0x2: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD; - case 0x3: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD_FAILURES; - case 0x4: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_MISS; - case 0x5: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_HIT; - case 0x6: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_ADD; - case 0x7: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_BYTES_INSERT; - case 0x9: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_MISS; - case 0xA: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_HIT; - case 0xB: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_ADD; - case 0xC: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_BYTES_INSERT; - case 0xE: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_MISS; - case 0xF: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_HIT; - case 0x10: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_ADD; - case 0x11: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_BYTES_INSERT; - case 0x12: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_READ; - case 0x13: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_WRITE; - case 0x14: - return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_USEFUL; - case 0x15: - return ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_HIT; - case 0x16: - return ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_MISS; - case 0x17: - return ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_HIT; - case 0x18: - return ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_MISS; - case 0x19: - return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_HIT; - case 0x1A: - return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_MISS; - case 0x1B: - return ROCKSDB_NAMESPACE::Tickers::GET_HIT_L0; - case 0x1C: - return ROCKSDB_NAMESPACE::Tickers::GET_HIT_L1; - case 0x1D: - return ROCKSDB_NAMESPACE::Tickers::GET_HIT_L2_AND_UP; - case 0x1E: - return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_NEWER_ENTRY; - case 0x1F: - return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_OBSOLETE; - case 0x20: - return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_RANGE_DEL; - case 0x21: - return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_USER; - case 0x22: - return ROCKSDB_NAMESPACE::Tickers::COMPACTION_RANGE_DEL_DROP_OBSOLETE; - case 0x23: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_WRITTEN; - case 0x24: - return 
ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_READ; - case 0x25: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_UPDATED; - case 0x26: - return ROCKSDB_NAMESPACE::Tickers::BYTES_WRITTEN; - case 0x27: - return ROCKSDB_NAMESPACE::Tickers::BYTES_READ; - case 0x28: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK; - case 0x29: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT; - case 0x2A: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV; - case 0x2B: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK_FOUND; - case 0x2C: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT_FOUND; - case 0x2D: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV_FOUND; - case 0x2E: - return ROCKSDB_NAMESPACE::Tickers::ITER_BYTES_READ; - case 0x30: - return ROCKSDB_NAMESPACE::Tickers::NO_FILE_OPENS; - case 0x31: - return ROCKSDB_NAMESPACE::Tickers::NO_FILE_ERRORS; - case 0x35: - return ROCKSDB_NAMESPACE::Tickers::STALL_MICROS; - case 0x36: - return ROCKSDB_NAMESPACE::Tickers::DB_MUTEX_WAIT_MICROS; - case 0x39: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_CALLS; - case 0x3A: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_READ; - case 0x3B: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_BYTES_READ; - case 0x3D: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_MERGE_FAILURES; - case 0x3E: - return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_CHECKED; - case 0x3F: - return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_USEFUL; - case 0x40: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_OF_RESEEKS_IN_ITERATION; - case 0x41: - return ROCKSDB_NAMESPACE::Tickers::GET_UPDATES_SINCE_CALLS; - case 0x46: - return ROCKSDB_NAMESPACE::Tickers::WAL_FILE_SYNCED; - case 0x47: - return ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES; - case 0x48: - return ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_SELF; - case 0x49: - return ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_OTHER; - case 0x4B: - return ROCKSDB_NAMESPACE::Tickers::WRITE_WITH_WAL; - case 0x4C: - return ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES; - case 0x4D: - return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES; - case 0x4E: - return ROCKSDB_NAMESPACE::Tickers::FLUSH_WRITE_BYTES; - case 0x4F: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_DIRECT_LOAD_TABLE_PROPERTIES; - case 0x50: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_ACQUIRES; - case 0x51: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_RELEASES; - case 0x52: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_CLEANUPS; - case 0x53: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_COMPRESSED; - case 0x54: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_DECOMPRESSED; - case 0x55: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_NOT_COMPRESSED; - case 0x56: - return ROCKSDB_NAMESPACE::Tickers::MERGE_OPERATION_TOTAL_TIME; - case 0x57: - return ROCKSDB_NAMESPACE::Tickers::FILTER_OPERATION_TOTAL_TIME; - case 0x58: - return ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_HIT; - case 0x59: - return ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_MISS; - case 0x5A: - return ROCKSDB_NAMESPACE::Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES; - case 0x5B: - return ROCKSDB_NAMESPACE::Tickers::READ_AMP_TOTAL_READ_BYTES; - case 0x5C: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_RATE_LIMITER_DRAINS; - case 0x5D: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_ITER_SKIP; - case 0x5E: - return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND; - case -0x01: - // -0x01 so we can skip over the already taken 0x5F (TICKER_ENUM_MAX). 
- return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED; - case 0x60: - return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED; - case 0x61: - return ROCKSDB_NAMESPACE::Tickers:: - COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE; - case 0x62: - return ROCKSDB_NAMESPACE::Tickers::COMPACTION_CANCELLED; - case 0x63: - return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_POSITIVE; - case 0x64: - return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_TRUE_POSITIVE; - case 0x65: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PUT; - case 0x66: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_WRITE; - case 0x67: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_GET; - case 0x68: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_MULTIGET; - case 0x69: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_SEEK; - case 0x6A: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_NEXT; - case 0x6B: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PREV; - case 0x6C: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_WRITTEN; - case 0x6D: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_READ; - case 0x6E: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_WRITTEN; - case 0x6F: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_READ; - case 0x70: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED; - case 0x71: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED_TTL; - case 0x72: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB; - case 0x73: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB_TTL; - case 0x74: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_WRITTEN; - case 0x75: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_READ; - case 0x76: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_SYNCED; - case 0x77: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_COUNT; - case 0x78: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_SIZE; - case 0x79: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_COUNT; - case 0x7A: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_SIZE; - case 0x7B: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_FILES; - case 0x7C: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_NEW_FILES; - case 0x7D: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_FAILURES; - case -0x02: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_RELOCATED; - case -0x05: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_RELOCATED; - case -0x06: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_FILES_EVICTED; - case -0x07: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_KEYS_EVICTED; - case -0x08: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_BYTES_EVICTED; - case -0x09: - return ROCKSDB_NAMESPACE::Tickers::TXN_PREPARE_MUTEX_OVERHEAD; - case -0x0A: - return ROCKSDB_NAMESPACE::Tickers::TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD; - case -0x0B: - return ROCKSDB_NAMESPACE::Tickers::TXN_DUPLICATE_KEY_OVERHEAD; - case -0x0C: - return ROCKSDB_NAMESPACE::Tickers::TXN_SNAPSHOT_MUTEX_OVERHEAD; - case -0x0D: - return ROCKSDB_NAMESPACE::Tickers::TXN_GET_TRY_AGAIN; - case -0x0E: - return ROCKSDB_NAMESPACE::Tickers::FILES_MARKED_TRASH; - case -0x0F: - return ROCKSDB_NAMESPACE::Tickers::FILES_DELETED_IMMEDIATELY; - case -0x10: - return ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_MARKED; - case -0x11: - return ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_PERIODIC; - case -0x12: - return ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES_TTL; - case -0x13: - return 
ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_MARKED; - case -0x14: - return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_PERIODIC; - case -0x15: - return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_TTL; - case -0x16: - return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_ERROR_COUNT; - case -0x17: - return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_IO_ERROR_COUNT; - case -0x18: - return ROCKSDB_NAMESPACE::Tickers:: - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT; - case -0x19: - return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_COUNT; - case -0x1A: - return ROCKSDB_NAMESPACE::Tickers:: - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT; - case -0x1B: - return ROCKSDB_NAMESPACE::Tickers:: - ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT; - case -0x1C: - return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_PAYLOAD_BYTES_AT_FLUSH; - case -0x1D: - return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_GARBAGE_BYTES_AT_FLUSH; - case -0x1E: - return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_HITS; - case -0x1F: - return ROCKSDB_NAMESPACE::Tickers::VERIFY_CHECKSUM_READ_BYTES; - case -0x20: - return ROCKSDB_NAMESPACE::Tickers::BACKUP_READ_BYTES; - case -0x21: - return ROCKSDB_NAMESPACE::Tickers::BACKUP_WRITE_BYTES; - case -0x22: - return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_READ_BYTES; - case -0x23: - return ROCKSDB_NAMESPACE::Tickers::REMOTE_COMPACT_WRITE_BYTES; - case -0x24: - return ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_BYTES; - case -0x25: - return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_BYTES; - case -0x26: - return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_BYTES; - case -0x27: - return ROCKSDB_NAMESPACE::Tickers::HOT_FILE_READ_COUNT; - case -0x28: - return ROCKSDB_NAMESPACE::Tickers::WARM_FILE_READ_COUNT; - case -0x29: - return ROCKSDB_NAMESPACE::Tickers::COLD_FILE_READ_COUNT; - case -0x2A: - return ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_BYTES; - case -0x2B: - return ROCKSDB_NAMESPACE::Tickers::LAST_LEVEL_READ_COUNT; - case -0x2C: - return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES; - case -0x2D: - return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT; - case -0x2E: - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT; - case -0x2F: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_MISS; - case -0x30: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_HIT; - case -0x31: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD; - case -0x32: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD_FAILURES; - case -0x33: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_READ; - case -0x34: - return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_WRITE; - case -0x35: - return ROCKSDB_NAMESPACE::Tickers::READ_ASYNC_MICROS; - case -0x36: - return ROCKSDB_NAMESPACE::Tickers::ASYNC_READ_ERROR_COUNT; - case -0x37: - return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_FILTER_HITS; - case -0x38: - return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_INDEX_HITS; - case -0x39: - return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_DATA_HITS; - case -0x3A: - return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS; - case -0x3B: - return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT; - case 0x5F: - // 0x5F was the max value in the initial copy of tickers to Java. - // Since these values are exposed directly to Java clients, we keep - // the value the same forever. - // - // TODO: This particular case seems confusing and unnecessary to pin the - // value since it's meant to be the number of tickers, not an actual - // ticker value. 
But we aren't yet in a position to fix it since the - // number of tickers doesn't fit in the Java representation (jbyte). - return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX; - - default: - // undefined/default - return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_MISS; - } - } -}; - -// The portal class for org.rocksdb.HistogramType -class HistogramTypeJni { - public: - // Returns the equivalent org.rocksdb.HistogramType for the provided - // C++ ROCKSDB_NAMESPACE::Histograms enum - static jbyte toJavaHistogramsType( - const ROCKSDB_NAMESPACE::Histograms& histograms) { - switch (histograms) { - case ROCKSDB_NAMESPACE::Histograms::DB_GET: - return 0x0; - case ROCKSDB_NAMESPACE::Histograms::DB_WRITE: - return 0x1; - case ROCKSDB_NAMESPACE::Histograms::COMPACTION_TIME: - return 0x2; - case ROCKSDB_NAMESPACE::Histograms::SUBCOMPACTION_SETUP_TIME: - return 0x3; - case ROCKSDB_NAMESPACE::Histograms::TABLE_SYNC_MICROS: - return 0x4; - case ROCKSDB_NAMESPACE::Histograms::COMPACTION_OUTFILE_SYNC_MICROS: - return 0x5; - case ROCKSDB_NAMESPACE::Histograms::WAL_FILE_SYNC_MICROS: - return 0x6; - case ROCKSDB_NAMESPACE::Histograms::MANIFEST_FILE_SYNC_MICROS: - return 0x7; - case ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_IO_MICROS: - return 0x8; - case ROCKSDB_NAMESPACE::Histograms::DB_MULTIGET: - return 0x9; - case ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_COMPACTION_MICROS: - return 0xA; - case ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_GET_MICROS: - return 0xB; - case ROCKSDB_NAMESPACE::Histograms::WRITE_RAW_BLOCK_MICROS: - return 0xC; - case ROCKSDB_NAMESPACE::Histograms::NUM_FILES_IN_SINGLE_COMPACTION: - return 0x12; - case ROCKSDB_NAMESPACE::Histograms::DB_SEEK: - return 0x13; - case ROCKSDB_NAMESPACE::Histograms::WRITE_STALL: - return 0x14; - case ROCKSDB_NAMESPACE::Histograms::SST_READ_MICROS: - return 0x15; - case ROCKSDB_NAMESPACE::Histograms::NUM_SUBCOMPACTIONS_SCHEDULED: - return 0x16; - case ROCKSDB_NAMESPACE::Histograms::BYTES_PER_READ: - return 0x17; - case ROCKSDB_NAMESPACE::Histograms::BYTES_PER_WRITE: - return 0x18; - case ROCKSDB_NAMESPACE::Histograms::BYTES_PER_MULTIGET: - return 0x19; - case ROCKSDB_NAMESPACE::Histograms::BYTES_COMPRESSED: - return 0x1A; - case ROCKSDB_NAMESPACE::Histograms::BYTES_DECOMPRESSED: - return 0x1B; - case ROCKSDB_NAMESPACE::Histograms::COMPRESSION_TIMES_NANOS: - return 0x1C; - case ROCKSDB_NAMESPACE::Histograms::DECOMPRESSION_TIMES_NANOS: - return 0x1D; - case ROCKSDB_NAMESPACE::Histograms::READ_NUM_MERGE_OPERANDS: - return 0x1E; - // 0x20 to skip 0x1F so TICKER_ENUM_MAX remains unchanged for minor - // version compatibility. 
- case ROCKSDB_NAMESPACE::Histograms::FLUSH_TIME: - return 0x20; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_KEY_SIZE: - return 0x21; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_VALUE_SIZE: - return 0x22; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_WRITE_MICROS: - return 0x23; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_GET_MICROS: - return 0x24; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_MULTIGET_MICROS: - return 0x25; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_SEEK_MICROS: - return 0x26; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_NEXT_MICROS: - return 0x27; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_PREV_MICROS: - return 0x28; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS: - return 0x29; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_READ_MICROS: - return 0x2A; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_SYNC_MICROS: - return 0x2B; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_COMPRESSION_MICROS: - return 0x2D; - case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS: - return 0x2E; - case ROCKSDB_NAMESPACE::Histograms:: - NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL: - return 0x2F; - case ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL: - return 0x31; - case ROCKSDB_NAMESPACE::Histograms::ERROR_HANDLER_AUTORESUME_RETRY_COUNT: - return 0x32; - case ROCKSDB_NAMESPACE::Histograms::ASYNC_READ_BYTES: - return 0x33; - case ROCKSDB_NAMESPACE::Histograms::POLL_WAIT_MICROS: - return 0x34; - case ROCKSDB_NAMESPACE::Histograms::PREFETCHED_BYTES_DISCARDED: - return 0x35; - case ROCKSDB_NAMESPACE::Histograms::MULTIGET_IO_BATCH_SIZE: - return 0x36; - case NUM_LEVEL_READ_PER_MULTIGET: - return 0x37; - case ASYNC_PREFETCH_ABORT_MICROS: - return 0x38; - case ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_PREFETCH_TAIL_READ_BYTES: - return 0x39; - case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX: - // 0x1F for backwards compatibility on current minor version. 
- return 0x1F; - - default: - // undefined/default - return 0x0; - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::Histograms enum for the - // provided Java org.rocksdb.HistogramsType - static ROCKSDB_NAMESPACE::Histograms toCppHistograms(jbyte jhistograms_type) { - switch (jhistograms_type) { - case 0x0: - return ROCKSDB_NAMESPACE::Histograms::DB_GET; - case 0x1: - return ROCKSDB_NAMESPACE::Histograms::DB_WRITE; - case 0x2: - return ROCKSDB_NAMESPACE::Histograms::COMPACTION_TIME; - case 0x3: - return ROCKSDB_NAMESPACE::Histograms::SUBCOMPACTION_SETUP_TIME; - case 0x4: - return ROCKSDB_NAMESPACE::Histograms::TABLE_SYNC_MICROS; - case 0x5: - return ROCKSDB_NAMESPACE::Histograms::COMPACTION_OUTFILE_SYNC_MICROS; - case 0x6: - return ROCKSDB_NAMESPACE::Histograms::WAL_FILE_SYNC_MICROS; - case 0x7: - return ROCKSDB_NAMESPACE::Histograms::MANIFEST_FILE_SYNC_MICROS; - case 0x8: - return ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_IO_MICROS; - case 0x9: - return ROCKSDB_NAMESPACE::Histograms::DB_MULTIGET; - case 0xA: - return ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_COMPACTION_MICROS; - case 0xB: - return ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_GET_MICROS; - case 0xC: - return ROCKSDB_NAMESPACE::Histograms::WRITE_RAW_BLOCK_MICROS; - case 0x12: - return ROCKSDB_NAMESPACE::Histograms::NUM_FILES_IN_SINGLE_COMPACTION; - case 0x13: - return ROCKSDB_NAMESPACE::Histograms::DB_SEEK; - case 0x14: - return ROCKSDB_NAMESPACE::Histograms::WRITE_STALL; - case 0x15: - return ROCKSDB_NAMESPACE::Histograms::SST_READ_MICROS; - case 0x16: - return ROCKSDB_NAMESPACE::Histograms::NUM_SUBCOMPACTIONS_SCHEDULED; - case 0x17: - return ROCKSDB_NAMESPACE::Histograms::BYTES_PER_READ; - case 0x18: - return ROCKSDB_NAMESPACE::Histograms::BYTES_PER_WRITE; - case 0x19: - return ROCKSDB_NAMESPACE::Histograms::BYTES_PER_MULTIGET; - case 0x1A: - return ROCKSDB_NAMESPACE::Histograms::BYTES_COMPRESSED; - case 0x1B: - return ROCKSDB_NAMESPACE::Histograms::BYTES_DECOMPRESSED; - case 0x1C: - return ROCKSDB_NAMESPACE::Histograms::COMPRESSION_TIMES_NANOS; - case 0x1D: - return ROCKSDB_NAMESPACE::Histograms::DECOMPRESSION_TIMES_NANOS; - case 0x1E: - return ROCKSDB_NAMESPACE::Histograms::READ_NUM_MERGE_OPERANDS; - // 0x20 to skip 0x1F so TICKER_ENUM_MAX remains unchanged for minor - // version compatibility. 
- case 0x20: - return ROCKSDB_NAMESPACE::Histograms::FLUSH_TIME; - case 0x21: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_KEY_SIZE; - case 0x22: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_VALUE_SIZE; - case 0x23: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_WRITE_MICROS; - case 0x24: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_GET_MICROS; - case 0x25: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_MULTIGET_MICROS; - case 0x26: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_SEEK_MICROS; - case 0x27: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_NEXT_MICROS; - case 0x28: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_PREV_MICROS; - case 0x29: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS; - case 0x2A: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_READ_MICROS; - case 0x2B: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_SYNC_MICROS; - case 0x2D: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_COMPRESSION_MICROS; - case 0x2E: - return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS; - case 0x2F: - return ROCKSDB_NAMESPACE::Histograms:: - NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL; - case 0x31: - return ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL; - case 0x32: - return ROCKSDB_NAMESPACE::Histograms:: - ERROR_HANDLER_AUTORESUME_RETRY_COUNT; - case 0x33: - return ROCKSDB_NAMESPACE::Histograms::ASYNC_READ_BYTES; - case 0x34: - return ROCKSDB_NAMESPACE::Histograms::POLL_WAIT_MICROS; - case 0x35: - return ROCKSDB_NAMESPACE::Histograms::PREFETCHED_BYTES_DISCARDED; - case 0x36: - return ROCKSDB_NAMESPACE::Histograms::MULTIGET_IO_BATCH_SIZE; - case 0x37: - return ROCKSDB_NAMESPACE::Histograms::NUM_LEVEL_READ_PER_MULTIGET; - case 0x38: - return ROCKSDB_NAMESPACE::Histograms::ASYNC_PREFETCH_ABORT_MICROS; - case 0x39: - return ROCKSDB_NAMESPACE::Histograms:: - TABLE_OPEN_PREFETCH_TAIL_READ_BYTES; - case 0x1F: - // 0x1F for backwards compatibility on current minor version. 
- return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX; - - default: - // undefined/default - return ROCKSDB_NAMESPACE::Histograms::DB_GET; - } - } -}; - -// The portal class for org.rocksdb.StatsLevel -class StatsLevelJni { - public: - // Returns the equivalent org.rocksdb.StatsLevel for the provided - // C++ ROCKSDB_NAMESPACE::StatsLevel enum - static jbyte toJavaStatsLevel( - const ROCKSDB_NAMESPACE::StatsLevel& stats_level) { - switch (stats_level) { - case ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers: - return 0x0; - case ROCKSDB_NAMESPACE::StatsLevel::kExceptTimeForMutex: - return 0x1; - case ROCKSDB_NAMESPACE::StatsLevel::kAll: - return 0x2; - - default: - // undefined/default - return 0x0; - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::StatsLevel enum for the - // provided Java org.rocksdb.StatsLevel - static ROCKSDB_NAMESPACE::StatsLevel toCppStatsLevel(jbyte jstats_level) { - switch (jstats_level) { - case 0x0: - return ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers; - case 0x1: - return ROCKSDB_NAMESPACE::StatsLevel::kExceptTimeForMutex; - case 0x2: - return ROCKSDB_NAMESPACE::StatsLevel::kAll; - - default: - // undefined/default - return ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers; - } - } -}; - -// The portal class for org.rocksdb.RateLimiterMode -class RateLimiterModeJni { - public: - // Returns the equivalent org.rocksdb.RateLimiterMode for the provided - // C++ ROCKSDB_NAMESPACE::RateLimiter::Mode enum - static jbyte toJavaRateLimiterMode( - const ROCKSDB_NAMESPACE::RateLimiter::Mode& rate_limiter_mode) { - switch (rate_limiter_mode) { - case ROCKSDB_NAMESPACE::RateLimiter::Mode::kReadsOnly: - return 0x0; - case ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly: - return 0x1; - case ROCKSDB_NAMESPACE::RateLimiter::Mode::kAllIo: - return 0x2; - - default: - // undefined/default - return 0x1; - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::RateLimiter::Mode enum for - // the provided Java org.rocksdb.RateLimiterMode - static ROCKSDB_NAMESPACE::RateLimiter::Mode toCppRateLimiterMode( - jbyte jrate_limiter_mode) { - switch (jrate_limiter_mode) { - case 0x0: - return ROCKSDB_NAMESPACE::RateLimiter::Mode::kReadsOnly; - case 0x1: - return ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly; - case 0x2: - return ROCKSDB_NAMESPACE::RateLimiter::Mode::kAllIo; - - default: - // undefined/default - return ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly; - } - } -}; - -// The portal class for org.rocksdb.MemoryUsageType -class MemoryUsageTypeJni { - public: - // Returns the equivalent org.rocksdb.MemoryUsageType for the provided - // C++ ROCKSDB_NAMESPACE::MemoryUtil::UsageType enum - static jbyte toJavaMemoryUsageType( - const ROCKSDB_NAMESPACE::MemoryUtil::UsageType& usage_type) { - switch (usage_type) { - case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableTotal: - return 0x0; - case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableUnFlushed: - return 0x1; - case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kTableReadersTotal: - return 0x2; - case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kCacheTotal: - return 0x3; - default: - // undefined: use kNumUsageTypes - return 0x4; - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::MemoryUtil::UsageType enum - // for the provided Java org.rocksdb.MemoryUsageType - static ROCKSDB_NAMESPACE::MemoryUtil::UsageType toCppMemoryUsageType( - jbyte usage_type) { - switch (usage_type) { - case 0x0: - return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableTotal; - case 0x1: - return 
ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableUnFlushed; - case 0x2: - return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kTableReadersTotal; - case 0x3: - return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kCacheTotal; - default: - // undefined/default: use kNumUsageTypes - return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kNumUsageTypes; - } - } -}; - -// The portal class for org.rocksdb.Transaction -class TransactionJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.Transaction - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/Transaction"); - } - - /** - * Create a new Java org.rocksdb.Transaction.WaitingTransactions object - * - * @param env A pointer to the Java environment - * @param jtransaction A Java org.rocksdb.Transaction object - * @param column_family_id The id of the column family - * @param key The key - * @param transaction_ids The transaction ids - * - * @return A reference to a Java - * org.rocksdb.Transaction.WaitingTransactions object, - * or nullptr if an exception occurs - */ - static jobject newWaitingTransactions( - JNIEnv* env, jobject jtransaction, const uint32_t column_family_id, - const std::string& key, - const std::vector<TransactionID>& transaction_ids) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID( - jclazz, "newWaitingTransactions", - "(JLjava/lang/String;[J)Lorg/rocksdb/Transaction$WaitingTransactions;"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jstring jkey = env->NewStringUTF(key.c_str()); - if (jkey == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - const size_t len = transaction_ids.size(); - jlongArray jtransaction_ids = env->NewLongArray(static_cast<jsize>(len)); - if (jtransaction_ids == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jkey); - return nullptr; - } - - jboolean is_copy; - jlong* body = env->GetLongArrayElements(jtransaction_ids, &is_copy); - if (body == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jkey); - env->DeleteLocalRef(jtransaction_ids); - return nullptr; - } - for (size_t i = 0; i < len; ++i) { - body[i] = static_cast<jlong>(transaction_ids[i]); - } - env->ReleaseLongArrayElements(jtransaction_ids, body, - is_copy == JNI_TRUE ? 
0 : JNI_ABORT); - - jobject jwaiting_transactions = env->CallObjectMethod( - jtransaction, mid, static_cast<jlong>(column_family_id), jkey, - jtransaction_ids); - if (env->ExceptionCheck()) { - // exception thrown: InstantiationException or OutOfMemoryError - env->DeleteLocalRef(jkey); - env->DeleteLocalRef(jtransaction_ids); - return nullptr; - } - - return jwaiting_transactions; - } -}; - -// The portal class for org.rocksdb.TransactionDB -class TransactionDBJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.TransactionDB - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TransactionDB"); - } - - /** - * Create a new Java org.rocksdb.TransactionDB.DeadlockInfo object - * - * @param env A pointer to the Java environment - * @param jtransaction_db A Java org.rocksdb.TransactionDB object - * @param transaction_id The id of the transaction - * @param column_family_id The id of the column family - * @param waiting_key The key the transaction is waiting on - * @param exclusive true if the lock is exclusive - * - * @return A reference to a Java - * org.rocksdb.TransactionDB.DeadlockInfo object, - * or nullptr if an exception occurs - */ - static jobject newDeadlockInfo( - JNIEnv* env, jobject jtransaction_db, - const ROCKSDB_NAMESPACE::TransactionID transaction_id, - const uint32_t column_family_id, const std::string& waiting_key, - const bool exclusive) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID( - jclazz, "newDeadlockInfo", - "(JJLjava/lang/String;Z)Lorg/rocksdb/TransactionDB$DeadlockInfo;"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jstring jwaiting_key = env->NewStringUTF(waiting_key.c_str()); - if (jwaiting_key == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - // resolve the column family id to a ColumnFamilyHandle - jobject jdeadlock_info = env->CallObjectMethod( - jtransaction_db, mid, transaction_id, - static_cast<jlong>(column_family_id), jwaiting_key, exclusive); - if (env->ExceptionCheck()) { - // exception thrown: InstantiationException or OutOfMemoryError - env->DeleteLocalRef(jwaiting_key); - return nullptr; - } - - return jdeadlock_info; - } -}; - -// The portal class for org.rocksdb.TxnDBWritePolicy -class TxnDBWritePolicyJni { - public: - // Returns the equivalent org.rocksdb.TxnDBWritePolicy for the provided - // C++ ROCKSDB_NAMESPACE::TxnDBWritePolicy enum - static jbyte toJavaTxnDBWritePolicy( - const ROCKSDB_NAMESPACE::TxnDBWritePolicy& txndb_write_policy) { - switch (txndb_write_policy) { - case ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED: - return 0x0; - case ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_PREPARED: - return 0x1; - case ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_UNPREPARED: - return 0x2; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::TxnDBWritePolicy enum for the - // provided Java org.rocksdb.TxnDBWritePolicy - static ROCKSDB_NAMESPACE::TxnDBWritePolicy toCppTxnDBWritePolicy( - jbyte jtxndb_write_policy) { - switch (jtxndb_write_policy) { - case 0x0: - return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED; - case 0x1: - return 
ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_PREPARED; - case 0x2: - return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_UNPREPARED; - default: - // undefined/default - return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED; - } - } -}; - -// The portal class for org.rocksdb.TransactionDB.KeyLockInfo -class KeyLockInfoJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.TransactionDB.KeyLockInfo - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$KeyLockInfo"); - } - - /** - * Create a new Java org.rocksdb.TransactionDB.KeyLockInfo object - * with the same properties as the provided C++ ROCKSDB_NAMESPACE::KeyLockInfo - * object - * - * @param env A pointer to the Java environment - * @param key_lock_info The ROCKSDB_NAMESPACE::KeyLockInfo object - * - * @return A reference to a Java - * org.rocksdb.TransactionDB.KeyLockInfo object, - * or nullptr if an exception occurs - */ - static jobject construct( - JNIEnv* env, const ROCKSDB_NAMESPACE::KeyLockInfo& key_lock_info) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = - env->GetMethodID(jclazz, "<init>", "(Ljava/lang/String;[JZ)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jstring jkey = env->NewStringUTF(key_lock_info.key.c_str()); - if (jkey == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - const jsize jtransaction_ids_len = - static_cast<jsize>(key_lock_info.ids.size()); - jlongArray jtransactions_ids = env->NewLongArray(jtransaction_ids_len); - if (jtransactions_ids == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jkey); - return nullptr; - } - - const jobject jkey_lock_info = env->NewObject( - jclazz, mid, jkey, jtransactions_ids, key_lock_info.exclusive); - if (jkey_lock_info == nullptr) { - // exception thrown: InstantiationException or OutOfMemoryError - env->DeleteLocalRef(jtransactions_ids); - env->DeleteLocalRef(jkey); - return nullptr; - } - - return jkey_lock_info; - } -}; - -// The portal class for org.rocksdb.TransactionDB.DeadlockInfo -class DeadlockInfoJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.TransactionDB.DeadlockInfo - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$DeadlockInfo"); - } -}; - -// The portal class for org.rocksdb.TransactionDB.DeadlockPath -class DeadlockPathJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.TransactionDB.DeadlockPath - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$DeadlockPath"); - } - - /** 
- * Create a new Java org.rocksdb.TransactionDB.DeadlockPath object - * - * @param env A pointer to the Java environment - * - * @return A reference to a Java - * org.rocksdb.TransactionDB.DeadlockPath object, - * or nullptr if an exception occurs - */ - static jobject construct(JNIEnv* env, const jobjectArray jdeadlock_infos, - const bool limit_exceeded) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID(jclazz, "<init>", "([LDeadlockInfo;Z)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - const jobject jdeadlock_path = - env->NewObject(jclazz, mid, jdeadlock_infos, limit_exceeded); - if (jdeadlock_path == nullptr) { - // exception thrown: InstantiationException or OutOfMemoryError - return nullptr; - } - - return jdeadlock_path; - } -}; - -class AbstractTableFilterJni - : public RocksDBNativeClass< - const ROCKSDB_NAMESPACE::TableFilterJniCallback*, - AbstractTableFilterJni> { - public: - /** - * Get the Java Method: TableFilter#filter(TableProperties) - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getFilterMethod(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "filter", "(Lorg/rocksdb/TableProperties;)Z"); - assert(mid != nullptr); - return mid; - } - - private: - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableFilter"); - } -}; - -class TablePropertiesJni : public JavaClass { - public: - /** - * Create a new Java org.rocksdb.TableProperties object. 
- * - * @param env A pointer to the Java environment - * @param table_properties A C++ table properties object - * - * @return A reference to a Java org.rocksdb.TableProperties object, or - * nullptr if an exception occurs - */ - static jobject fromCppTableProperties( - JNIEnv* env, const ROCKSDB_NAMESPACE::TableProperties& table_properties) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID( - jclazz, "<init>", - "(JJJJJJJJJJJJJJJJJJJJJJ[BLjava/lang/String;Ljava/lang/String;Ljava/" - "lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/" - "String;Ljava/util/Map;Ljava/util/Map;)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jbyteArray jcolumn_family_name = ROCKSDB_NAMESPACE::JniUtil::copyBytes( - env, table_properties.column_family_name); - if (jcolumn_family_name == nullptr) { - // exception occurred creating java string - return nullptr; - } - - jstring jfilter_policy_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &table_properties.filter_policy_name, true); - if (env->ExceptionCheck()) { - // exception occurred creating java string - env->DeleteLocalRef(jcolumn_family_name); - return nullptr; - } - - jstring jcomparator_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &table_properties.comparator_name, true); - if (env->ExceptionCheck()) { - // exception occurred creating java string - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfilter_policy_name); - return nullptr; - } - - jstring jmerge_operator_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &table_properties.merge_operator_name, true); - if (env->ExceptionCheck()) { - // exception occurred creating java string - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfilter_policy_name); - env->DeleteLocalRef(jcomparator_name); - return nullptr; - } - - jstring jprefix_extractor_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &table_properties.prefix_extractor_name, true); - if (env->ExceptionCheck()) { - // exception occurred creating java string - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfilter_policy_name); - env->DeleteLocalRef(jcomparator_name); - env->DeleteLocalRef(jmerge_operator_name); - return nullptr; - } - - jstring jproperty_collectors_names = - ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &table_properties.property_collectors_names, true); - if (env->ExceptionCheck()) { - // exception occurred creating java string - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfilter_policy_name); - env->DeleteLocalRef(jcomparator_name); - env->DeleteLocalRef(jmerge_operator_name); - env->DeleteLocalRef(jprefix_extractor_name); - return nullptr; - } - - jstring jcompression_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &table_properties.compression_name, true); - if (env->ExceptionCheck()) { - // exception occurred creating java string - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfilter_policy_name); - env->DeleteLocalRef(jcomparator_name); - env->DeleteLocalRef(jmerge_operator_name); - env->DeleteLocalRef(jprefix_extractor_name); - env->DeleteLocalRef(jproperty_collectors_names); - return nullptr; - } - - // Map<String, String> - jobject juser_collected_properties = - ROCKSDB_NAMESPACE::HashMapJni::fromCppMap( - env, &table_properties.user_collected_properties); - if (env->ExceptionCheck()) { - // exception occurred creating 
java map - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfilter_policy_name); - env->DeleteLocalRef(jcomparator_name); - env->DeleteLocalRef(jmerge_operator_name); - env->DeleteLocalRef(jprefix_extractor_name); - env->DeleteLocalRef(jproperty_collectors_names); - env->DeleteLocalRef(jcompression_name); - return nullptr; - } - - // Map<String, String> - jobject jreadable_properties = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap( - env, &table_properties.readable_properties); - if (env->ExceptionCheck()) { - // exception occurred creating java map - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfilter_policy_name); - env->DeleteLocalRef(jcomparator_name); - env->DeleteLocalRef(jmerge_operator_name); - env->DeleteLocalRef(jprefix_extractor_name); - env->DeleteLocalRef(jproperty_collectors_names); - env->DeleteLocalRef(jcompression_name); - env->DeleteLocalRef(juser_collected_properties); - return nullptr; - } - - jobject jtable_properties = env->NewObject( - jclazz, mid, static_cast<jlong>(table_properties.data_size), - static_cast<jlong>(table_properties.index_size), - static_cast<jlong>(table_properties.index_partitions), - static_cast<jlong>(table_properties.top_level_index_size), - static_cast<jlong>(table_properties.index_key_is_user_key), - static_cast<jlong>(table_properties.index_value_is_delta_encoded), - static_cast<jlong>(table_properties.filter_size), - static_cast<jlong>(table_properties.raw_key_size), - static_cast<jlong>(table_properties.raw_value_size), - static_cast<jlong>(table_properties.num_data_blocks), - static_cast<jlong>(table_properties.num_entries), - static_cast<jlong>(table_properties.num_deletions), - static_cast<jlong>(table_properties.num_merge_operands), - static_cast<jlong>(table_properties.num_range_deletions), - static_cast<jlong>(table_properties.format_version), - static_cast<jlong>(table_properties.fixed_key_len), - static_cast<jlong>(table_properties.column_family_id), - static_cast<jlong>(table_properties.creation_time), - static_cast<jlong>(table_properties.oldest_key_time), - static_cast<jlong>( - table_properties.slow_compression_estimated_data_size), - static_cast<jlong>( - table_properties.fast_compression_estimated_data_size), - static_cast<jlong>( - table_properties.external_sst_file_global_seqno_offset), - jcolumn_family_name, jfilter_policy_name, jcomparator_name, - jmerge_operator_name, jprefix_extractor_name, - jproperty_collectors_names, jcompression_name, - juser_collected_properties, jreadable_properties); - - if (env->ExceptionCheck()) { - return nullptr; - } - - return jtable_properties; - } - - private: - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableProperties"); - } -}; - -class ColumnFamilyDescriptorJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.ColumnFamilyDescriptor - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/ColumnFamilyDescriptor"); - } - - /** - * Create a new Java org.rocksdb.ColumnFamilyDescriptor object with the same - * properties as the provided C++ ROCKSDB_NAMESPACE::ColumnFamilyDescriptor - * object - * - * @param env A pointer to the Java environment - * @param cfd A pointer to ROCKSDB_NAMESPACE::ColumnFamilyDescriptor object - * - * @return A reference to a Java org.rocksdb.ColumnFamilyDescriptor object, or - * nullptr if an exception occurs - */ - static jobject construct(JNIEnv* 
env, ColumnFamilyDescriptor* cfd) { - jbyteArray jcf_name = JniUtil::copyBytes(env, cfd->name); - jobject cfopts = ColumnFamilyOptionsJni::construct(env, &(cfd->options)); - - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID(jclazz, "<init>", - "([BLorg/rocksdb/ColumnFamilyOptions;)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - env->DeleteLocalRef(jcf_name); - return nullptr; - } - - jobject jcfd = env->NewObject(jclazz, mid, jcf_name, cfopts); - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jcf_name); - return nullptr; - } - - return jcfd; - } - - /** - * Get the Java Method: ColumnFamilyDescriptor#columnFamilyName - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getColumnFamilyNameMethod(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "columnFamilyName", "()[B"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: ColumnFamilyDescriptor#columnFamilyOptions - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getColumnFamilyOptionsMethod(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID( - jclazz, "columnFamilyOptions", "()Lorg/rocksdb/ColumnFamilyOptions;"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.IndexType -class IndexTypeJni { - public: - // Returns the equivalent org.rocksdb.IndexType for the provided - // C++ ROCKSDB_NAMESPACE::IndexType enum - static jbyte toJavaIndexType( - const ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType& index_type) { - switch (index_type) { - case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::kBinarySearch: - return 0x0; - case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::kHashSearch: - return 0x1; - case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: - kTwoLevelIndexSearch: - return 0x2; - case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: - kBinarySearchWithFirstKey: - return 0x3; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::IndexType enum for the - // provided Java org.rocksdb.IndexType - static ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType toCppIndexType( - jbyte jindex_type) { - switch (jindex_type) { - case 0x0: - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: - kBinarySearch; - case 0x1: - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: - kHashSearch; - case 0x2: - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: - kTwoLevelIndexSearch; - case 0x3: - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: - kBinarySearchWithFirstKey; - default: - // undefined/default - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: - kBinarySearch; - } - } -}; - -// The portal class for org.rocksdb.DataBlockIndexType -class DataBlockIndexTypeJni { - public: - // Returns the equivalent org.rocksdb.DataBlockIndexType for the provided - // C++ 
ROCKSDB_NAMESPACE::DataBlockIndexType enum - static jbyte toJavaDataBlockIndexType( - const ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType& - index_type) { - switch (index_type) { - case ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: - kDataBlockBinarySearch: - return 0x0; - case ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: - kDataBlockBinaryAndHash: - return 0x1; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::DataBlockIndexType enum for - // the provided Java org.rocksdb.DataBlockIndexType - static ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType - toCppDataBlockIndexType(jbyte jindex_type) { - switch (jindex_type) { - case 0x0: - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: - kDataBlockBinarySearch; - case 0x1: - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: - kDataBlockBinaryAndHash; - default: - // undefined/default - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: - kDataBlockBinarySearch; - } - } -}; - -// The portal class for org.rocksdb.ChecksumType -class ChecksumTypeJni { - public: - // Returns the equivalent org.rocksdb.ChecksumType for the provided - // C++ ROCKSDB_NAMESPACE::ChecksumType enum - static jbyte toJavaChecksumType( - const ROCKSDB_NAMESPACE::ChecksumType& checksum_type) { - switch (checksum_type) { - case ROCKSDB_NAMESPACE::ChecksumType::kNoChecksum: - return 0x0; - case ROCKSDB_NAMESPACE::ChecksumType::kCRC32c: - return 0x1; - case ROCKSDB_NAMESPACE::ChecksumType::kxxHash: - return 0x2; - case ROCKSDB_NAMESPACE::ChecksumType::kxxHash64: - return 0x3; - case ROCKSDB_NAMESPACE::ChecksumType::kXXH3: - return 0x4; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::ChecksumType enum for the - // provided Java org.rocksdb.ChecksumType - static ROCKSDB_NAMESPACE::ChecksumType toCppChecksumType( - jbyte jchecksum_type) { - switch (jchecksum_type) { - case 0x0: - return ROCKSDB_NAMESPACE::ChecksumType::kNoChecksum; - case 0x1: - return ROCKSDB_NAMESPACE::ChecksumType::kCRC32c; - case 0x2: - return ROCKSDB_NAMESPACE::ChecksumType::kxxHash; - case 0x3: - return ROCKSDB_NAMESPACE::ChecksumType::kxxHash64; - case 0x4: - return ROCKSDB_NAMESPACE::ChecksumType::kXXH3; - default: - // undefined/default - return ROCKSDB_NAMESPACE::ChecksumType::kCRC32c; - } - } -}; - -// The portal class for org.rocksdb.IndexShorteningMode -class IndexShorteningModeJni { - public: - // Returns the equivalent org.rocksdb.IndexShorteningMode for the provided - // C++ ROCKSDB_NAMESPACE::IndexShorteningMode enum - static jbyte toJavaIndexShorteningMode( - const ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode& - index_shortening_mode) { - switch (index_shortening_mode) { - case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: - kNoShortening: - return 0x0; - case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: - kShortenSeparators: - return 0x1; - case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: - kShortenSeparatorsAndSuccessor: - return 0x2; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::IndexShorteningMode enum for - // the provided Java org.rocksdb.IndexShorteningMode - static ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode - toCppIndexShorteningMode(jbyte jindex_shortening_mode) { - switch (jindex_shortening_mode) { - case 0x0: 
- return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: - kNoShortening; - case 0x1: - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: - kShortenSeparators; - case 0x2: - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: - kShortenSeparatorsAndSuccessor; - default: - // undefined/default - return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexShorteningMode:: - kShortenSeparators; - } - } -}; - -// The portal class for org.rocksdb.Priority -class PriorityJni { - public: - // Returns the equivalent org.rocksdb.Priority for the provided - // C++ ROCKSDB_NAMESPACE::Env::Priority enum - static jbyte toJavaPriority( - const ROCKSDB_NAMESPACE::Env::Priority& priority) { - switch (priority) { - case ROCKSDB_NAMESPACE::Env::Priority::BOTTOM: - return 0x0; - case ROCKSDB_NAMESPACE::Env::Priority::LOW: - return 0x1; - case ROCKSDB_NAMESPACE::Env::Priority::HIGH: - return 0x2; - case ROCKSDB_NAMESPACE::Env::Priority::TOTAL: - return 0x3; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::env::Priority enum for the - // provided Java org.rocksdb.Priority - static ROCKSDB_NAMESPACE::Env::Priority toCppPriority(jbyte jpriority) { - switch (jpriority) { - case 0x0: - return ROCKSDB_NAMESPACE::Env::Priority::BOTTOM; - case 0x1: - return ROCKSDB_NAMESPACE::Env::Priority::LOW; - case 0x2: - return ROCKSDB_NAMESPACE::Env::Priority::HIGH; - case 0x3: - return ROCKSDB_NAMESPACE::Env::Priority::TOTAL; - default: - // undefined/default - return ROCKSDB_NAMESPACE::Env::Priority::LOW; - } - } -}; - -// The portal class for org.rocksdb.ThreadType -class ThreadTypeJni { - public: - // Returns the equivalent org.rocksdb.ThreadType for the provided - // C++ ROCKSDB_NAMESPACE::ThreadStatus::ThreadType enum - static jbyte toJavaThreadType( - const ROCKSDB_NAMESPACE::ThreadStatus::ThreadType& thread_type) { - switch (thread_type) { - case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::HIGH_PRIORITY: - return 0x0; - case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::LOW_PRIORITY: - return 0x1; - case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::USER: - return 0x2; - case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::BOTTOM_PRIORITY: - return 0x3; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::ThreadType enum - // for the provided Java org.rocksdb.ThreadType - static ROCKSDB_NAMESPACE::ThreadStatus::ThreadType toCppThreadType( - jbyte jthread_type) { - switch (jthread_type) { - case 0x0: - return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::HIGH_PRIORITY; - case 0x1: - return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::LOW_PRIORITY; - case 0x2: - return ThreadStatus::ThreadType::USER; - case 0x3: - return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::BOTTOM_PRIORITY; - default: - // undefined/default - return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::LOW_PRIORITY; - } - } -}; - -// The portal class for org.rocksdb.OperationType -class OperationTypeJni { - public: - // Returns the equivalent org.rocksdb.OperationType for the provided - // C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationType enum - static jbyte toJavaOperationType( - const ROCKSDB_NAMESPACE::ThreadStatus::OperationType& operation_type) { - switch (operation_type) { - case ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_UNKNOWN: - return 0x0; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_COMPACTION: - return 0x1; - case 
ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_FLUSH: - return 0x2; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationType - // enum for the provided Java org.rocksdb.OperationType - static ROCKSDB_NAMESPACE::ThreadStatus::OperationType toCppOperationType( - jbyte joperation_type) { - switch (joperation_type) { - case 0x0: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_UNKNOWN; - case 0x1: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_COMPACTION; - case 0x2: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_FLUSH; - default: - // undefined/default - return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_UNKNOWN; - } - } -}; - -// The portal class for org.rocksdb.OperationStage -class OperationStageJni { - public: - // Returns the equivalent org.rocksdb.OperationStage for the provided - // C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationStage enum - static jbyte toJavaOperationStage( - const ROCKSDB_NAMESPACE::ThreadStatus::OperationStage& operation_stage) { - switch (operation_stage) { - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_UNKNOWN: - return 0x0; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_FLUSH_RUN: - return 0x1; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_FLUSH_WRITE_L0: - return 0x2; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_PREPARE: - return 0x3; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_RUN: - return 0x4; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_PROCESS_KV: - return 0x5; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_INSTALL: - return 0x6; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_SYNC_FILE: - return 0x7; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_PICK_MEMTABLES_TO_FLUSH: - return 0x8; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_MEMTABLE_ROLLBACK: - return 0x9; - case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS: - return 0xA; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationStage - // enum for the provided Java org.rocksdb.OperationStage - static ROCKSDB_NAMESPACE::ThreadStatus::OperationStage toCppOperationStage( - jbyte joperation_stage) { - switch (joperation_stage) { - case 0x0: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_UNKNOWN; - case 0x1: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_FLUSH_RUN; - case 0x2: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_FLUSH_WRITE_L0; - case 0x3: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_PREPARE; - case 0x4: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_RUN; - case 0x5: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_PROCESS_KV; - case 0x6: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_INSTALL; - case 0x7: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_COMPACTION_SYNC_FILE; - case 0x8: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_PICK_MEMTABLES_TO_FLUSH; - case 0x9: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - STAGE_MEMTABLE_ROLLBACK; - case 0xA: - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: - 
STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS; - default: - // undefined/default - return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_UNKNOWN; - } - } -}; - -// The portal class for org.rocksdb.StateType -class StateTypeJni { - public: - // Returns the equivalent org.rocksdb.StateType for the provided - // C++ ROCKSDB_NAMESPACE::ThreadStatus::StateType enum - static jbyte toJavaStateType( - const ROCKSDB_NAMESPACE::ThreadStatus::StateType& state_type) { - switch (state_type) { - case ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_UNKNOWN: - return 0x0; - case ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_MUTEX_WAIT: - return 0x1; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::StateType enum - // for the provided Java org.rocksdb.StateType - static ROCKSDB_NAMESPACE::ThreadStatus::StateType toCppStateType( - jbyte jstate_type) { - switch (jstate_type) { - case 0x0: - return ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_UNKNOWN; - case 0x1: - return ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_MUTEX_WAIT; - default: - // undefined/default - return ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_UNKNOWN; - } - } -}; - -// The portal class for org.rocksdb.ThreadStatus -class ThreadStatusJni : public JavaClass { - public: - /** - * Get the Java Class org.rocksdb.ThreadStatus - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/ThreadStatus"); - } - - /** - * Create a new Java org.rocksdb.ThreadStatus object with the same - * properties as the provided C++ ROCKSDB_NAMESPACE::ThreadStatus object - * - * @param env A pointer to the Java environment - * @param thread_status A pointer to ROCKSDB_NAMESPACE::ThreadStatus object - * - * @return A reference to a Java org.rocksdb.ColumnFamilyOptions object, or - * nullptr if an an exception occurs - */ - static jobject construct( - JNIEnv* env, const ROCKSDB_NAMESPACE::ThreadStatus* thread_status) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID( - jclazz, "", "(JBLjava/lang/String;Ljava/lang/String;BJB[JB)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jstring jdb_name = - JniUtil::toJavaString(env, &(thread_status->db_name), true); - if (env->ExceptionCheck()) { - // an error occurred - return nullptr; - } - - jstring jcf_name = - JniUtil::toJavaString(env, &(thread_status->cf_name), true); - if (env->ExceptionCheck()) { - // an error occurred - env->DeleteLocalRef(jdb_name); - return nullptr; - } - - // long[] - const jsize len = static_cast( - ROCKSDB_NAMESPACE::ThreadStatus::kNumOperationProperties); - jlongArray joperation_properties = env->NewLongArray(len); - if (joperation_properties == nullptr) { - // an exception occurred - env->DeleteLocalRef(jdb_name); - env->DeleteLocalRef(jcf_name); - return nullptr; - } - jboolean is_copy; - jlong* body = env->GetLongArrayElements(joperation_properties, &is_copy); - if (body == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jdb_name); - env->DeleteLocalRef(jcf_name); - env->DeleteLocalRef(joperation_properties); - return 
nullptr; - } - for (size_t i = 0; i < len; ++i) { - body[i] = static_cast(thread_status->op_properties[i]); - } - env->ReleaseLongArrayElements(joperation_properties, body, - is_copy == JNI_TRUE ? 0 : JNI_ABORT); - - jobject jcfd = env->NewObject( - jclazz, mid, static_cast(thread_status->thread_id), - ThreadTypeJni::toJavaThreadType(thread_status->thread_type), jdb_name, - jcf_name, - OperationTypeJni::toJavaOperationType(thread_status->operation_type), - static_cast(thread_status->op_elapsed_micros), - OperationStageJni::toJavaOperationStage(thread_status->operation_stage), - joperation_properties, - StateTypeJni::toJavaStateType(thread_status->state_type)); - if (env->ExceptionCheck()) { - // exception occurred - env->DeleteLocalRef(jdb_name); - env->DeleteLocalRef(jcf_name); - env->DeleteLocalRef(joperation_properties); - return nullptr; - } - - // cleanup - env->DeleteLocalRef(jdb_name); - env->DeleteLocalRef(jcf_name); - env->DeleteLocalRef(joperation_properties); - - return jcfd; - } -}; - -// The portal class for org.rocksdb.CompactionStyle -class CompactionStyleJni { - public: - // Returns the equivalent org.rocksdb.CompactionStyle for the provided - // C++ ROCKSDB_NAMESPACE::CompactionStyle enum - static jbyte toJavaCompactionStyle( - const ROCKSDB_NAMESPACE::CompactionStyle& compaction_style) { - switch (compaction_style) { - case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleLevel: - return 0x0; - case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleUniversal: - return 0x1; - case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleFIFO: - return 0x2; - case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleNone: - return 0x3; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionStyle enum for the - // provided Java org.rocksdb.CompactionStyle - static ROCKSDB_NAMESPACE::CompactionStyle toCppCompactionStyle( - jbyte jcompaction_style) { - switch (jcompaction_style) { - case 0x0: - return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleLevel; - case 0x1: - return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleUniversal; - case 0x2: - return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleFIFO; - case 0x3: - return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleNone; - default: - // undefined/default - return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleLevel; - } - } -}; - -// The portal class for org.rocksdb.CompactionReason -class CompactionReasonJni { - public: - // Returns the equivalent org.rocksdb.CompactionReason for the provided - // C++ ROCKSDB_NAMESPACE::CompactionReason enum - static jbyte toJavaCompactionReason( - const ROCKSDB_NAMESPACE::CompactionReason& compaction_reason) { - switch (compaction_reason) { - case ROCKSDB_NAMESPACE::CompactionReason::kUnknown: - return 0x0; - case ROCKSDB_NAMESPACE::CompactionReason::kLevelL0FilesNum: - return 0x1; - case ROCKSDB_NAMESPACE::CompactionReason::kLevelMaxLevelSize: - return 0x2; - case ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeAmplification: - return 0x3; - case ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeRatio: - return 0x4; - case ROCKSDB_NAMESPACE::CompactionReason::kUniversalSortedRunNum: - return 0x5; - case ROCKSDB_NAMESPACE::CompactionReason::kFIFOMaxSize: - return 0x6; - case ROCKSDB_NAMESPACE::CompactionReason::kFIFOReduceNumFiles: - return 0x7; - case ROCKSDB_NAMESPACE::CompactionReason::kFIFOTtl: - return 0x8; - case ROCKSDB_NAMESPACE::CompactionReason::kManualCompaction: - return 0x9; - case 
ROCKSDB_NAMESPACE::CompactionReason::kFilesMarkedForCompaction: - return 0x10; - case ROCKSDB_NAMESPACE::CompactionReason::kBottommostFiles: - return 0x0A; - case ROCKSDB_NAMESPACE::CompactionReason::kTtl: - return 0x0B; - case ROCKSDB_NAMESPACE::CompactionReason::kFlush: - return 0x0C; - case ROCKSDB_NAMESPACE::CompactionReason::kExternalSstIngestion: - return 0x0D; - case ROCKSDB_NAMESPACE::CompactionReason::kPeriodicCompaction: - return 0x0E; - case ROCKSDB_NAMESPACE::CompactionReason::kChangeTemperature: - return 0x0F; - case ROCKSDB_NAMESPACE::CompactionReason::kForcedBlobGC: - return 0x11; - case ROCKSDB_NAMESPACE::CompactionReason::kRoundRobinTtl: - return 0x12; - case ROCKSDB_NAMESPACE::CompactionReason::kRefitLevel: - return 0x13; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionReason enum for the - // provided Java org.rocksdb.CompactionReason - static ROCKSDB_NAMESPACE::CompactionReason toCppCompactionReason( - jbyte jcompaction_reason) { - switch (jcompaction_reason) { - case 0x0: - return ROCKSDB_NAMESPACE::CompactionReason::kUnknown; - case 0x1: - return ROCKSDB_NAMESPACE::CompactionReason::kLevelL0FilesNum; - case 0x2: - return ROCKSDB_NAMESPACE::CompactionReason::kLevelMaxLevelSize; - case 0x3: - return ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeAmplification; - case 0x4: - return ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeRatio; - case 0x5: - return ROCKSDB_NAMESPACE::CompactionReason::kUniversalSortedRunNum; - case 0x6: - return ROCKSDB_NAMESPACE::CompactionReason::kFIFOMaxSize; - case 0x7: - return ROCKSDB_NAMESPACE::CompactionReason::kFIFOReduceNumFiles; - case 0x8: - return ROCKSDB_NAMESPACE::CompactionReason::kFIFOTtl; - case 0x9: - return ROCKSDB_NAMESPACE::CompactionReason::kManualCompaction; - case 0x10: - return ROCKSDB_NAMESPACE::CompactionReason::kFilesMarkedForCompaction; - case 0x0A: - return ROCKSDB_NAMESPACE::CompactionReason::kBottommostFiles; - case 0x0B: - return ROCKSDB_NAMESPACE::CompactionReason::kTtl; - case 0x0C: - return ROCKSDB_NAMESPACE::CompactionReason::kFlush; - case 0x0D: - return ROCKSDB_NAMESPACE::CompactionReason::kExternalSstIngestion; - case 0x0E: - return ROCKSDB_NAMESPACE::CompactionReason::kPeriodicCompaction; - case 0x0F: - return ROCKSDB_NAMESPACE::CompactionReason::kChangeTemperature; - case 0x11: - return ROCKSDB_NAMESPACE::CompactionReason::kForcedBlobGC; - case 0x12: - return ROCKSDB_NAMESPACE::CompactionReason::kRoundRobinTtl; - case 0x13: - return ROCKSDB_NAMESPACE::CompactionReason::kRefitLevel; - default: - // undefined/default - return ROCKSDB_NAMESPACE::CompactionReason::kUnknown; - } - } -}; - -// The portal class for org.rocksdb.WalFileType -class WalFileTypeJni { - public: - // Returns the equivalent org.rocksdb.WalFileType for the provided - // C++ ROCKSDB_NAMESPACE::WalFileType enum - static jbyte toJavaWalFileType( - const ROCKSDB_NAMESPACE::WalFileType& wal_file_type) { - switch (wal_file_type) { - case ROCKSDB_NAMESPACE::WalFileType::kArchivedLogFile: - return 0x0; - case ROCKSDB_NAMESPACE::WalFileType::kAliveLogFile: - return 0x1; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::WalFileType enum for the - // provided Java org.rocksdb.WalFileType - static ROCKSDB_NAMESPACE::WalFileType toCppWalFileType(jbyte jwal_file_type) { - switch (jwal_file_type) { - case 0x0: - return ROCKSDB_NAMESPACE::WalFileType::kArchivedLogFile; - case 0x1: - return ROCKSDB_NAMESPACE::WalFileType::kAliveLogFile; 
- default: - // undefined/default - return ROCKSDB_NAMESPACE::WalFileType::kAliveLogFile; - } - } -}; - -class LogFileJni : public JavaClass { - public: - /** - * Create a new Java org.rocksdb.LogFile object. - * - * @param env A pointer to the Java environment - * @param log_file A Cpp log file object - * - * @return A reference to a Java org.rocksdb.LogFile object, or - * nullptr if an an exception occurs - */ - static jobject fromCppLogFile(JNIEnv* env, - ROCKSDB_NAMESPACE::LogFile* log_file) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = - env->GetMethodID(jclazz, "", "(Ljava/lang/String;JBJJ)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - std::string path_name = log_file->PathName(); - jstring jpath_name = - ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &path_name, true); - if (env->ExceptionCheck()) { - // exception occurred creating java string - return nullptr; - } - - jobject jlog_file = env->NewObject( - jclazz, mid, jpath_name, static_cast(log_file->LogNumber()), - ROCKSDB_NAMESPACE::WalFileTypeJni::toJavaWalFileType(log_file->Type()), - static_cast(log_file->StartSequence()), - static_cast(log_file->SizeFileBytes())); - - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jpath_name); - return nullptr; - } - - // cleanup - env->DeleteLocalRef(jpath_name); - - return jlog_file; - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/LogFile"); - } -}; - -class LiveFileMetaDataJni : public JavaClass { - public: - /** - * Create a new Java org.rocksdb.LiveFileMetaData object. - * - * @param env A pointer to the Java environment - * @param live_file_meta_data A Cpp live file meta data object - * - * @return A reference to a Java org.rocksdb.LiveFileMetaData object, or - * nullptr if an an exception occurs - */ - static jobject fromCppLiveFileMetaData( - JNIEnv* env, ROCKSDB_NAMESPACE::LiveFileMetaData* live_file_meta_data) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID( - jclazz, "", - "([BILjava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jbyteArray jcolumn_family_name = ROCKSDB_NAMESPACE::JniUtil::copyBytes( - env, live_file_meta_data->column_family_name); - if (jcolumn_family_name == nullptr) { - // exception occurred creating java byte array - return nullptr; - } - - jstring jfile_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &live_file_meta_data->name, true); - if (env->ExceptionCheck()) { - // exception occurred creating java string - env->DeleteLocalRef(jcolumn_family_name); - return nullptr; - } - - jstring jpath = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &live_file_meta_data->db_path, true); - if (env->ExceptionCheck()) { - // exception occurred creating java string - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfile_name); - return nullptr; - } - - jbyteArray jsmallest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( - env, live_file_meta_data->smallestkey); - if (jsmallest_key == nullptr) { - // exception occurred creating java byte array - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfile_name); - env->DeleteLocalRef(jpath); - return nullptr; - } - - jbyteArray jlargest_key = 
ROCKSDB_NAMESPACE::JniUtil::copyBytes( - env, live_file_meta_data->largestkey); - if (jlargest_key == nullptr) { - // exception occurred creating java byte array - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfile_name); - env->DeleteLocalRef(jpath); - env->DeleteLocalRef(jsmallest_key); - return nullptr; - } - - jobject jlive_file_meta_data = env->NewObject( - jclazz, mid, jcolumn_family_name, - static_cast(live_file_meta_data->level), jfile_name, jpath, - static_cast(live_file_meta_data->size), - static_cast(live_file_meta_data->smallest_seqno), - static_cast(live_file_meta_data->largest_seqno), jsmallest_key, - jlargest_key, - static_cast(live_file_meta_data->num_reads_sampled), - static_cast(live_file_meta_data->being_compacted), - static_cast(live_file_meta_data->num_entries), - static_cast(live_file_meta_data->num_deletions)); - - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfile_name); - env->DeleteLocalRef(jpath); - env->DeleteLocalRef(jsmallest_key); - env->DeleteLocalRef(jlargest_key); - return nullptr; - } - - // cleanup - env->DeleteLocalRef(jcolumn_family_name); - env->DeleteLocalRef(jfile_name); - env->DeleteLocalRef(jpath); - env->DeleteLocalRef(jsmallest_key); - env->DeleteLocalRef(jlargest_key); - - return jlive_file_meta_data; - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/LiveFileMetaData"); - } -}; - -class SstFileMetaDataJni : public JavaClass { - public: - /** - * Create a new Java org.rocksdb.SstFileMetaData object. - * - * @param env A pointer to the Java environment - * @param sst_file_meta_data A Cpp sst file meta data object - * - * @return A reference to a Java org.rocksdb.SstFileMetaData object, or - * nullptr if an an exception occurs - */ - static jobject fromCppSstFileMetaData( - JNIEnv* env, - const ROCKSDB_NAMESPACE::SstFileMetaData* sst_file_meta_data) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID( - jclazz, "", "(Ljava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jstring jfile_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &sst_file_meta_data->name, true); - if (jfile_name == nullptr) { - // exception occurred creating java byte array - return nullptr; - } - - jstring jpath = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &sst_file_meta_data->db_path, true); - if (jpath == nullptr) { - // exception occurred creating java byte array - env->DeleteLocalRef(jfile_name); - return nullptr; - } - - jbyteArray jsmallest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( - env, sst_file_meta_data->smallestkey); - if (jsmallest_key == nullptr) { - // exception occurred creating java byte array - env->DeleteLocalRef(jfile_name); - env->DeleteLocalRef(jpath); - return nullptr; - } - - jbyteArray jlargest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( - env, sst_file_meta_data->largestkey); - if (jlargest_key == nullptr) { - // exception occurred creating java byte array - env->DeleteLocalRef(jfile_name); - env->DeleteLocalRef(jpath); - env->DeleteLocalRef(jsmallest_key); - return nullptr; - } - - jobject jsst_file_meta_data = env->NewObject( - jclazz, mid, jfile_name, jpath, - static_cast(sst_file_meta_data->size), - static_cast(sst_file_meta_data->smallest_seqno), - static_cast(sst_file_meta_data->largest_seqno), 
jsmallest_key, - jlargest_key, static_cast(sst_file_meta_data->num_reads_sampled), - static_cast(sst_file_meta_data->being_compacted), - static_cast(sst_file_meta_data->num_entries), - static_cast(sst_file_meta_data->num_deletions)); - - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jfile_name); - env->DeleteLocalRef(jpath); - env->DeleteLocalRef(jsmallest_key); - env->DeleteLocalRef(jlargest_key); - return nullptr; - } - - // cleanup - env->DeleteLocalRef(jfile_name); - env->DeleteLocalRef(jpath); - env->DeleteLocalRef(jsmallest_key); - env->DeleteLocalRef(jlargest_key); - - return jsst_file_meta_data; - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/SstFileMetaData"); - } -}; - -class LevelMetaDataJni : public JavaClass { - public: - /** - * Create a new Java org.rocksdb.LevelMetaData object. - * - * @param env A pointer to the Java environment - * @param level_meta_data A Cpp level meta data object - * - * @return A reference to a Java org.rocksdb.LevelMetaData object, or - * nullptr if an an exception occurs - */ - static jobject fromCppLevelMetaData( - JNIEnv* env, const ROCKSDB_NAMESPACE::LevelMetaData* level_meta_data) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID(jclazz, "", - "(IJ[Lorg/rocksdb/SstFileMetaData;)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - const jsize jlen = static_cast(level_meta_data->files.size()); - jobjectArray jfiles = - env->NewObjectArray(jlen, SstFileMetaDataJni::getJClass(env), nullptr); - if (jfiles == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - jsize i = 0; - for (auto it = level_meta_data->files.begin(); - it != level_meta_data->files.end(); ++it) { - jobject jfile = SstFileMetaDataJni::fromCppSstFileMetaData(env, &(*it)); - if (jfile == nullptr) { - // exception occurred - env->DeleteLocalRef(jfiles); - return nullptr; - } - env->SetObjectArrayElement(jfiles, i++, jfile); - } - - jobject jlevel_meta_data = - env->NewObject(jclazz, mid, static_cast(level_meta_data->level), - static_cast(level_meta_data->size), jfiles); - - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jfiles); - return nullptr; - } - - // cleanup - env->DeleteLocalRef(jfiles); - - return jlevel_meta_data; - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/LevelMetaData"); - } -}; - -class ColumnFamilyMetaDataJni : public JavaClass { - public: - /** - * Create a new Java org.rocksdb.ColumnFamilyMetaData object. 
- * - * @param env A pointer to the Java environment - * @param column_famly_meta_data A Cpp live file meta data object - * - * @return A reference to a Java org.rocksdb.ColumnFamilyMetaData object, or - * nullptr if an an exception occurs - */ - static jobject fromCppColumnFamilyMetaData( - JNIEnv* env, - const ROCKSDB_NAMESPACE::ColumnFamilyMetaData* column_famly_meta_data) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = env->GetMethodID(jclazz, "", - "(JJ[B[Lorg/rocksdb/LevelMetaData;)V"); - if (mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return nullptr; - } - - jbyteArray jname = ROCKSDB_NAMESPACE::JniUtil::copyBytes( - env, column_famly_meta_data->name); - if (jname == nullptr) { - // exception occurred creating java byte array - return nullptr; - } - - const jsize jlen = - static_cast(column_famly_meta_data->levels.size()); - jobjectArray jlevels = - env->NewObjectArray(jlen, LevelMetaDataJni::getJClass(env), nullptr); - if (jlevels == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jname); - return nullptr; - } - - jsize i = 0; - for (auto it = column_famly_meta_data->levels.begin(); - it != column_famly_meta_data->levels.end(); ++it) { - jobject jlevel = LevelMetaDataJni::fromCppLevelMetaData(env, &(*it)); - if (jlevel == nullptr) { - // exception occurred - env->DeleteLocalRef(jname); - env->DeleteLocalRef(jlevels); - return nullptr; - } - env->SetObjectArrayElement(jlevels, i++, jlevel); - } - - jobject jcolumn_family_meta_data = env->NewObject( - jclazz, mid, static_cast(column_famly_meta_data->size), - static_cast(column_famly_meta_data->file_count), jname, jlevels); - - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jname); - env->DeleteLocalRef(jlevels); - return nullptr; - } - - // cleanup - env->DeleteLocalRef(jname); - env->DeleteLocalRef(jlevels); - - return jcolumn_family_meta_data; - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/ColumnFamilyMetaData"); - } -}; - -// The portal class for org.rocksdb.AbstractTraceWriter -class AbstractTraceWriterJni - : public RocksDBNativeClass< - const ROCKSDB_NAMESPACE::TraceWriterJniCallback*, - AbstractTraceWriterJni> { - public: - /** - * Get the Java Class org.rocksdb.AbstractTraceWriter - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, - "org/rocksdb/AbstractTraceWriter"); - } - - /** - * Get the Java Method: AbstractTraceWriter#write - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getWriteProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "writeProxy", "(J)S"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractTraceWriter#closeWriter - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID 
getCloseWriterProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "closeWriterProxy", "()S"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractTraceWriter#getFileSize - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getGetFileSizeMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "getFileSize", "()J"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.AbstractWalFilter -class AbstractWalFilterJni - : public RocksDBNativeClass { - public: - /** - * Get the Java Class org.rocksdb.AbstractWalFilter - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractWalFilter"); - } - - /** - * Get the Java Method: AbstractWalFilter#columnFamilyLogNumberMap - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getColumnFamilyLogNumberMapMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "columnFamilyLogNumberMap", - "(Ljava/util/Map;Ljava/util/Map;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractTraceWriter#logRecordFoundProxy - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getLogRecordFoundProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = env->GetMethodID(jclazz, "logRecordFoundProxy", - "(JLjava/lang/String;JJ)S"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractTraceWriter#name - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID or nullptr if the class or method id could not - * be retrieved - */ - static jmethodID getNameMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - static jmethodID mid = - env->GetMethodID(jclazz, "name", "()Ljava/lang/String;"); - assert(mid != nullptr); - return mid; - } -}; - -// The portal class for org.rocksdb.WalProcessingOption -class WalProcessingOptionJni { - public: - // Returns the equivalent org.rocksdb.WalProcessingOption for the provided - // C++ ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption enum - static jbyte toJavaWalProcessingOption( - const ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption& - wal_processing_option) { - switch (wal_processing_option) { - case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: - kContinueProcessing: - 
return 0x0; - case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: - kIgnoreCurrentRecord: - return 0x1; - case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption::kStopReplay: - return 0x2; - case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption::kCorruptedRecord: - return 0x3; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ - // ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption enum for the provided - // Java org.rocksdb.WalProcessingOption - static ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption - toCppWalProcessingOption(jbyte jwal_processing_option) { - switch (jwal_processing_option) { - case 0x0: - return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: - kContinueProcessing; - case 0x1: - return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: - kIgnoreCurrentRecord; - case 0x2: - return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption::kStopReplay; - case 0x3: - return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: - kCorruptedRecord; - default: - // undefined/default - return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: - kCorruptedRecord; - } - } -}; - -// The portal class for org.rocksdb.ReusedSynchronisationType -class ReusedSynchronisationTypeJni { - public: - // Returns the equivalent org.rocksdb.ReusedSynchronisationType for the - // provided C++ ROCKSDB_NAMESPACE::ReusedSynchronisationType enum - static jbyte toJavaReusedSynchronisationType( - const ROCKSDB_NAMESPACE::ReusedSynchronisationType& - reused_synchronisation_type) { - switch (reused_synchronisation_type) { - case ROCKSDB_NAMESPACE::ReusedSynchronisationType::MUTEX: - return 0x0; - case ROCKSDB_NAMESPACE::ReusedSynchronisationType::ADAPTIVE_MUTEX: - return 0x1; - case ROCKSDB_NAMESPACE::ReusedSynchronisationType::THREAD_LOCAL: - return 0x2; - default: - return 0x7F; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::ReusedSynchronisationType - // enum for the provided Java org.rocksdb.ReusedSynchronisationType - static ROCKSDB_NAMESPACE::ReusedSynchronisationType - toCppReusedSynchronisationType(jbyte reused_synchronisation_type) { - switch (reused_synchronisation_type) { - case 0x0: - return ROCKSDB_NAMESPACE::ReusedSynchronisationType::MUTEX; - case 0x1: - return ROCKSDB_NAMESPACE::ReusedSynchronisationType::ADAPTIVE_MUTEX; - case 0x2: - return ROCKSDB_NAMESPACE::ReusedSynchronisationType::THREAD_LOCAL; - default: - // undefined/default - return ROCKSDB_NAMESPACE::ReusedSynchronisationType::ADAPTIVE_MUTEX; - } - } -}; -// The portal class for org.rocksdb.SanityLevel -class SanityLevelJni { - public: - // Returns the equivalent org.rocksdb.SanityLevel for the provided - // C++ ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel enum - static jbyte toJavaSanityLevel( - const ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel& sanity_level) { - switch (sanity_level) { - case ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel::kSanityLevelNone: - return 0x0; - case ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel:: - kSanityLevelLooselyCompatible: - return 0x1; - case ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel:: - kSanityLevelExactMatch: - return -0x01; - default: - return -0x01; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel - // enum for the provided Java org.rocksdb.SanityLevel - static ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel toCppSanityLevel( - jbyte sanity_level) { - switch (sanity_level) { - case 0x0: - return ROCKSDB_NAMESPACE::ConfigOptions::kSanityLevelNone; - case 0x1: - return 
ROCKSDB_NAMESPACE::ConfigOptions::kSanityLevelLooselyCompatible; - default: - // undefined/default - return ROCKSDB_NAMESPACE::ConfigOptions::kSanityLevelExactMatch; - } - } -}; - -// The portal class for org.rocksdb.PrepopulateBlobCache -class PrepopulateBlobCacheJni { - public: - // Returns the equivalent org.rocksdb.PrepopulateBlobCache for the provided - // C++ ROCKSDB_NAMESPACE::PrepopulateBlobCache enum - static jbyte toJavaPrepopulateBlobCache( - ROCKSDB_NAMESPACE::PrepopulateBlobCache prepopulate_blob_cache) { - switch (prepopulate_blob_cache) { - case ROCKSDB_NAMESPACE::PrepopulateBlobCache::kDisable: - return 0x0; - case ROCKSDB_NAMESPACE::PrepopulateBlobCache::kFlushOnly: - return 0x1; - default: - return 0x7f; // undefined - } - } - - // Returns the equivalent C++ ROCKSDB_NAMESPACE::PrepopulateBlobCache enum for - // the provided Java org.rocksdb.PrepopulateBlobCache - static ROCKSDB_NAMESPACE::PrepopulateBlobCache toCppPrepopulateBlobCache( - jbyte jprepopulate_blob_cache) { - switch (jprepopulate_blob_cache) { - case 0x0: - return ROCKSDB_NAMESPACE::PrepopulateBlobCache::kDisable; - case 0x1: - return ROCKSDB_NAMESPACE::PrepopulateBlobCache::kFlushOnly; - case 0x7F: - default: - // undefined/default - return ROCKSDB_NAMESPACE::PrepopulateBlobCache::kDisable; - } - } -}; - -// The portal class for org.rocksdb.AbstractListener.EnabledEventCallback -class EnabledEventCallbackJni { - public: - // Returns the set of equivalent C++ - // ROCKSDB_NAMESPACE::EnabledEventCallbackJni::EnabledEventCallback enums for - // the provided Java jenabled_event_callback_values - static std::set toCppEnabledEventCallbacks( - jlong jenabled_event_callback_values) { - std::set enabled_event_callbacks; - for (size_t i = 0; i < EnabledEventCallback::NUM_ENABLED_EVENT_CALLBACK; - ++i) { - if (((1ULL << i) & jenabled_event_callback_values) > 0) { - enabled_event_callbacks.emplace(static_cast(i)); - } - } - return enabled_event_callbacks; - } -}; - -// The portal class for org.rocksdb.AbstractEventListener -class AbstractEventListenerJni - : public RocksDBNativeClass< - const ROCKSDB_NAMESPACE::EventListenerJniCallback*, - AbstractEventListenerJni> { - public: - /** - * Get the Java Class org.rocksdb.AbstractEventListener - * - * @param env A pointer to the Java environment - * - * @return The Java Class or nullptr if one of the - * ClassFormatError, ClassCircularityError, NoClassDefFoundError, - * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown - */ - static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, - "org/rocksdb/AbstractEventListener"); - } - - /** - * Get the Java Method: AbstractEventListener#onFlushCompletedProxy - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnFlushCompletedProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID(jclazz, "onFlushCompletedProxy", - "(JLorg/rocksdb/FlushJobInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onFlushBeginProxy - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnFlushBeginProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID(jclazz, "onFlushBeginProxy", - "(JLorg/rocksdb/FlushJobInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the 
Java Method: AbstractEventListener#onTableFileDeleted - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnTableFileDeletedMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID( - jclazz, "onTableFileDeleted", "(Lorg/rocksdb/TableFileDeletionInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onCompactionBeginProxy - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnCompactionBeginProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = - env->GetMethodID(jclazz, "onCompactionBeginProxy", - "(JLorg/rocksdb/CompactionJobInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onCompactionCompletedProxy - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnCompactionCompletedProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = - env->GetMethodID(jclazz, "onCompactionCompletedProxy", - "(JLorg/rocksdb/CompactionJobInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onTableFileCreated - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnTableFileCreatedMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID( - jclazz, "onTableFileCreated", "(Lorg/rocksdb/TableFileCreationInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onTableFileCreationStarted - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnTableFileCreationStartedMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = - env->GetMethodID(jclazz, "onTableFileCreationStarted", - "(Lorg/rocksdb/TableFileCreationBriefInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onMemTableSealed - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnMemTableSealedMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID(jclazz, "onMemTableSealed", - "(Lorg/rocksdb/MemTableInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: - * AbstractEventListener#onColumnFamilyHandleDeletionStarted - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnColumnFamilyHandleDeletionStartedMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = - env->GetMethodID(jclazz, "onColumnFamilyHandleDeletionStarted", - "(Lorg/rocksdb/ColumnFamilyHandle;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onExternalFileIngestedProxy - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnExternalFileIngestedProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); 
- assert(jclazz != nullptr); - static jmethodID mid = - env->GetMethodID(jclazz, "onExternalFileIngestedProxy", - "(JLorg/rocksdb/ExternalFileIngestionInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onBackgroundError - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnBackgroundErrorProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID(jclazz, "onBackgroundErrorProxy", - "(BLorg/rocksdb/Status;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onStallConditionsChanged - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnStallConditionsChangedMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID(jclazz, "onStallConditionsChanged", - "(Lorg/rocksdb/WriteStallInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onFileReadFinish - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnFileReadFinishMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID( - jclazz, "onFileReadFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onFileWriteFinish - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnFileWriteFinishMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID( - jclazz, "onFileWriteFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onFileFlushFinish - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnFileFlushFinishMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID( - jclazz, "onFileFlushFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onFileSyncFinish - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnFileSyncFinishMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID( - jclazz, "onFileSyncFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onFileRangeSyncFinish - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnFileRangeSyncFinishMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID( - jclazz, "onFileRangeSyncFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onFileTruncateFinish - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static 
jmethodID getOnFileTruncateFinishMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID( - jclazz, "onFileTruncateFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onFileCloseFinish - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnFileCloseFinishMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID( - jclazz, "onFileCloseFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#shouldBeNotifiedOnFileIO - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getShouldBeNotifiedOnFileIOMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = - env->GetMethodID(jclazz, "shouldBeNotifiedOnFileIO", "()Z"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onErrorRecoveryBeginProxy - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnErrorRecoveryBeginProxyMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID(jclazz, "onErrorRecoveryBeginProxy", - "(BLorg/rocksdb/Status;)Z"); - assert(mid != nullptr); - return mid; - } - - /** - * Get the Java Method: AbstractEventListener#onErrorRecoveryCompleted - * - * @param env A pointer to the Java environment - * - * @return The Java Method ID - */ - static jmethodID getOnErrorRecoveryCompletedMethodId(JNIEnv* env) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID mid = env->GetMethodID(jclazz, "onErrorRecoveryCompleted", - "(Lorg/rocksdb/Status;)V"); - assert(mid != nullptr); - return mid; - } -}; - -class FlushJobInfoJni : public JavaClass { - public: - /** - * Create a new Java org.rocksdb.FlushJobInfo object. 
- * - * @param env A pointer to the Java environment - * @param flush_job_info A Cpp flush job info object - * - * @return A reference to a Java org.rocksdb.FlushJobInfo object, or - * nullptr if an an exception occurs - */ - static jobject fromCppFlushJobInfo( - JNIEnv* env, const ROCKSDB_NAMESPACE::FlushJobInfo* flush_job_info) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - jstring jcf_name = JniUtil::toJavaString(env, &flush_job_info->cf_name); - if (env->ExceptionCheck()) { - return nullptr; - } - jstring jfile_path = JniUtil::toJavaString(env, &flush_job_info->file_path); - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jfile_path); - return nullptr; - } - jobject jtable_properties = TablePropertiesJni::fromCppTableProperties( - env, flush_job_info->table_properties); - if (jtable_properties == nullptr) { - env->DeleteLocalRef(jcf_name); - env->DeleteLocalRef(jfile_path); - return nullptr; - } - return env->NewObject( - jclazz, ctor, static_cast(flush_job_info->cf_id), jcf_name, - jfile_path, static_cast(flush_job_info->thread_id), - static_cast(flush_job_info->job_id), - static_cast(flush_job_info->triggered_writes_slowdown), - static_cast(flush_job_info->triggered_writes_stop), - static_cast(flush_job_info->smallest_seqno), - static_cast(flush_job_info->largest_seqno), jtable_properties, - static_cast(flush_job_info->flush_reason)); - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/FlushJobInfo"); - } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID(clazz, "", - "(JLjava/lang/String;Ljava/lang/String;JIZZJJLorg/" - "rocksdb/TableProperties;B)V"); - } -}; - -class TableFileDeletionInfoJni : public JavaClass { - public: - /** - * Create a new Java org.rocksdb.TableFileDeletionInfo object. 
- * - * @param env A pointer to the Java environment - * @param file_del_info A Cpp table file deletion info object - * - * @return A reference to a Java org.rocksdb.TableFileDeletionInfo object, or - * nullptr if an an exception occurs - */ - static jobject fromCppTableFileDeletionInfo( - JNIEnv* env, - const ROCKSDB_NAMESPACE::TableFileDeletionInfo* file_del_info) { - jclass jclazz = getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - jstring jdb_name = JniUtil::toJavaString(env, &file_del_info->db_name); - if (env->ExceptionCheck()) { - return nullptr; - } - jobject jstatus = StatusJni::construct(env, file_del_info->status); - if (jstatus == nullptr) { - env->DeleteLocalRef(jdb_name); - return nullptr; - } - return env->NewObject(jclazz, ctor, jdb_name, - JniUtil::toJavaString(env, &file_del_info->file_path), - static_cast(file_del_info->job_id), jstatus); - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableFileDeletionInfo"); - } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID( - clazz, "", - "(Ljava/lang/String;Ljava/lang/String;ILorg/rocksdb/Status;)V"); - } -}; - -class CompactionJobInfoJni : public JavaClass { - public: - static jobject fromCppCompactionJobInfo( - JNIEnv* env, - const ROCKSDB_NAMESPACE::CompactionJobInfo* compaction_job_info) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - return env->NewObject(jclazz, ctor, - GET_CPLUSPLUS_POINTER(compaction_job_info)); - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/CompactionJobInfo"); - } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID(clazz, "", "(J)V"); - } -}; - -class TableFileCreationInfoJni : public JavaClass { - public: - static jobject fromCppTableFileCreationInfo( - JNIEnv* env, const ROCKSDB_NAMESPACE::TableFileCreationInfo* info) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - jstring jdb_name = JniUtil::toJavaString(env, &info->db_name); - if (env->ExceptionCheck()) { - return nullptr; - } - jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jdb_name); - return nullptr; - } - jstring jfile_path = JniUtil::toJavaString(env, &info->file_path); - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jdb_name); - env->DeleteLocalRef(jcf_name); - return nullptr; - } - jobject jtable_properties = - TablePropertiesJni::fromCppTableProperties(env, info->table_properties); - if (jtable_properties == nullptr) { - env->DeleteLocalRef(jdb_name); - env->DeleteLocalRef(jcf_name); - return nullptr; - } - jobject jstatus = StatusJni::construct(env, info->status); - if (jstatus == nullptr) { - env->DeleteLocalRef(jdb_name); - env->DeleteLocalRef(jcf_name); - env->DeleteLocalRef(jtable_properties); - return nullptr; - } - return env->NewObject(jclazz, ctor, static_cast(info->file_size), - jtable_properties, jstatus, jdb_name, jcf_name, - jfile_path, static_cast(info->job_id), - static_cast(info->reason)); - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableFileCreationInfo"); 
- } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID( - clazz, "", - "(JLorg/rocksdb/TableProperties;Lorg/rocksdb/Status;Ljava/lang/" - "String;Ljava/lang/String;Ljava/lang/String;IB)V"); - } -}; - -class TableFileCreationBriefInfoJni : public JavaClass { - public: - static jobject fromCppTableFileCreationBriefInfo( - JNIEnv* env, const ROCKSDB_NAMESPACE::TableFileCreationBriefInfo* info) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - jstring jdb_name = JniUtil::toJavaString(env, &info->db_name); - if (env->ExceptionCheck()) { - return nullptr; - } - jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jdb_name); - return nullptr; - } - jstring jfile_path = JniUtil::toJavaString(env, &info->file_path); - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jdb_name); - env->DeleteLocalRef(jcf_name); - return nullptr; - } - return env->NewObject(jclazz, ctor, jdb_name, jcf_name, jfile_path, - static_cast(info->job_id), - static_cast(info->reason)); - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableFileCreationBriefInfo"); - } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID( - clazz, "", - "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;IB)V"); - } -}; - -class MemTableInfoJni : public JavaClass { - public: - static jobject fromCppMemTableInfo( - JNIEnv* env, const ROCKSDB_NAMESPACE::MemTableInfo* info) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); - if (env->ExceptionCheck()) { - return nullptr; - } - return env->NewObject(jclazz, ctor, jcf_name, - static_cast(info->first_seqno), - static_cast(info->earliest_seqno), - static_cast(info->num_entries), - static_cast(info->num_deletes)); - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/MemTableInfo"); - } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID(clazz, "", "(Ljava/lang/String;JJJJ)V"); - } -}; - -class ExternalFileIngestionInfoJni : public JavaClass { - public: - static jobject fromCppExternalFileIngestionInfo( - JNIEnv* env, const ROCKSDB_NAMESPACE::ExternalFileIngestionInfo* info) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); - if (env->ExceptionCheck()) { - return nullptr; - } - jstring jexternal_file_path = - JniUtil::toJavaString(env, &info->external_file_path); - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jcf_name); - return nullptr; - } - jstring jinternal_file_path = - JniUtil::toJavaString(env, &info->internal_file_path); - if (env->ExceptionCheck()) { - env->DeleteLocalRef(jcf_name); - env->DeleteLocalRef(jexternal_file_path); - return nullptr; - } - jobject jtable_properties = - TablePropertiesJni::fromCppTableProperties(env, info->table_properties); - if (jtable_properties == nullptr) { - env->DeleteLocalRef(jcf_name); - env->DeleteLocalRef(jexternal_file_path); - env->DeleteLocalRef(jinternal_file_path); - return nullptr; - } - return env->NewObject( - 
jclazz, ctor, jcf_name, jexternal_file_path, jinternal_file_path, - static_cast(info->global_seqno), jtable_properties); - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/ExternalFileIngestionInfo"); - } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID(clazz, "", - "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/" - "String;JLorg/rocksdb/TableProperties;)V"); - } -}; - -class WriteStallInfoJni : public JavaClass { - public: - static jobject fromCppWriteStallInfo( - JNIEnv* env, const ROCKSDB_NAMESPACE::WriteStallInfo* info) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - jstring jcf_name = JniUtil::toJavaString(env, &info->cf_name); - if (env->ExceptionCheck()) { - return nullptr; - } - return env->NewObject(jclazz, ctor, jcf_name, - static_cast(info->condition.cur), - static_cast(info->condition.prev)); - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/WriteStallInfo"); - } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID(clazz, "", "(Ljava/lang/String;BB)V"); - } -}; - -class FileOperationInfoJni : public JavaClass { - public: - static jobject fromCppFileOperationInfo( - JNIEnv* env, const ROCKSDB_NAMESPACE::FileOperationInfo* info) { - jclass jclazz = getJClass(env); - assert(jclazz != nullptr); - static jmethodID ctor = getConstructorMethodId(env, jclazz); - assert(ctor != nullptr); - jstring jpath = JniUtil::toJavaString(env, &info->path); - if (env->ExceptionCheck()) { - return nullptr; - } - jobject jstatus = StatusJni::construct(env, info->status); - if (jstatus == nullptr) { - env->DeleteLocalRef(jpath); - return nullptr; - } - return env->NewObject( - jclazz, ctor, jpath, static_cast(info->offset), - static_cast(info->length), - static_cast(info->start_ts.time_since_epoch().count()), - static_cast(info->duration.count()), jstatus); - } - - static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/FileOperationInfo"); - } - - static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { - return env->GetMethodID(clazz, "", - "(Ljava/lang/String;JJJJLorg/rocksdb/Status;)V"); - } -}; -} // namespace ROCKSDB_NAMESPACE -#endif // JAVA_ROCKSJNI_PORTAL_H_ diff --git a/java/rocksjni/ratelimiterjni.cc b/java/rocksjni/ratelimiterjni.cc deleted file mode 100644 index 7a17f367e..000000000 --- a/java/rocksjni/ratelimiterjni.cc +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for RateLimiter. 
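The rate limiter bridge that follows hands ownership across the JNI boundary by heap-allocating a std::shared_ptr and returning its address to Java as a jlong; disposeInternal later deletes that shared_ptr. A minimal illustrative sketch of that pattern, using stand-in names only (NativeLimiter, new_limiter_handle and dispose_limiter_handle are not RocksDB symbols):

// Illustrative sketch, not part of the deleted file.
#include <cstdint>
#include <memory>

struct NativeLimiter {};  // stand-in for ROCKSDB_NAMESPACE::RateLimiter

int64_t new_limiter_handle() {
  // the heap-allocated shared_ptr is the "handle"; only its address crosses JNI
  auto* sptr = new std::shared_ptr<NativeLimiter>(std::make_shared<NativeLimiter>());
  return reinterpret_cast<int64_t>(sptr);
}

void dispose_limiter_handle(int64_t handle) {
  // deleting the shared_ptr drops this reference; the limiter itself is freed
  // once no other shared_ptr (e.g. one held by the DB options) still owns it
  delete reinterpret_cast<std::shared_ptr<NativeLimiter>*>(handle);
}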
- -#include "include/org_rocksdb_RateLimiter.h" -#include "rocksdb/rate_limiter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_RateLimiter - * Method: newRateLimiterHandle - * Signature: (JJIBZ)J - */ -jlong Java_org_rocksdb_RateLimiter_newRateLimiterHandle( - JNIEnv* /*env*/, jclass /*jclazz*/, jlong jrate_bytes_per_second, - jlong jrefill_period_micros, jint jfairness, jbyte jrate_limiter_mode, - jboolean jauto_tune) { - auto rate_limiter_mode = - ROCKSDB_NAMESPACE::RateLimiterModeJni::toCppRateLimiterMode( - jrate_limiter_mode); - auto* sptr_rate_limiter = new std::shared_ptr( - ROCKSDB_NAMESPACE::NewGenericRateLimiter( - static_cast(jrate_bytes_per_second), - static_cast(jrefill_period_micros), - static_cast(jfairness), rate_limiter_mode, jauto_tune)); - - return GET_CPLUSPLUS_POINTER(sptr_rate_limiter); -} - -/* - * Class: org_rocksdb_RateLimiter - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_RateLimiter_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* handle = - reinterpret_cast*>( - jhandle); - delete handle; // delete std::shared_ptr -} - -/* - * Class: org_rocksdb_RateLimiter - * Method: setBytesPerSecond - * Signature: (JJ)V - */ -void Java_org_rocksdb_RateLimiter_setBytesPerSecond(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle, - jlong jbytes_per_second) { - reinterpret_cast*>(handle) - ->get() - ->SetBytesPerSecond(jbytes_per_second); -} - -/* - * Class: org_rocksdb_RateLimiter - * Method: getBytesPerSecond - * Signature: (J)J - */ -jlong Java_org_rocksdb_RateLimiter_getBytesPerSecond(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - return reinterpret_cast*>( - handle) - ->get() - ->GetBytesPerSecond(); -} - -/* - * Class: org_rocksdb_RateLimiter - * Method: request - * Signature: (JJ)V - */ -void Java_org_rocksdb_RateLimiter_request(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle, jlong jbytes) { - reinterpret_cast*>(handle) - ->get() - ->Request(jbytes, ROCKSDB_NAMESPACE::Env::IO_TOTAL); -} - -/* - * Class: org_rocksdb_RateLimiter - * Method: getSingleBurstBytes - * Signature: (J)J - */ -jlong Java_org_rocksdb_RateLimiter_getSingleBurstBytes(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - return reinterpret_cast*>( - handle) - ->get() - ->GetSingleBurstBytes(); -} - -/* - * Class: org_rocksdb_RateLimiter - * Method: getTotalBytesThrough - * Signature: (J)J - */ -jlong Java_org_rocksdb_RateLimiter_getTotalBytesThrough(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - return reinterpret_cast*>( - handle) - ->get() - ->GetTotalBytesThrough(); -} - -/* - * Class: org_rocksdb_RateLimiter - * Method: getTotalRequests - * Signature: (J)J - */ -jlong Java_org_rocksdb_RateLimiter_getTotalRequests(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - return reinterpret_cast*>( - handle) - ->get() - ->GetTotalRequests(); -} diff --git a/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc b/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc deleted file mode 100644 index c0b09e151..000000000 --- a/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
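The small file that follows exposes a single factory that constructs a RemoveEmptyValueCompactionFilter and returns the raw pointer as a jlong. As the class name suggests, the filter drops entries whose value is empty; a hedged sketch of an equivalent filter, assuming the classic CompactionFilter::Filter() signature (DropEmptyValues is a hypothetical name, not the RocksDB class):

// Illustrative sketch, not part of the deleted file.
#include <string>
#include "rocksdb/compaction_filter.h"
#include "rocksdb/slice.h"

class DropEmptyValues : public ROCKSDB_NAMESPACE::CompactionFilter {
 public:
  bool Filter(int /*level*/, const ROCKSDB_NAMESPACE::Slice& /*key*/,
              const ROCKSDB_NAMESPACE::Slice& existing_value,
              std::string* /*new_value*/, bool* /*value_changed*/) const override {
    return existing_value.empty();  // returning true removes the entry during compaction
  }
  const char* Name() const override { return "DropEmptyValues"; }
};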
- -#include - -#include "include/org_rocksdb_RemoveEmptyValueCompactionFilter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "utilities/compaction_filters/remove_emptyvalue_compactionfilter.h" - -/* - * Class: org_rocksdb_RemoveEmptyValueCompactionFilter - * Method: createNewRemoveEmptyValueCompactionFilter0 - * Signature: ()J - */ -jlong Java_org_rocksdb_RemoveEmptyValueCompactionFilter_createNewRemoveEmptyValueCompactionFilter0( - JNIEnv* /*env*/, jclass /*jcls*/) { - auto* compaction_filter = - new ROCKSDB_NAMESPACE::RemoveEmptyValueCompactionFilter(); - - // set the native handle to our native compaction filter - return GET_CPLUSPLUS_POINTER(compaction_filter); -} diff --git a/java/rocksjni/restorejni.cc b/java/rocksjni/restorejni.cc deleted file mode 100644 index aadc86128..000000000 --- a/java/rocksjni/restorejni.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling C++ ROCKSDB_NAMESPACE::RestoreOptions methods -// from Java side. - -#include -#include -#include - -#include - -#include "include/org_rocksdb_RestoreOptions.h" -#include "rocksdb/utilities/backup_engine.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -/* - * Class: org_rocksdb_RestoreOptions - * Method: newRestoreOptions - * Signature: (Z)J - */ -jlong Java_org_rocksdb_RestoreOptions_newRestoreOptions( - JNIEnv* /*env*/, jclass /*jcls*/, jboolean keep_log_files) { - auto* ropt = new ROCKSDB_NAMESPACE::RestoreOptions(keep_log_files); - return GET_CPLUSPLUS_POINTER(ropt); -} - -/* - * Class: org_rocksdb_RestoreOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_RestoreOptions_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* ropt = reinterpret_cast(jhandle); - assert(ropt); - delete ropt; -} diff --git a/java/rocksjni/rocks_callback_object.cc b/java/rocksjni/rocks_callback_object.cc deleted file mode 100644 index 35513e151..000000000 --- a/java/rocksjni/rocks_callback_object.cc +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// JNI Callbacks from C++ to sub-classes or org.rocksdb.RocksCallbackObject - -#include - -#include "include/org_rocksdb_RocksCallbackObject.h" -#include "jnicallback.h" - -/* - * Class: org_rocksdb_RocksCallbackObject - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_RocksCallbackObject_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - // TODO(AR) is deleting from the super class JniCallback OK, or must we delete - // the subclass? Example hierarchies: - // 1) Comparator -> BaseComparatorJniCallback + JniCallback -> - // DirectComparatorJniCallback 2) Comparator -> BaseComparatorJniCallback + - // JniCallback -> ComparatorJniCallback - // I think this is okay, as Comparator and JniCallback both have virtual - // destructors... 
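// Editorial aside, not part of the deleted file: the TODO above hinges on the
// usual C++ rule that deleting a derived object through a base pointer is only
// well-defined when the base destructor is virtual, as JniCallback's is.
// A stand-in sketch (CallbackBase and ComparatorCallback are hypothetical names):
struct CallbackBase {
  virtual ~CallbackBase() = default;  // virtual, like JniCallback's destructor
};
struct ComparatorCallback : CallbackBase {
  ~ComparatorCallback() override = default;  // would release JNI global refs here
};
// deleting through the base pointer is well-defined because ~CallbackBase is virtual:
CallbackBase* sketch_cb = new ComparatorCallback();
delete sketch_cb;  // runs ~ComparatorCallback first, then ~CallbackBase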
- delete reinterpret_cast(handle); -} diff --git a/java/rocksjni/rocksdb_exception_test.cc b/java/rocksjni/rocksdb_exception_test.cc deleted file mode 100644 index 67e62f726..000000000 --- a/java/rocksjni/rocksdb_exception_test.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include - -#include "include/org_rocksdb_RocksDBExceptionTest.h" -#include "rocksdb/slice.h" -#include "rocksdb/status.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_RocksDBExceptionTest - * Method: raiseException - * Signature: ()V - */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseException(JNIEnv* env, - jobject /*jobj*/) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, - std::string("test message")); -} - -/* - * Class: org_rocksdb_RocksDBExceptionTest - * Method: raiseExceptionWithStatusCode - * Signature: ()V - */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCode( - JNIEnv* env, jobject /*jobj*/) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "test message", ROCKSDB_NAMESPACE::Status::NotSupported()); -} - -/* - * Class: org_rocksdb_RocksDBExceptionTest - * Method: raiseExceptionNoMsgWithStatusCode - * Signature: ()V - */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCode( - JNIEnv* env, jobject /*jobj*/) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::NotSupported()); -} - -/* - * Class: org_rocksdb_RocksDBExceptionTest - * Method: raiseExceptionWithStatusCodeSubCode - * Signature: ()V - */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeSubCode( - JNIEnv* env, jobject /*jobj*/) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "test message", - ROCKSDB_NAMESPACE::Status::TimedOut( - ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout)); -} - -/* - * Class: org_rocksdb_RocksDBExceptionTest - * Method: raiseExceptionNoMsgWithStatusCodeSubCode - * Signature: ()V - */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCodeSubCode( - JNIEnv* env, jobject /*jobj*/) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::TimedOut( - ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout)); -} - -/* - * Class: org_rocksdb_RocksDBExceptionTest - * Method: raiseExceptionWithStatusCodeState - * Signature: ()V - */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeState( - JNIEnv* env, jobject /*jobj*/) { - ROCKSDB_NAMESPACE::Slice state("test state"); - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "test message", ROCKSDB_NAMESPACE::Status::NotSupported(state)); -} diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc deleted file mode 100644 index ced72e841..000000000 --- a/java/rocksjni/rocksjni.cc +++ /dev/null @@ -1,3957 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::DB methods from Java side. 
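Throughout the file below, the JNI open entry points differ only in which DB::Open variant they call, so the shared plumbing is factored into helpers that accept a std::function and each entry point passes a lambda. A minimal sketch of that shape with stand-in types (SketchStatus, SketchDB, open_helper and open_read_only are illustrative names, not RocksDB symbols):

// Illustrative sketch, not part of the deleted file.
#include <cstdint>
#include <functional>
#include <string>

struct SketchStatus { bool ok = true; };  // stand-in for ROCKSDB_NAMESPACE::Status
struct SketchDB {};                       // stand-in for ROCKSDB_NAMESPACE::DB

// shared plumbing: run the supplied open function and hand the DB pointer back
// to Java as a jlong; on failure the real helper throws RocksDBException and returns 0
int64_t open_helper(const std::string& db_path,
                    const std::function<SketchStatus(const std::string&, SketchDB**)>& open_fn) {
  SketchDB* db = nullptr;
  SketchStatus s = open_fn(db_path, &db);
  return s.ok ? reinterpret_cast<int64_t>(db) : 0;
}

// an entry point only supplies the lambda that selects the open variant
int64_t open_read_only(const std::string& db_path) {
  return open_helper(db_path, [](const std::string& /*path*/, SketchDB** db) {
    *db = new SketchDB();  // stands in for DB::OpenForReadOnly(options, path, db, ...)
    return SketchStatus{};
  });
}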
- -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "include/org_rocksdb_RocksDB.h" -#include "rocksdb/cache.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/options.h" -#include "rocksdb/types.h" -#include "rocksdb/version.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -#ifdef min -#undef min -#endif - -jlong rocksdb_open_helper(JNIEnv* env, jlong jopt_handle, jstring jdb_path, - std::function - open_fn) { - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - - auto* opt = reinterpret_cast(jopt_handle); - ROCKSDB_NAMESPACE::DB* db = nullptr; - ROCKSDB_NAMESPACE::Status s = open_fn(*opt, db_path, &db); - - env->ReleaseStringUTFChars(jdb_path, db_path); - - if (s.ok()) { - return GET_CPLUSPLUS_POINTER(db); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return 0; - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: open - * Signature: (JLjava/lang/String;)J - */ -jlong Java_org_rocksdb_RocksDB_open__JLjava_lang_String_2(JNIEnv* env, jclass, - jlong jopt_handle, - jstring jdb_path) { - return rocksdb_open_helper(env, jopt_handle, jdb_path, - (ROCKSDB_NAMESPACE::Status(*)( - const ROCKSDB_NAMESPACE::Options&, - const std::string&, ROCKSDB_NAMESPACE::DB**)) & - ROCKSDB_NAMESPACE::DB::Open); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: openROnly - * Signature: (JLjava/lang/String;Z)J - */ -jlong Java_org_rocksdb_RocksDB_openROnly__JLjava_lang_String_2Z( - JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, - jboolean jerror_if_wal_file_exists) { - const bool error_if_wal_file_exists = jerror_if_wal_file_exists == JNI_TRUE; - return rocksdb_open_helper( - env, jopt_handle, jdb_path, - [error_if_wal_file_exists](const ROCKSDB_NAMESPACE::Options& options, - const std::string& db_path, - ROCKSDB_NAMESPACE::DB** db) { - return ROCKSDB_NAMESPACE::DB::OpenForReadOnly(options, db_path, db, - error_if_wal_file_exists); - }); -} - -jlongArray rocksdb_open_helper( - JNIEnv* env, jlong jopt_handle, jstring jdb_path, - jobjectArray jcolumn_names, jlongArray jcolumn_options, - std::function&, - std::vector*, - ROCKSDB_NAMESPACE::DB**)> - open_fn) { - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - const jsize len_cols = env->GetArrayLength(jcolumn_names); - jlong* jco = env->GetLongArrayElements(jcolumn_options, nullptr); - if (jco == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - - std::vector column_families; - jboolean has_exception = JNI_FALSE; - ROCKSDB_NAMESPACE::JniUtil::byteStrings( - env, jcolumn_names, - [](const char* str_data, const size_t str_len) { - return std::string(str_data, str_len); - }, - [&jco, &column_families](size_t idx, std::string cf_name) { - ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = - reinterpret_cast(jco[idx]); - column_families.push_back( - ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); - }, - &has_exception); - - env->ReleaseLongArrayElements(jcolumn_options, jco, JNI_ABORT); - - if (has_exception == JNI_TRUE) { - // exception occurred - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - - auto* opt = reinterpret_cast(jopt_handle); - std::vector cf_handles; - ROCKSDB_NAMESPACE::DB* db 
= nullptr; - ROCKSDB_NAMESPACE::Status s = - open_fn(*opt, db_path, column_families, &cf_handles, &db); - - // we have now finished with db_path - env->ReleaseStringUTFChars(jdb_path, db_path); - - // check if open operation was successful - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - const jsize resultsLen = 1 + len_cols; // db handle + column family handles - std::unique_ptr results = - std::unique_ptr(new jlong[resultsLen]); - results[0] = GET_CPLUSPLUS_POINTER(db); - for (int i = 1; i <= len_cols; i++) { - results[i] = GET_CPLUSPLUS_POINTER(cf_handles[i - 1]); - } - - jlongArray jresults = env->NewLongArray(resultsLen); - if (jresults == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetLongArrayRegion(jresults, 0, resultsLen, results.get()); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresults); - return nullptr; - } - - return jresults; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: openROnly - * Signature: (JLjava/lang/String;[[B[JZ)[J - */ -jlongArray Java_org_rocksdb_RocksDB_openROnly__JLjava_lang_String_2_3_3B_3JZ( - JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, - jobjectArray jcolumn_names, jlongArray jcolumn_options, - jboolean jerror_if_wal_file_exists) { - const bool error_if_wal_file_exists = jerror_if_wal_file_exists == JNI_TRUE; - return rocksdb_open_helper( - env, jopt_handle, jdb_path, jcolumn_names, jcolumn_options, - [error_if_wal_file_exists]( - const ROCKSDB_NAMESPACE::DBOptions& options, - const std::string& db_path, - const std::vector& - column_families, - std::vector* handles, - ROCKSDB_NAMESPACE::DB** db) { - return ROCKSDB_NAMESPACE::DB::OpenForReadOnly( - options, db_path, column_families, handles, db, - error_if_wal_file_exists); - }); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: open - * Signature: (JLjava/lang/String;[[B[J)[J - */ -jlongArray Java_org_rocksdb_RocksDB_open__JLjava_lang_String_2_3_3B_3J( - JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, - jobjectArray jcolumn_names, jlongArray jcolumn_options) { - return rocksdb_open_helper( - env, jopt_handle, jdb_path, jcolumn_names, jcolumn_options, - (ROCKSDB_NAMESPACE::Status(*)( - const ROCKSDB_NAMESPACE::DBOptions&, const std::string&, - const std::vector&, - std::vector*, - ROCKSDB_NAMESPACE::DB**)) & - ROCKSDB_NAMESPACE::DB::Open); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: openAsSecondary - * Signature: (JLjava/lang/String;Ljava/lang/String;)J - */ -jlong Java_org_rocksdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_String_2( - JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, - jstring jsecondary_db_path) { - const char* secondary_db_path = - env->GetStringUTFChars(jsecondary_db_path, nullptr); - if (secondary_db_path == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - - jlong db_handle = rocksdb_open_helper( - env, jopt_handle, jdb_path, - [secondary_db_path](const ROCKSDB_NAMESPACE::Options& options, - const std::string& db_path, - ROCKSDB_NAMESPACE::DB** db) { - return ROCKSDB_NAMESPACE::DB::OpenAsSecondary(options, db_path, - secondary_db_path, db); - }); - - // we have now finished with secondary_db_path - env->ReleaseStringUTFChars(jsecondary_db_path, secondary_db_path); - - return db_handle; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: openAsSecondary - * Signature: (JLjava/lang/String;Ljava/lang/String;[[B[J)[J - */ -jlongArray 
-Java_org_rocksdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_String_2_3_3B_3J( - JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, - jstring jsecondary_db_path, jobjectArray jcolumn_names, - jlongArray jcolumn_options) { - const char* secondary_db_path = - env->GetStringUTFChars(jsecondary_db_path, nullptr); - if (secondary_db_path == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - jlongArray jhandles = rocksdb_open_helper( - env, jopt_handle, jdb_path, jcolumn_names, jcolumn_options, - [secondary_db_path]( - const ROCKSDB_NAMESPACE::DBOptions& options, - const std::string& db_path, - const std::vector& - column_families, - std::vector* handles, - ROCKSDB_NAMESPACE::DB** db) { - return ROCKSDB_NAMESPACE::DB::OpenAsSecondary( - options, db_path, secondary_db_path, column_families, handles, db); - }); - - // we have now finished with secondary_db_path - env->ReleaseStringUTFChars(jsecondary_db_path, secondary_db_path); - - return jhandles; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_disposeInternal(JNIEnv*, jobject, jlong jhandle) { - auto* db = reinterpret_cast(jhandle); - assert(db != nullptr); - delete db; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: closeDatabase - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_closeDatabase(JNIEnv* env, jclass, - jlong jhandle) { - auto* db = reinterpret_cast(jhandle); - assert(db != nullptr); - ROCKSDB_NAMESPACE::Status s = db->Close(); - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: listColumnFamilies - * Signature: (JLjava/lang/String;)[[B - */ -jobjectArray Java_org_rocksdb_RocksDB_listColumnFamilies(JNIEnv* env, jclass, - jlong jopt_handle, - jstring jdb_path) { - std::vector column_family_names; - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - auto* opt = reinterpret_cast(jopt_handle); - ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DB::ListColumnFamilies( - *opt, db_path, &column_family_names); - - env->ReleaseStringUTFChars(jdb_path, db_path); - - jobjectArray jcolumn_family_names = - ROCKSDB_NAMESPACE::JniUtil::stringsBytes(env, column_family_names); - - return jcolumn_family_names; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: createColumnFamily - * Signature: (J[BIJ)J - */ -jlong Java_org_rocksdb_RocksDB_createColumnFamily(JNIEnv* env, jobject, - jlong jhandle, - jbyteArray jcf_name, - jint jcf_name_len, - jlong jcf_options_handle) { - auto* db = reinterpret_cast(jhandle); - jboolean has_exception = JNI_FALSE; - const std::string cf_name = - ROCKSDB_NAMESPACE::JniUtil::byteString( - env, jcf_name, jcf_name_len, - [](const char* str, const size_t len) { - return std::string(str, len); - }, - &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return 0; - } - auto* cf_options = reinterpret_cast( - jcf_options_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - ROCKSDB_NAMESPACE::Status s = - db->CreateColumnFamily(*cf_options, cf_name, &cf_handle); - if (!s.ok()) { - // error occurred - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return 0; - } - return GET_CPLUSPLUS_POINTER(cf_handle); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: createColumnFamilies - * Signature: (JJ[[B)[J - */ -jlongArray Java_org_rocksdb_RocksDB_createColumnFamilies__JJ_3_3B( - JNIEnv* env, 
jobject, jlong jhandle, jlong jcf_options_handle, - jobjectArray jcf_names) { - auto* db = reinterpret_cast(jhandle); - auto* cf_options = reinterpret_cast( - jcf_options_handle); - jboolean has_exception = JNI_FALSE; - std::vector cf_names; - ROCKSDB_NAMESPACE::JniUtil::byteStrings( - env, jcf_names, - [](const char* str, const size_t len) { return std::string(str, len); }, - [&cf_names](const size_t, std::string str) { cf_names.push_back(str); }, - &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return nullptr; - } - - std::vector cf_handles; - ROCKSDB_NAMESPACE::Status s = - db->CreateColumnFamilies(*cf_options, cf_names, &cf_handles); - if (!s.ok()) { - // error occurred - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - jlongArray jcf_handles = ROCKSDB_NAMESPACE::JniUtil::toJPointers< - ROCKSDB_NAMESPACE::ColumnFamilyHandle>(env, cf_handles, &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return nullptr; - } - return jcf_handles; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: createColumnFamilies - * Signature: (J[J[[B)[J - */ -jlongArray Java_org_rocksdb_RocksDB_createColumnFamilies__J_3J_3_3B( - JNIEnv* env, jobject, jlong jhandle, jlongArray jcf_options_handles, - jobjectArray jcf_names) { - auto* db = reinterpret_cast(jhandle); - const jsize jlen = env->GetArrayLength(jcf_options_handles); - std::vector cf_descriptors; - cf_descriptors.reserve(jlen); - - jlong* jcf_options_handles_elems = - env->GetLongArrayElements(jcf_options_handles, nullptr); - if (jcf_options_handles_elems == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - // extract the column family descriptors - jboolean has_exception = JNI_FALSE; - for (jsize i = 0; i < jlen; i++) { - auto* cf_options = - reinterpret_cast( - jcf_options_handles_elems[i]); - jbyteArray jcf_name = - static_cast(env->GetObjectArrayElement(jcf_names, i)); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->ReleaseLongArrayElements(jcf_options_handles, - jcf_options_handles_elems, JNI_ABORT); - return nullptr; - } - const std::string cf_name = - ROCKSDB_NAMESPACE::JniUtil::byteString( - env, jcf_name, - [](const char* str, const size_t len) { - return std::string(str, len); - }, - &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - env->DeleteLocalRef(jcf_name); - env->ReleaseLongArrayElements(jcf_options_handles, - jcf_options_handles_elems, JNI_ABORT); - return nullptr; - } - - cf_descriptors.push_back( - ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); - - env->DeleteLocalRef(jcf_name); - } - - std::vector cf_handles; - ROCKSDB_NAMESPACE::Status s = - db->CreateColumnFamilies(cf_descriptors, &cf_handles); - - env->ReleaseLongArrayElements(jcf_options_handles, jcf_options_handles_elems, - JNI_ABORT); - - if (!s.ok()) { - // error occurred - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - jlongArray jcf_handles = ROCKSDB_NAMESPACE::JniUtil::toJPointers< - ROCKSDB_NAMESPACE::ColumnFamilyHandle>(env, cf_handles, &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return nullptr; - } - return jcf_handles; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: dropColumnFamily - * Signature: (JJ)V; - */ -void Java_org_rocksdb_RocksDB_dropColumnFamily(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jcf_handle) { - auto* db_handle = reinterpret_cast(jdb_handle); - auto* cf_handle 
= - reinterpret_cast(jcf_handle); - ROCKSDB_NAMESPACE::Status s = db_handle->DropColumnFamily(cf_handle); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: dropColumnFamilies - * Signature: (J[J)V - */ -void Java_org_rocksdb_RocksDB_dropColumnFamilies( - JNIEnv* env, jobject, jlong jdb_handle, jlongArray jcolumn_family_handles) { - auto* db_handle = reinterpret_cast(jdb_handle); - - std::vector cf_handles; - if (jcolumn_family_handles != nullptr) { - const jsize len_cols = env->GetArrayLength(jcolumn_family_handles); - - jlong* jcfh = env->GetLongArrayElements(jcolumn_family_handles, nullptr); - if (jcfh == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - for (jsize i = 0; i < len_cols; i++) { - auto* cf_handle = - reinterpret_cast(jcfh[i]); - cf_handles.push_back(cf_handle); - } - env->ReleaseLongArrayElements(jcolumn_family_handles, jcfh, JNI_ABORT); - } - - ROCKSDB_NAMESPACE::Status s = db_handle->DropColumnFamilies(cf_handles); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::DB::Put - -/** - * @return true if the put succeeded, false if a Java Exception was thrown - */ -bool rocksdb_put_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::WriteOptions& write_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, - jbyteArray jkey, jint jkey_off, jint jkey_len, - jbyteArray jval, jint jval_off, jint jval_len) { - jbyte* key = new jbyte[jkey_len]; - env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; - return false; - } - - jbyte* value = new jbyte[jval_len]; - env->GetByteArrayRegion(jval, jval_off, jval_len, value); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] value; - delete[] key; - return false; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), - jval_len); - - ROCKSDB_NAMESPACE::Status s; - if (cf_handle != nullptr) { - s = db->Put(write_options, cf_handle, key_slice, value_slice); - } else { - // backwards compatibility - s = db->Put(write_options, key_slice, value_slice); - } - - // cleanup - delete[] value; - delete[] key; - - if (s.ok()) { - return true; - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return false; - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: put - * Signature: (J[BII[BII)V - */ -void Java_org_rocksdb_RocksDB_put__J_3BII_3BII(JNIEnv* env, jobject, - jlong jdb_handle, - jbyteArray jkey, jint jkey_off, - jint jkey_len, jbyteArray jval, - jint jval_off, jint jval_len) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - rocksdb_put_helper(env, db, default_write_options, nullptr, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: put - * Signature: (J[BII[BIIJ)V - */ -void Java_org_rocksdb_RocksDB_put__J_3BII_3BIIJ(JNIEnv* env, jobject, - jlong jdb_handle, - jbyteArray jkey, jint jkey_off, - jint jkey_len, jbyteArray jval, - jint jval_off, jint jval_len, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions 
default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_put_helper(env, db, default_write_options, cf_handle, jkey, - jkey_off, jkey_len, jval, jval_off, jval_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: put - * Signature: (JJ[BII[BII)V - */ -void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BII(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jwrite_options_handle, - jbyteArray jkey, jint jkey_off, - jint jkey_len, jbyteArray jval, - jint jval_off, jint jval_len) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - rocksdb_put_helper(env, db, *write_options, nullptr, jkey, jkey_off, jkey_len, - jval, jval_off, jval_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: put - * Signature: (JJ[BII[BIIJ)V - */ -void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BIIJ( - JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, - jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, - jint jval_off, jint jval_len, jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_put_helper(env, db, *write_options, cf_handle, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: putDirect - * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V - */ -void Java_org_rocksdb_RocksDB_putDirect( - JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jwrite_options_handle, - jobject jkey, jint jkey_off, jint jkey_len, jobject jval, jint jval_off, - jint jval_len, jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - auto put = [&env, &db, &cf_handle, &write_options]( - ROCKSDB_NAMESPACE::Slice& key, - ROCKSDB_NAMESPACE::Slice& value) { - ROCKSDB_NAMESPACE::Status s; - if (cf_handle == nullptr) { - s = db->Put(*write_options, key, value); - } else { - s = db->Put(*write_options, cf_handle, key, value); - } - if (s.ok()) { - return; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - }; - ROCKSDB_NAMESPACE::JniUtil::kv_op_direct(put, env, jkey, jkey_off, jkey_len, - jval, jval_off, jval_len); -} - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::DB::Delete() - -/** - * @return true if the delete succeeded, false if a Java Exception was thrown - */ -bool rocksdb_delete_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::WriteOptions& write_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, - jbyteArray jkey, jint jkey_off, jint jkey_len) { - jbyte* key = new jbyte[jkey_len]; - env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; - return false; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - - ROCKSDB_NAMESPACE::Status s; - if (cf_handle != nullptr) { 
- s = db->Delete(write_options, cf_handle, key_slice); - } else { - // backwards compatibility - s = db->Delete(write_options, key_slice); - } - - // cleanup - delete[] key; - - if (s.ok()) { - return true; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return false; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: delete - * Signature: (J[BII)V - */ -void Java_org_rocksdb_RocksDB_delete__J_3BII(JNIEnv* env, jobject, - jlong jdb_handle, jbyteArray jkey, - jint jkey_off, jint jkey_len) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - rocksdb_delete_helper(env, db, default_write_options, nullptr, jkey, jkey_off, - jkey_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: delete - * Signature: (J[BIIJ)V - */ -void Java_org_rocksdb_RocksDB_delete__J_3BIIJ(JNIEnv* env, jobject, - jlong jdb_handle, jbyteArray jkey, - jint jkey_off, jint jkey_len, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_delete_helper(env, db, default_write_options, cf_handle, jkey, - jkey_off, jkey_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: delete - * Signature: (JJ[BII)V - */ -void Java_org_rocksdb_RocksDB_delete__JJ_3BII(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jwrite_options, - jbyteArray jkey, jint jkey_off, - jint jkey_len) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options); - rocksdb_delete_helper(env, db, *write_options, nullptr, jkey, jkey_off, - jkey_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: delete - * Signature: (JJ[BIIJ)V - */ -void Java_org_rocksdb_RocksDB_delete__JJ_3BIIJ( - JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, - jbyteArray jkey, jint jkey_off, jint jkey_len, jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_delete_helper(env, db, *write_options, cf_handle, jkey, jkey_off, - jkey_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::DB::SingleDelete() -/** - * @return true if the single delete succeeded, false if a Java Exception - * was thrown - */ -bool rocksdb_single_delete_helper( - JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::WriteOptions& write_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, jbyteArray jkey, - jint jkey_len) { - jbyte* key = new jbyte[jkey_len]; - env->GetByteArrayRegion(jkey, 0, jkey_len, key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; - return false; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - - ROCKSDB_NAMESPACE::Status s; - if (cf_handle != nullptr) { - s = db->SingleDelete(write_options, cf_handle, key_slice); - } else { - // backwards compatibility - s = 
db->SingleDelete(write_options, key_slice); - } - - delete[] key; - - if (s.ok()) { - return true; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return false; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: singleDelete - * Signature: (J[BI)V - */ -void Java_org_rocksdb_RocksDB_singleDelete__J_3BI(JNIEnv* env, jobject, - jlong jdb_handle, - jbyteArray jkey, - jint jkey_len) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - rocksdb_single_delete_helper(env, db, default_write_options, nullptr, jkey, - jkey_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: singleDelete - * Signature: (J[BIJ)V - */ -void Java_org_rocksdb_RocksDB_singleDelete__J_3BIJ(JNIEnv* env, jobject, - jlong jdb_handle, - jbyteArray jkey, - jint jkey_len, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_single_delete_helper(env, db, default_write_options, cf_handle, - jkey, jkey_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: singleDelete - * Signature: (JJ[BIJ)V - */ -void Java_org_rocksdb_RocksDB_singleDelete__JJ_3BI(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jwrite_options, - jbyteArray jkey, - jint jkey_len) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options); - rocksdb_single_delete_helper(env, db, *write_options, nullptr, jkey, - jkey_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: singleDelete - * Signature: (JJ[BIJ)V - */ -void Java_org_rocksdb_RocksDB_singleDelete__JJ_3BIJ( - JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, - jbyteArray jkey, jint jkey_len, jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_single_delete_helper(env, db, *write_options, cf_handle, jkey, - jkey_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::DB::DeleteRange() -/** - * @return true if the delete range succeeded, false if a Java Exception - * was thrown - */ -bool rocksdb_delete_range_helper( - JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::WriteOptions& write_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, jbyteArray jbegin_key, - jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, - jint jend_key_off, jint jend_key_len) { - jbyte* begin_key = new jbyte[jbegin_key_len]; - env->GetByteArrayRegion(jbegin_key, jbegin_key_off, jbegin_key_len, - begin_key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] begin_key; - return false; - } - ROCKSDB_NAMESPACE::Slice begin_key_slice(reinterpret_cast(begin_key), - jbegin_key_len); - - jbyte* end_key = new jbyte[jend_key_len]; - env->GetByteArrayRegion(jend_key, jend_key_off, jend_key_len, end_key); - if (env->ExceptionCheck()) { - // 
exception thrown: ArrayIndexOutOfBoundsException - delete[] begin_key; - delete[] end_key; - return false; - } - ROCKSDB_NAMESPACE::Slice end_key_slice(reinterpret_cast(end_key), - jend_key_len); - - ROCKSDB_NAMESPACE::Status s = - db->DeleteRange(write_options, cf_handle, begin_key_slice, end_key_slice); - - // cleanup - delete[] begin_key; - delete[] end_key; - - if (s.ok()) { - return true; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return false; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: deleteRange - * Signature: (J[BII[BII)V - */ -void Java_org_rocksdb_RocksDB_deleteRange__J_3BII_3BII( - JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jbegin_key, - jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, - jint jend_key_off, jint jend_key_len) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - rocksdb_delete_range_helper(env, db, default_write_options, nullptr, - jbegin_key, jbegin_key_off, jbegin_key_len, - jend_key, jend_key_off, jend_key_len); -} - -jint rocksdb_get_helper_direct( - JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::ReadOptions& read_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* column_family_handle, jobject jkey, - jint jkey_off, jint jkey_len, jobject jval, jint jval_off, jint jval_len, - bool* has_exception) { - static const int kNotFound = -1; - static const int kStatusError = -2; - static const int kArgumentError = -3; - - char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); - if (key == nullptr) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid key argument (argument is not a valid direct ByteBuffer)"); - *has_exception = true; - return kArgumentError; - } - if (env->GetDirectBufferCapacity(jkey) < (jkey_off + jkey_len)) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid key argument. Capacity is less than requested region (offset " - "+ length)."); - *has_exception = true; - return kArgumentError; - } - - char* value = reinterpret_cast(env->GetDirectBufferAddress(jval)); - if (value == nullptr) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid value argument (argument is not a valid direct ByteBuffer)"); - *has_exception = true; - return kArgumentError; - } - - if (env->GetDirectBufferCapacity(jval) < (jval_off + jval_len)) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid value argument. Capacity is less than requested region " - "(offset + length)."); - *has_exception = true; - return kArgumentError; - } - - key += jkey_off; - value += jval_off; - - ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); - - ROCKSDB_NAMESPACE::PinnableSlice pinnable_value; - ROCKSDB_NAMESPACE::Status s; - if (column_family_handle != nullptr) { - s = db->Get(read_options, column_family_handle, key_slice, &pinnable_value); - } else { - // backwards compatibility - s = db->Get(read_options, db->DefaultColumnFamily(), key_slice, - &pinnable_value); - } - - if (s.IsNotFound()) { - *has_exception = false; - return kNotFound; - } else if (!s.ok()) { - *has_exception = true; - // Here since we are throwing a Java exception from c++ side. - // As a result, c++ does not know calling this function will in fact - // throwing an exception. As a result, the execution flow will - // not stop here, and codes after this throw will still be - // executed. 
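// Editorial aside, not part of the deleted file: ThrowNew only marks the Java
// exception as pending on the JNIEnv; C++ execution continues until this
// native frame returns, which is why a sentinel is returned immediately after
// the throw. A self-contained sketch of the same pattern (report_error_sketch
// and the chosen exception class are hypothetical):
auto report_error_sketch = [](JNIEnv* e, const char* msg) -> jint {
  jclass cls = e->FindClass("java/lang/IllegalStateException");
  if (cls != nullptr) {
    e->ThrowNew(cls, msg);  // pends the exception; does not unwind the C++ stack
  }
  return -2;  // sentinel for the caller; Java sees the exception once we return
};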
- ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - - // Return a dummy const value to avoid compilation error, although - // java side might not have a chance to get the return value :) - return kStatusError; - } - - const jint pinnable_value_len = static_cast(pinnable_value.size()); - const jint length = std::min(jval_len, pinnable_value_len); - - memcpy(value, pinnable_value.data(), length); - pinnable_value.Reset(); - - *has_exception = false; - return pinnable_value_len; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: deleteRange - * Signature: (J[BII[BIIJ)V - */ -void Java_org_rocksdb_RocksDB_deleteRange__J_3BII_3BIIJ( - JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jbegin_key, - jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, - jint jend_key_off, jint jend_key_len, jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_delete_range_helper(env, db, default_write_options, cf_handle, - jbegin_key, jbegin_key_off, jbegin_key_len, - jend_key, jend_key_off, jend_key_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: deleteRange - * Signature: (JJ[BII[BII)V - */ -void Java_org_rocksdb_RocksDB_deleteRange__JJ_3BII_3BII( - JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, - jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, - jbyteArray jend_key, jint jend_key_off, jint jend_key_len) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options); - rocksdb_delete_range_helper(env, db, *write_options, nullptr, jbegin_key, - jbegin_key_off, jbegin_key_len, jend_key, - jend_key_off, jend_key_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: deleteRange - * Signature: (JJ[BII[BIIJ)V - */ -void Java_org_rocksdb_RocksDB_deleteRange__JJ_3BII_3BIIJ( - JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, - jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, - jbyteArray jend_key, jint jend_key_off, jint jend_key_len, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_delete_range_helper(env, db, *write_options, cf_handle, jbegin_key, - jbegin_key_off, jbegin_key_len, jend_key, - jend_key_off, jend_key_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getDirect - * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)I - */ -jint Java_org_rocksdb_RocksDB_getDirect(JNIEnv* env, jobject /*jdb*/, - jlong jdb_handle, jlong jropt_handle, - jobject jkey, jint jkey_off, - jint jkey_len, jobject jval, - jint jval_off, jint jval_len, - jlong jcf_handle) { - auto* db_handle = reinterpret_cast(jdb_handle); - auto* ro_opt = - reinterpret_cast(jropt_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - bool has_exception = false; - return rocksdb_get_helper_direct( - env, db_handle, - ro_opt == nullptr ? 
ROCKSDB_NAMESPACE::ReadOptions() : *ro_opt, cf_handle, - jkey, jkey_off, jkey_len, jval, jval_off, jval_len, &has_exception); -} - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::DB::Merge - -/** - * @return true if the merge succeeded, false if a Java Exception was thrown - */ -bool rocksdb_merge_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::WriteOptions& write_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, - jbyteArray jkey, jint jkey_off, jint jkey_len, - jbyteArray jval, jint jval_off, jint jval_len) { - jbyte* key = new jbyte[jkey_len]; - env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; - return false; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - - jbyte* value = new jbyte[jval_len]; - env->GetByteArrayRegion(jval, jval_off, jval_len, value); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] value; - delete[] key; - return false; - } - ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), - jval_len); - - ROCKSDB_NAMESPACE::Status s; - if (cf_handle != nullptr) { - s = db->Merge(write_options, cf_handle, key_slice, value_slice); - } else { - s = db->Merge(write_options, key_slice, value_slice); - } - - // cleanup - delete[] value; - delete[] key; - - if (s.ok()) { - return true; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return false; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: merge - * Signature: (J[BII[BII)V - */ -void Java_org_rocksdb_RocksDB_merge__J_3BII_3BII(JNIEnv* env, jobject, - jlong jdb_handle, - jbyteArray jkey, jint jkey_off, - jint jkey_len, jbyteArray jval, - jint jval_off, jint jval_len) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - rocksdb_merge_helper(env, db, default_write_options, nullptr, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: merge - * Signature: (J[BII[BIIJ)V - */ -void Java_org_rocksdb_RocksDB_merge__J_3BII_3BIIJ( - JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, - jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = - ROCKSDB_NAMESPACE::WriteOptions(); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_merge_helper(env, db, default_write_options, cf_handle, jkey, - jkey_off, jkey_len, jval, jval_off, jval_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: merge - * Signature: (JJ[BII[BII)V - */ -void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BII( - JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, - jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, - jint jval_off, jint jval_len) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - rocksdb_merge_helper(env, db, *write_options, nullptr, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: merge - * Signature: 
(JJ[BII[BIIJ)V - */ -void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BIIJ( - JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, - jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, - jint jval_off, jint jval_len, jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_merge_helper(env, db, *write_options, cf_handle, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - } -} - -jlong rocksdb_iterator_helper( - ROCKSDB_NAMESPACE::DB* db, ROCKSDB_NAMESPACE::ReadOptions read_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle) { - ROCKSDB_NAMESPACE::Iterator* iterator = nullptr; - if (cf_handle != nullptr) { - iterator = db->NewIterator(read_options, cf_handle); - } else { - iterator = db->NewIterator(read_options); - } - return GET_CPLUSPLUS_POINTER(iterator); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: deleteDirect - * Signature: (JJLjava/nio/ByteBuffer;IIJ)V - */ -void Java_org_rocksdb_RocksDB_deleteDirect(JNIEnv* env, jobject /*jdb*/, - jlong jdb_handle, - jlong jwrite_options, jobject jkey, - jint jkey_offset, jint jkey_len, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options); - auto* cf_handle = - reinterpret_cast(jcf_handle); - auto remove = [&env, &db, &write_options, - &cf_handle](ROCKSDB_NAMESPACE::Slice& key) { - ROCKSDB_NAMESPACE::Status s; - if (cf_handle == nullptr) { - s = db->Delete(*write_options, key); - } else { - s = db->Delete(*write_options, cf_handle, key); - } - if (s.ok()) { - return; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_direct(remove, env, jkey, jkey_offset, - jkey_len); -} - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::DB::Write -/* - * Class: org_rocksdb_RocksDB - * Method: write0 - * Signature: (JJJ)V - */ -void Java_org_rocksdb_RocksDB_write0(JNIEnv* env, jobject, jlong jdb_handle, - jlong jwrite_options_handle, - jlong jwb_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* wb = reinterpret_cast(jwb_handle); - - ROCKSDB_NAMESPACE::Status s = db->Write(*write_options, wb); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: write1 - * Signature: (JJJ)V - */ -void Java_org_rocksdb_RocksDB_write1(JNIEnv* env, jobject, jlong jdb_handle, - jlong jwrite_options_handle, - jlong jwbwi_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* wbwi = - reinterpret_cast(jwbwi_handle); - auto* wb = wbwi->GetWriteBatch(); - - ROCKSDB_NAMESPACE::Status s = db->Write(*write_options, wb); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::DB::Get - -jbyteArray rocksdb_get_helper( - JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::ReadOptions& read_opt, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* column_family_handle, - jbyteArray jkey, jint jkey_off, 
jint jkey_len) { - jbyte* key = new jbyte[jkey_len]; - env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; - return nullptr; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - - ROCKSDB_NAMESPACE::PinnableSlice pinnable_value; - ROCKSDB_NAMESPACE::Status s; - if (column_family_handle != nullptr) { - s = db->Get(read_opt, column_family_handle, key_slice, &pinnable_value); - } else { - s = db->Get(read_opt, db->DefaultColumnFamily(), key_slice, - &pinnable_value); - } - - // cleanup - delete[] key; - - if (s.IsNotFound()) { - return nullptr; - } - - if (s.ok()) { - jbyteArray jret_value = - ROCKSDB_NAMESPACE::JniUtil::copyBytes(env, pinnable_value); - pinnable_value.Reset(); - if (jret_value == nullptr) { - // exception occurred - return nullptr; - } - return jret_value; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: get - * Signature: (J[BII)[B - */ -jbyteArray Java_org_rocksdb_RocksDB_get__J_3BII(JNIEnv* env, jobject, - jlong jdb_handle, - jbyteArray jkey, jint jkey_off, - jint jkey_len) { - return rocksdb_get_helper( - env, reinterpret_cast(jdb_handle), - ROCKSDB_NAMESPACE::ReadOptions(), nullptr, jkey, jkey_off, jkey_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: get - * Signature: (J[BIIJ)[B - */ -jbyteArray Java_org_rocksdb_RocksDB_get__J_3BIIJ(JNIEnv* env, jobject, - jlong jdb_handle, - jbyteArray jkey, jint jkey_off, - jint jkey_len, - jlong jcf_handle) { - auto db_handle = reinterpret_cast(jdb_handle); - auto cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - return rocksdb_get_helper(env, db_handle, ROCKSDB_NAMESPACE::ReadOptions(), - cf_handle, jkey, jkey_off, jkey_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - return nullptr; - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: get - * Signature: (JJ[BII)[B - */ -jbyteArray Java_org_rocksdb_RocksDB_get__JJ_3BII(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jropt_handle, - jbyteArray jkey, jint jkey_off, - jint jkey_len) { - return rocksdb_get_helper( - env, reinterpret_cast(jdb_handle), - *reinterpret_cast(jropt_handle), nullptr, - jkey, jkey_off, jkey_len); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: get - * Signature: (JJ[BIIJ)[B - */ -jbyteArray Java_org_rocksdb_RocksDB_get__JJ_3BIIJ( - JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey, - jint jkey_off, jint jkey_len, jlong jcf_handle) { - auto* db_handle = reinterpret_cast(jdb_handle); - auto& ro_opt = - *reinterpret_cast(jropt_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - return rocksdb_get_helper(env, db_handle, ro_opt, cf_handle, jkey, jkey_off, - jkey_len); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - return nullptr; - } -} - -jint rocksdb_get_helper( - JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::ReadOptions& read_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* column_family_handle, - jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, - jint jval_off, jint jval_len, bool* has_exception) { - static const int kNotFound = -1; - static const int kStatusError = -2; - - jbyte* key = new 
jbyte[jkey_len];
-  env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key);
-  if (env->ExceptionCheck()) {
-    // exception thrown: ArrayIndexOutOfBoundsException
-    delete[] key;
-    *has_exception = true;
-    return kStatusError;
-  }
-  ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast<char*>(key), jkey_len);
-
-  ROCKSDB_NAMESPACE::PinnableSlice pinnable_value;
-  ROCKSDB_NAMESPACE::Status s;
-  if (column_family_handle != nullptr) {
-    s = db->Get(read_options, column_family_handle, key_slice,
-                &pinnable_value);
-  } else {
-    s = db->Get(read_options, db->DefaultColumnFamily(), key_slice,
-                &pinnable_value);
-  }
-
-  // cleanup
-  delete[] key;
-
-  if (s.IsNotFound()) {
-    *has_exception = false;
-    return kNotFound;
-  } else if (!s.ok()) {
-    *has_exception = true;
-    // We throw a Java exception from the C++ side here, but C++ has no way
-    // of knowing that ThrowNew leaves an exception pending, so execution does
-    // not stop at this point and the code after the throw still runs.
-    ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s);
-
-    // Return a dummy value to satisfy the compiler; the Java caller sees the
-    // pending exception rather than this return value.
-    return kStatusError;
-  }
-
-  const jint pinnable_value_len = static_cast<jint>(pinnable_value.size());
-  const jint length = std::min(jval_len, pinnable_value_len);
-
-  env->SetByteArrayRegion(jval, jval_off, length,
-                          const_cast<jbyte*>(reinterpret_cast<const jbyte*>(
-                              pinnable_value.data())));
-  pinnable_value.Reset();
-  if (env->ExceptionCheck()) {
-    // exception thrown: ArrayIndexOutOfBoundsException
-    *has_exception = true;
-    return kStatusError;
-  }
-
-  *has_exception = false;
-  return pinnable_value_len;
-}
-
-/*
- * Class:     org_rocksdb_RocksDB
- * Method:    get
- * Signature: (J[BII[BII)I
- */
-jint Java_org_rocksdb_RocksDB_get__J_3BII_3BII(JNIEnv* env, jobject,
-                                               jlong jdb_handle,
-                                               jbyteArray jkey, jint jkey_off,
-                                               jint jkey_len, jbyteArray jval,
-                                               jint jval_off, jint jval_len) {
-  bool has_exception = false;
-  return rocksdb_get_helper(
-      env, reinterpret_cast<ROCKSDB_NAMESPACE::DB*>(jdb_handle),
-      ROCKSDB_NAMESPACE::ReadOptions(), nullptr, jkey, jkey_off, jkey_len,
-      jval, jval_off, jval_len, &has_exception);
-}
-
-/*
- * Class:     org_rocksdb_RocksDB
- * Method:    get
- * Signature: (J[BII[BIIJ)I
- */
-jint Java_org_rocksdb_RocksDB_get__J_3BII_3BIIJ(JNIEnv* env, jobject,
-                                                jlong jdb_handle,
-                                                jbyteArray jkey, jint jkey_off,
-                                                jint jkey_len, jbyteArray jval,
-                                                jint jval_off, jint jval_len,
-                                                jlong jcf_handle) {
-  auto* db_handle = reinterpret_cast<ROCKSDB_NAMESPACE::DB*>(jdb_handle);
-  auto* cf_handle =
-      reinterpret_cast<ROCKSDB_NAMESPACE::ColumnFamilyHandle*>(jcf_handle);
-  if (cf_handle != nullptr) {
-    bool has_exception = false;
-    return rocksdb_get_helper(env, db_handle, ROCKSDB_NAMESPACE::ReadOptions(),
-                              cf_handle, jkey, jkey_off, jkey_len, jval,
-                              jval_off, jval_len, &has_exception);
-  } else {
-    ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(
-        env, ROCKSDB_NAMESPACE::Status::InvalidArgument(
-                 "Invalid ColumnFamilyHandle."));
-    // will never be evaluated
-    return 0;
-  }
-}
-
-/*
- * Class:     org_rocksdb_RocksDB
- * Method:    get
- * Signature: (JJ[BII[BII)I
- */
-jint Java_org_rocksdb_RocksDB_get__JJ_3BII_3BII(JNIEnv* env, jobject,
-                                                jlong jdb_handle,
-                                                jlong jropt_handle,
-                                                jbyteArray jkey, jint jkey_off,
-                                                jint jkey_len, jbyteArray jval,
-                                                jint jval_off, jint jval_len) {
-  bool has_exception = false;
-  return rocksdb_get_helper(
-      env, reinterpret_cast<ROCKSDB_NAMESPACE::DB*>(jdb_handle),
-      *reinterpret_cast<ROCKSDB_NAMESPACE::ReadOptions*>(jropt_handle),
-      nullptr, jkey, jkey_off, jkey_len, jval, jval_off, jval_len,
-      &has_exception);
-}
-
-/* - * Class:
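For reference, the contract implemented by rocksdb_get_helper above (copy at most jval_len bytes into the caller's buffer, but return the full value length so the Java side can detect truncation) can be exercised directly against the public C++ API. A minimal sketch, assuming an already-open rocksdb::DB*; the function and variable names are illustrative only:

#include <algorithm>
#include <cstring>
#include <stdexcept>

#include <rocksdb/db.h>

// Copies at most buf_len bytes of the value into buf and returns the full
// value length, or -1 if the key is absent (mirrors kNotFound above).
int GetForBuffer(rocksdb::DB* db, const rocksdb::Slice& key, char* buf,
                 size_t buf_len) {
  rocksdb::PinnableSlice value;
  rocksdb::Status s =
      db->Get(rocksdb::ReadOptions(), db->DefaultColumnFamily(), key, &value);
  if (s.IsNotFound()) {
    return -1;
  }
  if (!s.ok()) {
    throw std::runtime_error(s.ToString());
  }
  const size_t n = std::min(buf_len, value.size());
  std::memcpy(buf, value.data(), n);
  // A result larger than buf_len tells the caller the buffer only holds a
  // prefix of the value.
  return static_cast<int>(value.size());
}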
org_rocksdb_RocksDB - * Method: get - * Signature: (JJ[BII[BIIJ)I - */ -jint Java_org_rocksdb_RocksDB_get__JJ_3BII_3BIIJ( - JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey, - jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, - jlong jcf_handle) { - auto* db_handle = reinterpret_cast(jdb_handle); - auto& ro_opt = - *reinterpret_cast(jropt_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - bool has_exception = false; - return rocksdb_get_helper(env, db_handle, ro_opt, cf_handle, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len, - &has_exception); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Invalid ColumnFamilyHandle.")); - // will never be evaluated - return 0; - } -} - -inline void multi_get_helper_release_keys(std::vector& keys_to_free) { - auto end = keys_to_free.end(); - for (auto it = keys_to_free.begin(); it != end; ++it) { - delete[] * it; - } - keys_to_free.clear(); -} - -/** - * @brief fill a native array of cf handles from java handles - * - * @param env - * @param cf_handles to fill from the java variants - * @param jcolumn_family_handles - * @return true if the copy succeeds - * @return false if a JNI exception is generated - */ -inline bool cf_handles_from_jcf_handles( - JNIEnv* env, - std::vector& cf_handles, - jlongArray jcolumn_family_handles) { - if (jcolumn_family_handles != nullptr) { - const jsize len_cols = env->GetArrayLength(jcolumn_family_handles); - - jlong* jcfh = env->GetLongArrayElements(jcolumn_family_handles, nullptr); - if (jcfh == nullptr) { - // exception thrown: OutOfMemoryError - jclass exception_cls = (env)->FindClass("java/lang/OutOfMemoryError"); - (env)->ThrowNew(exception_cls, - "Insufficient Memory for CF handle array."); - return false; - } - - for (jsize i = 0; i < len_cols; i++) { - auto* cf_handle = - reinterpret_cast(jcfh[i]); - cf_handles.push_back(cf_handle); - } - env->ReleaseLongArrayElements(jcolumn_family_handles, jcfh, JNI_ABORT); - } - return true; -} - -/** - * @brief copy keys from JNI into vector of slices for Rocks API - * - * @param keys to instantiate - * @param jkeys - * @param jkey_offs - * @param jkey_lens - * @return true if the copy succeeds - * @return false if a JNI exception is raised - */ -inline bool keys_from_jkeys(JNIEnv* env, - std::vector& keys, - std::vector& keys_to_free, - jobjectArray jkeys, jintArray jkey_offs, - jintArray jkey_lens) { - jint* jkey_off = env->GetIntArrayElements(jkey_offs, nullptr); - if (jkey_off == nullptr) { - // exception thrown: OutOfMemoryError - jclass exception_cls = (env)->FindClass("java/lang/OutOfMemoryError"); - (env)->ThrowNew(exception_cls, "Insufficient Memory for key offset array."); - return false; - } - - jint* jkey_len = env->GetIntArrayElements(jkey_lens, nullptr); - if (jkey_len == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); - jclass exception_cls = (env)->FindClass("java/lang/OutOfMemoryError"); - (env)->ThrowNew(exception_cls, "Insufficient Memory for key length array."); - return false; - } - - const jsize len_keys = env->GetArrayLength(jkeys); - for (jsize i = 0; i < len_keys; i++) { - jobject jkey = env->GetObjectArrayElement(jkeys, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->ReleaseIntArrayElements(jkey_lens, jkey_len, JNI_ABORT); - env->ReleaseIntArrayElements(jkey_offs, 
jkey_off, JNI_ABORT); - multi_get_helper_release_keys(keys_to_free); - jclass exception_cls = (env)->FindClass("java/lang/OutOfMemoryError"); - (env)->ThrowNew(exception_cls, - "Insufficient Memory for key object array."); - return false; - } - - jbyteArray jkey_ba = reinterpret_cast(jkey); - - const jint len_key = jkey_len[i]; - jbyte* key = new jbyte[len_key]; - env->GetByteArrayRegion(jkey_ba, jkey_off[i], len_key, key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; - env->DeleteLocalRef(jkey); - env->ReleaseIntArrayElements(jkey_lens, jkey_len, JNI_ABORT); - env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); - multi_get_helper_release_keys(keys_to_free); - jclass exception_cls = - (env)->FindClass("java/lang/ArrayIndexOutOfBoundsException"); - (env)->ThrowNew(exception_cls, "Invalid byte array region index."); - return false; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), len_key); - keys.push_back(key_slice); - - env->DeleteLocalRef(jkey); - keys_to_free.push_back(key); - } - - // cleanup jkey_off and jken_len - env->ReleaseIntArrayElements(jkey_lens, jkey_len, JNI_ABORT); - env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); - - return true; -} - -inline bool keys_from_bytebuffers(JNIEnv* env, - std::vector& keys, - jobjectArray jkeys, jintArray jkey_offs, - jintArray jkey_lens) { - jint* jkey_off = env->GetIntArrayElements(jkey_offs, nullptr); - if (jkey_off == nullptr) { - // exception thrown: OutOfMemoryError - jclass exception_cls = (env)->FindClass("java/lang/OutOfMemoryError"); - (env)->ThrowNew(exception_cls, "Insufficient Memory for key offset array."); - return false; - } - - jint* jkey_len = env->GetIntArrayElements(jkey_lens, nullptr); - if (jkey_len == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); - jclass exception_cls = (env)->FindClass("java/lang/OutOfMemoryError"); - (env)->ThrowNew(exception_cls, "Insufficient Memory for key length array."); - return false; - } - - const jsize len_keys = env->GetArrayLength(jkeys); - for (jsize i = 0; i < len_keys; i++) { - jobject jkey = env->GetObjectArrayElement(jkeys, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - // cleanup jkey_off and jkey_len - env->ReleaseIntArrayElements(jkey_lens, jkey_len, JNI_ABORT); - env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); - - return false; - } - char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); - ROCKSDB_NAMESPACE::Slice key_slice(key + jkey_off[i], jkey_len[i]); - keys.push_back(key_slice); - - env->DeleteLocalRef(jkey); - } - - // cleanup jkey_off and jkey_len - env->ReleaseIntArrayElements(jkey_lens, jkey_len, JNI_ABORT); - env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); - - return true; -} - -/** - * cf multi get - * - * @return byte[][] of values or nullptr if an - * exception occurs - */ -jobjectArray multi_get_helper(JNIEnv* env, jobject, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::ReadOptions& rOpt, - jobjectArray jkeys, jintArray jkey_offs, - jintArray jkey_lens, - jlongArray jcolumn_family_handles) { - std::vector cf_handles; - if (!cf_handles_from_jcf_handles(env, cf_handles, jcolumn_family_handles)) { - return nullptr; - } - - std::vector keys; - std::vector keys_to_free; - if (!keys_from_jkeys(env, keys, keys_to_free, jkeys, jkey_offs, jkey_lens)) { - return nullptr; - } - - std::vector values; - std::vector s; - if 
(cf_handles.size() == 0) { - s = db->MultiGet(rOpt, keys, &values); - } else { - s = db->MultiGet(rOpt, cf_handles, keys, &values); - } - - // free up allocated byte arrays - multi_get_helper_release_keys(keys_to_free); - - // prepare the results - jobjectArray jresults = ROCKSDB_NAMESPACE::ByteJni::new2dByteArray( - env, static_cast(s.size())); - if (jresults == nullptr) { - // exception occurred - jclass exception_cls = (env)->FindClass("java/lang/OutOfMemoryError"); - (env)->ThrowNew(exception_cls, "Insufficient Memory for results."); - return nullptr; - } - - // add to the jresults - for (std::vector::size_type i = 0; i != s.size(); - i++) { - if (s[i].ok()) { - std::string* value = &values[i]; - const jsize jvalue_len = static_cast(value->size()); - jbyteArray jentry_value = env->NewByteArray(jvalue_len); - if (jentry_value == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetByteArrayRegion( - jentry_value, 0, static_cast(jvalue_len), - const_cast(reinterpret_cast(value->c_str()))); - if (env->ExceptionCheck()) { - // exception thrown: - // ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jentry_value); - return nullptr; - } - - env->SetObjectArrayElement(jresults, static_cast(i), jentry_value); - if (env->ExceptionCheck()) { - // exception thrown: - // ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jentry_value); - return nullptr; - } - - env->DeleteLocalRef(jentry_value); - } - } - - return jresults; -} - -/** - * cf multi get - * - * fill supplied native buffers, or raise JNI - * exception on a problem - */ - -/** - * @brief multi_get_helper_direct for fast-path multiget (io_uring) on Linux - * - * @param env - * @param db - * @param rOpt read options - * @param jcolumn_family_handles 0, 1, or n column family handles - * @param jkeys - * @param jkey_offsets - * @param jkey_lengths - * @param jvalues byte buffers to receive values - * @param jvalue_sizes returned actual sizes of data values for keys - * @param jstatuses returned java RocksDB status values for per key - */ -void multi_get_helper_direct(JNIEnv* env, jobject, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::ReadOptions& rOpt, - jlongArray jcolumn_family_handles, - jobjectArray jkeys, jintArray jkey_offsets, - jintArray jkey_lengths, jobjectArray jvalues, - jintArray jvalue_sizes, jobjectArray jstatuses) { - const jsize num_keys = env->GetArrayLength(jkeys); - - std::vector keys; - if (!keys_from_bytebuffers(env, keys, jkeys, jkey_offsets, jkey_lengths)) { - return; - } - - std::vector values(num_keys); - - std::vector cf_handles; - if (!cf_handles_from_jcf_handles(env, cf_handles, jcolumn_family_handles)) { - return; - } - - std::vector s(num_keys); - if (cf_handles.size() == 0) { - // we can use the more efficient call here - auto cf_handle = db->DefaultColumnFamily(); - db->MultiGet(rOpt, cf_handle, num_keys, keys.data(), values.data(), - s.data()); - } else if (cf_handles.size() == 1) { - // we can use the more efficient call here - auto cf_handle = cf_handles[0]; - db->MultiGet(rOpt, cf_handle, num_keys, keys.data(), values.data(), - s.data()); - } else { - // multiple CFs version - db->MultiGet(rOpt, num_keys, cf_handles.data(), keys.data(), values.data(), - s.data()); - } - - // prepare the results - jobjectArray jresults = ROCKSDB_NAMESPACE::ByteJni::new2dByteArray( - env, static_cast(s.size())); - if (jresults == nullptr) { - // exception occurred - jclass exception_cls = (env)->FindClass("java/lang/OutOfMemoryError"); - (env)->ThrowNew(exception_cls, 
"Insufficient Memory for results."); - return; - } - - std::vector value_size; - for (int i = 0; i < num_keys; i++) { - auto jstatus = ROCKSDB_NAMESPACE::StatusJni::construct(env, s[i]); - if (jstatus == nullptr) { - // exception in context - return; - } - env->SetObjectArrayElement(jstatuses, i, jstatus); - - if (s[i].ok()) { - jobject jvalue_bytebuf = env->GetObjectArrayElement(jvalues, i); - if (env->ExceptionCheck()) { - // ArrayIndexOutOfBoundsException is thrown - return; - } - jlong jvalue_capacity = env->GetDirectBufferCapacity(jvalue_bytebuf); - if (jvalue_capacity == -1) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid value(s) argument (argument is not a valid direct " - "ByteBuffer)"); - return; - } - void* jvalue_address = env->GetDirectBufferAddress(jvalue_bytebuf); - if (jvalue_address == nullptr) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid value(s) argument (argument is not a valid direct " - "ByteBuffer)"); - return; - } - - // record num returned, push back that number, which may be bigger then - // the ByteBuffer supplied. then copy as much as fits in the ByteBuffer. - value_size.push_back(static_cast(values[i].size())); - auto copy_bytes = - std::min(static_cast(values[i].size()), jvalue_capacity); - memcpy(jvalue_address, values[i].data(), copy_bytes); - } else { - // bad status for this - value_size.push_back(0); - } - } - - env->SetIntArrayRegion(jvalue_sizes, 0, num_keys, value_size.data()); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: multiGet - * Signature: (J[[B[I[I)[[B - */ -jobjectArray Java_org_rocksdb_RocksDB_multiGet__J_3_3B_3I_3I( - JNIEnv* env, jobject jdb, jlong jdb_handle, jobjectArray jkeys, - jintArray jkey_offs, jintArray jkey_lens) { - return multi_get_helper( - env, jdb, reinterpret_cast(jdb_handle), - ROCKSDB_NAMESPACE::ReadOptions(), jkeys, jkey_offs, jkey_lens, nullptr); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: multiGet - * Signature: (J[[B[I[I[J)[[B - */ -jobjectArray Java_org_rocksdb_RocksDB_multiGet__J_3_3B_3I_3I_3J( - JNIEnv* env, jobject jdb, jlong jdb_handle, jobjectArray jkeys, - jintArray jkey_offs, jintArray jkey_lens, - jlongArray jcolumn_family_handles) { - return multi_get_helper(env, jdb, - reinterpret_cast(jdb_handle), - ROCKSDB_NAMESPACE::ReadOptions(), jkeys, jkey_offs, - jkey_lens, jcolumn_family_handles); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: multiGet - * Signature: (JJ[[B[I[I)[[B - */ -jobjectArray Java_org_rocksdb_RocksDB_multiGet__JJ_3_3B_3I_3I( - JNIEnv* env, jobject jdb, jlong jdb_handle, jlong jropt_handle, - jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens) { - return multi_get_helper( - env, jdb, reinterpret_cast(jdb_handle), - *reinterpret_cast(jropt_handle), jkeys, - jkey_offs, jkey_lens, nullptr); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: multiGet - * Signature: (JJ[[B[I[I[J)[[B - */ -jobjectArray Java_org_rocksdb_RocksDB_multiGet__JJ_3_3B_3I_3I_3J( - JNIEnv* env, jobject jdb, jlong jdb_handle, jlong jropt_handle, - jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens, - jlongArray jcolumn_family_handles) { - return multi_get_helper( - env, jdb, reinterpret_cast(jdb_handle), - *reinterpret_cast(jropt_handle), jkeys, - jkey_offs, jkey_lens, jcolumn_family_handles); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: multiGet - * Signature: - * (JJ[J[Ljava/nio/ByteBuffer;[I[I[Ljava/nio/ByteBuffer;[I[Lorg/rocksdb/Status;)V - */ -void 
Java_org_rocksdb_RocksDB_multiGet__JJ_3J_3Ljava_nio_ByteBuffer_2_3I_3I_3Ljava_nio_ByteBuffer_2_3I_3Lorg_rocksdb_Status_2( - JNIEnv* env, jobject jdb, jlong jdb_handle, jlong jropt_handle, - jlongArray jcolumn_family_handles, jobjectArray jkeys, - jintArray jkey_offsets, jintArray jkey_lengths, jobjectArray jvalues, - jintArray jvalues_sizes, jobjectArray jstatus_objects) { - return multi_get_helper_direct( - env, jdb, reinterpret_cast(jdb_handle), - *reinterpret_cast(jropt_handle), - jcolumn_family_handles, jkeys, jkey_offsets, jkey_lengths, jvalues, - jvalues_sizes, jstatus_objects); -} -// private native void -// multiGet(final long dbHandle, final long rOptHandle, -// final long[] columnFamilyHandles, final ByteBuffer[] keysArray, -// final ByteBuffer[] valuesArray); - -////////////////////////////////////////////////////////////////////////////// -// ROCKSDB_NAMESPACE::DB::KeyMayExist -bool key_may_exist_helper(JNIEnv* env, jlong jdb_handle, jlong jcf_handle, - jlong jread_opts_handle, jbyteArray jkey, - jint jkey_offset, jint jkey_len, bool* has_exception, - std::string* value, bool* value_found) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - ROCKSDB_NAMESPACE::ReadOptions read_opts = - jread_opts_handle == 0 - ? ROCKSDB_NAMESPACE::ReadOptions() - : *(reinterpret_cast( - jread_opts_handle)); - - jbyte* key = new jbyte[jkey_len]; - env->GetByteArrayRegion(jkey, jkey_offset, jkey_len, key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; - *has_exception = true; - return false; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - - const bool exists = - db->KeyMayExist(read_opts, cf_handle, key_slice, value, value_found); - - // cleanup - delete[] key; - - return exists; -} - -bool key_may_exist_direct_helper(JNIEnv* env, jlong jdb_handle, - jlong jcf_handle, jlong jread_opts_handle, - jobject jkey, jint jkey_offset, jint jkey_len, - bool* has_exception, std::string* value, - bool* value_found) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - ROCKSDB_NAMESPACE::ReadOptions read_opts = - jread_opts_handle == 0 - ? ROCKSDB_NAMESPACE::ReadOptions() - : *(reinterpret_cast( - jread_opts_handle)); - - char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); - if (key == nullptr) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid key argument (argument is not a valid direct ByteBuffer)"); - *has_exception = true; - return false; - } - if (env->GetDirectBufferCapacity(jkey) < (jkey_offset + jkey_len)) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid key argument. 
Capacity is less than requested region (offset " - "+ length)."); - *has_exception = true; - return false; - } - - ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); - - const bool exists = - db->KeyMayExist(read_opts, cf_handle, key_slice, value, value_found); - - return exists; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: keyMayExist - * Signature: (JJJ[BII)Z - */ -jboolean Java_org_rocksdb_RocksDB_keyMayExist( - JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, - jlong jread_opts_handle, jbyteArray jkey, jint jkey_offset, jint jkey_len) { - bool has_exception = false; - std::string value; - bool value_found = false; - - const bool exists = key_may_exist_helper( - env, jdb_handle, jcf_handle, jread_opts_handle, jkey, jkey_offset, - jkey_len, &has_exception, &value, &value_found); - - if (has_exception) { - // java exception already raised - return false; - } - - return static_cast(exists); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: keyMayExistDirect - * Signature: (JJJLjava/nio/ByteBuffer;II)Z - */ -jboolean Java_org_rocksdb_RocksDB_keyMayExistDirect( - JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, - jlong jread_opts_handle, jobject jkey, jint jkey_offset, jint jkey_len) { - bool has_exception = false; - std::string value; - bool value_found = false; - - const bool exists = key_may_exist_direct_helper( - env, jdb_handle, jcf_handle, jread_opts_handle, jkey, jkey_offset, - jkey_len, &has_exception, &value, &value_found); - if (has_exception) { - // java exception already raised - return false; - } - - return static_cast(exists); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: keyMayExistDirectFoundValue - * Signature: - * (JJJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)[J - */ -jintArray Java_org_rocksdb_RocksDB_keyMayExistDirectFoundValue( - JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, - jlong jread_opts_handle, jobject jkey, jint jkey_offset, jint jkey_len, - jobject jval, jint jval_offset, jint jval_len) { - char* val_buffer = reinterpret_cast(env->GetDirectBufferAddress(jval)); - if (val_buffer == nullptr) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid value argument (argument is not a valid direct ByteBuffer)"); - return nullptr; - } - - if (env->GetDirectBufferCapacity(jval) < (jval_offset + jval_len)) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, - "Invalid value argument. Capacity is less than requested region " - "(offset + length)."); - return nullptr; - } - - bool has_exception = false; - std::string cvalue; - bool value_found = false; - - const bool exists = key_may_exist_direct_helper( - env, jdb_handle, jcf_handle, jread_opts_handle, jkey, jkey_offset, - jkey_len, &has_exception, &cvalue, &value_found); - - if (has_exception) { - // java exception already raised - return nullptr; - } - - const jint cvalue_len = static_cast(cvalue.size()); - const jint length = std::min(jval_len, cvalue_len); - memcpy(val_buffer + jval_offset, cvalue.c_str(), length); - - // keep consistent with java KeyMayExistEnum.values() - const int kNotExist = 0; - const int kExistsWithoutValue = 1; - const int kExistsWithValue = 2; - - // TODO fix return value/type - // exists/value_found/neither - // cvalue_len - jintArray jresult = env->NewIntArray(2); - const jint jexists = - exists ? (value_found ? 
kExistsWithValue : kExistsWithoutValue) - : kNotExist; - - env->SetIntArrayRegion(jresult, 0, 1, &jexists); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresult); - return nullptr; - } - env->SetIntArrayRegion(jresult, 1, 1, &cvalue_len); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresult); - return nullptr; - } - - return jresult; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: keyMayExistFoundValue - * Signature: (JJJ[BII)[[B - */ -jobjectArray Java_org_rocksdb_RocksDB_keyMayExistFoundValue( - JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, - jlong jread_opts_handle, jbyteArray jkey, jint jkey_offset, jint jkey_len) { - bool has_exception = false; - std::string value; - bool value_found = false; - - const bool exists = key_may_exist_helper( - env, jdb_handle, jcf_handle, jread_opts_handle, jkey, jkey_offset, - jkey_len, &has_exception, &value, &value_found); - - if (has_exception) { - // java exception already raised - return nullptr; - } - - jbyte result_flags[1]; - if (!exists) { - result_flags[0] = 0; - } else if (!value_found) { - result_flags[0] = 1; - } else { - // found - result_flags[0] = 2; - } - - jobjectArray jresults = ROCKSDB_NAMESPACE::ByteJni::new2dByteArray(env, 2); - if (jresults == nullptr) { - // exception occurred - return nullptr; - } - - // prepare the result flag - jbyteArray jresult_flags = env->NewByteArray(1); - if (jresult_flags == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetByteArrayRegion(jresult_flags, 0, 1, result_flags); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresult_flags); - return nullptr; - } - - env->SetObjectArrayElement(jresults, 0, jresult_flags); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresult_flags); - return nullptr; - } - - env->DeleteLocalRef(jresult_flags); - - if (result_flags[0] == 2) { - // set the value - const jsize jvalue_len = static_cast(value.size()); - jbyteArray jresult_value = env->NewByteArray(jvalue_len); - if (jresult_value == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetByteArrayRegion( - jresult_value, 0, jvalue_len, - const_cast(reinterpret_cast(value.data()))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresult_value); - return nullptr; - } - env->SetObjectArrayElement(jresults, 1, jresult_value); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresult_value); - return nullptr; - } - - env->DeleteLocalRef(jresult_value); - } - - return jresults; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: iterator - * Signature: (J)J - */ -jlong Java_org_rocksdb_RocksDB_iterator__J(JNIEnv*, jobject, jlong db_handle) { - auto* db = reinterpret_cast(db_handle); - return rocksdb_iterator_helper(db, ROCKSDB_NAMESPACE::ReadOptions(), nullptr); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: iterator - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_RocksDB_iterator__JJ(JNIEnv*, jobject, jlong db_handle, - jlong jread_options_handle) { - auto* db = reinterpret_cast(db_handle); - auto& read_options = - *reinterpret_cast(jread_options_handle); - return rocksdb_iterator_helper(db, read_options, nullptr); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: iteratorCF - * 
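The keyMayExist variants above are thin wrappers over DB::KeyMayExist, a fast, possibly false-positive membership probe: a false return means the key is definitely absent, true only means it may exist, and value_found reports whether the value could be returned without extra I/O. A small sketch against the public API, assuming an open rocksdb::DB*; the helper name is illustrative:

#include <string>

#include <rocksdb/db.h>

// Returns true only when the key may exist AND its value was cheaply
// available; a false return from KeyMayExist means "definitely absent".
bool ProbeKey(rocksdb::DB* db, const rocksdb::Slice& key, std::string* value) {
  bool value_found = false;
  const bool may_exist =
      db->KeyMayExist(rocksdb::ReadOptions(), db->DefaultColumnFamily(), key,
                      value, &value_found);
  return may_exist && value_found;
}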
Signature: (JJ)J - */ -jlong Java_org_rocksdb_RocksDB_iteratorCF__JJ(JNIEnv*, jobject, jlong db_handle, - jlong jcf_handle) { - auto* db = reinterpret_cast(db_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - return rocksdb_iterator_helper(db, ROCKSDB_NAMESPACE::ReadOptions(), - cf_handle); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: iteratorCF - * Signature: (JJJ)J - */ -jlong Java_org_rocksdb_RocksDB_iteratorCF__JJJ(JNIEnv*, jobject, - jlong db_handle, - jlong jcf_handle, - jlong jread_options_handle) { - auto* db = reinterpret_cast(db_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - auto& read_options = - *reinterpret_cast(jread_options_handle); - return rocksdb_iterator_helper(db, read_options, cf_handle); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: iterators - * Signature: (J[JJ)[J - */ -jlongArray Java_org_rocksdb_RocksDB_iterators(JNIEnv* env, jobject, - jlong db_handle, - jlongArray jcolumn_family_handles, - jlong jread_options_handle) { - auto* db = reinterpret_cast(db_handle); - auto& read_options = - *reinterpret_cast(jread_options_handle); - - std::vector cf_handles; - if (jcolumn_family_handles != nullptr) { - const jsize len_cols = env->GetArrayLength(jcolumn_family_handles); - jlong* jcfh = env->GetLongArrayElements(jcolumn_family_handles, nullptr); - if (jcfh == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - for (jsize i = 0; i < len_cols; i++) { - auto* cf_handle = - reinterpret_cast(jcfh[i]); - cf_handles.push_back(cf_handle); - } - - env->ReleaseLongArrayElements(jcolumn_family_handles, jcfh, JNI_ABORT); - } - - std::vector iterators; - ROCKSDB_NAMESPACE::Status s = - db->NewIterators(read_options, cf_handles, &iterators); - if (s.ok()) { - jlongArray jLongArray = - env->NewLongArray(static_cast(iterators.size())); - if (jLongArray == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - for (std::vector::size_type i = 0; - i < iterators.size(); i++) { - env->SetLongArrayRegion( - jLongArray, static_cast(i), 1, - const_cast(reinterpret_cast(&iterators[i]))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jLongArray); - return nullptr; - } - } - - return jLongArray; - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } -} - -/* - * Method: getSnapshot - * Signature: (J)J - */ -jlong Java_org_rocksdb_RocksDB_getSnapshot(JNIEnv*, jobject, jlong db_handle) { - auto* db = reinterpret_cast(db_handle); - const ROCKSDB_NAMESPACE::Snapshot* snapshot = db->GetSnapshot(); - return GET_CPLUSPLUS_POINTER(snapshot); -} - -/* - * Method: releaseSnapshot - * Signature: (JJ)V - */ -void Java_org_rocksdb_RocksDB_releaseSnapshot(JNIEnv*, jobject, jlong db_handle, - jlong snapshot_handle) { - auto* db = reinterpret_cast(db_handle); - auto* snapshot = - reinterpret_cast(snapshot_handle); - db->ReleaseSnapshot(snapshot); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getProperty - * Signature: (JJLjava/lang/String;I)Ljava/lang/String; - */ -jstring Java_org_rocksdb_RocksDB_getProperty(JNIEnv* env, jobject, - jlong jdb_handle, jlong jcf_handle, - jstring jproperty, - jint jproperty_len) { - const char* property = env->GetStringUTFChars(jproperty, nullptr); - if (property == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - ROCKSDB_NAMESPACE::Slice property_name(property, jproperty_len); - - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* 
cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - - std::string property_value; - bool retCode = db->GetProperty(cf_handle, property_name, &property_value); - env->ReleaseStringUTFChars(jproperty, property); - - if (retCode) { - return env->NewStringUTF(property_value.c_str()); - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::NotFound()); - return nullptr; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getMapProperty - * Signature: (JJLjava/lang/String;I)Ljava/util/Map; - */ -jobject Java_org_rocksdb_RocksDB_getMapProperty(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jcf_handle, - jstring jproperty, - jint jproperty_len) { - const char* property = env->GetStringUTFChars(jproperty, nullptr); - if (property == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - ROCKSDB_NAMESPACE::Slice property_name(property, jproperty_len); - - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - - std::map property_value; - bool retCode = db->GetMapProperty(cf_handle, property_name, &property_value); - env->ReleaseStringUTFChars(jproperty, property); - - if (retCode) { - return ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, &property_value); - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::NotFound()); - return nullptr; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getLongProperty - * Signature: (JJLjava/lang/String;I)J - */ -jlong Java_org_rocksdb_RocksDB_getLongProperty(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jcf_handle, - jstring jproperty, - jint jproperty_len) { - const char* property = env->GetStringUTFChars(jproperty, nullptr); - if (property == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - ROCKSDB_NAMESPACE::Slice property_name(property, jproperty_len); - - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - - uint64_t property_value; - bool retCode = db->GetIntProperty(cf_handle, property_name, &property_value); - env->ReleaseStringUTFChars(jproperty, property); - - if (retCode) { - return property_value; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::NotFound()); - return 0; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: resetStats - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_resetStats(JNIEnv*, jobject, jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - db->ResetStats(); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getAggregatedLongProperty - * Signature: (JLjava/lang/String;I)J - */ -jlong Java_org_rocksdb_RocksDB_getAggregatedLongProperty(JNIEnv* env, jobject, - jlong db_handle, - jstring jproperty, - jint jproperty_len) { - const char* property = env->GetStringUTFChars(jproperty, nullptr); - if (property == nullptr) { - return 0; - } - ROCKSDB_NAMESPACE::Slice property_name(property, jproperty_len); - auto* db = reinterpret_cast(db_handle); - uint64_t property_value = 0; - bool retCode = db->GetAggregatedIntProperty(property_name, &property_value); - env->ReleaseStringUTFChars(jproperty, property); - - if (retCode) { - return 
property_value; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::NotFound()); - return 0; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getApproximateSizes - * Signature: (JJ[JB)[J - */ -jlongArray Java_org_rocksdb_RocksDB_getApproximateSizes( - JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, - jlongArray jrange_slice_handles, jbyte jinclude_flags) { - const jsize jlen = env->GetArrayLength(jrange_slice_handles); - const size_t range_count = jlen / 2; - - jlong* jranges = env->GetLongArrayElements(jrange_slice_handles, nullptr); - if (jranges == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - auto ranges = std::unique_ptr( - new ROCKSDB_NAMESPACE::Range[range_count]); - size_t range_offset = 0; - for (jsize i = 0; i < jlen; ++i) { - auto* start = reinterpret_cast(jranges[i]); - auto* limit = reinterpret_cast(jranges[++i]); - ranges.get()[range_offset++] = ROCKSDB_NAMESPACE::Range(*start, *limit); - } - - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - - auto sizes = std::unique_ptr(new uint64_t[range_count]); - - ROCKSDB_NAMESPACE::DB::SizeApproximationFlags include_flags = - ROCKSDB_NAMESPACE::DB::SizeApproximationFlags::NONE; - if (jinclude_flags & 1) { - include_flags = - ROCKSDB_NAMESPACE::DB::SizeApproximationFlags::INCLUDE_MEMTABLES; - } - if (jinclude_flags & 2) { - include_flags = - (include_flags | - ROCKSDB_NAMESPACE::DB::SizeApproximationFlags::INCLUDE_FILES); - } - - db->GetApproximateSizes(cf_handle, ranges.get(), - static_cast(range_count), sizes.get(), - include_flags); - - // release LongArrayElements - env->ReleaseLongArrayElements(jrange_slice_handles, jranges, JNI_ABORT); - - // prepare results - auto results = std::unique_ptr(new jlong[range_count]); - for (size_t i = 0; i < range_count; ++i) { - results.get()[i] = static_cast(sizes.get()[i]); - } - - const jsize jrange_count = jlen / 2; - jlongArray jresults = env->NewLongArray(jrange_count); - if (jresults == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetLongArrayRegion(jresults, 0, jrange_count, results.get()); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresults); - return nullptr; - } - - return jresults; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getApproximateMemTableStats - * Signature: (JJJJ)[J - */ -jlongArray Java_org_rocksdb_RocksDB_getApproximateMemTableStats( - JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, - jlong jstartHandle, jlong jlimitHandle) { - auto* start = reinterpret_cast(jstartHandle); - auto* limit = reinterpret_cast(jlimitHandle); - const ROCKSDB_NAMESPACE::Range range(*start, *limit); - - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - - uint64_t count = 0; - uint64_t sizes = 0; - db->GetApproximateMemTableStats(cf_handle, range, &count, &sizes); - - // prepare results - jlong results[2] = {static_cast(count), static_cast(sizes)}; - - jlongArray jsizes = env->NewLongArray(2); - if (jsizes == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetLongArrayRegion(jsizes, 0, 2, results); - if (env->ExceptionCheck()) { 
- // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jsizes); - return nullptr; - } - - return jsizes; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: compactRange - * Signature: (J[BI[BIJJ)V - */ -void Java_org_rocksdb_RocksDB_compactRange(JNIEnv* env, jobject, - jlong jdb_handle, jbyteArray jbegin, - jint jbegin_len, jbyteArray jend, - jint jend_len, - jlong jcompact_range_opts_handle, - jlong jcf_handle) { - jboolean has_exception = JNI_FALSE; - - std::string str_begin; - if (jbegin_len > 0) { - str_begin = ROCKSDB_NAMESPACE::JniUtil::byteString( - env, jbegin, jbegin_len, - [](const char* str, const size_t len) { return std::string(str, len); }, - &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return; - } - } - - std::string str_end; - if (jend_len > 0) { - str_end = ROCKSDB_NAMESPACE::JniUtil::byteString( - env, jend, jend_len, - [](const char* str, const size_t len) { return std::string(str, len); }, - &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return; - } - } - - ROCKSDB_NAMESPACE::CompactRangeOptions* compact_range_opts = nullptr; - if (jcompact_range_opts_handle == 0) { - // NOTE: we DO own the pointer! - compact_range_opts = new ROCKSDB_NAMESPACE::CompactRangeOptions(); - } else { - // NOTE: we do NOT own the pointer! - compact_range_opts = - reinterpret_cast( - jcompact_range_opts_handle); - } - - auto* db = reinterpret_cast(jdb_handle); - - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - - ROCKSDB_NAMESPACE::Status s; - if (jbegin_len > 0 || jend_len > 0) { - const ROCKSDB_NAMESPACE::Slice begin(str_begin); - const ROCKSDB_NAMESPACE::Slice end(str_end); - s = db->CompactRange(*compact_range_opts, cf_handle, &begin, &end); - } else { - s = db->CompactRange(*compact_range_opts, cf_handle, nullptr, nullptr); - } - - if (jcompact_range_opts_handle == 0) { - delete compact_range_opts; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: setOptions - * Signature: (JJ[Ljava/lang/String;[Ljava/lang/String;)V - */ -void Java_org_rocksdb_RocksDB_setOptions(JNIEnv* env, jobject, jlong jdb_handle, - jlong jcf_handle, jobjectArray jkeys, - jobjectArray jvalues) { - const jsize len = env->GetArrayLength(jkeys); - assert(len == env->GetArrayLength(jvalues)); - - std::unordered_map options_map; - for (jsize i = 0; i < len; i++) { - jobject jobj_key = env->GetObjectArrayElement(jkeys, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - return; - } - - jobject jobj_value = env->GetObjectArrayElement(jvalues, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jobj_key); - return; - } - - jboolean has_exception = JNI_FALSE; - std::string s_key = ROCKSDB_NAMESPACE::JniUtil::copyStdString( - env, reinterpret_cast(jobj_key), &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - env->DeleteLocalRef(jobj_value); - env->DeleteLocalRef(jobj_key); - return; - } - - std::string s_value = ROCKSDB_NAMESPACE::JniUtil::copyStdString( - env, reinterpret_cast(jobj_value), &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - env->DeleteLocalRef(jobj_value); - env->DeleteLocalRef(jobj_key); - return; - } - - options_map[s_key] = s_value; - - env->DeleteLocalRef(jobj_key); - 
env->DeleteLocalRef(jobj_value); - } - - auto* db = reinterpret_cast(jdb_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - if (cf_handle == nullptr) { - cf_handle = db->DefaultColumnFamily(); - } - auto s = db->SetOptions(cf_handle, options_map); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: setDBOptions - * Signature: (J[Ljava/lang/String;[Ljava/lang/String;)V - */ -void Java_org_rocksdb_RocksDB_setDBOptions(JNIEnv* env, jobject, - jlong jdb_handle, jobjectArray jkeys, - jobjectArray jvalues) { - const jsize len = env->GetArrayLength(jkeys); - assert(len == env->GetArrayLength(jvalues)); - - std::unordered_map options_map; - for (jsize i = 0; i < len; i++) { - jobject jobj_key = env->GetObjectArrayElement(jkeys, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - return; - } - - jobject jobj_value = env->GetObjectArrayElement(jvalues, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jobj_key); - return; - } - - jboolean has_exception = JNI_FALSE; - std::string s_key = ROCKSDB_NAMESPACE::JniUtil::copyStdString( - env, reinterpret_cast(jobj_key), &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - env->DeleteLocalRef(jobj_value); - env->DeleteLocalRef(jobj_key); - return; - } - - std::string s_value = ROCKSDB_NAMESPACE::JniUtil::copyStdString( - env, reinterpret_cast(jobj_value), &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - env->DeleteLocalRef(jobj_value); - env->DeleteLocalRef(jobj_key); - return; - } - - options_map[s_key] = s_value; - - env->DeleteLocalRef(jobj_key); - env->DeleteLocalRef(jobj_value); - } - - auto* db = reinterpret_cast(jdb_handle); - auto s = db->SetDBOptions(options_map); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getOptions - * Signature: (JJ)Ljava/lang/String; - */ -jstring Java_org_rocksdb_RocksDB_getOptions(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - - auto options = db->GetOptions(cf_handle); - std::string options_as_string; - ROCKSDB_NAMESPACE::Status s = - GetStringFromColumnFamilyOptions(&options_as_string, options); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - return env->NewStringUTF(options_as_string.c_str()); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getDBOptions - * Signature: (J)Ljava/lang/String; - */ -jstring Java_org_rocksdb_RocksDB_getDBOptions(JNIEnv* env, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - - auto options = db->GetDBOptions(); - std::string options_as_string; - ROCKSDB_NAMESPACE::Status s = - GetStringFromDBOptions(&options_as_string, options); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - return env->NewStringUTF(options_as_string.c_str()); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: compactFiles - * Signature: (JJJ[Ljava/lang/String;IIJ)[Ljava/lang/String; - */ -jobjectArray Java_org_rocksdb_RocksDB_compactFiles( - JNIEnv* env, jobject, jlong jdb_handle, jlong jcompaction_opts_handle, - jlong 
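setOptions and setDBOptions above pass plain string maps straight through to DB::SetOptions and DB::SetDBOptions, which is how mutable options are changed on a running database. A minimal sketch, assuming an open rocksdb::DB*; the option values shown are illustrative, not recommendations:

#include <string>
#include <unordered_map>

#include <rocksdb/db.h>

// Dynamically adjusts one column-family option and one DB-wide option.
void TuneAtRuntime(rocksdb::DB* db) {
  std::unordered_map<std::string, std::string> cf_opts = {
      {"write_buffer_size", "67108864"}};  // 64 MiB memtable
  rocksdb::Status s = db->SetOptions(db->DefaultColumnFamily(), cf_opts);
  if (s.ok()) {
    s = db->SetDBOptions({{"max_background_jobs", "4"}});
  }
}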
jcf_handle, jobjectArray jinput_file_names, jint joutput_level, - jint joutput_path_id, jlong jcompaction_job_info_handle) { - jboolean has_exception = JNI_FALSE; - const std::vector input_file_names = - ROCKSDB_NAMESPACE::JniUtil::copyStrings(env, jinput_file_names, - &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return nullptr; - } - - auto* compaction_opts = - reinterpret_cast( - jcompaction_opts_handle); - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - - ROCKSDB_NAMESPACE::CompactionJobInfo* compaction_job_info = nullptr; - if (jcompaction_job_info_handle != 0) { - compaction_job_info = - reinterpret_cast( - jcompaction_job_info_handle); - } - - std::vector output_file_names; - auto s = db->CompactFiles(*compaction_opts, cf_handle, input_file_names, - static_cast(joutput_level), - static_cast(joutput_path_id), - &output_file_names, compaction_job_info); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - return ROCKSDB_NAMESPACE::JniUtil::toJavaStrings(env, &output_file_names); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: cancelAllBackgroundWork - * Signature: (JZ)V - */ -void Java_org_rocksdb_RocksDB_cancelAllBackgroundWork(JNIEnv*, jobject, - jlong jdb_handle, - jboolean jwait) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::CancelAllBackgroundWork(db, jwait); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: pauseBackgroundWork - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_pauseBackgroundWork(JNIEnv* env, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto s = db->PauseBackgroundWork(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: continueBackgroundWork - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_continueBackgroundWork(JNIEnv* env, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto s = db->ContinueBackgroundWork(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: enableAutoCompaction - * Signature: (J[J)V - */ -void Java_org_rocksdb_RocksDB_enableAutoCompaction(JNIEnv* env, jobject, - jlong jdb_handle, - jlongArray jcf_handles) { - auto* db = reinterpret_cast(jdb_handle); - jboolean has_exception = JNI_FALSE; - const std::vector cf_handles = - ROCKSDB_NAMESPACE::JniUtil::fromJPointers< - ROCKSDB_NAMESPACE::ColumnFamilyHandle>(env, jcf_handles, - &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return; - } - db->EnableAutoCompaction(cf_handles); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: numberLevels - * Signature: (JJ)I - */ -jint Java_org_rocksdb_RocksDB_numberLevels(JNIEnv*, jobject, jlong jdb_handle, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - return static_cast(db->NumberLevels(cf_handle)); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: maxMemCompactionLevel - * Signature: (JJ)I - */ -jint Java_org_rocksdb_RocksDB_maxMemCompactionLevel(JNIEnv*, jobject, - jlong jdb_handle, - jlong jcf_handle) { - auto* db = 
reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - return static_cast(db->MaxMemCompactionLevel(cf_handle)); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: level0StopWriteTrigger - * Signature: (JJ)I - */ -jint Java_org_rocksdb_RocksDB_level0StopWriteTrigger(JNIEnv*, jobject, - jlong jdb_handle, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - return static_cast(db->Level0StopWriteTrigger(cf_handle)); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getName - * Signature: (J)Ljava/lang/String; - */ -jstring Java_org_rocksdb_RocksDB_getName(JNIEnv* env, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - std::string name = db->GetName(); - return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, false); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getEnv - * Signature: (J)J - */ -jlong Java_org_rocksdb_RocksDB_getEnv(JNIEnv*, jobject, jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - return GET_CPLUSPLUS_POINTER(db->GetEnv()); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: flush - * Signature: (JJ[J)V - */ -void Java_org_rocksdb_RocksDB_flush(JNIEnv* env, jobject, jlong jdb_handle, - jlong jflush_opts_handle, - jlongArray jcf_handles) { - auto* db = reinterpret_cast(jdb_handle); - auto* flush_opts = - reinterpret_cast(jflush_opts_handle); - std::vector cf_handles; - if (jcf_handles == nullptr) { - cf_handles.push_back(db->DefaultColumnFamily()); - } else { - jboolean has_exception = JNI_FALSE; - cf_handles = ROCKSDB_NAMESPACE::JniUtil::fromJPointers< - ROCKSDB_NAMESPACE::ColumnFamilyHandle>(env, jcf_handles, - &has_exception); - if (has_exception) { - // exception occurred - return; - } - } - auto s = db->Flush(*flush_opts, cf_handles); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: flushWal - * Signature: (JZ)V - */ -void Java_org_rocksdb_RocksDB_flushWal(JNIEnv* env, jobject, jlong jdb_handle, - jboolean jsync) { - auto* db = reinterpret_cast(jdb_handle); - auto s = db->FlushWAL(jsync == JNI_TRUE); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: syncWal - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_syncWal(JNIEnv* env, jobject, jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto s = db->SyncWAL(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getLatestSequenceNumber - * Signature: (J)V - */ -jlong Java_org_rocksdb_RocksDB_getLatestSequenceNumber(JNIEnv*, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - return db->GetLatestSequenceNumber(); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: disableFileDeletions - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_disableFileDeletions(JNIEnv* env, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::Status s = db->DisableFileDeletions(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: 
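flush and flushWal above map onto DB::Flush and DB::FlushWAL: Flush persists the memtable(s) to SST files, while FlushWAL(true) also syncs the write-ahead log. A minimal sketch, assuming an open rocksdb::DB*; the helper name is illustrative:

#include <rocksdb/db.h>

// Flushes the default column family and then syncs the WAL.
void FlushAll(rocksdb::DB* db) {
  rocksdb::FlushOptions fo;
  fo.wait = true;  // block until the flush has completed
  rocksdb::Status s = db->Flush(fo, db->DefaultColumnFamily());
  if (s.ok()) {
    s = db->FlushWAL(/*sync=*/true);
  }
}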
enableFileDeletions - * Signature: (JZ)V - */ -void Java_org_rocksdb_RocksDB_enableFileDeletions(JNIEnv* env, jobject, - jlong jdb_handle, - jboolean jforce) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::Status s = db->EnableFileDeletions(jforce); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getLiveFiles - * Signature: (JZ)[Ljava/lang/String; - */ -jobjectArray Java_org_rocksdb_RocksDB_getLiveFiles(JNIEnv* env, jobject, - jlong jdb_handle, - jboolean jflush_memtable) { - auto* db = reinterpret_cast(jdb_handle); - std::vector live_files; - uint64_t manifest_file_size = 0; - auto s = db->GetLiveFiles(live_files, &manifest_file_size, - jflush_memtable == JNI_TRUE); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - // append the manifest_file_size to the vector - // for passing back to java - live_files.push_back(std::to_string(manifest_file_size)); - - return ROCKSDB_NAMESPACE::JniUtil::toJavaStrings(env, &live_files); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getSortedWalFiles - * Signature: (J)[Lorg/rocksdb/LogFile; - */ -jobjectArray Java_org_rocksdb_RocksDB_getSortedWalFiles(JNIEnv* env, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - std::vector> sorted_wal_files; - auto s = db->GetSortedWalFiles(sorted_wal_files); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - // convert to Java type - const jsize jlen = static_cast(sorted_wal_files.size()); - jobjectArray jsorted_wal_files = env->NewObjectArray( - jlen, ROCKSDB_NAMESPACE::LogFileJni::getJClass(env), nullptr); - if (jsorted_wal_files == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - jsize i = 0; - for (auto it = sorted_wal_files.begin(); it != sorted_wal_files.end(); ++it) { - jobject jlog_file = - ROCKSDB_NAMESPACE::LogFileJni::fromCppLogFile(env, it->get()); - if (jlog_file == nullptr) { - // exception occurred - env->DeleteLocalRef(jsorted_wal_files); - return nullptr; - } - - env->SetObjectArrayElement(jsorted_wal_files, i++, jlog_file); - if (env->ExceptionCheck()) { - // exception occurred - env->DeleteLocalRef(jlog_file); - env->DeleteLocalRef(jsorted_wal_files); - return nullptr; - } - - env->DeleteLocalRef(jlog_file); - } - - return jsorted_wal_files; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getUpdatesSince - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_RocksDB_getUpdatesSince(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jsequence_number) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::SequenceNumber sequence_number = - static_cast(jsequence_number); - std::unique_ptr iter; - ROCKSDB_NAMESPACE::Status s = db->GetUpdatesSince(sequence_number, &iter); - if (s.ok()) { - return GET_CPLUSPLUS_POINTER(iter.release()); - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return 0; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: deleteFile - * Signature: (JLjava/lang/String;)V - */ -void Java_org_rocksdb_RocksDB_deleteFile(JNIEnv* env, jobject, jlong jdb_handle, - jstring jname) { - auto* db = reinterpret_cast(jdb_handle); - jboolean has_exception = JNI_FALSE; - std::string name = - ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jname, &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return; - } - db->DeleteFile(name); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: 
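getLiveFiles, disableFileDeletions and enableFileDeletions above expose the manual-backup pattern: pin the current file set, copy it elsewhere, then re-enable deletions. A minimal sketch of that sequence, assuming an open rocksdb::DB* and omitting the actual copy step; names are illustrative:

#include <cstdint>
#include <string>
#include <vector>

#include <rocksdb/db.h>

// Pins the current live file set, lists it, then re-enables deletions.
void ListLiveFiles(rocksdb::DB* db) {
  db->DisableFileDeletions();
  std::vector<std::string> files;
  uint64_t manifest_size = 0;
  rocksdb::Status s =
      db->GetLiveFiles(files, &manifest_size, /*flush_memtable=*/true);
  if (s.ok()) {
    // `files` are paths relative to db->GetName(); copy them here.
  }
  db->EnableFileDeletions(/*force=*/false);
}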
getLiveFilesMetaData - * Signature: (J)[Lorg/rocksdb/LiveFileMetaData; - */ -jobjectArray Java_org_rocksdb_RocksDB_getLiveFilesMetaData(JNIEnv* env, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - std::vector live_files_meta_data; - db->GetLiveFilesMetaData(&live_files_meta_data); - - // convert to Java type - const jsize jlen = static_cast(live_files_meta_data.size()); - jobjectArray jlive_files_meta_data = env->NewObjectArray( - jlen, ROCKSDB_NAMESPACE::LiveFileMetaDataJni::getJClass(env), nullptr); - if (jlive_files_meta_data == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - jsize i = 0; - for (auto it = live_files_meta_data.begin(); it != live_files_meta_data.end(); - ++it) { - jobject jlive_file_meta_data = - ROCKSDB_NAMESPACE::LiveFileMetaDataJni::fromCppLiveFileMetaData(env, - &(*it)); - if (jlive_file_meta_data == nullptr) { - // exception occurred - env->DeleteLocalRef(jlive_files_meta_data); - return nullptr; - } - - env->SetObjectArrayElement(jlive_files_meta_data, i++, - jlive_file_meta_data); - if (env->ExceptionCheck()) { - // exception occurred - env->DeleteLocalRef(jlive_file_meta_data); - env->DeleteLocalRef(jlive_files_meta_data); - return nullptr; - } - - env->DeleteLocalRef(jlive_file_meta_data); - } - - return jlive_files_meta_data; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getColumnFamilyMetaData - * Signature: (JJ)Lorg/rocksdb/ColumnFamilyMetaData; - */ -jobject Java_org_rocksdb_RocksDB_getColumnFamilyMetaData(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_metadata; - db->GetColumnFamilyMetaData(cf_handle, &cf_metadata); - return ROCKSDB_NAMESPACE::ColumnFamilyMetaDataJni:: - fromCppColumnFamilyMetaData(env, &cf_metadata); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: ingestExternalFile - * Signature: (JJ[Ljava/lang/String;IJ)V - */ -void Java_org_rocksdb_RocksDB_ingestExternalFile( - JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, - jobjectArray jfile_path_list, jint jfile_path_list_len, - jlong jingest_external_file_options_handle) { - jboolean has_exception = JNI_FALSE; - std::vector file_path_list = - ROCKSDB_NAMESPACE::JniUtil::copyStrings( - env, jfile_path_list, jfile_path_list_len, &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return; - } - - auto* db = reinterpret_cast(jdb_handle); - auto* column_family = - reinterpret_cast(jcf_handle); - auto* ifo = reinterpret_cast( - jingest_external_file_options_handle); - ROCKSDB_NAMESPACE::Status s = - db->IngestExternalFile(column_family, file_path_list, *ifo); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: verifyChecksum - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_verifyChecksum(JNIEnv* env, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto s = db->VerifyChecksum(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getDefaultColumnFamily - * Signature: (J)J - */ -jlong Java_org_rocksdb_RocksDB_getDefaultColumnFamily(JNIEnv*, jobject, - jlong jdb_handle) { - auto* db_handle = reinterpret_cast(jdb_handle); - auto* 
cf_handle = db_handle->DefaultColumnFamily(); - return GET_CPLUSPLUS_POINTER(cf_handle); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getPropertiesOfAllTables - * Signature: (JJ)Ljava/util/Map; - */ -jobject Java_org_rocksdb_RocksDB_getPropertiesOfAllTables(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - ROCKSDB_NAMESPACE::TablePropertiesCollection table_properties_collection; - auto s = - db->GetPropertiesOfAllTables(cf_handle, &table_properties_collection); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } - - // convert to Java type - jobject jhash_map = ROCKSDB_NAMESPACE::HashMapJni::construct( - env, static_cast(table_properties_collection.size())); - if (jhash_map == nullptr) { - // exception occurred - return nullptr; - } - - const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< - const std::string, - const std::shared_ptr, jobject, - jobject> - fn_map_kv = - [env](const std::pair>& - kv) { - jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( - env, &(kv.first), false); - if (env->ExceptionCheck()) { - // an error occurred - return std::unique_ptr>(nullptr); - } - - jobject jtable_properties = - ROCKSDB_NAMESPACE::TablePropertiesJni::fromCppTableProperties( - env, *(kv.second.get())); - if (jtable_properties == nullptr) { - // an error occurred - env->DeleteLocalRef(jkey); - return std::unique_ptr>(nullptr); - } - - return std::unique_ptr>( - new std::pair( - static_cast(jkey), - static_cast(jtable_properties))); - }; - - if (!ROCKSDB_NAMESPACE::HashMapJni::putAll( - env, jhash_map, table_properties_collection.begin(), - table_properties_collection.end(), fn_map_kv)) { - // exception occurred - return nullptr; - } - - return jhash_map; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: getPropertiesOfTablesInRange - * Signature: (JJ[J)Ljava/util/Map; - */ -jobject Java_org_rocksdb_RocksDB_getPropertiesOfTablesInRange( - JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, - jlongArray jrange_slice_handles) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - const jsize jlen = env->GetArrayLength(jrange_slice_handles); - jlong* jrange_slice_handle = - env->GetLongArrayElements(jrange_slice_handles, nullptr); - if (jrange_slice_handle == nullptr) { - // exception occurred - return nullptr; - } - - const size_t ranges_len = static_cast(jlen / 2); - auto ranges = std::unique_ptr( - new ROCKSDB_NAMESPACE::Range[ranges_len]); - for (jsize i = 0, j = 0; i < jlen; ++i) { - auto* start = - reinterpret_cast(jrange_slice_handle[i]); - auto* limit = - reinterpret_cast(jrange_slice_handle[++i]); - ranges[j++] = ROCKSDB_NAMESPACE::Range(*start, *limit); - } - - ROCKSDB_NAMESPACE::TablePropertiesCollection table_properties_collection; - auto s = db->GetPropertiesOfTablesInRange(cf_handle, ranges.get(), ranges_len, - &table_properties_collection); - if (!s.ok()) { - // error occurred - env->ReleaseLongArrayElements(jrange_slice_handles, jrange_slice_handle, - JNI_ABORT); - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - // cleanup - env->ReleaseLongArrayElements(jrange_slice_handles, jrange_slice_handle, - 
JNI_ABORT); - - return jrange_slice_handles; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: suggestCompactRange - * Signature: (JJ)[J - */ -jlongArray Java_org_rocksdb_RocksDB_suggestCompactRange(JNIEnv* env, jobject, - jlong jdb_handle, - jlong jcf_handle) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - auto* begin = new ROCKSDB_NAMESPACE::Slice(); - auto* end = new ROCKSDB_NAMESPACE::Slice(); - auto s = db->SuggestCompactRange(cf_handle, begin, end); - if (!s.ok()) { - // error occurred - delete begin; - delete end; - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } - - jlongArray jslice_handles = env->NewLongArray(2); - if (jslice_handles == nullptr) { - // exception thrown: OutOfMemoryError - delete begin; - delete end; - return nullptr; - } - - jlong slice_handles[2]; - slice_handles[0] = GET_CPLUSPLUS_POINTER(begin); - slice_handles[1] = GET_CPLUSPLUS_POINTER(end); - env->SetLongArrayRegion(jslice_handles, 0, 2, slice_handles); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete begin; - delete end; - env->DeleteLocalRef(jslice_handles); - return nullptr; - } - - return jslice_handles; -} - -/* - * Class: org_rocksdb_RocksDB - * Method: promoteL0 - * Signature: (JJI)V - */ -void Java_org_rocksdb_RocksDB_promoteL0(JNIEnv*, jobject, jlong jdb_handle, - jlong jcf_handle, jint jtarget_level) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; - if (jcf_handle == 0) { - cf_handle = db->DefaultColumnFamily(); - } else { - cf_handle = - reinterpret_cast(jcf_handle); - } - db->PromoteL0(cf_handle, static_cast(jtarget_level)); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: startTrace - * Signature: (JJJ)V - */ -void Java_org_rocksdb_RocksDB_startTrace( - JNIEnv* env, jobject, jlong jdb_handle, jlong jmax_trace_file_size, - jlong jtrace_writer_jnicallback_handle) { - auto* db = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::TraceOptions trace_options; - trace_options.max_trace_file_size = - static_cast(jmax_trace_file_size); - // transfer ownership of trace writer from Java to C++ - auto trace_writer = - std::unique_ptr( - reinterpret_cast( - jtrace_writer_jnicallback_handle)); - auto s = db->StartTrace(trace_options, std::move(trace_writer)); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: endTrace - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_endTrace(JNIEnv* env, jobject, jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto s = db->EndTrace(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: tryCatchUpWithPrimary - * Signature: (J)V - */ -void Java_org_rocksdb_RocksDB_tryCatchUpWithPrimary(JNIEnv* env, jobject, - jlong jdb_handle) { - auto* db = reinterpret_cast(jdb_handle); - auto s = db->TryCatchUpWithPrimary(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: destroyDB - * Signature: (Ljava/lang/String;J)V - */ -void Java_org_rocksdb_RocksDB_destroyDB(JNIEnv* env, jclass, jstring jdb_path, - jlong joptions_handle) { - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { 
- // exception thrown: OutOfMemoryError - return; - } - - auto* options = - reinterpret_cast(joptions_handle); - if (options == nullptr) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument("Invalid Options.")); - } - - ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DestroyDB(db_path, *options); - env->ReleaseStringUTFChars(jdb_path, db_path); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -bool get_slice_helper(JNIEnv* env, jobjectArray ranges, jsize index, - std::unique_ptr& slice, - std::vector>& ranges_to_free) { - jobject jArray = env->GetObjectArrayElement(ranges, index); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - return false; - } - - if (jArray == nullptr) { - return true; - } - - jbyteArray jba = reinterpret_cast(jArray); - jsize len_ba = env->GetArrayLength(jba); - ranges_to_free.push_back(std::unique_ptr(new jbyte[len_ba])); - env->GetByteArrayRegion(jba, 0, len_ba, ranges_to_free.back().get()); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jArray); - return false; - } - env->DeleteLocalRef(jArray); - slice.reset(new ROCKSDB_NAMESPACE::Slice( - reinterpret_cast(ranges_to_free.back().get()), len_ba)); - return true; -} -/* - * Class: org_rocksdb_RocksDB - * Method: deleteFilesInRanges - * Signature: (JJLjava/util/List;Z)V - */ -void Java_org_rocksdb_RocksDB_deleteFilesInRanges(JNIEnv* env, jobject /*jdb*/, - jlong jdb_handle, - jlong jcf_handle, - jobjectArray ranges, - jboolean include_end) { - jsize length = env->GetArrayLength(ranges); - - std::vector rangesVector; - std::vector> slices; - std::vector> ranges_to_free; - for (jsize i = 0; (i + 1) < length; i += 2) { - slices.push_back(std::unique_ptr()); - if (!get_slice_helper(env, ranges, i, slices.back(), ranges_to_free)) { - // exception thrown - return; - } - - slices.push_back(std::unique_ptr()); - if (!get_slice_helper(env, ranges, i + 1, slices.back(), ranges_to_free)) { - // exception thrown - return; - } - - rangesVector.push_back(ROCKSDB_NAMESPACE::RangePtr( - slices[slices.size() - 2].get(), slices[slices.size() - 1].get())); - } - - auto* db = reinterpret_cast(jdb_handle); - auto* column_family = - reinterpret_cast(jcf_handle); - - ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DeleteFilesInRanges( - db, column_family == nullptr ? db->DefaultColumnFamily() : column_family, - rangesVector.data(), rangesVector.size(), include_end); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_RocksDB - * Method: version - * Signature: ()I - */ -jint Java_org_rocksdb_RocksDB_version(JNIEnv*, jclass) { - uint32_t encodedVersion = (ROCKSDB_MAJOR & 0xff) << 16; - encodedVersion |= (ROCKSDB_MINOR & 0xff) << 8; - encodedVersion |= (ROCKSDB_PATCH & 0xff); - return static_cast(encodedVersion); -} diff --git a/java/rocksjni/slice.cc b/java/rocksjni/slice.cc deleted file mode 100644 index 63c6b1b9f..000000000 --- a/java/rocksjni/slice.cc +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::Slice. 
- -#include "rocksdb/slice.h" - -#include -#include -#include - -#include - -#include "include/org_rocksdb_AbstractSlice.h" -#include "include/org_rocksdb_DirectSlice.h" -#include "include/org_rocksdb_Slice.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -// - -/* - * Class: org_rocksdb_Slice - * Method: createNewSlice0 - * Signature: ([BI)J - */ -jlong Java_org_rocksdb_Slice_createNewSlice0(JNIEnv* env, jclass /*jcls*/, - jbyteArray data, jint offset) { - const jsize dataSize = env->GetArrayLength(data); - const int len = dataSize - offset; - - // NOTE: buf will be deleted in the Java_org_rocksdb_Slice_disposeInternalBuf - // method - jbyte* buf = new jbyte[len]; - env->GetByteArrayRegion(data, offset, len, buf); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - return 0; - } - - const auto* slice = new ROCKSDB_NAMESPACE::Slice((const char*)buf, len); - return GET_CPLUSPLUS_POINTER(slice); -} - -/* - * Class: org_rocksdb_Slice - * Method: createNewSlice1 - * Signature: ([B)J - */ -jlong Java_org_rocksdb_Slice_createNewSlice1(JNIEnv* env, jclass /*jcls*/, - jbyteArray data) { - jbyte* ptrData = env->GetByteArrayElements(data, nullptr); - if (ptrData == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - const int len = env->GetArrayLength(data) + 1; - - // NOTE: buf will be deleted in the Java_org_rocksdb_Slice_disposeInternalBuf - // method - char* buf = new char[len]; - memcpy(buf, ptrData, len - 1); - buf[len - 1] = '\0'; - - const auto* slice = new ROCKSDB_NAMESPACE::Slice(buf, len - 1); - - env->ReleaseByteArrayElements(data, ptrData, JNI_ABORT); - - return GET_CPLUSPLUS_POINTER(slice); -} - -/* - * Class: org_rocksdb_Slice - * Method: data0 - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_Slice_data0(JNIEnv* env, jobject /*jobj*/, - jlong handle) { - const auto* slice = reinterpret_cast(handle); - const jsize len = static_cast(slice->size()); - const jbyteArray data = env->NewByteArray(len); - if (data == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetByteArrayRegion( - data, 0, len, - const_cast(reinterpret_cast(slice->data()))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(data); - return nullptr; - } - - return data; -} - -/* - * Class: org_rocksdb_Slice - * Method: clear0 - * Signature: (JZJ)V - */ -void Java_org_rocksdb_Slice_clear0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle, jboolean shouldRelease, - jlong internalBufferOffset) { - auto* slice = reinterpret_cast(handle); - if (shouldRelease == JNI_TRUE) { - const char* buf = slice->data_ - internalBufferOffset; - delete[] buf; - } - slice->clear(); -} - -/* - * Class: org_rocksdb_Slice - * Method: removePrefix0 - * Signature: (JI)V - */ -void Java_org_rocksdb_Slice_removePrefix0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle, jint length) { - auto* slice = reinterpret_cast(handle); - slice->remove_prefix(length); -} - -/* - * Class: org_rocksdb_DirectSlice - * Method: setLength0 - * Signature: (JI)V - */ -void Java_org_rocksdb_DirectSlice_setLength0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle, jint length) { - auto* slice = reinterpret_cast(handle); - slice->size_ = length; -} - -/* - * Class: org_rocksdb_Slice - * Method: disposeInternalBuf - * Signature: (JJ)V - */ -void Java_org_rocksdb_Slice_disposeInternalBuf(JNIEnv* /*env*/, - jobject /*jobj*/, jlong handle, - jlong internalBufferOffset) { - const auto* slice = 
reinterpret_cast(handle); - const char* buf = slice->data_ - internalBufferOffset; - delete[] buf; -} - -// - -// (data_addr); - const auto* slice = new ROCKSDB_NAMESPACE::Slice(ptrData, length); - return GET_CPLUSPLUS_POINTER(slice); -} - -/* - * Class: org_rocksdb_DirectSlice - * Method: createNewDirectSlice1 - * Signature: (Ljava/nio/ByteBuffer;)J - */ -jlong Java_org_rocksdb_DirectSlice_createNewDirectSlice1(JNIEnv* env, - jclass /*jcls*/, - jobject data) { - void* data_addr = env->GetDirectBufferAddress(data); - if (data_addr == nullptr) { - // error: memory region is undefined, given object is not a direct - // java.nio.Buffer, or JNI access to direct buffers is not supported by JVM - ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew( - env, ROCKSDB_NAMESPACE::Status::InvalidArgument( - "Could not access DirectBuffer")); - return 0; - } - - const auto* ptrData = reinterpret_cast(data_addr); - const auto* slice = new ROCKSDB_NAMESPACE::Slice(ptrData); - return GET_CPLUSPLUS_POINTER(slice); -} - -/* - * Class: org_rocksdb_DirectSlice - * Method: data0 - * Signature: (J)Ljava/lang/Object; - */ -jobject Java_org_rocksdb_DirectSlice_data0(JNIEnv* env, jobject /*jobj*/, - jlong handle) { - const auto* slice = reinterpret_cast(handle); - return env->NewDirectByteBuffer(const_cast(slice->data()), - slice->size()); -} - -/* - * Class: org_rocksdb_DirectSlice - * Method: get0 - * Signature: (JI)B - */ -jbyte Java_org_rocksdb_DirectSlice_get0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle, jint offset) { - const auto* slice = reinterpret_cast(handle); - return (*slice)[offset]; -} - -/* - * Class: org_rocksdb_DirectSlice - * Method: clear0 - * Signature: (JZJ)V - */ -void Java_org_rocksdb_DirectSlice_clear0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle, jboolean shouldRelease, - jlong internalBufferOffset) { - auto* slice = reinterpret_cast(handle); - if (shouldRelease == JNI_TRUE) { - const char* buf = slice->data_ - internalBufferOffset; - delete[] buf; - } - slice->clear(); -} - -/* - * Class: org_rocksdb_DirectSlice - * Method: removePrefix0 - * Signature: (JI)V - */ -void Java_org_rocksdb_DirectSlice_removePrefix0(JNIEnv* /*env*/, - jobject /*jobj*/, jlong handle, - jint length) { - auto* slice = reinterpret_cast(handle); - slice->remove_prefix(length); -} - -/* - * Class: org_rocksdb_DirectSlice - * Method: disposeInternalBuf - * Signature: (JJ)V - */ -void Java_org_rocksdb_DirectSlice_disposeInternalBuf( - JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, - jlong internalBufferOffset) { - const auto* slice = reinterpret_cast(handle); - const char* buf = slice->data_ - internalBufferOffset; - delete[] buf; -} - -// diff --git a/java/rocksjni/snapshot.cc b/java/rocksjni/snapshot.cc deleted file mode 100644 index 2a1265a58..000000000 --- a/java/rocksjni/snapshot.cc +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++. 
-
-#include <jni.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "include/org_rocksdb_Snapshot.h"
-#include "rocksdb/db.h"
-#include "rocksjni/portal.h"
-
-/*
- * Class: org_rocksdb_Snapshot
- * Method: getSequenceNumber
- * Signature: (J)J
- */
-jlong Java_org_rocksdb_Snapshot_getSequenceNumber(JNIEnv* /*env*/,
-                                                  jobject /*jobj*/,
-                                                  jlong jsnapshot_handle) {
-  auto* snapshot =
-      reinterpret_cast<const ROCKSDB_NAMESPACE::Snapshot*>(jsnapshot_handle);
-  return snapshot->GetSequenceNumber();
-}
diff --git a/java/rocksjni/sst_file_manager.cc b/java/rocksjni/sst_file_manager.cc
deleted file mode 100644
index c51436819..000000000
--- a/java/rocksjni/sst_file_manager.cc
+++ /dev/null
@@ -1,250 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-//
-// This file implements the "bridge" between Java and C++ and enables
-// calling C++ ROCKSDB_NAMESPACE::SstFileManager methods
-// from Java side.
-
-#include "rocksdb/sst_file_manager.h"
-
-#include <jni.h>
-
-#include <memory>
-
-#include "include/org_rocksdb_SstFileManager.h"
-#include "rocksjni/cplusplus_to_java_convert.h"
-#include "rocksjni/portal.h"
-
-/*
- * Class: org_rocksdb_SstFileManager
- * Method: newSstFileManager
- * Signature: (JJJDJ)J
- */
-jlong Java_org_rocksdb_SstFileManager_newSstFileManager(
-    JNIEnv* jnienv, jclass /*jcls*/, jlong jenv_handle, jlong jlogger_handle,
-    jlong jrate_bytes, jdouble jmax_trash_db_ratio,
-    jlong jmax_delete_chunk_bytes) {
-  auto* env = reinterpret_cast<ROCKSDB_NAMESPACE::Env*>(jenv_handle);
-  ROCKSDB_NAMESPACE::Status s;
-  ROCKSDB_NAMESPACE::SstFileManager* sst_file_manager = nullptr;
-
-  if (jlogger_handle != 0) {
-    auto* sptr_logger =
-        reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::Logger>*>(
-            jlogger_handle);
-    sst_file_manager = ROCKSDB_NAMESPACE::NewSstFileManager(
-        env, *sptr_logger, "", jrate_bytes, true, &s, jmax_trash_db_ratio,
-        jmax_delete_chunk_bytes);
-  } else {
-    sst_file_manager = ROCKSDB_NAMESPACE::NewSstFileManager(
-        env, nullptr, "", jrate_bytes, true, &s, jmax_trash_db_ratio,
-        jmax_delete_chunk_bytes);
-  }
-
-  if (!s.ok()) {
-    if (sst_file_manager != nullptr) {
-      delete sst_file_manager;
-    }
-    ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(jnienv, s);
-  }
-  auto* sptr_sst_file_manager =
-      new std::shared_ptr<ROCKSDB_NAMESPACE::SstFileManager>(sst_file_manager);
-
-  return GET_CPLUSPLUS_POINTER(sptr_sst_file_manager);
-}
-
-/*
- * Class: org_rocksdb_SstFileManager
- * Method: setMaxAllowedSpaceUsage
- * Signature: (JJ)V
- */
-void Java_org_rocksdb_SstFileManager_setMaxAllowedSpaceUsage(
-    JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle,
-    jlong jmax_allowed_space) {
-  auto* sptr_sst_file_manager =
-      reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::SstFileManager>*>(
-          jhandle);
-  sptr_sst_file_manager->get()->SetMaxAllowedSpaceUsage(jmax_allowed_space);
-}
-
-/*
- * Class: org_rocksdb_SstFileManager
- * Method: setCompactionBufferSize
- * Signature: (JJ)V
- */
-void Java_org_rocksdb_SstFileManager_setCompactionBufferSize(
-    JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle,
-    jlong jcompaction_buffer_size) {
-  auto* sptr_sst_file_manager =
-      reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::SstFileManager>*>(
-          jhandle);
-  sptr_sst_file_manager->get()->SetCompactionBufferSize(
-      jcompaction_buffer_size);
-}
-
-/*
- * Class: org_rocksdb_SstFileManager
- * Method: isMaxAllowedSpaceReached
- * Signature: (J)Z
- */
-jboolean Java_org_rocksdb_SstFileManager_isMaxAllowedSpaceReached(
-    JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) {
-  auto* sptr_sst_file_manager =
-      reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::SstFileManager>*>(
-          jhandle);
-  return
sptr_sst_file_manager->get()->IsMaxAllowedSpaceReached(); -} - -/* - * Class: org_rocksdb_SstFileManager - * Method: isMaxAllowedSpaceReachedIncludingCompactions - * Signature: (J)Z - */ -jboolean -Java_org_rocksdb_SstFileManager_isMaxAllowedSpaceReachedIncludingCompactions( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jhandle); - return sptr_sst_file_manager->get() - ->IsMaxAllowedSpaceReachedIncludingCompactions(); -} - -/* - * Class: org_rocksdb_SstFileManager - * Method: getTotalSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_SstFileManager_getTotalSize(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jhandle); - return sptr_sst_file_manager->get()->GetTotalSize(); -} - -/* - * Class: org_rocksdb_SstFileManager - * Method: getTrackedFiles - * Signature: (J)Ljava/util/Map; - */ -jobject Java_org_rocksdb_SstFileManager_getTrackedFiles(JNIEnv* env, - jobject /*jobj*/, - jlong jhandle) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jhandle); - auto tracked_files = sptr_sst_file_manager->get()->GetTrackedFiles(); - - // TODO(AR) could refactor to share code with - // ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, tracked_files); - - const jobject jtracked_files = ROCKSDB_NAMESPACE::HashMapJni::construct( - env, static_cast(tracked_files.size())); - if (jtracked_files == nullptr) { - // exception occurred - return nullptr; - } - - const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV - fn_map_kv = - [env](const std::pair& pair) { - const jstring jtracked_file_path = - env->NewStringUTF(pair.first.c_str()); - if (jtracked_file_path == nullptr) { - // an error occurred - return std::unique_ptr>(nullptr); - } - const jobject jtracked_file_size = - ROCKSDB_NAMESPACE::LongJni::valueOf(env, pair.second); - if (jtracked_file_size == nullptr) { - // an error occurred - return std::unique_ptr>(nullptr); - } - return std::unique_ptr>( - new std::pair(jtracked_file_path, - jtracked_file_size)); - }; - - if (!ROCKSDB_NAMESPACE::HashMapJni::putAll(env, jtracked_files, - tracked_files.begin(), - tracked_files.end(), fn_map_kv)) { - // exception occcurred - return nullptr; - } - - return jtracked_files; -} - -/* - * Class: org_rocksdb_SstFileManager - * Method: getDeleteRateBytesPerSecond - * Signature: (J)J - */ -jlong Java_org_rocksdb_SstFileManager_getDeleteRateBytesPerSecond( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jhandle); - return sptr_sst_file_manager->get()->GetDeleteRateBytesPerSecond(); -} - -/* - * Class: org_rocksdb_SstFileManager - * Method: setDeleteRateBytesPerSecond - * Signature: (JJ)V - */ -void Java_org_rocksdb_SstFileManager_setDeleteRateBytesPerSecond( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jdelete_rate) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jhandle); - sptr_sst_file_manager->get()->SetDeleteRateBytesPerSecond(jdelete_rate); -} - -/* - * Class: org_rocksdb_SstFileManager - * Method: getMaxTrashDBRatio - * Signature: (J)D - */ -jdouble Java_org_rocksdb_SstFileManager_getMaxTrashDBRatio(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jhandle); - return sptr_sst_file_manager->get()->GetMaxTrashDBRatio(); -} - -/* - * Class: org_rocksdb_SstFileManager - * Method: setMaxTrashDBRatio - * Signature: (JD)V - */ -void Java_org_rocksdb_SstFileManager_setMaxTrashDBRatio(JNIEnv* /*env*/, - 
jobject /*jobj*/, - jlong jhandle, - jdouble jratio) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jhandle); - sptr_sst_file_manager->get()->SetMaxTrashDBRatio(jratio); -} - -/* - * Class: org_rocksdb_SstFileManager - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileManager_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* sptr_sst_file_manager = - reinterpret_cast*>( - jhandle); - delete sptr_sst_file_manager; -} diff --git a/java/rocksjni/sst_file_reader_iterator.cc b/java/rocksjni/sst_file_reader_iterator.cc deleted file mode 100644 index 68fa4c37c..000000000 --- a/java/rocksjni/sst_file_reader_iterator.cc +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::Iterator methods from Java side. - -#include -#include -#include - -#include "include/org_rocksdb_SstFileReaderIterator.h" -#include "rocksdb/iterator.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileReaderIterator_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - assert(it != nullptr); - delete it; -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: isValid0 - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_SstFileReaderIterator_isValid0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - return reinterpret_cast(handle)->Valid(); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: seekToFirst0 - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileReaderIterator_seekToFirst0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->SeekToFirst(); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: seekToLast0 - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileReaderIterator_seekToLast0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->SeekToLast(); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: next0 - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileReaderIterator_next0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->Next(); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: prev0 - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileReaderIterator_prev0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->Prev(); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: seek0 - * Signature: (J[BI)V - */ -void Java_org_rocksdb_SstFileReaderIterator_seek0(JNIEnv* env, jobject /*jobj*/, - jlong handle, - jbyteArray jtarget, - jint jtarget_len) { - jbyte* target = env->GetByteArrayElements(jtarget, nullptr); - if (target == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), - jtarget_len); - - auto* it = reinterpret_cast(handle); - it->Seek(target_slice); - - env->ReleaseByteArrayElements(jtarget, target, JNI_ABORT); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: seekForPrev0 - * Signature: (J[BI)V - */ -void 
Java_org_rocksdb_SstFileReaderIterator_seekForPrev0(JNIEnv* env, - jobject /*jobj*/, - jlong handle, - jbyteArray jtarget, - jint jtarget_len) { - jbyte* target = env->GetByteArrayElements(jtarget, nullptr); - if (target == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), - jtarget_len); - - auto* it = reinterpret_cast(handle); - it->SeekForPrev(target_slice); - - env->ReleaseByteArrayElements(jtarget, target, JNI_ABORT); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: status0 - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileReaderIterator_status0(JNIEnv* env, - jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Status s = it->status(); - - if (s.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: key0 - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_SstFileReaderIterator_key0(JNIEnv* env, - jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice key_slice = it->key(); - - jbyteArray jkey = env->NewByteArray(static_cast(key_slice.size())); - if (jkey == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetByteArrayRegion( - jkey, 0, static_cast(key_slice.size()), - const_cast(reinterpret_cast(key_slice.data()))); - return jkey; -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: value0 - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_SstFileReaderIterator_value0(JNIEnv* env, - jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice value_slice = it->value(); - - jbyteArray jkeyValue = - env->NewByteArray(static_cast(value_slice.size())); - if (jkeyValue == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetByteArrayRegion( - jkeyValue, 0, static_cast(value_slice.size()), - const_cast(reinterpret_cast(value_slice.data()))); - return jkeyValue; -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: keyDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)I - */ -jint Java_org_rocksdb_SstFileReaderIterator_keyDirect0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, - jint jtarget_off, jint jtarget_len) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice key_slice = it->key(); - return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, key_slice, jtarget, - jtarget_off, jtarget_len); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. 
- * - * Class: org_rocksdb_SstFileReaderIterator - * Method: keyByteArray0 - * Signature: (J[BII)I - */ -jint Java_org_rocksdb_SstFileReaderIterator_keyByteArray0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jkey, jint jkey_off, - jint jkey_len) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice key_slice = it->key(); - auto slice_size = key_slice.size(); - jsize copy_size = std::min(static_cast(slice_size), - static_cast(jkey_len)); - env->SetByteArrayRegion( - jkey, jkey_off, copy_size, - const_cast(reinterpret_cast(key_slice.data()))); - - return static_cast(slice_size); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: valueDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)I - */ -jint Java_org_rocksdb_SstFileReaderIterator_valueDirect0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, - jint jtarget_off, jint jtarget_len) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice value_slice = it->value(); - return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, value_slice, jtarget, - jtarget_off, jtarget_len); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. - * - * Class: org_rocksdb_SstFileReaderIterator - * Method: valueByteArray0 - * Signature: (J[BII)I - */ -jint Java_org_rocksdb_SstFileReaderIterator_valueByteArray0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jvalue_target, - jint jvalue_off, jint jvalue_len) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Slice value_slice = it->value(); - auto slice_size = value_slice.size(); - jsize copy_size = std::min(static_cast(slice_size), - static_cast(jvalue_len)); - env->SetByteArrayRegion( - jvalue_target, jvalue_off, copy_size, - const_cast(reinterpret_cast(value_slice.data()))); - - return static_cast(slice_size); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: seekDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)V - */ -void Java_org_rocksdb_SstFileReaderIterator_seekDirect0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, - jint jtarget_off, jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seek = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->Seek(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seek, env, jtarget, jtarget_off, - jtarget_len); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: seekForPrevDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)V - */ -void Java_org_rocksdb_SstFileReaderIterator_seekForPrevDirect0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, - jint jtarget_off, jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seekPrev = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->SeekForPrev(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seekPrev, env, jtarget, jtarget_off, - jtarget_len); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. 
- * - * Class: org_rocksdb_SstFileReaderIterator - * Method: seekByteArray0 - * Signature: (J[BII)V - */ -void Java_org_rocksdb_SstFileReaderIterator_seekByteArray0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, - jint jtarget_off, jint jtarget_len) { - const std::unique_ptr target(new char[jtarget_len]); - if (target == nullptr) { - jclass oom_class = env->FindClass("/lang/java/OutOfMemoryError"); - env->ThrowNew(oom_class, - "Memory allocation failed in RocksDB JNI function"); - return; - } - env->GetByteArrayRegion(jtarget, jtarget_off, jtarget_len, - reinterpret_cast(target.get())); - - ROCKSDB_NAMESPACE::Slice target_slice(target.get(), jtarget_len); - - auto* it = reinterpret_cast(handle); - it->Seek(target_slice); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. - * - * Class: org_rocksdb_SstFileReaderIterator - * Method: seekForPrevByteArray0 - * Signature: (J[BII)V - */ -void Java_org_rocksdb_SstFileReaderIterator_seekForPrevByteArray0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, - jint jtarget_off, jint jtarget_len) { - const std::unique_ptr target(new char[jtarget_len]); - if (target == nullptr) { - jclass oom_class = env->FindClass("/lang/java/OutOfMemoryError"); - env->ThrowNew(oom_class, - "Memory allocation failed in RocksDB JNI function"); - return; - } - env->GetByteArrayRegion(jtarget, jtarget_off, jtarget_len, - reinterpret_cast(target.get())); - - ROCKSDB_NAMESPACE::Slice target_slice(target.get(), jtarget_len); - - auto* it = reinterpret_cast(handle); - it->SeekForPrev(target_slice); -} - -/* - * Class: org_rocksdb_SstFileReaderIterator - * Method: refresh0 - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileReaderIterator_refresh0(JNIEnv* env, - jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Status s = it->Refresh(); - - if (s.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} diff --git a/java/rocksjni/sst_file_readerjni.cc b/java/rocksjni/sst_file_readerjni.cc deleted file mode 100644 index 7ef711842..000000000 --- a/java/rocksjni/sst_file_readerjni.cc +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling C++ ROCKSDB_NAMESPACE::SstFileReader methods -// from Java side. 
- -#include - -#include - -#include "include/org_rocksdb_SstFileReader.h" -#include "rocksdb/comparator.h" -#include "rocksdb/env.h" -#include "rocksdb/options.h" -#include "rocksdb/sst_file_reader.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_SstFileReader - * Method: newSstFileReader - * Signature: (J)J - */ -jlong Java_org_rocksdb_SstFileReader_newSstFileReader(JNIEnv * /*env*/, - jclass /*jcls*/, - jlong joptions) { - auto *options = - reinterpret_cast(joptions); - ROCKSDB_NAMESPACE::SstFileReader *sst_file_reader = - new ROCKSDB_NAMESPACE::SstFileReader(*options); - return GET_CPLUSPLUS_POINTER(sst_file_reader); -} - -/* - * Class: org_rocksdb_SstFileReader - * Method: open - * Signature: (JLjava/lang/String;)V - */ -void Java_org_rocksdb_SstFileReader_open(JNIEnv *env, jobject /*jobj*/, - jlong jhandle, jstring jfile_path) { - const char *file_path = env->GetStringUTFChars(jfile_path, nullptr); - if (file_path == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - ROCKSDB_NAMESPACE::Status s = - reinterpret_cast(jhandle)->Open( - file_path); - env->ReleaseStringUTFChars(jfile_path, file_path); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileReader - * Method: newIterator - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_SstFileReader_newIterator(JNIEnv * /*env*/, - jobject /*jobj*/, - jlong jhandle, - jlong jread_options_handle) { - auto *sst_file_reader = - reinterpret_cast(jhandle); - auto *read_options = - reinterpret_cast(jread_options_handle); - return GET_CPLUSPLUS_POINTER(sst_file_reader->NewIterator(*read_options)); -} - -/* - * Class: org_rocksdb_SstFileReader - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileReader_disposeInternal(JNIEnv * /*env*/, - jobject /*jobj*/, - jlong jhandle) { - delete reinterpret_cast(jhandle); -} - -/* - * Class: org_rocksdb_SstFileReader - * Method: verifyChecksum - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileReader_verifyChecksum(JNIEnv *env, - jobject /*jobj*/, - jlong jhandle) { - auto *sst_file_reader = - reinterpret_cast(jhandle); - auto s = sst_file_reader->VerifyChecksum(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileReader - * Method: getTableProperties - * Signature: (J)J - */ -jobject Java_org_rocksdb_SstFileReader_getTableProperties(JNIEnv *env, - jobject /*jobj*/, - jlong jhandle) { - auto *sst_file_reader = - reinterpret_cast(jhandle); - std::shared_ptr tp = - sst_file_reader->GetTableProperties(); - jobject jtable_properties = - ROCKSDB_NAMESPACE::TablePropertiesJni::fromCppTableProperties( - env, *(tp.get())); - return jtable_properties; -} diff --git a/java/rocksjni/sst_file_writerjni.cc b/java/rocksjni/sst_file_writerjni.cc deleted file mode 100644 index 1898c3cfc..000000000 --- a/java/rocksjni/sst_file_writerjni.cc +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling C++ ROCKSDB_NAMESPACE::SstFileWriter methods -// from Java side. 
- -#include - -#include - -#include "include/org_rocksdb_SstFileWriter.h" -#include "rocksdb/comparator.h" -#include "rocksdb/env.h" -#include "rocksdb/options.h" -#include "rocksdb/sst_file_writer.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_SstFileWriter - * Method: newSstFileWriter - * Signature: (JJJB)J - */ -jlong Java_org_rocksdb_SstFileWriter_newSstFileWriter__JJJB( - JNIEnv * /*env*/, jclass /*jcls*/, jlong jenvoptions, jlong joptions, - jlong jcomparator_handle, jbyte jcomparator_type) { - ROCKSDB_NAMESPACE::Comparator *comparator = nullptr; - switch (jcomparator_type) { - // JAVA_COMPARATOR - case 0x0: - comparator = reinterpret_cast( - jcomparator_handle); - break; - - // JAVA_NATIVE_COMPARATOR_WRAPPER - case 0x1: - comparator = - reinterpret_cast(jcomparator_handle); - break; - } - auto *env_options = - reinterpret_cast(jenvoptions); - auto *options = - reinterpret_cast(joptions); - ROCKSDB_NAMESPACE::SstFileWriter *sst_file_writer = - new ROCKSDB_NAMESPACE::SstFileWriter(*env_options, *options, comparator); - return GET_CPLUSPLUS_POINTER(sst_file_writer); -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: newSstFileWriter - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_SstFileWriter_newSstFileWriter__JJ(JNIEnv * /*env*/, - jclass /*jcls*/, - jlong jenvoptions, - jlong joptions) { - auto *env_options = - reinterpret_cast(jenvoptions); - auto *options = - reinterpret_cast(joptions); - ROCKSDB_NAMESPACE::SstFileWriter *sst_file_writer = - new ROCKSDB_NAMESPACE::SstFileWriter(*env_options, *options); - return GET_CPLUSPLUS_POINTER(sst_file_writer); -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: open - * Signature: (JLjava/lang/String;)V - */ -void Java_org_rocksdb_SstFileWriter_open(JNIEnv *env, jobject /*jobj*/, - jlong jhandle, jstring jfile_path) { - const char *file_path = env->GetStringUTFChars(jfile_path, nullptr); - if (file_path == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - ROCKSDB_NAMESPACE::Status s = - reinterpret_cast(jhandle)->Open( - file_path); - env->ReleaseStringUTFChars(jfile_path, file_path); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: put - * Signature: (JJJ)V - */ -void Java_org_rocksdb_SstFileWriter_put__JJJ(JNIEnv *env, jobject /*jobj*/, - jlong jhandle, jlong jkey_handle, - jlong jvalue_handle) { - auto *key_slice = reinterpret_cast(jkey_handle); - auto *value_slice = - reinterpret_cast(jvalue_handle); - ROCKSDB_NAMESPACE::Status s = - reinterpret_cast(jhandle)->Put( - *key_slice, *value_slice); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: put - * Signature: (JJJ)V - */ -void Java_org_rocksdb_SstFileWriter_put__J_3B_3B(JNIEnv *env, jobject /*jobj*/, - jlong jhandle, jbyteArray jkey, - jbyteArray jval) { - jbyte *key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - env->GetArrayLength(jkey)); - - jbyte *value = env->GetByteArrayElements(jval, nullptr); - if (value == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - return; - } - ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), - env->GetArrayLength(jval)); - - ROCKSDB_NAMESPACE::Status s = - 
reinterpret_cast(jhandle)->Put( - key_slice, value_slice); - - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - env->ReleaseByteArrayElements(jval, value, JNI_ABORT); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: putDirect - * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V - */ -void Java_org_rocksdb_SstFileWriter_putDirect(JNIEnv *env, jobject /*jdb*/, - jlong jdb_handle, jobject jkey, - jint jkey_off, jint jkey_len, - jobject jval, jint jval_off, - jint jval_len) { - auto *writer = - reinterpret_cast(jdb_handle); - auto put = [&env, &writer](ROCKSDB_NAMESPACE::Slice &key, - ROCKSDB_NAMESPACE::Slice &value) { - ROCKSDB_NAMESPACE::Status s = writer->Put(key, value); - if (s.ok()) { - return; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - }; - ROCKSDB_NAMESPACE::JniUtil::kv_op_direct(put, env, jkey, jkey_off, jkey_len, - jval, jval_off, jval_len); -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: fileSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_SstFileWriter_fileSize(JNIEnv * /*env*/, jobject /*jdb*/, - jlong jdb_handle) { - auto *writer = - reinterpret_cast(jdb_handle); - return static_cast(writer->FileSize()); -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: merge - * Signature: (JJJ)V - */ -void Java_org_rocksdb_SstFileWriter_merge__JJJ(JNIEnv *env, jobject /*jobj*/, - jlong jhandle, jlong jkey_handle, - jlong jvalue_handle) { - auto *key_slice = reinterpret_cast(jkey_handle); - auto *value_slice = - reinterpret_cast(jvalue_handle); - ROCKSDB_NAMESPACE::Status s = - reinterpret_cast(jhandle)->Merge( - *key_slice, *value_slice); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: merge - * Signature: (J[B[B)V - */ -void Java_org_rocksdb_SstFileWriter_merge__J_3B_3B(JNIEnv *env, - jobject /*jobj*/, - jlong jhandle, - jbyteArray jkey, - jbyteArray jval) { - jbyte *key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - env->GetArrayLength(jkey)); - - jbyte *value = env->GetByteArrayElements(jval, nullptr); - if (value == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - return; - } - ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), - env->GetArrayLength(jval)); - - ROCKSDB_NAMESPACE::Status s = - reinterpret_cast(jhandle)->Merge( - key_slice, value_slice); - - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - env->ReleaseByteArrayElements(jval, value, JNI_ABORT); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: delete - * Signature: (JJJ)V - */ -void Java_org_rocksdb_SstFileWriter_delete__J_3B(JNIEnv *env, jobject /*jobj*/, - jlong jhandle, - jbyteArray jkey) { - jbyte *key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - env->GetArrayLength(jkey)); - - ROCKSDB_NAMESPACE::Status s = - reinterpret_cast(jhandle)->Delete( - key_slice); - - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileWriter - * 
Method: delete - * Signature: (JJJ)V - */ -void Java_org_rocksdb_SstFileWriter_delete__JJ(JNIEnv *env, jobject /*jobj*/, - jlong jhandle, - jlong jkey_handle) { - auto *key_slice = reinterpret_cast(jkey_handle); - ROCKSDB_NAMESPACE::Status s = - reinterpret_cast(jhandle)->Delete( - *key_slice); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: finish - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileWriter_finish(JNIEnv *env, jobject /*jobj*/, - jlong jhandle) { - ROCKSDB_NAMESPACE::Status s = - reinterpret_cast(jhandle)->Finish(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_SstFileWriter - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_SstFileWriter_disposeInternal(JNIEnv * /*env*/, - jobject /*jobj*/, - jlong jhandle) { - delete reinterpret_cast(jhandle); -} diff --git a/java/rocksjni/sst_partitioner.cc b/java/rocksjni/sst_partitioner.cc deleted file mode 100644 index 1cea3b0cb..000000000 --- a/java/rocksjni/sst_partitioner.cc +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling C++ ROCKSDB_NAMESPACE::SstFileManager methods -// from Java side. - -#include "rocksdb/sst_partitioner.h" - -#include - -#include - -#include "include/org_rocksdb_SstPartitionerFixedPrefixFactory.h" -#include "rocksdb/sst_file_manager.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_SstPartitionerFixedPrefixFactory - * Method: newSstPartitionerFixedPrefixFactory0 - * Signature: (J)J - */ -jlong Java_org_rocksdb_SstPartitionerFixedPrefixFactory_newSstPartitionerFixedPrefixFactory0( - JNIEnv*, jclass, jlong prefix_len) { - auto* ptr = new std::shared_ptr( - ROCKSDB_NAMESPACE::NewSstPartitionerFixedPrefixFactory(prefix_len)); - return GET_CPLUSPLUS_POINTER(ptr); -} - -/* - * Class: org_rocksdb_SstPartitionerFixedPrefixFactory - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_SstPartitionerFixedPrefixFactory_disposeInternal( - JNIEnv*, jobject, jlong jhandle) { - auto* ptr = reinterpret_cast< - std::shared_ptr*>(jhandle); - delete ptr; // delete std::shared_ptr -} diff --git a/java/rocksjni/statistics.cc b/java/rocksjni/statistics.cc deleted file mode 100644 index bd405afa1..000000000 --- a/java/rocksjni/statistics.cc +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::Statistics methods from Java side. 
- -#include "rocksdb/statistics.h" - -#include - -#include -#include - -#include "include/org_rocksdb_Statistics.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -#include "rocksjni/statisticsjni.h" - -/* - * Class: org_rocksdb_Statistics - * Method: newStatistics - * Signature: ()J - */ -jlong Java_org_rocksdb_Statistics_newStatistics__(JNIEnv* env, jclass jcls) { - return Java_org_rocksdb_Statistics_newStatistics___3BJ(env, jcls, nullptr, 0); -} - -/* - * Class: org_rocksdb_Statistics - * Method: newStatistics - * Signature: (J)J - */ -jlong Java_org_rocksdb_Statistics_newStatistics__J( - JNIEnv* env, jclass jcls, jlong jother_statistics_handle) { - return Java_org_rocksdb_Statistics_newStatistics___3BJ( - env, jcls, nullptr, jother_statistics_handle); -} - -/* - * Class: org_rocksdb_Statistics - * Method: newStatistics - * Signature: ([B)J - */ -jlong Java_org_rocksdb_Statistics_newStatistics___3B(JNIEnv* env, jclass jcls, - jbyteArray jhistograms) { - return Java_org_rocksdb_Statistics_newStatistics___3BJ(env, jcls, jhistograms, - 0); -} - -/* - * Class: org_rocksdb_Statistics - * Method: newStatistics - * Signature: ([BJ)J - */ -jlong Java_org_rocksdb_Statistics_newStatistics___3BJ( - JNIEnv* env, jclass, jbyteArray jhistograms, - jlong jother_statistics_handle) { - std::shared_ptr* pSptr_other_statistics = - nullptr; - if (jother_statistics_handle > 0) { - pSptr_other_statistics = - reinterpret_cast*>( - jother_statistics_handle); - } - - std::set histograms; - if (jhistograms != nullptr) { - const jsize len = env->GetArrayLength(jhistograms); - if (len > 0) { - jbyte* jhistogram = env->GetByteArrayElements(jhistograms, nullptr); - if (jhistogram == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - - for (jsize i = 0; i < len; i++) { - const ROCKSDB_NAMESPACE::Histograms histogram = - ROCKSDB_NAMESPACE::HistogramTypeJni::toCppHistograms(jhistogram[i]); - histograms.emplace(histogram); - } - - env->ReleaseByteArrayElements(jhistograms, jhistogram, JNI_ABORT); - } - } - - std::shared_ptr sptr_other_statistics = - nullptr; - if (pSptr_other_statistics != nullptr) { - sptr_other_statistics = *pSptr_other_statistics; - } - - auto* pSptr_statistics = - new std::shared_ptr( - new ROCKSDB_NAMESPACE::StatisticsJni(sptr_other_statistics, - histograms)); - - return GET_CPLUSPLUS_POINTER(pSptr_statistics); -} - -/* - * Class: org_rocksdb_Statistics - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_Statistics_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - if (jhandle > 0) { - auto* pSptr_statistics = - reinterpret_cast*>( - jhandle); - delete pSptr_statistics; - } -} - -/* - * Class: org_rocksdb_Statistics - * Method: statsLevel - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Statistics_statsLevel(JNIEnv*, jobject, jlong jhandle) { - auto* pSptr_statistics = - reinterpret_cast*>( - jhandle); - assert(pSptr_statistics != nullptr); - return ROCKSDB_NAMESPACE::StatsLevelJni::toJavaStatsLevel( - pSptr_statistics->get()->get_stats_level()); -} - -/* - * Class: org_rocksdb_Statistics - * Method: setStatsLevel - * Signature: (JB)V - */ -void Java_org_rocksdb_Statistics_setStatsLevel(JNIEnv*, jobject, jlong jhandle, - jbyte jstats_level) { - auto* pSptr_statistics = - reinterpret_cast*>( - jhandle); - assert(pSptr_statistics != nullptr); - auto stats_level = - ROCKSDB_NAMESPACE::StatsLevelJni::toCppStatsLevel(jstats_level); - pSptr_statistics->get()->set_stats_level(stats_level); -} - -/* - * Class: 
org_rocksdb_Statistics - * Method: getTickerCount - * Signature: (JB)J - */ -jlong Java_org_rocksdb_Statistics_getTickerCount(JNIEnv*, jobject, - jlong jhandle, - jbyte jticker_type) { - auto* pSptr_statistics = - reinterpret_cast*>( - jhandle); - assert(pSptr_statistics != nullptr); - auto ticker = ROCKSDB_NAMESPACE::TickerTypeJni::toCppTickers(jticker_type); - uint64_t count = pSptr_statistics->get()->getTickerCount(ticker); - return static_cast(count); -} - -/* - * Class: org_rocksdb_Statistics - * Method: getAndResetTickerCount - * Signature: (JB)J - */ -jlong Java_org_rocksdb_Statistics_getAndResetTickerCount(JNIEnv*, jobject, - jlong jhandle, - jbyte jticker_type) { - auto* pSptr_statistics = - reinterpret_cast*>( - jhandle); - assert(pSptr_statistics != nullptr); - auto ticker = ROCKSDB_NAMESPACE::TickerTypeJni::toCppTickers(jticker_type); - return pSptr_statistics->get()->getAndResetTickerCount(ticker); -} - -/* - * Class: org_rocksdb_Statistics - * Method: getHistogramData - * Signature: (JB)Lorg/rocksdb/HistogramData; - */ -jobject Java_org_rocksdb_Statistics_getHistogramData(JNIEnv* env, jobject, - jlong jhandle, - jbyte jhistogram_type) { - auto* pSptr_statistics = - reinterpret_cast*>( - jhandle); - assert(pSptr_statistics != nullptr); - - // TODO(AR) perhaps better to construct a Java Object Wrapper that - // uses ptr to C++ `new HistogramData` - ROCKSDB_NAMESPACE::HistogramData data; - - auto histogram = - ROCKSDB_NAMESPACE::HistogramTypeJni::toCppHistograms(jhistogram_type); - pSptr_statistics->get()->histogramData( - static_cast(histogram), &data); - - jclass jclazz = ROCKSDB_NAMESPACE::HistogramDataJni::getJClass(env); - if (jclazz == nullptr) { - // exception occurred accessing class - return nullptr; - } - - jmethodID mid = - ROCKSDB_NAMESPACE::HistogramDataJni::getConstructorMethodId(env); - if (mid == nullptr) { - // exception occurred accessing method - return nullptr; - } - - return env->NewObject(jclazz, mid, data.median, data.percentile95, - data.percentile99, data.average, - data.standard_deviation, data.max, data.count, data.sum, - data.min); -} - -/* - * Class: org_rocksdb_Statistics - * Method: getHistogramString - * Signature: (JB)Ljava/lang/String; - */ -jstring Java_org_rocksdb_Statistics_getHistogramString(JNIEnv* env, jobject, - jlong jhandle, - jbyte jhistogram_type) { - auto* pSptr_statistics = - reinterpret_cast*>( - jhandle); - assert(pSptr_statistics != nullptr); - auto histogram = - ROCKSDB_NAMESPACE::HistogramTypeJni::toCppHistograms(jhistogram_type); - auto str = pSptr_statistics->get()->getHistogramString(histogram); - return env->NewStringUTF(str.c_str()); -} - -/* - * Class: org_rocksdb_Statistics - * Method: reset - * Signature: (J)V - */ -void Java_org_rocksdb_Statistics_reset(JNIEnv* env, jobject, jlong jhandle) { - auto* pSptr_statistics = - reinterpret_cast*>( - jhandle); - assert(pSptr_statistics != nullptr); - ROCKSDB_NAMESPACE::Status s = pSptr_statistics->get()->Reset(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Statistics - * Method: toString - * Signature: (J)Ljava/lang/String; - */ -jstring Java_org_rocksdb_Statistics_toString(JNIEnv* env, jobject, - jlong jhandle) { - auto* pSptr_statistics = - reinterpret_cast*>( - jhandle); - assert(pSptr_statistics != nullptr); - auto str = pSptr_statistics->get()->ToString(); - return env->NewStringUTF(str.c_str()); -} diff --git a/java/rocksjni/statisticsjni.cc b/java/rocksjni/statisticsjni.cc deleted file mode 100644 index 
f46337893..000000000
--- a/java/rocksjni/statisticsjni.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-//
-// This file implements the callback "bridge" between Java and C++ for
-// ROCKSDB_NAMESPACE::Statistics
-
-#include "rocksjni/statisticsjni.h"
-
-namespace ROCKSDB_NAMESPACE {
-
-StatisticsJni::StatisticsJni(std::shared_ptr<Statistics> stats)
-    : StatisticsImpl(stats), m_ignore_histograms() {}
-
-StatisticsJni::StatisticsJni(std::shared_ptr<Statistics> stats,
-                             const std::set<uint32_t> ignore_histograms)
-    : StatisticsImpl(stats), m_ignore_histograms(ignore_histograms) {}
-
-bool StatisticsJni::HistEnabledForType(uint32_t type) const {
-  if (type >= HISTOGRAM_ENUM_MAX) {
-    return false;
-  }
-
-  if (m_ignore_histograms.count(type) > 0) {
-    return false;
-  }
-
-  return true;
-}
-}  // namespace ROCKSDB_NAMESPACE
diff --git a/java/rocksjni/statisticsjni.h b/java/rocksjni/statisticsjni.h
deleted file mode 100644
index ce823f9b1..000000000
--- a/java/rocksjni/statisticsjni.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-//
-// This file implements the callback "bridge" between Java and C++ for
-// ROCKSDB_NAMESPACE::Statistics
-
-#ifndef JAVA_ROCKSJNI_STATISTICSJNI_H_
-#define JAVA_ROCKSJNI_STATISTICSJNI_H_
-
-#include <memory>
-#include <set>
-#include <string>
-
-#include "monitoring/statistics.h"
-#include "rocksdb/statistics.h"
-
-namespace ROCKSDB_NAMESPACE {
-
-class StatisticsJni : public StatisticsImpl {
- public:
-  StatisticsJni(std::shared_ptr<Statistics> stats);
-  StatisticsJni(std::shared_ptr<Statistics> stats,
-                const std::set<uint32_t> ignore_histograms);
-  virtual bool HistEnabledForType(uint32_t type) const override;
-
- private:
-  const std::set<uint32_t> m_ignore_histograms;
-};
-
-}  // namespace ROCKSDB_NAMESPACE
-
-#endif  // JAVA_ROCKSJNI_STATISTICSJNI_H_
diff --git a/java/rocksjni/table.cc b/java/rocksjni/table.cc
deleted file mode 100644
index 7f99900e4..000000000
--- a/java/rocksjni/table.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-//
-// This file implements the "bridge" between Java and C++ for
-// ROCKSDB_NAMESPACE::Options.
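The Statistics native methods deleted above all follow the same handle convention: the jlong stored by the Java Statistics object points at a heap-allocated std::shared_ptr<ROCKSDB_NAMESPACE::Statistics>, and every native method casts the handle back before touching the statistics object. A minimal sketch of that pattern follows; the template arguments and include list are reconstructions rather than verbatim lines from the deleted file.

// A minimal sketch, assuming the handle layout described above; the
// TickerTypeJni mapping is assumed to come from the rocksjni portal header.
#include <jni.h>

#include <cassert>
#include <memory>

#include "rocksdb/statistics.h"
#include "rocksjni/portal.h"

jlong Java_org_rocksdb_Statistics_getTickerCount(JNIEnv*, jobject,
                                                 jlong jhandle,
                                                 jbyte jticker_type) {
  auto* pSptr_statistics =
      reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::Statistics>*>(
          jhandle);
  assert(pSptr_statistics != nullptr);
  auto ticker = ROCKSDB_NAMESPACE::TickerTypeJni::toCppTickers(jticker_type);
  uint64_t count = pSptr_statistics->get()->getTickerCount(ticker);
  return static_cast<jlong>(count);  // hand the uint64_t back as a Java long
}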
- -#include "rocksdb/table.h" - -#include - -#include "include/org_rocksdb_BlockBasedTableConfig.h" -#include "include/org_rocksdb_PlainTableConfig.h" -#include "portal.h" -#include "rocksdb/cache.h" -#include "rocksdb/filter_policy.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_PlainTableConfig - * Method: newTableFactoryHandle - * Signature: (IIDIIBZZ)J - */ -jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( - JNIEnv * /*env*/, jobject /*jobj*/, jint jkey_size, - jint jbloom_bits_per_key, jdouble jhash_table_ratio, jint jindex_sparseness, - jint jhuge_page_tlb_size, jbyte jencoding_type, jboolean jfull_scan_mode, - jboolean jstore_index_in_file) { - ROCKSDB_NAMESPACE::PlainTableOptions options = - ROCKSDB_NAMESPACE::PlainTableOptions(); - options.user_key_len = jkey_size; - options.bloom_bits_per_key = jbloom_bits_per_key; - options.hash_table_ratio = jhash_table_ratio; - options.index_sparseness = jindex_sparseness; - options.huge_page_tlb_size = jhuge_page_tlb_size; - options.encoding_type = - static_cast(jencoding_type); - options.full_scan_mode = jfull_scan_mode; - options.store_index_in_file = jstore_index_in_file; - return GET_CPLUSPLUS_POINTER( - ROCKSDB_NAMESPACE::NewPlainTableFactory(options)); -} - -/* - * Class: org_rocksdb_BlockBasedTableConfig - * Method: newTableFactoryHandle - * Signature: (ZZZZBBDBZJJJJIIIJZZZJZZIIZZBJIJI)J - */ -jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( - JNIEnv *, jobject, jboolean jcache_index_and_filter_blocks, - jboolean jcache_index_and_filter_blocks_with_high_priority, - jboolean jpin_l0_filter_and_index_blocks_in_cache, - jboolean jpin_top_level_index_and_filter, jbyte jindex_type_value, - jbyte jdata_block_index_type_value, - jdouble jdata_block_hash_table_util_ratio, jbyte jchecksum_type_value, - jboolean jno_block_cache, jlong jblock_cache_handle, - jlong jpersistent_cache_handle, jlong jblock_size, - jint jblock_size_deviation, jint jblock_restart_interval, - jint jindex_block_restart_interval, jlong jmetadata_block_size, - jboolean jpartition_filters, jboolean joptimize_filters_for_memory, - jboolean juse_delta_encoding, jlong jfilter_policy_handle, - jboolean jwhole_key_filtering, jboolean jverify_compression, - jint jread_amp_bytes_per_bit, jint jformat_version, - jboolean jenable_index_compression, jboolean jblock_align, - jbyte jindex_shortening, jlong jblock_cache_size, - jint jblock_cache_num_shard_bits) { - ROCKSDB_NAMESPACE::BlockBasedTableOptions options; - options.cache_index_and_filter_blocks = - static_cast(jcache_index_and_filter_blocks); - options.cache_index_and_filter_blocks_with_high_priority = - static_cast(jcache_index_and_filter_blocks_with_high_priority); - options.pin_l0_filter_and_index_blocks_in_cache = - static_cast(jpin_l0_filter_and_index_blocks_in_cache); - options.pin_top_level_index_and_filter = - static_cast(jpin_top_level_index_and_filter); - options.index_type = - ROCKSDB_NAMESPACE::IndexTypeJni::toCppIndexType(jindex_type_value); - options.data_block_index_type = - ROCKSDB_NAMESPACE::DataBlockIndexTypeJni::toCppDataBlockIndexType( - jdata_block_index_type_value); - options.data_block_hash_table_util_ratio = - static_cast(jdata_block_hash_table_util_ratio); - options.checksum = ROCKSDB_NAMESPACE::ChecksumTypeJni::toCppChecksumType( - jchecksum_type_value); - options.no_block_cache = static_cast(jno_block_cache); - if (options.no_block_cache) { - options.block_cache = nullptr; - } else { - if (jblock_cache_handle > 0) { - std::shared_ptr 
*pCache = - reinterpret_cast *>( - jblock_cache_handle); - options.block_cache = *pCache; - } else if (jblock_cache_size >= 0) { - if (jblock_cache_num_shard_bits > 0) { - options.block_cache = ROCKSDB_NAMESPACE::NewLRUCache( - static_cast(jblock_cache_size), - static_cast(jblock_cache_num_shard_bits)); - } else { - options.block_cache = ROCKSDB_NAMESPACE::NewLRUCache( - static_cast(jblock_cache_size)); - } - } else { - options.no_block_cache = true; - options.block_cache = nullptr; - } - } - if (jpersistent_cache_handle > 0) { - std::shared_ptr *pCache = - reinterpret_cast *>( - jpersistent_cache_handle); - options.persistent_cache = *pCache; - } - options.block_size = static_cast(jblock_size); - options.block_size_deviation = static_cast(jblock_size_deviation); - options.block_restart_interval = static_cast(jblock_restart_interval); - options.index_block_restart_interval = - static_cast(jindex_block_restart_interval); - options.metadata_block_size = static_cast(jmetadata_block_size); - options.partition_filters = static_cast(jpartition_filters); - options.optimize_filters_for_memory = - static_cast(joptimize_filters_for_memory); - options.use_delta_encoding = static_cast(juse_delta_encoding); - if (jfilter_policy_handle > 0) { - std::shared_ptr *pFilterPolicy = - reinterpret_cast *>( - jfilter_policy_handle); - options.filter_policy = *pFilterPolicy; - } - options.whole_key_filtering = static_cast(jwhole_key_filtering); - options.verify_compression = static_cast(jverify_compression); - options.read_amp_bytes_per_bit = - static_cast(jread_amp_bytes_per_bit); - options.format_version = static_cast(jformat_version); - options.enable_index_compression = - static_cast(jenable_index_compression); - options.block_align = static_cast(jblock_align); - options.index_shortening = - ROCKSDB_NAMESPACE::IndexShorteningModeJni::toCppIndexShorteningMode( - jindex_shortening); - - return GET_CPLUSPLUS_POINTER( - ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(options)); -} diff --git a/java/rocksjni/table_filter.cc b/java/rocksjni/table_filter.cc deleted file mode 100644 index 1400fa1d9..000000000 --- a/java/rocksjni/table_filter.cc +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// org.rocksdb.AbstractTableFilter. - -#include - -#include - -#include "include/org_rocksdb_AbstractTableFilter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/table_filter_jnicallback.h" - -/* - * Class: org_rocksdb_AbstractTableFilter - * Method: createNewTableFilter - * Signature: ()J - */ -jlong Java_org_rocksdb_AbstractTableFilter_createNewTableFilter( - JNIEnv* env, jobject jtable_filter) { - auto* table_filter_jnicallback = - new ROCKSDB_NAMESPACE::TableFilterJniCallback(env, jtable_filter); - return GET_CPLUSPLUS_POINTER(table_filter_jnicallback); -} diff --git a/java/rocksjni/table_filter_jnicallback.cc b/java/rocksjni/table_filter_jnicallback.cc deleted file mode 100644 index 5350c5cee..000000000 --- a/java/rocksjni/table_filter_jnicallback.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
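One part of the BlockBasedTableConfig bridge above that rewards a closer look is the block-cache selection: a positive handle means "share the Cache the Java side already created", a non-negative size means "build an LRU cache here", and anything else disables the cache. The sketch below isolates that branch; configure_block_cache is a hypothetical helper name and the shared_ptr<Cache> template argument is reconstructed, not quoted from the deleted file.

#include <jni.h>

#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/table.h"

// Hypothetical helper mirroring the cache-selection branch of
// Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle.
void configure_block_cache(ROCKSDB_NAMESPACE::BlockBasedTableOptions& options,
                           jlong jblock_cache_handle, jlong jblock_cache_size,
                           jint jblock_cache_num_shard_bits) {
  if (jblock_cache_handle > 0) {
    // Reuse a Cache created earlier on the Java side (the handle points at a
    // heap-allocated shared_ptr<Cache>).
    auto* pCache =
        reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::Cache>*>(
            jblock_cache_handle);
    options.block_cache = *pCache;
  } else if (jblock_cache_size >= 0) {
    // Otherwise build an LRU cache of the requested size, sharded if asked.
    options.block_cache =
        jblock_cache_num_shard_bits > 0
            ? ROCKSDB_NAMESPACE::NewLRUCache(
                  static_cast<size_t>(jblock_cache_size),
                  static_cast<int>(jblock_cache_num_shard_bits))
            : ROCKSDB_NAMESPACE::NewLRUCache(
                  static_cast<size_t>(jblock_cache_size));
  } else {
    // A negative size means "run without a block cache".
    options.no_block_cache = true;
    options.block_cache = nullptr;
  }
}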
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::TableFilter. - -#include "rocksjni/table_filter_jnicallback.h" - -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { -TableFilterJniCallback::TableFilterJniCallback(JNIEnv* env, - jobject jtable_filter) - : JniCallback(env, jtable_filter) { - m_jfilter_methodid = AbstractTableFilterJni::getFilterMethod(env); - if (m_jfilter_methodid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - - // create the function reference - /* - Note the JNI ENV must be obtained/release - on each call to the function itself as - it may be called from multiple threads - */ - m_table_filter_function = - [this](const ROCKSDB_NAMESPACE::TableProperties& table_properties) { - jboolean attached_thread = JNI_FALSE; - JNIEnv* thread_env = getJniEnv(&attached_thread); - assert(thread_env != nullptr); - - // create a Java TableProperties object - jobject jtable_properties = TablePropertiesJni::fromCppTableProperties( - thread_env, table_properties); - if (jtable_properties == nullptr) { - // exception thrown from fromCppTableProperties - thread_env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return false; - } - - jboolean result = thread_env->CallBooleanMethod( - m_jcallback_obj, m_jfilter_methodid, jtable_properties); - if (thread_env->ExceptionCheck()) { - // exception thrown from CallBooleanMethod - thread_env->DeleteLocalRef(jtable_properties); - thread_env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return false; - } - - // ok... cleanup and then return - releaseJniEnv(attached_thread); - return static_cast(result); - }; -} - -std::function -TableFilterJniCallback::GetTableFilterFunction() { - return m_table_filter_function; -} - -} // namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/table_filter_jnicallback.h b/java/rocksjni/table_filter_jnicallback.h deleted file mode 100644 index 0ef404ca2..000000000 --- a/java/rocksjni/table_filter_jnicallback.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::TableFilter. 
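TableFilterJniCallback, deleted above, wraps the Java filter method in a std::function so RocksDB can invoke it from arbitrary background threads; the key rule is that each invocation must obtain a JNIEnv for the current thread and detach only if it attached. The stand-alone sketch below illustrates that attach/release discipline with raw JNI calls; make_callback, jcallback and mid are illustrative stand-ins, not names from the deleted file.

#include <jni.h>

#include <cassert>
#include <functional>

// Illustrative only: the same attach/call/detach rule that the deleted
// callback follows via JniCallback::getJniEnv / releaseJniEnv.
std::function<bool()> make_callback(JavaVM* jvm, jobject jcallback,
                                    jmethodID mid) {
  return [jvm, jcallback, mid]() {
    JNIEnv* env = nullptr;
    bool attached = false;
    if (jvm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) ==
        JNI_EDETACHED) {
      // Background thread: attach it for the duration of this call.
      if (jvm->AttachCurrentThread(reinterpret_cast<void**>(&env), nullptr) !=
          JNI_OK) {
        return false;
      }
      attached = true;
    }
    assert(env != nullptr);
    jboolean keep = env->CallBooleanMethod(jcallback, mid);
    if (env->ExceptionCheck()) {
      env->ExceptionDescribe();  // surface the Java exception on stderr
      keep = JNI_FALSE;
    }
    if (attached) {
      jvm->DetachCurrentThread();  // only detach threads we attached
    }
    return keep == JNI_TRUE;
  };
}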
- -#ifndef JAVA_ROCKSJNI_TABLE_FILTER_JNICALLBACK_H_ -#define JAVA_ROCKSJNI_TABLE_FILTER_JNICALLBACK_H_ - -#include - -#include -#include - -#include "rocksdb/table_properties.h" -#include "rocksjni/jnicallback.h" - -namespace ROCKSDB_NAMESPACE { - -class TableFilterJniCallback : public JniCallback { - public: - TableFilterJniCallback(JNIEnv* env, jobject jtable_filter); - std::function - GetTableFilterFunction(); - - private: - jmethodID m_jfilter_methodid; - std::function - m_table_filter_function; -}; - -} // namespace ROCKSDB_NAMESPACE - -#endif // JAVA_ROCKSJNI_TABLE_FILTER_JNICALLBACK_H_ diff --git a/java/rocksjni/testable_event_listener.cc b/java/rocksjni/testable_event_listener.cc deleted file mode 100644 index 71188bc3c..000000000 --- a/java/rocksjni/testable_event_listener.cc +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -#include -#include -#include -#include - -#include "include/org_rocksdb_test_TestableEventListener.h" -#include "rocksdb/listener.h" -#include "rocksdb/status.h" -#include "rocksdb/table_properties.h" - -using ROCKSDB_NAMESPACE::BackgroundErrorReason; -using ROCKSDB_NAMESPACE::CompactionJobInfo; -using ROCKSDB_NAMESPACE::CompactionJobStats; -using ROCKSDB_NAMESPACE::CompactionReason; -using ROCKSDB_NAMESPACE::CompressionType; -using ROCKSDB_NAMESPACE::ExternalFileIngestionInfo; -using ROCKSDB_NAMESPACE::FileOperationInfo; -using ROCKSDB_NAMESPACE::FileOperationType; -using ROCKSDB_NAMESPACE::FlushJobInfo; -using ROCKSDB_NAMESPACE::FlushReason; -using ROCKSDB_NAMESPACE::MemTableInfo; -using ROCKSDB_NAMESPACE::Status; -using ROCKSDB_NAMESPACE::TableFileCreationBriefInfo; -using ROCKSDB_NAMESPACE::TableFileCreationInfo; -using ROCKSDB_NAMESPACE::TableFileCreationReason; -using ROCKSDB_NAMESPACE::TableFileDeletionInfo; -using ROCKSDB_NAMESPACE::TableProperties; -using ROCKSDB_NAMESPACE::WriteStallCondition; -using ROCKSDB_NAMESPACE::WriteStallInfo; - -static TableProperties newTablePropertiesForTest() { - TableProperties table_properties; - table_properties.data_size = UINT64_MAX; - table_properties.index_size = UINT64_MAX; - table_properties.index_partitions = UINT64_MAX; - table_properties.top_level_index_size = UINT64_MAX; - table_properties.index_key_is_user_key = UINT64_MAX; - table_properties.index_value_is_delta_encoded = UINT64_MAX; - table_properties.filter_size = UINT64_MAX; - table_properties.raw_key_size = UINT64_MAX; - table_properties.raw_value_size = UINT64_MAX; - table_properties.num_data_blocks = UINT64_MAX; - table_properties.num_entries = UINT64_MAX; - table_properties.num_deletions = UINT64_MAX; - table_properties.num_merge_operands = UINT64_MAX; - table_properties.num_range_deletions = UINT64_MAX; - table_properties.format_version = UINT64_MAX; - table_properties.fixed_key_len = UINT64_MAX; - table_properties.column_family_id = UINT64_MAX; - table_properties.creation_time = UINT64_MAX; - table_properties.oldest_key_time = UINT64_MAX; - table_properties.file_creation_time = UINT64_MAX; - table_properties.slow_compression_estimated_data_size = UINT64_MAX; - table_properties.fast_compression_estimated_data_size = UINT64_MAX; - table_properties.external_sst_file_global_seqno_offset = UINT64_MAX; - table_properties.db_id = "dbId"; - table_properties.db_session_id = "sessionId"; - 
table_properties.column_family_name = "columnFamilyName"; - table_properties.filter_policy_name = "filterPolicyName"; - table_properties.comparator_name = "comparatorName"; - table_properties.merge_operator_name = "mergeOperatorName"; - table_properties.prefix_extractor_name = "prefixExtractorName"; - table_properties.property_collectors_names = "propertyCollectorsNames"; - table_properties.compression_name = "compressionName"; - table_properties.compression_options = "compressionOptions"; - table_properties.user_collected_properties = {{"key", "value"}}; - table_properties.readable_properties = {{"key", "value"}}; - return table_properties; -} - -/* - * Class: org_rocksdb_test_TestableEventListener - * Method: invokeAllCallbacks - * Signature: (J)V - */ -void Java_org_rocksdb_test_TestableEventListener_invokeAllCallbacks( - JNIEnv *, jclass, jlong jhandle) { - const auto &el = - *reinterpret_cast *>( - jhandle); - - TableProperties table_properties = newTablePropertiesForTest(); - - FlushJobInfo flush_job_info; - flush_job_info.cf_id = INT_MAX; - flush_job_info.cf_name = "testColumnFamily"; - flush_job_info.file_path = "/file/path"; - flush_job_info.file_number = UINT64_MAX; - flush_job_info.oldest_blob_file_number = UINT64_MAX; - flush_job_info.thread_id = UINT64_MAX; - flush_job_info.job_id = INT_MAX; - flush_job_info.triggered_writes_slowdown = true; - flush_job_info.triggered_writes_stop = true; - flush_job_info.smallest_seqno = UINT64_MAX; - flush_job_info.largest_seqno = UINT64_MAX; - flush_job_info.table_properties = table_properties; - flush_job_info.flush_reason = FlushReason::kManualFlush; - - el->OnFlushCompleted(nullptr, flush_job_info); - el->OnFlushBegin(nullptr, flush_job_info); - - Status status = Status::Incomplete(Status::SubCode::kNoSpace); - - TableFileDeletionInfo file_deletion_info; - file_deletion_info.db_name = "dbName"; - file_deletion_info.file_path = "/file/path"; - file_deletion_info.job_id = INT_MAX; - file_deletion_info.status = status; - - el->OnTableFileDeleted(file_deletion_info); - - CompactionJobInfo compaction_job_info; - compaction_job_info.cf_id = UINT32_MAX; - compaction_job_info.cf_name = "compactionColumnFamily"; - compaction_job_info.status = status; - compaction_job_info.thread_id = UINT64_MAX; - compaction_job_info.job_id = INT_MAX; - compaction_job_info.base_input_level = INT_MAX; - compaction_job_info.output_level = INT_MAX; - compaction_job_info.input_files = {"inputFile.sst"}; - compaction_job_info.input_file_infos = {}; - compaction_job_info.output_files = {"outputFile.sst"}; - compaction_job_info.output_file_infos = {}; - compaction_job_info.table_properties = { - {"tableProperties", std::shared_ptr( - &table_properties, [](TableProperties *) {})}}; - compaction_job_info.compaction_reason = CompactionReason::kFlush; - compaction_job_info.compression = CompressionType::kSnappyCompression; - - compaction_job_info.stats = CompactionJobStats(); - - el->OnCompactionBegin(nullptr, compaction_job_info); - el->OnCompactionCompleted(nullptr, compaction_job_info); - - TableFileCreationInfo file_creation_info; - file_creation_info.file_size = UINT64_MAX; - file_creation_info.table_properties = table_properties; - file_creation_info.status = status; - file_creation_info.file_checksum = "fileChecksum"; - file_creation_info.file_checksum_func_name = "fileChecksumFuncName"; - file_creation_info.db_name = "dbName"; - file_creation_info.cf_name = "columnFamilyName"; - file_creation_info.file_path = "/file/path"; - file_creation_info.job_id = INT_MAX; - 
file_creation_info.reason = TableFileCreationReason::kMisc; - - el->OnTableFileCreated(file_creation_info); - - TableFileCreationBriefInfo file_creation_brief_info; - file_creation_brief_info.db_name = "dbName"; - file_creation_brief_info.cf_name = "columnFamilyName"; - file_creation_brief_info.file_path = "/file/path"; - file_creation_brief_info.job_id = INT_MAX; - file_creation_brief_info.reason = TableFileCreationReason::kMisc; - - el->OnTableFileCreationStarted(file_creation_brief_info); - - MemTableInfo mem_table_info; - mem_table_info.cf_name = "columnFamilyName"; - mem_table_info.first_seqno = UINT64_MAX; - mem_table_info.earliest_seqno = UINT64_MAX; - mem_table_info.num_entries = UINT64_MAX; - mem_table_info.num_deletes = UINT64_MAX; - - el->OnMemTableSealed(mem_table_info); - el->OnColumnFamilyHandleDeletionStarted(nullptr); - - ExternalFileIngestionInfo file_ingestion_info; - file_ingestion_info.cf_name = "columnFamilyName"; - file_ingestion_info.external_file_path = "/external/file/path"; - file_ingestion_info.internal_file_path = "/internal/file/path"; - file_ingestion_info.global_seqno = UINT64_MAX; - file_ingestion_info.table_properties = table_properties; - el->OnExternalFileIngested(nullptr, file_ingestion_info); - - el->OnBackgroundError(BackgroundErrorReason::kFlush, &status); - - WriteStallInfo write_stall_info; - write_stall_info.cf_name = "columnFamilyName"; - write_stall_info.condition.cur = WriteStallCondition::kDelayed; - write_stall_info.condition.prev = WriteStallCondition::kStopped; - el->OnStallConditionsChanged(write_stall_info); - - const std::string file_path = "/file/path"; - const auto start_timestamp = - std::make_pair(std::chrono::time_point( - std::chrono::nanoseconds(1600699420000000000ll)), - std::chrono::time_point( - std::chrono::nanoseconds(1600699420000000000ll))); - const auto finish_timestamp = - std::chrono::time_point( - std::chrono::nanoseconds(1600699425000000000ll)); - FileOperationInfo op_info = - FileOperationInfo(FileOperationType::kRead, file_path, start_timestamp, - finish_timestamp, status); - op_info.offset = UINT64_MAX; - op_info.length = SIZE_MAX; - - el->OnFileReadFinish(op_info); - el->OnFileWriteFinish(op_info); - el->OnFileFlushFinish(op_info); - el->OnFileSyncFinish(op_info); - el->OnFileRangeSyncFinish(op_info); - el->OnFileTruncateFinish(op_info); - el->OnFileCloseFinish(op_info); - el->ShouldBeNotifiedOnFileIO(); - - bool auto_recovery; - el->OnErrorRecoveryBegin(BackgroundErrorReason::kFlush, status, - &auto_recovery); - el->OnErrorRecoveryCompleted(status); -} diff --git a/java/rocksjni/thread_status.cc b/java/rocksjni/thread_status.cc deleted file mode 100644 index c600f6cd5..000000000 --- a/java/rocksjni/thread_status.cc +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::ThreadStatus methods from Java side. 
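A detail of the listener test above worth calling out: compaction_job_info.table_properties stores a shared_ptr built from the address of a stack-allocated TableProperties with a no-op deleter, so the map entry borrows the object instead of ever trying to free it. A tiny self-contained illustration of that non-owning shared_ptr idiom (TableProps is a stand-in type, not a RocksDB one):

#include <memory>

struct TableProps {  // stand-in type for illustration only
  int num_entries = 0;
};

int main() {
  TableProps local;  // lives on the stack, owned by this scope
  // The no-op deleter makes the shared_ptr purely observing: when the last
  // copy goes away nothing is deleted, exactly as in the listener test.
  std::shared_ptr<TableProps> borrowed(&local, [](TableProps*) {});
  return borrowed->num_entries;
}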
- -#include "rocksdb/thread_status.h" - -#include - -#include "include/org_rocksdb_ThreadStatus.h" -#include "portal.h" - -/* - * Class: org_rocksdb_ThreadStatus - * Method: getThreadTypeName - * Signature: (B)Ljava/lang/String; - */ -jstring Java_org_rocksdb_ThreadStatus_getThreadTypeName( - JNIEnv* env, jclass, jbyte jthread_type_value) { - auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetThreadTypeName( - ROCKSDB_NAMESPACE::ThreadTypeJni::toCppThreadType(jthread_type_value)); - return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); -} - -/* - * Class: org_rocksdb_ThreadStatus - * Method: getOperationName - * Signature: (B)Ljava/lang/String; - */ -jstring Java_org_rocksdb_ThreadStatus_getOperationName( - JNIEnv* env, jclass, jbyte joperation_type_value) { - auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetOperationName( - ROCKSDB_NAMESPACE::OperationTypeJni::toCppOperationType( - joperation_type_value)); - return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); -} - -/* - * Class: org_rocksdb_ThreadStatus - * Method: microsToStringNative - * Signature: (J)Ljava/lang/String; - */ -jstring Java_org_rocksdb_ThreadStatus_microsToStringNative(JNIEnv* env, jclass, - jlong jmicros) { - auto str = ROCKSDB_NAMESPACE::ThreadStatus::MicrosToString( - static_cast(jmicros)); - return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &str, true); -} - -/* - * Class: org_rocksdb_ThreadStatus - * Method: getOperationStageName - * Signature: (B)Ljava/lang/String; - */ -jstring Java_org_rocksdb_ThreadStatus_getOperationStageName( - JNIEnv* env, jclass, jbyte joperation_stage_value) { - auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetOperationStageName( - ROCKSDB_NAMESPACE::OperationStageJni::toCppOperationStage( - joperation_stage_value)); - return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); -} - -/* - * Class: org_rocksdb_ThreadStatus - * Method: getOperationPropertyName - * Signature: (BI)Ljava/lang/String; - */ -jstring Java_org_rocksdb_ThreadStatus_getOperationPropertyName( - JNIEnv* env, jclass, jbyte joperation_type_value, jint jindex) { - auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetOperationPropertyName( - ROCKSDB_NAMESPACE::OperationTypeJni::toCppOperationType( - joperation_type_value), - static_cast(jindex)); - return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); -} - -/* - * Class: org_rocksdb_ThreadStatus - * Method: interpretOperationProperties - * Signature: (B[J)Ljava/util/Map; - */ -jobject Java_org_rocksdb_ThreadStatus_interpretOperationProperties( - JNIEnv* env, jclass, jbyte joperation_type_value, - jlongArray joperation_properties) { - // convert joperation_properties - const jsize len = env->GetArrayLength(joperation_properties); - const std::unique_ptr op_properties(new uint64_t[len]); - jlong* jop = env->GetLongArrayElements(joperation_properties, nullptr); - if (jop == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - for (jsize i = 0; i < len; i++) { - op_properties[i] = static_cast(jop[i]); - } - env->ReleaseLongArrayElements(joperation_properties, jop, JNI_ABORT); - - // call the function - auto result = ROCKSDB_NAMESPACE::ThreadStatus::InterpretOperationProperties( - ROCKSDB_NAMESPACE::OperationTypeJni::toCppOperationType( - joperation_type_value), - op_properties.get()); - jobject jresult = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, &result); - if (env->ExceptionCheck()) { - // exception occurred - return nullptr; - } - - return jresult; -} - -/* - * Class: org_rocksdb_ThreadStatus - * Method: 
getStateName - * Signature: (B)Ljava/lang/String; - */ -jstring Java_org_rocksdb_ThreadStatus_getStateName(JNIEnv* env, jclass, - jbyte jstate_type_value) { - auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetStateName( - ROCKSDB_NAMESPACE::StateTypeJni::toCppStateType(jstate_type_value)); - return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); -} diff --git a/java/rocksjni/trace_writer.cc b/java/rocksjni/trace_writer.cc deleted file mode 100644 index d58276399..000000000 --- a/java/rocksjni/trace_writer.cc +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::CompactionFilterFactory. - -#include - -#include "include/org_rocksdb_AbstractTraceWriter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/trace_writer_jnicallback.h" - -/* - * Class: org_rocksdb_AbstractTraceWriter - * Method: createNewTraceWriter - * Signature: ()J - */ -jlong Java_org_rocksdb_AbstractTraceWriter_createNewTraceWriter(JNIEnv* env, - jobject jobj) { - auto* trace_writer = new ROCKSDB_NAMESPACE::TraceWriterJniCallback(env, jobj); - return GET_CPLUSPLUS_POINTER(trace_writer); -} diff --git a/java/rocksjni/trace_writer_jnicallback.cc b/java/rocksjni/trace_writer_jnicallback.cc deleted file mode 100644 index d1ed32038..000000000 --- a/java/rocksjni/trace_writer_jnicallback.cc +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::TraceWriter. 
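Java_org_rocksdb_ThreadStatus_interpretOperationProperties, deleted above, shows the standard way to hand a Java long[] to a C++ API that wants uint64_t*: copy the elements into a native buffer, then release the JNI array with JNI_ABORT because nothing needs to be written back. The helper below restates just that step; copy_jlong_array is a hypothetical name introduced for the example.

#include <jni.h>

#include <cstdint>
#include <memory>

// Hypothetical helper: copy a jlongArray into a uint64_t buffer.
std::unique_ptr<uint64_t[]> copy_jlong_array(JNIEnv* env, jlongArray jarr) {
  const jsize len = env->GetArrayLength(jarr);
  std::unique_ptr<uint64_t[]> out(new uint64_t[len]);
  jlong* elems = env->GetLongArrayElements(jarr, nullptr);
  if (elems == nullptr) {
    return nullptr;  // OutOfMemoryError is already pending in the JVM
  }
  for (jsize i = 0; i < len; i++) {
    out[i] = static_cast<uint64_t>(elems[i]);
  }
  // JNI_ABORT: free/unpin the elements without copying them back, since the
  // Java array was only read.
  env->ReleaseLongArrayElements(jarr, elems, JNI_ABORT);
  return out;
}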
- -#include "rocksjni/trace_writer_jnicallback.h" - -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { -TraceWriterJniCallback::TraceWriterJniCallback(JNIEnv* env, - jobject jtrace_writer) - : JniCallback(env, jtrace_writer) { - m_jwrite_proxy_methodid = AbstractTraceWriterJni::getWriteProxyMethodId(env); - if (m_jwrite_proxy_methodid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - - m_jclose_writer_proxy_methodid = - AbstractTraceWriterJni::getCloseWriterProxyMethodId(env); - if (m_jclose_writer_proxy_methodid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - - m_jget_file_size_methodid = - AbstractTraceWriterJni::getGetFileSizeMethodId(env); - if (m_jget_file_size_methodid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } -} - -Status TraceWriterJniCallback::Write(const Slice& data) { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - if (env == nullptr) { - return Status::IOError("Unable to attach JNI Environment"); - } - - jshort jstatus = - env->CallShortMethod(m_jcallback_obj, m_jwrite_proxy_methodid, &data); - - if (env->ExceptionCheck()) { - // exception thrown from CallShortMethod - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return Status::IOError( - "Unable to call AbstractTraceWriter#writeProxy(long)"); - } - - // unpack status code and status sub-code from jstatus - jbyte jcode_value = (jstatus >> 8) & 0xFF; - jbyte jsub_code_value = jstatus & 0xFF; - std::unique_ptr s = - StatusJni::toCppStatus(jcode_value, jsub_code_value); - - releaseJniEnv(attached_thread); - - return Status(*s); -} - -Status TraceWriterJniCallback::Close() { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - if (env == nullptr) { - return Status::IOError("Unable to attach JNI Environment"); - } - - jshort jstatus = - env->CallShortMethod(m_jcallback_obj, m_jclose_writer_proxy_methodid); - - if (env->ExceptionCheck()) { - // exception thrown from CallShortMethod - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return Status::IOError( - "Unable to call AbstractTraceWriter#closeWriterProxy()"); - } - - // unpack status code and status sub-code from jstatus - jbyte code_value = (jstatus >> 8) & 0xFF; - jbyte sub_code_value = jstatus & 0xFF; - std::unique_ptr s = - StatusJni::toCppStatus(code_value, sub_code_value); - - releaseJniEnv(attached_thread); - - return Status(*s); -} - -uint64_t TraceWriterJniCallback::GetFileSize() { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - if (env == nullptr) { - return 0; - } - - jlong jfile_size = - env->CallLongMethod(m_jcallback_obj, m_jget_file_size_methodid); - - if (env->ExceptionCheck()) { - // exception thrown from CallLongMethod - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return 0; - } - - releaseJniEnv(attached_thread); - - return static_cast(jfile_size); -} - -} // namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/trace_writer_jnicallback.h b/java/rocksjni/trace_writer_jnicallback.h deleted file mode 100644 index c82a3a72c..000000000 --- a/java/rocksjni/trace_writer_jnicallback.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::TraceWriter. - -#ifndef JAVA_ROCKSJNI_TRACE_WRITER_JNICALLBACK_H_ -#define JAVA_ROCKSJNI_TRACE_WRITER_JNICALLBACK_H_ - -#include - -#include - -#include "rocksdb/trace_reader_writer.h" -#include "rocksjni/jnicallback.h" - -namespace ROCKSDB_NAMESPACE { - -class TraceWriterJniCallback : public JniCallback, public TraceWriter { - public: - TraceWriterJniCallback(JNIEnv* env, jobject jtrace_writer); - virtual Status Write(const Slice& data); - virtual Status Close(); - virtual uint64_t GetFileSize(); - - private: - jmethodID m_jwrite_proxy_methodid; - jmethodID m_jclose_writer_proxy_methodid; - jmethodID m_jget_file_size_methodid; -}; - -} // namespace ROCKSDB_NAMESPACE - -#endif // JAVA_ROCKSJNI_TRACE_WRITER_JNICALLBACK_H_ diff --git a/java/rocksjni/transaction.cc b/java/rocksjni/transaction.cc deleted file mode 100644 index 1a0a64fc7..000000000 --- a/java/rocksjni/transaction.cc +++ /dev/null @@ -1,1655 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ -// for ROCKSDB_NAMESPACE::Transaction. - -#include "rocksdb/utilities/transaction.h" - -#include - -#include - -#include "include/org_rocksdb_Transaction.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable : 4503) // identifier' : decorated name length - // exceeded, name was truncated -#endif - -/* - * Class: org_rocksdb_Transaction - * Method: setSnapshot - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_setSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - txn->SetSnapshot(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: setSnapshotOnNextOperation - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_setSnapshotOnNextOperation__J( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - txn->SetSnapshotOnNextOperation(nullptr); -} - -/* - * Class: org_rocksdb_Transaction - * Method: setSnapshotOnNextOperation - * Signature: (JJ)V - */ -void Java_org_rocksdb_Transaction_setSnapshotOnNextOperation__JJ( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jtxn_notifier_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* txn_notifier = reinterpret_cast< - std::shared_ptr*>( - jtxn_notifier_handle); - txn->SetSnapshotOnNextOperation(*txn_notifier); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getSnapshot - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getSnapshot(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - const ROCKSDB_NAMESPACE::Snapshot* snapshot = txn->GetSnapshot(); - return GET_CPLUSPLUS_POINTER(snapshot); -} - -/* - * Class: org_rocksdb_Transaction - * Method: clearSnapshot - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_clearSnapshot(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - 
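  // (Here and throughout transaction.cc the cast target is
  //  ROCKSDB_NAMESPACE::Transaction*: each native method recovers the C++
  //  transaction from the jlong handle held by the Java object.)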
txn->ClearSnapshot(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: prepare - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_prepare(JNIEnv* env, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::Status s = txn->Prepare(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Transaction - * Method: commit - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_commit(JNIEnv* env, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::Status s = txn->Commit(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Transaction - * Method: rollback - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_rollback(JNIEnv* env, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::Status s = txn->Rollback(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Transaction - * Method: setSavePoint - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_setSavePoint(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - txn->SetSavePoint(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: rollbackToSavePoint - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_rollbackToSavePoint(JNIEnv* env, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::Status s = txn->RollbackToSavePoint(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -typedef std::function - FnGet; - -// TODO(AR) consider refactoring to share this between here and rocksjni.cc -jbyteArray txn_get_helper(JNIEnv* env, const FnGet& fn_get, - const jlong& jread_options_handle, - const jbyteArray& jkey, const jint& jkey_part_len) { - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - jkey_part_len); - - auto* read_options = - reinterpret_cast(jread_options_handle); - std::string value; - ROCKSDB_NAMESPACE::Status s = fn_get(*read_options, key_slice, &value); - - // trigger java unref on key. - // by passing JNI_ABORT, it will simply release the reference without - // copying the result back to the java byte array. 
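  // (fn_get is a std::function over
  //  Status(const ReadOptions&, const Slice&, std::string*); the Get
  //  overloads below build it with std::bind, currying in the Transaction*
  //  and, for the column-family variant, the ColumnFamilyHandle*.)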
- env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - - if (s.IsNotFound()) { - return nullptr; - } - - if (s.ok()) { - jbyteArray jret_value = env->NewByteArray(static_cast(value.size())); - if (jret_value == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetByteArrayRegion( - jret_value, 0, static_cast(value.size()), - const_cast(reinterpret_cast(value.c_str()))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - return nullptr; - } - return jret_value; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; -} - -/* - * Class: org_rocksdb_Transaction - * Method: get - * Signature: (JJ[BIJ)[B - */ -jbyteArray Java_org_rocksdb_Transaction_get__JJ_3BIJ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnGet fn_get = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Get, txn, std::placeholders::_1, - column_family_handle, std::placeholders::_2, std::placeholders::_3); - return txn_get_helper(env, fn_get, jread_options_handle, jkey, jkey_part_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: get - * Signature: (JJ[BI)[B - */ -jbyteArray Java_org_rocksdb_Transaction_get__JJ_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jbyteArray jkey, jint jkey_part_len) { - auto* txn = reinterpret_cast(jhandle); - FnGet fn_get = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Get, txn, std::placeholders::_1, - std::placeholders::_2, std::placeholders::_3); - return txn_get_helper(env, fn_get, jread_options_handle, jkey, jkey_part_len); -} - -// TODO(AR) consider refactoring to share this between here and rocksjni.cc -// used by txn_multi_get_helper below -std::vector txn_column_families_helper( - JNIEnv* env, jlongArray jcolumn_family_handles, bool* has_exception) { - std::vector cf_handles; - if (jcolumn_family_handles != nullptr) { - const jsize len_cols = env->GetArrayLength(jcolumn_family_handles); - if (len_cols > 0) { - jlong* jcfh = env->GetLongArrayElements(jcolumn_family_handles, nullptr); - if (jcfh == nullptr) { - // exception thrown: OutOfMemoryError - *has_exception = JNI_TRUE; - return std::vector(); - } - for (int i = 0; i < len_cols; i++) { - auto* cf_handle = - reinterpret_cast(jcfh[i]); - cf_handles.push_back(cf_handle); - } - env->ReleaseLongArrayElements(jcolumn_family_handles, jcfh, JNI_ABORT); - } - } - return cf_handles; -} - -typedef std::function( - const ROCKSDB_NAMESPACE::ReadOptions&, - const std::vector&, std::vector*)> - FnMultiGet; - -void free_parts( - JNIEnv* env, - std::vector>& parts_to_free) { - for (auto& value : parts_to_free) { - jobject jk; - jbyteArray jk_ba; - jbyte* jk_val; - std::tie(jk_ba, jk_val, jk) = value; - env->ReleaseByteArrayElements(jk_ba, jk_val, JNI_ABORT); - env->DeleteLocalRef(jk); - } -} - -void free_key_values(std::vector& keys_to_free) { - for (auto& key : keys_to_free) { - delete[] key; - } -} - -// TODO(AR) consider refactoring to share this between here and rocksjni.cc -// cf multi get -jobjectArray txn_multi_get_helper(JNIEnv* env, const FnMultiGet& fn_multi_get, - const jlong& jread_options_handle, - const jobjectArray& jkey_parts) { - const jsize len_key_parts = env->GetArrayLength(jkey_parts); - - std::vector key_parts; - std::vector keys_to_free; - for (int i = 0; i < 
len_key_parts; i++) { - const jobject jk = env->GetObjectArrayElement(jkey_parts, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - free_key_values(keys_to_free); - return nullptr; - } - jbyteArray jk_ba = reinterpret_cast(jk); - const jsize len_key = env->GetArrayLength(jk_ba); - jbyte* jk_val = new jbyte[len_key]; - if (jk_val == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jk); - free_key_values(keys_to_free); - - jclass exception_cls = (env)->FindClass("java/lang/OutOfMemoryError"); - (env)->ThrowNew(exception_cls, - "Insufficient Memory for CF handle array."); - return nullptr; - } - env->GetByteArrayRegion(jk_ba, 0, len_key, jk_val); - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(jk_val), - len_key); - key_parts.push_back(key_slice); - keys_to_free.push_back(jk_val); - env->DeleteLocalRef(jk); - } - - auto* read_options = - reinterpret_cast(jread_options_handle); - std::vector value_parts; - std::vector s = - fn_multi_get(*read_options, key_parts, &value_parts); - - // free up allocated byte arrays - free_key_values(keys_to_free); - - // prepare the results - const jclass jcls_ba = env->FindClass("[B"); - jobjectArray jresults = - env->NewObjectArray(static_cast(s.size()), jcls_ba, nullptr); - if (jresults == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - // add to the jresults - for (std::vector::size_type i = 0; i != s.size(); - i++) { - if (s[i].ok()) { - jbyteArray jentry_value = - env->NewByteArray(static_cast(value_parts[i].size())); - if (jentry_value == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetByteArrayRegion( - jentry_value, 0, static_cast(value_parts[i].size()), - const_cast( - reinterpret_cast(value_parts[i].c_str()))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jentry_value); - return nullptr; - } - - env->SetObjectArrayElement(jresults, static_cast(i), jentry_value); - env->DeleteLocalRef(jentry_value); - } - } - - return jresults; -} - -/* - * Class: org_rocksdb_Transaction - * Method: multiGet - * Signature: (JJ[[B[J)[[B - */ -jobjectArray Java_org_rocksdb_Transaction_multiGet__JJ_3_3B_3J( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jobjectArray jkey_parts, jlongArray jcolumn_family_handles) { - bool has_exception = false; - const std::vector - column_family_handles = txn_column_families_helper( - env, jcolumn_family_handles, &has_exception); - if (has_exception) { - // exception thrown: OutOfMemoryError - return nullptr; - } - auto* txn = reinterpret_cast(jhandle); - FnMultiGet fn_multi_get = std::bind ( - ROCKSDB_NAMESPACE::Transaction::*)( - const ROCKSDB_NAMESPACE::ReadOptions&, - const std::vector&, - const std::vector&, std::vector*)>( - &ROCKSDB_NAMESPACE::Transaction::MultiGet, txn, std::placeholders::_1, - column_family_handles, std::placeholders::_2, std::placeholders::_3); - return txn_multi_get_helper(env, fn_multi_get, jread_options_handle, - jkey_parts); -} - -/* - * Class: org_rocksdb_Transaction - * Method: multiGet - * Signature: (JJ[[B)[[B - */ -jobjectArray Java_org_rocksdb_Transaction_multiGet__JJ_3_3B( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jobjectArray jkey_parts) { - auto* txn = reinterpret_cast(jhandle); - FnMultiGet fn_multi_get = std::bind ( - ROCKSDB_NAMESPACE::Transaction::*)( - const ROCKSDB_NAMESPACE::ReadOptions&, - const std::vector&, std::vector*)>( - 
&ROCKSDB_NAMESPACE::Transaction::MultiGet, txn, std::placeholders::_1, - std::placeholders::_2, std::placeholders::_3); - return txn_multi_get_helper(env, fn_multi_get, jread_options_handle, - jkey_parts); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getForUpdate - * Signature: (JJ[BIJZZ)[B - */ -jbyteArray Java_org_rocksdb_Transaction_getForUpdate__JJ_3BIJZZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle, - jboolean jexclusive, jboolean jdo_validate) { - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - auto* txn = reinterpret_cast(jhandle); - FnGet fn_get_for_update = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::GetForUpdate, txn, - std::placeholders::_1, column_family_handle, std::placeholders::_2, - std::placeholders::_3, jexclusive, jdo_validate); - return txn_get_helper(env, fn_get_for_update, jread_options_handle, jkey, - jkey_part_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getForUpdate - * Signature: (JJ[BIZZ)[B - */ -jbyteArray Java_org_rocksdb_Transaction_getForUpdate__JJ_3BIZZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jbyteArray jkey, jint jkey_part_len, jboolean jexclusive, - jboolean jdo_validate) { - auto* txn = reinterpret_cast(jhandle); - FnGet fn_get_for_update = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::GetForUpdate, txn, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, - jexclusive, jdo_validate); - return txn_get_helper(env, fn_get_for_update, jread_options_handle, jkey, - jkey_part_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: multiGetForUpdate - * Signature: (JJ[[B[J)[[B - */ -jobjectArray Java_org_rocksdb_Transaction_multiGetForUpdate__JJ_3_3B_3J( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jobjectArray jkey_parts, jlongArray jcolumn_family_handles) { - bool has_exception = false; - const std::vector - column_family_handles = txn_column_families_helper( - env, jcolumn_family_handles, &has_exception); - if (has_exception) { - // exception thrown: OutOfMemoryError - return nullptr; - } - auto* txn = reinterpret_cast(jhandle); - FnMultiGet fn_multi_get_for_update = std::bind (ROCKSDB_NAMESPACE::Transaction::*)( - const ROCKSDB_NAMESPACE::ReadOptions&, - const std::vector&, - const std::vector&, std::vector*)>( - &ROCKSDB_NAMESPACE::Transaction::MultiGetForUpdate, txn, - std::placeholders::_1, column_family_handles, std::placeholders::_2, - std::placeholders::_3); - return txn_multi_get_helper(env, fn_multi_get_for_update, - jread_options_handle, jkey_parts); -} - -/* - * Class: org_rocksdb_Transaction - * Method: multiGetForUpdate - * Signature: (JJ[[B)[[B - */ -jobjectArray Java_org_rocksdb_Transaction_multiGetForUpdate__JJ_3_3B( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jobjectArray jkey_parts) { - auto* txn = reinterpret_cast(jhandle); - FnMultiGet fn_multi_get_for_update = std::bind (ROCKSDB_NAMESPACE::Transaction::*)( - const ROCKSDB_NAMESPACE::ReadOptions&, - const std::vector&, std::vector*)>( - &ROCKSDB_NAMESPACE::Transaction::MultiGetForUpdate, txn, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); - return txn_multi_get_helper(env, fn_multi_get_for_update, - jread_options_handle, jkey_parts); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getIterator - * Signature: (JJ)J - */ -jlong 
Java_org_rocksdb_Transaction_getIterator__JJ(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jlong jread_options_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* read_options = - reinterpret_cast(jread_options_handle); - return GET_CPLUSPLUS_POINTER(txn->GetIterator(*read_options)); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getIterator - * Signature: (JJJ)J - */ -jlong Java_org_rocksdb_Transaction_getIterator__JJJ( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jread_options_handle, jlong jcolumn_family_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* read_options = - reinterpret_cast(jread_options_handle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - return GET_CPLUSPLUS_POINTER( - txn->GetIterator(*read_options, column_family_handle)); -} - -typedef std::function - FnWriteKV; - -// TODO(AR) consider refactoring to share this between here and rocksjni.cc -void txn_write_kv_helper(JNIEnv* env, const FnWriteKV& fn_write_kv, - const jbyteArray& jkey, const jint& jkey_part_len, - const jbyteArray& jval, const jint& jval_len) { - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - jbyte* value = env->GetByteArrayElements(jval, nullptr); - if (value == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - return; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - jkey_part_len); - ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), - jval_len); - - ROCKSDB_NAMESPACE::Status s = fn_write_kv(key_slice, value_slice); - - // trigger java unref on key. - // by passing JNI_ABORT, it will simply release the reference without - // copying the result back to the java byte array. 
- env->ReleaseByteArrayElements(jval, value, JNI_ABORT); - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - - if (s.ok()) { - return; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_Transaction - * Method: put - * Signature: (J[BI[BIJZ)V - */ -void Java_org_rocksdb_Transaction_put__J_3BI_3BIJZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len, - jlong jcolumn_family_handle, jboolean jassume_tracked) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteKV fn_put = - std::bind(&ROCKSDB_NAMESPACE::Transaction::Put, txn, - column_family_handle, std::placeholders::_1, - std::placeholders::_2, jassume_tracked); - txn_write_kv_helper(env, fn_put, jkey, jkey_part_len, jval, jval_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: put - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_Transaction_put__J_3BI_3BI(JNIEnv* env, jobject /*jobj*/, - jlong jhandle, jbyteArray jkey, - jint jkey_part_len, - jbyteArray jval, - jint jval_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteKV fn_put = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Put, txn, std::placeholders::_1, - std::placeholders::_2); - txn_write_kv_helper(env, fn_put, jkey, jkey_part_len, jval, jval_len); -} - -typedef std::function - FnWriteKVParts; - -// TODO(AR) consider refactoring to share this between here and rocksjni.cc -void txn_write_kv_parts_helper(JNIEnv* env, - const FnWriteKVParts& fn_write_kv_parts, - const jobjectArray& jkey_parts, - const jint& jkey_parts_len, - const jobjectArray& jvalue_parts, - const jint& jvalue_parts_len) { -#ifndef DEBUG - (void)jvalue_parts_len; -#else - assert(jkey_parts_len == jvalue_parts_len); -#endif - - auto key_parts = std::vector(); - auto value_parts = std::vector(); - auto jparts_to_free = std::vector>(); - - // Since this is fundamentally a gather write at the RocksDB level, - // it seems wrong to refactor it by copying (gathering) keys and data here, - // in order to avoid the local reference limit. - // The user needs to be a aware that there is a limit to the number of parts - // which can be gathered. 
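  // (EnsureLocalCapacity below reserves room for one local reference per key
  //  part and one per value part before the conversion loop creates them; if
  //  the JVM cannot guarantee that many, the pending exception is cleared and
  //  a RocksDBException is thrown instead.)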
- if (env->EnsureLocalCapacity(jkey_parts_len + jvalue_parts_len) != 0) { - // no space for all the jobjects we store up - env->ExceptionClear(); - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( - env, "Insufficient JNI local references for " + - std::to_string(jkey_parts_len) + " key/value parts"); - return; - } - - // convert java key_parts/value_parts byte[][] to Slice(s) - for (jsize i = 0; i < jkey_parts_len; ++i) { - const jobject jobj_key_part = env->GetObjectArrayElement(jkey_parts, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - free_parts(env, jparts_to_free); - return; - } - const jobject jobj_value_part = env->GetObjectArrayElement(jvalue_parts, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jobj_key_part); - free_parts(env, jparts_to_free); - return; - } - - const jbyteArray jba_key_part = reinterpret_cast(jobj_key_part); - const jsize jkey_part_len = env->GetArrayLength(jba_key_part); - jbyte* jkey_part = env->GetByteArrayElements(jba_key_part, nullptr); - if (jkey_part == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jobj_value_part); - env->DeleteLocalRef(jobj_key_part); - free_parts(env, jparts_to_free); - return; - } - - const jbyteArray jba_value_part = - reinterpret_cast(jobj_value_part); - const jsize jvalue_part_len = env->GetArrayLength(jba_value_part); - jbyte* jvalue_part = env->GetByteArrayElements(jba_value_part, nullptr); - if (jvalue_part == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jobj_value_part); - env->DeleteLocalRef(jobj_key_part); - env->ReleaseByteArrayElements(jba_key_part, jkey_part, JNI_ABORT); - free_parts(env, jparts_to_free); - return; - } - - jparts_to_free.push_back( - std::make_tuple(jba_key_part, jkey_part, jobj_key_part)); - jparts_to_free.push_back( - std::make_tuple(jba_value_part, jvalue_part, jobj_value_part)); - - key_parts.push_back(ROCKSDB_NAMESPACE::Slice( - reinterpret_cast(jkey_part), jkey_part_len)); - value_parts.push_back(ROCKSDB_NAMESPACE::Slice( - reinterpret_cast(jvalue_part), jvalue_part_len)); - } - - // call the write_multi function - ROCKSDB_NAMESPACE::Status s = fn_write_kv_parts( - ROCKSDB_NAMESPACE::SliceParts(key_parts.data(), (int)key_parts.size()), - ROCKSDB_NAMESPACE::SliceParts(value_parts.data(), - (int)value_parts.size())); - - // cleanup temporary memory - free_parts(env, jparts_to_free); - - // return - if (s.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_Transaction - * Method: put - * Signature: (J[[BI[[BIJZ)V - */ -void Java_org_rocksdb_Transaction_put__J_3_3BI_3_3BIJZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, - jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len, - jlong jcolumn_family_handle, jboolean jassume_tracked) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteKVParts fn_put_parts = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Put, txn, column_family_handle, - std::placeholders::_1, std::placeholders::_2, jassume_tracked); - txn_write_kv_parts_helper(env, fn_put_parts, jkey_parts, jkey_parts_len, - jvalue_parts, jvalue_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: put - * Signature: (J[[BI[[BI)V - */ -void Java_org_rocksdb_Transaction_put__J_3_3BI_3_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray 
jkey_parts, - jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteKVParts fn_put_parts = std::bind( - &ROCKSDB_NAMESPACE::Transaction::Put, txn, std::placeholders::_1, - std::placeholders::_2); - txn_write_kv_parts_helper(env, fn_put_parts, jkey_parts, jkey_parts_len, - jvalue_parts, jvalue_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: merge - * Signature: (J[BI[BIJZ)V - */ -void Java_org_rocksdb_Transaction_merge__J_3BI_3BIJZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len, - jlong jcolumn_family_handle, jboolean jassume_tracked) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteKV fn_merge = - std::bind(&ROCKSDB_NAMESPACE::Transaction::Merge, txn, - column_family_handle, std::placeholders::_1, - std::placeholders::_2, jassume_tracked); - txn_write_kv_helper(env, fn_merge, jkey, jkey_part_len, jval, jval_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: merge - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_Transaction_merge__J_3BI_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteKV fn_merge = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Merge, txn, std::placeholders::_1, - std::placeholders::_2); - txn_write_kv_helper(env, fn_merge, jkey, jkey_part_len, jval, jval_len); -} - -typedef std::function - FnWriteK; - -// TODO(AR) consider refactoring to share this between here and rocksjni.cc -void txn_write_k_helper(JNIEnv* env, const FnWriteK& fn_write_k, - const jbyteArray& jkey, const jint& jkey_part_len) { - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - jkey_part_len); - - ROCKSDB_NAMESPACE::Status s = fn_write_k(key_slice); - - // trigger java unref on key. - // by passing JNI_ABORT, it will simply release the reference without - // copying the result back to the java byte array. 
- env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - - if (s.ok()) { - return; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_Transaction - * Method: delete - * Signature: (J[BIJZ)V - */ -void Java_org_rocksdb_Transaction_delete__J_3BIJZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteK fn_delete = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Delete, txn, column_family_handle, - std::placeholders::_1, jassume_tracked); - txn_write_k_helper(env, fn_delete, jkey, jkey_part_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: delete - * Signature: (J[BI)V - */ -void Java_org_rocksdb_Transaction_delete__J_3BI(JNIEnv* env, jobject /*jobj*/, - jlong jhandle, jbyteArray jkey, - jint jkey_part_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteK fn_delete = std::bind( - &ROCKSDB_NAMESPACE::Transaction::Delete, txn, std::placeholders::_1); - txn_write_k_helper(env, fn_delete, jkey, jkey_part_len); -} - -typedef std::function - FnWriteKParts; - -// TODO(AR) consider refactoring to share this between here and rocksjni.cc -void txn_write_k_parts_helper(JNIEnv* env, - const FnWriteKParts& fn_write_k_parts, - const jobjectArray& jkey_parts, - const jint& jkey_parts_len) { - std::vector key_parts; - std::vector> jkey_parts_to_free; - - // convert java key_parts byte[][] to Slice(s) - for (jint i = 0; i < jkey_parts_len; ++i) { - const jobject jobj_key_part = env->GetObjectArrayElement(jkey_parts, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - free_parts(env, jkey_parts_to_free); - return; - } - - const jbyteArray jba_key_part = reinterpret_cast(jobj_key_part); - const jsize jkey_part_len = env->GetArrayLength(jba_key_part); - jbyte* jkey_part = env->GetByteArrayElements(jba_key_part, nullptr); - if (jkey_part == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jobj_key_part); - free_parts(env, jkey_parts_to_free); - return; - } - - jkey_parts_to_free.push_back(std::tuple( - jba_key_part, jkey_part, jobj_key_part)); - - key_parts.push_back(ROCKSDB_NAMESPACE::Slice( - reinterpret_cast(jkey_part), jkey_part_len)); - } - - // call the write_multi function - ROCKSDB_NAMESPACE::Status s = fn_write_k_parts( - ROCKSDB_NAMESPACE::SliceParts(key_parts.data(), (int)key_parts.size())); - - // cleanup temporary memory - free_parts(env, jkey_parts_to_free); - - // return - if (s.ok()) { - return; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_Transaction - * Method: delete - * Signature: (J[[BIJZ)V - */ -void Java_org_rocksdb_Transaction_delete__J_3_3BIJZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, - jint jkey_parts_len, jlong jcolumn_family_handle, - jboolean jassume_tracked) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteKParts fn_delete_parts = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Delete, txn, column_family_handle, - std::placeholders::_1, jassume_tracked); - txn_write_k_parts_helper(env, fn_delete_parts, jkey_parts, jkey_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: delete - * Signature: (J[[BI)V - */ -void Java_org_rocksdb_Transaction_delete__J_3_3BI(JNIEnv* env, 
jobject /*jobj*/, - jlong jhandle, - jobjectArray jkey_parts, - jint jkey_parts_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteKParts fn_delete_parts = std::bind( - &ROCKSDB_NAMESPACE::Transaction::Delete, txn, std::placeholders::_1); - txn_write_k_parts_helper(env, fn_delete_parts, jkey_parts, jkey_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: singleDelete - * Signature: (J[BIJZ)V - */ -void Java_org_rocksdb_Transaction_singleDelete__J_3BIJZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteK fn_single_delete = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::SingleDelete, txn, - column_family_handle, std::placeholders::_1, jassume_tracked); - txn_write_k_helper(env, fn_single_delete, jkey, jkey_part_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: singleDelete - * Signature: (J[BI)V - */ -void Java_org_rocksdb_Transaction_singleDelete__J_3BI(JNIEnv* env, - jobject /*jobj*/, - jlong jhandle, - jbyteArray jkey, - jint jkey_part_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteK fn_single_delete = std::bind( - &ROCKSDB_NAMESPACE::Transaction::SingleDelete, txn, - std::placeholders::_1); - txn_write_k_helper(env, fn_single_delete, jkey, jkey_part_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: singleDelete - * Signature: (J[[BIJZ)V - */ -void Java_org_rocksdb_Transaction_singleDelete__J_3_3BIJZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, - jint jkey_parts_len, jlong jcolumn_family_handle, - jboolean jassume_tracked) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteKParts fn_single_delete_parts = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::SingleDelete, txn, - column_family_handle, std::placeholders::_1, jassume_tracked); - txn_write_k_parts_helper(env, fn_single_delete_parts, jkey_parts, - jkey_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: singleDelete - * Signature: (J[[BI)V - */ -void Java_org_rocksdb_Transaction_singleDelete__J_3_3BI(JNIEnv* env, - jobject /*jobj*/, - jlong jhandle, - jobjectArray jkey_parts, - jint jkey_parts_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteKParts fn_single_delete_parts = std::bind( - &ROCKSDB_NAMESPACE::Transaction::SingleDelete, txn, - std::placeholders::_1); - txn_write_k_parts_helper(env, fn_single_delete_parts, jkey_parts, - jkey_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: putUntracked - * Signature: (J[BI[BIJ)V - */ -void Java_org_rocksdb_Transaction_putUntracked__J_3BI_3BIJ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len, - jlong jcolumn_family_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteKV fn_put_untracked = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, - column_family_handle, std::placeholders::_1, std::placeholders::_2); - txn_write_kv_helper(env, fn_put_untracked, jkey, jkey_part_len, jval, - jval_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: putUntracked - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_Transaction_putUntracked__J_3BI_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, 
jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteKV fn_put_untracked = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, - std::placeholders::_1, std::placeholders::_2); - txn_write_kv_helper(env, fn_put_untracked, jkey, jkey_part_len, jval, - jval_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: putUntracked - * Signature: (J[[BI[[BIJ)V - */ -void Java_org_rocksdb_Transaction_putUntracked__J_3_3BI_3_3BIJ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, - jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len, - jlong jcolumn_family_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteKVParts fn_put_parts_untracked = std::bind( - &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, column_family_handle, - std::placeholders::_1, std::placeholders::_2); - txn_write_kv_parts_helper(env, fn_put_parts_untracked, jkey_parts, - jkey_parts_len, jvalue_parts, jvalue_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: putUntracked - * Signature: (J[[BI[[BI)V - */ -void Java_org_rocksdb_Transaction_putUntracked__J_3_3BI_3_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, - jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteKVParts fn_put_parts_untracked = std::bind( - &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, std::placeholders::_1, - std::placeholders::_2); - txn_write_kv_parts_helper(env, fn_put_parts_untracked, jkey_parts, - jkey_parts_len, jvalue_parts, jvalue_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: mergeUntracked - * Signature: (J[BI[BIJ)V - */ -void Java_org_rocksdb_Transaction_mergeUntracked__J_3BI_3BIJ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len, - jlong jcolumn_family_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteKV fn_merge_untracked = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::MergeUntracked, txn, - column_family_handle, std::placeholders::_1, std::placeholders::_2); - txn_write_kv_helper(env, fn_merge_untracked, jkey, jkey_part_len, jval, - jval_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: mergeUntracked - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_Transaction_mergeUntracked__J_3BI_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteKV fn_merge_untracked = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::MergeUntracked, txn, - std::placeholders::_1, std::placeholders::_2); - txn_write_kv_helper(env, fn_merge_untracked, jkey, jkey_part_len, jval, - jval_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: deleteUntracked - * Signature: (J[BIJ)V - */ -void Java_org_rocksdb_Transaction_deleteUntracked__J_3BIJ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jlong jcolumn_family_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteK fn_delete_untracked = std::bind( - &ROCKSDB_NAMESPACE::Transaction::DeleteUntracked, txn, - column_family_handle, std::placeholders::_1); - 
txn_write_k_helper(env, fn_delete_untracked, jkey, jkey_part_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: deleteUntracked - * Signature: (J[BI)V - */ -void Java_org_rocksdb_Transaction_deleteUntracked__J_3BI(JNIEnv* env, - jobject /*jobj*/, - jlong jhandle, - jbyteArray jkey, - jint jkey_part_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteK fn_delete_untracked = std::bind( - &ROCKSDB_NAMESPACE::Transaction::DeleteUntracked, txn, - std::placeholders::_1); - txn_write_k_helper(env, fn_delete_untracked, jkey, jkey_part_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: deleteUntracked - * Signature: (J[[BIJ)V - */ -void Java_org_rocksdb_Transaction_deleteUntracked__J_3_3BIJ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, - jint jkey_parts_len, jlong jcolumn_family_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - FnWriteKParts fn_delete_untracked_parts = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::DeleteUntracked, txn, - column_family_handle, std::placeholders::_1); - txn_write_k_parts_helper(env, fn_delete_untracked_parts, jkey_parts, - jkey_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: deleteUntracked - * Signature: (J[[BI)V - */ -void Java_org_rocksdb_Transaction_deleteUntracked__J_3_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, - jint jkey_parts_len) { - auto* txn = reinterpret_cast(jhandle); - FnWriteKParts fn_delete_untracked_parts = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::DeleteUntracked, txn, - std::placeholders::_1); - txn_write_k_parts_helper(env, fn_delete_untracked_parts, jkey_parts, - jkey_parts_len); -} - -/* - * Class: org_rocksdb_Transaction - * Method: putLogData - * Signature: (J[BI)V - */ -void Java_org_rocksdb_Transaction_putLogData(JNIEnv* env, jobject /*jobj*/, - jlong jhandle, jbyteArray jkey, - jint jkey_part_len) { - auto* txn = reinterpret_cast(jhandle); - - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - jkey_part_len); - txn->PutLogData(key_slice); - - // trigger java unref on key. - // by passing JNI_ABORT, it will simply release the reference without - // copying the result back to the java byte array. 
- env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); -} - -/* - * Class: org_rocksdb_Transaction - * Method: disableIndexing - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_disableIndexing(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - txn->DisableIndexing(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: enableIndexing - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_enableIndexing(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - txn->EnableIndexing(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getNumKeys - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getNumKeys(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return txn->GetNumKeys(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getNumPuts - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getNumPuts(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return txn->GetNumPuts(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getNumDeletes - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getNumDeletes(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return txn->GetNumDeletes(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getNumMerges - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getNumMerges(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return txn->GetNumMerges(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getElapsedTime - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getElapsedTime(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return txn->GetElapsedTime(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getWriteBatch - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getWriteBatch(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return GET_CPLUSPLUS_POINTER(txn->GetWriteBatch()); -} - -/* - * Class: org_rocksdb_Transaction - * Method: setLockTimeout - * Signature: (JJ)V - */ -void Java_org_rocksdb_Transaction_setLockTimeout(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jlong jlock_timeout) { - auto* txn = reinterpret_cast(jhandle); - txn->SetLockTimeout(jlock_timeout); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getWriteOptions - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getWriteOptions(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return GET_CPLUSPLUS_POINTER(txn->GetWriteOptions()); -} - -/* - * Class: org_rocksdb_Transaction - * Method: setWriteOptions - * Signature: (JJ)V - */ -void Java_org_rocksdb_Transaction_setWriteOptions(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jlong jwrite_options_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - txn->SetWriteOptions(*write_options); -} - -/* - * Class: org_rocksdb_Transaction - * Method: undo - * Signature: (J[BIJ)V - */ -void Java_org_rocksdb_Transaction_undoGetForUpdate__J_3BIJ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jlong jcolumn_family_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* 
column_family_handle = - reinterpret_cast( - jcolumn_family_handle); - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - jkey_part_len); - txn->UndoGetForUpdate(column_family_handle, key_slice); - - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); -} - -/* - * Class: org_rocksdb_Transaction - * Method: undoGetForUpdate - * Signature: (J[BI)V - */ -void Java_org_rocksdb_Transaction_undoGetForUpdate__J_3BI(JNIEnv* env, - jobject /*jobj*/, - jlong jhandle, - jbyteArray jkey, - jint jkey_part_len) { - auto* txn = reinterpret_cast(jhandle); - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - jkey_part_len); - txn->UndoGetForUpdate(key_slice); - - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); -} - -/* - * Class: org_rocksdb_Transaction - * Method: rebuildFromWriteBatch - * Signature: (JJ)V - */ -void Java_org_rocksdb_Transaction_rebuildFromWriteBatch( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jwrite_batch_handle) { - auto* txn = reinterpret_cast(jhandle); - auto* write_batch = - reinterpret_cast(jwrite_batch_handle); - ROCKSDB_NAMESPACE::Status s = txn->RebuildFromWriteBatch(write_batch); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Transaction - * Method: getCommitTimeWriteBatch - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getCommitTimeWriteBatch(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return GET_CPLUSPLUS_POINTER(txn->GetCommitTimeWriteBatch()); -} - -/* - * Class: org_rocksdb_Transaction - * Method: setLogNumber - * Signature: (JJ)V - */ -void Java_org_rocksdb_Transaction_setLogNumber(JNIEnv* /*env*/, - jobject /*jobj*/, jlong jhandle, - jlong jlog_number) { - auto* txn = reinterpret_cast(jhandle); - txn->SetLogNumber(jlog_number); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getLogNumber - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getLogNumber(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return txn->GetLogNumber(); -} - -/* - * Class: org_rocksdb_Transaction - * Method: setName - * Signature: (JLjava/lang/String;)V - */ -void Java_org_rocksdb_Transaction_setName(JNIEnv* env, jobject /*jobj*/, - jlong jhandle, jstring jname) { - auto* txn = reinterpret_cast(jhandle); - const char* name = env->GetStringUTFChars(jname, nullptr); - if (name == nullptr) { - // exception thrown: OutOfMemoryError - return; - } - - ROCKSDB_NAMESPACE::Status s = txn->SetName(name); - - env->ReleaseStringUTFChars(jname, name); - - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_Transaction - * Method: getName - * Signature: (J)Ljava/lang/String; - */ -jstring Java_org_rocksdb_Transaction_getName(JNIEnv* env, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::TransactionName name = txn->GetName(); - return env->NewStringUTF(name.data()); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getID - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getID(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - 
ROCKSDB_NAMESPACE::TransactionID id = txn->GetID(); - return static_cast(id); -} - -/* - * Class: org_rocksdb_Transaction - * Method: isDeadlockDetect - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_Transaction_isDeadlockDetect(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - return static_cast(txn->IsDeadlockDetect()); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getWaitingTxns - * Signature: (J)Lorg/rocksdb/Transaction/WaitingTransactions; - */ -jobject Java_org_rocksdb_Transaction_getWaitingTxns(JNIEnv* env, - jobject jtransaction_obj, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - uint32_t column_family_id; - std::string key; - std::vector waiting_txns = - txn->GetWaitingTxns(&column_family_id, &key); - jobject jwaiting_txns = - ROCKSDB_NAMESPACE::TransactionJni::newWaitingTransactions( - env, jtransaction_obj, column_family_id, key, waiting_txns); - return jwaiting_txns; -} - -/* - * Class: org_rocksdb_Transaction - * Method: getState - * Signature: (J)B - */ -jbyte Java_org_rocksdb_Transaction_getState(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - ROCKSDB_NAMESPACE::Transaction::TransactionState txn_status = txn->GetState(); - switch (txn_status) { - case ROCKSDB_NAMESPACE::Transaction::TransactionState::STARTED: - return 0x0; - - case ROCKSDB_NAMESPACE::Transaction::TransactionState::AWAITING_PREPARE: - return 0x1; - - case ROCKSDB_NAMESPACE::Transaction::TransactionState::PREPARED: - return 0x2; - - case ROCKSDB_NAMESPACE::Transaction::TransactionState::AWAITING_COMMIT: - return 0x3; - - case ROCKSDB_NAMESPACE::Transaction::TransactionState::COMMITTED: - return 0x4; - - case ROCKSDB_NAMESPACE::Transaction::TransactionState::AWAITING_ROLLBACK: - return 0x5; - - case ROCKSDB_NAMESPACE::Transaction::TransactionState::ROLLEDBACK: - return 0x6; - - case ROCKSDB_NAMESPACE::Transaction::TransactionState::LOCKS_STOLEN: - return 0x7; - } - - assert(false); - return static_cast(-1); -} - -/* - * Class: org_rocksdb_Transaction - * Method: getId - * Signature: (J)J - */ -jlong Java_org_rocksdb_Transaction_getId(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jhandle) { - auto* txn = reinterpret_cast(jhandle); - uint64_t id = txn->GetId(); - return static_cast(id); -} - -/* - * Class: org_rocksdb_Transaction - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_Transaction_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - delete reinterpret_cast(jhandle); -} diff --git a/java/rocksjni/transaction_db.cc b/java/rocksjni/transaction_db.cc deleted file mode 100644 index 0adf85606..000000000 --- a/java/rocksjni/transaction_db.cc +++ /dev/null @@ -1,451 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ -// for ROCKSDB_NAMESPACE::TransactionDB. 
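// Illustrative sketch, not part of the deleted file: every open and
// beginTransaction bridge below follows the same error-handling idiom. The
// native call returns a Status; on success the resulting C++ pointer is
// packed into a jlong handle for the Java wrapper, and on failure the Status
// is rethrown into Java as an org.rocksdb.RocksDBException. A minimal,
// hypothetical helper showing that idiom (example_returnHandleOrThrow is an
// invented name; the macro and portal class are the ones this file includes):

#include <jni.h>

#include "rocksdb/utilities/transaction_db.h"
#include "rocksjni/cplusplus_to_java_convert.h"
#include "rocksjni/portal.h"

static jlong example_returnHandleOrThrow(
    JNIEnv* env, const ROCKSDB_NAMESPACE::Status& s,
    ROCKSDB_NAMESPACE::TransactionDB* tdb) {
  if (s.ok()) {
    // hand the C++ pointer to Java as an opaque handle
    return GET_CPLUSPLUS_POINTER(tdb);
  }
  // surface the failure as a Java exception; 0 means "no handle"
  ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s);
  return 0;
}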
- -#include "rocksdb/utilities/transaction_db.h" - -#include - -#include -#include -#include - -#include "include/org_rocksdb_TransactionDB.h" -#include "rocksdb/options.h" -#include "rocksdb/utilities/transaction.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_TransactionDB - * Method: open - * Signature: (JJLjava/lang/String;)J - */ -jlong Java_org_rocksdb_TransactionDB_open__JJLjava_lang_String_2( - JNIEnv* env, jclass, jlong joptions_handle, jlong jtxn_db_options_handle, - jstring jdb_path) { - auto* options = - reinterpret_cast(joptions_handle); - auto* txn_db_options = - reinterpret_cast( - jtxn_db_options_handle); - ROCKSDB_NAMESPACE::TransactionDB* tdb = nullptr; - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::TransactionDB::Open( - *options, *txn_db_options, db_path, &tdb); - env->ReleaseStringUTFChars(jdb_path, db_path); - - if (s.ok()) { - return GET_CPLUSPLUS_POINTER(tdb); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return 0; - } -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: open - * Signature: (JJLjava/lang/String;[[B[J)[J - */ -jlongArray Java_org_rocksdb_TransactionDB_open__JJLjava_lang_String_2_3_3B_3J( - JNIEnv* env, jclass, jlong jdb_options_handle, jlong jtxn_db_options_handle, - jstring jdb_path, jobjectArray jcolumn_names, - jlongArray jcolumn_options_handles) { - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - const jsize len_cols = env->GetArrayLength(jcolumn_names); - jlong* jco = env->GetLongArrayElements(jcolumn_options_handles, nullptr); - if (jco == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - std::vector column_families; - for (int i = 0; i < len_cols; i++) { - const jobject jcn = env->GetObjectArrayElement(jcolumn_names, i); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - const jbyteArray jcn_ba = reinterpret_cast(jcn); - jbyte* jcf_name = env->GetByteArrayElements(jcn_ba, nullptr); - if (jcf_name == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jcn); - env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - - const int jcf_name_len = env->GetArrayLength(jcn_ba); - const std::string cf_name(reinterpret_cast(jcf_name), jcf_name_len); - const ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = - reinterpret_cast(jco[i]); - column_families.push_back( - ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); - - env->ReleaseByteArrayElements(jcn_ba, jcf_name, JNI_ABORT); - env->DeleteLocalRef(jcn); - } - env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); - - auto* db_options = - reinterpret_cast(jdb_options_handle); - auto* txn_db_options = - reinterpret_cast( - jtxn_db_options_handle); - std::vector handles; - ROCKSDB_NAMESPACE::TransactionDB* tdb = nullptr; - const ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::TransactionDB::Open( - *db_options, *txn_db_options, db_path, column_families, 
&handles, &tdb); - - // check if open operation was successful - if (s.ok()) { - const jsize resultsLen = 1 + len_cols; // db handle + column family handles - std::unique_ptr results = - std::unique_ptr(new jlong[resultsLen]); - results[0] = GET_CPLUSPLUS_POINTER(tdb); - for (int i = 1; i <= len_cols; i++) { - results[i] = GET_CPLUSPLUS_POINTER(handles[i - 1]); - } - - jlongArray jresults = env->NewLongArray(resultsLen); - if (jresults == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetLongArrayRegion(jresults, 0, resultsLen, results.get()); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresults); - return nullptr; - } - return jresults; - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; - } -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_TransactionDB_disposeInternal(JNIEnv*, jobject, - jlong jhandle) { - auto* txn_db = reinterpret_cast(jhandle); - assert(txn_db != nullptr); - delete txn_db; -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: closeDatabase - * Signature: (J)V - */ -void Java_org_rocksdb_TransactionDB_closeDatabase(JNIEnv* env, jclass, - jlong jhandle) { - auto* txn_db = reinterpret_cast(jhandle); - assert(txn_db != nullptr); - ROCKSDB_NAMESPACE::Status s = txn_db->Close(); - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: beginTransaction - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_TransactionDB_beginTransaction__JJ( - JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle) { - auto* txn_db = reinterpret_cast(jhandle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - ROCKSDB_NAMESPACE::Transaction* txn = - txn_db->BeginTransaction(*write_options); - return GET_CPLUSPLUS_POINTER(txn); -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: beginTransaction - * Signature: (JJJ)J - */ -jlong Java_org_rocksdb_TransactionDB_beginTransaction__JJJ( - JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, - jlong jtxn_options_handle) { - auto* txn_db = reinterpret_cast(jhandle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* txn_options = reinterpret_cast( - jtxn_options_handle); - ROCKSDB_NAMESPACE::Transaction* txn = - txn_db->BeginTransaction(*write_options, *txn_options); - return GET_CPLUSPLUS_POINTER(txn); -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: beginTransaction_withOld - * Signature: (JJJ)J - */ -jlong Java_org_rocksdb_TransactionDB_beginTransaction_1withOld__JJJ( - JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, - jlong jold_txn_handle) { - auto* txn_db = reinterpret_cast(jhandle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* old_txn = - reinterpret_cast(jold_txn_handle); - ROCKSDB_NAMESPACE::TransactionOptions txn_options; - ROCKSDB_NAMESPACE::Transaction* txn = - txn_db->BeginTransaction(*write_options, txn_options, old_txn); - - // RocksJava relies on the assumption that - // we do not allocate a new Transaction object - // when providing an old_txn - assert(txn == old_txn); - - return GET_CPLUSPLUS_POINTER(txn); -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: beginTransaction_withOld - * Signature: (JJJJ)J - */ -jlong Java_org_rocksdb_TransactionDB_beginTransaction_1withOld__JJJJ( - JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, - jlong 
jtxn_options_handle, jlong jold_txn_handle) { - auto* txn_db = reinterpret_cast(jhandle); - auto* write_options = - reinterpret_cast(jwrite_options_handle); - auto* txn_options = reinterpret_cast( - jtxn_options_handle); - auto* old_txn = - reinterpret_cast(jold_txn_handle); - ROCKSDB_NAMESPACE::Transaction* txn = - txn_db->BeginTransaction(*write_options, *txn_options, old_txn); - - // RocksJava relies on the assumption that - // we do not allocate a new Transaction object - // when providing an old_txn - assert(txn == old_txn); - - return GET_CPLUSPLUS_POINTER(txn); -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: getTransactionByName - * Signature: (JLjava/lang/String;)J - */ -jlong Java_org_rocksdb_TransactionDB_getTransactionByName(JNIEnv* env, jobject, - jlong jhandle, - jstring jname) { - auto* txn_db = reinterpret_cast(jhandle); - const char* name = env->GetStringUTFChars(jname, nullptr); - if (name == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - ROCKSDB_NAMESPACE::Transaction* txn = txn_db->GetTransactionByName(name); - env->ReleaseStringUTFChars(jname, name); - return GET_CPLUSPLUS_POINTER(txn); -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: getAllPreparedTransactions - * Signature: (J)[J - */ -jlongArray Java_org_rocksdb_TransactionDB_getAllPreparedTransactions( - JNIEnv* env, jobject, jlong jhandle) { - auto* txn_db = reinterpret_cast(jhandle); - std::vector txns; - txn_db->GetAllPreparedTransactions(&txns); - - const size_t size = txns.size(); - assert(size < UINT32_MAX); // does it fit in a jint? - - const jsize len = static_cast(size); - std::vector tmp(len); - for (jsize i = 0; i < len; ++i) { - tmp[i] = GET_CPLUSPLUS_POINTER(txns[i]); - } - - jlongArray jtxns = env->NewLongArray(len); - if (jtxns == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetLongArrayRegion(jtxns, 0, len, tmp.data()); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jtxns); - return nullptr; - } - - return jtxns; -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: getLockStatusData - * Signature: (J)Ljava/util/Map; - */ -jobject Java_org_rocksdb_TransactionDB_getLockStatusData(JNIEnv* env, jobject, - jlong jhandle) { - auto* txn_db = reinterpret_cast(jhandle); - const std::unordered_multimap - lock_status_data = txn_db->GetLockStatusData(); - const jobject jlock_status_data = ROCKSDB_NAMESPACE::HashMapJni::construct( - env, static_cast(lock_status_data.size())); - if (jlock_status_data == nullptr) { - // exception occurred - return nullptr; - } - - const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< - const int32_t, const ROCKSDB_NAMESPACE::KeyLockInfo, jobject, jobject> - fn_map_kv = - [env](const std::pair& pair) { - const jobject jlong_column_family_id = - ROCKSDB_NAMESPACE::LongJni::valueOf(env, pair.first); - if (jlong_column_family_id == nullptr) { - // an error occurred - return std::unique_ptr>(nullptr); - } - const jobject jkey_lock_info = - ROCKSDB_NAMESPACE::KeyLockInfoJni::construct(env, pair.second); - if (jkey_lock_info == nullptr) { - // an error occurred - return std::unique_ptr>(nullptr); - } - return std::unique_ptr>( - new std::pair(jlong_column_family_id, - jkey_lock_info)); - }; - - if (!ROCKSDB_NAMESPACE::HashMapJni::putAll( - env, jlock_status_data, lock_status_data.begin(), - lock_status_data.end(), fn_map_kv)) { - // exception occcurred - return nullptr; - } - - return jlock_status_data; -} - -/* - * Class: org_rocksdb_TransactionDB - * 
Method: getDeadlockInfoBuffer - * Signature: (J)[Lorg/rocksdb/TransactionDB/DeadlockPath; - */ -jobjectArray Java_org_rocksdb_TransactionDB_getDeadlockInfoBuffer( - JNIEnv* env, jobject jobj, jlong jhandle) { - auto* txn_db = reinterpret_cast(jhandle); - const std::vector deadlock_info_buffer = - txn_db->GetDeadlockInfoBuffer(); - - const jsize deadlock_info_buffer_len = - static_cast(deadlock_info_buffer.size()); - jobjectArray jdeadlock_info_buffer = env->NewObjectArray( - deadlock_info_buffer_len, - ROCKSDB_NAMESPACE::DeadlockPathJni::getJClass(env), nullptr); - if (jdeadlock_info_buffer == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - jsize jdeadlock_info_buffer_offset = 0; - - auto buf_end = deadlock_info_buffer.end(); - for (auto buf_it = deadlock_info_buffer.begin(); buf_it != buf_end; - ++buf_it) { - const ROCKSDB_NAMESPACE::DeadlockPath deadlock_path = *buf_it; - const std::vector deadlock_infos = - deadlock_path.path; - const jsize deadlock_infos_len = - static_cast(deadlock_info_buffer.size()); - jobjectArray jdeadlock_infos = env->NewObjectArray( - deadlock_infos_len, ROCKSDB_NAMESPACE::DeadlockInfoJni::getJClass(env), - nullptr); - if (jdeadlock_infos == nullptr) { - // exception thrown: OutOfMemoryError - env->DeleteLocalRef(jdeadlock_info_buffer); - return nullptr; - } - jsize jdeadlock_infos_offset = 0; - - auto infos_end = deadlock_infos.end(); - for (auto infos_it = deadlock_infos.begin(); infos_it != infos_end; - ++infos_it) { - const ROCKSDB_NAMESPACE::DeadlockInfo deadlock_info = *infos_it; - const jobject jdeadlock_info = - ROCKSDB_NAMESPACE::TransactionDBJni::newDeadlockInfo( - env, jobj, deadlock_info.m_txn_id, deadlock_info.m_cf_id, - deadlock_info.m_waiting_key, deadlock_info.m_exclusive); - if (jdeadlock_info == nullptr) { - // exception occcurred - env->DeleteLocalRef(jdeadlock_info_buffer); - return nullptr; - } - env->SetObjectArrayElement(jdeadlock_infos, jdeadlock_infos_offset++, - jdeadlock_info); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException or - // ArrayStoreException - env->DeleteLocalRef(jdeadlock_info); - env->DeleteLocalRef(jdeadlock_info_buffer); - return nullptr; - } - } - - const jobject jdeadlock_path = - ROCKSDB_NAMESPACE::DeadlockPathJni::construct( - env, jdeadlock_infos, deadlock_path.limit_exceeded); - if (jdeadlock_path == nullptr) { - // exception occcurred - env->DeleteLocalRef(jdeadlock_info_buffer); - return nullptr; - } - env->SetObjectArrayElement(jdeadlock_info_buffer, - jdeadlock_info_buffer_offset++, jdeadlock_path); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException or ArrayStoreException - env->DeleteLocalRef(jdeadlock_path); - env->DeleteLocalRef(jdeadlock_info_buffer); - return nullptr; - } - } - - return jdeadlock_info_buffer; -} - -/* - * Class: org_rocksdb_TransactionDB - * Method: setDeadlockInfoBufferSize - * Signature: (JI)V - */ -void Java_org_rocksdb_TransactionDB_setDeadlockInfoBufferSize( - JNIEnv*, jobject, jlong jhandle, jint jdeadlock_info_buffer_size) { - auto* txn_db = reinterpret_cast(jhandle); - txn_db->SetDeadlockInfoBufferSize(jdeadlock_info_buffer_size); -} diff --git a/java/rocksjni/transaction_db_options.cc b/java/rocksjni/transaction_db_options.cc deleted file mode 100644 index 4cf27121e..000000000 --- a/java/rocksjni/transaction_db_options.cc +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ -// for ROCKSDB_NAMESPACE::TransactionDBOptions. - -#include - -#include "include/org_rocksdb_TransactionDBOptions.h" -#include "rocksdb/utilities/transaction_db.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: newTransactionDBOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_TransactionDBOptions_newTransactionDBOptions( - JNIEnv* /*env*/, jclass /*jcls*/) { - ROCKSDB_NAMESPACE::TransactionDBOptions* opts = - new ROCKSDB_NAMESPACE::TransactionDBOptions(); - return GET_CPLUSPLUS_POINTER(opts); -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: getMaxNumLocks - * Signature: (J)J - */ -jlong Java_org_rocksdb_TransactionDBOptions_getMaxNumLocks(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->max_num_locks; -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: setMaxNumLocks - * Signature: (JJ)V - */ -void Java_org_rocksdb_TransactionDBOptions_setMaxNumLocks( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jmax_num_locks) { - auto* opts = - reinterpret_cast(jhandle); - opts->max_num_locks = jmax_num_locks; -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: getNumStripes - * Signature: (J)J - */ -jlong Java_org_rocksdb_TransactionDBOptions_getNumStripes(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->num_stripes; -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: setNumStripes - * Signature: (JJ)V - */ -void Java_org_rocksdb_TransactionDBOptions_setNumStripes(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jlong jnum_stripes) { - auto* opts = - reinterpret_cast(jhandle); - opts->num_stripes = jnum_stripes; -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: getTransactionLockTimeout - * Signature: (J)J - */ -jlong Java_org_rocksdb_TransactionDBOptions_getTransactionLockTimeout( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->transaction_lock_timeout; -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: setTransactionLockTimeout - * Signature: (JJ)V - */ -void Java_org_rocksdb_TransactionDBOptions_setTransactionLockTimeout( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jtransaction_lock_timeout) { - auto* opts = - reinterpret_cast(jhandle); - opts->transaction_lock_timeout = jtransaction_lock_timeout; -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: getDefaultLockTimeout - * Signature: (J)J - */ -jlong Java_org_rocksdb_TransactionDBOptions_getDefaultLockTimeout( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->default_lock_timeout; -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: setDefaultLockTimeout - * Signature: (JJ)V - */ -void Java_org_rocksdb_TransactionDBOptions_setDefaultLockTimeout( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jdefault_lock_timeout) { - auto* opts = - reinterpret_cast(jhandle); - opts->default_lock_timeout = jdefault_lock_timeout; -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: 
getWritePolicy - * Signature: (J)B - */ -jbyte Java_org_rocksdb_TransactionDBOptions_getWritePolicy(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return ROCKSDB_NAMESPACE::TxnDBWritePolicyJni::toJavaTxnDBWritePolicy( - opts->write_policy); -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: setWritePolicy - * Signature: (JB)V - */ -void Java_org_rocksdb_TransactionDBOptions_setWritePolicy(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jbyte jwrite_policy) { - auto* opts = - reinterpret_cast(jhandle); - opts->write_policy = - ROCKSDB_NAMESPACE::TxnDBWritePolicyJni::toCppTxnDBWritePolicy( - jwrite_policy); -} - -/* - * Class: org_rocksdb_TransactionDBOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_TransactionDBOptions_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - delete reinterpret_cast(jhandle); -} diff --git a/java/rocksjni/transaction_log.cc b/java/rocksjni/transaction_log.cc deleted file mode 100644 index 97c3bb301..000000000 --- a/java/rocksjni/transaction_log.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::Iterator methods from Java side. - -#include "rocksdb/transaction_log.h" - -#include -#include -#include - -#include "include/org_rocksdb_TransactionLogIterator.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_TransactionLogIterator - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_TransactionLogIterator_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - delete reinterpret_cast(handle); -} - -/* - * Class: org_rocksdb_TransactionLogIterator - * Method: isValid - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_TransactionLogIterator_isValid(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - return reinterpret_cast(handle) - ->Valid(); -} - -/* - * Class: org_rocksdb_TransactionLogIterator - * Method: next - * Signature: (J)V - */ -void Java_org_rocksdb_TransactionLogIterator_next(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->Next(); -} - -/* - * Class: org_rocksdb_TransactionLogIterator - * Method: status - * Signature: (J)V - */ -void Java_org_rocksdb_TransactionLogIterator_status(JNIEnv* env, - jobject /*jobj*/, - jlong handle) { - ROCKSDB_NAMESPACE::Status s = - reinterpret_cast(handle) - ->status(); - if (!s.ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - } -} - -/* - * Class: org_rocksdb_TransactionLogIterator - * Method: getBatch - * Signature: (J)Lorg/rocksdb/TransactionLogIterator$BatchResult - */ -jobject Java_org_rocksdb_TransactionLogIterator_getBatch(JNIEnv* env, - jobject /*jobj*/, - jlong handle) { - ROCKSDB_NAMESPACE::BatchResult batch_result = - reinterpret_cast(handle) - ->GetBatch(); - return ROCKSDB_NAMESPACE::BatchResultJni::construct(env, batch_result); -} diff --git a/java/rocksjni/transaction_notifier.cc b/java/rocksjni/transaction_notifier.cc deleted file mode 100644 index cefeb648a..000000000 --- a/java/rocksjni/transaction_notifier.cc +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ -// for ROCKSDB_NAMESPACE::TransactionNotifier. - -#include - -#include "include/org_rocksdb_AbstractTransactionNotifier.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/transaction_notifier_jnicallback.h" - -/* - * Class: org_rocksdb_AbstractTransactionNotifier - * Method: createNewTransactionNotifier - * Signature: ()J - */ -jlong Java_org_rocksdb_AbstractTransactionNotifier_createNewTransactionNotifier( - JNIEnv* env, jobject jobj) { - auto* transaction_notifier = - new ROCKSDB_NAMESPACE::TransactionNotifierJniCallback(env, jobj); - auto* sptr_transaction_notifier = - new std::shared_ptr( - transaction_notifier); - return GET_CPLUSPLUS_POINTER(sptr_transaction_notifier); -} - -/* - * Class: org_rocksdb_AbstractTransactionNotifier - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_AbstractTransactionNotifier_disposeInternal( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - // TODO(AR) refactor to use JniCallback::JniCallback - // when https://github.com/facebook/rocksdb/pull/1241/ is merged - std::shared_ptr* handle = - reinterpret_cast< - std::shared_ptr*>( - jhandle); - delete handle; -} diff --git a/java/rocksjni/transaction_notifier_jnicallback.cc b/java/rocksjni/transaction_notifier_jnicallback.cc deleted file mode 100644 index 26761cabd..000000000 --- a/java/rocksjni/transaction_notifier_jnicallback.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::TransactionNotifier. - -#include "rocksjni/transaction_notifier_jnicallback.h" - -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { - -TransactionNotifierJniCallback::TransactionNotifierJniCallback( - JNIEnv* env, jobject jtransaction_notifier) - : JniCallback(env, jtransaction_notifier) { - // we cache the method id for the JNI callback - m_jsnapshot_created_methodID = - AbstractTransactionNotifierJni::getSnapshotCreatedMethodId(env); -} - -void TransactionNotifierJniCallback::SnapshotCreated( - const Snapshot* newSnapshot) { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - assert(env != nullptr); - - env->CallVoidMethod(m_jcallback_obj, m_jsnapshot_created_methodID, - GET_CPLUSPLUS_POINTER(newSnapshot)); - - if (env->ExceptionCheck()) { - // exception thrown from CallVoidMethod - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - - releaseJniEnv(attached_thread); -} -} // namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/transaction_notifier_jnicallback.h b/java/rocksjni/transaction_notifier_jnicallback.h deleted file mode 100644 index 089a5ee4a..000000000 --- a/java/rocksjni/transaction_notifier_jnicallback.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::TransactionNotifier. - -#ifndef JAVA_ROCKSJNI_TRANSACTION_NOTIFIER_JNICALLBACK_H_ -#define JAVA_ROCKSJNI_TRANSACTION_NOTIFIER_JNICALLBACK_H_ - -#include - -#include "rocksdb/utilities/transaction.h" -#include "rocksjni/jnicallback.h" - -namespace ROCKSDB_NAMESPACE { - -/** - * This class acts as a bridge between C++ - * and Java. The methods in this class will be - * called back from the RocksDB TransactionDB or OptimisticTransactionDB (C++), - * we then callback to the appropriate Java method - * this enables TransactionNotifier to be implemented in Java. - * - * Unlike RocksJava's Comparator JNI Callback, we do not attempt - * to reduce Java object allocations by caching the Snapshot object - * presented to the callback. This could be revisited in future - * if performance is lacking. - */ -class TransactionNotifierJniCallback : public JniCallback, - public TransactionNotifier { - public: - TransactionNotifierJniCallback(JNIEnv* env, jobject jtransaction_notifier); - virtual void SnapshotCreated(const Snapshot* newSnapshot); - - private: - jmethodID m_jsnapshot_created_methodID; -}; -} // namespace ROCKSDB_NAMESPACE - -#endif // JAVA_ROCKSJNI_TRANSACTION_NOTIFIER_JNICALLBACK_H_ diff --git a/java/rocksjni/transaction_options.cc b/java/rocksjni/transaction_options.cc deleted file mode 100644 index dcf363e14..000000000 --- a/java/rocksjni/transaction_options.cc +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ -// for ROCKSDB_NAMESPACE::TransactionOptions. 
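// Illustrative sketch, not part of the deleted file: every options bridge in
// this file is a thin getter/setter pair. The jlong handle is cast back to
// the native struct and a single field is read or written, so no JNIEnv work
// is needed beyond the call itself, and ownership stays with the Java wrapper
// until it calls disposeInternal. A condensed, hypothetical pair showing that
// shape (example_getLockTimeout / example_setLockTimeout are invented names;
// lock_timeout is a real TransactionOptions field):

#include <jni.h>

#include "rocksdb/utilities/transaction_db.h"

static jlong example_getLockTimeout(jlong jhandle) {
  auto* opts =
      reinterpret_cast<ROCKSDB_NAMESPACE::TransactionOptions*>(jhandle);
  return opts->lock_timeout;  // plain field read through the handle
}

static void example_setLockTimeout(jlong jhandle, jlong jlock_timeout) {
  auto* opts =
      reinterpret_cast<ROCKSDB_NAMESPACE::TransactionOptions*>(jhandle);
  opts->lock_timeout = jlock_timeout;  // plain field write through the handle
}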
- -#include - -#include "include/org_rocksdb_TransactionOptions.h" -#include "rocksdb/utilities/transaction_db.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_TransactionOptions - * Method: newTransactionOptions - * Signature: ()J - */ -jlong Java_org_rocksdb_TransactionOptions_newTransactionOptions( - JNIEnv* /*env*/, jclass /*jcls*/) { - auto* opts = new ROCKSDB_NAMESPACE::TransactionOptions(); - return GET_CPLUSPLUS_POINTER(opts); -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: isSetSnapshot - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_TransactionOptions_isSetSnapshot(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->set_snapshot; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: setSetSnapshot - * Signature: (JZ)V - */ -void Java_org_rocksdb_TransactionOptions_setSetSnapshot( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean jset_snapshot) { - auto* opts = - reinterpret_cast(jhandle); - opts->set_snapshot = jset_snapshot; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: isDeadlockDetect - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_TransactionOptions_isDeadlockDetect(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->deadlock_detect; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: setDeadlockDetect - * Signature: (JZ)V - */ -void Java_org_rocksdb_TransactionOptions_setDeadlockDetect( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jboolean jdeadlock_detect) { - auto* opts = - reinterpret_cast(jhandle); - opts->deadlock_detect = jdeadlock_detect; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: getLockTimeout - * Signature: (J)J - */ -jlong Java_org_rocksdb_TransactionOptions_getLockTimeout(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->lock_timeout; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: setLockTimeout - * Signature: (JJ)V - */ -void Java_org_rocksdb_TransactionOptions_setLockTimeout(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jlong jlock_timeout) { - auto* opts = - reinterpret_cast(jhandle); - opts->lock_timeout = jlock_timeout; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: getExpiration - * Signature: (J)J - */ -jlong Java_org_rocksdb_TransactionOptions_getExpiration(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->expiration; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: setExpiration - * Signature: (JJ)V - */ -void Java_org_rocksdb_TransactionOptions_setExpiration(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jlong jexpiration) { - auto* opts = - reinterpret_cast(jhandle); - opts->expiration = jexpiration; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: getDeadlockDetectDepth - * Signature: (J)J - */ -jlong Java_org_rocksdb_TransactionOptions_getDeadlockDetectDepth( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->deadlock_detect_depth; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: setDeadlockDetectDepth - * Signature: (JJ)V - */ -void Java_org_rocksdb_TransactionOptions_setDeadlockDetectDepth( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jdeadlock_detect_depth) { - auto* opts = - 
reinterpret_cast(jhandle); - opts->deadlock_detect_depth = jdeadlock_detect_depth; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: getMaxWriteBatchSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_TransactionOptions_getMaxWriteBatchSize(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* opts = - reinterpret_cast(jhandle); - return opts->max_write_batch_size; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: setMaxWriteBatchSize - * Signature: (JJ)V - */ -void Java_org_rocksdb_TransactionOptions_setMaxWriteBatchSize( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jmax_write_batch_size) { - auto* opts = - reinterpret_cast(jhandle); - opts->max_write_batch_size = jmax_write_batch_size; -} - -/* - * Class: org_rocksdb_TransactionOptions - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_TransactionOptions_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - delete reinterpret_cast(jhandle); -} diff --git a/java/rocksjni/ttl.cc b/java/rocksjni/ttl.cc deleted file mode 100644 index 1fe2083d9..000000000 --- a/java/rocksjni/ttl.cc +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::TtlDB methods. -// from Java side. - -#include -#include -#include - -#include -#include -#include - -#include "include/org_rocksdb_TtlDB.h" -#include "rocksdb/utilities/db_ttl.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_TtlDB - * Method: open - * Signature: (JLjava/lang/String;IZ)J - */ -jlong Java_org_rocksdb_TtlDB_open(JNIEnv* env, jclass, jlong joptions_handle, - jstring jdb_path, jint jttl, - jboolean jread_only) { - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - - auto* opt = reinterpret_cast(joptions_handle); - ROCKSDB_NAMESPACE::DBWithTTL* db = nullptr; - ROCKSDB_NAMESPACE::Status s = - ROCKSDB_NAMESPACE::DBWithTTL::Open(*opt, db_path, &db, jttl, jread_only); - env->ReleaseStringUTFChars(jdb_path, db_path); - - // as TTLDB extends RocksDB on the java side, we can reuse - // the RocksDB portal here. 
- if (s.ok()) { - return GET_CPLUSPLUS_POINTER(db); - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return 0; - } -} - -/* - * Class: org_rocksdb_TtlDB - * Method: openCF - * Signature: (JLjava/lang/String;[[B[J[IZ)[J - */ -jlongArray Java_org_rocksdb_TtlDB_openCF(JNIEnv* env, jclass, jlong jopt_handle, - jstring jdb_path, - jobjectArray jcolumn_names, - jlongArray jcolumn_options, - jintArray jttls, jboolean jread_only) { - const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); - if (db_path == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - - const jsize len_cols = env->GetArrayLength(jcolumn_names); - jlong* jco = env->GetLongArrayElements(jcolumn_options, nullptr); - if (jco == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - - std::vector column_families; - jboolean has_exception = JNI_FALSE; - ROCKSDB_NAMESPACE::JniUtil::byteStrings( - env, jcolumn_names, - [](const char* str_data, const size_t str_len) { - return std::string(str_data, str_len); - }, - [&jco, &column_families](size_t idx, std::string cf_name) { - ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = - reinterpret_cast(jco[idx]); - column_families.push_back( - ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); - }, - &has_exception); - - env->ReleaseLongArrayElements(jcolumn_options, jco, JNI_ABORT); - - if (has_exception == JNI_TRUE) { - // exception occurred - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - - std::vector ttl_values; - jint* jttlv = env->GetIntArrayElements(jttls, nullptr); - if (jttlv == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseStringUTFChars(jdb_path, db_path); - return nullptr; - } - const jsize len_ttls = env->GetArrayLength(jttls); - for (jsize i = 0; i < len_ttls; i++) { - ttl_values.push_back(jttlv[i]); - } - env->ReleaseIntArrayElements(jttls, jttlv, JNI_ABORT); - - auto* opt = reinterpret_cast(jopt_handle); - std::vector handles; - ROCKSDB_NAMESPACE::DBWithTTL* db = nullptr; - ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DBWithTTL::Open( - *opt, db_path, column_families, &handles, &db, ttl_values, jread_only); - - // we have now finished with db_path - env->ReleaseStringUTFChars(jdb_path, db_path); - - // check if open operation was successful - if (s.ok()) { - const jsize resultsLen = 1 + len_cols; // db handle + column family handles - std::unique_ptr results = - std::unique_ptr(new jlong[resultsLen]); - results[0] = GET_CPLUSPLUS_POINTER(db); - for (int i = 1; i <= len_cols; i++) { - results[i] = GET_CPLUSPLUS_POINTER(handles[i - 1]); - } - - jlongArray jresults = env->NewLongArray(resultsLen); - if (jresults == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetLongArrayRegion(jresults, 0, resultsLen, results.get()); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresults); - return nullptr; - } - - return jresults; - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return NULL; - } -} - -/* - * Class: org_rocksdb_TtlDB - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_TtlDB_disposeInternal(JNIEnv*, jobject, jlong jhandle) { - auto* ttl_db = reinterpret_cast(jhandle); - assert(ttl_db != nullptr); - delete ttl_db; -} - -/* - * Class: org_rocksdb_TtlDB - * Method: closeDatabase - * Signature: (J)V - */ -void Java_org_rocksdb_TtlDB_closeDatabase(JNIEnv* /* 
env */, jclass, - jlong /* jhandle */) { - // auto* ttl_db = reinterpret_cast(jhandle); - // assert(ttl_db != nullptr); - // ROCKSDB_NAMESPACE::Status s = ttl_db->Close(); - // ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - - // TODO(AR) this is disabled until - // https://github.com/facebook/rocksdb/issues/4818 is resolved! -} - -/* - * Class: org_rocksdb_TtlDB - * Method: createColumnFamilyWithTtl - * Signature: (JLorg/rocksdb/ColumnFamilyDescriptor;[BJI)J; - */ -jlong Java_org_rocksdb_TtlDB_createColumnFamilyWithTtl(JNIEnv* env, jobject, - jlong jdb_handle, - jbyteArray jcolumn_name, - jlong jcolumn_options, - jint jttl) { - jbyte* cfname = env->GetByteArrayElements(jcolumn_name, nullptr); - if (cfname == nullptr) { - // exception thrown: OutOfMemoryError - return 0; - } - const jsize len = env->GetArrayLength(jcolumn_name); - - auto* cfOptions = reinterpret_cast( - jcolumn_options); - - auto* db_handle = reinterpret_cast(jdb_handle); - ROCKSDB_NAMESPACE::ColumnFamilyHandle* handle; - ROCKSDB_NAMESPACE::Status s = db_handle->CreateColumnFamilyWithTtl( - *cfOptions, std::string(reinterpret_cast(cfname), len), &handle, - jttl); - - env->ReleaseByteArrayElements(jcolumn_name, cfname, JNI_ABORT); - - if (s.ok()) { - return GET_CPLUSPLUS_POINTER(handle); - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return 0; -} diff --git a/java/rocksjni/wal_filter.cc b/java/rocksjni/wal_filter.cc deleted file mode 100644 index 24b88afed..000000000 --- a/java/rocksjni/wal_filter.cc +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::WalFilter. - -#include - -#include "include/org_rocksdb_AbstractWalFilter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/wal_filter_jnicallback.h" - -/* - * Class: org_rocksdb_AbstractWalFilter - * Method: createNewWalFilter - * Signature: ()J - */ -jlong Java_org_rocksdb_AbstractWalFilter_createNewWalFilter(JNIEnv* env, - jobject jobj) { - auto* wal_filter = new ROCKSDB_NAMESPACE::WalFilterJniCallback(env, jobj); - return GET_CPLUSPLUS_POINTER(wal_filter); -} diff --git a/java/rocksjni/wal_filter_jnicallback.cc b/java/rocksjni/wal_filter_jnicallback.cc deleted file mode 100644 index d2e3c9076..000000000 --- a/java/rocksjni/wal_filter_jnicallback.cc +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::WalFilter. 
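Both callbacks implemented below follow the same JNI bridge shape; a minimal, hedged sketch of that shape (reusing the getJniEnv/releaseJniEnv helpers and the cached method IDs this class holds) is:

  jboolean attached_thread = JNI_FALSE;
  JNIEnv* env = getJniEnv(&attached_thread);  // attaches this thread if needed
  if (env == nullptr) {
    return;  // no JNIEnv could be obtained, fall back to a safe default
  }
  env->CallVoidMethod(m_jcallback_obj, m_column_family_log_number_map_mid,
                      jcf_lognumber_map, jcf_name_id_map);
  if (env->ExceptionCheck()) {
    env->ExceptionDescribe();  // print the pending Java exception to stderr
  }
  releaseJniEnv(attached_thread);  // detach again if we attached above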
- -#include "rocksjni/wal_filter_jnicallback.h" - -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { -WalFilterJniCallback::WalFilterJniCallback(JNIEnv* env, jobject jwal_filter) - : JniCallback(env, jwal_filter) { - // Note: The name of a WalFilter will not change during it's lifetime, - // so we cache it in a global var - jmethodID jname_mid = AbstractWalFilterJni::getNameMethodId(env); - if (jname_mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - jstring jname = (jstring)env->CallObjectMethod(m_jcallback_obj, jname_mid); - if (env->ExceptionCheck()) { - // exception thrown - return; - } - jboolean has_exception = JNI_FALSE; - m_name = JniUtil::copyString(env, jname, - &has_exception); // also releases jname - if (has_exception == JNI_TRUE) { - // exception thrown - return; - } - - m_column_family_log_number_map_mid = - AbstractWalFilterJni::getColumnFamilyLogNumberMapMethodId(env); - if (m_column_family_log_number_map_mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } - - m_log_record_found_proxy_mid = - AbstractWalFilterJni::getLogRecordFoundProxyMethodId(env); - if (m_log_record_found_proxy_mid == nullptr) { - // exception thrown: NoSuchMethodException or OutOfMemoryError - return; - } -} - -void WalFilterJniCallback::ColumnFamilyLogNumberMap( - const std::map& cf_lognumber_map, - const std::map& cf_name_id_map) { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - if (env == nullptr) { - return; - } - - jobject jcf_lognumber_map = - ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, &cf_lognumber_map); - if (jcf_lognumber_map == nullptr) { - // exception occurred - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return; - } - - jobject jcf_name_id_map = - ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, &cf_name_id_map); - if (jcf_name_id_map == nullptr) { - // exception occurred - env->ExceptionDescribe(); // print out exception to stderr - env->DeleteLocalRef(jcf_lognumber_map); - releaseJniEnv(attached_thread); - return; - } - - env->CallVoidMethod(m_jcallback_obj, m_column_family_log_number_map_mid, - jcf_lognumber_map, jcf_name_id_map); - - env->DeleteLocalRef(jcf_lognumber_map); - env->DeleteLocalRef(jcf_name_id_map); - - if (env->ExceptionCheck()) { - // exception thrown from CallVoidMethod - env->ExceptionDescribe(); // print out exception to stderr - } - - releaseJniEnv(attached_thread); -} - -WalFilter::WalProcessingOption WalFilterJniCallback::LogRecordFound( - unsigned long long log_number, const std::string& log_file_name, - const WriteBatch& batch, WriteBatch* new_batch, bool* batch_changed) { - jboolean attached_thread = JNI_FALSE; - JNIEnv* env = getJniEnv(&attached_thread); - if (env == nullptr) { - return WalFilter::WalProcessingOption::kCorruptedRecord; - } - - jstring jlog_file_name = JniUtil::toJavaString(env, &log_file_name); - if (jlog_file_name == nullptr) { - // exception occcurred - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return WalFilter::WalProcessingOption::kCorruptedRecord; - } - - jshort jlog_record_found_result = env->CallShortMethod( - m_jcallback_obj, m_log_record_found_proxy_mid, - static_cast(log_number), jlog_file_name, - GET_CPLUSPLUS_POINTER(&batch), GET_CPLUSPLUS_POINTER(new_batch)); - - env->DeleteLocalRef(jlog_file_name); - - if (env->ExceptionCheck()) { - // 
exception thrown from CallShortMethod - env->ExceptionDescribe(); // print out exception to stderr - releaseJniEnv(attached_thread); - return WalFilter::WalProcessingOption::kCorruptedRecord; - } - - // unpack WalProcessingOption and batch_changed from jlog_record_found_result - jbyte jwal_processing_option_value = (jlog_record_found_result >> 8) & 0xFF; - jbyte jbatch_changed_value = jlog_record_found_result & 0xFF; - - releaseJniEnv(attached_thread); - - *batch_changed = jbatch_changed_value == JNI_TRUE; - - return WalProcessingOptionJni::toCppWalProcessingOption( - jwal_processing_option_value); -} - -const char* WalFilterJniCallback::Name() const { return m_name.get(); } - -} // namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/wal_filter_jnicallback.h b/java/rocksjni/wal_filter_jnicallback.h deleted file mode 100644 index 5cdc65978..000000000 --- a/java/rocksjni/wal_filter_jnicallback.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::WalFilter. - -#ifndef JAVA_ROCKSJNI_WAL_FILTER_JNICALLBACK_H_ -#define JAVA_ROCKSJNI_WAL_FILTER_JNICALLBACK_H_ - -#include - -#include -#include -#include - -#include "rocksdb/wal_filter.h" -#include "rocksjni/jnicallback.h" - -namespace ROCKSDB_NAMESPACE { - -class WalFilterJniCallback : public JniCallback, public WalFilter { - public: - WalFilterJniCallback(JNIEnv* env, jobject jwal_filter); - virtual void ColumnFamilyLogNumberMap( - const std::map& cf_lognumber_map, - const std::map& cf_name_id_map); - virtual WalFilter::WalProcessingOption LogRecordFound( - unsigned long long log_number, const std::string& log_file_name, - const WriteBatch& batch, WriteBatch* new_batch, bool* batch_changed); - virtual const char* Name() const; - - private: - std::unique_ptr m_name; - jmethodID m_column_family_log_number_map_mid; - jmethodID m_log_record_found_proxy_mid; -}; - -} // namespace ROCKSDB_NAMESPACE - -#endif // JAVA_ROCKSJNI_WAL_FILTER_JNICALLBACK_H_ diff --git a/java/rocksjni/write_batch.cc b/java/rocksjni/write_batch.cc deleted file mode 100644 index 6704e4a7e..000000000 --- a/java/rocksjni/write_batch.cc +++ /dev/null @@ -1,676 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::WriteBatch methods from Java side. 
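Every bridge function in this file follows the same three-step pattern; a minimal sketch (mirroring the put overload further down, with the angle-bracket template arguments reconstructed) looks like:

  void Java_org_rocksdb_WriteBatch_put__J_3BI_3BI(JNIEnv* env, jobject jobj,
                                                  jlong jwb_handle,
                                                  jbyteArray jkey, jint jkey_len,
                                                  jbyteArray jentry_value,
                                                  jint jentry_value_len) {
    // 1. recover the native object from the jlong handle
    auto* wb = reinterpret_cast<ROCKSDB_NAMESPACE::WriteBatch*>(jwb_handle);
    // 2. wrap the native call in a lambda; JniUtil copies the Java byte[]
    //    arguments into Slices and invokes it
    auto put = [&wb](ROCKSDB_NAMESPACE::Slice key,
                     ROCKSDB_NAMESPACE::Slice value) {
      return wb->Put(key, value);
    };
    std::unique_ptr<ROCKSDB_NAMESPACE::Status> status =
        ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len,
                                          jentry_value, jentry_value_len);
    // 3. surface a non-OK Status to Java as a RocksDBException
    if (status != nullptr && !status->ok()) {
      ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status);
    }
  }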
-#include "rocksdb/write_batch.h" - -#include - -#include "db/memtable.h" -#include "db/write_batch_internal.h" -#include "include/org_rocksdb_WriteBatch.h" -#include "include/org_rocksdb_WriteBatch_Handler.h" -#include "logging/logging.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/status.h" -#include "rocksdb/write_buffer_manager.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -#include "rocksjni/writebatchhandlerjnicallback.h" -#include "table/scoped_arena_iterator.h" - -/* - * Class: org_rocksdb_WriteBatch - * Method: newWriteBatch - * Signature: (I)J - */ -jlong Java_org_rocksdb_WriteBatch_newWriteBatch__I(JNIEnv* /*env*/, - jclass /*jcls*/, - jint jreserved_bytes) { - auto* wb = - new ROCKSDB_NAMESPACE::WriteBatch(static_cast(jreserved_bytes)); - return GET_CPLUSPLUS_POINTER(wb); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: newWriteBatch - * Signature: ([BI)J - */ -jlong Java_org_rocksdb_WriteBatch_newWriteBatch___3BI(JNIEnv* env, - jclass /*jcls*/, - jbyteArray jserialized, - jint jserialized_length) { - jboolean has_exception = JNI_FALSE; - std::string serialized = ROCKSDB_NAMESPACE::JniUtil::byteString( - env, jserialized, jserialized_length, - [](const char* str, const size_t len) { return std::string(str, len); }, - &has_exception); - if (has_exception == JNI_TRUE) { - // exception occurred - return 0; - } - - auto* wb = new ROCKSDB_NAMESPACE::WriteBatch(serialized); - return GET_CPLUSPLUS_POINTER(wb); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: count0 - * Signature: (J)I - */ -jint Java_org_rocksdb_WriteBatch_count0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return static_cast(wb->Count()); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: clear0 - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatch_clear0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - wb->Clear(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: setSavePoint0 - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatch_setSavePoint0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - wb->SetSavePoint(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: rollbackToSavePoint0 - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatch_rollbackToSavePoint0(JNIEnv* env, - jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - auto s = wb->RollbackToSavePoint(); - - if (s.ok()) { - return; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: popSavePoint - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatch_popSavePoint(JNIEnv* env, jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - auto s = wb->PopSavePoint(); - - if (s.ok()) { - return; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: setMaxBytes - * Signature: (JJ)V - */ -void Java_org_rocksdb_WriteBatch_setMaxBytes(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jwb_handle, - jlong jmax_bytes) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - wb->SetMaxBytes(static_cast(jmax_bytes)); -} - -/* - * Class: 
org_rocksdb_WriteBatch - * Method: put - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_WriteBatch_put__J_3BI_3BI(JNIEnv* env, jobject jobj, - jlong jwb_handle, - jbyteArray jkey, jint jkey_len, - jbyteArray jentry_value, - jint jentry_value_len) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto put = [&wb](ROCKSDB_NAMESPACE::Slice key, - ROCKSDB_NAMESPACE::Slice value) { - return wb->Put(key, value); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len, - jentry_value, jentry_value_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: put - * Signature: (J[BI[BIJ)V - */ -void Java_org_rocksdb_WriteBatch_put__J_3BI_3BIJ( - JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, - jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto put = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key, - ROCKSDB_NAMESPACE::Slice value) { - return wb->Put(cf_handle, key, value); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len, - jentry_value, jentry_value_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: putDirect - * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V - */ -void Java_org_rocksdb_WriteBatch_putDirect(JNIEnv* env, jobject /*jobj*/, - jlong jwb_handle, jobject jkey, - jint jkey_offset, jint jkey_len, - jobject jval, jint jval_offset, - jint jval_len, jlong jcf_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - auto put = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice& key, - ROCKSDB_NAMESPACE::Slice& value) { - if (cf_handle == nullptr) { - wb->Put(key, value); - } else { - wb->Put(cf_handle, key, value); - } - }; - ROCKSDB_NAMESPACE::JniUtil::kv_op_direct( - put, env, jkey, jkey_offset, jkey_len, jval, jval_offset, jval_len); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: merge - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BI( - JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, - jbyteArray jentry_value, jint jentry_value_len) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto merge = [&wb](ROCKSDB_NAMESPACE::Slice key, - ROCKSDB_NAMESPACE::Slice value) { - return wb->Merge(key, value); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(merge, env, jobj, jkey, jkey_len, - jentry_value, jentry_value_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: merge - * Signature: (J[BI[BIJ)V - */ -void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BIJ( - JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, - jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto merge = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key, - 
ROCKSDB_NAMESPACE::Slice value) { - return wb->Merge(cf_handle, key, value); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(merge, env, jobj, jkey, jkey_len, - jentry_value, jentry_value_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: delete - * Signature: (J[BI)V - */ -void Java_org_rocksdb_WriteBatch_delete__J_3BI(JNIEnv* env, jobject jobj, - jlong jwb_handle, - jbyteArray jkey, jint jkey_len) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto remove = [&wb](ROCKSDB_NAMESPACE::Slice key) { return wb->Delete(key); }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::k_op(remove, env, jobj, jkey, jkey_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: delete - * Signature: (J[BIJ)V - */ -void Java_org_rocksdb_WriteBatch_delete__J_3BIJ(JNIEnv* env, jobject jobj, - jlong jwb_handle, - jbyteArray jkey, jint jkey_len, - jlong jcf_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto remove = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { - return wb->Delete(cf_handle, key); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::k_op(remove, env, jobj, jkey, jkey_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: singleDelete - * Signature: (J[BI)V - */ -void Java_org_rocksdb_WriteBatch_singleDelete__J_3BI(JNIEnv* env, jobject jobj, - jlong jwb_handle, - jbyteArray jkey, - jint jkey_len) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto single_delete = [&wb](ROCKSDB_NAMESPACE::Slice key) { - return wb->SingleDelete(key); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::k_op(single_delete, env, jobj, jkey, - jkey_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: singleDelete - * Signature: (J[BIJ)V - */ -void Java_org_rocksdb_WriteBatch_singleDelete__J_3BIJ(JNIEnv* env, jobject jobj, - jlong jwb_handle, - jbyteArray jkey, - jint jkey_len, - jlong jcf_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto single_delete = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { - return wb->SingleDelete(cf_handle, key); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::k_op(single_delete, env, jobj, jkey, - jkey_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: deleteDirect - * Signature: (JLjava/nio/ByteBuffer;IIJ)V - */ -void Java_org_rocksdb_WriteBatch_deleteDirect(JNIEnv* env, jobject /*jobj*/, - jlong jwb_handle, jobject jkey, - jint jkey_offset, jint jkey_len, - jlong jcf_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - auto remove = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice& key) { - if (cf_handle == nullptr) { - wb->Delete(key); - } else { - wb->Delete(cf_handle, 
key); - } - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_direct(remove, env, jkey, jkey_offset, - jkey_len); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: deleteRange - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_WriteBatch_deleteRange__J_3BI_3BI( - JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jbegin_key, - jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto deleteRange = [&wb](ROCKSDB_NAMESPACE::Slice beginKey, - ROCKSDB_NAMESPACE::Slice endKey) { - return wb->DeleteRange(beginKey, endKey); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(deleteRange, env, jobj, jbegin_key, - jbegin_key_len, jend_key, jend_key_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: deleteRange - * Signature: (J[BI[BIJ)V - */ -void Java_org_rocksdb_WriteBatch_deleteRange__J_3BI_3BIJ( - JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jbegin_key, - jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len, - jlong jcf_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto deleteRange = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice beginKey, - ROCKSDB_NAMESPACE::Slice endKey) { - return wb->DeleteRange(cf_handle, beginKey, endKey); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(deleteRange, env, jobj, jbegin_key, - jbegin_key_len, jend_key, jend_key_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: putLogData - * Signature: (J[BI)V - */ -void Java_org_rocksdb_WriteBatch_putLogData(JNIEnv* env, jobject jobj, - jlong jwb_handle, jbyteArray jblob, - jint jblob_len) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto putLogData = [&wb](ROCKSDB_NAMESPACE::Slice blob) { - return wb->PutLogData(blob); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::k_op(putLogData, env, jobj, jblob, jblob_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: iterate - * Signature: (JJ)V - */ -void Java_org_rocksdb_WriteBatch_iterate(JNIEnv* env, jobject /*jobj*/, - jlong jwb_handle, - jlong handlerHandle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - ROCKSDB_NAMESPACE::Status s = wb->Iterate( - reinterpret_cast( - handlerHandle)); - - if (s.ok()) { - return; - } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: data - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_WriteBatch_data(JNIEnv* env, jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - auto data = wb->Data(); - return ROCKSDB_NAMESPACE::JniUtil::copyBytes(env, data); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: getDataSize - * Signature: (J)J - */ -jlong Java_org_rocksdb_WriteBatch_getDataSize(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - auto data_size = wb->GetDataSize(); - return static_cast(data_size); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: hasPut - * 
Signature: (J)Z - */ -jboolean Java_org_rocksdb_WriteBatch_hasPut(JNIEnv* /*env*/, jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return wb->HasPut(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: hasDelete - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_WriteBatch_hasDelete(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return wb->HasDelete(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: hasSingleDelete - * Signature: (J)Z - */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasSingleDelete( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return wb->HasSingleDelete(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: hasDeleteRange - * Signature: (J)Z - */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasDeleteRange( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return wb->HasDeleteRange(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: hasMerge - * Signature: (J)Z - */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasMerge( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return wb->HasMerge(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: hasBeginPrepare - * Signature: (J)Z - */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasBeginPrepare( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return wb->HasBeginPrepare(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: hasEndPrepare - * Signature: (J)Z - */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasEndPrepare( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return wb->HasEndPrepare(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: hasCommit - * Signature: (J)Z - */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasCommit( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return wb->HasCommit(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: hasRollback - * Signature: (J)Z - */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasRollback( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return wb->HasRollback(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: markWalTerminationPoint - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatch_markWalTerminationPoint(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - wb->MarkWalTerminationPoint(); -} - -/* - * Class: org_rocksdb_WriteBatch - * Method: getWalTerminationPoint - * Signature: (J)Lorg/rocksdb/WriteBatch/SavePoint; - */ -jobject Java_org_rocksdb_WriteBatch_getWalTerminationPoint(JNIEnv* env, - jobject /*jobj*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - auto save_point = wb->GetWalTerminationPoint(); - return ROCKSDB_NAMESPACE::WriteBatchSavePointJni::construct(env, save_point); -} - -/* - * Class: 
org_rocksdb_WriteBatch - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatch_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - auto* wb = reinterpret_cast(handle); - assert(wb != nullptr); - delete wb; -} - -/* - * Class: org_rocksdb_WriteBatch_Handler - * Method: createNewHandler0 - * Signature: ()J - */ -jlong Java_org_rocksdb_WriteBatch_00024Handler_createNewHandler0(JNIEnv* env, - jobject jobj) { - auto* wbjnic = new ROCKSDB_NAMESPACE::WriteBatchHandlerJniCallback(env, jobj); - return GET_CPLUSPLUS_POINTER(wbjnic); -} diff --git a/java/rocksjni/write_batch_test.cc b/java/rocksjni/write_batch_test.cc deleted file mode 100644 index 30b9a7229..000000000 --- a/java/rocksjni/write_batch_test.cc +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::WriteBatch methods testing from Java side. -#include "rocksdb/write_batch.h" - -#include - -#include "db/memtable.h" -#include "db/write_batch_internal.h" -#include "include/org_rocksdb_WriteBatch.h" -#include "include/org_rocksdb_WriteBatchTest.h" -#include "include/org_rocksdb_WriteBatchTestInternalHelper.h" -#include "include/org_rocksdb_WriteBatch_Handler.h" -#include "options/cf_options.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/status.h" -#include "rocksdb/write_buffer_manager.h" -#include "rocksjni/portal.h" -#include "table/scoped_arena_iterator.h" -#include "test_util/testharness.h" -#include "util/string_util.h" - -/* - * Class: org_rocksdb_WriteBatchTest - * Method: getContents - * Signature: (J)[B - */ -jbyteArray Java_org_rocksdb_WriteBatchTest_getContents(JNIEnv* env, - jclass /*jclazz*/, - jlong jwb_handle) { - auto* b = reinterpret_cast(jwb_handle); - assert(b != nullptr); - - // todo: Currently the following code is directly copied from - // db/write_bench_test.cc. It could be implemented in java once - // all the necessary components can be accessed via jni api. 
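// Orientation (summary of the code below, example values hypothetical): the
// batch is replayed into a throwaway MemTable via
// WriteBatchInternal::InsertInto, the memtable is scanned, and each entry is
// rendered into the returned byte[] as a token such as "Put(k1, v1)@100" or
// "Delete(k2)@101", with an "Err:..." marker appended if the status is non-OK
// or the entry count does not match WriteBatchInternal::Count.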
- - ROCKSDB_NAMESPACE::InternalKeyComparator cmp( - ROCKSDB_NAMESPACE::BytewiseComparator()); - auto factory = std::make_shared(); - ROCKSDB_NAMESPACE::Options options; - ROCKSDB_NAMESPACE::WriteBufferManager wb(options.db_write_buffer_size); - options.memtable_factory = factory; - ROCKSDB_NAMESPACE::MemTable* mem = new ROCKSDB_NAMESPACE::MemTable( - cmp, ROCKSDB_NAMESPACE::ImmutableOptions(options), - ROCKSDB_NAMESPACE::MutableCFOptions(options), &wb, - ROCKSDB_NAMESPACE::kMaxSequenceNumber, 0 /* column_family_id */); - mem->Ref(); - std::string state; - ROCKSDB_NAMESPACE::ColumnFamilyMemTablesDefault cf_mems_default(mem); - ROCKSDB_NAMESPACE::Status s = - ROCKSDB_NAMESPACE::WriteBatchInternal::InsertInto(b, &cf_mems_default, - nullptr, nullptr); - unsigned int count = 0; - ROCKSDB_NAMESPACE::Arena arena; - ROCKSDB_NAMESPACE::ScopedArenaIterator iter( - mem->NewIterator(ROCKSDB_NAMESPACE::ReadOptions(), &arena)); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ROCKSDB_NAMESPACE::ParsedInternalKey ikey; - ikey.clear(); - ROCKSDB_NAMESPACE::Status pik_status = ROCKSDB_NAMESPACE::ParseInternalKey( - iter->key(), &ikey, true /* log_err_key */); - pik_status.PermitUncheckedError(); - assert(pik_status.ok()); - switch (ikey.type) { - case ROCKSDB_NAMESPACE::kTypeValue: - state.append("Put("); - state.append(ikey.user_key.ToString()); - state.append(", "); - state.append(iter->value().ToString()); - state.append(")"); - count++; - break; - case ROCKSDB_NAMESPACE::kTypeMerge: - state.append("Merge("); - state.append(ikey.user_key.ToString()); - state.append(", "); - state.append(iter->value().ToString()); - state.append(")"); - count++; - break; - case ROCKSDB_NAMESPACE::kTypeDeletion: - state.append("Delete("); - state.append(ikey.user_key.ToString()); - state.append(")"); - count++; - break; - case ROCKSDB_NAMESPACE::kTypeSingleDeletion: - state.append("SingleDelete("); - state.append(ikey.user_key.ToString()); - state.append(")"); - count++; - break; - case ROCKSDB_NAMESPACE::kTypeRangeDeletion: - state.append("DeleteRange("); - state.append(ikey.user_key.ToString()); - state.append(", "); - state.append(iter->value().ToString()); - state.append(")"); - count++; - break; - case ROCKSDB_NAMESPACE::kTypeLogData: - state.append("LogData("); - state.append(ikey.user_key.ToString()); - state.append(")"); - count++; - break; - default: - assert(false); - state.append("Err:Expected("); - state.append(std::to_string(ikey.type)); - state.append(")"); - count++; - break; - } - state.append("@"); - state.append(std::to_string(ikey.sequence)); - } - if (!s.ok()) { - state.append(s.ToString()); - } else if (ROCKSDB_NAMESPACE::WriteBatchInternal::Count(b) != count) { - state.append("Err:CountMismatch(expected="); - state.append( - std::to_string(ROCKSDB_NAMESPACE::WriteBatchInternal::Count(b))); - state.append(", actual="); - state.append(std::to_string(count)); - state.append(")"); - } - delete mem->Unref(); - - jbyteArray jstate = env->NewByteArray(static_cast(state.size())); - if (jstate == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - - env->SetByteArrayRegion( - jstate, 0, static_cast(state.size()), - const_cast(reinterpret_cast(state.c_str()))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jstate); - return nullptr; - } - - return jstate; -} - -/* - * Class: org_rocksdb_WriteBatchTestInternalHelper - * Method: setSequence - * Signature: (JJ)V - */ -void 
Java_org_rocksdb_WriteBatchTestInternalHelper_setSequence( - JNIEnv* /*env*/, jclass /*jclazz*/, jlong jwb_handle, jlong jsn) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - ROCKSDB_NAMESPACE::WriteBatchInternal::SetSequence( - wb, static_cast(jsn)); -} - -/* - * Class: org_rocksdb_WriteBatchTestInternalHelper - * Method: sequence - * Signature: (J)J - */ -jlong Java_org_rocksdb_WriteBatchTestInternalHelper_sequence(JNIEnv* /*env*/, - jclass /*jclazz*/, - jlong jwb_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - - return static_cast( - ROCKSDB_NAMESPACE::WriteBatchInternal::Sequence(wb)); -} - -/* - * Class: org_rocksdb_WriteBatchTestInternalHelper - * Method: append - * Signature: (JJ)V - */ -void Java_org_rocksdb_WriteBatchTestInternalHelper_append(JNIEnv* /*env*/, - jclass /*jclazz*/, - jlong jwb_handle_1, - jlong jwb_handle_2) { - auto* wb1 = reinterpret_cast(jwb_handle_1); - assert(wb1 != nullptr); - auto* wb2 = reinterpret_cast(jwb_handle_2); - assert(wb2 != nullptr); - - ROCKSDB_NAMESPACE::WriteBatchInternal::Append(wb1, wb2); -} diff --git a/java/rocksjni/write_batch_with_index.cc b/java/rocksjni/write_batch_with_index.cc deleted file mode 100644 index a5c3216cb..000000000 --- a/java/rocksjni/write_batch_with_index.cc +++ /dev/null @@ -1,953 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the "bridge" between Java and C++ and enables -// calling c++ ROCKSDB_NAMESPACE::WriteBatchWithIndex methods from Java side. - -#include "rocksdb/utilities/write_batch_with_index.h" - -#include "include/org_rocksdb_WBWIRocksIterator.h" -#include "include/org_rocksdb_WriteBatchWithIndex.h" -#include "rocksdb/comparator.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: newWriteBatchWithIndex - * Signature: ()J - */ -jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__( - JNIEnv* /*env*/, jclass /*jcls*/) { - auto* wbwi = new ROCKSDB_NAMESPACE::WriteBatchWithIndex(); - return GET_CPLUSPLUS_POINTER(wbwi); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: newWriteBatchWithIndex - * Signature: (Z)J - */ -jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__Z( - JNIEnv* /*env*/, jclass /*jcls*/, jboolean joverwrite_key) { - auto* wbwi = new ROCKSDB_NAMESPACE::WriteBatchWithIndex( - ROCKSDB_NAMESPACE::BytewiseComparator(), 0, - static_cast(joverwrite_key)); - return GET_CPLUSPLUS_POINTER(wbwi); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: newWriteBatchWithIndex - * Signature: (JBIZ)J - */ -jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__JBIZ( - JNIEnv* /*env*/, jclass /*jcls*/, jlong jfallback_index_comparator_handle, - jbyte jcomparator_type, jint jreserved_bytes, jboolean joverwrite_key) { - ROCKSDB_NAMESPACE::Comparator* fallback_comparator = nullptr; - switch (jcomparator_type) { - // JAVA_COMPARATOR - case 0x0: - fallback_comparator = - reinterpret_cast( - jfallback_index_comparator_handle); - break; - - // JAVA_NATIVE_COMPARATOR_WRAPPER - case 0x1: - fallback_comparator = reinterpret_cast( - jfallback_index_comparator_handle); - break; - } - auto* wbwi = new ROCKSDB_NAMESPACE::WriteBatchWithIndex( - fallback_comparator, 
static_cast(jreserved_bytes), - static_cast(joverwrite_key)); - return GET_CPLUSPLUS_POINTER(wbwi); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: count0 - * Signature: (J)I - */ -jint Java_org_rocksdb_WriteBatchWithIndex_count0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jwbwi_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - - return static_cast(wbwi->GetWriteBatch()->Count()); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: put - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_put__J_3BI_3BI( - JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, - jint jkey_len, jbyteArray jentry_value, jint jentry_value_len) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto put = [&wbwi](ROCKSDB_NAMESPACE::Slice key, - ROCKSDB_NAMESPACE::Slice value) { - return wbwi->Put(key, value); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len, - jentry_value, jentry_value_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: put - * Signature: (J[BI[BIJ)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_put__J_3BI_3BIJ( - JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, - jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, - jlong jcf_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto put = [&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice key, - ROCKSDB_NAMESPACE::Slice value) { - return wbwi->Put(cf_handle, key, value); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len, - jentry_value, jentry_value_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: putDirect - * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_putDirect( - JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, - jint jkey_offset, jint jkey_len, jobject jval, jint jval_offset, - jint jval_len, jlong jcf_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - auto put = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice& key, - ROCKSDB_NAMESPACE::Slice& value) { - if (cf_handle == nullptr) { - wb->Put(key, value); - } else { - wb->Put(cf_handle, key, value); - } - }; - ROCKSDB_NAMESPACE::JniUtil::kv_op_direct( - put, env, jkey, jkey_offset, jkey_len, jval, jval_offset, jval_len); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: merge - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_merge__J_3BI_3BI( - JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, - jint jkey_len, jbyteArray jentry_value, jint jentry_value_len) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto merge = [&wbwi](ROCKSDB_NAMESPACE::Slice key, - ROCKSDB_NAMESPACE::Slice value) { - return wbwi->Merge(key, value); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(merge, env, jobj, jkey, jkey_len, - jentry_value, jentry_value_len); - if (status != nullptr && !status->ok()) { - 
ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: merge - * Signature: (J[BI[BIJ)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_merge__J_3BI_3BIJ( - JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, - jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, - jlong jcf_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto merge = [&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice key, - ROCKSDB_NAMESPACE::Slice value) { - return wbwi->Merge(cf_handle, key, value); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(merge, env, jobj, jkey, jkey_len, - jentry_value, jentry_value_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: delete - * Signature: (J[BI)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_delete__J_3BI(JNIEnv* env, - jobject jobj, - jlong jwbwi_handle, - jbyteArray jkey, - jint jkey_len) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto remove = [&wbwi](ROCKSDB_NAMESPACE::Slice key) { - return wbwi->Delete(key); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::k_op(remove, env, jobj, jkey, jkey_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: delete - * Signature: (J[BIJ)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_delete__J_3BIJ( - JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, - jint jkey_len, jlong jcf_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto remove = [&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { - return wbwi->Delete(cf_handle, key); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::k_op(remove, env, jobj, jkey, jkey_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: singleDelete - * Signature: (J[BI)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_singleDelete__J_3BI( - JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, - jint jkey_len) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto single_delete = [&wbwi](ROCKSDB_NAMESPACE::Slice key) { - return wbwi->SingleDelete(key); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::k_op(single_delete, env, jobj, jkey, - jkey_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: singleDelete - * Signature: (J[BIJ)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_singleDelete__J_3BIJ( - JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, - jint jkey_len, jlong jcf_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto single_delete = [&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { - return wbwi->SingleDelete(cf_handle, key); - }; - std::unique_ptr status = - 
ROCKSDB_NAMESPACE::JniUtil::k_op(single_delete, env, jobj, jkey, - jkey_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: deleteDirect - * Signature: (JLjava/nio/ByteBuffer;IIJ)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_deleteDirect( - JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, - jint jkey_offset, jint jkey_len, jlong jcf_handle) { - auto* wb = reinterpret_cast(jwb_handle); - assert(wb != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - auto remove = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice& key) { - if (cf_handle == nullptr) { - wb->Delete(key); - } else { - wb->Delete(cf_handle, key); - } - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_direct(remove, env, jkey, jkey_offset, - jkey_len); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: deleteRange - * Signature: (J[BI[BI)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_deleteRange__J_3BI_3BI( - JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jbegin_key, - jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto deleteRange = [&wbwi](ROCKSDB_NAMESPACE::Slice beginKey, - ROCKSDB_NAMESPACE::Slice endKey) { - return wbwi->DeleteRange(beginKey, endKey); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(deleteRange, env, jobj, jbegin_key, - jbegin_key_len, jend_key, jend_key_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: deleteRange - * Signature: (J[BI[BIJ)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_deleteRange__J_3BI_3BIJ( - JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jbegin_key, - jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len, - jlong jcf_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto* cf_handle = - reinterpret_cast(jcf_handle); - assert(cf_handle != nullptr); - auto deleteRange = [&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice beginKey, - ROCKSDB_NAMESPACE::Slice endKey) { - return wbwi->DeleteRange(cf_handle, beginKey, endKey); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::kv_op(deleteRange, env, jobj, jbegin_key, - jbegin_key_len, jend_key, jend_key_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: putLogData - * Signature: (J[BI)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_putLogData(JNIEnv* env, jobject jobj, - jlong jwbwi_handle, - jbyteArray jblob, - jint jblob_len) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - auto putLogData = [&wbwi](ROCKSDB_NAMESPACE::Slice blob) { - return wbwi->PutLogData(blob); - }; - std::unique_ptr status = - ROCKSDB_NAMESPACE::JniUtil::k_op(putLogData, env, jobj, jblob, jblob_len); - if (status != nullptr && !status->ok()) { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); - } -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: clear - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_clear0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jwbwi_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - - wbwi->Clear(); -} - -/* - * Class: 
org_rocksdb_WriteBatchWithIndex - * Method: setSavePoint0 - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_setSavePoint0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jwbwi_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - - wbwi->SetSavePoint(); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: rollbackToSavePoint0 - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_rollbackToSavePoint0( - JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - - auto s = wbwi->RollbackToSavePoint(); - - if (s.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: popSavePoint - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_popSavePoint(JNIEnv* env, - jobject /*jobj*/, - jlong jwbwi_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - - auto s = wbwi->PopSavePoint(); - - if (s.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: setMaxBytes - * Signature: (JJ)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_setMaxBytes(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jwbwi_handle, - jlong jmax_bytes) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - - wbwi->SetMaxBytes(static_cast(jmax_bytes)); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: getWriteBatch - * Signature: (J)Lorg/rocksdb/WriteBatch; - */ -jobject Java_org_rocksdb_WriteBatchWithIndex_getWriteBatch(JNIEnv* env, - jobject /*jobj*/, - jlong jwbwi_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - assert(wbwi != nullptr); - - auto* wb = wbwi->GetWriteBatch(); - - // TODO(AR) is the `wb` object owned by us? - return ROCKSDB_NAMESPACE::WriteBatchJni::construct(env, wb); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: iterator0 - * Signature: (J)J - */ -jlong Java_org_rocksdb_WriteBatchWithIndex_iterator0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jwbwi_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - auto* wbwi_iterator = wbwi->NewIterator(); - return GET_CPLUSPLUS_POINTER(wbwi_iterator); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: iterator1 - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_WriteBatchWithIndex_iterator1(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jwbwi_handle, - jlong jcf_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - auto* wbwi_iterator = wbwi->NewIterator(cf_handle); - return GET_CPLUSPLUS_POINTER(wbwi_iterator); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: iteratorWithBase - * Signature: (JJJJ)J - */ -jlong Java_org_rocksdb_WriteBatchWithIndex_iteratorWithBase( - JNIEnv*, jobject, jlong jwbwi_handle, jlong jcf_handle, - jlong jbase_iterator_handle, jlong jread_opts_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - auto* base_iterator = - reinterpret_cast(jbase_iterator_handle); - ROCKSDB_NAMESPACE::ReadOptions* read_opts = - jread_opts_handle == 0 - ? 
nullptr - : reinterpret_cast( - jread_opts_handle); - auto* iterator = - wbwi->NewIteratorWithBase(cf_handle, base_iterator, read_opts); - return GET_CPLUSPLUS_POINTER(iterator); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: getFromBatch - * Signature: (JJ[BI)[B - */ -jbyteArray JNICALL Java_org_rocksdb_WriteBatchWithIndex_getFromBatch__JJ_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdbopt_handle, - jbyteArray jkey, jint jkey_len) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - auto* dbopt = reinterpret_cast(jdbopt_handle); - - auto getter = [&wbwi, &dbopt](const ROCKSDB_NAMESPACE::Slice& key, - std::string* value) { - return wbwi->GetFromBatch(*dbopt, key, value); - }; - - return ROCKSDB_NAMESPACE::JniUtil::v_op(getter, env, jkey, jkey_len); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: getFromBatch - * Signature: (JJ[BIJ)[B - */ -jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatch__JJ_3BIJ( - JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdbopt_handle, - jbyteArray jkey, jint jkey_len, jlong jcf_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - auto* dbopt = reinterpret_cast(jdbopt_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - - auto getter = [&wbwi, &cf_handle, &dbopt](const ROCKSDB_NAMESPACE::Slice& key, - std::string* value) { - return wbwi->GetFromBatch(cf_handle, *dbopt, key, value); - }; - - return ROCKSDB_NAMESPACE::JniUtil::v_op(getter, env, jkey, jkey_len); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: getFromBatchAndDB - * Signature: (JJJ[BI)[B - */ -jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdb_handle, - jlong jreadopt_handle, jbyteArray jkey, jint jkey_len) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - auto* db = reinterpret_cast(jdb_handle); - auto* readopt = - reinterpret_cast(jreadopt_handle); - - auto getter = [&wbwi, &db, &readopt](const ROCKSDB_NAMESPACE::Slice& key, - std::string* value) { - return wbwi->GetFromBatchAndDB(db, *readopt, key, value); - }; - - return ROCKSDB_NAMESPACE::JniUtil::v_op(getter, env, jkey, jkey_len); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: getFromBatchAndDB - * Signature: (JJJ[BIJ)[B - */ -jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BIJ( - JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdb_handle, - jlong jreadopt_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { - auto* wbwi = - reinterpret_cast(jwbwi_handle); - auto* db = reinterpret_cast(jdb_handle); - auto* readopt = - reinterpret_cast(jreadopt_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - - auto getter = [&wbwi, &db, &cf_handle, &readopt]( - const ROCKSDB_NAMESPACE::Slice& key, std::string* value) { - return wbwi->GetFromBatchAndDB(db, *readopt, cf_handle, key, value); - }; - - return ROCKSDB_NAMESPACE::JniUtil::v_op(getter, env, jkey, jkey_len); -} - -/* - * Class: org_rocksdb_WriteBatchWithIndex - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBatchWithIndex_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - auto* wbwi = - reinterpret_cast(handle); - assert(wbwi != nullptr); - delete wbwi; -} - -/* WBWIRocksIterator below */ - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_WBWIRocksIterator_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong 
handle) { - auto* it = reinterpret_cast(handle); - assert(it != nullptr); - delete it; -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: isValid0 - * Signature: (J)Z - */ -jboolean Java_org_rocksdb_WBWIRocksIterator_isValid0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - return reinterpret_cast(handle)->Valid(); -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: seekToFirst0 - * Signature: (J)V - */ -void Java_org_rocksdb_WBWIRocksIterator_seekToFirst0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->SeekToFirst(); -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: seekToLast0 - * Signature: (J)V - */ -void Java_org_rocksdb_WBWIRocksIterator_seekToLast0(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->SeekToLast(); -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: next0 - * Signature: (J)V - */ -void Java_org_rocksdb_WBWIRocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->Next(); -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: prev0 - * Signature: (J)V - */ -void Java_org_rocksdb_WBWIRocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, - jlong handle) { - reinterpret_cast(handle)->Prev(); -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: seek0 - * Signature: (J[BI)V - */ -void Java_org_rocksdb_WBWIRocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, - jlong handle, jbyteArray jtarget, - jint jtarget_len) { - auto* it = reinterpret_cast(handle); - jbyte* target = new jbyte[jtarget_len]; - env->GetByteArrayRegion(jtarget, 0, jtarget_len, target); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] target; - return; - } - - ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), - jtarget_len); - - it->Seek(target_slice); - - delete[] target; -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: seekDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)V - */ -void Java_org_rocksdb_WBWIRocksIterator_seekDirect0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, - jint jtarget_off, jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seek = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->Seek(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seek, env, jtarget, jtarget_off, - jtarget_len); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. 
- * - * Class: org_rocksdb_WBWIRocksIterator - * Method: seekByteArray0 - * Signature: (J[BII)V - */ -void Java_org_rocksdb_WBWIRocksIterator_seekByteArray0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, - jint jtarget_off, jint jtarget_len) { - const std::unique_ptr target(new char[jtarget_len]); - if (target == nullptr) { - jclass oom_class = env->FindClass("/lang/java/OutOfMemoryError"); - env->ThrowNew(oom_class, - "Memory allocation failed in RocksDB JNI function"); - return; - } - env->GetByteArrayRegion(jtarget, jtarget_off, jtarget_len, - reinterpret_cast(target.get())); - - ROCKSDB_NAMESPACE::Slice target_slice(target.get(), jtarget_len); - - auto* it = reinterpret_cast(handle); - it->Seek(target_slice); -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: seekForPrev0 - * Signature: (J[BI)V - */ -void Java_org_rocksdb_WBWIRocksIterator_seekForPrev0(JNIEnv* env, - jobject /*jobj*/, - jlong handle, - jbyteArray jtarget, - jint jtarget_len) { - auto* it = reinterpret_cast(handle); - jbyte* target = new jbyte[jtarget_len]; - env->GetByteArrayRegion(jtarget, 0, jtarget_len, target); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] target; - return; - } - - ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), - jtarget_len); - - it->SeekForPrev(target_slice); - - delete[] target; -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: seekForPrevDirect0 - * Signature: (JLjava/nio/ByteBuffer;II)V - */ -void Java_org_rocksdb_WBWIRocksIterator_seekForPrevDirect0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, - jint jtarget_off, jint jtarget_len) { - auto* it = reinterpret_cast(handle); - auto seek_for_prev = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { - it->SeekForPrev(target_slice); - }; - ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seek_for_prev, env, jtarget, - jtarget_off, jtarget_len); -} - -/* - * This method supports fetching into indirect byte buffers; - * the Java wrapper extracts the byte[] and passes it here. 
- * - * Class: org_rocksdb_WBWIRocksIterator - * Method: seekForPrevByteArray0 - * Signature: (J[BII)V - */ -void Java_org_rocksdb_WBWIRocksIterator_seekForPrevByteArray0( - JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, - jint jtarget_off, jint jtarget_len) { - const std::unique_ptr target(new char[jtarget_len]); - if (target == nullptr) { - jclass oom_class = env->FindClass("/lang/java/OutOfMemoryError"); - env->ThrowNew(oom_class, - "Memory allocation failed in RocksDB JNI function"); - return; - } - env->GetByteArrayRegion(jtarget, jtarget_off, jtarget_len, - reinterpret_cast(target.get())); - - ROCKSDB_NAMESPACE::Slice target_slice(target.get(), jtarget_len); - - auto* it = reinterpret_cast(handle); - it->SeekForPrev(target_slice); -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: status0 - * Signature: (J)V - */ -void Java_org_rocksdb_WBWIRocksIterator_status0(JNIEnv* env, jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - ROCKSDB_NAMESPACE::Status s = it->status(); - - if (s.ok()) { - return; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: entry1 - * Signature: (J)[J - */ -jlongArray Java_org_rocksdb_WBWIRocksIterator_entry1(JNIEnv* env, - jobject /*jobj*/, - jlong handle) { - auto* it = reinterpret_cast(handle); - const ROCKSDB_NAMESPACE::WriteEntry& we = it->Entry(); - - jlong results[3]; - - // set the type of the write entry - results[0] = ROCKSDB_NAMESPACE::WriteTypeJni::toJavaWriteType(we.type); - - // NOTE: key_slice and value_slice will be freed by - // org.rocksdb.DirectSlice#close - - auto* key_slice = new ROCKSDB_NAMESPACE::Slice(we.key.data(), we.key.size()); - results[1] = GET_CPLUSPLUS_POINTER(key_slice); - if (we.type == ROCKSDB_NAMESPACE::kDeleteRecord || - we.type == ROCKSDB_NAMESPACE::kSingleDeleteRecord || - we.type == ROCKSDB_NAMESPACE::kLogDataRecord) { - // set native handle of value slice to null if no value available - results[2] = 0; - } else { - auto* value_slice = - new ROCKSDB_NAMESPACE::Slice(we.value.data(), we.value.size()); - results[2] = GET_CPLUSPLUS_POINTER(value_slice); - } - - jlongArray jresults = env->NewLongArray(3); - if (jresults == nullptr) { - // exception thrown: OutOfMemoryError - if (results[2] != 0) { - auto* value_slice = - reinterpret_cast(results[2]); - delete value_slice; - } - delete key_slice; - return nullptr; - } - - env->SetLongArrayRegion(jresults, 0, 3, results); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - env->DeleteLocalRef(jresults); - if (results[2] != 0) { - auto* value_slice = - reinterpret_cast(results[2]); - delete value_slice; - } - delete key_slice; - return nullptr; - } - - return jresults; -} - -/* - * Class: org_rocksdb_WBWIRocksIterator - * Method: refresh0 - * Signature: (J)V - */ -void Java_org_rocksdb_WBWIRocksIterator_refresh0(JNIEnv* env) { - ROCKSDB_NAMESPACE::Status s = - ROCKSDB_NAMESPACE::Status::NotSupported("Refresh() is not supported"); - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); -} diff --git a/java/rocksjni/write_buffer_manager.cc b/java/rocksjni/write_buffer_manager.cc deleted file mode 100644 index 9ce697e10..000000000 --- a/java/rocksjni/write_buffer_manager.cc +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "rocksdb/write_buffer_manager.h" - -#include - -#include - -#include "include/org_rocksdb_WriteBufferManager.h" -#include "rocksdb/cache.h" -#include "rocksjni/cplusplus_to_java_convert.h" - -/* - * Class: org_rocksdb_WriteBufferManager - * Method: newWriteBufferManager - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_WriteBufferManager_newWriteBufferManager( - JNIEnv* /*env*/, jclass /*jclazz*/, jlong jbuffer_size, jlong jcache_handle, - jboolean allow_stall) { - auto* cache_ptr = - reinterpret_cast*>( - jcache_handle); - auto* write_buffer_manager = - new std::shared_ptr( - std::make_shared( - jbuffer_size, *cache_ptr, allow_stall)); - return GET_CPLUSPLUS_POINTER(write_buffer_manager); -} - -/* - * Class: org_rocksdb_WriteBufferManager - * Method: disposeInternal - * Signature: (J)V - */ -void Java_org_rocksdb_WriteBufferManager_disposeInternal(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle) { - auto* write_buffer_manager = - reinterpret_cast*>( - jhandle); - assert(write_buffer_manager != nullptr); - delete write_buffer_manager; -} diff --git a/java/rocksjni/writebatchhandlerjnicallback.cc b/java/rocksjni/writebatchhandlerjnicallback.cc deleted file mode 100644 index 66ceabe9a..000000000 --- a/java/rocksjni/writebatchhandlerjnicallback.cc +++ /dev/null @@ -1,519 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::Comparator. 
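The newWriteBufferManager binding above wraps a shared_ptr to a C++ WriteBufferManager whose memory accounting can optionally be charged against a block cache. A hedged sketch of the corresponding Java-side setup; the sizes, class name, and use of LRUCache are illustrative, not prescriptive:

    import org.rocksdb.Cache;
    import org.rocksdb.DBOptions;
    import org.rocksdb.LRUCache;
    import org.rocksdb.RocksDB;
    import org.rocksdb.WriteBufferManager;

    public class WriteBufferManagerSketch {
      static { RocksDB.loadLibrary(); }

      public static void main(final String[] args) {
        try (final Cache cache = new LRUCache(256 * 1024 * 1024);
             // Cap memtable memory at 128 MB and charge it against the cache above
             final WriteBufferManager writeBufferManager =
                 new WriteBufferManager(128 * 1024 * 1024, cache);
             final DBOptions dbOptions = new DBOptions()
                 .setCreateIfMissing(true)
                 .setWriteBufferManager(writeBufferManager)) {
          // open the database with dbOptions and column-family descriptors as usual
        }
      }
    }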
- -#include "rocksjni/writebatchhandlerjnicallback.h" - -#include "rocksjni/portal.h" - -namespace ROCKSDB_NAMESPACE { -WriteBatchHandlerJniCallback::WriteBatchHandlerJniCallback( - JNIEnv* env, jobject jWriteBatchHandler) - : JniCallback(env, jWriteBatchHandler), m_env(env) { - m_jPutCfMethodId = WriteBatchHandlerJni::getPutCfMethodId(env); - if (m_jPutCfMethodId == nullptr) { - // exception thrown - return; - } - - m_jPutMethodId = WriteBatchHandlerJni::getPutMethodId(env); - if (m_jPutMethodId == nullptr) { - // exception thrown - return; - } - - m_jMergeCfMethodId = WriteBatchHandlerJni::getMergeCfMethodId(env); - if (m_jMergeCfMethodId == nullptr) { - // exception thrown - return; - } - - m_jMergeMethodId = WriteBatchHandlerJni::getMergeMethodId(env); - if (m_jMergeMethodId == nullptr) { - // exception thrown - return; - } - - m_jDeleteCfMethodId = WriteBatchHandlerJni::getDeleteCfMethodId(env); - if (m_jDeleteCfMethodId == nullptr) { - // exception thrown - return; - } - - m_jDeleteMethodId = WriteBatchHandlerJni::getDeleteMethodId(env); - if (m_jDeleteMethodId == nullptr) { - // exception thrown - return; - } - - m_jSingleDeleteCfMethodId = - WriteBatchHandlerJni::getSingleDeleteCfMethodId(env); - if (m_jSingleDeleteCfMethodId == nullptr) { - // exception thrown - return; - } - - m_jSingleDeleteMethodId = WriteBatchHandlerJni::getSingleDeleteMethodId(env); - if (m_jSingleDeleteMethodId == nullptr) { - // exception thrown - return; - } - - m_jDeleteRangeCfMethodId = - WriteBatchHandlerJni::getDeleteRangeCfMethodId(env); - if (m_jDeleteRangeCfMethodId == nullptr) { - // exception thrown - return; - } - - m_jDeleteRangeMethodId = WriteBatchHandlerJni::getDeleteRangeMethodId(env); - if (m_jDeleteRangeMethodId == nullptr) { - // exception thrown - return; - } - - m_jLogDataMethodId = WriteBatchHandlerJni::getLogDataMethodId(env); - if (m_jLogDataMethodId == nullptr) { - // exception thrown - return; - } - - m_jPutBlobIndexCfMethodId = - WriteBatchHandlerJni::getPutBlobIndexCfMethodId(env); - if (m_jPutBlobIndexCfMethodId == nullptr) { - // exception thrown - return; - } - - m_jMarkBeginPrepareMethodId = - WriteBatchHandlerJni::getMarkBeginPrepareMethodId(env); - if (m_jMarkBeginPrepareMethodId == nullptr) { - // exception thrown - return; - } - - m_jMarkEndPrepareMethodId = - WriteBatchHandlerJni::getMarkEndPrepareMethodId(env); - if (m_jMarkEndPrepareMethodId == nullptr) { - // exception thrown - return; - } - - m_jMarkNoopMethodId = WriteBatchHandlerJni::getMarkNoopMethodId(env); - if (m_jMarkNoopMethodId == nullptr) { - // exception thrown - return; - } - - m_jMarkRollbackMethodId = WriteBatchHandlerJni::getMarkRollbackMethodId(env); - if (m_jMarkRollbackMethodId == nullptr) { - // exception thrown - return; - } - - m_jMarkCommitMethodId = WriteBatchHandlerJni::getMarkCommitMethodId(env); - if (m_jMarkCommitMethodId == nullptr) { - // exception thrown - return; - } - - m_jMarkCommitWithTimestampMethodId = - WriteBatchHandlerJni::getMarkCommitWithTimestampMethodId(env); - if (m_jMarkCommitWithTimestampMethodId == nullptr) { - // exception thrown - return; - } - - m_jContinueMethodId = WriteBatchHandlerJni::getContinueMethodId(env); - if (m_jContinueMethodId == nullptr) { - // exception thrown - return; - } -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::PutCF( - uint32_t column_family_id, const Slice& key, const Slice& value) { - auto put = [this, column_family_id](jbyteArray j_key, jbyteArray j_value) { - m_env->CallVoidMethod(m_jcallback_obj, m_jPutCfMethodId, - 
static_cast(column_family_id), j_key, j_value); - }; - auto status = WriteBatchHandlerJniCallback::kv_op(key, value, put); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? - } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -void WriteBatchHandlerJniCallback::Put(const Slice& key, const Slice& value) { - auto put = [this](jbyteArray j_key, jbyteArray j_value) { - m_env->CallVoidMethod(m_jcallback_obj, m_jPutMethodId, j_key, j_value); - }; - WriteBatchHandlerJniCallback::kv_op(key, value, put); -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MergeCF( - uint32_t column_family_id, const Slice& key, const Slice& value) { - auto merge = [this, column_family_id](jbyteArray j_key, jbyteArray j_value) { - m_env->CallVoidMethod(m_jcallback_obj, m_jMergeCfMethodId, - static_cast(column_family_id), j_key, j_value); - }; - auto status = WriteBatchHandlerJniCallback::kv_op(key, value, merge); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? - } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -void WriteBatchHandlerJniCallback::Merge(const Slice& key, const Slice& value) { - auto merge = [this](jbyteArray j_key, jbyteArray j_value) { - m_env->CallVoidMethod(m_jcallback_obj, m_jMergeMethodId, j_key, j_value); - }; - WriteBatchHandlerJniCallback::kv_op(key, value, merge); -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::DeleteCF( - uint32_t column_family_id, const Slice& key) { - auto remove = [this, column_family_id](jbyteArray j_key) { - m_env->CallVoidMethod(m_jcallback_obj, m_jDeleteCfMethodId, - static_cast(column_family_id), j_key); - }; - auto status = WriteBatchHandlerJniCallback::k_op(key, remove); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? - } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -void WriteBatchHandlerJniCallback::Delete(const Slice& key) { - auto remove = [this](jbyteArray j_key) { - m_env->CallVoidMethod(m_jcallback_obj, m_jDeleteMethodId, j_key); - }; - WriteBatchHandlerJniCallback::k_op(key, remove); -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::SingleDeleteCF( - uint32_t column_family_id, const Slice& key) { - auto singleDelete = [this, column_family_id](jbyteArray j_key) { - m_env->CallVoidMethod(m_jcallback_obj, m_jSingleDeleteCfMethodId, - static_cast(column_family_id), j_key); - }; - auto status = WriteBatchHandlerJniCallback::k_op(key, singleDelete); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? 
- } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -void WriteBatchHandlerJniCallback::SingleDelete(const Slice& key) { - auto singleDelete = [this](jbyteArray j_key) { - m_env->CallVoidMethod(m_jcallback_obj, m_jSingleDeleteMethodId, j_key); - }; - WriteBatchHandlerJniCallback::k_op(key, singleDelete); -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::DeleteRangeCF( - uint32_t column_family_id, const Slice& beginKey, const Slice& endKey) { - auto deleteRange = [this, column_family_id](jbyteArray j_beginKey, - jbyteArray j_endKey) { - m_env->CallVoidMethod(m_jcallback_obj, m_jDeleteRangeCfMethodId, - static_cast(column_family_id), j_beginKey, - j_endKey); - }; - auto status = - WriteBatchHandlerJniCallback::kv_op(beginKey, endKey, deleteRange); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? - } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -void WriteBatchHandlerJniCallback::DeleteRange(const Slice& beginKey, - const Slice& endKey) { - auto deleteRange = [this](jbyteArray j_beginKey, jbyteArray j_endKey) { - m_env->CallVoidMethod(m_jcallback_obj, m_jDeleteRangeMethodId, j_beginKey, - j_endKey); - }; - WriteBatchHandlerJniCallback::kv_op(beginKey, endKey, deleteRange); -} - -void WriteBatchHandlerJniCallback::LogData(const Slice& blob) { - auto logData = [this](jbyteArray j_blob) { - m_env->CallVoidMethod(m_jcallback_obj, m_jLogDataMethodId, j_blob); - }; - WriteBatchHandlerJniCallback::k_op(blob, logData); -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::PutBlobIndexCF( - uint32_t column_family_id, const Slice& key, const Slice& value) { - auto putBlobIndex = [this, column_family_id](jbyteArray j_key, - jbyteArray j_value) { - m_env->CallVoidMethod(m_jcallback_obj, m_jPutBlobIndexCfMethodId, - static_cast(column_family_id), j_key, j_value); - }; - auto status = WriteBatchHandlerJniCallback::kv_op(key, value, putBlobIndex); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? 
- } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkBeginPrepare( - bool unprepare) { -#ifndef DEBUG - (void)unprepare; -#else - assert(!unprepare); -#endif - m_env->CallVoidMethod(m_jcallback_obj, m_jMarkBeginPrepareMethodId); - - // check for Exception, in-particular RocksDBException - if (m_env->ExceptionCheck()) { - // exception thrown - jthrowable exception = m_env->ExceptionOccurred(); - std::unique_ptr status = - ROCKSDB_NAMESPACE::RocksDBExceptionJni::toCppStatus(m_env, exception); - if (status == nullptr) { - // unkown status or exception occurred extracting status - m_env->ExceptionDescribe(); - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) probably need a - // better error code here - - } else { - m_env->ExceptionClear(); // clear the exception, as we have extracted the - // status - return ROCKSDB_NAMESPACE::Status(*status); - } - } - - return ROCKSDB_NAMESPACE::Status::OK(); -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkEndPrepare( - const Slice& xid) { - auto markEndPrepare = [this](jbyteArray j_xid) { - m_env->CallVoidMethod(m_jcallback_obj, m_jMarkEndPrepareMethodId, j_xid); - }; - auto status = WriteBatchHandlerJniCallback::k_op(xid, markEndPrepare); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? - } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkNoop( - bool empty_batch) { - m_env->CallVoidMethod(m_jcallback_obj, m_jMarkNoopMethodId, - static_cast(empty_batch)); - - // check for Exception, in-particular RocksDBException - if (m_env->ExceptionCheck()) { - // exception thrown - jthrowable exception = m_env->ExceptionOccurred(); - std::unique_ptr status = - ROCKSDB_NAMESPACE::RocksDBExceptionJni::toCppStatus(m_env, exception); - if (status == nullptr) { - // unkown status or exception occurred extracting status - m_env->ExceptionDescribe(); - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) probably need a - // better error code here - - } else { - m_env->ExceptionClear(); // clear the exception, as we have extracted the - // status - return ROCKSDB_NAMESPACE::Status(*status); - } - } - - return ROCKSDB_NAMESPACE::Status::OK(); -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkRollback( - const Slice& xid) { - auto markRollback = [this](jbyteArray j_xid) { - m_env->CallVoidMethod(m_jcallback_obj, m_jMarkRollbackMethodId, j_xid); - }; - auto status = WriteBatchHandlerJniCallback::k_op(xid, markRollback); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? - } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkCommit( - const Slice& xid) { - auto markCommit = [this](jbyteArray j_xid) { - m_env->CallVoidMethod(m_jcallback_obj, m_jMarkCommitMethodId, j_xid); - }; - auto status = WriteBatchHandlerJniCallback::k_op(xid, markCommit); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? 
- } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkCommitWithTimestamp( - const Slice& xid, const Slice& ts) { - auto markCommitWithTimestamp = [this](jbyteArray j_xid, jbyteArray j_ts) { - m_env->CallVoidMethod(m_jcallback_obj, m_jMarkCommitWithTimestampMethodId, - j_xid, j_ts); - }; - auto status = - WriteBatchHandlerJniCallback::kv_op(xid, ts, markCommitWithTimestamp); - if (status == nullptr) { - return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is - // an Exception but we don't know - // the ROCKSDB_NAMESPACE::Status? - } else { - return ROCKSDB_NAMESPACE::Status(*status); - } -} - -bool WriteBatchHandlerJniCallback::Continue() { - jboolean jContinue = - m_env->CallBooleanMethod(m_jcallback_obj, m_jContinueMethodId); - if (m_env->ExceptionCheck()) { - // exception thrown - m_env->ExceptionDescribe(); - } - - return static_cast(jContinue == JNI_TRUE); -} - -std::unique_ptr WriteBatchHandlerJniCallback::kv_op( - const Slice& key, const Slice& value, - std::function kvFn) { - const jbyteArray j_key = JniUtil::copyBytes(m_env, key); - if (j_key == nullptr) { - // exception thrown - if (m_env->ExceptionCheck()) { - m_env->ExceptionDescribe(); - } - return nullptr; - } - - const jbyteArray j_value = JniUtil::copyBytes(m_env, value); - if (j_value == nullptr) { - // exception thrown - if (m_env->ExceptionCheck()) { - m_env->ExceptionDescribe(); - } - if (j_key != nullptr) { - m_env->DeleteLocalRef(j_key); - } - return nullptr; - } - - kvFn(j_key, j_value); - - // check for Exception, in-particular RocksDBException - if (m_env->ExceptionCheck()) { - if (j_value != nullptr) { - m_env->DeleteLocalRef(j_value); - } - if (j_key != nullptr) { - m_env->DeleteLocalRef(j_key); - } - - // exception thrown - jthrowable exception = m_env->ExceptionOccurred(); - std::unique_ptr status = - ROCKSDB_NAMESPACE::RocksDBExceptionJni::toCppStatus(m_env, exception); - if (status == nullptr) { - // unkown status or exception occurred extracting status - m_env->ExceptionDescribe(); - return nullptr; - - } else { - m_env->ExceptionClear(); // clear the exception, as we have extracted the - // status - return status; - } - } - - if (j_value != nullptr) { - m_env->DeleteLocalRef(j_value); - } - if (j_key != nullptr) { - m_env->DeleteLocalRef(j_key); - } - - // all OK - return std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::OK())); -} - -std::unique_ptr WriteBatchHandlerJniCallback::k_op( - const Slice& key, std::function kFn) { - const jbyteArray j_key = JniUtil::copyBytes(m_env, key); - if (j_key == nullptr) { - // exception thrown - if (m_env->ExceptionCheck()) { - m_env->ExceptionDescribe(); - } - return nullptr; - } - - kFn(j_key); - - // check for Exception, in-particular RocksDBException - if (m_env->ExceptionCheck()) { - if (j_key != nullptr) { - m_env->DeleteLocalRef(j_key); - } - - // exception thrown - jthrowable exception = m_env->ExceptionOccurred(); - std::unique_ptr status = - ROCKSDB_NAMESPACE::RocksDBExceptionJni::toCppStatus(m_env, exception); - if (status == nullptr) { - // unkown status or exception occurred extracting status - m_env->ExceptionDescribe(); - return nullptr; - - } else { - m_env->ExceptionClear(); // clear the exception, as we have extracted the - // status - return status; - } - } - - if (j_key != nullptr) { - m_env->DeleteLocalRef(j_key); - } - - // all OK - return std::unique_ptr( - new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::OK())); -} -} // 
namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/writebatchhandlerjnicallback.h b/java/rocksjni/writebatchhandlerjnicallback.h deleted file mode 100644 index 9629797ca..000000000 --- a/java/rocksjni/writebatchhandlerjnicallback.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// This file implements the callback "bridge" between Java and C++ for -// ROCKSDB_NAMESPACE::WriteBatch::Handler. - -#ifndef JAVA_ROCKSJNI_WRITEBATCHHANDLERJNICALLBACK_H_ -#define JAVA_ROCKSJNI_WRITEBATCHHANDLERJNICALLBACK_H_ - -#include - -#include -#include - -#include "rocksdb/write_batch.h" -#include "rocksjni/jnicallback.h" - -namespace ROCKSDB_NAMESPACE { -/** - * This class acts as a bridge between C++ - * and Java. The methods in this class will be - * called back from the RocksDB storage engine (C++) - * which calls the appropriate Java method. - * This enables Write Batch Handlers to be implemented in Java. - */ -class WriteBatchHandlerJniCallback : public JniCallback, - public WriteBatch::Handler { - public: - WriteBatchHandlerJniCallback(JNIEnv* env, jobject jWriteBackHandler); - Status PutCF(uint32_t column_family_id, const Slice& key, const Slice& value); - void Put(const Slice& key, const Slice& value); - Status MergeCF(uint32_t column_family_id, const Slice& key, - const Slice& value); - void Merge(const Slice& key, const Slice& value); - Status DeleteCF(uint32_t column_family_id, const Slice& key); - void Delete(const Slice& key); - Status SingleDeleteCF(uint32_t column_family_id, const Slice& key); - void SingleDelete(const Slice& key); - Status DeleteRangeCF(uint32_t column_family_id, const Slice& beginKey, - const Slice& endKey); - void DeleteRange(const Slice& beginKey, const Slice& endKey); - void LogData(const Slice& blob); - Status PutBlobIndexCF(uint32_t column_family_id, const Slice& key, - const Slice& value); - Status MarkBeginPrepare(bool); - Status MarkEndPrepare(const Slice& xid); - Status MarkNoop(bool empty_batch); - Status MarkRollback(const Slice& xid); - Status MarkCommit(const Slice& xid); - Status MarkCommitWithTimestamp(const Slice& xid, const Slice& commit_ts); - bool Continue(); - - private: - JNIEnv* m_env; - jmethodID m_jPutCfMethodId; - jmethodID m_jPutMethodId; - jmethodID m_jMergeCfMethodId; - jmethodID m_jMergeMethodId; - jmethodID m_jDeleteCfMethodId; - jmethodID m_jDeleteMethodId; - jmethodID m_jSingleDeleteCfMethodId; - jmethodID m_jSingleDeleteMethodId; - jmethodID m_jDeleteRangeCfMethodId; - jmethodID m_jDeleteRangeMethodId; - jmethodID m_jLogDataMethodId; - jmethodID m_jPutBlobIndexCfMethodId; - jmethodID m_jMarkBeginPrepareMethodId; - jmethodID m_jMarkEndPrepareMethodId; - jmethodID m_jMarkNoopMethodId; - jmethodID m_jMarkRollbackMethodId; - jmethodID m_jMarkCommitMethodId; - jmethodID m_jMarkCommitWithTimestampMethodId; - jmethodID m_jContinueMethodId; - /** - * @return A pointer to a ROCKSDB_NAMESPACE::Status or nullptr if an - * unexpected exception occurred - */ - std::unique_ptr kv_op( - const Slice& key, const Slice& value, - std::function kvFn); - /** - * @return A pointer to a ROCKSDB_NAMESPACE::Status or nullptr if an - * unexpected exception occurred - */ - std::unique_ptr k_op( - const Slice& key, std::function kFn); -}; -} // namespace ROCKSDB_NAMESPACE - -#endif // 
JAVA_ROCKSJNI_WRITEBATCHHANDLERJNICALLBACK_H_ diff --git a/java/samples/src/main/java/OptimisticTransactionSample.java b/java/samples/src/main/java/OptimisticTransactionSample.java deleted file mode 100644 index 7e7a22e94..000000000 --- a/java/samples/src/main/java/OptimisticTransactionSample.java +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -import org.rocksdb.*; - -import static java.nio.charset.StandardCharsets.UTF_8; - -/** - * Demonstrates using Transactions on an OptimisticTransactionDB with - * varying isolation guarantees - */ -public class OptimisticTransactionSample { - private static final String dbPath = "/tmp/rocksdb_optimistic_transaction_example"; - - public static final void main(final String args[]) throws RocksDBException { - - try(final Options options = new Options() - .setCreateIfMissing(true); - final OptimisticTransactionDB txnDb = - OptimisticTransactionDB.open(options, dbPath)) { - - try (final WriteOptions writeOptions = new WriteOptions(); - final ReadOptions readOptions = new ReadOptions()) { - - //////////////////////////////////////////////////////// - // - // Simple OptimisticTransaction Example ("Read Committed") - // - //////////////////////////////////////////////////////// - readCommitted(txnDb, writeOptions, readOptions); - - - //////////////////////////////////////////////////////// - // - // "Repeatable Read" (Snapshot Isolation) Example - // -- Using a single Snapshot - // - //////////////////////////////////////////////////////// - repeatableRead(txnDb, writeOptions, readOptions); - - - //////////////////////////////////////////////////////// - // - // "Read Committed" (Monotonic Atomic Views) Example - // --Using multiple Snapshots - // - //////////////////////////////////////////////////////// - readCommitted_monotonicAtomicViews(txnDb, writeOptions, readOptions); - } - } - } - - /** - * Demonstrates "Read Committed" isolation - */ - private static void readCommitted(final OptimisticTransactionDB txnDb, - final WriteOptions writeOptions, final ReadOptions readOptions) - throws RocksDBException { - final byte key1[] = "abc".getBytes(UTF_8); - final byte value1[] = "def".getBytes(UTF_8); - - final byte key2[] = "xyz".getBytes(UTF_8); - final byte value2[] = "zzz".getBytes(UTF_8); - - // Start a transaction - try(final Transaction txn = txnDb.beginTransaction(writeOptions)) { - // Read a key in this transaction - byte[] value = txn.get(readOptions, key1); - assert(value == null); - - // Write a key in this transaction - txn.put(key1, value1); - - // Read a key OUTSIDE this transaction. Does not affect txn. - value = txnDb.get(readOptions, key1); - assert(value == null); - - // Write a key OUTSIDE of this transaction. - // Does not affect txn since this is an unrelated key. - // If we wrote key 'abc' here, the transaction would fail to commit. 
- txnDb.put(writeOptions, key2, value2); - - // Commit transaction - txn.commit(); - } - } - - /** - * Demonstrates "Repeatable Read" (Snapshot Isolation) isolation - */ - private static void repeatableRead(final OptimisticTransactionDB txnDb, - final WriteOptions writeOptions, final ReadOptions readOptions) - throws RocksDBException { - - final byte key1[] = "ghi".getBytes(UTF_8); - final byte value1[] = "jkl".getBytes(UTF_8); - - // Set a snapshot at start of transaction by setting setSnapshot(true) - try(final OptimisticTransactionOptions txnOptions = - new OptimisticTransactionOptions().setSetSnapshot(true); - final Transaction txn = - txnDb.beginTransaction(writeOptions, txnOptions)) { - - final Snapshot snapshot = txn.getSnapshot(); - - // Write a key OUTSIDE of transaction - txnDb.put(writeOptions, key1, value1); - - // Read a key using the snapshot. - readOptions.setSnapshot(snapshot); - final byte[] value = txn.getForUpdate(readOptions, key1, true); - assert (value == null); - - try { - // Attempt to commit transaction - txn.commit(); - throw new IllegalStateException(); - } catch(final RocksDBException e) { - // Transaction could not commit since the write outside of the txn - // conflicted with the read! - assert(e.getStatus().getCode() == Status.Code.Busy); - } - - txn.rollback(); - } finally { - // Clear snapshot from read options since it is no longer valid - readOptions.setSnapshot(null); - } - } - - /** - * Demonstrates "Read Committed" (Monotonic Atomic Views) isolation - * - * In this example, we set the snapshot multiple times. This is probably - * only necessary if you have very strict isolation requirements to - * implement. - */ - private static void readCommitted_monotonicAtomicViews( - final OptimisticTransactionDB txnDb, final WriteOptions writeOptions, - final ReadOptions readOptions) throws RocksDBException { - - final byte keyX[] = "x".getBytes(UTF_8); - final byte valueX[] = "x".getBytes(UTF_8); - - final byte keyY[] = "y".getBytes(UTF_8); - final byte valueY[] = "y".getBytes(UTF_8); - - try (final OptimisticTransactionOptions txnOptions = - new OptimisticTransactionOptions().setSetSnapshot(true); - final Transaction txn = - txnDb.beginTransaction(writeOptions, txnOptions)) { - - // Do some reads and writes to key "x" - Snapshot snapshot = txnDb.getSnapshot(); - readOptions.setSnapshot(snapshot); - byte[] value = txn.get(readOptions, keyX); - txn.put(valueX, valueX); - - // Do a write outside of the transaction to key "y" - txnDb.put(writeOptions, keyY, valueY); - - // Set a new snapshot in the transaction - txn.setSnapshot(); - snapshot = txnDb.getSnapshot(); - readOptions.setSnapshot(snapshot); - - // Do some reads and writes to key "y" - // Since the snapshot was advanced, the write done outside of the - // transaction does not conflict. - value = txn.getForUpdate(readOptions, keyY, true); - txn.put(keyY, valueY); - - // Commit. Since the snapshot was advanced, the write done outside of the - // transaction does not prevent this transaction from Committing. - txn.commit(); - - } finally { - // Clear snapshot from read options since it is no longer valid - readOptions.setSnapshot(null); - } - } -} diff --git a/java/samples/src/main/java/RocksDBColumnFamilySample.java b/java/samples/src/main/java/RocksDBColumnFamilySample.java deleted file mode 100644 index 72f5731a1..000000000 --- a/java/samples/src/main/java/RocksDBColumnFamilySample.java +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -import org.rocksdb.*; - -import java.util.ArrayList; -import java.util.List; - -public class RocksDBColumnFamilySample { - static { - RocksDB.loadLibrary(); - } - - public static void main(final String[] args) throws RocksDBException { - if (args.length < 1) { - System.out.println( - "usage: RocksDBColumnFamilySample db_path"); - System.exit(-1); - } - - final String db_path = args[0]; - - System.out.println("RocksDBColumnFamilySample"); - try(final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, db_path)) { - - assert(db != null); - - // create column family - try(final ColumnFamilyHandle columnFamilyHandle = db.createColumnFamily( - new ColumnFamilyDescriptor("new_cf".getBytes(), - new ColumnFamilyOptions()))) { - assert (columnFamilyHandle != null); - } - } - - // open DB with two column families - final List columnFamilyDescriptors = - new ArrayList<>(); - // have to open default column family - columnFamilyDescriptors.add(new ColumnFamilyDescriptor( - RocksDB.DEFAULT_COLUMN_FAMILY, new ColumnFamilyOptions())); - // open the new one, too - columnFamilyDescriptors.add(new ColumnFamilyDescriptor( - "new_cf".getBytes(), new ColumnFamilyOptions())); - final List columnFamilyHandles = new ArrayList<>(); - try(final DBOptions options = new DBOptions(); - final RocksDB db = RocksDB.open(options, db_path, - columnFamilyDescriptors, columnFamilyHandles)) { - assert(db != null); - - try { - // put and get from non-default column family - db.put( - columnFamilyHandles.get(1), new WriteOptions(), "key".getBytes(), "value".getBytes()); - - // atomic write - try (final WriteBatch wb = new WriteBatch()) { - wb.put(columnFamilyHandles.get(0), "key2".getBytes(), - "value2".getBytes()); - wb.put(columnFamilyHandles.get(1), "key3".getBytes(), - "value3".getBytes()); - wb.delete(columnFamilyHandles.get(1), "key".getBytes()); - db.write(new WriteOptions(), wb); - } - - // drop column family - db.dropColumnFamily(columnFamilyHandles.get(1)); - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } -} diff --git a/java/samples/src/main/java/RocksDBSample.java b/java/samples/src/main/java/RocksDBSample.java deleted file mode 100644 index 8ab9b2de3..000000000 --- a/java/samples/src/main/java/RocksDBSample.java +++ /dev/null @@ -1,295 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
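The column-family sample above hard-codes the "new_cf" descriptor when reopening the database. When the set of column families is not known in advance, RocksDB.listColumnFamilies can build the descriptor list instead; a minimal sketch assuming the database directory already exists (class name and argument handling are illustrative):

    import java.util.ArrayList;
    import java.util.List;
    import org.rocksdb.*;

    public class ListColumnFamiliesSketch {
      static { RocksDB.loadLibrary(); }

      public static void main(final String[] args) throws RocksDBException {
        final String dbPath = args[0];
        final List<ColumnFamilyDescriptor> descriptors = new ArrayList<>();
        try (final Options options = new Options()) {
          // Returns the names of all column families persisted in the DB directory,
          // including "default", so each one can be opened explicitly.
          for (final byte[] name : RocksDB.listColumnFamilies(options, dbPath)) {
            descriptors.add(new ColumnFamilyDescriptor(name, new ColumnFamilyOptions()));
          }
        }
        final List<ColumnFamilyHandle> handles = new ArrayList<>();
        try (final DBOptions dbOptions = new DBOptions();
             final RocksDB db = RocksDB.open(dbOptions, dbPath, descriptors, handles)) {
          // read and write through the handles as in the sample above
        } finally {
          for (final ColumnFamilyHandle handle : handles) {
            handle.close();
          }
        }
      }
    }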
- -import java.lang.IllegalArgumentException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.ArrayList; - -import org.rocksdb.*; -import org.rocksdb.util.SizeUnit; - -public class RocksDBSample { - static { - RocksDB.loadLibrary(); - } - - public static void main(final String[] args) { - if (args.length < 1) { - System.out.println("usage: RocksDBSample db_path"); - System.exit(-1); - } - - final String db_path = args[0]; - final String db_path_not_found = db_path + "_not_found"; - - System.out.println("RocksDBSample"); - try (final Options options = new Options(); - final Filter bloomFilter = new BloomFilter(10); - final ReadOptions readOptions = new ReadOptions() - .setFillCache(false); - final Statistics stats = new Statistics(); - final RateLimiter rateLimiter = new RateLimiter(10000000,10000, 10)) { - - try (final RocksDB db = RocksDB.open(options, db_path_not_found)) { - assert (false); - } catch (final RocksDBException e) { - System.out.format("Caught the expected exception -- %s\n", e); - } - - try { - options.setCreateIfMissing(true) - .setStatistics(stats) - .setWriteBufferSize(8 * SizeUnit.KB) - .setMaxWriteBufferNumber(3) - .setMaxBackgroundJobs(10) - .setCompressionType(CompressionType.ZLIB_COMPRESSION) - .setCompactionStyle(CompactionStyle.UNIVERSAL); - } catch (final IllegalArgumentException e) { - assert (false); - } - - assert (options.createIfMissing() == true); - assert (options.writeBufferSize() == 8 * SizeUnit.KB); - assert (options.maxWriteBufferNumber() == 3); - assert (options.maxBackgroundJobs() == 10); - assert (options.compressionType() == CompressionType.ZLIB_COMPRESSION); - assert (options.compactionStyle() == CompactionStyle.UNIVERSAL); - - assert (options.memTableFactoryName().equals("SkipListFactory")); - options.setMemTableConfig( - new HashSkipListMemTableConfig() - .setHeight(4) - .setBranchingFactor(4) - .setBucketCount(2000000)); - assert (options.memTableFactoryName().equals("HashSkipListRepFactory")); - - options.setMemTableConfig( - new HashLinkedListMemTableConfig() - .setBucketCount(100000)); - assert (options.memTableFactoryName().equals("HashLinkedListRepFactory")); - - options.setMemTableConfig( - new VectorMemTableConfig().setReservedSize(10000)); - assert (options.memTableFactoryName().equals("VectorRepFactory")); - - options.setMemTableConfig(new SkipListMemTableConfig()); - assert (options.memTableFactoryName().equals("SkipListFactory")); - - options.setTableFormatConfig(new PlainTableConfig()); - // Plain-Table requires mmap read - options.setAllowMmapReads(true); - assert (options.tableFactoryName().equals("PlainTable")); - - options.setRateLimiter(rateLimiter); - - final BlockBasedTableConfig table_options = new BlockBasedTableConfig(); - Cache cache = new LRUCache(64 * 1024, 6); - table_options.setBlockCache(cache) - .setFilterPolicy(bloomFilter) - .setBlockSizeDeviation(5) - .setBlockRestartInterval(10) - .setCacheIndexAndFilterBlocks(true); - - assert (table_options.blockSizeDeviation() == 5); - assert (table_options.blockRestartInterval() == 10); - assert (table_options.cacheIndexAndFilterBlocks() == true); - - options.setTableFormatConfig(table_options); - assert (options.tableFactoryName().equals("BlockBasedTable")); - - try (final RocksDB db = RocksDB.open(options, db_path)) { - db.put("hello".getBytes(), "world".getBytes()); - - final byte[] value = db.get("hello".getBytes()); - assert ("world".equals(new String(value))); - - final String str = db.getProperty("rocksdb.stats"); - assert 
(str != null && !str.equals("")); - } catch (final RocksDBException e) { - System.out.format("[ERROR] caught the unexpected exception -- %s\n", e); - assert (false); - } - - try (final RocksDB db = RocksDB.open(options, db_path)) { - db.put("hello".getBytes(), "world".getBytes()); - byte[] value = db.get("hello".getBytes()); - System.out.format("Get('hello') = %s\n", - new String(value)); - - for (int i = 1; i <= 9; ++i) { - for (int j = 1; j <= 9; ++j) { - db.put(String.format("%dx%d", i, j).getBytes(), - String.format("%d", i * j).getBytes()); - } - } - - for (int i = 1; i <= 9; ++i) { - for (int j = 1; j <= 9; ++j) { - System.out.format("%s ", new String(db.get( - String.format("%dx%d", i, j).getBytes()))); - } - System.out.println(""); - } - - // write batch test - try (final WriteOptions writeOpt = new WriteOptions()) { - for (int i = 10; i <= 19; ++i) { - try (final WriteBatch batch = new WriteBatch()) { - for (int j = 10; j <= 19; ++j) { - batch.put(String.format("%dx%d", i, j).getBytes(), - String.format("%d", i * j).getBytes()); - } - db.write(writeOpt, batch); - } - } - } - for (int i = 10; i <= 19; ++i) { - for (int j = 10; j <= 19; ++j) { - assert (new String( - db.get(String.format("%dx%d", i, j).getBytes())).equals( - String.format("%d", i * j))); - System.out.format("%s ", new String(db.get( - String.format("%dx%d", i, j).getBytes()))); - } - System.out.println(""); - } - - value = db.get("1x1".getBytes()); - assert (value != null); - value = db.get("world".getBytes()); - assert (value == null); - value = db.get(readOptions, "world".getBytes()); - assert (value == null); - - final byte[] testKey = "asdf".getBytes(); - final byte[] testValue = - "asdfghjkl;'?> insufficientArray.length); - len = db.get("asdfjkl;".getBytes(), enoughArray); - assert (len == RocksDB.NOT_FOUND); - len = db.get(testKey, enoughArray); - assert (len == testValue.length); - - len = db.get(readOptions, testKey, insufficientArray); - assert (len > insufficientArray.length); - len = db.get(readOptions, "asdfjkl;".getBytes(), enoughArray); - assert (len == RocksDB.NOT_FOUND); - len = db.get(readOptions, testKey, enoughArray); - assert (len == testValue.length); - - db.delete(testKey); - len = db.get(testKey, enoughArray); - assert (len == RocksDB.NOT_FOUND); - - // repeat the test with WriteOptions - try (final WriteOptions writeOpts = new WriteOptions()) { - writeOpts.setSync(true); - writeOpts.setDisableWAL(false); - db.put(writeOpts, testKey, testValue); - len = db.get(testKey, enoughArray); - assert (len == testValue.length); - assert (new String(testValue).equals( - new String(enoughArray, 0, len))); - } - - try { - for (final TickerType statsType : TickerType.values()) { - if (statsType != TickerType.TICKER_ENUM_MAX) { - stats.getTickerCount(statsType); - } - } - System.out.println("getTickerCount() passed."); - } catch (final Exception e) { - System.out.println("Failed in call to getTickerCount()"); - assert (false); //Should never reach here. - } - - try { - for (final HistogramType histogramType : HistogramType.values()) { - if (histogramType != HistogramType.HISTOGRAM_ENUM_MAX) { - HistogramData data = stats.getHistogramData(histogramType); - } - } - System.out.println("getHistogramData() passed."); - } catch (final Exception e) { - System.out.println("Failed in call to getHistogramData()"); - assert (false); //Should never reach here. 
- } - - try (final RocksIterator iterator = db.newIterator()) { - - boolean seekToFirstPassed = false; - for (iterator.seekToFirst(); iterator.isValid(); iterator.next()) { - iterator.status(); - assert (iterator.key() != null); - assert (iterator.value() != null); - seekToFirstPassed = true; - } - if (seekToFirstPassed) { - System.out.println("iterator seekToFirst tests passed."); - } - - boolean seekToLastPassed = false; - for (iterator.seekToLast(); iterator.isValid(); iterator.prev()) { - iterator.status(); - assert (iterator.key() != null); - assert (iterator.value() != null); - seekToLastPassed = true; - } - - if (seekToLastPassed) { - System.out.println("iterator seekToLastPassed tests passed."); - } - - iterator.seekToFirst(); - iterator.seek(iterator.key()); - assert (iterator.key() != null); - assert (iterator.value() != null); - - System.out.println("iterator seek test passed."); - - } - System.out.println("iterator tests passed."); - - final List keys = new ArrayList<>(); - try (final RocksIterator iterator = db.newIterator()) { - for (iterator.seekToLast(); iterator.isValid(); iterator.prev()) { - keys.add(iterator.key()); - } - } - - List values = db.multiGetAsList(keys); - assert (values.size() == keys.size()); - for (final byte[] value1 : values) { - assert (value1 != null); - } - - values = db.multiGetAsList(new ReadOptions(), keys); - assert (values.size() == keys.size()); - for (final byte[] value1 : values) { - assert (value1 != null); - } - } catch (final RocksDBException e) { - System.err.println(e); - } - } - } -} diff --git a/java/samples/src/main/java/TransactionSample.java b/java/samples/src/main/java/TransactionSample.java deleted file mode 100644 index b88a68f12..000000000 --- a/java/samples/src/main/java/TransactionSample.java +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
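The sample above exercises db.get(key, buffer) with both an undersized and an adequately sized buffer. The return value encodes the outcome: RocksDB.NOT_FOUND when the key is absent, otherwise the full value length, which exceeds buffer.length when the copy was truncated. A small hedged helper making that contract explicit; the class and method names are illustrative:

    import org.rocksdb.RocksDB;
    import org.rocksdb.RocksDBException;

    public class GetIntoBufferSketch {
      /**
       * Reads the value for key into a caller-sized buffer, growing it once
       * if the first attempt was truncated. Returns null when the key is absent.
       */
      static byte[] readValue(final RocksDB db, final byte[] key) throws RocksDBException {
        byte[] buffer = new byte[16];
        int len = db.get(key, buffer);
        if (len == RocksDB.NOT_FOUND) {
          return null;
        }
        if (len > buffer.length) {
          // Truncated: only buffer.length bytes were copied, the value is len bytes long.
          buffer = new byte[len];
          len = db.get(key, buffer);
        }
        // One retry is enough unless the value grows concurrently between the two reads.
        final byte[] value = new byte[Math.min(len, buffer.length)];
        System.arraycopy(buffer, 0, value, 0, value.length);
        return value;
      }
    }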
- -import org.rocksdb.*; - -import static java.nio.charset.StandardCharsets.UTF_8; - -/** - * Demonstrates using Transactions on a TransactionDB with - * varying isolation guarantees - */ -public class TransactionSample { - private static final String dbPath = "/tmp/rocksdb_transaction_example"; - - public static final void main(final String args[]) throws RocksDBException { - - try(final Options options = new Options() - .setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB txnDb = - TransactionDB.open(options, txnDbOptions, dbPath)) { - - try (final WriteOptions writeOptions = new WriteOptions(); - final ReadOptions readOptions = new ReadOptions()) { - - //////////////////////////////////////////////////////// - // - // Simple Transaction Example ("Read Committed") - // - //////////////////////////////////////////////////////// - readCommitted(txnDb, writeOptions, readOptions); - - - //////////////////////////////////////////////////////// - // - // "Repeatable Read" (Snapshot Isolation) Example - // -- Using a single Snapshot - // - //////////////////////////////////////////////////////// - repeatableRead(txnDb, writeOptions, readOptions); - - - //////////////////////////////////////////////////////// - // - // "Read Committed" (Monotonic Atomic Views) Example - // --Using multiple Snapshots - // - //////////////////////////////////////////////////////// - readCommitted_monotonicAtomicViews(txnDb, writeOptions, readOptions); - } - } - } - - /** - * Demonstrates "Read Committed" isolation - */ - private static void readCommitted(final TransactionDB txnDb, - final WriteOptions writeOptions, final ReadOptions readOptions) - throws RocksDBException { - final byte key1[] = "abc".getBytes(UTF_8); - final byte value1[] = "def".getBytes(UTF_8); - - final byte key2[] = "xyz".getBytes(UTF_8); - final byte value2[] = "zzz".getBytes(UTF_8); - - // Start a transaction - try(final Transaction txn = txnDb.beginTransaction(writeOptions)) { - // Read a key in this transaction - byte[] value = txn.get(readOptions, key1); - assert(value == null); - - // Write a key in this transaction - txn.put(key1, value1); - - // Read a key OUTSIDE this transaction. Does not affect txn. - value = txnDb.get(readOptions, key1); - assert(value == null); - - // Write a key OUTSIDE of this transaction. - // Does not affect txn since this is an unrelated key. - // If we wrote key 'abc' here, the transaction would fail to commit. - txnDb.put(writeOptions, key2, value2); - - // Commit transaction - txn.commit(); - } - } - - /** - * Demonstrates "Repeatable Read" (Snapshot Isolation) isolation - */ - private static void repeatableRead(final TransactionDB txnDb, - final WriteOptions writeOptions, final ReadOptions readOptions) - throws RocksDBException { - - final byte key1[] = "ghi".getBytes(UTF_8); - final byte value1[] = "jkl".getBytes(UTF_8); - - // Set a snapshot at start of transaction by setting setSnapshot(true) - try(final TransactionOptions txnOptions = new TransactionOptions() - .setSetSnapshot(true); - final Transaction txn = - txnDb.beginTransaction(writeOptions, txnOptions)) { - - final Snapshot snapshot = txn.getSnapshot(); - - // Write a key OUTSIDE of transaction - txnDb.put(writeOptions, key1, value1); - - // Attempt to read a key using the snapshot. This will fail since - // the previous write outside this txn conflicts with this read. 
- readOptions.setSnapshot(snapshot); - - try { - final byte[] value = txn.getForUpdate(readOptions, key1, true); - throw new IllegalStateException(); - } catch(final RocksDBException e) { - assert(e.getStatus().getCode() == Status.Code.Busy); - } - - txn.rollback(); - } finally { - // Clear snapshot from read options since it is no longer valid - readOptions.setSnapshot(null); - } - } - - /** - * Demonstrates "Read Committed" (Monotonic Atomic Views) isolation - * - * In this example, we set the snapshot multiple times. This is probably - * only necessary if you have very strict isolation requirements to - * implement. - */ - private static void readCommitted_monotonicAtomicViews( - final TransactionDB txnDb, final WriteOptions writeOptions, - final ReadOptions readOptions) throws RocksDBException { - - final byte keyX[] = "x".getBytes(UTF_8); - final byte valueX[] = "x".getBytes(UTF_8); - - final byte keyY[] = "y".getBytes(UTF_8); - final byte valueY[] = "y".getBytes(UTF_8); - - try (final TransactionOptions txnOptions = new TransactionOptions() - .setSetSnapshot(true); - final Transaction txn = - txnDb.beginTransaction(writeOptions, txnOptions)) { - - // Do some reads and writes to key "x" - Snapshot snapshot = txnDb.getSnapshot(); - readOptions.setSnapshot(snapshot); - byte[] value = txn.get(readOptions, keyX); - txn.put(valueX, valueX); - - // Do a write outside of the transaction to key "y" - txnDb.put(writeOptions, keyY, valueY); - - // Set a new snapshot in the transaction - txn.setSnapshot(); - txn.setSavePoint(); - snapshot = txnDb.getSnapshot(); - readOptions.setSnapshot(snapshot); - - // Do some reads and writes to key "y" - // Since the snapshot was advanced, the write done outside of the - // transaction does not conflict. - value = txn.getForUpdate(readOptions, keyY, true); - txn.put(keyY, valueY); - - // Decide we want to revert the last write from this transaction. - txn.rollbackToSavePoint(); - - // Commit. - txn.commit(); - } finally { - // Clear snapshot from read options since it is no longer valid - readOptions.setSnapshot(null); - } - } -} diff --git a/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java b/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java deleted file mode 100644 index 2f0d4f3ca..000000000 --- a/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * A CompactionFilter allows an application to modify/delete a key-value at - * the time of compaction. 
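As the Javadoc notes, RocksJava currently exposes compaction filters as Java wrappers around C++ implementations rather than as pure-Java callbacks. A hedged sketch using the bundled RemoveEmptyValueCompactionFilter; the path and class name are illustrative:

    import org.rocksdb.*;

    public class CompactionFilterSketch {
      static { RocksDB.loadLibrary(); }

      public static void main(final String[] args) throws RocksDBException {
        try (final RemoveEmptyValueCompactionFilter filter = new RemoveEmptyValueCompactionFilter();
             final ColumnFamilyOptions cfOptions =
                 new ColumnFamilyOptions().setCompactionFilter(filter);
             final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true);
             final Options options = new Options(dbOptions, cfOptions);
             final RocksDB db = RocksDB.open(options, "/tmp/compaction_filter_sketch")) {
          db.put("doomed".getBytes(), new byte[0]); // empty values are dropped when compacted
          db.compactRange();
          // The filter should only be closed after every RocksDB instance referencing it
          // is closed; try-with-resources closes it last here.
        }
      }
    }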
- * - * At present we just permit an overriding Java class to wrap a C++ - * implementation - */ -public abstract class AbstractCompactionFilter> - extends RocksObject { - - public static class Context { - private final boolean fullCompaction; - private final boolean manualCompaction; - - public Context(final boolean fullCompaction, final boolean manualCompaction) { - this.fullCompaction = fullCompaction; - this.manualCompaction = manualCompaction; - } - - /** - * Does this compaction run include all data files - * - * @return true if this is a full compaction run - */ - public boolean isFullCompaction() { - return fullCompaction; - } - - /** - * Is this compaction requested by the client, - * or is it occurring as an automatic compaction process - * - * @return true if the compaction was initiated by the client - */ - public boolean isManualCompaction() { - return manualCompaction; - } - } - - protected AbstractCompactionFilter(final long nativeHandle) { - super(nativeHandle); - } - - /** - * Deletes underlying C++ compaction pointer. - * - * Note that this function should be called only after all - * RocksDB instances referencing the compaction filter are closed. - * Otherwise an undefined behavior will occur. - */ - @Override - protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java b/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java deleted file mode 100644 index 380b4461d..000000000 --- a/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Each compaction will create a new {@link AbstractCompactionFilter} - * allowing the application to know about different compactions - * - * @param The concrete type of the compaction filter - */ -public abstract class AbstractCompactionFilterFactory> - extends RocksCallbackObject { - - public AbstractCompactionFilterFactory() { - super(null); - } - - @Override - protected long initializeNative(final long... 
nativeParameterHandles) { - return createNewCompactionFilterFactory0(); - } - - /** - * Called from JNI, see compaction_filter_factory_jnicallback.cc - * - * @param fullCompaction {@link AbstractCompactionFilter.Context#fullCompaction} - * @param manualCompaction {@link AbstractCompactionFilter.Context#manualCompaction} - * - * @return native handle of the CompactionFilter - */ - private long createCompactionFilter(final boolean fullCompaction, - final boolean manualCompaction) { - final T filter = createCompactionFilter( - new AbstractCompactionFilter.Context(fullCompaction, manualCompaction)); - - // CompactionFilterFactory::CreateCompactionFilter returns a std::unique_ptr - // which therefore has ownership of the underlying native object - filter.disOwnNativeHandle(); - - return filter.nativeHandle_; - } - - /** - * Create a new compaction filter - * - * @param context The context describing the need for a new compaction filter - * - * @return A new instance of {@link AbstractCompactionFilter} - */ - public abstract T createCompactionFilter( - final AbstractCompactionFilter.Context context); - - /** - * A name which identifies this compaction filter - * - * The name will be printed to the LOG file on start up for diagnosis - * - * @return name which identifies this compaction filter. - */ - public abstract String name(); - - /** - * We override {@link RocksCallbackObject#disposeInternal()} - * as disposing of a rocksdb::AbstractCompactionFilterFactory requires - * a slightly different approach as it is a std::shared_ptr - */ - @Override - protected void disposeInternal() { - disposeInternal(nativeHandle_); - } - - private native long createNewCompactionFilterFactory0(); - private native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/AbstractComparator.java b/java/src/main/java/org/rocksdb/AbstractComparator.java deleted file mode 100644 index a89e79048..000000000 --- a/java/src/main/java/org/rocksdb/AbstractComparator.java +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - * Comparators are used by RocksDB to determine - * the ordering of keys. - * - * Implementations of Comparators in Java should extend this class. - */ -public abstract class AbstractComparator - extends RocksCallbackObject { - - AbstractComparator() { - super(); - } - - protected AbstractComparator(final ComparatorOptions comparatorOptions) { - super(comparatorOptions.nativeHandle_); - } - - @Override - protected long initializeNative(final long... nativeParameterHandles) { - return createNewComparator(nativeParameterHandles[0]); - } - - /** - * Get the type of this comparator. - * - * Used for determining the correct C++ cast in native code. - * - * @return The type of the comparator. - */ - ComparatorType getComparatorType() { - return ComparatorType.JAVA_COMPARATOR; - } - - /** - * The name of the comparator. Used to check for comparator - * mismatches (i.e., a DB created with one comparator is - * accessed using a different comparator). - * - * A new name should be used whenever - * the comparator implementation changes in a way that will cause - * the relative ordering of any two keys to change. - * - * Names starting with "rocksdb." are reserved and should not be used. 
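Since the name() contract above asks for a stable identifier outside the reserved "rocksdb." prefix, a custom comparator reduces to a subclass supplying name() and compare(). A hedged sketch of a reverse-bytewise comparator; the class and comparator names are illustrative:

    import java.nio.ByteBuffer;
    import org.rocksdb.AbstractComparator;
    import org.rocksdb.ComparatorOptions;

    public class ReverseBytewiseSketch extends AbstractComparator {
      public ReverseBytewiseSketch(final ComparatorOptions comparatorOptions) {
        super(comparatorOptions);
      }

      @Override
      public String name() {
        // Must stay stable for the life of the DB and must not start with "rocksdb."
        return "example.ReverseBytewiseSketch";
      }

      @Override
      public int compare(final ByteBuffer a, final ByteBuffer b) {
        // Absolute reads leave position and limit untouched; negate for reverse order.
        final int n = Math.min(a.remaining(), b.remaining());
        for (int i = 0; i < n; i++) {
          final int cmp = (a.get(a.position() + i) & 0xff) - (b.get(b.position() + i) & 0xff);
          if (cmp != 0) {
            return -cmp;
          }
        }
        return -(a.remaining() - b.remaining());
      }
    }

A database would then be opened with Options.setComparator(new ReverseBytewiseSketch(new ComparatorOptions())); like the other callback objects here, the comparator must outlive the database that uses it.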
- * - * @return The name of this comparator implementation - */ - public abstract String name(); - - /** - * Three-way key comparison. Implementations should provide a - * total order - * on keys that might be passed to it. - * - * The implementation may modify the {@code ByteBuffer}s passed in, though - * it would be unconventional to modify the "limit" or any of the - * underlying bytes. As a callback, RocksJava will ensure that {@code a} - * is a different instance from {@code b}. - * - * @param a buffer containing the first key in its "remaining" elements - * @param b buffer containing the second key in its "remaining" elements - * - * @return Should return either: - * 1) < 0 if "a" < "b" - * 2) == 0 if "a" == "b" - * 3) > 0 if "a" > "b" - */ - public abstract int compare(final ByteBuffer a, final ByteBuffer b); - - /** - *

Used to reduce the space requirements - * for internal data structures like index blocks. - * - * If start < limit, you may modify start which is a - * shorter string in [start, limit). - * - * If you modify start, it is expected that you set the byte buffer so that - * a subsequent read of start.remaining() bytes from start.position() - * to start.limit() will obtain the new start value. - * - * Simple comparator implementations may return with start unchanged. - * i.e., an implementation of this method that does nothing is correct. - * - * @param start the start - * @param limit the limit - */ - public void findShortestSeparator(final ByteBuffer start, - final ByteBuffer limit) { - // no-op - } - - /** - * Used to reduce the space requirements - * for internal data structures like index blocks. - * - * You may change key to a shorter key (key1) where - * key1 ≥ key. - * - * Simple comparator implementations may return the key unchanged. - * i.e., an implementation of - * this method that does nothing is correct.

- * - * @param key the key - */ - public void findShortSuccessor(final ByteBuffer key) { - // no-op - } - - public final boolean usingDirectBuffers() { - return usingDirectBuffers(nativeHandle_); - } - - private native boolean usingDirectBuffers(final long nativeHandle); - - private native long createNewComparator(final long comparatorOptionsHandle); -} diff --git a/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java b/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java deleted file mode 100644 index b732d2495..000000000 --- a/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - * This class is intentionally private, - * it holds methods which are called - * from C++ to interact with a Comparator - * written in Java. - * - * Placing these bridge methods in this - * class keeps the API of the - * {@link org.rocksdb.AbstractComparator} clean. - */ -class AbstractComparatorJniBridge { - - /** - * Only called from JNI. - * - * Simply a bridge to calling - * {@link AbstractComparator#compare(ByteBuffer, ByteBuffer)}, - * which ensures that the byte buffer lengths are correct - * before and after the call. - * - * @param comparator the comparator object on which to - * call {@link AbstractComparator#compare(ByteBuffer, ByteBuffer)} - * @param a buffer access to first key - * @param aLen the length of the a key, - * may be smaller than the buffer {@code a} - * @param b buffer access to second key - * @param bLen the length of the b key, - * may be smaller than the buffer {@code b} - * - * @return the result of the comparison - */ - private static int compareInternal( - final AbstractComparator comparator, - final ByteBuffer a, final int aLen, - final ByteBuffer b, final int bLen) { - if (aLen != -1) { - a.mark(); - a.limit(aLen); - } - if (bLen != -1) { - b.mark(); - b.limit(bLen); - } - - final int c = comparator.compare(a, b); - - if (aLen != -1) { - a.reset(); - } - if (bLen != -1) { - b.reset(); - } - - return c; - } - - /** - * Only called from JNI. - * - * Simply a bridge to calling - * {@link AbstractComparator#findShortestSeparator(ByteBuffer, ByteBuffer)}, - * which ensures that the byte buffer lengths are correct - * before the call. - * - * @param comparator the comparator object on which to - * call {@link AbstractComparator#findShortestSeparator(ByteBuffer, ByteBuffer)} - * @param start buffer access to the start key - * @param startLen the length of the start key, - * may be smaller than the buffer {@code start} - * @param limit buffer access to the limit key - * @param limitLen the length of the limit key, - * may be smaller than the buffer {@code limit} - * - * @return either {@code startLen} if the start key is unchanged, otherwise - * the new length of the start key - */ - private static int findShortestSeparatorInternal( - final AbstractComparator comparator, - final ByteBuffer start, final int startLen, - final ByteBuffer limit, final int limitLen) { - if (startLen != -1) { - start.limit(startLen); - } - if (limitLen != -1) { - limit.limit(limitLen); - } - comparator.findShortestSeparator(start, limit); - return start.remaining(); - } - - /** - * Only called from JNI. 
- * - * Simply a bridge to calling - * {@link AbstractComparator#findShortestSeparator(ByteBuffer, ByteBuffer)}, - * which ensures that the byte buffer length is correct - * before the call. - * - * @param comparator the comparator object on which to - * call {@link AbstractComparator#findShortSuccessor(ByteBuffer)} - * @param key buffer access to the key - * @param keyLen the length of the key, - * may be smaller than the buffer {@code key} - * - * @return either keyLen if the key is unchanged, otherwise the new length of the key - */ - private static int findShortSuccessorInternal( - final AbstractComparator comparator, - final ByteBuffer key, final int keyLen) { - if (keyLen != -1) { - key.limit(keyLen); - } - comparator.findShortSuccessor(key); - return key.remaining(); - } -} diff --git a/java/src/main/java/org/rocksdb/AbstractEventListener.java b/java/src/main/java/org/rocksdb/AbstractEventListener.java deleted file mode 100644 index 6698acf88..000000000 --- a/java/src/main/java/org/rocksdb/AbstractEventListener.java +++ /dev/null @@ -1,334 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.rocksdb.AbstractEventListener.EnabledEventCallback.*; - -/** - * Base class for Event Listeners. - */ -public abstract class AbstractEventListener extends RocksCallbackObject implements EventListener { - public enum EnabledEventCallback { - ON_FLUSH_COMPLETED((byte) 0x0), - ON_FLUSH_BEGIN((byte) 0x1), - ON_TABLE_FILE_DELETED((byte) 0x2), - ON_COMPACTION_BEGIN((byte) 0x3), - ON_COMPACTION_COMPLETED((byte) 0x4), - ON_TABLE_FILE_CREATED((byte) 0x5), - ON_TABLE_FILE_CREATION_STARTED((byte) 0x6), - ON_MEMTABLE_SEALED((byte) 0x7), - ON_COLUMN_FAMILY_HANDLE_DELETION_STARTED((byte) 0x8), - ON_EXTERNAL_FILE_INGESTED((byte) 0x9), - ON_BACKGROUND_ERROR((byte) 0xA), - ON_STALL_CONDITIONS_CHANGED((byte) 0xB), - ON_FILE_READ_FINISH((byte) 0xC), - ON_FILE_WRITE_FINISH((byte) 0xD), - ON_FILE_FLUSH_FINISH((byte) 0xE), - ON_FILE_SYNC_FINISH((byte) 0xF), - ON_FILE_RANGE_SYNC_FINISH((byte) 0x10), - ON_FILE_TRUNCATE_FINISH((byte) 0x11), - ON_FILE_CLOSE_FINISH((byte) 0x12), - SHOULD_BE_NOTIFIED_ON_FILE_IO((byte) 0x13), - ON_ERROR_RECOVERY_BEGIN((byte) 0x14), - ON_ERROR_RECOVERY_COMPLETED((byte) 0x15); - - private final byte value; - - EnabledEventCallback(final byte value) { - this.value = value; - } - - /** - * Get the internal representation value. - * - * @return the internal representation value - */ - byte getValue() { - return value; - } - - /** - * Get the EnabledEventCallbacks from the internal representation value. - * - * @return the enabled event callback. - * - * @throws IllegalArgumentException if the value is unknown. - */ - static EnabledEventCallback fromValue(final byte value) { - for (final EnabledEventCallback enabledEventCallback : EnabledEventCallback.values()) { - if (enabledEventCallback.value == value) { - return enabledEventCallback; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for EnabledEventCallback: " + value); - } - } - - /** - * Creates an Event Listener that will - * received all callbacks from C++. - * - * If you don't need all callbacks, it is much more efficient to - * just register for the ones you need by calling - * {@link #AbstractEventListener(EnabledEventCallback...)} instead. 
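As the constructor documentation above suggests, a listener normally enables only the callbacks it overrides, so the remaining events never cross the JNI boundary. An illustrative sketch (the class name is made up; such a listener is typically attached through the options object, e.g. a setListeners(...) call in the public RocksJava API):

import org.rocksdb.AbstractEventListener;
import org.rocksdb.CompactionJobInfo;
import org.rocksdb.FlushJobInfo;
import org.rocksdb.RocksDB;

// Sketch: only two callbacks are enabled; all other events stay disabled.
public class FlushAndCompactionLogger extends AbstractEventListener {
  public FlushAndCompactionLogger() {
    super(EnabledEventCallback.ON_FLUSH_COMPLETED,
        EnabledEventCallback.ON_COMPACTION_COMPLETED);
  }

  @Override
  public void onFlushCompleted(final RocksDB db, final FlushJobInfo flushJobInfo) {
    System.out.println("flush completed");
  }

  @Override
  public void onCompactionCompleted(final RocksDB db, final CompactionJobInfo compactionJobInfo) {
    System.out.println("compaction completed");
  }
}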
- */ - protected AbstractEventListener() { - this(ON_FLUSH_COMPLETED, ON_FLUSH_BEGIN, ON_TABLE_FILE_DELETED, ON_COMPACTION_BEGIN, - ON_COMPACTION_COMPLETED, ON_TABLE_FILE_CREATED, ON_TABLE_FILE_CREATION_STARTED, - ON_MEMTABLE_SEALED, ON_COLUMN_FAMILY_HANDLE_DELETION_STARTED, ON_EXTERNAL_FILE_INGESTED, - ON_BACKGROUND_ERROR, ON_STALL_CONDITIONS_CHANGED, ON_FILE_READ_FINISH, ON_FILE_WRITE_FINISH, - ON_FILE_FLUSH_FINISH, ON_FILE_SYNC_FINISH, ON_FILE_RANGE_SYNC_FINISH, - ON_FILE_TRUNCATE_FINISH, ON_FILE_CLOSE_FINISH, SHOULD_BE_NOTIFIED_ON_FILE_IO, - ON_ERROR_RECOVERY_BEGIN, ON_ERROR_RECOVERY_COMPLETED); - } - - /** - * Creates an Event Listener that will - * receive only certain callbacks from C++. - * - * @param enabledEventCallbacks callbacks to enable in Java. - */ - protected AbstractEventListener(final EnabledEventCallback... enabledEventCallbacks) { - super(packToLong(enabledEventCallbacks)); - } - - /** - * Pack EnabledEventCallbacks to a long. - * - * @param enabledEventCallbacks the flags - * - * @return a long - */ - private static long packToLong(final EnabledEventCallback... enabledEventCallbacks) { - long l = 0; - for (int i = 0; i < enabledEventCallbacks.length; i++) { - l |= 1 << enabledEventCallbacks[i].getValue(); - } - return l; - } - - @Override - public void onFlushCompleted(final RocksDB db, final FlushJobInfo flushJobInfo) { - // no-op - } - - /** - * Called from JNI, proxy for - * {@link #onFlushCompleted(RocksDB, FlushJobInfo)}. - * - * @param dbHandle native handle of the database - * @param flushJobInfo the flush job info - */ - private void onFlushCompletedProxy(final long dbHandle, final FlushJobInfo flushJobInfo) { - final RocksDB db = new RocksDB(dbHandle); - db.disOwnNativeHandle(); // we don't own this! - onFlushCompleted(db, flushJobInfo); - } - - @Override - public void onFlushBegin(final RocksDB db, final FlushJobInfo flushJobInfo) { - // no-op - } - - /** - * Called from JNI, proxy for - * {@link #onFlushBegin(RocksDB, FlushJobInfo)}. - * - * @param dbHandle native handle of the database - * @param flushJobInfo the flush job info - */ - private void onFlushBeginProxy(final long dbHandle, final FlushJobInfo flushJobInfo) { - final RocksDB db = new RocksDB(dbHandle); - db.disOwnNativeHandle(); // we don't own this! - onFlushBegin(db, flushJobInfo); - } - - @Override - public void onTableFileDeleted(final TableFileDeletionInfo tableFileDeletionInfo) { - // no-op - } - - @Override - public void onCompactionBegin(final RocksDB db, final CompactionJobInfo compactionJobInfo) { - // no-op - } - - /** - * Called from JNI, proxy for - * {@link #onCompactionBegin(RocksDB, CompactionJobInfo)}. - * - * @param dbHandle native handle of the database - * @param compactionJobInfo the flush job info - */ - private void onCompactionBeginProxy( - final long dbHandle, final CompactionJobInfo compactionJobInfo) { - final RocksDB db = new RocksDB(dbHandle); - db.disOwnNativeHandle(); // we don't own this! - onCompactionBegin(db, compactionJobInfo); - } - - @Override - public void onCompactionCompleted(final RocksDB db, final CompactionJobInfo compactionJobInfo) { - // no-op - } - - /** - * Called from JNI, proxy for - * {@link #onCompactionCompleted(RocksDB, CompactionJobInfo)}. 
- * - * @param dbHandle native handle of the database - * @param compactionJobInfo the flush job info - */ - private void onCompactionCompletedProxy( - final long dbHandle, final CompactionJobInfo compactionJobInfo) { - final RocksDB db = new RocksDB(dbHandle); - db.disOwnNativeHandle(); // we don't own this! - onCompactionCompleted(db, compactionJobInfo); - } - - @Override - public void onTableFileCreated(final TableFileCreationInfo tableFileCreationInfo) { - // no-op - } - - @Override - public void onTableFileCreationStarted( - final TableFileCreationBriefInfo tableFileCreationBriefInfo) { - // no-op - } - - @Override - public void onMemTableSealed(final MemTableInfo memTableInfo) { - // no-op - } - - @Override - public void onColumnFamilyHandleDeletionStarted(final ColumnFamilyHandle columnFamilyHandle) { - // no-op - } - - @Override - public void onExternalFileIngested( - final RocksDB db, final ExternalFileIngestionInfo externalFileIngestionInfo) { - // no-op - } - - /** - * Called from JNI, proxy for - * {@link #onExternalFileIngested(RocksDB, ExternalFileIngestionInfo)}. - * - * @param dbHandle native handle of the database - * @param externalFileIngestionInfo the flush job info - */ - private void onExternalFileIngestedProxy( - final long dbHandle, final ExternalFileIngestionInfo externalFileIngestionInfo) { - final RocksDB db = new RocksDB(dbHandle); - db.disOwnNativeHandle(); // we don't own this! - onExternalFileIngested(db, externalFileIngestionInfo); - } - - @Override - public void onBackgroundError( - final BackgroundErrorReason backgroundErrorReason, final Status backgroundError) { - // no-op - } - - /** - * Called from JNI, proxy for - * {@link #onBackgroundError(BackgroundErrorReason, Status)}. - * - * @param reasonByte byte value representing error reason - * @param backgroundError status with error code - */ - private void onBackgroundErrorProxy(final byte reasonByte, final Status backgroundError) { - onBackgroundError(BackgroundErrorReason.fromValue(reasonByte), backgroundError); - } - - @Override - public void onStallConditionsChanged(final WriteStallInfo writeStallInfo) { - // no-op - } - - @Override - public void onFileReadFinish(final FileOperationInfo fileOperationInfo) { - // no-op - } - - @Override - public void onFileWriteFinish(final FileOperationInfo fileOperationInfo) { - // no-op - } - - @Override - public void onFileFlushFinish(final FileOperationInfo fileOperationInfo) { - // no-op - } - - @Override - public void onFileSyncFinish(final FileOperationInfo fileOperationInfo) { - // no-op - } - - @Override - public void onFileRangeSyncFinish(final FileOperationInfo fileOperationInfo) { - // no-op - } - - @Override - public void onFileTruncateFinish(final FileOperationInfo fileOperationInfo) { - // no-op - } - - @Override - public void onFileCloseFinish(final FileOperationInfo fileOperationInfo) { - // no-op - } - - @Override - public boolean shouldBeNotifiedOnFileIO() { - return false; - } - - @Override - public boolean onErrorRecoveryBegin( - final BackgroundErrorReason backgroundErrorReason, final Status backgroundError) { - return true; - } - - /** - * Called from JNI, proxy for - * {@link #onErrorRecoveryBegin(BackgroundErrorReason, Status)}. 
- * - * @param reasonByte byte value representing error reason - * @param backgroundError status with error code - */ - private boolean onErrorRecoveryBeginProxy(final byte reasonByte, final Status backgroundError) { - return onErrorRecoveryBegin(BackgroundErrorReason.fromValue(reasonByte), backgroundError); - } - - @Override - public void onErrorRecoveryCompleted(final Status oldBackgroundError) { - // no-op - } - - @Override - protected long initializeNative(final long... nativeParameterHandles) { - return createNewEventListener(nativeParameterHandles[0]); - } - - /** - * Deletes underlying C++ native callback object pointer - */ - @Override - protected void disposeInternal() { - disposeInternal(nativeHandle_); - } - - private native long createNewEventListener(final long enabledEventCallbackValues); - private native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java b/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java deleted file mode 100644 index 173d63e90..000000000 --- a/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2016, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.concurrent.atomic.AtomicBoolean; - -/** - * Offers functionality for implementations of - * {@link AbstractNativeReference} which have an immutable reference to the - * underlying native C++ object - */ -//@ThreadSafe -public abstract class AbstractImmutableNativeReference - extends AbstractNativeReference { - - /** - * A flag indicating whether the current {@code AbstractNativeReference} is - * responsible to free the underlying C++ object - */ - protected final AtomicBoolean owningHandle_; - - protected AbstractImmutableNativeReference(final boolean owningHandle) { - this.owningHandle_ = new AtomicBoolean(owningHandle); - } - - @Override - public boolean isOwningHandle() { - return owningHandle_.get(); - } - - /** - * Releases this {@code AbstractNativeReference} from the responsibility of - * freeing the underlying native C++ object - *

- * This will prevent the object from attempting to delete the underlying - * native object in {@code close()}. This must be used when another object - * takes over ownership of the native object or both will attempt to delete - * the underlying object when closed. - * - * When {@code disOwnNativeHandle()} is called, {@code close()} will - * subsequently take no action. As a result, incorrect use of this function - * may cause a memory leak. - *

- */ - protected final void disOwnNativeHandle() { - owningHandle_.set(false); - } - - @Override - public void close() { - if (owningHandle_.compareAndSet(true, false)) { - disposeInternal(); - } - } - - /** - * The helper function of {@link AbstractImmutableNativeReference#close()} - * which all subclasses of {@code AbstractImmutableNativeReference} must - * implement to release their underlying native C++ objects. - */ - protected abstract void disposeInternal(); -} diff --git a/java/src/main/java/org/rocksdb/AbstractMutableOptions.java b/java/src/main/java/org/rocksdb/AbstractMutableOptions.java deleted file mode 100644 index 7189272b8..000000000 --- a/java/src/main/java/org/rocksdb/AbstractMutableOptions.java +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -import java.util.*; - -public abstract class AbstractMutableOptions { - - protected static final String KEY_VALUE_PAIR_SEPARATOR = ";"; - protected static final char KEY_VALUE_SEPARATOR = '='; - static final String INT_ARRAY_INT_SEPARATOR = ":"; - - protected final String[] keys; - private final String[] values; - - /** - * User must use builder pattern, or parser. - * - * @param keys the keys - * @param values the values - */ - protected AbstractMutableOptions(final String[] keys, final String[] values) { - this.keys = keys; - this.values = values; - } - - String[] getKeys() { - return keys; - } - - String[] getValues() { - return values; - } - - /** - * Returns a string representation of MutableOptions which - * is suitable for consumption by {@code #parse(String)}. - * - * @return String representation of MutableOptions - */ - @Override - public String toString() { - final StringBuilder buffer = new StringBuilder(); - for(int i = 0; i < keys.length; i++) { - buffer - .append(keys[i]) - .append(KEY_VALUE_SEPARATOR) - .append(values[i]); - - if(i + 1 < keys.length) { - buffer.append(KEY_VALUE_PAIR_SEPARATOR); - } - } - return buffer.toString(); - } - - public static abstract class AbstractMutableOptionsBuilder< - T extends AbstractMutableOptions, - U extends AbstractMutableOptionsBuilder, - K extends MutableOptionKey> { - - private final Map> options = new LinkedHashMap<>(); - private final List unknown = new ArrayList<>(); - - protected abstract U self(); - - /** - * Get all of the possible keys - * - * @return A map of all keys, indexed by name. - */ - protected abstract Map allKeys(); - - /** - * Construct a sub-class instance of {@link AbstractMutableOptions}. - * - * @param keys the keys - * @param values the values - * - * @return an instance of the options. 
- */ - protected abstract T build(final String[] keys, final String[] values); - - public T build() { - final String[] keys = new String[options.size()]; - final String[] values = new String[options.size()]; - - int i = 0; - for (final Map.Entry> option : options.entrySet()) { - keys[i] = option.getKey().name(); - values[i] = option.getValue().asString(); - i++; - } - - return build(keys, values); - } - - protected U setDouble( - final K key, final double value) { - if (key.getValueType() != MutableOptionKey.ValueType.DOUBLE) { - throw new IllegalArgumentException( - key + " does not accept a double value"); - } - options.put(key, MutableOptionValue.fromDouble(value)); - return self(); - } - - protected double getDouble(final K key) - throws NoSuchElementException, NumberFormatException { - final MutableOptionValue value = options.get(key); - if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); - } - return value.asDouble(); - } - - protected U setLong( - final K key, final long value) { - if(key.getValueType() != MutableOptionKey.ValueType.LONG) { - throw new IllegalArgumentException( - key + " does not accept a long value"); - } - options.put(key, MutableOptionValue.fromLong(value)); - return self(); - } - - protected long getLong(final K key) - throws NoSuchElementException, NumberFormatException { - final MutableOptionValue value = options.get(key); - if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); - } - return value.asLong(); - } - - protected U setInt( - final K key, final int value) { - if(key.getValueType() != MutableOptionKey.ValueType.INT) { - throw new IllegalArgumentException( - key + " does not accept an integer value"); - } - options.put(key, MutableOptionValue.fromInt(value)); - return self(); - } - - protected int getInt(final K key) - throws NoSuchElementException, NumberFormatException { - final MutableOptionValue value = options.get(key); - if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); - } - return value.asInt(); - } - - protected U setBoolean( - final K key, final boolean value) { - if(key.getValueType() != MutableOptionKey.ValueType.BOOLEAN) { - throw new IllegalArgumentException( - key + " does not accept a boolean value"); - } - options.put(key, MutableOptionValue.fromBoolean(value)); - return self(); - } - - protected boolean getBoolean(final K key) - throws NoSuchElementException, NumberFormatException { - final MutableOptionValue value = options.get(key); - if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); - } - return value.asBoolean(); - } - - protected U setIntArray( - final K key, final int[] value) { - if(key.getValueType() != MutableOptionKey.ValueType.INT_ARRAY) { - throw new IllegalArgumentException( - key + " does not accept an int array value"); - } - options.put(key, MutableOptionValue.fromIntArray(value)); - return self(); - } - - protected int[] getIntArray(final K key) - throws NoSuchElementException, NumberFormatException { - final MutableOptionValue value = options.get(key); - if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); - } - return value.asIntArray(); - } - - protected > U setEnum( - final K key, final N value) { - if(key.getValueType() != MutableOptionKey.ValueType.ENUM) { - throw new IllegalArgumentException( - key + " does not accept a Enum value"); - } - options.put(key, MutableOptionValue.fromEnum(value)); - return self(); - } - - 
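For context, the concrete builders in the public RocksJava API (for example MutableColumnFamilyOptions.builder(), which is outside this diff) funnel their named setters through the typed setLong/setBoolean/... helpers above, and build() flattens the collected entries into the parallel key/value arrays. A hedged usage sketch, assuming those public setter names:

import org.rocksdb.MutableColumnFamilyOptions;

public class MutableOptionsBuilderExample {
  public static void main(String[] args) {
    final MutableColumnFamilyOptions mutableOpts =
        MutableColumnFamilyOptions.builder()
            .setWriteBufferSize(64 * 1024 * 1024) // a LONG-typed key, stored via setLong(...)
            .setDisableAutoCompactions(true)      // a BOOLEAN-typed key, stored via setBoolean(...)
            .build();

    // toString() emits the "key=value;key=value" form described above.
    System.out.println(mutableOpts);
  }
}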
@SuppressWarnings("unchecked") - protected > N getEnum(final K key) - throws NoSuchElementException, NumberFormatException { - final MutableOptionValue value = options.get(key); - if (value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); - } - - if (!(value instanceof MutableOptionValue.MutableOptionEnumValue)) { - throw new NoSuchElementException(key.name() + " is not of Enum type"); - } - - return ((MutableOptionValue.MutableOptionEnumValue) value).asObject(); - } - - /** - * Parse a string into a long value, accepting values expressed as a double (such as 9.00) which - * are meant to be a long, not a double - * - * @param value the string containing a value which represents a long - * @return the long value of the parsed string - */ - private long parseAsLong(final String value) { - try { - return Long.parseLong(value); - } catch (NumberFormatException nfe) { - final double doubleValue = Double.parseDouble(value); - if (doubleValue != Math.round(doubleValue)) - throw new IllegalArgumentException("Unable to parse or round " + value + " to long"); - return Math.round(doubleValue); - } - } - - /** - * Parse a string into an int value, accepting values expressed as a double (such as 9.00) which - * are meant to be an int, not a double - * - * @param value the string containing a value which represents an int - * @return the int value of the parsed string - */ - private int parseAsInt(final String value) { - try { - return Integer.parseInt(value); - } catch (NumberFormatException nfe) { - final double doubleValue = Double.parseDouble(value); - if (doubleValue != Math.round(doubleValue)) - throw new IllegalArgumentException("Unable to parse or round " + value + " to int"); - return (int) Math.round(doubleValue); - } - } - - /** - * Constructs a builder for mutable column family options from a hierarchical parsed options - * string representation. The {@link OptionString.Parser} class output has been used to create a - * (name,value)-list; each value may be either a simple string or a (name, value)-list in turn. 
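The same options can also be rebuilt from that flattened string form. A sketch assuming the public MutableColumnFamilyOptions.parse(String) entry point (outside this diff), which feeds the fromParsed()/fromOptionString() path below; note that per parseAsLong/parseAsInt a numeric value such as "9.00" is accepted for an integral key and rounded:

import org.rocksdb.MutableColumnFamilyOptions;

public class MutableOptionsParseExample {
  public static void main(String[] args) {
    final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder =
        MutableColumnFamilyOptions.parse(
            "write_buffer_size=67108864;disable_auto_compactions=true");
    // Keys must be known mutable options; unknown keys are rejected by fromOptionString() above.
    System.out.println(builder.build());
  }
}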
- * - * @param options a list of parsed option string objects - * @param ignoreUnknown what to do if the key is not one of the keys we expect - * - * @return a builder with the values from the parsed input set - * - * @throws IllegalArgumentException if an option value is of the wrong type, or a key is empty - */ - protected U fromParsed(final List options, final boolean ignoreUnknown) { - Objects.requireNonNull(options); - - for (final OptionString.Entry option : options) { - try { - if (option.key.isEmpty()) { - throw new IllegalArgumentException("options string is invalid: " + option); - } - fromOptionString(option, ignoreUnknown); - } catch (NumberFormatException nfe) { - throw new IllegalArgumentException( - "" + option.key + "=" + option.value + " - not a valid value for its type", nfe); - } - } - - return self(); - } - - /** - * Set a value in the builder from the supplied option string - * - * @param option the option key/value to add to this builder - * @param ignoreUnknown if this is not set, throw an exception when a key is not in the known - * set - * @return the same object, after adding options - * @throws IllegalArgumentException if the key is unkown, or a value has the wrong type/form - */ - private U fromOptionString(final OptionString.Entry option, final boolean ignoreUnknown) - throws IllegalArgumentException { - Objects.requireNonNull(option.key); - Objects.requireNonNull(option.value); - - final K key = allKeys().get(option.key); - if (key == null && ignoreUnknown) { - unknown.add(option); - return self(); - } else if (key == null) { - throw new IllegalArgumentException("Key: " + key + " is not a known option key"); - } - - if (!option.value.isList()) { - throw new IllegalArgumentException( - "Option: " + key + " is not a simple value or list, don't know how to parse it"); - } - - // Check that simple values are the single item in the array - if (key.getValueType() != MutableOptionKey.ValueType.INT_ARRAY) { - { - if (option.value.list.size() != 1) { - throw new IllegalArgumentException( - "Simple value does not have exactly 1 item: " + option.value.list); - } - } - } - - final List valueStrs = option.value.list; - final String valueStr = valueStrs.get(0); - - switch (key.getValueType()) { - case DOUBLE: - return setDouble(key, Double.parseDouble(valueStr)); - - case LONG: - return setLong(key, parseAsLong(valueStr)); - - case INT: - return setInt(key, parseAsInt(valueStr)); - - case BOOLEAN: - return setBoolean(key, Boolean.parseBoolean(valueStr)); - - case INT_ARRAY: - final int[] value = new int[valueStrs.size()]; - for (int i = 0; i < valueStrs.size(); i++) { - value[i] = Integer.parseInt(valueStrs.get(i)); - } - return setIntArray(key, value); - - case ENUM: - String optionName = key.name(); - if (optionName.equals("prepopulate_blob_cache")) { - final PrepopulateBlobCache prepopulateBlobCache = - PrepopulateBlobCache.getFromInternal(valueStr); - return setEnum(key, prepopulateBlobCache); - } else if (optionName.equals("compression") - || optionName.equals("blob_compression_type")) { - final CompressionType compressionType = CompressionType.getFromInternal(valueStr); - return setEnum(key, compressionType); - } else { - throw new IllegalArgumentException("Unknown enum type: " + key.name()); - } - - default: - throw new IllegalStateException(key + " has unknown value type: " + key.getValueType()); - } - } - - /** - * - * @return the list of keys encountered which were not known to the type being generated - */ - public List getUnknown() { - return new 
ArrayList<>(unknown); - } - } -} diff --git a/java/src/main/java/org/rocksdb/AbstractNativeReference.java b/java/src/main/java/org/rocksdb/AbstractNativeReference.java deleted file mode 100644 index 88b2963b6..000000000 --- a/java/src/main/java/org/rocksdb/AbstractNativeReference.java +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2016, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * AbstractNativeReference is the base-class of all RocksDB classes that have - * a pointer to a native C++ {@code rocksdb} object. - *

- * AbstractNativeReference has the {@link AbstractNativeReference#close()} - * method, which frees its associated C++ object. - * - * This function should be called manually, or even better, called implicitly using a - * try-with-resources - * statement, when you are finished with the object. It is no longer - * called automatically during the regular Java GC process via - * {@link AbstractNativeReference#finalize()}. - * - * Explanatory note - When or if the Garbage Collector calls {@link Object#finalize()} - * depends on the JVM implementation and system conditions, which the programmer - * cannot control. In addition, the GC cannot see through the native reference - * long member variable (which is the C++ pointer value to the native object), - * and cannot know what other resources depend on it. - *

- */ -public abstract class AbstractNativeReference implements AutoCloseable { - /** - * Returns true if we are responsible for freeing the underlying C++ object - * - * @return true if we are responsible to free the C++ object - */ - protected abstract boolean isOwningHandle(); - - /** - * Frees the underlying C++ object - *

- * It is strongly recommended that the developer call this after they - * have finished using the object. - * - * Note that once an instance of {@link AbstractNativeReference} has been - * closed, calling any of its functions will lead to undefined - * behavior.

- */ - @Override public abstract void close(); -} diff --git a/java/src/main/java/org/rocksdb/AbstractRocksIterator.java b/java/src/main/java/org/rocksdb/AbstractRocksIterator.java deleted file mode 100644 index 1aade1b89..000000000 --- a/java/src/main/java/org/rocksdb/AbstractRocksIterator.java +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - * Base class implementation for Rocks Iterators - * in the Java API - * - *

Multiple threads can invoke const methods on a RocksIterator without - * external synchronization, but if any of the threads may call a - * non-const method, all threads accessing the same RocksIterator must use - * external synchronization. - * - * @param <P> The type of the Parent Object from which the Rocks Iterator was - * created. This is used by disposeInternal to avoid double-free - * issues with the underlying C++ object. - * @see org.rocksdb.RocksObject - */ -public abstract class AbstractRocksIterator<P extends RocksObject>

- extends RocksObject implements RocksIteratorInterface { - final P parent_; - - protected AbstractRocksIterator(final P parent, - final long nativeHandle) { - super(nativeHandle); - // parent must point to a valid RocksDB instance. - assert (parent != null); - // RocksIterator must hold a reference to the related parent instance - // to guarantee that while a GC cycle starts RocksIterator instances - // are freed prior to parent instances. - parent_ = parent; - } - - @Override - public boolean isValid() { - assert (isOwningHandle()); - return isValid0(nativeHandle_); - } - - @Override - public void seekToFirst() { - assert (isOwningHandle()); - seekToFirst0(nativeHandle_); - } - - @Override - public void seekToLast() { - assert (isOwningHandle()); - seekToLast0(nativeHandle_); - } - - @Override - public void seek(final byte[] target) { - assert (isOwningHandle()); - seek0(nativeHandle_, target, target.length); - } - - @Override - public void seekForPrev(final byte[] target) { - assert (isOwningHandle()); - seekForPrev0(nativeHandle_, target, target.length); - } - - @Override - public void seek(final ByteBuffer target) { - assert (isOwningHandle()); - if (target.isDirect()) { - seekDirect0(nativeHandle_, target, target.position(), target.remaining()); - } else { - seekByteArray0(nativeHandle_, target.array(), target.arrayOffset() + target.position(), - target.remaining()); - } - target.position(target.limit()); - } - - @Override - public void seekForPrev(final ByteBuffer target) { - assert (isOwningHandle()); - if (target.isDirect()) { - seekForPrevDirect0(nativeHandle_, target, target.position(), target.remaining()); - } else { - seekForPrevByteArray0(nativeHandle_, target.array(), target.arrayOffset() + target.position(), - target.remaining()); - } - target.position(target.limit()); - } - - @Override - public void next() { - assert (isOwningHandle()); - next0(nativeHandle_); - } - - @Override - public void prev() { - assert (isOwningHandle()); - prev0(nativeHandle_); - } - - @Override - public void refresh() throws RocksDBException { - assert (isOwningHandle()); - refresh0(nativeHandle_); - } - - @Override - public void status() throws RocksDBException { - assert (isOwningHandle()); - status0(nativeHandle_); - } - - /** - *

Deletes underlying C++ iterator pointer. - * - * Note: the underlying handle can only be safely deleted if the parent - * instance related to a certain RocksIterator is still valid and initialized. - * Therefore {@code disposeInternal()} checks if the parent is initialized - * before freeing the native handle.

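In practice that ordering requirement is met by scoping the iterator inside the database's lifetime, for example with try-with-resources, so the iterator is closed (and its native handle freed) while the parent RocksDB handle is still owning. A sketch against the public RocksJava API (the path and key handling are illustrative):

import java.nio.charset.StandardCharsets;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.RocksIterator;

public class IteratorScanExample {
  public static void main(String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/iterator-example");
         final RocksIterator it = db.newIterator()) {
      // Resources close in reverse order, so the iterator is disposed
      // before the database, exactly as disposeInternal() expects.
      for (it.seekToFirst(); it.isValid(); it.next()) {
        System.out.println(new String(it.key(), StandardCharsets.UTF_8) + " => "
            + new String(it.value(), StandardCharsets.UTF_8));
      }
      it.status(); // surfaces any error hidden behind isValid() == false
    }
  }
}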
- */ - @Override - protected void disposeInternal() { - if (parent_.isOwningHandle()) { - disposeInternal(nativeHandle_); - } - } - - abstract boolean isValid0(long handle); - abstract void seekToFirst0(long handle); - abstract void seekToLast0(long handle); - abstract void next0(long handle); - abstract void prev0(long handle); - abstract void refresh0(long handle) throws RocksDBException; - abstract void seek0(long handle, byte[] target, int targetLen); - abstract void seekForPrev0(long handle, byte[] target, int targetLen); - abstract void seekDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen); - abstract void seekForPrevDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen); - abstract void seekByteArray0(long handle, byte[] target, int targetOffset, int targetLen); - abstract void seekForPrevByteArray0(long handle, byte[] target, int targetOffset, int targetLen); - - abstract void status0(long handle) throws RocksDBException; -} diff --git a/java/src/main/java/org/rocksdb/AbstractSlice.java b/java/src/main/java/org/rocksdb/AbstractSlice.java deleted file mode 100644 index 5a22e2956..000000000 --- a/java/src/main/java/org/rocksdb/AbstractSlice.java +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Slices are used by RocksDB to provide - * efficient access to keys and values. - * - * This class is package private, implementers - * should extend either of the public abstract classes: - * @see org.rocksdb.Slice - * @see org.rocksdb.DirectSlice - * - * Regards the lifecycle of Java Slices in RocksDB: - * At present when you configure a Comparator from Java, it creates an - * instance of a C++ BaseComparatorJniCallback subclass and - * passes that to RocksDB as the comparator. That subclass of - * BaseComparatorJniCallback creates the Java - * @see org.rocksdb.AbstractSlice subclass Objects. When you dispose - * the Java @see org.rocksdb.AbstractComparator subclass, it disposes the - * C++ BaseComparatorJniCallback subclass, which in turn destroys the - * Java @see org.rocksdb.AbstractSlice subclass Objects. - */ -public abstract class AbstractSlice extends RocksMutableObject { - - protected AbstractSlice() { - super(); - } - - protected AbstractSlice(final long nativeHandle) { - super(nativeHandle); - } - - /** - * Returns the data of the slice. - * - * @return The slice data. Note, the type of access is - * determined by the subclass - * @see org.rocksdb.AbstractSlice#data0(long) - */ - public T data() { - return data0(getNativeHandle()); - } - - /** - * Access to the data is provided by the - * subtype as it needs to handle the - * generic typing. - * - * @param handle The address of the underlying - * native object. - * - * @return Java typed access to the data. - */ - protected abstract T data0(long handle); - - /** - * Drops the specified {@code n} - * number of bytes from the start - * of the backing slice - * - * @param n The number of bytes to drop - */ - public abstract void removePrefix(final int n); - - /** - * Clears the backing slice - */ - public abstract void clear(); - - /** - * Return the length (in bytes) of the data. - * - * @return The length in bytes. 
- */ - public int size() { - return size0(getNativeHandle()); - } - - /** - * Return true if the length of the - * data is zero. - * - * @return true if there is no data, false otherwise. - */ - public boolean empty() { - return empty0(getNativeHandle()); - } - - /** - * Creates a string representation of the data - * - * @param hex When true, the representation - * will be encoded in hexadecimal. - * - * @return The string representation of the data. - */ - public String toString(final boolean hex) { - return toString0(getNativeHandle(), hex); - } - - @Override - public String toString() { - return toString(false); - } - - /** - * Three-way key comparison - * - * @param other A slice to compare against - * - * @return Should return either: - * 1) < 0 if this < other - * 2) == 0 if this == other - * 3) > 0 if this > other - */ - public int compare(final AbstractSlice other) { - assert (other != null); - if(!isOwningHandle()) { - return other.isOwningHandle() ? -1 : 0; - } else { - if(!other.isOwningHandle()) { - return 1; - } else { - return compare0(getNativeHandle(), other.getNativeHandle()); - } - } - } - - @Override - public int hashCode() { - return toString().hashCode(); - } - - /** - * If other is a slice object, then - * we defer to {@link #compare(AbstractSlice) compare} - * to check equality, otherwise we return false. - * - * @param other Object to test for equality - * - * @return true when {@code this.compare(other) == 0}, - * false otherwise. - */ - @Override - public boolean equals(final Object other) { - if (other != null && other instanceof AbstractSlice) { - return compare((AbstractSlice)other) == 0; - } else { - return false; - } - } - - /** - * Determines whether this slice starts with - * another slice - * - * @param prefix Another slice which may of may not - * be a prefix of this slice. - * - * @return true when this slice starts with the - * {@code prefix} slice - */ - public boolean startsWith(final AbstractSlice prefix) { - if (prefix != null) { - return startsWith0(getNativeHandle(), prefix.getNativeHandle()); - } else { - return false; - } - } - - protected native static long createNewSliceFromString(final String str); - private native int size0(long handle); - private native boolean empty0(long handle); - private native String toString0(long handle, boolean hex); - private native int compare0(long handle, long otherHandle); - private native boolean startsWith0(long handle, long otherHandle); - - /** - * Deletes underlying C++ slice pointer. - * Note that this function should be called only after all - * RocksDB instances referencing the slice are closed. - * Otherwise an undefined behavior will occur. - */ - @Override - protected final native void disposeInternal(final long handle); - -} diff --git a/java/src/main/java/org/rocksdb/AbstractTableFilter.java b/java/src/main/java/org/rocksdb/AbstractTableFilter.java deleted file mode 100644 index c696c3e13..000000000 --- a/java/src/main/java/org/rocksdb/AbstractTableFilter.java +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -/** - * Base class for Table Filters. - */ -public abstract class AbstractTableFilter - extends RocksCallbackObject implements TableFilter { - - protected AbstractTableFilter() { - super(); - } - - @Override - protected long initializeNative(final long... 
nativeParameterHandles) { - return createNewTableFilter(); - } - - private native long createNewTableFilter(); -} diff --git a/java/src/main/java/org/rocksdb/AbstractTraceWriter.java b/java/src/main/java/org/rocksdb/AbstractTraceWriter.java deleted file mode 100644 index 806709b1f..000000000 --- a/java/src/main/java/org/rocksdb/AbstractTraceWriter.java +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Base class for TraceWriters. - */ -public abstract class AbstractTraceWriter - extends RocksCallbackObject implements TraceWriter { - - @Override - protected long initializeNative(final long... nativeParameterHandles) { - return createNewTraceWriter(); - } - - /** - * Called from JNI, proxy for {@link TraceWriter#write(Slice)}. - * - * @param sliceHandle the native handle of the slice (which we do not own) - * - * @return short (2 bytes) where the first byte is the - * {@link Status.Code#getValue()} and the second byte is the - * {@link Status.SubCode#getValue()}. - */ - private short writeProxy(final long sliceHandle) { - try { - write(new Slice(sliceHandle)); - return statusToShort(Status.Code.Ok, Status.SubCode.None); - } catch (final RocksDBException e) { - return statusToShort(e.getStatus()); - } - } - - /** - * Called from JNI, proxy for {@link TraceWriter#closeWriter()}. - * - * @return short (2 bytes) where the first byte is the - * {@link Status.Code#getValue()} and the second byte is the - * {@link Status.SubCode#getValue()}. - */ - private short closeWriterProxy() { - try { - closeWriter(); - return statusToShort(Status.Code.Ok, Status.SubCode.None); - } catch (final RocksDBException e) { - return statusToShort(e.getStatus()); - } - } - - private static short statusToShort(/*@Nullable*/ final Status status) { - final Status.Code code = status != null && status.getCode() != null - ? status.getCode() - : Status.Code.IOError; - final Status.SubCode subCode = status != null && status.getSubCode() != null - ? status.getSubCode() - : Status.SubCode.None; - return statusToShort(code, subCode); - } - - private static short statusToShort(final Status.Code code, - final Status.SubCode subCode) { - short result = (short)(code.getValue() << 8); - return (short)(result | subCode.getValue()); - } - - private native long createNewTraceWriter(); -} diff --git a/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java b/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java deleted file mode 100644 index cbb49836d..000000000 --- a/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Provides notification to the caller of SetSnapshotOnNextOperation when - * the actual snapshot gets created - */ -public abstract class AbstractTransactionNotifier - extends RocksCallbackObject { - - protected AbstractTransactionNotifier() { - super(); - } - - /** - * Implement this method to receive notification when a snapshot is - * requested via {@link Transaction#setSnapshotOnNextOperation()}. 
- * - * @param newSnapshot the snapshot that has been created. - */ - public abstract void snapshotCreated(final Snapshot newSnapshot); - - /** - * This is intentionally private as it is the callback hook - * from JNI - */ - private void snapshotCreated(final long snapshotHandle) { - snapshotCreated(new Snapshot(snapshotHandle)); - } - - @Override - protected long initializeNative(final long... nativeParameterHandles) { - return createNewTransactionNotifier(); - } - - private native long createNewTransactionNotifier(); - - /** - * Deletes underlying C++ TransactionNotifier pointer. - * - * Note that this function should be called only after all - * Transactions referencing the comparator are closed. - * Otherwise an undefined behavior will occur. - */ - @Override - protected void disposeInternal() { - disposeInternal(nativeHandle_); - } - protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/AbstractWalFilter.java b/java/src/main/java/org/rocksdb/AbstractWalFilter.java deleted file mode 100644 index d525045c6..000000000 --- a/java/src/main/java/org/rocksdb/AbstractWalFilter.java +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Base class for WAL Filters. - */ -public abstract class AbstractWalFilter - extends RocksCallbackObject implements WalFilter { - - @Override - protected long initializeNative(final long... nativeParameterHandles) { - return createNewWalFilter(); - } - - /** - * Called from JNI, proxy for - * {@link WalFilter#logRecordFound(long, String, WriteBatch, WriteBatch)}. - * - * @param logNumber the log handle. - * @param logFileName the log file name - * @param batchHandle the native handle of a WriteBatch (which we do not own) - * @param newBatchHandle the native handle of a - * new WriteBatch (which we do not own) - * - * @return short (2 bytes) where the first byte is the - * {@link WalFilter.LogRecordFoundResult#walProcessingOption} - * {@link WalFilter.LogRecordFoundResult#batchChanged}. - */ - private short logRecordFoundProxy(final long logNumber, - final String logFileName, final long batchHandle, - final long newBatchHandle) { - final LogRecordFoundResult logRecordFoundResult = logRecordFound( - logNumber, logFileName, new WriteBatch(batchHandle), - new WriteBatch(newBatchHandle)); - return logRecordFoundResultToShort(logRecordFoundResult); - } - - private static short logRecordFoundResultToShort( - final LogRecordFoundResult logRecordFoundResult) { - short result = (short)(logRecordFoundResult.walProcessingOption.getValue() << 8); - return (short)(result | (logRecordFoundResult.batchChanged ? 1 : 0)); - } - - private native long createNewWalFilter(); -} diff --git a/java/src/main/java/org/rocksdb/AbstractWriteBatch.java b/java/src/main/java/org/rocksdb/AbstractWriteBatch.java deleted file mode 100644 index 9527a2fd9..000000000 --- a/java/src/main/java/org/rocksdb/AbstractWriteBatch.java +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
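Both AbstractTraceWriter and AbstractWalFilter above hand their results back to C++ packed into a single short: the primary value (Status.Code or WalProcessingOption) in the high byte and the secondary value (Status.SubCode or the batchChanged flag) in the low byte. A stand-alone sketch of that packing arithmetic (it does not depend on RocksJava):

public class ShortPackingExample {
  // Mirrors statusToShort()/logRecordFoundResultToShort(): high byte << 8, OR'd with the low byte.
  static short pack(final byte high, final byte low) {
    return (short) ((high << 8) | (low & 0xFF));
  }

  public static void main(String[] args) {
    final short packed = pack((byte) 0x2, (byte) 0x1);
    final int high = (packed >> 8) & 0xFF;
    final int low = packed & 0xFF;
    System.out.printf("packed=0x%04X high=%d low=%d%n", packed, high, low);
  }
}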
- -package org.rocksdb; - -import java.nio.ByteBuffer; - -public abstract class AbstractWriteBatch extends RocksObject - implements WriteBatchInterface { - - protected AbstractWriteBatch(final long nativeHandle) { - super(nativeHandle); - } - - @Override - public int count() { - return count0(nativeHandle_); - } - - @Override - public void put(byte[] key, byte[] value) throws RocksDBException { - put(nativeHandle_, key, key.length, value, value.length); - } - - @Override - public void put(ColumnFamilyHandle columnFamilyHandle, byte[] key, - byte[] value) throws RocksDBException { - put(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_); - } - - @Override - public void merge(byte[] key, byte[] value) throws RocksDBException { - merge(nativeHandle_, key, key.length, value, value.length); - } - - @Override - public void merge(ColumnFamilyHandle columnFamilyHandle, byte[] key, - byte[] value) throws RocksDBException { - merge(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_); - } - - @Override - public void put(final ByteBuffer key, final ByteBuffer value) throws RocksDBException { - assert key.isDirect() && value.isDirect(); - putDirect(nativeHandle_, key, key.position(), key.remaining(), value, value.position(), - value.remaining(), 0); - key.position(key.limit()); - value.position(value.limit()); - } - - @Override - public void put(ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, - final ByteBuffer value) throws RocksDBException { - assert key.isDirect() && value.isDirect(); - putDirect(nativeHandle_, key, key.position(), key.remaining(), value, value.position(), - value.remaining(), columnFamilyHandle.nativeHandle_); - key.position(key.limit()); - value.position(value.limit()); - } - - @Override - public void delete(byte[] key) throws RocksDBException { - delete(nativeHandle_, key, key.length); - } - - @Override - public void delete(ColumnFamilyHandle columnFamilyHandle, byte[] key) - throws RocksDBException { - delete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); - } - - @Override - public void delete(final ByteBuffer key) throws RocksDBException { - deleteDirect(nativeHandle_, key, key.position(), key.remaining(), 0); - key.position(key.limit()); - } - - @Override - public void delete(ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key) - throws RocksDBException { - deleteDirect( - nativeHandle_, key, key.position(), key.remaining(), columnFamilyHandle.nativeHandle_); - key.position(key.limit()); - } - - @Override - public void singleDelete(byte[] key) throws RocksDBException { - singleDelete(nativeHandle_, key, key.length); - } - - @Override - public void singleDelete(ColumnFamilyHandle columnFamilyHandle, byte[] key) - throws RocksDBException { - singleDelete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); - } - - @Override - public void deleteRange(byte[] beginKey, byte[] endKey) - throws RocksDBException { - deleteRange(nativeHandle_, beginKey, beginKey.length, endKey, endKey.length); - } - - @Override - public void deleteRange(ColumnFamilyHandle columnFamilyHandle, - byte[] beginKey, byte[] endKey) throws RocksDBException { - deleteRange(nativeHandle_, beginKey, beginKey.length, endKey, endKey.length, - columnFamilyHandle.nativeHandle_); - } - - @Override - public void putLogData(byte[] blob) throws RocksDBException { - putLogData(nativeHandle_, blob, blob.length); - } - - @Override - public void clear() { - clear0(nativeHandle_); - } - - @Override 
- public void setSavePoint() { - setSavePoint0(nativeHandle_); - } - - @Override - public void rollbackToSavePoint() throws RocksDBException { - rollbackToSavePoint0(nativeHandle_); - } - - @Override - public void popSavePoint() throws RocksDBException { - popSavePoint(nativeHandle_); - } - - @Override - public void setMaxBytes(final long maxBytes) { - setMaxBytes(nativeHandle_, maxBytes); - } - - @Override - public WriteBatch getWriteBatch() { - return getWriteBatch(nativeHandle_); - } - - abstract int count0(final long handle); - - abstract void put(final long handle, final byte[] key, final int keyLen, - final byte[] value, final int valueLen) throws RocksDBException; - - abstract void put(final long handle, final byte[] key, final int keyLen, - final byte[] value, final int valueLen, final long cfHandle) - throws RocksDBException; - - abstract void putDirect(final long handle, final ByteBuffer key, final int keyOffset, - final int keyLength, final ByteBuffer value, final int valueOffset, final int valueLength, - final long cfHandle) throws RocksDBException; - - abstract void merge(final long handle, final byte[] key, final int keyLen, - final byte[] value, final int valueLen) throws RocksDBException; - - abstract void merge(final long handle, final byte[] key, final int keyLen, - final byte[] value, final int valueLen, final long cfHandle) - throws RocksDBException; - - abstract void delete(final long handle, final byte[] key, - final int keyLen) throws RocksDBException; - - abstract void delete(final long handle, final byte[] key, - final int keyLen, final long cfHandle) throws RocksDBException; - - abstract void singleDelete(final long handle, final byte[] key, final int keyLen) - throws RocksDBException; - - abstract void singleDelete(final long handle, final byte[] key, final int keyLen, - final long cfHandle) throws RocksDBException; - - abstract void deleteDirect(final long handle, final ByteBuffer key, final int keyOffset, - final int keyLength, final long cfHandle) throws RocksDBException; - - abstract void deleteRange(final long handle, final byte[] beginKey, final int beginKeyLen, - final byte[] endKey, final int endKeyLen) throws RocksDBException; - - abstract void deleteRange(final long handle, final byte[] beginKey, final int beginKeyLen, - final byte[] endKey, final int endKeyLen, final long cfHandle) throws RocksDBException; - - abstract void putLogData(final long handle, final byte[] blob, - final int blobLen) throws RocksDBException; - - abstract void clear0(final long handle); - - abstract void setSavePoint0(final long handle); - - abstract void rollbackToSavePoint0(final long handle); - - abstract void popSavePoint(final long handle) throws RocksDBException; - - abstract void setMaxBytes(final long handle, long maxBytes); - - abstract WriteBatch getWriteBatch(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/AccessHint.java b/java/src/main/java/org/rocksdb/AccessHint.java deleted file mode 100644 index 877c4ab39..000000000 --- a/java/src/main/java/org/rocksdb/AccessHint.java +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
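A short usage sketch of the WriteBatch API implemented above (database path and keys are illustrative): mutations are buffered in the native batch, a savepoint can roll back part of it, and db.write() applies what remains atomically.

import java.nio.charset.StandardCharsets;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;

public class WriteBatchExample {
  public static void main(String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/writebatch-example");
         final WriteBatch batch = new WriteBatch();
         final WriteOptions writeOptions = new WriteOptions()) {
      batch.put("k1".getBytes(StandardCharsets.UTF_8), "v1".getBytes(StandardCharsets.UTF_8));
      batch.delete("k0".getBytes(StandardCharsets.UTF_8));
      batch.setSavePoint();
      batch.put("k2".getBytes(StandardCharsets.UTF_8), "v2".getBytes(StandardCharsets.UTF_8));
      batch.rollbackToSavePoint(); // discards the k2 put, keeps the earlier operations
      db.write(writeOptions, batch); // the remaining batch is applied atomically
    }
  }
}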
- -package org.rocksdb; - -/** - * File access pattern once a compaction has started - */ -public enum AccessHint { - NONE((byte)0x0), - NORMAL((byte)0x1), - SEQUENTIAL((byte)0x2), - WILLNEED((byte)0x3); - - private final byte value; - - AccessHint(final byte value) { - this.value = value; - } - - /** - *

Returns the byte value of this enumeration value. - * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - * Get the AccessHint enumeration value by - * passing the byte identifier to this method.

- * - * @param byteIdentifier of AccessHint. - * - * @return AccessHint instance. - * - * @throws IllegalArgumentException if the access hint for the byteIdentifier - * cannot be found - */ - public static AccessHint getAccessHint(final byte byteIdentifier) { - for (final AccessHint accessHint : AccessHint.values()) { - if (accessHint.getValue() == byteIdentifier) { - return accessHint; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for AccessHint."); - } -} diff --git a/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java b/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java deleted file mode 100644 index 5338bc42d..000000000 --- a/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.List; - -/** - * Advanced Column Family Options which are not - * mutable (i.e. present in {@link AdvancedMutableColumnFamilyOptionsInterface} - * - * Taken from include/rocksdb/advanced_options.h - */ -public interface AdvancedColumnFamilyOptionsInterface< - T extends AdvancedColumnFamilyOptionsInterface> { - /** - * The minimum number of write buffers that will be merged together - * before writing to storage. If set to 1, then - * all write buffers are flushed to L0 as individual files and this increases - * read amplification because a get request has to check in all of these - * files. Also, an in-memory merge may result in writing lesser - * data to storage if there are duplicate records in each of these - * individual write buffers. Default: 1 - * - * @param minWriteBufferNumberToMerge the minimum number of write buffers - * that will be merged together. - * @return the reference to the current options. - */ - T setMinWriteBufferNumberToMerge( - int minWriteBufferNumberToMerge); - - /** - * The minimum number of write buffers that will be merged together - * before writing to storage. If set to 1, then - * all write buffers are flushed to L0 as individual files and this increases - * read amplification because a get request has to check in all of these - * files. Also, an in-memory merge may result in writing lesser - * data to storage if there are duplicate records in each of these - * individual write buffers. Default: 1 - * - * @return the minimum number of write buffers that will be merged together. - */ - int minWriteBufferNumberToMerge(); - - /** - * The total maximum number of write buffers to maintain in memory including - * copies of buffers that have already been flushed. Unlike - * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}, - * this parameter does not affect flushing. - * This controls the minimum amount of write history that will be available - * in memory for conflict checking when Transactions are used. - * - * When using an OptimisticTransactionDB: - * If this value is too low, some transactions may fail at commit time due - * to not being able to determine whether there were any write conflicts. - * - * When using a TransactionDB: - * If Transaction::SetSnapshot is used, TransactionDB will read either - * in-memory write buffers or SST files to do write-conflict checking. 
- * Increasing this value can reduce the number of reads to SST files - * done for conflict detection. - * - * Setting this value to 0 will cause write buffers to be freed immediately - * after they are flushed. - * If this value is set to -1, - * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()} - * will be used. - * - * Default: - * If using a TransactionDB/OptimisticTransactionDB, the default value will - * be set to the value of - * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()} - * if it is not explicitly set by the user. Otherwise, the default is 0. - * - * @param maxWriteBufferNumberToMaintain The maximum number of write - * buffers to maintain - * - * @return the reference to the current options. - */ - T setMaxWriteBufferNumberToMaintain( - int maxWriteBufferNumberToMaintain); - - /** - * The total maximum number of write buffers to maintain in memory including - * copies of buffers that have already been flushed. - * - * @return maxWriteBufferNumberToMaintain The maximum number of write buffers - * to maintain - */ - int maxWriteBufferNumberToMaintain(); - - /** - * Allows thread-safe inplace updates. - * If inplace_callback function is not set, - * Put(key, new_value) will update inplace the existing_value iff - * * key exists in current memtable - * * new sizeof(new_value) ≤ sizeof(existing_value) - * * existing_value for that key is a put i.e. kTypeValue - * If inplace_callback function is set, check doc for inplace_callback. - * Default: false. - * - * @param inplaceUpdateSupport true if thread-safe inplace updates - * are allowed. - * @return the reference to the current options. - */ - T setInplaceUpdateSupport( - boolean inplaceUpdateSupport); - - /** - * Allows thread-safe inplace updates. - * If inplace_callback function is not set, - * Put(key, new_value) will update inplace the existing_value iff - * * key exists in current memtable - * * new sizeof(new_value) ≤ sizeof(existing_value) - * * existing_value for that key is a put i.e. kTypeValue - * If inplace_callback function is set, check doc for inplace_callback. - * Default: false. - * - * @return true if thread-safe inplace updates are allowed. - */ - boolean inplaceUpdateSupport(); - - /** - * Control locality of bloom filter probes to improve cache miss rate. - * This option only applies to memtable prefix bloom and plaintable - * prefix bloom. It essentially limits the max number of cache lines each - * bloom filter check can touch. - * This optimization is turned off when set to 0. The number should never - * be greater than number of probes. This option can boost performance - * for in-memory workload but should use with care since it can cause - * higher false positive rate. - * Default: 0 - * - * @param bloomLocality the level of locality of bloom-filter probes. - * @return the reference to the current options. - */ - T setBloomLocality(int bloomLocality); - - /** - * Control locality of bloom filter probes to improve cache miss rate. - * This option only applies to memtable prefix bloom and plaintable - * prefix bloom. It essentially limits the max number of cache lines each - * bloom filter check can touch. - * This optimization is turned off when set to 0. The number should never - * be greater than number of probes. This option can boost performance - * for in-memory workload but should use with care since it can cause - * higher false positive rate. - * Default: 0 - * - * @return the level of locality of bloom-filter probes. 
- * @see #setBloomLocality(int) - */ - int bloomLocality(); - - /** - *
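A hedged sketch of how the options above might be applied through org.rocksdb.ColumnFamilyOptions, which implements this interface; the values are illustrative only, not recommendations:

    // import org.rocksdb.ColumnFamilyOptions;
    try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
      cfOpts.setMinWriteBufferNumberToMerge(2)     // merge two memtables before flushing to L0
            .setMaxWriteBufferNumberToMaintain(4)  // keep flushed buffers for write-conflict checking
            .setInplaceUpdateSupport(false)        // thread-safe in-place updates stay disabled
            .setBloomLocality(1);                  // limit memtable prefix bloom probes to one cache line
    }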

Different levels can have different compression - * policies. There are cases where most lower levels - * would like to use quick compression algorithms while - * the higher levels (which have more data) use - * compression algorithms that have better compression - * but could be slower. This array, if non-empty, should - * have an entry for each level of the database; - * these override the value specified in the previous - * field 'compression'.

- * - * NOTICE - *

If {@code level_compaction_dynamic_level_bytes=true}, - * {@code compression_per_level[0]} still determines {@code L0}, - * but other elements of the array are based on base level - * (the level {@code L0} files are merged to), and may not - * match the level users see from info log for metadata. - *

- *

If {@code L0} files are merged to {@code level - n}, - * then, for {@code i>0}, {@code compression_per_level[i]} - * determines compaction type for level {@code n+i-1}.

- * - * Example - *

For example, if we have 5 levels, and we decide to - * merge {@code L0} data to {@code L4} (which means {@code L1..L3} - * will be empty), then the new files that go to {@code L4} use - * compression type {@code compression_per_level[1]}.

- * - *

If {@code L0} is now merged to {@code L2}, data that goes to - * {@code L2} will be compressed according to - * {@code compression_per_level[1]}, {@code L3} using - * {@code compression_per_level[2]} and {@code L4} using - * {@code compression_per_level[3]}. The compression type used for each - * level can change as the data grows.

- * - *

Default: empty
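As a rough usage sketch (not from the original source), per-level compression might be configured like this; the particular CompressionType choices are only an example:

    // import java.util.Arrays; import java.util.List;
    // import org.rocksdb.ColumnFamilyOptions; import org.rocksdb.CompressionType;
    final List<CompressionType> perLevel = Arrays.asList(
        CompressionType.NO_COMPRESSION,    // L0 (or the base level, see the notice above)
        CompressionType.LZ4_COMPRESSION,
        CompressionType.LZ4_COMPRESSION,
        CompressionType.ZSTD_COMPRESSION,
        CompressionType.ZSTD_COMPRESSION);
    try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
      cfOpts.setNumLevels(5)
            .setCompressionPerLevel(perLevel);
    }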

- * - * @param compressionLevels list of - * {@link org.rocksdb.CompressionType} instances. - * - * @return the reference to the current options. - */ - T setCompressionPerLevel( - List compressionLevels); - - /** - *

Return the currently set {@link org.rocksdb.CompressionType} - * instances per level.

- * - *

See: {@link #setCompressionPerLevel(java.util.List)}

- * - * @return list of {@link org.rocksdb.CompressionType} - * instances. - */ - List compressionPerLevel(); - - /** - * Set the number of levels for this database - * If level-styled compaction is used, then this number determines - * the total number of levels. - * - * @param numLevels the number of levels. - * @return the reference to the current options. - */ - T setNumLevels(int numLevels); - - /** - * If level-styled compaction is used, then this number determines - * the total number of levels. - * - * @return the number of levels. - */ - int numLevels(); - - /** - *

If {@code true}, RocksDB will pick the target size of each level - * dynamically. We will pick a base level b >= 1. L0 will be - * directly merged into level b, instead of always into level 1. - * Levels 1 to b-1 need to be empty. We try to pick b and its target - * size so that

- * - *
    - *
  1. target size is in the range of - * (max_bytes_for_level_base / max_bytes_for_level_multiplier, - * max_bytes_for_level_base]
  2. target size of the last level (level num_levels-1) equals the extra size - * of the level.
- * - *

At the same time max_bytes_for_level_multiplier and - * max_bytes_for_level_multiplier_additional are still satisfied.

- * - *

With this option on, starting from an empty DB, we make the last level the base - * level, which means merging L0 data into the last level, until it exceeds - * max_bytes_for_level_base. Then we make the second-to-last level the - * base level and start merging L0 data into it, with its - * target size set to {@code 1/max_bytes_for_level_multiplier} of the last - * level's extra size. As the data accumulates further, the base level moves - * to the third-to-last level, and so on.

- * - *

Example

- * - *

For example, assume {@code max_bytes_for_level_multiplier=10}, - * {@code num_levels=6}, and {@code max_bytes_for_level_base=10MB}.

- * - *

Target sizes of levels 1 to 5 start as:

- * {@code [- - - - 10MB]} - *

with the base level being level 5. Target sizes of levels 1 to 4 are not applicable - * because they will not be used. - * When the size of level 5 grows to more than 10MB, say 11MB, we make - * level 4 the base level and the targets now look like:

- * {@code [- - - 1.1MB 11MB]} - *

As data accumulates, the size targets are tuned based on the actual data - * in level 5. When level 5 holds 50MB of data, the targets look like:

- * {@code [- - - 5MB 50MB]} - *

When level 5's actual size grows beyond 100MB, say to 101MB, keeping - * level 4 as the base level would require its target size to be 10.1MB, - * which doesn't satisfy the target size range. So we make level 3 - * the base level and the target sizes of the levels look like:

- * {@code [- - 1.01MB 10.1MB 101MB]} - *

In the same way, as level 5 grows further, all levels' targets grow, - * like

- * {@code [- - 5MB 50MB 500MB]} - *

Once level 5 exceeds 1000MB and reaches, say, 1001MB, we make level 2 the - * base level and the levels' target sizes become:

- * {@code [- 1.001MB 10.01MB 100.1MB 1001MB]} - *

and go on...

- * - *

By doing this, we give {@code max_bytes_for_level_multiplier} priority - * over {@code max_bytes_for_level_base}, for a more predictable LSM tree - * shape. It is useful for limiting worst-case space amplification.

- * - *

{@code max_bytes_for_level_multiplier_additional} is ignored with - * this flag on.

- * - *

Turning this feature on or off for an existing DB can cause unexpected - * LSM tree structure so it's not recommended.

- * - *

Caution: this option is experimental

- * - *

Default: false
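A minimal, hedged sketch of turning this on for a new database, using the values from the walkthrough above; this is illustrative, not a tuning recommendation:

    try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
      cfOpts.setLevelCompactionDynamicLevelBytes(true)  // experimental, see the caution above
            .setNumLevels(6)
            .setMaxBytesForLevelBase(10L * 1024 * 1024) // 10MB, as in the example
            .setMaxBytesForLevelMultiplier(10);
    }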

- * - * @param enableLevelCompactionDynamicLevelBytes boolean value indicating - * if {@code LevelCompactionDynamicLevelBytes} shall be enabled. - * @return the reference to the current options. - */ - @Experimental("Turning this feature on or off for an existing DB can cause" + - " unexpected LSM tree structure so it's not recommended") - T setLevelCompactionDynamicLevelBytes( - boolean enableLevelCompactionDynamicLevelBytes); - - /** - *

Return if {@code LevelCompactionDynamicLevelBytes} is enabled. - *

- * - *

For further information see - * {@link #setLevelCompactionDynamicLevelBytes(boolean)}

- * - * @return boolean value indicating if - * {@code levelCompactionDynamicLevelBytes} is enabled. - */ - @Experimental("Caution: this option is experimental") - boolean levelCompactionDynamicLevelBytes(); - - /** - * Maximum size of each compaction (not guarantee) - * - * @param maxCompactionBytes the compaction size limit - * @return the reference to the current options. - */ - T setMaxCompactionBytes( - long maxCompactionBytes); - - /** - * Control maximum size of each compaction (not guaranteed) - * - * @return compaction size threshold - */ - long maxCompactionBytes(); - - /** - * Set compaction style for DB. - * - * Default: LEVEL. - * - * @param compactionStyle Compaction style. - * @return the reference to the current options. - */ - ColumnFamilyOptionsInterface setCompactionStyle( - CompactionStyle compactionStyle); - - /** - * Compaction style for DB. - * - * @return Compaction style. - */ - CompactionStyle compactionStyle(); - - /** - * If level {@link #compactionStyle()} == {@link CompactionStyle#LEVEL}, - * for each level, which files are prioritized to be picked to compact. - * - * Default: {@link CompactionPriority#ByCompensatedSize} - * - * @param compactionPriority The compaction priority - * - * @return the reference to the current options. - */ - T setCompactionPriority( - CompactionPriority compactionPriority); - - /** - * Get the Compaction priority if level compaction - * is used for all levels - * - * @return The compaction priority - */ - CompactionPriority compactionPriority(); - - /** - * Set the options needed to support Universal Style compactions - * - * @param compactionOptionsUniversal The Universal Style compaction options - * - * @return the reference to the current options. - */ - T setCompactionOptionsUniversal( - CompactionOptionsUniversal compactionOptionsUniversal); - - /** - * The options needed to support Universal Style compactions - * - * @return The Universal Style compaction options - */ - CompactionOptionsUniversal compactionOptionsUniversal(); - - /** - * The options for FIFO compaction style - * - * @param compactionOptionsFIFO The FIFO compaction options - * - * @return the reference to the current options. - */ - T setCompactionOptionsFIFO( - CompactionOptionsFIFO compactionOptionsFIFO); - - /** - * The options for FIFO compaction style - * - * @return The FIFO compaction options - */ - CompactionOptionsFIFO compactionOptionsFIFO(); - - /** - *

This flag specifies that the implementation should optimize the filters - * mainly for cases where keys are found rather than also optimize for keys - * missed. This would be used in cases where the application knows that - * there are very few misses or the performance in the case of misses is not - * important.

- * - *

For now, this flag allows us to not store filters for the last level, i.e. - * the largest level which contains data of the LSM store. For keys which - * are hits, the filters in this level are not useful because we will search - * for the data anyway.

- * - *

NOTE: the filters in other levels are still useful - * even for key hits because they tell us whether to look in that level or go - * to a higher level.

- * - *

Default: false
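Tying the compaction-style options above and this flag together, a hedged configuration sketch; the chosen priority value is only an example:

    try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
      cfOpts.setCompactionStyle(CompactionStyle.LEVEL);
      cfOpts.setCompactionPriority(CompactionPriority.MinOverlappingRatio);
      cfOpts.setOptimizeFiltersForHits(true); // skip filters on the largest level for hit-heavy workloads
    }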

- * - * @param optimizeFiltersForHits boolean value indicating if this flag is set. - * @return the reference to the current options. - */ - T setOptimizeFiltersForHits( - boolean optimizeFiltersForHits); - - /** - *

Returns the current state of the {@code optimize_filters_for_hits} - * setting.

- * - * @return boolean value indicating if the flag - * {@code optimize_filters_for_hits} was set. - */ - boolean optimizeFiltersForHits(); - - /** - * By default, RocksDB runs consistency checks on the LSM every time the LSM - * changes (Flush, Compaction, AddFile). Use this option if you need to - * disable them. - * - * Default: true - * - * @param forceConsistencyChecks false to disable consistency checks - * - * @return the reference to the current options. - */ - T setForceConsistencyChecks( - boolean forceConsistencyChecks); - - /** - * By default, RocksDB runs consistency checks on the LSM every time the LSM - * changes (Flush, Compaction, AddFile). - * - * @return true if consistency checks are enforced - */ - boolean forceConsistencyChecks(); -} diff --git a/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java b/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java deleted file mode 100644 index 162d15d80..000000000 --- a/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java +++ /dev/null @@ -1,830 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Advanced Column Family Options which are mutable - * - * Taken from include/rocksdb/advanced_options.h - * and MutableCFOptions in util/cf_options.h - */ -public interface AdvancedMutableColumnFamilyOptionsInterface< - T extends AdvancedMutableColumnFamilyOptionsInterface> { - /** - * The maximum number of write buffers that are built up in memory. - * The default is 2, so that when 1 write buffer is being flushed to - * storage, new writes can continue to the other write buffer. - * Default: 2 - * - * @param maxWriteBufferNumber maximum number of write buffers. - * @return the instance of the current options. - */ - T setMaxWriteBufferNumber( - int maxWriteBufferNumber); - - /** - * Returns maximum number of write buffers. - * - * @return maximum number of write buffers. - * @see #setMaxWriteBufferNumber(int) - */ - int maxWriteBufferNumber(); - - /** - * Number of locks used for inplace update - * Default: 10000, if inplace_update_support = true, else 0. - * - * @param inplaceUpdateNumLocks the number of locks used for - * inplace updates. - * @return the reference to the current options. - * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms - * while overflowing the underlying platform specific value. - */ - T setInplaceUpdateNumLocks( - long inplaceUpdateNumLocks); - - /** - * Number of locks used for inplace update - * Default: 10000, if inplace_update_support = true, else 0. - * - * @return the number of locks used for inplace update. - */ - long inplaceUpdateNumLocks(); - - /** - * if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0, - * create prefix bloom for memtable with the size of - * write_buffer_size * memtable_prefix_bloom_size_ratio. - * If it is larger than 0.25, it is santinized to 0.25. - * - * Default: 0 (disabled) - * - * @param memtablePrefixBloomSizeRatio the ratio of memtable used by the - * bloom filter, 0 means no bloom filter - * @return the reference to the current options. 
- */ - T setMemtablePrefixBloomSizeRatio( - double memtablePrefixBloomSizeRatio); - - /** - * if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0, - * create prefix bloom for memtable with the size of - * write_buffer_size * memtable_prefix_bloom_size_ratio. - * If it is larger than 0.25, it is santinized to 0.25. - * - * Default: 0 (disabled) - * - * @return the ratio of memtable used by the bloom filter - */ - double memtablePrefixBloomSizeRatio(); - - /** - * Threshold used in the MemPurge (memtable garbage collection) - * feature. A value of 0.0 corresponds to no MemPurge, - * a value of 1.0 will trigger a MemPurge as often as possible. - * - * Default: 0.0 (disabled) - * - * @param experimentalMempurgeThreshold the threshold used by - * the MemPurge decider. - * @return the reference to the current options. - */ - T setExperimentalMempurgeThreshold(double experimentalMempurgeThreshold); - - /** - * Threshold used in the MemPurge (memtable garbage collection) - * feature. A value of 0.0 corresponds to no MemPurge, - * a value of 1.0 will trigger a MemPurge as often as possible. - * - * Default: 0 (disabled) - * - * @return the threshold used by the MemPurge decider - */ - double experimentalMempurgeThreshold(); - - /** - * Enable whole key bloom filter in memtable. Note this will only take effect - * if memtable_prefix_bloom_size_ratio is not 0. Enabling whole key filtering - * can potentially reduce CPU usage for point-look-ups. - * - * Default: false (disabled) - * - * @param memtableWholeKeyFiltering true if whole key bloom filter is enabled - * in memtable - * @return the reference to the current options. - */ - T setMemtableWholeKeyFiltering(boolean memtableWholeKeyFiltering); - - /** - * Returns whether whole key bloom filter is enabled in memtable - * - * @return true if whole key bloom filter is enabled in memtable - */ - boolean memtableWholeKeyFiltering(); - - /** - * Page size for huge page TLB for bloom in memtable. If ≤ 0, not allocate - * from huge page TLB but from malloc. - * Need to reserve huge pages for it to be allocated. For example: - * sysctl -w vm.nr_hugepages=20 - * See linux doc Documentation/vm/hugetlbpage.txt - * - * @param memtableHugePageSize The page size of the huge - * page tlb - * @return the reference to the current options. - */ - T setMemtableHugePageSize( - long memtableHugePageSize); - - /** - * Page size for huge page TLB for bloom in memtable. If ≤ 0, not allocate - * from huge page TLB but from malloc. - * Need to reserve huge pages for it to be allocated. For example: - * sysctl -w vm.nr_hugepages=20 - * See linux doc Documentation/vm/hugetlbpage.txt - * - * @return The page size of the huge page tlb - */ - long memtableHugePageSize(); - - /** - * The size of one block in arena memory allocation. - * If ≤ 0, a proper value is automatically calculated (usually 1/10 of - * writer_buffer_size). - * - * There are two additional restriction of the specified size: - * (1) size should be in the range of [4096, 2 << 30] and - * (2) be the multiple of the CPU word (which helps with the memory - * alignment). - * - * We'll automatically check and adjust the size number to make sure it - * conforms to the restrictions. - * Default: 0 - * - * @param arenaBlockSize the size of an arena block - * @return the reference to the current options. - * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms - * while overflowing the underlying platform specific value. 
- */ - T setArenaBlockSize(long arenaBlockSize); - - /** - * The size of one block in arena memory allocation. - * If ≤ 0, a proper value is automatically calculated (usually 1/10 of - * writer_buffer_size). - * - * There are two additional restriction of the specified size: - * (1) size should be in the range of [4096, 2 << 30] and - * (2) be the multiple of the CPU word (which helps with the memory - * alignment). - * - * We'll automatically check and adjust the size number to make sure it - * conforms to the restrictions. - * Default: 0 - * - * @return the size of an arena block - */ - long arenaBlockSize(); - - /** - * Soft limit on number of level-0 files. We start slowing down writes at this - * point. A value < 0 means that no writing slow down will be triggered by - * number of files in level-0. - * - * @param level0SlowdownWritesTrigger The soft limit on the number of - * level-0 files - * @return the reference to the current options. - */ - T setLevel0SlowdownWritesTrigger( - int level0SlowdownWritesTrigger); - - /** - * Soft limit on number of level-0 files. We start slowing down writes at this - * point. A value < 0 means that no writing slow down will be triggered by - * number of files in level-0. - * - * @return The soft limit on the number of - * level-0 files - */ - int level0SlowdownWritesTrigger(); - - /** - * Maximum number of level-0 files. We stop writes at this point. - * - * @param level0StopWritesTrigger The maximum number of level-0 files - * @return the reference to the current options. - */ - T setLevel0StopWritesTrigger( - int level0StopWritesTrigger); - - /** - * Maximum number of level-0 files. We stop writes at this point. - * - * @return The maximum number of level-0 files - */ - int level0StopWritesTrigger(); - - /** - * The target file size for compaction. - * This targetFileSizeBase determines a level-1 file size. - * Target file size for level L can be calculated by - * targetFileSizeBase * (targetFileSizeMultiplier ^ (L-1)) - * For example, if targetFileSizeBase is 2MB and - * target_file_size_multiplier is 10, then each file on level-1 will - * be 2MB, and each file on level 2 will be 20MB, - * and each file on level-3 will be 200MB. - * by default targetFileSizeBase is 64MB. - * - * @param targetFileSizeBase the target size of a level-0 file. - * @return the reference to the current options. - * - * @see #setTargetFileSizeMultiplier(int) - */ - T setTargetFileSizeBase( - long targetFileSizeBase); - - /** - * The target file size for compaction. - * This targetFileSizeBase determines a level-1 file size. - * Target file size for level L can be calculated by - * targetFileSizeBase * (targetFileSizeMultiplier ^ (L-1)) - * For example, if targetFileSizeBase is 2MB and - * target_file_size_multiplier is 10, then each file on level-1 will - * be 2MB, and each file on level 2 will be 20MB, - * and each file on level-3 will be 200MB. - * by default targetFileSizeBase is 64MB. - * - * @return the target size of a level-0 file. - * - * @see #targetFileSizeMultiplier() - */ - long targetFileSizeBase(); - - /** - * targetFileSizeMultiplier defines the size ratio between a - * level-L file and level-(L+1) file. - * By default target_file_size_multiplier is 1, meaning - * files in different levels have the same target. - * - * @param multiplier the size ratio between a level-(L+1) file - * and level-L file. - * @return the reference to the current options. 
- */ - T setTargetFileSizeMultiplier( - int multiplier); - - /** - * targetFileSizeMultiplier defines the size ratio between a - * level-(L+1) file and level-L file. - * By default targetFileSizeMultiplier is 1, meaning - * files in different levels have the same target. - * - * @return the size ratio between a level-(L+1) file and level-L file. - */ - int targetFileSizeMultiplier(); - - /** - * The ratio between the total size of level-(L+1) files and the total - * size of level-L files for all L. - * DEFAULT: 10 - * - * @param multiplier the ratio between the total size of level-(L+1) - * files and the total size of level-L files for all L. - * @return the reference to the current options. - * - * See {@link MutableColumnFamilyOptionsInterface#setMaxBytesForLevelBase(long)} - */ - T setMaxBytesForLevelMultiplier(double multiplier); - - /** - * The ratio between the total size of level-(L+1) files and the total - * size of level-L files for all L. - * DEFAULT: 10 - * - * @return the ratio between the total size of level-(L+1) files and - * the total size of level-L files for all L. - * - * See {@link MutableColumnFamilyOptionsInterface#maxBytesForLevelBase()} - */ - double maxBytesForLevelMultiplier(); - - /** - * Different max-size multipliers for different levels. - * These are multiplied by max_bytes_for_level_multiplier to arrive - * at the max-size of each level. - * - * Default: 1 - * - * @param maxBytesForLevelMultiplierAdditional The max-size multipliers - * for each level - * @return the reference to the current options. - */ - T setMaxBytesForLevelMultiplierAdditional( - int[] maxBytesForLevelMultiplierAdditional); - - /** - * Different max-size multipliers for different levels. - * These are multiplied by max_bytes_for_level_multiplier to arrive - * at the max-size of each level. - * - * Default: 1 - * - * @return The max-size multipliers for each level - */ - int[] maxBytesForLevelMultiplierAdditional(); - - /** - * All writes will be slowed down to at least delayed_write_rate if estimated - * bytes needed to be compaction exceed this threshold. - * - * Default: 64GB - * - * @param softPendingCompactionBytesLimit The soft limit to impose on - * compaction - * @return the reference to the current options. - */ - T setSoftPendingCompactionBytesLimit( - long softPendingCompactionBytesLimit); - - /** - * All writes will be slowed down to at least delayed_write_rate if estimated - * bytes needed to be compaction exceed this threshold. - * - * Default: 64GB - * - * @return The soft limit to impose on compaction - */ - long softPendingCompactionBytesLimit(); - - /** - * All writes are stopped if estimated bytes needed to be compaction exceed - * this threshold. - * - * Default: 256GB - * - * @param hardPendingCompactionBytesLimit The hard limit to impose on - * compaction - * @return the reference to the current options. - */ - T setHardPendingCompactionBytesLimit( - long hardPendingCompactionBytesLimit); - - /** - * All writes are stopped if estimated bytes needed to be compaction exceed - * this threshold. - * - * Default: 256GB - * - * @return The hard limit to impose on compaction - */ - long hardPendingCompactionBytesLimit(); - - /** - * An iteration->Next() sequentially skips over keys with the same - * user-key unless this option is set. This number specifies the number - * of keys (with the same userkey) that will be sequentially - * skipped before a reseek is issued. 
- * Default: 8 - * - * @param maxSequentialSkipInIterations the number of keys could - * be skipped in a iteration. - * @return the reference to the current options. - */ - T setMaxSequentialSkipInIterations( - long maxSequentialSkipInIterations); - - /** - * An iteration->Next() sequentially skips over keys with the same - * user-key unless this option is set. This number specifies the number - * of keys (with the same userkey) that will be sequentially - * skipped before a reseek is issued. - * Default: 8 - * - * @return the number of keys could be skipped in a iteration. - */ - long maxSequentialSkipInIterations(); - - /** - * Maximum number of successive merge operations on a key in the memtable. - * - * When a merge operation is added to the memtable and the maximum number of - * successive merges is reached, the value of the key will be calculated and - * inserted into the memtable instead of the merge operation. This will - * ensure that there are never more than max_successive_merges merge - * operations in the memtable. - * - * Default: 0 (disabled) - * - * @param maxSuccessiveMerges the maximum number of successive merges. - * @return the reference to the current options. - * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms - * while overflowing the underlying platform specific value. - */ - T setMaxSuccessiveMerges( - long maxSuccessiveMerges); - - /** - * Maximum number of successive merge operations on a key in the memtable. - * - * When a merge operation is added to the memtable and the maximum number of - * successive merges is reached, the value of the key will be calculated and - * inserted into the memtable instead of the merge operation. This will - * ensure that there are never more than max_successive_merges merge - * operations in the memtable. - * - * Default: 0 (disabled) - * - * @return the maximum number of successive merges. - */ - long maxSuccessiveMerges(); - - /** - * After writing every SST file, reopen it and read all the keys. - * - * Default: false - * - * @param paranoidFileChecks true to enable paranoid file checks - * @return the reference to the current options. - */ - T setParanoidFileChecks( - boolean paranoidFileChecks); - - /** - * After writing every SST file, reopen it and read all the keys. - * - * Default: false - * - * @return true if paranoid file checks are enabled - */ - boolean paranoidFileChecks(); - - /** - * Measure IO stats in compactions and flushes, if true. - * - * Default: false - * - * @param reportBgIoStats true to enable reporting - * @return the reference to the current options. - */ - T setReportBgIoStats( - boolean reportBgIoStats); - - /** - * Determine whether IO stats in compactions and flushes are being measured - * - * @return true if reporting is enabled - */ - boolean reportBgIoStats(); - - /** - * Non-bottom-level files older than TTL will go through the compaction - * process. This needs {@link MutableDBOptionsInterface#maxOpenFiles()} to be - * set to -1. - * - * Enabled only for level compaction for now. - * - * Default: 0 (disabled) - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param ttl the time-to-live. - * - * @return the reference to the current options. - */ - T setTtl(final long ttl); - - /** - * Get the TTL for Non-bottom-level files that will go through the compaction - * process. - * - * See {@link #setTtl(long)}. - * - * @return the time-to-live. 
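A hedged sketch of setting several of the mutable options above at open time via ColumnFamilyOptions (the same options are also reachable through the RocksDB#setOptions path mentioned above); the numbers are illustrative only:

    try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
      cfOpts.setTargetFileSizeBase(64L * 1024 * 1024)     // 64MB L1 files (the documented default)
            .setTargetFileSizeMultiplier(1)
            .setLevel0SlowdownWritesTrigger(20)
            .setLevel0StopWritesTrigger(36)
            .setTtl(7L * 24 * 60 * 60)                    // compact non-bottom files older than 7 days
            .setPeriodicCompactionSeconds(30L * 24 * 60 * 60);
    }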
- */ - long ttl(); - - /** - * Files older than this value will be picked up for compaction, and - * re-written to the same level as they were before. - * One main use of the feature is to make sure a file goes through compaction - * filters periodically. Users can also use the feature to clear up SST - * files using old format. - * - * A file's age is computed by looking at file_creation_time or creation_time - * table properties in order, if they have valid non-zero values; if not, the - * age is based on the file's last modified time (given by the underlying - * Env). - * - * Supported in Level and FIFO compaction. - * In FIFO compaction, this option has the same meaning as TTL and whichever - * stricter will be used. - * Pre-req: max_open_file == -1. - * unit: seconds. Ex: 7 days = 7 * 24 * 60 * 60 - * - * Values: - * 0: Turn off Periodic compactions. - * UINT64_MAX - 1 (i.e 0xfffffffffffffffe): Let RocksDB control this feature - * as needed. For now, RocksDB will change this value to 30 days - * (i.e 30 * 24 * 60 * 60) so that every file goes through the compaction - * process at least once every 30 days if not compacted sooner. - * In FIFO compaction, since the option has the same meaning as ttl, - * when this value is left default, and ttl is left to 0, 30 days will be - * used. Otherwise, min(ttl, periodic_compaction_seconds) will be used. - * - * Default: 0xfffffffffffffffe (allow RocksDB to auto-tune) - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param periodicCompactionSeconds the periodic compaction in seconds. - * - * @return the reference to the current options. - */ - T setPeriodicCompactionSeconds(final long periodicCompactionSeconds); - - /** - * Get the periodicCompactionSeconds. - * - * See {@link #setPeriodicCompactionSeconds(long)}. - * - * @return the periodic compaction in seconds. - */ - long periodicCompactionSeconds(); - - // - // BEGIN options for blobs (integrated BlobDB) - // - - /** - * When set, large values (blobs) are written to separate blob files, and only - * pointers to them are stored in SST files. This can reduce write amplification - * for large-value use cases at the cost of introducing a level of indirection - * for reads. See also the options min_blob_size, blob_file_size, - * blob_compression_type, enable_blob_garbage_collection, and - * blob_garbage_collection_age_cutoff below. - * - * Default: false - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param enableBlobFiles true iff blob files should be enabled - * - * @return the reference to the current options. - */ - T setEnableBlobFiles(final boolean enableBlobFiles); - - /** - * When set, large values (blobs) are written to separate blob files, and only - * pointers to them are stored in SST files. This can reduce write amplification - * for large-value use cases at the cost of introducing a level of indirection - * for reads. See also the options min_blob_size, blob_file_size, - * blob_compression_type, enable_blob_garbage_collection, and - * blob_garbage_collection_age_cutoff below. - * - * Default: false - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @return true if blob files are enabled - */ - boolean enableBlobFiles(); - - /** - * Set the size of the smallest value to be stored separately in a blob file. 
Values - * which have an uncompressed size smaller than this threshold are stored - * alongside the keys in SST files in the usual fashion. A value of zero for - * this option means that all values are stored in blob files. Note that - * enable_blob_files has to be set in order for this option to have any effect. - * - * Default: 0 - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param minBlobSize the size of the smallest value to be stored separately in a blob file - * @return the reference to the current options. - */ - T setMinBlobSize(final long minBlobSize); - - /** - * Get the size of the smallest value to be stored separately in a blob file. Values - * which have an uncompressed size smaller than this threshold are stored - * alongside the keys in SST files in the usual fashion. A value of zero for - * this option means that all values are stored in blob files. Note that - * enable_blob_files has to be set in order for this option to have any effect. - * - * Default: 0 - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @return the current minimum size of value which is stored separately in a blob - */ - long minBlobSize(); - - /** - * Set the size limit for blob files. When writing blob files, a new file is opened - * once this limit is reached. Note that enable_blob_files has to be set in - * order for this option to have any effect. - * - * Default: 256 MB - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param blobFileSize the size limit for blob files - * - * @return the reference to the current options. - */ - T setBlobFileSize(final long blobFileSize); - - /** - * The size limit for blob files. When writing blob files, a new file is opened - * once this limit is reached. - * - * @return the current size limit for blob files - */ - long blobFileSize(); - - /** - * Set the compression algorithm to use for large values stored in blob files. Note - * that enable_blob_files has to be set in order for this option to have any - * effect. - * - * Default: no compression - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param compressionType the compression algorithm to use. - * - * @return the reference to the current options. - */ - T setBlobCompressionType(CompressionType compressionType); - - /** - * Get the compression algorithm in use for large values stored in blob files. - * Note that enable_blob_files has to be set in order for this option to have any - * effect. - * - * @return the current compression algorithm - */ - CompressionType blobCompressionType(); - - /** - * Enable/disable garbage collection of blobs. Blob GC is performed as part of - * compaction. Valid blobs residing in blob files older than a cutoff get - * relocated to new files as they are encountered during compaction, which makes - * it possible to clean up blob files once they contain nothing but - * obsolete/garbage blobs. See also blob_garbage_collection_age_cutoff below. - * - * Default: false - * - * @param enableBlobGarbageCollection the new enabled/disabled state of blob garbage collection - * - * @return the reference to the current options. 
- */ - T setEnableBlobGarbageCollection(final boolean enableBlobGarbageCollection); - - /** - * Query whether garbage collection of blobs is enabled.Blob GC is performed as part of - * compaction. Valid blobs residing in blob files older than a cutoff get - * relocated to new files as they are encountered during compaction, which makes - * it possible to clean up blob files once they contain nothing but - * obsolete/garbage blobs. See also blob_garbage_collection_age_cutoff below. - * - * Default: false - * - * @return true if blob garbage collection is currently enabled. - */ - boolean enableBlobGarbageCollection(); - - /** - * Set cutoff in terms of blob file age for garbage collection. Blobs in the - * oldest N blob files will be relocated when encountered during compaction, - * where N = garbage_collection_cutoff * number_of_blob_files. Note that - * enable_blob_garbage_collection has to be set in order for this option to have - * any effect. - * - * Default: 0.25 - * - * @param blobGarbageCollectionAgeCutoff the new age cutoff - * - * @return the reference to the current options. - */ - T setBlobGarbageCollectionAgeCutoff(double blobGarbageCollectionAgeCutoff); - /** - * Get cutoff in terms of blob file age for garbage collection. Blobs in the - * oldest N blob files will be relocated when encountered during compaction, - * where N = garbage_collection_cutoff * number_of_blob_files. Note that - * enable_blob_garbage_collection has to be set in order for this option to have - * any effect. - * - * Default: 0.25 - * - * @return the current age cutoff for garbage collection - */ - double blobGarbageCollectionAgeCutoff(); - - /** - * If the ratio of garbage in the oldest blob files exceeds this threshold, - * targeted compactions are scheduled in order to force garbage collecting - * the blob files in question, assuming they are all eligible based on the - * value of {@link #blobGarbageCollectionAgeCutoff} above. This option is - * currently only supported with leveled compactions. - * - * Note that {@link #enableBlobGarbageCollection} has to be set in order for this - * option to have any effect. - * - * Default: 1.0 - * - * Dynamically changeable through the SetOptions() API - * - * @param blobGarbageCollectionForceThreshold new value for the threshold - * @return the reference to the current options - */ - T setBlobGarbageCollectionForceThreshold(double blobGarbageCollectionForceThreshold); - - /** - * Get the current value for the {@link #blobGarbageCollectionForceThreshold} - * @return the current threshold at which garbage collection of blobs is forced - */ - double blobGarbageCollectionForceThreshold(); - - /** - * Set compaction readahead for blob files. - * - * Default: 0 - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param blobCompactionReadaheadSize the compaction readahead for blob files - * - * @return the reference to the current options. - */ - T setBlobCompactionReadaheadSize(final long blobCompactionReadaheadSize); - - /** - * Get compaction readahead for blob files. - * - * @return the current compaction readahead for blob files - */ - long blobCompactionReadaheadSize(); - - /** - * Set a certain LSM tree level to enable blob files. - * - * Default: 0 - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. 
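Pulling the blob-related setters above together, a minimal hedged sketch; the sizes and cutoff are purely illustrative:

    try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
      cfOpts.setEnableBlobFiles(true)
            .setMinBlobSize(4096)                          // values of 4KB and larger go to blob files
            .setBlobFileSize(256L * 1024 * 1024)           // documented default: 256 MB
            .setBlobCompressionType(CompressionType.ZSTD_COMPRESSION)
            .setEnableBlobGarbageCollection(true)
            .setBlobGarbageCollectionAgeCutoff(0.25);      // documented default
    }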
- * - * @param blobFileStartingLevel the starting level to enable blob files - * - * @return the reference to the current options. - */ - T setBlobFileStartingLevel(final int blobFileStartingLevel); - - /** - * Get the starting LSM tree level to enable blob files. - * - * Default: 0 - * - * @return the current LSM tree level to enable blob files. - */ - int blobFileStartingLevel(); - - /** - * Set a certain prepopulate blob cache option. - * - * Default: 0 - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param prepopulateBlobCache the prepopulate blob cache option - * - * @return the reference to the current options. - */ - T setPrepopulateBlobCache(final PrepopulateBlobCache prepopulateBlobCache); - - /** - * Get the prepopulate blob cache option. - * - * Default: 0 - * - * @return the current prepopulate blob cache option. - */ - PrepopulateBlobCache prepopulateBlobCache(); - - // - // END options for blobs (integrated BlobDB) - // -} diff --git a/java/src/main/java/org/rocksdb/BackgroundErrorReason.java b/java/src/main/java/org/rocksdb/BackgroundErrorReason.java deleted file mode 100644 index eec593d35..000000000 --- a/java/src/main/java/org/rocksdb/BackgroundErrorReason.java +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public enum BackgroundErrorReason { - FLUSH((byte) 0x0), - COMPACTION((byte) 0x1), - WRITE_CALLBACK((byte) 0x2), - MEMTABLE((byte) 0x3); - - private final byte value; - - BackgroundErrorReason(final byte value) { - this.value = value; - } - - /** - * Get the internal representation. - * - * @return the internal representation - */ - byte getValue() { - return value; - } - - /** - * Get the BackgroundErrorReason from the internal representation value. - * - * @return the background error reason. - * - * @throws IllegalArgumentException if the value is unknown. - */ - static BackgroundErrorReason fromValue(final byte value) { - for (final BackgroundErrorReason backgroundErrorReason : BackgroundErrorReason.values()) { - if (backgroundErrorReason.value == value) { - return backgroundErrorReason; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for BackgroundErrorReason: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/BackupEngine.java b/java/src/main/java/org/rocksdb/BackupEngine.java deleted file mode 100644 index 515824a91..000000000 --- a/java/src/main/java/org/rocksdb/BackupEngine.java +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import java.util.List; - -/** - * BackupEngine allows you to backup - * and restore the database - * - * Be aware, that `new BackupEngine` takes time proportional to the amount - * of backups. So if you have a slow filesystem to backup - * and you have a lot of backups then restoring can take some time. - * That's why we recommend to limit the number of backups. - * Also we recommend to keep BackupEngine alive and not to recreate it every - * time you need to do a backup. 
- */ -public class BackupEngine extends RocksObject implements AutoCloseable { - - protected BackupEngine(final long nativeHandle) { - super(nativeHandle); - } - - /** - * Opens a new Backup Engine - * - * @param env The environment that the backup engine should operate within - * @param options Any options for the backup engine - * - * @return A new BackupEngine instance - * @throws RocksDBException thrown if the backup engine could not be opened - */ - public static BackupEngine open(final Env env, final BackupEngineOptions options) - throws RocksDBException { - return new BackupEngine(open(env.nativeHandle_, options.nativeHandle_)); - } - - /** - * Captures the state of the database in the latest backup - * - * Just a convenience for {@link #createNewBackup(RocksDB, boolean)} with - * the flushBeforeBackup parameter set to false - * - * @param db The database to backup - * - * Note - This method is not thread safe - * - * @throws RocksDBException thrown if a new backup could not be created - */ - public void createNewBackup(final RocksDB db) throws RocksDBException { - createNewBackup(db, false); - } - - /** - * Captures the state of the database in the latest backup - * - * @param db The database to backup - * @param flushBeforeBackup When true, the Backup Engine will first issue a - * memtable flush and only then copy the DB files to - * the backup directory. Doing so will prevent log - * files from being copied to the backup directory - * (since flush will delete them). - * When false, the Backup Engine will not issue a - * flush before starting the backup. In that case, - * the backup will also include log files - * corresponding to live memtables. If writes have - * been performed with the write ahead log disabled, - * set flushBeforeBackup to true to prevent those - * writes from being lost. Otherwise, the backup will - * always be consistent with the current state of the - * database regardless of the flushBeforeBackup - * parameter. - * - * Note - This method is not thread safe - * - * @throws RocksDBException thrown if a new backup could not be created - */ - public void createNewBackup( - final RocksDB db, final boolean flushBeforeBackup) - throws RocksDBException { - assert (isOwningHandle()); - createNewBackup(nativeHandle_, db.nativeHandle_, flushBeforeBackup); - } - - /** - * Captures the state of the database in the latest backup along with - * application specific metadata. - * - * @param db The database to backup - * @param metadata Application metadata - * @param flushBeforeBackup When true, the Backup Engine will first issue a - * memtable flush and only then copy the DB files to - * the backup directory. Doing so will prevent log - * files from being copied to the backup directory - * (since flush will delete them). - * When false, the Backup Engine will not issue a - * flush before starting the backup. In that case, - * the backup will also include log files - * corresponding to live memtables. If writes have - * been performed with the write ahead log disabled, - * set flushBeforeBackup to true to prevent those - * writes from being lost. Otherwise, the backup will - * always be consistent with the current state of the - * database regardless of the flushBeforeBackup - * parameter. 
- * - * Note - This method is not thread safe - * - * @throws RocksDBException thrown if a new backup could not be created - */ - public void createNewBackupWithMetadata(final RocksDB db, final String metadata, - final boolean flushBeforeBackup) throws RocksDBException { - assert (isOwningHandle()); - createNewBackupWithMetadata(nativeHandle_, db.nativeHandle_, metadata, flushBeforeBackup); - } - - /** - * Gets information about the available - * backups - * - * @return A list of information about each available backup - */ - public List getBackupInfo() { - assert (isOwningHandle()); - return getBackupInfo(nativeHandle_); - } - - /** - *

Returns a list of corrupted backup ids. If there - * are no corrupted backups the method will return an - * empty list.
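For instance (a sketch only, assuming an already opened BackupEngine named engine, and that these calls may throw RocksDBException):

    // Drop corrupted backups, then reclaim any files they referenced.
    for (final int corruptedId : engine.getCorruptedBackups()) {
      engine.deleteBackup(corruptedId);
    }
    engine.garbageCollect();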

- * - * @return array of backup ids as int ids. - */ - public int[] getCorruptedBackups() { - assert(isOwningHandle()); - return getCorruptedBackups(nativeHandle_); - } - - /** - *

Deletes all the files we don't need anymore. It will - * do a full scan of the files/ directory and delete all the - * files that are not referenced.

- * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void garbageCollect() throws RocksDBException { - assert(isOwningHandle()); - garbageCollect(nativeHandle_); - } - - /** - * Deletes old backups, keeping just the latest numBackupsToKeep - * - * @param numBackupsToKeep The latest n backups to keep - * - * @throws RocksDBException thrown if the old backups could not be deleted - */ - public void purgeOldBackups( - final int numBackupsToKeep) throws RocksDBException { - assert (isOwningHandle()); - purgeOldBackups(nativeHandle_, numBackupsToKeep); - } - - /** - * Deletes a backup - * - * @param backupId The id of the backup to delete - * - * @throws RocksDBException thrown if the backup could not be deleted - */ - public void deleteBackup(final int backupId) throws RocksDBException { - assert (isOwningHandle()); - deleteBackup(nativeHandle_, backupId); - } - - /** - * Restore the database from a backup - * - * IMPORTANT: if options.share_table_files == true and you restore the DB - * from some backup that is not the latest, and you start creating new - * backups from the new DB, they will probably fail! - * - * Example: Let's say you have backups 1, 2, 3, 4, 5 and you restore 3. - * If you add new data to the DB and try creating a new backup now, the - * database will diverge from backups 4 and 5 and the new backup will fail. - * If you want to create new backup, you will first have to delete backups 4 - * and 5. - * - * @param backupId The id of the backup to restore - * @param dbDir The directory to restore the backup to, i.e. where your - * database is - * @param walDir The location of the log files for your database, - * often the same as dbDir - * @param restoreOptions Options for controlling the restore - * - * @throws RocksDBException thrown if the database could not be restored - */ - public void restoreDbFromBackup( - final int backupId, final String dbDir, final String walDir, - final RestoreOptions restoreOptions) throws RocksDBException { - assert (isOwningHandle()); - restoreDbFromBackup(nativeHandle_, backupId, dbDir, walDir, - restoreOptions.nativeHandle_); - } - - /** - * Restore the database from the latest backup - * - * @param dbDir The directory to restore the backup to, i.e. 
where your - * database is - * @param walDir The location of the log files for your database, often the - * same as dbDir - * @param restoreOptions Options for controlling the restore - * - * @throws RocksDBException thrown if the database could not be restored - */ - public void restoreDbFromLatestBackup( - final String dbDir, final String walDir, - final RestoreOptions restoreOptions) throws RocksDBException { - assert (isOwningHandle()); - restoreDbFromLatestBackup(nativeHandle_, dbDir, walDir, - restoreOptions.nativeHandle_); - } - - private native static long open(final long env, final long backupEngineOptions) - throws RocksDBException; - - private native void createNewBackup(final long handle, final long dbHandle, - final boolean flushBeforeBackup) throws RocksDBException; - - private native void createNewBackupWithMetadata(final long handle, final long dbHandle, - final String metadata, final boolean flushBeforeBackup) throws RocksDBException; - - private native List getBackupInfo(final long handle); - - private native int[] getCorruptedBackups(final long handle); - - private native void garbageCollect(final long handle) throws RocksDBException; - - private native void purgeOldBackups(final long handle, - final int numBackupsToKeep) throws RocksDBException; - - private native void deleteBackup(final long handle, final int backupId) - throws RocksDBException; - - private native void restoreDbFromBackup(final long handle, final int backupId, - final String dbDir, final String walDir, final long restoreOptionsHandle) - throws RocksDBException; - - private native void restoreDbFromLatestBackup(final long handle, - final String dbDir, final String walDir, final long restoreOptionsHandle) - throws RocksDBException; - - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/BackupEngineOptions.java b/java/src/main/java/org/rocksdb/BackupEngineOptions.java deleted file mode 100644 index 6e2dacc02..000000000 --- a/java/src/main/java/org/rocksdb/BackupEngineOptions.java +++ /dev/null @@ -1,458 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.io.File; - -/** - *
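A hedged end-to-end sketch of the BackupEngine API shown above; the paths are hypothetical, db is an already open RocksDB instance, and the RestoreOptions(boolean keepLogFiles) constructor is assumed:

    // import org.rocksdb.*;
    try (final BackupEngineOptions backupOpts = new BackupEngineOptions("/path/to/backups");
         final BackupEngine engine = BackupEngine.open(Env.getDefault(), backupOpts)) {
      engine.createNewBackup(db, true /* flushBeforeBackup */);
      engine.purgeOldBackups(5);                    // keep only the latest five backups
      // Later, with the database closed, restore the newest backup in place:
      try (final RestoreOptions restoreOpts = new RestoreOptions(false)) {
        engine.restoreDbFromLatestBackup("/path/to/db", "/path/to/db", restoreOpts);
      }
    } catch (final RocksDBException e) {
      // backup or restore failed; inspect e.getStatus()
    }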

BackupEngineOptions controls the behavior of a - * {@link org.rocksdb.BackupEngine}. - *

- *

Note that dispose() must be called before an Options instance - * becomes out-of-scope to release the allocated memory in C++.

- * - * @see org.rocksdb.BackupEngine - */ -public class BackupEngineOptions extends RocksObject { - private Env backupEnv = null; - private Logger infoLog = null; - private RateLimiter backupRateLimiter = null; - private RateLimiter restoreRateLimiter = null; - - /** - *

BackupEngineOptions constructor.

- * - * @param path Where to keep the backup files. Has to be different than db - * name. Best to set this to {@code db name_ + "/backups"} - * @throws java.lang.IllegalArgumentException if illegal path is used. - */ - public BackupEngineOptions(final String path) { - super(newBackupEngineOptions(ensureWritableFile(path))); - } - - private static String ensureWritableFile(final String path) { - final File backupPath = path == null ? null : new File(path); - if (backupPath == null || !backupPath.isDirectory() || - !backupPath.canWrite()) { - throw new IllegalArgumentException("Illegal path provided."); - } else { - return path; - } - } - - /** - *

Returns the path to the BackupEngine directory.

- * - * @return the path to the BackupEngine directory. - */ - public String backupDir() { - assert(isOwningHandle()); - return backupDir(nativeHandle_); - } - - /** - * Backup Env object. It will be used for backup file I/O. If it's - * null, backups will be written out using DBs Env. Otherwise - * backup's I/O will be performed using this object. - * - * Default: null - * - * @param env The environment to use - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setBackupEnv(final Env env) { - assert(isOwningHandle()); - setBackupEnv(nativeHandle_, env.nativeHandle_); - this.backupEnv = env; - return this; - } - - /** - * Backup Env object. It will be used for backup file I/O. If it's - * null, backups will be written out using DBs Env. Otherwise - * backup's I/O will be performed using this object. - * - * Default: null - * - * @return The environment in use - */ - public Env backupEnv() { - return this.backupEnv; - } - - /** - *

Share table files between backups.

- * - * @param shareTableFiles If {@code share_table_files == true}, backup will - * assume that table files with the same name have the same contents. This - * enables incremental backups and avoids unnecessary data copies. If - * {@code share_table_files == false}, each backup will be on its own and - * will not share any data with other backups. - * - *

Default: true

- * - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setShareTableFiles(final boolean shareTableFiles) { - assert(isOwningHandle()); - setShareTableFiles(nativeHandle_, shareTableFiles); - return this; - } - - /** - *

Share table files between backups.

- * - * @return boolean value indicating if SST files will be shared between - * backups. - */ - public boolean shareTableFiles() { - assert(isOwningHandle()); - return shareTableFiles(nativeHandle_); - } - - /** - * Set the logger to use for Backup info and error messages - * - * @param logger The logger to use for the backup - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setInfoLog(final Logger logger) { - assert(isOwningHandle()); - setInfoLog(nativeHandle_, logger.nativeHandle_); - this.infoLog = logger; - return this; - } - - /** - * Set the logger to use for Backup info and error messages - * - * Default: null - * - * @return The logger in use for the backup - */ - public Logger infoLog() { - return this.infoLog; - } - - /** - *

Set synchronous backups.

- * - * @param sync If {@code sync == true}, we can guarantee you'll get consistent - * backup even on a machine crash/reboot. Backup process is slower with sync - * enabled. If {@code sync == false}, we don't guarantee anything on machine - * reboot. However, chances are some of the backups are consistent. - * - *

Default: true

- * - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setSync(final boolean sync) { - assert(isOwningHandle()); - setSync(nativeHandle_, sync); - return this; - } - - /** - *

Returns whether synchronous backups are enabled.

- * - * @return boolean value if synchronous backups are configured. - */ - public boolean sync() { - assert(isOwningHandle()); - return sync(nativeHandle_); - } - - /** - *

Set whether old data will be destroyed.

- * - * @param destroyOldData If true, it will delete whatever backups there are - * already. - * - *

Default: false

- * - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setDestroyOldData(final boolean destroyOldData) { - assert(isOwningHandle()); - setDestroyOldData(nativeHandle_, destroyOldData); - return this; - } - - /** - *

Returns whether old data will be destroyed when performing new backups.

- * - * @return boolean value indicating if old data will be destroyed. - */ - public boolean destroyOldData() { - assert(isOwningHandle()); - return destroyOldData(nativeHandle_); - } - - /** - *

Set if log files shall be persisted.

- * - * @param backupLogFiles If false, we won't back up log files. This option can - * be useful for backing up in-memory databases where log files are - * persisted, but table files are in memory. - * - *

Default: true

- * - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setBackupLogFiles(final boolean backupLogFiles) { - assert(isOwningHandle()); - setBackupLogFiles(nativeHandle_, backupLogFiles); - return this; - } - - /** - *

Returns whether log files will be persisted.

- * - * @return boolean value indicating if log files will be persisted. - */ - public boolean backupLogFiles() { - assert(isOwningHandle()); - return backupLogFiles(nativeHandle_); - } - - /** - *

Set backup rate limit.

- * - * @param backupRateLimit Max bytes that can be transferred in a second during - * backup. If 0 or negative, then go as fast as you can. - * - *

Default: 0

- * - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setBackupRateLimit(long backupRateLimit) { - assert(isOwningHandle()); - backupRateLimit = (backupRateLimit <= 0) ? 0 : backupRateLimit; - setBackupRateLimit(nativeHandle_, backupRateLimit); - return this; - } - - /** - *

Returns the backup rate limit, which describes the max bytes that can be transferred per second during backup.

- * - * @return numerical value describing the backup transfer limit in bytes per - * second. - */ - public long backupRateLimit() { - assert(isOwningHandle()); - return backupRateLimit(nativeHandle_); - } - - /** - * Backup rate limiter. Used to control transfer speed for backup. If this is - * not null, {@link #backupRateLimit()} is ignored. - * - * Default: null - * - * @param backupRateLimiter The rate limiter to use for the backup - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setBackupRateLimiter(final RateLimiter backupRateLimiter) { - assert(isOwningHandle()); - setBackupRateLimiter(nativeHandle_, backupRateLimiter.nativeHandle_); - this.backupRateLimiter = backupRateLimiter; - return this; - } - - /** - * Backup rate limiter. Used to control transfer speed for backup. If this is - * not null, {@link #backupRateLimit()} is ignored. - * - * Default: null - * - * @return The rate limiter in use for the backup - */ - public RateLimiter backupRateLimiter() { - assert(isOwningHandle()); - return this.backupRateLimiter; - } - - /** - *
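A hedged sketch of the two throughput caps described above; the byte rates are illustrative, and imports are as in the earlier sketch:

    // Keep the RateLimiter alive for as long as the options (and any BackupEngine
    // opened with them) may use it.
    final RateLimiter limiter = new RateLimiter(4L * 1024 * 1024); // ~4 MiB/s
    try (final BackupEngineOptions options = new BackupEngineOptions("/tmp/rocksdb-backups")) {
      // Plain cap on bytes copied per second during backup.
      options.setBackupRateLimit(4L * 1024 * 1024);
      // Or a RateLimiter object; when set, backupRateLimit() is ignored.
      options.setBackupRateLimiter(limiter);
      // ... open and use a BackupEngine with these options ...
    } finally {
      limiter.close();
    }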

Set restore rate limit.

- * - * @param restoreRateLimit Max bytes that can be transferred in a second - * during restore. If 0 or negative, then go as fast as you can. - * - *

Default: 0

- * - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setRestoreRateLimit(long restoreRateLimit) { - assert(isOwningHandle()); - restoreRateLimit = (restoreRateLimit <= 0) ? 0 : restoreRateLimit; - setRestoreRateLimit(nativeHandle_, restoreRateLimit); - return this; - } - - /** - *

Returns the restore rate limit, which describes the max bytes that can be transferred per second during restore.

- * - * @return numerical value describing the restore transfer limit in bytes per - * second. - */ - public long restoreRateLimit() { - assert(isOwningHandle()); - return restoreRateLimit(nativeHandle_); - } - - /** - * Restore rate limiter. Used to control transfer speed during restore. If - * this is not null, {@link #restoreRateLimit()} is ignored. - * - * Default: null - * - * @param restoreRateLimiter The rate limiter to use during restore - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setRestoreRateLimiter(final RateLimiter restoreRateLimiter) { - assert(isOwningHandle()); - setRestoreRateLimiter(nativeHandle_, restoreRateLimiter.nativeHandle_); - this.restoreRateLimiter = restoreRateLimiter; - return this; - } - - /** - * Restore rate limiter. Used to control transfer speed during restore. If - * this is not null, {@link #restoreRateLimit()} is ignored. - * - * Default: null - * - * @return The rate limiter in use during restore - */ - public RateLimiter restoreRateLimiter() { - assert(isOwningHandle()); - return this.restoreRateLimiter; - } - - /** - *

Only used if share_table_files is set to true. If true, it will consider that backups can come from different databases, hence an SST file is not uniquely identified by its name, but by the triple (file name, crc32, file length).

- * - * @param shareFilesWithChecksum boolean value indicating if SST files are - * stored using the triple (file name, crc32, file length) and not its name. - * - *

Note: this is an experimental option, and you'll need to set it manually; turn it on only if you know what you're doing.

- * - *

Default: false

- * - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setShareFilesWithChecksum(final boolean shareFilesWithChecksum) { - assert(isOwningHandle()); - setShareFilesWithChecksum(nativeHandle_, shareFilesWithChecksum); - return this; - } - - /** - *
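A short, hedged illustration of the option described above (directory path hypothetical, imports as before):

    try (final BackupEngineOptions options = new BackupEngineOptions("/tmp/rocksdb-backups")) {
      options.setShareTableFiles(true)         // required for checksum-based sharing to apply
          .setShareFilesWithChecksum(true);    // identify SSTs by (file name, crc32, file length)
    }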

Returns whether share files with checksum is active.

- * - * @return boolean value indicating if share files with checksum - * is active. - */ - public boolean shareFilesWithChecksum() { - assert(isOwningHandle()); - return shareFilesWithChecksum(nativeHandle_); - } - - /** - * Up to this many background threads will copy files for - * {@link BackupEngine#createNewBackup(RocksDB, boolean)} and - * {@link BackupEngine#restoreDbFromBackup(int, String, String, RestoreOptions)} - * - * Default: 1 - * - * @param maxBackgroundOperations The maximum number of background threads - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setMaxBackgroundOperations(final int maxBackgroundOperations) { - assert(isOwningHandle()); - setMaxBackgroundOperations(nativeHandle_, maxBackgroundOperations); - return this; - } - - /** - * Up to this many background threads will copy files for - * {@link BackupEngine#createNewBackup(RocksDB, boolean)} and - * {@link BackupEngine#restoreDbFromBackup(int, String, String, RestoreOptions)} - * - * Default: 1 - * - * @return The maximum number of background threads - */ - public int maxBackgroundOperations() { - assert(isOwningHandle()); - return maxBackgroundOperations(nativeHandle_); - } - - /** - * During backup user can get callback every time next - * {@link #callbackTriggerIntervalSize()} bytes being copied. - * - * Default: 4194304 - * - * @param callbackTriggerIntervalSize The interval size for the - * callback trigger - * @return instance of current BackupEngineOptions. - */ - public BackupEngineOptions setCallbackTriggerIntervalSize( - final long callbackTriggerIntervalSize) { - assert(isOwningHandle()); - setCallbackTriggerIntervalSize(nativeHandle_, callbackTriggerIntervalSize); - return this; - } - - /** - * During backup user can get callback every time next - * {@link #callbackTriggerIntervalSize()} bytes being copied. 
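Since the surrounding Javadoc references BackupEngine#createNewBackup and BackupEngine#restoreDbFromBackup, a hedged end-to-end sketch may help; paths and the backup id are hypothetical, and the fragment assumes the earlier imports plus a method declaring RocksDBException:

    try (final BackupEngineOptions options =
             new BackupEngineOptions("/tmp/rocksdb-backups").setMaxBackgroundOperations(2);
         final BackupEngine engine = BackupEngine.open(Env.getDefault(), options);
         final Options dbOptions = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(dbOptions, "/tmp/rocksdb-data")) {
      engine.createNewBackup(db, true); // flush memtables before copying files
      // Restore backup id 1 into a (possibly different) directory:
      engine.restoreDbFromBackup(
          1, "/tmp/rocksdb-restored", "/tmp/rocksdb-restored", new RestoreOptions(false));
    }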
- * - * Default: 4194304 - * - * @return The interval size for the callback trigger - */ - public long callbackTriggerIntervalSize() { - assert(isOwningHandle()); - return callbackTriggerIntervalSize(nativeHandle_); - } - - private native static long newBackupEngineOptions(final String path); - private native String backupDir(long handle); - private native void setBackupEnv(final long handle, final long envHandle); - private native void setShareTableFiles(long handle, boolean flag); - private native boolean shareTableFiles(long handle); - private native void setInfoLog(final long handle, final long infoLogHandle); - private native void setSync(long handle, boolean flag); - private native boolean sync(long handle); - private native void setDestroyOldData(long handle, boolean flag); - private native boolean destroyOldData(long handle); - private native void setBackupLogFiles(long handle, boolean flag); - private native boolean backupLogFiles(long handle); - private native void setBackupRateLimit(long handle, long rateLimit); - private native long backupRateLimit(long handle); - private native void setBackupRateLimiter(long handle, long rateLimiterHandle); - private native void setRestoreRateLimit(long handle, long rateLimit); - private native long restoreRateLimit(long handle); - private native void setRestoreRateLimiter(final long handle, - final long rateLimiterHandle); - private native void setShareFilesWithChecksum(long handle, boolean flag); - private native boolean shareFilesWithChecksum(long handle); - private native void setMaxBackgroundOperations(final long handle, - final int maxBackgroundOperations); - private native int maxBackgroundOperations(final long handle); - private native void setCallbackTriggerIntervalSize(final long handle, - long callbackTriggerIntervalSize); - private native long callbackTriggerIntervalSize(final long handle); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/BackupInfo.java b/java/src/main/java/org/rocksdb/BackupInfo.java deleted file mode 100644 index 9244e4eb1..000000000 --- a/java/src/main/java/org/rocksdb/BackupInfo.java +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * Instances of this class describe a Backup made by - * {@link org.rocksdb.BackupEngine}. - */ -public class BackupInfo { - - /** - * Package private constructor used to create instances - * of BackupInfo by {@link org.rocksdb.BackupEngine} - * - * @param backupId id of backup - * @param timestamp timestamp of backup - * @param size size of backup - * @param numberFiles number of files related to this backup. - */ - BackupInfo(final int backupId, final long timestamp, final long size, final int numberFiles, - final String app_metadata) { - backupId_ = backupId; - timestamp_ = timestamp; - size_ = size; - numberFiles_ = numberFiles; - app_metadata_ = app_metadata; - } - - /** - * - * @return the backup id. - */ - public int backupId() { - return backupId_; - } - - /** - * - * @return the timestamp of the backup. - */ - public long timestamp() { - return timestamp_; - } - - /** - * - * @return the size of the backup - */ - public long size() { - return size_; - } - - /** - * - * @return the number of files of this backup. 
- */ - public int numberFiles() { - return numberFiles_; - } - - /** - * - * @return the associated application metadata, or null - */ - public String appMetadata() { - return app_metadata_; - } - - private int backupId_; - private long timestamp_; - private long size_; - private int numberFiles_; - private String app_metadata_; -} diff --git a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java b/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java deleted file mode 100644 index 9300468b0..000000000 --- a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java +++ /dev/null @@ -1,951 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * The config for plain table sst format. - * - * BlockBasedTable is a RocksDB's default SST file format. - */ -//TODO(AR) should be renamed BlockBasedTableOptions -public class BlockBasedTableConfig extends TableFormatConfig { - - public BlockBasedTableConfig() { - //TODO(AR) flushBlockPolicyFactory - cacheIndexAndFilterBlocks = false; - cacheIndexAndFilterBlocksWithHighPriority = true; - pinL0FilterAndIndexBlocksInCache = false; - pinTopLevelIndexAndFilter = true; - indexType = IndexType.kBinarySearch; - dataBlockIndexType = DataBlockIndexType.kDataBlockBinarySearch; - dataBlockHashTableUtilRatio = 0.75; - checksumType = ChecksumType.kCRC32c; - noBlockCache = false; - blockCache = null; - persistentCache = null; - blockSize = 4 * 1024; - blockSizeDeviation = 10; - blockRestartInterval = 16; - indexBlockRestartInterval = 1; - metadataBlockSize = 4096; - partitionFilters = false; - optimizeFiltersForMemory = false; - useDeltaEncoding = true; - filterPolicy = null; - wholeKeyFiltering = true; - verifyCompression = false; - readAmpBytesPerBit = 0; - formatVersion = 5; - enableIndexCompression = true; - blockAlign = false; - indexShortening = IndexShorteningMode.kShortenSeparators; - - // NOTE: ONLY used if blockCache == null - blockCacheSize = 8 * 1024 * 1024; - blockCacheNumShardBits = 0; - } - - /** - * Indicating if we'd put index/filter blocks to the block cache. - * If not specified, each "table reader" object will pre-load index/filter - * block during table initialization. - * - * @return if index and filter blocks should be put in block cache. - */ - public boolean cacheIndexAndFilterBlocks() { - return cacheIndexAndFilterBlocks; - } - - /** - * Indicating if we'd put index/filter blocks to the block cache. - * If not specified, each "table reader" object will pre-load index/filter - * block during table initialization. - * - * @param cacheIndexAndFilterBlocks and filter blocks should be put in block cache. - * @return the reference to the current config. - */ - public BlockBasedTableConfig setCacheIndexAndFilterBlocks( - final boolean cacheIndexAndFilterBlocks) { - this.cacheIndexAndFilterBlocks = cacheIndexAndFilterBlocks; - return this; - } - - /** - * Indicates if index and filter blocks will be treated as high-priority in the block cache. - * See note below about applicability. If not specified, defaults to true. - * - * @return if index and filter blocks will be treated as high-priority. - */ - public boolean cacheIndexAndFilterBlocksWithHighPriority() { - return cacheIndexAndFilterBlocksWithHighPriority; - } - - /** - * If true, cache index and filter blocks with high priority. 
If set to true, - * depending on implementation of block cache, index and filter blocks may be - * less likely to be evicted than data blocks. - * - * @param cacheIndexAndFilterBlocksWithHighPriority if index and filter blocks - * will be treated as high-priority. - * @return the reference to the current config. - */ - public BlockBasedTableConfig setCacheIndexAndFilterBlocksWithHighPriority( - final boolean cacheIndexAndFilterBlocksWithHighPriority) { - this.cacheIndexAndFilterBlocksWithHighPriority = cacheIndexAndFilterBlocksWithHighPriority; - return this; - } - - /** - * Indicating if we'd like to pin L0 index/filter blocks to the block cache. - If not specified, defaults to false. - * - * @return if L0 index and filter blocks should be pinned to the block cache. - */ - public boolean pinL0FilterAndIndexBlocksInCache() { - return pinL0FilterAndIndexBlocksInCache; - } - - /** - * Indicating if we'd like to pin L0 index/filter blocks to the block cache. - If not specified, defaults to false. - * - * @param pinL0FilterAndIndexBlocksInCache pin blocks in block cache - * @return the reference to the current config. - */ - public BlockBasedTableConfig setPinL0FilterAndIndexBlocksInCache( - final boolean pinL0FilterAndIndexBlocksInCache) { - this.pinL0FilterAndIndexBlocksInCache = pinL0FilterAndIndexBlocksInCache; - return this; - } - - /** - * Indicates if top-level index and filter blocks should be pinned. - * - * @return if top-level index and filter blocks should be pinned. - */ - public boolean pinTopLevelIndexAndFilter() { - return pinTopLevelIndexAndFilter; - } - - /** - * If cacheIndexAndFilterBlocks is true and the below is true, then - * the top-level index of partitioned filter and index blocks are stored in - * the cache, but a reference is held in the "table reader" object so the - * blocks are pinned and only evicted from cache when the table reader is - * freed. This is not limited to l0 in LSM tree. - * - * @param pinTopLevelIndexAndFilter if top-level index and filter blocks should be pinned. - * @return the reference to the current config. - */ - public BlockBasedTableConfig setPinTopLevelIndexAndFilter(final boolean pinTopLevelIndexAndFilter) { - this.pinTopLevelIndexAndFilter = pinTopLevelIndexAndFilter; - return this; - } - - /** - * Get the index type. - * - * @return the currently set index type - */ - public IndexType indexType() { - return indexType; - } - - /** - * Sets the index type to used with this table. - * - * @param indexType {@link org.rocksdb.IndexType} value - * @return the reference to the current option. - */ - public BlockBasedTableConfig setIndexType( - final IndexType indexType) { - this.indexType = indexType; - return this; - } - - /** - * Get the data block index type. - * - * @return the currently set data block index type - */ - public DataBlockIndexType dataBlockIndexType() { - return dataBlockIndexType; - } - - /** - * Sets the data block index type to used with this table. - * - * @param dataBlockIndexType {@link org.rocksdb.DataBlockIndexType} value - * @return the reference to the current option. - */ - public BlockBasedTableConfig setDataBlockIndexType( - final DataBlockIndexType dataBlockIndexType) { - this.dataBlockIndexType = dataBlockIndexType; - return this; - } - - /** - * Get the #entries/#buckets. It is valid only when {@link #dataBlockIndexType()} is - * {@link DataBlockIndexType#kDataBlockBinaryAndHash}. - * - * @return the #entries/#buckets. 
- */ - public double dataBlockHashTableUtilRatio() { - return dataBlockHashTableUtilRatio; - } - - /** - * Set the #entries/#buckets. It is valid only when {@link #dataBlockIndexType()} is - * {@link DataBlockIndexType#kDataBlockBinaryAndHash}. - * - * @param dataBlockHashTableUtilRatio #entries/#buckets - * @return the reference to the current option. - */ - public BlockBasedTableConfig setDataBlockHashTableUtilRatio( - final double dataBlockHashTableUtilRatio) { - this.dataBlockHashTableUtilRatio = dataBlockHashTableUtilRatio; - return this; - } - - /** - * Get the checksum type to be used with this table. - * - * @return the currently set checksum type - */ - public ChecksumType checksumType() { - return checksumType; - } - - /** - * Sets - * - * @param checksumType {@link org.rocksdb.ChecksumType} value. - * @return the reference to the current option. - */ - public BlockBasedTableConfig setChecksumType( - final ChecksumType checksumType) { - this.checksumType = checksumType; - return this; - } - - /** - * Determine if the block cache is disabled. - * - * @return if block cache is disabled - */ - public boolean noBlockCache() { - return noBlockCache; - } - - /** - * Disable block cache. If this is set to true, - * then no block cache should be used, and the {@link #setBlockCache(Cache)} - * should point to a {@code null} object. - * - * Default: false - * - * @param noBlockCache if use block cache - * @return the reference to the current config. - */ - public BlockBasedTableConfig setNoBlockCache(final boolean noBlockCache) { - this.noBlockCache = noBlockCache; - return this; - } - - /** - * Use the specified cache for blocks. - * When not null this take precedence even if the user sets a block cache size. - * - * {@link org.rocksdb.Cache} should not be disposed before options instances - * using this cache is disposed. - * - * {@link org.rocksdb.Cache} instance can be re-used in multiple options - * instances. - * - * @param blockCache {@link org.rocksdb.Cache} Cache java instance - * (e.g. LRUCache). - * - * @return the reference to the current config. - */ - public BlockBasedTableConfig setBlockCache(final Cache blockCache) { - this.blockCache = blockCache; - return this; - } - - /** - * Use the specified persistent cache. - * - * If {@code !null} use the specified cache for pages read from device, - * otherwise no page cache is used. - * - * @param persistentCache the persistent cache - * - * @return the reference to the current config. - */ - public BlockBasedTableConfig setPersistentCache( - final PersistentCache persistentCache) { - this.persistentCache = persistentCache; - return this; - } - - /** - * Get the approximate size of user data packed per block. - * - * @return block size in bytes - */ - public long blockSize() { - return blockSize; - } - - /** - * Approximate size of user data packed per block. Note that the - * block size specified here corresponds to uncompressed data. The - * actual size of the unit read from disk may be smaller if - * compression is enabled. This parameter can be changed dynamically. - * Default: 4K - * - * @param blockSize block size in bytes - * @return the reference to the current config. - */ - public BlockBasedTableConfig setBlockSize(final long blockSize) { - this.blockSize = blockSize; - return this; - } - - /** - * @return the hash table ratio. - */ - public int blockSizeDeviation() { - return blockSizeDeviation; - } - - /** - * This is used to close a block before it reaches the configured - * {@link #blockSize()}. 
If the percentage of free space in the current block - * is less than this specified number and adding a new record to the block - * will exceed the configured block size, then this block will be closed and - * the new record will be written to the next block. - * - * Default is 10. - * - * @param blockSizeDeviation the deviation to block size allowed - * @return the reference to the current config. - */ - public BlockBasedTableConfig setBlockSizeDeviation( - final int blockSizeDeviation) { - this.blockSizeDeviation = blockSizeDeviation; - return this; - } - - /** - * Get the block restart interval. - * - * @return block restart interval - */ - public int blockRestartInterval() { - return blockRestartInterval; - } - - /** - * Set the block restart interval. - * - * @param restartInterval block restart interval. - * @return the reference to the current config. - */ - public BlockBasedTableConfig setBlockRestartInterval( - final int restartInterval) { - blockRestartInterval = restartInterval; - return this; - } - - /** - * Get the index block restart interval. - * - * @return index block restart interval - */ - public int indexBlockRestartInterval() { - return indexBlockRestartInterval; - } - - /** - * Set the index block restart interval - * - * @param restartInterval index block restart interval. - * @return the reference to the current config. - */ - public BlockBasedTableConfig setIndexBlockRestartInterval( - final int restartInterval) { - indexBlockRestartInterval = restartInterval; - return this; - } - - /** - * Get the block size for partitioned metadata. - * - * @return block size for partitioned metadata. - */ - public long metadataBlockSize() { - return metadataBlockSize; - } - - /** - * Set block size for partitioned metadata. - * - * @param metadataBlockSize Partitioned metadata block size. - * @return the reference to the current config. - */ - public BlockBasedTableConfig setMetadataBlockSize( - final long metadataBlockSize) { - this.metadataBlockSize = metadataBlockSize; - return this; - } - - /** - * Indicates if we're using partitioned filters. - * - * @return if we're using partition filters. - */ - public boolean partitionFilters() { - return partitionFilters; - } - - /** - * Use partitioned full filters for each SST file. This option is incompatible - * with block-based filters. - * - * Defaults to false. - * - * @param partitionFilters use partition filters. - * @return the reference to the current config. - */ - public BlockBasedTableConfig setPartitionFilters(final boolean partitionFilters) { - this.partitionFilters = partitionFilters; - return this; - } - - /*** - * Option to generate Bloom filters that minimize memory - * internal fragmentation. - * - * See {@link #setOptimizeFiltersForMemory(boolean)}. - * - * @return true if bloom filters are used to minimize memory internal - * fragmentation - */ - @Experimental("Option to generate Bloom filters that minimize memory internal fragmentation") - public boolean optimizeFiltersForMemory() { - return optimizeFiltersForMemory; - } - - /** - * Option to generate Bloom filters that minimize memory - * internal fragmentation. - * - * When false, malloc_usable_size is not available, or format_version < 5, - * filters are generated without regard to internal fragmentation when - * loaded into memory (historical behavior). 
When true (and - * malloc_usable_size is available and {@link #formatVersion()} >= 5), - * then Bloom filters are generated to "round up" and "round down" their - * sizes to minimize internal fragmentation when loaded into memory, assuming - * the reading DB has the same memory allocation characteristics as the - * generating DB. This option does not break forward or backward - * compatibility. - * - * While individual filters will vary in bits/key and false positive rate - * when setting is true, the implementation attempts to maintain a weighted - * average FP rate for filters consistent with this option set to false. - * - * With Jemalloc for example, this setting is expected to save about 10% of - * the memory footprint and block cache charge of filters, while increasing - * disk usage of filters by about 1-2% due to encoding efficiency losses - * with variance in bits/key. - * - * NOTE: Because some memory counted by block cache might be unmapped pages - * within internal fragmentation, this option can increase observed RSS - * memory usage. With {@link #cacheIndexAndFilterBlocks()} == true, - * this option makes the block cache better at using space it is allowed. - * - * NOTE: Do not set to true if you do not trust malloc_usable_size. With - * this option, RocksDB might access an allocated memory object beyond its - * original size if malloc_usable_size says it is safe to do so. While this - * can be considered bad practice, it should not produce undefined behavior - * unless malloc_usable_size is buggy or broken. - * - * @param optimizeFiltersForMemory true to enable Bloom filters that minimize - * memory internal fragmentation, or false to disable. - * - * @return the reference to the current config. - */ - @Experimental("Option to generate Bloom filters that minimize memory internal fragmentation") - public BlockBasedTableConfig setOptimizeFiltersForMemory(final boolean optimizeFiltersForMemory) { - this.optimizeFiltersForMemory = optimizeFiltersForMemory; - return this; - } - - /** - * Determine if delta encoding is being used to compress block keys. - * - * @return true if delta encoding is enabled, false otherwise. - */ - public boolean useDeltaEncoding() { - return useDeltaEncoding; - } - - /** - * Use delta encoding to compress keys in blocks. - * - * NOTE: {@link ReadOptions#pinData()} requires this option to be disabled. - * - * Default: true - * - * @param useDeltaEncoding true to enable delta encoding - * - * @return the reference to the current config. - */ - public BlockBasedTableConfig setUseDeltaEncoding( - final boolean useDeltaEncoding) { - this.useDeltaEncoding = useDeltaEncoding; - return this; - } - - /** - * Get the filter policy. - * - * @return the current filter policy. - */ - public Filter filterPolicy() { - return filterPolicy; - } - - /** - * Use the specified filter policy to reduce disk reads. - * - * {@link org.rocksdb.Filter} should not be closed before options instances - * using this filter are closed. - * - * {@link org.rocksdb.Filter} instance can be re-used in multiple options - * instances. - * - * @param filterPolicy {@link org.rocksdb.Filter} Filter Policy java instance. - * @return the reference to the current config. - */ - public BlockBasedTableConfig setFilterPolicy( - final Filter filterPolicy) { - this.filterPolicy = filterPolicy; - return this; - } - - /** - * Set the filter. - * - * @param filter the filter - * @return the reference to the current config. 
- * - * @deprecated Use {@link #setFilterPolicy(Filter)} - */ - @Deprecated - public BlockBasedTableConfig setFilter( - final Filter filter) { - return setFilterPolicy(filter); - } - - /** - * Determine if whole keys as opposed to prefixes are placed in the filter. - * - * @return if whole key filtering is enabled - */ - public boolean wholeKeyFiltering() { - return wholeKeyFiltering; - } - - /** - * If true, place whole keys in the filter (not just prefixes). - * This must generally be true for gets to be efficient. - * Default: true - * - * @param wholeKeyFiltering if enable whole key filtering - * @return the reference to the current config. - */ - public BlockBasedTableConfig setWholeKeyFiltering( - final boolean wholeKeyFiltering) { - this.wholeKeyFiltering = wholeKeyFiltering; - return this; - } - - /** - * Returns true when compression verification is enabled. - * - * See {@link #setVerifyCompression(boolean)}. - * - * @return true if compression verification is enabled. - */ - public boolean verifyCompression() { - return verifyCompression; - } - - /** - * Verify that decompressing the compressed block gives back the input. This - * is a verification mode that we use to detect bugs in compression - * algorithms. - * - * @param verifyCompression true to enable compression verification. - * - * @return the reference to the current config. - */ - public BlockBasedTableConfig setVerifyCompression( - final boolean verifyCompression) { - this.verifyCompression = verifyCompression; - return this; - } - - /** - * Get the Read amplification bytes per-bit. - * - * See {@link #setReadAmpBytesPerBit(int)}. - * - * @return the bytes per-bit. - */ - public int readAmpBytesPerBit() { - return readAmpBytesPerBit; - } - - /** - * Set the Read amplification bytes per-bit. - * - * If used, For every data block we load into memory, we will create a bitmap - * of size ((block_size / `read_amp_bytes_per_bit`) / 8) bytes. This bitmap - * will be used to figure out the percentage we actually read of the blocks. - * - * When this feature is used Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES and - * Tickers::READ_AMP_TOTAL_READ_BYTES can be used to calculate the - * read amplification using this formula - * (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES) - * - * value => memory usage (percentage of loaded blocks memory) - * 1 => 12.50 % - * 2 => 06.25 % - * 4 => 03.12 % - * 8 => 01.56 % - * 16 => 00.78 % - * - * Note: This number must be a power of 2, if not it will be sanitized - * to be the next lowest power of 2, for example a value of 7 will be - * treated as 4, a value of 19 will be treated as 16. - * - * Default: 0 (disabled) - * - * @param readAmpBytesPerBit the bytes per-bit - * - * @return the reference to the current config. - */ - public BlockBasedTableConfig setReadAmpBytesPerBit(final int readAmpBytesPerBit) { - this.readAmpBytesPerBit = readAmpBytesPerBit; - return this; - } - - /** - * Get the format version. - * See {@link #setFormatVersion(int)}. - * - * @return the currently configured format version. - */ - public int formatVersion() { - return formatVersion; - } - - /** - *

We currently have six versions:

- * - *
  • 0 - This version is currently written out by all RocksDB's versions by default. Can be read by really old RocksDB's. Doesn't support changing checksum (default is CRC32).
  • 1 - Can be read by RocksDB's versions since 3.0. Supports non-default checksum, like xxHash. It is written by RocksDB when BlockBasedTableOptions::checksum is something other than kCRC32c. (version 0 is silently upconverted)
  • 2 - Can be read by RocksDB's versions since 3.10. Changes the way we encode compressed blocks with LZ4, BZip2 and Zlib compression. If you don't plan to run RocksDB before version 3.10, you should probably use this.
  • 3 - Can be read by RocksDB's versions since 5.15. Changes the way we encode the keys in index blocks. If you don't plan to run RocksDB before version 5.15, you should probably use this. This option only affects newly written tables. When reading existing tables, the information about version is read from the footer.
  • 4 - Can be read by RocksDB's versions since 5.16. Changes the way we encode the values in index blocks. If you don't plan to run RocksDB before version 5.16 and you are using index_block_restart_interval > 1, you should probably use this as it would reduce the index size. This option only affects newly written tables. When reading existing tables, the information about version is read from the footer.
  • 5 - Can be read by RocksDB's versions since 6.6.0. Full and partitioned filters use a generally faster and more accurate Bloom filter implementation, with a different schema.
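Pulling several of the options above together, a hedged configuration sketch; cache size, block size and bits-per-key are illustrative, and Options#setTableFormatConfig is assumed to be the attachment point:

    final BlockBasedTableConfig tableConfig = new BlockBasedTableConfig()
        .setBlockCache(new LRUCache(64 * 1024 * 1024)) // shared 64 MiB block cache
        .setFilterPolicy(new BloomFilter(10))          // roughly 1% false positive rate
        .setCacheIndexAndFilterBlocks(true)
        .setBlockSize(16 * 1024)                       // 16 KiB uncompressed data blocks
        .setFormatVersion(5);                          // readable by RocksDB >= 6.6.0

    try (final Options options = new Options().setTableFormatConfig(tableConfig)) {
      // options can now be passed to RocksDB.open(...).
    }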
- * - * @param formatVersion integer representing the version to be used. - * - * @return the reference to the current option. - */ - public BlockBasedTableConfig setFormatVersion( - final int formatVersion) { - assert (formatVersion >= 0); - this.formatVersion = formatVersion; - return this; - } - - /** - * Determine if index compression is enabled. - * - * See {@link #setEnableIndexCompression(boolean)}. - * - * @return true if index compression is enabled, false otherwise - */ - public boolean enableIndexCompression() { - return enableIndexCompression; - } - - /** - * Store index blocks on disk in compressed format. - * - * Changing this option to false will avoid the overhead of decompression - * if index blocks are evicted and read back. - * - * @param enableIndexCompression true to enable index compression, - * false to disable - * - * @return the reference to the current option. - */ - public BlockBasedTableConfig setEnableIndexCompression( - final boolean enableIndexCompression) { - this.enableIndexCompression = enableIndexCompression; - return this; - } - - /** - * Determines whether data blocks are aligned on the lesser of page size - * and block size. - * - * @return true if data blocks are aligned on the lesser of page size - * and block size. - */ - public boolean blockAlign() { - return blockAlign; - } - - /** - * Set whether data blocks should be aligned on the lesser of page size - * and block size. - * - * @param blockAlign true to align data blocks on the lesser of page size - * and block size. - * - * @return the reference to the current option. - */ - public BlockBasedTableConfig setBlockAlign(final boolean blockAlign) { - this.blockAlign = blockAlign; - return this; - } - - /** - * Get the index shortening mode. - * - * @return the index shortening mode. - */ - public IndexShorteningMode indexShortening() { - return indexShortening; - } - - /** - * Set the index shortening mode. - * - * See {@link IndexShorteningMode}. - * - * @param indexShortening the index shortening mode. - * - * @return the reference to the current option. - */ - public BlockBasedTableConfig setIndexShortening(final IndexShorteningMode indexShortening) { - this.indexShortening = indexShortening; - return this; - } - - /** - * Get the size of the cache in bytes that will be used by RocksDB. - * - * @return block cache size in bytes - */ - @Deprecated - public long blockCacheSize() { - return blockCacheSize; - } - - /** - * Set the size of the cache in bytes that will be used by RocksDB. - * If cacheSize is negative, then cache will not be used. - * DEFAULT: 8M - * - * @param blockCacheSize block cache size in bytes - * @return the reference to the current config. - * - * @deprecated Use {@link #setBlockCache(Cache)}. - */ - @Deprecated - public BlockBasedTableConfig setBlockCacheSize(final long blockCacheSize) { - this.blockCacheSize = blockCacheSize; - return this; - } - - /** - * Returns the number of shard bits used in the block cache. - * The resulting number of shards would be 2 ^ (returned value). - * Any negative number means use default settings. - * - * @return the number of shard bits used in the block cache. - */ - @Deprecated - public int cacheNumShardBits() { - return blockCacheNumShardBits; - } - - /** - * Controls the number of shards for the block cache. - * This is applied only if cacheSize is set to non-negative. - * - * @param blockCacheNumShardBits the number of shard bits. The resulting - * number of shards would be 2 ^ numShardBits. 
Any negative - * number means use default settings." - * @return the reference to the current option. - * - * @deprecated Use {@link #setBlockCache(Cache)}. - */ - @Deprecated - public BlockBasedTableConfig setCacheNumShardBits( - final int blockCacheNumShardBits) { - this.blockCacheNumShardBits = blockCacheNumShardBits; - return this; - } - - /** - * Influence the behavior when kHashSearch is used. - * if false, stores a precise prefix to block range mapping - * if true, does not store prefix and allows prefix hash collision - * (less memory consumption) - * - * @return if hash collisions should be allowed. - * - * @deprecated This option is now deprecated. No matter what value it - * is set to, it will behave as - * if {@link #hashIndexAllowCollision()} == true. - */ - @Deprecated - public boolean hashIndexAllowCollision() { - return true; - } - - /** - * Influence the behavior when kHashSearch is used. - * if false, stores a precise prefix to block range mapping - * if true, does not store prefix and allows prefix hash collision - * (less memory consumption) - * - * @param hashIndexAllowCollision points out if hash collisions should be allowed. - * - * @return the reference to the current config. - * - * @deprecated This option is now deprecated. No matter what value it - * is set to, it will behave as - * if {@link #hashIndexAllowCollision()} == true. - */ - @Deprecated - public BlockBasedTableConfig setHashIndexAllowCollision( - final boolean hashIndexAllowCollision) { - // no-op - return this; - } - - @Override protected long newTableFactoryHandle() { - final long filterPolicyHandle; - if (filterPolicy != null) { - filterPolicyHandle = filterPolicy.nativeHandle_; - } else { - filterPolicyHandle = 0; - } - - final long blockCacheHandle; - if (blockCache != null) { - blockCacheHandle = blockCache.nativeHandle_; - } else { - blockCacheHandle = 0; - } - - final long persistentCacheHandle; - if (persistentCache != null) { - persistentCacheHandle = persistentCache.nativeHandle_; - } else { - persistentCacheHandle = 0; - } - - return newTableFactoryHandle(cacheIndexAndFilterBlocks, - cacheIndexAndFilterBlocksWithHighPriority, pinL0FilterAndIndexBlocksInCache, - pinTopLevelIndexAndFilter, indexType.getValue(), dataBlockIndexType.getValue(), - dataBlockHashTableUtilRatio, checksumType.getValue(), noBlockCache, blockCacheHandle, - persistentCacheHandle, blockSize, blockSizeDeviation, blockRestartInterval, - indexBlockRestartInterval, metadataBlockSize, partitionFilters, optimizeFiltersForMemory, - useDeltaEncoding, filterPolicyHandle, wholeKeyFiltering, verifyCompression, - readAmpBytesPerBit, formatVersion, enableIndexCompression, blockAlign, - indexShortening.getValue(), blockCacheSize, blockCacheNumShardBits); - } - - private native long newTableFactoryHandle(final boolean cacheIndexAndFilterBlocks, - final boolean cacheIndexAndFilterBlocksWithHighPriority, - final boolean pinL0FilterAndIndexBlocksInCache, final boolean pinTopLevelIndexAndFilter, - final byte indexTypeValue, final byte dataBlockIndexTypeValue, - final double dataBlockHashTableUtilRatio, final byte checksumTypeValue, - final boolean noBlockCache, final long blockCacheHandle, final long persistentCacheHandle, - final long blockSize, final int blockSizeDeviation, final int blockRestartInterval, - final int indexBlockRestartInterval, final long metadataBlockSize, - final boolean partitionFilters, final boolean optimizeFiltersForMemory, - final boolean useDeltaEncoding, final long filterPolicyHandle, - final boolean 
wholeKeyFiltering, final boolean verifyCompression, - final int readAmpBytesPerBit, final int formatVersion, final boolean enableIndexCompression, - final boolean blockAlign, final byte indexShortening, - - @Deprecated final long blockCacheSize, @Deprecated final int blockCacheNumShardBits); - - //TODO(AR) flushBlockPolicyFactory - private boolean cacheIndexAndFilterBlocks; - private boolean cacheIndexAndFilterBlocksWithHighPriority; - private boolean pinL0FilterAndIndexBlocksInCache; - private boolean pinTopLevelIndexAndFilter; - private IndexType indexType; - private DataBlockIndexType dataBlockIndexType; - private double dataBlockHashTableUtilRatio; - private ChecksumType checksumType; - private boolean noBlockCache; - private Cache blockCache; - private PersistentCache persistentCache; - private long blockSize; - private int blockSizeDeviation; - private int blockRestartInterval; - private int indexBlockRestartInterval; - private long metadataBlockSize; - private boolean partitionFilters; - private boolean optimizeFiltersForMemory; - private boolean useDeltaEncoding; - private Filter filterPolicy; - private boolean wholeKeyFiltering; - private boolean verifyCompression; - private int readAmpBytesPerBit; - private int formatVersion; - private boolean enableIndexCompression; - private boolean blockAlign; - private IndexShorteningMode indexShortening; - - // NOTE: ONLY used if blockCache == null - @Deprecated private long blockCacheSize; - @Deprecated private int blockCacheNumShardBits; -} diff --git a/java/src/main/java/org/rocksdb/BloomFilter.java b/java/src/main/java/org/rocksdb/BloomFilter.java deleted file mode 100644 index 8aff715b7..000000000 --- a/java/src/main/java/org/rocksdb/BloomFilter.java +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Bloom filter policy that uses a bloom filter with approximately - * the specified number of bits per key. - * - *

- * Note: if you are using a custom comparator that ignores some parts - * of the keys being compared, you must not use this {@code BloomFilter} - * and must provide your own FilterPolicy that also ignores the - * corresponding parts of the keys. For example, if the comparator - * ignores trailing spaces, it would be incorrect to use a - * FilterPolicy (like {@code BloomFilter}) that does not ignore - * trailing spaces in keys.

- */ -public class BloomFilter extends Filter { - - private static final double DEFAULT_BITS_PER_KEY = 10.0; - - /** - * BloomFilter constructor - * - *

- * Callers must delete the result after any database that is using the - * result has been closed.

- */ - public BloomFilter() { - this(DEFAULT_BITS_PER_KEY); - } - - /** - * BloomFilter constructor - * - *

- * bits_per_key: bits per key in bloom filter. A good value for bits_per_key - * is 9.9, which yields a filter with ~ 1% false positive rate. - *

- *

- * Callers must delete the result after any database that is using the - * result has been closed.

- * - * @param bitsPerKey number of bits to use - */ - public BloomFilter(final double bitsPerKey) { - super(createNewBloomFilter(bitsPerKey)); - } - - /** - * BloomFilter constructor - * - *

- * bits_per_key: bits per key in bloom filter. A good value for bits_per_key - * is 10, which yields a filter with ~ 1% false positive rate. - *

default bits_per_key: 10

- * - *

- * Callers must delete the result after any database that is using the - * result has been closed.

- * - * @param bitsPerKey number of bits to use - * @param IGNORED_useBlockBasedMode obsolete, ignored parameter - */ - public BloomFilter(final double bitsPerKey, final boolean IGNORED_useBlockBasedMode) { - this(bitsPerKey); - } - - private native static long createNewBloomFilter(final double bitsKeyKey); -} diff --git a/java/src/main/java/org/rocksdb/BuiltinComparator.java b/java/src/main/java/org/rocksdb/BuiltinComparator.java deleted file mode 100644 index 2c89bf218..000000000 --- a/java/src/main/java/org/rocksdb/BuiltinComparator.java +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Builtin RocksDB comparators - * - *
  1. BYTEWISE_COMPARATOR - Sorts all keys in ascending bytewise order.
  2. REVERSE_BYTEWISE_COMPARATOR - Sorts all keys in descending bytewise order.
- */ -public enum BuiltinComparator { - BYTEWISE_COMPARATOR, REVERSE_BYTEWISE_COMPARATOR -} diff --git a/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java b/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java deleted file mode 100644 index 8eef95447..000000000 --- a/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; -import java.util.List; - -/** - * A ByteBuffer containing fetched data, together with a result for the fetch - * and the total size of the object fetched. - * - * Used for the individual results of - * {@link RocksDB#multiGetByteBuffers(List, List)} - * {@link RocksDB#multiGetByteBuffers(List, List, List)} - * {@link RocksDB#multiGetByteBuffers(ReadOptions, List, List)} - * {@link RocksDB#multiGetByteBuffers(ReadOptions, List, List, List)} - */ -public class ByteBufferGetStatus { - public final Status status; - public final int requiredSize; - public final ByteBuffer value; - - /** - * Constructor used for success status, when the value is contained in the buffer - * - * @param status the status of the request to fetch into the buffer - * @param requiredSize the size of the data, which may be bigger than the buffer - * @param value the buffer containing as much of the value as fits - */ - ByteBufferGetStatus(final Status status, final int requiredSize, final ByteBuffer value) { - this.status = status; - this.requiredSize = requiredSize; - this.value = value; - } - - /** - * Constructor used for a failure status, when no value is filled in - * - * @param status the status of the request to fetch into the buffer - */ - ByteBufferGetStatus(final Status status) { - this.status = status; - this.requiredSize = 0; - this.value = null; - } -} diff --git a/java/src/main/java/org/rocksdb/Cache.java b/java/src/main/java/org/rocksdb/Cache.java deleted file mode 100644 index 569a1df06..000000000 --- a/java/src/main/java/org/rocksdb/Cache.java +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - - -public abstract class Cache extends RocksObject { - protected Cache(final long nativeHandle) { - super(nativeHandle); - } - - /** - * Returns the memory size for the entries - * residing in cache. - * - * @return cache usage size. - * - */ - public long getUsage() { - assert (isOwningHandle()); - return getUsage(this.nativeHandle_); - } - - /** - * Returns the memory size for the entries - * being pinned in cache. - * - * @return cache pinned usage size. 
- * - */ - public long getPinnedUsage() { - assert (isOwningHandle()); - return getPinnedUsage(this.nativeHandle_); - } - - private native static long getUsage(final long handle); - private native static long getPinnedUsage(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java b/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java deleted file mode 100644 index 6c87cc188..000000000 --- a/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Just a Java wrapper around CassandraCompactionFilter implemented in C++ - */ -public class CassandraCompactionFilter - extends AbstractCompactionFilter { - public CassandraCompactionFilter(boolean purgeTtlOnExpiration, int gcGracePeriodInSeconds) { - super(createNewCassandraCompactionFilter0(purgeTtlOnExpiration, gcGracePeriodInSeconds)); - } - - private native static long createNewCassandraCompactionFilter0( - boolean purgeTtlOnExpiration, int gcGracePeriodInSeconds); -} diff --git a/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java b/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java deleted file mode 100644 index 4b0c71ba5..000000000 --- a/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * CassandraValueMergeOperator is a merge operator that merges two cassandra wide column - * values. - */ -public class CassandraValueMergeOperator extends MergeOperator { - public CassandraValueMergeOperator(int gcGracePeriodInSeconds) { - super(newSharedCassandraValueMergeOperator(gcGracePeriodInSeconds, 0)); - } - - public CassandraValueMergeOperator(int gcGracePeriodInSeconds, int operandsLimit) { - super(newSharedCassandraValueMergeOperator(gcGracePeriodInSeconds, operandsLimit)); - } - - private native static long newSharedCassandraValueMergeOperator( - int gcGracePeriodInSeconds, int limit); - - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/Checkpoint.java b/java/src/main/java/org/rocksdb/Checkpoint.java deleted file mode 100644 index 000969932..000000000 --- a/java/src/main/java/org/rocksdb/Checkpoint.java +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Provides Checkpoint functionality. Checkpoints - * provide persistent snapshots of RocksDB databases. - */ -public class Checkpoint extends RocksObject { - - /** - * Creates a Checkpoint object to be used for creating open-able - * snapshots. - * - * @param db {@link RocksDB} instance. - * @return a Checkpoint instance. - * - * @throws java.lang.IllegalArgumentException if {@link RocksDB} - * instance is null. 
- * @throws java.lang.IllegalStateException if {@link RocksDB} - * instance is not initialized. - */ - public static Checkpoint create(final RocksDB db) { - if (db == null) { - throw new IllegalArgumentException( - "RocksDB instance shall not be null."); - } else if (!db.isOwningHandle()) { - throw new IllegalStateException( - "RocksDB instance must be initialized."); - } - Checkpoint checkpoint = new Checkpoint(db); - return checkpoint; - } - - /** - *

Builds an open-able snapshot of RocksDB on the same disk, which accepts an output directory on the same disk, and under the directory (1) hard-linked SST files pointing to existing live SST files (2) copied manifest and other files.
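A hedged sketch of taking such a snapshot (paths hypothetical; assumes the earlier imports and a method declaring RocksDBException):

    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/rocksdb-data");
         final Checkpoint checkpoint = Checkpoint.create(db)) {
      // The target directory should not exist yet; RocksDB creates it.
      checkpoint.createCheckpoint("/tmp/rocksdb-checkpoint-1");
    }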

- * - * @param checkpointPath path to the folder where the snapshot is going - * to be stored. - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public void createCheckpoint(final String checkpointPath) - throws RocksDBException { - createCheckpoint(nativeHandle_, checkpointPath); - } - - private Checkpoint(final RocksDB db) { - super(newCheckpoint(db.nativeHandle_)); - this.db_ = db; - } - - private final RocksDB db_; - - private static native long newCheckpoint(long dbHandle); - @Override protected final native void disposeInternal(final long handle); - - private native void createCheckpoint(long handle, String checkpointPath) - throws RocksDBException; -} diff --git a/java/src/main/java/org/rocksdb/ChecksumType.java b/java/src/main/java/org/rocksdb/ChecksumType.java deleted file mode 100644 index e03fa14ba..000000000 --- a/java/src/main/java/org/rocksdb/ChecksumType.java +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Checksum types used in conjunction with BlockBasedTable. - */ -public enum ChecksumType { - /** - * Not implemented yet. - */ - kNoChecksum((byte) 0), - /** - * CRC32 Checksum - */ - kCRC32c((byte) 1), - /** - * XX Hash - */ - kxxHash((byte) 2), - /** - * XX Hash 64 - */ - kxxHash64((byte) 3), - - kXXH3((byte) 4); - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value_; - } - - private ChecksumType(final byte value) { - value_ = value; - } - - private final byte value_; -} diff --git a/java/src/main/java/org/rocksdb/ClockCache.java b/java/src/main/java/org/rocksdb/ClockCache.java deleted file mode 100644 index a66dc0e8a..000000000 --- a/java/src/main/java/org/rocksdb/ClockCache.java +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Similar to {@link LRUCache}, but based on the CLOCK algorithm with - * better concurrent performance in some cases - */ -public class ClockCache extends Cache { - - /** - * Create a new cache with a fixed size capacity. - * - * @param capacity The fixed size capacity of the cache - */ - public ClockCache(final long capacity) { - super(newClockCache(capacity, -1, false)); - } - - /** - * Create a new cache with a fixed size capacity. The cache is sharded - * to 2^numShardBits shards, by hash of the key. The total capacity - * is divided and evenly assigned to each shard. - * numShardBits = -1 means it is automatically determined: every shard - * will be at least 512KB and number of shard bits will not exceed 6. - * - * @param capacity The fixed size capacity of the cache - * @param numShardBits The cache is sharded to 2^numShardBits shards, - * by hash of the key - */ - public ClockCache(final long capacity, final int numShardBits) { - super(newClockCache(capacity, numShardBits, false)); - } - - /** - * Create a new cache with a fixed size capacity. The cache is sharded - * to 2^numShardBits shards, by hash of the key. 
The total capacity - * is divided and evenly assigned to each shard. If strictCapacityLimit - * is set, insert to the cache will fail when cache is full. - * numShardBits = -1 means it is automatically determined: every shard - * will be at least 512KB and number of shard bits will not exceed 6. - * - * @param capacity The fixed size capacity of the cache - * @param numShardBits The cache is sharded to 2^numShardBits shards, - * by hash of the key - * @param strictCapacityLimit insert to the cache will fail when cache is full - */ - public ClockCache(final long capacity, final int numShardBits, - final boolean strictCapacityLimit) { - super(newClockCache(capacity, numShardBits, strictCapacityLimit)); - } - - private native static long newClockCache(final long capacity, - final int numShardBits, final boolean strictCapacityLimit); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java b/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java deleted file mode 100644 index 125a8dcf8..000000000 --- a/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Arrays; - -/** - *

Describes a column family with a - * name and respective Options.

- */ -public class ColumnFamilyDescriptor { - - /** - *

Creates a new Column Family using a name and default - * options.

- * - * @param columnFamilyName name of column family. - * @since 3.10.0 - */ - public ColumnFamilyDescriptor(final byte[] columnFamilyName) { - this(columnFamilyName, new ColumnFamilyOptions()); - } - - /** - *

Creates a new Column Family using a name and custom - * options.

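// Illustrative sketch, not part of the original file: how these descriptors are typically
// passed to RocksDB.open(...). The path, column family name and option values below are
// placeholder assumptions; the surrounding method is assumed to declare RocksDBException
// and to have the usual org.rocksdb and java.util imports available.
final List<ColumnFamilyDescriptor> descriptors = Arrays.asList(
    new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
    new ColumnFamilyDescriptor("users".getBytes(StandardCharsets.UTF_8),
        new ColumnFamilyOptions().setWriteBufferSize(32L * 1024 * 1024)));
final List<ColumnFamilyHandle> handles = new ArrayList<>();
try (final DBOptions dbOptions =
         new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true);
     final RocksDB db = RocksDB.open(dbOptions, "/tmp/rocksdb-example", descriptors, handles)) {
  // read/write through db, addressing each column family via its handle in `handles`
}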
- * - * @param columnFamilyName name of column family. - * @param columnFamilyOptions options to be used with - * column family. - * @since 3.10.0 - */ - public ColumnFamilyDescriptor(final byte[] columnFamilyName, - final ColumnFamilyOptions columnFamilyOptions) { - columnFamilyName_ = columnFamilyName; - columnFamilyOptions_ = columnFamilyOptions; - } - - /** - * Retrieve name of column family. - * - * @return column family name. - * @since 3.10.0 - */ - public byte[] getName() { - return columnFamilyName_; - } - - /** - * Retrieve assigned options instance. - * - * @return Options instance assigned to this instance. - */ - public ColumnFamilyOptions getOptions() { - return columnFamilyOptions_; - } - - @Override - public boolean equals(final Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - final ColumnFamilyDescriptor that = (ColumnFamilyDescriptor) o; - return Arrays.equals(columnFamilyName_, that.columnFamilyName_) - && columnFamilyOptions_.nativeHandle_ == that.columnFamilyOptions_.nativeHandle_; - } - - @Override - public int hashCode() { - int result = (int) (columnFamilyOptions_.nativeHandle_ ^ (columnFamilyOptions_.nativeHandle_ >>> 32)); - result = 31 * result + Arrays.hashCode(columnFamilyName_); - return result; - } - - private final byte[] columnFamilyName_; - private final ColumnFamilyOptions columnFamilyOptions_; -} diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java b/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java deleted file mode 100644 index 1ac0a35bb..000000000 --- a/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Arrays; -import java.util.Objects; - -/** - * ColumnFamilyHandle class to hold handles to underlying rocksdb - * ColumnFamily Pointers. - */ -public class ColumnFamilyHandle extends RocksObject { - /** - * Constructs column family Java object, which operates on underlying native object. - * - * @param rocksDB db instance associated with this column family - * @param nativeHandle native handle to underlying native ColumnFamily object - */ - ColumnFamilyHandle(final RocksDB rocksDB, - final long nativeHandle) { - super(nativeHandle); - // rocksDB must point to a valid RocksDB instance; - assert(rocksDB != null); - // ColumnFamilyHandle must hold a reference to the related RocksDB instance - // to guarantee that while a GC cycle starts ColumnFamilyHandle instances - // are freed prior to RocksDB instances. - this.rocksDB_ = rocksDB; - } - - /** - * Constructor called only from JNI. - * - * NOTE: we are producing an additional Java Object here to represent the underlying native C++ - * ColumnFamilyHandle object. The underlying object is not owned by ourselves. The Java API user - * likely already had a ColumnFamilyHandle Java object which owns the underlying C++ object, as - * they will have been presented it when they opened the database or added a Column Family. - * - * - * TODO(AR) - Potentially a better design would be to cache the active Java Column Family Objects - * in RocksDB, and return the same Java Object instead of instantiating a new one here. 
This could - * also help us to improve the Java API semantics for Java users. See for example - * https://github.com/facebook/rocksdb/issues/2687. - * - * @param nativeHandle native handle to the column family. - */ - ColumnFamilyHandle(final long nativeHandle) { - super(nativeHandle); - rocksDB_ = null; - disOwnNativeHandle(); - } - - /** - * Gets the name of the Column Family. - * - * @return The name of the Column Family. - * - * @throws RocksDBException if an error occurs whilst retrieving the name. - */ - public byte[] getName() throws RocksDBException { - assert(isOwningHandle() || isDefaultColumnFamily()); - return getName(nativeHandle_); - } - - /** - * Gets the ID of the Column Family. - * - * @return the ID of the Column Family. - */ - public int getID() { - assert(isOwningHandle() || isDefaultColumnFamily()); - return getID(nativeHandle_); - } - - /** - * Gets the up-to-date descriptor of the column family - * associated with this handle. Since it fills "*desc" with the up-to-date - * information, this call might internally lock and release DB mutex to - * access the up-to-date CF options. In addition, all the pointer-typed - * options cannot be referenced any longer than the original options exist. - * - * Note that this function is not supported in RocksDBLite. - * - * @return the up-to-date descriptor. - * - * @throws RocksDBException if an error occurs whilst retrieving the - * descriptor. - */ - public ColumnFamilyDescriptor getDescriptor() throws RocksDBException { - assert(isOwningHandle() || isDefaultColumnFamily()); - return getDescriptor(nativeHandle_); - } - - @Override - public boolean equals(final Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - final ColumnFamilyHandle that = (ColumnFamilyHandle) o; - try { - return rocksDB_.nativeHandle_ == that.rocksDB_.nativeHandle_ && - getID() == that.getID() && - Arrays.equals(getName(), that.getName()); - } catch (RocksDBException e) { - throw new RuntimeException("Cannot compare column family handles", e); - } - } - - @Override - public int hashCode() { - try { - int result = Objects.hash(getID(), rocksDB_.nativeHandle_); - result = 31 * result + Arrays.hashCode(getName()); - return result; - } catch (RocksDBException e) { - throw new RuntimeException("Cannot calculate hash code of column family handle", e); - } - } - - protected boolean isDefaultColumnFamily() { - return nativeHandle_ == rocksDB_.getDefaultColumnFamily().nativeHandle_; - } - - /** - *

Deletes underlying C++ column family handle pointer.

- * - *

Note: the underlying handle can only be safely deleted if the RocksDB - * instance related to a certain ColumnFamilyHandle is still valid and - * initialized. Therefore {@code disposeInternal()} checks if the RocksDB is - * initialized before freeing the native handle.

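// Illustrative sketch, an assumption rather than original content: honouring the ordering
// described above by closing every ColumnFamilyHandle before the owning RocksDB instance.
// `db` and `handles` are placeholder names.
for (final ColumnFamilyHandle handle : handles) {
  handle.close(); // releases the native handle while db is still valid
}
db.close(); // close the database only after its column family handles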
- */ - @Override - protected void disposeInternal() { - if(rocksDB_.isOwningHandle()) { - disposeInternal(nativeHandle_); - } - } - - private native byte[] getName(final long handle) throws RocksDBException; - private native int getID(final long handle); - private native ColumnFamilyDescriptor getDescriptor(final long handle) throws RocksDBException; - @Override protected final native void disposeInternal(final long handle); - - private final RocksDB rocksDB_; -} diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java b/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java deleted file mode 100644 index 191904017..000000000 --- a/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Arrays; -import java.util.List; - -/** - * The metadata that describes a column family. - */ -public class ColumnFamilyMetaData { - private final long size; - private final long fileCount; - private final byte[] name; - private final LevelMetaData[] levels; - - /** - * Called from JNI C++ - */ - private ColumnFamilyMetaData( - final long size, - final long fileCount, - final byte[] name, - final LevelMetaData[] levels) { - this.size = size; - this.fileCount = fileCount; - this.name = name; - this.levels = levels; - } - - /** - * The size of this column family in bytes, which is equal to the sum of - * the file size of its {@link #levels()}. - * - * @return the size of this column family - */ - public long size() { - return size; - } - - /** - * The number of files in this column family. - * - * @return the number of files - */ - public long fileCount() { - return fileCount; - } - - /** - * The name of the column family. - * - * @return the name - */ - public byte[] name() { - return name; - } - - /** - * The metadata of all levels in this column family. - * - * @return the levels metadata - */ - public List levels() { - return Arrays.asList(levels); - } -} diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java deleted file mode 100644 index 65dfd328f..000000000 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java +++ /dev/null @@ -1,1543 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.file.Paths; -import java.util.*; - -/** - * ColumnFamilyOptions to control the behavior of a database. It will be used - * during the creation of a {@link org.rocksdb.RocksDB} (i.e., RocksDB.open()). - *

- * As a descendant of {@link AbstractNativeReference}, this class is {@link AutoCloseable} - * and will be automatically released if opened in the preamble of a try-with-resources block. - */ -public class ColumnFamilyOptions extends RocksObject - implements ColumnFamilyOptionsInterface<ColumnFamilyOptions>, - MutableColumnFamilyOptionsInterface<ColumnFamilyOptions> { - static { - RocksDB.loadLibrary(); - } - - /** - * Construct ColumnFamilyOptions. - *

- * This constructor will create (by allocating a block of memory) - * an {@code rocksdb::ColumnFamilyOptions} on the C++ side. - */ - public ColumnFamilyOptions() { - super(newColumnFamilyOptions()); - } - - /** - * Copy constructor for ColumnFamilyOptions. - *

- * NOTE: This does a shallow copy, which means comparator, merge_operator, compaction_filter, - * compaction_filter_factory and other pointers will be cloned! - * - * @param other The ColumnFamilyOptions to copy. - */ - public ColumnFamilyOptions(final ColumnFamilyOptions other) { - super(copyColumnFamilyOptions(other.nativeHandle_)); - this.memTableConfig_ = other.memTableConfig_; - this.tableFormatConfig_ = other.tableFormatConfig_; - this.comparator_ = other.comparator_; - this.compactionFilter_ = other.compactionFilter_; - this.compactionFilterFactory_ = other.compactionFilterFactory_; - this.compactionOptionsUniversal_ = other.compactionOptionsUniversal_; - this.compactionOptionsFIFO_ = other.compactionOptionsFIFO_; - this.bottommostCompressionOptions_ = other.bottommostCompressionOptions_; - this.compressionOptions_ = other.compressionOptions_; - this.compactionThreadLimiter_ = other.compactionThreadLimiter_; - this.sstPartitionerFactory_ = other.sstPartitionerFactory_; - } - - /** - * Constructor from Options - * - * @param options The options. - */ - public ColumnFamilyOptions(final Options options) { - super(newColumnFamilyOptionsFromOptions(options.nativeHandle_)); - } - - /** - *

Constructor to be used by - * {@link #getColumnFamilyOptionsFromProps(java.util.Properties)}, - * {@link ColumnFamilyDescriptor#getOptions()} - * and also called via JNI.

- * - * @param handle native handle to ColumnFamilyOptions instance. - */ - ColumnFamilyOptions(final long handle) { - super(handle); - } - - /** - *

Method to get an options instance by using pre-configured - * property values. If one or many values are undefined in - * the context of RocksDB, the method will return a null - * value.

- * - *

Note: Property keys can be derived from - * getter methods within the options class. Example: the method - * {@code writeBufferSize()} has a property key: - * {@code write_buffer_size}.

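// Illustrative sketch (assumed values): property keys follow the getter-derived names
// described above, and the factory returns null when the properties cannot be applied,
// so the result is checked before use.
final Properties props = new Properties();
props.setProperty("write_buffer_size", "67108864");   // corresponds to writeBufferSize()
props.setProperty("max_write_buffer_number", "4");    // corresponds to maxWriteBufferNumber()
final ColumnFamilyOptions cfOptions =
    ColumnFamilyOptions.getColumnFamilyOptionsFromProps(props);
if (cfOptions == null) {
  throw new IllegalArgumentException("unrecognised option keys");
}
// pass cfOptions to a ColumnFamilyDescriptor, and close it when no longer needed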
- * - * @param properties {@link java.util.Properties} instance. - * - * @return {@link org.rocksdb.ColumnFamilyOptions instance} - * or null. - * - * @throws java.lang.IllegalArgumentException if null or empty - * {@link Properties} instance is passed to the method call. - */ - public static ColumnFamilyOptions getColumnFamilyOptionsFromProps( - final Properties properties) { - ColumnFamilyOptions columnFamilyOptions = null; - final long handle = - getColumnFamilyOptionsFromProps(Options.getOptionStringFromProps(properties)); - if (handle != 0) { - columnFamilyOptions = new ColumnFamilyOptions(handle); - } - return columnFamilyOptions; - } - - /** - *

Method to get an options instance by using pre-configured - * property values. If one or many values are undefined in - * the context of RocksDB, the method will return a null - * value.

- * - *

Note: Property keys can be derived from - * getter methods within the options class. Example: the method - * {@code writeBufferSize()} has a property key: - * {@code write_buffer_size}.

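// Illustrative sketch (assumption): the overload below additionally takes a ConfigOptions
// that controls how the properties are parsed; `props` is a java.util.Properties instance
// populated as in the previous sketch.
try (final ConfigOptions cfgOpts = new ConfigOptions()) {
  final ColumnFamilyOptions cfOptions =
      ColumnFamilyOptions.getColumnFamilyOptionsFromProps(cfgOpts, props);
  // cfOptions is null if the properties could not be applied
}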
- * - * @param cfgOpts ConfigOptions controlling how the properties are parsed. - * @param properties {@link java.util.Properties} instance. - * - * @return {@link org.rocksdb.ColumnFamilyOptions instance} - * or null. - * - * @throws java.lang.IllegalArgumentException if null or empty - * {@link Properties} instance is passed to the method call. - */ - public static ColumnFamilyOptions getColumnFamilyOptionsFromProps( - final ConfigOptions cfgOpts, final Properties properties) { - ColumnFamilyOptions columnFamilyOptions = null; - final long handle = getColumnFamilyOptionsFromProps( - cfgOpts.nativeHandle_, Options.getOptionStringFromProps(properties)); - if (handle != 0){ - columnFamilyOptions = new ColumnFamilyOptions(handle); - } - return columnFamilyOptions; - } - - @Override - public ColumnFamilyOptions oldDefaults(final int majorVersion, final int minorVersion) { - oldDefaults(nativeHandle_, majorVersion, minorVersion); - return this; - } - - @Override - public ColumnFamilyOptions optimizeForSmallDb() { - optimizeForSmallDb(nativeHandle_); - return this; - } - - @Override - public ColumnFamilyOptions optimizeForSmallDb(final Cache cache) { - optimizeForSmallDb(nativeHandle_, cache.getNativeHandle()); - return this; - } - - @Override - public ColumnFamilyOptions optimizeForPointLookup( - final long blockCacheSizeMb) { - optimizeForPointLookup(nativeHandle_, - blockCacheSizeMb); - return this; - } - - @Override - public ColumnFamilyOptions optimizeLevelStyleCompaction() { - optimizeLevelStyleCompaction(nativeHandle_, - DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET); - return this; - } - - @Override - public ColumnFamilyOptions optimizeLevelStyleCompaction( - final long memtableMemoryBudget) { - optimizeLevelStyleCompaction(nativeHandle_, - memtableMemoryBudget); - return this; - } - - @Override - public ColumnFamilyOptions optimizeUniversalStyleCompaction() { - optimizeUniversalStyleCompaction(nativeHandle_, - DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET); - return this; - } - - @Override - public ColumnFamilyOptions optimizeUniversalStyleCompaction( - final long memtableMemoryBudget) { - optimizeUniversalStyleCompaction(nativeHandle_, - memtableMemoryBudget); - return this; - } - - @Override - public ColumnFamilyOptions setComparator( - final BuiltinComparator builtinComparator) { - assert(isOwningHandle()); - setComparatorHandle(nativeHandle_, builtinComparator.ordinal()); - return this; - } - - @Override - public ColumnFamilyOptions setComparator( - final AbstractComparator comparator) { - assert (isOwningHandle()); - setComparatorHandle(nativeHandle_, comparator.nativeHandle_, - comparator.getComparatorType().getValue()); - comparator_ = comparator; - return this; - } - - @Override - public ColumnFamilyOptions setMergeOperatorName(final String name) { - assert (isOwningHandle()); - if (name == null) { - throw new IllegalArgumentException( - "Merge operator name must not be null."); - } - setMergeOperatorName(nativeHandle_, name); - return this; - } - - @Override - public ColumnFamilyOptions setMergeOperator( - final MergeOperator mergeOperator) { - setMergeOperator(nativeHandle_, mergeOperator.nativeHandle_); - return this; - } - - @Override - public ColumnFamilyOptions setCompactionFilter( - final AbstractCompactionFilter> - compactionFilter) { - setCompactionFilterHandle(nativeHandle_, compactionFilter.nativeHandle_); - compactionFilter_ = compactionFilter; - return this; - } - - @Override - public AbstractCompactionFilter> compactionFilter() { - assert (isOwningHandle()); - return 
compactionFilter_; - } - - @Override - public ColumnFamilyOptions setCompactionFilterFactory(final AbstractCompactionFilterFactory> compactionFilterFactory) { - assert (isOwningHandle()); - setCompactionFilterFactoryHandle(nativeHandle_, compactionFilterFactory.nativeHandle_); - compactionFilterFactory_ = compactionFilterFactory; - return this; - } - - @Override - public AbstractCompactionFilterFactory> compactionFilterFactory() { - assert (isOwningHandle()); - return compactionFilterFactory_; - } - - @Override - public ColumnFamilyOptions setWriteBufferSize(final long writeBufferSize) { - assert(isOwningHandle()); - setWriteBufferSize(nativeHandle_, writeBufferSize); - return this; - } - - @Override - public long writeBufferSize() { - assert(isOwningHandle()); - return writeBufferSize(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMaxWriteBufferNumber( - final int maxWriteBufferNumber) { - assert(isOwningHandle()); - setMaxWriteBufferNumber(nativeHandle_, maxWriteBufferNumber); - return this; - } - - @Override - public int maxWriteBufferNumber() { - assert(isOwningHandle()); - return maxWriteBufferNumber(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMinWriteBufferNumberToMerge( - final int minWriteBufferNumberToMerge) { - setMinWriteBufferNumberToMerge(nativeHandle_, minWriteBufferNumberToMerge); - return this; - } - - @Override - public int minWriteBufferNumberToMerge() { - return minWriteBufferNumberToMerge(nativeHandle_); - } - - @Override - public ColumnFamilyOptions useFixedLengthPrefixExtractor(final int n) { - assert(isOwningHandle()); - useFixedLengthPrefixExtractor(nativeHandle_, n); - return this; - } - - @Override - public ColumnFamilyOptions useCappedPrefixExtractor(final int n) { - assert(isOwningHandle()); - useCappedPrefixExtractor(nativeHandle_, n); - return this; - } - - @Override - public ColumnFamilyOptions setCompressionType( - final CompressionType compressionType) { - setCompressionType(nativeHandle_, compressionType.getValue()); - return this; - } - - @Override - public CompressionType compressionType() { - return CompressionType.getCompressionType(compressionType(nativeHandle_)); - } - - @Override - public ColumnFamilyOptions setCompressionPerLevel( - final List compressionLevels) { - final byte[] byteCompressionTypes = new byte[ - compressionLevels.size()]; - for (int i = 0; i < compressionLevels.size(); i++) { - byteCompressionTypes[i] = compressionLevels.get(i).getValue(); - } - setCompressionPerLevel(nativeHandle_, byteCompressionTypes); - return this; - } - - @Override - public List compressionPerLevel() { - final byte[] byteCompressionTypes = - compressionPerLevel(nativeHandle_); - final List compressionLevels = new ArrayList<>(); - for (final byte byteCompressionType : byteCompressionTypes) { - compressionLevels.add(CompressionType.getCompressionType( - byteCompressionType)); - } - return compressionLevels; - } - - @Override - public ColumnFamilyOptions setBottommostCompressionType( - final CompressionType bottommostCompressionType) { - setBottommostCompressionType(nativeHandle_, - bottommostCompressionType.getValue()); - return this; - } - - @Override - public CompressionType bottommostCompressionType() { - return CompressionType.getCompressionType( - bottommostCompressionType(nativeHandle_)); - } - - @Override - public ColumnFamilyOptions setBottommostCompressionOptions( - final CompressionOptions bottommostCompressionOptions) { - setBottommostCompressionOptions(nativeHandle_, - 
bottommostCompressionOptions.nativeHandle_); - this.bottommostCompressionOptions_ = bottommostCompressionOptions; - return this; - } - - @Override - public CompressionOptions bottommostCompressionOptions() { - return this.bottommostCompressionOptions_; - } - - @Override - public ColumnFamilyOptions setCompressionOptions( - final CompressionOptions compressionOptions) { - setCompressionOptions(nativeHandle_, compressionOptions.nativeHandle_); - this.compressionOptions_ = compressionOptions; - return this; - } - - @Override - public CompressionOptions compressionOptions() { - return this.compressionOptions_; - } - - @Override - public ColumnFamilyOptions setNumLevels(final int numLevels) { - setNumLevels(nativeHandle_, numLevels); - return this; - } - - @Override - public int numLevels() { - return numLevels(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setLevelZeroFileNumCompactionTrigger( - final int numFiles) { - setLevelZeroFileNumCompactionTrigger( - nativeHandle_, numFiles); - return this; - } - - @Override - public int levelZeroFileNumCompactionTrigger() { - return levelZeroFileNumCompactionTrigger(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setLevelZeroSlowdownWritesTrigger( - final int numFiles) { - setLevelZeroSlowdownWritesTrigger(nativeHandle_, numFiles); - return this; - } - - @Override - public int levelZeroSlowdownWritesTrigger() { - return levelZeroSlowdownWritesTrigger(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setLevelZeroStopWritesTrigger(final int numFiles) { - setLevelZeroStopWritesTrigger(nativeHandle_, numFiles); - return this; - } - - @Override - public int levelZeroStopWritesTrigger() { - return levelZeroStopWritesTrigger(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setTargetFileSizeBase( - final long targetFileSizeBase) { - setTargetFileSizeBase(nativeHandle_, targetFileSizeBase); - return this; - } - - @Override - public long targetFileSizeBase() { - return targetFileSizeBase(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setTargetFileSizeMultiplier( - final int multiplier) { - setTargetFileSizeMultiplier(nativeHandle_, multiplier); - return this; - } - - @Override - public int targetFileSizeMultiplier() { - return targetFileSizeMultiplier(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMaxBytesForLevelBase( - final long maxBytesForLevelBase) { - setMaxBytesForLevelBase(nativeHandle_, maxBytesForLevelBase); - return this; - } - - @Override - public long maxBytesForLevelBase() { - return maxBytesForLevelBase(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setLevelCompactionDynamicLevelBytes( - final boolean enableLevelCompactionDynamicLevelBytes) { - setLevelCompactionDynamicLevelBytes(nativeHandle_, - enableLevelCompactionDynamicLevelBytes); - return this; - } - - @Override - public boolean levelCompactionDynamicLevelBytes() { - return levelCompactionDynamicLevelBytes(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMaxBytesForLevelMultiplier(final double multiplier) { - setMaxBytesForLevelMultiplier(nativeHandle_, multiplier); - return this; - } - - @Override - public double maxBytesForLevelMultiplier() { - return maxBytesForLevelMultiplier(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMaxCompactionBytes(final long maxCompactionBytes) { - setMaxCompactionBytes(nativeHandle_, maxCompactionBytes); - return this; - } - - @Override - public long maxCompactionBytes() { - return maxCompactionBytes(nativeHandle_); 
- } - - @Override - public ColumnFamilyOptions setArenaBlockSize( - final long arenaBlockSize) { - setArenaBlockSize(nativeHandle_, arenaBlockSize); - return this; - } - - @Override - public long arenaBlockSize() { - return arenaBlockSize(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setDisableAutoCompactions( - final boolean disableAutoCompactions) { - setDisableAutoCompactions(nativeHandle_, disableAutoCompactions); - return this; - } - - @Override - public boolean disableAutoCompactions() { - return disableAutoCompactions(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setCompactionStyle( - final CompactionStyle compactionStyle) { - setCompactionStyle(nativeHandle_, compactionStyle.getValue()); - return this; - } - - @Override - public CompactionStyle compactionStyle() { - return CompactionStyle.fromValue(compactionStyle(nativeHandle_)); - } - - @Override - public ColumnFamilyOptions setMaxTableFilesSizeFIFO( - final long maxTableFilesSize) { - assert(maxTableFilesSize > 0); // unsigned native type - assert(isOwningHandle()); - setMaxTableFilesSizeFIFO(nativeHandle_, maxTableFilesSize); - return this; - } - - @Override - public long maxTableFilesSizeFIFO() { - return maxTableFilesSizeFIFO(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMaxSequentialSkipInIterations( - final long maxSequentialSkipInIterations) { - setMaxSequentialSkipInIterations(nativeHandle_, - maxSequentialSkipInIterations); - return this; - } - - @Override - public long maxSequentialSkipInIterations() { - return maxSequentialSkipInIterations(nativeHandle_); - } - - @Override - public MemTableConfig memTableConfig() { - return this.memTableConfig_; - } - - @Override - public ColumnFamilyOptions setMemTableConfig( - final MemTableConfig memTableConfig) { - setMemTableFactory( - nativeHandle_, memTableConfig.newMemTableFactoryHandle()); - this.memTableConfig_ = memTableConfig; - return this; - } - - @Override - public String memTableFactoryName() { - assert(isOwningHandle()); - return memTableFactoryName(nativeHandle_); - } - - @Override - public TableFormatConfig tableFormatConfig() { - return this.tableFormatConfig_; - } - - @Override - public ColumnFamilyOptions setTableFormatConfig( - final TableFormatConfig tableFormatConfig) { - setTableFactory(nativeHandle_, tableFormatConfig.newTableFactoryHandle()); - this.tableFormatConfig_ = tableFormatConfig; - return this; - } - - @Override - public String tableFactoryName() { - assert(isOwningHandle()); - return tableFactoryName(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setCfPaths(final Collection cfPaths) { - assert (isOwningHandle()); - - final int len = cfPaths.size(); - final String[] paths = new String[len]; - final long[] targetSizes = new long[len]; - - int i = 0; - for (final DbPath dbPath : cfPaths) { - paths[i] = dbPath.path.toString(); - targetSizes[i] = dbPath.targetSize; - i++; - } - setCfPaths(nativeHandle_, paths, targetSizes); - return this; - } - - @Override - public List cfPaths() { - final int len = (int) cfPathsLen(nativeHandle_); - - if (len == 0) { - return Collections.emptyList(); - } - - final String[] paths = new String[len]; - final long[] targetSizes = new long[len]; - - cfPaths(nativeHandle_, paths, targetSizes); - - final List cfPaths = new ArrayList<>(); - for (int i = 0; i < len; i++) { - cfPaths.add(new DbPath(Paths.get(paths[i]), targetSizes[i])); - } - - return cfPaths; - } - - @Override - public ColumnFamilyOptions setInplaceUpdateSupport( - final boolean 
inplaceUpdateSupport) { - setInplaceUpdateSupport(nativeHandle_, inplaceUpdateSupport); - return this; - } - - @Override - public boolean inplaceUpdateSupport() { - return inplaceUpdateSupport(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setInplaceUpdateNumLocks( - final long inplaceUpdateNumLocks) { - setInplaceUpdateNumLocks(nativeHandle_, inplaceUpdateNumLocks); - return this; - } - - @Override - public long inplaceUpdateNumLocks() { - return inplaceUpdateNumLocks(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMemtablePrefixBloomSizeRatio( - final double memtablePrefixBloomSizeRatio) { - setMemtablePrefixBloomSizeRatio(nativeHandle_, memtablePrefixBloomSizeRatio); - return this; - } - - @Override - public double memtablePrefixBloomSizeRatio() { - return memtablePrefixBloomSizeRatio(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setExperimentalMempurgeThreshold( - final double experimentalMempurgeThreshold) { - setExperimentalMempurgeThreshold(nativeHandle_, experimentalMempurgeThreshold); - return this; - } - - @Override - public double experimentalMempurgeThreshold() { - return experimentalMempurgeThreshold(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMemtableWholeKeyFiltering(final boolean memtableWholeKeyFiltering) { - setMemtableWholeKeyFiltering(nativeHandle_, memtableWholeKeyFiltering); - return this; - } - - @Override - public boolean memtableWholeKeyFiltering() { - return memtableWholeKeyFiltering(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setBloomLocality(final int bloomLocality) { - setBloomLocality(nativeHandle_, bloomLocality); - return this; - } - - @Override - public int bloomLocality() { - return bloomLocality(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMaxSuccessiveMerges( - final long maxSuccessiveMerges) { - setMaxSuccessiveMerges(nativeHandle_, maxSuccessiveMerges); - return this; - } - - @Override - public long maxSuccessiveMerges() { - return maxSuccessiveMerges(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setOptimizeFiltersForHits( - final boolean optimizeFiltersForHits) { - setOptimizeFiltersForHits(nativeHandle_, optimizeFiltersForHits); - return this; - } - - @Override - public boolean optimizeFiltersForHits() { - return optimizeFiltersForHits(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMemtableHugePageSize(final long memtableHugePageSize) { - setMemtableHugePageSize(nativeHandle_, - memtableHugePageSize); - return this; - } - - @Override - public long memtableHugePageSize() { - return memtableHugePageSize(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setSoftPendingCompactionBytesLimit( - final long softPendingCompactionBytesLimit) { - setSoftPendingCompactionBytesLimit(nativeHandle_, - softPendingCompactionBytesLimit); - return this; - } - - @Override - public long softPendingCompactionBytesLimit() { - return softPendingCompactionBytesLimit(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setHardPendingCompactionBytesLimit( - final long hardPendingCompactionBytesLimit) { - setHardPendingCompactionBytesLimit(nativeHandle_, hardPendingCompactionBytesLimit); - return this; - } - - @Override - public long hardPendingCompactionBytesLimit() { - return hardPendingCompactionBytesLimit(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setLevel0FileNumCompactionTrigger( - final int level0FileNumCompactionTrigger) { - setLevel0FileNumCompactionTrigger(nativeHandle_, 
level0FileNumCompactionTrigger); - return this; - } - - @Override - public int level0FileNumCompactionTrigger() { - return level0FileNumCompactionTrigger(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setLevel0SlowdownWritesTrigger(final int level0SlowdownWritesTrigger) { - setLevel0SlowdownWritesTrigger(nativeHandle_, level0SlowdownWritesTrigger); - return this; - } - - @Override - public int level0SlowdownWritesTrigger() { - return level0SlowdownWritesTrigger(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setLevel0StopWritesTrigger(final int level0StopWritesTrigger) { - setLevel0StopWritesTrigger(nativeHandle_, level0StopWritesTrigger); - return this; - } - - @Override - public int level0StopWritesTrigger() { - return level0StopWritesTrigger(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMaxBytesForLevelMultiplierAdditional( - final int[] maxBytesForLevelMultiplierAdditional) { - setMaxBytesForLevelMultiplierAdditional(nativeHandle_, maxBytesForLevelMultiplierAdditional); - return this; - } - - @Override - public int[] maxBytesForLevelMultiplierAdditional() { - return maxBytesForLevelMultiplierAdditional(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setParanoidFileChecks(final boolean paranoidFileChecks) { - setParanoidFileChecks(nativeHandle_, paranoidFileChecks); - return this; - } - - @Override - public boolean paranoidFileChecks() { - return paranoidFileChecks(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setMaxWriteBufferNumberToMaintain( - final int maxWriteBufferNumberToMaintain) { - setMaxWriteBufferNumberToMaintain( - nativeHandle_, maxWriteBufferNumberToMaintain); - return this; - } - - @Override - public int maxWriteBufferNumberToMaintain() { - return maxWriteBufferNumberToMaintain(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setCompactionPriority( - final CompactionPriority compactionPriority) { - setCompactionPriority(nativeHandle_, compactionPriority.getValue()); - return this; - } - - @Override - public CompactionPriority compactionPriority() { - return CompactionPriority.getCompactionPriority( - compactionPriority(nativeHandle_)); - } - - @Override - public ColumnFamilyOptions setReportBgIoStats(final boolean reportBgIoStats) { - setReportBgIoStats(nativeHandle_, reportBgIoStats); - return this; - } - - @Override - public boolean reportBgIoStats() { - return reportBgIoStats(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setTtl(final long ttl) { - setTtl(nativeHandle_, ttl); - return this; - } - - @Override - public long ttl() { - return ttl(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setPeriodicCompactionSeconds(final long periodicCompactionSeconds) { - setPeriodicCompactionSeconds(nativeHandle_, periodicCompactionSeconds); - return this; - } - - @Override - public long periodicCompactionSeconds() { - return periodicCompactionSeconds(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setCompactionOptionsUniversal( - final CompactionOptionsUniversal compactionOptionsUniversal) { - setCompactionOptionsUniversal(nativeHandle_, - compactionOptionsUniversal.nativeHandle_); - this.compactionOptionsUniversal_ = compactionOptionsUniversal; - return this; - } - - @Override - public CompactionOptionsUniversal compactionOptionsUniversal() { - return this.compactionOptionsUniversal_; - } - - @Override - public ColumnFamilyOptions setCompactionOptionsFIFO(final CompactionOptionsFIFO compactionOptionsFIFO) { - 
setCompactionOptionsFIFO(nativeHandle_, - compactionOptionsFIFO.nativeHandle_); - this.compactionOptionsFIFO_ = compactionOptionsFIFO; - return this; - } - - @Override - public CompactionOptionsFIFO compactionOptionsFIFO() { - return this.compactionOptionsFIFO_; - } - - @Override - public ColumnFamilyOptions setForceConsistencyChecks(final boolean forceConsistencyChecks) { - setForceConsistencyChecks(nativeHandle_, forceConsistencyChecks); - return this; - } - - @Override - public boolean forceConsistencyChecks() { - return forceConsistencyChecks(nativeHandle_); - } - - @Override - public ColumnFamilyOptions setSstPartitionerFactory( - final SstPartitionerFactory sstPartitionerFactory) { - setSstPartitionerFactory(nativeHandle_, sstPartitionerFactory.nativeHandle_); - this.sstPartitionerFactory_ = sstPartitionerFactory; - return this; - } - - @Override - public ColumnFamilyOptions setCompactionThreadLimiter( - final ConcurrentTaskLimiter compactionThreadLimiter) { - setCompactionThreadLimiter(nativeHandle_, compactionThreadLimiter.nativeHandle_); - this.compactionThreadLimiter_ = compactionThreadLimiter; - return this; - } - - @Override - public ConcurrentTaskLimiter compactionThreadLimiter() { - assert (isOwningHandle()); - return this.compactionThreadLimiter_; - } - - @Override - public SstPartitionerFactory sstPartitionerFactory() { - return sstPartitionerFactory_; - } - - // - // BEGIN options for blobs (integrated BlobDB) - // - - /** - * When set, large values (blobs) are written to separate blob files, and only - * pointers to them are stored in SST files. This can reduce write amplification - * for large-value use cases at the cost of introducing a level of indirection - * for reads. See also the options min_blob_size, blob_file_size, - * blob_compression_type, enable_blob_garbage_collection, and - * blob_garbage_collection_age_cutoff below. - *

- * Default: false - *

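// Illustrative sketch (assumed values): enabling integrated BlobDB through the setters
// documented in this section; the thresholds and sizes below are arbitrary examples.
final ColumnFamilyOptions blobOptions = new ColumnFamilyOptions()
    .setEnableBlobFiles(true)                                   // default: false
    .setMinBlobSize(4L * 1024)                                  // values >= 4 KiB go to blob files
    .setBlobFileSize(256L * 1024 * 1024)                        // default: 256 MB
    .setBlobCompressionType(CompressionType.LZ4_COMPRESSION);   // default: no compression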
- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param enableBlobFiles true iff blob files should be enabled - * - * @return the reference to the current options. - */ - @Override - public ColumnFamilyOptions setEnableBlobFiles(final boolean enableBlobFiles) { - setEnableBlobFiles(nativeHandle_, enableBlobFiles); - return this; - } - - /** - * When set, large values (blobs) are written to separate blob files, and only - * pointers to them are stored in SST files. This can reduce write amplification - * for large-value use cases at the cost of introducing a level of indirection - * for reads. See also the options min_blob_size, blob_file_size, - * blob_compression_type, enable_blob_garbage_collection, and - * blob_garbage_collection_age_cutoff below. - *

- * Default: false - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @return true iff blob files are currently enabled - */ - public boolean enableBlobFiles() { - return enableBlobFiles(nativeHandle_); - } - - /** - * Set the size of the smallest value to be stored separately in a blob file. Values - * which have an uncompressed size smaller than this threshold are stored - * alongside the keys in SST files in the usual fashion. A value of zero for - * this option means that all values are stored in blob files. Note that - * enable_blob_files has to be set in order for this option to have any effect. - *

- * Default: 0 - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param minBlobSize the size of the smallest value to be stored separately in a blob file - * @return these options, updated with the supplied minimum blob size value - */ - @Override - public ColumnFamilyOptions setMinBlobSize(final long minBlobSize) { - setMinBlobSize(nativeHandle_, minBlobSize); - return this; - } - - /** - * Get the size of the smallest value to be stored separately in a blob file. Values - * which have an uncompressed size smaller than this threshold are stored - * alongside the keys in SST files in the usual fashion. A value of zero for - * this option means that all values are stored in blob files. Note that - * enable_blob_files has to be set in order for this option to have any effect. - *

- * Default: 0 - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @return the current minimum blob size - */ - @Override - public long minBlobSize() { - return minBlobSize(nativeHandle_); - } - - /** - * Set the size limit for blob files. When writing blob files, a new file is opened - * once this limit is reached. Note that enable_blob_files has to be set in - * order for this option to have any effect. - *

- * Default: 256 MB - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param blobFileSize the new size limit for blob files - * - * @return the reference to the current options. - */ - @Override - public ColumnFamilyOptions setBlobFileSize(final long blobFileSize) { - setBlobFileSize(nativeHandle_, blobFileSize); - return this; - } - - /** - * Get the size limit for blob files. When writing blob files, a new file is opened - * once this limit is reached. Note that enable_blob_files has to be set in - * order for this option to have any effect. - *

- * Default: 256 MB - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @return the size limit for blob files - */ - @Override - public long blobFileSize() { - return blobFileSize(nativeHandle_); - } - - /** - * Set the compression algorithm to use for large values stored in blob files. Note - * that enable_blob_files has to be set in order for this option to have any - * effect. - *

- * Default: no compression - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param compressionType the compression algorithm to use - * - * @return the reference to the current options. - */ - @Override - public ColumnFamilyOptions setBlobCompressionType(final CompressionType compressionType) { - setBlobCompressionType(nativeHandle_, compressionType.getValue()); - return this; - } - - /** - * Get the compression algorithm to use for large values stored in blob files. Note - * that enable_blob_files has to be set in order for this option to have any - * effect. - *

- * Default: no compression - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @return the compression algorithm currently in use for blobs - */ - @Override - public CompressionType blobCompressionType() { - return CompressionType.values()[blobCompressionType(nativeHandle_)]; - } - - /** - * Enable/disable garbage collection of blobs. Blob GC is performed as part of - * compaction. Valid blobs residing in blob files older than a cutoff get - * relocated to new files as they are encountered during compaction, which makes - * it possible to clean up blob files once they contain nothing but - * obsolete/garbage blobs. See also blob_garbage_collection_age_cutoff below. - *

- * Default: false - * - * @param enableBlobGarbageCollection true iff blob garbage collection is to be enabled - * - * @return the reference to the current options. - */ - @Override - public ColumnFamilyOptions setEnableBlobGarbageCollection( - final boolean enableBlobGarbageCollection) { - setEnableBlobGarbageCollection(nativeHandle_, enableBlobGarbageCollection); - return this; - } - - /** - * Get enabled/disables state for garbage collection of blobs. Blob GC is performed as part of - * compaction. Valid blobs residing in blob files older than a cutoff get - * relocated to new files as they are encountered during compaction, which makes - * it possible to clean up blob files once they contain nothing but - * obsolete/garbage blobs. See also blob_garbage_collection_age_cutoff below. - *

- * Default: false - * - * @return true iff blob garbage collection is currently enabled - */ - @Override - public boolean enableBlobGarbageCollection() { - return enableBlobGarbageCollection(nativeHandle_); - } - - /** - * Set the cutoff in terms of blob file age for garbage collection. Blobs in the - * oldest N blob files will be relocated when encountered during compaction, - * where N = garbage_collection_cutoff * number_of_blob_files. Note that - * enable_blob_garbage_collection has to be set in order for this option to have - * any effect. - *

- * Default: 0.25 - * - * @param blobGarbageCollectionAgeCutoff the new blob garbage collection age cutoff - * - * @return the reference to the current options. - */ - @Override - public ColumnFamilyOptions setBlobGarbageCollectionAgeCutoff( - final double blobGarbageCollectionAgeCutoff) { - setBlobGarbageCollectionAgeCutoff(nativeHandle_, blobGarbageCollectionAgeCutoff); - return this; - } - - /** - * Get the cutoff in terms of blob file age for garbage collection. Blobs in the - * oldest N blob files will be relocated when encountered during compaction, - * where N = garbage_collection_cutoff * number_of_blob_files. Note that - * enable_blob_garbage_collection has to be set in order for this option to have - * any effect. - *

- * Default: 0.25 - * - * @return the current blob garbage collection age cutoff - */ - @Override - public double blobGarbageCollectionAgeCutoff() { - return blobGarbageCollectionAgeCutoff(nativeHandle_); - } - - /** - * If the ratio of garbage in the oldest blob files exceeds this threshold, - * targeted compactions are scheduled in order to force garbage collecting - * the blob files in question, assuming they are all eligible based on the - * value of {@link #blobGarbageCollectionAgeCutoff} above. This option is - * currently only supported with leveled compactions. - *

- * Note that {@link #enableBlobGarbageCollection} has to be set in order for this - * option to have any effect. - *

- * Default: 1.0 - *

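// Illustrative sketch (assumed values): enabling blob garbage collection together with the
// age cutoff and force threshold described above. As noted below, the force threshold can
// also be changed at runtime through the SetOptions() API.
final ColumnFamilyOptions gcOptions = new ColumnFamilyOptions()
    .setEnableBlobFiles(true)
    .setEnableBlobGarbageCollection(true)
    .setBlobGarbageCollectionAgeCutoff(0.25)       // consider the oldest 25% of blob files
    .setBlobGarbageCollectionForceThreshold(0.8);  // force GC once 80% of those blobs are garbage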
- * Dynamically changeable through the SetOptions() API - * - * @param blobGarbageCollectionForceThreshold new value for the threshold - * @return the reference to the current options - */ - @Override - public ColumnFamilyOptions setBlobGarbageCollectionForceThreshold( - final double blobGarbageCollectionForceThreshold) { - setBlobGarbageCollectionForceThreshold(nativeHandle_, blobGarbageCollectionForceThreshold); - return this; - } - - /** - * Get the current value for the {@link #blobGarbageCollectionForceThreshold} - * @return the current threshold at which garbage collection of blobs is forced - */ - @Override - public double blobGarbageCollectionForceThreshold() { - return blobGarbageCollectionForceThreshold(nativeHandle_); - } - - /** - * Set compaction readahead for blob files. - *

- * Default: 0 - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param blobCompactionReadaheadSize the compaction readahead for blob files - * - * @return the reference to the current options. - */ - @Override - public ColumnFamilyOptions setBlobCompactionReadaheadSize( - final long blobCompactionReadaheadSize) { - setBlobCompactionReadaheadSize(nativeHandle_, blobCompactionReadaheadSize); - return this; - } - - /** - * Get compaction readahead for blob files. - * - * @return the current compaction readahead for blob files - */ - @Override - public long blobCompactionReadaheadSize() { - return blobCompactionReadaheadSize(nativeHandle_); - } - - /** - * Set a certain LSM tree level to enable blob files. - *

- * Default: 0 - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param blobFileStartingLevel the starting level to enable blob files - * - * @return the reference to the current options. - */ - @Override - public ColumnFamilyOptions setBlobFileStartingLevel(final int blobFileStartingLevel) { - setBlobFileStartingLevel(nativeHandle_, blobFileStartingLevel); - return this; - } - - /** - * Get the starting LSM tree level to enable blob files. - *

- * Default: 0 - * - * @return the current LSM tree level to enable blob files. - */ - @Override - public int blobFileStartingLevel() { - return blobFileStartingLevel(nativeHandle_); - } - - /** - * Set a certain prepopulate blob cache option. - *

- * Default: 0 - *

- * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. - * - * @param prepopulateBlobCache the prepopulate blob cache option - * - * @return the reference to the current options. - */ - @Override - public ColumnFamilyOptions setPrepopulateBlobCache( - final PrepopulateBlobCache prepopulateBlobCache) { - setPrepopulateBlobCache(nativeHandle_, prepopulateBlobCache.getValue()); - return this; - } - - /** - * Get the prepopulate blob cache option. - *

- * Default: 0 - * - * @return the current prepopulate blob cache option. - */ - @Override - public PrepopulateBlobCache prepopulateBlobCache() { - return PrepopulateBlobCache.getPrepopulateBlobCache(prepopulateBlobCache(nativeHandle_)); - } - - // - // END options for blobs (integrated BlobDB) - // - - private static native long getColumnFamilyOptionsFromProps( - final long cfgHandle, String optString); - private static native long getColumnFamilyOptionsFromProps(final String optString); - - private static native long newColumnFamilyOptions(); - private static native long copyColumnFamilyOptions(final long handle); - private static native long newColumnFamilyOptionsFromOptions( - final long optionsHandle); - @Override protected final native void disposeInternal(final long handle); - - private static native void oldDefaults( - final long handle, final int majorVersion, final int minorVersion); - private native void optimizeForSmallDb(final long handle); - private static native void optimizeForSmallDb(final long handle, final long cacheHandle); - private native void optimizeForPointLookup(long handle, - long blockCacheSizeMb); - private native void optimizeLevelStyleCompaction(long handle, - long memtableMemoryBudget); - private native void optimizeUniversalStyleCompaction(long handle, - long memtableMemoryBudget); - private native void setComparatorHandle(long handle, int builtinComparator); - private native void setComparatorHandle(long optHandle, - long comparatorHandle, byte comparatorType); - private native void setMergeOperatorName(long handle, String name); - private native void setMergeOperator(long handle, long mergeOperatorHandle); - private native void setCompactionFilterHandle(long handle, - long compactionFilterHandle); - private native void setCompactionFilterFactoryHandle(long handle, - long compactionFilterFactoryHandle); - private native void setWriteBufferSize(long handle, long writeBufferSize) - throws IllegalArgumentException; - private native long writeBufferSize(long handle); - private native void setMaxWriteBufferNumber( - long handle, int maxWriteBufferNumber); - private native int maxWriteBufferNumber(long handle); - private native void setMinWriteBufferNumberToMerge( - long handle, int minWriteBufferNumberToMerge); - private native int minWriteBufferNumberToMerge(long handle); - private native void setCompressionType(long handle, byte compressionType); - private native byte compressionType(long handle); - private native void setCompressionPerLevel(long handle, - byte[] compressionLevels); - private native byte[] compressionPerLevel(long handle); - private native void setBottommostCompressionType(long handle, - byte bottommostCompressionType); - private native byte bottommostCompressionType(long handle); - private native void setBottommostCompressionOptions(final long handle, - final long bottommostCompressionOptionsHandle); - private native void setCompressionOptions(long handle, - long compressionOptionsHandle); - private native void useFixedLengthPrefixExtractor( - long handle, int prefixLength); - private native void useCappedPrefixExtractor( - long handle, int prefixLength); - private native void setNumLevels( - long handle, int numLevels); - private native int numLevels(long handle); - private native void setLevelZeroFileNumCompactionTrigger( - long handle, int numFiles); - private native int levelZeroFileNumCompactionTrigger(long handle); - private native void setLevelZeroSlowdownWritesTrigger( - long handle, int numFiles); - private native int 
levelZeroSlowdownWritesTrigger(long handle); - private native void setLevelZeroStopWritesTrigger( - long handle, int numFiles); - private native int levelZeroStopWritesTrigger(long handle); - private native void setTargetFileSizeBase( - long handle, long targetFileSizeBase); - private native long targetFileSizeBase(long handle); - private native void setTargetFileSizeMultiplier( - long handle, int multiplier); - private native int targetFileSizeMultiplier(long handle); - private native void setMaxBytesForLevelBase( - long handle, long maxBytesForLevelBase); - private native long maxBytesForLevelBase(long handle); - private native void setLevelCompactionDynamicLevelBytes( - long handle, boolean enableLevelCompactionDynamicLevelBytes); - private native boolean levelCompactionDynamicLevelBytes( - long handle); - private native void setMaxBytesForLevelMultiplier(long handle, double multiplier); - private native double maxBytesForLevelMultiplier(long handle); - private native void setMaxCompactionBytes(long handle, long maxCompactionBytes); - private native long maxCompactionBytes(long handle); - private native void setArenaBlockSize( - long handle, long arenaBlockSize) - throws IllegalArgumentException; - private native long arenaBlockSize(long handle); - private native void setDisableAutoCompactions( - long handle, boolean disableAutoCompactions); - private native boolean disableAutoCompactions(long handle); - private native void setCompactionStyle(long handle, byte compactionStyle); - private native byte compactionStyle(long handle); - private native void setMaxTableFilesSizeFIFO( - long handle, long max_table_files_size); - private native long maxTableFilesSizeFIFO(long handle); - private native void setMaxSequentialSkipInIterations( - long handle, long maxSequentialSkipInIterations); - private native long maxSequentialSkipInIterations(long handle); - private native void setMemTableFactory(long handle, long factoryHandle); - private native String memTableFactoryName(long handle); - private native void setTableFactory(long handle, long factoryHandle); - private native String tableFactoryName(long handle); - private static native void setCfPaths( - final long handle, final String[] paths, final long[] targetSizes); - private static native long cfPathsLen(final long handle); - private static native void cfPaths( - final long handle, final String[] paths, final long[] targetSizes); - private native void setInplaceUpdateSupport( - long handle, boolean inplaceUpdateSupport); - private native boolean inplaceUpdateSupport(long handle); - private native void setInplaceUpdateNumLocks( - long handle, long inplaceUpdateNumLocks) - throws IllegalArgumentException; - private native long inplaceUpdateNumLocks(long handle); - private native void setMemtablePrefixBloomSizeRatio( - long handle, double memtablePrefixBloomSizeRatio); - private native double memtablePrefixBloomSizeRatio(long handle); - private native void setExperimentalMempurgeThreshold( - long handle, double experimentalMempurgeThreshold); - private native double experimentalMempurgeThreshold(long handle); - private native void setMemtableWholeKeyFiltering(long handle, boolean memtableWholeKeyFiltering); - private native boolean memtableWholeKeyFiltering(long handle); - private native void setBloomLocality( - long handle, int bloomLocality); - private native int bloomLocality(long handle); - private native void setMaxSuccessiveMerges( - long handle, long maxSuccessiveMerges) - throws IllegalArgumentException; - private native long 
maxSuccessiveMerges(long handle); - private native void setOptimizeFiltersForHits(long handle, - boolean optimizeFiltersForHits); - private native boolean optimizeFiltersForHits(long handle); - private native void setMemtableHugePageSize(long handle, - long memtableHugePageSize); - private native long memtableHugePageSize(long handle); - private native void setSoftPendingCompactionBytesLimit(long handle, - long softPendingCompactionBytesLimit); - private native long softPendingCompactionBytesLimit(long handle); - private native void setHardPendingCompactionBytesLimit(long handle, - long hardPendingCompactionBytesLimit); - private native long hardPendingCompactionBytesLimit(long handle); - private native void setLevel0FileNumCompactionTrigger(long handle, - int level0FileNumCompactionTrigger); - private native int level0FileNumCompactionTrigger(long handle); - private native void setLevel0SlowdownWritesTrigger(long handle, - int level0SlowdownWritesTrigger); - private native int level0SlowdownWritesTrigger(long handle); - private native void setLevel0StopWritesTrigger(long handle, - int level0StopWritesTrigger); - private native int level0StopWritesTrigger(long handle); - private native void setMaxBytesForLevelMultiplierAdditional(long handle, - int[] maxBytesForLevelMultiplierAdditional); - private native int[] maxBytesForLevelMultiplierAdditional(long handle); - private native void setParanoidFileChecks(long handle, - boolean paranoidFileChecks); - private native boolean paranoidFileChecks(long handle); - private native void setMaxWriteBufferNumberToMaintain(final long handle, - final int maxWriteBufferNumberToMaintain); - private native int maxWriteBufferNumberToMaintain(final long handle); - private native void setCompactionPriority(final long handle, - final byte compactionPriority); - private native byte compactionPriority(final long handle); - private native void setReportBgIoStats(final long handle, - final boolean reportBgIoStats); - private native boolean reportBgIoStats(final long handle); - private native void setTtl(final long handle, final long ttl); - private native long ttl(final long handle); - private native void setPeriodicCompactionSeconds( - final long handle, final long periodicCompactionSeconds); - private native long periodicCompactionSeconds(final long handle); - private native void setCompactionOptionsUniversal(final long handle, - final long compactionOptionsUniversalHandle); - private native void setCompactionOptionsFIFO(final long handle, - final long compactionOptionsFIFOHandle); - private native void setForceConsistencyChecks(final long handle, - final boolean forceConsistencyChecks); - private native boolean forceConsistencyChecks(final long handle); - private native void setSstPartitionerFactory(long nativeHandle_, long newFactoryHandle); - private static native void setCompactionThreadLimiter( - final long nativeHandle_, final long compactionThreadLimiterHandle); - - private native void setEnableBlobFiles(final long nativeHandle_, final boolean enableBlobFiles); - private native boolean enableBlobFiles(final long nativeHandle_); - private native void setMinBlobSize(final long nativeHandle_, final long minBlobSize); - private native long minBlobSize(final long nativeHandle_); - private native void setBlobFileSize(final long nativeHandle_, final long blobFileSize); - private native long blobFileSize(final long nativeHandle_); - private native void setBlobCompressionType(final long nativeHandle_, final byte compressionType); - private native byte 
blobCompressionType(final long nativeHandle_); - private native void setEnableBlobGarbageCollection( - final long nativeHandle_, final boolean enableBlobGarbageCollection); - private native boolean enableBlobGarbageCollection(final long nativeHandle_); - private native void setBlobGarbageCollectionAgeCutoff( - final long nativeHandle_, final double blobGarbageCollectionAgeCutoff); - private native double blobGarbageCollectionAgeCutoff(final long nativeHandle_); - private native void setBlobGarbageCollectionForceThreshold( - final long nativeHandle_, final double blobGarbageCollectionForceThreshold); - private native double blobGarbageCollectionForceThreshold(final long nativeHandle_); - private native void setBlobCompactionReadaheadSize( - final long nativeHandle_, final long blobCompactionReadaheadSize); - private native long blobCompactionReadaheadSize(final long nativeHandle_); - private native void setBlobFileStartingLevel( - final long nativeHandle_, final int blobFileStartingLevel); - private native int blobFileStartingLevel(final long nativeHandle_); - private native void setPrepopulateBlobCache( - final long nativeHandle_, final byte prepopulateBlobCache); - private native byte prepopulateBlobCache(final long nativeHandle_); - - // instance variables - // NOTE: If you add new member variables, please update the copy constructor above! - private MemTableConfig memTableConfig_; - private TableFormatConfig tableFormatConfig_; - private AbstractComparator comparator_; - private AbstractCompactionFilter> compactionFilter_; - private AbstractCompactionFilterFactory> - compactionFilterFactory_; - private CompactionOptionsUniversal compactionOptionsUniversal_; - private CompactionOptionsFIFO compactionOptionsFIFO_; - private CompressionOptions bottommostCompressionOptions_; - private CompressionOptions compressionOptions_; - private SstPartitionerFactory sstPartitionerFactory_; - private ConcurrentTaskLimiter compactionThreadLimiter_; -} diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java deleted file mode 100644 index 97357aacf..000000000 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Collection; -import java.util.List; - -public interface ColumnFamilyOptionsInterface> - extends AdvancedColumnFamilyOptionsInterface { - /** - * The function recovers options to a previous version. Only 4.6 or later - * versions are supported. - * - * @param majorVersion The major version to recover default values of options - * @param minorVersion The minor version to recover default values of options - * @return the instance of the current object. - */ - T oldDefaults(int majorVersion, int minorVersion); - - /** - * Use this if your DB is very small (like under 1GB) and you don't want to - * spend lots of memory for memtables. - * - * @return the instance of the current object. - */ - T optimizeForSmallDb(); - - /** - * Some functions that make it easier to optimize RocksDB - * Use this if your DB is very small (like under 1GB) and you don't want to - * spend lots of memory for memtables. 
- * - * @param cache An optional cache object is passed in to be used as the block cache - * @return the instance of the current object. - */ - T optimizeForSmallDb(Cache cache); - - /** - * Use this if you don't need to keep the data sorted, i.e. you'll never use - * an iterator, only Put() and Get() API calls - * - * @param blockCacheSizeMb Block cache size in MB - * @return the instance of the current object. - */ - T optimizeForPointLookup(long blockCacheSizeMb); - - /** - *

<p>Default values for some parameters in ColumnFamilyOptions are not - * optimized for heavy workloads and big datasets, which means you might - * observe write stalls under some conditions. As a starting point for tuning - * RocksDB options, use the following for level style compaction.</p> - * - * <p>Make sure to also call IncreaseParallelism(), which will provide the - * biggest performance gains.</p> - * <p>Note: we might use more memory than memtable_memory_budget during high - * write rate period</p>
- * - * @return the instance of the current object. - */ - T optimizeLevelStyleCompaction(); - - /** - *

<p>Default values for some parameters in ColumnFamilyOptions are not - * optimized for heavy workloads and big datasets, which means you might - * observe write stalls under some conditions. As a starting point for tuning - * RocksDB options, use the following for level style compaction.</p> - * - * <p>Make sure to also call IncreaseParallelism(), which will provide the - * biggest performance gains.</p> - * <p>Note: we might use more memory than memtable_memory_budget during high - * write rate period</p>
- * - * @param memtableMemoryBudget memory budget in bytes - * @return the instance of the current object. - */ - T optimizeLevelStyleCompaction( - long memtableMemoryBudget); - - /** - *

<p>Default values for some parameters in ColumnFamilyOptions are not - * optimized for heavy workloads and big datasets, which means you might - * observe write stalls under some conditions. As a starting point for tuning - * RocksDB options, use the following for universal style compaction.</p> - * - * <p>Universal style compaction is focused on reducing Write Amplification - * Factor for big data sets, but increases Space Amplification.</p> - * - * <p>Make sure to also call IncreaseParallelism(), which will provide the - * biggest performance gains.</p> - * - * <p>Note: we might use more memory than memtable_memory_budget during high - * write rate period</p>
- * - * @return the instance of the current object. - */ - T optimizeUniversalStyleCompaction(); - - /** - *

<p>Default values for some parameters in ColumnFamilyOptions are not - * optimized for heavy workloads and big datasets, which means you might - * observe write stalls under some conditions. As a starting point for tuning - * RocksDB options, use the following for universal style compaction.</p> - * - * <p>Universal style compaction is focused on reducing Write Amplification - * Factor for big data sets, but increases Space Amplification.</p> - * - * <p>Make sure to also call IncreaseParallelism(), which will provide the - * biggest performance gains.</p> - * - * <p>Note: we might use more memory than memtable_memory_budget during high - * write rate period</p>
- * - * @param memtableMemoryBudget memory budget in bytes - * @return the instance of the current object. - */ - T optimizeUniversalStyleCompaction( - long memtableMemoryBudget); - - /** - * Set {@link BuiltinComparator} to be used with RocksDB. - * - * Note: Comparator can be set once upon database creation. - * - * Default: BytewiseComparator. - * @param builtinComparator a {@link BuiltinComparator} type. - * @return the instance of the current object. - */ - T setComparator( - BuiltinComparator builtinComparator); - - /** - * Use the specified comparator for key ordering. - * - * Comparator should not be disposed before options instances using this comparator is - * disposed. If dispose() function is not called, then comparator object will be - * GC'd automatically. - * - * Comparator instance can be re-used in multiple options instances. - * - * @param comparator java instance. - * @return the instance of the current object. - */ - T setComparator( - AbstractComparator comparator); - - /** - *

<p>Set the merge operator to be used for merging two merge operands - * of the same key. The merge function is invoked during - * compaction and at lookup time, if multiple key/value pairs belonging - * to the same key are found in the database.</p>
- * - * @param name the name of the merge function, as defined by - * the MergeOperators factory (see utilities/MergeOperators.h) - * The merge function is specified by name and must be one of the - * standard merge operators provided by RocksDB. The available - * operators are "put", "uint64add", "stringappend" and "stringappendtest". - * @return the instance of the current object. - */ - T setMergeOperatorName(String name); - - /** - *

<p>Set the merge operator to be used for merging two different key/value - * pairs that share the same key. The merge function is invoked during - * compaction and at lookup time, if multiple key/value pairs belonging - * to the same key are found in the database.</p>
- * - * @param mergeOperator {@link MergeOperator} instance. - * @return the instance of the current object. - */ - T setMergeOperator(MergeOperator mergeOperator); - - /** - * A single CompactionFilter instance to call into during compaction. - * Allows an application to modify/delete a key-value during background - * compaction. - * - * If the client requires a new compaction filter to be used for different - * compaction runs, it can specify call - * {@link #setCompactionFilterFactory(AbstractCompactionFilterFactory)} - * instead. - * - * The client should specify only set one of the two. - * {@link #setCompactionFilter(AbstractCompactionFilter)} takes precedence - * over {@link #setCompactionFilterFactory(AbstractCompactionFilterFactory)} - * if the client specifies both. - * - * If multithreaded compaction is being used, the supplied CompactionFilter - * instance may be used from different threads concurrently and so should be thread-safe. - * - * @param compactionFilter {@link AbstractCompactionFilter} instance. - * @return the instance of the current object. - */ - T setCompactionFilter( - final AbstractCompactionFilter> compactionFilter); - - /** - * Accessor for the CompactionFilter instance in use. - * - * @return Reference to the CompactionFilter, or null if one hasn't been set. - */ - AbstractCompactionFilter> compactionFilter(); - - /** - * This is a factory that provides {@link AbstractCompactionFilter} objects - * which allow an application to modify/delete a key-value during background - * compaction. - * - * A new filter will be created on each compaction run. If multithreaded - * compaction is being used, each created CompactionFilter will only be used - * from a single thread and so does not need to be thread-safe. - * - * @param compactionFilterFactory {@link AbstractCompactionFilterFactory} instance. - * @return the instance of the current object. - */ - T setCompactionFilterFactory( - final AbstractCompactionFilterFactory> - compactionFilterFactory); - - /** - * Accessor for the CompactionFilterFactory instance in use. - * - * @return Reference to the CompactionFilterFactory, or null if one hasn't been set. - */ - AbstractCompactionFilterFactory> compactionFilterFactory(); - - /** - * This prefix-extractor uses the first n bytes of a key as its prefix. - * - * In some hash-based memtable representation such as HashLinkedList - * and HashSkipList, prefixes are used to partition the keys into - * several buckets. Prefix extractor is used to specify how to - * extract the prefix given a key. - * - * @param n use the first n bytes of a key as its prefix. - * @return the reference to the current option. - */ - T useFixedLengthPrefixExtractor(int n); - - /** - * Same as fixed length prefix extractor, except that when slice is - * shorter than the fixed length, it will use the full key. - * - * @param n use the first n bytes of a key as its prefix. - * @return the reference to the current option. - */ - T useCappedPrefixExtractor(int n); - - /** - * Number of files to trigger level-0 compaction. A value < 0 means that - * level-0 compaction will not be triggered by number of files at all. - * Default: 4 - * - * @param numFiles the number of files in level-0 to trigger compaction. - * @return the reference to the current option. - */ - T setLevelZeroFileNumCompactionTrigger( - int numFiles); - - /** - * The number of files in level 0 to trigger compaction from level-0 to - * level-1. 
A value < 0 means that level-0 compaction will not be - * triggered by number of files at all. - * Default: 4 - * - * @return the number of files in level 0 to trigger compaction. - */ - int levelZeroFileNumCompactionTrigger(); - - /** - * Soft limit on number of level-0 files. We start slowing down writes at this - * point. A value < 0 means that no writing slow down will be triggered by - * number of files in level-0. - * - * @param numFiles soft limit on number of level-0 files. - * @return the reference to the current option. - */ - T setLevelZeroSlowdownWritesTrigger( - int numFiles); - - /** - * Soft limit on the number of level-0 files. We start slowing down writes - * at this point. A value < 0 means that no writing slow down will be - * triggered by number of files in level-0. - * - * @return the soft limit on the number of level-0 files. - */ - int levelZeroSlowdownWritesTrigger(); - - /** - * Maximum number of level-0 files. We stop writes at this point. - * - * @param numFiles the hard limit of the number of level-0 files. - * @return the reference to the current option. - */ - T setLevelZeroStopWritesTrigger(int numFiles); - - /** - * Maximum number of level-0 files. We stop writes at this point. - * - * @return the hard limit of the number of level-0 file. - */ - int levelZeroStopWritesTrigger(); - - /** - * The ratio between the total size of level-(L+1) files and the total - * size of level-L files for all L. - * DEFAULT: 10 - * - * @param multiplier the ratio between the total size of level-(L+1) - * files and the total size of level-L files for all L. - * @return the reference to the current option. - */ - T setMaxBytesForLevelMultiplier( - double multiplier); - - /** - * The ratio between the total size of level-(L+1) files and the total - * size of level-L files for all L. - * DEFAULT: 10 - * - * @return the ratio between the total size of level-(L+1) files and - * the total size of level-L files for all L. - */ - double maxBytesForLevelMultiplier(); - - /** - * FIFO compaction option. - * The oldest table file will be deleted - * once the sum of table files reaches this size. - * The default value is 1GB (1 * 1024 * 1024 * 1024). - * - * @param maxTableFilesSize the size limit of the total sum of table files. - * @return the instance of the current object. - */ - T setMaxTableFilesSizeFIFO( - long maxTableFilesSize); - - /** - * FIFO compaction option. - * The oldest table file will be deleted - * once the sum of table files reaches this size. - * The default value is 1GB (1 * 1024 * 1024 * 1024). - * - * @return the size limit of the total sum of table files. - */ - long maxTableFilesSizeFIFO(); - - /** - * Get the config for mem-table. - * - * @return the mem-table config. - */ - MemTableConfig memTableConfig(); - - /** - * Set the config for mem-table. - * - * @param memTableConfig the mem-table config. - * @return the instance of the current object. - * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms - * while overflowing the underlying platform specific value. - */ - T setMemTableConfig(MemTableConfig memTableConfig); - - /** - * Returns the name of the current mem table representation. - * Memtable format can be set using setTableFormatConfig. - * - * @return the name of the currently-used memtable factory. - * @see #setTableFormatConfig(org.rocksdb.TableFormatConfig) - */ - String memTableFactoryName(); - - /** - * Get the config for table format. - * - * @return the table format config. 
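As a rough illustration of how the interface documented above is typically used (a sketch only, assuming the usual org.rocksdb imports and that the options object is later handed to a ColumnFamilyDescriptor; the concrete values are placeholders, not recommendations):

try (final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()
         .optimizeLevelStyleCompaction(512 * 1024 * 1024)   // memtable memory budget in bytes
         .setMergeOperatorName("uint64add")                  // one of the built-in merge operators listed above
         .setLevelZeroFileNumCompactionTrigger(4)            // documented default, shown explicitly
         .setLevelZeroSlowdownWritesTrigger(20)
         .setLevelZeroStopWritesTrigger(36)
         .setMaxBytesForLevelMultiplier(10.0)) {
  // pass cfOptions to a ColumnFamilyDescriptor when opening the database
}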
- */ - TableFormatConfig tableFormatConfig(); - - /** - * Set the config for table format. - * - * @param config the table format config. - * @return the reference of the current options. - */ - T setTableFormatConfig(TableFormatConfig config); - - /** - * @return the name of the currently used table factory. - */ - String tableFactoryName(); - - /** - * A list of paths where SST files for this column family - * can be put into, with its target size. Similar to db_paths, - * newer data is placed into paths specified earlier in the - * vector while older data gradually moves to paths specified - * later in the vector. - * Note that, if a path is supplied to multiple column - * families, it would have files and total size from all - * the column families combined. User should provision for the - * total size(from all the column families) in such cases. - * - * If left empty, db_paths will be used. - * Default: empty - * - * @param paths collection of paths for SST files. - * @return the reference of the current options. - */ - T setCfPaths(final Collection paths); - - /** - * @return collection of paths for SST files. - */ - List cfPaths(); - - /** - * Compression algorithm that will be used for the bottommost level that - * contain files. If level-compaction is used, this option will only affect - * levels after base level. - * - * Default: {@link CompressionType#DISABLE_COMPRESSION_OPTION} - * - * @param bottommostCompressionType The compression type to use for the - * bottommost level - * - * @return the reference of the current options. - */ - T setBottommostCompressionType( - final CompressionType bottommostCompressionType); - - /** - * Compression algorithm that will be used for the bottommost level that - * contain files. If level-compaction is used, this option will only affect - * levels after base level. - * - * Default: {@link CompressionType#DISABLE_COMPRESSION_OPTION} - * - * @return The compression type used for the bottommost level - */ - CompressionType bottommostCompressionType(); - - /** - * Set the options for compression algorithms used by - * {@link #bottommostCompressionType()} if it is enabled. - * - * To enable it, please see the definition of - * {@link CompressionOptions}. - * - * @param compressionOptions the bottom most compression options. - * - * @return the reference of the current options. - */ - T setBottommostCompressionOptions( - final CompressionOptions compressionOptions); - - /** - * Get the bottom most compression options. - * - * See {@link #setBottommostCompressionOptions(CompressionOptions)}. - * - * @return the bottom most compression options. - */ - CompressionOptions bottommostCompressionOptions(); - - /** - * Set the different options for compression algorithms - * - * @param compressionOptions The compression options - * - * @return the reference of the current options. - */ - T setCompressionOptions( - CompressionOptions compressionOptions); - - /** - * Get the different options for compression algorithms - * - * @return The compression options - */ - CompressionOptions compressionOptions(); - - /** - * If non-nullptr, use the specified factory for a function to determine the - * partitioning of sst files. This helps compaction to split the files - * on interesting boundaries (key prefixes) to make propagation of sst - * files less write amplifying (covering the whole key space). - * - * Default: nullptr - * - * @param factory The factory reference - * @return the reference of the current options. 
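A small sketch of the compression and per-column-family path options described here (DbPath, java.nio.file.Paths, java.util.Arrays and CompressionOptions#setLevel are assumed from the wider RocksJava API; the directories and target sizes are placeholders):

try (final CompressionOptions bottommostOptions = new CompressionOptions().setLevel(3);
     final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()
         .setBottommostCompressionType(CompressionType.ZSTD_COMPRESSION)
         .setBottommostCompressionOptions(bottommostOptions)
         .setCfPaths(Arrays.asList(
             new DbPath(Paths.get("/fast/ssd"), 64L * 1024 * 1024 * 1024),  // newer SST files land here first
             new DbPath(Paths.get("/slow/hdd"), 0L)))) {                    // older data gradually moves here
  // open the column family with cfOptions ...
}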
- */ - @Experimental("Caution: this option is experimental") - T setSstPartitionerFactory(SstPartitionerFactory factory); - - /** - * Get SST partitioner factory - * - * @return SST partitioner factory - */ - @Experimental("Caution: this option is experimental") - SstPartitionerFactory sstPartitionerFactory(); - - /** - * Compaction concurrent thread limiter for the column family. - * If non-nullptr, use given concurrent thread limiter to control - * the max outstanding compaction tasks. Limiter can be shared with - * multiple column families across db instances. - * - * @param concurrentTaskLimiter The compaction thread limiter. - * @return the reference of the current options. - */ - T setCompactionThreadLimiter(ConcurrentTaskLimiter concurrentTaskLimiter); - - /** - * Get compaction thread limiter - * - * @return Compaction thread limiter - */ - ConcurrentTaskLimiter compactionThreadLimiter(); - - /** - * Default memtable memory budget used with the following methods: - * - *
<ul> - *   <li>{@link #optimizeLevelStyleCompaction()}</li> - *   <li>{@link #optimizeUniversalStyleCompaction()}</li> - * </ul>
- */ - long DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET = 512 * 1024 * 1024; -} diff --git a/java/src/main/java/org/rocksdb/CompactRangeOptions.java b/java/src/main/java/org/rocksdb/CompactRangeOptions.java deleted file mode 100644 index cf5708601..000000000 --- a/java/src/main/java/org/rocksdb/CompactRangeOptions.java +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * CompactRangeOptions is used by CompactRange() call. In the documentation of the methods "the compaction" refers to - * any compaction that is using this CompactRangeOptions. - */ -public class CompactRangeOptions extends RocksObject { - - private final static byte VALUE_kSkip = 0; - private final static byte VALUE_kIfHaveCompactionFilter = 1; - private final static byte VALUE_kForce = 2; - private final static byte VALUE_kForceOptimized = 3; - - // For level based compaction, we can configure if we want to skip/force bottommost level - // compaction. The order of this enum MUST follow the C++ layer. See BottommostLevelCompaction in - // db/options.h - public enum BottommostLevelCompaction { - /** - * Skip bottommost level compaction - */ - kSkip(VALUE_kSkip), - /** - * Only compact bottommost level if there is a compaction filter. This is the default option - */ - kIfHaveCompactionFilter(VALUE_kIfHaveCompactionFilter), - /** - * Always compact bottommost level - */ - kForce(VALUE_kForce), - /** - * Always compact bottommost level but in bottommost level avoid - * double-compacting files created in the same compaction - */ - kForceOptimized(VALUE_kForceOptimized); - - private final byte value; - - BottommostLevelCompaction(final byte value) { - this.value = value; - } - - /** - *

<p>Returns the byte value of the enumerations value.</p>
- * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - * Returns the BottommostLevelCompaction for the given C++ rocks enum value. - * @param bottommostLevelCompaction The value of the BottommostLevelCompaction - * @return BottommostLevelCompaction instance, or null if none matches - */ - public static BottommostLevelCompaction fromRocksId(final int bottommostLevelCompaction) { - switch (bottommostLevelCompaction) { - case VALUE_kSkip: return kSkip; - case VALUE_kIfHaveCompactionFilter: return kIfHaveCompactionFilter; - case VALUE_kForce: return kForce; - case VALUE_kForceOptimized: - return kForceOptimized; - default: return null; - } - } - } - - /** - * Construct CompactRangeOptions. - */ - public CompactRangeOptions() { - super(newCompactRangeOptions()); - } - - /** - * Returns whether the compaction is exclusive or other compactions may run concurrently at the same time. - * - * @return true if exclusive, false if concurrent - */ - public boolean exclusiveManualCompaction() { - return exclusiveManualCompaction(nativeHandle_); - } - - /** - * Sets whether the compaction is exclusive or other compaction are allowed run concurrently at the same time. - * - * @param exclusiveCompaction true if compaction should be exclusive - * @return This CompactRangeOptions - */ - public CompactRangeOptions setExclusiveManualCompaction(final boolean exclusiveCompaction) { - setExclusiveManualCompaction(nativeHandle_, exclusiveCompaction); - return this; - } - - /** - * Returns whether compacted files will be moved to the minimum level capable of holding the data or given level - * (specified non-negative target_level). - * @return true, if compacted files will be moved to the minimum level - */ - public boolean changeLevel() { - return changeLevel(nativeHandle_); - } - - /** - * Whether compacted files will be moved to the minimum level capable of holding the data or given level - * (specified non-negative target_level). - * - * @param changeLevel If true, compacted files will be moved to the minimum level - * @return This CompactRangeOptions - */ - public CompactRangeOptions setChangeLevel(final boolean changeLevel) { - setChangeLevel(nativeHandle_, changeLevel); - return this; - } - - /** - * If change_level is true and target_level have non-negative value, compacted files will be moved to target_level. - * @return The target level for the compacted files - */ - public int targetLevel() { - return targetLevel(nativeHandle_); - } - - - /** - * If change_level is true and target_level have non-negative value, compacted files will be moved to target_level. - * - * @param targetLevel target level for the compacted files - * @return This CompactRangeOptions - */ - public CompactRangeOptions setTargetLevel(final int targetLevel) { - setTargetLevel(nativeHandle_, targetLevel); - return this; - } - - /** - * target_path_id for compaction output. Compaction outputs will be placed in options.db_paths[target_path_id]. - * - * @return target_path_id - */ - public int targetPathId() { - return targetPathId(nativeHandle_); - } - - /** - * Compaction outputs will be placed in options.db_paths[target_path_id]. Behavior is undefined if target_path_id is - * out of range. 
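For orientation, a manual compaction over the whole key range might be issued roughly as follows (a sketch, assuming an open db and a columnFamilyHandle, that the compactRange overload taking begin/end keys plus CompactRangeOptions is available and accepts nulls for the full range, and with RocksDBException handling omitted):

try (final CompactRangeOptions rangeOptions = new CompactRangeOptions()
         .setExclusiveManualCompaction(true)
         .setChangeLevel(true)
         .setTargetLevel(1)
         .setBottommostLevelCompaction(
             CompactRangeOptions.BottommostLevelCompaction.kForceOptimized)
         .setAllowWriteStall(false)) {
  db.compactRange(columnFamilyHandle, null, null, rangeOptions);  // null begin/end compacts the full range
}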
- * - * @param targetPathId target path id - * @return This CompactRangeOptions - */ - public CompactRangeOptions setTargetPathId(final int targetPathId) { - setTargetPathId(nativeHandle_, targetPathId); - return this; - } - - /** - * Returns the policy for compacting the bottommost level - * @return The BottommostLevelCompaction policy - */ - public BottommostLevelCompaction bottommostLevelCompaction() { - return BottommostLevelCompaction.fromRocksId(bottommostLevelCompaction(nativeHandle_)); - } - - /** - * Sets the policy for compacting the bottommost level - * - * @param bottommostLevelCompaction The policy for compacting the bottommost level - * @return This CompactRangeOptions - */ - public CompactRangeOptions setBottommostLevelCompaction(final BottommostLevelCompaction bottommostLevelCompaction) { - setBottommostLevelCompaction(nativeHandle_, bottommostLevelCompaction.getValue()); - return this; - } - - /** - * If true, compaction will execute immediately even if doing so would cause the DB to - * enter write stall mode. Otherwise, it'll sleep until load is low enough. - * @return true if compaction will execute immediately - */ - public boolean allowWriteStall() { - return allowWriteStall(nativeHandle_); - } - - - /** - * If true, compaction will execute immediately even if doing so would cause the DB to - * enter write stall mode. Otherwise, it'll sleep until load is low enough. - * - * @return This CompactRangeOptions - * @param allowWriteStall true if compaction should execute immediately - */ - public CompactRangeOptions setAllowWriteStall(final boolean allowWriteStall) { - setAllowWriteStall(nativeHandle_, allowWriteStall); - return this; - } - - /** - * If > 0, it will replace the option in the DBOptions for this compaction - * @return number of subcompactions - */ - public int maxSubcompactions() { - return maxSubcompactions(nativeHandle_); - } - - /** - * If > 0, it will replace the option in the DBOptions for this compaction - * - * @param maxSubcompactions number of subcompactions - * @return This CompactRangeOptions - */ - public CompactRangeOptions setMaxSubcompactions(final int maxSubcompactions) { - setMaxSubcompactions(nativeHandle_, maxSubcompactions); - return this; - } - - private native static long newCompactRangeOptions(); - @Override protected final native void disposeInternal(final long handle); - - private native boolean exclusiveManualCompaction(final long handle); - private native void setExclusiveManualCompaction(final long handle, - final boolean exclusive_manual_compaction); - private native boolean changeLevel(final long handle); - private native void setChangeLevel(final long handle, - final boolean changeLevel); - private native int targetLevel(final long handle); - private native void setTargetLevel(final long handle, - final int targetLevel); - private native int targetPathId(final long handle); - private native void setTargetPathId(final long handle, - final int targetPathId); - private native int bottommostLevelCompaction(final long handle); - private native void setBottommostLevelCompaction(final long handle, - final int bottommostLevelCompaction); - private native boolean allowWriteStall(final long handle); - private native void setAllowWriteStall(final long handle, - final boolean allowWriteStall); - private native void setMaxSubcompactions(final long handle, - final int maxSubcompactions); - private native int maxSubcompactions(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/CompactionJobInfo.java 
b/java/src/main/java/org/rocksdb/CompactionJobInfo.java deleted file mode 100644 index 4e3b8d68b..000000000 --- a/java/src/main/java/org/rocksdb/CompactionJobInfo.java +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -public class CompactionJobInfo extends RocksObject { - - public CompactionJobInfo() { - super(newCompactionJobInfo()); - } - - /** - * Private as called from JNI C++ - */ - private CompactionJobInfo(final long nativeHandle) { - super(nativeHandle); - // We do not own the native object! - disOwnNativeHandle(); - } - - /** - * Get the name of the column family where the compaction happened. - * - * @return the name of the column family - */ - public byte[] columnFamilyName() { - return columnFamilyName(nativeHandle_); - } - - /** - * Get the status indicating whether the compaction was successful or not. - * - * @return the status - */ - public Status status() { - return status(nativeHandle_); - } - - /** - * Get the id of the thread that completed this compaction job. - * - * @return the id of the thread - */ - public long threadId() { - return threadId(nativeHandle_); - } - - /** - * Get the job id, which is unique in the same thread. - * - * @return the id of the thread - */ - public int jobId() { - return jobId(nativeHandle_); - } - - /** - * Get the smallest input level of the compaction. - * - * @return the input level - */ - public int baseInputLevel() { - return baseInputLevel(nativeHandle_); - } - - /** - * Get the output level of the compaction. - * - * @return the output level - */ - public int outputLevel() { - return outputLevel(nativeHandle_); - } - - /** - * Get the names of the compaction input files. - * - * @return the names of the input files. - */ - public List inputFiles() { - return Arrays.asList(inputFiles(nativeHandle_)); - } - - /** - * Get the names of the compaction output files. - * - * @return the names of the output files. - */ - public List outputFiles() { - return Arrays.asList(outputFiles(nativeHandle_)); - } - - /** - * Get the table properties for the input and output tables. - * - * The map is keyed by values from {@link #inputFiles()} and - * {@link #outputFiles()}. - * - * @return the table properties - */ - public Map tableProperties() { - return tableProperties(nativeHandle_); - } - - /** - * Get the Reason for running the compaction. - * - * @return the reason. - */ - public CompactionReason compactionReason() { - return CompactionReason.fromValue(compactionReason(nativeHandle_)); - } - - // - /** - * Get the compression algorithm used for output files. - * - * @return the compression algorithm - */ - public CompressionType compression() { - return CompressionType.getCompressionType(compression(nativeHandle_)); - } - - /** - * Get detailed information about this compaction. - * - * @return the detailed information, or null if not available. 
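Because stats() can return null, callers are expected to guard for it; a minimal sketch (jobInfo here stands for a CompactionJobInfo obtained elsewhere, for example from a compactFiles() call):

final CompactionJobStats stats = jobInfo.stats();
if (stats != null) {
  System.out.println("job " + jobInfo.jobId() + " compacted "
      + stats.numInputRecords() + " records into "
      + stats.numOutputFiles() + " files in "
      + stats.elapsedMicros() + " micros");
}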
- */ - public /* @Nullable */ CompactionJobStats stats() { - final long statsHandle = stats(nativeHandle_); - if (statsHandle == 0) { - return null; - } - - return new CompactionJobStats(statsHandle); - } - - - private static native long newCompactionJobInfo(); - @Override protected native void disposeInternal(final long handle); - - private static native byte[] columnFamilyName(final long handle); - private static native Status status(final long handle); - private static native long threadId(final long handle); - private static native int jobId(final long handle); - private static native int baseInputLevel(final long handle); - private static native int outputLevel(final long handle); - private static native String[] inputFiles(final long handle); - private static native String[] outputFiles(final long handle); - private static native Map tableProperties( - final long handle); - private static native byte compactionReason(final long handle); - private static native byte compression(final long handle); - private static native long stats(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/CompactionJobStats.java b/java/src/main/java/org/rocksdb/CompactionJobStats.java deleted file mode 100644 index 3d53b5565..000000000 --- a/java/src/main/java/org/rocksdb/CompactionJobStats.java +++ /dev/null @@ -1,295 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public class CompactionJobStats extends RocksObject { - - public CompactionJobStats() { - super(newCompactionJobStats()); - } - - /** - * Private as called from JNI C++ - */ - CompactionJobStats(final long nativeHandle) { - super(nativeHandle); - } - - /** - * Reset the stats. - */ - public void reset() { - reset(nativeHandle_); - } - - /** - * Aggregate the CompactionJobStats from another instance with this one. - * - * @param compactionJobStats another instance of stats. - */ - public void add(final CompactionJobStats compactionJobStats) { - add(nativeHandle_, compactionJobStats.nativeHandle_); - } - - /** - * Get the elapsed time in micro of this compaction. - * - * @return the elapsed time in micro of this compaction. - */ - public long elapsedMicros() { - return elapsedMicros(nativeHandle_); - } - - /** - * Get the number of compaction input records. - * - * @return the number of compaction input records. - */ - public long numInputRecords() { - return numInputRecords(nativeHandle_); - } - - /** - * Get the number of compaction input files. - * - * @return the number of compaction input files. - */ - public long numInputFiles() { - return numInputFiles(nativeHandle_); - } - - /** - * Get the number of compaction input files at the output level. - * - * @return the number of compaction input files at the output level. - */ - public long numInputFilesAtOutputLevel() { - return numInputFilesAtOutputLevel(nativeHandle_); - } - - /** - * Get the number of compaction output records. - * - * @return the number of compaction output records. - */ - public long numOutputRecords() { - return numOutputRecords(nativeHandle_); - } - - /** - * Get the number of compaction output files. - * - * @return the number of compaction output files. - */ - public long numOutputFiles() { - return numOutputFiles(nativeHandle_); - } - - /** - * Determine if the compaction is a manual compaction. 
- * - * @return true if the compaction is a manual compaction, false otherwise. - */ - public boolean isManualCompaction() { - return isManualCompaction(nativeHandle_); - } - - /** - * Get the size of the compaction input in bytes. - * - * @return the size of the compaction input in bytes. - */ - public long totalInputBytes() { - return totalInputBytes(nativeHandle_); - } - - /** - * Get the size of the compaction output in bytes. - * - * @return the size of the compaction output in bytes. - */ - public long totalOutputBytes() { - return totalOutputBytes(nativeHandle_); - } - - /** - * Get the number of records being replaced by newer record associated - * with same key. - * - * This could be a new value or a deletion entry for that key so this field - * sums up all updated and deleted keys. - * - * @return the number of records being replaced by newer record associated - * with same key. - */ - public long numRecordsReplaced() { - return numRecordsReplaced(nativeHandle_); - } - - /** - * Get the sum of the uncompressed input keys in bytes. - * - * @return the sum of the uncompressed input keys in bytes. - */ - public long totalInputRawKeyBytes() { - return totalInputRawKeyBytes(nativeHandle_); - } - - /** - * Get the sum of the uncompressed input values in bytes. - * - * @return the sum of the uncompressed input values in bytes. - */ - public long totalInputRawValueBytes() { - return totalInputRawValueBytes(nativeHandle_); - } - - /** - * Get the number of deletion entries before compaction. - * - * Deletion entries can disappear after compaction because they expired. - * - * @return the number of deletion entries before compaction. - */ - public long numInputDeletionRecords() { - return numInputDeletionRecords(nativeHandle_); - } - - /** - * Get the number of deletion records that were found obsolete and discarded - * because it is not possible to delete any more keys with this entry. - * (i.e. all possible deletions resulting from it have been completed) - * - * @return the number of deletion records that were found obsolete and - * discarded. - */ - public long numExpiredDeletionRecords() { - return numExpiredDeletionRecords(nativeHandle_); - } - - /** - * Get the number of corrupt keys (ParseInternalKey returned false when - * applied to the key) encountered and written out. - * - * @return the number of corrupt keys. - */ - public long numCorruptKeys() { - return numCorruptKeys(nativeHandle_); - } - - /** - * Get the Time spent on file's Append() call. - * - * Only populated if {@link ColumnFamilyOptions#reportBgIoStats()} is set. - * - * @return the Time spent on file's Append() call. - */ - public long fileWriteNanos() { - return fileWriteNanos(nativeHandle_); - } - - /** - * Get the Time spent on sync file range. - * - * Only populated if {@link ColumnFamilyOptions#reportBgIoStats()} is set. - * - * @return the Time spent on sync file range. - */ - public long fileRangeSyncNanos() { - return fileRangeSyncNanos(nativeHandle_); - } - - /** - * Get the Time spent on file fsync. - * - * Only populated if {@link ColumnFamilyOptions#reportBgIoStats()} is set. - * - * @return the Time spent on file fsync. - */ - public long fileFsyncNanos() { - return fileFsyncNanos(nativeHandle_); - } - - /** - * Get the Time spent on preparing file write (falocate, etc) - * - * Only populated if {@link ColumnFamilyOptions#reportBgIoStats()} is set. - * - * @return the Time spent on preparing file write (falocate, etc). 
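As the notes above repeat, the per-file I/O timings are only collected when background I/O stats are enabled on the column family; a one-line sketch of that switch (setReportBgIoStats appears in the ColumnFamilyOptions native methods earlier in this diff):

try (final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions().setReportBgIoStats(true)) {
  // fileWriteNanos(), fileRangeSyncNanos(), fileFsyncNanos() and filePrepareWriteNanos()
  // are populated in CompactionJobStats only when this flag is set
}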
- */ - public long filePrepareWriteNanos() { - return filePrepareWriteNanos(nativeHandle_); - } - - /** - * Get the smallest output key prefix. - * - * @return the smallest output key prefix. - */ - public byte[] smallestOutputKeyPrefix() { - return smallestOutputKeyPrefix(nativeHandle_); - } - - /** - * Get the largest output key prefix. - * - * @return the smallest output key prefix. - */ - public byte[] largestOutputKeyPrefix() { - return largestOutputKeyPrefix(nativeHandle_); - } - - /** - * Get the number of single-deletes which do not meet a put. - * - * @return number of single-deletes which do not meet a put. - */ - @Experimental("Performance optimization for a very specific workload") - public long numSingleDelFallthru() { - return numSingleDelFallthru(nativeHandle_); - } - - /** - * Get the number of single-deletes which meet something other than a put. - * - * @return the number of single-deletes which meet something other than a put. - */ - @Experimental("Performance optimization for a very specific workload") - public long numSingleDelMismatch() { - return numSingleDelMismatch(nativeHandle_); - } - - private static native long newCompactionJobStats(); - @Override protected native void disposeInternal(final long handle); - - - private static native void reset(final long handle); - private static native void add(final long handle, - final long compactionJobStatsHandle); - private static native long elapsedMicros(final long handle); - private static native long numInputRecords(final long handle); - private static native long numInputFiles(final long handle); - private static native long numInputFilesAtOutputLevel(final long handle); - private static native long numOutputRecords(final long handle); - private static native long numOutputFiles(final long handle); - private static native boolean isManualCompaction(final long handle); - private static native long totalInputBytes(final long handle); - private static native long totalOutputBytes(final long handle); - private static native long numRecordsReplaced(final long handle); - private static native long totalInputRawKeyBytes(final long handle); - private static native long totalInputRawValueBytes(final long handle); - private static native long numInputDeletionRecords(final long handle); - private static native long numExpiredDeletionRecords(final long handle); - private static native long numCorruptKeys(final long handle); - private static native long fileWriteNanos(final long handle); - private static native long fileRangeSyncNanos(final long handle); - private static native long fileFsyncNanos(final long handle); - private static native long filePrepareWriteNanos(final long handle); - private static native byte[] smallestOutputKeyPrefix(final long handle); - private static native byte[] largestOutputKeyPrefix(final long handle); - private static native long numSingleDelFallthru(final long handle); - private static native long numSingleDelMismatch(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/CompactionOptions.java b/java/src/main/java/org/rocksdb/CompactionOptions.java deleted file mode 100644 index 2c7e391fb..000000000 --- a/java/src/main/java/org/rocksdb/CompactionOptions.java +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import java.util.List; - -/** - * CompactionOptions are used in - * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, List, int, int, CompactionJobInfo)} - * calls. - */ -public class CompactionOptions extends RocksObject { - - public CompactionOptions() { - super(newCompactionOptions()); - } - - /** - * Get the compaction output compression type. - * - * See {@link #setCompression(CompressionType)}. - * - * @return the compression type. - */ - public CompressionType compression() { - return CompressionType.getCompressionType( - compression(nativeHandle_)); - } - - /** - * Set the compaction output compression type. - * - * Default: snappy - * - * If set to {@link CompressionType#DISABLE_COMPRESSION_OPTION}, - * RocksDB will choose compression type according to the - * {@link ColumnFamilyOptions#compressionType()}, taking into account - * the output level if {@link ColumnFamilyOptions#compressionPerLevel()} - * is specified. - * - * @param compression the compression type to use for compaction output. - * - * @return the instance of the current Options. - */ - public CompactionOptions setCompression(final CompressionType compression) { - setCompression(nativeHandle_, compression.getValue()); - return this; - } - - /** - * Get the compaction output file size limit. - * - * See {@link #setOutputFileSizeLimit(long)}. - * - * @return the file size limit. - */ - public long outputFileSizeLimit() { - return outputFileSizeLimit(nativeHandle_); - } - - /** - * Compaction will create files of size {@link #outputFileSizeLimit()}. - * - * Default: 2^64-1, which means that compaction will create a single file - * - * @param outputFileSizeLimit the size limit - * - * @return the instance of the current Options. - */ - public CompactionOptions setOutputFileSizeLimit( - final long outputFileSizeLimit) { - setOutputFileSizeLimit(nativeHandle_, outputFileSizeLimit); - return this; - } - - /** - * Get the maximum number of threads that will concurrently perform a - * compaction job. - * - * @return the maximum number of threads. - */ - public int maxSubcompactions() { - return maxSubcompactions(nativeHandle_); - } - - /** - * This value represents the maximum number of threads that will - * concurrently perform a compaction job by breaking it into multiple, - * smaller ones that are run simultaneously. - * - * Default: 0 (i.e. no subcompactions) - * - * If > 0, it will replace the option in - * {@link DBOptions#maxSubcompactions()} for this compaction. - * - * @param maxSubcompactions The maximum number of threads that will - * concurrently perform a compaction job - * - * @return the instance of the current Options. 
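Putting CompactionOptions together with the compactFiles() call referenced in the class comment (a sketch, assuming an open db, a columnFamilyHandle and a list of input SST file names; the output level and path id are placeholders, and RocksDBException handling is omitted):

try (final CompactionOptions compactionOptions = new CompactionOptions()
         .setCompression(CompressionType.ZSTD_COMPRESSION)
         .setOutputFileSizeLimit(64L * 1024 * 1024)
         .setMaxSubcompactions(4);
     final CompactionJobInfo jobInfo = new CompactionJobInfo()) {
  db.compactFiles(compactionOptions, columnFamilyHandle, inputFileNames,
      /* outputLevel */ 1, /* outputPathId */ -1, jobInfo);
  System.out.println("compaction reason: " + jobInfo.compactionReason()
      + ", output files: " + jobInfo.outputFiles());
}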
- */ - public CompactionOptions setMaxSubcompactions(final int maxSubcompactions) { - setMaxSubcompactions(nativeHandle_, maxSubcompactions); - return this; - } - - private static native long newCompactionOptions(); - @Override protected final native void disposeInternal(final long handle); - - private static native byte compression(final long handle); - private static native void setCompression(final long handle, - final byte compressionTypeValue); - private static native long outputFileSizeLimit(final long handle); - private static native void setOutputFileSizeLimit(final long handle, - final long outputFileSizeLimit); - private static native int maxSubcompactions(final long handle); - private static native void setMaxSubcompactions(final long handle, - final int maxSubcompactions); -} diff --git a/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java b/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java deleted file mode 100644 index 4c8d6545c..000000000 --- a/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Options for FIFO Compaction - */ -public class CompactionOptionsFIFO extends RocksObject { - - public CompactionOptionsFIFO() { - super(newCompactionOptionsFIFO()); - } - - /** - * Once the total sum of table files reaches this, we will delete the oldest - * table file - * - * Default: 1GB - * - * @param maxTableFilesSize The maximum size of the table files - * - * @return the reference to the current options. - */ - public CompactionOptionsFIFO setMaxTableFilesSize( - final long maxTableFilesSize) { - setMaxTableFilesSize(nativeHandle_, maxTableFilesSize); - return this; - } - - /** - * Once the total sum of table files reaches this, we will delete the oldest - * table file - * - * Default: 1GB - * - * @return max table file size in bytes - */ - public long maxTableFilesSize() { - return maxTableFilesSize(nativeHandle_); - } - - /** - * If true, try to do compaction to compact smaller files into larger ones. - * Minimum files to compact follows options.level0_file_num_compaction_trigger - * and compaction won't trigger if average compact bytes per del file is - * larger than options.write_buffer_size. This is to protect large files - * from being compacted again. - * - * Default: false - * - * @param allowCompaction true to allow intra-L0 compaction - * - * @return the reference to the current options. - */ - public CompactionOptionsFIFO setAllowCompaction( - final boolean allowCompaction) { - setAllowCompaction(nativeHandle_, allowCompaction); - return this; - } - - - /** - * Check if intra-L0 compaction is enabled. - * When enabled, we try to compact smaller files into larger ones. - * - * See {@link #setAllowCompaction(boolean)}. - * - * Default: false - * - * @return true if intra-L0 compaction is enabled, false otherwise. 
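A sketch of wiring these FIFO options into a column family (CompactionStyle.FIFO is assumed from the wider RocksJava API; the 1 GB limit mirrors the documented default):

try (final CompactionOptionsFIFO fifoOptions = new CompactionOptionsFIFO()
         .setMaxTableFilesSize(1L * 1024 * 1024 * 1024)  // delete the oldest files once the total exceeds ~1 GB
         .setAllowCompaction(true);                       // allow intra-L0 compaction
     final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()
         .setCompactionStyle(CompactionStyle.FIFO)
         .setCompactionOptionsFIFO(fifoOptions)) {
  // open the column family with cfOptions ...
}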
- */ - public boolean allowCompaction() { - return allowCompaction(nativeHandle_); - } - - - private native static long newCompactionOptionsFIFO(); - @Override protected final native void disposeInternal(final long handle); - - private native void setMaxTableFilesSize(final long handle, - final long maxTableFilesSize); - private native long maxTableFilesSize(final long handle); - private native void setAllowCompaction(final long handle, - final boolean allowCompaction); - private native boolean allowCompaction(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java b/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java deleted file mode 100644 index d2dfa4eef..000000000 --- a/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Options for Universal Compaction - */ -public class CompactionOptionsUniversal extends RocksObject { - - public CompactionOptionsUniversal() { - super(newCompactionOptionsUniversal()); - } - - /** - * Percentage flexibility while comparing file size. If the candidate file(s) - * size is 1% smaller than the next file's size, then include next file into - * this candidate set. - * - * Default: 1 - * - * @param sizeRatio The size ratio to use - * - * @return the reference to the current options. - */ - public CompactionOptionsUniversal setSizeRatio(final int sizeRatio) { - setSizeRatio(nativeHandle_, sizeRatio); - return this; - } - - /** - * Percentage flexibility while comparing file size. If the candidate file(s) - * size is 1% smaller than the next file's size, then include next file into - * this candidate set. - * - * Default: 1 - * - * @return The size ratio in use - */ - public int sizeRatio() { - return sizeRatio(nativeHandle_); - } - - /** - * The minimum number of files in a single compaction run. - * - * Default: 2 - * - * @param minMergeWidth minimum number of files in a single compaction run - * - * @return the reference to the current options. - */ - public CompactionOptionsUniversal setMinMergeWidth(final int minMergeWidth) { - setMinMergeWidth(nativeHandle_, minMergeWidth); - return this; - } - - /** - * The minimum number of files in a single compaction run. - * - * Default: 2 - * - * @return minimum number of files in a single compaction run - */ - public int minMergeWidth() { - return minMergeWidth(nativeHandle_); - } - - /** - * The maximum number of files in a single compaction run. - * - * Default: {@link Long#MAX_VALUE} - * - * @param maxMergeWidth maximum number of files in a single compaction run - * - * @return the reference to the current options. - */ - public CompactionOptionsUniversal setMaxMergeWidth(final int maxMergeWidth) { - setMaxMergeWidth(nativeHandle_, maxMergeWidth); - return this; - } - - /** - * The maximum number of files in a single compaction run. - * - * Default: {@link Long#MAX_VALUE} - * - * @return maximum number of files in a single compaction run - */ - public int maxMergeWidth() { - return maxMergeWidth(nativeHandle_); - } - - /** - * The size amplification is defined as the amount (in percentage) of - * additional storage needed to store a single byte of data in the database. 
- * For example, a size amplification of 2% means that a database that - * contains 100 bytes of user-data may occupy upto 102 bytes of - * physical storage. By this definition, a fully compacted database has - * a size amplification of 0%. Rocksdb uses the following heuristic - * to calculate size amplification: it assumes that all files excluding - * the earliest file contribute to the size amplification. - * - * Default: 200, which means that a 100 byte database could require upto - * 300 bytes of storage. - * - * @param maxSizeAmplificationPercent the amount of additional storage needed - * (as a percentage) to store a single byte in the database - * - * @return the reference to the current options. - */ - public CompactionOptionsUniversal setMaxSizeAmplificationPercent( - final int maxSizeAmplificationPercent) { - setMaxSizeAmplificationPercent(nativeHandle_, maxSizeAmplificationPercent); - return this; - } - - /** - * The size amplification is defined as the amount (in percentage) of - * additional storage needed to store a single byte of data in the database. - * For example, a size amplification of 2% means that a database that - * contains 100 bytes of user-data may occupy upto 102 bytes of - * physical storage. By this definition, a fully compacted database has - * a size amplification of 0%. Rocksdb uses the following heuristic - * to calculate size amplification: it assumes that all files excluding - * the earliest file contribute to the size amplification. - * - * Default: 200, which means that a 100 byte database could require upto - * 300 bytes of storage. - * - * @return the amount of additional storage needed (as a percentage) to store - * a single byte in the database - */ - public int maxSizeAmplificationPercent() { - return maxSizeAmplificationPercent(nativeHandle_); - } - - /** - * If this option is set to be -1 (the default value), all the output files - * will follow compression type specified. - * - * If this option is not negative, we will try to make sure compressed - * size is just above this value. In normal cases, at least this percentage - * of data will be compressed. - * - * When we are compacting to a new file, here is the criteria whether - * it needs to be compressed: assuming here are the list of files sorted - * by generation time: - * A1...An B1...Bm C1...Ct - * where A1 is the newest and Ct is the oldest, and we are going to compact - * B1...Bm, we calculate the total size of all the files as total_size, as - * well as the total size of C1...Ct as total_C, the compaction output file - * will be compressed iff - * total_C / total_size < this percentage - * - * Default: -1 - * - * @param compressionSizePercent percentage of size for compression - * - * @return the reference to the current options. - */ - public CompactionOptionsUniversal setCompressionSizePercent( - final int compressionSizePercent) { - setCompressionSizePercent(nativeHandle_, compressionSizePercent); - return this; - } - - /** - * If this option is set to be -1 (the default value), all the output files - * will follow compression type specified. - * - * If this option is not negative, we will try to make sure compressed - * size is just above this value. In normal cases, at least this percentage - * of data will be compressed. 
- * - * When we are compacting to a new file, here is the criteria whether - * it needs to be compressed: assuming here are the list of files sorted - * by generation time: - * A1...An B1...Bm C1...Ct - * where A1 is the newest and Ct is the oldest, and we are going to compact - * B1...Bm, we calculate the total size of all the files as total_size, as - * well as the total size of C1...Ct as total_C, the compaction output file - * will be compressed iff - * total_C / total_size < this percentage - * - * Default: -1 - * - * @return percentage of size for compression - */ - public int compressionSizePercent() { - return compressionSizePercent(nativeHandle_); - } - - /** - * The algorithm used to stop picking files into a single compaction run - * - * Default: {@link CompactionStopStyle#CompactionStopStyleTotalSize} - * - * @param compactionStopStyle The compaction algorithm - * - * @return the reference to the current options. - */ - public CompactionOptionsUniversal setStopStyle( - final CompactionStopStyle compactionStopStyle) { - setStopStyle(nativeHandle_, compactionStopStyle.getValue()); - return this; - } - - /** - * The algorithm used to stop picking files into a single compaction run - * - * Default: {@link CompactionStopStyle#CompactionStopStyleTotalSize} - * - * @return The compaction algorithm - */ - public CompactionStopStyle stopStyle() { - return CompactionStopStyle.getCompactionStopStyle(stopStyle(nativeHandle_)); - } - - /** - * Option to optimize the universal multi level compaction by enabling - * trivial move for non overlapping files. - * - * Default: false - * - * @param allowTrivialMove true if trivial move is allowed - * - * @return the reference to the current options. - */ - public CompactionOptionsUniversal setAllowTrivialMove( - final boolean allowTrivialMove) { - setAllowTrivialMove(nativeHandle_, allowTrivialMove); - return this; - } - - /** - * Option to optimize the universal multi level compaction by enabling - * trivial move for non overlapping files. 
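For context, these universal-compaction knobs are usually set together and attached to a column family; a sketch under the same assumptions as the FIFO example above (CompactionStyle.UNIVERSAL is assumed; values mirror the documented defaults except where noted):

try (final CompactionOptionsUniversal universalOptions = new CompactionOptionsUniversal()
         .setSizeRatio(1)
         .setMinMergeWidth(2)
         .setMaxSizeAmplificationPercent(200)
         .setStopStyle(CompactionStopStyle.CompactionStopStyleTotalSize)
         .setAllowTrivialMove(true);                      // non-default: enable trivial moves
     final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()
         .setCompactionStyle(CompactionStyle.UNIVERSAL)
         .setCompactionOptionsUniversal(universalOptions)) {
  // open the column family with cfOptions ...
}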
- * - * Default: false - * - * @return true if trivial move is allowed - */ - public boolean allowTrivialMove() { - return allowTrivialMove(nativeHandle_); - } - - private native static long newCompactionOptionsUniversal(); - @Override protected final native void disposeInternal(final long handle); - - private native void setSizeRatio(final long handle, final int sizeRatio); - private native int sizeRatio(final long handle); - private native void setMinMergeWidth( - final long handle, final int minMergeWidth); - private native int minMergeWidth(final long handle); - private native void setMaxMergeWidth( - final long handle, final int maxMergeWidth); - private native int maxMergeWidth(final long handle); - private native void setMaxSizeAmplificationPercent( - final long handle, final int maxSizeAmplificationPercent); - private native int maxSizeAmplificationPercent(final long handle); - private native void setCompressionSizePercent( - final long handle, final int compressionSizePercent); - private native int compressionSizePercent(final long handle); - private native void setStopStyle( - final long handle, final byte stopStyle); - private native byte stopStyle(final long handle); - private native void setAllowTrivialMove( - final long handle, final boolean allowTrivialMove); - private native boolean allowTrivialMove(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/CompactionPriority.java b/java/src/main/java/org/rocksdb/CompactionPriority.java deleted file mode 100644 index eda05942e..000000000 --- a/java/src/main/java/org/rocksdb/CompactionPriority.java +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Compaction Priorities - */ -public enum CompactionPriority { - - /** - * Slightly Prioritize larger files by size compensated by #deletes - */ - ByCompensatedSize((byte)0x0), - - /** - * First compact files whose data's latest update time is oldest. - * Try this if you only update some hot keys in small ranges. - */ - OldestLargestSeqFirst((byte)0x1), - - /** - * First compact files whose range hasn't been compacted to the next level - * for the longest. If your updates are random across the key space, - * write amplification is slightly better with this option. - */ - OldestSmallestSeqFirst((byte)0x2), - - /** - * First compact files whose ratio between overlapping size in next level - * and its size is the smallest. It in many cases can optimize write - * amplification. - */ - MinOverlappingRatio((byte)0x3), - - /** - * Keeps a cursor(s) of the successor of the file (key range) was/were - * compacted before, and always picks the next files (key range) in that - * level. The file picking process will cycle through all the files in a - * round-robin manner. - */ - RoundRobin((byte)0x4); - - - private final byte value; - - CompactionPriority(final byte value) { - this.value = value; - } - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - * Get CompactionPriority by byte value. - * - * @param value byte representation of CompactionPriority. - * - * @return {@link org.rocksdb.CompactionPriority} instance or null. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. 
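As a usage note (illustrative, assuming ColumnFamilyOptions#setCompactionPriority from the wider org.rocksdb API), one of these priorities is normally selected on the column-family options:

```java
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.CompactionPriority;
import org.rocksdb.RocksDB;

public class CompactionPrioritySketch {
  public static void main(final String[] args) {
    RocksDB.loadLibrary();
    try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
      // Per the enum documentation, MinOverlappingRatio often reduces write amplification.
      cfOpts.setCompactionPriority(CompactionPriority.MinOverlappingRatio);
    }
  }
}
```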
- */ - public static CompactionPriority getCompactionPriority(final byte value) { - for (final CompactionPriority compactionPriority : - CompactionPriority.values()) { - if (compactionPriority.getValue() == value){ - return compactionPriority; - } - } - throw new IllegalArgumentException( - "Illegal value provided for CompactionPriority."); - } -} diff --git a/java/src/main/java/org/rocksdb/CompactionReason.java b/java/src/main/java/org/rocksdb/CompactionReason.java deleted file mode 100644 index 46ec33f3f..000000000 --- a/java/src/main/java/org/rocksdb/CompactionReason.java +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public enum CompactionReason { - kUnknown((byte)0x0), - - /** - * [Level] number of L0 files > level0_file_num_compaction_trigger - */ - kLevelL0FilesNum((byte)0x1), - - /** - * [Level] total size of level > MaxBytesForLevel() - */ - kLevelMaxLevelSize((byte)0x2), - - /** - * [Universal] Compacting for size amplification - */ - kUniversalSizeAmplification((byte)0x3), - - /** - * [Universal] Compacting for size ratio - */ - kUniversalSizeRatio((byte)0x4), - - /** - * [Universal] number of sorted runs > level0_file_num_compaction_trigger - */ - kUniversalSortedRunNum((byte)0x5), - - /** - * [FIFO] total size > max_table_files_size - */ - kFIFOMaxSize((byte)0x6), - - /** - * [FIFO] reduce number of files. - */ - kFIFOReduceNumFiles((byte)0x7), - - /** - * [FIFO] files with creation time < (current_time - interval) - */ - kFIFOTtl((byte)0x8), - - /** - * Manual compaction - */ - kManualCompaction((byte)0x9), - - /** - * DB::SuggestCompactRange() marked files for compaction - */ - kFilesMarkedForCompaction((byte)0x10), - - /** - * [Level] Automatic compaction within bottommost level to cleanup duplicate - * versions of same user key, usually due to a released snapshot. - */ - kBottommostFiles((byte)0x0A), - - /** - * Compaction based on TTL - */ - kTtl((byte)0x0B), - - /** - * According to the comments in flush_job.cc, RocksDB treats flush as - * a level 0 compaction in internal stats. - */ - kFlush((byte)0x0C), - - /** - * Compaction caused by external sst file ingestion - */ - kExternalSstIngestion((byte) 0x0D), - - /** - * Compaction due to SST file being too old - */ - kPeriodicCompaction((byte) 0x0E), - - /** - * Compaction in order to move files to temperature - */ - kChangeTemperature((byte) 0x0F), - - /** - * Compaction scheduled to force garbage collection of blob files - */ - kForcedBlobGC((byte) 0x11), - - /** - * A special TTL compaction for RoundRobin policy, which basically the same as - * kLevelMaxLevelSize, but the goal is to compact TTLed files. - */ - kRoundRobinTtl((byte) 0x12), - - /** - * Compaction by calling DBImpl::ReFitLevel - */ - kRefitLevel((byte) 0x13); - - private final byte value; - - CompactionReason(final byte value) { - this.value = value; - } - - /** - * Get the internal representation value. - * - * @return the internal representation value - */ - byte getValue() { - return value; - } - - /** - * Get the CompactionReason from the internal representation value. - * - * @return the compaction reason. - * - * @throws IllegalArgumentException if the value is unknown. 
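These reasons typically surface through compaction callbacks rather than being set by the user; a hedged sketch, assuming AbstractEventListener#onCompactionCompleted and CompactionJobInfo#compactionReason from the wider org.rocksdb API:

```java
import org.rocksdb.AbstractEventListener;
import org.rocksdb.CompactionJobInfo;
import org.rocksdb.CompactionReason;
import org.rocksdb.RocksDB;

// Logs why each completed compaction was scheduled.
public class CompactionReasonLogger extends AbstractEventListener {
  @Override
  public void onCompactionCompleted(final RocksDB db, final CompactionJobInfo info) {
    final CompactionReason reason = info.compactionReason();
    System.out.println("Compaction completed, reason=" + reason);
  }
}
```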
- */ - static CompactionReason fromValue(final byte value) { - for (final CompactionReason compactionReason : CompactionReason.values()) { - if(compactionReason.value == value) { - return compactionReason; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for CompactionReason: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/CompactionStopStyle.java b/java/src/main/java/org/rocksdb/CompactionStopStyle.java deleted file mode 100644 index f6e63209c..000000000 --- a/java/src/main/java/org/rocksdb/CompactionStopStyle.java +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -/** - * Algorithm used to make a compaction request stop picking new files - * into a single compaction run - */ -public enum CompactionStopStyle { - - /** - * Pick files of similar size - */ - CompactionStopStyleSimilarSize((byte)0x0), - - /** - * Total size of picked files > next file - */ - CompactionStopStyleTotalSize((byte)0x1); - - - private final byte value; - - CompactionStopStyle(final byte value) { - this.value = value; - } - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - * Get CompactionStopStyle by byte value. - * - * @param value byte representation of CompactionStopStyle. - * - * @return {@link org.rocksdb.CompactionStopStyle} instance or null. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. - */ - public static CompactionStopStyle getCompactionStopStyle(final byte value) { - for (final CompactionStopStyle compactionStopStyle : - CompactionStopStyle.values()) { - if (compactionStopStyle.getValue() == value){ - return compactionStopStyle; - } - } - throw new IllegalArgumentException( - "Illegal value provided for CompactionStopStyle."); - } -} diff --git a/java/src/main/java/org/rocksdb/CompactionStyle.java b/java/src/main/java/org/rocksdb/CompactionStyle.java deleted file mode 100644 index b24bbf850..000000000 --- a/java/src/main/java/org/rocksdb/CompactionStyle.java +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.List; - -/** - * Enum CompactionStyle - * - * RocksDB supports different styles of compaction. Available - * compaction styles can be chosen using this enumeration. - * - *
<ol>
- *   <li>LEVEL - Level based Compaction style</li>
- *   <li>UNIVERSAL - Universal Compaction Style is a
- * compaction style, targeting the use cases requiring lower write
- * amplification, trading off read amplification and space
- * amplification.</li>
- *   <li>FIFO - FIFO compaction style is the simplest
- * compaction strategy. It is suited for keeping event log data with
- * very low overhead (query log for example). It periodically deletes
- * the old data, so it's basically a TTL compaction style.</li>
- *   <li>NONE - Disable background compaction.
- * Compaction jobs are submitted via
- * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, List, int, int, CompactionJobInfo)}.</li>
- * </ol>
- * - * @see - * Universal Compaction - * @see - * FIFO Compaction - */ -public enum CompactionStyle { - LEVEL((byte) 0x0), - UNIVERSAL((byte) 0x1), - FIFO((byte) 0x2), - NONE((byte) 0x3); - - private final byte value; - - CompactionStyle(final byte value) { - this.value = value; - } - - /** - * Get the internal representation value. - * - * @return the internal representation value. - */ - //TODO(AR) should be made package-private - public byte getValue() { - return value; - } - - /** - * Get the Compaction style from the internal representation value. - * - * @param value the internal representation value. - * - * @return the Compaction style - * - * @throws IllegalArgumentException if the value does not match a - * CompactionStyle - */ - static CompactionStyle fromValue(final byte value) - throws IllegalArgumentException { - for (final CompactionStyle compactionStyle : CompactionStyle.values()) { - if (compactionStyle.value == value) { - return compactionStyle; - } - } - throw new IllegalArgumentException("Unknown value for CompactionStyle: " - + value); - } -} diff --git a/java/src/main/java/org/rocksdb/ComparatorOptions.java b/java/src/main/java/org/rocksdb/ComparatorOptions.java deleted file mode 100644 index 8c3162858..000000000 --- a/java/src/main/java/org/rocksdb/ComparatorOptions.java +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * This class controls the behaviour - * of Java implementations of - * AbstractComparator - * - * Note that dispose() must be called before a ComparatorOptions - * instance becomes out-of-scope to release the allocated memory in C++. - */ -public class ComparatorOptions extends RocksObject { - public ComparatorOptions() { - super(newComparatorOptions()); - } - - /** - * Get the synchronisation type used to guard the reused buffers. - * Only used if {@link #maxReusedBufferSize()} > 0 - * Default: {@link ReusedSynchronisationType#ADAPTIVE_MUTEX} - * - * @return the synchronisation type - */ - public ReusedSynchronisationType reusedSynchronisationType() { - assert(isOwningHandle()); - return ReusedSynchronisationType.getReusedSynchronisationType( - reusedSynchronisationType(nativeHandle_)); - } - - /** - * Set the synchronisation type used to guard the reused buffers. - * Only used if {@link #maxReusedBufferSize()} > 0 - * Default: {@link ReusedSynchronisationType#ADAPTIVE_MUTEX} - * - * @param reusedSynchronisationType the synchronisation type - * - * @return the reference to the current comparator options. - */ - public ComparatorOptions setReusedSynchronisationType( - final ReusedSynchronisationType reusedSynchronisationType) { - assert (isOwningHandle()); - setReusedSynchronisationType(nativeHandle_, - reusedSynchronisationType.getValue()); - return this; - } - - /** - * Indicates if a direct byte buffer (i.e. outside of the normal - * garbage-collected heap) is used, as opposed to a non-direct byte buffer - * which is a wrapper around an on-heap byte[]. - * - * Default: true - * - * @return true if a direct byte buffer will be used, false otherwise - */ - public boolean useDirectBuffer() { - assert(isOwningHandle()); - return useDirectBuffer(nativeHandle_); - } - - /** - * Controls whether a direct byte buffer (i.e. 
outside of the normal - * garbage-collected heap) is used, as opposed to a non-direct byte buffer - * which is a wrapper around an on-heap byte[]. - * - * Default: true - * - * @param useDirectBuffer true if a direct byte buffer should be used, - * false otherwise - * @return the reference to the current comparator options. - */ - public ComparatorOptions setUseDirectBuffer(final boolean useDirectBuffer) { - assert(isOwningHandle()); - setUseDirectBuffer(nativeHandle_, useDirectBuffer); - return this; - } - - /** - * Maximum size of a buffer (in bytes) that will be reused. - * Comparators will use 5 of these buffers, - * so the retained memory size will be 5 * max_reused_buffer_size. - * When a buffer is needed for transferring data to a callback, - * if it requires less than {@code maxReuseBufferSize}, then an - * existing buffer will be reused, else a new buffer will be - * allocated just for that callback. - * - * Default: 64 bytes - * - * @return the maximum size of a buffer which is reused, - * or 0 if reuse is disabled - */ - public int maxReusedBufferSize() { - assert(isOwningHandle()); - return maxReusedBufferSize(nativeHandle_); - } - - /** - * Sets the maximum size of a buffer (in bytes) that will be reused. - * Comparators will use 5 of these buffers, - * so the retained memory size will be 5 * max_reused_buffer_size. - * When a buffer is needed for transferring data to a callback, - * if it requires less than {@code maxReuseBufferSize}, then an - * existing buffer will be reused, else a new buffer will be - * allocated just for that callback. - * - * Default: 64 bytes - * - * @param maxReusedBufferSize the maximum size for a buffer to reuse, or 0 to - * disable reuse - * - * @return the maximum size of a buffer which is reused - */ - public ComparatorOptions setMaxReusedBufferSize(final int maxReusedBufferSize) { - assert(isOwningHandle()); - setMaxReusedBufferSize(nativeHandle_, maxReusedBufferSize); - return this; - } - - private native static long newComparatorOptions(); - private native byte reusedSynchronisationType(final long handle); - private native void setReusedSynchronisationType(final long handle, - final byte reusedSynchronisationType); - private native boolean useDirectBuffer(final long handle); - private native void setUseDirectBuffer(final long handle, - final boolean useDirectBuffer); - private native int maxReusedBufferSize(final long handle); - private native void setMaxReusedBufferSize(final long handle, - final int maxReuseBufferSize); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/ComparatorType.java b/java/src/main/java/org/rocksdb/ComparatorType.java deleted file mode 100644 index 199980b6e..000000000 --- a/java/src/main/java/org/rocksdb/ComparatorType.java +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -enum ComparatorType { - JAVA_COMPARATOR((byte)0x0), - JAVA_NATIVE_COMPARATOR_WRAPPER((byte)0x1); - - private final byte value; - - ComparatorType(final byte value) { - this.value = value; - } - - /** - *
<p>Returns the byte value of the enumeration's value.</p>
- * - * @return byte representation - */ - byte getValue() { - return value; - } - - /** - *
<p>Get the ComparatorType enumeration value by - * passing the byte identifier to this method.</p>
- * - * @param byteIdentifier of ComparatorType. - * - * @return ComparatorType instance. - * - * @throws IllegalArgumentException if the comparator type for the byteIdentifier - * cannot be found - */ - static ComparatorType getComparatorType(final byte byteIdentifier) { - for (final ComparatorType comparatorType : ComparatorType.values()) { - if (comparatorType.getValue() == byteIdentifier) { - return comparatorType; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for ComparatorType."); - } -} diff --git a/java/src/main/java/org/rocksdb/CompressionOptions.java b/java/src/main/java/org/rocksdb/CompressionOptions.java deleted file mode 100644 index a9072bbb9..000000000 --- a/java/src/main/java/org/rocksdb/CompressionOptions.java +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Options for Compression - */ -public class CompressionOptions extends RocksObject { - - public CompressionOptions() { - super(newCompressionOptions()); - } - - public CompressionOptions setWindowBits(final int windowBits) { - setWindowBits(nativeHandle_, windowBits); - return this; - } - - public int windowBits() { - return windowBits(nativeHandle_); - } - - public CompressionOptions setLevel(final int level) { - setLevel(nativeHandle_, level); - return this; - } - - public int level() { - return level(nativeHandle_); - } - - public CompressionOptions setStrategy(final int strategy) { - setStrategy(nativeHandle_, strategy); - return this; - } - - public int strategy() { - return strategy(nativeHandle_); - } - - /** - * Maximum size of dictionary used to prime the compression library. Currently - * this dictionary will be constructed by sampling the first output file in a - * subcompaction when the target level is bottommost. This dictionary will be - * loaded into the compression library before compressing/uncompressing each - * data block of subsequent files in the subcompaction. Effectively, this - * improves compression ratios when there are repetitions across data blocks. - * - * A value of 0 indicates the feature is disabled. - * - * Default: 0. - * - * @param maxDictBytes Maximum bytes to use for the dictionary - * - * @return the reference to the current options - */ - public CompressionOptions setMaxDictBytes(final int maxDictBytes) { - setMaxDictBytes(nativeHandle_, maxDictBytes); - return this; - } - - /** - * Maximum size of dictionary used to prime the compression library. - * - * @return The maximum bytes to use for the dictionary - */ - public int maxDictBytes() { - return maxDictBytes(nativeHandle_); - } - - /** - * Maximum size of training data passed to zstd's dictionary trainer. Using - * zstd's dictionary trainer can achieve even better compression ratio - * improvements than using {@link #setMaxDictBytes(int)} alone. - * - * The training data will be used to generate a dictionary - * of {@link #maxDictBytes()}. - * - * Default: 0. - * - * @param zstdMaxTrainBytes Maximum bytes to use for training ZStd. - * - * @return the reference to the current options - */ - public CompressionOptions setZStdMaxTrainBytes(final int zstdMaxTrainBytes) { - setZstdMaxTrainBytes(nativeHandle_, zstdMaxTrainBytes); - return this; - } - - /** - * Maximum size of training data passed to zstd's dictionary trainer. 
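A hedged sketch of how the dictionary settings above are commonly combined with ZSTD compression; ColumnFamilyOptions#setCompressionType and #setCompressionOptions are assumed from the wider org.rocksdb API, and the byte sizes are illustrative:

```java
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.CompressionOptions;
import org.rocksdb.CompressionType;
import org.rocksdb.RocksDB;

public class ZstdDictionarySketch {
  public static void main(final String[] args) {
    RocksDB.loadLibrary();
    try (final CompressionOptions compressionOpts = new CompressionOptions()
             .setMaxDictBytes(16 * 1024)             // 16 KiB dictionary
             .setZStdMaxTrainBytes(100 * 16 * 1024); // ~100x the dictionary size as training data
         final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
      cfOpts.setCompressionType(CompressionType.ZSTD_COMPRESSION)
          .setCompressionOptions(compressionOpts);
    }
  }
}
```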
- * - * @return Maximum bytes to use for training ZStd - */ - public int zstdMaxTrainBytes() { - return zstdMaxTrainBytes(nativeHandle_); - } - - /** - * When the compression options are set by the user, it will be set to "true". - * For bottommost_compression_opts, to enable it, user must set enabled=true. - * Otherwise, bottommost compression will use compression_opts as default - * compression options. - * - * For compression_opts, if compression_opts.enabled=false, it is still - * used as compression options for compression process. - * - * Default: false. - * - * @param enabled true to use these compression options - * for the bottommost_compression_opts, false otherwise - * - * @return the reference to the current options - */ - public CompressionOptions setEnabled(final boolean enabled) { - setEnabled(nativeHandle_, enabled); - return this; - } - - /** - * Determine whether these compression options - * are used for the bottommost_compression_opts. - * - * @return true if these compression options are used - * for the bottommost_compression_opts, false otherwise - */ - public boolean enabled() { - return enabled(nativeHandle_); - } - - - private native static long newCompressionOptions(); - @Override protected final native void disposeInternal(final long handle); - - private native void setWindowBits(final long handle, final int windowBits); - private native int windowBits(final long handle); - private native void setLevel(final long handle, final int level); - private native int level(final long handle); - private native void setStrategy(final long handle, final int strategy); - private native int strategy(final long handle); - private native void setMaxDictBytes(final long handle, final int maxDictBytes); - private native int maxDictBytes(final long handle); - private native void setZstdMaxTrainBytes(final long handle, - final int zstdMaxTrainBytes); - private native int zstdMaxTrainBytes(final long handle); - private native void setEnabled(final long handle, final boolean enabled); - private native boolean enabled(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/CompressionType.java b/java/src/main/java/org/rocksdb/CompressionType.java deleted file mode 100644 index d1d73d51a..000000000 --- a/java/src/main/java/org/rocksdb/CompressionType.java +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Enum CompressionType - * - *
<p>DB contents are stored in a set of blocks, each of which holds a - * sequence of key,value pairs. Each block may be compressed before - * being stored in a file. The following enum describes which - * compression method (if any) is used to compress a block.</p>
- */ -public enum CompressionType { - NO_COMPRESSION((byte) 0x0, null, "kNoCompression"), - SNAPPY_COMPRESSION((byte) 0x1, "snappy", "kSnappyCompression"), - ZLIB_COMPRESSION((byte) 0x2, "z", "kZlibCompression"), - BZLIB2_COMPRESSION((byte) 0x3, "bzip2", "kBZip2Compression"), - LZ4_COMPRESSION((byte) 0x4, "lz4", "kLZ4Compression"), - LZ4HC_COMPRESSION((byte) 0x5, "lz4hc", "kLZ4HCCompression"), - XPRESS_COMPRESSION((byte) 0x6, "xpress", "kXpressCompression"), - ZSTD_COMPRESSION((byte) 0x7, "zstd", "kZSTD"), - DISABLE_COMPRESSION_OPTION((byte) 0x7F, null, "kDisableCompressionOption"); - - /** - *
<p>Get the CompressionType enumeration value by - * passing the library name to this method.</p>
- * - *
<p>If the library cannot be found, the enumeration - * value {@code NO_COMPRESSION} will be returned.</p>
- * - * @param libraryName compression library name. - * - * @return CompressionType instance. - */ - public static CompressionType getCompressionType(String libraryName) { - if (libraryName != null) { - for (CompressionType compressionType : CompressionType.values()) { - if (compressionType.getLibraryName() != null && - compressionType.getLibraryName().equals(libraryName)) { - return compressionType; - } - } - } - return CompressionType.NO_COMPRESSION; - } - - /** - *
<p>Get the CompressionType enumeration value by - * passing the byte identifier to this method.</p>
- * - * @param byteIdentifier of CompressionType. - * - * @return CompressionType instance. - * - * @throws IllegalArgumentException If CompressionType cannot be found for the - * provided byteIdentifier - */ - public static CompressionType getCompressionType(byte byteIdentifier) { - for (final CompressionType compressionType : CompressionType.values()) { - if (compressionType.getValue() == byteIdentifier) { - return compressionType; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for CompressionType."); - } - - /** - *
<p>Get a CompressionType value based on the string key in the C++ options output. - * This gets used in support of getting options into Java from an options string, - * which is generated at the C++ level.</p>
- * - * @param internalName the internal (C++) name by which the option is known. - * - * @return CompressionType instance (optional) - */ - static CompressionType getFromInternal(final String internalName) { - for (final CompressionType compressionType : CompressionType.values()) { - if (compressionType.internalName_.equals(internalName)) { - return compressionType; - } - } - - throw new IllegalArgumentException( - "Illegal internalName '" + internalName + " ' provided for CompressionType."); - } - - /** - *
<p>Returns the byte value of the enumeration's value.</p>
- * - * @return byte representation - */ - public byte getValue() { - return value_; - } - - /** - *
<p>Returns the library name of the compression type - * identified by the enumeration value.</p>
- * - * @return library name - */ - public String getLibraryName() { - return libraryName_; - } - - CompressionType(final byte value, final String libraryName, final String internalName) { - value_ = value; - libraryName_ = libraryName; - internalName_ = internalName; - } - - private final byte value_; - private final String libraryName_; - private final String internalName_; -} diff --git a/java/src/main/java/org/rocksdb/ConcurrentTaskLimiter.java b/java/src/main/java/org/rocksdb/ConcurrentTaskLimiter.java deleted file mode 100644 index b4e34303b..000000000 --- a/java/src/main/java/org/rocksdb/ConcurrentTaskLimiter.java +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public abstract class ConcurrentTaskLimiter extends RocksObject { - protected ConcurrentTaskLimiter(final long nativeHandle) { - super(nativeHandle); - } - - /** - * Returns a name that identifies this concurrent task limiter. - * - * @return Concurrent task limiter name. - */ - public abstract String name(); - - /** - * Set max concurrent tasks.
- * limit = 0 means no new task allowed.
- * limit < 0 means no limitation. - * - * @param maxOutstandinsTask max concurrent tasks. - * @return the reference to the current instance of ConcurrentTaskLimiter. - */ - public abstract ConcurrentTaskLimiter setMaxOutstandingTask(final int maxOutstandinsTask); - - /** - * Reset to unlimited max concurrent task. - * - * @return the reference to the current instance of ConcurrentTaskLimiter. - */ - public abstract ConcurrentTaskLimiter resetMaxOutstandingTask(); - - /** - * Returns current outstanding task count. - * - * @return current outstanding task count. - */ - public abstract int outstandingTask(); -} diff --git a/java/src/main/java/org/rocksdb/ConcurrentTaskLimiterImpl.java b/java/src/main/java/org/rocksdb/ConcurrentTaskLimiterImpl.java deleted file mode 100644 index d28b9060a..000000000 --- a/java/src/main/java/org/rocksdb/ConcurrentTaskLimiterImpl.java +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public class ConcurrentTaskLimiterImpl extends ConcurrentTaskLimiter { - public ConcurrentTaskLimiterImpl(final String name, final int maxOutstandingTask) { - super(newConcurrentTaskLimiterImpl0(name, maxOutstandingTask)); - } - - @Override - public String name() { - assert (isOwningHandle()); - return name(nativeHandle_); - } - - @Override - public ConcurrentTaskLimiter setMaxOutstandingTask(final int maxOutstandingTask) { - assert (isOwningHandle()); - setMaxOutstandingTask(nativeHandle_, maxOutstandingTask); - return this; - } - - @Override - public ConcurrentTaskLimiter resetMaxOutstandingTask() { - assert (isOwningHandle()); - resetMaxOutstandingTask(nativeHandle_); - return this; - } - - @Override - public int outstandingTask() { - assert (isOwningHandle()); - return outstandingTask(nativeHandle_); - } - - private static native long newConcurrentTaskLimiterImpl0( - final String name, final int maxOutstandingTask); - private static native String name(final long handle); - private static native void setMaxOutstandingTask(final long handle, final int limit); - private static native void resetMaxOutstandingTask(final long handle); - private static native int outstandingTask(final long handle); - - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/ConfigOptions.java b/java/src/main/java/org/rocksdb/ConfigOptions.java deleted file mode 100644 index 4d93f0c99..000000000 --- a/java/src/main/java/org/rocksdb/ConfigOptions.java +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
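A hedged usage sketch for the limiter classes above; the ColumnFamilyOptions#setCompactionThreadLimiter hook is assumed from the wider org.rocksdb API, and the limiter name and limit are arbitrary:

```java
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.ConcurrentTaskLimiter;
import org.rocksdb.ConcurrentTaskLimiterImpl;
import org.rocksdb.RocksDB;

public class CompactionLimiterSketch {
  public static void main(final String[] args) {
    RocksDB.loadLibrary();
    // Allow at most 4 concurrent compaction tasks for every column family sharing this limiter.
    try (final ConcurrentTaskLimiter limiter = new ConcurrentTaskLimiterImpl("compaction-limiter", 4);
         final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) {
      cfOpts.setCompactionThreadLimiter(limiter);
    }
  }
}
```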
- -package org.rocksdb; - -public class ConfigOptions extends RocksObject { - static { - RocksDB.loadLibrary(); - } - - /** - * Construct with default Options - */ - public ConfigOptions() { - super(newConfigOptions()); - } - - public ConfigOptions setDelimiter(final String delimiter) { - setDelimiter(nativeHandle_, delimiter); - return this; - } - public ConfigOptions setIgnoreUnknownOptions(final boolean ignore) { - setIgnoreUnknownOptions(nativeHandle_, ignore); - return this; - } - - public ConfigOptions setEnv(final Env env) { - setEnv(nativeHandle_, env.nativeHandle_); - return this; - } - - public ConfigOptions setInputStringsEscaped(final boolean escaped) { - setInputStringsEscaped(nativeHandle_, escaped); - return this; - } - - public ConfigOptions setSanityLevel(final SanityLevel level) { - setSanityLevel(nativeHandle_, level.getValue()); - return this; - } - - @Override protected final native void disposeInternal(final long handle); - - private native static long newConfigOptions(); - private native static void setEnv(final long handle, final long envHandle); - private native static void setDelimiter(final long handle, final String delimiter); - private native static void setIgnoreUnknownOptions(final long handle, final boolean ignore); - private native static void setInputStringsEscaped(final long handle, final boolean escaped); - private native static void setSanityLevel(final long handle, final byte level); -} diff --git a/java/src/main/java/org/rocksdb/DBOptions.java b/java/src/main/java/org/rocksdb/DBOptions.java deleted file mode 100644 index 9eb5ca873..000000000 --- a/java/src/main/java/org/rocksdb/DBOptions.java +++ /dev/null @@ -1,1496 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.file.Paths; -import java.util.*; - -/** - * DBOptions to control the behavior of a database. It will be used - * during the creation of a {@link org.rocksdb.RocksDB} (i.e., RocksDB.open()). - * - * As a descendent of {@link AbstractNativeReference}, this class is {@link AutoCloseable} - * and will be automatically released if opened in the preamble of a try with resources block. - */ -public class DBOptions extends RocksObject - implements DBOptionsInterface, - MutableDBOptionsInterface { - static { - RocksDB.loadLibrary(); - } - - /** - * Construct DBOptions. - * - * This constructor will create (by allocating a block of memory) - * an {@code rocksdb::DBOptions} in the c++ side. - */ - public DBOptions() { - super(newDBOptions()); - numShardBits_ = DEFAULT_NUM_SHARD_BITS; - env_ = Env.getDefault(); - } - - /** - * Copy constructor for DBOptions. - * - * NOTE: This does a shallow copy, which means env, rate_limiter, sst_file_manager, - * info_log and other pointers will be cloned! - * - * @param other The DBOptions to copy. - */ - public DBOptions(DBOptions other) { - super(copyDBOptions(other.nativeHandle_)); - this.env_ = other.env_; - this.numShardBits_ = other.numShardBits_; - this.rateLimiter_ = other.rateLimiter_; - this.rowCache_ = other.rowCache_; - this.walFilter_ = other.walFilter_; - this.writeBufferManager_ = other.writeBufferManager_; - } - - /** - * Constructor from Options - * - * @param options The options. 
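A hedged sketch of ConfigOptions in combination with the DBOptions.getDBOptionsFromProps factory shown further below; the property keys follow the snake_case naming convention described there, and the chosen keys and values are illustrative:

```java
import java.util.Properties;

import org.rocksdb.ConfigOptions;
import org.rocksdb.DBOptions;
import org.rocksdb.RocksDB;

public class DbOptionsFromPropsSketch {
  public static void main(final String[] args) {
    RocksDB.loadLibrary();
    final Properties props = new Properties();
    props.setProperty("create_if_missing", "true");
    props.setProperty("max_background_jobs", "4");

    try (final ConfigOptions cfgOpts = new ConfigOptions().setIgnoreUnknownOptions(true);
         final DBOptions dbOpts = DBOptions.getDBOptionsFromProps(cfgOpts, props)) {
      // getDBOptionsFromProps returns null if the properties could not be mapped.
      if (dbOpts != null) {
        System.out.println("create_if_missing = " + dbOpts.createIfMissing());
      }
    }
  }
}
```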
- */ - public DBOptions(final Options options) { - super(newDBOptionsFromOptions(options.nativeHandle_)); - } - - /** - *
<p>Method to get an options instance by using pre-configured - * property values. If one or many values are undefined in - * the context of RocksDB the method will return a null - * value.</p> - * - * <p>Note: Property keys can be derived from - * getter methods within the options class. Example: the method - * {@code allowMmapReads()} has a property key: - * {@code allow_mmap_reads}.</p>
- * - * @param cfgOpts The ConfigOptions to control how the string is processed. - * @param properties {@link java.util.Properties} instance. - * - * @return {@link org.rocksdb.DBOptions instance} - * or null. - * - * @throws java.lang.IllegalArgumentException if null or empty - * {@link java.util.Properties} instance is passed to the method call. - */ - public static DBOptions getDBOptionsFromProps( - final ConfigOptions cfgOpts, final Properties properties) { - DBOptions dbOptions = null; - final String optionsString = Options.getOptionStringFromProps(properties); - final long handle = getDBOptionsFromProps(cfgOpts.nativeHandle_, optionsString); - if (handle != 0) { - dbOptions = new DBOptions(handle); - } - return dbOptions; - } - - /** - *
<p>Method to get an options instance by using pre-configured - * property values. If one or many values are undefined in - * the context of RocksDB the method will return a null - * value.</p> - * - * <p>Note: Property keys can be derived from - * getter methods within the options class. Example: the method - * {@code allowMmapReads()} has a property key: - * {@code allow_mmap_reads}.</p>
- * - * @param properties {@link java.util.Properties} instance. - * - * @return {@link org.rocksdb.DBOptions instance} - * or null. - * - * @throws java.lang.IllegalArgumentException if null or empty - * {@link java.util.Properties} instance is passed to the method call. - */ - public static DBOptions getDBOptionsFromProps(final Properties properties) { - DBOptions dbOptions = null; - final String optionsString = Options.getOptionStringFromProps(properties); - final long handle = getDBOptionsFromProps(optionsString); - if (handle != 0) { - dbOptions = new DBOptions(handle); - } - return dbOptions; - } - - @Override - public DBOptions optimizeForSmallDb() { - optimizeForSmallDb(nativeHandle_); - return this; - } - - @Override - public DBOptions setIncreaseParallelism( - final int totalThreads) { - assert(isOwningHandle()); - setIncreaseParallelism(nativeHandle_, totalThreads); - return this; - } - - @Override - public DBOptions setCreateIfMissing(final boolean flag) { - assert(isOwningHandle()); - setCreateIfMissing(nativeHandle_, flag); - return this; - } - - @Override - public boolean createIfMissing() { - assert(isOwningHandle()); - return createIfMissing(nativeHandle_); - } - - @Override - public DBOptions setCreateMissingColumnFamilies( - final boolean flag) { - assert(isOwningHandle()); - setCreateMissingColumnFamilies(nativeHandle_, flag); - return this; - } - - @Override - public boolean createMissingColumnFamilies() { - assert(isOwningHandle()); - return createMissingColumnFamilies(nativeHandle_); - } - - @Override - public DBOptions setErrorIfExists( - final boolean errorIfExists) { - assert(isOwningHandle()); - setErrorIfExists(nativeHandle_, errorIfExists); - return this; - } - - @Override - public boolean errorIfExists() { - assert(isOwningHandle()); - return errorIfExists(nativeHandle_); - } - - @Override - public DBOptions setParanoidChecks( - final boolean paranoidChecks) { - assert(isOwningHandle()); - setParanoidChecks(nativeHandle_, paranoidChecks); - return this; - } - - @Override - public boolean paranoidChecks() { - assert(isOwningHandle()); - return paranoidChecks(nativeHandle_); - } - - @Override - public DBOptions setEnv(final Env env) { - setEnv(nativeHandle_, env.nativeHandle_); - this.env_ = env; - return this; - } - - @Override - public Env getEnv() { - return env_; - } - - @Override - public DBOptions setRateLimiter(final RateLimiter rateLimiter) { - assert(isOwningHandle()); - rateLimiter_ = rateLimiter; - setRateLimiter(nativeHandle_, rateLimiter.nativeHandle_); - return this; - } - - @Override - public DBOptions setSstFileManager(final SstFileManager sstFileManager) { - assert(isOwningHandle()); - setSstFileManager(nativeHandle_, sstFileManager.nativeHandle_); - return this; - } - - @Override - public DBOptions setLogger(final Logger logger) { - assert(isOwningHandle()); - setLogger(nativeHandle_, logger.nativeHandle_); - return this; - } - - @Override - public DBOptions setInfoLogLevel( - final InfoLogLevel infoLogLevel) { - assert(isOwningHandle()); - setInfoLogLevel(nativeHandle_, infoLogLevel.getValue()); - return this; - } - - @Override - public InfoLogLevel infoLogLevel() { - assert(isOwningHandle()); - return InfoLogLevel.getInfoLogLevel( - infoLogLevel(nativeHandle_)); - } - - @Override - public DBOptions setMaxOpenFiles( - final int maxOpenFiles) { - assert(isOwningHandle()); - setMaxOpenFiles(nativeHandle_, maxOpenFiles); - return this; - } - - @Override - public int maxOpenFiles() { - assert(isOwningHandle()); - return 
maxOpenFiles(nativeHandle_); - } - - @Override - public DBOptions setMaxFileOpeningThreads(final int maxFileOpeningThreads) { - assert(isOwningHandle()); - setMaxFileOpeningThreads(nativeHandle_, maxFileOpeningThreads); - return this; - } - - @Override - public int maxFileOpeningThreads() { - assert(isOwningHandle()); - return maxFileOpeningThreads(nativeHandle_); - } - - @Override - public DBOptions setMaxTotalWalSize( - final long maxTotalWalSize) { - assert(isOwningHandle()); - setMaxTotalWalSize(nativeHandle_, maxTotalWalSize); - return this; - } - - @Override - public long maxTotalWalSize() { - assert(isOwningHandle()); - return maxTotalWalSize(nativeHandle_); - } - - @Override - public DBOptions setStatistics(final Statistics statistics) { - assert(isOwningHandle()); - setStatistics(nativeHandle_, statistics.nativeHandle_); - return this; - } - - @Override - public Statistics statistics() { - assert(isOwningHandle()); - final long statisticsNativeHandle = statistics(nativeHandle_); - if(statisticsNativeHandle == 0) { - return null; - } else { - return new Statistics(statisticsNativeHandle); - } - } - - @Override - public DBOptions setUseFsync( - final boolean useFsync) { - assert(isOwningHandle()); - setUseFsync(nativeHandle_, useFsync); - return this; - } - - @Override - public boolean useFsync() { - assert(isOwningHandle()); - return useFsync(nativeHandle_); - } - - @Override - public DBOptions setDbPaths(final Collection dbPaths) { - assert(isOwningHandle()); - - final int len = dbPaths.size(); - final String[] paths = new String[len]; - final long[] targetSizes = new long[len]; - - int i = 0; - for(final DbPath dbPath : dbPaths) { - paths[i] = dbPath.path.toString(); - targetSizes[i] = dbPath.targetSize; - i++; - } - setDbPaths(nativeHandle_, paths, targetSizes); - return this; - } - - @Override - public List dbPaths() { - final int len = (int)dbPathsLen(nativeHandle_); - if(len == 0) { - return Collections.emptyList(); - } else { - final String[] paths = new String[len]; - final long[] targetSizes = new long[len]; - - dbPaths(nativeHandle_, paths, targetSizes); - - final List dbPaths = new ArrayList<>(); - for(int i = 0; i < len; i++) { - dbPaths.add(new DbPath(Paths.get(paths[i]), targetSizes[i])); - } - return dbPaths; - } - } - - @Override - public DBOptions setDbLogDir( - final String dbLogDir) { - assert(isOwningHandle()); - setDbLogDir(nativeHandle_, dbLogDir); - return this; - } - - @Override - public String dbLogDir() { - assert(isOwningHandle()); - return dbLogDir(nativeHandle_); - } - - @Override - public DBOptions setWalDir( - final String walDir) { - assert(isOwningHandle()); - setWalDir(nativeHandle_, walDir); - return this; - } - - @Override - public String walDir() { - assert(isOwningHandle()); - return walDir(nativeHandle_); - } - - @Override - public DBOptions setDeleteObsoleteFilesPeriodMicros( - final long micros) { - assert(isOwningHandle()); - setDeleteObsoleteFilesPeriodMicros(nativeHandle_, micros); - return this; - } - - @Override - public long deleteObsoleteFilesPeriodMicros() { - assert(isOwningHandle()); - return deleteObsoleteFilesPeriodMicros(nativeHandle_); - } - - @Override - public DBOptions setMaxBackgroundJobs(final int maxBackgroundJobs) { - assert(isOwningHandle()); - setMaxBackgroundJobs(nativeHandle_, maxBackgroundJobs); - return this; - } - - @Override - public int maxBackgroundJobs() { - assert(isOwningHandle()); - return maxBackgroundJobs(nativeHandle_); - } - - @Override - @Deprecated - public DBOptions setMaxBackgroundCompactions( - 
final int maxBackgroundCompactions) { - assert(isOwningHandle()); - setMaxBackgroundCompactions(nativeHandle_, maxBackgroundCompactions); - return this; - } - - @Override - @Deprecated - public int maxBackgroundCompactions() { - assert(isOwningHandle()); - return maxBackgroundCompactions(nativeHandle_); - } - - @Override - public DBOptions setMaxSubcompactions(final int maxSubcompactions) { - assert(isOwningHandle()); - setMaxSubcompactions(nativeHandle_, maxSubcompactions); - return this; - } - - @Override - public int maxSubcompactions() { - assert(isOwningHandle()); - return maxSubcompactions(nativeHandle_); - } - - @Override - @Deprecated - public DBOptions setMaxBackgroundFlushes( - final int maxBackgroundFlushes) { - assert(isOwningHandle()); - setMaxBackgroundFlushes(nativeHandle_, maxBackgroundFlushes); - return this; - } - - @Override - @Deprecated - public int maxBackgroundFlushes() { - assert(isOwningHandle()); - return maxBackgroundFlushes(nativeHandle_); - } - - @Override - public DBOptions setMaxLogFileSize(final long maxLogFileSize) { - assert(isOwningHandle()); - setMaxLogFileSize(nativeHandle_, maxLogFileSize); - return this; - } - - @Override - public long maxLogFileSize() { - assert(isOwningHandle()); - return maxLogFileSize(nativeHandle_); - } - - @Override - public DBOptions setLogFileTimeToRoll( - final long logFileTimeToRoll) { - assert(isOwningHandle()); - setLogFileTimeToRoll(nativeHandle_, logFileTimeToRoll); - return this; - } - - @Override - public long logFileTimeToRoll() { - assert(isOwningHandle()); - return logFileTimeToRoll(nativeHandle_); - } - - @Override - public DBOptions setKeepLogFileNum( - final long keepLogFileNum) { - assert(isOwningHandle()); - setKeepLogFileNum(nativeHandle_, keepLogFileNum); - return this; - } - - @Override - public long keepLogFileNum() { - assert(isOwningHandle()); - return keepLogFileNum(nativeHandle_); - } - - @Override - public DBOptions setRecycleLogFileNum(final long recycleLogFileNum) { - assert(isOwningHandle()); - setRecycleLogFileNum(nativeHandle_, recycleLogFileNum); - return this; - } - - @Override - public long recycleLogFileNum() { - assert(isOwningHandle()); - return recycleLogFileNum(nativeHandle_); - } - - @Override - public DBOptions setMaxManifestFileSize( - final long maxManifestFileSize) { - assert(isOwningHandle()); - setMaxManifestFileSize(nativeHandle_, maxManifestFileSize); - return this; - } - - @Override - public long maxManifestFileSize() { - assert(isOwningHandle()); - return maxManifestFileSize(nativeHandle_); - } - - @Override - public DBOptions setTableCacheNumshardbits( - final int tableCacheNumshardbits) { - assert(isOwningHandle()); - setTableCacheNumshardbits(nativeHandle_, tableCacheNumshardbits); - return this; - } - - @Override - public int tableCacheNumshardbits() { - assert(isOwningHandle()); - return tableCacheNumshardbits(nativeHandle_); - } - - @Override - public DBOptions setWalTtlSeconds( - final long walTtlSeconds) { - assert(isOwningHandle()); - setWalTtlSeconds(nativeHandle_, walTtlSeconds); - return this; - } - - @Override - public long walTtlSeconds() { - assert(isOwningHandle()); - return walTtlSeconds(nativeHandle_); - } - - @Override - public DBOptions setWalSizeLimitMB( - final long sizeLimitMB) { - assert(isOwningHandle()); - setWalSizeLimitMB(nativeHandle_, sizeLimitMB); - return this; - } - - @Override - public long walSizeLimitMB() { - assert(isOwningHandle()); - return walSizeLimitMB(nativeHandle_); - } - - @Override - public DBOptions 
setMaxWriteBatchGroupSizeBytes(final long maxWriteBatchGroupSizeBytes) { - setMaxWriteBatchGroupSizeBytes(nativeHandle_, maxWriteBatchGroupSizeBytes); - return this; - } - - @Override - public long maxWriteBatchGroupSizeBytes() { - assert (isOwningHandle()); - return maxWriteBatchGroupSizeBytes(nativeHandle_); - } - - @Override - public DBOptions setManifestPreallocationSize( - final long size) { - assert(isOwningHandle()); - setManifestPreallocationSize(nativeHandle_, size); - return this; - } - - @Override - public long manifestPreallocationSize() { - assert(isOwningHandle()); - return manifestPreallocationSize(nativeHandle_); - } - - @Override - public DBOptions setAllowMmapReads( - final boolean allowMmapReads) { - assert(isOwningHandle()); - setAllowMmapReads(nativeHandle_, allowMmapReads); - return this; - } - - @Override - public boolean allowMmapReads() { - assert(isOwningHandle()); - return allowMmapReads(nativeHandle_); - } - - @Override - public DBOptions setAllowMmapWrites( - final boolean allowMmapWrites) { - assert(isOwningHandle()); - setAllowMmapWrites(nativeHandle_, allowMmapWrites); - return this; - } - - @Override - public boolean allowMmapWrites() { - assert(isOwningHandle()); - return allowMmapWrites(nativeHandle_); - } - - @Override - public DBOptions setUseDirectReads( - final boolean useDirectReads) { - assert(isOwningHandle()); - setUseDirectReads(nativeHandle_, useDirectReads); - return this; - } - - @Override - public boolean useDirectReads() { - assert(isOwningHandle()); - return useDirectReads(nativeHandle_); - } - - @Override - public DBOptions setUseDirectIoForFlushAndCompaction( - final boolean useDirectIoForFlushAndCompaction) { - assert(isOwningHandle()); - setUseDirectIoForFlushAndCompaction(nativeHandle_, - useDirectIoForFlushAndCompaction); - return this; - } - - @Override - public boolean useDirectIoForFlushAndCompaction() { - assert(isOwningHandle()); - return useDirectIoForFlushAndCompaction(nativeHandle_); - } - - @Override - public DBOptions setAllowFAllocate(final boolean allowFAllocate) { - assert(isOwningHandle()); - setAllowFAllocate(nativeHandle_, allowFAllocate); - return this; - } - - @Override - public boolean allowFAllocate() { - assert(isOwningHandle()); - return allowFAllocate(nativeHandle_); - } - - @Override - public DBOptions setIsFdCloseOnExec( - final boolean isFdCloseOnExec) { - assert(isOwningHandle()); - setIsFdCloseOnExec(nativeHandle_, isFdCloseOnExec); - return this; - } - - @Override - public boolean isFdCloseOnExec() { - assert(isOwningHandle()); - return isFdCloseOnExec(nativeHandle_); - } - - @Override - public DBOptions setStatsDumpPeriodSec( - final int statsDumpPeriodSec) { - assert(isOwningHandle()); - setStatsDumpPeriodSec(nativeHandle_, statsDumpPeriodSec); - return this; - } - - @Override - public int statsDumpPeriodSec() { - assert(isOwningHandle()); - return statsDumpPeriodSec(nativeHandle_); - } - - @Override - public DBOptions setStatsPersistPeriodSec( - final int statsPersistPeriodSec) { - assert(isOwningHandle()); - setStatsPersistPeriodSec(nativeHandle_, statsPersistPeriodSec); - return this; - } - - @Override - public int statsPersistPeriodSec() { - assert(isOwningHandle()); - return statsPersistPeriodSec(nativeHandle_); - } - - @Override - public DBOptions setStatsHistoryBufferSize( - final long statsHistoryBufferSize) { - assert(isOwningHandle()); - setStatsHistoryBufferSize(nativeHandle_, statsHistoryBufferSize); - return this; - } - - @Override - public long statsHistoryBufferSize() { - 
assert(isOwningHandle()); - return statsHistoryBufferSize(nativeHandle_); - } - - @Override - public DBOptions setAdviseRandomOnOpen( - final boolean adviseRandomOnOpen) { - assert(isOwningHandle()); - setAdviseRandomOnOpen(nativeHandle_, adviseRandomOnOpen); - return this; - } - - @Override - public boolean adviseRandomOnOpen() { - return adviseRandomOnOpen(nativeHandle_); - } - - @Override - public DBOptions setDbWriteBufferSize(final long dbWriteBufferSize) { - assert(isOwningHandle()); - setDbWriteBufferSize(nativeHandle_, dbWriteBufferSize); - return this; - } - - @Override - public DBOptions setWriteBufferManager(final WriteBufferManager writeBufferManager) { - assert(isOwningHandle()); - setWriteBufferManager(nativeHandle_, writeBufferManager.nativeHandle_); - this.writeBufferManager_ = writeBufferManager; - return this; - } - - @Override - public WriteBufferManager writeBufferManager() { - assert(isOwningHandle()); - return this.writeBufferManager_; - } - - @Override - public long dbWriteBufferSize() { - assert(isOwningHandle()); - return dbWriteBufferSize(nativeHandle_); - } - - @Override - public DBOptions setAccessHintOnCompactionStart(final AccessHint accessHint) { - assert(isOwningHandle()); - setAccessHintOnCompactionStart(nativeHandle_, accessHint.getValue()); - return this; - } - - @Override - public AccessHint accessHintOnCompactionStart() { - assert(isOwningHandle()); - return AccessHint.getAccessHint(accessHintOnCompactionStart(nativeHandle_)); - } - - @Override - public DBOptions setCompactionReadaheadSize(final long compactionReadaheadSize) { - assert(isOwningHandle()); - setCompactionReadaheadSize(nativeHandle_, compactionReadaheadSize); - return this; - } - - @Override - public long compactionReadaheadSize() { - assert(isOwningHandle()); - return compactionReadaheadSize(nativeHandle_); - } - - @Override - public DBOptions setRandomAccessMaxBufferSize(final long randomAccessMaxBufferSize) { - assert(isOwningHandle()); - setRandomAccessMaxBufferSize(nativeHandle_, randomAccessMaxBufferSize); - return this; - } - - @Override - public long randomAccessMaxBufferSize() { - assert(isOwningHandle()); - return randomAccessMaxBufferSize(nativeHandle_); - } - - @Override - public DBOptions setWritableFileMaxBufferSize(final long writableFileMaxBufferSize) { - assert(isOwningHandle()); - setWritableFileMaxBufferSize(nativeHandle_, writableFileMaxBufferSize); - return this; - } - - @Override - public long writableFileMaxBufferSize() { - assert(isOwningHandle()); - return writableFileMaxBufferSize(nativeHandle_); - } - - @Override - public DBOptions setUseAdaptiveMutex( - final boolean useAdaptiveMutex) { - assert(isOwningHandle()); - setUseAdaptiveMutex(nativeHandle_, useAdaptiveMutex); - return this; - } - - @Override - public boolean useAdaptiveMutex() { - assert(isOwningHandle()); - return useAdaptiveMutex(nativeHandle_); - } - - @Override - public DBOptions setBytesPerSync( - final long bytesPerSync) { - assert(isOwningHandle()); - setBytesPerSync(nativeHandle_, bytesPerSync); - return this; - } - - @Override - public long bytesPerSync() { - return bytesPerSync(nativeHandle_); - } - - @Override - public DBOptions setWalBytesPerSync(final long walBytesPerSync) { - assert(isOwningHandle()); - setWalBytesPerSync(nativeHandle_, walBytesPerSync); - return this; - } - - @Override - public long walBytesPerSync() { - assert(isOwningHandle()); - return walBytesPerSync(nativeHandle_); - } - - @Override - public DBOptions setStrictBytesPerSync(final boolean strictBytesPerSync) { - 
assert(isOwningHandle()); - setStrictBytesPerSync(nativeHandle_, strictBytesPerSync); - return this; - } - - @Override - public boolean strictBytesPerSync() { - assert(isOwningHandle()); - return strictBytesPerSync(nativeHandle_); - } - - @Override - public DBOptions setListeners(final List listeners) { - assert (isOwningHandle()); - setEventListeners(nativeHandle_, RocksCallbackObject.toNativeHandleList(listeners)); - return this; - } - - @Override - public List listeners() { - assert (isOwningHandle()); - return Arrays.asList(eventListeners(nativeHandle_)); - } - - @Override - public DBOptions setEnableThreadTracking(final boolean enableThreadTracking) { - assert(isOwningHandle()); - setEnableThreadTracking(nativeHandle_, enableThreadTracking); - return this; - } - - @Override - public boolean enableThreadTracking() { - assert(isOwningHandle()); - return enableThreadTracking(nativeHandle_); - } - - @Override - public DBOptions setDelayedWriteRate(final long delayedWriteRate) { - assert(isOwningHandle()); - setDelayedWriteRate(nativeHandle_, delayedWriteRate); - return this; - } - - @Override - public long delayedWriteRate(){ - return delayedWriteRate(nativeHandle_); - } - - @Override - public DBOptions setEnablePipelinedWrite(final boolean enablePipelinedWrite) { - assert(isOwningHandle()); - setEnablePipelinedWrite(nativeHandle_, enablePipelinedWrite); - return this; - } - - @Override - public boolean enablePipelinedWrite() { - assert(isOwningHandle()); - return enablePipelinedWrite(nativeHandle_); - } - - @Override - public DBOptions setUnorderedWrite(final boolean unorderedWrite) { - setUnorderedWrite(nativeHandle_, unorderedWrite); - return this; - } - - @Override - public boolean unorderedWrite() { - return unorderedWrite(nativeHandle_); - } - - - @Override - public DBOptions setAllowConcurrentMemtableWrite( - final boolean allowConcurrentMemtableWrite) { - setAllowConcurrentMemtableWrite(nativeHandle_, - allowConcurrentMemtableWrite); - return this; - } - - @Override - public boolean allowConcurrentMemtableWrite() { - return allowConcurrentMemtableWrite(nativeHandle_); - } - - @Override - public DBOptions setEnableWriteThreadAdaptiveYield( - final boolean enableWriteThreadAdaptiveYield) { - setEnableWriteThreadAdaptiveYield(nativeHandle_, - enableWriteThreadAdaptiveYield); - return this; - } - - @Override - public boolean enableWriteThreadAdaptiveYield() { - return enableWriteThreadAdaptiveYield(nativeHandle_); - } - - @Override - public DBOptions setWriteThreadMaxYieldUsec(final long writeThreadMaxYieldUsec) { - setWriteThreadMaxYieldUsec(nativeHandle_, writeThreadMaxYieldUsec); - return this; - } - - @Override - public long writeThreadMaxYieldUsec() { - return writeThreadMaxYieldUsec(nativeHandle_); - } - - @Override - public DBOptions setWriteThreadSlowYieldUsec(final long writeThreadSlowYieldUsec) { - setWriteThreadSlowYieldUsec(nativeHandle_, writeThreadSlowYieldUsec); - return this; - } - - @Override - public long writeThreadSlowYieldUsec() { - return writeThreadSlowYieldUsec(nativeHandle_); - } - - @Override - public DBOptions setSkipStatsUpdateOnDbOpen(final boolean skipStatsUpdateOnDbOpen) { - assert(isOwningHandle()); - setSkipStatsUpdateOnDbOpen(nativeHandle_, skipStatsUpdateOnDbOpen); - return this; - } - - @Override - public boolean skipStatsUpdateOnDbOpen() { - assert(isOwningHandle()); - return skipStatsUpdateOnDbOpen(nativeHandle_); - } - - @Override - public DBOptions setSkipCheckingSstFileSizesOnDbOpen( - final boolean skipCheckingSstFileSizesOnDbOpen) { - 
setSkipCheckingSstFileSizesOnDbOpen(nativeHandle_, skipCheckingSstFileSizesOnDbOpen); - return this; - } - - @Override - public boolean skipCheckingSstFileSizesOnDbOpen() { - assert (isOwningHandle()); - return skipCheckingSstFileSizesOnDbOpen(nativeHandle_); - } - - @Override - public DBOptions setWalRecoveryMode(final WALRecoveryMode walRecoveryMode) { - assert(isOwningHandle()); - setWalRecoveryMode(nativeHandle_, walRecoveryMode.getValue()); - return this; - } - - @Override - public WALRecoveryMode walRecoveryMode() { - assert(isOwningHandle()); - return WALRecoveryMode.getWALRecoveryMode(walRecoveryMode(nativeHandle_)); - } - - @Override - public DBOptions setAllow2pc(final boolean allow2pc) { - assert(isOwningHandle()); - setAllow2pc(nativeHandle_, allow2pc); - return this; - } - - @Override - public boolean allow2pc() { - assert(isOwningHandle()); - return allow2pc(nativeHandle_); - } - - @Override - public DBOptions setRowCache(final Cache rowCache) { - assert(isOwningHandle()); - setRowCache(nativeHandle_, rowCache.nativeHandle_); - this.rowCache_ = rowCache; - return this; - } - - @Override - public Cache rowCache() { - assert(isOwningHandle()); - return this.rowCache_; - } - - @Override - public DBOptions setWalFilter(final AbstractWalFilter walFilter) { - assert(isOwningHandle()); - setWalFilter(nativeHandle_, walFilter.nativeHandle_); - this.walFilter_ = walFilter; - return this; - } - - @Override - public WalFilter walFilter() { - assert(isOwningHandle()); - return this.walFilter_; - } - - @Override - public DBOptions setFailIfOptionsFileError(final boolean failIfOptionsFileError) { - assert(isOwningHandle()); - setFailIfOptionsFileError(nativeHandle_, failIfOptionsFileError); - return this; - } - - @Override - public boolean failIfOptionsFileError() { - assert(isOwningHandle()); - return failIfOptionsFileError(nativeHandle_); - } - - @Override - public DBOptions setDumpMallocStats(final boolean dumpMallocStats) { - assert(isOwningHandle()); - setDumpMallocStats(nativeHandle_, dumpMallocStats); - return this; - } - - @Override - public boolean dumpMallocStats() { - assert(isOwningHandle()); - return dumpMallocStats(nativeHandle_); - } - - @Override - public DBOptions setAvoidFlushDuringRecovery(final boolean avoidFlushDuringRecovery) { - assert(isOwningHandle()); - setAvoidFlushDuringRecovery(nativeHandle_, avoidFlushDuringRecovery); - return this; - } - - @Override - public boolean avoidFlushDuringRecovery() { - assert(isOwningHandle()); - return avoidFlushDuringRecovery(nativeHandle_); - } - - @Override - public DBOptions setAvoidFlushDuringShutdown(final boolean avoidFlushDuringShutdown) { - assert(isOwningHandle()); - setAvoidFlushDuringShutdown(nativeHandle_, avoidFlushDuringShutdown); - return this; - } - - @Override - public boolean avoidFlushDuringShutdown() { - assert(isOwningHandle()); - return avoidFlushDuringShutdown(nativeHandle_); - } - - @Override - public DBOptions setAllowIngestBehind(final boolean allowIngestBehind) { - assert(isOwningHandle()); - setAllowIngestBehind(nativeHandle_, allowIngestBehind); - return this; - } - - @Override - public boolean allowIngestBehind() { - assert(isOwningHandle()); - return allowIngestBehind(nativeHandle_); - } - - @Override - public DBOptions setTwoWriteQueues(final boolean twoWriteQueues) { - assert(isOwningHandle()); - setTwoWriteQueues(nativeHandle_, twoWriteQueues); - return this; - } - - @Override - public boolean twoWriteQueues() { - assert(isOwningHandle()); - return twoWriteQueues(nativeHandle_); - } - - @Override 
- public DBOptions setManualWalFlush(final boolean manualWalFlush) { - assert(isOwningHandle()); - setManualWalFlush(nativeHandle_, manualWalFlush); - return this; - } - - @Override - public boolean manualWalFlush() { - assert(isOwningHandle()); - return manualWalFlush(nativeHandle_); - } - - @Override - public DBOptions setAtomicFlush(final boolean atomicFlush) { - setAtomicFlush(nativeHandle_, atomicFlush); - return this; - } - - @Override - public boolean atomicFlush() { - return atomicFlush(nativeHandle_); - } - - @Override - public DBOptions setAvoidUnnecessaryBlockingIO(final boolean avoidUnnecessaryBlockingIO) { - setAvoidUnnecessaryBlockingIO(nativeHandle_, avoidUnnecessaryBlockingIO); - return this; - } - - @Override - public boolean avoidUnnecessaryBlockingIO() { - assert (isOwningHandle()); - return avoidUnnecessaryBlockingIO(nativeHandle_); - } - - @Override - public DBOptions setPersistStatsToDisk(final boolean persistStatsToDisk) { - setPersistStatsToDisk(nativeHandle_, persistStatsToDisk); - return this; - } - - @Override - public boolean persistStatsToDisk() { - assert (isOwningHandle()); - return persistStatsToDisk(nativeHandle_); - } - - @Override - public DBOptions setWriteDbidToManifest(final boolean writeDbidToManifest) { - setWriteDbidToManifest(nativeHandle_, writeDbidToManifest); - return this; - } - - @Override - public boolean writeDbidToManifest() { - assert (isOwningHandle()); - return writeDbidToManifest(nativeHandle_); - } - - @Override - public DBOptions setLogReadaheadSize(final long logReadaheadSize) { - setLogReadaheadSize(nativeHandle_, logReadaheadSize); - return this; - } - - @Override - public long logReadaheadSize() { - assert (isOwningHandle()); - return logReadaheadSize(nativeHandle_); - } - - @Override - public DBOptions setBestEffortsRecovery(final boolean bestEffortsRecovery) { - setBestEffortsRecovery(nativeHandle_, bestEffortsRecovery); - return this; - } - - @Override - public boolean bestEffortsRecovery() { - assert (isOwningHandle()); - return bestEffortsRecovery(nativeHandle_); - } - - @Override - public DBOptions setMaxBgErrorResumeCount(final int maxBgerrorResumeCount) { - setMaxBgErrorResumeCount(nativeHandle_, maxBgerrorResumeCount); - return this; - } - - @Override - public int maxBgerrorResumeCount() { - assert (isOwningHandle()); - return maxBgerrorResumeCount(nativeHandle_); - } - - @Override - public DBOptions setBgerrorResumeRetryInterval(final long bgerrorResumeRetryInterval) { - setBgerrorResumeRetryInterval(nativeHandle_, bgerrorResumeRetryInterval); - return this; - } - - @Override - public long bgerrorResumeRetryInterval() { - assert (isOwningHandle()); - return bgerrorResumeRetryInterval(nativeHandle_); - } - - static final int DEFAULT_NUM_SHARD_BITS = -1; - - - - - /** - *

<p>Private constructor to be used by
- * {@link #getDBOptionsFromProps(java.util.Properties)}</p>
- * - * @param nativeHandle native handle to DBOptions instance. - */ - private DBOptions(final long nativeHandle) { - super(nativeHandle); - } - - private static native long getDBOptionsFromProps(long cfgHandle, String optString); - private static native long getDBOptionsFromProps(String optString); - - private static native long newDBOptions(); - private static native long copyDBOptions(final long handle); - private static native long newDBOptionsFromOptions(final long optionsHandle); - @Override protected final native void disposeInternal(final long handle); - - private native void optimizeForSmallDb(final long handle); - private native void setIncreaseParallelism(long handle, int totalThreads); - private native void setCreateIfMissing(long handle, boolean flag); - private native boolean createIfMissing(long handle); - private native void setCreateMissingColumnFamilies( - long handle, boolean flag); - private native boolean createMissingColumnFamilies(long handle); - private native void setEnv(long handle, long envHandle); - private native void setErrorIfExists(long handle, boolean errorIfExists); - private native boolean errorIfExists(long handle); - private native void setParanoidChecks( - long handle, boolean paranoidChecks); - private native boolean paranoidChecks(long handle); - private native void setRateLimiter(long handle, - long rateLimiterHandle); - private native void setSstFileManager(final long handle, - final long sstFileManagerHandle); - private native void setLogger(long handle, - long loggerHandle); - private native void setInfoLogLevel(long handle, byte logLevel); - private native byte infoLogLevel(long handle); - private native void setMaxOpenFiles(long handle, int maxOpenFiles); - private native int maxOpenFiles(long handle); - private native void setMaxFileOpeningThreads(final long handle, - final int maxFileOpeningThreads); - private native int maxFileOpeningThreads(final long handle); - private native void setMaxTotalWalSize(long handle, - long maxTotalWalSize); - private native long maxTotalWalSize(long handle); - private native void setStatistics(final long handle, final long statisticsHandle); - private native long statistics(final long handle); - private native boolean useFsync(long handle); - private native void setUseFsync(long handle, boolean useFsync); - private native void setDbPaths(final long handle, final String[] paths, - final long[] targetSizes); - private native long dbPathsLen(final long handle); - private native void dbPaths(final long handle, final String[] paths, - final long[] targetSizes); - private native void setDbLogDir(long handle, String dbLogDir); - private native String dbLogDir(long handle); - private native void setWalDir(long handle, String walDir); - private native String walDir(long handle); - private native void setDeleteObsoleteFilesPeriodMicros( - long handle, long micros); - private native long deleteObsoleteFilesPeriodMicros(long handle); - private native void setMaxBackgroundCompactions( - long handle, int maxBackgroundCompactions); - private native int maxBackgroundCompactions(long handle); - private native void setMaxSubcompactions(long handle, int maxSubcompactions); - private native int maxSubcompactions(long handle); - private native void setMaxBackgroundFlushes( - long handle, int maxBackgroundFlushes); - private native int maxBackgroundFlushes(long handle); - private native void setMaxBackgroundJobs(long handle, int maxBackgroundJobs); - private native int maxBackgroundJobs(long handle); - private native void 
setMaxLogFileSize(long handle, long maxLogFileSize) - throws IllegalArgumentException; - private native long maxLogFileSize(long handle); - private native void setLogFileTimeToRoll( - long handle, long logFileTimeToRoll) throws IllegalArgumentException; - private native long logFileTimeToRoll(long handle); - private native void setKeepLogFileNum(long handle, long keepLogFileNum) - throws IllegalArgumentException; - private native long keepLogFileNum(long handle); - private native void setRecycleLogFileNum(long handle, long recycleLogFileNum); - private native long recycleLogFileNum(long handle); - private native void setMaxManifestFileSize( - long handle, long maxManifestFileSize); - private native long maxManifestFileSize(long handle); - private native void setTableCacheNumshardbits( - long handle, int tableCacheNumshardbits); - private native int tableCacheNumshardbits(long handle); - private native void setWalTtlSeconds(long handle, long walTtlSeconds); - private native long walTtlSeconds(long handle); - private native void setWalSizeLimitMB(long handle, long sizeLimitMB); - private native long walSizeLimitMB(long handle); - private static native void setMaxWriteBatchGroupSizeBytes( - final long handle, final long maxWriteBatchGroupSizeBytes); - private static native long maxWriteBatchGroupSizeBytes(final long handle); - private native void setManifestPreallocationSize( - long handle, long size) throws IllegalArgumentException; - private native long manifestPreallocationSize(long handle); - private native void setUseDirectReads(long handle, boolean useDirectReads); - private native boolean useDirectReads(long handle); - private native void setUseDirectIoForFlushAndCompaction( - long handle, boolean useDirectIoForFlushAndCompaction); - private native boolean useDirectIoForFlushAndCompaction(long handle); - private native void setAllowFAllocate(final long handle, - final boolean allowFAllocate); - private native boolean allowFAllocate(final long handle); - private native void setAllowMmapReads( - long handle, boolean allowMmapReads); - private native boolean allowMmapReads(long handle); - private native void setAllowMmapWrites( - long handle, boolean allowMmapWrites); - private native boolean allowMmapWrites(long handle); - private native void setIsFdCloseOnExec( - long handle, boolean isFdCloseOnExec); - private native boolean isFdCloseOnExec(long handle); - private native void setStatsDumpPeriodSec( - long handle, int statsDumpPeriodSec); - private native int statsDumpPeriodSec(long handle); - private native void setStatsPersistPeriodSec( - final long handle, final int statsPersistPeriodSec); - private native int statsPersistPeriodSec( - final long handle); - private native void setStatsHistoryBufferSize( - final long handle, final long statsHistoryBufferSize); - private native long statsHistoryBufferSize( - final long handle); - private native void setAdviseRandomOnOpen( - long handle, boolean adviseRandomOnOpen); - private native boolean adviseRandomOnOpen(long handle); - private native void setDbWriteBufferSize(final long handle, - final long dbWriteBufferSize); - private native void setWriteBufferManager(final long dbOptionsHandle, - final long writeBufferManagerHandle); - private native long dbWriteBufferSize(final long handle); - private native void setAccessHintOnCompactionStart(final long handle, - final byte accessHintOnCompactionStart); - private native byte accessHintOnCompactionStart(final long handle); - private native void setCompactionReadaheadSize(final long handle, - 
final long compactionReadaheadSize); - private native long compactionReadaheadSize(final long handle); - private native void setRandomAccessMaxBufferSize(final long handle, - final long randomAccessMaxBufferSize); - private native long randomAccessMaxBufferSize(final long handle); - private native void setWritableFileMaxBufferSize(final long handle, - final long writableFileMaxBufferSize); - private native long writableFileMaxBufferSize(final long handle); - private native void setUseAdaptiveMutex( - long handle, boolean useAdaptiveMutex); - private native boolean useAdaptiveMutex(long handle); - private native void setBytesPerSync( - long handle, long bytesPerSync); - private native long bytesPerSync(long handle); - private native void setWalBytesPerSync(long handle, long walBytesPerSync); - private native long walBytesPerSync(long handle); - private native void setStrictBytesPerSync( - final long handle, final boolean strictBytesPerSync); - private native boolean strictBytesPerSync( - final long handle); - private static native void setEventListeners( - final long handle, final long[] eventListenerHandles); - private static native AbstractEventListener[] eventListeners(final long handle); - private native void setEnableThreadTracking(long handle, - boolean enableThreadTracking); - private native boolean enableThreadTracking(long handle); - private native void setDelayedWriteRate(long handle, long delayedWriteRate); - private native long delayedWriteRate(long handle); - private native void setEnablePipelinedWrite(final long handle, - final boolean enablePipelinedWrite); - private native boolean enablePipelinedWrite(final long handle); - private native void setUnorderedWrite(final long handle, - final boolean unorderedWrite); - private native boolean unorderedWrite(final long handle); - private native void setAllowConcurrentMemtableWrite(long handle, - boolean allowConcurrentMemtableWrite); - private native boolean allowConcurrentMemtableWrite(long handle); - private native void setEnableWriteThreadAdaptiveYield(long handle, - boolean enableWriteThreadAdaptiveYield); - private native boolean enableWriteThreadAdaptiveYield(long handle); - private native void setWriteThreadMaxYieldUsec(long handle, - long writeThreadMaxYieldUsec); - private native long writeThreadMaxYieldUsec(long handle); - private native void setWriteThreadSlowYieldUsec(long handle, - long writeThreadSlowYieldUsec); - private native long writeThreadSlowYieldUsec(long handle); - private native void setSkipStatsUpdateOnDbOpen(final long handle, - final boolean skipStatsUpdateOnDbOpen); - private native boolean skipStatsUpdateOnDbOpen(final long handle); - private static native void setSkipCheckingSstFileSizesOnDbOpen( - final long handle, final boolean skipChecking); - private static native boolean skipCheckingSstFileSizesOnDbOpen(final long handle); - private native void setWalRecoveryMode(final long handle, - final byte walRecoveryMode); - private native byte walRecoveryMode(final long handle); - private native void setAllow2pc(final long handle, - final boolean allow2pc); - private native boolean allow2pc(final long handle); - private native void setRowCache(final long handle, - final long rowCacheHandle); - private native void setWalFilter(final long handle, - final long walFilterHandle); - private native void setFailIfOptionsFileError(final long handle, - final boolean failIfOptionsFileError); - private native boolean failIfOptionsFileError(final long handle); - private native void setDumpMallocStats(final long handle, 
- final boolean dumpMallocStats); - private native boolean dumpMallocStats(final long handle); - private native void setAvoidFlushDuringRecovery(final long handle, - final boolean avoidFlushDuringRecovery); - private native boolean avoidFlushDuringRecovery(final long handle); - private native void setAvoidFlushDuringShutdown(final long handle, - final boolean avoidFlushDuringShutdown); - private native boolean avoidFlushDuringShutdown(final long handle); - private native void setAllowIngestBehind(final long handle, - final boolean allowIngestBehind); - private native boolean allowIngestBehind(final long handle); - private native void setTwoWriteQueues(final long handle, - final boolean twoWriteQueues); - private native boolean twoWriteQueues(final long handle); - private native void setManualWalFlush(final long handle, - final boolean manualWalFlush); - private native boolean manualWalFlush(final long handle); - private native void setAtomicFlush(final long handle, - final boolean atomicFlush); - private native boolean atomicFlush(final long handle); - private static native void setAvoidUnnecessaryBlockingIO( - final long handle, final boolean avoidBlockingIO); - private static native boolean avoidUnnecessaryBlockingIO(final long handle); - private static native void setPersistStatsToDisk( - final long handle, final boolean persistStatsToDisk); - private static native boolean persistStatsToDisk(final long handle); - private static native void setWriteDbidToManifest( - final long handle, final boolean writeDbidToManifest); - private static native boolean writeDbidToManifest(final long handle); - private static native void setLogReadaheadSize(final long handle, final long logReadaheadSize); - private static native long logReadaheadSize(final long handle); - private static native void setBestEffortsRecovery( - final long handle, final boolean bestEffortsRecovery); - private static native boolean bestEffortsRecovery(final long handle); - private static native void setMaxBgErrorResumeCount( - final long handle, final int maxBgerrorRecumeCount); - private static native int maxBgerrorResumeCount(final long handle); - private static native void setBgerrorResumeRetryInterval( - final long handle, final long bgerrorResumeRetryInterval); - private static native long bgerrorResumeRetryInterval(final long handle); - - // instance variables - // NOTE: If you add new member variables, please update the copy constructor above! - private Env env_; - private int numShardBits_; - private RateLimiter rateLimiter_; - private Cache rowCache_; - private WalFilter walFilter_; - private WriteBufferManager writeBufferManager_; -} diff --git a/java/src/main/java/org/rocksdb/DBOptionsInterface.java b/java/src/main/java/org/rocksdb/DBOptionsInterface.java deleted file mode 100644 index ef1b86bff..000000000 --- a/java/src/main/java/org/rocksdb/DBOptionsInterface.java +++ /dev/null @@ -1,1756 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Collection; -import java.util.List; - -public interface DBOptionsInterface> { - /** - * Use this if your DB is very small (like under 1GB) and you don't want to - * spend lots of memory for memtables. - * - * @return the instance of the current object. 
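- *
- * <p>A minimal usage sketch; the path is an illustrative assumption and the
- * enclosing method is assumed to declare {@code throws RocksDBException}:</p>
- * <pre>{@code
- * // Tune an Options instance for a small database and open it.
- * try (final Options options = new Options().optimizeForSmallDb().setCreateIfMissing(true);
- *      final RocksDB db = RocksDB.open(options, "/tmp/small-db")) {
- *   db.put("key".getBytes(), "value".getBytes());
- * }
- * }</pre>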
- */ - T optimizeForSmallDb(); - - /** - * Use the specified object to interact with the environment, - * e.g. to read/write files, schedule background work, etc. - * Default: {@link Env#getDefault()} - * - * @param env {@link Env} instance. - * @return the instance of the current Options. - */ - T setEnv(final Env env); - - /** - * Returns the set RocksEnv instance. - * - * @return {@link RocksEnv} instance set in the options. - */ - Env getEnv(); - - /** - *

<p>By default, RocksDB uses only one background thread for flush and
- * compaction. Calling this function will set it up such that a total of
- * `total_threads` threads is used.</p>
- *
- * <p>You almost definitely want to call this function if your system is
- * bottlenecked by RocksDB.</p>
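- *
- * <p>A minimal usage sketch; sizing the pools to the available core count is an
- * illustrative choice, not a recommendation:</p>
- * <pre>{@code
- * // Let RocksDB use one background thread per available core.
- * final DBOptions dbOptions = new DBOptions()
- *     .setIncreaseParallelism(Runtime.getRuntime().availableProcessors());
- * }</pre>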
- * - * @param totalThreads The total number of threads to be used by RocksDB. - * A good value is the number of cores. - * - * @return the instance of the current Options - */ - T setIncreaseParallelism(int totalThreads); - - /** - * If this value is set to true, then the database will be created - * if it is missing during {@code RocksDB.open()}. - * Default: false - * - * @param flag a flag indicating whether to create a database the - * specified database in {@link RocksDB#open(org.rocksdb.Options, String)} operation - * is missing. - * @return the instance of the current Options - * @see RocksDB#open(org.rocksdb.Options, String) - */ - T setCreateIfMissing(boolean flag); - - /** - * Return true if the create_if_missing flag is set to true. - * If true, the database will be created if it is missing. - * - * @return true if the createIfMissing option is set to true. - * @see #setCreateIfMissing(boolean) - */ - boolean createIfMissing(); - - /** - *

<p>If true, missing column families will be automatically created.</p>
- *
- * <p>Default: false</p>
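- *
- * <p>A minimal sketch of opening a database with a column family that may not
- * exist yet; the path and column family name are illustrative assumptions, and
- * the enclosing method is assumed to declare {@code throws RocksDBException}:</p>
- * <pre>{@code
- * final DBOptions dbOptions = new DBOptions()
- *     .setCreateIfMissing(true)
- *     .setCreateMissingColumnFamilies(true);
- * final List<ColumnFamilyDescriptor> descriptors = Arrays.asList(
- *     new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
- *     new ColumnFamilyDescriptor("web".getBytes()));
- * final List<ColumnFamilyHandle> handles = new ArrayList<>();
- * try (final RocksDB db = RocksDB.open(dbOptions, "/tmp/db", descriptors, handles)) {
- *   // any missing column families are created during open
- * }
- * }</pre>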
- * - * @param flag a flag indicating if missing column families shall be - * created automatically. - * @return true if missing column families shall be created automatically - * on open. - */ - T setCreateMissingColumnFamilies(boolean flag); - - /** - * Return true if the create_missing_column_families flag is set - * to true. If true column families be created if missing. - * - * @return true if the createMissingColumnFamilies is set to - * true. - * @see #setCreateMissingColumnFamilies(boolean) - */ - boolean createMissingColumnFamilies(); - - /** - * If true, an error will be thrown during RocksDB.open() if the - * database already exists. - * Default: false - * - * @param errorIfExists if true, an exception will be thrown - * during {@code RocksDB.open()} if the database already exists. - * @return the reference to the current option. - * @see RocksDB#open(org.rocksdb.Options, String) - */ - T setErrorIfExists(boolean errorIfExists); - - /** - * If true, an error will be thrown during RocksDB.open() if the - * database already exists. - * - * @return if true, an error is raised when the specified database - * already exists before open. - */ - boolean errorIfExists(); - - /** - * If true, the implementation will do aggressive checking of the - * data it is processing and will stop early if it detects any - * errors. This may have unforeseen ramifications: for example, a - * corruption of one DB entry may cause a large number of entries to - * become unreadable or for the entire DB to become unopenable. - * If any of the writes to the database fails (Put, Delete, Merge, Write), - * the database will switch to read-only mode and fail all other - * Write operations. - * Default: true - * - * @param paranoidChecks a flag to indicate whether paranoid-check - * is on. - * @return the reference to the current option. - */ - T setParanoidChecks(boolean paranoidChecks); - - /** - * If true, the implementation will do aggressive checking of the - * data it is processing and will stop early if it detects any - * errors. This may have unforeseen ramifications: for example, a - * corruption of one DB entry may cause a large number of entries to - * become unreadable or for the entire DB to become unopenable. - * If any of the writes to the database fails (Put, Delete, Merge, Write), - * the database will switch to read-only mode and fail all other - * Write operations. - * - * @return a boolean indicating whether paranoid-check is on. - */ - boolean paranoidChecks(); - - /** - * Use to control write rate of flush and compaction. Flush has higher - * priority than compaction. Rate limiting is disabled if nullptr. - * Default: nullptr - * - * @param rateLimiter {@link org.rocksdb.RateLimiter} instance. - * @return the instance of the current object. - * - * @since 3.10.0 - */ - T setRateLimiter(RateLimiter rateLimiter); - - /** - * Use to track SST files and control their file deletion rate. - * - * Features: - * - Throttle the deletion rate of the SST files. - * - Keep track the total size of all SST files. - * - Set a maximum allowed space limit for SST files that when reached - * the DB wont do any further flushes or compactions and will set the - * background error. - * - Can be shared between multiple dbs. - * - * Limitations: - * - Only track and throttle deletes of SST files in - * first db_path (db_name if db_paths is empty). - * - * @param sstFileManager The SST File Manager for the db. - * @return the instance of the current object. 
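- *
- * <p>A minimal usage sketch, assuming an existing {@code DBOptions} instance
- * {@code dbOptions}; the delete rate shown is an illustrative assumption:</p>
- * <pre>{@code
- * // Throttle SST file deletions to roughly 64 MB/s.
- * final SstFileManager sstFileManager = new SstFileManager(Env.getDefault());
- * sstFileManager.setDeleteRateBytesPerSecond(64L * 1024 * 1024);
- * dbOptions.setSstFileManager(sstFileManager);
- * }</pre>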
- */ - T setSstFileManager(SstFileManager sstFileManager); - - /** - *

<p>Any internal progress/error information generated by
- * the db will be written to the Logger if it is non-nullptr,
- * or to a file stored in the same directory as the DB
- * contents if info_log is nullptr.</p>
- *
- * <p>Default: nullptr</p>
- * - * @param logger {@link Logger} instance. - * @return the instance of the current object. - */ - T setLogger(Logger logger); - - /** - *

<p>Sets the RocksDB log level. Default level is INFO.</p>
- * - * @param infoLogLevel log level to set. - * @return the instance of the current object. - */ - T setInfoLogLevel(InfoLogLevel infoLogLevel); - - /** - *

<p>Returns the currently set log level.</p>
- * @return {@link org.rocksdb.InfoLogLevel} instance. - */ - InfoLogLevel infoLogLevel(); - - /** - * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open - * all files on DB::Open(). You can use this option to increase the number - * of threads used to open the files. - * - * Default: 16 - * - * @param maxFileOpeningThreads the maximum number of threads to use to - * open files - * - * @return the reference to the current options. - */ - T setMaxFileOpeningThreads(int maxFileOpeningThreads); - - /** - * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open all - * files on DB::Open(). You can use this option to increase the number of - * threads used to open the files. - * - * Default: 16 - * - * @return the maximum number of threads to use to open files - */ - int maxFileOpeningThreads(); - - /** - *

<p>Sets the statistics object which collects metrics about database operations.
- * Statistics objects should not be shared between DB instances as
- * it does not use any locks to prevent concurrent updates.</p>
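- *
- * <p>A minimal usage sketch, assuming an existing {@code DBOptions} instance
- * {@code dbOptions}; the ticker queried is an arbitrary illustrative choice:</p>
- * <pre>{@code
- * // Attach a statistics collector and read a counter after running a workload.
- * final Statistics statistics = new Statistics();
- * dbOptions.setStatistics(statistics);
- * // ... run the workload ...
- * final long blockCacheMisses = statistics.getTickerCount(TickerType.BLOCK_CACHE_MISS);
- * }</pre>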
- * - * @param statistics The statistics to set - * - * @return the instance of the current object. - * - * @see RocksDB#open(org.rocksdb.Options, String) - */ - T setStatistics(final Statistics statistics); - - /** - *

<p>Returns statistics object.</p>
- * - * @return the instance of the statistics object or null if there is no - * statistics object. - * - * @see #setStatistics(Statistics) - */ - Statistics statistics(); - - /** - *

<p>If true, then every store to stable storage will issue an fsync.</p>
- * <p>If false, then every store to stable storage will issue a fdatasync.
- * This parameter should be set to true while storing data to
- * a filesystem like ext3 that can lose files after a reboot.</p>
- * <p>Default: false</p>
- * - * @param useFsync a boolean flag to specify whether to use fsync - * @return the instance of the current object. - */ - T setUseFsync(boolean useFsync); - - /** - *

<p>If true, then every store to stable storage will issue an fsync.</p>
- * <p>If false, then every store to stable storage will issue a fdatasync.
- * This parameter should be set to true while storing data to
- * a filesystem like ext3 that can lose files after a reboot.</p>
- * - * @return boolean value indicating if fsync is used. - */ - boolean useFsync(); - - /** - * A list of paths where SST files can be put into, with its target size. - * Newer data is placed into paths specified earlier in the vector while - * older data gradually moves to paths specified later in the vector. - * - * For example, you have a flash device with 10GB allocated for the DB, - * as well as a hard drive of 2TB, you should config it to be: - * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] - * - * The system will try to guarantee data under each path is close to but - * not larger than the target size. But current and future file sizes used - * by determining where to place a file are based on best-effort estimation, - * which means there is a chance that the actual size under the directory - * is slightly more than target size under some workloads. User should give - * some buffer room for those cases. - * - * If none of the paths has sufficient room to place a file, the file will - * be placed to the last path anyway, despite to the target size. - * - * Placing newer data to earlier paths is also best-efforts. User should - * expect user files to be placed in higher levels in some extreme cases. - * - * If left empty, only one path will be used, which is db_name passed when - * opening the DB. - * - * Default: empty - * - * @param dbPaths the paths and target sizes - * - * @return the reference to the current options - */ - T setDbPaths(final Collection dbPaths); - - /** - * A list of paths where SST files can be put into, with its target size. - * Newer data is placed into paths specified earlier in the vector while - * older data gradually moves to paths specified later in the vector. - * - * For example, you have a flash device with 10GB allocated for the DB, - * as well as a hard drive of 2TB, you should config it to be: - * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] - * - * The system will try to guarantee data under each path is close to but - * not larger than the target size. But current and future file sizes used - * by determining where to place a file are based on best-effort estimation, - * which means there is a chance that the actual size under the directory - * is slightly more than target size under some workloads. User should give - * some buffer room for those cases. - * - * If none of the paths has sufficient room to place a file, the file will - * be placed to the last path anyway, despite to the target size. - * - * Placing newer data to earlier paths is also best-efforts. User should - * expect user files to be placed in higher levels in some extreme cases. - * - * If left empty, only one path will be used, which is db_name passed when - * opening the DB. - * - * Default: {@link java.util.Collections#emptyList()} - * - * @return dbPaths the paths and target sizes - */ - List dbPaths(); - - /** - * This specifies the info LOG dir. - * If it is empty, the log files will be in the same dir as data. - * If it is non empty, the log files will be in the specified dir, - * and the db data dir's absolute path will be used as the log file - * name's prefix. - * - * @param dbLogDir the path to the info log directory - * @return the instance of the current object. - */ - T setDbLogDir(String dbLogDir); - - /** - * Returns the directory of info log. - * - * If it is empty, the log files will be in the same dir as data. 
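- *
- * <p>A combined sketch of the path-related options described above, assuming an
- * existing {@code DBOptions} instance {@code dbOptions}; the paths and target
- * sizes are illustrative assumptions:</p>
- * <pre>{@code
- * // Tiered SST placement plus a dedicated info-log directory.
- * dbOptions.setDbPaths(Arrays.asList(
- *         new DbPath(Paths.get("/flash_path"), 10L * 1024 * 1024 * 1024),
- *         new DbPath(Paths.get("/hard_drive"), 2L * 1024 * 1024 * 1024 * 1024)))
- *     .setDbLogDir("/var/log/rocksdb");
- * }</pre>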
- * If it is non empty, the log files will be in the specified dir, - * and the db data dir's absolute path will be used as the log file - * name's prefix. - * - * @return the path to the info log directory - */ - String dbLogDir(); - - /** - * This specifies the absolute dir path for write-ahead logs (WAL). - * If it is empty, the log files will be in the same dir as data, - * dbname is used as the data dir by default - * If it is non empty, the log files will be in kept the specified dir. - * When destroying the db, - * all log files in wal_dir and the dir itself is deleted - * - * @param walDir the path to the write-ahead-log directory. - * @return the instance of the current object. - */ - T setWalDir(String walDir); - - /** - * Returns the path to the write-ahead-logs (WAL) directory. - * - * If it is empty, the log files will be in the same dir as data, - * dbname is used as the data dir by default - * If it is non empty, the log files will be in kept the specified dir. - * When destroying the db, - * all log files in wal_dir and the dir itself is deleted - * - * @return the path to the write-ahead-logs (WAL) directory. - */ - String walDir(); - - /** - * The periodicity when obsolete files get deleted. The default - * value is 6 hours. The files that get out of scope by compaction - * process will still get automatically delete on every compaction, - * regardless of this setting - * - * @param micros the time interval in micros - * @return the instance of the current object. - */ - T setDeleteObsoleteFilesPeriodMicros(long micros); - - /** - * The periodicity when obsolete files get deleted. The default - * value is 6 hours. The files that get out of scope by compaction - * process will still get automatically delete on every compaction, - * regardless of this setting - * - * @return the time interval in micros when obsolete files will be deleted. - */ - long deleteObsoleteFilesPeriodMicros(); - - /** - * This value represents the maximum number of threads that will - * concurrently perform a compaction job by breaking it into multiple, - * smaller ones that are run simultaneously. - * Default: 1 (i.e. no subcompactions) - * - * @param maxSubcompactions The maximum number of threads that will - * concurrently perform a compaction job - * - * @return the instance of the current object. - */ - T setMaxSubcompactions(int maxSubcompactions); - - /** - * This value represents the maximum number of threads that will - * concurrently perform a compaction job by breaking it into multiple, - * smaller ones that are run simultaneously. - * Default: 1 (i.e. no subcompactions) - * - * @return The maximum number of threads that will concurrently perform a - * compaction job - */ - int maxSubcompactions(); - - /** - * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the - * value of max_background_jobs. For backwards compatibility we will set - * `max_background_jobs = max_background_compactions + max_background_flushes` - * in the case where user sets at least one of `max_background_compactions` or - * `max_background_flushes`. - * - * Specifies the maximum number of concurrent background flush jobs. - * If you're increasing this, also consider increasing number of threads in - * HIGH priority thread pool. For more information, see - * Default: -1 - * - * @param maxBackgroundFlushes number of max concurrent flush jobs - * @return the instance of the current object. 
- * - * @see RocksEnv#setBackgroundThreads(int) - * @see RocksEnv#setBackgroundThreads(int, Priority) - * @see MutableDBOptionsInterface#maxBackgroundCompactions() - * - * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)} - */ - @Deprecated - T setMaxBackgroundFlushes(int maxBackgroundFlushes); - - /** - * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the - * value of max_background_jobs. For backwards compatibility we will set - * `max_background_jobs = max_background_compactions + max_background_flushes` - * in the case where user sets at least one of `max_background_compactions` or - * `max_background_flushes`. - * - * Returns the maximum number of concurrent background flush jobs. - * If you're increasing this, also consider increasing number of threads in - * HIGH priority thread pool. For more information, see - * Default: -1 - * - * @return the maximum number of concurrent background flush jobs. - * @see RocksEnv#setBackgroundThreads(int) - * @see RocksEnv#setBackgroundThreads(int, Priority) - */ - @Deprecated - int maxBackgroundFlushes(); - - /** - * Specifies the maximum size of a info log file. If the current log file - * is larger than `max_log_file_size`, a new info log file will - * be created. - * If 0, all logs will be written to one log file. - * - * @param maxLogFileSize the maximum size of a info log file. - * @return the instance of the current object. - * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms - * while overflowing the underlying platform specific value. - */ - T setMaxLogFileSize(long maxLogFileSize); - - /** - * Returns the maximum size of a info log file. If the current log file - * is larger than this size, a new info log file will be created. - * If 0, all logs will be written to one log file. - * - * @return the maximum size of the info log file. - */ - long maxLogFileSize(); - - /** - * Specifies the time interval for the info log file to roll (in seconds). - * If specified with non-zero value, log file will be rolled - * if it has been active longer than `log_file_time_to_roll`. - * Default: 0 (disabled) - * - * @param logFileTimeToRoll the time interval in seconds. - * @return the instance of the current object. - * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms - * while overflowing the underlying platform specific value. - */ - T setLogFileTimeToRoll(long logFileTimeToRoll); - - /** - * Returns the time interval for the info log file to roll (in seconds). - * If specified with non-zero value, log file will be rolled - * if it has been active longer than `log_file_time_to_roll`. - * Default: 0 (disabled) - * - * @return the time interval in seconds. - */ - long logFileTimeToRoll(); - - /** - * Specifies the maximum number of info log files to be kept. - * Default: 1000 - * - * @param keepLogFileNum the maximum number of info log files to be kept. - * @return the instance of the current object. - * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms - * while overflowing the underlying platform specific value. - */ - T setKeepLogFileNum(long keepLogFileNum); - - /** - * Returns the maximum number of info log files to be kept. - * Default: 1000 - * - * @return the maximum number of info log files to be kept. - */ - long keepLogFileNum(); - - /** - * Recycle log files. - * - * If non-zero, we will reuse previously written log files for new - * logs, overwriting the old data. 
The value indicates how many - * such files we will keep around at any point in time for later - * use. - * - * This is more efficient because the blocks are already - * allocated and fdatasync does not need to update the inode after - * each write. - * - * Default: 0 - * - * @param recycleLogFileNum the number of log files to keep for recycling - * - * @return the reference to the current options - */ - T setRecycleLogFileNum(long recycleLogFileNum); - - /** - * Recycle log files. - * - * If non-zero, we will reuse previously written log files for new - * logs, overwriting the old data. The value indicates how many - * such files we will keep around at any point in time for later - * use. - * - * This is more efficient because the blocks are already - * allocated and fdatasync does not need to update the inode after - * each write. - * - * Default: 0 - * - * @return the number of log files kept for recycling - */ - long recycleLogFileNum(); - - /** - * Manifest file is rolled over on reaching this limit. - * The older manifest file be deleted. - * The default value is 1GB so that the manifest file can grow, but not - * reach the limit of storage capacity. - * - * @param maxManifestFileSize the size limit of a manifest file. - * @return the instance of the current object. - */ - T setMaxManifestFileSize(long maxManifestFileSize); - - /** - * Manifest file is rolled over on reaching this limit. - * The older manifest file be deleted. - * The default value is 1GB so that the manifest file can grow, but not - * reach the limit of storage capacity. - * - * @return the size limit of a manifest file. - */ - long maxManifestFileSize(); - - /** - * Number of shards used for table cache. - * - * @param tableCacheNumshardbits the number of chards - * @return the instance of the current object. - */ - T setTableCacheNumshardbits(int tableCacheNumshardbits); - - /** - * Number of shards used for table cache. - * - * @return the number of shards used for table cache. - */ - int tableCacheNumshardbits(); - - /** - * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs - * will be deleted. - *
    - *
<ul>
- *   <li>If both set to 0, logs will be deleted asap and will not get into
- *     the archive.</li>
- *   <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
- *     WAL files will be checked every 10 min and if total size is greater
- *     than WAL_size_limit_MB, they will be deleted starting with the
- *     earliest until size_limit is met. All empty files will be deleted.</li>
- *   <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
- *     WAL files will be checked every WAL_ttl_seconds / 2 and those that
- *     are older than WAL_ttl_seconds will be deleted.</li>
- *   <li>If both are not 0, WAL files will be checked every 10 min and both
- *     checks will be performed with ttl being first.</li>
- * </ul>
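- *
- * <p>For example, to keep archived WAL files for at most one hour while also
- * capping the archive size (both values are illustrative assumptions), assuming
- * an existing {@code DBOptions} instance {@code dbOptions}:</p>
- * <pre>{@code
- * dbOptions.setWalTtlSeconds(60 * 60)  // delete archived WALs older than one hour
- *          .setWalSizeLimitMB(1024);   // and keep the archive under roughly 1 GB
- * }</pre>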
- * - * @param walTtlSeconds the ttl seconds - * @return the instance of the current object. - * @see #setWalSizeLimitMB(long) - */ - T setWalTtlSeconds(long walTtlSeconds); - - /** - * WalTtlSeconds() and walSizeLimitMB() affect how archived logs - * will be deleted. - *
    - *
<ul>
- *   <li>If both set to 0, logs will be deleted asap and will not get into
- *     the archive.</li>
- *   <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
- *     WAL files will be checked every 10 min and if total size is greater
- *     than WAL_size_limit_MB, they will be deleted starting with the
- *     earliest until size_limit is met. All empty files will be deleted.</li>
- *   <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
- *     WAL files will be checked every WAL_ttl_seconds / 2 and those that
- *     are older than WAL_ttl_seconds will be deleted.</li>
- *   <li>If both are not 0, WAL files will be checked every 10 min and both
- *     checks will be performed with ttl being first.</li>
- * </ul>
- * - * @return the wal-ttl seconds - * @see #walSizeLimitMB() - */ - long walTtlSeconds(); - - /** - * WalTtlSeconds() and walSizeLimitMB() affect how archived logs - * will be deleted. - *
    - *
<ul>
- *   <li>If both set to 0, logs will be deleted asap and will not get into
- *     the archive.</li>
- *   <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
- *     WAL files will be checked every 10 min and if total size is greater
- *     than WAL_size_limit_MB, they will be deleted starting with the
- *     earliest until size_limit is met. All empty files will be deleted.</li>
- *   <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
- *     WAL files will be checked every WAL_ttl_seconds / 2 and those that
- *     are older than WAL_ttl_seconds will be deleted.</li>
- *   <li>If both are not 0, WAL files will be checked every 10 min and both
- *     checks will be performed with ttl being first.</li>
- * </ul>
- * - * @param sizeLimitMB size limit in mega-bytes. - * @return the instance of the current object. - * @see #setWalSizeLimitMB(long) - */ - T setWalSizeLimitMB(long sizeLimitMB); - - /** - * {@link #walTtlSeconds()} and {@code #walSizeLimitMB()} affect how archived logs - * will be deleted. - *
    - *
<ul>
- *   <li>If both set to 0, logs will be deleted asap and will not get into
- *     the archive.</li>
- *   <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
- *     WAL files will be checked every 10 min and if total size is greater
- *     than WAL_size_limit_MB, they will be deleted starting with the
- *     earliest until size_limit is met. All empty files will be deleted.</li>
- *   <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
- *     WAL files will be checked every WAL_ttl_seconds / 2 and those that
- *     are older than WAL_ttl_seconds will be deleted.</li>
- *   <li>If both are not 0, WAL files will be checked every 10 min and both
- *     checks will be performed with ttl being first.</li>
- * </ul>
- * @return size limit in mega-bytes. - * @see #walSizeLimitMB() - */ - long walSizeLimitMB(); - - /** - * The maximum limit of number of bytes that are written in a single batch - * of WAL or memtable write. It is followed when the leader write size - * is larger than 1/8 of this limit. - * - * Default: 1 MB - * - * @param maxWriteBatchGroupSizeBytes the maximum limit of number of bytes, see description. - * @return the instance of the current object. - */ - T setMaxWriteBatchGroupSizeBytes(final long maxWriteBatchGroupSizeBytes); - - /** - * The maximum limit of number of bytes that are written in a single batch - * of WAL or memtable write. It is followed when the leader write size - * is larger than 1/8 of this limit. - * - * Default: 1 MB - * - * @return the maximum limit of number of bytes, see description. - */ - long maxWriteBatchGroupSizeBytes(); - - /** - * Number of bytes to preallocate (via fallocate) the manifest - * files. Default is 4mb, which is reasonable to reduce random IO - * as well as prevent overallocation for mounts that preallocate - * large amounts of data (such as xfs's allocsize option). - * - * @param size the size in byte - * @return the instance of the current object. - * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms - * while overflowing the underlying platform specific value. - */ - T setManifestPreallocationSize(long size); - - /** - * Number of bytes to preallocate (via fallocate) the manifest - * files. Default is 4mb, which is reasonable to reduce random IO - * as well as prevent overallocation for mounts that preallocate - * large amounts of data (such as xfs's allocsize option). - * - * @return size in bytes. - */ - long manifestPreallocationSize(); - - /** - * Enable the OS to use direct I/O for reading sst tables. - * Default: false - * - * @param useDirectReads if true, then direct read is enabled - * @return the instance of the current object. - */ - T setUseDirectReads(boolean useDirectReads); - - /** - * Enable the OS to use direct I/O for reading sst tables. - * Default: false - * - * @return if true, then direct reads are enabled - */ - boolean useDirectReads(); - - /** - * Enable the OS to use direct reads and writes in flush and - * compaction - * Default: false - * - * @param useDirectIoForFlushAndCompaction if true, then direct - * I/O will be enabled for background flush and compactions - * @return the instance of the current object. - */ - T setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction); - - /** - * Enable the OS to use direct reads and writes in flush and - * compaction - * - * @return if true, then direct I/O is enabled for flush and - * compaction - */ - boolean useDirectIoForFlushAndCompaction(); - - /** - * Whether fallocate calls are allowed - * - * @param allowFAllocate false if fallocate() calls are bypassed - * - * @return the reference to the current options. - */ - T setAllowFAllocate(boolean allowFAllocate); - - /** - * Whether fallocate calls are allowed - * - * @return false if fallocate() calls are bypassed - */ - boolean allowFAllocate(); - - /** - * Allow the OS to mmap file for reading sst tables. - * Default: false - * - * @param allowMmapReads true if mmap reads are allowed. - * @return the instance of the current object. - */ - T setAllowMmapReads(boolean allowMmapReads); - - /** - * Allow the OS to mmap file for reading sst tables. - * Default: false - * - * @return true if mmap reads are allowed. 
- */ - boolean allowMmapReads(); - - /** - * Allow the OS to mmap file for writing. Default: false - * - * @param allowMmapWrites true if mmap writes are allowd. - * @return the instance of the current object. - */ - T setAllowMmapWrites(boolean allowMmapWrites); - - /** - * Allow the OS to mmap file for writing. Default: false - * - * @return true if mmap writes are allowed. - */ - boolean allowMmapWrites(); - - /** - * Disable child process inherit open files. Default: true - * - * @param isFdCloseOnExec true if child process inheriting open - * files is disabled. - * @return the instance of the current object. - */ - T setIsFdCloseOnExec(boolean isFdCloseOnExec); - - /** - * Disable child process inherit open files. Default: true - * - * @return true if child process inheriting open files is disabled. - */ - boolean isFdCloseOnExec(); - - /** - * If set true, will hint the underlying file system that the file - * access pattern is random, when a sst file is opened. - * Default: true - * - * @param adviseRandomOnOpen true if hinting random access is on. - * @return the instance of the current object. - */ - T setAdviseRandomOnOpen(boolean adviseRandomOnOpen); - - /** - * If set true, will hint the underlying file system that the file - * access pattern is random, when a sst file is opened. - * Default: true - * - * @return true if hinting random access is on. - */ - boolean adviseRandomOnOpen(); - - /** - * Amount of data to build up in memtables across all column - * families before writing to disk. - * - * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()}, - * which enforces a limit for a single memtable. - * - * This feature is disabled by default. Specify a non-zero value - * to enable it. - * - * Default: 0 (disabled) - * - * @param dbWriteBufferSize the size of the write buffer - * - * @return the reference to the current options. - */ - T setDbWriteBufferSize(long dbWriteBufferSize); - - /** - * Use passed {@link WriteBufferManager} to control memory usage across - * multiple column families and/or DB instances. - * - * Check - * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager - * for more details on when to use it - * - * @param writeBufferManager The WriteBufferManager to use - * @return the reference of the current options. - */ - T setWriteBufferManager(final WriteBufferManager writeBufferManager); - - /** - * Reference to {@link WriteBufferManager} used by it.
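- *
- * <p>A minimal usage sketch of the setter above, assuming an existing
- * {@code DBOptions} instance {@code dbOptions}; the cache and buffer sizes are
- * illustrative assumptions:</p>
- * <pre>{@code
- * // Charge memtable memory against a shared block cache, capped at 128 MB.
- * final Cache cache = new LRUCache(512L * 1024 * 1024);
- * final WriteBufferManager writeBufferManager =
- *     new WriteBufferManager(128L * 1024 * 1024, cache);
- * dbOptions.setWriteBufferManager(writeBufferManager);
- * }</pre>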
- * - * Default: null (Disabled) - * - * @return a reference to WriteBufferManager - */ - WriteBufferManager writeBufferManager(); - - /** - * Amount of data to build up in memtables across all column - * families before writing to disk. - * - * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()}, - * which enforces a limit for a single memtable. - * - * This feature is disabled by default. Specify a non-zero value - * to enable it. - * - * Default: 0 (disabled) - * - * @return the size of the write buffer - */ - long dbWriteBufferSize(); - - /** - * Specify the file access pattern once a compaction is started. - * It will be applied to all input files of a compaction. - * - * Default: {@link AccessHint#NORMAL} - * - * @param accessHint The access hint - * - * @return the reference to the current options. - */ - T setAccessHintOnCompactionStart(final AccessHint accessHint); - - /** - * Specify the file access pattern once a compaction is started. - * It will be applied to all input files of a compaction. - * - * Default: {@link AccessHint#NORMAL} - * - * @return The access hint - */ - AccessHint accessHintOnCompactionStart(); - - /** - * This is a maximum buffer size that is used by WinMmapReadableFile in - * unbuffered disk I/O mode. We need to maintain an aligned buffer for - * reads. We allow the buffer to grow until the specified value and then - * for bigger requests allocate one shot buffers. In unbuffered mode we - * always bypass read-ahead buffer at ReadaheadRandomAccessFile - * When read-ahead is required we then make use of - * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and - * always try to read ahead. - * With read-ahead we always pre-allocate buffer to the size instead of - * growing it up to a limit. - * - * This option is currently honored only on Windows - * - * Default: 1 Mb - * - * Special value: 0 - means do not maintain per instance buffer. Allocate - * per request buffer and avoid locking. - * - * @param randomAccessMaxBufferSize the maximum size of the random access - * buffer - * - * @return the reference to the current options. - */ - T setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize); - - /** - * This is a maximum buffer size that is used by WinMmapReadableFile in - * unbuffered disk I/O mode. We need to maintain an aligned buffer for - * reads. We allow the buffer to grow until the specified value and then - * for bigger requests allocate one shot buffers. In unbuffered mode we - * always bypass read-ahead buffer at ReadaheadRandomAccessFile - * When read-ahead is required we then make use of - * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and - * always try to read ahead. With read-ahead we always pre-allocate buffer - * to the size instead of growing it up to a limit. - * - * This option is currently honored only on Windows - * - * Default: 1 Mb - * - * Special value: 0 - means do not maintain per instance buffer. Allocate - * per request buffer and avoid locking. - * - * @return the maximum size of the random access buffer - */ - long randomAccessMaxBufferSize(); - - /** - * Use adaptive mutex, which spins in the user space before resorting - * to kernel. This could reduce context switch when the mutex is not - * heavily contended. However, if the mutex is hot, we could end up - * wasting spin time. - * Default: false - * - * @param useAdaptiveMutex true if adaptive mutex is used. - * @return the instance of the current object. 
- */ - T setUseAdaptiveMutex(boolean useAdaptiveMutex); - - /** - * Use adaptive mutex, which spins in the user space before resorting - * to kernel. This could reduce context switch when the mutex is not - * heavily contended. However, if the mutex is hot, we could end up - * wasting spin time. - * Default: false - * - * @return true if adaptive mutex is used. - */ - boolean useAdaptiveMutex(); - - /** - * Sets the {@link EventListener}s whose callback functions - * will be called when specific RocksDB event happens. - * - * Note: the RocksJava API currently only supports EventListeners implemented in Java. - * It could be extended in future to also support adding/removing EventListeners implemented in - * C++. - * - * @param listeners the listeners who should be notified on various events. - * - * @return the instance of the current object. - */ - T setListeners(final List listeners); - - /** - * Sets the {@link EventListener}s whose callback functions - * will be called when specific RocksDB event happens. - * - * Note: the RocksJava API currently only supports EventListeners implemented in Java. - * It could be extended in future to also support adding/removing EventListeners implemented in - * C++. - * - * @return the instance of the current object. - */ - List listeners(); - - /** - * If true, then the status of the threads involved in this DB will - * be tracked and available via GetThreadList() API. - * - * Default: false - * - * @param enableThreadTracking true to enable tracking - * - * @return the reference to the current options. - */ - T setEnableThreadTracking(boolean enableThreadTracking); - - /** - * If true, then the status of the threads involved in this DB will - * be tracked and available via GetThreadList() API. - * - * Default: false - * - * @return true if tracking is enabled - */ - boolean enableThreadTracking(); - - /** - * By default, a single write thread queue is maintained. The thread gets - * to the head of the queue becomes write batch group leader and responsible - * for writing to WAL and memtable for the batch group. - * - * If {@link #enablePipelinedWrite()} is true, separate write thread queue is - * maintained for WAL write and memtable write. A write thread first enter WAL - * writer queue and then memtable writer queue. Pending thread on the WAL - * writer queue thus only have to wait for previous writers to finish their - * WAL writing but not the memtable writing. Enabling the feature may improve - * write throughput and reduce latency of the prepare phase of two-phase - * commit. - * - * Default: false - * - * @param enablePipelinedWrite true to enabled pipelined writes - * - * @return the reference to the current options. - */ - T setEnablePipelinedWrite(final boolean enablePipelinedWrite); - - /** - * Returns true if pipelined writes are enabled. - * See {@link #setEnablePipelinedWrite(boolean)}. - * - * @return true if pipelined writes are enabled, false otherwise. - */ - boolean enablePipelinedWrite(); - - /** - * Setting {@link #unorderedWrite()} to true trades higher write throughput with - * relaxing the immutability guarantee of snapshots. This violates the - * repeatability one expects from ::Get from a snapshot, as well as - * ::MultiGet and Iterator's consistent-point-in-time view property. - * If the application cannot tolerate the relaxed guarantees, it can implement - * its own mechanisms to work around that and yet benefit from the higher - * throughput. 
Using TransactionDB with WRITE_PREPARED write policy and - * {@link #twoWriteQueues()} true is one way to achieve immutable snapshots despite - * unordered_write. - * - * By default, i.e., when it is false, rocksdb does not advance the sequence - * number for new snapshots unless all the writes with lower sequence numbers - * are already finished. This provides the immutability that we except from - * snapshots. Moreover, since Iterator and MultiGet internally depend on - * snapshots, the snapshot immutability results into Iterator and MultiGet - * offering consistent-point-in-time view. If set to true, although - * Read-Your-Own-Write property is still provided, the snapshot immutability - * property is relaxed: the writes issued after the snapshot is obtained (with - * larger sequence numbers) will be still not visible to the reads from that - * snapshot, however, there still might be pending writes (with lower sequence - * number) that will change the state visible to the snapshot after they are - * landed to the memtable. - * - * @param unorderedWrite true to enabled unordered write - * - * @return the reference to the current options. - */ - T setUnorderedWrite(final boolean unorderedWrite); - - /** - * Returns true if unordered write are enabled. - * See {@link #setUnorderedWrite(boolean)}. - * - * @return true if unordered write are enabled, false otherwise. - */ - boolean unorderedWrite(); - - /** - * If true, allow multi-writers to update mem tables in parallel. - * Only some memtable factorys support concurrent writes; currently it - * is implemented only for SkipListFactory. Concurrent memtable writes - * are not compatible with inplace_update_support or filter_deletes. - * It is strongly recommended to set - * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use - * this feature. - * Default: true - * - * @param allowConcurrentMemtableWrite true to enable concurrent writes - * for the memtable - * - * @return the reference to the current options. - */ - T setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite); - - /** - * If true, allow multi-writers to update mem tables in parallel. - * Only some memtable factorys support concurrent writes; currently it - * is implemented only for SkipListFactory. Concurrent memtable writes - * are not compatible with inplace_update_support or filter_deletes. - * It is strongly recommended to set - * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use - * this feature. - * Default: true - * - * @return true if concurrent writes are enabled for the memtable - */ - boolean allowConcurrentMemtableWrite(); - - /** - * If true, threads synchronizing with the write batch group leader will - * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a - * mutex. This can substantially improve throughput for concurrent workloads, - * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled. - * Default: true - * - * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the - * write threads - * - * @return the reference to the current options. - */ - T setEnableWriteThreadAdaptiveYield( - boolean enableWriteThreadAdaptiveYield); - - /** - * If true, threads synchronizing with the write batch group leader will - * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a - * mutex. This can substantially improve throughput for concurrent workloads, - * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled. 
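A minimal sketch of the unordered-write trade-off discussed above, assuming the usual RocksJava Options builder; the TransactionDB/WRITE_PREPARED pairing that restores immutable snapshots is only referenced in a comment here:

// org.rocksdb.* imports omitted for brevity.
// Trade snapshot immutability for write throughput. If snapshot-consistent
// reads are still required, pair this with a TransactionDB using the
// WRITE_PREPARED policy and twoWriteQueues(), as the Javadoc above suggests.
final Options options = new Options()
    .setCreateIfMissing(true)
    .setUnorderedWrite(true)
    .setTwoWriteQueues(true);
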
- * Default: true - * - * @return true if adaptive yield is enabled - * for the writing threads - */ - boolean enableWriteThreadAdaptiveYield(); - - /** - * The maximum number of microseconds that a write operation will use - * a yielding spin loop to coordinate with other write threads before - * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is - * set properly) increasing this value is likely to increase RocksDB - * throughput at the expense of increased CPU usage. - * Default: 100 - * - * @param writeThreadMaxYieldUsec maximum number of microseconds - * - * @return the reference to the current options. - */ - T setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec); - - /** - * The maximum number of microseconds that a write operation will use - * a yielding spin loop to coordinate with other write threads before - * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is - * set properly) increasing this value is likely to increase RocksDB - * throughput at the expense of increased CPU usage. - * Default: 100 - * - * @return the maximum number of microseconds - */ - long writeThreadMaxYieldUsec(); - - /** - * The latency in microseconds after which a std::this_thread::yield - * call (sched_yield on Linux) is considered to be a signal that - * other processes or threads would like to use the current core. - * Increasing this makes writer threads more likely to take CPU - * by spinning, which will show up as an increase in the number of - * involuntary context switches. - * Default: 3 - * - * @param writeThreadSlowYieldUsec the latency in microseconds - * - * @return the reference to the current options. - */ - T setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec); - - /** - * The latency in microseconds after which a std::this_thread::yield - * call (sched_yield on Linux) is considered to be a signal that - * other processes or threads would like to use the current core. - * Increasing this makes writer threads more likely to take CPU - * by spinning, which will show up as an increase in the number of - * involuntary context switches. - * Default: 3 - * - * @return writeThreadSlowYieldUsec the latency in microseconds - */ - long writeThreadSlowYieldUsec(); - - /** - * If true, then DB::Open() will not update the statistics used to optimize - * compaction decision by loading table properties from many files. - * Turning off this feature will improve DBOpen time especially in - * disk environment. - * - * Default: false - * - * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped - * - * @return the reference to the current options. - */ - T setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen); - - /** - * If true, then DB::Open() will not update the statistics used to optimize - * compaction decision by loading table properties from many files. - * Turning off this feature will improve DBOpen time especially in - * disk environment. - * - * Default: false - * - * @return true if updating stats will be skipped - */ - boolean skipStatsUpdateOnDbOpen(); - - /** - * If true, then {@link RocksDB#open(String)} will not fetch and check sizes of all sst files. - * This may significantly speed up startup if there are many sst files, - * especially when using non-default Env with expensive GetFileSize(). - * We'll still check that all required sst files exist. - * If {@code paranoid_checks} is false, this option is ignored, and sst files are - * not checked at all. 
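The write-thread knobs above combine roughly as follows; this is an illustrative sketch, with values picked only to show the shape of the API:

// org.rocksdb.* imports omitted for brevity.
// Concurrent memtable inserts (SkipList-based memtables only) plus the
// adaptive-yield spin loop, tuned slightly above its defaults.
final Options options = new Options()
    .setAllowConcurrentMemtableWrite(true)
    .setEnableWriteThreadAdaptiveYield(true)   // recommended with concurrent writes
    .setWriteThreadMaxYieldUsec(200)           // default 100
    .setWriteThreadSlowYieldUsec(3);           // default 3
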
- * - * Default: false - * - * @param skipCheckingSstFileSizesOnDbOpen if true, then SST file sizes will not be checked - * when calling {@link RocksDB#open(String)}. - * @return the reference to the current options. - */ - T setSkipCheckingSstFileSizesOnDbOpen(final boolean skipCheckingSstFileSizesOnDbOpen); - - /** - * If true, then {@link RocksDB#open(String)} will not fetch and check sizes of all sst files. - * This may significantly speed up startup if there are many sst files, - * especially when using non-default Env with expensive GetFileSize(). - * We'll still check that all required sst files exist. - * If {@code paranoid_checks} is false, this option is ignored, and sst files are - * not checked at all. - * - * Default: false - * - * @return true, if file sizes will not be checked when calling {@link RocksDB#open(String)}. - */ - boolean skipCheckingSstFileSizesOnDbOpen(); - - /** - * Recovery mode to control the consistency while replaying WAL - * - * Default: {@link WALRecoveryMode#PointInTimeRecovery} - * - * @param walRecoveryMode The WAL recover mode - * - * @return the reference to the current options. - */ - T setWalRecoveryMode(WALRecoveryMode walRecoveryMode); - - /** - * Recovery mode to control the consistency while replaying WAL - * - * Default: {@link WALRecoveryMode#PointInTimeRecovery} - * - * @return The WAL recover mode - */ - WALRecoveryMode walRecoveryMode(); - - /** - * if set to false then recovery will fail when a prepared - * transaction is encountered in the WAL - * - * Default: false - * - * @param allow2pc true if two-phase-commit is enabled - * - * @return the reference to the current options. - */ - T setAllow2pc(boolean allow2pc); - - /** - * if set to false then recovery will fail when a prepared - * transaction is encountered in the WAL - * - * Default: false - * - * @return true if two-phase-commit is enabled - */ - boolean allow2pc(); - - /** - * A global cache for table-level rows. - * - * Default: null (disabled) - * - * @param rowCache The global row cache - * - * @return the reference to the current options. - */ - T setRowCache(final Cache rowCache); - - /** - * A global cache for table-level rows. - * - * Default: null (disabled) - * - * @return The global row cache - */ - Cache rowCache(); - - /** - * A filter object supplied to be invoked while processing write-ahead-logs - * (WALs) during recovery. The filter provides a way to inspect log - * records, ignoring a particular record or skipping replay. - * The filter is invoked at startup and is invoked from a single-thread - * currently. - * - * @param walFilter the filter for processing WALs during recovery. - * - * @return the reference to the current options. - */ - T setWalFilter(final AbstractWalFilter walFilter); - - /** - * Get's the filter for processing WALs during recovery. - * See {@link #setWalFilter(AbstractWalFilter)}. - * - * @return the filter used for processing WALs during recovery. - */ - WalFilter walFilter(); - - /** - * If true, then DB::Open / CreateColumnFamily / DropColumnFamily - * / SetOptions will fail if options file is not detected or properly - * persisted. - * - * DEFAULT: false - * - * @param failIfOptionsFileError true if we should fail if there is an error - * in the options file - * - * @return the reference to the current options. 
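For context, a hedged sketch combining the WAL recovery mode and the row cache described above; LRUCache is assumed from the wider RocksJava API (it is not part of this excerpt) and the cache size is arbitrary:

// org.rocksdb.* imports omitted for brevity.
// Point-in-time WAL recovery (the documented default) made explicit, plus a
// 64 MB DB-wide row cache.
final DBOptions dbOptions = new DBOptions()
    .setCreateIfMissing(true)
    .setWalRecoveryMode(WALRecoveryMode.PointInTimeRecovery)
    .setRowCache(new LRUCache(64 * 1024 * 1024));
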
- */ - T setFailIfOptionsFileError(boolean failIfOptionsFileError); - - /** - * If true, then DB::Open / CreateColumnFamily / DropColumnFamily - * / SetOptions will fail if options file is not detected or properly - * persisted. - * - * DEFAULT: false - * - * @return true if we should fail if there is an error in the options file - */ - boolean failIfOptionsFileError(); - - /** - * If true, then print malloc stats together with rocksdb.stats - * when printing to LOG. - * - * DEFAULT: false - * - * @param dumpMallocStats true if malloc stats should be printed to LOG - * - * @return the reference to the current options. - */ - T setDumpMallocStats(boolean dumpMallocStats); - - /** - * If true, then print malloc stats together with rocksdb.stats - * when printing to LOG. - * - * DEFAULT: false - * - * @return true if malloc stats should be printed to LOG - */ - boolean dumpMallocStats(); - - /** - * By default RocksDB replay WAL logs and flush them on DB open, which may - * create very small SST files. If this option is enabled, RocksDB will try - * to avoid (but not guarantee not to) flush during recovery. Also, existing - * WAL logs will be kept, so that if crash happened before flush, we still - * have logs to recover from. - * - * DEFAULT: false - * - * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee - * not to) flush during recovery - * - * @return the reference to the current options. - */ - T setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery); - - /** - * By default RocksDB replay WAL logs and flush them on DB open, which may - * create very small SST files. If this option is enabled, RocksDB will try - * to avoid (but not guarantee not to) flush during recovery. Also, existing - * WAL logs will be kept, so that if crash happened before flush, we still - * have logs to recover from. - * - * DEFAULT: false - * - * @return true to try to avoid (but not guarantee not to) flush during - * recovery - */ - boolean avoidFlushDuringRecovery(); - - /** - * Set this option to true during creation of database if you want - * to be able to ingest behind (call IngestExternalFile() skipping keys - * that already exist, rather than overwriting matching keys). - * Setting this option to true will affect 2 things: - * 1) Disable some internal optimizations around SST file compression - * 2) Reserve bottom-most level for ingested files only. - * 3) Note that num_levels should be >= 3 if this option is turned on. - * - * DEFAULT: false - * - * @param allowIngestBehind true to allow ingest behind, false to disallow. - * - * @return the reference to the current options. - */ - T setAllowIngestBehind(final boolean allowIngestBehind); - - /** - * Returns true if ingest behind is allowed. - * See {@link #setAllowIngestBehind(boolean)}. - * - * @return true if ingest behind is allowed, false otherwise. - */ - boolean allowIngestBehind(); - - /** - * If enabled it uses two queues for writes, one for the ones with - * disable_memtable and one for the ones that also write to memtable. This - * allows the memtable writes not to lag behind other writes. It can be used - * to optimize MySQL 2PC in which only the commits, which are serial, write to - * memtable. - * - * DEFAULT: false - * - * @param twoWriteQueues true to enable two write queues, false otherwise. - * - * @return the reference to the current options. - */ - T setTwoWriteQueues(final boolean twoWriteQueues); - - /** - * Returns true if two write queues are enabled. 
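A short sketch of preparing a DB as an ingest-behind target as described above; setNumLevels is assumed from the column-family side of the options (not shown in this excerpt) and is included only to satisfy the "at least 3 levels" note:

// org.rocksdb.* imports omitted for brevity.
// Reserve the bottom-most level for files ingested behind existing data.
final Options options = new Options()
    .setCreateIfMissing(true)
    .setAllowIngestBehind(true)
    .setNumLevels(4);   // must be >= 3 when ingest-behind is enabled
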
- * - * @return true if two write queues are enabled, false otherwise. - */ - boolean twoWriteQueues(); - - /** - * If true WAL is not flushed automatically after each write. Instead it - * relies on manual invocation of FlushWAL to write the WAL buffer to its - * file. - * - * DEFAULT: false - * - * @param manualWalFlush true to set disable automatic WAL flushing, - * false otherwise. - * - * @return the reference to the current options. - */ - T setManualWalFlush(final boolean manualWalFlush); - - /** - * Returns true if automatic WAL flushing is disabled. - * See {@link #setManualWalFlush(boolean)}. - * - * @return true if automatic WAL flushing is disabled, false otherwise. - */ - boolean manualWalFlush(); - - /** - * If true, RocksDB supports flushing multiple column families and committing - * their results atomically to MANIFEST. Note that it is not - * necessary to set atomic_flush to true if WAL is always enabled since WAL - * allows the database to be restored to the last persistent state in WAL. - * This option is useful when there are column families with writes NOT - * protected by WAL. - * For manual flush, application has to specify which column families to - * flush atomically in {@link RocksDB#flush(FlushOptions, List)}. - * For auto-triggered flush, RocksDB atomically flushes ALL column families. - * - * Currently, any WAL-enabled writes after atomic flush may be replayed - * independently if the process crashes later and tries to recover. - * - * @param atomicFlush true to enable atomic flush of multiple column families. - * - * @return the reference to the current options. - */ - T setAtomicFlush(final boolean atomicFlush); - - /** - * Determine if atomic flush of multiple column families is enabled. - * - * See {@link #setAtomicFlush(boolean)}. - * - * @return true if atomic flush is enabled. - */ - boolean atomicFlush(); - - /** - * If true, working thread may avoid doing unnecessary and long-latency - * operation (such as deleting obsolete files directly or deleting memtable) - * and will instead schedule a background job to do it. - * Use it if you're latency-sensitive. - * If set to true, takes precedence over - * {@link ReadOptions#setBackgroundPurgeOnIteratorCleanup(boolean)}. - * - * @param avoidUnnecessaryBlockingIO If true, working thread may avoid doing unnecessary - * operation. - * @return the reference to the current options. - */ - T setAvoidUnnecessaryBlockingIO(final boolean avoidUnnecessaryBlockingIO); - - /** - * If true, working thread may avoid doing unnecessary and long-latency - * operation (such as deleting obsolete files directly or deleting memtable) - * and will instead schedule a background job to do it. - * Use it if you're latency-sensitive. - * If set to true, takes precedence over - * {@link ReadOptions#setBackgroundPurgeOnIteratorCleanup(boolean)}. - * - * @return true, if working thread may avoid doing unnecessary operation. - */ - boolean avoidUnnecessaryBlockingIO(); - - /** - * If true, automatically persist stats to a hidden column family (column - * family name: ___rocksdb_stats_history___) every - * stats_persist_period_sec seconds; otherwise, write to an in-memory - * struct. User can query through `GetStatsHistory` API. - * If user attempts to create a column family with the same name on a DB - * which have previously set persist_stats_to_disk to true, the column family - * creation will fail, but the hidden column family will survive, as well as - * the previously persisted statistics. 
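To show how manual WAL flushing is typically driven from application code, here is a hedged sketch; RocksDB#flushWal(boolean) and the database path are assumptions from the wider RocksJava API rather than part of this excerpt:

import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteOptions;

public class ManualWalFlushDemo {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             .setManualWalFlush(true);                 // WAL buffer is written only on demand
         final RocksDB db = RocksDB.open(options, "/tmp/manual-wal-demo");
         final WriteOptions writeOptions = new WriteOptions()) {
      db.put(writeOptions, "k".getBytes(), "v".getBytes());
      db.flushWal(true /* sync */);                    // push the buffered WAL to its file
    }
  }
}
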
- * When peristing stats to disk, the stat name will be limited at 100 bytes. - * Default: false - * - * @param persistStatsToDisk true if stats should be persisted to hidden column family. - * @return the instance of the current object. - */ - T setPersistStatsToDisk(final boolean persistStatsToDisk); - - /** - * If true, automatically persist stats to a hidden column family (column - * family name: ___rocksdb_stats_history___) every - * stats_persist_period_sec seconds; otherwise, write to an in-memory - * struct. User can query through `GetStatsHistory` API. - * If user attempts to create a column family with the same name on a DB - * which have previously set persist_stats_to_disk to true, the column family - * creation will fail, but the hidden column family will survive, as well as - * the previously persisted statistics. - * When peristing stats to disk, the stat name will be limited at 100 bytes. - * Default: false - * - * @return true if stats should be persisted to hidden column family. - */ - boolean persistStatsToDisk(); - - /** - * Historically DB ID has always been stored in Identity File in DB folder. - * If this flag is true, the DB ID is written to Manifest file in addition - * to the Identity file. By doing this 2 problems are solved - * 1. We don't checksum the Identity file where as Manifest file is. - * 2. Since the source of truth for DB is Manifest file DB ID will sit with - * the source of truth. Previously the Identity file could be copied - * independent of Manifest and that can result in wrong DB ID. - * We recommend setting this flag to true. - * Default: false - * - * @param writeDbidToManifest if true, then DB ID will be written to Manifest file. - * @return the instance of the current object. - */ - T setWriteDbidToManifest(final boolean writeDbidToManifest); - - /** - * Historically DB ID has always been stored in Identity File in DB folder. - * If this flag is true, the DB ID is written to Manifest file in addition - * to the Identity file. By doing this 2 problems are solved - * 1. We don't checksum the Identity file where as Manifest file is. - * 2. Since the source of truth for DB is Manifest file DB ID will sit with - * the source of truth. Previously the Identity file could be copied - * independent of Manifest and that can result in wrong DB ID. - * We recommend setting this flag to true. - * Default: false - * - * @return true, if DB ID will be written to Manifest file. - */ - boolean writeDbidToManifest(); - - /** - * The number of bytes to prefetch when reading the log. This is mostly useful - * for reading a remotely located log, as it can save the number of - * round-trips. If 0, then the prefetching is disabled. - * - * Default: 0 - * - * @param logReadaheadSize the number of bytes to prefetch when reading the log. - * @return the instance of the current object. - */ - T setLogReadaheadSize(final long logReadaheadSize); - - /** - * The number of bytes to prefetch when reading the log. This is mostly useful - * for reading a remotely located log, as it can save the number of - * round-trips. If 0, then the prefetching is disabled. - * - * Default: 0 - * - * @return the number of bytes to prefetch when reading the log. - */ - long logReadaheadSize(); - - /** - * By default, RocksDB recovery fails if any table file referenced in - * MANIFEST are missing after scanning the MANIFEST. - * Best-efforts recovery is another recovery mode that - * tries to restore the database to the most recent point in time without - * missing file. 
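A minimal sketch enabling the two durability options above (stats history persisted to the hidden column family, and the DB ID recorded in the MANIFEST); only the setters documented here are used:

// org.rocksdb.* imports omitted for brevity.
// stats_persist_period_sec (see the Javadoc above) controls how often the
// stats snapshots are taken.
final DBOptions dbOptions = new DBOptions()
    .setCreateIfMissing(true)
    .setPersistStatsToDisk(true)      // ___rocksdb_stats_history___ column family
    .setWriteDbidToManifest(true);    // recommended by the Javadoc above
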
- * Currently not compatible with atomic flush. Furthermore, WAL files will - * not be used for recovery if best_efforts_recovery is true. - * Default: false - * - * @param bestEffortsRecovery if true, RocksDB will use best-efforts mode when recovering. - * @return the instance of the current object. - */ - T setBestEffortsRecovery(final boolean bestEffortsRecovery); - - /** - * By default, RocksDB recovery fails if any table file referenced in - * MANIFEST are missing after scanning the MANIFEST. - * Best-efforts recovery is another recovery mode that - * tries to restore the database to the most recent point in time without - * missing file. - * Currently not compatible with atomic flush. Furthermore, WAL files will - * not be used for recovery if best_efforts_recovery is true. - * Default: false - * - * @return true, if RocksDB uses best-efforts mode when recovering. - */ - boolean bestEffortsRecovery(); - - /** - * It defines how many times db resume is called by a separate thread when - * background retryable IO Error happens. When background retryable IO - * Error happens, SetBGError is called to deal with the error. If the error - * can be auto-recovered (e.g., retryable IO Error during Flush or WAL write), - * then db resume is called in background to recover from the error. If this - * value is 0 or negative, db resume will not be called. - * - * Default: INT_MAX - * - * @param maxBgerrorResumeCount maximum number of times db resume should be called when IO Error - * happens. - * @return the instance of the current object. - */ - T setMaxBgErrorResumeCount(final int maxBgerrorResumeCount); - - /** - * It defines how many times db resume is called by a separate thread when - * background retryable IO Error happens. When background retryable IO - * Error happens, SetBGError is called to deal with the error. If the error - * can be auto-recovered (e.g., retryable IO Error during Flush or WAL write), - * then db resume is called in background to recover from the error. If this - * value is 0 or negative, db resume will not be called. - * - * Default: INT_MAX - * - * @return maximum number of times db resume should be called when IO Error happens. - */ - int maxBgerrorResumeCount(); - - /** - * If max_bgerror_resume_count is ≥ 2, db resume is called multiple times. - * This option decides how long to wait to retry the next resume if the - * previous resume fails and satisfy redo resume conditions. - * - * Default: 1000000 (microseconds). - * - * @param bgerrorResumeRetryInterval how many microseconds to wait between DB resume attempts. - * @return the instance of the current object. - */ - T setBgerrorResumeRetryInterval(final long bgerrorResumeRetryInterval); - - /** - * If max_bgerror_resume_count is ≥ 2, db resume is called multiple times. - * This option decides how long to wait to retry the next resume if the - * previous resume fails and satisfy redo resume conditions. - * - * Default: 1000000 (microseconds). - * - * @return the instance of the current object. - */ - long bgerrorResumeRetryInterval(); -} diff --git a/java/src/main/java/org/rocksdb/DataBlockIndexType.java b/java/src/main/java/org/rocksdb/DataBlockIndexType.java deleted file mode 100644 index 513e5b429..000000000 --- a/java/src/main/java/org/rocksdb/DataBlockIndexType.java +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
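The auto-resume knobs above might be tuned as in this illustrative sketch (the values are arbitrary):

// org.rocksdb.* imports omitted for brevity.
// Bound the automatic resume attempts after retryable background IO errors:
// retry at most 5 times (default INT_MAX), waiting 2 seconds between attempts
// (default 1,000,000 microseconds).
final DBOptions dbOptions = new DBOptions()
    .setMaxBgErrorResumeCount(5)
    .setBgerrorResumeRetryInterval(2_000_000L);   // microseconds
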
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - - -/** - * DataBlockIndexType used in conjunction with BlockBasedTable. - */ -public enum DataBlockIndexType { - /** - * traditional block type - */ - kDataBlockBinarySearch((byte)0x0), - - /** - * additional hash index - */ - kDataBlockBinaryAndHash((byte)0x1); - - private final byte value; - - DataBlockIndexType(final byte value) { - this.value = value; - } - - byte getValue() { - return value; - } -} diff --git a/java/src/main/java/org/rocksdb/DbPath.java b/java/src/main/java/org/rocksdb/DbPath.java deleted file mode 100644 index 3f0b67557..000000000 --- a/java/src/main/java/org/rocksdb/DbPath.java +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.file.Path; - -/** - * Tuple of database path and target size - */ -public class DbPath { - final Path path; - final long targetSize; - - public DbPath(final Path path, final long targetSize) { - this.path = path; - this.targetSize = targetSize; - } - - @Override - public boolean equals(final Object o) { - if (this == o) { - return true; - } - - if (o == null || getClass() != o.getClass()) { - return false; - } - - final DbPath dbPath = (DbPath) o; - - if (targetSize != dbPath.targetSize) { - return false; - } - - return path != null ? path.equals(dbPath.path) : dbPath.path == null; - } - - @Override - public int hashCode() { - int result = path != null ? path.hashCode() : 0; - result = 31 * result + (int) (targetSize ^ (targetSize >>> 32)); - return result; - } -} diff --git a/java/src/main/java/org/rocksdb/DirectSlice.java b/java/src/main/java/org/rocksdb/DirectSlice.java deleted file mode 100644 index 02fa3511f..000000000 --- a/java/src/main/java/org/rocksdb/DirectSlice.java +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - * Base class for slices which will receive direct - * ByteBuffer based access to the underlying data. - * - * ByteBuffer backed slices typically perform better with - * larger keys and values. When using smaller keys and - * values consider using @see org.rocksdb.Slice - */ -public class DirectSlice extends AbstractSlice { - public final static DirectSlice NONE = new DirectSlice(); - - /** - * Indicates whether we have to free the memory pointed to by the Slice - */ - private final boolean internalBuffer; - private volatile boolean cleared = false; - private volatile long internalBufferOffset = 0; - - /** - * Called from JNI to construct a new Java DirectSlice - * without an underlying C++ object set - * at creation time. - * - * Note: You should be aware that it is intentionally marked as - * package-private. This is so that developers cannot construct their own - * default DirectSlice objects (at present). 
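The DbPath tuple above is consumed by the options through setDbPaths; a hedged sketch with made-up mount points and target sizes could look like this (setDbPaths itself is assumed from the wider DBOptions interface, not shown in this excerpt):

// org.rocksdb.* and java.nio.file imports omitted for brevity.
// Spread SST files across two volumes; roughly, later paths are used once
// earlier paths reach their target sizes.
final DBOptions dbOptions = new DBOptions()
    .setCreateIfMissing(true)
    .setDbPaths(Arrays.asList(
        new DbPath(Paths.get("/mnt/nvme/rocksdb"), 100L * 1024 * 1024 * 1024),
        new DbPath(Paths.get("/mnt/hdd/rocksdb"), 1024L * 1024 * 1024 * 1024)));
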
As developers cannot construct - * their own DirectSlice objects through this, they are not creating - * underlying C++ DirectSlice objects, and so there is nothing to free - * (dispose) from Java. - */ - DirectSlice() { - super(); - this.internalBuffer = false; - } - - /** - * Constructs a slice - * where the data is taken from - * a String. - * - * @param str The string - */ - public DirectSlice(final String str) { - super(createNewSliceFromString(str)); - this.internalBuffer = true; - } - - /** - * Constructs a slice where the data is - * read from the provided - * ByteBuffer up to a certain length - * - * @param data The buffer containing the data - * @param length The length of the data to use for the slice - */ - public DirectSlice(final ByteBuffer data, final int length) { - super(createNewDirectSlice0(ensureDirect(data), length)); - this.internalBuffer = false; - } - - /** - * Constructs a slice where the data is - * read from the provided - * ByteBuffer - * - * @param data The bugger containing the data - */ - public DirectSlice(final ByteBuffer data) { - super(createNewDirectSlice1(ensureDirect(data))); - this.internalBuffer = false; - } - - private static ByteBuffer ensureDirect(final ByteBuffer data) { - if(!data.isDirect()) { - throw new IllegalArgumentException("The ByteBuffer must be direct"); - } - return data; - } - - /** - * Retrieves the byte at a specific offset - * from the underlying data - * - * @param offset The (zero-based) offset of the byte to retrieve - * - * @return the requested byte - */ - public byte get(final int offset) { - return get0(getNativeHandle(), offset); - } - - @Override - public void clear() { - clear0(getNativeHandle(), !cleared && internalBuffer, internalBufferOffset); - cleared = true; - } - - @Override - public void removePrefix(final int n) { - removePrefix0(getNativeHandle(), n); - this.internalBufferOffset += n; - } - - public void setLength(final int n) { - setLength0(getNativeHandle(), n); - } - - @Override - protected void disposeInternal() { - final long nativeHandle = getNativeHandle(); - if(!cleared && internalBuffer) { - disposeInternalBuf(nativeHandle, internalBufferOffset); - } - disposeInternal(nativeHandle); - } - - private native static long createNewDirectSlice0(final ByteBuffer data, - final int length); - private native static long createNewDirectSlice1(final ByteBuffer data); - @Override protected final native ByteBuffer data0(long handle); - private native byte get0(long handle, int offset); - private native void clear0(long handle, boolean internalBuffer, - long internalBufferOffset); - private native void removePrefix0(long handle, int length); - private native void setLength0(long handle, int length); - private native void disposeInternalBuf(final long handle, - long internalBufferOffset); -} diff --git a/java/src/main/java/org/rocksdb/EncodingType.java b/java/src/main/java/org/rocksdb/EncodingType.java deleted file mode 100644 index 5ceeb54c8..000000000 --- a/java/src/main/java/org/rocksdb/EncodingType.java +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * EncodingType - * - *
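A small usage sketch for the DirectSlice class above; it only uses the constructors and methods shown in this file, and the key bytes are arbitrary:

import java.nio.ByteBuffer;
import org.rocksdb.DirectSlice;
import org.rocksdb.RocksDB;

public class DirectSliceDemo {
  public static void main(final String[] args) {
    RocksDB.loadLibrary();
    // The backing buffer must be allocated with allocateDirect(); a heap
    // buffer makes the constructor throw IllegalArgumentException.
    final ByteBuffer buffer = ByteBuffer.allocateDirect(16);
    buffer.put("prefix-key".getBytes());
    try (final DirectSlice slice = new DirectSlice(buffer, 10)) {
      System.out.println((char) slice.get(0));   // 'p'
      slice.removePrefix(7);                     // slice now views "key"
    }
  }
}
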

The value will determine how to encode keys - * when writing to a new SST file.

- * - *

This value will be stored - * inside the SST file which will be used when reading from - * the file, which makes it possible for users to choose - * different encoding type when reopening a DB. Files with - * different encoding types can co-exist in the same DB and - * can be read.

- */ -public enum EncodingType { - /** - * Always write full keys without any special encoding. - */ - kPlain((byte) 0), - /** - *

Finds opportunities to write the same prefix once for multiple rows. - * In some cases, when a key follows a previous key with the same prefix, - * instead of writing out the full key, it just writes out the size of the - * shared prefix, as well as other bytes, to save some bytes.

- * - *

When using this option, the user is required to use the same prefix - * extractor to make sure the same prefix will be extracted from the same key. - * The Name() value of the prefix extractor will be stored in the file. When - * reopening the file, the name of the options.prefix_extractor given will be - * bitwise compared to the prefix extractors stored in the file. An error - * will be returned if the two don't match.

- */ - kPrefix((byte) 1); - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value_; - } - - private EncodingType(byte value) { - value_ = value; - } - - private final byte value_; -} diff --git a/java/src/main/java/org/rocksdb/Env.java b/java/src/main/java/org/rocksdb/Env.java deleted file mode 100644 index 07b5319bb..000000000 --- a/java/src/main/java/org/rocksdb/Env.java +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Arrays; -import java.util.List; - -/** - * Base class for all Env implementations in RocksDB. - */ -public abstract class Env extends RocksObject { - - static { - RocksDB.loadLibrary(); - } - - private static final Env DEFAULT_ENV = new RocksEnv(getDefaultEnvInternal()); - static { - /** - * The Ownership of the Default Env belongs to C++ - * and so we disown the native handle here so that - * we cannot accidentally free it from Java. - */ - DEFAULT_ENV.disOwnNativeHandle(); - } - - /** - *

Returns the default environment suitable for the current operating - * system.

- * - *

The result of {@code getDefault()} is a singleton whose ownership - * belongs to the RocksDB C++ code. As a result, the returned RocksEnv will not - * have ownership of its C++ resource, and calling its dispose()/close() - * will be a no-op.

- * - * @return the default {@link org.rocksdb.RocksEnv} instance. - */ - public static Env getDefault() { - return DEFAULT_ENV; - } - - /** - *

Sets the number of background worker threads of the low priority - * pool for this environment.

- *

Default number: 1

- * - * @param number the number of threads - * - * @return current {@link RocksEnv} instance. - */ - public Env setBackgroundThreads(final int number) { - return setBackgroundThreads(number, Priority.LOW); - } - - /** - *

Gets the number of background worker threads of the pool - * for this environment.

- * - * @param priority the priority id of a specified thread pool. - * - * @return the number of threads. - */ - public int getBackgroundThreads(final Priority priority) { - return getBackgroundThreads(nativeHandle_, priority.getValue()); - } - - /** - *

Sets the number of background worker threads of the specified thread - * pool for this environment.

- * - * @param number the number of threads - * @param priority the priority id of a specified thread pool. - * - *

Default number: 1

- * @return current {@link RocksEnv} instance. - */ - public Env setBackgroundThreads(final int number, final Priority priority) { - setBackgroundThreads(nativeHandle_, number, priority.getValue()); - return this; - } - - /** - *

Returns the length of the queue associated with the specified - * thread pool.

- * - * @param priority the priority id of a specified thread pool. - * - * @return the thread pool queue length. - */ - public int getThreadPoolQueueLen(final Priority priority) { - return getThreadPoolQueueLen(nativeHandle_, priority.getValue()); - } - - /** - * Enlarge number of background worker threads of a specific thread pool - * for this environment if it is smaller than specified. 'LOW' is the default - * pool. - * - * @param number the number of threads. - * @param priority the priority id of a specified thread pool. - * - * @return current {@link RocksEnv} instance. - */ - public Env incBackgroundThreadsIfNeeded(final int number, - final Priority priority) { - incBackgroundThreadsIfNeeded(nativeHandle_, number, priority.getValue()); - return this; - } - - /** - * Lower IO priority for threads from the specified pool. - * - * @param priority the priority id of a specified thread pool. - * - * @return current {@link RocksEnv} instance. - */ - public Env lowerThreadPoolIOPriority(final Priority priority) { - lowerThreadPoolIOPriority(nativeHandle_, priority.getValue()); - return this; - } - - /** - * Lower CPU priority for threads from the specified pool. - * - * @param priority the priority id of a specified thread pool. - * - * @return current {@link RocksEnv} instance. - */ - public Env lowerThreadPoolCPUPriority(final Priority priority) { - lowerThreadPoolCPUPriority(nativeHandle_, priority.getValue()); - return this; - } - - /** - * Returns the status of all threads that belong to the current Env. - * - * @return the status of all threads belong to this env. - * - * @throws RocksDBException if the thread list cannot be acquired. - */ - public List getThreadList() throws RocksDBException { - return Arrays.asList(getThreadList(nativeHandle_)); - } - - Env(final long nativeHandle) { - super(nativeHandle); - } - - private static native long getDefaultEnvInternal(); - private native void setBackgroundThreads( - final long handle, final int number, final byte priority); - private native int getBackgroundThreads(final long handle, - final byte priority); - private native int getThreadPoolQueueLen(final long handle, - final byte priority); - private native void incBackgroundThreadsIfNeeded(final long handle, - final int number, final byte priority); - private native void lowerThreadPoolIOPriority(final long handle, - final byte priority); - private native void lowerThreadPoolCPUPriority(final long handle, - final byte priority); - private native ThreadStatus[] getThreadList(final long handle) - throws RocksDBException; -} diff --git a/java/src/main/java/org/rocksdb/EnvOptions.java b/java/src/main/java/org/rocksdb/EnvOptions.java deleted file mode 100644 index 6baddb310..000000000 --- a/java/src/main/java/org/rocksdb/EnvOptions.java +++ /dev/null @@ -1,366 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Options while opening a file to read/write - */ -public class EnvOptions extends RocksObject { - static { - RocksDB.loadLibrary(); - } - - /** - * Construct with default Options - */ - public EnvOptions() { - super(newEnvOptions()); - } - - /** - * Construct from {@link DBOptions}. - * - * @param dbOptions the database options. 
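Putting the Env thread-pool controls above together, a hedged sketch of sizing the default Env before opening a database; Priority.HIGH and Options#setEnv are assumed from the wider RocksJava API:

// org.rocksdb.* imports omitted for brevity.
// The default Env is a process-wide singleton owned by the C++ side; sizing
// its pools affects every database that uses it.
final Env env = Env.getDefault()
    .setBackgroundThreads(4, Priority.LOW)    // compaction pool
    .setBackgroundThreads(2, Priority.HIGH);  // flush pool
final Options options = new Options().setEnv(env);
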
- */ - public EnvOptions(final DBOptions dbOptions) { - super(newEnvOptions(dbOptions.nativeHandle_)); - } - - /** - * Enable/Disable memory mapped reads. - * - * Default: false - * - * @param useMmapReads true to enable memory mapped reads, false to disable. - * - * @return the reference to these options. - */ - public EnvOptions setUseMmapReads(final boolean useMmapReads) { - setUseMmapReads(nativeHandle_, useMmapReads); - return this; - } - - /** - * Determine if memory mapped reads are in-use. - * - * @return true if memory mapped reads are in-use, false otherwise. - */ - public boolean useMmapReads() { - assert(isOwningHandle()); - return useMmapReads(nativeHandle_); - } - - /** - * Enable/Disable memory mapped Writes. - * - * Default: true - * - * @param useMmapWrites true to enable memory mapped writes, false to disable. - * - * @return the reference to these options. - */ - public EnvOptions setUseMmapWrites(final boolean useMmapWrites) { - setUseMmapWrites(nativeHandle_, useMmapWrites); - return this; - } - - /** - * Determine if memory mapped writes are in-use. - * - * @return true if memory mapped writes are in-use, false otherwise. - */ - public boolean useMmapWrites() { - assert(isOwningHandle()); - return useMmapWrites(nativeHandle_); - } - - /** - * Enable/Disable direct reads, i.e. {@code O_DIRECT}. - * - * Default: false - * - * @param useDirectReads true to enable direct reads, false to disable. - * - * @return the reference to these options. - */ - public EnvOptions setUseDirectReads(final boolean useDirectReads) { - setUseDirectReads(nativeHandle_, useDirectReads); - return this; - } - - /** - * Determine if direct reads are in-use. - * - * @return true if direct reads are in-use, false otherwise. - */ - public boolean useDirectReads() { - assert(isOwningHandle()); - return useDirectReads(nativeHandle_); - } - - /** - * Enable/Disable direct writes, i.e. {@code O_DIRECT}. - * - * Default: false - * - * @param useDirectWrites true to enable direct writes, false to disable. - * - * @return the reference to these options. - */ - public EnvOptions setUseDirectWrites(final boolean useDirectWrites) { - setUseDirectWrites(nativeHandle_, useDirectWrites); - return this; - } - - /** - * Determine if direct writes are in-use. - * - * @return true if direct writes are in-use, false otherwise. - */ - public boolean useDirectWrites() { - assert(isOwningHandle()); - return useDirectWrites(nativeHandle_); - } - - /** - * Enable/Disable fallocate calls. - * - * Default: true - * - * If false, {@code fallocate()} calls are bypassed. - * - * @param allowFallocate true to enable fallocate calls, false to disable. - * - * @return the reference to these options. - */ - public EnvOptions setAllowFallocate(final boolean allowFallocate) { - setAllowFallocate(nativeHandle_, allowFallocate); - return this; - } - - /** - * Determine if fallocate calls are used. - * - * @return true if fallocate calls are used, false otherwise. - */ - public boolean allowFallocate() { - assert(isOwningHandle()); - return allowFallocate(nativeHandle_); - } - - /** - * Enable/Disable the {@code FD_CLOEXEC} bit when opening file descriptors. - * - * Default: true - * - * @param setFdCloexec true to enable the {@code FB_CLOEXEC} bit, - * false to disable. - * - * @return the reference to these options. 
- */ - public EnvOptions setSetFdCloexec(final boolean setFdCloexec) { - setSetFdCloexec(nativeHandle_, setFdCloexec); - return this; - } - - /** - * Determine i fthe {@code FD_CLOEXEC} bit is set when opening file - * descriptors. - * - * @return true if the {@code FB_CLOEXEC} bit is enabled, false otherwise. - */ - public boolean setFdCloexec() { - assert(isOwningHandle()); - return setFdCloexec(nativeHandle_); - } - - /** - * Allows OS to incrementally sync files to disk while they are being - * written, in the background. Issue one request for every - * {@code bytesPerSync} written. - * - * Default: 0 - * - * @param bytesPerSync 0 to disable, otherwise the number of bytes. - * - * @return the reference to these options. - */ - public EnvOptions setBytesPerSync(final long bytesPerSync) { - setBytesPerSync(nativeHandle_, bytesPerSync); - return this; - } - - /** - * Get the number of incremental bytes per sync written in the background. - * - * @return 0 if disabled, otherwise the number of bytes. - */ - public long bytesPerSync() { - assert(isOwningHandle()); - return bytesPerSync(nativeHandle_); - } - - /** - * If true, we will preallocate the file with {@code FALLOC_FL_KEEP_SIZE} - * flag, which means that file size won't change as part of preallocation. - * If false, preallocation will also change the file size. This option will - * improve the performance in workloads where you sync the data on every - * write. By default, we set it to true for MANIFEST writes and false for - * WAL writes - * - * @param fallocateWithKeepSize true to preallocate, false otherwise. - * - * @return the reference to these options. - */ - public EnvOptions setFallocateWithKeepSize( - final boolean fallocateWithKeepSize) { - setFallocateWithKeepSize(nativeHandle_, fallocateWithKeepSize); - return this; - } - - /** - * Determine if file is preallocated. - * - * @return true if the file is preallocated, false otherwise. - */ - public boolean fallocateWithKeepSize() { - assert(isOwningHandle()); - return fallocateWithKeepSize(nativeHandle_); - } - - /** - * See {@link DBOptions#setCompactionReadaheadSize(long)}. - * - * @param compactionReadaheadSize the compaction read-ahead size. - * - * @return the reference to these options. - */ - public EnvOptions setCompactionReadaheadSize( - final long compactionReadaheadSize) { - setCompactionReadaheadSize(nativeHandle_, compactionReadaheadSize); - return this; - } - - /** - * See {@link DBOptions#compactionReadaheadSize()}. - * - * @return the compaction read-ahead size. - */ - public long compactionReadaheadSize() { - assert(isOwningHandle()); - return compactionReadaheadSize(nativeHandle_); - } - - /** - * See {@link DBOptions#setRandomAccessMaxBufferSize(long)}. - * - * @param randomAccessMaxBufferSize the max buffer size for random access. - * - * @return the reference to these options. - */ - public EnvOptions setRandomAccessMaxBufferSize( - final long randomAccessMaxBufferSize) { - setRandomAccessMaxBufferSize(nativeHandle_, randomAccessMaxBufferSize); - return this; - } - - /** - * See {@link DBOptions#randomAccessMaxBufferSize()}. - * - * @return the max buffer size for random access. - */ - public long randomAccessMaxBufferSize() { - assert(isOwningHandle()); - return randomAccessMaxBufferSize(nativeHandle_); - } - - /** - * See {@link DBOptions#setWritableFileMaxBufferSize(long)}. - * - * @param writableFileMaxBufferSize the max buffer size. - * - * @return the reference to these options. 
- */ - public EnvOptions setWritableFileMaxBufferSize( - final long writableFileMaxBufferSize) { - setWritableFileMaxBufferSize(nativeHandle_, writableFileMaxBufferSize); - return this; - } - - /** - * See {@link DBOptions#writableFileMaxBufferSize()}. - * - * @return the max buffer size. - */ - public long writableFileMaxBufferSize() { - assert(isOwningHandle()); - return writableFileMaxBufferSize(nativeHandle_); - } - - /** - * Set the write rate limiter for flush and compaction. - * - * @param rateLimiter the rate limiter. - * - * @return the reference to these options. - */ - public EnvOptions setRateLimiter(final RateLimiter rateLimiter) { - this.rateLimiter = rateLimiter; - setRateLimiter(nativeHandle_, rateLimiter.nativeHandle_); - return this; - } - - /** - * Get the write rate limiter for flush and compaction. - * - * @return the rate limiter. - */ - public RateLimiter rateLimiter() { - assert(isOwningHandle()); - return rateLimiter; - } - - private native static long newEnvOptions(); - private native static long newEnvOptions(final long dboptions_handle); - @Override protected final native void disposeInternal(final long handle); - - private native void setUseMmapReads(final long handle, - final boolean useMmapReads); - private native boolean useMmapReads(final long handle); - private native void setUseMmapWrites(final long handle, - final boolean useMmapWrites); - private native boolean useMmapWrites(final long handle); - private native void setUseDirectReads(final long handle, - final boolean useDirectReads); - private native boolean useDirectReads(final long handle); - private native void setUseDirectWrites(final long handle, - final boolean useDirectWrites); - private native boolean useDirectWrites(final long handle); - private native void setAllowFallocate(final long handle, - final boolean allowFallocate); - private native boolean allowFallocate(final long handle); - private native void setSetFdCloexec(final long handle, - final boolean setFdCloexec); - private native boolean setFdCloexec(final long handle); - private native void setBytesPerSync(final long handle, - final long bytesPerSync); - private native long bytesPerSync(final long handle); - private native void setFallocateWithKeepSize( - final long handle, final boolean fallocateWithKeepSize); - private native boolean fallocateWithKeepSize(final long handle); - private native void setCompactionReadaheadSize( - final long handle, final long compactionReadaheadSize); - private native long compactionReadaheadSize(final long handle); - private native void setRandomAccessMaxBufferSize( - final long handle, final long randomAccessMaxBufferSize); - private native long randomAccessMaxBufferSize(final long handle); - private native void setWritableFileMaxBufferSize( - final long handle, final long writableFileMaxBufferSize); - private native long writableFileMaxBufferSize(final long handle); - private native void setRateLimiter(final long handle, - final long rateLimiterHandle); - private RateLimiter rateLimiter; -} diff --git a/java/src/main/java/org/rocksdb/EventListener.java b/java/src/main/java/org/rocksdb/EventListener.java deleted file mode 100644 index a12ab92ba..000000000 --- a/java/src/main/java/org/rocksdb/EventListener.java +++ /dev/null @@ -1,335 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
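The EnvOptions removed above are most commonly handed to an SstFileWriter; a minimal sketch of that pairing follows (SstFileWriter and the output path are assumptions from the wider RocksJava API, not part of this excerpt):

import org.rocksdb.EnvOptions;
import org.rocksdb.Options;
import org.rocksdb.RocksDBException;
import org.rocksdb.SstFileWriter;

public class EnvOptionsExample {
  public static void main(final String[] args) throws RocksDBException {
    try (final EnvOptions envOptions = new EnvOptions()
             .setUseDirectWrites(true)        // O_DIRECT for the generated file
             .setBytesPerSync(1024 * 1024);   // incremental background syncs
         final Options options = new Options();
         final SstFileWriter writer = new SstFileWriter(envOptions, options)) {
      writer.open("/tmp/example.sst");
      writer.put("k1".getBytes(), "v1".getBytes());   // keys must be added in order
      writer.put("k2".getBytes(), "v2".getBytes());
      writer.finish();
    }
  }
}
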
- -package org.rocksdb; - -import java.util.List; - -/** - * EventListener class contains a set of callback functions that will - * be called when specific RocksDB event happens such as flush. It can - * be used as a building block for developing custom features such as - * stats-collector or external compaction algorithm. - * - * Note that callback functions should not run for an extended period of - * time before the function returns, otherwise RocksDB may be blocked. - * For example, it is not suggested to do - * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, List, int, int, - * CompactionJobInfo)} (as it may run for a long while) or issue many of - * {@link RocksDB#put(ColumnFamilyHandle, WriteOptions, byte[], byte[])} - * (as Put may be blocked in certain cases) in the same thread in the - * EventListener callback. - * - * However, doing - * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, List, int, int, - * CompactionJobInfo)} and {@link RocksDB#put(ColumnFamilyHandle, WriteOptions, byte[], byte[])} in - * another thread is considered safe. - * - * [Threading] All EventListener callback will be called using the - * actual thread that involves in that specific event. For example, it - * is the RocksDB background flush thread that does the actual flush to - * call {@link #onFlushCompleted(RocksDB, FlushJobInfo)}. - * - * [Locking] All EventListener callbacks are designed to be called without - * the current thread holding any DB mutex. This is to prevent potential - * deadlock and performance issue when using EventListener callback - * in a complex way. - */ -public interface EventListener { - /** - * A callback function to RocksDB which will be called before a - * RocksDB starts to flush memtables. - * - * Note that the this function must be implemented in a way such that - * it should not run for an extended period of time before the function - * returns. Otherwise, RocksDB may be blocked. - * - * @param db the database - * @param flushJobInfo the flush job info, contains data copied from - * respective native structure. - */ - void onFlushBegin(final RocksDB db, final FlushJobInfo flushJobInfo); - - /** - * callback function to RocksDB which will be called whenever a - * registered RocksDB flushes a file. - * - * Note that the this function must be implemented in a way such that - * it should not run for an extended period of time before the function - * returns. Otherwise, RocksDB may be blocked. - * - * @param db the database - * @param flushJobInfo the flush job info, contains data copied from - * respective native structure. - */ - void onFlushCompleted(final RocksDB db, final FlushJobInfo flushJobInfo); - - /** - * A callback function for RocksDB which will be called whenever - * a SST file is deleted. Different from - * {@link #onCompactionCompleted(RocksDB, CompactionJobInfo)} and - * {@link #onFlushCompleted(RocksDB, FlushJobInfo)}, - * this callback is designed for external logging - * service and thus only provide string parameters instead - * of a pointer to DB. Applications that build logic basic based - * on file creations and deletions is suggested to implement - * {@link #onFlushCompleted(RocksDB, FlushJobInfo)} and - * {@link #onCompactionCompleted(RocksDB, CompactionJobInfo)}. - * - * Note that if applications would like to use the passed reference - * outside this function call, they should make copies from the - * returned value. 
- * - * @param tableFileDeletionInfo the table file deletion info, - * contains data copied from respective native structure. - */ - void onTableFileDeleted(final TableFileDeletionInfo tableFileDeletionInfo); - - /** - * A callback function to RocksDB which will be called before a - * RocksDB starts to compact. The default implementation is - * no-op. - * - * Note that the this function must be implemented in a way such that - * it should not run for an extended period of time before the function - * returns. Otherwise, RocksDB may be blocked. - * - * @param db a pointer to the rocksdb instance which just compacted - * a file. - * @param compactionJobInfo a reference to a native CompactionJobInfo struct, - * which is released after this function is returned, and must be copied - * if it is needed outside of this function. - */ - void onCompactionBegin(final RocksDB db, final CompactionJobInfo compactionJobInfo); - - /** - * A callback function for RocksDB which will be called whenever - * a registered RocksDB compacts a file. The default implementation - * is a no-op. - * - * Note that this function must be implemented in a way such that - * it should not run for an extended period of time before the function - * returns. Otherwise, RocksDB may be blocked. - * - * @param db a pointer to the rocksdb instance which just compacted - * a file. - * @param compactionJobInfo a reference to a native CompactionJobInfo struct, - * which is released after this function is returned, and must be copied - * if it is needed outside of this function. - */ - void onCompactionCompleted(final RocksDB db, final CompactionJobInfo compactionJobInfo); - - /** - * A callback function for RocksDB which will be called whenever - * a SST file is created. Different from OnCompactionCompleted and - * OnFlushCompleted, this callback is designed for external logging - * service and thus only provide string parameters instead - * of a pointer to DB. Applications that build logic basic based - * on file creations and deletions is suggested to implement - * OnFlushCompleted and OnCompactionCompleted. - * - * Historically it will only be called if the file is successfully created. - * Now it will also be called on failure case. User can check info.status - * to see if it succeeded or not. - * - * Note that if applications would like to use the passed reference - * outside this function call, they should make copies from these - * returned value. - * - * @param tableFileCreationInfo the table file creation info, - * contains data copied from respective native structure. - */ - void onTableFileCreated(final TableFileCreationInfo tableFileCreationInfo); - - /** - * A callback function for RocksDB which will be called before - * a SST file is being created. It will follow by OnTableFileCreated after - * the creation finishes. - * - * Note that if applications would like to use the passed reference - * outside this function call, they should make copies from these - * returned value. - * - * @param tableFileCreationBriefInfo the table file creation brief info, - * contains data copied from respective native structure. - */ - void onTableFileCreationStarted(final TableFileCreationBriefInfo tableFileCreationBriefInfo); - - /** - * A callback function for RocksDB which will be called before - * a memtable is made immutable. - * - * Note that the this function must be implemented in a way such that - * it should not run for an extended period of time before the function - * returns. Otherwise, RocksDB may be blocked. 
- * - * Note that if applications would like to use the passed reference - * outside this function call, they should make copies from these - * returned value. - * - * @param memTableInfo the mem table info, contains data - * copied from respective native structure. - */ - void onMemTableSealed(final MemTableInfo memTableInfo); - - /** - * A callback function for RocksDB which will be called before - * a column family handle is deleted. - * - * Note that the this function must be implemented in a way such that - * it should not run for an extended period of time before the function - * returns. Otherwise, RocksDB may be blocked. - * - * @param columnFamilyHandle is a pointer to the column family handle to be - * deleted which will become a dangling pointer after the deletion. - */ - void onColumnFamilyHandleDeletionStarted(final ColumnFamilyHandle columnFamilyHandle); - - /** - * A callback function for RocksDB which will be called after an external - * file is ingested using IngestExternalFile. - * - * Note that the this function will run on the same thread as - * IngestExternalFile(), if this function is blocked, IngestExternalFile() - * will be blocked from finishing. - * - * @param db the database - * @param externalFileIngestionInfo the external file ingestion info, - * contains data copied from respective native structure. - */ - void onExternalFileIngested( - final RocksDB db, final ExternalFileIngestionInfo externalFileIngestionInfo); - - /** - * A callback function for RocksDB which will be called before setting the - * background error status to a non-OK value. The new background error status - * is provided in `bg_error` and can be modified by the callback. E.g., a - * callback can suppress errors by resetting it to Status::OK(), thus - * preventing the database from entering read-only mode. We do not provide any - * guarantee when failed flushes/compactions will be rescheduled if the user - * suppresses an error. - * - * Note that this function can run on the same threads as flush, compaction, - * and user writes. So, it is extremely important not to perform heavy - * computations or blocking calls in this function. - * - * @param backgroundErrorReason background error reason code - * @param backgroundError background error codes - */ - void onBackgroundError( - final BackgroundErrorReason backgroundErrorReason, final Status backgroundError); - - /** - * A callback function for RocksDB which will be called whenever a change - * of superversion triggers a change of the stall conditions. - * - * Note that the this function must be implemented in a way such that - * it should not run for an extended period of time before the function - * returns. Otherwise, RocksDB may be blocked. - * - * @param writeStallInfo write stall info, - * contains data copied from respective native structure. - */ - void onStallConditionsChanged(final WriteStallInfo writeStallInfo); - - /** - * A callback function for RocksDB which will be called whenever a file read - * operation finishes. - * - * @param fileOperationInfo file operation info, - * contains data copied from respective native structure. - */ - void onFileReadFinish(final FileOperationInfo fileOperationInfo); - - /** - * A callback function for RocksDB which will be called whenever a file write - * operation finishes. - * - * @param fileOperationInfo file operation info, - * contains data copied from respective native structure. 
- */ - void onFileWriteFinish(final FileOperationInfo fileOperationInfo); - - /** - * A callback function for RocksDB which will be called whenever a file flush - * operation finishes. - * - * @param fileOperationInfo file operation info, - * contains data copied from respective native structure. - */ - void onFileFlushFinish(final FileOperationInfo fileOperationInfo); - - /** - * A callback function for RocksDB which will be called whenever a file sync - * operation finishes. - * - * @param fileOperationInfo file operation info, - * contains data copied from respective native structure. - */ - void onFileSyncFinish(final FileOperationInfo fileOperationInfo); - - /** - * A callback function for RocksDB which will be called whenever a file - * rangeSync operation finishes. - * - * @param fileOperationInfo file operation info, - * contains data copied from respective native structure. - */ - void onFileRangeSyncFinish(final FileOperationInfo fileOperationInfo); - - /** - * A callback function for RocksDB which will be called whenever a file - * truncate operation finishes. - * - * @param fileOperationInfo file operation info, - * contains data copied from respective native structure. - */ - void onFileTruncateFinish(final FileOperationInfo fileOperationInfo); - - /** - * A callback function for RocksDB which will be called whenever a file close - * operation finishes. - * - * @param fileOperationInfo file operation info, - * contains data copied from respective native structure. - */ - void onFileCloseFinish(final FileOperationInfo fileOperationInfo); - - /** - * If true, the {@link #onFileReadFinish(FileOperationInfo)} - * and {@link #onFileWriteFinish(FileOperationInfo)} will be called. If - * false, then they won't be called. - * - * Default: false - * - * @return whether to callback when file read/write is finished - */ - boolean shouldBeNotifiedOnFileIO(); - - /** - * A callback function for RocksDB which will be called just before - * starting the automatic recovery process for recoverable background - * errors, such as NoSpace(). The callback can suppress the automatic - * recovery by setting returning false. The database will then - * have to be transitioned out of read-only mode by calling - * RocksDB#resume(). - * - * @param backgroundErrorReason background error reason code - * @param backgroundError background error codes - * @return return {@code false} if the automatic recovery should be suppressed - */ - boolean onErrorRecoveryBegin( - final BackgroundErrorReason backgroundErrorReason, final Status backgroundError); - - /** - * A callback function for RocksDB which will be called once the database - * is recovered from read-only mode after an error. When this is called, it - * means normal writes to the database can be issued and the user can - * initiate any further recovery actions needed - * - * @param oldBackgroundError old background error codes - */ - void onErrorRecoveryCompleted(final Status oldBackgroundError); -} diff --git a/java/src/main/java/org/rocksdb/Experimental.java b/java/src/main/java/org/rocksdb/Experimental.java deleted file mode 100644 index 64b404d6f..000000000 --- a/java/src/main/java/org/rocksdb/Experimental.java +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
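The listener callbacks removed above are normally wired in through the options used to open the database. The following is a minimal sketch only, assuming the companion org.rocksdb.AbstractEventListener adapter and the Options#setListeners(List) setter from the same package; the class name ListenerSketch, the database path, and the printed fields are illustrative, not part of the deleted sources.

import java.util.Collections;
import org.rocksdb.AbstractEventListener;
import org.rocksdb.BackgroundErrorReason;
import org.rocksdb.CompactionJobInfo;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.Status;

public class ListenerSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    // Assumption: AbstractEventListener is the usual adapter for the
    // EventListener interface, so only the callbacks of interest are overridden.
    final AbstractEventListener listener = new AbstractEventListener() {
      @Override
      public void onCompactionCompleted(final RocksDB db, final CompactionJobInfo info) {
        // Keep this short: RocksDB may be blocked while the callback runs.
        System.out.println("compaction completed: " + info.compactionReason());
      }
      @Override
      public void onBackgroundError(final BackgroundErrorReason reason, final Status error) {
        System.out.println("background error " + reason + ": " + error.getCodeString());
      }
    };
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             .setListeners(Collections.singletonList(listener));
         final RocksDB db = RocksDB.open(options, "/tmp/listener-example")) {
      db.put("k".getBytes(), "v".getBytes());
    }
  }
}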
- -package org.rocksdb; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Documented; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -/** - * Marks a feature as experimental, meaning that it is likely - * to change or even be removed/re-engineered in the future - */ -@Documented -@Retention(RetentionPolicy.SOURCE) -@Target({ElementType.TYPE, ElementType.METHOD}) -public @interface Experimental { - String value(); -} diff --git a/java/src/main/java/org/rocksdb/ExternalFileIngestionInfo.java b/java/src/main/java/org/rocksdb/ExternalFileIngestionInfo.java deleted file mode 100644 index 6b14a8024..000000000 --- a/java/src/main/java/org/rocksdb/ExternalFileIngestionInfo.java +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Objects; - -public class ExternalFileIngestionInfo { - private final String columnFamilyName; - private final String externalFilePath; - private final String internalFilePath; - private final long globalSeqno; - private final TableProperties tableProperties; - - /** - * Access is package private as this will only be constructed from - * C++ via JNI and for testing. - */ - ExternalFileIngestionInfo(final String columnFamilyName, final String externalFilePath, - final String internalFilePath, final long globalSeqno, - final TableProperties tableProperties) { - this.columnFamilyName = columnFamilyName; - this.externalFilePath = externalFilePath; - this.internalFilePath = internalFilePath; - this.globalSeqno = globalSeqno; - this.tableProperties = tableProperties; - } - - /** - * Get the name of the column family. - * - * @return the name of the column family. - */ - public String getColumnFamilyName() { - return columnFamilyName; - } - - /** - * Get the path of the file outside the DB. - * - * @return the path of the file outside the DB. - */ - public String getExternalFilePath() { - return externalFilePath; - } - - /** - * Get the path of the file inside the DB. - * - * @return the path of the file inside the DB. - */ - public String getInternalFilePath() { - return internalFilePath; - } - - /** - * Get the global sequence number assigned to keys in this file. - * - * @return the global sequence number. - */ - public long getGlobalSeqno() { - return globalSeqno; - } - - /** - * Get the Table properties of the table being flushed. - * - * @return the table properties. 
- */ - public TableProperties getTableProperties() { - return tableProperties; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - ExternalFileIngestionInfo that = (ExternalFileIngestionInfo) o; - return globalSeqno == that.globalSeqno - && Objects.equals(columnFamilyName, that.columnFamilyName) - && Objects.equals(externalFilePath, that.externalFilePath) - && Objects.equals(internalFilePath, that.internalFilePath) - && Objects.equals(tableProperties, that.tableProperties); - } - - @Override - public int hashCode() { - return Objects.hash( - columnFamilyName, externalFilePath, internalFilePath, globalSeqno, tableProperties); - } - - @Override - public String toString() { - return "ExternalFileIngestionInfo{" - + "columnFamilyName='" + columnFamilyName + '\'' + ", externalFilePath='" + externalFilePath - + '\'' + ", internalFilePath='" + internalFilePath + '\'' + ", globalSeqno=" + globalSeqno - + ", tableProperties=" + tableProperties + '}'; - } -} diff --git a/java/src/main/java/org/rocksdb/FileOperationInfo.java b/java/src/main/java/org/rocksdb/FileOperationInfo.java deleted file mode 100644 index aa5743ed3..000000000 --- a/java/src/main/java/org/rocksdb/FileOperationInfo.java +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Objects; - -/** - * Java representation of FileOperationInfo struct from include/rocksdb/listener.h - */ -public class FileOperationInfo { - private final String path; - private final long offset; - private final long length; - private final long startTimestamp; - private final long duration; - private final Status status; - - /** - * Access is private as this will only be constructed from - * C++ via JNI. - */ - FileOperationInfo(final String path, final long offset, final long length, - final long startTimestamp, final long duration, final Status status) { - this.path = path; - this.offset = offset; - this.length = length; - this.startTimestamp = startTimestamp; - this.duration = duration; - this.status = status; - } - - /** - * Get the file path. - * - * @return the file path. - */ - public String getPath() { - return path; - } - - /** - * Get the offset. - * - * @return the offset. - */ - public long getOffset() { - return offset; - } - - /** - * Get the length. - * - * @return the length. - */ - public long getLength() { - return length; - } - - /** - * Get the start timestamp (in nanoseconds). - * - * @return the start timestamp. - */ - public long getStartTimestamp() { - return startTimestamp; - } - - /** - * Get the operation duration (in nanoseconds). - * - * @return the operation duration. - */ - public long getDuration() { - return duration; - } - - /** - * Get the status. - * - * @return the status. 
- */ - public Status getStatus() { - return status; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - FileOperationInfo that = (FileOperationInfo) o; - return offset == that.offset && length == that.length && startTimestamp == that.startTimestamp - && duration == that.duration && Objects.equals(path, that.path) - && Objects.equals(status, that.status); - } - - @Override - public int hashCode() { - return Objects.hash(path, offset, length, startTimestamp, duration, status); - } - - @Override - public String toString() { - return "FileOperationInfo{" - + "path='" + path + '\'' + ", offset=" + offset + ", length=" + length + ", startTimestamp=" - + startTimestamp + ", duration=" + duration + ", status=" + status + '}'; - } -} diff --git a/java/src/main/java/org/rocksdb/Filter.java b/java/src/main/java/org/rocksdb/Filter.java deleted file mode 100644 index 7f490cf59..000000000 --- a/java/src/main/java/org/rocksdb/Filter.java +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Filters are stored in rocksdb and are consulted automatically - * by rocksdb to decide whether or not to read some - * information from disk. In many cases, a filter can cut down the - * number of disk seeks form a handful to a single disk seek per - * DB::Get() call. - */ -//TODO(AR) should be renamed FilterPolicy -public abstract class Filter extends RocksObject { - - protected Filter(final long nativeHandle) { - super(nativeHandle); - } - - /** - * Deletes underlying C++ filter pointer. - * - * Note that this function should be called only after all - * RocksDB instances referencing the filter are closed. - * Otherwise an undefined behavior will occur. - */ - @Override - protected void disposeInternal() { - disposeInternal(nativeHandle_); - } - - @Override - protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/FlushJobInfo.java b/java/src/main/java/org/rocksdb/FlushJobInfo.java deleted file mode 100644 index ca9aa0523..000000000 --- a/java/src/main/java/org/rocksdb/FlushJobInfo.java +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Objects; - -public class FlushJobInfo { - private final long columnFamilyId; - private final String columnFamilyName; - private final String filePath; - private final long threadId; - private final int jobId; - private final boolean triggeredWritesSlowdown; - private final boolean triggeredWritesStop; - private final long smallestSeqno; - private final long largestSeqno; - private final TableProperties tableProperties; - private final FlushReason flushReason; - - /** - * Access is package private as this will only be constructed from - * C++ via JNI and for testing. 
- */ - FlushJobInfo(final long columnFamilyId, final String columnFamilyName, final String filePath, - final long threadId, final int jobId, final boolean triggeredWritesSlowdown, - final boolean triggeredWritesStop, final long smallestSeqno, final long largestSeqno, - final TableProperties tableProperties, final byte flushReasonValue) { - this.columnFamilyId = columnFamilyId; - this.columnFamilyName = columnFamilyName; - this.filePath = filePath; - this.threadId = threadId; - this.jobId = jobId; - this.triggeredWritesSlowdown = triggeredWritesSlowdown; - this.triggeredWritesStop = triggeredWritesStop; - this.smallestSeqno = smallestSeqno; - this.largestSeqno = largestSeqno; - this.tableProperties = tableProperties; - this.flushReason = FlushReason.fromValue(flushReasonValue); - } - - /** - * Get the id of the column family. - * - * @return the id of the column family - */ - public long getColumnFamilyId() { - return columnFamilyId; - } - - /** - * Get the name of the column family. - * - * @return the name of the column family - */ - public String getColumnFamilyName() { - return columnFamilyName; - } - - /** - * Get the path to the newly created file. - * - * @return the path to the newly created file - */ - public String getFilePath() { - return filePath; - } - - /** - * Get the id of the thread that completed this flush job. - * - * @return the id of the thread that completed this flush job - */ - public long getThreadId() { - return threadId; - } - - /** - * Get the job id, which is unique in the same thread. - * - * @return the job id - */ - public int getJobId() { - return jobId; - } - - /** - * Determine if rocksdb is currently slowing-down all writes to prevent - * creating too many Level 0 files as compaction seems not able to - * catch up the write request speed. - * - * This indicates that there are too many files in Level 0. - * - * @return true if rocksdb is currently slowing-down all writes, - * false otherwise - */ - public boolean isTriggeredWritesSlowdown() { - return triggeredWritesSlowdown; - } - - /** - * Determine if rocksdb is currently blocking any writes to prevent - * creating more L0 files. - * - * This indicates that there are too many files in level 0. - * Compactions should try to compact L0 files down to lower levels as soon - * as possible. - * - * @return true if rocksdb is currently blocking any writes, false otherwise - */ - public boolean isTriggeredWritesStop() { - return triggeredWritesStop; - } - - /** - * Get the smallest sequence number in the newly created file. - * - * @return the smallest sequence number - */ - public long getSmallestSeqno() { - return smallestSeqno; - } - - /** - * Get the largest sequence number in the newly created file. - * - * @return the largest sequence number - */ - public long getLargestSeqno() { - return largestSeqno; - } - - /** - * Get the Table properties of the table being flushed. - * - * @return the Table properties of the table being flushed - */ - public TableProperties getTableProperties() { - return tableProperties; - } - - /** - * Get the reason for initiating the flush. - * - * @return the reason for initiating the flush. 
- */ - public FlushReason getFlushReason() { - return flushReason; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - FlushJobInfo that = (FlushJobInfo) o; - return columnFamilyId == that.columnFamilyId && threadId == that.threadId && jobId == that.jobId - && triggeredWritesSlowdown == that.triggeredWritesSlowdown - && triggeredWritesStop == that.triggeredWritesStop && smallestSeqno == that.smallestSeqno - && largestSeqno == that.largestSeqno - && Objects.equals(columnFamilyName, that.columnFamilyName) - && Objects.equals(filePath, that.filePath) - && Objects.equals(tableProperties, that.tableProperties) && flushReason == that.flushReason; - } - - @Override - public int hashCode() { - return Objects.hash(columnFamilyId, columnFamilyName, filePath, threadId, jobId, - triggeredWritesSlowdown, triggeredWritesStop, smallestSeqno, largestSeqno, tableProperties, - flushReason); - } - - @Override - public String toString() { - return "FlushJobInfo{" - + "columnFamilyId=" + columnFamilyId + ", columnFamilyName='" + columnFamilyName + '\'' - + ", filePath='" + filePath + '\'' + ", threadId=" + threadId + ", jobId=" + jobId - + ", triggeredWritesSlowdown=" + triggeredWritesSlowdown - + ", triggeredWritesStop=" + triggeredWritesStop + ", smallestSeqno=" + smallestSeqno - + ", largestSeqno=" + largestSeqno + ", tableProperties=" + tableProperties - + ", flushReason=" + flushReason + '}'; - } -} diff --git a/java/src/main/java/org/rocksdb/FlushOptions.java b/java/src/main/java/org/rocksdb/FlushOptions.java deleted file mode 100644 index 760b515fd..000000000 --- a/java/src/main/java/org/rocksdb/FlushOptions.java +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * FlushOptions to be passed to flush operations of - * {@link org.rocksdb.RocksDB}. - */ -public class FlushOptions extends RocksObject { - static { - RocksDB.loadLibrary(); - } - - /** - * Construct a new instance of FlushOptions. - */ - public FlushOptions(){ - super(newFlushOptions()); - } - - /** - * Set if the flush operation shall block until it terminates. - * - * @param waitForFlush boolean value indicating if the flush - * operations waits for termination of the flush process. - * - * @return instance of current FlushOptions. - */ - public FlushOptions setWaitForFlush(final boolean waitForFlush) { - assert(isOwningHandle()); - setWaitForFlush(nativeHandle_, waitForFlush); - return this; - } - - /** - * Wait for flush to finished. - * - * @return boolean value indicating if the flush operation - * waits for termination of the flush process. - */ - public boolean waitForFlush() { - assert(isOwningHandle()); - return waitForFlush(nativeHandle_); - } - - /** - * Set to true so that flush would proceeds immediately even it it means - * writes will stall for the duration of the flush. - * - * Set to false so that the operation will wait until it's possible to do - * the flush without causing stall or until required flush is performed by - * someone else (foreground call or background thread). - * - * Default: false - * - * @param allowWriteStall true to allow writes to stall for flush, false - * otherwise. - * - * @return instance of current FlushOptions. 
- */ - public FlushOptions setAllowWriteStall(final boolean allowWriteStall) { - assert(isOwningHandle()); - setAllowWriteStall(nativeHandle_, allowWriteStall); - return this; - } - - /** - * Returns true if writes are allowed to stall for flushes to complete, false - * otherwise. - * - * @return true if writes are allowed to stall for flushes - */ - public boolean allowWriteStall() { - assert(isOwningHandle()); - return allowWriteStall(nativeHandle_); - } - - private native static long newFlushOptions(); - @Override protected final native void disposeInternal(final long handle); - - private native void setWaitForFlush(final long handle, - final boolean wait); - private native boolean waitForFlush(final long handle); - private native void setAllowWriteStall(final long handle, - final boolean allowWriteStall); - private native boolean allowWriteStall(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/FlushReason.java b/java/src/main/java/org/rocksdb/FlushReason.java deleted file mode 100644 index 9d486cda1..000000000 --- a/java/src/main/java/org/rocksdb/FlushReason.java +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public enum FlushReason { - OTHERS((byte) 0x00), - GET_LIVE_FILES((byte) 0x01), - SHUTDOWN((byte) 0x02), - EXTERNAL_FILE_INGESTION((byte) 0x03), - MANUAL_COMPACTION((byte) 0x04), - WRITE_BUFFER_MANAGER((byte) 0x05), - WRITE_BUFFER_FULL((byte) 0x06), - TEST((byte) 0x07), - DELETE_FILES((byte) 0x08), - AUTO_COMPACTION((byte) 0x09), - MANUAL_FLUSH((byte) 0x0a), - ERROR_RECOVERY((byte) 0xb); - - private final byte value; - - FlushReason(final byte value) { - this.value = value; - } - - /** - * Get the internal representation. - * - * @return the internal representation - */ - byte getValue() { - return value; - } - - /** - * Get the FlushReason from the internal representation value. - * - * @return the flush reason. - * - * @throws IllegalArgumentException if the value is unknown. - */ - static FlushReason fromValue(final byte value) { - for (final FlushReason flushReason : FlushReason.values()) { - if (flushReason.value == value) { - return flushReason; - } - } - - throw new IllegalArgumentException("Illegal value provided for FlushReason: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java b/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java deleted file mode 100644 index 05cc2bb90..000000000 --- a/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -/** - * The config for hash linked list memtable representation - * Such memtable contains a fix-sized array of buckets, where - * each bucket points to a sorted singly-linked - * list (or null if the bucket is empty). - * - * Note that since this mem-table representation relies on the - * key prefix, it is required to invoke one of the usePrefixExtractor - * functions to specify how to extract key prefix given a key. - * If proper prefix-extractor is not set, then RocksDB will - * use the default memtable representation (SkipList) instead - * and post a warning in the LOG. 
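The FlushOptions object in the hunk above is typically passed to RocksDB#flush. A minimal sketch under those assumptions; the class name FlushSketch and the database path are illustrative only.

import org.rocksdb.FlushOptions;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class FlushSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/flush-example");
         // Block until the memtable is persisted, and let the flush proceed
         // even if it has to stall foreground writes for its duration.
         final FlushOptions flushOptions =
             new FlushOptions().setWaitForFlush(true).setAllowWriteStall(true)) {
      db.put("key".getBytes(), "value".getBytes());
      db.flush(flushOptions);
    }
  }
}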
- */ -public class HashLinkedListMemTableConfig extends MemTableConfig { - public static final long DEFAULT_BUCKET_COUNT = 50000; - public static final long DEFAULT_HUGE_PAGE_TLB_SIZE = 0; - public static final int DEFAULT_BUCKET_ENTRIES_LOG_THRES = 4096; - public static final boolean - DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH = true; - public static final int DEFAUL_THRESHOLD_USE_SKIPLIST = 256; - - /** - * HashLinkedListMemTableConfig constructor - */ - public HashLinkedListMemTableConfig() { - bucketCount_ = DEFAULT_BUCKET_COUNT; - hugePageTlbSize_ = DEFAULT_HUGE_PAGE_TLB_SIZE; - bucketEntriesLoggingThreshold_ = DEFAULT_BUCKET_ENTRIES_LOG_THRES; - ifLogBucketDistWhenFlush_ = DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH; - thresholdUseSkiplist_ = DEFAUL_THRESHOLD_USE_SKIPLIST; - } - - /** - * Set the number of buckets in the fixed-size array used - * in the hash linked-list mem-table. - * - * @param count the number of hash buckets. - * @return the reference to the current HashLinkedListMemTableConfig. - */ - public HashLinkedListMemTableConfig setBucketCount( - final long count) { - bucketCount_ = count; - return this; - } - - /** - * Returns the number of buckets that will be used in the memtable - * created based on this config. - * - * @return the number of buckets - */ - public long bucketCount() { - return bucketCount_; - } - - /** - *

Set the size of huge tlb or allocate the hashtable bytes from - * malloc if {@code size <= 0}. - * - * The user needs to reserve huge pages for it to be allocated, - * like: {@code sysctl -w vm.nr_hugepages=20} - * - * See linux documentation/vm/hugetlbpage.txt
- * - * @param size if set to {@code <= 0} hashtable bytes from malloc - * @return the reference to the current HashLinkedListMemTableConfig. - */ - public HashLinkedListMemTableConfig setHugePageTlbSize( - final long size) { - hugePageTlbSize_ = size; - return this; - } - - /** - * Returns the size value of hugePageTlbSize. - * - * @return the hugePageTlbSize. - */ - public long hugePageTlbSize() { - return hugePageTlbSize_; - } - - /** - * If number of entries in one bucket exceeds that setting, log - * about it. - * - * @param threshold - number of entries in a single bucket before - * logging starts. - * @return the reference to the current HashLinkedListMemTableConfig. - */ - public HashLinkedListMemTableConfig - setBucketEntriesLoggingThreshold(final int threshold) { - bucketEntriesLoggingThreshold_ = threshold; - return this; - } - - /** - * Returns the maximum number of entries in one bucket before - * logging starts. - * - * @return maximum number of entries in one bucket before logging - * starts. - */ - public int bucketEntriesLoggingThreshold() { - return bucketEntriesLoggingThreshold_; - } - - /** - * If true the distrubition of number of entries will be logged. - * - * @param logDistribution - boolean parameter indicating if number - * of entry distribution shall be logged. - * @return the reference to the current HashLinkedListMemTableConfig. - */ - public HashLinkedListMemTableConfig - setIfLogBucketDistWhenFlush(final boolean logDistribution) { - ifLogBucketDistWhenFlush_ = logDistribution; - return this; - } - - /** - * Returns information about logging the distribution of - * number of entries on flush. - * - * @return if distribution of number of entries shall be logged. - */ - public boolean ifLogBucketDistWhenFlush() { - return ifLogBucketDistWhenFlush_; - } - - /** - * Set maximum number of entries in one bucket. Exceeding this val - * leads to a switch from LinkedList to SkipList. - * - * @param threshold maximum number of entries before SkipList is - * used. - * @return the reference to the current HashLinkedListMemTableConfig. - */ - public HashLinkedListMemTableConfig - setThresholdUseSkiplist(final int threshold) { - thresholdUseSkiplist_ = threshold; - return this; - } - - /** - * Returns entries per bucket threshold before LinkedList is - * replaced by SkipList usage for that bucket. - * - * @return entries per bucket threshold before SkipList is used. - */ - public int thresholdUseSkiplist() { - return thresholdUseSkiplist_; - } - - @Override protected long newMemTableFactoryHandle() { - return newMemTableFactoryHandle(bucketCount_, hugePageTlbSize_, - bucketEntriesLoggingThreshold_, ifLogBucketDistWhenFlush_, - thresholdUseSkiplist_); - } - - private native long newMemTableFactoryHandle(long bucketCount, - long hugePageTlbSize, int bucketEntriesLoggingThreshold, - boolean ifLogBucketDistWhenFlush, int thresholdUseSkiplist) - throws IllegalArgumentException; - - private long bucketCount_; - private long hugePageTlbSize_; - private int bucketEntriesLoggingThreshold_; - private boolean ifLogBucketDistWhenFlush_; - private int thresholdUseSkiplist_; -} diff --git a/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java b/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java deleted file mode 100644 index efc78b14e..000000000 --- a/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-package org.rocksdb; - -/** - * The config for hash skip-list mem-table representation. - * Such mem-table representation contains a fix-sized array of - * buckets, where each bucket points to a skiplist (or null if the - * bucket is empty). - * - * Note that since this mem-table representation relies on the - * key prefix, it is required to invoke one of the usePrefixExtractor - * functions to specify how to extract key prefix given a key. - * If proper prefix-extractor is not set, then RocksDB will - * use the default memtable representation (SkipList) instead - * and post a warning in the LOG. - */ -public class HashSkipListMemTableConfig extends MemTableConfig { - public static final int DEFAULT_BUCKET_COUNT = 1000000; - public static final int DEFAULT_BRANCHING_FACTOR = 4; - public static final int DEFAULT_HEIGHT = 4; - - /** - * HashSkipListMemTableConfig constructor - */ - public HashSkipListMemTableConfig() { - bucketCount_ = DEFAULT_BUCKET_COUNT; - branchingFactor_ = DEFAULT_BRANCHING_FACTOR; - height_ = DEFAULT_HEIGHT; - } - - /** - * Set the number of hash buckets used in the hash skiplist memtable. - * Default = 1000000. - * - * @param count the number of hash buckets used in the hash - * skiplist memtable. - * @return the reference to the current HashSkipListMemTableConfig. - */ - public HashSkipListMemTableConfig setBucketCount( - final long count) { - bucketCount_ = count; - return this; - } - - /** - * @return the number of hash buckets - */ - public long bucketCount() { - return bucketCount_; - } - - /** - * Set the height of the skip list. Default = 4. - * - * @param height height to set. - * - * @return the reference to the current HashSkipListMemTableConfig. - */ - public HashSkipListMemTableConfig setHeight(final int height) { - height_ = height; - return this; - } - - /** - * @return the height of the skip list. - */ - public int height() { - return height_; - } - - /** - * Set the branching factor used in the hash skip-list memtable. - * This factor controls the probabilistic size ratio between adjacent - * links in the skip list. - * - * @param bf the probabilistic size ratio between adjacent link - * lists in the skip list. - * @return the reference to the current HashSkipListMemTableConfig. - */ - public HashSkipListMemTableConfig setBranchingFactor( - final int bf) { - branchingFactor_ = bf; - return this; - } - - /** - * @return branching factor, the probabilistic size ratio between - * adjacent links in the skip list. - */ - public int branchingFactor() { - return branchingFactor_; - } - - @Override protected long newMemTableFactoryHandle() { - return newMemTableFactoryHandle( - bucketCount_, height_, branchingFactor_); - } - - private native long newMemTableFactoryHandle( - long bucketCount, int height, int branchingFactor) - throws IllegalArgumentException; - - private long bucketCount_; - private int branchingFactor_; - private int height_; -} diff --git a/java/src/main/java/org/rocksdb/HistogramData.java b/java/src/main/java/org/rocksdb/HistogramData.java deleted file mode 100644 index 81d890883..000000000 --- a/java/src/main/java/org/rocksdb/HistogramData.java +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
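Both hash-based memtable configs removed above only take effect when a prefix extractor has been configured, as their Javadoc notes. A minimal sketch, assuming Options#useFixedLengthPrefixExtractor and Options#setMemTableConfig are available; the class name, path, and prefix length are illustrative, and the bucket/height/branching values simply restate the documented defaults.

import org.rocksdb.HashSkipListMemTableConfig;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class HashMemTableSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             // Without a prefix extractor RocksDB falls back to the default
             // SkipList memtable and only posts a warning in the LOG.
             .useFixedLengthPrefixExtractor(8)
             .setMemTableConfig(new HashSkipListMemTableConfig()
                 .setBucketCount(1000000)
                 .setHeight(4)
                 .setBranchingFactor(4));
         final RocksDB db = RocksDB.open(options, "/tmp/hash-memtable-example")) {
      db.put("prefix01key1".getBytes(), "value".getBytes());
    }
  }
}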
- -package org.rocksdb; - -public class HistogramData { - private final double median_; - private final double percentile95_; - private final double percentile99_; - private final double average_; - private final double standardDeviation_; - private final double max_; - private final long count_; - private final long sum_; - private final double min_; - - public HistogramData(final double median, final double percentile95, - final double percentile99, final double average, - final double standardDeviation) { - this(median, percentile95, percentile99, average, standardDeviation, 0.0, 0, 0, 0.0); - } - - public HistogramData(final double median, final double percentile95, - final double percentile99, final double average, - final double standardDeviation, final double max, final long count, - final long sum, final double min) { - median_ = median; - percentile95_ = percentile95; - percentile99_ = percentile99; - average_ = average; - standardDeviation_ = standardDeviation; - min_ = min; - max_ = max; - count_ = count; - sum_ = sum; - } - - public double getMedian() { - return median_; - } - - public double getPercentile95() { - return percentile95_; - } - - public double getPercentile99() { - return percentile99_; - } - - public double getAverage() { - return average_; - } - - public double getStandardDeviation() { - return standardDeviation_; - } - - public double getMax() { - return max_; - } - - public long getCount() { - return count_; - } - - public long getSum() { - return sum_; - } - - public double getMin() { - return min_; - } -} diff --git a/java/src/main/java/org/rocksdb/HistogramType.java b/java/src/main/java/org/rocksdb/HistogramType.java deleted file mode 100644 index 20c54422c..000000000 --- a/java/src/main/java/org/rocksdb/HistogramType.java +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public enum HistogramType { - - DB_GET((byte) 0x0), - - DB_WRITE((byte) 0x1), - - COMPACTION_TIME((byte) 0x2), - - SUBCOMPACTION_SETUP_TIME((byte) 0x3), - - TABLE_SYNC_MICROS((byte) 0x4), - - COMPACTION_OUTFILE_SYNC_MICROS((byte) 0x5), - - WAL_FILE_SYNC_MICROS((byte) 0x6), - - MANIFEST_FILE_SYNC_MICROS((byte) 0x7), - - /** - * TIME SPENT IN IO DURING TABLE OPEN. - */ - TABLE_OPEN_IO_MICROS((byte) 0x8), - - DB_MULTIGET((byte) 0x9), - - READ_BLOCK_COMPACTION_MICROS((byte) 0xA), - - READ_BLOCK_GET_MICROS((byte) 0xB), - - WRITE_RAW_BLOCK_MICROS((byte) 0xC), - - NUM_FILES_IN_SINGLE_COMPACTION((byte) 0x12), - - DB_SEEK((byte) 0x13), - - WRITE_STALL((byte) 0x14), - - SST_READ_MICROS((byte) 0x15), - - /** - * The number of subcompactions actually scheduled during a compaction. - */ - NUM_SUBCOMPACTIONS_SCHEDULED((byte) 0x16), - - /** - * Value size distribution in each operation. - */ - BYTES_PER_READ((byte) 0x17), - BYTES_PER_WRITE((byte) 0x18), - BYTES_PER_MULTIGET((byte) 0x19), - - /** - * number of bytes compressed. - */ - BYTES_COMPRESSED((byte) 0x1A), - - /** - * number of bytes decompressed. - * - * number of bytes is when uncompressed; i.e. before/after respectively - */ - BYTES_DECOMPRESSED((byte) 0x1B), - - COMPRESSION_TIMES_NANOS((byte) 0x1C), - - DECOMPRESSION_TIMES_NANOS((byte) 0x1D), - - READ_NUM_MERGE_OPERANDS((byte) 0x1E), - - /** - * Time spent flushing memtable to disk. 
- */ - FLUSH_TIME((byte) 0x20), - - /** - * Size of keys written to BlobDB. - */ - BLOB_DB_KEY_SIZE((byte) 0x21), - - /** - * Size of values written to BlobDB. - */ - BLOB_DB_VALUE_SIZE((byte) 0x22), - - /** - * BlobDB Put/PutWithTTL/PutUntil/Write latency. - */ - BLOB_DB_WRITE_MICROS((byte) 0x23), - - /** - * BlobDB Get lagency. - */ - BLOB_DB_GET_MICROS((byte) 0x24), - - /** - * BlobDB MultiGet latency. - */ - BLOB_DB_MULTIGET_MICROS((byte) 0x25), - - /** - * BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency. - */ - BLOB_DB_SEEK_MICROS((byte) 0x26), - - /** - * BlobDB Next latency. - */ - BLOB_DB_NEXT_MICROS((byte) 0x27), - - /** - * BlobDB Prev latency. - */ - BLOB_DB_PREV_MICROS((byte) 0x28), - - /** - * Blob file write latency. - */ - BLOB_DB_BLOB_FILE_WRITE_MICROS((byte) 0x29), - - /** - * Blob file read latency. - */ - BLOB_DB_BLOB_FILE_READ_MICROS((byte) 0x2A), - - /** - * Blob file sync latency. - */ - BLOB_DB_BLOB_FILE_SYNC_MICROS((byte) 0x2B), - - /** - * BlobDB compression time. - */ - BLOB_DB_COMPRESSION_MICROS((byte) 0x2D), - - /** - * BlobDB decompression time. - */ - BLOB_DB_DECOMPRESSION_MICROS((byte) 0x2E), - - /** - * Num of Index and Filter blocks read from file system per level in MultiGet - * request - */ - NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL((byte) 0x2F), - - /** - * Num of SST files read from file system per level in MultiGet request. - */ - NUM_SST_READ_PER_LEVEL((byte) 0x31), - - /** - * The number of retry in auto resume - */ - ERROR_HANDLER_AUTORESUME_RETRY_COUNT((byte) 0x32), - - ASYNC_READ_BYTES((byte) 0x33), - - /** - * Number of bytes read for RocksDB's prefetching contents - * (as opposed to file system's prefetch) - * from the end of SST table during block based table open - */ - TABLE_OPEN_PREFETCH_TAIL_READ_BYTES((byte) 0x39), - - // 0x1F for backwards compatibility on current minor version. - HISTOGRAM_ENUM_MAX((byte) 0x1F); - - private final byte value; - - HistogramType(final byte value) { - this.value = value; - } - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - * Get Histogram type by byte value. - * - * @param value byte representation of HistogramType. - * - * @return {@link org.rocksdb.HistogramType} instance. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. - */ - public static HistogramType getHistogramType(final byte value) { - for (final HistogramType histogramType : HistogramType.values()) { - if (histogramType.getValue() == value) { - return histogramType; - } - } - throw new IllegalArgumentException( - "Illegal value provided for HistogramType."); - } -} diff --git a/java/src/main/java/org/rocksdb/Holder.java b/java/src/main/java/org/rocksdb/Holder.java deleted file mode 100644 index 716a0bda0..000000000 --- a/java/src/main/java/org/rocksdb/Holder.java +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2016, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Simple instance reference wrapper. - */ -public class Holder { - private /* @Nullable */ T value; - - /** - * Constructs a new Holder with null instance. - */ - public Holder() { - } - - /** - * Constructs a new Holder. 
- * - * @param value the instance or null - */ - public Holder(/* @Nullable */ final T value) { - this.value = value; - } - - /** - * Get the instance reference. - * - * @return value the instance reference or null - */ - public /* @Nullable */ T getValue() { - return value; - } - - /** - * Set the instance reference. - * - * @param value the instance reference or null - */ - public void setValue(/* @Nullable */ final T value) { - this.value = value; - } -} diff --git a/java/src/main/java/org/rocksdb/IndexShorteningMode.java b/java/src/main/java/org/rocksdb/IndexShorteningMode.java deleted file mode 100644 index a68346c38..000000000 --- a/java/src/main/java/org/rocksdb/IndexShorteningMode.java +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * This enum allows trading off increased index size for improved iterator - * seek performance in some situations, particularly when block cache is - * disabled ({@link ReadOptions#fillCache()} == false and direct IO is - * enabled ({@link DBOptions#useDirectReads()} == true). - * The default mode is the best tradeoff for most use cases. - * This option only affects newly written tables. - * - * The index contains a key separating each pair of consecutive blocks. - * Let A be the highest key in one block, B the lowest key in the next block, - * and I the index entry separating these two blocks: - * [ ... A] I [B ...] - * I is allowed to be anywhere in [A, B). - * If an iterator is seeked to a key in (A, I], we'll unnecessarily read the - * first block, then immediately fall through to the second block. - * However, if I=A, this can't happen, and we'll read only the second block. - * In kNoShortening mode, we use I=A. In other modes, we use the shortest - * key in [A, B), which usually significantly reduces index size. - * - * There's a similar story for the last index entry, which is an upper bound - * of the highest key in the file. If it's shortened and therefore - * overestimated, iterator is likely to unnecessarily read the last data block - * from each file on each seek. - */ -public enum IndexShorteningMode { - /** - * Use full keys. - */ - kNoShortening((byte) 0), - /** - * Shorten index keys between blocks, but use full key for the last index - * key, which is the upper bound of the whole file. - */ - kShortenSeparators((byte) 1), - /** - * Shorten both keys between blocks and key after last block. - */ - kShortenSeparatorsAndSuccessor((byte) 2); - - private final byte value; - - IndexShorteningMode(final byte value) { - this.value = value; - } - - /** - * Returns the byte value of the enumerations value. - * - * @return byte representation - */ - byte getValue() { - return value; - } -} diff --git a/java/src/main/java/org/rocksdb/IndexType.java b/java/src/main/java/org/rocksdb/IndexType.java deleted file mode 100644 index 162edad1b..000000000 --- a/java/src/main/java/org/rocksdb/IndexType.java +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * IndexType used in conjunction with BlockBasedTable. 
- */ -public enum IndexType { - /** - * A space efficient index block that is optimized for - * binary-search-based index. - */ - kBinarySearch((byte) 0), - /** - * The hash index, if enabled, will do the hash lookup when - * {@code Options.prefix_extractor} is provided. - */ - kHashSearch((byte) 1), - /** - * A two-level index implementation. Both levels are binary search indexes. - */ - kTwoLevelIndexSearch((byte) 2), - /** - * Like {@link #kBinarySearch}, but index also contains first key of each block. - * This allows iterators to defer reading the block until it's actually - * needed. May significantly reduce read amplification of short range scans. - * Without it, iterator seek usually reads one block from each level-0 file - * and from each level, which may be expensive. - * Works best in combination with: - * - IndexShorteningMode::kNoShortening, - * - custom FlushBlockPolicy to cut blocks at some meaningful boundaries, - * e.g. when prefix changes. - * Makes the index significantly bigger (2x or more), especially when keys - * are long. - */ - kBinarySearchWithFirstKey((byte) 3); - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value_; - } - - IndexType(byte value) { - value_ = value; - } - - private final byte value_; -} diff --git a/java/src/main/java/org/rocksdb/InfoLogLevel.java b/java/src/main/java/org/rocksdb/InfoLogLevel.java deleted file mode 100644 index b7c0f0700..000000000 --- a/java/src/main/java/org/rocksdb/InfoLogLevel.java +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -/** - * RocksDB log levels. - */ -public enum InfoLogLevel { - DEBUG_LEVEL((byte)0), - INFO_LEVEL((byte)1), - WARN_LEVEL((byte)2), - ERROR_LEVEL((byte)3), - FATAL_LEVEL((byte)4), - HEADER_LEVEL((byte)5), - NUM_INFO_LOG_LEVELS((byte)6); - - private final byte value_; - - private InfoLogLevel(final byte value) { - value_ = value; - } - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value_; - } - - /** - * Get InfoLogLevel by byte value. - * - * @param value byte representation of InfoLogLevel. - * - * @return {@link org.rocksdb.InfoLogLevel} instance. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. - */ - public static InfoLogLevel getInfoLogLevel(final byte value) { - for (final InfoLogLevel infoLogLevel : InfoLogLevel.values()) { - if (infoLogLevel.getValue() == value) { - return infoLogLevel; - } - } - throw new IllegalArgumentException( - "Illegal value provided for InfoLogLevel."); - } -} diff --git a/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java b/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java deleted file mode 100644 index a6a308daa..000000000 --- a/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java +++ /dev/null @@ -1,227 +0,0 @@ -package org.rocksdb; -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -import java.util.List; - -/** - * IngestExternalFileOptions is used by - * {@link RocksDB#ingestExternalFile(ColumnFamilyHandle, List, IngestExternalFileOptions)}. 
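IndexShorteningMode and IndexType from the hunks above are applied through the block-based table options. A minimal sketch, assuming BlockBasedTableConfig exposes setIndexType and setIndexShortening setters and that Options#setTableFormatConfig is available; the class name and path are illustrative. It pairs kBinarySearchWithFirstKey with kNoShortening, the combination the IndexType Javadoc recommends for short range scans.

import org.rocksdb.BlockBasedTableConfig;
import org.rocksdb.IndexShorteningMode;
import org.rocksdb.IndexType;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class IndexTuningSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    final BlockBasedTableConfig tableConfig = new BlockBasedTableConfig();
    // Keep index entries full so a seek never reads the preceding block.
    tableConfig.setIndexShortening(IndexShorteningMode.kNoShortening);
    // Store the first key of each block in the index so short range scans
    // can defer reading data blocks until they are actually needed.
    tableConfig.setIndexType(IndexType.kBinarySearchWithFirstKey);
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             .setTableFormatConfig(tableConfig);
         final RocksDB db = RocksDB.open(options, "/tmp/index-tuning-example")) {
      db.put("a".getBytes(), "1".getBytes());
    }
  }
}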
- */ -public class IngestExternalFileOptions extends RocksObject { - - public IngestExternalFileOptions() { - super(newIngestExternalFileOptions()); - } - - /** - * @param moveFiles {@link #setMoveFiles(boolean)} - * @param snapshotConsistency {@link #setSnapshotConsistency(boolean)} - * @param allowGlobalSeqNo {@link #setAllowGlobalSeqNo(boolean)} - * @param allowBlockingFlush {@link #setAllowBlockingFlush(boolean)} - */ - public IngestExternalFileOptions(final boolean moveFiles, - final boolean snapshotConsistency, final boolean allowGlobalSeqNo, - final boolean allowBlockingFlush) { - super(newIngestExternalFileOptions(moveFiles, snapshotConsistency, - allowGlobalSeqNo, allowBlockingFlush)); - } - - /** - * Can be set to true to move the files instead of copying them. - * - * @return true if files will be moved - */ - public boolean moveFiles() { - return moveFiles(nativeHandle_); - } - - /** - * Can be set to true to move the files instead of copying them. - * - * @param moveFiles true if files should be moved instead of copied - * - * @return the reference to the current IngestExternalFileOptions. - */ - public IngestExternalFileOptions setMoveFiles(final boolean moveFiles) { - setMoveFiles(nativeHandle_, moveFiles); - return this; - } - - /** - * If set to false, an ingested file keys could appear in existing snapshots - * that where created before the file was ingested. - * - * @return true if snapshot consistency is assured - */ - public boolean snapshotConsistency() { - return snapshotConsistency(nativeHandle_); - } - - /** - * If set to false, an ingested file keys could appear in existing snapshots - * that where created before the file was ingested. - * - * @param snapshotConsistency true if snapshot consistency is required - * - * @return the reference to the current IngestExternalFileOptions. - */ - public IngestExternalFileOptions setSnapshotConsistency( - final boolean snapshotConsistency) { - setSnapshotConsistency(nativeHandle_, snapshotConsistency); - return this; - } - - /** - * If set to false, {@link RocksDB#ingestExternalFile(ColumnFamilyHandle, List, IngestExternalFileOptions)} - * will fail if the file key range overlaps with existing keys or tombstones in the DB. - * - * @return true if global seq numbers are assured - */ - public boolean allowGlobalSeqNo() { - return allowGlobalSeqNo(nativeHandle_); - } - - /** - * If set to false, {@link RocksDB#ingestExternalFile(ColumnFamilyHandle, List, IngestExternalFileOptions)} - * will fail if the file key range overlaps with existing keys or tombstones in the DB. - * - * @param allowGlobalSeqNo true if global seq numbers are required - * - * @return the reference to the current IngestExternalFileOptions. - */ - public IngestExternalFileOptions setAllowGlobalSeqNo( - final boolean allowGlobalSeqNo) { - setAllowGlobalSeqNo(nativeHandle_, allowGlobalSeqNo); - return this; - } - - /** - * If set to false and the file key range overlaps with the memtable key range - * (memtable flush required), IngestExternalFile will fail. - * - * @return true if blocking flushes may occur - */ - public boolean allowBlockingFlush() { - return allowBlockingFlush(nativeHandle_); - } - - /** - * If set to false and the file key range overlaps with the memtable key range - * (memtable flush required), IngestExternalFile will fail. - * - * @param allowBlockingFlush true if blocking flushes are allowed - * - * @return the reference to the current IngestExternalFileOptions. 
- */ - public IngestExternalFileOptions setAllowBlockingFlush( - final boolean allowBlockingFlush) { - setAllowBlockingFlush(nativeHandle_, allowBlockingFlush); - return this; - } - - /** - * Returns true if duplicate keys in the file being ingested are - * to be skipped rather than overwriting existing data under that key. - * - * @return true if duplicate keys in the file being ingested are to be - * skipped, false otherwise. - */ - public boolean ingestBehind() { - return ingestBehind(nativeHandle_); - } - - /** - * Set to true if you would like duplicate keys in the file being ingested - * to be skipped rather than overwriting existing data under that key. - * - * Usecase: back-fill of some historical data in the database without - * over-writing existing newer version of data. - * - * This option could only be used if the DB has been running - * with DBOptions#allowIngestBehind() == true since the dawn of time. - * - * All files will be ingested at the bottommost level with seqno=0. - * - * Default: false - * - * @param ingestBehind true if you would like duplicate keys in the file being - * ingested to be skipped. - * - * @return the reference to the current IngestExternalFileOptions. - */ - public IngestExternalFileOptions setIngestBehind(final boolean ingestBehind) { - setIngestBehind(nativeHandle_, ingestBehind); - return this; - } - - /** - * Returns true write if the global_seqno is written to a given offset - * in the external SST file for backward compatibility. - * - * See {@link #setWriteGlobalSeqno(boolean)}. - * - * @return true if the global_seqno is written to a given offset, - * false otherwise. - */ - public boolean writeGlobalSeqno() { - return writeGlobalSeqno(nativeHandle_); - } - - /** - * Set to true if you would like to write the global_seqno to a given offset - * in the external SST file for backward compatibility. - * - * Older versions of RocksDB write the global_seqno to a given offset within - * the ingested SST files, and new versions of RocksDB do not. - * - * If you ingest an external SST using new version of RocksDB and would like - * to be able to downgrade to an older version of RocksDB, you should set - * {@link #writeGlobalSeqno()} to true. - * - * If your service is just starting to use the new RocksDB, we recommend that - * you set this option to false, which brings two benefits: - * 1. No extra random write for global_seqno during ingestion. - * 2. Without writing external SST file, it's possible to do checksum. - * - * We have a plan to set this option to false by default in the future. - * - * Default: true - * - * @param writeGlobalSeqno true to write the gloal_seqno to a given offset, - * false otherwise - * - * @return the reference to the current IngestExternalFileOptions. 
- */ - public IngestExternalFileOptions setWriteGlobalSeqno( - final boolean writeGlobalSeqno) { - setWriteGlobalSeqno(nativeHandle_, writeGlobalSeqno); - return this; - } - - private native static long newIngestExternalFileOptions(); - private native static long newIngestExternalFileOptions( - final boolean moveFiles, final boolean snapshotConsistency, - final boolean allowGlobalSeqNo, final boolean allowBlockingFlush); - @Override protected final native void disposeInternal(final long handle); - - private native boolean moveFiles(final long handle); - private native void setMoveFiles(final long handle, final boolean move_files); - private native boolean snapshotConsistency(final long handle); - private native void setSnapshotConsistency(final long handle, - final boolean snapshotConsistency); - private native boolean allowGlobalSeqNo(final long handle); - private native void setAllowGlobalSeqNo(final long handle, - final boolean allowGloablSeqNo); - private native boolean allowBlockingFlush(final long handle); - private native void setAllowBlockingFlush(final long handle, - final boolean allowBlockingFlush); - private native boolean ingestBehind(final long handle); - private native void setIngestBehind(final long handle, - final boolean ingestBehind); - private native boolean writeGlobalSeqno(final long handle); - private native void setWriteGlobalSeqno(final long handle, - final boolean writeGlobalSeqNo); -} diff --git a/java/src/main/java/org/rocksdb/KeyMayExist.java b/java/src/main/java/org/rocksdb/KeyMayExist.java deleted file mode 100644 index 36185d8c9..000000000 --- a/java/src/main/java/org/rocksdb/KeyMayExist.java +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Objects; - -public class KeyMayExist { - @Override - public boolean equals(final Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - final KeyMayExist that = (KeyMayExist) o; - return (valueLength == that.valueLength && exists == that.exists); - } - - @Override - public int hashCode() { - return Objects.hash(exists, valueLength); - } - - public enum KeyMayExistEnum { kNotExist, kExistsWithoutValue, kExistsWithValue } - ; - - public KeyMayExist(final KeyMayExistEnum exists, final int valueLength) { - this.exists = exists; - this.valueLength = valueLength; - } - - public final KeyMayExistEnum exists; - public final int valueLength; -} diff --git a/java/src/main/java/org/rocksdb/LRUCache.java b/java/src/main/java/org/rocksdb/LRUCache.java deleted file mode 100644 index db90b17c5..000000000 --- a/java/src/main/java/org/rocksdb/LRUCache.java +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
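IngestExternalFileOptions above is consumed by RocksDB#ingestExternalFile. A minimal end-to-end sketch, assuming SstFileWriter and EnvOptions from the same package are used to produce the external file; the class name and the /tmp paths are illustrative only.

import java.util.Arrays;
import org.rocksdb.EnvOptions;
import org.rocksdb.IngestExternalFileOptions;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.SstFileWriter;

public class IngestSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    final String sstPath = "/tmp/external.sst"; // illustrative path
    // Write a small external SST file (keys must be added in ascending order).
    try (final Options options = new Options().setCreateIfMissing(true);
         final EnvOptions envOptions = new EnvOptions();
         final SstFileWriter writer = new SstFileWriter(envOptions, options)) {
      writer.open(sstPath);
      writer.put("k1".getBytes(), "v1".getBytes());
      writer.finish();
    }
    // Ingest it, moving the file into the DB rather than copying it.
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/ingest-example");
         final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()
             .setMoveFiles(true)
             .setSnapshotConsistency(true)
             .setAllowBlockingFlush(true)) {
      db.ingestExternalFile(Arrays.asList(sstPath), ingestOptions);
    }
  }
}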
- -package org.rocksdb; - -/** - * Least Recently Used Cache - */ -public class LRUCache extends Cache { - - /** - * Create a new cache with a fixed size capacity - * - * @param capacity The fixed size capacity of the cache - */ - public LRUCache(final long capacity) { - this(capacity, -1, false, 0.0, 0.0); - } - - /** - * Create a new cache with a fixed size capacity. The cache is sharded - * to 2^numShardBits shards, by hash of the key. The total capacity - * is divided and evenly assigned to each shard. - * numShardBits = -1 means it is automatically determined: every shard - * will be at least 512KB and number of shard bits will not exceed 6. - * - * @param capacity The fixed size capacity of the cache - * @param numShardBits The cache is sharded to 2^numShardBits shards, - * by hash of the key - */ - public LRUCache(final long capacity, final int numShardBits) { - super(newLRUCache(capacity, numShardBits, false, 0.0, 0.0)); - } - - /** - * Create a new cache with a fixed size capacity. The cache is sharded - * to 2^numShardBits shards, by hash of the key. The total capacity - * is divided and evenly assigned to each shard. If strictCapacityLimit - * is set, insert to the cache will fail when cache is full. - * numShardBits = -1 means it is automatically determined: every shard - * will be at least 512KB and number of shard bits will not exceed 6. - * - * @param capacity The fixed size capacity of the cache - * @param numShardBits The cache is sharded to 2^numShardBits shards, - * by hash of the key - * @param strictCapacityLimit insert to the cache will fail when cache is full - */ - public LRUCache(final long capacity, final int numShardBits, - final boolean strictCapacityLimit) { - super(newLRUCache(capacity, numShardBits, strictCapacityLimit, 0.0, 0.0)); - } - - /** - * Create a new cache with a fixed size capacity. The cache is sharded - * to 2^numShardBits shards, by hash of the key. The total capacity - * is divided and evenly assigned to each shard. If strictCapacityLimit - * is set, insert to the cache will fail when cache is full. User can also - * set percentage of the cache reserves for high priority entries via - * highPriPoolRatio. - * numShardBits = -1 means it is automatically determined: every shard - * will be at least 512KB and number of shard bits will not exceed 6. - * - * @param capacity The fixed size capacity of the cache - * @param numShardBits The cache is sharded to 2^numShardBits shards, - * by hash of the key - * @param strictCapacityLimit insert to the cache will fail when cache is full - * @param highPriPoolRatio percentage of the cache reserves for high priority - * entries - */ - public LRUCache(final long capacity, final int numShardBits, final boolean strictCapacityLimit, - final double highPriPoolRatio) { - super(newLRUCache(capacity, numShardBits, strictCapacityLimit, highPriPoolRatio, 0.0)); - } - - /** - * Create a new cache with a fixed size capacity. The cache is sharded - * to 2^numShardBits shards, by hash of the key. The total capacity - * is divided and evenly assigned to each shard. If strictCapacityLimit - * is set, insert to the cache will fail when cache is full. User can also - * set percentage of the cache reserves for high priority entries and low - * priority entries via highPriPoolRatio and lowPriPoolRatio. - * numShardBits = -1 means it is automatically determined: every shard - * will be at least 512KB and number of shard bits will not exceed 6. 
- * - * @param capacity The fixed size capacity of the cache - * @param numShardBits The cache is sharded to 2^numShardBits shards, - * by hash of the key - * @param strictCapacityLimit insert to the cache will fail when cache is full - * @param highPriPoolRatio percentage of the cache reserves for high priority - * entries - * @param lowPriPoolRatio percentage of the cache reserves for low priority - * entries - */ - public LRUCache(final long capacity, final int numShardBits, final boolean strictCapacityLimit, - final double highPriPoolRatio, final double lowPriPoolRatio) { - super(newLRUCache( - capacity, numShardBits, strictCapacityLimit, highPriPoolRatio, lowPriPoolRatio)); - } - - private native static long newLRUCache(final long capacity, final int numShardBits, - final boolean strictCapacityLimit, final double highPriPoolRatio, - final double lowPriPoolRatio); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/LevelMetaData.java b/java/src/main/java/org/rocksdb/LevelMetaData.java deleted file mode 100644 index c5685098b..000000000 --- a/java/src/main/java/org/rocksdb/LevelMetaData.java +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Arrays; -import java.util.List; - -/** - * The metadata that describes a level. - */ -public class LevelMetaData { - private final int level; - private final long size; - private final SstFileMetaData[] files; - - /** - * Called from JNI C++ - */ - private LevelMetaData(final int level, final long size, - final SstFileMetaData[] files) { - this.level = level; - this.size = size; - this.files = files; - } - - /** - * The level which this meta data describes. - * - * @return the level - */ - public int level() { - return level; - } - - /** - * The size of this level in bytes, which is equal to the sum of - * the file size of its {@link #files()}. - * - * @return the size - */ - public long size() { - return size; - } - - /** - * The metadata of all sst files in this level. - * - * @return the metadata of the files - */ - public List files() { - return Arrays.asList(files); - } -} diff --git a/java/src/main/java/org/rocksdb/LiveFileMetaData.java b/java/src/main/java/org/rocksdb/LiveFileMetaData.java deleted file mode 100644 index 35d883e18..000000000 --- a/java/src/main/java/org/rocksdb/LiveFileMetaData.java +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The full set of metadata associated with each SST file. 
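To make the LRUCache constructor parameters above concrete, here is a small sketch: a sharded cache with a high-priority pool, wired into the block-based table format. The BlockBasedTableConfig/Options wiring is the usual RocksJava pattern and is an assumption here, not part of this diff:

    // Sketch: 256 MB LRU block cache, 6 shard bits (2^6 = 64 shards), strict capacity
    // limit, and 10% of capacity reserved for high-priority entries.
    final Cache blockCache = new LRUCache(256L * 1024 * 1024, 6, true, 0.1);
    final BlockBasedTableConfig tableConfig = new BlockBasedTableConfig()
        .setBlockCache(blockCache);            // assumed setter on the table config
    final Options options = new Options()
        .setCreateIfMissing(true)
        .setTableFormatConfig(tableConfig);
    // `options` can now be passed to RocksDB.open(...).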
- */ -public class LiveFileMetaData extends SstFileMetaData { - private final byte[] columnFamilyName; - private final int level; - - /** - * Called from JNI C++ - */ - private LiveFileMetaData( - final byte[] columnFamilyName, - final int level, - final String fileName, - final String path, - final long size, - final long smallestSeqno, - final long largestSeqno, - final byte[] smallestKey, - final byte[] largestKey, - final long numReadsSampled, - final boolean beingCompacted, - final long numEntries, - final long numDeletions) { - super(fileName, path, size, smallestSeqno, largestSeqno, smallestKey, - largestKey, numReadsSampled, beingCompacted, numEntries, numDeletions); - this.columnFamilyName = columnFamilyName; - this.level = level; - } - - /** - * Get the name of the column family. - * - * @return the name of the column family - */ - public byte[] columnFamilyName() { - return columnFamilyName; - } - - /** - * Get the level at which this file resides. - * - * @return the level at which the file resides. - */ - public int level() { - return level; - } -} diff --git a/java/src/main/java/org/rocksdb/LogFile.java b/java/src/main/java/org/rocksdb/LogFile.java deleted file mode 100644 index ef24a6427..000000000 --- a/java/src/main/java/org/rocksdb/LogFile.java +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public class LogFile { - private final String pathName; - private final long logNumber; - private final WalFileType type; - private final long startSequence; - private final long sizeFileBytes; - - /** - * Called from JNI C++ - */ - private LogFile(final String pathName, final long logNumber, - final byte walFileTypeValue, final long startSequence, - final long sizeFileBytes) { - this.pathName = pathName; - this.logNumber = logNumber; - this.type = WalFileType.fromValue(walFileTypeValue); - this.startSequence = startSequence; - this.sizeFileBytes = sizeFileBytes; - } - - /** - * Returns log file's pathname relative to the main db dir - * Eg. For a live-log-file = /000003.log - * For an archived-log-file = /archive/000003.log - * - * @return log file's pathname - */ - public String pathName() { - return pathName; - } - - /** - * Primary identifier for log file. - * This is directly proportional to creation time of the log file - * - * @return the log number - */ - public long logNumber() { - return logNumber; - } - - /** - * Log file can be either alive or archived. - * - * @return the type of the log file. - */ - public WalFileType type() { - return type; - } - - /** - * Starting sequence number of writebatch written in this log file. - * - * @return the stating sequence number - */ - public long startSequence() { - return startSequence; - } - - /** - * Size of log file on disk in Bytes. - * - * @return size of log file - */ - public long sizeFileBytes() { - return sizeFileBytes; - } -} diff --git a/java/src/main/java/org/rocksdb/Logger.java b/java/src/main/java/org/rocksdb/Logger.java deleted file mode 100644 index 00a5d5674..000000000 --- a/java/src/main/java/org/rocksdb/Logger.java +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
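LiveFileMetaData and LogFile, both deleted above, are read-only value objects produced by database introspection. A short consumption sketch, assuming an open RocksDB handle `db` and the usual RocksJava accessors getLiveFilesMetaData() and getSortedWalFiles():

    // Sketch: list the SST files and WAL files of an open database.
    for (final LiveFileMetaData sst : db.getLiveFilesMetaData()) {
      System.out.printf("cf=%s level=%d file=%s bytes=%d%n",
          new String(sst.columnFamilyName(), java.nio.charset.StandardCharsets.UTF_8),
          sst.level(), sst.fileName(), sst.size());
    }
    for (final LogFile wal : db.getSortedWalFiles()) {   // throws RocksDBException
      System.out.printf("wal=%s type=%s startSeq=%d bytes=%d%n",
          wal.pathName(), wal.type(), wal.startSequence(), wal.sizeFileBytes());
    }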
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - *

This class provides a custom logger functionality - * in Java which wraps {@code RocksDB} logging facilities. - * - * Using this class RocksDB can log with common - * Java logging APIs like Log4j or Slf4j without keeping - * database logs in the filesystem. - * - * Performance - * There are certain performance penalties when using a Java - * {@code Logger} implementation within production code. - * - * A log level can be set using {@link org.rocksdb.Options} or - * {@link Logger#setInfoLogLevel(InfoLogLevel)}. The set log level - * influences the underlying native code. Each log message is - * checked against the set log level and if the log level is more - * verbose than the set log level, native allocations will be made - * and data structures are allocated. - * - * Every log message which will be emitted by native code will - * trigger expensive native to Java transitions. So the preferred - * setting for production use is either - * {@link org.rocksdb.InfoLogLevel#ERROR_LEVEL} or - * {@link org.rocksdb.InfoLogLevel#FATAL_LEVEL}.
- */ -public abstract class Logger extends RocksCallbackObject { - - private final static long WITH_OPTIONS = 0; - private final static long WITH_DBOPTIONS = 1; - - /** - *

AbstractLogger constructor. - * - * Important: the log level set within - * the {@link org.rocksdb.Options} instance will be used as - * the maximum log level of RocksDB. - * - * @param options {@link org.rocksdb.Options} instance. - */ - public Logger(final Options options) { - super(options.nativeHandle_, WITH_OPTIONS); - } - - /** - * AbstractLogger constructor. - * - * Important: the log level set within - * the {@link org.rocksdb.DBOptions} instance will be used - * as the maximum log level of RocksDB.
- * - * @param dboptions {@link org.rocksdb.DBOptions} instance. - */ - public Logger(final DBOptions dboptions) { - super(dboptions.nativeHandle_, WITH_DBOPTIONS); - } - - @Override - protected long initializeNative(long... nativeParameterHandles) { - if(nativeParameterHandles[1] == WITH_OPTIONS) { - return createNewLoggerOptions(nativeParameterHandles[0]); - } else if(nativeParameterHandles[1] == WITH_DBOPTIONS) { - return createNewLoggerDbOptions(nativeParameterHandles[0]); - } else { - throw new IllegalArgumentException(); - } - } - - /** - * Set {@link org.rocksdb.InfoLogLevel} to AbstractLogger. - * - * @param infoLogLevel {@link org.rocksdb.InfoLogLevel} instance. - */ - public void setInfoLogLevel(final InfoLogLevel infoLogLevel) { - setInfoLogLevel(nativeHandle_, infoLogLevel.getValue()); - } - - /** - * Return the loggers log level. - * - * @return {@link org.rocksdb.InfoLogLevel} instance. - */ - public InfoLogLevel infoLogLevel() { - return InfoLogLevel.getInfoLogLevel( - infoLogLevel(nativeHandle_)); - } - - protected abstract void log(InfoLogLevel infoLogLevel, - String logMsg); - - protected native long createNewLoggerOptions( - long options); - protected native long createNewLoggerDbOptions( - long dbOptions); - protected native void setInfoLogLevel(long handle, - byte infoLogLevel); - protected native byte infoLogLevel(long handle); - - /** - * We override {@link RocksCallbackObject#disposeInternal()} - * as disposing of a rocksdb::LoggerJniCallback requires - * a slightly different approach as it is a std::shared_ptr - */ - @Override - protected void disposeInternal() { - disposeInternal(nativeHandle_); - } - - private native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/MemTableConfig.java b/java/src/main/java/org/rocksdb/MemTableConfig.java deleted file mode 100644 index 83cee974a..000000000 --- a/java/src/main/java/org/rocksdb/MemTableConfig.java +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * MemTableConfig is used to config the internal mem-table of a RocksDB. - * It is required for each memtable to have one such sub-class to allow - * Java developers to use it. - * - * To make a RocksDB to use a specific MemTable format, its associated - * MemTableConfig should be properly set and passed into Options - * via Options.setMemTableFactory() and open the db using that Options. - * - * @see Options - */ -public abstract class MemTableConfig { - /** - * This function should only be called by Options.setMemTableConfig(), - * which will create a c++ shared-pointer to the c++ MemTableRepFactory - * that associated with the Java MemTableConfig. - * - * @see Options#setMemTableConfig(MemTableConfig) - * - * @return native handle address to native memory table instance. - */ - abstract protected long newMemTableFactoryHandle(); -} diff --git a/java/src/main/java/org/rocksdb/MemTableInfo.java b/java/src/main/java/org/rocksdb/MemTableInfo.java deleted file mode 100644 index f4fb577c3..000000000 --- a/java/src/main/java/org/rocksdb/MemTableInfo.java +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
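Because Logger (removed above) is abstract, applications subclass it and override log(); the instance is then attached to the options it was created from. A minimal sketch that forwards RocksDB's internal messages to stdout; Options.setLogger is assumed to be the registration hook:

    // Sketch: route native RocksDB log lines through a Java callback.
    try (final Options options = new Options().setCreateIfMissing(true)) {
      final Logger logger = new Logger(options) {
        @Override
        protected void log(final InfoLogLevel level, final String msg) {
          // Forward to Log4j/Slf4j in real code; println keeps the sketch self-contained.
          System.out.println("[rocksdb:" + level + "] " + msg);
        }
      };
      logger.setInfoLogLevel(InfoLogLevel.WARN_LEVEL);   // keep native-to-Java transitions rare
      options.setLogger(logger);                         // assumed hook for custom loggers
      // ... open the database with `options` as usual.
    }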
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Objects; - -public class MemTableInfo { - private final String columnFamilyName; - private final long firstSeqno; - private final long earliestSeqno; - private final long numEntries; - private final long numDeletes; - - /** - * Access is package private as this will only be constructed from - * C++ via JNI and for testing. - */ - MemTableInfo(final String columnFamilyName, final long firstSeqno, final long earliestSeqno, - final long numEntries, final long numDeletes) { - this.columnFamilyName = columnFamilyName; - this.firstSeqno = firstSeqno; - this.earliestSeqno = earliestSeqno; - this.numEntries = numEntries; - this.numDeletes = numDeletes; - } - - /** - * Get the name of the column family to which memtable belongs. - * - * @return the name of the column family. - */ - public String getColumnFamilyName() { - return columnFamilyName; - } - - /** - * Get the Sequence number of the first element that was inserted into the - * memtable. - * - * @return the sequence number of the first inserted element. - */ - public long getFirstSeqno() { - return firstSeqno; - } - - /** - * Get the Sequence number that is guaranteed to be smaller than or equal - * to the sequence number of any key that could be inserted into this - * memtable. It can then be assumed that any write with a larger(or equal) - * sequence number will be present in this memtable or a later memtable. - * - * @return the earliest sequence number. - */ - public long getEarliestSeqno() { - return earliestSeqno; - } - - /** - * Get the total number of entries in memtable. - * - * @return the total number of entries. - */ - public long getNumEntries() { - return numEntries; - } - - /** - * Get the total number of deletes in memtable. - * - * @return the total number of deletes. - */ - public long getNumDeletes() { - return numDeletes; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - MemTableInfo that = (MemTableInfo) o; - return firstSeqno == that.firstSeqno && earliestSeqno == that.earliestSeqno - && numEntries == that.numEntries && numDeletes == that.numDeletes - && Objects.equals(columnFamilyName, that.columnFamilyName); - } - - @Override - public int hashCode() { - return Objects.hash(columnFamilyName, firstSeqno, earliestSeqno, numEntries, numDeletes); - } - - @Override - public String toString() { - return "MemTableInfo{" - + "columnFamilyName='" + columnFamilyName + '\'' + ", firstSeqno=" + firstSeqno - + ", earliestSeqno=" + earliestSeqno + ", numEntries=" + numEntries - + ", numDeletes=" + numDeletes + '}'; - } -} diff --git a/java/src/main/java/org/rocksdb/MemoryUsageType.java b/java/src/main/java/org/rocksdb/MemoryUsageType.java deleted file mode 100644 index 6010ce7af..000000000 --- a/java/src/main/java/org/rocksdb/MemoryUsageType.java +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * MemoryUsageType - * - *

The value will be used as a key to indicate the type of memory usage - * described
- */ -public enum MemoryUsageType { - /** - * Memory usage of all the mem-tables. - */ - kMemTableTotal((byte) 0), - /** - * Memory usage of those un-flushed mem-tables. - */ - kMemTableUnFlushed((byte) 1), - /** - * Memory usage of all the table readers. - */ - kTableReadersTotal((byte) 2), - /** - * Memory usage by Cache. - */ - kCacheTotal((byte) 3), - /** - * Max usage types - copied to keep 1:1 with native. - */ - kNumUsageTypes((byte) 4); - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value_; - } - - /** - *

Get the MemoryUsageType enumeration value by - * passing the byte identifier to this method.
- * - * @param byteIdentifier of MemoryUsageType. - * - * @return MemoryUsageType instance. - * - * @throws IllegalArgumentException if the usage type for the byteIdentifier - * cannot be found - */ - public static MemoryUsageType getMemoryUsageType(final byte byteIdentifier) { - for (final MemoryUsageType memoryUsageType : MemoryUsageType.values()) { - if (memoryUsageType.getValue() == byteIdentifier) { - return memoryUsageType; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for MemoryUsageType."); - } - - MemoryUsageType(byte value) { - value_ = value; - } - - private final byte value_; -} diff --git a/java/src/main/java/org/rocksdb/MemoryUtil.java b/java/src/main/java/org/rocksdb/MemoryUtil.java deleted file mode 100644 index 52b2175e6..000000000 --- a/java/src/main/java/org/rocksdb/MemoryUtil.java +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.*; - -/** - * JNI passthrough for MemoryUtil. - */ -public class MemoryUtil { - - /** - *

Returns the approximate memory usage of different types in the input - * list of DBs and Cache set. For instance, in the output map the key - * kMemTableTotal will be associated with the memory - * usage of all the mem-tables from all the input rocksdb instances. - * - * Note that for memory usage inside the Cache class, we will - * only report the usage of the input "cache_set", without - * including the Cache usage of the DBs in the input list "dbs".
- * - * @param dbs List of dbs to collect memory usage for. - * @param caches Set of caches to collect memory usage for. - * @return Map from {@link MemoryUsageType} to memory usage as a {@link Long}. - */ - public static Map getApproximateMemoryUsageByType(final List dbs, final Set caches) { - int dbCount = (dbs == null) ? 0 : dbs.size(); - int cacheCount = (caches == null) ? 0 : caches.size(); - long[] dbHandles = new long[dbCount]; - long[] cacheHandles = new long[cacheCount]; - if (dbCount > 0) { - ListIterator dbIter = dbs.listIterator(); - while (dbIter.hasNext()) { - dbHandles[dbIter.nextIndex()] = dbIter.next().nativeHandle_; - } - } - if (cacheCount > 0) { - // NOTE: This index handling is super ugly but I couldn't get a clean way to track both the - // index and the iterator simultaneously within a Set. - int i = 0; - for (Cache cache : caches) { - cacheHandles[i] = cache.nativeHandle_; - i++; - } - } - Map byteOutput = getApproximateMemoryUsageByType(dbHandles, cacheHandles); - Map output = new HashMap<>(); - for(Map.Entry longEntry : byteOutput.entrySet()) { - output.put(MemoryUsageType.getMemoryUsageType(longEntry.getKey()), longEntry.getValue()); - } - return output; - } - - private native static Map getApproximateMemoryUsageByType(final long[] dbHandles, - final long[] cacheHandles); -} diff --git a/java/src/main/java/org/rocksdb/MergeOperator.java b/java/src/main/java/org/rocksdb/MergeOperator.java deleted file mode 100644 index c299f6221..000000000 --- a/java/src/main/java/org/rocksdb/MergeOperator.java +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -// Copyright (c) 2014, Vlad Balan (vlad.gm@gmail.com). All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * MergeOperator holds an operator to be applied when compacting - * two merge operands held under the same key in order to obtain a single - * value. - */ -public abstract class MergeOperator extends RocksObject { - protected MergeOperator(final long nativeHandle) { - super(nativeHandle); - } -} diff --git a/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java deleted file mode 100644 index af28fa8ce..000000000 --- a/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java +++ /dev/null @@ -1,623 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.*; - -public class MutableColumnFamilyOptions - extends AbstractMutableOptions { - - /** - * User must use builder pattern, or parser. - * - * @param keys the keys - * @param values the values - * - * See {@link #builder()} and {@link #parse(String)}. 
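MemoryUtil (deleted above) is the aggregation entry point for the MemoryUsageType keys. A brief querying sketch, assuming a live RocksDB handle `db` and an LRUCache `cache`:

    // Sketch: report approximate memory usage per MemoryUsageType.
    final java.util.Map<MemoryUsageType, Long> usage =
        MemoryUtil.getApproximateMemoryUsageByType(
            java.util.Collections.singletonList(db), java.util.Collections.singleton(cache));
    System.out.println("memtables:     " + usage.get(MemoryUsageType.kMemTableTotal));
    System.out.println("table readers: " + usage.get(MemoryUsageType.kTableReadersTotal));
    System.out.println("block cache:   " + usage.get(MemoryUsageType.kCacheTotal));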
- */ - private MutableColumnFamilyOptions(final String[] keys, - final String[] values) { - super(keys, values); - } - - /** - * Creates a builder which allows you - * to set MutableColumnFamilyOptions in a fluent - * manner - * - * @return A builder for MutableColumnFamilyOptions - */ - public static MutableColumnFamilyOptionsBuilder builder() { - return new MutableColumnFamilyOptionsBuilder(); - } - - /** - * Parses a String representation of MutableColumnFamilyOptions - * - * The format is: key1=value1;key2=value2;key3=value3 etc - * - * For int[] values, each int should be separated by a colon, e.g. - * - * key1=value1;intArrayKey1=1:2:3 - * - * @param str The string representation of the mutable column family options - * @param ignoreUnknown what to do if the key is not one of the keys we expect - * - * @return A builder for the mutable column family options - */ - public static MutableColumnFamilyOptionsBuilder parse( - final String str, final boolean ignoreUnknown) { - Objects.requireNonNull(str); - - final List parsedOptions = OptionString.Parser.parse(str); - return new MutableColumnFamilyOptionsBuilder().fromParsed(parsedOptions, ignoreUnknown); - } - - public static MutableColumnFamilyOptionsBuilder parse(final String str) { - return parse(str, false); - } - - private interface MutableColumnFamilyOptionKey extends MutableOptionKey {} - - public enum MemtableOption implements MutableColumnFamilyOptionKey { - write_buffer_size(ValueType.LONG), - arena_block_size(ValueType.LONG), - memtable_prefix_bloom_size_ratio(ValueType.DOUBLE), - memtable_whole_key_filtering(ValueType.BOOLEAN), - @Deprecated memtable_prefix_bloom_bits(ValueType.INT), - @Deprecated memtable_prefix_bloom_probes(ValueType.INT), - memtable_huge_page_size(ValueType.LONG), - max_successive_merges(ValueType.LONG), - @Deprecated filter_deletes(ValueType.BOOLEAN), - max_write_buffer_number(ValueType.INT), - inplace_update_num_locks(ValueType.LONG), - experimental_mempurge_threshold(ValueType.DOUBLE); - - private final ValueType valueType; - MemtableOption(final ValueType valueType) { - this.valueType = valueType; - } - - @Override - public ValueType getValueType() { - return valueType; - } - } - - public enum CompactionOption implements MutableColumnFamilyOptionKey { - disable_auto_compactions(ValueType.BOOLEAN), - soft_pending_compaction_bytes_limit(ValueType.LONG), - hard_pending_compaction_bytes_limit(ValueType.LONG), - level0_file_num_compaction_trigger(ValueType.INT), - level0_slowdown_writes_trigger(ValueType.INT), - level0_stop_writes_trigger(ValueType.INT), - max_compaction_bytes(ValueType.LONG), - target_file_size_base(ValueType.LONG), - target_file_size_multiplier(ValueType.INT), - max_bytes_for_level_base(ValueType.LONG), - max_bytes_for_level_multiplier(ValueType.INT), - max_bytes_for_level_multiplier_additional(ValueType.INT_ARRAY), - ttl(ValueType.LONG), - periodic_compaction_seconds(ValueType.LONG); - - private final ValueType valueType; - CompactionOption(final ValueType valueType) { - this.valueType = valueType; - } - - @Override - public ValueType getValueType() { - return valueType; - } - } - - public enum BlobOption implements MutableColumnFamilyOptionKey { - enable_blob_files(ValueType.BOOLEAN), - min_blob_size(ValueType.LONG), - blob_file_size(ValueType.LONG), - blob_compression_type(ValueType.ENUM), - enable_blob_garbage_collection(ValueType.BOOLEAN), - blob_garbage_collection_age_cutoff(ValueType.DOUBLE), - blob_garbage_collection_force_threshold(ValueType.DOUBLE), - 
blob_compaction_readahead_size(ValueType.LONG), - blob_file_starting_level(ValueType.INT), - prepopulate_blob_cache(ValueType.ENUM); - - private final ValueType valueType; - BlobOption(final ValueType valueType) { - this.valueType = valueType; - } - - @Override - public ValueType getValueType() { - return valueType; - } - } - - public enum MiscOption implements MutableColumnFamilyOptionKey { - max_sequential_skip_in_iterations(ValueType.LONG), - paranoid_file_checks(ValueType.BOOLEAN), - report_bg_io_stats(ValueType.BOOLEAN), - compression(ValueType.ENUM); - - private final ValueType valueType; - MiscOption(final ValueType valueType) { - this.valueType = valueType; - } - - @Override - public ValueType getValueType() { - return valueType; - } - } - - public static class MutableColumnFamilyOptionsBuilder - extends AbstractMutableOptionsBuilder - implements MutableColumnFamilyOptionsInterface { - - private final static Map ALL_KEYS_LOOKUP = new HashMap<>(); - static { - for(final MutableColumnFamilyOptionKey key : MemtableOption.values()) { - ALL_KEYS_LOOKUP.put(key.name(), key); - } - - for(final MutableColumnFamilyOptionKey key : CompactionOption.values()) { - ALL_KEYS_LOOKUP.put(key.name(), key); - } - - for(final MutableColumnFamilyOptionKey key : MiscOption.values()) { - ALL_KEYS_LOOKUP.put(key.name(), key); - } - - for (final MutableColumnFamilyOptionKey key : BlobOption.values()) { - ALL_KEYS_LOOKUP.put(key.name(), key); - } - } - - private MutableColumnFamilyOptionsBuilder() { - super(); - } - - @Override - protected MutableColumnFamilyOptionsBuilder self() { - return this; - } - - @Override - protected Map allKeys() { - return ALL_KEYS_LOOKUP; - } - - @Override - protected MutableColumnFamilyOptions build(final String[] keys, - final String[] values) { - return new MutableColumnFamilyOptions(keys, values); - } - - @Override - public MutableColumnFamilyOptionsBuilder setWriteBufferSize( - final long writeBufferSize) { - return setLong(MemtableOption.write_buffer_size, writeBufferSize); - } - - @Override - public long writeBufferSize() { - return getLong(MemtableOption.write_buffer_size); - } - - @Override - public MutableColumnFamilyOptionsBuilder setArenaBlockSize( - final long arenaBlockSize) { - return setLong(MemtableOption.arena_block_size, arenaBlockSize); - } - - @Override - public long arenaBlockSize() { - return getLong(MemtableOption.arena_block_size); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMemtablePrefixBloomSizeRatio( - final double memtablePrefixBloomSizeRatio) { - return setDouble(MemtableOption.memtable_prefix_bloom_size_ratio, - memtablePrefixBloomSizeRatio); - } - - @Override - public double memtablePrefixBloomSizeRatio() { - return getDouble(MemtableOption.memtable_prefix_bloom_size_ratio); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMemtableWholeKeyFiltering( - final boolean memtableWholeKeyFiltering) { - return setBoolean(MemtableOption.memtable_whole_key_filtering, memtableWholeKeyFiltering); - } - - @Override - public boolean memtableWholeKeyFiltering() { - return getBoolean(MemtableOption.memtable_whole_key_filtering); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMemtableHugePageSize( - final long memtableHugePageSize) { - return setLong(MemtableOption.memtable_huge_page_size, - memtableHugePageSize); - } - - @Override - public long memtableHugePageSize() { - return getLong(MemtableOption.memtable_huge_page_size); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMaxSuccessiveMerges( - 
final long maxSuccessiveMerges) { - return setLong(MemtableOption.max_successive_merges, maxSuccessiveMerges); - } - - @Override - public long maxSuccessiveMerges() { - return getLong(MemtableOption.max_successive_merges); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMaxWriteBufferNumber( - final int maxWriteBufferNumber) { - return setInt(MemtableOption.max_write_buffer_number, - maxWriteBufferNumber); - } - - @Override - public int maxWriteBufferNumber() { - return getInt(MemtableOption.max_write_buffer_number); - } - - @Override - public MutableColumnFamilyOptionsBuilder setInplaceUpdateNumLocks( - final long inplaceUpdateNumLocks) { - return setLong(MemtableOption.inplace_update_num_locks, - inplaceUpdateNumLocks); - } - - @Override - public long inplaceUpdateNumLocks() { - return getLong(MemtableOption.inplace_update_num_locks); - } - - @Override - public MutableColumnFamilyOptionsBuilder setExperimentalMempurgeThreshold( - final double experimentalMempurgeThreshold) { - return setDouble( - MemtableOption.experimental_mempurge_threshold, experimentalMempurgeThreshold); - } - - @Override - public double experimentalMempurgeThreshold() { - return getDouble(MemtableOption.experimental_mempurge_threshold); - } - - @Override - public MutableColumnFamilyOptionsBuilder setDisableAutoCompactions( - final boolean disableAutoCompactions) { - return setBoolean(CompactionOption.disable_auto_compactions, - disableAutoCompactions); - } - - @Override - public boolean disableAutoCompactions() { - return getBoolean(CompactionOption.disable_auto_compactions); - } - - @Override - public MutableColumnFamilyOptionsBuilder setSoftPendingCompactionBytesLimit( - final long softPendingCompactionBytesLimit) { - return setLong(CompactionOption.soft_pending_compaction_bytes_limit, - softPendingCompactionBytesLimit); - } - - @Override - public long softPendingCompactionBytesLimit() { - return getLong(CompactionOption.soft_pending_compaction_bytes_limit); - } - - @Override - public MutableColumnFamilyOptionsBuilder setHardPendingCompactionBytesLimit( - final long hardPendingCompactionBytesLimit) { - return setLong(CompactionOption.hard_pending_compaction_bytes_limit, - hardPendingCompactionBytesLimit); - } - - @Override - public long hardPendingCompactionBytesLimit() { - return getLong(CompactionOption.hard_pending_compaction_bytes_limit); - } - - @Override - public MutableColumnFamilyOptionsBuilder setLevel0FileNumCompactionTrigger( - final int level0FileNumCompactionTrigger) { - return setInt(CompactionOption.level0_file_num_compaction_trigger, - level0FileNumCompactionTrigger); - } - - @Override - public int level0FileNumCompactionTrigger() { - return getInt(CompactionOption.level0_file_num_compaction_trigger); - } - - @Override - public MutableColumnFamilyOptionsBuilder setLevel0SlowdownWritesTrigger( - final int level0SlowdownWritesTrigger) { - return setInt(CompactionOption.level0_slowdown_writes_trigger, - level0SlowdownWritesTrigger); - } - - @Override - public int level0SlowdownWritesTrigger() { - return getInt(CompactionOption.level0_slowdown_writes_trigger); - } - - @Override - public MutableColumnFamilyOptionsBuilder setLevel0StopWritesTrigger( - final int level0StopWritesTrigger) { - return setInt(CompactionOption.level0_stop_writes_trigger, - level0StopWritesTrigger); - } - - @Override - public int level0StopWritesTrigger() { - return getInt(CompactionOption.level0_stop_writes_trigger); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMaxCompactionBytes(final long 
maxCompactionBytes) { - return setLong(CompactionOption.max_compaction_bytes, maxCompactionBytes); - } - - @Override - public long maxCompactionBytes() { - return getLong(CompactionOption.max_compaction_bytes); - } - - - @Override - public MutableColumnFamilyOptionsBuilder setTargetFileSizeBase( - final long targetFileSizeBase) { - return setLong(CompactionOption.target_file_size_base, - targetFileSizeBase); - } - - @Override - public long targetFileSizeBase() { - return getLong(CompactionOption.target_file_size_base); - } - - @Override - public MutableColumnFamilyOptionsBuilder setTargetFileSizeMultiplier( - final int targetFileSizeMultiplier) { - return setInt(CompactionOption.target_file_size_multiplier, - targetFileSizeMultiplier); - } - - @Override - public int targetFileSizeMultiplier() { - return getInt(CompactionOption.target_file_size_multiplier); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMaxBytesForLevelBase( - final long maxBytesForLevelBase) { - return setLong(CompactionOption.max_bytes_for_level_base, - maxBytesForLevelBase); - } - - @Override - public long maxBytesForLevelBase() { - return getLong(CompactionOption.max_bytes_for_level_base); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMaxBytesForLevelMultiplier( - final double maxBytesForLevelMultiplier) { - return setDouble(CompactionOption.max_bytes_for_level_multiplier, maxBytesForLevelMultiplier); - } - - @Override - public double maxBytesForLevelMultiplier() { - return getDouble(CompactionOption.max_bytes_for_level_multiplier); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMaxBytesForLevelMultiplierAdditional( - final int[] maxBytesForLevelMultiplierAdditional) { - return setIntArray( - CompactionOption.max_bytes_for_level_multiplier_additional, - maxBytesForLevelMultiplierAdditional); - } - - @Override - public int[] maxBytesForLevelMultiplierAdditional() { - return getIntArray( - CompactionOption.max_bytes_for_level_multiplier_additional); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMaxSequentialSkipInIterations( - final long maxSequentialSkipInIterations) { - return setLong(MiscOption.max_sequential_skip_in_iterations, - maxSequentialSkipInIterations); - } - - @Override - public long maxSequentialSkipInIterations() { - return getLong(MiscOption.max_sequential_skip_in_iterations); - } - - @Override - public MutableColumnFamilyOptionsBuilder setParanoidFileChecks( - final boolean paranoidFileChecks) { - return setBoolean(MiscOption.paranoid_file_checks, paranoidFileChecks); - } - - @Override - public boolean paranoidFileChecks() { - return getBoolean(MiscOption.paranoid_file_checks); - } - - @Override - public MutableColumnFamilyOptionsBuilder setCompressionType( - final CompressionType compressionType) { - return setEnum(MiscOption.compression, compressionType); - } - - @Override - public CompressionType compressionType() { - return (CompressionType) getEnum(MiscOption.compression); - } - - @Override - public MutableColumnFamilyOptionsBuilder setReportBgIoStats( - final boolean reportBgIoStats) { - return setBoolean(MiscOption.report_bg_io_stats, reportBgIoStats); - } - - @Override - public boolean reportBgIoStats() { - return getBoolean(MiscOption.report_bg_io_stats); - } - - @Override - public MutableColumnFamilyOptionsBuilder setTtl(final long ttl) { - return setLong(CompactionOption.ttl, ttl); - } - - @Override - public long ttl() { - return getLong(CompactionOption.ttl); - } - - @Override - public MutableColumnFamilyOptionsBuilder 
setPeriodicCompactionSeconds( - final long periodicCompactionSeconds) { - return setLong(CompactionOption.periodic_compaction_seconds, periodicCompactionSeconds); - } - - @Override - public long periodicCompactionSeconds() { - return getLong(CompactionOption.periodic_compaction_seconds); - } - - @Override - public MutableColumnFamilyOptionsBuilder setEnableBlobFiles(final boolean enableBlobFiles) { - return setBoolean(BlobOption.enable_blob_files, enableBlobFiles); - } - - @Override - public boolean enableBlobFiles() { - return getBoolean(BlobOption.enable_blob_files); - } - - @Override - public MutableColumnFamilyOptionsBuilder setMinBlobSize(final long minBlobSize) { - return setLong(BlobOption.min_blob_size, minBlobSize); - } - - @Override - public long minBlobSize() { - return getLong(BlobOption.min_blob_size); - } - - @Override - public MutableColumnFamilyOptionsBuilder setBlobFileSize(final long blobFileSize) { - return setLong(BlobOption.blob_file_size, blobFileSize); - } - - @Override - public long blobFileSize() { - return getLong(BlobOption.blob_file_size); - } - - @Override - public MutableColumnFamilyOptionsBuilder setBlobCompressionType( - final CompressionType compressionType) { - return setEnum(BlobOption.blob_compression_type, compressionType); - } - - @Override - public CompressionType blobCompressionType() { - return (CompressionType) getEnum(BlobOption.blob_compression_type); - } - - @Override - public MutableColumnFamilyOptionsBuilder setEnableBlobGarbageCollection( - final boolean enableBlobGarbageCollection) { - return setBoolean(BlobOption.enable_blob_garbage_collection, enableBlobGarbageCollection); - } - - @Override - public boolean enableBlobGarbageCollection() { - return getBoolean(BlobOption.enable_blob_garbage_collection); - } - - @Override - public MutableColumnFamilyOptionsBuilder setBlobGarbageCollectionAgeCutoff( - final double blobGarbageCollectionAgeCutoff) { - return setDouble( - BlobOption.blob_garbage_collection_age_cutoff, blobGarbageCollectionAgeCutoff); - } - - @Override - public double blobGarbageCollectionAgeCutoff() { - return getDouble(BlobOption.blob_garbage_collection_age_cutoff); - } - - @Override - public MutableColumnFamilyOptionsBuilder setBlobGarbageCollectionForceThreshold( - final double blobGarbageCollectionForceThreshold) { - return setDouble( - BlobOption.blob_garbage_collection_force_threshold, blobGarbageCollectionForceThreshold); - } - - @Override - public double blobGarbageCollectionForceThreshold() { - return getDouble(BlobOption.blob_garbage_collection_force_threshold); - } - - @Override - public MutableColumnFamilyOptionsBuilder setBlobCompactionReadaheadSize( - final long blobCompactionReadaheadSize) { - return setLong(BlobOption.blob_compaction_readahead_size, blobCompactionReadaheadSize); - } - - @Override - public long blobCompactionReadaheadSize() { - return getLong(BlobOption.blob_compaction_readahead_size); - } - - @Override - public MutableColumnFamilyOptionsBuilder setBlobFileStartingLevel( - final int blobFileStartingLevel) { - return setInt(BlobOption.blob_file_starting_level, blobFileStartingLevel); - } - - @Override - public int blobFileStartingLevel() { - return getInt(BlobOption.blob_file_starting_level); - } - - @Override - public MutableColumnFamilyOptionsBuilder setPrepopulateBlobCache( - final PrepopulateBlobCache prepopulateBlobCache) { - return setEnum(BlobOption.prepopulate_blob_cache, prepopulateBlobCache); - } - - @Override - public PrepopulateBlobCache prepopulateBlobCache() { - return 
(PrepopulateBlobCache) getEnum(BlobOption.prepopulate_blob_cache); - } - } -} diff --git a/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java b/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java deleted file mode 100644 index 0f5fe7d78..000000000 --- a/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public interface MutableColumnFamilyOptionsInterface< - T extends MutableColumnFamilyOptionsInterface> - extends AdvancedMutableColumnFamilyOptionsInterface { - /** - * Amount of data to build up in memory (backed by an unsorted log - * on disk) before converting to a sorted on-disk file. - * - * Larger values increase performance, especially during bulk loads. - * Up to {@code max_write_buffer_number} write buffers may be held in memory - * at the same time, so you may wish to adjust this parameter - * to control memory usage. - * - * Also, a larger write buffer will result in a longer recovery time - * the next time the database is opened. - * - * Default: 64MB - * @param writeBufferSize the size of write buffer. - * @return the instance of the current object. - * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms - * while overflowing the underlying platform specific value. - */ - T setWriteBufferSize(long writeBufferSize); - - /** - * Return size of write buffer size. - * - * @return size of write buffer. - * @see #setWriteBufferSize(long) - */ - long writeBufferSize(); - - /** - * Disable automatic compactions. Manual compactions can still - * be issued on this column family - * - * @param disableAutoCompactions true if auto-compactions are disabled. - * @return the reference to the current option. - */ - T setDisableAutoCompactions(boolean disableAutoCompactions); - - /** - * Disable automatic compactions. Manual compactions can still - * be issued on this column family - * - * @return true if auto-compactions are disabled. - */ - boolean disableAutoCompactions(); - - /** - * Number of files to trigger level-0 compaction. A value < 0 means that - * level-0 compaction will not be triggered by number of files at all. - * - * Default: 4 - * - * @param level0FileNumCompactionTrigger The number of files to trigger - * level-0 compaction - * @return the reference to the current option. - */ - T setLevel0FileNumCompactionTrigger(int level0FileNumCompactionTrigger); - - /** - * Number of files to trigger level-0 compaction. A value < 0 means that - * level-0 compaction will not be triggered by number of files at all. - * - * Default: 4 - * - * @return The number of files to trigger - */ - int level0FileNumCompactionTrigger(); - - /** - * We try to limit number of bytes in one compaction to be lower than this - * threshold. But it's not guaranteed. - * Value 0 will be sanitized. - * - * @param maxCompactionBytes max bytes in a compaction - * @return the reference to the current option. - * @see #maxCompactionBytes() - */ - T setMaxCompactionBytes(final long maxCompactionBytes); - - /** - * We try to limit number of bytes in one compaction to be lower than this - * threshold. But it's not guaranteed. - * Value 0 will be sanitized. - * - * @return the maximum number of bytes in for a compaction. 
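The MutableColumnFamilyOptions class deleted above is typically built either from the key1=value1;key2=value2 string format accepted by parse() (int arrays colon-separated) or through the fluent builder, and then applied to a live column family. A small sketch, assuming an open `db` and a `columnFamilyHandle`; RocksDB.setOptions is the application entry point also referenced later in this diff:

    // Sketch: two equivalent ways to construct mutable CF options.
    final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder parsed =
        MutableColumnFamilyOptions.parse(
            "write_buffer_size=67108864;level0_file_num_compaction_trigger=8;"
                + "max_bytes_for_level_multiplier_additional=1:2:3");
    final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder built =
        MutableColumnFamilyOptions.builder()
            .setWriteBufferSize(64L * 1024 * 1024)
            .setLevel0FileNumCompactionTrigger(8);
    // Apply to a running database; throws RocksDBException.
    db.setOptions(columnFamilyHandle, built.build());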
- * @see #setMaxCompactionBytes(long) - */ - long maxCompactionBytes(); - - /** - * The upper-bound of the total size of level-1 files in bytes. - * Maximum number of bytes for level L can be calculated as - * (maxBytesForLevelBase) * (maxBytesForLevelMultiplier ^ (L-1)) - * For example, if maxBytesForLevelBase is 20MB, and if - * max_bytes_for_level_multiplier is 10, total data size for level-1 - * will be 200MB, total file size for level-2 will be 2GB, - * and total file size for level-3 will be 20GB. - * by default 'maxBytesForLevelBase' is 256MB. - * - * @param maxBytesForLevelBase maximum bytes for level base. - * - * @return the reference to the current option. - * - * See {@link AdvancedMutableColumnFamilyOptionsInterface#setMaxBytesForLevelMultiplier(double)} - */ - T setMaxBytesForLevelBase( - long maxBytesForLevelBase); - - /** - * The upper-bound of the total size of level-1 files in bytes. - * Maximum number of bytes for level L can be calculated as - * (maxBytesForLevelBase) * (maxBytesForLevelMultiplier ^ (L-1)) - * For example, if maxBytesForLevelBase is 20MB, and if - * max_bytes_for_level_multiplier is 10, total data size for level-1 - * will be 200MB, total file size for level-2 will be 2GB, - * and total file size for level-3 will be 20GB. - * by default 'maxBytesForLevelBase' is 256MB. - * - * @return the upper-bound of the total size of level-1 files - * in bytes. - * - * See {@link AdvancedMutableColumnFamilyOptionsInterface#maxBytesForLevelMultiplier()} - */ - long maxBytesForLevelBase(); - - /** - * Compress blocks using the specified compression algorithm. This - * parameter can be changed dynamically. - * - * Default: SNAPPY_COMPRESSION, which gives lightweight but fast compression. - * - * @param compressionType Compression Type. - * @return the reference to the current option. - */ - T setCompressionType( - CompressionType compressionType); - - /** - * Compress blocks using the specified compression algorithm. This - * parameter can be changed dynamically. - * - * Default: SNAPPY_COMPRESSION, which gives lightweight but fast compression. - * - * @return Compression type. - */ - CompressionType compressionType(); -} diff --git a/java/src/main/java/org/rocksdb/MutableDBOptions.java b/java/src/main/java/org/rocksdb/MutableDBOptions.java deleted file mode 100644 index bfba1dab3..000000000 --- a/java/src/main/java/org/rocksdb/MutableDBOptions.java +++ /dev/null @@ -1,294 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -public class MutableDBOptions extends AbstractMutableOptions { - - /** - * User must use builder pattern, or parser. - * - * @param keys the keys - * @param values the values - * - * See {@link #builder()} and {@link #parse(String)}. 
- */ - private MutableDBOptions(final String[] keys, final String[] values) { - super(keys, values); - } - - /** - * Creates a builder which allows you - * to set MutableDBOptions in a fluent - * manner - * - * @return A builder for MutableDBOptions - */ - public static MutableDBOptionsBuilder builder() { - return new MutableDBOptionsBuilder(); - } - - /** - * Parses a String representation of MutableDBOptions - * - * The format is: key1=value1;key2=value2;key3=value3 etc - * - * For int[] values, each int should be separated by a comma, e.g. - * - * key1=value1;intArrayKey1=1:2:3 - * - * @param str The string representation of the mutable db options - * @param ignoreUnknown what to do if the key is not one of the keys we expect - * - * @return A builder for the mutable db options - */ - public static MutableDBOptionsBuilder parse(final String str, boolean ignoreUnknown) { - Objects.requireNonNull(str); - - final List parsedOptions = OptionString.Parser.parse(str); - return new MutableDBOptions.MutableDBOptionsBuilder().fromParsed(parsedOptions, ignoreUnknown); - } - - public static MutableDBOptionsBuilder parse(final String str) { - return parse(str, false); - } - - private interface MutableDBOptionKey extends MutableOptionKey {} - - public enum DBOption implements MutableDBOptionKey { - max_background_jobs(ValueType.INT), - max_background_compactions(ValueType.INT), - avoid_flush_during_shutdown(ValueType.BOOLEAN), - writable_file_max_buffer_size(ValueType.LONG), - delayed_write_rate(ValueType.LONG), - max_total_wal_size(ValueType.LONG), - delete_obsolete_files_period_micros(ValueType.LONG), - stats_dump_period_sec(ValueType.INT), - stats_persist_period_sec(ValueType.INT), - stats_history_buffer_size(ValueType.LONG), - max_open_files(ValueType.INT), - bytes_per_sync(ValueType.LONG), - wal_bytes_per_sync(ValueType.LONG), - strict_bytes_per_sync(ValueType.BOOLEAN), - compaction_readahead_size(ValueType.LONG); - - private final ValueType valueType; - DBOption(final ValueType valueType) { - this.valueType = valueType; - } - - @Override - public ValueType getValueType() { - return valueType; - } - } - - public static class MutableDBOptionsBuilder - extends AbstractMutableOptionsBuilder - implements MutableDBOptionsInterface { - - private final static Map ALL_KEYS_LOOKUP = new HashMap<>(); - static { - for(final MutableDBOptionKey key : DBOption.values()) { - ALL_KEYS_LOOKUP.put(key.name(), key); - } - } - - private MutableDBOptionsBuilder() { - super(); - } - - @Override - protected MutableDBOptionsBuilder self() { - return this; - } - - @Override - protected Map allKeys() { - return ALL_KEYS_LOOKUP; - } - - @Override - protected MutableDBOptions build(final String[] keys, - final String[] values) { - return new MutableDBOptions(keys, values); - } - - @Override - public MutableDBOptionsBuilder setMaxBackgroundJobs( - final int maxBackgroundJobs) { - return setInt(DBOption.max_background_jobs, maxBackgroundJobs); - } - - @Override - public int maxBackgroundJobs() { - return getInt(DBOption.max_background_jobs); - } - - @Override - @Deprecated - public MutableDBOptionsBuilder setMaxBackgroundCompactions( - final int maxBackgroundCompactions) { - return setInt(DBOption.max_background_compactions, - maxBackgroundCompactions); - } - - @Override - @Deprecated - public int maxBackgroundCompactions() { - return getInt(DBOption.max_background_compactions); - } - - @Override - public MutableDBOptionsBuilder setAvoidFlushDuringShutdown( - final boolean avoidFlushDuringShutdown) { - return 
setBoolean(DBOption.avoid_flush_during_shutdown, - avoidFlushDuringShutdown); - } - - @Override - public boolean avoidFlushDuringShutdown() { - return getBoolean(DBOption.avoid_flush_during_shutdown); - } - - @Override - public MutableDBOptionsBuilder setWritableFileMaxBufferSize( - final long writableFileMaxBufferSize) { - return setLong(DBOption.writable_file_max_buffer_size, - writableFileMaxBufferSize); - } - - @Override - public long writableFileMaxBufferSize() { - return getLong(DBOption.writable_file_max_buffer_size); - } - - @Override - public MutableDBOptionsBuilder setDelayedWriteRate( - final long delayedWriteRate) { - return setLong(DBOption.delayed_write_rate, - delayedWriteRate); - } - - @Override - public long delayedWriteRate() { - return getLong(DBOption.delayed_write_rate); - } - - @Override - public MutableDBOptionsBuilder setMaxTotalWalSize( - final long maxTotalWalSize) { - return setLong(DBOption.max_total_wal_size, maxTotalWalSize); - } - - @Override - public long maxTotalWalSize() { - return getLong(DBOption.max_total_wal_size); - } - - @Override - public MutableDBOptionsBuilder setDeleteObsoleteFilesPeriodMicros( - final long micros) { - return setLong(DBOption.delete_obsolete_files_period_micros, micros); - } - - @Override - public long deleteObsoleteFilesPeriodMicros() { - return getLong(DBOption.delete_obsolete_files_period_micros); - } - - @Override - public MutableDBOptionsBuilder setStatsDumpPeriodSec( - final int statsDumpPeriodSec) { - return setInt(DBOption.stats_dump_period_sec, statsDumpPeriodSec); - } - - @Override - public int statsDumpPeriodSec() { - return getInt(DBOption.stats_dump_period_sec); - } - - @Override - public MutableDBOptionsBuilder setStatsPersistPeriodSec( - final int statsPersistPeriodSec) { - return setInt(DBOption.stats_persist_period_sec, statsPersistPeriodSec); - } - - @Override - public int statsPersistPeriodSec() { - return getInt(DBOption.stats_persist_period_sec); - } - - @Override - public MutableDBOptionsBuilder setStatsHistoryBufferSize( - final long statsHistoryBufferSize) { - return setLong(DBOption.stats_history_buffer_size, statsHistoryBufferSize); - } - - @Override - public long statsHistoryBufferSize() { - return getLong(DBOption.stats_history_buffer_size); - } - - @Override - public MutableDBOptionsBuilder setMaxOpenFiles(final int maxOpenFiles) { - return setInt(DBOption.max_open_files, maxOpenFiles); - } - - @Override - public int maxOpenFiles() { - return getInt(DBOption.max_open_files); - } - - @Override - public MutableDBOptionsBuilder setBytesPerSync(final long bytesPerSync) { - return setLong(DBOption.bytes_per_sync, bytesPerSync); - } - - @Override - public long bytesPerSync() { - return getLong(DBOption.bytes_per_sync); - } - - @Override - public MutableDBOptionsBuilder setWalBytesPerSync( - final long walBytesPerSync) { - return setLong(DBOption.wal_bytes_per_sync, walBytesPerSync); - } - - @Override - public long walBytesPerSync() { - return getLong(DBOption.wal_bytes_per_sync); - } - - @Override - public MutableDBOptionsBuilder setStrictBytesPerSync( - final boolean strictBytesPerSync) { - return setBoolean(DBOption.strict_bytes_per_sync, strictBytesPerSync); - } - - @Override - public boolean strictBytesPerSync() { - return getBoolean(DBOption.strict_bytes_per_sync); - } - - @Override - public MutableDBOptionsBuilder setCompactionReadaheadSize( - final long compactionReadaheadSize) { - return setLong(DBOption.compaction_readahead_size, - compactionReadaheadSize); - } - - @Override - public long 
compactionReadaheadSize() { - return getLong(DBOption.compaction_readahead_size); - } - } -} diff --git a/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java b/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java deleted file mode 100644 index bdf9d7bf6..000000000 --- a/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java +++ /dev/null @@ -1,440 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -public interface MutableDBOptionsInterface> { - /** - * Specifies the maximum number of concurrent background jobs (both flushes - * and compactions combined). - * Default: 2 - * - * @param maxBackgroundJobs number of max concurrent background jobs - * @return the instance of the current object. - */ - T setMaxBackgroundJobs(int maxBackgroundJobs); - - /** - * Returns the maximum number of concurrent background jobs (both flushes - * and compactions combined). - * Default: 2 - * - * @return the maximum number of concurrent background jobs. - */ - int maxBackgroundJobs(); - - /** - * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the - * value of max_background_jobs. For backwards compatibility we will set - * `max_background_jobs = max_background_compactions + max_background_flushes` - * in the case where user sets at least one of `max_background_compactions` or - * `max_background_flushes` (we replace -1 by 1 in case one option is unset). - * - * Specifies the maximum number of concurrent background compaction jobs, - * submitted to the default LOW priority thread pool. - * If you're increasing this, also consider increasing number of threads in - * LOW priority thread pool. For more information, see - * Default: -1 - * - * @param maxBackgroundCompactions the maximum number of background - * compaction jobs. - * @return the instance of the current object. - * - * @see RocksEnv#setBackgroundThreads(int) - * @see RocksEnv#setBackgroundThreads(int, Priority) - * @see DBOptionsInterface#maxBackgroundFlushes() - * @deprecated Use {@link #setMaxBackgroundJobs(int)} - */ - @Deprecated - T setMaxBackgroundCompactions(int maxBackgroundCompactions); - - /** - * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the - * value of max_background_jobs. For backwards compatibility we will set - * `max_background_jobs = max_background_compactions + max_background_flushes` - * in the case where user sets at least one of `max_background_compactions` or - * `max_background_flushes` (we replace -1 by 1 in case one option is unset). - * - * Returns the maximum number of concurrent background compaction jobs, - * submitted to the default LOW priority thread pool. - * When increasing this number, we may also want to consider increasing - * number of threads in LOW priority thread pool. - * Default: -1 - * - * @return the maximum number of concurrent background compaction jobs. - * @see RocksEnv#setBackgroundThreads(int) - * @see RocksEnv#setBackgroundThreads(int, Priority) - * - * @deprecated Use {@link #setMaxBackgroundJobs(int)} - */ - @Deprecated - int maxBackgroundCompactions(); - - /** - * By default RocksDB will flush all memtables on DB close if there are - * unpersisted data (i.e. with WAL disabled) The flush can be skip to speedup - * DB close. Unpersisted data WILL BE LOST. - * - * DEFAULT: false - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)} - * API. 
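MutableDBOptions (deleted above) mirrors the same builder/parse pattern at the database level and is applied through RocksDB.setDBOptions. A minimal sketch, assuming an open `db`:

    // Sketch: retune DB-wide mutable options on a running instance.
    final MutableDBOptions.MutableDBOptionsBuilder dbTuning = MutableDBOptions.builder()
        .setMaxBackgroundJobs(4)
        .setMaxOpenFiles(5000)
        .setBytesPerSync(1L * 1024 * 1024);
    db.setDBOptions(dbTuning.build());                   // throws RocksDBException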
- * - * @param avoidFlushDuringShutdown true if we should avoid flush during - * shutdown - * - * @return the reference to the current options. - */ - T setAvoidFlushDuringShutdown(boolean avoidFlushDuringShutdown); - - /** - * By default RocksDB will flush all memtables on DB close if there are - * unpersisted data (i.e. with WAL disabled) The flush can be skip to speedup - * DB close. Unpersisted data WILL BE LOST. - * - * DEFAULT: false - * - * Dynamically changeable through - * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)} - * API. - * - * @return true if we should avoid flush during shutdown - */ - boolean avoidFlushDuringShutdown(); - - /** - * This is the maximum buffer size that is used by WritableFileWriter. - * On Windows, we need to maintain an aligned buffer for writes. - * We allow the buffer to grow until it's size hits the limit. - * - * Default: 1024 * 1024 (1 MB) - * - * @param writableFileMaxBufferSize the maximum buffer size - * - * @return the reference to the current options. - */ - T setWritableFileMaxBufferSize(long writableFileMaxBufferSize); - - /** - * This is the maximum buffer size that is used by WritableFileWriter. - * On Windows, we need to maintain an aligned buffer for writes. - * We allow the buffer to grow until it's size hits the limit. - * - * Default: 1024 * 1024 (1 MB) - * - * @return the maximum buffer size - */ - long writableFileMaxBufferSize(); - - /** - * The limited write rate to DB if - * {@link ColumnFamilyOptions#softPendingCompactionBytesLimit()} or - * {@link ColumnFamilyOptions#level0SlowdownWritesTrigger()} is triggered, - * or we are writing to the last mem table allowed and we allow more than 3 - * mem tables. It is calculated using size of user write requests before - * compression. RocksDB may decide to slow down more if the compaction still - * gets behind further. - * If the value is 0, we will infer a value from `rater_limiter` value - * if it is not empty, or 16MB if `rater_limiter` is empty. Note that - * if users change the rate in `rate_limiter` after DB is opened, - * `delayed_write_rate` won't be adjusted. - * - * Unit: bytes per second. - * - * Default: 0 - * - * Dynamically changeable through {@link RocksDB#setDBOptions(MutableDBOptions)}. - * - * @param delayedWriteRate the rate in bytes per second - * - * @return the reference to the current options. - */ - T setDelayedWriteRate(long delayedWriteRate); - - /** - * The limited write rate to DB if - * {@link ColumnFamilyOptions#softPendingCompactionBytesLimit()} or - * {@link ColumnFamilyOptions#level0SlowdownWritesTrigger()} is triggered, - * or we are writing to the last mem table allowed and we allow more than 3 - * mem tables. It is calculated using size of user write requests before - * compression. RocksDB may decide to slow down more if the compaction still - * gets behind further. - * If the value is 0, we will infer a value from `rater_limiter` value - * if it is not empty, or 16MB if `rater_limiter` is empty. Note that - * if users change the rate in `rate_limiter` after DB is opened, - * `delayed_write_rate` won't be adjusted. - * - * Unit: bytes per second. - * - * Default: 0 - * - * Dynamically changeable through {@link RocksDB#setDBOptions(MutableDBOptions)}. - * - * @return the rate in bytes per second - */ - long delayedWriteRate(); - - /** - *

Set the max total write-ahead log size. Once write-ahead logs exceed this size, we will - * start forcing the flush of column families whose memtables are backed by the oldest live WAL - * file. - * - * The oldest WAL files are the ones that are causing all the space amplification. - * - * For example, with 15 column families, each with - * write_buffer_size = 128 MB - * max_write_buffer_number = 6 - * max_total_wal_size will be calculated to be [15 * 128MB * 6] * 4 = 45GB - * - * The RocksDB wiki has some discussion about how the WAL interacts - * with memtables and flushing of column families, at - * ... - * - * If set to 0 (default), we will dynamically choose the WAL size limit to - * be [sum of all write_buffer_size * max_write_buffer_number] * 4. - * - * This option takes effect only when there is more than one column family; - * otherwise the WAL size is dictated by the write_buffer_size. - * - * Default: 0
- * - * @param maxTotalWalSize max total wal size. - * @return the instance of the current object. - */ - T setMaxTotalWalSize(long maxTotalWalSize); - - /** - *

Returns the max total write-ahead log size. Once write-ahead logs exceed this size, - * we will start forcing the flush of column families whose memtables are - * backed by the oldest live WAL file. - * - * The oldest WAL files are the ones that are causing all the space amplification. - * - * For example, with 15 column families, each with - * write_buffer_size = 128 MB - * max_write_buffer_number = 6 - * max_total_wal_size will be calculated to be [15 * 128MB * 6] * 4 = 45GB - * - * The RocksDB wiki has some discussion about how the WAL interacts - * with memtables and flushing of column families, at - * ... - * - * If set to 0 (default), we will dynamically choose the WAL size limit to - * be [sum of all write_buffer_size * max_write_buffer_number] * 4. - * - * This option takes effect only when there is more than one column family; - * otherwise the WAL size is dictated by the write_buffer_size. - * - * Default: 0
- * - * @return max total wal size - */ - long maxTotalWalSize(); - - /** - * The periodicity when obsolete files get deleted. The default - * value is 6 hours. The files that get out of scope by compaction - * process will still get automatically delete on every compaction, - * regardless of this setting - * - * @param micros the time interval in micros - * @return the instance of the current object. - */ - T setDeleteObsoleteFilesPeriodMicros(long micros); - - /** - * The periodicity when obsolete files get deleted. The default - * value is 6 hours. The files that get out of scope by compaction - * process will still get automatically delete on every compaction, - * regardless of this setting - * - * @return the time interval in micros when obsolete files will be deleted. - */ - long deleteObsoleteFilesPeriodMicros(); - - /** - * if not zero, dump rocksdb.stats to LOG every stats_dump_period_sec - * Default: 600 (10 minutes) - * - * @param statsDumpPeriodSec time interval in seconds. - * @return the instance of the current object. - */ - T setStatsDumpPeriodSec(int statsDumpPeriodSec); - - /** - * If not zero, dump rocksdb.stats to LOG every stats_dump_period_sec - * Default: 600 (10 minutes) - * - * @return time interval in seconds. - */ - int statsDumpPeriodSec(); - - /** - * If not zero, dump rocksdb.stats to RocksDB every - * {@code statsPersistPeriodSec} - * - * Default: 600 - * - * @param statsPersistPeriodSec time interval in seconds. - * @return the instance of the current object. - */ - T setStatsPersistPeriodSec(int statsPersistPeriodSec); - - /** - * If not zero, dump rocksdb.stats to RocksDB every - * {@code statsPersistPeriodSec} - * - * @return time interval in seconds. - */ - int statsPersistPeriodSec(); - - /** - * If not zero, periodically take stats snapshots and store in memory, the - * memory size for stats snapshots is capped at {@code statsHistoryBufferSize} - * - * Default: 1MB - * - * @param statsHistoryBufferSize the size of the buffer. - * @return the instance of the current object. - */ - T setStatsHistoryBufferSize(long statsHistoryBufferSize); - - /** - * If not zero, periodically take stats snapshots and store in memory, the - * memory size for stats snapshots is capped at {@code statsHistoryBufferSize} - * - * @return the size of the buffer. - */ - long statsHistoryBufferSize(); - - /** - * Number of open files that can be used by the DB. You may need to - * increase this if your database has a large working set. Value -1 means - * files opened are always kept open. You can estimate number of files based - * on {@code target_file_size_base} and {@code target_file_size_multiplier} - * for level-based compaction. For universal-style compaction, you can usually - * set it to -1. - * Default: -1 - * - * @param maxOpenFiles the maximum number of open files. - * @return the instance of the current object. - */ - T setMaxOpenFiles(int maxOpenFiles); - - /** - * Number of open files that can be used by the DB. You may need to - * increase this if your database has a large working set. Value -1 means - * files opened are always kept open. You can estimate number of files based - * on {@code target_file_size_base} and {@code target_file_size_multiplier} - * for level-based compaction. For universal-style compaction, you can usually - * set it to -1. - * Default: -1 - * - * @return the maximum number of open files. - */ - int maxOpenFiles(); - - /** - * Allows OS to incrementally sync files to disk while they are being - * written, asynchronously, in the background. 
- * Issue one request for every bytes_per_sync written. 0 turns it off. - * Default: 0 - * - * @param bytesPerSync size in bytes - * @return the instance of the current object. - */ - T setBytesPerSync(long bytesPerSync); - - /** - * Allows OS to incrementally sync files to disk while they are being - * written, asynchronously, in the background. - * Issue one request for every bytes_per_sync written. 0 turns it off. - * Default: 0 - * - * @return size in bytes - */ - long bytesPerSync(); - - /** - * Same as {@link #setBytesPerSync(long)} , but applies to WAL files - * - * Default: 0, turned off - * - * @param walBytesPerSync size in bytes - * @return the instance of the current object. - */ - T setWalBytesPerSync(long walBytesPerSync); - - /** - * Same as {@link #bytesPerSync()} , but applies to WAL files - * - * Default: 0, turned off - * - * @return size in bytes - */ - long walBytesPerSync(); - - /** - * When true, guarantees WAL files have at most {@link #walBytesPerSync()} - * bytes submitted for writeback at any given time, and SST files have at most - * {@link #bytesPerSync()} bytes pending writeback at any given time. This - * can be used to handle cases where processing speed exceeds I/O speed - * during file generation, which can lead to a huge sync when the file is - * finished, even with {@link #bytesPerSync()} / {@link #walBytesPerSync()} - * properly configured. - * - * - If `sync_file_range` is supported it achieves this by waiting for any - * prior `sync_file_range`s to finish before proceeding. In this way, - * processing (compression, etc.) can proceed uninhibited in the gap - * between `sync_file_range`s, and we block only when I/O falls - * behind. - * - Otherwise the `WritableFile::Sync` method is used. Note this mechanism - * always blocks, thus preventing the interleaving of I/O and processing. - * - * Note: Enabling this option does not provide any additional persistence - * guarantees, as it may use `sync_file_range`, which does not write out - * metadata. - * - * Default: false - * - * @param strictBytesPerSync the bytes per sync - * @return the instance of the current object. - */ - T setStrictBytesPerSync(boolean strictBytesPerSync); - - /** - * Return the strict byte limit per sync. - * - * See {@link #setStrictBytesPerSync(boolean)} - * - * @return the limit in bytes. - */ - boolean strictBytesPerSync(); - - /** - * If non-zero, we perform bigger reads when doing compaction. If you're - * running RocksDB on spinning disks, you should set this to at least 2MB. - * - * That way RocksDB's compaction is doing sequential instead of random reads. - * - * Default: 0 - * - * @param compactionReadaheadSize The compaction read-ahead size - * - * @return the reference to the current options. - */ - T setCompactionReadaheadSize(final long compactionReadaheadSize); - - /** - * If non-zero, we perform bigger reads when doing compaction. If you're - * running RocksDB on spinning disks, you should set this to at least 2MB. - * - * That way RocksDB's compaction is doing sequential instead of random reads. - * - * Default: 0 - * - * @return The compaction read-ahead size - */ - long compactionReadaheadSize(); -} diff --git a/java/src/main/java/org/rocksdb/MutableOptionKey.java b/java/src/main/java/org/rocksdb/MutableOptionKey.java deleted file mode 100644 index ec1b9ff3b..000000000 --- a/java/src/main/java/org/rocksdb/MutableOptionKey.java +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
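Note (illustrative, not part of the original file): the options declared by MutableDBOptionsInterface can be adjusted on a live database through RocksDB#setDBOptions(MutableDBOptions). A minimal sketch, assuming the standard RocksJava builder API and a hypothetical database path:

import org.rocksdb.MutableDBOptions;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class MutableDbOptionsSketch {
  public static void main(final String[] args) {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/rocksdb-mutable-opts")) {
      // Only the fields set on the builder are changed; everything else keeps its current value.
      final MutableDBOptions mutableOptions = MutableDBOptions.builder()
          .setMaxBackgroundJobs(4)                      // flushes + compactions combined
          .setBytesPerSync(1024 * 1024)                 // incrementally sync SSTs every 1 MB
          .setCompactionReadaheadSize(2 * 1024 * 1024)  // bigger sequential compaction reads
          .setMaxTotalWalSize(512L * 1024 * 1024)       // cap live WAL space at 512 MB
          .build();
      db.setDBOptions(mutableOptions);                  // applied to the running instance
    } catch (final RocksDBException e) {
      e.printStackTrace();
    }
  }
}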
-package org.rocksdb; - -public interface MutableOptionKey { - enum ValueType { - DOUBLE, - LONG, - INT, - BOOLEAN, - INT_ARRAY, - ENUM - } - - String name(); - ValueType getValueType(); -} diff --git a/java/src/main/java/org/rocksdb/MutableOptionValue.java b/java/src/main/java/org/rocksdb/MutableOptionValue.java deleted file mode 100644 index 7f69eeb9e..000000000 --- a/java/src/main/java/org/rocksdb/MutableOptionValue.java +++ /dev/null @@ -1,369 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -import static org.rocksdb.AbstractMutableOptions.INT_ARRAY_INT_SEPARATOR; - -public abstract class MutableOptionValue { - - abstract double asDouble() throws NumberFormatException; - abstract long asLong() throws NumberFormatException; - abstract int asInt() throws NumberFormatException; - abstract boolean asBoolean() throws IllegalStateException; - abstract int[] asIntArray() throws IllegalStateException; - abstract String asString(); - abstract T asObject(); - - private static abstract class MutableOptionValueObject - extends MutableOptionValue { - protected final T value; - - protected MutableOptionValueObject(final T value) { - this.value = value; - } - - @Override T asObject() { - return value; - } - } - - static MutableOptionValue fromString(final String s) { - return new MutableOptionStringValue(s); - } - - static MutableOptionValue fromDouble(final double d) { - return new MutableOptionDoubleValue(d); - } - - static MutableOptionValue fromLong(final long d) { - return new MutableOptionLongValue(d); - } - - static MutableOptionValue fromInt(final int i) { - return new MutableOptionIntValue(i); - } - - static MutableOptionValue fromBoolean(final boolean b) { - return new MutableOptionBooleanValue(b); - } - - static MutableOptionValue fromIntArray(final int[] ix) { - return new MutableOptionIntArrayValue(ix); - } - - static > MutableOptionValue fromEnum(final N value) { - return new MutableOptionEnumValue<>(value); - } - - static class MutableOptionStringValue - extends MutableOptionValueObject { - MutableOptionStringValue(final String value) { - super(value); - } - - @Override - double asDouble() throws NumberFormatException { - return Double.parseDouble(value); - } - - @Override - long asLong() throws NumberFormatException { - return Long.parseLong(value); - } - - @Override - int asInt() throws NumberFormatException { - return Integer.parseInt(value); - } - - @Override - boolean asBoolean() throws IllegalStateException { - return Boolean.parseBoolean(value); - } - - @Override - int[] asIntArray() throws IllegalStateException { - throw new IllegalStateException("String is not applicable as int[]"); - } - - @Override - String asString() { - return value; - } - } - - static class MutableOptionDoubleValue - extends MutableOptionValue { - private final double value; - MutableOptionDoubleValue(final double value) { - this.value = value; - } - - @Override - double asDouble() { - return value; - } - - @Override - long asLong() throws NumberFormatException { - return Double.valueOf(value).longValue(); - } - - @Override - int asInt() throws NumberFormatException { - if(value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) { - throw new NumberFormatException( - "double value lies outside the bounds of int"); - } - return Double.valueOf(value).intValue(); - } - - @Override - boolean asBoolean() throws IllegalStateException { - throw new IllegalStateException( - "double is not applicable as boolean"); - } - - @Override - int[] asIntArray() 
throws IllegalStateException { - if(value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) { - throw new NumberFormatException( - "double value lies outside the bounds of int"); - } - return new int[] { Double.valueOf(value).intValue() }; - } - - @Override - String asString() { - return String.valueOf(value); - } - - @Override - Double asObject() { - return value; - } - } - - static class MutableOptionLongValue - extends MutableOptionValue { - private final long value; - - MutableOptionLongValue(final long value) { - this.value = value; - } - - @Override - double asDouble() { - return Long.valueOf(value).doubleValue(); - } - - @Override - long asLong() throws NumberFormatException { - return value; - } - - @Override - int asInt() throws NumberFormatException { - if(value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) { - throw new NumberFormatException( - "long value lies outside the bounds of int"); - } - return Long.valueOf(value).intValue(); - } - - @Override - boolean asBoolean() throws IllegalStateException { - throw new IllegalStateException( - "long is not applicable as boolean"); - } - - @Override - int[] asIntArray() throws IllegalStateException { - if(value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) { - throw new NumberFormatException( - "long value lies outside the bounds of int"); - } - return new int[] { Long.valueOf(value).intValue() }; - } - - @Override - String asString() { - return String.valueOf(value); - } - - @Override - Long asObject() { - return value; - } - } - - static class MutableOptionIntValue - extends MutableOptionValue { - private final int value; - - MutableOptionIntValue(final int value) { - this.value = value; - } - - @Override - double asDouble() { - return Integer.valueOf(value).doubleValue(); - } - - @Override - long asLong() throws NumberFormatException { - return value; - } - - @Override - int asInt() throws NumberFormatException { - return value; - } - - @Override - boolean asBoolean() throws IllegalStateException { - throw new IllegalStateException("int is not applicable as boolean"); - } - - @Override - int[] asIntArray() throws IllegalStateException { - return new int[] { value }; - } - - @Override - String asString() { - return String.valueOf(value); - } - - @Override - Integer asObject() { - return value; - } - } - - static class MutableOptionBooleanValue - extends MutableOptionValue { - private final boolean value; - - MutableOptionBooleanValue(final boolean value) { - this.value = value; - } - - @Override - double asDouble() { - throw new NumberFormatException("boolean is not applicable as double"); - } - - @Override - long asLong() throws NumberFormatException { - throw new NumberFormatException("boolean is not applicable as Long"); - } - - @Override - int asInt() throws NumberFormatException { - throw new NumberFormatException("boolean is not applicable as int"); - } - - @Override - boolean asBoolean() { - return value; - } - - @Override - int[] asIntArray() throws IllegalStateException { - throw new IllegalStateException("boolean is not applicable as int[]"); - } - - @Override - String asString() { - return String.valueOf(value); - } - - @Override - Boolean asObject() { - return value; - } - } - - static class MutableOptionIntArrayValue - extends MutableOptionValueObject { - MutableOptionIntArrayValue(final int[] value) { - super(value); - } - - @Override - double asDouble() { - throw new NumberFormatException("int[] is not applicable as double"); - } - - @Override - long asLong() throws NumberFormatException { - throw new 
NumberFormatException("int[] is not applicable as Long"); - } - - @Override - int asInt() throws NumberFormatException { - throw new NumberFormatException("int[] is not applicable as int"); - } - - @Override - boolean asBoolean() { - throw new NumberFormatException("int[] is not applicable as boolean"); - } - - @Override - int[] asIntArray() throws IllegalStateException { - return value; - } - - @Override - String asString() { - final StringBuilder builder = new StringBuilder(); - for(int i = 0; i < value.length; i++) { - builder.append(value[i]); - if(i + 1 < value.length) { - builder.append(INT_ARRAY_INT_SEPARATOR); - } - } - return builder.toString(); - } - } - - static class MutableOptionEnumValue> - extends MutableOptionValueObject { - - MutableOptionEnumValue(final T value) { - super(value); - } - - @Override - double asDouble() throws NumberFormatException { - throw new NumberFormatException("Enum is not applicable as double"); - } - - @Override - long asLong() throws NumberFormatException { - throw new NumberFormatException("Enum is not applicable as long"); - } - - @Override - int asInt() throws NumberFormatException { - throw new NumberFormatException("Enum is not applicable as int"); - } - - @Override - boolean asBoolean() throws IllegalStateException { - throw new NumberFormatException("Enum is not applicable as boolean"); - } - - @Override - int[] asIntArray() throws IllegalStateException { - throw new NumberFormatException("Enum is not applicable as int[]"); - } - - @Override - String asString() { - return value.name(); - } - } - -} diff --git a/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java b/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java deleted file mode 100644 index 6acc146f7..000000000 --- a/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - * A simple abstraction to allow a Java class to wrap a custom comparator - * implemented in C++. - * - * The native comparator must directly extend rocksdb::Comparator. - */ -public abstract class NativeComparatorWrapper - extends AbstractComparator { - - @Override - final ComparatorType getComparatorType() { - return ComparatorType.JAVA_NATIVE_COMPARATOR_WRAPPER; - } - - @Override - public final String name() { - throw new IllegalStateException("This should not be called. " + - "Implementation is in Native code"); - } - - @Override - public final int compare(final ByteBuffer s1, final ByteBuffer s2) { - throw new IllegalStateException("This should not be called. " + - "Implementation is in Native code"); - } - - @Override - public final void findShortestSeparator(final ByteBuffer start, final ByteBuffer limit) { - throw new IllegalStateException("This should not be called. " + - "Implementation is in Native code"); - } - - @Override - public final void findShortSuccessor(final ByteBuffer key) { - throw new IllegalStateException("This should not be called. 
" + - "Implementation is in Native code"); - } - - /** - * We override {@link RocksCallbackObject#disposeInternal()} - * as disposing of a native rocksdb::Comparator extension requires - * a slightly different approach as it is not really a RocksCallbackObject - */ - @Override - protected void disposeInternal() { - disposeInternal(nativeHandle_); - } - - private native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java b/java/src/main/java/org/rocksdb/NativeLibraryLoader.java deleted file mode 100644 index b97cf28b9..000000000 --- a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -import java.io.*; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; - -import org.rocksdb.util.Environment; - -/** - * This class is used to load the RocksDB shared library from within the jar. - * The shared library is extracted to a temp folder and loaded from there. - */ -public class NativeLibraryLoader { - //singleton - private static final NativeLibraryLoader instance = new NativeLibraryLoader(); - private static boolean initialized = false; - - private static final String sharedLibraryName = Environment.getSharedLibraryName("rocksdb"); - private static final String jniLibraryName = Environment.getJniLibraryName("rocksdb"); - private static final /* @Nullable */ String fallbackJniLibraryName = - Environment.getFallbackJniLibraryName("rocksdb"); - private static final String jniLibraryFileName = Environment.getJniLibraryFileName("rocksdb"); - private static final /* @Nullable */ String fallbackJniLibraryFileName = - Environment.getFallbackJniLibraryFileName("rocksdb"); - private static final String tempFilePrefix = "librocksdbjni"; - private static final String tempFileSuffix = Environment.getJniLibraryExtension(); - - /** - * Get a reference to the NativeLibraryLoader - * - * @return The NativeLibraryLoader - */ - public static NativeLibraryLoader getInstance() { - return instance; - } - - /** - * Firstly attempts to load the library from java.library.path, - * if that fails then it falls back to extracting - * the library from the classpath - * {@link org.rocksdb.NativeLibraryLoader#loadLibraryFromJar(java.lang.String)} - * - * @param tmpDir A temporary directory to use - * to copy the native library to when loading from the classpath. - * If null, or the empty string, we rely on Java's - * {@link java.io.File#createTempFile(String, String)} - * function to provide a temporary location. - * The temporary file will be registered for deletion - * on exit. - * - * @throws java.io.IOException if a filesystem operation fails. 
- */ - public synchronized void loadLibrary(final String tmpDir) throws IOException { - try { - // try dynamic library - System.loadLibrary(sharedLibraryName); - return; - } catch (final UnsatisfiedLinkError ule) { - // ignore - try from static library - } - - try { - // try static library - System.loadLibrary(jniLibraryName); - return; - } catch (final UnsatisfiedLinkError ule) { - // ignore - then try static library fallback or from jar - } - - if (fallbackJniLibraryName != null) { - try { - // try static library fallback - System.loadLibrary(fallbackJniLibraryName); - return; - } catch (final UnsatisfiedLinkError ule) { - // ignore - then try from jar - } - } - - // try jar - loadLibraryFromJar(tmpDir); - } - - /** - * Attempts to extract the native RocksDB library - * from the classpath and load it - * - * @param tmpDir A temporary directory to use - * to copy the native library to. If null, - * or the empty string, we rely on Java's - * {@link java.io.File#createTempFile(String, String)} - * function to provide a temporary location. - * The temporary file will be registered for deletion - * on exit. - * - * @throws java.io.IOException if a filesystem operation fails. - */ - void loadLibraryFromJar(final String tmpDir) - throws IOException { - if (!initialized) { - System.load(loadLibraryFromJarToTemp(tmpDir).getAbsolutePath()); - initialized = true; - } - } - - File loadLibraryFromJarToTemp(final String tmpDir) - throws IOException { - InputStream is = null; - try { - // attempt to look up the static library in the jar file - String libraryFileName = jniLibraryFileName; - is = getClass().getClassLoader().getResourceAsStream(libraryFileName); - - if (is == null) { - // is there a fallback we can try - if (fallbackJniLibraryFileName == null) { - throw new RuntimeException(libraryFileName + " was not found inside JAR."); - } - - // attempt to look up the fallback static library in the jar file - libraryFileName = fallbackJniLibraryFileName; - is = getClass().getClassLoader().getResourceAsStream(libraryFileName); - if (is == null) { - throw new RuntimeException(libraryFileName + " was not found inside JAR."); - } - } - - // create a temporary file to copy the library to - final File temp; - if (tmpDir == null || tmpDir.isEmpty()) { - temp = File.createTempFile(tempFilePrefix, tempFileSuffix); - } else { - final File parentDir = new File(tmpDir); - if (!parentDir.exists()) { - throw new RuntimeException( - "Directory: " + parentDir.getAbsolutePath() + " does not exist!"); - } - temp = new File(parentDir, libraryFileName); - if (temp.exists() && !temp.delete()) { - throw new RuntimeException( - "File: " + temp.getAbsolutePath() + " already exists and cannot be removed."); - } - if (!temp.createNewFile()) { - throw new RuntimeException("File: " + temp.getAbsolutePath() + " could not be created."); - } - } - if (!temp.exists()) { - throw new RuntimeException("File " + temp.getAbsolutePath() + " does not exist."); - } else { - temp.deleteOnExit(); - } - - // copy the library from the Jar file to the temp destination - Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); - - // return the temporary library file - return temp; - - } finally { - if (is != null) { - is.close(); - } - } - } - - /** - * Private constructor to disallow instantiation - */ - private NativeLibraryLoader() { - } -} diff --git a/java/src/main/java/org/rocksdb/OperationStage.java b/java/src/main/java/org/rocksdb/OperationStage.java deleted file mode 100644 index 6ac0a15a2..000000000 --- 
a/java/src/main/java/org/rocksdb/OperationStage.java +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The operation stage. - */ -public enum OperationStage { - STAGE_UNKNOWN((byte)0x0), - STAGE_FLUSH_RUN((byte)0x1), - STAGE_FLUSH_WRITE_L0((byte)0x2), - STAGE_COMPACTION_PREPARE((byte)0x3), - STAGE_COMPACTION_RUN((byte)0x4), - STAGE_COMPACTION_PROCESS_KV((byte)0x5), - STAGE_COMPACTION_INSTALL((byte)0x6), - STAGE_COMPACTION_SYNC_FILE((byte)0x7), - STAGE_PICK_MEMTABLES_TO_FLUSH((byte)0x8), - STAGE_MEMTABLE_ROLLBACK((byte)0x9), - STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS((byte)0xA); - - private final byte value; - - OperationStage(final byte value) { - this.value = value; - } - - /** - * Get the internal representation value. - * - * @return the internal representation value. - */ - byte getValue() { - return value; - } - - /** - * Get the Operation stage from the internal representation value. - * - * @param value the internal representation value. - * - * @return the operation stage - * - * @throws IllegalArgumentException if the value does not match - * an OperationStage - */ - static OperationStage fromValue(final byte value) - throws IllegalArgumentException { - for (final OperationStage threadType : OperationStage.values()) { - if (threadType.value == value) { - return threadType; - } - } - throw new IllegalArgumentException( - "Unknown value for OperationStage: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/OperationType.java b/java/src/main/java/org/rocksdb/OperationType.java deleted file mode 100644 index 7cc9b65cd..000000000 --- a/java/src/main/java/org/rocksdb/OperationType.java +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The type used to refer to a thread operation. - * - * A thread operation describes high-level action of a thread, - * examples include compaction and flush. - */ -public enum OperationType { - OP_UNKNOWN((byte)0x0), - OP_COMPACTION((byte)0x1), - OP_FLUSH((byte)0x2); - - private final byte value; - - OperationType(final byte value) { - this.value = value; - } - - /** - * Get the internal representation value. - * - * @return the internal representation value. - */ - byte getValue() { - return value; - } - - /** - * Get the Operation type from the internal representation value. - * - * @param value the internal representation value. 
- * - * @return the operation type - * - * @throws IllegalArgumentException if the value does not match - * an OperationType - */ - static OperationType fromValue(final byte value) - throws IllegalArgumentException { - for (final OperationType threadType : OperationType.values()) { - if (threadType.value == value) { - return threadType; - } - } - throw new IllegalArgumentException( - "Unknown value for OperationType: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java b/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java deleted file mode 100644 index 5a2e1f3ed..000000000 --- a/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java +++ /dev/null @@ -1,226 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.List; - -/** - * Database with Transaction support. - */ -public class OptimisticTransactionDB extends RocksDB - implements TransactionalDB { - - /** - * Private constructor. - * - * @param nativeHandle The native handle of the C++ OptimisticTransactionDB - * object - */ - private OptimisticTransactionDB(final long nativeHandle) { - super(nativeHandle); - } - - /** - * Open an OptimisticTransactionDB similar to - * {@link RocksDB#open(Options, String)}. - * - * @param options {@link org.rocksdb.Options} instance. - * @param path the path to the rocksdb. - * - * @return a {@link OptimisticTransactionDB} instance on success, null if the - * specified {@link OptimisticTransactionDB} can not be opened. - * - * @throws RocksDBException if an error occurs whilst opening the database. - */ - public static OptimisticTransactionDB open(final Options options, - final String path) throws RocksDBException { - final OptimisticTransactionDB otdb = new OptimisticTransactionDB(open( - options.nativeHandle_, path)); - - // when non-default Options is used, keeping an Options reference - // in RocksDB can prevent Java to GC during the life-time of - // the currently-created RocksDB. - otdb.storeOptionsInstance(options); - - return otdb; - } - - /** - * Open an OptimisticTransactionDB similar to - * {@link RocksDB#open(DBOptions, String, List, List)}. - * - * @param dbOptions {@link org.rocksdb.DBOptions} instance. - * @param path the path to the rocksdb. - * @param columnFamilyDescriptors list of column family descriptors - * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances - * - * @return a {@link OptimisticTransactionDB} instance on success, null if the - * specified {@link OptimisticTransactionDB} can not be opened. - * - * @throws RocksDBException if an error occurs whilst opening the database. 
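A minimal usage sketch for OptimisticTransactionDB (illustrative, hypothetical database path). Conflict detection happens at commit time, so commit() is where a RocksDBException surfaces if another writer modified a key tracked by this transaction:

import org.rocksdb.OptimisticTransactionDB;
import org.rocksdb.Options;
import org.rocksdb.RocksDBException;
import org.rocksdb.Transaction;
import org.rocksdb.WriteOptions;

public class OptimisticTxnSketch {
  public static void main(final String[] args) {
    try (final Options options = new Options().setCreateIfMissing(true);
         final OptimisticTransactionDB txnDb =
             OptimisticTransactionDB.open(options, "/tmp/rocksdb-otxn");
         final WriteOptions writeOptions = new WriteOptions();
         final Transaction txn = txnDb.beginTransaction(writeOptions)) {
      txn.put("key1".getBytes(), "value1".getBytes());
      txn.commit();  // optimistic conflict check happens here
    } catch (final RocksDBException e) {
      e.printStackTrace();
    }
  }
}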
- */ - public static OptimisticTransactionDB open(final DBOptions dbOptions, - final String path, - final List columnFamilyDescriptors, - final List columnFamilyHandles) - throws RocksDBException { - - final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; - final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; - for (int i = 0; i < columnFamilyDescriptors.size(); i++) { - final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors - .get(i); - cfNames[i] = cfDescriptor.getName(); - cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; - } - - final long[] handles = open(dbOptions.nativeHandle_, path, cfNames, - cfOptionHandles); - final OptimisticTransactionDB otdb = - new OptimisticTransactionDB(handles[0]); - - // when non-default Options is used, keeping an Options reference - // in RocksDB can prevent Java to GC during the life-time of - // the currently-created RocksDB. - otdb.storeOptionsInstance(dbOptions); - - for (int i = 1; i < handles.length; i++) { - columnFamilyHandles.add(new ColumnFamilyHandle(otdb, handles[i])); - } - - return otdb; - } - - - /** - * This is similar to {@link #close()} except that it - * throws an exception if any error occurs. - * - * This will not fsync the WAL files. - * If syncing is required, the caller must first call {@link #syncWal()} - * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch - * with {@link WriteOptions#setSync(boolean)} set to true. - * - * See also {@link #close()}. - * - * @throws RocksDBException if an error occurs whilst closing. - */ - public void closeE() throws RocksDBException { - if (owningHandle_.compareAndSet(true, false)) { - try { - closeDatabase(nativeHandle_); - } finally { - disposeInternal(); - } - } - } - - /** - * This is similar to {@link #closeE()} except that it - * silently ignores any errors. - * - * This will not fsync the WAL files. - * If syncing is required, the caller must first call {@link #syncWal()} - * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch - * with {@link WriteOptions#setSync(boolean)} set to true. - * - * See also {@link #close()}. - */ - @Override - public void close() { - if (owningHandle_.compareAndSet(true, false)) { - try { - closeDatabase(nativeHandle_); - } catch (final RocksDBException e) { - // silently ignore the error report - } finally { - disposeInternal(); - } - } - } - - @Override - public Transaction beginTransaction(final WriteOptions writeOptions) { - return new Transaction(this, beginTransaction(nativeHandle_, - writeOptions.nativeHandle_)); - } - - @Override - public Transaction beginTransaction(final WriteOptions writeOptions, - final OptimisticTransactionOptions optimisticTransactionOptions) { - return new Transaction(this, beginTransaction(nativeHandle_, - writeOptions.nativeHandle_, - optimisticTransactionOptions.nativeHandle_)); - } - - // TODO(AR) consider having beingTransaction(... oldTransaction) set a - // reference count inside Transaction, so that we can always call - // Transaction#close but the object is only disposed when there are as many - // closes as beginTransaction. 
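The beginTransaction overloads that take an oldTransaction (below) return the very object passed in, so a hot write path can recycle a single Transaction handle. A hedged sketch, reusing txnDb and writeOptions from the previous example, inside a method that declares throws RocksDBException:

Transaction txn = txnDb.beginTransaction(writeOptions);
for (int i = 0; i < 3; i++) {
  txn.put(("key" + i).getBytes(), ("value" + i).getBytes());
  txn.commit();
  // Re-initialise the same handle instead of allocating a new Transaction;
  // the returned reference is asserted to be the oldTransaction itself.
  txn = txnDb.beginTransaction(writeOptions, txn);
}
txn.close();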
Makes the try-with-resources paradigm easier for - // java developers - - @Override - public Transaction beginTransaction(final WriteOptions writeOptions, - final Transaction oldTransaction) { - final long jtxn_handle = beginTransaction_withOld(nativeHandle_, - writeOptions.nativeHandle_, oldTransaction.nativeHandle_); - - // RocksJava relies on the assumption that - // we do not allocate a new Transaction object - // when providing an old_txn - assert(jtxn_handle == oldTransaction.nativeHandle_); - - return oldTransaction; - } - - @Override - public Transaction beginTransaction(final WriteOptions writeOptions, - final OptimisticTransactionOptions optimisticTransactionOptions, - final Transaction oldTransaction) { - final long jtxn_handle = beginTransaction_withOld(nativeHandle_, - writeOptions.nativeHandle_, optimisticTransactionOptions.nativeHandle_, - oldTransaction.nativeHandle_); - - // RocksJava relies on the assumption that - // we do not allocate a new Transaction object - // when providing an old_txn - assert(jtxn_handle == oldTransaction.nativeHandle_); - - return oldTransaction; - } - - /** - * Get the underlying database that was opened. - * - * @return The underlying database that was opened. - */ - public RocksDB getBaseDB() { - final RocksDB db = new RocksDB(getBaseDB(nativeHandle_)); - db.disOwnNativeHandle(); - return db; - } - - @Override protected final native void disposeInternal(final long handle); - - protected static native long open(final long optionsHandle, - final String path) throws RocksDBException; - protected static native long[] open(final long handle, final String path, - final byte[][] columnFamilyNames, final long[] columnFamilyOptions); - private native static void closeDatabase(final long handle) - throws RocksDBException; - private native long beginTransaction(final long handle, - final long writeOptionsHandle); - private native long beginTransaction(final long handle, - final long writeOptionsHandle, - final long optimisticTransactionOptionsHandle); - private native long beginTransaction_withOld(final long handle, - final long writeOptionsHandle, final long oldTransactionHandle); - private native long beginTransaction_withOld(final long handle, - final long writeOptionsHandle, - final long optimisticTransactionOptionsHandle, - final long oldTransactionHandle); - private native long getBaseDB(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java b/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java deleted file mode 100644 index 250edf806..000000000 --- a/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public class OptimisticTransactionOptions extends RocksObject - implements TransactionalOptions { - - public OptimisticTransactionOptions() { - super(newOptimisticTransactionOptions()); - } - - @Override - public boolean isSetSnapshot() { - assert(isOwningHandle()); - return isSetSnapshot(nativeHandle_); - } - - @Override - public OptimisticTransactionOptions setSetSnapshot( - final boolean setSnapshot) { - assert(isOwningHandle()); - setSetSnapshot(nativeHandle_, setSnapshot); - return this; - } - - /** - * Should be set if the DB has a non-default comparator. 
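Where concurrent writers are expected, the transaction can also pin a snapshot at begin time via OptimisticTransactionOptions. A small sketch under the same assumptions (txnDb and writeOptions already open, enclosing method throws RocksDBException):

try (final OptimisticTransactionOptions txnOptions =
         new OptimisticTransactionOptions().setSetSnapshot(true)) {
  final Transaction txn = txnDb.beginTransaction(writeOptions, txnOptions);
  try {
    // Reads and conflict checks are validated against the snapshot taken when the transaction began.
    txn.put("k".getBytes(), "v".getBytes());
    txn.commit();
  } finally {
    txn.close();
  }
}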
- * See comment in - * {@link WriteBatchWithIndex#WriteBatchWithIndex(AbstractComparator, int, boolean)} - * constructor. - * - * @param comparator The comparator to use for the transaction. - * - * @return this OptimisticTransactionOptions instance - */ - public OptimisticTransactionOptions setComparator( - final AbstractComparator comparator) { - assert(isOwningHandle()); - setComparator(nativeHandle_, comparator.nativeHandle_); - return this; - } - - private native static long newOptimisticTransactionOptions(); - private native boolean isSetSnapshot(final long handle); - private native void setSetSnapshot(final long handle, - final boolean setSnapshot); - private native void setComparator(final long handle, - final long comparatorHandle); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/OptionString.java b/java/src/main/java/org/rocksdb/OptionString.java deleted file mode 100644 index 7f97827cb..000000000 --- a/java/src/main/java/org/rocksdb/OptionString.java +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; - -public class OptionString { - private final static char kvPairSeparator = ';'; - private final static char kvSeparator = '='; - private final static char complexValueBegin = '{'; - private final static char complexValueEnd = '}'; - private final static char wrappedValueBegin = '{'; - private final static char wrappedValueEnd = '}'; - private final static char arrayValueSeparator = ':'; - - static class Value { - final List list; - final List complex; - - public Value(final List list, final List complex) { - this.list = list; - this.complex = complex; - } - - public boolean isList() { - return (this.list != null && this.complex == null); - } - - public static Value fromList(final List list) { - return new Value(list, null); - } - - public static Value fromComplex(final List complex) { - return new Value(null, complex); - } - - public String toString() { - final StringBuilder sb = new StringBuilder(); - if (isList()) { - for (final String item : list) { - sb.append(item).append(arrayValueSeparator); - } - // remove the final separator - if (sb.length() > 0) - sb.delete(sb.length() - 1, sb.length()); - } else { - sb.append('['); - for (final Entry entry : complex) { - sb.append(entry.toString()).append(';'); - } - sb.append(']'); - } - return sb.toString(); - } - } - - static class Entry { - public final String key; - public final Value value; - - private Entry(final String key, final Value value) { - this.key = key; - this.value = value; - } - - public String toString() { - return "" + key + "=" + value; - } - } - - static class Parser { - static class Exception extends RuntimeException { - public Exception(final String s) { - super(s); - } - } - - final String str; - final StringBuilder sb; - - private Parser(final String str) { - this.str = str; - this.sb = new StringBuilder(str); - } - - private void exception(final String message) { - final int pos = str.length() - sb.length(); - final int before = Math.min(pos, 64); - final int after = Math.min(64, str.length() - pos); - final String here = - str.substring(pos - before, pos) + "__*HERE*__" + str.substring(pos, 
pos + after); - - throw new Parser.Exception(message + " at [" + here + "]"); - } - - private void skipWhite() { - while (sb.length() > 0 && Character.isWhitespace(sb.charAt(0))) { - sb.delete(0, 1); - } - } - - private char first() { - if (sb.length() == 0) - exception("Unexpected end of input"); - return sb.charAt(0); - } - - private char next() { - if (sb.length() == 0) - exception("Unexpected end of input"); - final char c = sb.charAt(0); - sb.delete(0, 1); - return c; - } - - private boolean hasNext() { - return (sb.length() > 0); - } - - private boolean is(final char c) { - return (sb.length() > 0 && sb.charAt(0) == c); - } - - private boolean isKeyChar() { - if (!hasNext()) - return false; - final char c = first(); - return (Character.isAlphabetic(c) || Character.isDigit(c) || "_".indexOf(c) != -1); - } - - private boolean isValueChar() { - if (!hasNext()) - return false; - final char c = first(); - return (Character.isAlphabetic(c) || Character.isDigit(c) || "_-+.[]".indexOf(c) != -1); - } - - private String parseKey() { - final StringBuilder sbKey = new StringBuilder(); - sbKey.append(next()); - while (isKeyChar()) { - sbKey.append(next()); - } - - return sbKey.toString(); - } - - private String parseSimpleValue() { - if (is(wrappedValueBegin)) { - next(); - final String result = parseSimpleValue(); - if (!is(wrappedValueEnd)) { - exception("Expected to end a wrapped value with " + wrappedValueEnd); - } - next(); - - return result; - } else { - final StringBuilder sbValue = new StringBuilder(); - while (isValueChar()) sbValue.append(next()); - - return sbValue.toString(); - } - } - - private List parseList() { - final List list = new ArrayList<>(1); - while (true) { - list.add(parseSimpleValue()); - if (!is(arrayValueSeparator)) - break; - - next(); - } - - return list; - } - - private Entry parseOption() { - skipWhite(); - if (!isKeyChar()) { - exception("No valid key character(s) for key in key=value "); - } - final String key = parseKey(); - skipWhite(); - if (is(kvSeparator)) { - next(); - } else { - exception("Expected = separating key and value"); - } - skipWhite(); - final Value value = parseValue(); - return new Entry(key, value); - } - - private Value parseValue() { - skipWhite(); - if (is(complexValueBegin)) { - next(); - skipWhite(); - final Value value = Value.fromComplex(parseComplex()); - skipWhite(); - if (is(complexValueEnd)) { - next(); - skipWhite(); - } else { - exception("Expected } ending complex value"); - } - return value; - } else if (isValueChar()) { - return Value.fromList(parseList()); - } - - exception("No valid value character(s) for value in key=value"); - return null; - } - - private List parseComplex() { - final List entries = new ArrayList<>(); - - skipWhite(); - if (hasNext()) { - entries.add(parseOption()); - skipWhite(); - while (is(kvPairSeparator)) { - next(); - skipWhite(); - if (!isKeyChar()) { - // the separator was a terminator - break; - } - entries.add(parseOption()); - skipWhite(); - } - } - return entries; - } - - public static List parse(final String str) { - Objects.requireNonNull(str); - - final Parser parser = new Parser(str); - final List result = parser.parseComplex(); - if (parser.hasNext()) { - parser.exception("Unexpected end of parsing "); - } - - return result; - } - } -} diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/rocksdb/Options.java deleted file mode 100644 index 54f88262b..000000000 --- a/java/src/main/java/org/rocksdb/Options.java +++ /dev/null @@ -1,2578 +0,0 @@ -// Copyright (c) 
2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.file.Paths; -import java.util.*; - -/** - * Options to control the behavior of a database. It will be used - * during the creation of a {@link org.rocksdb.RocksDB} (i.e., RocksDB.open()). - * - * As a descendent of {@link AbstractNativeReference}, this class is {@link AutoCloseable} - * and will be automatically released if opened in the preamble of a try with resources block. - */ -public class Options extends RocksObject - implements DBOptionsInterface, - MutableDBOptionsInterface, - ColumnFamilyOptionsInterface, - MutableColumnFamilyOptionsInterface { - static { - RocksDB.loadLibrary(); - } - - /** - * Converts the input properties into a Options-style formatted string - * @param properties The set of properties to convert - * @return The Options-style representation of those properties. - */ - public static String getOptionStringFromProps(final Properties properties) { - if (properties == null || properties.size() == 0) { - throw new IllegalArgumentException("Properties value must contain at least one value."); - } - StringBuilder stringBuilder = new StringBuilder(); - for (final String name : properties.stringPropertyNames()) { - stringBuilder.append(name); - stringBuilder.append("="); - stringBuilder.append(properties.getProperty(name)); - stringBuilder.append(";"); - } - return stringBuilder.toString(); - } - - /** - * Construct options for opening a RocksDB. - * - * This constructor will create (by allocating a block of memory) - * an {@code rocksdb::Options} in the c++ side. - */ - public Options() { - super(newOptions()); - env_ = Env.getDefault(); - } - - /** - * Construct options for opening a RocksDB. Reusing database options - * and column family options. - * - * @param dbOptions {@link org.rocksdb.DBOptions} instance - * @param columnFamilyOptions {@link org.rocksdb.ColumnFamilyOptions} - * instance - */ - public Options(final DBOptions dbOptions, - final ColumnFamilyOptions columnFamilyOptions) { - super(newOptions(dbOptions.nativeHandle_, - columnFamilyOptions.nativeHandle_)); - env_ = dbOptions.getEnv() != null ? dbOptions.getEnv() : Env.getDefault(); - } - - /** - * Copy constructor for ColumnFamilyOptions. - * - * NOTE: This does a shallow copy, which means comparator, merge_operator - * and other pointers will be cloned! - * - * @param other The Options to copy. 
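getOptionStringFromProps simply joins each property into name=value pairs separated by ';'. A small sketch (pair order follows Properties iteration and is not guaranteed):

import java.util.Properties;
import org.rocksdb.Options;

public class OptionStringFromPropsSketch {
  public static void main(final String[] args) {
    final Properties props = new Properties();
    props.setProperty("max_background_jobs", "4");
    props.setProperty("bytes_per_sync", "1048576");
    // e.g. "max_background_jobs=4;bytes_per_sync=1048576;"
    System.out.println(Options.getOptionStringFromProps(props));
  }
}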
- */ - public Options(Options other) { - super(copyOptions(other.nativeHandle_)); - this.env_ = other.env_; - this.memTableConfig_ = other.memTableConfig_; - this.tableFormatConfig_ = other.tableFormatConfig_; - this.rateLimiter_ = other.rateLimiter_; - this.comparator_ = other.comparator_; - this.compactionFilter_ = other.compactionFilter_; - this.compactionFilterFactory_ = other.compactionFilterFactory_; - this.compactionOptionsUniversal_ = other.compactionOptionsUniversal_; - this.compactionOptionsFIFO_ = other.compactionOptionsFIFO_; - this.compressionOptions_ = other.compressionOptions_; - this.rowCache_ = other.rowCache_; - this.writeBufferManager_ = other.writeBufferManager_; - this.compactionThreadLimiter_ = other.compactionThreadLimiter_; - this.bottommostCompressionOptions_ = other.bottommostCompressionOptions_; - this.walFilter_ = other.walFilter_; - this.sstPartitionerFactory_ = other.sstPartitionerFactory_; - } - - @Override - public Options setIncreaseParallelism(final int totalThreads) { - assert(isOwningHandle()); - setIncreaseParallelism(nativeHandle_, totalThreads); - return this; - } - - @Override - public Options setCreateIfMissing(final boolean flag) { - assert(isOwningHandle()); - setCreateIfMissing(nativeHandle_, flag); - return this; - } - - @Override - public Options setCreateMissingColumnFamilies(final boolean flag) { - assert(isOwningHandle()); - setCreateMissingColumnFamilies(nativeHandle_, flag); - return this; - } - - @Override - public Options setEnv(final Env env) { - assert(isOwningHandle()); - setEnv(nativeHandle_, env.nativeHandle_); - env_ = env; - return this; - } - - @Override - public Env getEnv() { - return env_; - } - - /** - *

Set appropriate parameters for bulk loading. - * The reason that this is a function that returns "this" instead of a - * constructor is to enable chaining of multiple similar calls in the future. - * - * All data will be in level 0 without any automatic compaction. - * It's recommended to manually call CompactRange(NULL, NULL) before reading - * from the database, because otherwise the read can be very slow.
- * - * @return the instance of the current Options. - */ - public Options prepareForBulkLoad() { - prepareForBulkLoad(nativeHandle_); - return this; - } - - @Override - public boolean createIfMissing() { - assert(isOwningHandle()); - return createIfMissing(nativeHandle_); - } - - @Override - public boolean createMissingColumnFamilies() { - assert(isOwningHandle()); - return createMissingColumnFamilies(nativeHandle_); - } - - @Override - public Options oldDefaults(final int majorVersion, final int minorVersion) { - oldDefaults(nativeHandle_, majorVersion, minorVersion); - return this; - } - - @Override - public Options optimizeForSmallDb() { - optimizeForSmallDb(nativeHandle_); - return this; - } - - @Override - public Options optimizeForSmallDb(final Cache cache) { - optimizeForSmallDb(nativeHandle_, cache.getNativeHandle()); - return this; - } - - @Override - public Options optimizeForPointLookup( - long blockCacheSizeMb) { - optimizeForPointLookup(nativeHandle_, - blockCacheSizeMb); - return this; - } - - @Override - public Options optimizeLevelStyleCompaction() { - optimizeLevelStyleCompaction(nativeHandle_, - DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET); - return this; - } - - @Override - public Options optimizeLevelStyleCompaction( - long memtableMemoryBudget) { - optimizeLevelStyleCompaction(nativeHandle_, - memtableMemoryBudget); - return this; - } - - @Override - public Options optimizeUniversalStyleCompaction() { - optimizeUniversalStyleCompaction(nativeHandle_, - DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET); - return this; - } - - @Override - public Options optimizeUniversalStyleCompaction( - final long memtableMemoryBudget) { - optimizeUniversalStyleCompaction(nativeHandle_, - memtableMemoryBudget); - return this; - } - - @Override - public Options setComparator(final BuiltinComparator builtinComparator) { - assert(isOwningHandle()); - setComparatorHandle(nativeHandle_, builtinComparator.ordinal()); - return this; - } - - @Override - public Options setComparator( - final AbstractComparator comparator) { - assert(isOwningHandle()); - setComparatorHandle(nativeHandle_, comparator.nativeHandle_, - comparator.getComparatorType().getValue()); - comparator_ = comparator; - return this; - } - - @Override - public Options setMergeOperatorName(final String name) { - assert(isOwningHandle()); - if (name == null) { - throw new IllegalArgumentException( - "Merge operator name must not be null."); - } - setMergeOperatorName(nativeHandle_, name); - return this; - } - - @Override - public Options setMergeOperator(final MergeOperator mergeOperator) { - setMergeOperator(nativeHandle_, mergeOperator.nativeHandle_); - return this; - } - - @Override - public Options setCompactionFilter( - final AbstractCompactionFilter> - compactionFilter) { - setCompactionFilterHandle(nativeHandle_, compactionFilter.nativeHandle_); - compactionFilter_ = compactionFilter; - return this; - } - - @Override - public AbstractCompactionFilter> compactionFilter() { - assert (isOwningHandle()); - return compactionFilter_; - } - - @Override - public Options setCompactionFilterFactory(final AbstractCompactionFilterFactory> compactionFilterFactory) { - assert (isOwningHandle()); - setCompactionFilterFactoryHandle(nativeHandle_, compactionFilterFactory.nativeHandle_); - compactionFilterFactory_ = compactionFilterFactory; - return this; - } - - @Override - public AbstractCompactionFilterFactory> compactionFilterFactory() { - assert (isOwningHandle()); - return compactionFilterFactory_; - } - - @Override - public Options 
setWriteBufferSize(final long writeBufferSize) { - assert(isOwningHandle()); - setWriteBufferSize(nativeHandle_, writeBufferSize); - return this; - } - - @Override - public long writeBufferSize() { - assert(isOwningHandle()); - return writeBufferSize(nativeHandle_); - } - - @Override - public Options setMaxWriteBufferNumber(final int maxWriteBufferNumber) { - assert(isOwningHandle()); - setMaxWriteBufferNumber(nativeHandle_, maxWriteBufferNumber); - return this; - } - - @Override - public int maxWriteBufferNumber() { - assert(isOwningHandle()); - return maxWriteBufferNumber(nativeHandle_); - } - - @Override - public boolean errorIfExists() { - assert(isOwningHandle()); - return errorIfExists(nativeHandle_); - } - - @Override - public Options setErrorIfExists(final boolean errorIfExists) { - assert(isOwningHandle()); - setErrorIfExists(nativeHandle_, errorIfExists); - return this; - } - - @Override - public boolean paranoidChecks() { - assert(isOwningHandle()); - return paranoidChecks(nativeHandle_); - } - - @Override - public Options setParanoidChecks(final boolean paranoidChecks) { - assert(isOwningHandle()); - setParanoidChecks(nativeHandle_, paranoidChecks); - return this; - } - - @Override - public int maxOpenFiles() { - assert(isOwningHandle()); - return maxOpenFiles(nativeHandle_); - } - - @Override - public Options setMaxFileOpeningThreads(final int maxFileOpeningThreads) { - assert(isOwningHandle()); - setMaxFileOpeningThreads(nativeHandle_, maxFileOpeningThreads); - return this; - } - - @Override - public int maxFileOpeningThreads() { - assert(isOwningHandle()); - return maxFileOpeningThreads(nativeHandle_); - } - - @Override - public Options setMaxTotalWalSize(final long maxTotalWalSize) { - assert(isOwningHandle()); - setMaxTotalWalSize(nativeHandle_, maxTotalWalSize); - return this; - } - - @Override - public long maxTotalWalSize() { - assert(isOwningHandle()); - return maxTotalWalSize(nativeHandle_); - } - - @Override - public Options setMaxOpenFiles(final int maxOpenFiles) { - assert(isOwningHandle()); - setMaxOpenFiles(nativeHandle_, maxOpenFiles); - return this; - } - - @Override - public boolean useFsync() { - assert(isOwningHandle()); - return useFsync(nativeHandle_); - } - - @Override - public Options setUseFsync(final boolean useFsync) { - assert(isOwningHandle()); - setUseFsync(nativeHandle_, useFsync); - return this; - } - - @Override - public Options setDbPaths(final Collection dbPaths) { - assert(isOwningHandle()); - - final int len = dbPaths.size(); - final String paths[] = new String[len]; - final long targetSizes[] = new long[len]; - - int i = 0; - for(final DbPath dbPath : dbPaths) { - paths[i] = dbPath.path.toString(); - targetSizes[i] = dbPath.targetSize; - i++; - } - setDbPaths(nativeHandle_, paths, targetSizes); - return this; - } - - @Override - public List dbPaths() { - final int len = (int)dbPathsLen(nativeHandle_); - if(len == 0) { - return Collections.emptyList(); - } else { - final String paths[] = new String[len]; - final long targetSizes[] = new long[len]; - - dbPaths(nativeHandle_, paths, targetSizes); - - final List dbPaths = new ArrayList<>(); - for(int i = 0; i < len; i++) { - dbPaths.add(new DbPath(Paths.get(paths[i]), targetSizes[i])); - } - return dbPaths; - } - } - - @Override - public String dbLogDir() { - assert(isOwningHandle()); - return dbLogDir(nativeHandle_); - } - - @Override - public Options setDbLogDir(final String dbLogDir) { - assert(isOwningHandle()); - setDbLogDir(nativeHandle_, dbLogDir); - return this; - } - - @Override - 
public String walDir() { - assert(isOwningHandle()); - return walDir(nativeHandle_); - } - - @Override - public Options setWalDir(final String walDir) { - assert(isOwningHandle()); - setWalDir(nativeHandle_, walDir); - return this; - } - - @Override - public long deleteObsoleteFilesPeriodMicros() { - assert(isOwningHandle()); - return deleteObsoleteFilesPeriodMicros(nativeHandle_); - } - - @Override - public Options setDeleteObsoleteFilesPeriodMicros( - final long micros) { - assert(isOwningHandle()); - setDeleteObsoleteFilesPeriodMicros(nativeHandle_, micros); - return this; - } - - @Override - @Deprecated - public int maxBackgroundCompactions() { - assert(isOwningHandle()); - return maxBackgroundCompactions(nativeHandle_); - } - - @Override - public Options setStatistics(final Statistics statistics) { - assert(isOwningHandle()); - setStatistics(nativeHandle_, statistics.nativeHandle_); - return this; - } - - @Override - public Statistics statistics() { - assert(isOwningHandle()); - final long statisticsNativeHandle = statistics(nativeHandle_); - if(statisticsNativeHandle == 0) { - return null; - } else { - return new Statistics(statisticsNativeHandle); - } - } - - @Override - @Deprecated - public Options setMaxBackgroundCompactions( - final int maxBackgroundCompactions) { - assert(isOwningHandle()); - setMaxBackgroundCompactions(nativeHandle_, maxBackgroundCompactions); - return this; - } - - @Override - public Options setMaxSubcompactions(final int maxSubcompactions) { - assert(isOwningHandle()); - setMaxSubcompactions(nativeHandle_, maxSubcompactions); - return this; - } - - @Override - public int maxSubcompactions() { - assert(isOwningHandle()); - return maxSubcompactions(nativeHandle_); - } - - @Override - @Deprecated - public int maxBackgroundFlushes() { - assert(isOwningHandle()); - return maxBackgroundFlushes(nativeHandle_); - } - - @Override - @Deprecated - public Options setMaxBackgroundFlushes( - final int maxBackgroundFlushes) { - assert(isOwningHandle()); - setMaxBackgroundFlushes(nativeHandle_, maxBackgroundFlushes); - return this; - } - - @Override - public int maxBackgroundJobs() { - assert(isOwningHandle()); - return maxBackgroundJobs(nativeHandle_); - } - - @Override - public Options setMaxBackgroundJobs(final int maxBackgroundJobs) { - assert(isOwningHandle()); - setMaxBackgroundJobs(nativeHandle_, maxBackgroundJobs); - return this; - } - - @Override - public long maxLogFileSize() { - assert(isOwningHandle()); - return maxLogFileSize(nativeHandle_); - } - - @Override - public Options setMaxLogFileSize(final long maxLogFileSize) { - assert(isOwningHandle()); - setMaxLogFileSize(nativeHandle_, maxLogFileSize); - return this; - } - - @Override - public long logFileTimeToRoll() { - assert(isOwningHandle()); - return logFileTimeToRoll(nativeHandle_); - } - - @Override - public Options setLogFileTimeToRoll(final long logFileTimeToRoll) { - assert(isOwningHandle()); - setLogFileTimeToRoll(nativeHandle_, logFileTimeToRoll); - return this; - } - - @Override - public long keepLogFileNum() { - assert(isOwningHandle()); - return keepLogFileNum(nativeHandle_); - } - - @Override - public Options setKeepLogFileNum(final long keepLogFileNum) { - assert(isOwningHandle()); - setKeepLogFileNum(nativeHandle_, keepLogFileNum); - return this; - } - - - @Override - public Options setRecycleLogFileNum(final long recycleLogFileNum) { - assert(isOwningHandle()); - setRecycleLogFileNum(nativeHandle_, recycleLogFileNum); - return this; - } - - @Override - public long recycleLogFileNum() { - 
assert(isOwningHandle()); - return recycleLogFileNum(nativeHandle_); - } - - @Override - public long maxManifestFileSize() { - assert(isOwningHandle()); - return maxManifestFileSize(nativeHandle_); - } - - @Override - public Options setMaxManifestFileSize( - final long maxManifestFileSize) { - assert(isOwningHandle()); - setMaxManifestFileSize(nativeHandle_, maxManifestFileSize); - return this; - } - - @Override - public Options setMaxTableFilesSizeFIFO( - final long maxTableFilesSize) { - assert(maxTableFilesSize > 0); // unsigned native type - assert(isOwningHandle()); - setMaxTableFilesSizeFIFO(nativeHandle_, maxTableFilesSize); - return this; - } - - @Override - public long maxTableFilesSizeFIFO() { - return maxTableFilesSizeFIFO(nativeHandle_); - } - - @Override - public int tableCacheNumshardbits() { - assert(isOwningHandle()); - return tableCacheNumshardbits(nativeHandle_); - } - - @Override - public Options setTableCacheNumshardbits( - final int tableCacheNumshardbits) { - assert(isOwningHandle()); - setTableCacheNumshardbits(nativeHandle_, tableCacheNumshardbits); - return this; - } - - @Override - public long walTtlSeconds() { - assert(isOwningHandle()); - return walTtlSeconds(nativeHandle_); - } - - @Override - public Options setWalTtlSeconds(final long walTtlSeconds) { - assert(isOwningHandle()); - setWalTtlSeconds(nativeHandle_, walTtlSeconds); - return this; - } - - @Override - public long walSizeLimitMB() { - assert(isOwningHandle()); - return walSizeLimitMB(nativeHandle_); - } - - @Override - public Options setMaxWriteBatchGroupSizeBytes(long maxWriteBatchGroupSizeBytes) { - setMaxWriteBatchGroupSizeBytes(nativeHandle_, maxWriteBatchGroupSizeBytes); - return this; - } - - @Override - public long maxWriteBatchGroupSizeBytes() { - assert (isOwningHandle()); - return maxWriteBatchGroupSizeBytes(nativeHandle_); - } - - @Override - public Options setWalSizeLimitMB(final long sizeLimitMB) { - assert(isOwningHandle()); - setWalSizeLimitMB(nativeHandle_, sizeLimitMB); - return this; - } - - @Override - public long manifestPreallocationSize() { - assert(isOwningHandle()); - return manifestPreallocationSize(nativeHandle_); - } - - @Override - public Options setManifestPreallocationSize(final long size) { - assert(isOwningHandle()); - setManifestPreallocationSize(nativeHandle_, size); - return this; - } - - @Override - public Options setUseDirectReads(final boolean useDirectReads) { - assert(isOwningHandle()); - setUseDirectReads(nativeHandle_, useDirectReads); - return this; - } - - @Override - public boolean useDirectReads() { - assert(isOwningHandle()); - return useDirectReads(nativeHandle_); - } - - @Override - public Options setUseDirectIoForFlushAndCompaction( - final boolean useDirectIoForFlushAndCompaction) { - assert(isOwningHandle()); - setUseDirectIoForFlushAndCompaction(nativeHandle_, useDirectIoForFlushAndCompaction); - return this; - } - - @Override - public boolean useDirectIoForFlushAndCompaction() { - assert(isOwningHandle()); - return useDirectIoForFlushAndCompaction(nativeHandle_); - } - - @Override - public Options setAllowFAllocate(final boolean allowFAllocate) { - assert(isOwningHandle()); - setAllowFAllocate(nativeHandle_, allowFAllocate); - return this; - } - - @Override - public boolean allowFAllocate() { - assert(isOwningHandle()); - return allowFAllocate(nativeHandle_); - } - - @Override - public boolean allowMmapReads() { - assert(isOwningHandle()); - return allowMmapReads(nativeHandle_); - } - - @Override - public Options setAllowMmapReads(final boolean 
allowMmapReads) { - assert(isOwningHandle()); - setAllowMmapReads(nativeHandle_, allowMmapReads); - return this; - } - - @Override - public boolean allowMmapWrites() { - assert(isOwningHandle()); - return allowMmapWrites(nativeHandle_); - } - - @Override - public Options setAllowMmapWrites(final boolean allowMmapWrites) { - assert(isOwningHandle()); - setAllowMmapWrites(nativeHandle_, allowMmapWrites); - return this; - } - - @Override - public boolean isFdCloseOnExec() { - assert(isOwningHandle()); - return isFdCloseOnExec(nativeHandle_); - } - - @Override - public Options setIsFdCloseOnExec(final boolean isFdCloseOnExec) { - assert(isOwningHandle()); - setIsFdCloseOnExec(nativeHandle_, isFdCloseOnExec); - return this; - } - - @Override - public int statsDumpPeriodSec() { - assert(isOwningHandle()); - return statsDumpPeriodSec(nativeHandle_); - } - - @Override - public Options setStatsDumpPeriodSec(final int statsDumpPeriodSec) { - assert(isOwningHandle()); - setStatsDumpPeriodSec(nativeHandle_, statsDumpPeriodSec); - return this; - } - - @Override - public Options setStatsPersistPeriodSec( - final int statsPersistPeriodSec) { - assert(isOwningHandle()); - setStatsPersistPeriodSec(nativeHandle_, statsPersistPeriodSec); - return this; - } - - @Override - public int statsPersistPeriodSec() { - assert(isOwningHandle()); - return statsPersistPeriodSec(nativeHandle_); - } - - @Override - public Options setStatsHistoryBufferSize( - final long statsHistoryBufferSize) { - assert(isOwningHandle()); - setStatsHistoryBufferSize(nativeHandle_, statsHistoryBufferSize); - return this; - } - - @Override - public long statsHistoryBufferSize() { - assert(isOwningHandle()); - return statsHistoryBufferSize(nativeHandle_); - } - - @Override - public boolean adviseRandomOnOpen() { - return adviseRandomOnOpen(nativeHandle_); - } - - @Override - public Options setAdviseRandomOnOpen(final boolean adviseRandomOnOpen) { - assert(isOwningHandle()); - setAdviseRandomOnOpen(nativeHandle_, adviseRandomOnOpen); - return this; - } - - @Override - public Options setDbWriteBufferSize(final long dbWriteBufferSize) { - assert(isOwningHandle()); - setDbWriteBufferSize(nativeHandle_, dbWriteBufferSize); - return this; - } - - @Override - public Options setWriteBufferManager(final WriteBufferManager writeBufferManager) { - assert(isOwningHandle()); - setWriteBufferManager(nativeHandle_, writeBufferManager.nativeHandle_); - this.writeBufferManager_ = writeBufferManager; - return this; - } - - @Override - public WriteBufferManager writeBufferManager() { - assert(isOwningHandle()); - return this.writeBufferManager_; - } - - @Override - public long dbWriteBufferSize() { - assert(isOwningHandle()); - return dbWriteBufferSize(nativeHandle_); - } - - @Override - public Options setAccessHintOnCompactionStart(final AccessHint accessHint) { - assert(isOwningHandle()); - setAccessHintOnCompactionStart(nativeHandle_, accessHint.getValue()); - return this; - } - - @Override - public AccessHint accessHintOnCompactionStart() { - assert(isOwningHandle()); - return AccessHint.getAccessHint(accessHintOnCompactionStart(nativeHandle_)); - } - - @Override - public Options setCompactionReadaheadSize(final long compactionReadaheadSize) { - assert(isOwningHandle()); - setCompactionReadaheadSize(nativeHandle_, compactionReadaheadSize); - return this; - } - - @Override - public long compactionReadaheadSize() { - assert(isOwningHandle()); - return compactionReadaheadSize(nativeHandle_); - } - - @Override - public Options 
setRandomAccessMaxBufferSize(final long randomAccessMaxBufferSize) { - assert(isOwningHandle()); - setRandomAccessMaxBufferSize(nativeHandle_, randomAccessMaxBufferSize); - return this; - } - - @Override - public long randomAccessMaxBufferSize() { - assert(isOwningHandle()); - return randomAccessMaxBufferSize(nativeHandle_); - } - - @Override - public Options setWritableFileMaxBufferSize(final long writableFileMaxBufferSize) { - assert(isOwningHandle()); - setWritableFileMaxBufferSize(nativeHandle_, writableFileMaxBufferSize); - return this; - } - - @Override - public long writableFileMaxBufferSize() { - assert(isOwningHandle()); - return writableFileMaxBufferSize(nativeHandle_); - } - - @Override - public boolean useAdaptiveMutex() { - assert(isOwningHandle()); - return useAdaptiveMutex(nativeHandle_); - } - - @Override - public Options setUseAdaptiveMutex(final boolean useAdaptiveMutex) { - assert(isOwningHandle()); - setUseAdaptiveMutex(nativeHandle_, useAdaptiveMutex); - return this; - } - - @Override - public long bytesPerSync() { - return bytesPerSync(nativeHandle_); - } - - @Override - public Options setBytesPerSync(final long bytesPerSync) { - assert(isOwningHandle()); - setBytesPerSync(nativeHandle_, bytesPerSync); - return this; - } - - @Override - public Options setWalBytesPerSync(final long walBytesPerSync) { - assert(isOwningHandle()); - setWalBytesPerSync(nativeHandle_, walBytesPerSync); - return this; - } - - @Override - public long walBytesPerSync() { - assert(isOwningHandle()); - return walBytesPerSync(nativeHandle_); - } - - @Override - public Options setStrictBytesPerSync(final boolean strictBytesPerSync) { - assert(isOwningHandle()); - setStrictBytesPerSync(nativeHandle_, strictBytesPerSync); - return this; - } - - @Override - public boolean strictBytesPerSync() { - assert(isOwningHandle()); - return strictBytesPerSync(nativeHandle_); - } - - @Override - public Options setListeners(final List listeners) { - assert (isOwningHandle()); - setEventListeners(nativeHandle_, RocksCallbackObject.toNativeHandleList(listeners)); - return this; - } - - @Override - public List listeners() { - assert (isOwningHandle()); - return Arrays.asList(eventListeners(nativeHandle_)); - } - - @Override - public Options setEnableThreadTracking(final boolean enableThreadTracking) { - assert(isOwningHandle()); - setEnableThreadTracking(nativeHandle_, enableThreadTracking); - return this; - } - - @Override - public boolean enableThreadTracking() { - assert(isOwningHandle()); - return enableThreadTracking(nativeHandle_); - } - - @Override - public Options setDelayedWriteRate(final long delayedWriteRate) { - assert(isOwningHandle()); - setDelayedWriteRate(nativeHandle_, delayedWriteRate); - return this; - } - - @Override - public long delayedWriteRate(){ - return delayedWriteRate(nativeHandle_); - } - - @Override - public Options setEnablePipelinedWrite(final boolean enablePipelinedWrite) { - setEnablePipelinedWrite(nativeHandle_, enablePipelinedWrite); - return this; - } - - @Override - public boolean enablePipelinedWrite() { - return enablePipelinedWrite(nativeHandle_); - } - - @Override - public Options setUnorderedWrite(final boolean unorderedWrite) { - setUnorderedWrite(nativeHandle_, unorderedWrite); - return this; - } - - @Override - public boolean unorderedWrite() { - return unorderedWrite(nativeHandle_); - } - - @Override - public Options setAllowConcurrentMemtableWrite( - final boolean allowConcurrentMemtableWrite) { - setAllowConcurrentMemtableWrite(nativeHandle_, - 
allowConcurrentMemtableWrite); - return this; - } - - @Override - public boolean allowConcurrentMemtableWrite() { - return allowConcurrentMemtableWrite(nativeHandle_); - } - - @Override - public Options setEnableWriteThreadAdaptiveYield( - final boolean enableWriteThreadAdaptiveYield) { - setEnableWriteThreadAdaptiveYield(nativeHandle_, - enableWriteThreadAdaptiveYield); - return this; - } - - @Override - public boolean enableWriteThreadAdaptiveYield() { - return enableWriteThreadAdaptiveYield(nativeHandle_); - } - - @Override - public Options setWriteThreadMaxYieldUsec(final long writeThreadMaxYieldUsec) { - setWriteThreadMaxYieldUsec(nativeHandle_, writeThreadMaxYieldUsec); - return this; - } - - @Override - public long writeThreadMaxYieldUsec() { - return writeThreadMaxYieldUsec(nativeHandle_); - } - - @Override - public Options setWriteThreadSlowYieldUsec(final long writeThreadSlowYieldUsec) { - setWriteThreadSlowYieldUsec(nativeHandle_, writeThreadSlowYieldUsec); - return this; - } - - @Override - public long writeThreadSlowYieldUsec() { - return writeThreadSlowYieldUsec(nativeHandle_); - } - - @Override - public Options setSkipStatsUpdateOnDbOpen(final boolean skipStatsUpdateOnDbOpen) { - assert(isOwningHandle()); - setSkipStatsUpdateOnDbOpen(nativeHandle_, skipStatsUpdateOnDbOpen); - return this; - } - - @Override - public boolean skipStatsUpdateOnDbOpen() { - assert(isOwningHandle()); - return skipStatsUpdateOnDbOpen(nativeHandle_); - } - - @Override - public Options setSkipCheckingSstFileSizesOnDbOpen(boolean skipCheckingSstFileSizesOnDbOpen) { - setSkipCheckingSstFileSizesOnDbOpen(nativeHandle_, skipCheckingSstFileSizesOnDbOpen); - return this; - } - - @Override - public boolean skipCheckingSstFileSizesOnDbOpen() { - assert (isOwningHandle()); - return skipCheckingSstFileSizesOnDbOpen(nativeHandle_); - } - - @Override - public Options setWalRecoveryMode(final WALRecoveryMode walRecoveryMode) { - assert(isOwningHandle()); - setWalRecoveryMode(nativeHandle_, walRecoveryMode.getValue()); - return this; - } - - @Override - public WALRecoveryMode walRecoveryMode() { - assert(isOwningHandle()); - return WALRecoveryMode.getWALRecoveryMode(walRecoveryMode(nativeHandle_)); - } - - @Override - public Options setAllow2pc(final boolean allow2pc) { - assert(isOwningHandle()); - setAllow2pc(nativeHandle_, allow2pc); - return this; - } - - @Override - public boolean allow2pc() { - assert(isOwningHandle()); - return allow2pc(nativeHandle_); - } - - @Override - public Options setRowCache(final Cache rowCache) { - assert(isOwningHandle()); - setRowCache(nativeHandle_, rowCache.nativeHandle_); - this.rowCache_ = rowCache; - return this; - } - - @Override - public Cache rowCache() { - assert(isOwningHandle()); - return this.rowCache_; - } - - @Override - public Options setWalFilter(final AbstractWalFilter walFilter) { - assert(isOwningHandle()); - setWalFilter(nativeHandle_, walFilter.nativeHandle_); - this.walFilter_ = walFilter; - return this; - } - - @Override - public WalFilter walFilter() { - assert(isOwningHandle()); - return this.walFilter_; - } - - @Override - public Options setFailIfOptionsFileError(final boolean failIfOptionsFileError) { - assert(isOwningHandle()); - setFailIfOptionsFileError(nativeHandle_, failIfOptionsFileError); - return this; - } - - @Override - public boolean failIfOptionsFileError() { - assert(isOwningHandle()); - return failIfOptionsFileError(nativeHandle_); - } - - @Override - public Options setDumpMallocStats(final boolean dumpMallocStats) { - 
assert(isOwningHandle()); - setDumpMallocStats(nativeHandle_, dumpMallocStats); - return this; - } - - @Override - public boolean dumpMallocStats() { - assert(isOwningHandle()); - return dumpMallocStats(nativeHandle_); - } - - @Override - public Options setAvoidFlushDuringRecovery(final boolean avoidFlushDuringRecovery) { - assert(isOwningHandle()); - setAvoidFlushDuringRecovery(nativeHandle_, avoidFlushDuringRecovery); - return this; - } - - @Override - public boolean avoidFlushDuringRecovery() { - assert(isOwningHandle()); - return avoidFlushDuringRecovery(nativeHandle_); - } - - @Override - public Options setAvoidFlushDuringShutdown(final boolean avoidFlushDuringShutdown) { - assert(isOwningHandle()); - setAvoidFlushDuringShutdown(nativeHandle_, avoidFlushDuringShutdown); - return this; - } - - @Override - public boolean avoidFlushDuringShutdown() { - assert(isOwningHandle()); - return avoidFlushDuringShutdown(nativeHandle_); - } - - @Override - public Options setAllowIngestBehind(final boolean allowIngestBehind) { - assert(isOwningHandle()); - setAllowIngestBehind(nativeHandle_, allowIngestBehind); - return this; - } - - @Override - public boolean allowIngestBehind() { - assert(isOwningHandle()); - return allowIngestBehind(nativeHandle_); - } - - @Override - public Options setTwoWriteQueues(final boolean twoWriteQueues) { - assert(isOwningHandle()); - setTwoWriteQueues(nativeHandle_, twoWriteQueues); - return this; - } - - @Override - public boolean twoWriteQueues() { - assert(isOwningHandle()); - return twoWriteQueues(nativeHandle_); - } - - @Override - public Options setManualWalFlush(final boolean manualWalFlush) { - assert(isOwningHandle()); - setManualWalFlush(nativeHandle_, manualWalFlush); - return this; - } - - @Override - public boolean manualWalFlush() { - assert(isOwningHandle()); - return manualWalFlush(nativeHandle_); - } - - @Override - public MemTableConfig memTableConfig() { - return this.memTableConfig_; - } - - @Override - public Options setMemTableConfig(final MemTableConfig config) { - memTableConfig_ = config; - setMemTableFactory(nativeHandle_, config.newMemTableFactoryHandle()); - return this; - } - - @Override - public Options setRateLimiter(final RateLimiter rateLimiter) { - assert(isOwningHandle()); - rateLimiter_ = rateLimiter; - setRateLimiter(nativeHandle_, rateLimiter.nativeHandle_); - return this; - } - - @Override - public Options setSstFileManager(final SstFileManager sstFileManager) { - assert(isOwningHandle()); - setSstFileManager(nativeHandle_, sstFileManager.nativeHandle_); - return this; - } - - @Override - public Options setLogger(final Logger logger) { - assert(isOwningHandle()); - setLogger(nativeHandle_, logger.nativeHandle_); - return this; - } - - @Override - public Options setInfoLogLevel(final InfoLogLevel infoLogLevel) { - assert(isOwningHandle()); - setInfoLogLevel(nativeHandle_, infoLogLevel.getValue()); - return this; - } - - @Override - public InfoLogLevel infoLogLevel() { - assert(isOwningHandle()); - return InfoLogLevel.getInfoLogLevel( - infoLogLevel(nativeHandle_)); - } - - @Override - public String memTableFactoryName() { - assert(isOwningHandle()); - return memTableFactoryName(nativeHandle_); - } - - @Override - public TableFormatConfig tableFormatConfig() { - return this.tableFormatConfig_; - } - - @Override - public Options setTableFormatConfig(final TableFormatConfig config) { - tableFormatConfig_ = config; - setTableFactory(nativeHandle_, config.newTableFactoryHandle()); - return this; - } - - @Override - public String 
tableFactoryName() { - assert(isOwningHandle()); - return tableFactoryName(nativeHandle_); - } - - @Override - public Options setCfPaths(final Collection cfPaths) { - assert (isOwningHandle()); - - final int len = cfPaths.size(); - final String[] paths = new String[len]; - final long[] targetSizes = new long[len]; - - int i = 0; - for (final DbPath dbPath : cfPaths) { - paths[i] = dbPath.path.toString(); - targetSizes[i] = dbPath.targetSize; - i++; - } - setCfPaths(nativeHandle_, paths, targetSizes); - return this; - } - - @Override - public List cfPaths() { - final int len = (int) cfPathsLen(nativeHandle_); - - if (len == 0) { - return Collections.emptyList(); - } - - final String[] paths = new String[len]; - final long[] targetSizes = new long[len]; - - cfPaths(nativeHandle_, paths, targetSizes); - - final List cfPaths = new ArrayList<>(); - for (int i = 0; i < len; i++) { - cfPaths.add(new DbPath(Paths.get(paths[i]), targetSizes[i])); - } - - return cfPaths; - } - - @Override - public Options useFixedLengthPrefixExtractor(final int n) { - assert(isOwningHandle()); - useFixedLengthPrefixExtractor(nativeHandle_, n); - return this; - } - - @Override - public Options useCappedPrefixExtractor(final int n) { - assert(isOwningHandle()); - useCappedPrefixExtractor(nativeHandle_, n); - return this; - } - - @Override - public CompressionType compressionType() { - return CompressionType.getCompressionType(compressionType(nativeHandle_)); - } - - @Override - public Options setCompressionPerLevel( - final List compressionLevels) { - final byte[] byteCompressionTypes = new byte[ - compressionLevels.size()]; - for (int i = 0; i < compressionLevels.size(); i++) { - byteCompressionTypes[i] = compressionLevels.get(i).getValue(); - } - setCompressionPerLevel(nativeHandle_, byteCompressionTypes); - return this; - } - - @Override - public List compressionPerLevel() { - final byte[] byteCompressionTypes = - compressionPerLevel(nativeHandle_); - final List compressionLevels = new ArrayList<>(); - for (final byte byteCompressionType : byteCompressionTypes) { - compressionLevels.add(CompressionType.getCompressionType( - byteCompressionType)); - } - return compressionLevels; - } - - @Override - public Options setCompressionType(CompressionType compressionType) { - setCompressionType(nativeHandle_, compressionType.getValue()); - return this; - } - - - @Override - public Options setBottommostCompressionType( - final CompressionType bottommostCompressionType) { - setBottommostCompressionType(nativeHandle_, - bottommostCompressionType.getValue()); - return this; - } - - @Override - public CompressionType bottommostCompressionType() { - return CompressionType.getCompressionType( - bottommostCompressionType(nativeHandle_)); - } - - @Override - public Options setBottommostCompressionOptions( - final CompressionOptions bottommostCompressionOptions) { - setBottommostCompressionOptions(nativeHandle_, - bottommostCompressionOptions.nativeHandle_); - this.bottommostCompressionOptions_ = bottommostCompressionOptions; - return this; - } - - @Override - public CompressionOptions bottommostCompressionOptions() { - return this.bottommostCompressionOptions_; - } - - @Override - public Options setCompressionOptions( - final CompressionOptions compressionOptions) { - setCompressionOptions(nativeHandle_, compressionOptions.nativeHandle_); - this.compressionOptions_ = compressionOptions; - return this; - } - - @Override - public CompressionOptions compressionOptions() { - return this.compressionOptions_; - } - - @Override - public 
CompactionStyle compactionStyle() { - return CompactionStyle.fromValue(compactionStyle(nativeHandle_)); - } - - @Override - public Options setCompactionStyle( - final CompactionStyle compactionStyle) { - setCompactionStyle(nativeHandle_, compactionStyle.getValue()); - return this; - } - - @Override - public int numLevels() { - return numLevels(nativeHandle_); - } - - @Override - public Options setNumLevels(int numLevels) { - setNumLevels(nativeHandle_, numLevels); - return this; - } - - @Override - public int levelZeroFileNumCompactionTrigger() { - return levelZeroFileNumCompactionTrigger(nativeHandle_); - } - - @Override - public Options setLevelZeroFileNumCompactionTrigger( - final int numFiles) { - setLevelZeroFileNumCompactionTrigger( - nativeHandle_, numFiles); - return this; - } - - @Override - public int levelZeroSlowdownWritesTrigger() { - return levelZeroSlowdownWritesTrigger(nativeHandle_); - } - - @Override - public Options setLevelZeroSlowdownWritesTrigger( - final int numFiles) { - setLevelZeroSlowdownWritesTrigger(nativeHandle_, numFiles); - return this; - } - - @Override - public int levelZeroStopWritesTrigger() { - return levelZeroStopWritesTrigger(nativeHandle_); - } - - @Override - public Options setLevelZeroStopWritesTrigger( - final int numFiles) { - setLevelZeroStopWritesTrigger(nativeHandle_, numFiles); - return this; - } - - @Override - public long targetFileSizeBase() { - return targetFileSizeBase(nativeHandle_); - } - - @Override - public Options setTargetFileSizeBase(long targetFileSizeBase) { - setTargetFileSizeBase(nativeHandle_, targetFileSizeBase); - return this; - } - - @Override - public int targetFileSizeMultiplier() { - return targetFileSizeMultiplier(nativeHandle_); - } - - @Override - public Options setTargetFileSizeMultiplier(int multiplier) { - setTargetFileSizeMultiplier(nativeHandle_, multiplier); - return this; - } - - @Override - public Options setMaxBytesForLevelBase(final long maxBytesForLevelBase) { - setMaxBytesForLevelBase(nativeHandle_, maxBytesForLevelBase); - return this; - } - - @Override - public long maxBytesForLevelBase() { - return maxBytesForLevelBase(nativeHandle_); - } - - @Override - public Options setLevelCompactionDynamicLevelBytes( - final boolean enableLevelCompactionDynamicLevelBytes) { - setLevelCompactionDynamicLevelBytes(nativeHandle_, - enableLevelCompactionDynamicLevelBytes); - return this; - } - - @Override - public boolean levelCompactionDynamicLevelBytes() { - return levelCompactionDynamicLevelBytes(nativeHandle_); - } - - @Override - public double maxBytesForLevelMultiplier() { - return maxBytesForLevelMultiplier(nativeHandle_); - } - - @Override - public Options setMaxBytesForLevelMultiplier(final double multiplier) { - setMaxBytesForLevelMultiplier(nativeHandle_, multiplier); - return this; - } - - @Override - public long maxCompactionBytes() { - return maxCompactionBytes(nativeHandle_); - } - - @Override - public Options setMaxCompactionBytes(final long maxCompactionBytes) { - setMaxCompactionBytes(nativeHandle_, maxCompactionBytes); - return this; - } - - @Override - public long arenaBlockSize() { - return arenaBlockSize(nativeHandle_); - } - - @Override - public Options setArenaBlockSize(final long arenaBlockSize) { - setArenaBlockSize(nativeHandle_, arenaBlockSize); - return this; - } - - @Override - public boolean disableAutoCompactions() { - return disableAutoCompactions(nativeHandle_); - } - - @Override - public Options setDisableAutoCompactions( - final boolean disableAutoCompactions) { - 
setDisableAutoCompactions(nativeHandle_, disableAutoCompactions); - return this; - } - - @Override - public long maxSequentialSkipInIterations() { - return maxSequentialSkipInIterations(nativeHandle_); - } - - @Override - public Options setMaxSequentialSkipInIterations( - final long maxSequentialSkipInIterations) { - setMaxSequentialSkipInIterations(nativeHandle_, - maxSequentialSkipInIterations); - return this; - } - - @Override - public boolean inplaceUpdateSupport() { - return inplaceUpdateSupport(nativeHandle_); - } - - @Override - public Options setInplaceUpdateSupport( - final boolean inplaceUpdateSupport) { - setInplaceUpdateSupport(nativeHandle_, inplaceUpdateSupport); - return this; - } - - @Override - public long inplaceUpdateNumLocks() { - return inplaceUpdateNumLocks(nativeHandle_); - } - - @Override - public Options setInplaceUpdateNumLocks( - final long inplaceUpdateNumLocks) { - setInplaceUpdateNumLocks(nativeHandle_, inplaceUpdateNumLocks); - return this; - } - - @Override - public double memtablePrefixBloomSizeRatio() { - return memtablePrefixBloomSizeRatio(nativeHandle_); - } - - @Override - public Options setMemtablePrefixBloomSizeRatio(final double memtablePrefixBloomSizeRatio) { - setMemtablePrefixBloomSizeRatio(nativeHandle_, memtablePrefixBloomSizeRatio); - return this; - } - - @Override - public double experimentalMempurgeThreshold() { - return experimentalMempurgeThreshold(nativeHandle_); - } - - @Override - public Options setExperimentalMempurgeThreshold(final double experimentalMempurgeThreshold) { - setExperimentalMempurgeThreshold(nativeHandle_, experimentalMempurgeThreshold); - return this; - } - - @Override - public boolean memtableWholeKeyFiltering() { - return memtableWholeKeyFiltering(nativeHandle_); - } - - @Override - public Options setMemtableWholeKeyFiltering(final boolean memtableWholeKeyFiltering) { - setMemtableWholeKeyFiltering(nativeHandle_, memtableWholeKeyFiltering); - return this; - } - - @Override - public int bloomLocality() { - return bloomLocality(nativeHandle_); - } - - @Override - public Options setBloomLocality(final int bloomLocality) { - setBloomLocality(nativeHandle_, bloomLocality); - return this; - } - - @Override - public long maxSuccessiveMerges() { - return maxSuccessiveMerges(nativeHandle_); - } - - @Override - public Options setMaxSuccessiveMerges(long maxSuccessiveMerges) { - setMaxSuccessiveMerges(nativeHandle_, maxSuccessiveMerges); - return this; - } - - @Override - public int minWriteBufferNumberToMerge() { - return minWriteBufferNumberToMerge(nativeHandle_); - } - - @Override - public Options setMinWriteBufferNumberToMerge( - final int minWriteBufferNumberToMerge) { - setMinWriteBufferNumberToMerge(nativeHandle_, minWriteBufferNumberToMerge); - return this; - } - - @Override - public Options setOptimizeFiltersForHits( - final boolean optimizeFiltersForHits) { - setOptimizeFiltersForHits(nativeHandle_, optimizeFiltersForHits); - return this; - } - - @Override - public boolean optimizeFiltersForHits() { - return optimizeFiltersForHits(nativeHandle_); - } - - @Override - public Options - setMemtableHugePageSize( - long memtableHugePageSize) { - setMemtableHugePageSize(nativeHandle_, - memtableHugePageSize); - return this; - } - - @Override - public long memtableHugePageSize() { - return memtableHugePageSize(nativeHandle_); - } - - @Override - public Options setSoftPendingCompactionBytesLimit(long softPendingCompactionBytesLimit) { - setSoftPendingCompactionBytesLimit(nativeHandle_, - softPendingCompactionBytesLimit); - 
return this; - } - - @Override - public long softPendingCompactionBytesLimit() { - return softPendingCompactionBytesLimit(nativeHandle_); - } - - @Override - public Options setHardPendingCompactionBytesLimit(long hardPendingCompactionBytesLimit) { - setHardPendingCompactionBytesLimit(nativeHandle_, hardPendingCompactionBytesLimit); - return this; - } - - @Override - public long hardPendingCompactionBytesLimit() { - return hardPendingCompactionBytesLimit(nativeHandle_); - } - - @Override - public Options setLevel0FileNumCompactionTrigger(int level0FileNumCompactionTrigger) { - setLevel0FileNumCompactionTrigger(nativeHandle_, level0FileNumCompactionTrigger); - return this; - } - - @Override - public int level0FileNumCompactionTrigger() { - return level0FileNumCompactionTrigger(nativeHandle_); - } - - @Override - public Options setLevel0SlowdownWritesTrigger(int level0SlowdownWritesTrigger) { - setLevel0SlowdownWritesTrigger(nativeHandle_, level0SlowdownWritesTrigger); - return this; - } - - @Override - public int level0SlowdownWritesTrigger() { - return level0SlowdownWritesTrigger(nativeHandle_); - } - - @Override - public Options setLevel0StopWritesTrigger(int level0StopWritesTrigger) { - setLevel0StopWritesTrigger(nativeHandle_, level0StopWritesTrigger); - return this; - } - - @Override - public int level0StopWritesTrigger() { - return level0StopWritesTrigger(nativeHandle_); - } - - @Override - public Options setMaxBytesForLevelMultiplierAdditional(int[] maxBytesForLevelMultiplierAdditional) { - setMaxBytesForLevelMultiplierAdditional(nativeHandle_, maxBytesForLevelMultiplierAdditional); - return this; - } - - @Override - public int[] maxBytesForLevelMultiplierAdditional() { - return maxBytesForLevelMultiplierAdditional(nativeHandle_); - } - - @Override - public Options setParanoidFileChecks(boolean paranoidFileChecks) { - setParanoidFileChecks(nativeHandle_, paranoidFileChecks); - return this; - } - - @Override - public boolean paranoidFileChecks() { - return paranoidFileChecks(nativeHandle_); - } - - @Override - public Options setMaxWriteBufferNumberToMaintain( - final int maxWriteBufferNumberToMaintain) { - setMaxWriteBufferNumberToMaintain( - nativeHandle_, maxWriteBufferNumberToMaintain); - return this; - } - - @Override - public int maxWriteBufferNumberToMaintain() { - return maxWriteBufferNumberToMaintain(nativeHandle_); - } - - @Override - public Options setCompactionPriority( - final CompactionPriority compactionPriority) { - setCompactionPriority(nativeHandle_, compactionPriority.getValue()); - return this; - } - - @Override - public CompactionPriority compactionPriority() { - return CompactionPriority.getCompactionPriority( - compactionPriority(nativeHandle_)); - } - - @Override - public Options setReportBgIoStats(final boolean reportBgIoStats) { - setReportBgIoStats(nativeHandle_, reportBgIoStats); - return this; - } - - @Override - public boolean reportBgIoStats() { - return reportBgIoStats(nativeHandle_); - } - - @Override - public Options setTtl(final long ttl) { - setTtl(nativeHandle_, ttl); - return this; - } - - @Override - public long ttl() { - return ttl(nativeHandle_); - } - - @Override - public Options setPeriodicCompactionSeconds(final long periodicCompactionSeconds) { - setPeriodicCompactionSeconds(nativeHandle_, periodicCompactionSeconds); - return this; - } - - @Override - public long periodicCompactionSeconds() { - return periodicCompactionSeconds(nativeHandle_); - } - - @Override - public Options setCompactionOptionsUniversal( - final CompactionOptionsUniversal 
compactionOptionsUniversal) { - setCompactionOptionsUniversal(nativeHandle_, - compactionOptionsUniversal.nativeHandle_); - this.compactionOptionsUniversal_ = compactionOptionsUniversal; - return this; - } - - @Override - public CompactionOptionsUniversal compactionOptionsUniversal() { - return this.compactionOptionsUniversal_; - } - - @Override - public Options setCompactionOptionsFIFO(final CompactionOptionsFIFO compactionOptionsFIFO) { - setCompactionOptionsFIFO(nativeHandle_, - compactionOptionsFIFO.nativeHandle_); - this.compactionOptionsFIFO_ = compactionOptionsFIFO; - return this; - } - - @Override - public CompactionOptionsFIFO compactionOptionsFIFO() { - return this.compactionOptionsFIFO_; - } - - @Override - public Options setForceConsistencyChecks(final boolean forceConsistencyChecks) { - setForceConsistencyChecks(nativeHandle_, forceConsistencyChecks); - return this; - } - - @Override - public boolean forceConsistencyChecks() { - return forceConsistencyChecks(nativeHandle_); - } - - @Override - public Options setAtomicFlush(final boolean atomicFlush) { - setAtomicFlush(nativeHandle_, atomicFlush); - return this; - } - - @Override - public boolean atomicFlush() { - return atomicFlush(nativeHandle_); - } - - @Override - public Options setAvoidUnnecessaryBlockingIO(boolean avoidUnnecessaryBlockingIO) { - setAvoidUnnecessaryBlockingIO(nativeHandle_, avoidUnnecessaryBlockingIO); - return this; - } - - @Override - public boolean avoidUnnecessaryBlockingIO() { - assert (isOwningHandle()); - return avoidUnnecessaryBlockingIO(nativeHandle_); - } - - @Override - public Options setPersistStatsToDisk(boolean persistStatsToDisk) { - setPersistStatsToDisk(nativeHandle_, persistStatsToDisk); - return this; - } - - @Override - public boolean persistStatsToDisk() { - assert (isOwningHandle()); - return persistStatsToDisk(nativeHandle_); - } - - @Override - public Options setWriteDbidToManifest(boolean writeDbidToManifest) { - setWriteDbidToManifest(nativeHandle_, writeDbidToManifest); - return this; - } - - @Override - public boolean writeDbidToManifest() { - assert (isOwningHandle()); - return writeDbidToManifest(nativeHandle_); - } - - @Override - public Options setLogReadaheadSize(long logReadaheadSize) { - setLogReadaheadSize(nativeHandle_, logReadaheadSize); - return this; - } - - @Override - public long logReadaheadSize() { - assert (isOwningHandle()); - return logReadaheadSize(nativeHandle_); - } - - @Override - public Options setBestEffortsRecovery(boolean bestEffortsRecovery) { - setBestEffortsRecovery(nativeHandle_, bestEffortsRecovery); - return this; - } - - @Override - public boolean bestEffortsRecovery() { - assert (isOwningHandle()); - return bestEffortsRecovery(nativeHandle_); - } - - @Override - public Options setMaxBgErrorResumeCount(int maxBgerrorResumeCount) { - setMaxBgErrorResumeCount(nativeHandle_, maxBgerrorResumeCount); - return this; - } - - @Override - public int maxBgerrorResumeCount() { - assert (isOwningHandle()); - return maxBgerrorResumeCount(nativeHandle_); - } - - @Override - public Options setBgerrorResumeRetryInterval(long bgerrorResumeRetryInterval) { - setBgerrorResumeRetryInterval(nativeHandle_, bgerrorResumeRetryInterval); - return this; - } - - @Override - public long bgerrorResumeRetryInterval() { - assert (isOwningHandle()); - return bgerrorResumeRetryInterval(nativeHandle_); - } - - @Override - public Options setSstPartitionerFactory(SstPartitionerFactory sstPartitionerFactory) { - setSstPartitionerFactory(nativeHandle_, 
sstPartitionerFactory.nativeHandle_); - this.sstPartitionerFactory_ = sstPartitionerFactory; - return this; - } - - @Override - public SstPartitionerFactory sstPartitionerFactory() { - return sstPartitionerFactory_; - } - - @Override - public Options setCompactionThreadLimiter(final ConcurrentTaskLimiter compactionThreadLimiter) { - setCompactionThreadLimiter(nativeHandle_, compactionThreadLimiter.nativeHandle_); - this.compactionThreadLimiter_ = compactionThreadLimiter; - return this; - } - - @Override - public ConcurrentTaskLimiter compactionThreadLimiter() { - assert (isOwningHandle()); - return this.compactionThreadLimiter_; - } - - // - // BEGIN options for blobs (integrated BlobDB) - // - - @Override - public Options setEnableBlobFiles(final boolean enableBlobFiles) { - setEnableBlobFiles(nativeHandle_, enableBlobFiles); - return this; - } - - @Override - public boolean enableBlobFiles() { - return enableBlobFiles(nativeHandle_); - } - - @Override - public Options setMinBlobSize(final long minBlobSize) { - setMinBlobSize(nativeHandle_, minBlobSize); - return this; - } - - @Override - public long minBlobSize() { - return minBlobSize(nativeHandle_); - } - - @Override - public Options setBlobFileSize(final long blobFileSize) { - setBlobFileSize(nativeHandle_, blobFileSize); - return this; - } - - @Override - public long blobFileSize() { - return blobFileSize(nativeHandle_); - } - - @Override - public Options setBlobCompressionType(CompressionType compressionType) { - setBlobCompressionType(nativeHandle_, compressionType.getValue()); - return this; - } - - @Override - public CompressionType blobCompressionType() { - return CompressionType.values()[blobCompressionType(nativeHandle_)]; - } - - @Override - public Options setEnableBlobGarbageCollection(final boolean enableBlobGarbageCollection) { - setEnableBlobGarbageCollection(nativeHandle_, enableBlobGarbageCollection); - return this; - } - - @Override - public boolean enableBlobGarbageCollection() { - return enableBlobGarbageCollection(nativeHandle_); - } - - @Override - public Options setBlobGarbageCollectionAgeCutoff(final double blobGarbageCollectionAgeCutoff) { - setBlobGarbageCollectionAgeCutoff(nativeHandle_, blobGarbageCollectionAgeCutoff); - return this; - } - - @Override - public double blobGarbageCollectionAgeCutoff() { - return blobGarbageCollectionAgeCutoff(nativeHandle_); - } - - @Override - public Options setBlobGarbageCollectionForceThreshold( - final double blobGarbageCollectionForceThreshold) { - setBlobGarbageCollectionForceThreshold(nativeHandle_, blobGarbageCollectionForceThreshold); - return this; - } - - @Override - public double blobGarbageCollectionForceThreshold() { - return blobGarbageCollectionForceThreshold(nativeHandle_); - } - - @Override - public Options setBlobCompactionReadaheadSize(final long blobCompactionReadaheadSize) { - setBlobCompactionReadaheadSize(nativeHandle_, blobCompactionReadaheadSize); - return this; - } - - @Override - public long blobCompactionReadaheadSize() { - return blobCompactionReadaheadSize(nativeHandle_); - } - - @Override - public Options setBlobFileStartingLevel(final int blobFileStartingLevel) { - setBlobFileStartingLevel(nativeHandle_, blobFileStartingLevel); - return this; - } - - @Override - public int blobFileStartingLevel() { - return blobFileStartingLevel(nativeHandle_); - } - - @Override - public Options setPrepopulateBlobCache(final PrepopulateBlobCache prepopulateBlobCache) { - setPrepopulateBlobCache(nativeHandle_, prepopulateBlobCache.getValue()); - return this; - } 
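
The blob-file ("integrated BlobDB") setters above follow the same return-this pattern as the rest of the class. As an illustrative sketch only (not part of the deleted file): it chains those setters on a fresh Options instance; every size, threshold, and compression choice is an example value, and setCreateIfMissing comes from the wider RocksJava Options API rather than this excerpt.

    import org.rocksdb.CompressionType;
    import org.rocksdb.Options;

    public class BlobOptionsSketch {
      public static void main(final String[] args) {
        // Chain the blob-file setters on one Options instance; sizes and
        // thresholds below are illustrative only, not recommendations.
        try (final Options options = new Options()) {
          options.setCreateIfMissing(true)
              .setEnableBlobFiles(true)                  // store large values in blob files
              .setMinBlobSize(4L * 1024)                 // values of 4 KiB and larger become blobs
              .setBlobFileSize(256L * 1024 * 1024)       // target size of each blob file
              .setBlobCompressionType(CompressionType.ZSTD_COMPRESSION)
              .setEnableBlobGarbageCollection(true)
              .setBlobGarbageCollectionAgeCutoff(0.25);  // GC targets the oldest 25% of blob files
        }
      }
    }
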
- - @Override - public PrepopulateBlobCache prepopulateBlobCache() { - return PrepopulateBlobCache.getPrepopulateBlobCache(prepopulateBlobCache(nativeHandle_)); - } - - // - // END options for blobs (integrated BlobDB) - // - - private native static long newOptions(); - private native static long newOptions(long dbOptHandle, - long cfOptHandle); - private native static long copyOptions(long handle); - @Override protected final native void disposeInternal(final long handle); - private native void setEnv(long optHandle, long envHandle); - private native void prepareForBulkLoad(long handle); - - // DB native handles - private native void setIncreaseParallelism(long handle, int totalThreads); - private native void setCreateIfMissing(long handle, boolean flag); - private native boolean createIfMissing(long handle); - private native void setCreateMissingColumnFamilies( - long handle, boolean flag); - private native boolean createMissingColumnFamilies(long handle); - private native void setErrorIfExists(long handle, boolean errorIfExists); - private native boolean errorIfExists(long handle); - private native void setParanoidChecks( - long handle, boolean paranoidChecks); - private native boolean paranoidChecks(long handle); - private native void setRateLimiter(long handle, - long rateLimiterHandle); - private native void setSstFileManager(final long handle, - final long sstFileManagerHandle); - private native void setLogger(long handle, - long loggerHandle); - private native void setInfoLogLevel(long handle, byte logLevel); - private native byte infoLogLevel(long handle); - private native void setMaxOpenFiles(long handle, int maxOpenFiles); - private native int maxOpenFiles(long handle); - private native void setMaxTotalWalSize(long handle, - long maxTotalWalSize); - private native void setMaxFileOpeningThreads(final long handle, - final int maxFileOpeningThreads); - private native int maxFileOpeningThreads(final long handle); - private native long maxTotalWalSize(long handle); - private native void setStatistics(final long handle, final long statisticsHandle); - private native long statistics(final long handle); - private native boolean useFsync(long handle); - private native void setUseFsync(long handle, boolean useFsync); - private native void setDbPaths(final long handle, final String[] paths, - final long[] targetSizes); - private native long dbPathsLen(final long handle); - private native void dbPaths(final long handle, final String[] paths, - final long[] targetSizes); - private native void setDbLogDir(long handle, String dbLogDir); - private native String dbLogDir(long handle); - private native void setWalDir(long handle, String walDir); - private native String walDir(long handle); - private native void setDeleteObsoleteFilesPeriodMicros( - long handle, long micros); - private native long deleteObsoleteFilesPeriodMicros(long handle); - private native void setMaxBackgroundCompactions( - long handle, int maxBackgroundCompactions); - private native int maxBackgroundCompactions(long handle); - private native void setMaxSubcompactions(long handle, int maxSubcompactions); - private native int maxSubcompactions(long handle); - private native void setMaxBackgroundFlushes( - long handle, int maxBackgroundFlushes); - private native int maxBackgroundFlushes(long handle); - private native void setMaxBackgroundJobs(long handle, int maxMaxBackgroundJobs); - private native int maxBackgroundJobs(long handle); - private native void setMaxLogFileSize(long handle, long maxLogFileSize) - throws 
IllegalArgumentException; - private native long maxLogFileSize(long handle); - private native void setLogFileTimeToRoll( - long handle, long logFileTimeToRoll) throws IllegalArgumentException; - private native long logFileTimeToRoll(long handle); - private native void setKeepLogFileNum(long handle, long keepLogFileNum) - throws IllegalArgumentException; - private native long keepLogFileNum(long handle); - private native void setRecycleLogFileNum(long handle, long recycleLogFileNum); - private native long recycleLogFileNum(long handle); - private native void setMaxManifestFileSize( - long handle, long maxManifestFileSize); - private native long maxManifestFileSize(long handle); - private native void setMaxTableFilesSizeFIFO( - long handle, long maxTableFilesSize); - private native long maxTableFilesSizeFIFO(long handle); - private native void setTableCacheNumshardbits( - long handle, int tableCacheNumshardbits); - private native int tableCacheNumshardbits(long handle); - private native void setWalTtlSeconds(long handle, long walTtlSeconds); - private native long walTtlSeconds(long handle); - private native void setWalSizeLimitMB(long handle, long sizeLimitMB); - private native long walSizeLimitMB(long handle); - private static native void setMaxWriteBatchGroupSizeBytes( - final long handle, final long maxWriteBatchGroupSizeBytes); - private static native long maxWriteBatchGroupSizeBytes(final long handle); - private native void setManifestPreallocationSize( - long handle, long size) throws IllegalArgumentException; - private native long manifestPreallocationSize(long handle); - private native void setUseDirectReads(long handle, boolean useDirectReads); - private native boolean useDirectReads(long handle); - private native void setUseDirectIoForFlushAndCompaction( - long handle, boolean useDirectIoForFlushAndCompaction); - private native boolean useDirectIoForFlushAndCompaction(long handle); - private native void setAllowFAllocate(final long handle, - final boolean allowFAllocate); - private native boolean allowFAllocate(final long handle); - private native void setAllowMmapReads( - long handle, boolean allowMmapReads); - private native boolean allowMmapReads(long handle); - private native void setAllowMmapWrites( - long handle, boolean allowMmapWrites); - private native boolean allowMmapWrites(long handle); - private native void setIsFdCloseOnExec( - long handle, boolean isFdCloseOnExec); - private native boolean isFdCloseOnExec(long handle); - private native void setStatsDumpPeriodSec( - long handle, int statsDumpPeriodSec); - private native int statsDumpPeriodSec(long handle); - private native void setStatsPersistPeriodSec( - final long handle, final int statsPersistPeriodSec); - private native int statsPersistPeriodSec( - final long handle); - private native void setStatsHistoryBufferSize( - final long handle, final long statsHistoryBufferSize); - private native long statsHistoryBufferSize( - final long handle); - private native void setAdviseRandomOnOpen( - long handle, boolean adviseRandomOnOpen); - private native boolean adviseRandomOnOpen(long handle); - private native void setDbWriteBufferSize(final long handle, - final long dbWriteBufferSize); - private native void setWriteBufferManager(final long handle, - final long writeBufferManagerHandle); - private native long dbWriteBufferSize(final long handle); - private native void setAccessHintOnCompactionStart(final long handle, - final byte accessHintOnCompactionStart); - private native byte accessHintOnCompactionStart(final long 
handle); - private native void setCompactionReadaheadSize(final long handle, - final long compactionReadaheadSize); - private native long compactionReadaheadSize(final long handle); - private native void setRandomAccessMaxBufferSize(final long handle, - final long randomAccessMaxBufferSize); - private native long randomAccessMaxBufferSize(final long handle); - private native void setWritableFileMaxBufferSize(final long handle, - final long writableFileMaxBufferSize); - private native long writableFileMaxBufferSize(final long handle); - private native void setUseAdaptiveMutex( - long handle, boolean useAdaptiveMutex); - private native boolean useAdaptiveMutex(long handle); - private native void setBytesPerSync( - long handle, long bytesPerSync); - private native long bytesPerSync(long handle); - private native void setWalBytesPerSync(long handle, long walBytesPerSync); - private native long walBytesPerSync(long handle); - private native void setStrictBytesPerSync( - final long handle, final boolean strictBytesPerSync); - private native boolean strictBytesPerSync( - final long handle); - private static native void setEventListeners( - final long handle, final long[] eventListenerHandles); - private static native AbstractEventListener[] eventListeners(final long handle); - private native void setEnableThreadTracking(long handle, - boolean enableThreadTracking); - private native boolean enableThreadTracking(long handle); - private native void setDelayedWriteRate(long handle, long delayedWriteRate); - private native long delayedWriteRate(long handle); - private native void setEnablePipelinedWrite(final long handle, - final boolean pipelinedWrite); - private native boolean enablePipelinedWrite(final long handle); - private native void setUnorderedWrite(final long handle, - final boolean unorderedWrite); - private native boolean unorderedWrite(final long handle); - private native void setAllowConcurrentMemtableWrite(long handle, - boolean allowConcurrentMemtableWrite); - private native boolean allowConcurrentMemtableWrite(long handle); - private native void setEnableWriteThreadAdaptiveYield(long handle, - boolean enableWriteThreadAdaptiveYield); - private native boolean enableWriteThreadAdaptiveYield(long handle); - private native void setWriteThreadMaxYieldUsec(long handle, - long writeThreadMaxYieldUsec); - private native long writeThreadMaxYieldUsec(long handle); - private native void setWriteThreadSlowYieldUsec(long handle, - long writeThreadSlowYieldUsec); - private native long writeThreadSlowYieldUsec(long handle); - private native void setSkipStatsUpdateOnDbOpen(final long handle, - final boolean skipStatsUpdateOnDbOpen); - private native boolean skipStatsUpdateOnDbOpen(final long handle); - private static native void setSkipCheckingSstFileSizesOnDbOpen( - final long handle, final boolean skipChecking); - private static native boolean skipCheckingSstFileSizesOnDbOpen(final long handle); - private native void setWalRecoveryMode(final long handle, - final byte walRecoveryMode); - private native byte walRecoveryMode(final long handle); - private native void setAllow2pc(final long handle, - final boolean allow2pc); - private native boolean allow2pc(final long handle); - private native void setRowCache(final long handle, - final long rowCacheHandle); - private native void setWalFilter(final long handle, - final long walFilterHandle); - private native void setFailIfOptionsFileError(final long handle, - final boolean failIfOptionsFileError); - private native boolean failIfOptionsFileError(final 
long handle); - private native void setDumpMallocStats(final long handle, - final boolean dumpMallocStats); - private native boolean dumpMallocStats(final long handle); - private native void setAvoidFlushDuringRecovery(final long handle, - final boolean avoidFlushDuringRecovery); - private native boolean avoidFlushDuringRecovery(final long handle); - private native void setAvoidFlushDuringShutdown(final long handle, - final boolean avoidFlushDuringShutdown); - private native boolean avoidFlushDuringShutdown(final long handle); - private native void setAllowIngestBehind(final long handle, - final boolean allowIngestBehind); - private native boolean allowIngestBehind(final long handle); - private native void setTwoWriteQueues(final long handle, - final boolean twoWriteQueues); - private native boolean twoWriteQueues(final long handle); - private native void setManualWalFlush(final long handle, - final boolean manualWalFlush); - private native boolean manualWalFlush(final long handle); - - - // CF native handles - private static native void oldDefaults( - final long handle, final int majorVersion, final int minorVersion); - private native void optimizeForSmallDb(final long handle); - private static native void optimizeForSmallDb(final long handle, final long cacheHandle); - private native void optimizeForPointLookup(long handle, - long blockCacheSizeMb); - private native void optimizeLevelStyleCompaction(long handle, - long memtableMemoryBudget); - private native void optimizeUniversalStyleCompaction(long handle, - long memtableMemoryBudget); - private native void setComparatorHandle(long handle, int builtinComparator); - private native void setComparatorHandle(long optHandle, - long comparatorHandle, byte comparatorType); - private native void setMergeOperatorName( - long handle, String name); - private native void setMergeOperator( - long handle, long mergeOperatorHandle); - private native void setCompactionFilterHandle( - long handle, long compactionFilterHandle); - private native void setCompactionFilterFactoryHandle( - long handle, long compactionFilterFactoryHandle); - private native void setWriteBufferSize(long handle, long writeBufferSize) - throws IllegalArgumentException; - private native long writeBufferSize(long handle); - private native void setMaxWriteBufferNumber( - long handle, int maxWriteBufferNumber); - private native int maxWriteBufferNumber(long handle); - private native void setMinWriteBufferNumberToMerge( - long handle, int minWriteBufferNumberToMerge); - private native int minWriteBufferNumberToMerge(long handle); - private native void setCompressionType(long handle, byte compressionType); - private native byte compressionType(long handle); - private native void setCompressionPerLevel(long handle, - byte[] compressionLevels); - private native byte[] compressionPerLevel(long handle); - private native void setBottommostCompressionType(long handle, - byte bottommostCompressionType); - private native byte bottommostCompressionType(long handle); - private native void setBottommostCompressionOptions(final long handle, - final long bottommostCompressionOptionsHandle); - private native void setCompressionOptions(long handle, - long compressionOptionsHandle); - private native void useFixedLengthPrefixExtractor( - long handle, int prefixLength); - private native void useCappedPrefixExtractor( - long handle, int prefixLength); - private native void setNumLevels( - long handle, int numLevels); - private native int numLevels(long handle); - private native void 
setLevelZeroFileNumCompactionTrigger( - long handle, int numFiles); - private native int levelZeroFileNumCompactionTrigger(long handle); - private native void setLevelZeroSlowdownWritesTrigger( - long handle, int numFiles); - private native int levelZeroSlowdownWritesTrigger(long handle); - private native void setLevelZeroStopWritesTrigger( - long handle, int numFiles); - private native int levelZeroStopWritesTrigger(long handle); - private native void setTargetFileSizeBase( - long handle, long targetFileSizeBase); - private native long targetFileSizeBase(long handle); - private native void setTargetFileSizeMultiplier( - long handle, int multiplier); - private native int targetFileSizeMultiplier(long handle); - private native void setMaxBytesForLevelBase( - long handle, long maxBytesForLevelBase); - private native long maxBytesForLevelBase(long handle); - private native void setLevelCompactionDynamicLevelBytes( - long handle, boolean enableLevelCompactionDynamicLevelBytes); - private native boolean levelCompactionDynamicLevelBytes( - long handle); - private native void setMaxBytesForLevelMultiplier(long handle, double multiplier); - private native double maxBytesForLevelMultiplier(long handle); - private native void setMaxCompactionBytes(long handle, long maxCompactionBytes); - private native long maxCompactionBytes(long handle); - private native void setArenaBlockSize( - long handle, long arenaBlockSize) throws IllegalArgumentException; - private native long arenaBlockSize(long handle); - private native void setDisableAutoCompactions( - long handle, boolean disableAutoCompactions); - private native boolean disableAutoCompactions(long handle); - private native void setCompactionStyle(long handle, byte compactionStyle); - private native byte compactionStyle(long handle); - private native void setMaxSequentialSkipInIterations( - long handle, long maxSequentialSkipInIterations); - private native long maxSequentialSkipInIterations(long handle); - private native void setMemTableFactory(long handle, long factoryHandle); - private native String memTableFactoryName(long handle); - private native void setTableFactory(long handle, long factoryHandle); - private native String tableFactoryName(long handle); - private static native void setCfPaths( - final long handle, final String[] paths, final long[] targetSizes); - private static native long cfPathsLen(final long handle); - private static native void cfPaths( - final long handle, final String[] paths, final long[] targetSizes); - private native void setInplaceUpdateSupport( - long handle, boolean inplaceUpdateSupport); - private native boolean inplaceUpdateSupport(long handle); - private native void setInplaceUpdateNumLocks( - long handle, long inplaceUpdateNumLocks) - throws IllegalArgumentException; - private native long inplaceUpdateNumLocks(long handle); - private native void setMemtablePrefixBloomSizeRatio( - long handle, double memtablePrefixBloomSizeRatio); - private native double memtablePrefixBloomSizeRatio(long handle); - private native void setExperimentalMempurgeThreshold( - long handle, double experimentalMempurgeThreshold); - private native double experimentalMempurgeThreshold(long handle); - private native void setMemtableWholeKeyFiltering(long handle, boolean memtableWholeKeyFiltering); - private native boolean memtableWholeKeyFiltering(long handle); - private native void setBloomLocality( - long handle, int bloomLocality); - private native int bloomLocality(long handle); - private native void setMaxSuccessiveMerges( - long handle, 
long maxSuccessiveMerges) - throws IllegalArgumentException; - private native long maxSuccessiveMerges(long handle); - private native void setOptimizeFiltersForHits(long handle, - boolean optimizeFiltersForHits); - private native boolean optimizeFiltersForHits(long handle); - private native void setMemtableHugePageSize(long handle, - long memtableHugePageSize); - private native long memtableHugePageSize(long handle); - private native void setSoftPendingCompactionBytesLimit(long handle, - long softPendingCompactionBytesLimit); - private native long softPendingCompactionBytesLimit(long handle); - private native void setHardPendingCompactionBytesLimit(long handle, - long hardPendingCompactionBytesLimit); - private native long hardPendingCompactionBytesLimit(long handle); - private native void setLevel0FileNumCompactionTrigger(long handle, - int level0FileNumCompactionTrigger); - private native int level0FileNumCompactionTrigger(long handle); - private native void setLevel0SlowdownWritesTrigger(long handle, - int level0SlowdownWritesTrigger); - private native int level0SlowdownWritesTrigger(long handle); - private native void setLevel0StopWritesTrigger(long handle, - int level0StopWritesTrigger); - private native int level0StopWritesTrigger(long handle); - private native void setMaxBytesForLevelMultiplierAdditional(long handle, - int[] maxBytesForLevelMultiplierAdditional); - private native int[] maxBytesForLevelMultiplierAdditional(long handle); - private native void setParanoidFileChecks(long handle, - boolean paranoidFileChecks); - private native boolean paranoidFileChecks(long handle); - private native void setMaxWriteBufferNumberToMaintain(final long handle, - final int maxWriteBufferNumberToMaintain); - private native int maxWriteBufferNumberToMaintain(final long handle); - private native void setCompactionPriority(final long handle, - final byte compactionPriority); - private native byte compactionPriority(final long handle); - private native void setReportBgIoStats(final long handle, - final boolean reportBgIoStats); - private native boolean reportBgIoStats(final long handle); - private native void setTtl(final long handle, final long ttl); - private native long ttl(final long handle); - private native void setPeriodicCompactionSeconds( - final long handle, final long periodicCompactionSeconds); - private native long periodicCompactionSeconds(final long handle); - private native void setCompactionOptionsUniversal(final long handle, - final long compactionOptionsUniversalHandle); - private native void setCompactionOptionsFIFO(final long handle, - final long compactionOptionsFIFOHandle); - private native void setForceConsistencyChecks(final long handle, - final boolean forceConsistencyChecks); - private native boolean forceConsistencyChecks(final long handle); - private native void setAtomicFlush(final long handle, - final boolean atomicFlush); - private native boolean atomicFlush(final long handle); - private native void setSstPartitionerFactory(long nativeHandle_, long newFactoryHandle); - private static native void setCompactionThreadLimiter( - final long nativeHandle_, final long newLimiterHandle); - private static native void setAvoidUnnecessaryBlockingIO( - final long handle, final boolean avoidBlockingIO); - private static native boolean avoidUnnecessaryBlockingIO(final long handle); - private static native void setPersistStatsToDisk( - final long handle, final boolean persistStatsToDisk); - private static native boolean persistStatsToDisk(final long handle); - private static 
native void setWriteDbidToManifest( - final long handle, final boolean writeDbidToManifest); - private static native boolean writeDbidToManifest(final long handle); - private static native void setLogReadaheadSize(final long handle, final long logReadaheadSize); - private static native long logReadaheadSize(final long handle); - private static native void setBestEffortsRecovery( - final long handle, final boolean bestEffortsRecovery); - private static native boolean bestEffortsRecovery(final long handle); - private static native void setMaxBgErrorResumeCount( - final long handle, final int maxBgerrorRecumeCount); - private static native int maxBgerrorResumeCount(final long handle); - private static native void setBgerrorResumeRetryInterval( - final long handle, final long bgerrorResumeRetryInterval); - private static native long bgerrorResumeRetryInterval(final long handle); - - private native void setEnableBlobFiles(final long nativeHandle_, final boolean enableBlobFiles); - private native boolean enableBlobFiles(final long nativeHandle_); - private native void setMinBlobSize(final long nativeHandle_, final long minBlobSize); - private native long minBlobSize(final long nativeHandle_); - private native void setBlobFileSize(final long nativeHandle_, final long blobFileSize); - private native long blobFileSize(final long nativeHandle_); - private native void setBlobCompressionType(final long nativeHandle_, final byte compressionType); - private native byte blobCompressionType(final long nativeHandle_); - private native void setEnableBlobGarbageCollection( - final long nativeHandle_, final boolean enableBlobGarbageCollection); - private native boolean enableBlobGarbageCollection(final long nativeHandle_); - private native void setBlobGarbageCollectionAgeCutoff( - final long nativeHandle_, final double blobGarbageCollectionAgeCutoff); - private native double blobGarbageCollectionAgeCutoff(final long nativeHandle_); - private native void setBlobGarbageCollectionForceThreshold( - final long nativeHandle_, final double blobGarbageCollectionForceThreshold); - private native double blobGarbageCollectionForceThreshold(final long nativeHandle_); - private native void setBlobCompactionReadaheadSize( - final long nativeHandle_, final long blobCompactionReadaheadSize); - private native long blobCompactionReadaheadSize(final long nativeHandle_); - private native void setBlobFileStartingLevel( - final long nativeHandle_, final int blobFileStartingLevel); - private native int blobFileStartingLevel(final long nativeHandle_); - private native void setPrepopulateBlobCache( - final long nativeHandle_, final byte prepopulateBlobCache); - private native byte prepopulateBlobCache(final long nativeHandle_); - - // instance variables - // NOTE: If you add new member variables, please update the copy constructor above! 
- private Env env_; - private MemTableConfig memTableConfig_; - private TableFormatConfig tableFormatConfig_; - private RateLimiter rateLimiter_; - private AbstractComparator comparator_; - private AbstractCompactionFilter> compactionFilter_; - private AbstractCompactionFilterFactory> - compactionFilterFactory_; - private CompactionOptionsUniversal compactionOptionsUniversal_; - private CompactionOptionsFIFO compactionOptionsFIFO_; - private CompressionOptions bottommostCompressionOptions_; - private CompressionOptions compressionOptions_; - private Cache rowCache_; - private WalFilter walFilter_; - private WriteBufferManager writeBufferManager_; - private SstPartitionerFactory sstPartitionerFactory_; - private ConcurrentTaskLimiter compactionThreadLimiter_; -} diff --git a/java/src/main/java/org/rocksdb/OptionsUtil.java b/java/src/main/java/org/rocksdb/OptionsUtil.java deleted file mode 100644 index e21121a2b..000000000 --- a/java/src/main/java/org/rocksdb/OptionsUtil.java +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.List; - -public class OptionsUtil { - /** - * A static method to construct the DBOptions and ColumnFamilyDescriptors by - * loading the latest RocksDB options file stored in the specified rocksdb - * database. - * - * Note that the all the pointer options (except table_factory, which will - * be described in more details below) will be initialized with the default - * values. Developers can further initialize them after this function call. - * Below is an example list of pointer options which will be initialized. - * - * - env - * - memtable_factory - * - compaction_filter_factory - * - prefix_extractor - * - comparator - * - merge_operator - * - compaction_filter - * - * For table_factory, this function further supports deserializing - * BlockBasedTableFactory and its BlockBasedTableOptions except the - * pointer options of BlockBasedTableOptions (flush_block_policy_factory, - * and block_cache), which will be initialized with - * default values. Developers can further specify these three options by - * casting the return value of TableFactoroy::GetOptions() to - * BlockBasedTableOptions and making necessary changes. - * - * @param dbPath the path to the RocksDB. - * @param configOptions {@link org.rocksdb.ConfigOptions} instance. - * @param dbOptions {@link org.rocksdb.DBOptions} instance. This will be - * filled and returned. - * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be - * returned. - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static void loadLatestOptions(ConfigOptions configOptions, String dbPath, - DBOptions dbOptions, List cfDescs) throws RocksDBException { - loadLatestOptions(configOptions.nativeHandle_, dbPath, dbOptions.nativeHandle_, cfDescs); - } - - /** - * Similar to LoadLatestOptions, this function constructs the DBOptions - * and ColumnFamilyDescriptors based on the specified RocksDB Options file. - * See LoadLatestOptions above. - * - * @param optionsFileName the RocksDB options file path. - * @param configOptions {@link org.rocksdb.ConfigOptions} instance. - * @param dbOptions {@link org.rocksdb.DBOptions} instance. This will be - * filled and returned. 
- * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be - * returned. - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static void loadOptionsFromFile(ConfigOptions configOptions, String optionsFileName, - DBOptions dbOptions, List cfDescs) throws RocksDBException { - loadOptionsFromFile( - configOptions.nativeHandle_, optionsFileName, dbOptions.nativeHandle_, cfDescs); - } - - /** - * Returns the latest options file name under the specified RocksDB path. - * - * @param dbPath the path to the RocksDB. - * @param env {@link org.rocksdb.Env} instance. - * @return the latest options file name under the db path. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static String getLatestOptionsFileName(String dbPath, Env env) throws RocksDBException { - return getLatestOptionsFileName(dbPath, env.nativeHandle_); - } - - /** - * Private constructor. - * This class has only static methods and shouldn't be instantiated. - */ - private OptionsUtil() {} - - // native methods - private native static void loadLatestOptions(long cfgHandle, String dbPath, long dbOptionsHandle, - List cfDescs) throws RocksDBException; - private native static void loadOptionsFromFile(long cfgHandle, String optionsFileName, - long dbOptionsHandle, List cfDescs) throws RocksDBException; - private native static String getLatestOptionsFileName(String dbPath, long envHandle) - throws RocksDBException; -} diff --git a/java/src/main/java/org/rocksdb/PersistentCache.java b/java/src/main/java/org/rocksdb/PersistentCache.java deleted file mode 100644 index aed565297..000000000 --- a/java/src/main/java/org/rocksdb/PersistentCache.java +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Persistent cache for caching IO pages on a persistent medium. The - * cache is specifically designed for persistent read cache. - */ -public class PersistentCache extends RocksObject { - - public PersistentCache(final Env env, final String path, final long size, - final Logger logger, final boolean optimizedForNvm) - throws RocksDBException { - super(newPersistentCache(env.nativeHandle_, path, size, - logger.nativeHandle_, optimizedForNvm)); - } - - private native static long newPersistentCache(final long envHandle, - final String path, final long size, final long loggerHandle, - final boolean optimizedForNvm) throws RocksDBException; - - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/PlainTableConfig.java b/java/src/main/java/org/rocksdb/PlainTableConfig.java deleted file mode 100644 index c09998167..000000000 --- a/java/src/main/java/org/rocksdb/PlainTableConfig.java +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * The config for plain table sst format. - * - *

PlainTable is a RocksDB SST file format optimized for low query - * latency on pure-memory or really low-latency media. - * - * It also supports the prefix hash feature.
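A minimal usage sketch of the class documented above, assuming the RocksJava Options API (setCreateIfMissing, useFixedLengthPrefixExtractor, setTableFormatConfig, RocksDB.open); the database path, the fixed 16-byte key size, and the key/value bytes are illustrative only:

```java
import org.rocksdb.Options;
import org.rocksdb.PlainTableConfig;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class PlainTableExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             // PlainTable's prefix hash index requires a prefix extractor.
             .useFixedLengthPrefixExtractor(8)
             .setTableFormatConfig(new PlainTableConfig()
                 .setKeySize(16)          // fixed-length, 16-byte user keys
                 .setBloomBitsPerKey(10)
                 .setHashTableRatio(0.75)
                 .setIndexSparseness(16));
         final RocksDB db = RocksDB.open(options, "/tmp/plain-table-example")) {
      db.put("user_00000000042".getBytes(), "value".getBytes()); // key is exactly 16 bytes
    }
  }
}
```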
- */ -public class PlainTableConfig extends TableFormatConfig { - public static final int VARIABLE_LENGTH = 0; - public static final int DEFAULT_BLOOM_BITS_PER_KEY = 10; - public static final double DEFAULT_HASH_TABLE_RATIO = 0.75; - public static final int DEFAULT_INDEX_SPARSENESS = 16; - public static final int DEFAULT_HUGE_TLB_SIZE = 0; - public static final EncodingType DEFAULT_ENCODING_TYPE = - EncodingType.kPlain; - public static final boolean DEFAULT_FULL_SCAN_MODE = false; - public static final boolean DEFAULT_STORE_INDEX_IN_FILE - = false; - - public PlainTableConfig() { - keySize_ = VARIABLE_LENGTH; - bloomBitsPerKey_ = DEFAULT_BLOOM_BITS_PER_KEY; - hashTableRatio_ = DEFAULT_HASH_TABLE_RATIO; - indexSparseness_ = DEFAULT_INDEX_SPARSENESS; - hugePageTlbSize_ = DEFAULT_HUGE_TLB_SIZE; - encodingType_ = DEFAULT_ENCODING_TYPE; - fullScanMode_ = DEFAULT_FULL_SCAN_MODE; - storeIndexInFile_ = DEFAULT_STORE_INDEX_IN_FILE; - } - - /** - *

Set the length of the user key. If it is set to be - * VARIABLE_LENGTH, then it indicates the user keys are - * of variable length. - * - * Otherwise, all the keys need to have the same length - * in bytes. - * - * DEFAULT: VARIABLE_LENGTH
- * - * @param keySize the length of the user key. - * @return the reference to the current config. - */ - public PlainTableConfig setKeySize(int keySize) { - keySize_ = keySize; - return this; - } - - /** - * @return the specified size of the user key. If VARIABLE_LENGTH, - * then it indicates variable-length key. - */ - public int keySize() { - return keySize_; - } - - /** - * Set the number of bits per key used by the internal bloom filter - * in the plain table sst format. - * - * @param bitsPerKey the number of bits per key for bloom filer. - * @return the reference to the current config. - */ - public PlainTableConfig setBloomBitsPerKey(int bitsPerKey) { - bloomBitsPerKey_ = bitsPerKey; - return this; - } - - /** - * @return the number of bits per key used for the bloom filter. - */ - public int bloomBitsPerKey() { - return bloomBitsPerKey_; - } - - /** - * hashTableRatio is the desired utilization of the hash table used - * for prefix hashing. The ideal ratio would be the number of - * prefixes / the number of hash buckets. If this value is set to - * zero, then hash table will not be used. - * - * @param ratio the hash table ratio. - * @return the reference to the current config. - */ - public PlainTableConfig setHashTableRatio(double ratio) { - hashTableRatio_ = ratio; - return this; - } - - /** - * @return the hash table ratio. - */ - public double hashTableRatio() { - return hashTableRatio_; - } - - /** - * Index sparseness determines the index interval for keys inside the - * same prefix. This number is equal to the maximum number of linear - * search required after hash and binary search. If it's set to 0, - * then each key will be indexed. - * - * @param sparseness the index sparseness. - * @return the reference to the current config. - */ - public PlainTableConfig setIndexSparseness(int sparseness) { - indexSparseness_ = sparseness; - return this; - } - - /** - * @return the index sparseness. - */ - public long indexSparseness() { - return indexSparseness_; - } - - /** - *

huge_page_tlb_size: if ≤0, allocate hash indexes and bloom filters - * from malloc, otherwise from the huge page TLB. - * - * The user needs to reserve huge pages for them to be allocated, - * e.g.: {@code sysctl -w vm.nr_hugepages=20} - * - * See the Linux doc Documentation/vm/hugetlbpage.txt
- * - * @param hugePageTlbSize huge page tlb size - * @return the reference to the current config. - */ - public PlainTableConfig setHugePageTlbSize(int hugePageTlbSize) { - this.hugePageTlbSize_ = hugePageTlbSize; - return this; - } - - /** - * Returns the value for huge page tlb size - * - * @return hugePageTlbSize - */ - public int hugePageTlbSize() { - return hugePageTlbSize_; - } - - /** - * Sets the encoding type. - * - *

This setting determines how to encode - * the keys. See enum {@link EncodingType} for - * the choices. - * - * The value determines how keys are encoded - * when writing to a new SST file. It is also stored - * inside the SST file and used when reading from - * the file, which makes it possible for users to choose a - * different encoding type when reopening a DB. Files with - * different encoding types can co-exist in the same DB and - * can be read.
- * - * @param encodingType {@link org.rocksdb.EncodingType} value. - * @return the reference to the current config. - */ - public PlainTableConfig setEncodingType(EncodingType encodingType) { - this.encodingType_ = encodingType; - return this; - } - - /** - * Returns the active EncodingType - * - * @return currently set encoding type - */ - public EncodingType encodingType() { - return encodingType_; - } - - /** - * Set full scan mode, if true the whole file will be read - * one record by one without using the index. - * - * @param fullScanMode boolean value indicating if full - * scan mode shall be enabled. - * @return the reference to the current config. - */ - public PlainTableConfig setFullScanMode(boolean fullScanMode) { - this.fullScanMode_ = fullScanMode; - return this; - } - - /** - * Return if full scan mode is active - * @return boolean value indicating if the full scan mode is - * enabled. - */ - public boolean fullScanMode() { - return fullScanMode_; - } - - /** - *

If set to true: compute the plain table index and bloom - * filter during file building and store them in the file. - * When reading the file, the index will be mmapped instead - * of being recomputed.
- * - * @param storeIndexInFile value indicating if index shall - * be stored in a file - * @return the reference to the current config. - */ - public PlainTableConfig setStoreIndexInFile(boolean storeIndexInFile) { - this.storeIndexInFile_ = storeIndexInFile; - return this; - } - - /** - * Return a boolean value indicating if index shall be stored - * in a file. - * - * @return currently set value for store index in file. - */ - public boolean storeIndexInFile() { - return storeIndexInFile_; - } - - @Override protected long newTableFactoryHandle() { - return newTableFactoryHandle(keySize_, bloomBitsPerKey_, - hashTableRatio_, indexSparseness_, hugePageTlbSize_, - encodingType_.getValue(), fullScanMode_, - storeIndexInFile_); - } - - private native long newTableFactoryHandle( - int keySize, int bloomBitsPerKey, - double hashTableRatio, int indexSparseness, - int hugePageTlbSize, byte encodingType, - boolean fullScanMode, boolean storeIndexInFile); - - private int keySize_; - private int bloomBitsPerKey_; - private double hashTableRatio_; - private int indexSparseness_; - private int hugePageTlbSize_; - private EncodingType encodingType_; - private boolean fullScanMode_; - private boolean storeIndexInFile_; -} diff --git a/java/src/main/java/org/rocksdb/PrepopulateBlobCache.java b/java/src/main/java/org/rocksdb/PrepopulateBlobCache.java deleted file mode 100644 index f1237aa7c..000000000 --- a/java/src/main/java/org/rocksdb/PrepopulateBlobCache.java +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Enum PrepopulateBlobCache - * - *

Prepopulate warm/hot blobs which are already in memory into the blob - * cache at the time of flush. On a flush, the blob that is in memory - * (in memtables) gets flushed to the device. If using Direct IO, - * additional IO is incurred to read this blob back into memory again, - * which is avoided by enabling this option. This further helps if the - * workload exhibits high temporal locality, where most of the reads go - * to recently written data. It also helps in the case of a remote file - * system, since that involves network traffic and higher latencies.
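A short sketch of how this enum is typically consumed, assuming the public blob setters (setEnableBlobFiles, setMinBlobSize, setPrepopulateBlobCache) that correspond to the native methods earlier in this diff; the 4 KiB threshold and the helper name are illustrative:

```java
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.PrepopulateBlobCache;

// Hypothetical helper: blob-enabled column family options that warm the blob
// cache with freshly flushed blobs. The caller attaches the result to a
// ColumnFamilyDescriptor and closes it when the descriptor is no longer needed.
static ColumnFamilyOptions blobColumnFamilyOptions() {
  return new ColumnFamilyOptions()
      .setEnableBlobFiles(true)
      .setMinBlobSize(4L * 1024)   // only values >= 4 KiB are separated into blob files
      .setPrepopulateBlobCache(PrepopulateBlobCache.PREPOPULATE_BLOB_FLUSH_ONLY);
}
```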
- */ -public enum PrepopulateBlobCache { - PREPOPULATE_BLOB_DISABLE((byte) 0x0, "prepopulate_blob_disable", "kDisable"), - PREPOPULATE_BLOB_FLUSH_ONLY((byte) 0x1, "prepopulate_blob_flush_only", "kFlushOnly"); - - /** - *

Get the PrepopulateBlobCache enumeration value by - * passing the library name to this method. - * - * If the library cannot be found, the enumeration - * value {@code PREPOPULATE_BLOB_DISABLE} will be returned.
- * - * @param libraryName prepopulate blob cache library name. - * - * @return PrepopulateBlobCache instance. - */ - public static PrepopulateBlobCache getPrepopulateBlobCache(String libraryName) { - if (libraryName != null) { - for (PrepopulateBlobCache prepopulateBlobCache : PrepopulateBlobCache.values()) { - if (prepopulateBlobCache.getLibraryName() != null - && prepopulateBlobCache.getLibraryName().equals(libraryName)) { - return prepopulateBlobCache; - } - } - } - return PrepopulateBlobCache.PREPOPULATE_BLOB_DISABLE; - } - - /** - *

Get the PrepopulateBlobCache enumeration value by - * passing the byte identifier to this method.
- * - * @param byteIdentifier of PrepopulateBlobCache. - * - * @return PrepopulateBlobCache instance. - * - * @throws IllegalArgumentException If PrepopulateBlobCache cannot be found for the - * provided byteIdentifier - */ - public static PrepopulateBlobCache getPrepopulateBlobCache(byte byteIdentifier) { - for (final PrepopulateBlobCache prepopulateBlobCache : PrepopulateBlobCache.values()) { - if (prepopulateBlobCache.getValue() == byteIdentifier) { - return prepopulateBlobCache; - } - } - - throw new IllegalArgumentException("Illegal value provided for PrepopulateBlobCache."); - } - - /** - *

Get a PrepopulateBlobCache value based on the string key in the C++ options output. - * This is used when getting options into Java from an options string, - * which is generated at the C++ level. - *
- * - * @param internalName the internal (C++) name by which the option is known. - * - * @return PrepopulateBlobCache instance (optional) - */ - static PrepopulateBlobCache getFromInternal(final String internalName) { - for (final PrepopulateBlobCache prepopulateBlobCache : PrepopulateBlobCache.values()) { - if (prepopulateBlobCache.internalName_.equals(internalName)) { - return prepopulateBlobCache; - } - } - - throw new IllegalArgumentException( - "Illegal internalName '" + internalName + " ' provided for PrepopulateBlobCache."); - } - - /** - *

Returns the byte value of the enumeration value.
- * - * @return byte representation - */ - public byte getValue() { - return value_; - } - - /** - *

Returns the library name of the prepopulate blob cache mode - * identified by the enumeration value.
- * - * @return library name - */ - public String getLibraryName() { - return libraryName_; - } - - PrepopulateBlobCache(final byte value, final String libraryName, final String internalName) { - value_ = value; - libraryName_ = libraryName; - internalName_ = internalName; - } - - private final byte value_; - private final String libraryName_; - private final String internalName_; -} diff --git a/java/src/main/java/org/rocksdb/Priority.java b/java/src/main/java/org/rocksdb/Priority.java deleted file mode 100644 index 34a56edcb..000000000 --- a/java/src/main/java/org/rocksdb/Priority.java +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The Thread Pool priority. - */ -public enum Priority { - BOTTOM((byte) 0x0), - LOW((byte) 0x1), - HIGH((byte)0x2), - TOTAL((byte)0x3); - - private final byte value; - - Priority(final byte value) { - this.value = value; - } - - /** - *

Returns the byte value of the enumeration value.
- * - * @return byte representation - */ - byte getValue() { - return value; - } - - /** - * Get Priority by byte value. - * - * @param value byte representation of Priority. - * - * @return {@link org.rocksdb.Priority} instance. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. - */ - static Priority getPriority(final byte value) { - for (final Priority priority : Priority.values()) { - if (priority.getValue() == value){ - return priority; - } - } - throw new IllegalArgumentException("Illegal value provided for Priority."); - } -} diff --git a/java/src/main/java/org/rocksdb/Range.java b/java/src/main/java/org/rocksdb/Range.java deleted file mode 100644 index 74c85e5f0..000000000 --- a/java/src/main/java/org/rocksdb/Range.java +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Range from start to limit. - */ -public class Range { - final Slice start; - final Slice limit; - - public Range(final Slice start, final Slice limit) { - this.start = start; - this.limit = limit; - } -} diff --git a/java/src/main/java/org/rocksdb/RateLimiter.java b/java/src/main/java/org/rocksdb/RateLimiter.java deleted file mode 100644 index c2b8a0fd9..000000000 --- a/java/src/main/java/org/rocksdb/RateLimiter.java +++ /dev/null @@ -1,227 +0,0 @@ -// Copyright (c) 2015, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * RateLimiter, which is used to control write rate of flush and - * compaction. - * - * @since 3.10.0 - */ -public class RateLimiter extends RocksObject { - public static final long DEFAULT_REFILL_PERIOD_MICROS = 100 * 1000; - public static final int DEFAULT_FAIRNESS = 10; - public static final RateLimiterMode DEFAULT_MODE = - RateLimiterMode.WRITES_ONLY; - public static final boolean DEFAULT_AUTOTUNE = false; - - /** - * RateLimiter constructor - * - * @param rateBytesPerSecond this is the only parameter you want to set - * most of the time. It controls the total write rate of compaction - * and flush in bytes per second. Currently, RocksDB does not enforce - * rate limit for anything other than flush and compaction, e.g. write to - * WAL. - */ - public RateLimiter(final long rateBytesPerSecond) { - this(rateBytesPerSecond, DEFAULT_REFILL_PERIOD_MICROS, DEFAULT_FAIRNESS, - DEFAULT_MODE, DEFAULT_AUTOTUNE); - } - - /** - * RateLimiter constructor - * - * @param rateBytesPerSecond this is the only parameter you want to set - * most of the time. It controls the total write rate of compaction - * and flush in bytes per second. Currently, RocksDB does not enforce - * rate limit for anything other than flush and compaction, e.g. write to - * WAL. - * @param refillPeriodMicros this controls how often tokens are refilled. For - * example, - * when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to - * 100ms, then 1MB is refilled every 100ms internally. Larger value can - * lead to burstier writes while smaller value introduces more CPU - * overhead. The default of 100,000ms should work for most cases. 
- */ - public RateLimiter(final long rateBytesPerSecond, - final long refillPeriodMicros) { - this(rateBytesPerSecond, refillPeriodMicros, DEFAULT_FAIRNESS, DEFAULT_MODE, - DEFAULT_AUTOTUNE); - } - - /** - * RateLimiter constructor - * - * @param rateBytesPerSecond this is the only parameter you want to set - * most of the time. It controls the total write rate of compaction - * and flush in bytes per second. Currently, RocksDB does not enforce - * rate limit for anything other than flush and compaction, e.g. write to - * WAL. - * @param refillPeriodMicros this controls how often tokens are refilled. For - * example, - * when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to - * 100ms, then 1MB is refilled every 100ms internally. Larger value can - * lead to burstier writes while smaller value introduces more CPU - * overhead. The default of 100,000ms should work for most cases. - * @param fairness RateLimiter accepts high-pri requests and low-pri requests. - * A low-pri request is usually blocked in favor of hi-pri request. - * Currently, RocksDB assigns low-pri to request from compaction and - * high-pri to request from flush. Low-pri requests can get blocked if - * flush requests come in continuously. This fairness parameter grants - * low-pri requests permission by fairness chance even though high-pri - * requests exist to avoid starvation. - * You should be good by leaving it at default 10. - */ - public RateLimiter(final long rateBytesPerSecond, - final long refillPeriodMicros, final int fairness) { - this(rateBytesPerSecond, refillPeriodMicros, fairness, DEFAULT_MODE, - DEFAULT_AUTOTUNE); - } - - /** - * RateLimiter constructor - * - * @param rateBytesPerSecond this is the only parameter you want to set - * most of the time. It controls the total write rate of compaction - * and flush in bytes per second. Currently, RocksDB does not enforce - * rate limit for anything other than flush and compaction, e.g. write to - * WAL. - * @param refillPeriodMicros this controls how often tokens are refilled. For - * example, - * when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to - * 100ms, then 1MB is refilled every 100ms internally. Larger value can - * lead to burstier writes while smaller value introduces more CPU - * overhead. The default of 100,000ms should work for most cases. - * @param fairness RateLimiter accepts high-pri requests and low-pri requests. - * A low-pri request is usually blocked in favor of hi-pri request. - * Currently, RocksDB assigns low-pri to request from compaction and - * high-pri to request from flush. Low-pri requests can get blocked if - * flush requests come in continuously. This fairness parameter grants - * low-pri requests permission by fairness chance even though high-pri - * requests exist to avoid starvation. - * You should be good by leaving it at default 10. - * @param rateLimiterMode indicates which types of operations count against - * the limit. - */ - public RateLimiter(final long rateBytesPerSecond, - final long refillPeriodMicros, final int fairness, - final RateLimiterMode rateLimiterMode) { - this(rateBytesPerSecond, refillPeriodMicros, fairness, rateLimiterMode, - DEFAULT_AUTOTUNE); - } - - /** - * RateLimiter constructor - * - * @param rateBytesPerSecond this is the only parameter you want to set - * most of the time. It controls the total write rate of compaction - * and flush in bytes per second. Currently, RocksDB does not enforce - * rate limit for anything other than flush and compaction, e.g. 
write to - * WAL. - * @param refillPeriodMicros this controls how often tokens are refilled. For - * example, - * when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to - * 100ms, then 1MB is refilled every 100ms internally. Larger value can - * lead to burstier writes while smaller value introduces more CPU - * overhead. The default of 100,000ms should work for most cases. - * @param fairness RateLimiter accepts high-pri requests and low-pri requests. - * A low-pri request is usually blocked in favor of hi-pri request. - * Currently, RocksDB assigns low-pri to request from compaction and - * high-pri to request from flush. Low-pri requests can get blocked if - * flush requests come in continuously. This fairness parameter grants - * low-pri requests permission by fairness chance even though high-pri - * requests exist to avoid starvation. - * You should be good by leaving it at default 10. - * @param rateLimiterMode indicates which types of operations count against - * the limit. - * @param autoTune Enables dynamic adjustment of rate limit within the range - * {@code [rate_bytes_per_sec / 20, rate_bytes_per_sec]}, according to - * the recent demand for background I/O. - */ - public RateLimiter(final long rateBytesPerSecond, - final long refillPeriodMicros, final int fairness, - final RateLimiterMode rateLimiterMode, final boolean autoTune) { - super(newRateLimiterHandle(rateBytesPerSecond, - refillPeriodMicros, fairness, rateLimiterMode.getValue(), autoTune)); - } - - /** - *

This API allows the user to dynamically change the rate limiter's bytes per second. - * REQUIRED: bytes_per_second > 0
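A runnable sketch tying the constructor parameters documented above to a database, assuming Options.setRateLimiter and an illustrative 32 MiB/s cap and path; the last statement shows the dynamic retuning that setBytesPerSecond provides:

```java
import org.rocksdb.Options;
import org.rocksdb.RateLimiter;
import org.rocksdb.RateLimiterMode;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class RateLimiterExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    // 32 MiB/s for flush and compaction, default 100,000 us (100 ms) refill period,
    // fairness 10, writes-only accounting, no auto-tuning.
    try (final RateLimiter rateLimiter = new RateLimiter(32L << 20,
             RateLimiter.DEFAULT_REFILL_PERIOD_MICROS, RateLimiter.DEFAULT_FAIRNESS,
             RateLimiterMode.WRITES_ONLY, false);
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setRateLimiter(rateLimiter);
         final RocksDB db = RocksDB.open(options, "/tmp/rate-limiter-example")) {
      db.put("key".getBytes(), "value".getBytes());
      rateLimiter.setBytesPerSecond(64L << 20); // the cap can be retuned at runtime
    }
  }
}
```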
- * - * @param bytesPerSecond bytes per second. - */ - public void setBytesPerSecond(final long bytesPerSecond) { - assert(isOwningHandle()); - setBytesPerSecond(nativeHandle_, bytesPerSecond); - } - - /** - * Returns the bytes per second. - * - * @return bytes per second. - */ - public long getBytesPerSecond() { - assert(isOwningHandle()); - return getBytesPerSecond(nativeHandle_); - } - - /** - *

Request tokens to write bytes. If this request cannot be satisfied, - * the call blocks. The caller is responsible for making sure - * {@code bytes < GetSingleBurstBytes()}.
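Because each request must stay within the single-burst limit, a hypothetical helper can charge a large write in burst-sized chunks; the rateLimiter and payload parameters are assumptions of this sketch:

```java
import org.rocksdb.RateLimiter;

// Charge payload.length bytes against the limiter without exceeding the
// single-burst maximum; request() may block while tokens are refilled.
static void throttledCharge(final RateLimiter rateLimiter, final byte[] payload) {
  final long burst = rateLimiter.getSingleBurstBytes();
  long remaining = payload.length;
  while (remaining > 0) {
    final long chunk = Math.min(remaining, burst);
    rateLimiter.request(chunk);
    remaining -= chunk;
  }
}
```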
- * - * @param bytes requested bytes. - */ - public void request(final long bytes) { - assert(isOwningHandle()); - request(nativeHandle_, bytes); - } - - /** - *

Maximum number of bytes that can be granted in a single burst.
- * - * @return max bytes can be granted in a single burst. - */ - public long getSingleBurstBytes() { - assert(isOwningHandle()); - return getSingleBurstBytes(nativeHandle_); - } - - /** - *

Total bytes that go through the rate limiter.
- * - * @return total bytes that go through rate limiter. - */ - public long getTotalBytesThrough() { - assert(isOwningHandle()); - return getTotalBytesThrough(nativeHandle_); - } - - /** - *

Total number of requests that go through the rate limiter.
- * - * @return total # of requests that go through rate limiter. - */ - public long getTotalRequests() { - assert(isOwningHandle()); - return getTotalRequests(nativeHandle_); - } - - private static native long newRateLimiterHandle(final long rateBytesPerSecond, - final long refillPeriodMicros, final int fairness, - final byte rateLimiterMode, final boolean autoTune); - @Override protected final native void disposeInternal(final long handle); - - private native void setBytesPerSecond(final long handle, - final long bytesPerSecond); - private native long getBytesPerSecond(final long handle); - private native void request(final long handle, final long bytes); - private native long getSingleBurstBytes(final long handle); - private native long getTotalBytesThrough(final long handle); - private native long getTotalRequests(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/RateLimiterMode.java b/java/src/main/java/org/rocksdb/RateLimiterMode.java deleted file mode 100644 index 4b029d816..000000000 --- a/java/src/main/java/org/rocksdb/RateLimiterMode.java +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Mode for {@link RateLimiter#RateLimiter(long, long, int, RateLimiterMode)}. - */ -public enum RateLimiterMode { - READS_ONLY((byte)0x0), - WRITES_ONLY((byte)0x1), - ALL_IO((byte)0x2); - - private final byte value; - - RateLimiterMode(final byte value) { - this.value = value; - } - - /** - *

Returns the byte value of the enumeration value.
- * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - *

Get the RateLimiterMode enumeration value by - * passing the byte identifier to this method.
- * - * @param byteIdentifier of RateLimiterMode. - * - * @return AccessHint instance. - * - * @throws IllegalArgumentException if the access hint for the byteIdentifier - * cannot be found - */ - public static RateLimiterMode getRateLimiterMode(final byte byteIdentifier) { - for (final RateLimiterMode rateLimiterMode : RateLimiterMode.values()) { - if (rateLimiterMode.getValue() == byteIdentifier) { - return rateLimiterMode; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for RateLimiterMode."); - } -} diff --git a/java/src/main/java/org/rocksdb/ReadOptions.java b/java/src/main/java/org/rocksdb/ReadOptions.java deleted file mode 100755 index c638b17b7..000000000 --- a/java/src/main/java/org/rocksdb/ReadOptions.java +++ /dev/null @@ -1,823 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The class that controls the get behavior. - * - * Note that dispose() must be called before an Options instance - * become out-of-scope to release the allocated memory in c++. - */ -public class ReadOptions extends RocksObject { - public ReadOptions() { - super(newReadOptions()); - } - - /** - * @param verifyChecksums verification will be performed on every read - * when set to true - * @param fillCache if true, then fill-cache behavior will be performed. - */ - public ReadOptions(final boolean verifyChecksums, final boolean fillCache) { - super(newReadOptions(verifyChecksums, fillCache)); - } - - /** - * Copy constructor. - * - * NOTE: This does a shallow copy, which means snapshot, iterate_upper_bound - * and other pointers will be cloned! - * - * @param other The ReadOptions to copy. - */ - public ReadOptions(ReadOptions other) { - super(copyReadOptions(other.nativeHandle_)); - this.iterateLowerBoundSlice_ = other.iterateLowerBoundSlice_; - this.iterateUpperBoundSlice_ = other.iterateUpperBoundSlice_; - this.timestampSlice_ = other.timestampSlice_; - this.iterStartTs_ = other.iterStartTs_; - } - - /** - * If true, all data read from underlying storage will be - * verified against corresponding checksums. - * Default: true - * - * @return true if checksum verification is on. - */ - public boolean verifyChecksums() { - assert(isOwningHandle()); - return verifyChecksums(nativeHandle_); - } - - /** - * If true, all data read from underlying storage will be - * verified against corresponding checksums. - * Default: true - * - * @param verifyChecksums if true, then checksum verification - * will be performed on every read. - * @return the reference to the current ReadOptions. - */ - public ReadOptions setVerifyChecksums( - final boolean verifyChecksums) { - assert(isOwningHandle()); - setVerifyChecksums(nativeHandle_, verifyChecksums); - return this; - } - - // TODO(yhchiang): this option seems to be block-based table only. - // move this to a better place? - /** - * Fill the cache when loading the block-based sst formated db. - * Callers may wish to set this field to false for bulk scans. - * Default: true - * - * @return true if the fill-cache behavior is on. - */ - public boolean fillCache() { - assert(isOwningHandle()); - return fillCache(nativeHandle_); - } - - /** - * Fill the cache when loading the block-based sst formatted db. - * Callers may wish to set this field to false for bulk scans. 
- * Default: true - * - * @param fillCache if true, then fill-cache behavior will be - * performed. - * @return the reference to the current ReadOptions. - */ - public ReadOptions setFillCache(final boolean fillCache) { - assert(isOwningHandle()); - setFillCache(nativeHandle_, fillCache); - return this; - } - - /** - * Returns the currently assigned Snapshot instance. - * - * @return the Snapshot assigned to this instance. If no Snapshot - * is assigned null. - */ - public Snapshot snapshot() { - assert(isOwningHandle()); - long snapshotHandle = snapshot(nativeHandle_); - if (snapshotHandle != 0) { - return new Snapshot(snapshotHandle); - } - return null; - } - - /** - *

If "snapshot" is non-null, read as of the supplied snapshot - * (which must belong to the DB that is being read and which must - * not have been released). If "snapshot" is null, use an implicit - * snapshot of the state at the beginning of this read operation. - * Default: null
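A hypothetical helper pulling the snapshot and iterator-bound options of this class together for a bounded scan; the already-open db handle, the "user_"/"user~" key range, and the helper name are illustrative assumptions:

```java
import org.rocksdb.ReadOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksIterator;
import org.rocksdb.Slice;
import org.rocksdb.Snapshot;

// Scan keys in ["user_", "user~") against a pinned snapshot.
static void scanUsers(final RocksDB db) {
  final Snapshot snapshot = db.getSnapshot();      // consistent view for the whole scan
  try (final Slice upperBound = new Slice("user~".getBytes());
       final ReadOptions readOptions = new ReadOptions()
           .setSnapshot(snapshot)
           .setIterateUpperBound(upperBound)       // exclusive upper bound
           .setFillCache(false);                   // bulk scan: don't pollute the block cache
       final RocksIterator it = db.newIterator(readOptions)) {
    for (it.seek("user_".getBytes()); it.isValid(); it.next()) {
      // process it.key() / it.value()
    }
  } finally {
    db.releaseSnapshot(snapshot);
  }
}
```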
- * - * @param snapshot {@link Snapshot} instance - * @return the reference to the current ReadOptions. - */ - public ReadOptions setSnapshot(final Snapshot snapshot) { - assert(isOwningHandle()); - if (snapshot != null) { - setSnapshot(nativeHandle_, snapshot.nativeHandle_); - } else { - setSnapshot(nativeHandle_, 0l); - } - return this; - } - - /** - * Returns the current read tier. - * - * @return the read tier in use, by default {@link ReadTier#READ_ALL_TIER} - */ - public ReadTier readTier() { - assert(isOwningHandle()); - return ReadTier.getReadTier(readTier(nativeHandle_)); - } - - /** - * Specify if this read request should process data that ALREADY - * resides on a particular cache. If the required data is not - * found at the specified cache, then {@link RocksDBException} is thrown. - * - * @param readTier {@link ReadTier} instance - * @return the reference to the current ReadOptions. - */ - public ReadOptions setReadTier(final ReadTier readTier) { - assert(isOwningHandle()); - setReadTier(nativeHandle_, readTier.getValue()); - return this; - } - - /** - * Specify to create a tailing iterator -- a special iterator that has a - * view of the complete database (i.e. it can also be used to read newly - * added data) and is optimized for sequential reads. It will return records - * that were inserted into the database after the creation of the iterator. - * Default: false - * @return true if tailing iterator is enabled. - */ - public boolean tailing() { - assert(isOwningHandle()); - return tailing(nativeHandle_); - } - - /** - * Specify to create a tailing iterator -- a special iterator that has a - * view of the complete database (i.e. it can also be used to read newly - * added data) and is optimized for sequential reads. It will return records - * that were inserted into the database after the creation of the iterator. - * Default: false - * - * @param tailing if true, then tailing iterator will be enabled. - * @return the reference to the current ReadOptions. - */ - public ReadOptions setTailing(final boolean tailing) { - assert(isOwningHandle()); - setTailing(nativeHandle_, tailing); - return this; - } - - /** - * Returns whether managed iterators will be used. - * - * @return the setting of whether managed iterators will be used, - * by default false - * - * @deprecated This options is not used anymore. - */ - @Deprecated - public boolean managed() { - assert(isOwningHandle()); - return managed(nativeHandle_); - } - - /** - * Specify to create a managed iterator -- a special iterator that - * uses less resources by having the ability to free its underlying - * resources on request. - * - * @param managed if true, then managed iterators will be enabled. - * @return the reference to the current ReadOptions. - * - * @deprecated This options is not used anymore. - */ - @Deprecated - public ReadOptions setManaged(final boolean managed) { - assert(isOwningHandle()); - setManaged(nativeHandle_, managed); - return this; - } - - /** - * Returns whether a total seek order will be used - * - * @return the setting of whether a total seek order will be used - */ - public boolean totalOrderSeek() { - assert(isOwningHandle()); - return totalOrderSeek(nativeHandle_); - } - - /** - * Enable a total order seek regardless of index format (e.g. hash index) - * used in the table. Some table format (e.g. plain table) may not support - * this option. - * - * @param totalOrderSeek if true, then total order seek will be enabled. - * @return the reference to the current ReadOptions. 
- */ - public ReadOptions setTotalOrderSeek(final boolean totalOrderSeek) { - assert(isOwningHandle()); - setTotalOrderSeek(nativeHandle_, totalOrderSeek); - return this; - } - - /** - * Returns whether the iterator only iterates over the same prefix as the seek - * - * @return the setting of whether the iterator only iterates over the same - * prefix as the seek, default is false - */ - public boolean prefixSameAsStart() { - assert(isOwningHandle()); - return prefixSameAsStart(nativeHandle_); - } - - /** - * Enforce that the iterator only iterates over the same prefix as the seek. - * This option is effective only for prefix seeks, i.e. prefix_extractor is - * non-null for the column family and {@link #totalOrderSeek()} is false. - * Unlike iterate_upper_bound, {@link #setPrefixSameAsStart(boolean)} only - * works within a prefix but in both directions. - * - * @param prefixSameAsStart if true, then the iterator only iterates over the - * same prefix as the seek - * @return the reference to the current ReadOptions. - */ - public ReadOptions setPrefixSameAsStart(final boolean prefixSameAsStart) { - assert(isOwningHandle()); - setPrefixSameAsStart(nativeHandle_, prefixSameAsStart); - return this; - } - - /** - * Returns whether the blocks loaded by the iterator will be pinned in memory - * - * @return the setting of whether the blocks loaded by the iterator will be - * pinned in memory - */ - public boolean pinData() { - assert(isOwningHandle()); - return pinData(nativeHandle_); - } - - /** - * Keep the blocks loaded by the iterator pinned in memory as long as the - * iterator is not deleted, If used when reading from tables created with - * BlockBasedTableOptions::use_delta_encoding = false, - * Iterator's property "rocksdb.iterator.is-key-pinned" is guaranteed to - * return 1. - * - * @param pinData if true, the blocks loaded by the iterator will be pinned - * @return the reference to the current ReadOptions. - */ - public ReadOptions setPinData(final boolean pinData) { - assert(isOwningHandle()); - setPinData(nativeHandle_, pinData); - return this; - } - - /** - * If true, when PurgeObsoleteFile is called in CleanupIteratorState, we - * schedule a background job in the flush job queue and delete obsolete files - * in background. - * - * Default: false - * - * @return true when PurgeObsoleteFile is called in CleanupIteratorState - */ - public boolean backgroundPurgeOnIteratorCleanup() { - assert(isOwningHandle()); - return backgroundPurgeOnIteratorCleanup(nativeHandle_); - } - - /** - * If true, when PurgeObsoleteFile is called in CleanupIteratorState, we - * schedule a background job in the flush job queue and delete obsolete files - * in background. - * - * Default: false - * - * @param backgroundPurgeOnIteratorCleanup true when PurgeObsoleteFile is - * called in CleanupIteratorState - * @return the reference to the current ReadOptions. - */ - public ReadOptions setBackgroundPurgeOnIteratorCleanup( - final boolean backgroundPurgeOnIteratorCleanup) { - assert(isOwningHandle()); - setBackgroundPurgeOnIteratorCleanup(nativeHandle_, - backgroundPurgeOnIteratorCleanup); - return this; - } - - /** - * If non-zero, NewIterator will create a new table reader which - * performs reads of the given size. Using a large size (> 2MB) can - * improve the performance of forward iteration on spinning disks. 
- * - * Default: 0 - * - * @return The readahead size is bytes - */ - public long readaheadSize() { - assert(isOwningHandle()); - return readaheadSize(nativeHandle_); - } - - /** - * If non-zero, NewIterator will create a new table reader which - * performs reads of the given size. Using a large size (> 2MB) can - * improve the performance of forward iteration on spinning disks. - * - * Default: 0 - * - * @param readaheadSize The readahead size is bytes - * @return the reference to the current ReadOptions. - */ - public ReadOptions setReadaheadSize(final long readaheadSize) { - assert(isOwningHandle()); - setReadaheadSize(nativeHandle_, readaheadSize); - return this; - } - - /** - * A threshold for the number of keys that can be skipped before failing an - * iterator seek as incomplete. - * - * @return the number of keys that can be skipped - * before failing an iterator seek as incomplete. - */ - public long maxSkippableInternalKeys() { - assert(isOwningHandle()); - return maxSkippableInternalKeys(nativeHandle_); - } - - /** - * A threshold for the number of keys that can be skipped before failing an - * iterator seek as incomplete. The default value of 0 should be used to - * never fail a request as incomplete, even on skipping too many keys. - * - * Default: 0 - * - * @param maxSkippableInternalKeys the number of keys that can be skipped - * before failing an iterator seek as incomplete. - * - * @return the reference to the current ReadOptions. - */ - public ReadOptions setMaxSkippableInternalKeys( - final long maxSkippableInternalKeys) { - assert(isOwningHandle()); - setMaxSkippableInternalKeys(nativeHandle_, maxSkippableInternalKeys); - return this; - } - - /** - * If true, keys deleted using the DeleteRange() API will be visible to - * readers until they are naturally deleted during compaction. This improves - * read performance in DBs with many range deletions. - * - * Default: false - * - * @return true if keys deleted using the DeleteRange() API will be visible - */ - public boolean ignoreRangeDeletions() { - assert(isOwningHandle()); - return ignoreRangeDeletions(nativeHandle_); - } - - /** - * If true, keys deleted using the DeleteRange() API will be visible to - * readers until they are naturally deleted during compaction. This improves - * read performance in DBs with many range deletions. - * - * Default: false - * - * @param ignoreRangeDeletions true if keys deleted using the DeleteRange() - * API should be visible - * @return the reference to the current ReadOptions. - */ - public ReadOptions setIgnoreRangeDeletions(final boolean ignoreRangeDeletions) { - assert(isOwningHandle()); - setIgnoreRangeDeletions(nativeHandle_, ignoreRangeDeletions); - return this; - } - - /** - * Defines the smallest key at which the backward - * iterator can return an entry. Once the bound is passed, - * {@link RocksIterator#isValid()} will be false. - * - * The lower bound is inclusive i.e. the bound value is a valid - * entry. - * - * If prefix_extractor is not null, the Seek target and `iterate_lower_bound` - * need to have the same prefix. This is because ordering is not guaranteed - * outside of prefix domain. - * - * Default: null - * - * @param iterateLowerBound Slice representing the lower bound - * @return the reference to the current ReadOptions. - */ - public ReadOptions setIterateLowerBound(final AbstractSlice iterateLowerBound) { - assert(isOwningHandle()); - setIterateLowerBound( - nativeHandle_, iterateLowerBound == null ? 
0 : iterateLowerBound.getNativeHandle()); - // Hold onto a reference so it doesn't get garbage collected out from under us. - iterateLowerBoundSlice_ = iterateLowerBound; - return this; - } - - /** - * Returns the smallest key at which the backward - * iterator can return an entry. - * - * The lower bound is inclusive i.e. the bound value is a valid entry. - * - * @return the smallest key, or null if there is no lower bound defined. - */ - public Slice iterateLowerBound() { - assert(isOwningHandle()); - final long lowerBoundSliceHandle = iterateLowerBound(nativeHandle_); - if (lowerBoundSliceHandle != 0) { - // Disown the new slice - it's owned by the C++ side of the JNI boundary - // from the perspective of this method. - return new Slice(lowerBoundSliceHandle, false); - } - return null; - } - - /** - * Defines the extent up to which the forward iterator - * can returns entries. Once the bound is reached, - * {@link RocksIterator#isValid()} will be false. - * - * The upper bound is exclusive i.e. the bound value is not a valid entry. - * - * If prefix_extractor is not null, the Seek target and iterate_upper_bound - * need to have the same prefix. This is because ordering is not guaranteed - * outside of prefix domain. - * - * Default: null - * - * @param iterateUpperBound Slice representing the upper bound - * @return the reference to the current ReadOptions. - */ - public ReadOptions setIterateUpperBound(final AbstractSlice iterateUpperBound) { - assert(isOwningHandle()); - setIterateUpperBound( - nativeHandle_, iterateUpperBound == null ? 0 : iterateUpperBound.getNativeHandle()); - // Hold onto a reference so it doesn't get garbage collected out from under us. - iterateUpperBoundSlice_ = iterateUpperBound; - return this; - } - - /** - * Returns the largest key at which the forward - * iterator can return an entry. - * - * The upper bound is exclusive i.e. the bound value is not a valid entry. - * - * @return the largest key, or null if there is no upper bound defined. - */ - public Slice iterateUpperBound() { - assert(isOwningHandle()); - final long upperBoundSliceHandle = iterateUpperBound(nativeHandle_); - if (upperBoundSliceHandle != 0) { - // Disown the new slice - it's owned by the C++ side of the JNI boundary - // from the perspective of this method. - return new Slice(upperBoundSliceHandle, false); - } - return null; - } - - /** - * A callback to determine whether relevant keys for this scan exist in a - * given table based on the table's properties. The callback is passed the - * properties of each table during iteration. If the callback returns false, - * the table will not be scanned. This option only affects Iterators and has - * no impact on point lookups. - * - * Default: null (every table will be scanned) - * - * @param tableFilter the table filter for the callback. - * - * @return the reference to the current ReadOptions. - */ - public ReadOptions setTableFilter(final AbstractTableFilter tableFilter) { - assert(isOwningHandle()); - setTableFilter(nativeHandle_, tableFilter.nativeHandle_); - return this; - } - - /** - * When true, by default use total_order_seek = true, and RocksDB can - * selectively enable prefix seek mode if won't generate a different result - * from total_order_seek, based on seek key, and iterator upper bound. - * Default: false - * - * @return true if auto prefix mode is set. 
- * - */ - public boolean autoPrefixMode() { - assert (isOwningHandle()); - return autoPrefixMode(nativeHandle_); - } - - /** - * When true, by default use total_order_seek = true, and RocksDB can - * selectively enable prefix seek mode if won't generate a different result - * from total_order_seek, based on seek key, and iterator upper bound. - * Default: false - * @param mode auto prefix mode - * @return the reference to the current ReadOptions. - */ - public ReadOptions setAutoPrefixMode(final boolean mode) { - assert (isOwningHandle()); - setAutoPrefixMode(nativeHandle_, mode); - return this; - } - - /** - * Timestamp of operation. Read should return the latest data visible to the - * specified timestamp. All timestamps of the same database must be of the - * same length and format. The user is responsible for providing a customized - * compare function via Comparator to order >key, timestamp> tuples. - * For iterator, iter_start_ts is the lower bound (older) and timestamp - * serves as the upper bound. Versions of the same record that fall in - * the timestamp range will be returned. If iter_start_ts is nullptr, - * only the most recent version visible to timestamp is returned. - * The user-specified timestamp feature is still under active development, - * and the API is subject to change. - * - * Default: null - * @see #iterStartTs() - * @return Reference to timestamp or null if there is no timestamp defined. - */ - public Slice timestamp() { - assert (isOwningHandle()); - final long timestampSliceHandle = timestamp(nativeHandle_); - if (timestampSliceHandle != 0) { - return new Slice(timestampSliceHandle); - } else { - return null; - } - } - - /** - * Timestamp of operation. Read should return the latest data visible to the - * specified timestamp. All timestamps of the same database must be of the - * same length and format. The user is responsible for providing a customized - * compare function via Comparator to order {@code } tuples. - * For iterator, {@code iter_start_ts} is the lower bound (older) and timestamp - * serves as the upper bound. Versions of the same record that fall in - * the timestamp range will be returned. If iter_start_ts is nullptr, - * only the most recent version visible to timestamp is returned. - * The user-specified timestamp feature is still under active development, - * and the API is subject to change. - * - * Default: null - * @see #setIterStartTs(AbstractSlice) - * @param timestamp Slice representing the timestamp - * @return the reference to the current ReadOptions. - */ - public ReadOptions setTimestamp(final AbstractSlice timestamp) { - assert (isOwningHandle()); - setTimestamp(nativeHandle_, timestamp == null ? 0 : timestamp.getNativeHandle()); - timestampSlice_ = timestamp; - return this; - } - - /** - * Timestamp of operation. Read should return the latest data visible to the - * specified timestamp. All timestamps of the same database must be of the - * same length and format. The user is responsible for providing a customized - * compare function via Comparator to order {@code } tuples. - * For iterator, {@code iter_start_ts} is the lower bound (older) and timestamp - * serves as the upper bound. Versions of the same record that fall in - * the timestamp range will be returned. If iter_start_ts is nullptr, - * only the most recent version visible to timestamp is returned. - * The user-specified timestamp feature is still under active development, - * and the API is subject to change. 
- * - * Default: null - * @return Reference to lower bound timestamp or null if there is no lower bound timestamp - * defined. - */ - public Slice iterStartTs() { - assert (isOwningHandle()); - final long iterStartTsHandle = iterStartTs(nativeHandle_); - if (iterStartTsHandle != 0) { - return new Slice(iterStartTsHandle); - } else { - return null; - } - } - - /** - * Timestamp of operation. Read should return the latest data visible to the - * specified timestamp. All timestamps of the same database must be of the - * same length and format. The user is responsible for providing a customized - * compare function via Comparator to order {@code } tuples. - * For iterator, {@code iter_start_ts} is the lower bound (older) and timestamp - * serves as the upper bound. Versions of the same record that fall in - * the timestamp range will be returned. If iter_start_ts is nullptr, - * only the most recent version visible to timestamp is returned. - * The user-specified timestamp feature is still under active development, - * and the API is subject to change. - * - * Default: null - * - * @param iterStartTs Reference to lower bound timestamp or null if there is no lower bound - * timestamp defined - * @return the reference to the current ReadOptions. - */ - public ReadOptions setIterStartTs(final AbstractSlice iterStartTs) { - assert (isOwningHandle()); - setIterStartTs(nativeHandle_, iterStartTs == null ? 0 : iterStartTs.getNativeHandle()); - iterStartTs_ = iterStartTs; - return this; - } - - /** - * Deadline for completing an API call (Get/MultiGet/Seek/Next for now) - * in microseconds. - * It should be set to microseconds since epoch, i.e, {@code gettimeofday} or - * equivalent plus allowed duration in microseconds. The best way is to use - * {@code env->NowMicros() + some timeout}. - * This is best efforts. The call may exceed the deadline if there is IO - * involved and the file system doesn't support deadlines, or due to - * checking for deadline periodically rather than for every key if - * processing a batch - * - * @return deadline time in microseconds - */ - public long deadline() { - assert (isOwningHandle()); - return deadline(nativeHandle_); - } - - /** - * Deadline for completing an API call (Get/MultiGet/Seek/Next for now) - * in microseconds. - * It should be set to microseconds since epoch, i.e, {@code gettimeofday} or - * equivalent plus allowed duration in microseconds. The best way is to use - * {@code env->NowMicros() + some timeout}. - * This is best efforts. The call may exceed the deadline if there is IO - * involved and the file system doesn't support deadlines, or due to - * checking for deadline periodically rather than for every key if - * processing a batch - * - * @param deadlineTime deadline time in microseconds. - * @return the reference to the current ReadOptions. - */ - public ReadOptions setDeadline(final long deadlineTime) { - assert (isOwningHandle()); - setDeadline(nativeHandle_, deadlineTime); - return this; - } - - /** - * A timeout in microseconds to be passed to the underlying FileSystem for - * reads. As opposed to deadline, this determines the timeout for each - * individual file read request. If a MultiGet/Get/Seek/Next etc call - * results in multiple reads, each read can last up to io_timeout us. - * @return ioTimeout time in microseconds - */ - public long ioTimeout() { - assert (isOwningHandle()); - return ioTimeout(nativeHandle_); - } - - /** - * A timeout in microseconds to be passed to the underlying FileSystem for - * reads. 
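// --- Editorial example (not part of the deleted file above) ---------------------
// A hedged sketch of the deadline / io_timeout knobs described above, assuming an
// open RocksDB instance `db` (hypothetical). The deadline is an absolute time in
// microseconds since the epoch; io_timeout bounds each individual file read.
final long nowMicros = System.currentTimeMillis() * 1000L; // approximation of NowMicros()
try (final ReadOptions readOptions = new ReadOptions()
         .setDeadline(nowMicros + 2_000_000L) // allow the whole call roughly 2 seconds
         .setIoTimeout(50_000L)) {            // and each file read roughly 50 ms
  final byte[] value = db.get(readOptions, "some-key".getBytes());
}
// ---------------------------------------------------------------------------------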
As opposed to deadline, this determines the timeout for each - * individual file read request. If a MultiGet/Get/Seek/Next etc call - * results in multiple reads, each read can last up to io_timeout us. - * - * @param ioTimeout time in microseconds. - * @return the reference to the current ReadOptions. - */ - public ReadOptions setIoTimeout(final long ioTimeout) { - assert (isOwningHandle()); - setIoTimeout(nativeHandle_, ioTimeout); - return this; - } - - /** - * It limits the maximum cumulative value size of the keys in batch while - * reading through MultiGet. Once the cumulative value size exceeds this - * soft limit then all the remaining keys are returned with status Aborted. - * - * Default: {@code std::numeric_limits::max()} - * @return actual valueSizeSofLimit - */ - public long valueSizeSoftLimit() { - assert (isOwningHandle()); - return valueSizeSoftLimit(nativeHandle_); - } - - /** - * It limits the maximum cumulative value size of the keys in batch while - * reading through MultiGet. Once the cumulative value size exceeds this - * soft limit then all the remaining keys are returned with status Aborted. - * - * Default: {@code std::numeric_limits::max()} - * - * @param valueSizeSoftLimit the maximum cumulative value size of the keys - * @return the reference to the current ReadOptions - */ - public ReadOptions setValueSizeSoftLimit(final long valueSizeSoftLimit) { - assert (isOwningHandle()); - setValueSizeSoftLimit(nativeHandle_, valueSizeSoftLimit); - return this; - } - - // instance variables - // NOTE: If you add new member variables, please update the copy constructor above! - // - // Hold a reference to any iterate lower or upper bound that was set on this - // object until we're destroyed or it's overwritten. That way the caller can - // freely leave scope without us losing the Java Slice object, which during - // close() would also reap its associated rocksdb::Slice native object since - // it's possibly (likely) to be an owning handle. 
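// --- Editorial example (not part of the deleted file above) ---------------------
// Sketch of the value-size soft limit documented above, assuming an open RocksDB
// instance `db` and a key list `keys` (both hypothetical). Per the documentation,
// once the accumulated value size of a MultiGet batch crosses the limit, the
// remaining keys are reported with an Aborted status rather than values.
try (final ReadOptions readOptions =
         new ReadOptions().setValueSizeSoftLimit(16L * 1024 * 1024)) { // ~16 MB
  final List<byte[]> values = db.multiGetAsList(readOptions, keys);
}
// ---------------------------------------------------------------------------------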
- private AbstractSlice iterateLowerBoundSlice_; - private AbstractSlice iterateUpperBoundSlice_; - private AbstractSlice timestampSlice_; - private AbstractSlice iterStartTs_; - - private native static long newReadOptions(); - private native static long newReadOptions(final boolean verifyChecksums, - final boolean fillCache); - private native static long copyReadOptions(long handle); - @Override protected final native void disposeInternal(final long handle); - - private native boolean verifyChecksums(long handle); - private native void setVerifyChecksums(long handle, boolean verifyChecksums); - private native boolean fillCache(long handle); - private native void setFillCache(long handle, boolean fillCache); - private native long snapshot(long handle); - private native void setSnapshot(long handle, long snapshotHandle); - private native byte readTier(long handle); - private native void setReadTier(long handle, byte readTierValue); - private native boolean tailing(long handle); - private native void setTailing(long handle, boolean tailing); - private native boolean managed(long handle); - private native void setManaged(long handle, boolean managed); - private native boolean totalOrderSeek(long handle); - private native void setTotalOrderSeek(long handle, boolean totalOrderSeek); - private native boolean prefixSameAsStart(long handle); - private native void setPrefixSameAsStart(long handle, boolean prefixSameAsStart); - private native boolean pinData(long handle); - private native void setPinData(long handle, boolean pinData); - private native boolean backgroundPurgeOnIteratorCleanup(final long handle); - private native void setBackgroundPurgeOnIteratorCleanup(final long handle, - final boolean backgroundPurgeOnIteratorCleanup); - private native long readaheadSize(final long handle); - private native void setReadaheadSize(final long handle, - final long readaheadSize); - private native long maxSkippableInternalKeys(final long handle); - private native void setMaxSkippableInternalKeys(final long handle, - final long maxSkippableInternalKeys); - private native boolean ignoreRangeDeletions(final long handle); - private native void setIgnoreRangeDeletions(final long handle, - final boolean ignoreRangeDeletions); - private native void setIterateUpperBound(final long handle, - final long upperBoundSliceHandle); - private native long iterateUpperBound(final long handle); - private native void setIterateLowerBound(final long handle, - final long lowerBoundSliceHandle); - private native long iterateLowerBound(final long handle); - private native void setTableFilter(final long handle, final long tableFilterHandle); - private native boolean autoPrefixMode(final long handle); - private native void setAutoPrefixMode(final long handle, final boolean autoPrefixMode); - private native long timestamp(final long handle); - private native void setTimestamp(final long handle, final long timestampSliceHandle); - private native long iterStartTs(final long handle); - private native void setIterStartTs(final long handle, final long iterStartTsHandle); - private native long deadline(final long handle); - private native void setDeadline(final long handle, final long deadlineTime); - private native long ioTimeout(final long handle); - private native void setIoTimeout(final long handle, final long ioTimeout); - private native long valueSizeSoftLimit(final long handle); - private native void setValueSizeSoftLimit(final long handle, final long softLimit); -} diff --git a/java/src/main/java/org/rocksdb/ReadTier.java 
b/java/src/main/java/org/rocksdb/ReadTier.java deleted file mode 100644 index 78f83f6ad..000000000 --- a/java/src/main/java/org/rocksdb/ReadTier.java +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * RocksDB {@link ReadOptions} read tiers. - */ -public enum ReadTier { - READ_ALL_TIER((byte)0), - BLOCK_CACHE_TIER((byte)1), - PERSISTED_TIER((byte)2), - MEMTABLE_TIER((byte)3); - - private final byte value; - - ReadTier(final byte value) { - this.value = value; - } - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - * Get ReadTier by byte value. - * - * @param value byte representation of ReadTier. - * - * @return {@link org.rocksdb.ReadTier} instance or null. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. - */ - public static ReadTier getReadTier(final byte value) { - for (final ReadTier readTier : ReadTier.values()) { - if (readTier.getValue() == value){ - return readTier; - } - } - throw new IllegalArgumentException("Illegal value provided for ReadTier."); - } -} diff --git a/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java b/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java deleted file mode 100644 index 6ee81d858..000000000 --- a/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Just a Java wrapper around EmptyValueCompactionFilter implemented in C++ - */ -public class RemoveEmptyValueCompactionFilter - extends AbstractCompactionFilter { - public RemoveEmptyValueCompactionFilter() { - super(createNewRemoveEmptyValueCompactionFilter0()); - } - - private native static long createNewRemoveEmptyValueCompactionFilter0(); -} diff --git a/java/src/main/java/org/rocksdb/RestoreOptions.java b/java/src/main/java/org/rocksdb/RestoreOptions.java deleted file mode 100644 index 54dc0e61c..000000000 --- a/java/src/main/java/org/rocksdb/RestoreOptions.java +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * RestoreOptions to control the behavior of restore. - * - * Note that dispose() must be called before this instance become out-of-scope - * to release the allocated memory in c++. - * - */ -public class RestoreOptions extends RocksObject { - /** - * Constructor - * - * @param keepLogFiles If true, restore won't overwrite the existing log files - * in wal_dir. It will also move all log files from archive directory to - * wal_dir. Use this option in combination with - * BackupEngineOptions::backup_log_files = false for persisting in-memory - * databases. 
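// --- Editorial example (not part of the deleted file above) ---------------------
// Minimal sketch of how the ReadTier enum above is typically applied through
// ReadOptions, assuming an open RocksDB instance `db` (hypothetical). With
// BLOCK_CACHE_TIER, a read that would need disk I/O may fail fast instead of
// touching storage.
try (final ReadOptions cacheOnly = new ReadOptions().setReadTier(ReadTier.BLOCK_CACHE_TIER)) {
  final byte[] value = db.get(cacheOnly, "hot-key".getBytes()); // may throw if not cached
}
// ---------------------------------------------------------------------------------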
- * Default: false - */ - public RestoreOptions(final boolean keepLogFiles) { - super(newRestoreOptions(keepLogFiles)); - } - - private native static long newRestoreOptions(boolean keepLogFiles); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/ReusedSynchronisationType.java b/java/src/main/java/org/rocksdb/ReusedSynchronisationType.java deleted file mode 100644 index 2709a5d59..000000000 --- a/java/src/main/java/org/rocksdb/ReusedSynchronisationType.java +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * Determines the type of synchronisation primitive used - * in native code. - */ -public enum ReusedSynchronisationType { - /** - * Standard mutex. - */ - MUTEX((byte)0x0), - - /** - * Use adaptive mutex, which spins in the user space before resorting - * to kernel. This could reduce context switch when the mutex is not - * heavily contended. However, if the mutex is hot, we could end up - * wasting spin time. - */ - ADAPTIVE_MUTEX((byte)0x1), - - /** - * There is a reused buffer per-thread. - */ - THREAD_LOCAL((byte)0x2); - - private final byte value; - - ReusedSynchronisationType(final byte value) { - this.value = value; - } - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - * Get ReusedSynchronisationType by byte value. - * - * @param value byte representation of ReusedSynchronisationType. - * - * @return {@link org.rocksdb.ReusedSynchronisationType} instance. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. - */ - public static ReusedSynchronisationType getReusedSynchronisationType( - final byte value) { - for (final ReusedSynchronisationType reusedSynchronisationType - : ReusedSynchronisationType.values()) { - if (reusedSynchronisationType.getValue() == value) { - return reusedSynchronisationType; - } - } - throw new IllegalArgumentException( - "Illegal value provided for ReusedSynchronisationType."); - } -} diff --git a/java/src/main/java/org/rocksdb/RocksCallbackObject.java b/java/src/main/java/org/rocksdb/RocksCallbackObject.java deleted file mode 100644 index 8d7a867ee..000000000 --- a/java/src/main/java/org/rocksdb/RocksCallbackObject.java +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.List; - -/** - * RocksCallbackObject is similar to {@link RocksObject} but varies - * in its construction as it is designed for Java objects which have functions - * which are called from C++ via JNI. - * - * RocksCallbackObject is the base-class any RocksDB classes that acts as a - * callback from some underlying underlying native C++ {@code rocksdb} object. - * - * The use of {@code RocksObject} should always be preferred over - * {@link RocksCallbackObject} if callbacks are not required. 
- */ -public abstract class RocksCallbackObject extends - AbstractImmutableNativeReference { - - protected final long nativeHandle_; - - protected RocksCallbackObject(final long... nativeParameterHandles) { - super(true); - this.nativeHandle_ = initializeNative(nativeParameterHandles); - } - - /** - * Given a list of RocksCallbackObjects, it returns a list - * of the native handles of the underlying objects. - * - * @param objectList the rocks callback objects - * - * @return the native handles - */ - static /* @Nullable */ long[] toNativeHandleList( - /* @Nullable */ final List objectList) { - if (objectList == null) { - return null; - } - final int len = objectList.size(); - final long[] handleList = new long[len]; - for (int i = 0; i < len; i++) { - handleList[i] = objectList.get(i).nativeHandle_; - } - return handleList; - } - - /** - * Construct the Native C++ object which will callback - * to our object methods - * - * @param nativeParameterHandles An array of native handles for any parameter - * objects that are needed during construction - * - * @return The native handle of the C++ object which will callback to us - */ - protected abstract long initializeNative( - final long... nativeParameterHandles); - - /** - * Deletes underlying C++ native callback object pointer - */ - @Override - protected void disposeInternal() { - disposeInternal(nativeHandle_); - } - - private native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java deleted file mode 100644 index 77484288f..000000000 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ /dev/null @@ -1,4694 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static java.nio.charset.StandardCharsets.UTF_8; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicReference; -import org.rocksdb.util.Environment; - -/** - * A RocksDB is a persistent ordered map from keys to values. It is safe for - * concurrent access from multiple threads without any external synchronization. - * All methods of this class could potentially throw RocksDBException, which - * indicates sth wrong at the RocksDB library side and the call failed. - */ -public class RocksDB extends RocksObject { - public static final byte[] DEFAULT_COLUMN_FAMILY = "default".getBytes(UTF_8); - public static final int NOT_FOUND = -1; - - private enum LibraryState { - NOT_LOADED, - LOADING, - LOADED - } - - private static final AtomicReference libraryLoaded = - new AtomicReference<>(LibraryState.NOT_LOADED); - - static { - RocksDB.loadLibrary(); - } - - private final List ownedColumnFamilyHandles = new ArrayList<>(); - - /** - * Loads the necessary library files. - * Calling this method twice will have no effect. - * By default the method extracts the shared library for loading at - * java.io.tmpdir, however, you can override this temporary location by - * setting the environment variable ROCKSDB_SHAREDLIB_DIR. 
- */ - public static void loadLibrary() { - if (libraryLoaded.get() == LibraryState.LOADED) { - return; - } - - if (libraryLoaded.compareAndSet(LibraryState.NOT_LOADED, - LibraryState.LOADING)) { - final String tmpDir = System.getenv("ROCKSDB_SHAREDLIB_DIR"); - // loading possibly necessary libraries. - for (final CompressionType compressionType : CompressionType.values()) { - try { - if (compressionType.getLibraryName() != null) { - System.loadLibrary(compressionType.getLibraryName()); - } - } catch (final UnsatisfiedLinkError e) { - // since it may be optional, we ignore its loading failure here. - } - } - try { - NativeLibraryLoader.getInstance().loadLibrary(tmpDir); - } catch (final IOException e) { - libraryLoaded.set(LibraryState.NOT_LOADED); - throw new RuntimeException("Unable to load the RocksDB shared library", - e); - } - - final int encodedVersion = version(); - version = Version.fromEncodedVersion(encodedVersion); - - libraryLoaded.set(LibraryState.LOADED); - return; - } - - while (libraryLoaded.get() == LibraryState.LOADING) { - try { - Thread.sleep(10); - } catch(final InterruptedException e) { - //ignore - } - } - } - - /** - * Tries to load the necessary library files from the given list of - * directories. - * - * @param paths a list of strings where each describes a directory - * of a library. - */ - public static void loadLibrary(final List paths) { - if (libraryLoaded.get() == LibraryState.LOADED) { - return; - } - - if (libraryLoaded.compareAndSet(LibraryState.NOT_LOADED, - LibraryState.LOADING)) { - for (final CompressionType compressionType : CompressionType.values()) { - if (compressionType.equals(CompressionType.NO_COMPRESSION)) { - continue; - } - for (final String path : paths) { - try { - System.load(path + "/" + Environment.getSharedLibraryFileName( - compressionType.getLibraryName())); - break; - } catch (final UnsatisfiedLinkError e) { - // since they are optional, we ignore loading fails. - } - } - } - boolean success = false; - UnsatisfiedLinkError err = null; - for (final String path : paths) { - try { - System.load(path + "/" + - Environment.getJniLibraryFileName("rocksdbjni")); - success = true; - break; - } catch (final UnsatisfiedLinkError e) { - err = e; - } - } - if (!success) { - libraryLoaded.set(LibraryState.NOT_LOADED); - throw err; - } - - final int encodedVersion = version(); - version = Version.fromEncodedVersion(encodedVersion); - - libraryLoaded.set(LibraryState.LOADED); - return; - } - - while (libraryLoaded.get() == LibraryState.LOADING) { - try { - Thread.sleep(10); - } catch(final InterruptedException e) { - //ignore - } - } - } - - public static Version rocksdbVersion() { - return version; - } - - /** - * Private constructor. - * - * @param nativeHandle The native handle of the C++ RocksDB object - */ - protected RocksDB(final long nativeHandle) { - super(nativeHandle); - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance given - * the path to the database using the default options w/ createIfMissing - * set to true. - * - * @param path the path to the rocksdb. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
- * @see Options#setCreateIfMissing(boolean) - */ - public static RocksDB open(final String path) throws RocksDBException { - final Options options = new Options(); - options.setCreateIfMissing(true); - return open(options, path); - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance given - * the path to the database using the specified options and db path and a list - * of column family names. - *

- * If opened in read-write mode every existing column family name must be
- * passed within the list to this method.
- *
- * If opened in read-only mode only a subset of existing column families must
- * be passed to this method.
- *
- * Options instance *should* not be disposed before all DBs using this options
- * instance have been closed. If the user doesn't call options dispose explicitly,
- * then this options instance will be GC'd automatically.
- *
- * ColumnFamily handles are disposed when the RocksDB instance is disposed.
- *
- * - * @param path the path to the rocksdb. - * @param columnFamilyDescriptors list of column family descriptors - * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances - * on open. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @see DBOptions#setCreateIfMissing(boolean) - */ - public static RocksDB open(final String path, - final List columnFamilyDescriptors, - final List columnFamilyHandles) - throws RocksDBException { - final DBOptions options = new DBOptions(); - return open(options, path, columnFamilyDescriptors, columnFamilyHandles); - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance given - * the path to the database using the specified options and db path. - * - *

- * Options instance *should* not be disposed before all DBs using this options
- * instance have been closed. If the user doesn't call options dispose explicitly,
- * then this options instance will be GC'd automatically.
- *
- * An Options instance can be re-used to open multiple DBs if DB statistics are
- * not used. If DB statistics are required, it is recommended to open each DB
- * with a new Options instance, as the underlying native statistics instance does
- * not use any locks to prevent concurrent updates.
- * - * @param options {@link org.rocksdb.Options} instance. - * @param path the path to the rocksdb. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * - * @see Options#setCreateIfMissing(boolean) - */ - public static RocksDB open(final Options options, final String path) - throws RocksDBException { - // when non-default Options is used, keeping an Options reference - // in RocksDB can prevent Java to GC during the life-time of - // the currently-created RocksDB. - final RocksDB db = new RocksDB(open(options.nativeHandle_, path)); - db.storeOptionsInstance(options); - return db; - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance given - * the path to the database using the specified options and db path and a list - * of column family names. - *

- * If opened in read-write mode every existing column family name must be
- * passed within the list to this method.
- *
- * If opened in read-only mode only a subset of existing column families must
- * be passed to this method.
- *
- * Options instance *should* not be disposed before all DBs using this options
- * instance have been closed. If the user doesn't call options dispose explicitly,
- * then this options instance will be GC'd automatically.
- *
- * An Options instance can be re-used to open multiple DBs if DB statistics are
- * not used. If DB statistics are required, it is recommended to open each DB
- * with a new Options instance, as the underlying native statistics instance does
- * not use any locks to prevent concurrent updates.
- *
- * ColumnFamily handles are disposed when the RocksDB instance is disposed.
- *
- * - * @param options {@link org.rocksdb.DBOptions} instance. - * @param path the path to the rocksdb. - * @param columnFamilyDescriptors list of column family descriptors - * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances - * on open. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * - * @see DBOptions#setCreateIfMissing(boolean) - */ - public static RocksDB open(final DBOptions options, final String path, - final List columnFamilyDescriptors, - final List columnFamilyHandles) - throws RocksDBException { - - final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; - final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; - for (int i = 0; i < columnFamilyDescriptors.size(); i++) { - final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors - .get(i); - cfNames[i] = cfDescriptor.getName(); - cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; - } - - final long[] handles = open(options.nativeHandle_, path, cfNames, - cfOptionHandles); - final RocksDB db = new RocksDB(handles[0]); - db.storeOptionsInstance(options); - - for (int i = 1; i < handles.length; i++) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(db, handles[i]); - columnFamilyHandles.add(columnFamilyHandle); - } - - db.ownedColumnFamilyHandles.addAll(columnFamilyHandles); - - return db; - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance in - * Read-Only mode given the path to the database using the default - * options. - * - * @param path the path to the RocksDB. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static RocksDB openReadOnly(final String path) - throws RocksDBException { - // This allows to use the rocksjni default Options instead of - // the c++ one. - final Options options = new Options(); - return openReadOnly(options, path); - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance in - * Read-Only mode given the path to the database using the specified - * options and db path. - * - * Options instance *should* not be disposed before all DBs using this options - * instance have been closed. If user doesn't call options dispose explicitly, - * then this options instance will be GC'd automatically. - * - * @param options {@link Options} instance. - * @param path the path to the RocksDB. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static RocksDB openReadOnly(final Options options, final String path) - throws RocksDBException { - return openReadOnly(options, path, false); - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance in - * Read-Only mode given the path to the database using the specified - * options and db path. - * - * Options instance *should* not be disposed before all DBs using this options - * instance have been closed. If user doesn't call options dispose explicitly, - * then this options instance will be GC'd automatically. - * - * @param options {@link Options} instance. - * @param path the path to the RocksDB. 
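// --- Editorial example (not part of the deleted file above) ---------------------
// Sketch of opening a database together with its column families, as described by
// the javadoc above. The path and column family names are hypothetical; uses
// java.util.Arrays and java.util.ArrayList.
final List<ColumnFamilyDescriptor> descriptors = Arrays.asList(
    new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
    new ColumnFamilyDescriptor("users".getBytes()));
final List<ColumnFamilyHandle> handles = new ArrayList<>();
try (final DBOptions options = new DBOptions()
         .setCreateIfMissing(true)
         .setCreateMissingColumnFamilies(true);
     final RocksDB db = RocksDB.open(options, "/tmp/example-db", descriptors, handles)) {
  // handles.get(1) now refers to the "users" column family;
  // the handles are owned by the RocksDB instance and closed with it.
}
// ---------------------------------------------------------------------------------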
- * @param errorIfWalFileExists true to raise an error when opening the db - * if a Write Ahead Log file exists, false otherwise. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static RocksDB openReadOnly(final Options options, final String path, - final boolean errorIfWalFileExists) throws RocksDBException { - // when non-default Options is used, keeping an Options reference - // in RocksDB can prevent Java to GC during the life-time of - // the currently-created RocksDB. - final RocksDB db = new RocksDB(openROnly(options.nativeHandle_, path, errorIfWalFileExists)); - db.storeOptionsInstance(options); - return db; - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance in - * Read-Only mode given the path to the database using the default - * options. - * - * @param path the path to the RocksDB. - * @param columnFamilyDescriptors list of column family descriptors - * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances - * on open. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static RocksDB openReadOnly(final String path, - final List columnFamilyDescriptors, - final List columnFamilyHandles) - throws RocksDBException { - // This allows to use the rocksjni default Options instead of - // the c++ one. - final DBOptions options = new DBOptions(); - return openReadOnly(options, path, columnFamilyDescriptors, columnFamilyHandles, false); - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance in - * Read-Only mode given the path to the database using the specified - * options and db path. - * - *

- * This open method allows opening RocksDB using a subset of available
- * column families.
- *
- * Options instance *should* not be disposed before all DBs using this
- * options instance have been closed. If the user doesn't call options dispose
- * explicitly, then this options instance will be GC'd automatically.
- * - * @param options {@link DBOptions} instance. - * @param path the path to the RocksDB. - * @param columnFamilyDescriptors list of column family descriptors - * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances - * on open. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static RocksDB openReadOnly(final DBOptions options, final String path, - final List columnFamilyDescriptors, - final List columnFamilyHandles) throws RocksDBException { - return openReadOnly(options, path, columnFamilyDescriptors, columnFamilyHandles, false); - } - - /** - * The factory constructor of RocksDB that opens a RocksDB instance in - * Read-Only mode given the path to the database using the specified - * options and db path. - * - *

- * This open method allows opening RocksDB using a subset of available
- * column families.
- *
- * Options instance *should* not be disposed before all DBs using this
- * options instance have been closed. If the user doesn't call options dispose
- * explicitly, then this options instance will be GC'd automatically.
- * - * @param options {@link DBOptions} instance. - * @param path the path to the RocksDB. - * @param columnFamilyDescriptors list of column family descriptors - * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances - * on open. - * @param errorIfWalFileExists true to raise an error when opening the db - * if a Write Ahead Log file exists, false otherwise. - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static RocksDB openReadOnly(final DBOptions options, final String path, - final List columnFamilyDescriptors, - final List columnFamilyHandles, final boolean errorIfWalFileExists) - throws RocksDBException { - // when non-default Options is used, keeping an Options reference - // in RocksDB can prevent Java to GC during the life-time of - // the currently-created RocksDB. - - final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; - final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; - for (int i = 0; i < columnFamilyDescriptors.size(); i++) { - final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors - .get(i); - cfNames[i] = cfDescriptor.getName(); - cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; - } - - final long[] handles = - openROnly(options.nativeHandle_, path, cfNames, cfOptionHandles, errorIfWalFileExists); - final RocksDB db = new RocksDB(handles[0]); - db.storeOptionsInstance(options); - - for (int i = 1; i < handles.length; i++) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(db, handles[i]); - columnFamilyHandles.add(columnFamilyHandle); - } - - db.ownedColumnFamilyHandles.addAll(columnFamilyHandles); - - return db; - } - - /** - * Open DB as secondary instance with only the default column family. - * - * The secondary instance can dynamically tail the MANIFEST of - * a primary that must have already been created. User can call - * {@link #tryCatchUpWithPrimary()} to make the secondary instance catch up - * with primary (WAL tailing is NOT supported now) whenever the user feels - * necessary. Column families created by the primary after the secondary - * instance starts are currently ignored by the secondary instance. - * Column families opened by secondary and dropped by the primary will be - * dropped by secondary as well. However the user of the secondary instance - * can still access the data of such dropped column family as long as they - * do not destroy the corresponding column family handle. - * WAL tailing is not supported at present, but will arrive soon. - * - * @param options the options to open the secondary instance. - * @param path the path to the primary RocksDB instance. - * @param secondaryPath points to a directory where the secondary instance - * stores its info log - * - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static RocksDB openAsSecondary(final Options options, final String path, - final String secondaryPath) throws RocksDBException { - // when non-default Options is used, keeping an Options reference - // in RocksDB can prevent Java to GC during the life-time of - // the currently-created RocksDB. 
- final RocksDB db = new RocksDB(openAsSecondary(options.nativeHandle_, path, secondaryPath)); - db.storeOptionsInstance(options); - return db; - } - - /** - * Open DB as secondary instance with column families. - * You can open a subset of column families in secondary mode. - * - * The secondary instance can dynamically tail the MANIFEST of - * a primary that must have already been created. User can call - * {@link #tryCatchUpWithPrimary()} to make the secondary instance catch up - * with primary (WAL tailing is NOT supported now) whenever the user feels - * necessary. Column families created by the primary after the secondary - * instance starts are currently ignored by the secondary instance. - * Column families opened by secondary and dropped by the primary will be - * dropped by secondary as well. However the user of the secondary instance - * can still access the data of such dropped column family as long as they - * do not destroy the corresponding column family handle. - * WAL tailing is not supported at present, but will arrive soon. - * - * @param options the options to open the secondary instance. - * @param path the path to the primary RocksDB instance. - * @param secondaryPath points to a directory where the secondary instance - * stores its info log. - * @param columnFamilyDescriptors list of column family descriptors - * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances - * on open. - * - * @return a {@link RocksDB} instance on success, null if the specified - * {@link RocksDB} can not be opened. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static RocksDB openAsSecondary(final DBOptions options, final String path, - final String secondaryPath, final List columnFamilyDescriptors, - final List columnFamilyHandles) throws RocksDBException { - // when non-default Options is used, keeping an Options reference - // in RocksDB can prevent Java to GC during the life-time of - // the currently-created RocksDB. - - final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; - final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; - for (int i = 0; i < columnFamilyDescriptors.size(); i++) { - final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors.get(i); - cfNames[i] = cfDescriptor.getName(); - cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; - } - - final long[] handles = - openAsSecondary(options.nativeHandle_, path, secondaryPath, cfNames, cfOptionHandles); - final RocksDB db = new RocksDB(handles[0]); - db.storeOptionsInstance(options); - - for (int i = 1; i < handles.length; i++) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(db, handles[i]); - columnFamilyHandles.add(columnFamilyHandle); - } - - db.ownedColumnFamilyHandles.addAll(columnFamilyHandles); - - return db; - } - - /** - * This is similar to {@link #close()} except that it - * throws an exception if any error occurs. - * - * This will not fsync the WAL files. - * If syncing is required, the caller must first call {@link #syncWal()} - * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch - * with {@link WriteOptions#setSync(boolean)} set to true. - * - * See also {@link #close()}. - * - * @throws RocksDBException if an error occurs whilst closing. 
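// --- Editorial example (not part of the deleted file above) ---------------------
// Sketch of the secondary-instance workflow described above. The primary database
// path and the secondary info-log directory are hypothetical. The reader calls
// tryCatchUpWithPrimary() whenever it wants to observe the primary's latest
// flushed state.
try (final Options options = new Options()
         .setMaxOpenFiles(-1); // assumption: secondary mode typically requires this
     final RocksDB secondary = RocksDB.openAsSecondary(
         options, "/path/to/primary-db", "/path/to/secondary-info-log")) {
  secondary.tryCatchUpWithPrimary();
  final byte[] value = secondary.get("some-key".getBytes());
}
// ---------------------------------------------------------------------------------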
- */ - public void closeE() throws RocksDBException { - for (final ColumnFamilyHandle columnFamilyHandle : ownedColumnFamilyHandles) { - columnFamilyHandle.close(); - } - ownedColumnFamilyHandles.clear(); - - if (owningHandle_.compareAndSet(true, false)) { - try { - closeDatabase(nativeHandle_); - } finally { - disposeInternal(); - } - } - } - - /** - * This is similar to {@link #closeE()} except that it - * silently ignores any errors. - * - * This will not fsync the WAL files. - * If syncing is required, the caller must first call {@link #syncWal()} - * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch - * with {@link WriteOptions#setSync(boolean)} set to true. - * - * See also {@link #close()}. - */ - @Override - public void close() { - for (final ColumnFamilyHandle columnFamilyHandle : ownedColumnFamilyHandles) { - columnFamilyHandle.close(); - } - ownedColumnFamilyHandles.clear(); - - if (owningHandle_.compareAndSet(true, false)) { - try { - closeDatabase(nativeHandle_); - } catch (final RocksDBException e) { - // silently ignore the error report - } finally { - disposeInternal(); - } - } - } - - /** - * Static method to determine all available column families for a - * rocksdb database identified by path - * - * @param options Options for opening the database - * @param path Absolute path to rocksdb database - * @return List<byte[]> List containing the column family names - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static List listColumnFamilies(final Options options, - final String path) throws RocksDBException { - return Arrays.asList(RocksDB.listColumnFamilies(options.nativeHandle_, - path)); - } - - /** - * Creates a new column family with the name columnFamilyName and - * allocates a ColumnFamilyHandle within an internal structure. - * The ColumnFamilyHandle is automatically disposed with DB disposal. - * - * @param columnFamilyDescriptor column family to be created. - * @return {@link org.rocksdb.ColumnFamilyHandle} instance. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public ColumnFamilyHandle createColumnFamily( - final ColumnFamilyDescriptor columnFamilyDescriptor) - throws RocksDBException { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(this, - createColumnFamily(nativeHandle_, columnFamilyDescriptor.getName(), - columnFamilyDescriptor.getName().length, - columnFamilyDescriptor.getOptions().nativeHandle_)); - ownedColumnFamilyHandles.add(columnFamilyHandle); - return columnFamilyHandle; - } - - /** - * Bulk create column families with the same column family options. - * - * @param columnFamilyOptions the options for the column families. - * @param columnFamilyNames the names of the column families. - * - * @return the handles to the newly created column families. 
- * - * @throws RocksDBException if an error occurs whilst creating - * the column families - */ - public List createColumnFamilies( - final ColumnFamilyOptions columnFamilyOptions, - final List columnFamilyNames) throws RocksDBException { - final byte[][] cfNames = columnFamilyNames.toArray( - new byte[0][]); - final long[] cfHandles = createColumnFamilies(nativeHandle_, - columnFamilyOptions.nativeHandle_, cfNames); - final List columnFamilyHandles = - new ArrayList<>(cfHandles.length); - for (int i = 0; i < cfHandles.length; i++) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(this, cfHandles[i]); - columnFamilyHandles.add(columnFamilyHandle); - } - ownedColumnFamilyHandles.addAll(columnFamilyHandles); - return columnFamilyHandles; - } - - /** - * Bulk create column families with the same column family options. - * - * @param columnFamilyDescriptors the descriptions of the column families. - * - * @return the handles to the newly created column families. - * - * @throws RocksDBException if an error occurs whilst creating - * the column families - */ - public List createColumnFamilies( - final List columnFamilyDescriptors) - throws RocksDBException { - final long[] cfOptsHandles = new long[columnFamilyDescriptors.size()]; - final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; - for (int i = 0; i < columnFamilyDescriptors.size(); i++) { - final ColumnFamilyDescriptor columnFamilyDescriptor - = columnFamilyDescriptors.get(i); - cfOptsHandles[i] = columnFamilyDescriptor.getOptions().nativeHandle_; - cfNames[i] = columnFamilyDescriptor.getName(); - } - final long[] cfHandles = createColumnFamilies(nativeHandle_, - cfOptsHandles, cfNames); - final List columnFamilyHandles = - new ArrayList<>(cfHandles.length); - for (int i = 0; i < cfHandles.length; i++) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(this, cfHandles[i]); - columnFamilyHandles.add(columnFamilyHandle); - } - ownedColumnFamilyHandles.addAll(columnFamilyHandles); - return columnFamilyHandles; - } - - /** - * Drops the column family specified by {@code columnFamilyHandle}. This call - * only records a drop record in the manifest and prevents the column - * family from flushing and compacting. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void dropColumnFamily(final ColumnFamilyHandle columnFamilyHandle) - throws RocksDBException { - dropColumnFamily(nativeHandle_, columnFamilyHandle.nativeHandle_); - } - - // Bulk drop column families. This call only records drop records in the - // manifest and prevents the column families from flushing and compacting. - // In case of error, the request may succeed partially. User may call - // ListColumnFamilies to check the result. - public void dropColumnFamilies( - final List columnFamilies) throws RocksDBException { - final long[] cfHandles = new long[columnFamilies.size()]; - for (int i = 0; i < columnFamilies.size(); i++) { - cfHandles[i] = columnFamilies.get(i).nativeHandle_; - } - dropColumnFamilies(nativeHandle_, cfHandles); - } - - /** - * Deletes native column family handle of given {@link ColumnFamilyHandle} Java object - * and removes reference from {@link RocksDB#ownedColumnFamilyHandles}. - * - * @param columnFamilyHandle column family handle object. 
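// --- Editorial example (not part of the deleted file above) ---------------------
// Sketch of creating and later dropping a column family with the methods above,
// assuming an already-open RocksDB instance `db` (hypothetical).
try (final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()) {
  final ColumnFamilyHandle cf =
      db.createColumnFamily(new ColumnFamilyDescriptor("metrics".getBytes(), cfOptions));
  // ... use the column family ...
  db.dropColumnFamily(cf);          // records the drop in the MANIFEST
  db.destroyColumnFamilyHandle(cf); // then releases the handle itself
}
// ---------------------------------------------------------------------------------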
- */ - public void destroyColumnFamilyHandle(final ColumnFamilyHandle columnFamilyHandle) { - for (int i = 0; i < ownedColumnFamilyHandles.size(); ++i) { - final ColumnFamilyHandle ownedHandle = ownedColumnFamilyHandles.get(i); - if (ownedHandle.equals(columnFamilyHandle)) { - columnFamilyHandle.close(); - ownedColumnFamilyHandles.remove(i); - return; - } - } - } - - /** - * Set the database entry for "key" to "value". - * - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void put(final byte[] key, final byte[] value) - throws RocksDBException { - put(nativeHandle_, key, 0, key.length, value, 0, value.length); - } - - /** - * Set the database entry for "key" to "value". - * - * @param key The specified key to be inserted - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param value the value associated with the specified key - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and no larger than ("value".length - offset) - * - * @throws RocksDBException thrown if errors happens in underlying native - * library. - * @throws IndexOutOfBoundsException if an offset or length is out of bounds - */ - public void put(final byte[] key, final int offset, final int len, - final byte[] value, final int vOffset, final int vLen) - throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - put(nativeHandle_, key, offset, len, value, vOffset, vLen); - } - - /** - * Set the database entry for "key" to "value" in the specified - * column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * throws IllegalArgumentException if column family is not present - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void put(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final byte[] value) throws RocksDBException { - put(nativeHandle_, key, 0, key.length, value, 0, value.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Set the database entry for "key" to "value" in the specified - * column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key The specified key to be inserted - * @param offset the offset of the "key" array to be used, must - * be non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param value the value associated with the specified key - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and no larger than ("value".length - offset) - * - * @throws RocksDBException thrown if errors happens in underlying native - * library. 
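// --- Editorial example (not part of the deleted file above) ---------------------
// Sketch of the offset/length put variant documented above, assuming an open
// RocksDB instance `db` (hypothetical). Only the indicated slices of the arrays
// are written.
final byte[] keyBuffer = "prefix:user-42".getBytes();
final byte[] valueBuffer = "xxxjane.doe@example.comxxx".getBytes();
// Store the key without its "prefix:" and the value without the "xxx" padding.
db.put(keyBuffer, 7, keyBuffer.length - 7, valueBuffer, 3, valueBuffer.length - 6);
// ---------------------------------------------------------------------------------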
- * @throws IndexOutOfBoundsException if an offset or length is out of bounds - */ - public void put(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final int offset, final int len, - final byte[] value, final int vOffset, final int vLen) - throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - put(nativeHandle_, key, offset, len, value, vOffset, vLen, - columnFamilyHandle.nativeHandle_); - } - - /** - * Set the database entry for "key" to "value". - * - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void put(final WriteOptions writeOpts, final byte[] key, - final byte[] value) throws RocksDBException { - put(nativeHandle_, writeOpts.nativeHandle_, - key, 0, key.length, value, 0, value.length); - } - - /** - * Set the database entry for "key" to "value". - * - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. - * @param key The specified key to be inserted - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param value the value associated with the specified key - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and no larger than ("value".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @throws IndexOutOfBoundsException if an offset or length is out of bounds - */ - public void put(final WriteOptions writeOpts, - final byte[] key, final int offset, final int len, - final byte[] value, final int vOffset, final int vLen) - throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - put(nativeHandle_, writeOpts.nativeHandle_, - key, offset, len, value, vOffset, vLen); - } - - /** - * Set the database entry for "key" to "value" for the specified - * column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * throws IllegalArgumentException if column family is not present - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @see IllegalArgumentException - */ - public void put(final ColumnFamilyHandle columnFamilyHandle, - final WriteOptions writeOpts, final byte[] key, - final byte[] value) throws RocksDBException { - put(nativeHandle_, writeOpts.nativeHandle_, key, 0, key.length, value, - 0, value.length, columnFamilyHandle.nativeHandle_); - } - - /** - * Set the database entry for "key" to "value" for the specified - * column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. - * @param key the specified key to be inserted. Position and limit is used. - * Supports direct buffer only. - * @param value the value associated with the specified key. 
Position and limit is used. - * Supports direct buffer only. - * - * throws IllegalArgumentException if column family is not present - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @see IllegalArgumentException - */ - public void put(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpts, - final ByteBuffer key, final ByteBuffer value) throws RocksDBException { - assert key.isDirect() && value.isDirect(); - putDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), value, - value.position(), value.remaining(), columnFamilyHandle.nativeHandle_); - key.position(key.limit()); - value.position(value.limit()); - } - - /** - * Set the database entry for "key" to "value". - * - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. - * @param key the specified key to be inserted. Position and limit is used. - * Supports direct buffer only. - * @param value the value associated with the specified key. Position and limit is used. - * Supports direct buffer only. - * - * throws IllegalArgumentException if column family is not present - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @see IllegalArgumentException - */ - public void put(final WriteOptions writeOpts, final ByteBuffer key, final ByteBuffer value) - throws RocksDBException { - assert key.isDirect() && value.isDirect(); - putDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), value, - value.position(), value.remaining(), 0); - key.position(key.limit()); - value.position(value.limit()); - } - - /** - * Set the database entry for "key" to "value" for the specified - * column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. - * @param key The specified key to be inserted - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param value the value associated with the specified key - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and no larger than ("value".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @throws IndexOutOfBoundsException if an offset or length is out of bounds - */ - public void put(final ColumnFamilyHandle columnFamilyHandle, - final WriteOptions writeOpts, - final byte[] key, final int offset, final int len, - final byte[] value, final int vOffset, final int vLen) - throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - put(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, - vOffset, vLen, columnFamilyHandle.nativeHandle_); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
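// --- Editorial example (not part of the deleted file above) ---------------------
// Sketch of the direct-ByteBuffer put/get pair documented in this region, assuming
// an open RocksDB instance `db` (hypothetical). Only direct buffers are supported,
// and the calls consume the buffers' position/limit.
final ByteBuffer key = ByteBuffer.allocateDirect(16).put("k1".getBytes());
final ByteBuffer value = ByteBuffer.allocateDirect(64).put("v1".getBytes());
key.flip();
value.flip();
try (final WriteOptions writeOptions = new WriteOptions();
     final ReadOptions readOptions = new ReadOptions()) {
  db.put(writeOptions, key, value);
  key.rewind(); // the put advanced the key's position to its limit
  final ByteBuffer out = ByteBuffer.allocateDirect(64);
  final int size = db.get(readOptions, key, out); // RocksDB.NOT_FOUND if absent
}
// ---------------------------------------------------------------------------------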
- */ - public void delete(final byte[] key) throws RocksDBException { - delete(nativeHandle_, key, 0, key.length); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param key Key to delete within database - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be - * non-negative and no larger than ("key".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final byte[] key, final int offset, final int len) - throws RocksDBException { - delete(nativeHandle_, key, offset, len); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key) throws RocksDBException { - delete(nativeHandle_, key, 0, key.length, columnFamilyHandle.nativeHandle_); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key Key to delete within database - * @param offset the offset of the "key" array to be used, - * must be non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("value".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final int offset, final int len) - throws RocksDBException { - delete(nativeHandle_, key, offset, len, columnFamilyHandle.nativeHandle_); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param writeOpt WriteOptions to be used with delete operation - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final WriteOptions writeOpt, final byte[] key) - throws RocksDBException { - delete(nativeHandle_, writeOpt.nativeHandle_, key, 0, key.length); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param writeOpt WriteOptions to be used with delete operation - * @param key Key to delete within database - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be - * non-negative and no larger than ("key".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
- */ - public void delete(final WriteOptions writeOpt, final byte[] key, - final int offset, final int len) throws RocksDBException { - delete(nativeHandle_, writeOpt.nativeHandle_, key, offset, len); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param writeOpt WriteOptions to be used with delete operation - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final ColumnFamilyHandle columnFamilyHandle, - final WriteOptions writeOpt, final byte[] key) - throws RocksDBException { - delete(nativeHandle_, writeOpt.nativeHandle_, key, 0, key.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param writeOpt WriteOptions to be used with delete operation - * @param key Key to delete within database - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be - * non-negative and no larger than ("key".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final ColumnFamilyHandle columnFamilyHandle, - final WriteOptions writeOpt, final byte[] key, final int offset, - final int len) throws RocksDBException { - delete(nativeHandle_, writeOpt.nativeHandle_, key, offset, len, - columnFamilyHandle.nativeHandle_); - } - - /** - * Get the value associated with the specified key within column family. - * - * @param opt {@link org.rocksdb.ReadOptions} instance. - * @param key the key to retrieve the value. It is using position and limit. - * Supports direct buffer only. - * @param value the out-value to receive the retrieved value. - * It is using position and limit. Limit is set according to value size. - * Supports direct buffer only. - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public int get(final ReadOptions opt, final ByteBuffer key, final ByteBuffer value) - throws RocksDBException { - assert key.isDirect() && value.isDirect(); - int result = getDirect(nativeHandle_, opt.nativeHandle_, key, key.position(), key.remaining(), - value, value.position(), value.remaining(), 0); - if (result != NOT_FOUND) { - value.limit(Math.min(value.limit(), value.position() + result)); - } - key.position(key.limit()); - return result; - } - - /** - * Get the value associated with the specified key within column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param opt {@link org.rocksdb.ReadOptions} instance. - * @param key the key to retrieve the value. It is using position and limit. 
- * Supports direct buffer only. - * @param value the out-value to receive the retrieved value. - * It is using position and limit. Limit is set according to value size. - * Supports direct buffer only. - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public int get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions opt, - final ByteBuffer key, final ByteBuffer value) throws RocksDBException { - assert key.isDirect() && value.isDirect(); - int result = getDirect(nativeHandle_, opt.nativeHandle_, key, key.position(), key.remaining(), - value, value.position(), value.remaining(), columnFamilyHandle.nativeHandle_); - if (result != NOT_FOUND) { - value.limit(Math.min(value.limit(), value.position() + result)); - } - key.position(key.limit()); - return result; - } - - /** - * Remove the database entry for {@code key}. Requires that the key exists - * and was not overwritten. It is not an error if the key did not exist - * in the database. - * - * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple - * times), then the result of calling SingleDelete() on this key is undefined. - * SingleDelete() only behaves correctly if there has been only one Put() - * for this key since the previous call to SingleDelete() for this key. - * - * This feature is currently an experimental performance optimization - * for a very specific workload. It is up to the caller to ensure that - * SingleDelete is only used for a key that is not deleted using Delete() or - * written using Merge(). Mixing SingleDelete operations with Deletes and - * Merges can result in undefined behavior. - * - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final byte[] key) throws RocksDBException { - singleDelete(nativeHandle_, key, key.length); - } - - /** - * Remove the database entry for {@code key}. Requires that the key exists - * and was not overwritten. It is not an error if the key did not exist - * in the database. - * - * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple - * times), then the result of calling SingleDelete() on this key is undefined. - * SingleDelete() only behaves correctly if there has been only one Put() - * for this key since the previous call to SingleDelete() for this key. - * - * This feature is currently an experimental performance optimization - * for a very specific workload. It is up to the caller to ensure that - * SingleDelete is only used for a key that is not deleted using Delete() or - * written using Merge(). Mixing SingleDelete operations with Deletes and - * Merges can result in undefined behavior. - * - * @param columnFamilyHandle The column family to delete the key from - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
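- * <p>
- * A minimal sketch of the intended usage pattern (illustrative only; assumes
- * an open database {@code db} and an existing handle {@code cfHandle}):
- * <pre>{@code
- *   final byte[] key = "ephemeral".getBytes(StandardCharsets.UTF_8);
- *   db.put(cfHandle, key, "value".getBytes(StandardCharsets.UTF_8));
- *   // exactly one put() for this key, never overwritten or merged, then:
- *   db.singleDelete(cfHandle, key);
- * }</pre>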
- */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key) throws RocksDBException { - singleDelete(nativeHandle_, key, key.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Remove the database entry for {@code key}. Requires that the key exists - * and was not overwritten. It is not an error if the key did not exist - * in the database. - * - * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple - * times), then the result of calling SingleDelete() on this key is undefined. - * SingleDelete() only behaves correctly if there has been only one Put() - * for this key since the previous call to SingleDelete() for this key. - * - * This feature is currently an experimental performance optimization - * for a very specific workload. It is up to the caller to ensure that - * SingleDelete is only used for a key that is not deleted using Delete() or - * written using Merge(). Mixing SingleDelete operations with Deletes and - * Merges can result in undefined behavior. - * - * Note: consider setting {@link WriteOptions#setSync(boolean)} true. - * - * @param writeOpt Write options for the delete - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final WriteOptions writeOpt, final byte[] key) - throws RocksDBException { - singleDelete(nativeHandle_, writeOpt.nativeHandle_, key, key.length); - } - - /** - * Remove the database entry for {@code key}. Requires that the key exists - * and was not overwritten. It is not an error if the key did not exist - * in the database. - * - * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple - * times), then the result of calling SingleDelete() on this key is undefined. - * SingleDelete() only behaves correctly if there has been only one Put() - * for this key since the previous call to SingleDelete() for this key. - * - * This feature is currently an experimental performance optimization - * for a very specific workload. It is up to the caller to ensure that - * SingleDelete is only used for a key that is not deleted using Delete() or - * written using Merge(). Mixing SingleDelete operations with Deletes and - * Merges can result in undefined behavior. - * - * Note: consider setting {@link WriteOptions#setSync(boolean)} true. - * - * @param columnFamilyHandle The column family to delete the key from - * @param writeOpt Write options for the delete - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final WriteOptions writeOpt, final byte[] key) throws RocksDBException { - singleDelete(nativeHandle_, writeOpt.nativeHandle_, key, key.length, - columnFamilyHandle.nativeHandle_); - } - - - /** - * Removes the database entries in the range ["beginKey", "endKey"), i.e., - * including "beginKey" and excluding "endKey". a non-OK status on error. It - * is not an error if no keys exist in the range ["beginKey", "endKey"). - * - * Delete the database entry (if any) for "key". Returns OK on success, and a - * non-OK status on error. It is not an error if "key" did not exist in the - * database. 
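- * <p>
- * A minimal usage sketch (illustrative only; assumes an open database
- * {@code db} holding keys that sort between the chosen bounds):
- * <pre>{@code
- *   // removes every key k with "k3" <= k < "k7" under the configured comparator
- *   db.deleteRange("k3".getBytes(StandardCharsets.UTF_8),
- *       "k7".getBytes(StandardCharsets.UTF_8));
- * }</pre>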
- *
- * @param beginKey First key to delete within database (inclusive)
- * @param endKey Last key to delete within database (exclusive)
- *
- * @throws RocksDBException thrown if error happens in underlying native
- *     library.
- */
- public void deleteRange(final byte[] beginKey, final byte[] endKey)
-     throws RocksDBException {
-   deleteRange(nativeHandle_, beginKey, 0, beginKey.length, endKey, 0,
-       endKey.length);
- }
-
- /**
- * Removes the database entries in the range ["beginKey", "endKey"), i.e.,
- * including "beginKey" and excluding "endKey". Returns OK on success, and a
- * non-OK status on error. It is not an error if no keys exist in the range
- * ["beginKey", "endKey").
- *
- * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance
- * @param beginKey First key to delete within database (inclusive)
- * @param endKey Last key to delete within database (exclusive)
- *
- * @throws RocksDBException thrown if error happens in underlying native
- *     library.
- */
- public void deleteRange(final ColumnFamilyHandle columnFamilyHandle,
-     final byte[] beginKey, final byte[] endKey) throws RocksDBException {
-   deleteRange(nativeHandle_, beginKey, 0, beginKey.length, endKey, 0,
-       endKey.length, columnFamilyHandle.nativeHandle_);
- }
-
- /**
- * Removes the database entries in the range ["beginKey", "endKey"), i.e.,
- * including "beginKey" and excluding "endKey". Returns OK on success, and a
- * non-OK status on error. It is not an error if no keys exist in the range
- * ["beginKey", "endKey").
- *
- * @param writeOpt WriteOptions to be used with delete operation
- * @param beginKey First key to delete within database (inclusive)
- * @param endKey Last key to delete within database (exclusive)
- *
- * @throws RocksDBException thrown if error happens in underlying
- *     native library.
- */
- public void deleteRange(final WriteOptions writeOpt, final byte[] beginKey,
-     final byte[] endKey) throws RocksDBException {
-   deleteRange(nativeHandle_, writeOpt.nativeHandle_, beginKey, 0,
-       beginKey.length, endKey, 0, endKey.length);
- }
-
- /**
- * Removes the database entries in the range ["beginKey", "endKey"), i.e.,
- * including "beginKey" and excluding "endKey". Returns OK on success, and a
- * non-OK status on error. It is not an error if no keys exist in the range
- * ["beginKey", "endKey").
- *
- * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance
- * @param writeOpt WriteOptions to be used with delete operation
- * @param beginKey First key to delete within database (inclusive)
- * @param endKey Last key to delete within database (exclusive)
- *
- * @throws RocksDBException thrown if error happens in underlying native
- *     library.
- */
- public void deleteRange(final ColumnFamilyHandle columnFamilyHandle,
-     final WriteOptions writeOpt, final byte[] beginKey, final byte[] endKey)
-     throws RocksDBException {
-   deleteRange(nativeHandle_, writeOpt.nativeHandle_, beginKey, 0,
-       beginKey.length, endKey, 0, endKey.length,
-       columnFamilyHandle.nativeHandle_);
- }
-
-
- /**
- * Add merge operand for key/value pair.
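- * <p>
- * A minimal sketch (illustrative only; merging is only meaningful if the
- * database was opened with a merge operator configured, for example a
- * string-append operator via {@code Options#setMergeOperator}):
- * <pre>{@code
- *   final byte[] key = "events".getBytes(StandardCharsets.UTF_8);
- *   db.merge(key, "login".getBytes(StandardCharsets.UTF_8));
- *   db.merge(key, "logout".getBytes(StandardCharsets.UTF_8));
- *   // a subsequent get(key) returns the operands combined by that operator
- * }</pre>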
- * - * @param key the specified key to be merged. - * @param value the value to be merged with the current value for the - * specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void merge(final byte[] key, final byte[] value) - throws RocksDBException { - merge(nativeHandle_, key, 0, key.length, value, 0, value.length); - } - - /** - * Add merge operand for key/value pair. - * - * @param key the specified key to be merged. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param value the value to be merged with the current value for the - * specified key. - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and must be non-negative and no larger than - * ("value".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @throws IndexOutOfBoundsException if an offset or length is out of bounds - */ - public void merge(final byte[] key, int offset, int len, final byte[] value, - final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - merge(nativeHandle_, key, offset, len, value, vOffset, vLen); - } - - /** - * Add merge operand for key/value pair in a ColumnFamily. - * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param key the specified key to be merged. - * @param value the value to be merged with the current value for - * the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void merge(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final byte[] value) throws RocksDBException { - merge(nativeHandle_, key, 0, key.length, value, 0, value.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Add merge operand for key/value pair in a ColumnFamily. - * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param key the specified key to be merged. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param value the value to be merged with the current value for - * the specified key. - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * must be non-negative and no larger than ("value".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
- * @throws IndexOutOfBoundsException if an offset or length is out of bounds - */ - public void merge(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final int offset, final int len, final byte[] value, - final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - merge(nativeHandle_, key, offset, len, value, vOffset, vLen, - columnFamilyHandle.nativeHandle_); - } - - /** - * Add merge operand for key/value pair. - * - * @param writeOpts {@link WriteOptions} for this write. - * @param key the specified key to be merged. - * @param value the value to be merged with the current value for - * the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void merge(final WriteOptions writeOpts, final byte[] key, - final byte[] value) throws RocksDBException { - merge(nativeHandle_, writeOpts.nativeHandle_, - key, 0, key.length, value, 0, value.length); - } - - /** - * Add merge operand for key/value pair. - * - * @param writeOpts {@link WriteOptions} for this write. - * @param key the specified key to be merged. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("value".length - offset) - * @param value the value to be merged with the current value for - * the specified key. - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and no larger than ("value".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @throws IndexOutOfBoundsException if an offset or length is out of bounds - */ - public void merge(final WriteOptions writeOpts, - final byte[] key, final int offset, final int len, - final byte[] value, final int vOffset, final int vLen) - throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - merge(nativeHandle_, writeOpts.nativeHandle_, - key, offset, len, value, vOffset, vLen); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param writeOpt WriteOptions to be used with delete operation - * @param key Key to delete within database. It is using position and limit. - * Supports direct buffer only. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final WriteOptions writeOpt, final ByteBuffer key) throws RocksDBException { - assert key.isDirect(); - deleteDirect(nativeHandle_, writeOpt.nativeHandle_, key, key.position(), key.remaining(), 0); - key.position(key.limit()); - } - - /** - * Delete the database entry (if any) for "key". Returns OK on - * success, and a non-OK status on error. It is not an error if "key" - * did not exist in the database. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param writeOpt WriteOptions to be used with delete operation - * @param key Key to delete within database. It is using position and limit. - * Supports direct buffer only. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
- */ - public void delete(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpt, - final ByteBuffer key) throws RocksDBException { - assert key.isDirect(); - deleteDirect(nativeHandle_, writeOpt.nativeHandle_, key, key.position(), key.remaining(), - columnFamilyHandle.nativeHandle_); - key.position(key.limit()); - } - - /** - * Add merge operand for key/value pair. - * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param writeOpts {@link WriteOptions} for this write. - * @param key the specified key to be merged. - * @param value the value to be merged with the current value for the - * specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void merge(final ColumnFamilyHandle columnFamilyHandle, - final WriteOptions writeOpts, final byte[] key, final byte[] value) - throws RocksDBException { - merge(nativeHandle_, writeOpts.nativeHandle_, - key, 0, key.length, value, 0, value.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Add merge operand for key/value pair. - * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param writeOpts {@link WriteOptions} for this write. - * @param key the specified key to be merged. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param value the value to be merged with the current value for - * the specified key. - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and no larger than ("value".length - offset) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @throws IndexOutOfBoundsException if an offset or length is out of bounds - */ - public void merge( - final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpts, - final byte[] key, final int offset, final int len, - final byte[] value, final int vOffset, final int vLen) - throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - merge(nativeHandle_, writeOpts.nativeHandle_, - key, offset, len, value, vOffset, vLen, - columnFamilyHandle.nativeHandle_); - } - - /** - * Apply the specified updates to the database. - * - * @param writeOpts WriteOptions instance - * @param updates WriteBatch instance - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void write(final WriteOptions writeOpts, final WriteBatch updates) - throws RocksDBException { - write0(nativeHandle_, writeOpts.nativeHandle_, updates.nativeHandle_); - } - - /** - * Apply the specified updates to the database. - * - * @param writeOpts WriteOptions instance - * @param updates WriteBatchWithIndex instance - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
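- * <p>
- * A minimal sketch of applying several updates atomically (illustrative only;
- * assumes an open database {@code db}):
- * <pre>{@code
- *   try (final WriteOptions writeOpts = new WriteOptions();
- *        final WriteBatchWithIndex batch = new WriteBatchWithIndex()) {
- *     batch.put("k1".getBytes(StandardCharsets.UTF_8), "v1".getBytes(StandardCharsets.UTF_8));
- *     batch.delete("k2".getBytes(StandardCharsets.UTF_8));
- *     db.write(writeOpts, batch); // all updates become visible together
- *   }
- * }</pre>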
- */ - public void write(final WriteOptions writeOpts, - final WriteBatchWithIndex updates) throws RocksDBException { - write1(nativeHandle_, writeOpts.nativeHandle_, updates.nativeHandle_); - } - - // TODO(AR) we should improve the #get() API, returning -1 (RocksDB.NOT_FOUND) is not very nice - // when we could communicate better status into, also the C++ code show that -2 could be returned - - /** - * Get the value associated with the specified key within column family* - * - * @param key the key to retrieve the value. - * @param value the out-value to receive the retrieved value. - * - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public int get(final byte[] key, final byte[] value) throws RocksDBException { - return get(nativeHandle_, key, 0, key.length, value, 0, value.length); - } - - /** - * Get the value associated with the specified key within column family* - * - * @param key the key to retrieve the value. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param value the out-value to receive the retrieved value. - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "value".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and and no larger than ("value".length - offset) - * - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public int get(final byte[] key, final int offset, final int len, - final byte[] value, final int vOffset, final int vLen) - throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - return get(nativeHandle_, key, offset, len, value, vOffset, vLen); - } - - /** - * Get the value associated with the specified key within column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the key to retrieve the value. - * @param value the out-value to receive the retrieved value. - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
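- * <p>
- * A minimal sketch of interpreting the return value (illustrative only;
- * assumes an open database {@code db} and a handle {@code cfHandle}):
- * <pre>{@code
- *   final byte[] buffer = new byte[64];
- *   final int size = db.get(cfHandle, "k".getBytes(StandardCharsets.UTF_8), buffer);
- *   if (size == RocksDB.NOT_FOUND) {
- *     // the key is not present
- *   } else if (size > buffer.length) {
- *     // buffer too small: it holds a truncated prefix, the full value is size bytes
- *   } else {
- *     // the value occupies buffer[0..size)
- *   }
- * }</pre>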
- */ - public int get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, - final byte[] value) throws RocksDBException, IllegalArgumentException { - return get(nativeHandle_, key, 0, key.length, value, 0, value.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Get the value associated with the specified key within column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the key to retrieve the value. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * an no larger than ("key".length - offset) - * @param value the out-value to receive the retrieved value. - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and no larger than ("value".length - offset) - * - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public int get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, - final int offset, final int len, final byte[] value, final int vOffset, - final int vLen) throws RocksDBException, IllegalArgumentException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - return get(nativeHandle_, key, offset, len, value, vOffset, vLen, - columnFamilyHandle.nativeHandle_); - } - - /** - * Get the value associated with the specified key. - * - * @param opt {@link org.rocksdb.ReadOptions} instance. - * @param key the key to retrieve the value. - * @param value the out-value to receive the retrieved value. - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public int get(final ReadOptions opt, final byte[] key, - final byte[] value) throws RocksDBException { - return get(nativeHandle_, opt.nativeHandle_, - key, 0, key.length, value, 0, value.length); - } - - /** - * Get the value associated with the specified key. - * - * @param opt {@link org.rocksdb.ReadOptions} instance. - * @param key the key to retrieve the value. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param value the out-value to receive the retrieved value. 
- * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, must be - * non-negative and no larger than ("value".length - offset) - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public int get(final ReadOptions opt, final byte[] key, final int offset, - final int len, final byte[] value, final int vOffset, final int vLen) - throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - return get(nativeHandle_, opt.nativeHandle_, - key, offset, len, value, vOffset, vLen); - } - - /** - * Get the value associated with the specified key within column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param opt {@link org.rocksdb.ReadOptions} instance. - * @param key the key to retrieve the value. - * @param value the out-value to receive the retrieved value. - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public int get(final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions opt, final byte[] key, final byte[] value) - throws RocksDBException { - return get(nativeHandle_, opt.nativeHandle_, key, 0, key.length, value, - 0, value.length, columnFamilyHandle.nativeHandle_); - } - - /** - * Get the value associated with the specified key within column family. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param opt {@link org.rocksdb.ReadOptions} instance. - * @param key the key to retrieve the value. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be - * non-negative and and no larger than ("key".length - offset) - * @param value the out-value to receive the retrieved value. - * @param vOffset the offset of the "value" array to be used, must be - * non-negative and no longer than "key".length - * @param vLen the length of the "value" array to be used, and must be - * non-negative and no larger than ("value".length - offset) - * @return The size of the actual value that matches the specified - * {@code key} in byte. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. RocksDB.NOT_FOUND will be returned if the value not - * found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
- */ - public int get(final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions opt, final byte[] key, final int offset, final int len, - final byte[] value, final int vOffset, final int vLen) - throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); - return get(nativeHandle_, opt.nativeHandle_, key, offset, len, value, - vOffset, vLen, columnFamilyHandle.nativeHandle_); - } - - /** - * The simplified version of get which returns a new byte array storing - * the value associated with the specified input key if any. null will be - * returned if the specified key is not found. - * - * @param key the key retrieve the value. - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public byte[] get(final byte[] key) throws RocksDBException { - return get(nativeHandle_, key, 0, key.length); - } - - /** - * The simplified version of get which returns a new byte array storing - * the value associated with the specified input key if any. null will be - * returned if the specified key is not found. - * - * @param key the key retrieve the value. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public byte[] get(final byte[] key, final int offset, - final int len) throws RocksDBException { - checkBounds(offset, len, key.length); - return get(nativeHandle_, key, offset, len); - } - - /** - * The simplified version of get which returns a new byte array storing - * the value associated with the specified input key if any. null will be - * returned if the specified key is not found. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the key retrieve the value. - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public byte[] get(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key) throws RocksDBException { - return get(nativeHandle_, key, 0, key.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * The simplified version of get which returns a new byte array storing - * the value associated with the specified input key if any. null will be - * returned if the specified key is not found. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the key retrieve the value. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
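- * <p>
- * A minimal sketch (illustrative only; assumes an open database {@code db}
- * and a handle {@code cfHandle}):
- * <pre>{@code
- *   final byte[] compound = "prefix:user:42".getBytes(StandardCharsets.UTF_8);
- *   // look the key up using only the "user:42" portion of the array
- *   final byte[] value = db.get(cfHandle, compound, 7, compound.length - 7);
- *   if (value == null) {
- *     // the key was not found
- *   }
- * }</pre>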
- */ - public byte[] get(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final int offset, final int len) - throws RocksDBException { - checkBounds(offset, len, key.length); - return get(nativeHandle_, key, offset, len, - columnFamilyHandle.nativeHandle_); - } - - /** - * The simplified version of get which returns a new byte array storing - * the value associated with the specified input key if any. null will be - * returned if the specified key is not found. - * - * @param key the key retrieve the value. - * @param opt Read options. - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public byte[] get(final ReadOptions opt, final byte[] key) - throws RocksDBException { - return get(nativeHandle_, opt.nativeHandle_, key, 0, key.length); - } - - /** - * The simplified version of get which returns a new byte array storing - * the value associated with the specified input key if any. null will be - * returned if the specified key is not found. - * - * @param key the key retrieve the value. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param opt Read options. - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public byte[] get(final ReadOptions opt, final byte[] key, final int offset, - final int len) throws RocksDBException { - checkBounds(offset, len, key.length); - return get(nativeHandle_, opt.nativeHandle_, key, offset, len); - } - - /** - * The simplified version of get which returns a new byte array storing - * the value associated with the specified input key if any. null will be - * returned if the specified key is not found. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the key retrieve the value. - * @param opt Read options. - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public byte[] get(final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions opt, final byte[] key) throws RocksDBException { - return get(nativeHandle_, opt.nativeHandle_, key, 0, key.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * The simplified version of get which returns a new byte array storing - * the value associated with the specified input key if any. null will be - * returned if the specified key is not found. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the key retrieve the value. - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than ("key".length - offset) - * @param opt Read options. - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. 
- *
- * @throws RocksDBException thrown if error happens in underlying
- *     native library.
- */
- public byte[] get(final ColumnFamilyHandle columnFamilyHandle,
-     final ReadOptions opt, final byte[] key, final int offset, final int len)
-     throws RocksDBException {
-   checkBounds(offset, len, key.length);
-   return get(nativeHandle_, opt.nativeHandle_, key, offset, len,
-       columnFamilyHandle.nativeHandle_);
- }
-
- /**
- * Takes a list of keys, and returns a list of values for the given list of
- * keys. List will contain null for keys which could not be found.
- *
- * @param keys List of keys for which values need to be retrieved.
- * @return List of values for the given list of keys. List will contain
- *     null for keys which could not be found.
- *
- * @throws RocksDBException thrown if error happens in underlying
- *     native library.
- */
- public List<byte[]> multiGetAsList(final List<byte[]> keys)
-     throws RocksDBException {
-   assert(keys.size() != 0);
-
-   final byte[][] keysArray = keys.toArray(new byte[keys.size()][]);
-   final int[] keyOffsets = new int[keysArray.length];
-   final int[] keyLengths = new int[keysArray.length];
-   for(int i = 0; i < keyLengths.length; i++) {
-     keyLengths[i] = keysArray[i].length;
-   }
-
-   return Arrays.asList(multiGet(nativeHandle_, keysArray, keyOffsets,
-       keyLengths));
- }
-
- /**
- * Returns a list of values for the given list of keys. List will contain
- * null for keys which could not be found.
- *
- * Note: Every key needs to have a related column family name in - * {@code columnFamilyHandleList}. - *
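- * <p>
- * A minimal sketch (illustrative only; assumes an open database {@code db}
- * and handles {@code cf1}, {@code cf2}):
- * <pre>{@code
- *   final List<ColumnFamilyHandle> handles = Arrays.asList(cf1, cf2);
- *   final List<byte[]> keys = Arrays.asList(
- *       "a".getBytes(StandardCharsets.UTF_8),
- *       "b".getBytes(StandardCharsets.UTF_8));
- *   final List<byte[]> values = db.multiGetAsList(handles, keys);
- *   // values.get(i) is null if keys.get(i) was not found in handles.get(i)
- * }</pre>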
- *
- * @param columnFamilyHandleList {@link java.util.List} containing
- *     {@link org.rocksdb.ColumnFamilyHandle} instances.
- * @param keys List of keys for which values need to be retrieved.
- * @return List of values for the given list of keys. List will contain
- *     null for keys which could not be found.
- *
- * @throws RocksDBException thrown if error happens in underlying
- *     native library.
- * @throws IllegalArgumentException thrown if the size of passed keys is not
- *     equal to the amount of passed column family handles.
- */
- public List<byte[]> multiGetAsList(
-     final List<ColumnFamilyHandle> columnFamilyHandleList,
-     final List<byte[]> keys) throws RocksDBException,
-     IllegalArgumentException {
-   assert(keys.size() != 0);
-   // Check that the number of keys matches the number of column family
-   // handles; if not, an exception must be thrown here, otherwise the
-   // native call would cause a segmentation fault.
-   if (keys.size() != columnFamilyHandleList.size()) {
-     throw new IllegalArgumentException(
-         "For each key there must be a ColumnFamilyHandle.");
-   }
-   final long[] cfHandles = new long[columnFamilyHandleList.size()];
-   for (int i = 0; i < columnFamilyHandleList.size(); i++) {
-     cfHandles[i] = columnFamilyHandleList.get(i).nativeHandle_;
-   }
-
-   final byte[][] keysArray = keys.toArray(new byte[keys.size()][]);
-   final int[] keyOffsets = new int[keysArray.length];
-   final int[] keyLengths = new int[keysArray.length];
-   for(int i = 0; i < keyLengths.length; i++) {
-     keyLengths[i] = keysArray[i].length;
-   }
-
-   return Arrays.asList(multiGet(nativeHandle_, keysArray, keyOffsets,
-       keyLengths, cfHandles));
- }
-
- /**
- * Returns a list of values for the given list of keys. List will contain
- * null for keys which could not be found.
- *
- * @param opt Read options.
- * @param keys List of keys for which values need to be retrieved.
- * @return List of values for the given list of keys. List will contain
- *     null for keys which could not be found.
- *
- * @throws RocksDBException thrown if error happens in underlying
- *     native library.
- */
- public List<byte[]> multiGetAsList(final ReadOptions opt,
-     final List<byte[]> keys) throws RocksDBException {
-   assert(keys.size() != 0);
-
-   final byte[][] keysArray = keys.toArray(new byte[keys.size()][]);
-   final int[] keyOffsets = new int[keysArray.length];
-   final int[] keyLengths = new int[keysArray.length];
-   for(int i = 0; i < keyLengths.length; i++) {
-     keyLengths[i] = keysArray[i].length;
-   }
-
-   return Arrays.asList(multiGet(nativeHandle_, opt.nativeHandle_,
-       keysArray, keyOffsets, keyLengths));
- }
-
- /**
- * Returns a list of values for the given list of keys. List will contain
- * null for keys which could not be found.
- *
- * Note: Every key needs to have a related column family name in - * {@code columnFamilyHandleList}. - *
- * - * @param opt Read options. - * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. - * @param keys of keys for which values need to be retrieved. - * @return List of values for the given list of keys. List will contain - * null for keys which could not be found. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @throws IllegalArgumentException thrown if the size of passed keys is not - * equal to the amount of passed column family handles. - */ - public List multiGetAsList(final ReadOptions opt, - final List columnFamilyHandleList, - final List keys) throws RocksDBException { - assert(keys.size() != 0); - // Check if key size equals cfList size. If not a exception must be - // thrown. If not a Segmentation fault happens. - if (keys.size()!=columnFamilyHandleList.size()){ - throw new IllegalArgumentException( - "For each key there must be a ColumnFamilyHandle."); - } - final long[] cfHandles = new long[columnFamilyHandleList.size()]; - for (int i = 0; i < columnFamilyHandleList.size(); i++) { - cfHandles[i] = columnFamilyHandleList.get(i).nativeHandle_; - } - - final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); - final int[] keyOffsets = new int[keysArray.length]; - final int[] keyLengths = new int[keysArray.length]; - for(int i = 0; i < keyLengths.length; i++) { - keyLengths[i] = keysArray[i].length; - } - - return Arrays.asList(multiGet(nativeHandle_, opt.nativeHandle_, - keysArray, keyOffsets, keyLengths, cfHandles)); - } - - /** - * Fetches a list of values for the given list of keys, all from the default column family. - * - * @param keys list of keys for which values need to be retrieved. - * @param values list of buffers to return retrieved values in - * @return list of number of bytes in DB for each requested key - * this can be more than the size of the corresponding buffer; then the buffer will be filled - * with the appropriate truncation of the database value. - * @throws RocksDBException if error happens in underlying native library. - * @throws IllegalArgumentException thrown if the number of passed keys and passed values - * do not match. - */ - public List multiGetByteBuffers( - final List keys, final List values) throws RocksDBException { - final ReadOptions readOptions = new ReadOptions(); - final List columnFamilyHandleList = new ArrayList<>(1); - columnFamilyHandleList.add(getDefaultColumnFamily()); - return multiGetByteBuffers(readOptions, columnFamilyHandleList, keys, values); - } - - /** - * Fetches a list of values for the given list of keys, all from the default column family. - * - * @param readOptions Read options - * @param keys list of keys for which values need to be retrieved. - * @param values list of buffers to return retrieved values in - * @throws RocksDBException if error happens in underlying native library. - * @throws IllegalArgumentException thrown if the number of passed keys and passed values - * do not match. - * @return the list of values for the given list of keys - */ - public List multiGetByteBuffers(final ReadOptions readOptions, - final List keys, final List values) throws RocksDBException { - final List columnFamilyHandleList = new ArrayList<>(1); - columnFamilyHandleList.add(getDefaultColumnFamily()); - return multiGetByteBuffers(readOptions, columnFamilyHandleList, keys, values); - } - - /** - * Fetches a list of values for the given list of keys. - *
- * Note: Every key needs to have a related column family name in - * {@code columnFamilyHandleList}. - *
- *
- * @param columnFamilyHandleList {@link java.util.List} containing
- *     {@link org.rocksdb.ColumnFamilyHandle} instances.
- * @param keys list of keys for which values need to be retrieved.
- * @param values list of buffers to return retrieved values in
- * @throws RocksDBException if error happens in underlying native library.
- * @throws IllegalArgumentException thrown if the number of passed keys, passed values and
- *     passed column family handles do not match.
- * @return the list of values for the given list of keys
- */
- public List<ByteBufferGetStatus> multiGetByteBuffers(
-     final List<ColumnFamilyHandle> columnFamilyHandleList, final List<ByteBuffer> keys,
-     final List<ByteBuffer> values) throws RocksDBException {
-   final ReadOptions readOptions = new ReadOptions();
-   return multiGetByteBuffers(readOptions, columnFamilyHandleList, keys, values);
- }
-
- /**
- * Fetches a list of values for the given list of keys.
- *
- * Note: Every key needs to have a related column family name in - * {@code columnFamilyHandleList}. - *
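- * <p>
- * A minimal sketch (illustrative only; assumes an open database {@code db},
- * a handle {@code cfHandle}, and direct buffers of sufficient capacity; the
- * field names on {@code ByteBufferGetStatus} follow its public API):
- * <pre>{@code
- *   final ByteBuffer key = ByteBuffer.allocateDirect(16);
- *   key.put("k".getBytes(StandardCharsets.UTF_8)).flip();
- *   final ByteBuffer value = ByteBuffer.allocateDirect(64);
- *   try (final ReadOptions readOptions = new ReadOptions()) {
- *     final List<ByteBufferGetStatus> statuses = db.multiGetByteBuffers(readOptions,
- *         Arrays.asList(cfHandle), Arrays.asList(key), Arrays.asList(value));
- *     if (statuses.get(0).status.getCode() == Status.Code.Ok) {
- *       // statuses.get(0).value holds the (possibly truncated) result
- *     }
- *   }
- * }</pre>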
- * - * @param readOptions Read options - * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. - * @param keys list of keys for which values need to be retrieved. - * @param values list of buffers to return retrieved values in - * @throws RocksDBException if error happens in underlying native library. - * @throws IllegalArgumentException thrown if the number of passed keys, passed values and - * passed column family handles do not match. - * @return the list of values for the given list of keys - */ - public List multiGetByteBuffers(final ReadOptions readOptions, - final List columnFamilyHandleList, final List keys, - final List values) throws RocksDBException { - assert (keys.size() != 0); - - // Check if key size equals cfList size. If not a exception must be - // thrown. If not a Segmentation fault happens. - if (keys.size() != columnFamilyHandleList.size() && columnFamilyHandleList.size() > 1) { - throw new IllegalArgumentException( - "Wrong number of ColumnFamilyHandle(s) supplied. Provide 0, 1, or as many as there are key/value(s)"); - } - - // Check if key size equals cfList size. If not a exception must be - // thrown. If not a Segmentation fault happens. - if (values.size() != keys.size()) { - throw new IllegalArgumentException("For each key there must be a corresponding value."); - } - - // TODO (AP) support indirect buffers - for (final ByteBuffer key : keys) { - if (!key.isDirect()) { - throw new IllegalArgumentException("All key buffers must be direct byte buffers"); - } - } - - // TODO (AP) support indirect buffers, though probably via a less efficient code path - for (final ByteBuffer value : values) { - if (!value.isDirect()) { - throw new IllegalArgumentException("All value buffers must be direct byte buffers"); - } - } - - final int numCFHandles = columnFamilyHandleList.size(); - final long[] cfHandles = new long[numCFHandles]; - for (int i = 0; i < numCFHandles; i++) { - cfHandles[i] = columnFamilyHandleList.get(i).nativeHandle_; - } - - final int numValues = keys.size(); - - final ByteBuffer[] keysArray = keys.toArray(new ByteBuffer[0]); - final int[] keyOffsets = new int[numValues]; - final int[] keyLengths = new int[numValues]; - for (int i = 0; i < numValues; i++) { - // TODO (AP) add keysArray[i].arrayOffset() if the buffer is indirect - // TODO (AP) because in that case we have to pass the array directly, - // so that the JNI C++ code will not know to compensate for the array offset - keyOffsets[i] = keysArray[i].position(); - keyLengths[i] = keysArray[i].limit(); - } - final ByteBuffer[] valuesArray = values.toArray(new ByteBuffer[0]); - final int[] valuesSizeArray = new int[numValues]; - final Status[] statusArray = new Status[numValues]; - - multiGet(nativeHandle_, readOptions.nativeHandle_, cfHandles, keysArray, keyOffsets, keyLengths, - valuesArray, valuesSizeArray, statusArray); - - final List results = new ArrayList<>(); - for (int i = 0; i < numValues; i++) { - final Status status = statusArray[i]; - if (status.getCode() == Status.Code.Ok) { - final ByteBuffer value = valuesArray[i]; - value.position(Math.min(valuesSizeArray[i], value.capacity())); - value.flip(); // prepare for read out - results.add(new ByteBufferGetStatus(status, valuesSizeArray[i], value)); - } else { - results.add(new ByteBufferGetStatus(status)); - } - } - - return results; - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key 
might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a false negative. - * - * If the caller wants to obtain value when the key - * is found in memory, then {@code valueHolder} must be set. - * - * This check is potentially lighter-weight than invoking - * {@link #get(byte[])}. One way to make this lighter weight is to avoid - * doing any IOs. - * - * @param key byte array of a key to search for - * @param valueHolder non-null to retrieve the value if it is found, or null - * if the value is not needed. If non-null, upon return of the function, - * the {@code value} will be set if it could be retrieved. - * - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist(final byte[] key, - /* @Nullable */ final Holder valueHolder) { - return keyMayExist(key, 0, key.length, valueHolder); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a false negative. - * - * If the caller wants to obtain value when the key - * is found in memory, then {@code valueHolder} must be set. - * - * This check is potentially lighter-weight than invoking - * {@link #get(byte[], int, int)}. One way to make this lighter weight is to - * avoid doing any IOs. - * - * @param key byte array of a key to search for - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than "key".length - * @param valueHolder non-null to retrieve the value if it is found, or null - * if the value is not needed. If non-null, upon return of the function, - * the {@code value} will be set if it could be retrieved. - * - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist(final byte[] key, - final int offset, final int len, - /* @Nullable */ final Holder valueHolder) { - return keyMayExist((ColumnFamilyHandle)null, key, offset, len, valueHolder); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a false negative. - * - * If the caller wants to obtain value when the key - * is found in memory, then {@code valueHolder} must be set. - * - * This check is potentially lighter-weight than invoking - * {@link #get(ColumnFamilyHandle,byte[])}. One way to make this lighter - * weight is to avoid doing any IOs. - * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param key byte array of a key to search for - * @param valueHolder non-null to retrieve the value if it is found, or null - * if the value is not needed. If non-null, upon return of the function, - * the {@code value} will be set if it could be retrieved. - * - * @return false if the key definitely does not exist in the database, - * otherwise true. 
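- * <p>
- * A minimal sketch (illustrative only; assumes an open database {@code db}
- * and a handle {@code cfHandle}):
- * <pre>{@code
- *   final Holder<byte[]> valueHolder = new Holder<>();
- *   if (db.keyMayExist(cfHandle, "k".getBytes(StandardCharsets.UTF_8), valueHolder)) {
- *     // the key may exist; valueHolder.getValue() is non-null only if the
- *     // value was already available in memory
- *   } else {
- *     // the key definitely does not exist
- *   }
- * }</pre>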
- */ - public boolean keyMayExist( - final ColumnFamilyHandle columnFamilyHandle, final byte[] key, - /* @Nullable */ final Holder valueHolder) { - return keyMayExist(columnFamilyHandle, key, 0, key.length, - valueHolder); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a false negative. - * - * If the caller wants to obtain value when the key - * is found in memory, then {@code valueHolder} must be set. - * - * This check is potentially lighter-weight than invoking - * {@link #get(ColumnFamilyHandle, byte[], int, int)}. One way to make this - * lighter weight is to avoid doing any IOs. - * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param key byte array of a key to search for - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than "key".length - * @param valueHolder non-null to retrieve the value if it is found, or null - * if the value is not needed. If non-null, upon return of the function, - * the {@code value} will be set if it could be retrieved. - * - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist( - final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, int offset, int len, - /* @Nullable */ final Holder valueHolder) { - return keyMayExist(columnFamilyHandle, null, key, offset, len, - valueHolder); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a true negative. - * - * If the caller wants to obtain value when the key - * is found in memory, then {@code valueHolder} must be set. - * - * This check is potentially lighter-weight than invoking - * {@link #get(ReadOptions, byte[])}. One way to make this - * lighter weight is to avoid doing any IOs. - * - * @param readOptions {@link ReadOptions} instance - * @param key byte array of a key to search for - * @param valueHolder non-null to retrieve the value if it is found, or null - * if the value is not needed. If non-null, upon return of the function, - * the {@code value} will be set if it could be retrieved. - * - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist( - final ReadOptions readOptions, final byte[] key, - /* @Nullable */ final Holder valueHolder) { - return keyMayExist(readOptions, key, 0, key.length, - valueHolder); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a true negative. - * - * If the caller wants to obtain value when the key - * is found in memory, then {@code valueHolder} must be set. - * - * This check is potentially lighter-weight than invoking - * {@link #get(ReadOptions, byte[], int, int)}. One way to make this - * lighter weight is to avoid doing any IOs. 
- * - * @param readOptions {@link ReadOptions} instance - * @param key byte array of a key to search for - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than "key".length - * @param valueHolder non-null to retrieve the value if it is found, or null - * if the value is not needed. If non-null, upon return of the function, - * the {@code value} will be set if it could be retrieved. - * - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist( - final ReadOptions readOptions, - final byte[] key, final int offset, final int len, - /* @Nullable */ final Holder valueHolder) { - return keyMayExist(null, readOptions, - key, offset, len, valueHolder); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a true negative. - * - * If the caller wants to obtain value when the key - * is found in memory, then {@code valueHolder} must be set. - * - * This check is potentially lighter-weight than invoking - * {@link #get(ColumnFamilyHandle, ReadOptions, byte[])}. One way to make this - * lighter weight is to avoid doing any IOs. - * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param readOptions {@link ReadOptions} instance - * @param key byte array of a key to search for - * @param valueHolder non-null to retrieve the value if it is found, or null - * if the value is not needed. If non-null, upon return of the function, - * the {@code value} will be set if it could be retrieved. - * - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist( - final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions readOptions, final byte[] key, - /* @Nullable */ final Holder valueHolder) { - return keyMayExist(columnFamilyHandle, readOptions, - key, 0, key.length, valueHolder); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a false negative. - * - * If the caller wants to obtain value when the key - * is found in memory, then {@code valueHolder} must be set. - * - * This check is potentially lighter-weight than invoking - * {@link #get(ColumnFamilyHandle, ReadOptions, byte[], int, int)}. - * One way to make this lighter weight is to avoid doing any IOs. - * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param readOptions {@link ReadOptions} instance - * @param key byte array of a key to search for - * @param offset the offset of the "key" array to be used, must be - * non-negative and no larger than "key".length - * @param len the length of the "key" array to be used, must be non-negative - * and no larger than "key".length - * @param valueHolder non-null to retrieve the value if it is found, or null - * if the value is not needed. If non-null, upon return of the function, - * the {@code value} will be set if it could be retrieved. - * - * @return false if the key definitely does not exist in the database, - * otherwise true. 
- */ - public boolean keyMayExist( - final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions readOptions, - final byte[] key, final int offset, final int len, - /* @Nullable */ final Holder valueHolder) { - checkBounds(offset, len, key.length); - if (valueHolder == null) { - return keyMayExist(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - readOptions == null ? 0 : readOptions.nativeHandle_, - key, offset, len); - } else { - final byte[][] result = keyMayExistFoundValue( - nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - readOptions == null ? 0 : readOptions.nativeHandle_, - key, offset, len); - if (result[0][0] == 0x0) { - valueHolder.setValue(null); - return false; - } else if (result[0][0] == 0x1) { - valueHolder.setValue(null); - return true; - } else { - valueHolder.setValue(result[1]); - return true; - } - } - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a false negative. - * - * @param key bytebuffer containing the value of the key - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist(final ByteBuffer key) { - return keyMayExist(null, (ReadOptions) null, key); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a false negative. - * - * @param columnFamilyHandle the {@link ColumnFamilyHandle} to look for the key in - * @param key bytebuffer containing the value of the key - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist(final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key) { - return keyMayExist(columnFamilyHandle, (ReadOptions) null, key); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a false negative. - * - * @param readOptions the {@link ReadOptions} to use when reading the key/value - * @param key bytebuffer containing the value of the key - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist(final ReadOptions readOptions, final ByteBuffer key) { - return keyMayExist(null, readOptions, key); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns {@link KeyMayExist.KeyMayExistEnum#kNotExist}, - * otherwise if it can with best effort retreive the value, it returns {@link - * KeyMayExist.KeyMayExistEnum#kExistsWithValue} otherwise it returns {@link - * KeyMayExist.KeyMayExistEnum#kExistsWithoutValue}. The choice not to return a value which might - * exist is at the discretion of the implementation; the only guarantee is that {@link - * KeyMayExist.KeyMayExistEnum#kNotExist} is an assurance that the key does not exist. 
- * - * @param key bytebuffer containing the value of the key - * @param value bytebuffer which will receive a value if the key exists and a value is known - * @return a {@link KeyMayExist} object reporting if key may exist and if a value is provided - */ - public KeyMayExist keyMayExist(final ByteBuffer key, final ByteBuffer value) { - return keyMayExist(null, null, key, value); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns {@link KeyMayExist.KeyMayExistEnum#kNotExist}, - * otherwise if it can with best effort retreive the value, it returns {@link - * KeyMayExist.KeyMayExistEnum#kExistsWithValue} otherwise it returns {@link - * KeyMayExist.KeyMayExistEnum#kExistsWithoutValue}. The choice not to return a value which might - * exist is at the discretion of the implementation; the only guarantee is that {@link - * KeyMayExist.KeyMayExistEnum#kNotExist} is an assurance that the key does not exist. - * - * @param columnFamilyHandle the {@link ColumnFamilyHandle} to look for the key in - * @param key bytebuffer containing the value of the key - * @param value bytebuffer which will receive a value if the key exists and a value is known - * @return a {@link KeyMayExist} object reporting if key may exist and if a value is provided - */ - public KeyMayExist keyMayExist( - final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, final ByteBuffer value) { - return keyMayExist(columnFamilyHandle, null, key, value); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns {@link KeyMayExist.KeyMayExistEnum#kNotExist}, - * otherwise if it can with best effort retreive the value, it returns {@link - * KeyMayExist.KeyMayExistEnum#kExistsWithValue} otherwise it returns {@link - * KeyMayExist.KeyMayExistEnum#kExistsWithoutValue}. The choice not to return a value which might - * exist is at the discretion of the implementation; the only guarantee is that {@link - * KeyMayExist.KeyMayExistEnum#kNotExist} is an assurance that the key does not exist. - * - * @param readOptions the {@link ReadOptions} to use when reading the key/value - * @param key bytebuffer containing the value of the key - * @param value bytebuffer which will receive a value if the key exists and a value is known - * @return a {@link KeyMayExist} object reporting if key may exist and if a value is provided - */ - public KeyMayExist keyMayExist( - final ReadOptions readOptions, final ByteBuffer key, final ByteBuffer value) { - return keyMayExist(null, readOptions, key, value); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns false, otherwise it returns true if the key might exist. - * That is to say that this method is probabilistic and may return false - * positives, but never a false negative. - * - * @param columnFamilyHandle the {@link ColumnFamilyHandle} to look for the key in - * @param readOptions the {@link ReadOptions} to use when reading the key/value - * @param key bytebuffer containing the value of the key - * @return false if the key definitely does not exist in the database, - * otherwise true. - */ - public boolean keyMayExist(final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions readOptions, final ByteBuffer key) { - assert key != null : "key ByteBuffer parameter cannot be null"; - assert key.isDirect() : "key parameter must be a direct ByteBuffer"; - return keyMayExistDirect(nativeHandle_, - columnFamilyHandle == null ? 
0 : columnFamilyHandle.nativeHandle_, - readOptions == null ? 0 : readOptions.nativeHandle_, key, key.position(), key.limit()); - } - - /** - * If the key definitely does not exist in the database, then this method - * returns {@link KeyMayExist.KeyMayExistEnum#kNotExist}, - * otherwise if it can with best effort retreive the value, it returns {@link - * KeyMayExist.KeyMayExistEnum#kExistsWithValue} otherwise it returns {@link - * KeyMayExist.KeyMayExistEnum#kExistsWithoutValue}. The choice not to return a value which might - * exist is at the discretion of the implementation; the only guarantee is that {@link - * KeyMayExist.KeyMayExistEnum#kNotExist} is an assurance that the key does not exist. - * - * @param columnFamilyHandle the {@link ColumnFamilyHandle} to look for the key in - * @param readOptions the {@link ReadOptions} to use when reading the key/value - * @param key bytebuffer containing the value of the key - * @param value bytebuffer which will receive a value if the key exists and a value is known - * @return a {@link KeyMayExist} object reporting if key may exist and if a value is provided - */ - public KeyMayExist keyMayExist(final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions readOptions, final ByteBuffer key, final ByteBuffer value) { - assert key != null : "key ByteBuffer parameter cannot be null"; - assert key.isDirect() : "key parameter must be a direct ByteBuffer"; - assert value - != null - : "value ByteBuffer parameter cannot be null. If you do not need the value, use a different version of the method"; - assert value.isDirect() : "value parameter must be a direct ByteBuffer"; - - final int[] result = keyMayExistDirectFoundValue(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - readOptions == null ? 0 : readOptions.nativeHandle_, key, key.position(), key.remaining(), - value, value.position(), value.remaining()); - final int valueLength = result[1]; - value.limit(value.position() + Math.min(valueLength, value.remaining())); - return new KeyMayExist(KeyMayExist.KeyMayExistEnum.values()[result[0]], valueLength); - } - - /** - *
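As an illustration only (not part of the original file), a minimal sketch of how the keyMayExist variants above are typically used, assuming db is an already-open RocksDB instance and the usual imports (java.nio.ByteBuffer, java.nio.charset.StandardCharsets, org.rocksdb.Holder); per the javadoc, the ByteBuffer overloads require direct buffers:

    // Holder variant: valueHolder receives the value only if it can be
    // retrieved from memory without doing IO.
    final Holder<byte[]> valueHolder = new Holder<>();
    if (db.keyMayExist("user42".getBytes(StandardCharsets.UTF_8), valueHolder)) {
      if (valueHolder.getValue() != null) {
        System.out.println(new String(valueHolder.getValue(), StandardCharsets.UTF_8));
      }
    } else {
      // the key definitely does not exist
    }

    // ByteBuffer variant: the key must be a direct buffer.
    final ByteBuffer keyBuffer = ByteBuffer.allocateDirect(32);
    keyBuffer.put("user42".getBytes(StandardCharsets.UTF_8)).flip();
    final boolean mayExist = db.keyMayExist(keyBuffer);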

Return a heap-allocated iterator over the contents of the - * database. The result of newIterator() is initially invalid - * (caller must call one of the Seek methods on the iterator - * before using it).

- * - *

Caller should close the iterator when it is no longer needed. - * The returned iterator should be closed before this db is closed. - *

- * - * @return instance of iterator object. - */ - public RocksIterator newIterator() { - return new RocksIterator(this, iterator(nativeHandle_)); - } - - /** - *
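For illustration (not in the original file), a short sketch of the iterator contract described above: the iterator is invalid until a Seek call, and it should be closed before the database. Assumes db is an open RocksDB instance:

    try (final RocksIterator it = db.newIterator()) {
      // initially invalid: a Seek* call is required before use
      for (it.seekToFirst(); it.isValid(); it.next()) {
        final byte[] key = it.key();
        final byte[] value = it.value();
        // ... process key/value ...
      }
    } // closed here, i.e. before db.close()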

Return a heap-allocated iterator over the contents of the - * database. The result of newIterator() is initially invalid - * (caller must call one of the Seek methods on the iterator - * before using it).

- * - *

Caller should close the iterator when it is no longer needed. - * The returned iterator should be closed before this db is closed. - *

- * - * @param readOptions {@link ReadOptions} instance. - * @return instance of iterator object. - */ - public RocksIterator newIterator(final ReadOptions readOptions) { - return new RocksIterator(this, iterator(nativeHandle_, - readOptions.nativeHandle_)); - } - - /** - *
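A hedged usage sketch (not part of the diff) combining a ReadOptions-bound iterator with a snapshot; getSnapshot()/releaseSnapshot() are documented further below, and db is assumed to be an open RocksDB instance:

    final Snapshot snapshot = db.getSnapshot(); // may be null if unsupported
    try (final ReadOptions readOptions = new ReadOptions().setSnapshot(snapshot);
         final RocksIterator it = db.newIterator(readOptions)) {
      for (it.seekToFirst(); it.isValid(); it.next()) {
        // observes a stable view of the DB as of the snapshot
      }
    } finally {
      db.releaseSnapshot(snapshot); // releaseSnapshot tolerates null
    }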

Return a heap-allocated iterator over the contents of a - * ColumnFamily. The result of newIterator() is initially invalid - * (caller must call one of the Seek methods on the iterator - * before using it).

- * - *

Caller should close the iterator when it is no longer needed. - * The returned iterator should be closed before this db is closed. - *

- * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @return instance of iterator object. - */ - public RocksIterator newIterator( - final ColumnFamilyHandle columnFamilyHandle) { - return new RocksIterator(this, iteratorCF(nativeHandle_, - columnFamilyHandle.nativeHandle_)); - } - - /** - *

Return a heap-allocated iterator over the contents of a - * ColumnFamily. The result of newIterator() is initially invalid - * (caller must call one of the Seek methods on the iterator - * before using it).

- * - *

Caller should close the iterator when it is no longer needed. - * The returned iterator should be closed before this db is closed. - *

- * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param readOptions {@link ReadOptions} instance. - * @return instance of iterator object. - */ - public RocksIterator newIterator(final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions readOptions) { - return new RocksIterator(this, iteratorCF(nativeHandle_, - columnFamilyHandle.nativeHandle_, readOptions.nativeHandle_)); - } - - /** - * Returns iterators from a consistent database state across multiple - * column families. Iterators are heap allocated and need to be deleted - * before the db is deleted - * - * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. - * @return {@link java.util.List} containing {@link org.rocksdb.RocksIterator} - * instances - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public List newIterators( - final List columnFamilyHandleList) - throws RocksDBException { - return newIterators(columnFamilyHandleList, new ReadOptions()); - } - - /** - * Returns iterators from a consistent database state across multiple - * column families. Iterators are heap allocated and need to be deleted - * before the db is deleted - * - * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. - * @param readOptions {@link ReadOptions} instance. - * @return {@link java.util.List} containing {@link org.rocksdb.RocksIterator} - * instances - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public List newIterators( - final List columnFamilyHandleList, - final ReadOptions readOptions) throws RocksDBException { - - final long[] columnFamilyHandles = new long[columnFamilyHandleList.size()]; - for (int i = 0; i < columnFamilyHandleList.size(); i++) { - columnFamilyHandles[i] = columnFamilyHandleList.get(i).nativeHandle_; - } - - final long[] iteratorRefs = iterators(nativeHandle_, columnFamilyHandles, - readOptions.nativeHandle_); - - final List iterators = new ArrayList<>( - columnFamilyHandleList.size()); - for (int i = 0; i < columnFamilyHandleList.size(); i++) { - iterators.add(new RocksIterator(this, iteratorRefs[i])); - } - return iterators; - } - - /** - * Return a handle to the current DB state. Iterators created with - * this handle will all observe a stable snapshot of the current DB - * state. The caller must call ReleaseSnapshot(result) when the - * snapshot is no longer needed.

- * - *

null will be returned if the DB fails to take a snapshot or does - * not support snapshots.

- * - * @return Snapshot {@link Snapshot} instance - */ - public Snapshot getSnapshot() { - long snapshotHandle = getSnapshot(nativeHandle_); - if (snapshotHandle != 0) { - return new Snapshot(snapshotHandle); - } - return null; - } - - /** - * Release a previously acquired snapshot. - * - * The caller must not use "snapshot" after this call. - * - * @param snapshot {@link Snapshot} instance - */ - public void releaseSnapshot(final Snapshot snapshot) { - if (snapshot != null) { - releaseSnapshot(nativeHandle_, snapshot.nativeHandle_); - } - } - - /** - * DB implementations can export properties about their state - * via this method on a per column family level. - * - *

If {@code property} is a valid property understood by this DB - * implementation, fills {@code value} with its current value and - * returns true. Otherwise returns false.

- * - *

Valid property names include: - *

    - *
  • "rocksdb.num-files-at-level<N>" - return the number of files at - * level <N>, where <N> is an ASCII representation of a level - * number (e.g. "0").
  • - *
  • "rocksdb.stats" - returns a multi-line string that describes statistics - * about the internal operation of the DB.
  • - *
  • "rocksdb.sstables" - returns a multi-line string that describes all - * of the sstables that make up the db contents.
  • - *
- * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance, or null for the default column family. - * @param property to be fetched. See above for examples - * @return property value - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public String getProperty( - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, - final String property) throws RocksDBException { - return getProperty(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - property, property.length()); - } - - /** - * DB implementations can export properties about their state - * via this method. If "property" is a valid property understood by this - * DB implementation, fills "*value" with its current value and returns - * true. Otherwise returns false. - * - *

Valid property names include: - *

    - *
  • "rocksdb.num-files-at-level<N>" - return the number of files at - * level <N>, where <N> is an ASCII representation of a level - * number (e.g. "0").
  • - *
  • "rocksdb.stats" - returns a multi-line string that describes statistics - * about the internal operation of the DB.
  • - *
  • "rocksdb.sstables" - returns a multi-line string that describes all - * of the sstables that make up the db contents.
  • - *
- * - * @param property to be fetched. See above for examples - * @return property value - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public String getProperty(final String property) throws RocksDBException { - return getProperty(null, property); - } - - - /** - * Gets a property map. - * - * @param property to be fetched. - * - * @return the property map - * - * @throws RocksDBException if an error happens in the underlying native code. - */ - public Map getMapProperty(final String property) - throws RocksDBException { - return getMapProperty(null, property); - } - - /** - * Gets a property map. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance, or null for the default column family. - * @param property to be fetched. - * - * @return the property map - * - * @throws RocksDBException if an error happens in the underlying native code. - */ - public Map getMapProperty( - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, - final String property) throws RocksDBException { - return getMapProperty(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - property, property.length()); - } - - /** - *
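For illustration (not part of the original file), a sketch querying the string-valued properties listed in the javadoc above, plus a map-valued property; the property name "rocksdb.cfstats" passed to getMapProperty is an assumption, and db is an open RocksDB instance:

    final String stats = db.getProperty("rocksdb.stats");
    final String level0Files = db.getProperty("rocksdb.num-files-at-level0");

    // some properties are also exposed as key/value maps
    final Map<String, String> cfStats = db.getMapProperty("rocksdb.cfstats"); // assumed property name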

Similar to GetProperty(), but only works for a subset of properties - * whose return value is a numerical value. Returns the value as a long.

- * - *

Note: As the returned property is of type - * {@code uint64_t} on the C++ side, the returned value can be negative - * because Java 7 supports only signed long values.

- * - *

Java 7: To mitigate the problem of the non-existent - * unsigned long type, values should be encapsulated using - * {@link java.math.BigInteger} to reflect the correct value. The correct - * behavior is guaranteed if {@code 2^64} is added to negative values.

- * - *

Java 8: The value should be treated as an - * unsigned long using the provided methods of {@link Long}.

- * - * @param property to be fetched. - * - * @return numerical property value. - * - * @throws RocksDBException if an error happens in the underlying native code. - */ - public long getLongProperty(final String property) throws RocksDBException { - return getLongProperty(null, property); - } - - /** - *
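A small sketch (not in the original file) of the unsigned handling described above, assuming db is an open RocksDB instance; the Java 8 path uses the unsigned helpers on Long, the Java 7 path widens to BigInteger:

    final long numFiles = db.getLongProperty("rocksdb.num-files-at-level0");

    // Java 8: treat the long as an unsigned 64-bit value
    System.out.println(Long.toUnsignedString(numFiles));

    // Java 7: add 2^64 to negative values via BigInteger
    java.math.BigInteger unsigned = java.math.BigInteger.valueOf(numFiles);
    if (unsigned.signum() < 0) {
      unsigned = unsigned.add(java.math.BigInteger.ONE.shiftLeft(64));
    }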

Similar to GetProperty(), but only works for a subset of properties - * whose return value is a numerical value. Returns the value as a long.

- * - *

Note: As the returned property is of type - * {@code uint64_t} on the C++ side, the returned value can be negative - * because Java 7 supports only signed long values.

- * - *

Java 7: To mitigate the problem of the non-existent - * unsigned long type, values should be encapsulated using - * {@link java.math.BigInteger} to reflect the correct value. The correct - * behavior is guaranteed if {@code 2^64} is added to negative values.

- * - *

Java 8: The value should be treated as an - * unsigned long using the provided methods of {@link Long}.

- * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance, or null for the default column family - * @param property to be fetched. - * - * @return numerical property value - * - * @throws RocksDBException if an error happens in the underlying native code. - */ - public long getLongProperty( - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, - final String property) throws RocksDBException { - return getLongProperty(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - property, property.length()); - } - - /** - * Reset internal stats for DB and all column families. - * - * Note this doesn't reset {@link Options#statistics()} as it is not - * owned by DB. - * - * @throws RocksDBException if an error occurs whilst resetting the stats - */ - public void resetStats() throws RocksDBException { - resetStats(nativeHandle_); - } - - /** - *

Return the sum of getLongProperty across all the column families

- * - *

Note: As the returned property is of type - * {@code uint64_t} on the C++ side, the returned value can be negative - * because Java 7 supports only signed long values.

- * - *

Java 7: To mitigate the problem of the non-existent - * unsigned long type, values should be encapsulated using - * {@link java.math.BigInteger} to reflect the correct value. The correct - * behavior is guaranteed if {@code 2^64} is added to negative values.

- * - *

Java 8: The value should be treated as an - * unsigned long using the provided methods of {@link Long}.

- * - * @param property to be fetched. - * - * @return numerical property value - * - * @throws RocksDBException if an error happens in the underlying native code. - */ - public long getAggregatedLongProperty(final String property) - throws RocksDBException { - return getAggregatedLongProperty(nativeHandle_, property, - property.length()); - } - - /** - * Get the approximate file system space used by keys in each range. - * - * Note that the returned sizes measure file system space usage, so - * if the user data compresses by a factor of ten, the returned - * sizes will be one-tenth the size of the corresponding user data size. - * - * If {@code sizeApproximationFlags} defines whether the returned size - * should include the recently written data in the mem-tables (if - * the mem-table type supports it), data serialized to disk, or both. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance, or null for the default column family - * @param ranges the ranges over which to approximate sizes - * @param sizeApproximationFlags flags to determine what to include in the - * approximation. - * - * @return the sizes - */ - public long[] getApproximateSizes( - /*@Nullable*/ final ColumnFamilyHandle columnFamilyHandle, - final List ranges, - final SizeApproximationFlag... sizeApproximationFlags) { - - byte flags = 0x0; - for (final SizeApproximationFlag sizeApproximationFlag - : sizeApproximationFlags) { - flags |= sizeApproximationFlag.getValue(); - } - - return getApproximateSizes(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - toRangeSliceHandles(ranges), flags); - } - - /** - * Get the approximate file system space used by keys in each range for - * the default column family. - * - * Note that the returned sizes measure file system space usage, so - * if the user data compresses by a factor of ten, the returned - * sizes will be one-tenth the size of the corresponding user data size. - * - * If {@code sizeApproximationFlags} defines whether the returned size - * should include the recently written data in the mem-tables (if - * the mem-table type supports it), data serialized to disk, or both. - * - * @param ranges the ranges over which to approximate sizes - * @param sizeApproximationFlags flags to determine what to include in the - * approximation. - * - * @return the sizes. - */ - public long[] getApproximateSizes(final List ranges, - final SizeApproximationFlag... sizeApproximationFlags) { - return getApproximateSizes(null, ranges, sizeApproximationFlags); - } - - public static class CountAndSize { - public final long count; - public final long size; - - public CountAndSize(final long count, final long size) { - this.count = count; - this.size = size; - } - } - - /** - * This method is similar to - * {@link #getApproximateSizes(ColumnFamilyHandle, List, SizeApproximationFlag...)}, - * except that it returns approximate number of records and size in memtables. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance, or null for the default column family - * @param range the ranges over which to get the memtable stats - * - * @return the count and size for the range - */ - public CountAndSize getApproximateMemTableStats( - /*@Nullable*/ final ColumnFamilyHandle columnFamilyHandle, - final Range range) { - final long[] result = getApproximateMemTableStats(nativeHandle_, - columnFamilyHandle == null ? 
0 : columnFamilyHandle.nativeHandle_, - range.start.getNativeHandle(), - range.limit.getNativeHandle()); - return new CountAndSize(result[0], result[1]); - } - - /** - * This method is similar to - * {@link #getApproximateSizes(ColumnFamilyHandle, List, SizeApproximationFlag...)}, - * except that it returns approximate number of records and size in memtables. - * - * @param range the ranges over which to get the memtable stats - * - * @return the count and size for the range - */ - public CountAndSize getApproximateMemTableStats( - final Range range) { - return getApproximateMemTableStats(null, range); - } - - /** - *
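As a usage illustration (not part of the original file), a sketch of the size-approximation calls above for the key range ["a", "m"); db is assumed to be an open RocksDB instance, and the SizeApproximationFlag constant names are assumptions:

    final Range range = new Range(new Slice("a".getBytes(StandardCharsets.UTF_8)),
        new Slice("m".getBytes(StandardCharsets.UTF_8)));

    // approximate file-system and memtable usage for the range
    final long[] sizes = db.getApproximateSizes(Arrays.asList(range),
        SizeApproximationFlag.INCLUDE_FILES, SizeApproximationFlag.INCLUDE_MEMTABLES); // assumed flag names

    // approximate record count and size currently held in the memtables
    final RocksDB.CountAndSize memStats = db.getApproximateMemTableStats(range);
    System.out.println(memStats.count + " keys / " + memStats.size + " bytes in memtables");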

Range compaction of database.

- *

Note: After the database has been compacted, - * all data will have been pushed down to the last level containing - * any data.

- * - *

See also

- *
    - *
  • {@link #compactRange(byte[], byte[])}
  • - *
- * - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public void compactRange() throws RocksDBException { - compactRange(null); - } - - /** - *

Range compaction of column family.

- *

Note: After the database has been compacted, - * all data will have been pushed down to the last level containing - * any data.

- * - *

See also

- *
    - *
  • - * {@link #compactRange(ColumnFamilyHandle, byte[], byte[])} - *
  • - *
- * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance, or null for the default column family. - * - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public void compactRange( - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle) - throws RocksDBException { - compactRange(nativeHandle_, null, -1, null, -1, 0, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); - } - - /** - *

Range compaction of database.

- *

Note: After the database has been compacted, - * all data will have been pushed down to the last level containing - * any data.

- * - *

See also

- *
    - *
  • {@link #compactRange()}
  • - *
- * - * @param begin start of key range (included in range) - * @param end end of key range (excluded from range) - * - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public void compactRange(final byte[] begin, final byte[] end) - throws RocksDBException { - compactRange(null, begin, end); - } - - /** - *

Range compaction of column family.

- *

Note: After the database has been compacted, - * all data will have been pushed down to the last level containing - * any data.

- * - *

See also

- *
    - *
  • {@link #compactRange(ColumnFamilyHandle)}
  • - *
- * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance, or null for the default column family. - * @param begin start of key range (included in range) - * @param end end of key range (excluded from range) - * - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public void compactRange( - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, - final byte[] begin, final byte[] end) throws RocksDBException { - compactRange(nativeHandle_, - begin, begin == null ? -1 : begin.length, - end, end == null ? -1 : end.length, - 0, columnFamilyHandle == null ? 0: columnFamilyHandle.nativeHandle_); - } - - /** - *
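For illustration (not in the original file), the two compactRange flavours described above on an open db; after either call, the affected data will have been pushed down to the last level containing data:

    // full range compaction of the default column family
    db.compactRange();

    // or compact only the keys in ["user0", "user9999")
    db.compactRange("user0".getBytes(StandardCharsets.UTF_8),
        "user9999".getBytes(StandardCharsets.UTF_8));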

Range compaction of column family.

- *

Note: After the database has been compacted, - * all data will have been pushed down to the last level containing - * any data.

- * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance. - * @param begin start of key range (included in range) - * @param end end of key range (excluded from range) - * @param compactRangeOptions options for the compaction - * - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public void compactRange( - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, - final byte[] begin, final byte[] end, - final CompactRangeOptions compactRangeOptions) throws RocksDBException { - compactRange(nativeHandle_, - begin, begin == null ? -1 : begin.length, - end, end == null ? -1 : end.length, - compactRangeOptions.nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); - } - - /** - * Change the options for the column family handle. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance, or null for the default column family. - * @param mutableColumnFamilyOptions the options. - * - * @throws RocksDBException if an error occurs whilst setting the options - */ - public void setOptions( - /* @Nullable */final ColumnFamilyHandle columnFamilyHandle, - final MutableColumnFamilyOptions mutableColumnFamilyOptions) - throws RocksDBException { - setOptions(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - mutableColumnFamilyOptions.getKeys(), mutableColumnFamilyOptions.getValues()); - } - - /** - * Get the options for the column family handle - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance, or null for the default column family. - * - * @return the options parsed from the options string return by RocksDB - * - * @throws RocksDBException if an error occurs while getting the options string, or parsing the - * resulting options string into options - */ - public MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder getOptions( - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle) throws RocksDBException { - String optionsString = getOptions( - nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); - return MutableColumnFamilyOptions.parse(optionsString, true); - } - - /** - * Default column family options - * - * @return the options parsed from the options string return by RocksDB - * - * @throws RocksDBException if an error occurs while getting the options string, or parsing the - * resulting options string into options - */ - public MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder getOptions() - throws RocksDBException { - return getOptions(null); - } - - /** - * Get the database options - * - * @return the DB options parsed from the options string return by RocksDB - * - * @throws RocksDBException if an error occurs while getting the options string, or parsing the - * resulting options string into options - */ - public MutableDBOptions.MutableDBOptionsBuilder getDBOptions() throws RocksDBException { - String optionsString = getDBOptions(nativeHandle_); - return MutableDBOptions.parse(optionsString, true); - } - - /** - * Change the options for the default column family handle. - * - * @param mutableColumnFamilyOptions the options. - * - * @throws RocksDBException if an error occurs whilst setting the options - */ - public void setOptions( - final MutableColumnFamilyOptions mutableColumnFamilyOptions) - throws RocksDBException { - setOptions(null, mutableColumnFamilyOptions); - } - - /** - * Set the options for the column family handle. 
- * - * @param mutableDBoptions the options. - * - * @throws RocksDBException if an error occurs whilst setting the options - */ - public void setDBOptions(final MutableDBOptions mutableDBoptions) - throws RocksDBException { - setDBOptions(nativeHandle_, - mutableDBoptions.getKeys(), - mutableDBoptions.getValues()); - } - - /** - * Takes a list of files specified by file names and - * compacts them to the specified level. - * - * Note that the behavior is different from - * {@link #compactRange(ColumnFamilyHandle, byte[], byte[])} - * in that CompactFiles() performs the compaction job using the CURRENT - * thread. - * - * @param compactionOptions compaction options - * @param inputFileNames the name of the files to compact - * @param outputLevel the level to which they should be compacted - * @param outputPathId the id of the output path, or -1 - * @param compactionJobInfo the compaction job info, this parameter - * will be updated with the info from compacting the files, - * can just be null if you don't need it. - * - * @return the list of compacted files - * - * @throws RocksDBException if an error occurs during compaction - */ - public List compactFiles( - final CompactionOptions compactionOptions, - final List inputFileNames, - final int outputLevel, - final int outputPathId, - /* @Nullable */ final CompactionJobInfo compactionJobInfo) - throws RocksDBException { - return compactFiles(compactionOptions, null, inputFileNames, outputLevel, - outputPathId, compactionJobInfo); - } - - /** - * Takes a list of files specified by file names and - * compacts them to the specified level. - * - * Note that the behavior is different from - * {@link #compactRange(ColumnFamilyHandle, byte[], byte[])} - * in that CompactFiles() performs the compaction job using the CURRENT - * thread. - * - * @param compactionOptions compaction options - * @param columnFamilyHandle columnFamilyHandle, or null for the - * default column family - * @param inputFileNames the name of the files to compact - * @param outputLevel the level to which they should be compacted - * @param outputPathId the id of the output path, or -1 - * @param compactionJobInfo the compaction job info, this parameter - * will be updated with the info from compacting the files, - * can just be null if you don't need it. - * - * @return the list of compacted files - * - * @throws RocksDBException if an error occurs during compaction - */ - public List compactFiles( - final CompactionOptions compactionOptions, - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, - final List inputFileNames, - final int outputLevel, - final int outputPathId, - /* @Nullable */ final CompactionJobInfo compactionJobInfo) - throws RocksDBException { - return Arrays.asList(compactFiles(nativeHandle_, compactionOptions.nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - inputFileNames.toArray(new String[0]), - outputLevel, - outputPathId, - compactionJobInfo == null ? 0 : compactionJobInfo.nativeHandle_)); - } - - /** - * This function will cancel all currently running background processes. - * - * @param wait if true, wait for all background work to be cancelled before - * returning. - * - */ - public void cancelAllBackgroundWork(boolean wait) { - cancelAllBackgroundWork(nativeHandle_, wait); - } - - /** - * This function will wait until all currently running background processes - * finish. 
After it returns, no background process will be run until - * {@link #continueBackgroundWork()} is called - * - * @throws RocksDBException if an error occurs when pausing background work - */ - public void pauseBackgroundWork() throws RocksDBException { - pauseBackgroundWork(nativeHandle_); - } - - /** - * Resumes background work which was suspended by - * previously calling {@link #pauseBackgroundWork()} - * - * @throws RocksDBException if an error occurs when resuming background work - */ - public void continueBackgroundWork() throws RocksDBException { - continueBackgroundWork(nativeHandle_); - } - - /** - * Enable automatic compactions for the given column - * families if they were previously disabled. - * - * The function will first set the - * {@link ColumnFamilyOptions#disableAutoCompactions()} option for each - * column family to false, after which it will schedule a flush/compaction. - * - * NOTE: Setting disableAutoCompactions to 'false' through - * {@link #setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)} - * does NOT schedule a flush/compaction afterwards, and only changes the - * parameter itself within the column family option. - * - * @param columnFamilyHandles the column family handles - * - * @throws RocksDBException if an error occurs whilst enabling auto-compaction - */ - public void enableAutoCompaction( - final List columnFamilyHandles) - throws RocksDBException { - enableAutoCompaction(nativeHandle_, - toNativeHandleList(columnFamilyHandles)); - } - - /** - * Number of levels used for this DB. - * - * @return the number of levels - */ - public int numberLevels() { - return numberLevels(null); - } - - /** - * Number of levels used for a column family in this DB. - * - * @param columnFamilyHandle the column family handle, or null - * for the default column family - * - * @return the number of levels - */ - public int numberLevels(/* @Nullable */final ColumnFamilyHandle columnFamilyHandle) { - return numberLevels(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); - } - - /** - * Maximum level to which a new compacted memtable is pushed if it - * does not create overlap. - * - * @return the maximum level - */ - public int maxMemCompactionLevel() { - return maxMemCompactionLevel(null); - } - - /** - * Maximum level to which a new compacted memtable is pushed if it - * does not create overlap. - * - * @param columnFamilyHandle the column family handle - * - * @return the maximum level - */ - public int maxMemCompactionLevel( - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle) { - return maxMemCompactionLevel(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); - } - - /** - * Number of files in level-0 that would stop writes. - * - * @return the number of files - */ - public int level0StopWriteTrigger() { - return level0StopWriteTrigger(null); - } - - /** - * Number of files in level-0 that would stop writes. - * - * @param columnFamilyHandle the column family handle - * - * @return the number of files - */ - public int level0StopWriteTrigger( - /* @Nullable */final ColumnFamilyHandle columnFamilyHandle) { - return level0StopWriteTrigger(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); - } - - /** - * Get DB name -- the exact same name that was provided as an argument to - * as path to {@link #open(Options, String)}. 
- * - * @return the DB name - */ - public String getName() { - return getName(nativeHandle_); - } - - /** - * Get the Env object from the DB - * - * @return the env - */ - public Env getEnv() { - final long envHandle = getEnv(nativeHandle_); - if (envHandle == Env.getDefault().nativeHandle_) { - return Env.getDefault(); - } else { - final Env env = new RocksEnv(envHandle); - env.disOwnNativeHandle(); // we do not own the Env! - return env; - } - } - - /** - *

Flush all memory table data.

- * - *

Note: it must be ensured that the FlushOptions instance - * is not GC'ed before this method finishes. If the wait parameter is - * set to false, flush processing is asynchronous.

- * - * @param flushOptions {@link org.rocksdb.FlushOptions} instance. - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public void flush(final FlushOptions flushOptions) - throws RocksDBException { - flush(flushOptions, (List) null); - } - - /** - *

Flush all memory table data.

- * - *

Note: it must be ensured that the FlushOptions instance - * is not GC'ed before this method finishes. If the wait parameter is - * set to false, flush processing is asynchronous.

- * - * @param flushOptions {@link org.rocksdb.FlushOptions} instance. - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance. - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public void flush(final FlushOptions flushOptions, - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle) - throws RocksDBException { - flush(flushOptions, - columnFamilyHandle == null ? null : Arrays.asList(columnFamilyHandle)); - } - - /** - * Flushes multiple column families. - * - * If atomic flush is not enabled, this is equivalent to calling - * {@link #flush(FlushOptions, ColumnFamilyHandle)} multiple times. - * - * If atomic flush is enabled, this will flush all column families - * specified up to the latest sequence number at the time when flush is - * requested. - * - * @param flushOptions {@link org.rocksdb.FlushOptions} instance. - * @param columnFamilyHandles column family handles. - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public void flush(final FlushOptions flushOptions, - /* @Nullable */ final List columnFamilyHandles) - throws RocksDBException { - flush(nativeHandle_, flushOptions.nativeHandle_, - toNativeHandleList(columnFamilyHandles)); - } - - /** - * Flush the WAL memory buffer to the file. If {@code sync} is true, - * it calls {@link #syncWal()} afterwards. - * - * @param sync true to also fsync to disk. - * - * @throws RocksDBException if an error occurs whilst flushing - */ - public void flushWal(final boolean sync) throws RocksDBException { - flushWal(nativeHandle_, sync); - } - - /** - * Sync the WAL. - * - * Note that {@link #write(WriteOptions, WriteBatch)} followed by - * {@link #syncWal()} is not exactly the same as - * {@link #write(WriteOptions, WriteBatch)} with - * {@link WriteOptions#sync()} set to true; In the latter case the changes - * won't be visible until the sync is done. - * - * Currently only works if {@link Options#allowMmapWrites()} is set to false. - * - * @throws RocksDBException if an error occurs whilst syncing - */ - public void syncWal() throws RocksDBException { - syncWal(nativeHandle_); - } - - /** - *
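A minimal sketch (not part of the diff) of the flush and WAL calls above, assuming db is an open RocksDB instance; FlushOptions is kept in a try-with-resources block so it cannot be GC'ed while the flush is in flight, per the note above:

    try (final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) {
      db.flush(flushOptions); // flush all memtables and wait for completion
    }

    db.flushWal(true); // flush the WAL buffer and sync it to disk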

The sequence number of the most recent transaction.

- * - * @return sequence number of the most - * recent transaction. - */ - public long getLatestSequenceNumber() { - return getLatestSequenceNumber(nativeHandle_); - } - - /** - *
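For illustration (not in the original file), pairing the latest sequence number with WAL tailing via getUpdatesSince(), which is documented further below; the TransactionLogIterator/BatchResult accessor names used here are assumptions, and db is an open instance with WAL_ttl_seconds or WAL_size_limit_MB configured generously:

    final long since = db.getLatestSequenceNumber();
    try (final TransactionLogIterator walIterator = db.getUpdatesSince(since)) {
      while (walIterator.isValid()) {
        final TransactionLogIterator.BatchResult batch = walIterator.getBatch(); // assumed accessor
        System.out.println("write batch at sequence " + batch.sequenceNumber()); // assumed accessor
        walIterator.next();
      }
    }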

Prevent file deletions. Compactions will continue to occur, - * but no obsolete files will be deleted. Calling this multiple - * times has the same effect as calling it once.

- * - * @throws RocksDBException thrown if operation was not performed - * successfully. - */ - public void disableFileDeletions() throws RocksDBException { - disableFileDeletions(nativeHandle_); - } - - /** - *

Allow compactions to delete obsolete files. - * If force == true, the call to EnableFileDeletions() - * will guarantee that file deletions are enabled after - * the call, even if DisableFileDeletions() was called - * multiple times before.

- * - *

If force == false, EnableFileDeletions will only - * enable file deletion after it's been called at least - * as many times as DisableFileDeletions(), enabling - * the two methods to be called by two threads - * concurrently without synchronization - * -- i.e., file deletions will be enabled only after both - * threads call EnableFileDeletions()

- * - * @param force boolean value described above. - * - * @throws RocksDBException thrown if operation was not performed - * successfully. - */ - public void enableFileDeletions(final boolean force) - throws RocksDBException { - enableFileDeletions(nativeHandle_, force); - } - - public static class LiveFiles { - /** - * The valid size of the manifest file. The manifest file is an ever growing - * file, but only the portion specified here is valid for this snapshot. - */ - public final long manifestFileSize; - - /** - * The files are relative to the {@link #getName()} and are not - * absolute paths. Despite being relative paths, the file names begin - * with "/". - */ - public final List files; - - LiveFiles(final long manifestFileSize, final List files) { - this.manifestFileSize = manifestFileSize; - this.files = files; - } - } - - /** - * Retrieve the list of all files in the database after flushing the memtable. - * - * See {@link #getLiveFiles(boolean)}. - * - * @return the live files - * - * @throws RocksDBException if an error occurs whilst retrieving the list - * of live files - */ - public LiveFiles getLiveFiles() throws RocksDBException { - return getLiveFiles(true); - } - - /** - * Retrieve the list of all files in the database. - * - * In case you have multiple column families, even if {@code flushMemtable} - * is true, you still need to call {@link #getSortedWalFiles()} - * after {@link #getLiveFiles(boolean)} to compensate for new data that - * arrived to already-flushed column families while other column families - * were flushing. - * - * NOTE: Calling {@link #getLiveFiles(boolean)} followed by - * {@link #getSortedWalFiles()} can generate a lossless backup. - * - * @param flushMemtable set to true to flush before recoding the live - * files. Setting to false is useful when we don't want to wait for flush - * which may have to wait for compaction to complete taking an - * indeterminate time. - * - * @return the live files - * - * @throws RocksDBException if an error occurs whilst retrieving the list - * of live files - */ - public LiveFiles getLiveFiles(final boolean flushMemtable) - throws RocksDBException { - final String[] result = getLiveFiles(nativeHandle_, flushMemtable); - if (result == null) { - return null; - } - final String[] files = Arrays.copyOf(result, result.length - 1); - final long manifestFileSize = Long.parseLong(result[result.length - 1]); - - return new LiveFiles(manifestFileSize, Arrays.asList(files)); - } - - /** - * Retrieve the sorted list of all wal files with earliest file first. - * - * @return the log files - * - * @throws RocksDBException if an error occurs whilst retrieving the list - * of sorted WAL files - */ - public List getSortedWalFiles() throws RocksDBException { - final LogFile[] logFiles = getSortedWalFiles(nativeHandle_); - return Arrays.asList(logFiles); - } - - /** - *
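A hedged sketch (not part of the original file) of the lossless-backup recipe described in the getLiveFiles(boolean) javadoc above: pause file deletions, copy the live files, then the sorted WAL files. copySstOrManifest and copyWalFile are hypothetical helpers, LogFile#pathName() is an assumed accessor, and db is an open RocksDB instance:

    db.disableFileDeletions();
    try {
      final RocksDB.LiveFiles liveFiles = db.getLiveFiles(true /* flushMemtable */);
      // file names are relative to the DB directory; the MANIFEST is only
      // valid up to liveFiles.manifestFileSize bytes
      for (final String file : liveFiles.files) {
        copySstOrManifest(file, liveFiles.manifestFileSize); // hypothetical helper
      }
      for (final LogFile walFile : db.getSortedWalFiles()) {
        copyWalFile(walFile.pathName()); // hypothetical helper, assumed accessor
      }
    } finally {
      db.enableFileDeletions(false);
    }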

Returns an iterator that is positioned at a write-batch containing - * seq_number. If the sequence number is non-existent, it returns an iterator - * at the first available seq_no after the requested seq_no.

- * - *

WAL_ttl_seconds or WAL_size_limit_MB must be set to large values to - * use this API, otherwise the WAL files will be - * cleared aggressively and the iterator may become invalid before - * an update is read.

- * - * @param sequenceNumber sequence number offset - * - * @return {@link org.rocksdb.TransactionLogIterator} instance. - * - * @throws org.rocksdb.RocksDBException if iterator cannot be retrieved - * from native-side. - */ - public TransactionLogIterator getUpdatesSince(final long sequenceNumber) - throws RocksDBException { - return new TransactionLogIterator( - getUpdatesSince(nativeHandle_, sequenceNumber)); - } - - /** - * Delete the file name from the db directory and update the internal state to - * reflect that. Supports deletion of sst and log files only. 'name' must be - * path relative to the db directory. eg. 000001.sst, /archive/000003.log - * - * @param name the file name - * - * @throws RocksDBException if an error occurs whilst deleting the file - */ - public void deleteFile(final String name) throws RocksDBException { - deleteFile(nativeHandle_, name); - } - - /** - * Gets a list of all table files metadata. - * - * @return table files metadata. - */ - public List getLiveFilesMetaData() { - return Arrays.asList(getLiveFilesMetaData(nativeHandle_)); - } - - /** - * Obtains the meta data of the specified column family of the DB. - * - * @param columnFamilyHandle the column family - * - * @return the column family metadata - */ - public ColumnFamilyMetaData getColumnFamilyMetaData( - /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle) { - return getColumnFamilyMetaData(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); - } - - /** - * Obtains the meta data of the default column family of the DB. - * - * @return the column family metadata - */ - public ColumnFamilyMetaData getColumnFamilyMetaData() { - return getColumnFamilyMetaData(null); - } - - /** - * ingestExternalFile will load a list of external SST files (1) into the DB - * We will try to find the lowest possible level that the file can fit in, and - * ingest the file into this level (2). A file that have a key range that - * overlap with the memtable key range will require us to Flush the memtable - * first before ingesting the file. - * - * (1) External SST files can be created using {@link SstFileWriter} - * (2) We will try to ingest the files to the lowest possible level - * even if the file compression doesn't match the level compression - * - * @param filePathList The list of files to ingest - * @param ingestExternalFileOptions the options for the ingestion - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void ingestExternalFile(final List filePathList, - final IngestExternalFileOptions ingestExternalFileOptions) - throws RocksDBException { - ingestExternalFile(nativeHandle_, getDefaultColumnFamily().nativeHandle_, - filePathList.toArray(new String[0]), - filePathList.size(), ingestExternalFileOptions.nativeHandle_); - } - - /** - * ingestExternalFile will load a list of external SST files (1) into the DB - * We will try to find the lowest possible level that the file can fit in, and - * ingest the file into this level (2). A file that have a key range that - * overlap with the memtable key range will require us to Flush the memtable - * first before ingesting the file. 
- * - * (1) External SST files can be created using {@link SstFileWriter} - * (2) We will try to ingest the files to the lowest possible level - * even if the file compression doesn't match the level compression - * - * @param columnFamilyHandle The column family for the ingested files - * @param filePathList The list of files to ingest - * @param ingestExternalFileOptions the options for the ingestion - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void ingestExternalFile(final ColumnFamilyHandle columnFamilyHandle, - final List filePathList, - final IngestExternalFileOptions ingestExternalFileOptions) - throws RocksDBException { - ingestExternalFile(nativeHandle_, columnFamilyHandle.nativeHandle_, - filePathList.toArray(new String[0]), - filePathList.size(), ingestExternalFileOptions.nativeHandle_); - } - - /** - * Verify checksum - * - * @throws RocksDBException if the checksum is not valid - */ - public void verifyChecksum() throws RocksDBException { - verifyChecksum(nativeHandle_); - } - - /** - * Gets the handle for the default column family - * - * @return The handle of the default column family - */ - public ColumnFamilyHandle getDefaultColumnFamily() { - final ColumnFamilyHandle cfHandle = new ColumnFamilyHandle(this, - getDefaultColumnFamily(nativeHandle_)); - cfHandle.disOwnNativeHandle(); - return cfHandle; - } - - /** - * Get the properties of all tables. - * - * @param columnFamilyHandle the column family handle, or null for the default - * column family. - * - * @return the properties - * - * @throws RocksDBException if an error occurs whilst getting the properties - */ - public Map getPropertiesOfAllTables( - /* @Nullable */final ColumnFamilyHandle columnFamilyHandle) - throws RocksDBException { - return getPropertiesOfAllTables(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); - } - - /** - * Get the properties of all tables in the default column family. - * - * @return the properties - * - * @throws RocksDBException if an error occurs whilst getting the properties - */ - public Map getPropertiesOfAllTables() - throws RocksDBException { - return getPropertiesOfAllTables(null); - } - - /** - * Get the properties of tables in range. - * - * @param columnFamilyHandle the column family handle, or null for the default - * column family. - * @param ranges the ranges over which to get the table properties - * - * @return the properties - * - * @throws RocksDBException if an error occurs whilst getting the properties - */ - public Map getPropertiesOfTablesInRange( - /* @Nullable */final ColumnFamilyHandle columnFamilyHandle, - final List ranges) throws RocksDBException { - return getPropertiesOfTablesInRange(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - toRangeSliceHandles(ranges)); - } - - /** - * Get the properties of tables in range for the default column family. - * - * @param ranges the ranges over which to get the table properties - * - * @return the properties - * - * @throws RocksDBException if an error occurs whilst getting the properties - */ - public Map getPropertiesOfTablesInRange( - final List ranges) throws RocksDBException { - return getPropertiesOfTablesInRange(null, ranges); - } - - /** - * Suggest the range to compact. - * - * @param columnFamilyHandle the column family handle, or null for the default - * column family. - * - * @return the suggested range. 
- * - * @throws RocksDBException if an error occurs whilst suggesting the range - */ - public Range suggestCompactRange( - /* @Nullable */final ColumnFamilyHandle columnFamilyHandle) - throws RocksDBException { - final long[] rangeSliceHandles = suggestCompactRange(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); - return new Range(new Slice(rangeSliceHandles[0]), - new Slice(rangeSliceHandles[1])); - } - - /** - * Suggest the range to compact for the default column family. - * - * @return the suggested range. - * - * @throws RocksDBException if an error occurs whilst suggesting the range - */ - public Range suggestCompactRange() - throws RocksDBException { - return suggestCompactRange(null); - } - - /** - * Promote L0. - * - * @param columnFamilyHandle the column family handle, - * or null for the default column family. - * @param targetLevel the target level for L0 - * - * @throws RocksDBException if an error occurs whilst promoting L0 - */ - public void promoteL0( - /* @Nullable */final ColumnFamilyHandle columnFamilyHandle, - final int targetLevel) throws RocksDBException { - promoteL0(nativeHandle_, - columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, - targetLevel); - } - - /** - * Promote L0 for the default column family. - * - * @param targetLevel the target level for L0 - * - * @throws RocksDBException if an error occurs whilst promoting L0 - */ - public void promoteL0(final int targetLevel) - throws RocksDBException { - promoteL0(null, targetLevel); - } - - /** - * Trace DB operations. - * - * Use {@link #endTrace()} to stop tracing. - * - * @param traceOptions the options - * @param traceWriter the trace writer - * - * @throws RocksDBException if an error occurs whilst starting the trace - */ - public void startTrace(final TraceOptions traceOptions, - final AbstractTraceWriter traceWriter) throws RocksDBException { - startTrace(nativeHandle_, traceOptions.getMaxTraceFileSize(), - traceWriter.nativeHandle_); - /** - * NOTE: {@link #startTrace(long, long, long) transfers the ownership - * from Java to C++, so we must disown the native handle here. - */ - traceWriter.disOwnNativeHandle(); - } - - /** - * Stop tracing DB operations. - * - * See {@link #startTrace(TraceOptions, AbstractTraceWriter)} - * - * @throws RocksDBException if an error occurs whilst ending the trace - */ - public void endTrace() throws RocksDBException { - endTrace(nativeHandle_); - } - - /** - * Make the secondary instance catch up with the primary by tailing and - * replaying the MANIFEST and WAL of the primary. - * Column families created by the primary after the secondary instance starts - * will be ignored unless the secondary instance closes and restarts with the - * newly created column families. - * Column families that exist before secondary instance starts and dropped by - * the primary afterwards will be marked as dropped. However, as long as the - * secondary instance does not delete the corresponding column family - * handles, the data of the column family is still accessible to the - * secondary. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void tryCatchUpWithPrimary() throws RocksDBException { - tryCatchUpWithPrimary(nativeHandle_); - } - - /** - * Delete files in multiple ranges at once. - * Delete files in a lot of ranges one at a time can be slow, use this API for - * better performance in that case. 
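A sketch of the pair-wise range encoding this method expects, with invented keys and an already-open db (imports elided):

// Ranges are passed as a flat list of (from, to) pairs.
final List<byte[]> ranges = Arrays.asList(
    "user:0000".getBytes(), "user:4999".getBytes(),   // first range
    "user:8000".getBytes(), "user:8999".getBytes());  // second range
db.deleteFilesInRanges(null /* default column family */, ranges, true /* includeEnd */);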
- * - * @param columnFamily - The column family for operation (null for default) - * @param includeEnd - Whether ranges should include end - * @param ranges - pairs of ranges (from1, to1, from2, to2, ...) - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void deleteFilesInRanges(final ColumnFamilyHandle columnFamily, - final List ranges, final boolean includeEnd) - throws RocksDBException { - if (ranges.size() == 0) { - return; - } - if ((ranges.size() % 2) != 0) { - throw new IllegalArgumentException("Ranges size needs to be multiple of 2 " - + "(from1, to1, from2, to2, ...), but is " + ranges.size()); - } - - final byte[][] rangesArray = ranges.toArray(new byte[ranges.size()][]); - - deleteFilesInRanges(nativeHandle_, columnFamily == null ? 0 : columnFamily.nativeHandle_, - rangesArray, includeEnd); - } - - /** - * Static method to destroy the contents of the specified database. - * Be very careful using this method. - * - * @param path the path to the Rocksdb database. - * @param options {@link org.rocksdb.Options} instance. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public static void destroyDB(final String path, final Options options) - throws RocksDBException { - destroyDB(path, options.nativeHandle_); - } - - private /* @Nullable */ long[] toNativeHandleList( - /* @Nullable */ final List objectList) { - if (objectList == null) { - return null; - } - final int len = objectList.size(); - final long[] handleList = new long[len]; - for (int i = 0; i < len; i++) { - handleList[i] = objectList.get(i).nativeHandle_; - } - return handleList; - } - - private static long[] toRangeSliceHandles(final List ranges) { - final long rangeSliceHandles[] = new long [ranges.size() * 2]; - for (int i = 0, j = 0; i < ranges.size(); i++) { - final Range range = ranges.get(i); - rangeSliceHandles[j++] = range.start.getNativeHandle(); - rangeSliceHandles[j++] = range.limit.getNativeHandle(); - } - return rangeSliceHandles; - } - - protected void storeOptionsInstance(DBOptionsInterface options) { - options_ = options; - } - - private static void checkBounds(int offset, int len, int size) { - if ((offset | len | (offset + len) | (size - (offset + len))) < 0) { - throw new IndexOutOfBoundsException(String.format("offset(%d), len(%d), size(%d)", offset, len, size)); - } - } - - private static int computeCapacityHint(final int estimatedNumberOfItems) { - // Default load factor for HashMap is 0.75, so N * 1.5 will be at the load - // limit. We add +1 for a buffer. 
- return (int)Math.ceil(estimatedNumberOfItems * 1.5 + 1.0); - } - - // native methods - private native static long open(final long optionsHandle, - final String path) throws RocksDBException; - - /** - * @param optionsHandle Native handle pointing to an Options object - * @param path The directory path for the database files - * @param columnFamilyNames An array of column family names - * @param columnFamilyOptions An array of native handles pointing to - * ColumnFamilyOptions objects - * - * @return An array of native handles, [0] is the handle of the RocksDB object - * [1..1+n] are handles of the ColumnFamilyReferences - * - * @throws RocksDBException thrown if the database could not be opened - */ - private native static long[] open(final long optionsHandle, - final String path, final byte[][] columnFamilyNames, - final long[] columnFamilyOptions) throws RocksDBException; - - private native static long openROnly(final long optionsHandle, final String path, - final boolean errorIfWalFileExists) throws RocksDBException; - - /** - * @param optionsHandle Native handle pointing to an Options object - * @param path The directory path for the database files - * @param columnFamilyNames An array of column family names - * @param columnFamilyOptions An array of native handles pointing to - * ColumnFamilyOptions objects - * - * @return An array of native handles, [0] is the handle of the RocksDB object - * [1..1+n] are handles of the ColumnFamilyReferences - * - * @throws RocksDBException thrown if the database could not be opened - */ - private native static long[] openROnly(final long optionsHandle, final String path, - final byte[][] columnFamilyNames, final long[] columnFamilyOptions, - final boolean errorIfWalFileExists) throws RocksDBException; - - private native static long openAsSecondary(final long optionsHandle, final String path, - final String secondaryPath) throws RocksDBException; - - private native static long[] openAsSecondary(final long optionsHandle, final String path, - final String secondaryPath, final byte[][] columnFamilyNames, - final long[] columnFamilyOptions) throws RocksDBException; - - @Override protected native void disposeInternal(final long handle); - - private native static void closeDatabase(final long handle) - throws RocksDBException; - private native static byte[][] listColumnFamilies(final long optionsHandle, - final String path) throws RocksDBException; - private native long createColumnFamily(final long handle, - final byte[] columnFamilyName, final int columnFamilyNamelen, - final long columnFamilyOptions) throws RocksDBException; - private native long[] createColumnFamilies(final long handle, - final long columnFamilyOptionsHandle, final byte[][] columnFamilyNames) - throws RocksDBException; - private native long[] createColumnFamilies(final long handle, - final long columnFamilyOptionsHandles[], final byte[][] columnFamilyNames) - throws RocksDBException; - private native void dropColumnFamily( - final long handle, final long cfHandle) throws RocksDBException; - private native void dropColumnFamilies(final long handle, - final long[] cfHandles) throws RocksDBException; - private native void put(final long handle, final byte[] key, - final int keyOffset, final int keyLength, final byte[] value, - final int valueOffset, int valueLength) throws RocksDBException; - private native void put(final long handle, final byte[] key, final int keyOffset, - final int keyLength, final byte[] value, final int valueOffset, - final int valueLength, final long cfHandle) 
throws RocksDBException; - private native void put(final long handle, final long writeOptHandle, - final byte[] key, final int keyOffset, final int keyLength, - final byte[] value, final int valueOffset, final int valueLength) - throws RocksDBException; - private native void put(final long handle, final long writeOptHandle, - final byte[] key, final int keyOffset, final int keyLength, - final byte[] value, final int valueOffset, final int valueLength, - final long cfHandle) throws RocksDBException; - private native void delete(final long handle, final byte[] key, - final int keyOffset, final int keyLength) throws RocksDBException; - private native void delete(final long handle, final byte[] key, - final int keyOffset, final int keyLength, final long cfHandle) - throws RocksDBException; - private native void delete(final long handle, final long writeOptHandle, - final byte[] key, final int keyOffset, final int keyLength) - throws RocksDBException; - private native void delete(final long handle, final long writeOptHandle, - final byte[] key, final int keyOffset, final int keyLength, - final long cfHandle) throws RocksDBException; - private native void singleDelete( - final long handle, final byte[] key, final int keyLen) - throws RocksDBException; - private native void singleDelete( - final long handle, final byte[] key, final int keyLen, - final long cfHandle) throws RocksDBException; - private native void singleDelete( - final long handle, final long writeOptHandle, final byte[] key, - final int keyLen) throws RocksDBException; - private native void singleDelete( - final long handle, final long writeOptHandle, - final byte[] key, final int keyLen, final long cfHandle) - throws RocksDBException; - private native void deleteRange(final long handle, final byte[] beginKey, - final int beginKeyOffset, final int beginKeyLength, final byte[] endKey, - final int endKeyOffset, final int endKeyLength) throws RocksDBException; - private native void deleteRange(final long handle, final byte[] beginKey, - final int beginKeyOffset, final int beginKeyLength, final byte[] endKey, - final int endKeyOffset, final int endKeyLength, final long cfHandle) - throws RocksDBException; - private native void deleteRange(final long handle, final long writeOptHandle, - final byte[] beginKey, final int beginKeyOffset, final int beginKeyLength, - final byte[] endKey, final int endKeyOffset, final int endKeyLength) - throws RocksDBException; - private native void deleteRange( - final long handle, final long writeOptHandle, final byte[] beginKey, - final int beginKeyOffset, final int beginKeyLength, final byte[] endKey, - final int endKeyOffset, final int endKeyLength, final long cfHandle) - throws RocksDBException; - private native void merge(final long handle, final byte[] key, - final int keyOffset, final int keyLength, final byte[] value, - final int valueOffset, final int valueLength) throws RocksDBException; - private native void merge(final long handle, final byte[] key, - final int keyOffset, final int keyLength, final byte[] value, - final int valueOffset, final int valueLength, final long cfHandle) - throws RocksDBException; - private native void merge(final long handle, final long writeOptHandle, - final byte[] key, final int keyOffset, final int keyLength, - final byte[] value, final int valueOffset, final int valueLength) - throws RocksDBException; - private native void merge(final long handle, final long writeOptHandle, - final byte[] key, final int keyOffset, final int keyLength, - final byte[] value, final 
int valueOffset, final int valueLength, - final long cfHandle) throws RocksDBException; - private native void write0(final long handle, final long writeOptHandle, - final long wbHandle) throws RocksDBException; - private native void write1(final long handle, final long writeOptHandle, - final long wbwiHandle) throws RocksDBException; - private native int get(final long handle, final byte[] key, - final int keyOffset, final int keyLength, final byte[] value, - final int valueOffset, final int valueLength) throws RocksDBException; - private native int get(final long handle, final byte[] key, - final int keyOffset, final int keyLength, byte[] value, - final int valueOffset, final int valueLength, final long cfHandle) - throws RocksDBException; - private native int get(final long handle, final long readOptHandle, - final byte[] key, final int keyOffset, final int keyLength, - final byte[] value, final int valueOffset, final int valueLength) - throws RocksDBException; - private native int get(final long handle, final long readOptHandle, - final byte[] key, final int keyOffset, final int keyLength, - final byte[] value, final int valueOffset, final int valueLength, - final long cfHandle) throws RocksDBException; - private native byte[] get(final long handle, byte[] key, final int keyOffset, - final int keyLength) throws RocksDBException; - private native byte[] get(final long handle, final byte[] key, - final int keyOffset, final int keyLength, final long cfHandle) - throws RocksDBException; - private native byte[] get(final long handle, final long readOptHandle, - final byte[] key, final int keyOffset, final int keyLength) - throws RocksDBException; - private native byte[] get(final long handle, - final long readOptHandle, final byte[] key, final int keyOffset, - final int keyLength, final long cfHandle) throws RocksDBException; - private native byte[][] multiGet(final long dbHandle, final byte[][] keys, - final int[] keyOffsets, final int[] keyLengths); - private native byte[][] multiGet(final long dbHandle, final byte[][] keys, - final int[] keyOffsets, final int[] keyLengths, - final long[] columnFamilyHandles); - private native byte[][] multiGet(final long dbHandle, final long rOptHandle, - final byte[][] keys, final int[] keyOffsets, final int[] keyLengths); - private native byte[][] multiGet(final long dbHandle, final long rOptHandle, - final byte[][] keys, final int[] keyOffsets, final int[] keyLengths, - final long[] columnFamilyHandles); - - private native void multiGet(final long dbHandle, final long rOptHandle, - final long[] columnFamilyHandles, final ByteBuffer[] keysArray, final int[] keyOffsets, - final int[] keyLengths, final ByteBuffer[] valuesArray, final int[] valuesSizeArray, - final Status[] statusArray); - - private native boolean keyMayExist( - final long handle, final long cfHandle, final long readOptHandle, - final byte[] key, final int keyOffset, final int keyLength); - private native byte[][] keyMayExistFoundValue( - final long handle, final long cfHandle, final long readOptHandle, - final byte[] key, final int keyOffset, final int keyLength); - private native void putDirect(long handle, long writeOptHandle, ByteBuffer key, int keyOffset, - int keyLength, ByteBuffer value, int valueOffset, int valueLength, long cfHandle) - throws RocksDBException; - private native long iterator(final long handle); - private native long iterator(final long handle, final long readOptHandle); - private native long iteratorCF(final long handle, final long cfHandle); - private native long 
iteratorCF(final long handle, final long cfHandle, - final long readOptHandle); - private native long[] iterators(final long handle, - final long[] columnFamilyHandles, final long readOptHandle) - throws RocksDBException; - private native long getSnapshot(final long nativeHandle); - private native void releaseSnapshot( - final long nativeHandle, final long snapshotHandle); - private native String getProperty(final long nativeHandle, - final long cfHandle, final String property, final int propertyLength) - throws RocksDBException; - private native Map getMapProperty(final long nativeHandle, - final long cfHandle, final String property, final int propertyLength) - throws RocksDBException; - private native int getDirect(long handle, long readOptHandle, ByteBuffer key, int keyOffset, - int keyLength, ByteBuffer value, int valueOffset, int valueLength, long cfHandle) - throws RocksDBException; - private native boolean keyMayExistDirect(final long handle, final long cfHhandle, - final long readOptHandle, final ByteBuffer key, final int keyOffset, final int keyLength); - private native int[] keyMayExistDirectFoundValue(final long handle, final long cfHhandle, - final long readOptHandle, final ByteBuffer key, final int keyOffset, final int keyLength, - final ByteBuffer value, final int valueOffset, final int valueLength); - private native void deleteDirect(long handle, long optHandle, ByteBuffer key, int keyOffset, - int keyLength, long cfHandle) throws RocksDBException; - private native long getLongProperty(final long nativeHandle, - final long cfHandle, final String property, final int propertyLength) - throws RocksDBException; - private native void resetStats(final long nativeHandle) - throws RocksDBException; - private native long getAggregatedLongProperty(final long nativeHandle, - final String property, int propertyLength) throws RocksDBException; - private native long[] getApproximateSizes(final long nativeHandle, - final long columnFamilyHandle, final long[] rangeSliceHandles, - final byte includeFlags); - private native long[] getApproximateMemTableStats(final long nativeHandle, - final long columnFamilyHandle, final long rangeStartSliceHandle, - final long rangeLimitSliceHandle); - private native void compactRange(final long handle, - /* @Nullable */ final byte[] begin, final int beginLen, - /* @Nullable */ final byte[] end, final int endLen, - final long compactRangeOptHandle, final long cfHandle) - throws RocksDBException; - private native void setOptions(final long handle, final long cfHandle, - final String[] keys, final String[] values) throws RocksDBException; - private native String getOptions(final long handle, final long cfHandle); - private native void setDBOptions(final long handle, - final String[] keys, final String[] values) throws RocksDBException; - private native String getDBOptions(final long handle); - private native String[] compactFiles(final long handle, - final long compactionOptionsHandle, - final long columnFamilyHandle, - final String[] inputFileNames, - final int outputLevel, - final int outputPathId, - final long compactionJobInfoHandle) throws RocksDBException; - private native void cancelAllBackgroundWork(final long handle, - final boolean wait); - private native void pauseBackgroundWork(final long handle) - throws RocksDBException; - private native void continueBackgroundWork(final long handle) - throws RocksDBException; - private native void enableAutoCompaction(final long handle, - final long[] columnFamilyHandles) throws RocksDBException; - private 
native int numberLevels(final long handle, - final long columnFamilyHandle); - private native int maxMemCompactionLevel(final long handle, - final long columnFamilyHandle); - private native int level0StopWriteTrigger(final long handle, - final long columnFamilyHandle); - private native String getName(final long handle); - private native long getEnv(final long handle); - private native void flush(final long handle, final long flushOptHandle, - /* @Nullable */ final long[] cfHandles) throws RocksDBException; - private native void flushWal(final long handle, final boolean sync) - throws RocksDBException; - private native void syncWal(final long handle) throws RocksDBException; - private native long getLatestSequenceNumber(final long handle); - private native void disableFileDeletions(long handle) throws RocksDBException; - private native void enableFileDeletions(long handle, boolean force) - throws RocksDBException; - private native String[] getLiveFiles(final long handle, - final boolean flushMemtable) throws RocksDBException; - private native LogFile[] getSortedWalFiles(final long handle) - throws RocksDBException; - private native long getUpdatesSince(final long handle, - final long sequenceNumber) throws RocksDBException; - private native void deleteFile(final long handle, final String name) - throws RocksDBException; - private native LiveFileMetaData[] getLiveFilesMetaData(final long handle); - private native ColumnFamilyMetaData getColumnFamilyMetaData( - final long handle, final long columnFamilyHandle); - private native void ingestExternalFile(final long handle, - final long columnFamilyHandle, final String[] filePathList, - final int filePathListLen, final long ingestExternalFileOptionsHandle) - throws RocksDBException; - private native void verifyChecksum(final long handle) throws RocksDBException; - private native long getDefaultColumnFamily(final long handle); - private native Map getPropertiesOfAllTables( - final long handle, final long columnFamilyHandle) throws RocksDBException; - private native Map getPropertiesOfTablesInRange( - final long handle, final long columnFamilyHandle, - final long[] rangeSliceHandles); - private native long[] suggestCompactRange(final long handle, - final long columnFamilyHandle) throws RocksDBException; - private native void promoteL0(final long handle, - final long columnFamilyHandle, final int tragetLevel) - throws RocksDBException; - private native void startTrace(final long handle, final long maxTraceFileSize, - final long traceWriterHandle) throws RocksDBException; - private native void endTrace(final long handle) throws RocksDBException; - private native void tryCatchUpWithPrimary(final long handle) throws RocksDBException; - private native void deleteFilesInRanges(long handle, long cfHandle, final byte[][] ranges, - boolean include_end) throws RocksDBException; - - private native static void destroyDB(final String path, - final long optionsHandle) throws RocksDBException; - - private native static int version(); - - protected DBOptionsInterface options_; - private static Version version; - - public static class Version { - private final byte major; - private final byte minor; - private final byte patch; - - public Version(final byte major, final byte minor, final byte patch) { - this.major = major; - this.minor = minor; - this.patch = patch; - } - - public int getMajor() { - return major; - } - - public int getMinor() { - return minor; - } - - public int getPatch() { - return patch; - } - - @Override - public String toString() { - return 
getMajor() + "." + getMinor() + "." + getPatch(); - } - - private static Version fromEncodedVersion(int encodedVersion) { - final byte patch = (byte) (encodedVersion & 0xff); - encodedVersion >>= 8; - final byte minor = (byte) (encodedVersion & 0xff); - encodedVersion >>= 8; - final byte major = (byte) (encodedVersion & 0xff); - - return new Version(major, minor, patch); - } - } -} diff --git a/java/src/main/java/org/rocksdb/RocksDBException.java b/java/src/main/java/org/rocksdb/RocksDBException.java deleted file mode 100644 index 8b035f458..000000000 --- a/java/src/main/java/org/rocksdb/RocksDBException.java +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * A RocksDBException encapsulates the error of an operation. This exception - * type is used to describe an internal error from the c++ rocksdb library. - */ -public class RocksDBException extends Exception { - - /* @Nullable */ private final Status status; - - /** - * The private construct used by a set of public static factory method. - * - * @param msg the specified error message. - */ - public RocksDBException(final String msg) { - this(msg, null); - } - - public RocksDBException(final String msg, final Status status) { - super(msg); - this.status = status; - } - - public RocksDBException(final Status status) { - super(status.getState() != null ? status.getState() - : status.getCodeString()); - this.status = status; - } - - /** - * Get the status returned from RocksDB - * - * @return The status reported by RocksDB, or null if no status is available - */ - public Status getStatus() { - return status; - } -} diff --git a/java/src/main/java/org/rocksdb/RocksEnv.java b/java/src/main/java/org/rocksdb/RocksEnv.java deleted file mode 100644 index b3681d77d..000000000 --- a/java/src/main/java/org/rocksdb/RocksEnv.java +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - *

A RocksEnv is an interface used by the rocksdb implementation to access - * operating system functionality like the filesystem etc. - * - * All Env implementations are safe for concurrent access from - * multiple threads without any external synchronization.
- */ -public class RocksEnv extends Env { - - /** - *

Package-private constructor that uses the specified native handle - * to construct a RocksEnv. - * - * Note that the ownership of the input handle - * belongs to the caller, and the newly created RocksEnv will not take - * the ownership of the input handle. As a result, calling - * {@code dispose()} of the created RocksEnv will be a no-op.
- */ - RocksEnv(final long handle) { - super(handle); - } - - @Override - protected native final void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/RocksIterator.java b/java/src/main/java/org/rocksdb/RocksIterator.java deleted file mode 100644 index 20e56d2eb..000000000 --- a/java/src/main/java/org/rocksdb/RocksIterator.java +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - *

An iterator that yields a sequence of key/value pairs from a source. - * Multiple implementations are provided by this library. - * In particular, iterators are provided - * to access the contents of a Table or a DB. - * - * Multiple threads can invoke const methods on a RocksIterator without - * external synchronization, but if any of the threads may call a - * non-const method, all threads accessing the same RocksIterator must use - * external synchronization.
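A typical full scan with such an iterator might look like the following sketch (db is assumed to be an already-open RocksDB instance; exceptions propagate to the caller):

try (final RocksIterator it = db.newIterator()) {
  for (it.seekToFirst(); it.isValid(); it.next()) {
    final byte[] key = it.key();     // the returned arrays are copies owned by Java
    final byte[] value = it.value();
    // ... process key/value ...
  }
  it.status();  // throws RocksDBException if iteration stopped because of an error
}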

 - * - * @see org.rocksdb.RocksObject - */ -public class RocksIterator extends AbstractRocksIterator<RocksDB> { - protected RocksIterator(final RocksDB rocksDB, final long nativeHandle) { - super(rocksDB, nativeHandle); - } - - /** - *

Return the key for the current entry. The underlying storage for - * the returned slice is valid only until the next modification of - * the iterator. - * - * REQUIRES: {@link #isValid()}
- * - * @return key for the current entry. - */ - public byte[] key() { - assert(isOwningHandle()); - return key0(nativeHandle_); - } - - /** - *

Return the key for the current entry. The underlying storage for - * the returned slice is valid only until the next modification of - * the iterator. - * - * REQUIRES: {@link #isValid()}
- * - * @param key the out-value to receive the retrieved key. - * It is using position and limit. Limit is set according to key size. - * Supports direct buffer only. - * @return The size of the actual key. If the return key is greater than the - * length of {@code key}, then it indicates that the size of the - * input buffer {@code key} is insufficient and partial result will - * be returned. - */ - public int key(final ByteBuffer key) { - assert isOwningHandle(); - final int result; - if (key.isDirect()) { - result = keyDirect0(nativeHandle_, key, key.position(), key.remaining()); - } else { - assert key.hasArray(); - result = keyByteArray0( - nativeHandle_, key.array(), key.arrayOffset() + key.position(), key.remaining()); - } - key.limit(Math.min(key.position() + result, key.limit())); - return result; - } - - /** - *

Return the value for the current entry. The underlying storage for - * the returned slice is valid only until the next modification of - * the iterator. - * - * REQUIRES: !AtEnd() && !AtStart()
- * @return value for the current entry. - */ - public byte[] value() { - assert(isOwningHandle()); - return value0(nativeHandle_); - } - - /** - *

Return the value for the current entry. The underlying storage for - * the returned slice is valid only until the next modification of - * the iterator. - * - * REQUIRES: {@link #isValid()}
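A sketch of the ByteBuffer-based accessors, showing how the returned size signals truncation (db is an already-open instance; the buffer size is deliberately small for illustration):

final ByteBuffer buffer = ByteBuffer.allocateDirect(32);
try (final RocksIterator it = db.newIterator()) {
  it.seekToFirst();
  if (it.isValid()) {
    final int valueSize = it.value(buffer);
    if (valueSize > buffer.capacity()) {
      // Only a partial value fit; re-read with a buffer of at least valueSize bytes.
    }
    // Bytes between position 0 and buffer.limit() hold the (possibly partial) value.
  }
}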

- * - * @param value the out-value to receive the retrieved value. - * It is using position and limit. Limit is set according to value size. - * Supports direct buffer only. - * @return The size of the actual value. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. - */ - public int value(final ByteBuffer value) { - assert isOwningHandle(); - final int result; - if (value.isDirect()) { - result = valueDirect0(nativeHandle_, value, value.position(), value.remaining()); - } else { - assert value.hasArray(); - result = valueByteArray0( - nativeHandle_, value.array(), value.arrayOffset() + value.position(), value.remaining()); - } - value.limit(Math.min(value.position() + result, value.limit())); - return result; - } - - @Override protected final native void disposeInternal(final long handle); - @Override final native boolean isValid0(long handle); - @Override final native void seekToFirst0(long handle); - @Override final native void seekToLast0(long handle); - @Override final native void next0(long handle); - @Override final native void prev0(long handle); - @Override final native void refresh0(long handle); - @Override final native void seek0(long handle, byte[] target, int targetLen); - @Override final native void seekForPrev0(long handle, byte[] target, int targetLen); - @Override - final native void seekDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen); - @Override - final native void seekByteArray0(long handle, byte[] target, int targetOffset, int targetLen); - @Override - final native void seekForPrevDirect0( - long handle, ByteBuffer target, int targetOffset, int targetLen); - @Override - final native void seekForPrevByteArray0( - long handle, byte[] target, int targetOffset, int targetLen); - @Override final native void status0(long handle) throws RocksDBException; - - private native byte[] key0(long handle); - private native byte[] value0(long handle); - private native int keyDirect0(long handle, ByteBuffer buffer, int bufferOffset, int bufferLen); - private native int keyByteArray0(long handle, byte[] array, int arrayOffset, int arrayLen); - private native int valueDirect0(long handle, ByteBuffer buffer, int bufferOffset, int bufferLen); - private native int valueByteArray0(long handle, byte[] array, int arrayOffset, int arrayLen); -} diff --git a/java/src/main/java/org/rocksdb/RocksIteratorInterface.java b/java/src/main/java/org/rocksdb/RocksIteratorInterface.java deleted file mode 100644 index 819c21c2c..000000000 --- a/java/src/main/java/org/rocksdb/RocksIteratorInterface.java +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - *

Defines the interface for an Iterator which provides - * access to data one entry at a time. Multiple implementations - * are provided by this library. In particular, iterators are provided - * to access the contents of a DB and Write Batch. - * - * Multiple threads can invoke const methods on a RocksIterator without - * external synchronization, but if any of the threads may call a - * non-const method, all threads accessing the same RocksIterator must use - * external synchronization.
- * - * @see org.rocksdb.RocksObject - */ -public interface RocksIteratorInterface { - - /** - *

An iterator is either positioned at an entry, or - * not valid. This method returns true if the iterator is valid.

- * - * @return true if iterator is valid. - */ - boolean isValid(); - - /** - *

Position at the first entry in the source. The iterator is Valid() - * after this call if the source is not empty.

- */ - void seekToFirst(); - - /** - *

Position at the last entry in the source. The iterator is - * valid after this call if the source is not empty.

- */ - void seekToLast(); - - /** - *

Position at the first entry in the source whose key is at or - * past target. - * - * The iterator is valid after this call if the source contains - * a key that comes at or past target.
- * - * @param target byte array describing a key or a - * key prefix to seek for. - */ - void seek(byte[] target); - - /** - *

Position at the first entry in the source whose key is at or - * before target. - * - * The iterator is valid after this call if the source contains - * a key that comes at or before target.
- * - * @param target byte array describing a key or a - * key prefix to seek for. - */ - void seekForPrev(byte[] target); - - /** - *

Position at the first entry in the source whose key is at or - * past target. - * - * The iterator is valid after this call if the source contains - * a key that comes at or past target.
- * - * @param target byte array describing a key or a - * key prefix to seek for. Supports direct buffer only. - */ - void seek(ByteBuffer target); - - /** - *

Position at the last key that is less than or equal to the target key. - * - * The iterator is valid after this call if the source contains - * a key that comes at or before target.
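A sketch of the reverse positioning described here, using an invented key and an already-open db (the same contract applies to the byte[] overload):

try (final RocksIterator it = db.newIterator()) {
  it.seekForPrev("user:5000".getBytes());
  if (it.isValid()) {
    // it.key() is now the greatest key that is less than or equal to "user:5000".
  }
}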

- * - * @param target byte array describing a key or a - * key prefix to seek for. Supports direct buffer only. - */ - void seekForPrev(ByteBuffer target); - - /** - *

Moves to the next entry in the source. After this call, Valid() is - * true if the iterator was not positioned at the last entry in the source. - * - * REQUIRES: {@link #isValid()}
- */ - void next(); - - /** - *

Moves to the previous entry in the source. After this call, Valid() is - * true if the iterator was not positioned at the first entry in the source. - * - * REQUIRES: {@link #isValid()}
- */ - void prev(); - - /** - *

If an error has occurred, return it. Else return an ok status. - * If non-blocking IO is requested and this operation cannot be - * satisfied without doing some IO, then this returns Status::Incomplete().

- * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - void status() throws RocksDBException; - - /** - *

If supported, renew the iterator to represent the latest state. The iterator will be - * invalidated after the call. Not supported if {@link ReadOptions#setSnapshot(Snapshot)} was - * specified when creating the iterator.
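A sketch of refreshing an iterator after writes (db is an already-open instance; refresh() may throw RocksDBException):

try (final RocksIterator it = db.newIterator()) {
  it.seekToFirst();
  // ... writes to the database happen elsewhere ...
  it.refresh();       // rebinds the iterator to the latest state
  it.seekToFirst();   // the previous position is lost, so re-seek before use
}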

- * - * @throws RocksDBException thrown if the operation is not supported or an error happens in the - * underlying native library - */ - void refresh() throws RocksDBException; -} diff --git a/java/src/main/java/org/rocksdb/RocksMemEnv.java b/java/src/main/java/org/rocksdb/RocksMemEnv.java deleted file mode 100644 index 39a6f6e1c..000000000 --- a/java/src/main/java/org/rocksdb/RocksMemEnv.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Memory environment. - */ -//TODO(AR) rename to MemEnv -public class RocksMemEnv extends Env { - - /** - *

Creates a new environment that stores its data - * in memory and delegates all non-file-storage tasks to - * {@code baseEnv}. - * - * The caller must delete the result when it is - * no longer needed.
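A sketch of plugging the in-memory environment into a database; with a mem env the path below acts only as a logical name:

try (final Env memEnv = new RocksMemEnv(Env.getDefault());
     final Options options = new Options().setCreateIfMissing(true).setEnv(memEnv);
     final RocksDB db = RocksDB.open(options, "/in-memory/only")) {
  db.put("k".getBytes(), "v".getBytes());  // nothing is written to the real filesystem
}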

- * - * @param baseEnv the base environment, - * must remain live while the result is in use. - */ - public RocksMemEnv(final Env baseEnv) { - super(createMemEnv(baseEnv.nativeHandle_)); - } - - private static native long createMemEnv(final long baseEnvHandle); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/RocksMutableObject.java b/java/src/main/java/org/rocksdb/RocksMutableObject.java deleted file mode 100644 index e92289dc0..000000000 --- a/java/src/main/java/org/rocksdb/RocksMutableObject.java +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (c) 2016, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * RocksMutableObject is an implementation of {@link AbstractNativeReference} - * whose reference to the underlying native C++ object can change. - * - *

The use of {@code RocksMutableObject} should be kept to a minimum, as it - * has synchronization overheads and introduces complexity. Instead it is - * recommended to use {@link RocksObject} where possible.

- */ -public abstract class RocksMutableObject extends AbstractNativeReference { - - /** - * An mutable reference to the value of the C++ pointer pointing to some - * underlying native RocksDB C++ object. - */ - private long nativeHandle_; - private boolean owningHandle_; - - protected RocksMutableObject() { - } - - protected RocksMutableObject(final long nativeHandle) { - this.nativeHandle_ = nativeHandle; - this.owningHandle_ = true; - } - - /** - * Closes the existing handle, and changes the handle to the new handle - * - * @param newNativeHandle The C++ pointer to the new native object - * @param owningNativeHandle true if we own the new native object - */ - public synchronized void resetNativeHandle(final long newNativeHandle, - final boolean owningNativeHandle) { - close(); - setNativeHandle(newNativeHandle, owningNativeHandle); - } - - /** - * Sets the handle (C++ pointer) of the underlying C++ native object - * - * @param nativeHandle The C++ pointer to the native object - * @param owningNativeHandle true if we own the native object - */ - public synchronized void setNativeHandle(final long nativeHandle, - final boolean owningNativeHandle) { - this.nativeHandle_ = nativeHandle; - this.owningHandle_ = owningNativeHandle; - } - - @Override - protected synchronized boolean isOwningHandle() { - return this.owningHandle_; - } - - /** - * Gets the value of the C++ pointer pointing to the underlying - * native C++ object - * - * @return the pointer value for the native object - */ - protected synchronized long getNativeHandle() { - assert (this.nativeHandle_ != 0); - return this.nativeHandle_; - } - - @Override - public synchronized final void close() { - if (isOwningHandle()) { - disposeInternal(); - this.owningHandle_ = false; - this.nativeHandle_ = 0; - } - } - - protected void disposeInternal() { - disposeInternal(nativeHandle_); - } - - protected abstract void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/RocksObject.java b/java/src/main/java/org/rocksdb/RocksObject.java deleted file mode 100644 index f07e1018a..000000000 --- a/java/src/main/java/org/rocksdb/RocksObject.java +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * RocksObject is an implementation of {@link AbstractNativeReference} which - * has an immutable and therefore thread-safe reference to the underlying - * native C++ RocksDB object. - *

- * RocksObject is the base-class of almost all RocksDB classes that have a - * pointer to some underlying native C++ {@code rocksdb} object. - * - * The use of {@code RocksObject} should always be preferred over - * {@link RocksMutableObject}.
- */ -public abstract class RocksObject extends AbstractImmutableNativeReference { - - /** - * An immutable reference to the value of the C++ pointer pointing to some - * underlying native RocksDB C++ object. - */ - protected final long nativeHandle_; - - protected RocksObject(final long nativeHandle) { - super(true); - this.nativeHandle_ = nativeHandle; - } - - /** - * Deletes underlying C++ object pointer. - */ - @Override - protected void disposeInternal() { - disposeInternal(nativeHandle_); - } - - protected abstract void disposeInternal(final long handle); - - public long getNativeHandle() { - return nativeHandle_; - } -} diff --git a/java/src/main/java/org/rocksdb/SanityLevel.java b/java/src/main/java/org/rocksdb/SanityLevel.java deleted file mode 100644 index 30568c363..000000000 --- a/java/src/main/java/org/rocksdb/SanityLevel.java +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public enum SanityLevel { - NONE((byte) 0x0), - LOOSELY_COMPATIBLE((byte) 0x1), - EXACT_MATCH((byte) 0xFF); - - private final byte value; - - SanityLevel(final byte value) { - this.value = value; - } - - /** - * Get the internal representation value. - * - * @return the internal representation value. - */ - byte getValue() { - return value; - } - - /** - * Get the SanityLevel from the internal representation value. - * - * @param value the internal representation value. - * - * @return the SanityLevel - * - * @throws IllegalArgumentException if the value does not match a - * SanityLevel - */ - static SanityLevel fromValue(final byte value) throws IllegalArgumentException { - for (final SanityLevel level : SanityLevel.values()) { - if (level.value == value) { - return level; - } - } - throw new IllegalArgumentException("Unknown value for SanityLevel: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/SizeApproximationFlag.java b/java/src/main/java/org/rocksdb/SizeApproximationFlag.java deleted file mode 100644 index fe3c2dd05..000000000 --- a/java/src/main/java/org/rocksdb/SizeApproximationFlag.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -import java.util.List; - -/** - * Flags for - * {@link RocksDB#getApproximateSizes(ColumnFamilyHandle, List, SizeApproximationFlag...)} - * that specify whether memtable stats should be included, - * or file stats approximation or both. - */ -public enum SizeApproximationFlag { - NONE((byte)0x0), - INCLUDE_MEMTABLES((byte)0x1), - INCLUDE_FILES((byte)0x2); - - private final byte value; - - SizeApproximationFlag(final byte value) { - this.value = value; - } - - /** - * Get the internal byte representation. - * - * @return the internal representation. - */ - byte getValue() { - return value; - } -} diff --git a/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java b/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java deleted file mode 100644 index e2c1b97d8..000000000 --- a/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -/** - * The config for skip-list memtable representation. 
- */ -public class SkipListMemTableConfig extends MemTableConfig { - - public static final long DEFAULT_LOOKAHEAD = 0; - - /** - * SkipListMemTableConfig constructor - */ - public SkipListMemTableConfig() { - lookahead_ = DEFAULT_LOOKAHEAD; - } - - /** - * Sets lookahead for SkipList - * - * @param lookahead If non-zero, each iterator's seek operation - * will start the search from the previously visited record - * (doing at most 'lookahead' steps). This is an - * optimization for the access pattern including many - * seeks with consecutive keys. - * @return the current instance of SkipListMemTableConfig - */ - public SkipListMemTableConfig setLookahead(final long lookahead) { - lookahead_ = lookahead; - return this; - } - - /** - * Returns the currently set lookahead value. - * - * @return lookahead value - */ - public long lookahead() { - return lookahead_; - } - - - @Override protected long newMemTableFactoryHandle() { - return newMemTableFactoryHandle0(lookahead_); - } - - private native long newMemTableFactoryHandle0(long lookahead) - throws IllegalArgumentException; - - private long lookahead_; -} diff --git a/java/src/main/java/org/rocksdb/Slice.java b/java/src/main/java/org/rocksdb/Slice.java deleted file mode 100644 index 50d9f7652..000000000 --- a/java/src/main/java/org/rocksdb/Slice.java +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - *

Base class for slices which will receive - * byte[] based access to the underlying data. - * - * byte[] backed slices typically perform better with - * small keys and values. When using larger keys and - * values consider using {@link org.rocksdb.DirectSlice}
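One common use of a Slice is as an iteration bound; the sketch below assumes ReadOptions#setIterateUpperBound, an already-open db, and invented keys:

try (final Slice upperBound = new Slice("user:9000");
     final ReadOptions readOptions = new ReadOptions().setIterateUpperBound(upperBound);
     final RocksIterator it = db.newIterator(readOptions)) {
  for (it.seek("user:0000".getBytes()); it.isValid(); it.next()) {
    // Only keys in ["user:0000", "user:9000") are visited; the Slice must stay
    // open for as long as the iterator is in use, which the try-with-resources
    // ordering above guarantees.
  }
}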

 - */ -public class Slice extends AbstractSlice<byte[]> { - - /** - * Indicates whether we have to free the memory pointed to by the Slice - */ - private volatile boolean cleared; - private volatile long internalBufferOffset = 0; - - /** - *

Called from JNI to construct a new Java Slice - * without an underlying C++ object set - * at creation time. - * - * Note: You should be aware that - * {@see org.rocksdb.RocksObject#disOwnNativeHandle()} is intentionally - * called from the default Slice constructor, and that it is marked as - * private. This is so that developers cannot construct their own default - * Slice objects (at present). As developers cannot construct their own - * Slice objects through this, they are not creating underlying C++ Slice - * objects, and so there is nothing to free (dispose) from Java.
- */ - @SuppressWarnings("unused") - private Slice() { - super(); - } - - /** - *

Package-private Slice constructor which is used to construct - * Slice instances from C++ side. As the reference to this - * object is also managed from C++ side the handle will be disowned.

- * - * @param nativeHandle address of native instance. - */ - Slice(final long nativeHandle) { - this(nativeHandle, false); - } - - /** - *

Package-private Slice constructor which is used to construct - * Slice instances using a handle.

- * - * @param nativeHandle address of native instance. - * @param owningNativeHandle true if the Java side owns the memory pointed to - * by this reference, false if ownership belongs to the C++ side - */ - Slice(final long nativeHandle, final boolean owningNativeHandle) { - super(); - setNativeHandle(nativeHandle, owningNativeHandle); - } - - /** - *

Constructs a slice where the data is taken from - * a String.

- * - * @param str String value. - */ - public Slice(final String str) { - super(createNewSliceFromString(str)); - } - - /** - *

Constructs a slice where the data is a copy of - * the byte array from a specific offset.

- * - * @param data byte array. - * @param offset offset within the byte array. - */ - public Slice(final byte[] data, final int offset) { - super(createNewSlice0(data, offset)); - } - - /** - *

Constructs a slice where the data is a copy of - * the byte array.

- * - * @param data byte array. - */ - public Slice(final byte[] data) { - super(createNewSlice1(data)); - } - - @Override - public void clear() { - clear0(getNativeHandle(), !cleared, internalBufferOffset); - cleared = true; - } - - @Override - public void removePrefix(final int n) { - removePrefix0(getNativeHandle(), n); - this.internalBufferOffset += n; - } - - /** - *

Deletes underlying C++ slice pointer - * and any buffered data. - * - * Note that this function should be called only after all - * RocksDB instances referencing the slice are closed. - * Otherwise undefined behavior will occur.
- */ - @Override - protected void disposeInternal() { - final long nativeHandle = getNativeHandle(); - if(!cleared) { - disposeInternalBuf(nativeHandle, internalBufferOffset); - } - super.disposeInternal(nativeHandle); - } - - @Override protected final native byte[] data0(long handle); - private native static long createNewSlice0(final byte[] data, - final int length); - private native static long createNewSlice1(final byte[] data); - private native void clear0(long handle, boolean internalBuffer, - long internalBufferOffset); - private native void removePrefix0(long handle, int length); - private native void disposeInternalBuf(final long handle, - long internalBufferOffset); -} diff --git a/java/src/main/java/org/rocksdb/Snapshot.java b/java/src/main/java/org/rocksdb/Snapshot.java deleted file mode 100644 index 39cdf0c2d..000000000 --- a/java/src/main/java/org/rocksdb/Snapshot.java +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Snapshot of database - */ -public class Snapshot extends RocksObject { - Snapshot(final long nativeHandle) { - super(nativeHandle); - - // The pointer to the snapshot is always released - // by the database instance. - disOwnNativeHandle(); - } - - /** - * Return the associated sequence number; - * - * @return the associated sequence number of - * this snapshot. - */ - public long getSequenceNumber() { - return getSequenceNumber(nativeHandle_); - } - - @Override - protected final void disposeInternal(final long handle) { - /** - * Nothing to release, we never own the pointer for a - * Snapshot. The pointer - * to the snapshot is released by the database - * instance. - */ - } - - private native long getSequenceNumber(long handle); -} diff --git a/java/src/main/java/org/rocksdb/SstFileManager.java b/java/src/main/java/org/rocksdb/SstFileManager.java deleted file mode 100644 index 8805410aa..000000000 --- a/java/src/main/java/org/rocksdb/SstFileManager.java +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Map; - -/** - * SstFileManager is used to track SST files in the DB and control their - * deletion rate. - * - * All SstFileManager public functions are thread-safe. - * - * SstFileManager is not extensible. - */ -//@ThreadSafe -public final class SstFileManager extends RocksObject { - - public static final long RATE_BYTES_PER_SEC_DEFAULT = 0; - public static final boolean DELETE_EXISTING_TRASH_DEFAULT = true; - public static final double MAX_TRASH_DB_RATION_DEFAULT = 0.25; - public static final long BYTES_MAX_DELETE_CHUNK_DEFAULT = 64 * 1024 * 1024; - - /** - * Create a new SstFileManager that can be shared among multiple RocksDB - * instances to track SST file and control there deletion rate. - * - * @param env the environment. - * - * @throws RocksDBException thrown if error happens in underlying native library. 
- */ - public SstFileManager(final Env env) throws RocksDBException { - this(env, null); - } - - /** - * Create a new SstFileManager that can be shared among multiple RocksDB - * instances to track SST file and control there deletion rate. - * - * @param env the environment. - * @param logger if not null, the logger will be used to log errors. - * - * @throws RocksDBException thrown if error happens in underlying native library. - */ - public SstFileManager(final Env env, /*@Nullable*/ final Logger logger) - throws RocksDBException { - this(env, logger, RATE_BYTES_PER_SEC_DEFAULT); - } - - /** - * Create a new SstFileManager that can be shared among multiple RocksDB - * instances to track SST file and control there deletion rate. - * - * @param env the environment. - * @param logger if not null, the logger will be used to log errors. - * - * == Deletion rate limiting specific arguments == - * @param rateBytesPerSec how many bytes should be deleted per second, If - * this value is set to 1024 (1 Kb / sec) and we deleted a file of size - * 4 Kb in 1 second, we will wait for another 3 seconds before we delete - * other files, Set to 0 to disable deletion rate limiting. - * - * @throws RocksDBException thrown if error happens in underlying native library. - */ - public SstFileManager(final Env env, /*@Nullable*/ final Logger logger, - final long rateBytesPerSec) throws RocksDBException { - this(env, logger, rateBytesPerSec, MAX_TRASH_DB_RATION_DEFAULT); - } - - /** - * Create a new SstFileManager that can be shared among multiple RocksDB - * instances to track SST file and control there deletion rate. - * - * @param env the environment. - * @param logger if not null, the logger will be used to log errors. - * - * == Deletion rate limiting specific arguments == - * @param rateBytesPerSec how many bytes should be deleted per second, If - * this value is set to 1024 (1 Kb / sec) and we deleted a file of size - * 4 Kb in 1 second, we will wait for another 3 seconds before we delete - * other files, Set to 0 to disable deletion rate limiting. - * @param maxTrashDbRatio if the trash size constitutes for more than this - * fraction of the total DB size we will start deleting new files passed - * to DeleteScheduler immediately. - * - * @throws RocksDBException thrown if error happens in underlying native library. - */ - public SstFileManager(final Env env, /*@Nullable*/ final Logger logger, - final long rateBytesPerSec, final double maxTrashDbRatio) - throws RocksDBException { - this(env, logger, rateBytesPerSec, maxTrashDbRatio, - BYTES_MAX_DELETE_CHUNK_DEFAULT); - } - - /** - * Create a new SstFileManager that can be shared among multiple RocksDB - * instances to track SST file and control there deletion rate. - * - * @param env the environment. - * @param logger if not null, the logger will be used to log errors. - * - * == Deletion rate limiting specific arguments == - * @param rateBytesPerSec how many bytes should be deleted per second, If - * this value is set to 1024 (1 Kb / sec) and we deleted a file of size - * 4 Kb in 1 second, we will wait for another 3 seconds before we delete - * other files, Set to 0 to disable deletion rate limiting. - * @param maxTrashDbRatio if the trash size constitutes for more than this - * fraction of the total DB size we will start deleting new files passed - * to DeleteScheduler immediately. - * @param bytesMaxDeleteChunk if a single file is larger than delete chunk, - * ftruncate the file by this size each time, rather than dropping the whole - * file. 
0 means to always delete the whole file. - * - * @throws RocksDBException thrown if error happens in underlying native library. - */ - public SstFileManager(final Env env, /*@Nullable*/final Logger logger, - final long rateBytesPerSec, final double maxTrashDbRatio, - final long bytesMaxDeleteChunk) throws RocksDBException { - super(newSstFileManager(env.nativeHandle_, - logger != null ? logger.nativeHandle_ : 0, - rateBytesPerSec, maxTrashDbRatio, bytesMaxDeleteChunk)); - } - - - /** - * Update the maximum allowed space that should be used by RocksDB, if - * the total size of the SST files exceeds {@code maxAllowedSpace}, writes to - * RocksDB will fail. - * - * Setting {@code maxAllowedSpace} to 0 will disable this feature; - * maximum allowed space will be infinite (Default value). - * - * @param maxAllowedSpace the maximum allowed space that should be used by - * RocksDB. - */ - public void setMaxAllowedSpaceUsage(final long maxAllowedSpace) { - setMaxAllowedSpaceUsage(nativeHandle_, maxAllowedSpace); - } - - /** - * Set the amount of buffer room each compaction should be able to leave. - * In other words, at its maximum disk space consumption, the compaction - * should still leave {@code compactionBufferSize} available on the disk so - * that other background functions may continue, such as logging and flushing. - * - * @param compactionBufferSize the amount of buffer room each compaction - * should be able to leave. - */ - public void setCompactionBufferSize(final long compactionBufferSize) { - setCompactionBufferSize(nativeHandle_, compactionBufferSize); - } - - /** - * Determines if the total size of SST files exceeded the maximum allowed - * space usage. - * - * @return true when the maximum allows space usage has been exceeded. - */ - public boolean isMaxAllowedSpaceReached() { - return isMaxAllowedSpaceReached(nativeHandle_); - } - - /** - * Determines if the total size of SST files as well as estimated size - * of ongoing compactions exceeds the maximums allowed space usage. - * - * @return true when the total size of SST files as well as estimated size - * of ongoing compactions exceeds the maximums allowed space usage. - */ - public boolean isMaxAllowedSpaceReachedIncludingCompactions() { - return isMaxAllowedSpaceReachedIncludingCompactions(nativeHandle_); - } - - /** - * Get the total size of all tracked files. - * - * @return the total size of all tracked files. - */ - public long getTotalSize() { - return getTotalSize(nativeHandle_); - } - - /** - * Gets all tracked files and their corresponding sizes. - * - * @return a map containing all tracked files and there corresponding sizes. - */ - public Map getTrackedFiles() { - return getTrackedFiles(nativeHandle_); - } - - /** - * Gets the delete rate limit. - * - * @return the delete rate limit (in bytes per second). - */ - public long getDeleteRateBytesPerSecond() { - return getDeleteRateBytesPerSecond(nativeHandle_); - } - - /** - * Set the delete rate limit. - * - * Zero means disable delete rate limiting and delete files immediately. - * - * @param deleteRate the delete rate limit (in bytes per second). - */ - public void setDeleteRateBytesPerSecond(final long deleteRate) { - setDeleteRateBytesPerSecond(nativeHandle_, deleteRate); - } - - /** - * Get the trash/DB size ratio where new files will be deleted immediately. - * - * @return the trash/DB size ratio. 
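For orientation, a minimal usage sketch of the SstFileManager API above, assuming a standard RocksJava build on the classpath; the path, the 64 MB/s rate and the use of Options.setSstFileManager are illustrative rather than taken from this diff:

import org.rocksdb.Env;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.SstFileManager;

public class SstFileManagerExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    // One SstFileManager can be shared by several DB instances.
    try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault());
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setSstFileManager(sstFileManager);
         final RocksDB db = RocksDB.open(options, "/tmp/sst-file-manager-example")) {
      // Throttle background SST deletions to 64 MB/s; 0 disables rate limiting.
      sstFileManager.setDeleteRateBytesPerSecond(64L * 1024 * 1024);
      db.put("key".getBytes(), "value".getBytes());
      System.out.println("tracked SST bytes: " + sstFileManager.getTotalSize());
    }
  }
}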
- */ - public double getMaxTrashDBRatio() { - return getMaxTrashDBRatio(nativeHandle_); - } - - /** - * Set the trash/DB size ratio where new files will be deleted immediately. - * - * @param ratio the trash/DB size ratio. - */ - public void setMaxTrashDBRatio(final double ratio) { - setMaxTrashDBRatio(nativeHandle_, ratio); - } - - private native static long newSstFileManager(final long handle, - final long logger_handle, final long rateBytesPerSec, - final double maxTrashDbRatio, final long bytesMaxDeleteChunk) - throws RocksDBException; - private native void setMaxAllowedSpaceUsage(final long handle, - final long maxAllowedSpace); - private native void setCompactionBufferSize(final long handle, - final long compactionBufferSize); - private native boolean isMaxAllowedSpaceReached(final long handle); - private native boolean isMaxAllowedSpaceReachedIncludingCompactions( - final long handle); - private native long getTotalSize(final long handle); - private native Map getTrackedFiles(final long handle); - private native long getDeleteRateBytesPerSecond(final long handle); - private native void setDeleteRateBytesPerSecond(final long handle, - final long deleteRate); - private native double getMaxTrashDBRatio(final long handle); - private native void setMaxTrashDBRatio(final long handle, final double ratio); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/SstFileMetaData.java b/java/src/main/java/org/rocksdb/SstFileMetaData.java deleted file mode 100644 index a04d05cb5..000000000 --- a/java/src/main/java/org/rocksdb/SstFileMetaData.java +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The metadata that describes a SST file. 
- */ -public class SstFileMetaData { - private final String fileName; - private final String path; - private final long size; - private final long smallestSeqno; - private final long largestSeqno; - private final byte[] smallestKey; - private final byte[] largestKey; - private final long numReadsSampled; - private final boolean beingCompacted; - private final long numEntries; - private final long numDeletions; - - /** - * Called from JNI C++ - * - * @param fileName the file name - * @param path the file path - * @param size the size of the file - * @param smallestSeqno the smallest sequence number - * @param largestSeqno the largest sequence number - * @param smallestKey the smallest key - * @param largestKey the largest key - * @param numReadsSampled the number of reads sampled - * @param beingCompacted true if the file is being compacted, false otherwise - * @param numEntries the number of entries - * @param numDeletions the number of deletions - */ - protected SstFileMetaData( - final String fileName, - final String path, - final long size, - final long smallestSeqno, - final long largestSeqno, - final byte[] smallestKey, - final byte[] largestKey, - final long numReadsSampled, - final boolean beingCompacted, - final long numEntries, - final long numDeletions) { - this.fileName = fileName; - this.path = path; - this.size = size; - this.smallestSeqno = smallestSeqno; - this.largestSeqno = largestSeqno; - this.smallestKey = smallestKey; - this.largestKey = largestKey; - this.numReadsSampled = numReadsSampled; - this.beingCompacted = beingCompacted; - this.numEntries = numEntries; - this.numDeletions = numDeletions; - } - - /** - * Get the name of the file. - * - * @return the name of the file. - */ - public String fileName() { - return fileName; - } - - /** - * Get the full path where the file locates. - * - * @return the full path - */ - public String path() { - return path; - } - - /** - * Get the file size in bytes. - * - * @return file size - */ - public long size() { - return size; - } - - /** - * Get the smallest sequence number in file. - * - * @return the smallest sequence number - */ - public long smallestSeqno() { - return smallestSeqno; - } - - /** - * Get the largest sequence number in file. - * - * @return the largest sequence number - */ - public long largestSeqno() { - return largestSeqno; - } - - /** - * Get the smallest user defined key in the file. - * - * @return the smallest user defined key - */ - public byte[] smallestKey() { - return smallestKey; - } - - /** - * Get the largest user defined key in the file. - * - * @return the largest user defined key - */ - public byte[] largestKey() { - return largestKey; - } - - /** - * Get the number of times the file has been read. - * - * @return the number of times the file has been read - */ - public long numReadsSampled() { - return numReadsSampled; - } - - /** - * Returns true if the file is currently being compacted. - * - * @return true if the file is currently being compacted, false otherwise. - */ - public boolean beingCompacted() { - return beingCompacted; - } - - /** - * Get the number of entries. - * - * @return the number of entries. - */ - public long numEntries() { - return numEntries; - } - - /** - * Get the number of deletions. - * - * @return the number of deletions. 
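For context on how this metadata is obtained at runtime, a small sketch; it assumes the companion LiveFileMetaData subclass and RocksDB.getLiveFilesMetaData(), which are not part of the excerpt shown here, and the path is illustrative:

import java.util.List;
import org.rocksdb.LiveFileMetaData;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class LiveFilesExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/live-files-example")) {
      // LiveFileMetaData extends SstFileMetaData with column family and level information.
      final List<LiveFileMetaData> files = db.getLiveFilesMetaData();
      for (final LiveFileMetaData file : files) {
        System.out.println(file.fileName() + ": " + file.size() + " bytes, "
            + file.numEntries() + " entries");
      }
    }
  }
}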
- */ - public long numDeletions() { - return numDeletions; - } -} diff --git a/java/src/main/java/org/rocksdb/SstFileReader.java b/java/src/main/java/org/rocksdb/SstFileReader.java deleted file mode 100644 index bb1e94ee0..000000000 --- a/java/src/main/java/org/rocksdb/SstFileReader.java +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public class SstFileReader extends RocksObject { - static { - RocksDB.loadLibrary(); - } - - public SstFileReader(final Options options) { - super(newSstFileReader(options.nativeHandle_)); - } - - /** - * Returns an iterator that will iterate on all keys in the default - * column family including both keys in the DB and uncommitted keys in this - * transaction. - * - * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is read - * from the DB but will NOT change which keys are read from this transaction - * (the keys in this transaction do not yet belong to any snapshot and will be - * fetched regardless). - * - * Caller is responsible for deleting the returned Iterator. - * - * @param readOptions Read options. - * - * @return instance of iterator object. - */ - public SstFileReaderIterator newIterator(final ReadOptions readOptions) { - assert (isOwningHandle()); - long iter = newIterator(nativeHandle_, readOptions.nativeHandle_); - return new SstFileReaderIterator(this, iter); - } - - /** - * Prepare SstFileReader to read a file. - * - * @param filePath the location of file - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void open(final String filePath) throws RocksDBException { - open(nativeHandle_, filePath); - } - - /** - * Verify checksum - * - * @throws RocksDBException if the checksum is not valid - */ - public void verifyChecksum() throws RocksDBException { - verifyChecksum(nativeHandle_); - } - - /** - * Get the properties of the table. - * - * @return the properties - * - * @throws RocksDBException if an error occurs whilst getting the table - * properties - */ - public TableProperties getTableProperties() throws RocksDBException { - return getTableProperties(nativeHandle_); - } - - @Override protected final native void disposeInternal(final long handle); - private native long newIterator(final long handle, final long readOptionsHandle); - - private native void open(final long handle, final String filePath) - throws RocksDBException; - - private native static long newSstFileReader(final long optionsHandle); - private native void verifyChecksum(final long handle) throws RocksDBException; - private native TableProperties getTableProperties(final long handle) - throws RocksDBException; -} diff --git a/java/src/main/java/org/rocksdb/SstFileReaderIterator.java b/java/src/main/java/org/rocksdb/SstFileReaderIterator.java deleted file mode 100644 index a4a08167b..000000000 --- a/java/src/main/java/org/rocksdb/SstFileReaderIterator.java +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - *

<p>An iterator that yields a sequence of key/value pairs from a source. - * Multiple implementations are provided by this library. In particular, - * iterators are provided to access the contents of a Table or a DB.</p> - * - * <p>Multiple threads can invoke const methods on a RocksIterator without - * external synchronization, but if any of the threads may call a - * non-const method, all threads accessing the same RocksIterator must use - * external synchronization.</p>
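A minimal end-to-end sketch of the reader and iterator described above, assuming an SST file already exists at the (illustrative) path; the getNumEntries() accessor on TableProperties is assumed from its field list:

import org.rocksdb.Options;
import org.rocksdb.ReadOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.SstFileReader;
import org.rocksdb.SstFileReaderIterator;

public class SstFileReaderExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options();
         final ReadOptions readOptions = new ReadOptions();
         final SstFileReader reader = new SstFileReader(options)) {
      reader.open("/tmp/example.sst");
      reader.verifyChecksum();
      // The caller owns the iterator and must close it.
      try (final SstFileReaderIterator it = reader.newIterator(readOptions)) {
        for (it.seekToFirst(); it.isValid(); it.next()) {
          System.out.println(new String(it.key()) + " => " + new String(it.value()));
        }
      }
      System.out.println("entries: " + reader.getTableProperties().getNumEntries());
    }
  }
}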

- * - * @see RocksObject - */ -public class SstFileReaderIterator extends AbstractRocksIterator { - protected SstFileReaderIterator(final SstFileReader reader, final long nativeHandle) { - super(reader, nativeHandle); - } - - /** - *

Return the key for the current entry. The underlying storage for - * the returned slice is valid only until the next modification of - * the iterator.

- * - *

REQUIRES: {@link #isValid()}

- * - * @return key for the current entry. - */ - public byte[] key() { - assert (isOwningHandle()); - return key0(nativeHandle_); - } - - /** - *

Return the key for the current entry. The underlying storage for - * the returned slice is valid only until the next modification of - * the iterator.

- * - *

REQUIRES: {@link #isValid()}

- * - * @param key the out-value to receive the retrieved key. - * It is using position and limit. Limit is set according to key size. - * Supports direct buffer only. - * @return The size of the actual key. If the return key is greater than the - * length of {@code key}, then it indicates that the size of the - * input buffer {@code key} is insufficient and partial result will - * be returned. - */ - public int key(final ByteBuffer key) { - assert (isOwningHandle()); - final int result; - if (key.isDirect()) { - result = keyDirect0(nativeHandle_, key, key.position(), key.remaining()); - } else { - result = keyByteArray0( - nativeHandle_, key.array(), key.arrayOffset() + key.position(), key.remaining()); - } - key.limit(Math.min(key.position() + result, key.limit())); - return result; - } - - /** - *

Return the value for the current entry. The underlying storage for - * the returned slice is valid only until the next modification of - * the iterator.

- * - *

REQUIRES: !AtEnd() && !AtStart()

- * @return value for the current entry. - */ - public byte[] value() { - assert (isOwningHandle()); - return value0(nativeHandle_); - } - - /** - *

Return the value for the current entry. The underlying storage for - * the returned slice is valid only until the next modification of - * the iterator.

- * - *

REQUIRES: {@link #isValid()}

- * - * @param value the out-value to receive the retrieved value. - * It is using position and limit. Limit is set according to value size. - * Supports direct buffer only. - * @return The size of the actual value. If the return value is greater than the - * length of {@code value}, then it indicates that the size of the - * input buffer {@code value} is insufficient and partial result will - * be returned. - */ - public int value(final ByteBuffer value) { - assert (isOwningHandle()); - final int result; - if (value.isDirect()) { - result = valueDirect0(nativeHandle_, value, value.position(), value.remaining()); - } else { - result = valueByteArray0( - nativeHandle_, value.array(), value.arrayOffset() + value.position(), value.remaining()); - } - value.limit(Math.min(value.position() + result, value.limit())); - return result; - } - - @Override protected final native void disposeInternal(final long handle); - @Override final native boolean isValid0(long handle); - @Override final native void seekToFirst0(long handle); - @Override final native void seekToLast0(long handle); - @Override final native void next0(long handle); - @Override final native void prev0(long handle); - @Override final native void refresh0(long handle) throws RocksDBException; - @Override final native void seek0(long handle, byte[] target, int targetLen); - @Override final native void seekForPrev0(long handle, byte[] target, int targetLen); - @Override final native void status0(long handle) throws RocksDBException; - @Override - final native void seekDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen); - @Override - final native void seekForPrevDirect0( - long handle, ByteBuffer target, int targetOffset, int targetLen); - @Override - final native void seekByteArray0( - final long handle, final byte[] target, final int targetOffset, final int targetLen); - @Override - final native void seekForPrevByteArray0( - final long handle, final byte[] target, final int targetOffset, final int targetLen); - - private native byte[] key0(long handle); - private native byte[] value0(long handle); - - private native int keyDirect0(long handle, ByteBuffer buffer, int bufferOffset, int bufferLen); - private native int keyByteArray0(long handle, byte[] buffer, int bufferOffset, int bufferLen); - private native int valueDirect0(long handle, ByteBuffer buffer, int bufferOffset, int bufferLen); - private native int valueByteArray0(long handle, byte[] buffer, int bufferOffset, int bufferLen); -} diff --git a/java/src/main/java/org/rocksdb/SstFileWriter.java b/java/src/main/java/org/rocksdb/SstFileWriter.java deleted file mode 100644 index fe00c1a12..000000000 --- a/java/src/main/java/org/rocksdb/SstFileWriter.java +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - * SstFileWriter is used to create sst files that can be added to the - * database later. All keys in files generated by SstFileWriter will have - * sequence number = 0. - */ -public class SstFileWriter extends RocksObject { - static { - RocksDB.loadLibrary(); - } - - /** - * SstFileWriter Constructor. - * - * @param envOptions {@link org.rocksdb.EnvOptions} instance. - * @param options {@link org.rocksdb.Options} instance. 
- */ - public SstFileWriter(final EnvOptions envOptions, final Options options) { - super(newSstFileWriter( - envOptions.nativeHandle_, options.nativeHandle_)); - } - - /** - * Prepare SstFileWriter to write to a file. - * - * @param filePath the location of file - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void open(final String filePath) throws RocksDBException { - open(nativeHandle_, filePath); - } - - /** - * Add a Put key with value to currently opened file. - * - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void put(final Slice key, final Slice value) throws RocksDBException { - put(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); - } - - /** - * Add a Put key with value to currently opened file. - * - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void put(final DirectSlice key, final DirectSlice value) - throws RocksDBException { - put(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); - } - - /** - * Add a Put key with value to currently opened file. - * - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void put(final ByteBuffer key, final ByteBuffer value) throws RocksDBException { - assert key.isDirect() && value.isDirect(); - putDirect(nativeHandle_, key, key.position(), key.remaining(), value, value.position(), - value.remaining()); - key.position(key.limit()); - value.position(value.limit()); - } - - /** - * Add a Put key with value to currently opened file. - * - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void put(final byte[] key, final byte[] value) throws RocksDBException { - put(nativeHandle_, key, value); - } - - /** - * Add a Merge key with value to currently opened file. - * - * @param key the specified key to be merged. - * @param value the value to be merged with the current value for - * the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void merge(final Slice key, final Slice value) - throws RocksDBException { - merge(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); - } - - /** - * Add a Merge key with value to currently opened file. - * - * @param key the specified key to be merged. - * @param value the value to be merged with the current value for - * the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void merge(final byte[] key, final byte[] value) - throws RocksDBException { - merge(nativeHandle_, key, value); - } - - /** - * Add a Merge key with value to currently opened file. - * - * @param key the specified key to be merged. - * @param value the value to be merged with the current value for - * the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
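A minimal sketch of creating an SST file with the writer above and then ingesting it into a database; the paths are illustrative, and the ingest step assumes RocksDB.ingestExternalFile and IngestExternalFileOptions from the same package:

import java.util.Collections;
import org.rocksdb.EnvOptions;
import org.rocksdb.IngestExternalFileOptions;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.SstFileWriter;

public class SstFileWriterExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    final String sstPath = "/tmp/example.sst";
    try (final EnvOptions envOptions = new EnvOptions();
         final Options options = new Options();
         final SstFileWriter writer = new SstFileWriter(envOptions, options)) {
      writer.open(sstPath);
      // Keys must be added in the comparator's ascending order.
      writer.put("k1".getBytes(), "v1".getBytes());
      writer.put("k2".getBytes(), "v2".getBytes());
      writer.finish();
    }
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/sst-writer-example-db");
         final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()) {
      db.ingestExternalFile(Collections.singletonList(sstPath), ingestOptions);
    }
  }
}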
- */ - public void merge(final DirectSlice key, final DirectSlice value) - throws RocksDBException { - merge(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); - } - - /** - * Add a deletion key to currently opened file. - * - * @param key the specified key to be deleted. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final Slice key) throws RocksDBException { - delete(nativeHandle_, key.getNativeHandle()); - } - - /** - * Add a deletion key to currently opened file. - * - * @param key the specified key to be deleted. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final DirectSlice key) throws RocksDBException { - delete(nativeHandle_, key.getNativeHandle()); - } - - /** - * Add a deletion key to currently opened file. - * - * @param key the specified key to be deleted. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void delete(final byte[] key) throws RocksDBException { - delete(nativeHandle_, key); - } - - /** - * Finish the process and close the sst file. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public void finish() throws RocksDBException { - finish(nativeHandle_); - } - - /** - * Return the current file size. - * - * @return the current file size. - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public long fileSize() throws RocksDBException { - return fileSize(nativeHandle_); - } - - private native static long newSstFileWriter( - final long envOptionsHandle, final long optionsHandle, - final long userComparatorHandle, final byte comparatorType); - - private native static long newSstFileWriter(final long envOptionsHandle, - final long optionsHandle); - - private native void open(final long handle, final String filePath) - throws RocksDBException; - - private native void put(final long handle, final long keyHandle, - final long valueHandle) throws RocksDBException; - - private native void put(final long handle, final byte[] key, - final byte[] value) throws RocksDBException; - - private native void putDirect(long handle, ByteBuffer key, int keyOffset, int keyLength, - ByteBuffer value, int valueOffset, int valueLength) throws RocksDBException; - - private native long fileSize(long handle) throws RocksDBException; - - private native void merge(final long handle, final long keyHandle, - final long valueHandle) throws RocksDBException; - - private native void merge(final long handle, final byte[] key, - final byte[] value) throws RocksDBException; - - private native void delete(final long handle, final long keyHandle) - throws RocksDBException; - - private native void delete(final long handle, final byte[] key) - throws RocksDBException; - - private native void finish(final long handle) throws RocksDBException; - - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/SstPartitionerFactory.java b/java/src/main/java/org/rocksdb/SstPartitionerFactory.java deleted file mode 100644 index ea6f13565..000000000 --- a/java/src/main/java/org/rocksdb/SstPartitionerFactory.java +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Handle to factory for SstPartitioner. It is used in {@link ColumnFamilyOptions} - */ -public abstract class SstPartitionerFactory extends RocksObject { - protected SstPartitionerFactory(final long nativeHandle) { - super(nativeHandle); - } -} diff --git a/java/src/main/java/org/rocksdb/SstPartitionerFixedPrefixFactory.java b/java/src/main/java/org/rocksdb/SstPartitionerFixedPrefixFactory.java deleted file mode 100644 index d513c5f15..000000000 --- a/java/src/main/java/org/rocksdb/SstPartitionerFixedPrefixFactory.java +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Fixed prefix factory. It partitions SST files using fixed prefix of the key. - */ -public class SstPartitionerFixedPrefixFactory extends SstPartitionerFactory { - public SstPartitionerFixedPrefixFactory(long prefixLength) { - super(newSstPartitionerFixedPrefixFactory0(prefixLength)); - } - - private native static long newSstPartitionerFixedPrefixFactory0(long prefixLength); - - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/StateType.java b/java/src/main/java/org/rocksdb/StateType.java deleted file mode 100644 index 803456bb2..000000000 --- a/java/src/main/java/org/rocksdb/StateType.java +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The type used to refer to a thread state. - * - * A state describes lower-level action of a thread - * such as reading / writing a file or waiting for a mutex. - */ -public enum StateType { - STATE_UNKNOWN((byte)0x0), - STATE_MUTEX_WAIT((byte)0x1); - - private final byte value; - - StateType(final byte value) { - this.value = value; - } - - /** - * Get the internal representation value. - * - * @return the internal representation value. - */ - byte getValue() { - return value; - } - - /** - * Get the State type from the internal representation value. - * - * @param value the internal representation value. - * - * @return the state type - * - * @throws IllegalArgumentException if the value does not match - * a StateType - */ - static StateType fromValue(final byte value) - throws IllegalArgumentException { - for (final StateType threadType : StateType.values()) { - if (threadType.value == value) { - return threadType; - } - } - throw new IllegalArgumentException( - "Unknown value for StateType: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/Statistics.java b/java/src/main/java/org/rocksdb/Statistics.java deleted file mode 100644 index 0938a6d58..000000000 --- a/java/src/main/java/org/rocksdb/Statistics.java +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.EnumSet; - -/** - * Statistics to analyze the performance of a db. Pointer for statistics object - * is managed by Options class. - */ -public class Statistics extends RocksObject { - - public Statistics() { - super(newStatistics()); - } - - public Statistics(final Statistics otherStatistics) { - super(newStatistics(otherStatistics.nativeHandle_)); - } - - public Statistics(final EnumSet ignoreHistograms) { - super(newStatistics(toArrayValues(ignoreHistograms))); - } - - public Statistics(final EnumSet ignoreHistograms, final Statistics otherStatistics) { - super(newStatistics(toArrayValues(ignoreHistograms), otherStatistics.nativeHandle_)); - } - - /** - * Intentionally package-private. - * - * Used from {@link DBOptions#statistics()} - * - * @param existingStatisticsHandle The C++ pointer to an existing statistics object - */ - Statistics(final long existingStatisticsHandle) { - super(existingStatisticsHandle); - } - - private static byte[] toArrayValues(final EnumSet histogramTypes) { - final byte[] values = new byte[histogramTypes.size()]; - int i = 0; - for(final HistogramType histogramType : histogramTypes) { - values[i++] = histogramType.getValue(); - } - return values; - } - - /** - * Gets the current stats level. - * - * @return The stats level. - */ - public StatsLevel statsLevel() { - return StatsLevel.getStatsLevel(statsLevel(nativeHandle_)); - } - - /** - * Sets the stats level. - * - * @param statsLevel The stats level to set. - */ - public void setStatsLevel(final StatsLevel statsLevel) { - setStatsLevel(nativeHandle_, statsLevel.getValue()); - } - - /** - * Get the count for a ticker. - * - * @param tickerType The ticker to get the count for - * - * @return The count for the ticker - */ - public long getTickerCount(final TickerType tickerType) { - assert(isOwningHandle()); - return getTickerCount(nativeHandle_, tickerType.getValue()); - } - - /** - * Get the count for a ticker and reset the tickers count. - * - * @param tickerType The ticker to get the count for - * - * @return The count for the ticker - */ - public long getAndResetTickerCount(final TickerType tickerType) { - assert(isOwningHandle()); - return getAndResetTickerCount(nativeHandle_, tickerType.getValue()); - } - - /** - * Gets the histogram data for a particular histogram. - * - * @param histogramType The histogram to retrieve the data for - * - * @return The histogram data - */ - public HistogramData getHistogramData(final HistogramType histogramType) { - assert(isOwningHandle()); - return getHistogramData(nativeHandle_, histogramType.getValue()); - } - - /** - * Gets a string representation of a particular histogram. - * - * @param histogramType The histogram to retrieve the data for - * - * @return A string representation of the histogram data - */ - public String getHistogramString(final HistogramType histogramType) { - assert(isOwningHandle()); - return getHistogramString(nativeHandle_, histogramType.getValue()); - } - - /** - * Resets all ticker and histogram stats. - * - * @throws RocksDBException if an error occurs when resetting the statistics. - */ - public void reset() throws RocksDBException { - assert(isOwningHandle()); - reset(nativeHandle_); - } - - /** - * String representation of the statistic object. 
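A minimal sketch of wiring the Statistics object above into a database and reading counters back; it assumes Options.setStatistics and the TickerType, HistogramType and HistogramData companions, and the chosen ticker and histogram are illustrative:

import org.rocksdb.HistogramData;
import org.rocksdb.HistogramType;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.Statistics;
import org.rocksdb.StatsLevel;
import org.rocksdb.TickerType;

public class StatisticsExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Statistics statistics = new Statistics();
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setStatistics(statistics);
         final RocksDB db = RocksDB.open(options, "/tmp/statistics-example")) {
      statistics.setStatsLevel(StatsLevel.EXCEPT_DETAILED_TIMERS);
      db.put("key".getBytes(), "value".getBytes());
      db.get("key".getBytes());
      final long keysWritten = statistics.getTickerCount(TickerType.NUMBER_KEYS_WRITTEN);
      final HistogramData getLatency = statistics.getHistogramData(HistogramType.DB_GET);
      System.out.println(keysWritten + " key(s) written, median get latency "
          + getLatency.getMedian());
    }
  }
}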
- */ - @Override - public String toString() { - assert(isOwningHandle()); - return toString(nativeHandle_); - } - - private native static long newStatistics(); - private native static long newStatistics(final long otherStatisticsHandle); - private native static long newStatistics(final byte[] ignoreHistograms); - private native static long newStatistics(final byte[] ignoreHistograms, final long otherStatisticsHandle); - - @Override protected final native void disposeInternal(final long handle); - - private native byte statsLevel(final long handle); - private native void setStatsLevel(final long handle, final byte statsLevel); - private native long getTickerCount(final long handle, final byte tickerType); - private native long getAndResetTickerCount(final long handle, final byte tickerType); - private native HistogramData getHistogramData(final long handle, final byte histogramType); - private native String getHistogramString(final long handle, final byte histogramType); - private native void reset(final long nativeHandle) throws RocksDBException; - private native String toString(final long nativeHandle); -} diff --git a/java/src/main/java/org/rocksdb/StatisticsCollector.java b/java/src/main/java/org/rocksdb/StatisticsCollector.java deleted file mode 100644 index fb3f57150..000000000 --- a/java/src/main/java/org/rocksdb/StatisticsCollector.java +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.List; -import java.util.concurrent.Executors; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; - -/** - *

<p>Helper class to collect DB statistics periodically at an interval specified in the - * constructor. The callback function (provided in the constructor) is invoked on - * every statistics collection.</p> - * - * <p>Callers should call start() to begin statistics collection. shutDown() should - * be called to stop collection, and must be called before the Statistics - * references (provided in the constructor) are disposed.</p>

- */ -public class StatisticsCollector { - private final List _statsCollectorInputList; - private final ExecutorService _executorService; - private final int _statsCollectionInterval; - private volatile boolean _isRunning = true; - - /** - * Constructor for statistics collector. - * - * @param statsCollectorInputList List of statistics collector input. - * @param statsCollectionIntervalInMilliSeconds Statistics collection time - * period (specified in milliseconds). - */ - public StatisticsCollector( - final List statsCollectorInputList, - final int statsCollectionIntervalInMilliSeconds) { - _statsCollectorInputList = statsCollectorInputList; - _statsCollectionInterval = statsCollectionIntervalInMilliSeconds; - - _executorService = Executors.newSingleThreadExecutor(); - } - - public void start() { - _executorService.submit(collectStatistics()); - } - - /** - * Shuts down statistics collector. - * - * @param shutdownTimeout Time in milli-seconds to wait for shutdown before - * killing the collection process. - * @throws java.lang.InterruptedException thrown if Threads are interrupted. - */ - public void shutDown(final int shutdownTimeout) throws InterruptedException { - _isRunning = false; - - _executorService.shutdownNow(); - // Wait for collectStatistics runnable to finish so that disposal of - // statistics does not cause any exceptions to be thrown. - _executorService.awaitTermination(shutdownTimeout, TimeUnit.MILLISECONDS); - } - - private Runnable collectStatistics() { - return new Runnable() { - - @Override - public void run() { - while (_isRunning) { - try { - if(Thread.currentThread().isInterrupted()) { - break; - } - for(final StatsCollectorInput statsCollectorInput : - _statsCollectorInputList) { - Statistics statistics = statsCollectorInput.getStatistics(); - StatisticsCollectorCallback statsCallback = - statsCollectorInput.getCallback(); - - // Collect ticker data - for(final TickerType ticker : TickerType.values()) { - if(ticker != TickerType.TICKER_ENUM_MAX) { - final long tickerValue = statistics.getTickerCount(ticker); - statsCallback.tickerCallback(ticker, tickerValue); - } - } - - // Collect histogram data - for(final HistogramType histogramType : HistogramType.values()) { - if(histogramType != HistogramType.HISTOGRAM_ENUM_MAX) { - final HistogramData histogramData = - statistics.getHistogramData(histogramType); - statsCallback.histogramCallback(histogramType, histogramData); - } - } - } - - Thread.sleep(_statsCollectionInterval); - } - catch (final InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } - catch (final Exception e) { - throw new RuntimeException("Error while calculating statistics", e); - } - } - } - }; - } -} diff --git a/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java b/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java deleted file mode 100644 index f3785b15f..000000000 --- a/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Callback interface provided to StatisticsCollector. - * - * Thread safety: - * StatisticsCollector doesn't make any guarantees about thread safety. 
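A minimal sketch of periodic collection with the classes above (StatisticsCollector, StatisticsCollectorCallback and StatsCollectorInput); the interval, sleep and output are illustrative:

import java.util.Collections;
import org.rocksdb.HistogramData;
import org.rocksdb.HistogramType;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.Statistics;
import org.rocksdb.StatisticsCollector;
import org.rocksdb.StatisticsCollectorCallback;
import org.rocksdb.StatsCollectorInput;
import org.rocksdb.TickerType;

public class StatisticsCollectorExample {
  public static void main(final String[] args) throws RocksDBException, InterruptedException {
    RocksDB.loadLibrary();
    try (final Statistics statistics = new Statistics();
         final Options options = new Options().setCreateIfMissing(true).setStatistics(statistics);
         final RocksDB db = RocksDB.open(options, "/tmp/stats-collector-example")) {
      final StatisticsCollectorCallback callback = new StatisticsCollectorCallback() {
        @Override
        public void tickerCallback(final TickerType tickerType, final long tickerCount) {
          System.out.println(tickerType + " = " + tickerCount);
        }
        @Override
        public void histogramCallback(final HistogramType histType, final HistogramData histData) {
          System.out.println(histType + " median = " + histData.getMedian());
        }
      };
      // Collect every second from this one Statistics instance.
      final StatisticsCollector collector = new StatisticsCollector(
          Collections.singletonList(new StatsCollectorInput(statistics, callback)), 1000);
      collector.start();
      db.put("key".getBytes(), "value".getBytes());
      Thread.sleep(3000);
      collector.shutDown(1000); // must complete before statistics is closed
    }
  }
}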
- * If the same reference of StatisticsCollectorCallback is passed to multiple - * StatisticsCollector references, then its the responsibility of the - * user to make StatisticsCollectorCallback's implementation thread-safe. - * - */ -public interface StatisticsCollectorCallback { - /** - * Callback function to get ticker values. - * @param tickerType Ticker type. - * @param tickerCount Value of ticker type. - */ - void tickerCallback(TickerType tickerType, long tickerCount); - - /** - * Callback function to get histogram values. - * @param histType Histogram type. - * @param histData Histogram data. - */ - void histogramCallback(HistogramType histType, HistogramData histData); -} diff --git a/java/src/main/java/org/rocksdb/StatsCollectorInput.java b/java/src/main/java/org/rocksdb/StatsCollectorInput.java deleted file mode 100644 index 5bf43ade5..000000000 --- a/java/src/main/java/org/rocksdb/StatsCollectorInput.java +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Contains all information necessary to collect statistics from one instance - * of DB statistics. - */ -public class StatsCollectorInput { - private final Statistics _statistics; - private final StatisticsCollectorCallback _statsCallback; - - /** - * Constructor for StatsCollectorInput. - * - * @param statistics Reference of DB statistics. - * @param statsCallback Reference of statistics callback interface. - */ - public StatsCollectorInput(final Statistics statistics, - final StatisticsCollectorCallback statsCallback) { - _statistics = statistics; - _statsCallback = statsCallback; - } - - public Statistics getStatistics() { - return _statistics; - } - - public StatisticsCollectorCallback getCallback() { - return _statsCallback; - } -} diff --git a/java/src/main/java/org/rocksdb/StatsLevel.java b/java/src/main/java/org/rocksdb/StatsLevel.java deleted file mode 100644 index 58504b84a..000000000 --- a/java/src/main/java/org/rocksdb/StatsLevel.java +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The level of Statistics to report. - */ -public enum StatsLevel { - /** - * Collect all stats except time inside mutex lock AND time spent on - * compression. - */ - EXCEPT_DETAILED_TIMERS((byte) 0x0), - - /** - * Collect all stats except the counters requiring to get time inside the - * mutex lock. - */ - EXCEPT_TIME_FOR_MUTEX((byte) 0x1), - - /** - * Collect all stats, including measuring duration of mutex operations. - * - * If getting time is expensive on the platform to run, it can - * reduce scalability to more threads, especially for writes. - */ - ALL((byte) 0x2); - - private final byte value; - - StatsLevel(final byte value) { - this.value = value; - } - - /** - *

Returns the byte value of the enumeration's value.

- * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - * Get StatsLevel by byte value. - * - * @param value byte representation of StatsLevel. - * - * @return {@link org.rocksdb.StatsLevel} instance. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. - */ - public static StatsLevel getStatsLevel(final byte value) { - for (final StatsLevel statsLevel : StatsLevel.values()) { - if (statsLevel.getValue() == value){ - return statsLevel; - } - } - throw new IllegalArgumentException( - "Illegal value provided for StatsLevel."); - } -} diff --git a/java/src/main/java/org/rocksdb/Status.java b/java/src/main/java/org/rocksdb/Status.java deleted file mode 100644 index 033ed3ea1..000000000 --- a/java/src/main/java/org/rocksdb/Status.java +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Objects; - -/** - * Represents the status returned by a function call in RocksDB. - * - * Currently only used with {@link RocksDBException} when the - * status is not {@link Code#Ok} - */ -public class Status { - private final Code code; - /* @Nullable */ private final SubCode subCode; - /* @Nullable */ private final String state; - - public Status(final Code code, final SubCode subCode, final String state) { - this.code = code; - this.subCode = subCode; - this.state = state; - } - - /** - * Intentionally private as this will be called from JNI - */ - private Status(final byte code, final byte subCode, final String state) { - this.code = Code.getCode(code); - this.subCode = SubCode.getSubCode(subCode); - this.state = state; - } - - public Code getCode() { - return code; - } - - public SubCode getSubCode() { - return subCode; - } - - public String getState() { - return state; - } - - public String getCodeString() { - final StringBuilder builder = new StringBuilder() - .append(code.name()); - if(subCode != null && subCode != SubCode.None) { - builder.append("(") - .append(subCode.name()) - .append(")"); - } - return builder.toString(); - } - - // should stay in sync with /include/rocksdb/status.h:Code and /java/rocksjni/portal.h:toJavaStatusCode - public enum Code { - Ok( (byte)0x0), - NotFound( (byte)0x1), - Corruption( (byte)0x2), - NotSupported( (byte)0x3), - InvalidArgument( (byte)0x4), - IOError( (byte)0x5), - MergeInProgress( (byte)0x6), - Incomplete( (byte)0x7), - ShutdownInProgress( (byte)0x8), - TimedOut( (byte)0x9), - Aborted( (byte)0xA), - Busy( (byte)0xB), - Expired( (byte)0xC), - TryAgain( (byte)0xD), - Undefined( (byte)0x7F); - - private final byte value; - - Code(final byte value) { - this.value = value; - } - - public static Code getCode(final byte value) { - for (final Code code : Code.values()) { - if (code.value == value){ - return code; - } - } - throw new IllegalArgumentException( - "Illegal value provided for Code (" + value + ")."); - } - - /** - * Returns the byte value of the enumerations value. 
- * - * @return byte representation - */ - public byte getValue() { - return value; - } - } - - // should stay in sync with /include/rocksdb/status.h:SubCode and /java/rocksjni/portal.h:toJavaStatusSubCode - public enum SubCode { - None( (byte)0x0), - MutexTimeout( (byte)0x1), - LockTimeout( (byte)0x2), - LockLimit( (byte)0x3), - NoSpace( (byte)0x4), - Deadlock( (byte)0x5), - StaleFile( (byte)0x6), - MemoryLimit( (byte)0x7), - Undefined( (byte)0x7F); - - private final byte value; - - SubCode(final byte value) { - this.value = value; - } - - public static SubCode getSubCode(final byte value) { - for (final SubCode subCode : SubCode.values()) { - if (subCode.value == value){ - return subCode; - } - } - throw new IllegalArgumentException( - "Illegal value provided for SubCode (" + value + ")."); - } - - /** - * Returns the byte value of the enumerations value. - * - * @return byte representation - */ - public byte getValue() { - return value; - } - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - Status status = (Status) o; - return code == status.code && subCode == status.subCode && Objects.equals(state, status.state); - } - - @Override - public int hashCode() { - return Objects.hash(code, subCode, state); - } -} diff --git a/java/src/main/java/org/rocksdb/StringAppendOperator.java b/java/src/main/java/org/rocksdb/StringAppendOperator.java deleted file mode 100644 index ddbccff46..000000000 --- a/java/src/main/java/org/rocksdb/StringAppendOperator.java +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -// Copyright (c) 2014, Vlad Balan (vlad.gm@gmail.com). All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * StringAppendOperator is a merge operator that concatenates - * two strings. - */ -public class StringAppendOperator extends MergeOperator { - public StringAppendOperator() { - this(','); - } - - public StringAppendOperator(char delim) { - super(newSharedStringAppendOperator(delim)); - } - - public StringAppendOperator(String delim) { - super(newSharedStringAppendOperator(delim)); - } - - private native static long newSharedStringAppendOperator(final char delim); - private native static long newSharedStringAppendOperator(final String delim); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/TableFileCreationBriefInfo.java b/java/src/main/java/org/rocksdb/TableFileCreationBriefInfo.java deleted file mode 100644 index 5a383ade4..000000000 --- a/java/src/main/java/org/rocksdb/TableFileCreationBriefInfo.java +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
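A minimal sketch combining the StringAppendOperator and Status classes above: the merge operator is installed through Options.setMergeOperator, and a failure surfaces its Status via RocksDBException.getStatus(); the path and keys are illustrative:

import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.Status;
import org.rocksdb.StringAppendOperator;

public class StringAppendExample {
  public static void main(final String[] args) {
    RocksDB.loadLibrary();
    try (final StringAppendOperator stringAppend = new StringAppendOperator();
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setMergeOperator(stringAppend);
         final RocksDB db = RocksDB.open(options, "/tmp/string-append-example")) {
      db.merge("colors".getBytes(), "red".getBytes());
      db.merge("colors".getBytes(), "blue".getBytes());
      System.out.println(new String(db.get("colors".getBytes()))); // "red,blue"
    } catch (final RocksDBException e) {
      final Status status = e.getStatus();
      if (status != null) {
        System.err.println(status.getCodeString() + ": " + status.getState());
      }
    }
  }
}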
- -package org.rocksdb; - -import java.util.Objects; - -public class TableFileCreationBriefInfo { - private final String dbName; - private final String columnFamilyName; - private final String filePath; - private final int jobId; - private final TableFileCreationReason reason; - - /** - * Access is private as this will only be constructed from - * C++ via JNI, either directly of via - * {@link TableFileCreationInfo#TableFileCreationInfo(long, TableProperties, Status, String, - * String, String, int, byte)}. - * - * @param dbName the database name - * @param columnFamilyName the column family name - * @param filePath the path to the table file - * @param jobId the job identifier - * @param tableFileCreationReasonValue the reason for creation of the table file - */ - protected TableFileCreationBriefInfo(final String dbName, final String columnFamilyName, - final String filePath, final int jobId, final byte tableFileCreationReasonValue) { - this.dbName = dbName; - this.columnFamilyName = columnFamilyName; - this.filePath = filePath; - this.jobId = jobId; - this.reason = TableFileCreationReason.fromValue(tableFileCreationReasonValue); - } - - /** - * Get the name of the database where the file was created. - * - * @return the name of the database. - */ - public String getDbName() { - return dbName; - } - - /** - * Get the name of the column family where the file was created. - * - * @return the name of the column family. - */ - public String getColumnFamilyName() { - return columnFamilyName; - } - - /** - * Get the path to the created file. - * - * @return the path. - */ - public String getFilePath() { - return filePath; - } - - /** - * Get the id of the job (which could be flush or compaction) that - * created the file. - * - * @return the id of the job. - */ - public int getJobId() { - return jobId; - } - - /** - * Get the reason for creating the table. - * - * @return the reason for creating the table. - */ - public TableFileCreationReason getReason() { - return reason; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - TableFileCreationBriefInfo that = (TableFileCreationBriefInfo) o; - return jobId == that.jobId && Objects.equals(dbName, that.dbName) - && Objects.equals(columnFamilyName, that.columnFamilyName) - && Objects.equals(filePath, that.filePath) && reason == that.reason; - } - - @Override - public int hashCode() { - return Objects.hash(dbName, columnFamilyName, filePath, jobId, reason); - } - - @Override - public String toString() { - return "TableFileCreationBriefInfo{" - + "dbName='" + dbName + '\'' + ", columnFamilyName='" + columnFamilyName + '\'' - + ", filePath='" + filePath + '\'' + ", jobId=" + jobId + ", reason=" + reason + '}'; - } -} diff --git a/java/src/main/java/org/rocksdb/TableFileCreationInfo.java b/java/src/main/java/org/rocksdb/TableFileCreationInfo.java deleted file mode 100644 index 7742f32f1..000000000 --- a/java/src/main/java/org/rocksdb/TableFileCreationInfo.java +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
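For context, a sketch of where TableFileCreationBriefInfo, TableFileCreationInfo and TableFileDeletionInfo typically arrive: as callback arguments on an event listener. It assumes the AbstractEventListener class and Options.setListeners from the same package, neither of which appears in this excerpt:

import java.util.Collections;
import org.rocksdb.AbstractEventListener;
import org.rocksdb.FlushOptions;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.TableFileCreationInfo;
import org.rocksdb.TableFileDeletionInfo;

public class TableFileEventsExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    final AbstractEventListener listener = new AbstractEventListener() {
      @Override
      public void onTableFileCreated(final TableFileCreationInfo info) {
        System.out.println("created " + info.getFilePath() + " (" + info.getFileSize()
            + " bytes), reason " + info.getReason());
      }
      @Override
      public void onTableFileDeleted(final TableFileDeletionInfo info) {
        System.out.println("deleted " + info.getFilePath() + ", status " + info.getStatus());
      }
    };
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             .setListeners(Collections.singletonList(listener));
         final RocksDB db = RocksDB.open(options, "/tmp/table-file-events-example");
         final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) {
      db.put("key".getBytes(), "value".getBytes());
      db.flush(flushOptions); // writing an SST file triggers onTableFileCreated
    }
  }
}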
- -package org.rocksdb; - -import java.util.Objects; - -public class TableFileCreationInfo extends TableFileCreationBriefInfo { - private final long fileSize; - private final TableProperties tableProperties; - private final Status status; - - /** - * Access is protected as this will only be constructed from - * C++ via JNI. - * - * @param fileSize the size of the table file - * @param tableProperties the properties of the table file - * @param status the status of the creation operation - * @param dbName the database name - * @param columnFamilyName the column family name - * @param filePath the path to the table file - * @param jobId the job identifier - * @param tableFileCreationReasonValue the reason for creation of the table file - */ - protected TableFileCreationInfo(final long fileSize, final TableProperties tableProperties, - final Status status, final String dbName, final String columnFamilyName, - final String filePath, final int jobId, final byte tableFileCreationReasonValue) { - super(dbName, columnFamilyName, filePath, jobId, tableFileCreationReasonValue); - this.fileSize = fileSize; - this.tableProperties = tableProperties; - this.status = status; - } - - /** - * Get the size of the file. - * - * @return the size. - */ - public long getFileSize() { - return fileSize; - } - - /** - * Get the detailed properties of the created file. - * - * @return the properties. - */ - public TableProperties getTableProperties() { - return tableProperties; - } - - /** - * Get the status indicating whether the creation was successful or not. - * - * @return the status. - */ - public Status getStatus() { - return status; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - TableFileCreationInfo that = (TableFileCreationInfo) o; - return fileSize == that.fileSize && Objects.equals(tableProperties, that.tableProperties) - && Objects.equals(status, that.status); - } - - @Override - public int hashCode() { - return Objects.hash(fileSize, tableProperties, status); - } - - @Override - public String toString() { - return "TableFileCreationInfo{" - + "fileSize=" + fileSize + ", tableProperties=" + tableProperties + ", status=" + status - + '}'; - } -} diff --git a/java/src/main/java/org/rocksdb/TableFileCreationReason.java b/java/src/main/java/org/rocksdb/TableFileCreationReason.java deleted file mode 100644 index d3984663d..000000000 --- a/java/src/main/java/org/rocksdb/TableFileCreationReason.java +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public enum TableFileCreationReason { - FLUSH((byte) 0x00), - COMPACTION((byte) 0x01), - RECOVERY((byte) 0x02), - MISC((byte) 0x03); - - private final byte value; - - TableFileCreationReason(final byte value) { - this.value = value; - } - - /** - * Get the internal representation. - * - * @return the internal representation - */ - byte getValue() { - return value; - } - - /** - * Get the TableFileCreationReason from the internal representation value. - * - * @return the table file creation reason. - * - * @throws IllegalArgumentException if the value is unknown. 
- */ - static TableFileCreationReason fromValue(final byte value) { - for (final TableFileCreationReason tableFileCreationReason : TableFileCreationReason.values()) { - if (tableFileCreationReason.value == value) { - return tableFileCreationReason; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for TableFileCreationReason: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/TableFileDeletionInfo.java b/java/src/main/java/org/rocksdb/TableFileDeletionInfo.java deleted file mode 100644 index 8aad03ae8..000000000 --- a/java/src/main/java/org/rocksdb/TableFileDeletionInfo.java +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Objects; - -public class TableFileDeletionInfo { - private final String dbName; - private final String filePath; - private final int jobId; - private final Status status; - - /** - * Access is package private as this will only be constructed from - * C++ via JNI and for testing. - */ - TableFileDeletionInfo( - final String dbName, final String filePath, final int jobId, final Status status) { - this.dbName = dbName; - this.filePath = filePath; - this.jobId = jobId; - this.status = status; - } - - /** - * Get the name of the database where the file was deleted. - * - * @return the name of the database. - */ - public String getDbName() { - return dbName; - } - - /** - * Get the path to the deleted file. - * - * @return the path. - */ - public String getFilePath() { - return filePath; - } - - /** - * Get the id of the job which deleted the file. - * - * @return the id of the job. - */ - public int getJobId() { - return jobId; - } - - /** - * Get the status indicating whether the deletion was successful or not. - * - * @return the status - */ - public Status getStatus() { - return status; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - TableFileDeletionInfo that = (TableFileDeletionInfo) o; - return jobId == that.jobId && Objects.equals(dbName, that.dbName) - && Objects.equals(filePath, that.filePath) && Objects.equals(status, that.status); - } - - @Override - public int hashCode() { - return Objects.hash(dbName, filePath, jobId, status); - } - - @Override - public String toString() { - return "TableFileDeletionInfo{" - + "dbName='" + dbName + '\'' + ", filePath='" + filePath + '\'' + ", jobId=" + jobId - + ", status=" + status + '}'; - } -} diff --git a/java/src/main/java/org/rocksdb/TableFilter.java b/java/src/main/java/org/rocksdb/TableFilter.java deleted file mode 100644 index a39a329fb..000000000 --- a/java/src/main/java/org/rocksdb/TableFilter.java +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -/** - * Filter for iterating a table. - */ -public interface TableFilter { - - /** - * A callback to determine whether relevant keys for this scan exist in a - * given table based on the table's properties. The callback is passed the - * properties of each table during iteration. If the callback returns false, - * the table will not be scanned. This option only affects Iterators and has - * no impact on point lookups. - * - * @param tableProperties the table properties. 
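A sketch of implementing the TableFilter callback described above; it assumes the AbstractTableFilter base class, ReadOptions.setTableFilter and TableProperties.getNumEntries() from the same package, none of which is shown in this excerpt:

import org.rocksdb.AbstractTableFilter;
import org.rocksdb.ReadOptions;
import org.rocksdb.TableProperties;

public class NonEmptyTableFilter extends AbstractTableFilter {
  @Override
  public boolean filter(final TableProperties tableProperties) {
    // Only scan tables that contain at least one entry.
    return tableProperties.getNumEntries() > 0;
  }
}

Usage would then look like new ReadOptions().setTableFilter(new NonEmptyTableFilter()) on the ReadOptions used for iteration.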
- * - * @return true if the table should be scanned, false otherwise. - */ - boolean filter(final TableProperties tableProperties); -} diff --git a/java/src/main/java/org/rocksdb/TableFormatConfig.java b/java/src/main/java/org/rocksdb/TableFormatConfig.java deleted file mode 100644 index dbe524c42..000000000 --- a/java/src/main/java/org/rocksdb/TableFormatConfig.java +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * TableFormatConfig is used to config the internal Table format of a RocksDB. - * To make a RocksDB to use a specific Table format, its associated - * TableFormatConfig should be properly set and passed into Options via - * Options.setTableFormatConfig() and open the db using that Options. - */ -public abstract class TableFormatConfig { - /** - *
This function should only be called by Options.setTableFormatConfig(), - * which will create a c++ shared-pointer to the c++ TableFactory - * that is associated with the Java TableFormatConfig.
- * - * @return native handle address to native table instance. - */ - abstract protected long newTableFactoryHandle(); -} diff --git a/java/src/main/java/org/rocksdb/TableProperties.java b/java/src/main/java/org/rocksdb/TableProperties.java deleted file mode 100644 index 096341a4c..000000000 --- a/java/src/main/java/org/rocksdb/TableProperties.java +++ /dev/null @@ -1,426 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -import java.util.Arrays; -import java.util.Map; -import java.util.Objects; - -/** - * TableProperties contains read-only properties of its associated - * table. - */ -public class TableProperties { - private final long dataSize; - private final long indexSize; - private final long indexPartitions; - private final long topLevelIndexSize; - private final long indexKeyIsUserKey; - private final long indexValueIsDeltaEncoded; - private final long filterSize; - private final long rawKeySize; - private final long rawValueSize; - private final long numDataBlocks; - private final long numEntries; - private final long numDeletions; - private final long numMergeOperands; - private final long numRangeDeletions; - private final long formatVersion; - private final long fixedKeyLen; - private final long columnFamilyId; - private final long creationTime; - private final long oldestKeyTime; - private final long slowCompressionEstimatedDataSize; - private final long fastCompressionEstimatedDataSize; - private final long externalSstFileGlobalSeqnoOffset; - private final byte[] columnFamilyName; - private final String filterPolicyName; - private final String comparatorName; - private final String mergeOperatorName; - private final String prefixExtractorName; - private final String propertyCollectorsNames; - private final String compressionName; - private final Map userCollectedProperties; - private final Map readableProperties; - - /** - * Access is package private as this will only be constructed from - * C++ via JNI and for testing. 
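TableProperties objects are not constructed by user code; they are normally fetched from a live database. A minimal sketch of reading them back, which also shows a TableFormatConfig subclass (BlockBasedTableConfig) being passed through Options as described above; the path, block size, and default-column-family assumption are illustrative.

```java
import java.util.Map;
import org.rocksdb.*;

public class TablePropertiesExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             // TableFormatConfig subclass selecting the block-based SST format
             .setTableFormatConfig(new BlockBasedTableConfig().setBlockSize(16 * 1024));
         final RocksDB db = RocksDB.open(options, "/tmp/table-properties-example");
         final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) {
      db.put("a".getBytes(), "1".getBytes());
      db.flush(flushOptions);
      // One TableProperties entry per live SST file, keyed by file path.
      for (final Map.Entry<String, TableProperties> e :
          db.getPropertiesOfAllTables().entrySet()) {
        System.out.println(e.getKey()
            + " entries=" + e.getValue().getNumEntries()
            + " dataSize=" + e.getValue().getDataSize()
            + " compression=" + e.getValue().getCompressionName());
      }
    }
  }
}
```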
- */ - TableProperties(final long dataSize, final long indexSize, final long indexPartitions, - final long topLevelIndexSize, final long indexKeyIsUserKey, - final long indexValueIsDeltaEncoded, final long filterSize, final long rawKeySize, - final long rawValueSize, final long numDataBlocks, final long numEntries, - final long numDeletions, final long numMergeOperands, final long numRangeDeletions, - final long formatVersion, final long fixedKeyLen, final long columnFamilyId, - final long creationTime, final long oldestKeyTime, - final long slowCompressionEstimatedDataSize, final long fastCompressionEstimatedDataSize, - final long externalSstFileGlobalSeqnoOffset, final byte[] columnFamilyName, - final String filterPolicyName, final String comparatorName, final String mergeOperatorName, - final String prefixExtractorName, final String propertyCollectorsNames, - final String compressionName, final Map userCollectedProperties, - final Map readableProperties) { - this.dataSize = dataSize; - this.indexSize = indexSize; - this.indexPartitions = indexPartitions; - this.topLevelIndexSize = topLevelIndexSize; - this.indexKeyIsUserKey = indexKeyIsUserKey; - this.indexValueIsDeltaEncoded = indexValueIsDeltaEncoded; - this.filterSize = filterSize; - this.rawKeySize = rawKeySize; - this.rawValueSize = rawValueSize; - this.numDataBlocks = numDataBlocks; - this.numEntries = numEntries; - this.numDeletions = numDeletions; - this.numMergeOperands = numMergeOperands; - this.numRangeDeletions = numRangeDeletions; - this.formatVersion = formatVersion; - this.fixedKeyLen = fixedKeyLen; - this.columnFamilyId = columnFamilyId; - this.creationTime = creationTime; - this.oldestKeyTime = oldestKeyTime; - this.slowCompressionEstimatedDataSize = slowCompressionEstimatedDataSize; - this.fastCompressionEstimatedDataSize = fastCompressionEstimatedDataSize; - this.externalSstFileGlobalSeqnoOffset = externalSstFileGlobalSeqnoOffset; - this.columnFamilyName = columnFamilyName; - this.filterPolicyName = filterPolicyName; - this.comparatorName = comparatorName; - this.mergeOperatorName = mergeOperatorName; - this.prefixExtractorName = prefixExtractorName; - this.propertyCollectorsNames = propertyCollectorsNames; - this.compressionName = compressionName; - this.userCollectedProperties = userCollectedProperties; - this.readableProperties = readableProperties; - } - - /** - * Get the total size of all data blocks. - * - * @return the total size of all data blocks. - */ - public long getDataSize() { - return dataSize; - } - - /** - * Get the size of index block. - * - * @return the size of index block. - */ - public long getIndexSize() { - return indexSize; - } - - /** - * Get the total number of index partitions - * if {@link IndexType#kTwoLevelIndexSearch} is used. - * - * @return the total number of index partitions. - */ - public long getIndexPartitions() { - return indexPartitions; - } - - /** - * Size of the top-level index - * if {@link IndexType#kTwoLevelIndexSearch} is used. - * - * @return the size of the top-level index. - */ - public long getTopLevelIndexSize() { - return topLevelIndexSize; - } - - /** - * Whether the index key is user key. - * Otherwise it includes 8 byte of sequence - * number added by internal key format. - * - * @return the index key - */ - public long getIndexKeyIsUserKey() { - return indexKeyIsUserKey; - } - - /** - * Whether delta encoding is used to encode the index values. - * - * @return whether delta encoding is used to encode the index values. 
- */ - public long getIndexValueIsDeltaEncoded() { - return indexValueIsDeltaEncoded; - } - - /** - * Get the size of filter block. - * - * @return the size of filter block. - */ - public long getFilterSize() { - return filterSize; - } - - /** - * Get the total raw key size. - * - * @return the total raw key size. - */ - public long getRawKeySize() { - return rawKeySize; - } - - /** - * Get the total raw value size. - * - * @return the total raw value size. - */ - public long getRawValueSize() { - return rawValueSize; - } - - /** - * Get the number of blocks in this table. - * - * @return the number of blocks in this table. - */ - public long getNumDataBlocks() { - return numDataBlocks; - } - - /** - * Get the number of entries in this table. - * - * @return the number of entries in this table. - */ - public long getNumEntries() { - return numEntries; - } - - /** - * Get the number of deletions in the table. - * - * @return the number of deletions in the table. - */ - public long getNumDeletions() { - return numDeletions; - } - - /** - * Get the number of merge operands in the table. - * - * @return the number of merge operands in the table. - */ - public long getNumMergeOperands() { - return numMergeOperands; - } - - /** - * Get the number of range deletions in this table. - * - * @return the number of range deletions in this table. - */ - public long getNumRangeDeletions() { - return numRangeDeletions; - } - - /** - * Get the format version, reserved for backward compatibility. - * - * @return the format version. - */ - public long getFormatVersion() { - return formatVersion; - } - - /** - * Get the length of the keys. - * - * @return 0 when the key is variable length, otherwise number of - * bytes for each key. - */ - public long getFixedKeyLen() { - return fixedKeyLen; - } - - /** - * Get the ID of column family for this SST file, - * corresponding to the column family identified by - * {@link #getColumnFamilyName()}. - * - * @return the id of the column family. - */ - public long getColumnFamilyId() { - return columnFamilyId; - } - - /** - * The time when the SST file was created. - * Since SST files are immutable, this is equivalent - * to last modified time. - * - * @return the created time. - */ - public long getCreationTime() { - return creationTime; - } - - /** - * Get the timestamp of the earliest key. - * - * @return 0 means unknown, otherwise the timestamp. - */ - public long getOldestKeyTime() { - return oldestKeyTime; - } - - /** - * Get the estimated size of data blocks compressed with a relatively slower - * compression algorithm. - * - * @return 0 means unknown, otherwise the timestamp. - */ - public long getSlowCompressionEstimatedDataSize() { - return slowCompressionEstimatedDataSize; - } - - /** - * Get the estimated size of data blocks compressed with a relatively faster - * compression algorithm. - * - * @return 0 means unknown, otherwise the timestamp. - */ - public long getFastCompressionEstimatedDataSize() { - return fastCompressionEstimatedDataSize; - } - - /** - * Get the name of the column family with which this - * SST file is associated. - * - * @return the name of the column family, or null if the - * column family is unknown. - */ - /*@Nullable*/ public byte[] getColumnFamilyName() { - return columnFamilyName; - } - - /** - * Get the name of the filter policy used in this table. - * - * @return the name of the filter policy, or null if - * no filter policy is used. 
- */ - /*@Nullable*/ public String getFilterPolicyName() { - return filterPolicyName; - } - - /** - * Get the name of the comparator used in this table. - * - * @return the name of the comparator. - */ - public String getComparatorName() { - return comparatorName; - } - - /** - * Get the name of the merge operator used in this table. - * - * @return the name of the merge operator, or null if no merge operator - * is used. - */ - /*@Nullable*/ public String getMergeOperatorName() { - return mergeOperatorName; - } - - /** - * Get the name of the prefix extractor used in this table. - * - * @return the name of the prefix extractor, or null if no prefix - * extractor is used. - */ - /*@Nullable*/ public String getPrefixExtractorName() { - return prefixExtractorName; - } - - /** - * Get the names of the property collectors factories used in this table. - * - * @return the names of the property collector factories separated - * by commas, e.g. {collector_name[1]},{collector_name[2]},... - */ - public String getPropertyCollectorsNames() { - return propertyCollectorsNames; - } - - /** - * Get the name of the compression algorithm used to compress the SST files. - * - * @return the name of the compression algorithm. - */ - public String getCompressionName() { - return compressionName; - } - - /** - * Get the user collected properties. - * - * @return the user collected properties. - */ - public Map getUserCollectedProperties() { - return userCollectedProperties; - } - - /** - * Get the readable properties. - * - * @return the readable properties. - */ - public Map getReadableProperties() { - return readableProperties; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - TableProperties that = (TableProperties) o; - return dataSize == that.dataSize && indexSize == that.indexSize - && indexPartitions == that.indexPartitions && topLevelIndexSize == that.topLevelIndexSize - && indexKeyIsUserKey == that.indexKeyIsUserKey - && indexValueIsDeltaEncoded == that.indexValueIsDeltaEncoded - && filterSize == that.filterSize && rawKeySize == that.rawKeySize - && rawValueSize == that.rawValueSize && numDataBlocks == that.numDataBlocks - && numEntries == that.numEntries && numDeletions == that.numDeletions - && numMergeOperands == that.numMergeOperands && numRangeDeletions == that.numRangeDeletions - && formatVersion == that.formatVersion && fixedKeyLen == that.fixedKeyLen - && columnFamilyId == that.columnFamilyId && creationTime == that.creationTime - && oldestKeyTime == that.oldestKeyTime - && slowCompressionEstimatedDataSize == that.slowCompressionEstimatedDataSize - && fastCompressionEstimatedDataSize == that.fastCompressionEstimatedDataSize - && externalSstFileGlobalSeqnoOffset == that.externalSstFileGlobalSeqnoOffset - && Arrays.equals(columnFamilyName, that.columnFamilyName) - && Objects.equals(filterPolicyName, that.filterPolicyName) - && Objects.equals(comparatorName, that.comparatorName) - && Objects.equals(mergeOperatorName, that.mergeOperatorName) - && Objects.equals(prefixExtractorName, that.prefixExtractorName) - && Objects.equals(propertyCollectorsNames, that.propertyCollectorsNames) - && Objects.equals(compressionName, that.compressionName) - && Objects.equals(userCollectedProperties, that.userCollectedProperties) - && Objects.equals(readableProperties, that.readableProperties); - } - - @Override - public int hashCode() { - int result = Objects.hash(dataSize, indexSize, indexPartitions, 
topLevelIndexSize, - indexKeyIsUserKey, indexValueIsDeltaEncoded, filterSize, rawKeySize, rawValueSize, - numDataBlocks, numEntries, numDeletions, numMergeOperands, numRangeDeletions, formatVersion, - fixedKeyLen, columnFamilyId, creationTime, oldestKeyTime, slowCompressionEstimatedDataSize, - fastCompressionEstimatedDataSize, externalSstFileGlobalSeqnoOffset, filterPolicyName, - comparatorName, mergeOperatorName, prefixExtractorName, propertyCollectorsNames, - compressionName, userCollectedProperties, readableProperties); - result = 31 * result + Arrays.hashCode(columnFamilyName); - return result; - } -} diff --git a/java/src/main/java/org/rocksdb/ThreadStatus.java b/java/src/main/java/org/rocksdb/ThreadStatus.java deleted file mode 100644 index 062df5889..000000000 --- a/java/src/main/java/org/rocksdb/ThreadStatus.java +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Map; - -public class ThreadStatus { - private final long threadId; - private final ThreadType threadType; - private final String dbName; - private final String cfName; - private final OperationType operationType; - private final long operationElapsedTime; // microseconds - private final OperationStage operationStage; - private final long operationProperties[]; - private final StateType stateType; - - /** - * Invoked from C++ via JNI - */ - private ThreadStatus(final long threadId, - final byte threadTypeValue, - final String dbName, - final String cfName, - final byte operationTypeValue, - final long operationElapsedTime, - final byte operationStageValue, - final long[] operationProperties, - final byte stateTypeValue) { - this.threadId = threadId; - this.threadType = ThreadType.fromValue(threadTypeValue); - this.dbName = dbName; - this.cfName = cfName; - this.operationType = OperationType.fromValue(operationTypeValue); - this.operationElapsedTime = operationElapsedTime; - this.operationStage = OperationStage.fromValue(operationStageValue); - this.operationProperties = operationProperties; - this.stateType = StateType.fromValue(stateTypeValue); - } - - /** - * Get the unique ID of the thread. - * - * @return the thread id - */ - public long getThreadId() { - return threadId; - } - - /** - * Get the type of the thread. - * - * @return the type of the thread. - */ - public ThreadType getThreadType() { - return threadType; - } - - /** - * The name of the DB instance that the thread is currently - * involved with. - * - * @return the name of the db, or null if the thread is not involved - * in any DB operation. - */ - /* @Nullable */ public String getDbName() { - return dbName; - } - - /** - * The name of the Column Family that the thread is currently - * involved with. - * - * @return the name of the db, or null if the thread is not involved - * in any column Family operation. - */ - /* @Nullable */ public String getCfName() { - return cfName; - } - - /** - * Get the operation (high-level action) that the current thread is involved - * with. - * - * @return the operation - */ - public OperationType getOperationType() { - return operationType; - } - - /** - * Get the elapsed time of the current thread operation in microseconds. 
- * - * @return the elapsed time - */ - public long getOperationElapsedTime() { - return operationElapsedTime; - } - - /** - * Get the current stage where the thread is involved in the current - * operation. - * - * @return the current stage of the current operation - */ - public OperationStage getOperationStage() { - return operationStage; - } - - /** - * Get the list of properties that describe some details about the current - * operation. - * - * Each field in might have different meanings for different operations. - * - * @return the properties - */ - public long[] getOperationProperties() { - return operationProperties; - } - - /** - * Get the state (lower-level action) that the current thread is involved - * with. - * - * @return the state - */ - public StateType getStateType() { - return stateType; - } - - /** - * Get the name of the thread type. - * - * @param threadType the thread type - * - * @return the name of the thread type. - */ - public static String getThreadTypeName(final ThreadType threadType) { - return getThreadTypeName(threadType.getValue()); - } - - /** - * Get the name of an operation given its type. - * - * @param operationType the type of operation. - * - * @return the name of the operation. - */ - public static String getOperationName(final OperationType operationType) { - return getOperationName(operationType.getValue()); - } - - public static String microsToString(final long operationElapsedTime) { - return microsToStringNative(operationElapsedTime); - } - - /** - * Obtain a human-readable string describing the specified operation stage. - * - * @param operationStage the stage of the operation. - * - * @return the description of the operation stage. - */ - public static String getOperationStageName( - final OperationStage operationStage) { - return getOperationStageName(operationStage.getValue()); - } - - /** - * Obtain the name of the "i"th operation property of the - * specified operation. - * - * @param operationType the operation type. - * @param i the index of the operation property. - * - * @return the name of the operation property - */ - public static String getOperationPropertyName( - final OperationType operationType, final int i) { - return getOperationPropertyName(operationType.getValue(), i); - } - - /** - * Translate the "i"th property of the specified operation given - * a property value. - * - * @param operationType the operation type. - * @param operationProperties the operation properties. - * - * @return the property values. - */ - public static Map interpretOperationProperties( - final OperationType operationType, final long[] operationProperties) { - return interpretOperationProperties(operationType.getValue(), - operationProperties); - } - - /** - * Obtain the name of a state given its type. - * - * @param stateType the state type. - * - * @return the name of the state. 
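For orientation, a hedged sketch of how these per-thread snapshots are typically polled in RocksJava, via Env.getThreadList(); the printed fields are illustrative.

```java
import java.util.List;
import org.rocksdb.*;

public class ThreadStatusExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    // One ThreadStatus per RocksDB background/user thread known to the default Env.
    final List<ThreadStatus> threadList = Env.getDefault().getThreadList();
    for (final ThreadStatus ts : threadList) {
      System.out.println(ts.getThreadType()
          + " op=" + ThreadStatus.getOperationName(ts.getOperationType())
          + " elapsed=" + ThreadStatus.microsToString(ts.getOperationElapsedTime())
          + " db=" + ts.getDbName());
    }
  }
}
```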
- */ - public static String getStateName(final StateType stateType) { - return getStateName(stateType.getValue()); - } - - private static native String getThreadTypeName(final byte threadTypeValue); - private static native String getOperationName(final byte operationTypeValue); - private static native String microsToStringNative( - final long operationElapsedTime); - private static native String getOperationStageName( - final byte operationStageTypeValue); - private static native String getOperationPropertyName( - final byte operationTypeValue, final int i); - private static native MapinterpretOperationProperties( - final byte operationTypeValue, final long[] operationProperties); - private static native String getStateName(final byte stateTypeValue); -} diff --git a/java/src/main/java/org/rocksdb/ThreadType.java b/java/src/main/java/org/rocksdb/ThreadType.java deleted file mode 100644 index cc329f442..000000000 --- a/java/src/main/java/org/rocksdb/ThreadType.java +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The type of a thread. - */ -public enum ThreadType { - /** - * RocksDB BG thread in high-pri thread pool. - */ - HIGH_PRIORITY((byte)0x0), - - /** - * RocksDB BG thread in low-pri thread pool. - */ - LOW_PRIORITY((byte)0x1), - - /** - * User thread (Non-RocksDB BG thread). - */ - USER((byte)0x2), - - /** - * RocksDB BG thread in bottom-pri thread pool - */ - BOTTOM_PRIORITY((byte)0x3); - - private final byte value; - - ThreadType(final byte value) { - this.value = value; - } - - /** - * Get the internal representation value. - * - * @return the internal representation value. - */ - byte getValue() { - return value; - } - - /** - * Get the Thread type from the internal representation value. - * - * @param value the internal representation value. - * - * @return the thread type - * - * @throws IllegalArgumentException if the value does not match a ThreadType - */ - static ThreadType fromValue(final byte value) - throws IllegalArgumentException { - for (final ThreadType threadType : ThreadType.values()) { - if (threadType.value == value) { - return threadType; - } - } - throw new IllegalArgumentException("Unknown value for ThreadType: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/TickerType.java b/java/src/main/java/org/rocksdb/TickerType.java deleted file mode 100644 index 98e3043c6..000000000 --- a/java/src/main/java/org/rocksdb/TickerType.java +++ /dev/null @@ -1,792 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * The logical mapping of tickers defined in rocksdb::Tickers. - * - * Java byte value mappings don't align 1:1 to the c++ values. c++ rocksdb::Tickers enumeration type - * is uint32_t and java org.rocksdb.TickerType is byte, this causes mapping issues when - * rocksdb::Tickers value is greater then 127 (0x7F) for jbyte jni interface as range greater is not - * available. Without breaking interface in minor versions, value mappings for - * org.rocksdb.TickerType leverage full byte range [-128 (-0x80), (0x7F)]. 
Newer tickers added - * should descend into negative values until TICKER_ENUM_MAX reaches -128 (-0x80). - */ -public enum TickerType { - - /** - * total block cache misses - * - * REQUIRES: BLOCK_CACHE_MISS == BLOCK_CACHE_INDEX_MISS + - * BLOCK_CACHE_FILTER_MISS + - * BLOCK_CACHE_DATA_MISS; - */ - BLOCK_CACHE_MISS((byte) 0x0), - - /** - * total block cache hit - * - * REQUIRES: BLOCK_CACHE_HIT == BLOCK_CACHE_INDEX_HIT + - * BLOCK_CACHE_FILTER_HIT + - * BLOCK_CACHE_DATA_HIT; - */ - BLOCK_CACHE_HIT((byte) 0x1), - - BLOCK_CACHE_ADD((byte) 0x2), - - /** - * # of failures when adding blocks to block cache. - */ - BLOCK_CACHE_ADD_FAILURES((byte) 0x3), - - /** - * # of times cache miss when accessing index block from block cache. - */ - BLOCK_CACHE_INDEX_MISS((byte) 0x4), - - /** - * # of times cache hit when accessing index block from block cache. - */ - BLOCK_CACHE_INDEX_HIT((byte) 0x5), - - /** - * # of index blocks added to block cache. - */ - BLOCK_CACHE_INDEX_ADD((byte) 0x6), - - /** - * # of bytes of index blocks inserted into cache - */ - BLOCK_CACHE_INDEX_BYTES_INSERT((byte) 0x7), - - /** - * # of times cache miss when accessing filter block from block cache. - */ - BLOCK_CACHE_FILTER_MISS((byte) 0x9), - - /** - * # of times cache hit when accessing filter block from block cache. - */ - BLOCK_CACHE_FILTER_HIT((byte) 0xA), - - /** - * # of filter blocks added to block cache. - */ - BLOCK_CACHE_FILTER_ADD((byte) 0xB), - - /** - * # of bytes of bloom filter blocks inserted into cache - */ - BLOCK_CACHE_FILTER_BYTES_INSERT((byte) 0xC), - - /** - * # of times cache miss when accessing data block from block cache. - */ - BLOCK_CACHE_DATA_MISS((byte) 0xE), - - /** - * # of times cache hit when accessing data block from block cache. - */ - BLOCK_CACHE_DATA_HIT((byte) 0xF), - - /** - * # of data blocks added to block cache. - */ - BLOCK_CACHE_DATA_ADD((byte) 0x10), - - /** - * # of bytes of data blocks inserted into cache - */ - BLOCK_CACHE_DATA_BYTES_INSERT((byte) 0x11), - - /** - * # of bytes read from cache. - */ - BLOCK_CACHE_BYTES_READ((byte) 0x12), - - /** - * # of bytes written into cache. - */ - BLOCK_CACHE_BYTES_WRITE((byte) 0x13), - - /** - * # of times bloom filter has avoided file reads. - */ - BLOOM_FILTER_USEFUL((byte) 0x14), - - /** - * # persistent cache hit - */ - PERSISTENT_CACHE_HIT((byte) 0x15), - - /** - * # persistent cache miss - */ - PERSISTENT_CACHE_MISS((byte) 0x16), - - /** - * # total simulation block cache hits - */ - SIM_BLOCK_CACHE_HIT((byte) 0x17), - - /** - * # total simulation block cache misses - */ - SIM_BLOCK_CACHE_MISS((byte) 0x18), - - /** - * # of memtable hits. - */ - MEMTABLE_HIT((byte) 0x19), - - /** - * # of memtable misses. - */ - MEMTABLE_MISS((byte) 0x1A), - - /** - * # of Get() queries served by L0 - */ - GET_HIT_L0((byte) 0x1B), - - /** - * # of Get() queries served by L1 - */ - GET_HIT_L1((byte) 0x1C), - - /** - * # of Get() queries served by L2 and up - */ - GET_HIT_L2_AND_UP((byte) 0x1D), - - /** - * COMPACTION_KEY_DROP_* count the reasons for key drop during compaction - * There are 4 reasons currently. - */ - - /** - * key was written with a newer value. - */ - COMPACTION_KEY_DROP_NEWER_ENTRY((byte) 0x1E), - - /** - * Also includes keys dropped for range del. - * The key is obsolete. - */ - COMPACTION_KEY_DROP_OBSOLETE((byte) 0x1F), - - /** - * key was covered by a range tombstone. - */ - COMPACTION_KEY_DROP_RANGE_DEL((byte) 0x20), - - /** - * User compaction function has dropped the key. 
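In practice the ticker constants are read back through a Statistics object attached to the options; the Java byte values discussed above are translated to the native uint32_t ids by the JNI layer, so callers only ever see the enum. A minimal sketch, with an illustrative database path.

```java
import org.rocksdb.*;

public class TickerExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Statistics statistics = new Statistics();
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setStatistics(statistics);
         final RocksDB db = RocksDB.open(options, "/tmp/ticker-example")) {
      db.put("k".getBytes(), "v".getBytes());
      db.get("k".getBytes());
      // Read counters accumulated by the operations above.
      System.out.println("block cache miss = "
          + statistics.getTickerCount(TickerType.BLOCK_CACHE_MISS));
      System.out.println("keys written     = "
          + statistics.getTickerCount(TickerType.NUMBER_KEYS_WRITTEN));
    }
  }
}
```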
- */ - COMPACTION_KEY_DROP_USER((byte) 0x21), - - /** - * all keys in range were deleted. - */ - COMPACTION_RANGE_DEL_DROP_OBSOLETE((byte) 0x22), - - /** - * Number of keys written to the database via the Put and Write call's. - */ - NUMBER_KEYS_WRITTEN((byte) 0x23), - - /** - * Number of Keys read. - */ - NUMBER_KEYS_READ((byte) 0x24), - - /** - * Number keys updated, if inplace update is enabled - */ - NUMBER_KEYS_UPDATED((byte) 0x25), - - /** - * The number of uncompressed bytes issued by DB::Put(), DB::Delete(),\ - * DB::Merge(), and DB::Write(). - */ - BYTES_WRITTEN((byte) 0x26), - - /** - * The number of uncompressed bytes read from DB::Get(). It could be - * either from memtables, cache, or table files. - * - * For the number of logical bytes read from DB::MultiGet(), - * please use {@link #NUMBER_MULTIGET_BYTES_READ}. - */ - BYTES_READ((byte) 0x27), - - /** - * The number of calls to seek. - */ - NUMBER_DB_SEEK((byte) 0x28), - - /** - * The number of calls to next. - */ - NUMBER_DB_NEXT((byte) 0x29), - - /** - * The number of calls to prev. - */ - NUMBER_DB_PREV((byte) 0x2A), - - /** - * The number of calls to seek that returned data. - */ - NUMBER_DB_SEEK_FOUND((byte) 0x2B), - - /** - * The number of calls to next that returned data. - */ - NUMBER_DB_NEXT_FOUND((byte) 0x2C), - - /** - * The number of calls to prev that returned data. - */ - NUMBER_DB_PREV_FOUND((byte) 0x2D), - - /** - * The number of uncompressed bytes read from an iterator. - * Includes size of key and value. - */ - ITER_BYTES_READ((byte) 0x2E), - - NO_FILE_OPENS((byte) 0x30), - - NO_FILE_ERRORS((byte) 0x31), - - /** - * Writer has to wait for compaction or flush to finish. - */ - STALL_MICROS((byte) 0x35), - - /** - * The wait time for db mutex. - * - * Disabled by default. To enable it set stats level to {@link StatsLevel#ALL} - */ - DB_MUTEX_WAIT_MICROS((byte) 0x36), - - /** - * Number of MultiGet calls. - */ - NUMBER_MULTIGET_CALLS((byte) 0x39), - - /** - * Number of MultiGet keys read. - */ - NUMBER_MULTIGET_KEYS_READ((byte) 0x3A), - - /** - * Number of MultiGet bytes read. - */ - NUMBER_MULTIGET_BYTES_READ((byte) 0x3B), - - NUMBER_MERGE_FAILURES((byte) 0x3D), - - /** - * Number of times bloom was checked before creating iterator on a - * file, and the number of times the check was useful in avoiding - * iterator creation (and thus likely IOPs). - */ - BLOOM_FILTER_PREFIX_CHECKED((byte) 0x3E), - BLOOM_FILTER_PREFIX_USEFUL((byte) 0x3F), - - /** - * Number of times we had to reseek inside an iteration to skip - * over large number of keys with same userkey. - */ - NUMBER_OF_RESEEKS_IN_ITERATION((byte) 0x40), - - /** - * Record the number of calls to {@link RocksDB#getUpdatesSince(long)}. Useful to keep track of - * transaction log iterator refreshes. - */ - GET_UPDATES_SINCE_CALLS((byte) 0x41), - - /** - * Number of times WAL sync is done. - */ - WAL_FILE_SYNCED((byte) 0x46), - - /** - * Number of bytes written to WAL. - */ - WAL_FILE_BYTES((byte) 0x47), - - /** - * Writes can be processed by requesting thread or by the thread at the - * head of the writers queue. - */ - WRITE_DONE_BY_SELF((byte) 0x48), - - /** - * Equivalent to writes done for others. - */ - WRITE_DONE_BY_OTHER((byte) 0x49), - - /** - * Number of Write calls that request WAL. - */ - WRITE_WITH_WAL((byte) 0x4B), - - /** - * Bytes read during compaction. - */ - COMPACT_READ_BYTES((byte) 0x4C), - - /** - * Bytes written during compaction. - */ - COMPACT_WRITE_BYTES((byte) 0x4D), - - /** - * Bytes written during flush. 
- */ - FLUSH_WRITE_BYTES((byte) 0x4E), - - /** - * Number of table's properties loaded directly from file, without creating - * table reader object. - */ - NUMBER_DIRECT_LOAD_TABLE_PROPERTIES((byte) 0x4F), - NUMBER_SUPERVERSION_ACQUIRES((byte) 0x50), - NUMBER_SUPERVERSION_RELEASES((byte) 0x51), - NUMBER_SUPERVERSION_CLEANUPS((byte) 0x52), - - /** - * # of compressions/decompressions executed - */ - NUMBER_BLOCK_COMPRESSED((byte) 0x53), - NUMBER_BLOCK_DECOMPRESSED((byte) 0x54), - - NUMBER_BLOCK_NOT_COMPRESSED((byte) 0x55), - MERGE_OPERATION_TOTAL_TIME((byte) 0x56), - FILTER_OPERATION_TOTAL_TIME((byte) 0x57), - - /** - * Row cache. - */ - ROW_CACHE_HIT((byte) 0x58), - ROW_CACHE_MISS((byte) 0x59), - - /** - * Read amplification statistics. - * - * Read amplification can be calculated using this formula - * (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES) - * - * REQUIRES: ReadOptions::read_amp_bytes_per_bit to be enabled - */ - - /** - * Estimate of total bytes actually used. - */ - READ_AMP_ESTIMATE_USEFUL_BYTES((byte) 0x5A), - - /** - * Total size of loaded data blocks. - */ - READ_AMP_TOTAL_READ_BYTES((byte) 0x5B), - - /** - * Number of refill intervals where rate limiter's bytes are fully consumed. - */ - NUMBER_RATE_LIMITER_DRAINS((byte) 0x5C), - - /** - * Number of internal skipped during iteration - */ - NUMBER_ITER_SKIP((byte) 0x5D), - - /** - * Number of MultiGet keys found (vs number requested) - */ - NUMBER_MULTIGET_KEYS_FOUND((byte) 0x5E), - - // -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX - /** - * Number of iterators created. - */ - NO_ITERATOR_CREATED((byte) -0x01), - - /** - * Number of iterators deleted. - */ - NO_ITERATOR_DELETED((byte) 0x60), - - /** - * Deletions obsoleted before bottom level due to file gap optimization. - */ - COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE((byte) 0x61), - - /** - * If a compaction was cancelled in sfm to prevent ENOSPC - */ - COMPACTION_CANCELLED((byte) 0x62), - - /** - * # of times bloom FullFilter has not avoided the reads. - */ - BLOOM_FILTER_FULL_POSITIVE((byte) 0x63), - - /** - * # of times bloom FullFilter has not avoided the reads and data actually - * exist. - */ - BLOOM_FILTER_FULL_TRUE_POSITIVE((byte) 0x64), - - /** - * BlobDB specific stats - * # of Put/PutTTL/PutUntil to BlobDB. - */ - BLOB_DB_NUM_PUT((byte) 0x65), - - /** - * # of Write to BlobDB. - */ - BLOB_DB_NUM_WRITE((byte) 0x66), - - /** - * # of Get to BlobDB. - */ - BLOB_DB_NUM_GET((byte) 0x67), - - /** - * # of MultiGet to BlobDB. - */ - BLOB_DB_NUM_MULTIGET((byte) 0x68), - - /** - * # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator. - */ - BLOB_DB_NUM_SEEK((byte) 0x69), - - /** - * # of Next to BlobDB iterator. - */ - BLOB_DB_NUM_NEXT((byte) 0x6A), - - /** - * # of Prev to BlobDB iterator. - */ - BLOB_DB_NUM_PREV((byte) 0x6B), - - /** - * # of keys written to BlobDB. - */ - BLOB_DB_NUM_KEYS_WRITTEN((byte) 0x6C), - - /** - * # of keys read from BlobDB. - */ - BLOB_DB_NUM_KEYS_READ((byte) 0x6D), - - /** - * # of bytes (key + value) written to BlobDB. - */ - BLOB_DB_BYTES_WRITTEN((byte) 0x6E), - - /** - * # of bytes (keys + value) read from BlobDB. - */ - BLOB_DB_BYTES_READ((byte) 0x6F), - - /** - * # of keys written by BlobDB as non-TTL inlined value. - */ - BLOB_DB_WRITE_INLINED((byte) 0x70), - - /** - * # of keys written by BlobDB as TTL inlined value. - */ - BLOB_DB_WRITE_INLINED_TTL((byte) 0x71), - - /** - * # of keys written by BlobDB as non-TTL blob value. 
- */ - BLOB_DB_WRITE_BLOB((byte) 0x72), - - /** - * # of keys written by BlobDB as TTL blob value. - */ - BLOB_DB_WRITE_BLOB_TTL((byte) 0x73), - - /** - * # of bytes written to blob file. - */ - BLOB_DB_BLOB_FILE_BYTES_WRITTEN((byte) 0x74), - - /** - * # of bytes read from blob file. - */ - BLOB_DB_BLOB_FILE_BYTES_READ((byte) 0x75), - - /** - * # of times a blob files being synced. - */ - BLOB_DB_BLOB_FILE_SYNCED((byte) 0x76), - - /** - * # of blob index evicted from base DB by BlobDB compaction filter because - * of expiration. - */ - BLOB_DB_BLOB_INDEX_EXPIRED_COUNT((byte) 0x77), - - /** - * Size of blob index evicted from base DB by BlobDB compaction filter - * because of expiration. - */ - BLOB_DB_BLOB_INDEX_EXPIRED_SIZE((byte) 0x78), - - /** - * # of blob index evicted from base DB by BlobDB compaction filter because - * of corresponding file deleted. - */ - BLOB_DB_BLOB_INDEX_EVICTED_COUNT((byte) 0x79), - - /** - * Size of blob index evicted from base DB by BlobDB compaction filter - * because of corresponding file deleted. - */ - BLOB_DB_BLOB_INDEX_EVICTED_SIZE((byte) 0x7A), - - /** - * # of blob files being garbage collected. - */ - BLOB_DB_GC_NUM_FILES((byte) 0x7B), - - /** - * # of blob files generated by garbage collection. - */ - BLOB_DB_GC_NUM_NEW_FILES((byte) 0x7C), - - /** - * # of BlobDB garbage collection failures. - */ - BLOB_DB_GC_FAILURES((byte) 0x7D), - - /** - * # of keys relocated to new blob file by garbage collection. - */ - BLOB_DB_GC_NUM_KEYS_RELOCATED((byte) -0x02), - - /** - * # of bytes relocated to new blob file by garbage collection. - */ - BLOB_DB_GC_BYTES_RELOCATED((byte) -0x05), - - /** - * # of blob files evicted because of BlobDB is full. - */ - BLOB_DB_FIFO_NUM_FILES_EVICTED((byte) -0x06), - - /** - * # of keys in the blob files evicted because of BlobDB is full. - */ - BLOB_DB_FIFO_NUM_KEYS_EVICTED((byte) -0x07), - - /** - * # of bytes in the blob files evicted because of BlobDB is full. - */ - BLOB_DB_FIFO_BYTES_EVICTED((byte) -0x08), - - /** - * These counters indicate a performance issue in WritePrepared transactions. - * We should not seem them ticking them much. - * # of times prepare_mutex_ is acquired in the fast path. - */ - TXN_PREPARE_MUTEX_OVERHEAD((byte) -0x09), - - /** - * # of times old_commit_map_mutex_ is acquired in the fast path. - */ - TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD((byte) -0x0A), - - /** - * # of times we checked a batch for duplicate keys. - */ - TXN_DUPLICATE_KEY_OVERHEAD((byte) -0x0B), - - /** - * # of times snapshot_mutex_ is acquired in the fast path. 
- */ - TXN_SNAPSHOT_MUTEX_OVERHEAD((byte) -0x0C), - - /** - * # of times ::Get returned TryAgain due to expired snapshot seq - */ - TXN_GET_TRY_AGAIN((byte) -0x0D), - - /** - * # of files marked as trash by delete scheduler - */ - FILES_MARKED_TRASH((byte) -0x0E), - - /** - * # of files deleted immediately by delete scheduler - */ - FILES_DELETED_IMMEDIATELY((byte) -0x0f), - - /** - * Compaction read and write statistics broken down by CompactionReason - */ - COMPACT_READ_BYTES_MARKED((byte) -0x10), - COMPACT_READ_BYTES_PERIODIC((byte) -0x11), - COMPACT_READ_BYTES_TTL((byte) -0x12), - COMPACT_WRITE_BYTES_MARKED((byte) -0x13), - COMPACT_WRITE_BYTES_PERIODIC((byte) -0x14), - COMPACT_WRITE_BYTES_TTL((byte) -0x15), - - /** - * DB error handler statistics - */ - ERROR_HANDLER_BG_ERROR_COUNT((byte) -0x16), - ERROR_HANDLER_BG_IO_ERROR_COUNT((byte) -0x17), - ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT((byte) -0x18), - ERROR_HANDLER_AUTORESUME_COUNT((byte) -0x19), - ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT((byte) -0x1A), - ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT((byte) -0x1B), - - /** - * Bytes of raw data (payload) found on memtable at flush time. - * Contains the sum of garbage payload (bytes that are discarded - * at flush time) and useful payload (bytes of data that will - * eventually be written to SSTable). - */ - MEMTABLE_PAYLOAD_BYTES_AT_FLUSH((byte) -0x1C), - /** - * Outdated bytes of data present on memtable at flush time. - */ - MEMTABLE_GARBAGE_BYTES_AT_FLUSH((byte) -0x1D), - - /** - * Number of secondary cache hits - */ - SECONDARY_CACHE_HITS((byte) -0x1E), - - /** - * Bytes read by `VerifyChecksum()` and `VerifyFileChecksums()` APIs. - */ - VERIFY_CHECKSUM_READ_BYTES((byte) -0x1F), - - /** - * Bytes read/written while creating backups - */ - BACKUP_READ_BYTES((byte) -0x20), - BACKUP_WRITE_BYTES((byte) -0x21), - - /** - * Remote compaction read/write statistics - */ - REMOTE_COMPACT_READ_BYTES((byte) -0x22), - REMOTE_COMPACT_WRITE_BYTES((byte) -0x23), - - /** - * Tiered storage related statistics - */ - HOT_FILE_READ_BYTES((byte) -0x24), - WARM_FILE_READ_BYTES((byte) -0x25), - COLD_FILE_READ_BYTES((byte) -0x26), - HOT_FILE_READ_COUNT((byte) -0x27), - WARM_FILE_READ_COUNT((byte) -0x28), - COLD_FILE_READ_COUNT((byte) -0x29), - - /** - * (non-)last level read statistics - */ - LAST_LEVEL_READ_BYTES((byte) -0x2A), - LAST_LEVEL_READ_COUNT((byte) -0x2B), - NON_LAST_LEVEL_READ_BYTES((byte) -0x2C), - NON_LAST_LEVEL_READ_COUNT((byte) -0x2D), - - BLOCK_CHECKSUM_COMPUTE_COUNT((byte) -0x2E), - - /** - * # of times cache miss when accessing blob from blob cache. - */ - BLOB_DB_CACHE_MISS((byte) -0x2F), - - /** - * # of times cache hit when accessing blob from blob cache. - */ - BLOB_DB_CACHE_HIT((byte) -0x30), - - /** - * # of data blocks added to blob cache. - */ - BLOB_DB_CACHE_ADD((byte) -0x31), - - /** - * # # of failures when adding blobs to blob cache. - */ - BLOB_DB_CACHE_ADD_FAILURES((byte) -0x32), - - /** - * # of bytes read from blob cache. - */ - BLOB_DB_CACHE_BYTES_READ((byte) -0x33), - - /** - * # of bytes written into blob cache. 
- */ - BLOB_DB_CACHE_BYTES_WRITE((byte) -0x34), - - /** - * Number of lookup into the prefetched tail (see - * `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`) - * that can't find its data for table open - */ - TABLE_OPEN_PREFETCH_TAIL_MISS((byte) -0x3A), - - /** - * Number of lookup into the prefetched tail (see - * `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`) - * that finds its data for table open - */ - TABLE_OPEN_PREFETCH_TAIL_HIT((byte) -0x3B), - - TICKER_ENUM_MAX((byte) 0x5F); - - private final byte value; - - TickerType(final byte value) { - this.value = value; - } - - /** - * Returns the byte value of the enumerations value - * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - * Get Ticker type by byte value. - * - * @param value byte representation of TickerType. - * - * @return {@link org.rocksdb.TickerType} instance. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. - */ - public static TickerType getTickerType(final byte value) { - for (final TickerType tickerType : TickerType.values()) { - if (tickerType.getValue() == value) { - return tickerType; - } - } - throw new IllegalArgumentException( - "Illegal value provided for TickerType."); - } -} diff --git a/java/src/main/java/org/rocksdb/TimedEnv.java b/java/src/main/java/org/rocksdb/TimedEnv.java deleted file mode 100644 index dc8b5d6ef..000000000 --- a/java/src/main/java/org/rocksdb/TimedEnv.java +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Timed environment. - */ -public class TimedEnv extends Env { - - /** - *
Creates a new environment that measures function call times for - * filesystem operations, reporting results to variables in PerfContext. - * - * The caller must delete the result when it is - * no longer needed.
- * - * @param baseEnv the base environment, - * must remain live while the result is in use. - */ - public TimedEnv(final Env baseEnv) { - super(createTimedEnv(baseEnv.nativeHandle_)); - } - - private static native long createTimedEnv(final long baseEnvHandle); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/TraceOptions.java b/java/src/main/java/org/rocksdb/TraceOptions.java deleted file mode 100644 index cf5f7bbe1..000000000 --- a/java/src/main/java/org/rocksdb/TraceOptions.java +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * TraceOptions is used for - * {@link RocksDB#startTrace(TraceOptions, AbstractTraceWriter)}. - */ -public class TraceOptions { - private final long maxTraceFileSize; - - public TraceOptions() { - this.maxTraceFileSize = 64L * 1024L * 1024L * 1024L; // 64 GB - } - - public TraceOptions(final long maxTraceFileSize) { - this.maxTraceFileSize = maxTraceFileSize; - } - - /** - * To avoid the trace file size grows larger than the storage space, - * user can set the max trace file size in Bytes. Default is 64 GB. - * - * @return the max trace size - */ - public long getMaxTraceFileSize() { - return maxTraceFileSize; - } -} diff --git a/java/src/main/java/org/rocksdb/TraceWriter.java b/java/src/main/java/org/rocksdb/TraceWriter.java deleted file mode 100644 index cb0234e9b..000000000 --- a/java/src/main/java/org/rocksdb/TraceWriter.java +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * TraceWriter allows exporting RocksDB traces to any system, - * one operation at a time. - */ -public interface TraceWriter { - - /** - * Write the data. - * - * @param data the data - * - * @throws RocksDBException if an error occurs whilst writing. - */ - void write(final Slice data) throws RocksDBException; - - /** - * Close the writer. - * - * @throws RocksDBException if an error occurs whilst closing the writer. - */ - void closeWriter() throws RocksDBException; - - /** - * Get the size of the file that this writer is writing to. - * - * @return the file size - */ - long getFileSize(); -} diff --git a/java/src/main/java/org/rocksdb/Transaction.java b/java/src/main/java/org/rocksdb/Transaction.java deleted file mode 100644 index b2cc8a932..000000000 --- a/java/src/main/java/org/rocksdb/Transaction.java +++ /dev/null @@ -1,2170 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -/** - * Provides BEGIN/COMMIT/ROLLBACK transactions. 
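TraceOptions and TraceWriter feed into RocksDB's startTrace/endTrace pair. A hedged sketch of recording a trace with RocksJava's AbstractTraceWriter; the sink is left empty and the 1 GiB size cap and path are illustrative.

```java
import org.rocksdb.*;

public class TraceExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/trace-example")) {
      // Cap the trace at 1 GiB rather than the 64 GB default.
      final TraceOptions traceOptions = new TraceOptions(1024L * 1024L * 1024L);
      db.startTrace(traceOptions, new AbstractTraceWriter() {
        @Override
        public void write(final Slice data) {
          // forward each trace chunk to a file, socket, etc.
        }
        @Override
        public void closeWriter() {
          // flush/close the sink
        }
        @Override
        public long getFileSize() {
          return 0; // bytes written so far, if tracked
        }
      });
      db.put("key".getBytes(), "value".getBytes()); // recorded in the trace
      db.endTrace();
    }
  }
}
```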
- * - * To use transactions, you must first create either an - * {@link OptimisticTransactionDB} or a {@link TransactionDB} - * - * To create a transaction, use - * {@link OptimisticTransactionDB#beginTransaction(org.rocksdb.WriteOptions)} or - * {@link TransactionDB#beginTransaction(org.rocksdb.WriteOptions)} - * - * It is up to the caller to synchronize access to this object. - * - * See samples/src/main/java/OptimisticTransactionSample.java and - * samples/src/main/java/TransactionSample.java for some simple - * examples. - */ -public class Transaction extends RocksObject { - - private final RocksDB parent; - - /** - * Intentionally package private - * as this is called from - * {@link OptimisticTransactionDB#beginTransaction(org.rocksdb.WriteOptions)} - * or {@link TransactionDB#beginTransaction(org.rocksdb.WriteOptions)} - * - * @param parent This must be either {@link TransactionDB} or - * {@link OptimisticTransactionDB} - * @param transactionHandle The native handle to the underlying C++ - * transaction object - */ - Transaction(final RocksDB parent, final long transactionHandle) { - super(transactionHandle); - this.parent = parent; - } - - /** - * If a transaction has a snapshot set, the transaction will ensure that - * any keys successfully written (or fetched via {@link #getForUpdate}) have - * not been modified outside of this transaction since the time the snapshot - * was set. - * - * If a snapshot has not been set, the transaction guarantees that keys have - * not been modified since the time each key was first written (or fetched via - * {@link #getForUpdate}). - * - * Using {@link #setSnapshot()} will provide stricter isolation guarantees - * at the expense of potentially more transaction failures due to conflicts - * with other writes. - * - * Calling {@link #setSnapshot()} has no effect on keys written before this - * function has been called. - * - * {@link #setSnapshot()} may be called multiple times if you would like to - * change the snapshot used for different operations in this transaction. - * - * Calling {@link #setSnapshot()} will not affect the version of Data returned - * by get(...) methods. See {@link #get} for more details. - */ - public void setSnapshot() { - assert(isOwningHandle()); - setSnapshot(nativeHandle_); - } - - /** - * Similar to {@link #setSnapshot()}, but will not change the current snapshot - * until put/merge/delete/getForUpdate/multiGetForUpdate is called. - * By calling this function, the transaction will essentially call - * {@link #setSnapshot()} for you right before performing the next - * write/getForUpdate. - * - * Calling {@link #setSnapshotOnNextOperation()} will not affect what - * snapshot is returned by {@link #getSnapshot} until the next - * write/getForUpdate is executed. - * - * When the snapshot is created the notifier's snapshotCreated method will - * be called so that the caller can get access to the snapshot. - * - * This is an optimization to reduce the likelihood of conflicts that - * could occur in between the time {@link #setSnapshot()} is called and the - * first write/getForUpdate operation. i.e. this prevents the following - * race-condition: - * - * txn1->setSnapshot(); - * txn2->put("A", ...); - * txn2->commit(); - * txn1->getForUpdate(opts, "A", ...); * FAIL! 
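A minimal pessimistic-transaction sketch tying these pieces together (TransactionDB, setSnapshot, getForUpdate, commit); the path, key, and counter logic are illustrative.

```java
import org.rocksdb.*;

public class TransactionExample {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final TransactionDBOptions txnDbOptions = new TransactionDBOptions();
         final TransactionDB txnDb =
             TransactionDB.open(options, txnDbOptions, "/tmp/txn-example");
         final WriteOptions writeOptions = new WriteOptions();
         final ReadOptions readOptions = new ReadOptions()) {
      try (final Transaction txn = txnDb.beginTransaction(writeOptions)) {
        // Pin a snapshot so conflicting writes made after this point by other
        // writers cause the commit to fail rather than silently interleave.
        txn.setSnapshot();
        final byte[] value =
            txn.getForUpdate(readOptions, "counter".getBytes(), true /* exclusive */);
        final long next = value == null ? 1 : Long.parseLong(new String(value)) + 1;
        txn.put("counter".getBytes(), Long.toString(next).getBytes());
        txn.commit();
      }
    }
  }
}
```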
- */ - public void setSnapshotOnNextOperation() { - assert(isOwningHandle()); - setSnapshotOnNextOperation(nativeHandle_); - } - - /** - * Similar to {@link #setSnapshot()}, but will not change the current snapshot - * until put/merge/delete/getForUpdate/multiGetForUpdate is called. - * By calling this function, the transaction will essentially call - * {@link #setSnapshot()} for you right before performing the next - * write/getForUpdate. - * - * Calling {@link #setSnapshotOnNextOperation()} will not affect what - * snapshot is returned by {@link #getSnapshot} until the next - * write/getForUpdate is executed. - * - * When the snapshot is created the - * {@link AbstractTransactionNotifier#snapshotCreated(Snapshot)} method will - * be called so that the caller can get access to the snapshot. - * - * This is an optimization to reduce the likelihood of conflicts that - * could occur in between the time {@link #setSnapshot()} is called and the - * first write/getForUpdate operation. i.e. this prevents the following - * race-condition: - * - * txn1->setSnapshot(); - * txn2->put("A", ...); - * txn2->commit(); - * txn1->getForUpdate(opts, "A", ...); * FAIL! - * - * @param transactionNotifier A handler for receiving snapshot notifications - * for the transaction - * - */ - public void setSnapshotOnNextOperation( - final AbstractTransactionNotifier transactionNotifier) { - assert(isOwningHandle()); - setSnapshotOnNextOperation(nativeHandle_, transactionNotifier.nativeHandle_); - } - - /** - * Returns the Snapshot created by the last call to {@link #setSnapshot()}. - * - * REQUIRED: The returned Snapshot is only valid up until the next time - * {@link #setSnapshot()}/{@link #setSnapshotOnNextOperation()} is called, - * {@link #clearSnapshot()} is called, or the Transaction is deleted. - * - * @return The snapshot or null if there is no snapshot - */ - public Snapshot getSnapshot() { - assert(isOwningHandle()); - final long snapshotNativeHandle = getSnapshot(nativeHandle_); - if(snapshotNativeHandle == 0) { - return null; - } else { - final Snapshot snapshot = new Snapshot(snapshotNativeHandle); - return snapshot; - } - } - - /** - * Clears the current snapshot (i.e. no snapshot will be 'set') - * - * This removes any snapshot that currently exists or is set to be created - * on the next update operation ({@link #setSnapshotOnNextOperation()}). - * - * Calling {@link #clearSnapshot()} has no effect on keys written before this - * function has been called. - * - * If a reference to a snapshot was retrieved via {@link #getSnapshot()}, it - * will no longer be valid and should be discarded after a call to - * {@link #clearSnapshot()}. - */ - public void clearSnapshot() { - assert(isOwningHandle()); - clearSnapshot(nativeHandle_); - } - - /** - * Prepare the current transaction for 2PC - */ - public void prepare() throws RocksDBException { - //TODO(AR) consider a Java'ish version of this function, which returns an AutoCloseable (commit) - assert(isOwningHandle()); - prepare(nativeHandle_); - } - - /** - * Write all batched keys to the db atomically. - * - * Returns OK on success. - * - * May return any error status that could be returned by DB:Write(). - * - * If this transaction was created by an {@link OptimisticTransactionDB} - * Status::Busy() may be returned if the transaction could not guarantee - * that there are no write conflicts. Status::TryAgain() may be returned - * if the memtable history size is not large enough - * (See max_write_buffer_number_to_maintain). 
- * - * If this transaction was created by a {@link TransactionDB}, - * Status::Expired() may be returned if this transaction has lived for - * longer than {@link TransactionOptions#getExpiration()}. - * - * @throws RocksDBException if an error occurs when committing the transaction - */ - public void commit() throws RocksDBException { - assert(isOwningHandle()); - commit(nativeHandle_); - } - - /** - * Discard all batched writes in this transaction. - * - * @throws RocksDBException if an error occurs when rolling back the transaction - */ - public void rollback() throws RocksDBException { - assert(isOwningHandle()); - rollback(nativeHandle_); - } - - /** - * Records the state of the transaction for future calls to - * {@link #rollbackToSavePoint()}. - * - * May be called multiple times to set multiple save points. - * - * @throws RocksDBException if an error occurs whilst setting a save point - */ - public void setSavePoint() throws RocksDBException { - assert(isOwningHandle()); - setSavePoint(nativeHandle_); - } - - /** - * Undo all operations in this transaction (put, merge, delete, putLogData) - * since the most recent call to {@link #setSavePoint()} and removes the most - * recent {@link #setSavePoint()}. - * - * If there is no previous call to {@link #setSavePoint()}, - * returns Status::NotFound() - * - * @throws RocksDBException if an error occurs when rolling back to a save point - */ - public void rollbackToSavePoint() throws RocksDBException { - assert(isOwningHandle()); - rollbackToSavePoint(nativeHandle_); - } - - /** - * This function is similar to - * {@link RocksDB#get(ColumnFamilyHandle, ReadOptions, byte[])} except it will - * also read pending changes in this transaction. - * Currently, this function will return Status::MergeInProgress if the most - * recent write to the queried key in this batch is a Merge. - * - * If {@link ReadOptions#snapshot()} is not set, the current version of the - * key will be read. Calling {@link #setSnapshot()} does not affect the - * version of the data returned. - * - * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect - * what is read from the DB but will NOT change which keys are read from this - * transaction (the keys in this transaction do not yet belong to any snapshot - * and will be fetched regardless). - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance - * @param readOptions Read options. - * @param key the key to retrieve the value for. - * - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying native - * library. - */ - public byte[] get(final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions readOptions, final byte[] key) throws RocksDBException { - assert(isOwningHandle()); - return get(nativeHandle_, readOptions.nativeHandle_, key, key.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * This function is similar to - * {@link RocksDB#get(ReadOptions, byte[])} except it will - * also read pending changes in this transaction. - * Currently, this function will return Status::MergeInProgress if the most - * recent write to the queried key in this batch is a Merge. - * - * If {@link ReadOptions#snapshot()} is not set, the current version of the - * key will be read. Calling {@link #setSnapshot()} does not affect the - * version of the data returned. 
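A small hedged sketch of the save-point calls described above, written as a helper that assumes an already-open Transaction (for example the one created in the previous sketch).

```java
import org.rocksdb.*;

public class SavePointExample {
  // Undo part of a batch of writes inside an open transaction.
  static void writeWithSavePoint(final Transaction txn) throws RocksDBException {
    txn.put("a".getBytes(), "1".getBytes());
    txn.setSavePoint();
    txn.put("b".getBytes(), "2".getBytes());
    // Drops the write of "b" but keeps "a"; also removes that save point.
    txn.rollbackToSavePoint();
    txn.commit();
  }
}
```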
- * - * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect - * what is read from the DB but will NOT change which keys are read from this - * transaction (the keys in this transaction do not yet belong to any snapshot - * and will be fetched regardless). - * - * @param readOptions Read options. - * @param key the key to retrieve the value for. - * - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying native - * library. - */ - public byte[] get(final ReadOptions readOptions, final byte[] key) - throws RocksDBException { - assert(isOwningHandle()); - return get(nativeHandle_, readOptions.nativeHandle_, key, key.length); - } - - /** - * This function is similar to - * {@link RocksDB#multiGetAsList} except it will - * also read pending changes in this transaction. - * Currently, this function will return Status::MergeInProgress if the most - * recent write to the queried key in this batch is a Merge. - * - * If {@link ReadOptions#snapshot()} is not set, the current version of the - * key will be read. Calling {@link #setSnapshot()} does not affect the - * version of the data returned. - * - * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect - * what is read from the DB but will NOT change which keys are read from this - * transaction (the keys in this transaction do not yet belong to any snapshot - * and will be fetched regardless). - * - * @param readOptions Read options. - * @param columnFamilyHandles {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. - * @param keys of keys for which values need to be retrieved. - * - * @return Array of values, one for each key - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @throws IllegalArgumentException thrown if the size of passed keys is not - * equal to the amount of passed column family handles. - */ - @Deprecated - public byte[][] multiGet(final ReadOptions readOptions, - final List columnFamilyHandles, final byte[][] keys) - throws RocksDBException { - assert(isOwningHandle()); - // Check if key size equals cfList size. If not a exception must be - // thrown. If not a Segmentation fault happens. - if (keys.length != columnFamilyHandles.size()) { - throw new IllegalArgumentException( - "For each key there must be a ColumnFamilyHandle."); - } - if(keys.length == 0) { - return new byte[0][0]; - } - final long[] cfHandles = new long[columnFamilyHandles.size()]; - for (int i = 0; i < columnFamilyHandles.size(); i++) { - cfHandles[i] = columnFamilyHandles.get(i).nativeHandle_; - } - - return multiGet(nativeHandle_, readOptions.nativeHandle_, - keys, cfHandles); - } - - /** - * This function is similar to - * {@link RocksDB#multiGetAsList(ReadOptions, List, List)} except it will - * also read pending changes in this transaction. - * Currently, this function will return Status::MergeInProgress if the most - * recent write to the queried key in this batch is a Merge. - * - * If {@link ReadOptions#snapshot()} is not set, the current version of the - * key will be read. Calling {@link #setSnapshot()} does not affect the - * version of the data returned. 
- * - * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect - * what is read from the DB but will NOT change which keys are read from this - * transaction (the keys in this transaction do not yet belong to any snapshot - * and will be fetched regardless). - * - * @param readOptions Read options. - * @param columnFamilyHandles {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. - * @param keys the keys for which values need to be retrieved. - * - * @return List of values, one for each key - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - * @throws IllegalArgumentException thrown if the size of passed keys is not - * equal to the amount of passed column family handles. - */ - - public List<byte[]> multiGetAsList(final ReadOptions readOptions, - final List<ColumnFamilyHandle> columnFamilyHandles, final List<byte[]> keys) - throws RocksDBException { - assert (isOwningHandle()); - // The number of keys must equal the number of column family handles; - // otherwise the native call would segfault, so fail fast with an exception. - if (keys.size() != columnFamilyHandles.size()) { - throw new IllegalArgumentException("For each key there must be a ColumnFamilyHandle."); - } - if (keys.size() == 0) { - return new ArrayList<>(0); - } - final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); - final long[] cfHandles = new long[columnFamilyHandles.size()]; - for (int i = 0; i < columnFamilyHandles.size(); i++) { - cfHandles[i] = columnFamilyHandles.get(i).nativeHandle_; - } - - return Arrays.asList(multiGet(nativeHandle_, readOptions.nativeHandle_, keysArray, cfHandles)); - } - - /** - * This function is similar to - * {@link RocksDB#multiGetAsList} except it will - * also read pending changes in this transaction. - * Currently, this function will return Status::MergeInProgress if the most - * recent write to the queried key in this batch is a Merge. - * - * If {@link ReadOptions#snapshot()} is not set, the current version of the - * key will be read. Calling {@link #setSnapshot()} does not affect the - * version of the data returned. - * - * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect - * what is read from the DB but will NOT change which keys are read from this - * transaction (the keys in this transaction do not yet belong to any snapshot - * and will be fetched regardless). - * - * @param readOptions Read options. - * @param keys the keys for which values need to be retrieved. - * - * @return Array of values, one for each key - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - @Deprecated - public byte[][] multiGet(final ReadOptions readOptions, final byte[][] keys) - throws RocksDBException { - assert(isOwningHandle()); - if (keys.length == 0) { - return new byte[0][0]; - } - - return multiGet(nativeHandle_, readOptions.nativeHandle_, - keys); - } - - /** - * This function is similar to - * {@link RocksDB#multiGetAsList} except it will - * also read pending changes in this transaction. - * Currently, this function will return Status::MergeInProgress if the most - * recent write to the queried key in this batch is a Merge. - * - * If {@link ReadOptions#snapshot()} is not set, the current version of the - * key will be read. Calling {@link #setSnapshot()} does not affect the - * version of the data returned.
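A sketch of the column-family variant (assuming an open Transaction txn and two existing handles cfA and cfB); the key list and handle list must be the same size:

    final List<ColumnFamilyHandle> cfs = Arrays.asList(cfA, cfB);
    final List<byte[]> keys = Arrays.asList("a".getBytes(UTF_8), "b".getBytes(UTF_8));
    try (final ReadOptions readOptions = new ReadOptions()) {
      final List<byte[]> values = txn.multiGetAsList(readOptions, cfs, keys);
      // values.get(i) is null when keys.get(i) was not found in cfs.get(i).
    }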
- * - * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect - * what is read from the DB but will NOT change which keys are read from this - * transaction (the keys in this transaction do not yet belong to any snapshot - * and will be fetched regardless). - * - * @param readOptions Read options. - * @param keys the keys for which values need to be retrieved. - * - * @return List of values, one for each key - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public List<byte[]> multiGetAsList(final ReadOptions readOptions, final List<byte[]> keys) - throws RocksDBException { - if (keys.size() == 0) { - return new ArrayList<>(0); - } - final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); - - return Arrays.asList(multiGet(nativeHandle_, readOptions.nativeHandle_, keysArray)); - } - - /** - * Read this key and ensure that this transaction will only - * be able to be committed if this key is not written outside this - * transaction after it has first been read (or after the snapshot if a - * snapshot is set in this transaction). The transaction behavior is the - * same regardless of whether the key exists or not. - * - * Note: Currently, this function will return Status::MergeInProgress - * if the most recent write to the queried key in this batch is a Merge. - * - * The values returned by this function are similar to - * {@link RocksDB#get(ColumnFamilyHandle, ReadOptions, byte[])}. - * If value==nullptr, then this function will not read any data, but will - * still ensure that this key cannot be written to outside of this - * transaction. - * - * If this transaction was created by an {@link OptimisticTransactionDB}, - * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} - * could cause {@link #commit()} to fail. Otherwise, it could return any error - * that could be returned by - * {@link RocksDB#get(ColumnFamilyHandle, ReadOptions, byte[])}. - * - * If this transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * {@link Status.Code#MergeInProgress} if merge operations cannot be - * resolved. - * - * @param readOptions Read options. - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the key to retrieve the value for. - * @param exclusive true if the transaction should have exclusive access to - * the key, otherwise false for shared access. - * @param doValidate true if it should validate the snapshot before doing the read - * - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library.
- */ - public byte[] getForUpdate(final ReadOptions readOptions, - final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final boolean exclusive, - final boolean doValidate) throws RocksDBException { - assert (isOwningHandle()); - return getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, key.length, - columnFamilyHandle.nativeHandle_, exclusive, doValidate); - } - - /** - * Same as - * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean, boolean)} - * with doValidate=true. - * - * @param readOptions Read options. - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the key to retrieve the value for. - * @param exclusive true if the transaction should have exclusive access to - * the key, otherwise false for shared access. - * - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public byte[] getForUpdate(final ReadOptions readOptions, - final ColumnFamilyHandle columnFamilyHandle, final byte[] key, - final boolean exclusive) throws RocksDBException { - assert(isOwningHandle()); - return getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, key.length, - columnFamilyHandle.nativeHandle_, exclusive, true /*doValidate*/); - } - - /** - * Read this key and ensure that this transaction will only - * be able to be committed if this key is not written outside this - * transaction after it has first been read (or after the snapshot if a - * snapshot is set in this transaction). The transaction behavior is the - * same regardless of whether the key exists or not. - * - * Note: Currently, this function will return Status::MergeInProgress - * if the most recent write to the queried key in this batch is a Merge. - * - * The values returned by this function are similar to - * {@link RocksDB#get(ReadOptions, byte[])}. - * If value==nullptr, then this function will not read any data, but will - * still ensure that this key cannot be written to by outside of this - * transaction. - * - * If this transaction was created on an {@link OptimisticTransactionDB}, - * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} - * could cause {@link #commit()} to fail. Otherwise, it could return any error - * that could be returned by - * {@link RocksDB#get(ReadOptions, byte[])}. - * - * If this transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * {@link Status.Code#MergeInProgress} if merge operations cannot be - * resolved. - * - * @param readOptions Read options. - * @param key the key to retrieve the value for. - * @param exclusive true if the transaction should have exclusive access to - * the key, otherwise false for shared access. - * - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. 
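A pessimistic read-modify-write sketch built on getForUpdate (assuming a TransactionDB txnDb, a surrounding method that declares throws RocksDBException, and the UTF_8 charset constant; the key name and balance encoding are illustrative only):

    try (final WriteOptions writeOptions = new WriteOptions();
         final ReadOptions readOptions = new ReadOptions();
         final Transaction txn = txnDb.beginTransaction(writeOptions)) {
      // Exclusive access: concurrent writers to "balance" conflict instead of racing.
      final byte[] current = txn.getForUpdate(readOptions, "balance".getBytes(UTF_8), true);
      final long balance = current == null ? 0L : Long.parseLong(new String(current, UTF_8));
      txn.put("balance".getBytes(UTF_8), Long.toString(balance + 100).getBytes(UTF_8));
      txn.commit();
    }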
- */ - public byte[] getForUpdate(final ReadOptions readOptions, final byte[] key, - final boolean exclusive) throws RocksDBException { - assert(isOwningHandle()); - return getForUpdate( - nativeHandle_, readOptions.nativeHandle_, key, key.length, exclusive, true /*doValidate*/); - } - - /** - * A multi-key version of - * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)}. - * - * - * @param readOptions Read options. - * @param columnFamilyHandles {@link org.rocksdb.ColumnFamilyHandle} - * instances - * @param keys the keys to retrieve the values for. - * - * @return Array of values, one for each key - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - @Deprecated - public byte[][] multiGetForUpdate(final ReadOptions readOptions, - final List<ColumnFamilyHandle> columnFamilyHandles, final byte[][] keys) - throws RocksDBException { - assert(isOwningHandle()); - // The number of keys must equal the number of column family handles; - // otherwise the native call would segfault, so fail fast with an exception. - if (keys.length != columnFamilyHandles.size()) { - throw new IllegalArgumentException( - "For each key there must be a ColumnFamilyHandle."); - } - if (keys.length == 0) { - return new byte[0][0]; - } - final long[] cfHandles = new long[columnFamilyHandles.size()]; - for (int i = 0; i < columnFamilyHandles.size(); i++) { - cfHandles[i] = columnFamilyHandles.get(i).nativeHandle_; - } - return multiGetForUpdate(nativeHandle_, readOptions.nativeHandle_, - keys, cfHandles); - } - - /** - * A multi-key version of - * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)}. - * - * - * @param readOptions Read options. - * @param columnFamilyHandles {@link org.rocksdb.ColumnFamilyHandle} - * instances - * @param keys the keys to retrieve the values for. - * - * @return List of values, one for each key - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public List<byte[]> multiGetForUpdateAsList(final ReadOptions readOptions, - final List<ColumnFamilyHandle> columnFamilyHandles, final List<byte[]> keys) - throws RocksDBException { - assert (isOwningHandle()); - // The number of keys must equal the number of column family handles; - // otherwise the native call would segfault, so fail fast with an exception. - if (keys.size() != columnFamilyHandles.size()) { - throw new IllegalArgumentException("For each key there must be a ColumnFamilyHandle."); - } - if (keys.size() == 0) { - return new ArrayList<>(); - } - final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); - - final long[] cfHandles = new long[columnFamilyHandles.size()]; - for (int i = 0; i < columnFamilyHandles.size(); i++) { - cfHandles[i] = columnFamilyHandles.get(i).nativeHandle_; - } - return Arrays.asList( - multiGetForUpdate(nativeHandle_, readOptions.nativeHandle_, keysArray, cfHandles)); - } - - /** - * A multi-key version of {@link #getForUpdate(ReadOptions, byte[], boolean)}. - * - * - * @param readOptions Read options. - * @param keys the keys to retrieve the values for. - * - * @return Array of values, one for each key - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - @Deprecated - public byte[][] multiGetForUpdate(final ReadOptions readOptions, final byte[][] keys) - throws RocksDBException { - assert(isOwningHandle()); - if (keys.length == 0) { - return new byte[0][0]; - } - - return multiGetForUpdate(nativeHandle_, - readOptions.nativeHandle_, keys); - } - - /** - * A multi-key version of {@link #getForUpdate(ReadOptions, byte[], boolean)}.
- * - * - * @param readOptions Read options. - * @param keys the keys to retrieve the values for. - * - * @return List of values, one for each key - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public List<byte[]> multiGetForUpdateAsList( - final ReadOptions readOptions, final List<byte[]> keys) throws RocksDBException { - assert (isOwningHandle()); - if (keys.size() == 0) { - return new ArrayList<>(0); - } - - final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); - - return Arrays.asList(multiGetForUpdate(nativeHandle_, readOptions.nativeHandle_, keysArray)); - } - - /** - * Returns an iterator that will iterate on all keys in the default - * column family including both keys in the DB and uncommitted keys in this - * transaction. - * - * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is read - * from the DB but will NOT change which keys are read from this transaction - * (the keys in this transaction do not yet belong to any snapshot and will be - * fetched regardless). - * - * Caller is responsible for calling {@link RocksIterator#close()} on - * the returned Iterator. - * - * The returned iterator is only valid until {@link #commit()}, - * {@link #rollback()}, or {@link #rollbackToSavePoint()} is called. - * - * @param readOptions Read options. - * - * @return instance of iterator object. - */ - public RocksIterator getIterator(final ReadOptions readOptions) { - assert(isOwningHandle()); - return new RocksIterator(parent, getIterator(nativeHandle_, - readOptions.nativeHandle_)); - } - - /** - * Returns an iterator that will iterate on all keys in the column family - * specified by {@code columnFamilyHandle} including both keys in the DB - * and uncommitted keys in this transaction. - * - * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is read - * from the DB but will NOT change which keys are read from this transaction - * (the keys in this transaction do not yet belong to any snapshot and will be - * fetched regardless). - * - * Caller is responsible for calling {@link RocksIterator#close()} on - * the returned Iterator. - * - * The returned iterator is only valid until {@link #commit()}, - * {@link #rollback()}, or {@link #rollbackToSavePoint()} is called. - * - * @param readOptions Read options. - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * - * @return instance of iterator object. - */ - public RocksIterator getIterator(final ReadOptions readOptions, - final ColumnFamilyHandle columnFamilyHandle) { - assert(isOwningHandle()); - return new RocksIterator(parent, getIterator(nativeHandle_, - readOptions.nativeHandle_, columnFamilyHandle.nativeHandle_)); - } - - /** - * Similar to {@link RocksDB#put(ColumnFamilyHandle, byte[], byte[])}, but - * will also perform conflict checking on the keys being written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. - * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param columnFamilyHandle The column family to put the key/value into - * @param key the specified key to be inserted.
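A sketch of iterating over committed keys plus this transaction's uncommitted writes (assuming an open Transaction txn and the UTF_8 charset constant); the iterator must be closed before commit() or rollback():

    try (final ReadOptions readOptions = new ReadOptions()) {
      txn.put("pending".getBytes(UTF_8), "1".getBytes(UTF_8));
      final RocksIterator it = txn.getIterator(readOptions);
      try {
        for (it.seekToFirst(); it.isValid(); it.next()) {
          // Sees committed DB keys as well as the uncommitted "pending" key above.
          System.out.println(new String(it.key(), UTF_8));
        }
      } finally {
        it.close();
      }
      txn.commit();
    }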
- * @param value the value associated with the specified key. - * @param assumeTracked true when it is expected that the key is already - * tracked. More specifically, it means the the key was previous tracked - * in the same savepoint, with the same exclusive flag, and at a lower - * sequence number. If valid then it skips ValidateSnapshot, - * throws an error otherwise. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, - final byte[] value, final boolean assumeTracked) throws RocksDBException { - assert (isOwningHandle()); - put(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_, assumeTracked); - } - - /** - * Similar to {@link #put(ColumnFamilyHandle, byte[], byte[], boolean)} - * but with {@code assumeTracked = false}. - * - * Will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. - * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param columnFamilyHandle The column family to put the key/value into - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, - final byte[] value) throws RocksDBException { - assert(isOwningHandle()); - put(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_, false); - } - - /** - * Similar to {@link RocksDB#put(byte[], byte[])}, but - * will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. - * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. 
- * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void put(final byte[] key, final byte[] value) - throws RocksDBException { - assert(isOwningHandle()); - put(nativeHandle_, key, key.length, value, value.length); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #put(ColumnFamilyHandle, byte[], byte[])} but allows - * you to specify the key and value in several parts that will be - * concatenated together. - * - * @param columnFamilyHandle The column family to put the key/value into - * @param keyParts the specified key to be inserted. - * @param valueParts the value associated with the specified key. - * @param assumeTracked true when it is expected that the key is already - * tracked. More specifically, it means the the key was previous tracked - * in the same savepoint, with the same exclusive flag, and at a lower - * sequence number. If valid then it skips ValidateSnapshot, - * throws an error otherwise. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void put(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts, final byte[][] valueParts, - final boolean assumeTracked) throws RocksDBException { - assert (isOwningHandle()); - put(nativeHandle_, keyParts, keyParts.length, valueParts, valueParts.length, - columnFamilyHandle.nativeHandle_, assumeTracked); - } - - /** - * Similar to {@link #put(ColumnFamilyHandle, byte[][], byte[][], boolean)} - * but with with {@code assumeTracked = false}. - * - * Allows you to specify the key and value in several parts that will be - * concatenated together. - * - * @param columnFamilyHandle The column family to put the key/value into - * @param keyParts the specified key to be inserted. - * @param valueParts the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void put(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts, final byte[][] valueParts) - throws RocksDBException { - assert(isOwningHandle()); - put(nativeHandle_, keyParts, keyParts.length, valueParts, valueParts.length, - columnFamilyHandle.nativeHandle_, false); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #put(byte[], byte[])} but allows - * you to specify the key and value in several parts that will be - * concatenated together - * - * @param keyParts the specified key to be inserted. - * @param valueParts the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void put(final byte[][] keyParts, final byte[][] valueParts) - throws RocksDBException { - assert(isOwningHandle()); - put(nativeHandle_, keyParts, keyParts.length, valueParts, - valueParts.length); - } - - /** - * Similar to {@link RocksDB#merge(ColumnFamilyHandle, byte[], byte[])}, but - * will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. 
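A sketch of the multi-part form (assuming an open Transaction txn); the parts are simply concatenated, no separator is inserted:

    final byte[][] keyParts = { "user:".getBytes(UTF_8), "42".getBytes(UTF_8) };      // key "user:42"
    final byte[][] valueParts = { "Alice".getBytes(UTF_8), "|2024".getBytes(UTF_8) }; // value "Alice|2024"
    txn.put(keyParts, valueParts);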
- * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param columnFamilyHandle The column family to merge the key/value into - * @param key the specified key to be merged. - * @param value the value associated with the specified key. - * @param assumeTracked true when it is expected that the key is already - * tracked. More specifically, it means the the key was previous tracked - * in the same savepoint, with the same exclusive flag, and at a lower - * sequence number. If valid then it skips ValidateSnapshot, - * throws an error otherwise. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void merge(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final byte[] value, final boolean assumeTracked) - throws RocksDBException { - assert (isOwningHandle()); - merge(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_, assumeTracked); - } - - /** - * Similar to {@link #merge(ColumnFamilyHandle, byte[], byte[], boolean)} - * but with {@code assumeTracked = false}. - * - * Will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. - * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param columnFamilyHandle The column family to merge the key/value into - * @param key the specified key to be merged. - * @param value the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void merge(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final byte[] value) throws RocksDBException { - assert(isOwningHandle()); - merge(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_, false); - } - - /** - * Similar to {@link RocksDB#merge(byte[], byte[])}, but - * will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. - * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param key the specified key to be merged. 
- * @param value the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void merge(final byte[] key, final byte[] value) - throws RocksDBException { - assert(isOwningHandle()); - merge(nativeHandle_, key, key.length, value, value.length); - } - - /** - * Similar to {@link RocksDB#delete(ColumnFamilyHandle, byte[])}, but - * will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. - * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param key the specified key to be deleted. - * @param assumeTracked true when it is expected that the key is already - * tracked. More specifically, it means the the key was previous tracked - * in the same savepoint, with the same exclusive flag, and at a lower - * sequence number. If valid then it skips ValidateSnapshot, - * throws an error otherwise. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void delete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final boolean assumeTracked) throws RocksDBException { - assert (isOwningHandle()); - delete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, - assumeTracked); - } - - /** - * Similar to {@link #delete(ColumnFamilyHandle, byte[], boolean)} - * but with {@code assumeTracked = false}. - * - * Will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. - * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param key the specified key to be deleted. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void delete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key) throws RocksDBException { - assert(isOwningHandle()); - delete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, - /*assumeTracked*/ false); - } - - /** - * Similar to {@link RocksDB#delete(byte[])}, but - * will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. 
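A sketch of merge and delete inside a transaction (assuming an open Transaction txn and a DB opened with a merge operator, for example options.setMergeOperator(new StringAppendOperator(','))):

    txn.merge("tags".getBytes(UTF_8), "red".getBytes(UTF_8));
    txn.merge("tags".getBytes(UTF_8), "blue".getBytes(UTF_8));
    // Reading "tags" back through txn.get() before commit may fail with
    // Status::MergeInProgress, since pending merges are not resolved in the batch.
    txn.delete("obsolete".getBytes(UTF_8)); // conflict-checked like a put
    txn.commit();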
- * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param key the specified key to be deleted. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void delete(final byte[] key) throws RocksDBException { - assert(isOwningHandle()); - delete(nativeHandle_, key, key.length); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #delete(ColumnFamilyHandle, byte[])} but allows - * you to specify the key in several parts that will be - * concatenated together. - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param keyParts the specified key to be deleted. - * @param assumeTracked true when it is expected that the key is already - * tracked. More specifically, it means the the key was previous tracked - * in the same savepoint, with the same exclusive flag, and at a lower - * sequence number. If valid then it skips ValidateSnapshot, - * throws an error otherwise. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void delete(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts, final boolean assumeTracked) - throws RocksDBException { - assert (isOwningHandle()); - delete(nativeHandle_, keyParts, keyParts.length, - columnFamilyHandle.nativeHandle_, assumeTracked); - } - - /** - * Similar to{@link #delete(ColumnFamilyHandle, byte[][], boolean)} - * but with {@code assumeTracked = false}. - * - * Allows you to specify the key in several parts that will be - * concatenated together. - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param keyParts the specified key to be deleted. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void delete(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts) throws RocksDBException { - assert(isOwningHandle()); - delete(nativeHandle_, keyParts, keyParts.length, - columnFamilyHandle.nativeHandle_, false); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #delete(byte[])} but allows - * you to specify key the in several parts that will be - * concatenated together. - * - * @param keyParts the specified key to be deleted - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void delete(final byte[][] keyParts) throws RocksDBException { - assert(isOwningHandle()); - delete(nativeHandle_, keyParts, keyParts.length); - } - - /** - * Similar to {@link RocksDB#singleDelete(ColumnFamilyHandle, byte[])}, but - * will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. 
- * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param key the specified key to be deleted. - * @param assumeTracked true when it is expected that the key is already - * tracked. More specifically, it means the key was previously tracked - * in the same savepoint, with the same exclusive flag, and at a lower - * sequence number. If valid then it skips ValidateSnapshot, - * throws an error otherwise. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final boolean assumeTracked) throws RocksDBException { - assert (isOwningHandle()); - singleDelete(nativeHandle_, key, key.length, - columnFamilyHandle.nativeHandle_, assumeTracked); - } - - /** - * Similar to {@link #singleDelete(ColumnFamilyHandle, byte[], boolean)} - * but with {@code assumeTracked = false}. - * - * will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. - * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param key the specified key to be deleted. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key) throws RocksDBException { - assert(isOwningHandle()); - singleDelete(nativeHandle_, key, key.length, - columnFamilyHandle.nativeHandle_, false); - } - - /** - * Similar to {@link RocksDB#singleDelete(byte[])}, but - * will also perform conflict checking on the keys be written. - * - * If this Transaction was created on an {@link OptimisticTransactionDB}, - * these functions should always succeed. - * - * If this Transaction was created on a {@link TransactionDB}, an - * {@link RocksDBException} may be thrown with an accompanying {@link Status} - * when: - * {@link Status.Code#Busy} if there is a write conflict, - * {@link Status.Code#TimedOut} if a lock could not be acquired, - * {@link Status.Code#TryAgain} if the memtable history size is not large - * enough. See - * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} - * - * @param key the specified key to be deleted. 
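A sketch of the singleDelete pattern (assuming an open Transaction txn); it is only appropriate when the key has been put at most once, and never merged or overwritten, since it was last deleted:

    txn.put("session:123".getBytes(UTF_8), "token".getBytes(UTF_8)); // written exactly once
    // ... later:
    txn.singleDelete("session:123".getBytes(UTF_8));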
- * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final byte[] key) throws RocksDBException { - assert(isOwningHandle()); - singleDelete(nativeHandle_, key, key.length); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #singleDelete(ColumnFamilyHandle, byte[])} but allows - * you to specify the key in several parts that will be - * concatenated together. - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param keyParts the specified key to be deleted. - * @param assumeTracked true when it is expected that the key is already - * tracked. More specifically, it means the key was previously tracked - * in the same savepoint, with the same exclusive flag, and at a lower - * sequence number. If valid then it skips ValidateSnapshot, - * throws an error otherwise. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts, final boolean assumeTracked) - throws RocksDBException { - assert (isOwningHandle()); - singleDelete(nativeHandle_, keyParts, keyParts.length, - columnFamilyHandle.nativeHandle_, assumeTracked); - } - - /** - * Similar to{@link #singleDelete(ColumnFamilyHandle, byte[][], boolean)} - * but with {@code assumeTracked = false}. - * - * Allows you to specify the key in several parts that will be - * concatenated together. - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param keyParts the specified key to be deleted. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts) throws RocksDBException { - assert(isOwningHandle()); - singleDelete(nativeHandle_, keyParts, keyParts.length, - columnFamilyHandle.nativeHandle_, false); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #singleDelete(byte[])} but allows - * you to specify the key in several parts that will be - * concatenated together. - * - * @param keyParts the specified key to be deleted. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final byte[][] keyParts) throws RocksDBException { - assert(isOwningHandle()); - singleDelete(nativeHandle_, keyParts, keyParts.length); - } - - /** - * Similar to {@link RocksDB#put(ColumnFamilyHandle, byte[], byte[])}, - * but operates on the transactions write batch. This write will only happen - * if this transaction gets committed successfully. - * - * Unlike {@link #put(ColumnFamilyHandle, byte[], byte[])} no conflict - * checking will be performed for this key. 
- * - * If this Transaction was created on a {@link TransactionDB}, this function - * will still acquire locks necessary to make sure this write doesn't cause - * conflicts in other transactions; This may cause a {@link RocksDBException} - * with associated {@link Status.Code#Busy}. - * - * @param columnFamilyHandle The column family to put the key/value into - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void putUntracked(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final byte[] value) throws RocksDBException { - assert(isOwningHandle()); - putUntracked(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Similar to {@link RocksDB#put(byte[], byte[])}, - * but operates on the transactions write batch. This write will only happen - * if this transaction gets committed successfully. - * - * Unlike {@link #put(byte[], byte[])} no conflict - * checking will be performed for this key. - * - * If this Transaction was created on a {@link TransactionDB}, this function - * will still acquire locks necessary to make sure this write doesn't cause - * conflicts in other transactions; This may cause a {@link RocksDBException} - * with associated {@link Status.Code#Busy}. - * - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void putUntracked(final byte[] key, final byte[] value) - throws RocksDBException { - assert(isOwningHandle()); - putUntracked(nativeHandle_, key, key.length, value, value.length); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #putUntracked(ColumnFamilyHandle, byte[], byte[])} but - * allows you to specify the key and value in several parts that will be - * concatenated together. - * - * @param columnFamilyHandle The column family to put the key/value into - * @param keyParts the specified key to be inserted. - * @param valueParts the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void putUntracked(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts, final byte[][] valueParts) - throws RocksDBException { - assert(isOwningHandle()); - putUntracked(nativeHandle_, keyParts, keyParts.length, valueParts, - valueParts.length, columnFamilyHandle.nativeHandle_); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #putUntracked(byte[], byte[])} but - * allows you to specify the key and value in several parts that will be - * concatenated together. - * - * @param keyParts the specified key to be inserted. - * @param valueParts the value associated with the specified key. 
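A sketch of an untracked write (assuming an open Transaction txn): the value lands in the transaction's write batch but is not conflict-checked, so a concurrent writer to the same key will not cause this commit to fail; on a TransactionDB the locks needed to keep other transactions consistent are still taken, as noted above:

    txn.putUntracked("metrics:last_seen".getBytes(UTF_8),
        Long.toString(System.currentTimeMillis()).getBytes(UTF_8));
    txn.commit();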
- * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void putUntracked(final byte[][] keyParts, final byte[][] valueParts) - throws RocksDBException { - assert(isOwningHandle()); - putUntracked(nativeHandle_, keyParts, keyParts.length, valueParts, - valueParts.length); - } - - /** - * Similar to {@link RocksDB#merge(ColumnFamilyHandle, byte[], byte[])}, - * but operates on the transactions write batch. This write will only happen - * if this transaction gets committed successfully. - * - * Unlike {@link #merge(ColumnFamilyHandle, byte[], byte[])} no conflict - * checking will be performed for this key. - * - * If this Transaction was created on a {@link TransactionDB}, this function - * will still acquire locks necessary to make sure this write doesn't cause - * conflicts in other transactions; This may cause a {@link RocksDBException} - * with associated {@link Status.Code#Busy}. - * - * @param columnFamilyHandle The column family to merge the key/value into - * @param key the specified key to be merged. - * @param value the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void mergeUntracked(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final byte[] value) throws RocksDBException { - mergeUntracked(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Similar to {@link RocksDB#merge(byte[], byte[])}, - * but operates on the transactions write batch. This write will only happen - * if this transaction gets committed successfully. - * - * Unlike {@link #merge(byte[], byte[])} no conflict - * checking will be performed for this key. - * - * If this Transaction was created on a {@link TransactionDB}, this function - * will still acquire locks necessary to make sure this write doesn't cause - * conflicts in other transactions; This may cause a {@link RocksDBException} - * with associated {@link Status.Code#Busy}. - * - * @param key the specified key to be merged. - * @param value the value associated with the specified key. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void mergeUntracked(final byte[] key, final byte[] value) - throws RocksDBException { - assert(isOwningHandle()); - mergeUntracked(nativeHandle_, key, key.length, value, value.length); - } - - /** - * Similar to {@link RocksDB#delete(ColumnFamilyHandle, byte[])}, - * but operates on the transactions write batch. This write will only happen - * if this transaction gets committed successfully. - * - * Unlike {@link #delete(ColumnFamilyHandle, byte[])} no conflict - * checking will be performed for this key. - * - * If this Transaction was created on a {@link TransactionDB}, this function - * will still acquire locks necessary to make sure this write doesn't cause - * conflicts in other transactions; This may cause a {@link RocksDBException} - * with associated {@link Status.Code#Busy}. - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param key the specified key to be deleted. 
- * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void deleteUntracked(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key) throws RocksDBException { - assert(isOwningHandle()); - deleteUntracked(nativeHandle_, key, key.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Similar to {@link RocksDB#delete(byte[])}, - * but operates on the transactions write batch. This write will only happen - * if this transaction gets committed successfully. - * - * Unlike {@link #delete(byte[])} no conflict - * checking will be performed for this key. - * - * If this Transaction was created on a {@link TransactionDB}, this function - * will still acquire locks necessary to make sure this write doesn't cause - * conflicts in other transactions; This may cause a {@link RocksDBException} - * with associated {@link Status.Code#Busy}. - * - * @param key the specified key to be deleted. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void deleteUntracked(final byte[] key) throws RocksDBException { - assert(isOwningHandle()); - deleteUntracked(nativeHandle_, key, key.length); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #deleteUntracked(ColumnFamilyHandle, byte[])} but allows - * you to specify the key in several parts that will be - * concatenated together. - * - * @param columnFamilyHandle The column family to delete the key/value from - * @param keyParts the specified key to be deleted. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void deleteUntracked(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts) throws RocksDBException { - assert(isOwningHandle()); - deleteUntracked(nativeHandle_, keyParts, keyParts.length, - columnFamilyHandle.nativeHandle_); - } - - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future - /** - * Similar to {@link #deleteUntracked(byte[])} but allows - * you to specify the key in several parts that will be - * concatenated together. - * - * @param keyParts the specified key to be deleted. - * - * @throws RocksDBException when one of the TransactionalDB conditions - * described above occurs, or in the case of an unexpected error - */ - public void deleteUntracked(final byte[][] keyParts) throws RocksDBException { - assert(isOwningHandle()); - deleteUntracked(nativeHandle_, keyParts, keyParts.length); - } - - /** - * Similar to {@link WriteBatch#putLogData(byte[])} - * - * @param blob binary object to be inserted - */ - public void putLogData(final byte[] blob) { - assert(isOwningHandle()); - putLogData(nativeHandle_, blob, blob.length); - } - - /** - * By default, all put/merge/delete operations will be indexed in the - * transaction so that get/getForUpdate/getIterator can search for these - * keys. - * - * If the caller does not want to fetch the keys about to be written, - * they may want to avoid indexing as a performance optimization. - * Calling {@link #disableIndexing()} will turn off indexing for all future - * put/merge/delete operations until {@link #enableIndexing()} is called. 
- * - * If a key is put/merge/deleted after {@link #disableIndexing()} is called - * and then is fetched via get/getForUpdate/getIterator, the result of the - * fetch is undefined. - */ - public void disableIndexing() { - assert(isOwningHandle()); - disableIndexing(nativeHandle_); - } - - /** - * Re-enables indexing after a previous call to {@link #disableIndexing()} - */ - public void enableIndexing() { - assert(isOwningHandle()); - enableIndexing(nativeHandle_); - } - - /** - * Returns the number of distinct Keys being tracked by this transaction. - * If this transaction was created by a {@link TransactionDB}, this is the - * number of keys that are currently locked by this transaction. - * If this transaction was created by an {@link OptimisticTransactionDB}, - * this is the number of keys that need to be checked for conflicts at commit - * time. - * - * @return the number of distinct Keys being tracked by this transaction - */ - public long getNumKeys() { - assert(isOwningHandle()); - return getNumKeys(nativeHandle_); - } - - /** - * Returns the number of puts that have been applied to this - * transaction so far. - * - * @return the number of puts that have been applied to this transaction - */ - public long getNumPuts() { - assert(isOwningHandle()); - return getNumPuts(nativeHandle_); - } - - /** - * Returns the number of deletes that have been applied to this - * transaction so far. - * - * @return the number of deletes that have been applied to this transaction - */ - public long getNumDeletes() { - assert(isOwningHandle()); - return getNumDeletes(nativeHandle_); - } - - /** - * Returns the number of merges that have been applied to this - * transaction so far. - * - * @return the number of merges that have been applied to this transaction - */ - public long getNumMerges() { - assert(isOwningHandle()); - return getNumMerges(nativeHandle_); - } - - /** - * Returns the elapsed time in milliseconds since this Transaction began. - * - * @return the elapsed time in milliseconds since this transaction began. - */ - public long getElapsedTime() { - assert(isOwningHandle()); - return getElapsedTime(nativeHandle_); - } - - /** - * Fetch the underlying write batch that contains all pending changes to be - * committed. - * - * Note: You should not write or delete anything from the batch directly and - * should only use the functions in the {@link Transaction} class to - * write to this transaction. - * - * @return The write batch - */ - public WriteBatchWithIndex getWriteBatch() { - assert(isOwningHandle()); - final WriteBatchWithIndex writeBatchWithIndex = - new WriteBatchWithIndex(getWriteBatch(nativeHandle_)); - return writeBatchWithIndex; - } - - /** - * Change the value of {@link TransactionOptions#getLockTimeout()} - * (in milliseconds) for this transaction. - * - * Has no effect on OptimisticTransactions. - * - * @param lockTimeout the timeout (in milliseconds) for locks used by this - * transaction. - */ - public void setLockTimeout(final long lockTimeout) { - assert(isOwningHandle()); - setLockTimeout(nativeHandle_, lockTimeout); - } - - /** - * Return the WriteOptions that will be used during {@link #commit()}. - * - * @return the WriteOptions that will be used - */ - public WriteOptions getWriteOptions() { - assert(isOwningHandle()); - final WriteOptions writeOptions = - new WriteOptions(getWriteOptions(nativeHandle_)); - return writeOptions; - } - - /** - * Reset the WriteOptions that will be used during {@link #commit()}. 
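A bulk-write sketch using the indexing switch and the counters above (assuming an open Transaction txn; the key pattern and payload are illustrative):

    txn.disableIndexing(); // skip the in-transaction index; do not read these keys back before commit
    final byte[] payload = new byte[64];
    for (int i = 0; i < 10_000; i++) {
      txn.put(("bulk:" + i).getBytes(UTF_8), payload);
    }
    txn.enableIndexing();
    final long trackedKeys = txn.getNumKeys();     // locked (TransactionDB) or to-be-validated keys
    final long puts = txn.getNumPuts();
    final long elapsedMillis = txn.getElapsedTime();
    txn.commit();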
- * - * @param writeOptions The new WriteOptions - */ - public void setWriteOptions(final WriteOptions writeOptions) { - assert(isOwningHandle()); - setWriteOptions(nativeHandle_, writeOptions.nativeHandle_); - } - - /** - * If this key was previously fetched in this transaction using - * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)}/ - * {@link #multiGetForUpdate(ReadOptions, List, byte[][])}, calling - * {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} will tell - * the transaction that it no longer needs to do any conflict checking - * for this key. - * - * If a key has been fetched N times via - * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)}/ - * {@link #multiGetForUpdate(ReadOptions, List, byte[][])}, then - * {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} will only have an - * effect if it is also called N times. If this key has been written to in - * this transaction, {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} - * will have no effect. - * - * If {@link #setSavePoint()} has been called after the - * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)}, - * {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} will not have any - * effect. - * - * If this Transaction was created by an {@link OptimisticTransactionDB}, - * calling {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} can affect - * whether this key is conflict checked at commit time. - * If this Transaction was created by a {@link TransactionDB}, - * calling {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} may release - * any held locks for this key. - * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the key to retrieve the value for. - */ - public void undoGetForUpdate(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key) { - assert(isOwningHandle()); - undoGetForUpdate(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); - } - - /** - * If this key was previously fetched in this transaction using - * {@link #getForUpdate(ReadOptions, byte[], boolean)}/ - * {@link #multiGetForUpdate(ReadOptions, List, byte[][])}, calling - * {@link #undoGetForUpdate(byte[])} will tell - * the transaction that it no longer needs to do any conflict checking - * for this key. - * - * If a key has been fetched N times via - * {@link #getForUpdate(ReadOptions, byte[], boolean)}/ - * {@link #multiGetForUpdate(ReadOptions, List, byte[][])}, then - * {@link #undoGetForUpdate(byte[])} will only have an - * effect if it is also called N times. If this key has been written to in - * this transaction, {@link #undoGetForUpdate(byte[])} - * will have no effect. - * - * If {@link #setSavePoint()} has been called after the - * {@link #getForUpdate(ReadOptions, byte[], boolean)}, - * {@link #undoGetForUpdate(byte[])} will not have any - * effect. - * - * If this Transaction was created by an {@link OptimisticTransactionDB}, - * calling {@link #undoGetForUpdate(byte[])} can affect - * whether this key is conflict checked at commit time. - * If this Transaction was created by a {@link TransactionDB}, - * calling {@link #undoGetForUpdate(byte[])} may release - * any held locks for this key. - * - * @param key the key to retrieve the value for. 
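A sketch of releasing the conflict tracking taken by an earlier getForUpdate (assuming an open Transaction txn and ReadOptions readOptions):

    final byte[] v = txn.getForUpdate(readOptions, "hot-key".getBytes(UTF_8), true);
    if (v == null) {
      // The key turned out not to need protection: drop the conflict tracking
      // (and, on a TransactionDB, potentially the lock) taken by getForUpdate above.
      txn.undoGetForUpdate("hot-key".getBytes(UTF_8));
    }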
- */ - public void undoGetForUpdate(final byte[] key) { - assert(isOwningHandle()); - undoGetForUpdate(nativeHandle_, key, key.length); - } - - /** - * Adds the keys from the WriteBatch to the transaction - * - * @param writeBatch The write batch to read from - * - * @throws RocksDBException if an error occurs whilst rebuilding from the - * write batch. - */ - public void rebuildFromWriteBatch(final WriteBatch writeBatch) - throws RocksDBException { - assert(isOwningHandle()); - rebuildFromWriteBatch(nativeHandle_, writeBatch.nativeHandle_); - } - - /** - * Get the Commit time Write Batch. - * - * @return the commit time write batch. - */ - public WriteBatch getCommitTimeWriteBatch() { - assert(isOwningHandle()); - final WriteBatch writeBatch = - new WriteBatch(getCommitTimeWriteBatch(nativeHandle_)); - return writeBatch; - } - - /** - * Set the log number. - * - * @param logNumber the log number - */ - public void setLogNumber(final long logNumber) { - assert(isOwningHandle()); - setLogNumber(nativeHandle_, logNumber); - } - - /** - * Get the log number. - * - * @return the log number - */ - public long getLogNumber() { - assert(isOwningHandle()); - return getLogNumber(nativeHandle_); - } - - /** - * Set the name of the transaction. - * - * @param transactionName the name of the transaction - * - * @throws RocksDBException if an error occurs when setting the transaction - * name. - */ - public void setName(final String transactionName) throws RocksDBException { - assert(isOwningHandle()); - setName(nativeHandle_, transactionName); - } - - /** - * Get the name of the transaction. - * - * @return the name of the transaction - */ - public String getName() { - assert(isOwningHandle()); - return getName(nativeHandle_); - } - - /** - * Get the ID of the transaction. - * - * @return the ID of the transaction. - */ - public long getID() { - assert(isOwningHandle()); - return getID(nativeHandle_); - } - - /** - * Determine if a deadlock has been detected. - * - * @return true if a deadlock has been detected. - */ - public boolean isDeadlockDetect() { - assert(isOwningHandle()); - return isDeadlockDetect(nativeHandle_); - } - - /** - * Get the list of waiting transactions. - * - * @return The list of waiting transactions. - */ - public WaitingTransactions getWaitingTxns() { - assert(isOwningHandle()); - return getWaitingTxns(nativeHandle_); - } - - /** - * Get the execution status of the transaction. - * - * NOTE: The execution status of an Optimistic Transaction - * never changes. This is only useful for non-optimistic transactions! - * - * @return The execution status of the transaction - */ - public TransactionState getState() { - assert(isOwningHandle()); - return TransactionState.getTransactionState( - getState(nativeHandle_)); - } - - /** - * The globally unique id with which the transaction is identified. This id - * might or might not be set depending on the implementation. Similarly the - * implementation decides the point in lifetime of a transaction at which it - * assigns the id. Although currently it is the case, the id is not guaranteed - * to remain the same across restarts. - * - * @return the transaction id. 
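setName() is what makes a transaction addressable for recovery and is also a prerequisite for two-phase commit via prepare() (see TransactionDB#getTransactionByName and #getAllPreparedTransactions further below). A minimal sketch against a pessimistic TransactionDB txnDb; names and keys are illustrative:

    try (final Transaction txn = txnDb.beginTransaction(writeOptions)) {
      txn.setName("transfer-0001");        // names must be unique among live transactions
      txn.put("k".getBytes(), "v".getBytes());
      System.out.println("txn " + txn.getName() + " id=" + txn.getID()
          + " state=" + txn.getState());   // STARTED at this point
      txn.commit();
    }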
- */ - @Experimental("NOTE: Experimental feature") - public long getId() { - assert(isOwningHandle()); - return getId(nativeHandle_); - } - - public enum TransactionState { - STARTED((byte)0), - AWAITING_PREPARE((byte)1), - PREPARED((byte)2), - AWAITING_COMMIT((byte)3), - COMMITTED((byte)4), - AWAITING_ROLLBACK((byte)5), - ROLLEDBACK((byte)6), - LOCKS_STOLEN((byte)7); - - /* - * Keep old misspelled variable as alias - * Tip from https://stackoverflow.com/a/37092410/454544 - */ - public static final TransactionState COMMITED = COMMITTED; - - private final byte value; - - TransactionState(final byte value) { - this.value = value; - } - - /** - * Get TransactionState by byte value. - * - * @param value byte representation of TransactionState. - * - * @return {@link org.rocksdb.Transaction.TransactionState} instance or null. - * @throws java.lang.IllegalArgumentException if an invalid - * value is provided. - */ - public static TransactionState getTransactionState(final byte value) { - for (final TransactionState transactionState : TransactionState.values()) { - if (transactionState.value == value){ - return transactionState; - } - } - throw new IllegalArgumentException( - "Illegal value provided for TransactionState."); - } - } - - /** - * Called from C++ native method {@link #getWaitingTxns(long)} - * to construct a WaitingTransactions object. - * - * @param columnFamilyId The id of the {@link ColumnFamilyHandle} - * @param key The key - * @param transactionIds The transaction ids - * - * @return The waiting transactions - */ - private WaitingTransactions newWaitingTransactions( - final long columnFamilyId, final String key, - final long[] transactionIds) { - return new WaitingTransactions(columnFamilyId, key, transactionIds); - } - - public static class WaitingTransactions { - private final long columnFamilyId; - private final String key; - private final long[] transactionIds; - - private WaitingTransactions(final long columnFamilyId, final String key, - final long[] transactionIds) { - this.columnFamilyId = columnFamilyId; - this.key = key; - this.transactionIds = transactionIds; - } - - /** - * Get the Column Family ID. - * - * @return The column family ID - */ - public long getColumnFamilyId() { - return columnFamilyId; - } - - /** - * Get the key on which the transactions are waiting. - * - * @return The key - */ - public String getKey() { - return key; - } - - /** - * Get the IDs of the waiting transactions. 
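For illustration, a sketch of inspecting what a blocked transaction is waiting on, assuming txn is an open Transaction created from a TransactionDB:

    // If this transaction is blocked on a lock, see which transactions hold it.
    final Transaction.WaitingTransactions waiting = txn.getWaitingTxns();
    System.out.println("waiting on key '" + waiting.getKey()
        + "' in column family " + waiting.getColumnFamilyId()
        + ", held by transactions "
        + java.util.Arrays.toString(waiting.getTransactionIds()));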
- * - * @return The IDs of the waiting transactions - */ - public long[] getTransactionIds() { - return transactionIds; - } - } - - private native void setSnapshot(final long handle); - private native void setSnapshotOnNextOperation(final long handle); - private native void setSnapshotOnNextOperation(final long handle, - final long transactionNotifierHandle); - private native long getSnapshot(final long handle); - private native void clearSnapshot(final long handle); - private native void prepare(final long handle) throws RocksDBException; - private native void commit(final long handle) throws RocksDBException; - private native void rollback(final long handle) throws RocksDBException; - private native void setSavePoint(final long handle) throws RocksDBException; - private native void rollbackToSavePoint(final long handle) - throws RocksDBException; - private native byte[] get(final long handle, final long readOptionsHandle, - final byte key[], final int keyLength, final long columnFamilyHandle) - throws RocksDBException; - private native byte[] get(final long handle, final long readOptionsHandle, - final byte key[], final int keyLen) throws RocksDBException; - private native byte[][] multiGet(final long handle, - final long readOptionsHandle, final byte[][] keys, - final long[] columnFamilyHandles) throws RocksDBException; - private native byte[][] multiGet(final long handle, - final long readOptionsHandle, final byte[][] keys) - throws RocksDBException; - private native byte[] getForUpdate(final long handle, final long readOptionsHandle, - final byte key[], final int keyLength, final long columnFamilyHandle, final boolean exclusive, - final boolean doValidate) throws RocksDBException; - private native byte[] getForUpdate(final long handle, final long readOptionsHandle, - final byte key[], final int keyLen, final boolean exclusive, final boolean doValidate) - throws RocksDBException; - private native byte[][] multiGetForUpdate(final long handle, - final long readOptionsHandle, final byte[][] keys, - final long[] columnFamilyHandles) throws RocksDBException; - private native byte[][] multiGetForUpdate(final long handle, - final long readOptionsHandle, final byte[][] keys) - throws RocksDBException; - private native long getIterator(final long handle, - final long readOptionsHandle); - private native long getIterator(final long handle, - final long readOptionsHandle, final long columnFamilyHandle); - private native void put(final long handle, final byte[] key, final int keyLength, - final byte[] value, final int valueLength, final long columnFamilyHandle, - final boolean assumeTracked) throws RocksDBException; - private native void put(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength) - throws RocksDBException; - private native void put(final long handle, final byte[][] keys, final int keysLength, - final byte[][] values, final int valuesLength, final long columnFamilyHandle, - final boolean assumeTracked) throws RocksDBException; - private native void put(final long handle, final byte[][] keys, - final int keysLength, final byte[][] values, final int valuesLength) - throws RocksDBException; - private native void merge(final long handle, final byte[] key, final int keyLength, - final byte[] value, final int valueLength, final long columnFamilyHandle, - final boolean assumeTracked) throws RocksDBException; - private native void merge(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength) - 
throws RocksDBException; - private native void delete(final long handle, final byte[] key, final int keyLength, - final long columnFamilyHandle, final boolean assumeTracked) throws RocksDBException; - private native void delete(final long handle, final byte[] key, - final int keyLength) throws RocksDBException; - private native void delete(final long handle, final byte[][] keys, final int keysLength, - final long columnFamilyHandle, final boolean assumeTracked) throws RocksDBException; - private native void delete(final long handle, final byte[][] keys, - final int keysLength) throws RocksDBException; - private native void singleDelete(final long handle, final byte[] key, final int keyLength, - final long columnFamilyHandle, final boolean assumeTracked) throws RocksDBException; - private native void singleDelete(final long handle, final byte[] key, - final int keyLength) throws RocksDBException; - private native void singleDelete(final long handle, final byte[][] keys, final int keysLength, - final long columnFamilyHandle, final boolean assumeTracked) throws RocksDBException; - private native void singleDelete(final long handle, final byte[][] keys, - final int keysLength) throws RocksDBException; - private native void putUntracked(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength, - final long columnFamilyHandle) throws RocksDBException; - private native void putUntracked(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength) - throws RocksDBException; - private native void putUntracked(final long handle, final byte[][] keys, - final int keysLength, final byte[][] values, final int valuesLength, - final long columnFamilyHandle) throws RocksDBException; - private native void putUntracked(final long handle, final byte[][] keys, - final int keysLength, final byte[][] values, final int valuesLength) - throws RocksDBException; - private native void mergeUntracked(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength, - final long columnFamilyHandle) throws RocksDBException; - private native void mergeUntracked(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength) - throws RocksDBException; - private native void deleteUntracked(final long handle, final byte[] key, - final int keyLength, final long columnFamilyHandle) - throws RocksDBException; - private native void deleteUntracked(final long handle, final byte[] key, - final int keyLength) throws RocksDBException; - private native void deleteUntracked(final long handle, final byte[][] keys, - final int keysLength, final long columnFamilyHandle) - throws RocksDBException; - private native void deleteUntracked(final long handle, final byte[][] keys, - final int keysLength) throws RocksDBException; - private native void putLogData(final long handle, final byte[] blob, - final int blobLength); - private native void disableIndexing(final long handle); - private native void enableIndexing(final long handle); - private native long getNumKeys(final long handle); - private native long getNumPuts(final long handle); - private native long getNumDeletes(final long handle); - private native long getNumMerges(final long handle); - private native long getElapsedTime(final long handle); - private native long getWriteBatch(final long handle); - private native void setLockTimeout(final long handle, final long lockTimeout); - private native long getWriteOptions(final 
long handle); - private native void setWriteOptions(final long handle, - final long writeOptionsHandle); - private native void undoGetForUpdate(final long handle, final byte[] key, - final int keyLength, final long columnFamilyHandle); - private native void undoGetForUpdate(final long handle, final byte[] key, - final int keyLength); - private native void rebuildFromWriteBatch(final long handle, - final long writeBatchHandle) throws RocksDBException; - private native long getCommitTimeWriteBatch(final long handle); - private native void setLogNumber(final long handle, final long logNumber); - private native long getLogNumber(final long handle); - private native void setName(final long handle, final String name) - throws RocksDBException; - private native String getName(final long handle); - private native long getID(final long handle); - private native boolean isDeadlockDetect(final long handle); - private native WaitingTransactions getWaitingTxns(final long handle); - private native byte getState(final long handle); - private native long getId(final long handle); - - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/TransactionDB.java b/java/src/main/java/org/rocksdb/TransactionDB.java deleted file mode 100644 index 86f25fe15..000000000 --- a/java/src/main/java/org/rocksdb/TransactionDB.java +++ /dev/null @@ -1,403 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * Database with Transaction support - */ -public class TransactionDB extends RocksDB - implements TransactionalDB { - - private TransactionDBOptions transactionDbOptions_; - - /** - * Private constructor. - * - * @param nativeHandle The native handle of the C++ TransactionDB object - */ - private TransactionDB(final long nativeHandle) { - super(nativeHandle); - } - - /** - * Open a TransactionDB, similar to {@link RocksDB#open(Options, String)}. - * - * @param options {@link org.rocksdb.Options} instance. - * @param transactionDbOptions {@link org.rocksdb.TransactionDBOptions} - * instance. - * @param path the path to the rocksdb. - * - * @return a {@link TransactionDB} instance on success, null if the specified - * {@link TransactionDB} can not be opened. - * - * @throws RocksDBException if an error occurs whilst opening the database. - */ - public static TransactionDB open(final Options options, - final TransactionDBOptions transactionDbOptions, final String path) - throws RocksDBException { - final TransactionDB tdb = new TransactionDB(open(options.nativeHandle_, - transactionDbOptions.nativeHandle_, path)); - - // when non-default Options is used, keeping an Options reference - // in RocksDB can prevent Java to GC during the life-time of - // the currently-created RocksDB. - tdb.storeOptionsInstance(options); - tdb.storeTransactionDbOptions(transactionDbOptions); - - return tdb; - } - - /** - * Open a TransactionDB, similar to - * {@link RocksDB#open(DBOptions, String, List, List)}. - * - * @param dbOptions {@link org.rocksdb.DBOptions} instance. - * @param transactionDbOptions {@link org.rocksdb.TransactionDBOptions} - * instance. - * @param path the path to the rocksdb. 
- * @param columnFamilyDescriptors list of column family descriptors - * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances - * - * @return a {@link TransactionDB} instance on success, null if the specified - * {@link TransactionDB} can not be opened. - * - * @throws RocksDBException if an error occurs whilst opening the database. - */ - public static TransactionDB open(final DBOptions dbOptions, - final TransactionDBOptions transactionDbOptions, - final String path, - final List columnFamilyDescriptors, - final List columnFamilyHandles) - throws RocksDBException { - - final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; - final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; - for (int i = 0; i < columnFamilyDescriptors.size(); i++) { - final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors - .get(i); - cfNames[i] = cfDescriptor.getName(); - cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; - } - - final long[] handles = open(dbOptions.nativeHandle_, - transactionDbOptions.nativeHandle_, path, cfNames, cfOptionHandles); - final TransactionDB tdb = new TransactionDB(handles[0]); - - // when non-default Options is used, keeping an Options reference - // in RocksDB can prevent Java to GC during the life-time of - // the currently-created RocksDB. - tdb.storeOptionsInstance(dbOptions); - tdb.storeTransactionDbOptions(transactionDbOptions); - - for (int i = 1; i < handles.length; i++) { - columnFamilyHandles.add(new ColumnFamilyHandle(tdb, handles[i])); - } - - return tdb; - } - - /** - * This is similar to {@link #close()} except that it - * throws an exception if any error occurs. - * - * This will not fsync the WAL files. - * If syncing is required, the caller must first call {@link #syncWal()} - * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch - * with {@link WriteOptions#setSync(boolean)} set to true. - * - * See also {@link #close()}. - * - * @throws RocksDBException if an error occurs whilst closing. - */ - public void closeE() throws RocksDBException { - if (owningHandle_.compareAndSet(true, false)) { - try { - closeDatabase(nativeHandle_); - } finally { - disposeInternal(); - } - } - } - - /** - * This is similar to {@link #closeE()} except that it - * silently ignores any errors. - * - * This will not fsync the WAL files. - * If syncing is required, the caller must first call {@link #syncWal()} - * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch - * with {@link WriteOptions#setSync(boolean)} set to true. - * - * See also {@link #close()}. - */ - @Override - public void close() { - if (owningHandle_.compareAndSet(true, false)) { - try { - closeDatabase(nativeHandle_); - } catch (final RocksDBException e) { - // silently ignore the error report - } finally { - disposeInternal(); - } - } - } - - @Override - public Transaction beginTransaction(final WriteOptions writeOptions) { - return new Transaction(this, beginTransaction(nativeHandle_, - writeOptions.nativeHandle_)); - } - - @Override - public Transaction beginTransaction(final WriteOptions writeOptions, - final TransactionOptions transactionOptions) { - return new Transaction(this, beginTransaction(nativeHandle_, - writeOptions.nativeHandle_, transactionOptions.nativeHandle_)); - } - - // TODO(AR) consider having beingTransaction(... 
oldTransaction) set a - // reference count inside Transaction, so that we can always call - // Transaction#close but the object is only disposed when there are as many - // closes as beginTransaction. Makes the try-with-resources paradigm easier for - // java developers - - @Override - public Transaction beginTransaction(final WriteOptions writeOptions, - final Transaction oldTransaction) { - final long jtxnHandle = beginTransaction_withOld(nativeHandle_, - writeOptions.nativeHandle_, oldTransaction.nativeHandle_); - - // RocksJava relies on the assumption that - // we do not allocate a new Transaction object - // when providing an old_txn - assert(jtxnHandle == oldTransaction.nativeHandle_); - - return oldTransaction; - } - - @Override - public Transaction beginTransaction(final WriteOptions writeOptions, - final TransactionOptions transactionOptions, - final Transaction oldTransaction) { - final long jtxn_handle = beginTransaction_withOld(nativeHandle_, - writeOptions.nativeHandle_, transactionOptions.nativeHandle_, - oldTransaction.nativeHandle_); - - // RocksJava relies on the assumption that - // we do not allocate a new Transaction object - // when providing an old_txn - assert(jtxn_handle == oldTransaction.nativeHandle_); - - return oldTransaction; - } - - public Transaction getTransactionByName(final String transactionName) { - final long jtxnHandle = getTransactionByName(nativeHandle_, transactionName); - if(jtxnHandle == 0) { - return null; - } - - final Transaction txn = new Transaction(this, jtxnHandle); - - // this instance doesn't own the underlying C++ object - txn.disOwnNativeHandle(); - - return txn; - } - - public List getAllPreparedTransactions() { - final long[] jtxnHandles = getAllPreparedTransactions(nativeHandle_); - - final List txns = new ArrayList<>(); - for(final long jtxnHandle : jtxnHandles) { - final Transaction txn = new Transaction(this, jtxnHandle); - - // this instance doesn't own the underlying C++ object - txn.disOwnNativeHandle(); - - txns.add(txn); - } - return txns; - } - - public static class KeyLockInfo { - private final String key; - private final long[] transactionIDs; - private final boolean exclusive; - - public KeyLockInfo(final String key, final long transactionIDs[], - final boolean exclusive) { - this.key = key; - this.transactionIDs = transactionIDs; - this.exclusive = exclusive; - } - - /** - * Get the key. - * - * @return the key - */ - public String getKey() { - return key; - } - - /** - * Get the Transaction IDs. - * - * @return the Transaction IDs. - */ - public long[] getTransactionIDs() { - return transactionIDs; - } - - /** - * Get the Lock status. - * - * @return true if the lock is exclusive, false if the lock is shared. - */ - public boolean isExclusive() { - return exclusive; - } - } - - /** - * Returns map of all locks held. - * - * @return a map of all the locks held. - */ - public Map getLockStatusData() { - return getLockStatusData(nativeHandle_); - } - - /** - * Called from C++ native method {@link #getDeadlockInfoBuffer(long)} - * to construct a DeadlockInfo object. 
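getLockStatusData() and getDeadlockInfoBuffer() (both defined below) give a diagnostic view of the lock manager. A sketch of dumping them for an already open TransactionDB txnDb:

    // Which keys are currently locked, per column family id, and by which transactions?
    final java.util.Map<Long, TransactionDB.KeyLockInfo> locks = txnDb.getLockStatusData();
    for (final java.util.Map.Entry<Long, TransactionDB.KeyLockInfo> entry : locks.entrySet()) {
      final TransactionDB.KeyLockInfo info = entry.getValue();
      System.out.println("cf=" + entry.getKey() + " key=" + info.getKey()
          + " exclusive=" + info.isExclusive()
          + " holders=" + java.util.Arrays.toString(info.getTransactionIDs()));
    }
    // Recently detected deadlock cycles (bounded by the deadlock info buffer size).
    final TransactionDB.DeadlockPath[] deadlocks = txnDb.getDeadlockInfoBuffer();
    System.out.println("recorded deadlock paths: " + deadlocks.length);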
- * - * @param transactionID The transaction id - * @param columnFamilyId The id of the {@link ColumnFamilyHandle} - * @param waitingKey the key that we are waiting on - * @param exclusive true if the lock is exclusive, false if the lock is shared - * - * @return The waiting transactions - */ - private DeadlockInfo newDeadlockInfo( - final long transactionID, final long columnFamilyId, - final String waitingKey, final boolean exclusive) { - return new DeadlockInfo(transactionID, columnFamilyId, - waitingKey, exclusive); - } - - public static class DeadlockInfo { - private final long transactionID; - private final long columnFamilyId; - private final String waitingKey; - private final boolean exclusive; - - private DeadlockInfo(final long transactionID, final long columnFamilyId, - final String waitingKey, final boolean exclusive) { - this.transactionID = transactionID; - this.columnFamilyId = columnFamilyId; - this.waitingKey = waitingKey; - this.exclusive = exclusive; - } - - /** - * Get the Transaction ID. - * - * @return the transaction ID - */ - public long getTransactionID() { - return transactionID; - } - - /** - * Get the Column Family ID. - * - * @return The column family ID - */ - public long getColumnFamilyId() { - return columnFamilyId; - } - - /** - * Get the key that we are waiting on. - * - * @return the key that we are waiting on - */ - public String getWaitingKey() { - return waitingKey; - } - - /** - * Get the Lock status. - * - * @return true if the lock is exclusive, false if the lock is shared. - */ - public boolean isExclusive() { - return exclusive; - } - } - - public static class DeadlockPath { - final DeadlockInfo[] path; - final boolean limitExceeded; - - public DeadlockPath(final DeadlockInfo[] path, final boolean limitExceeded) { - this.path = path; - this.limitExceeded = limitExceeded; - } - - public boolean isEmpty() { - return path.length == 0 && !limitExceeded; - } - } - - public DeadlockPath[] getDeadlockInfoBuffer() { - return getDeadlockInfoBuffer(nativeHandle_); - } - - public void setDeadlockInfoBufferSize(final int targetSize) { - setDeadlockInfoBufferSize(nativeHandle_, targetSize); - } - - private void storeTransactionDbOptions( - final TransactionDBOptions transactionDbOptions) { - this.transactionDbOptions_ = transactionDbOptions; - } - - @Override protected final native void disposeInternal(final long handle); - - private static native long open(final long optionsHandle, - final long transactionDbOptionsHandle, final String path) - throws RocksDBException; - private static native long[] open(final long dbOptionsHandle, - final long transactionDbOptionsHandle, final String path, - final byte[][] columnFamilyNames, final long[] columnFamilyOptions); - private native static void closeDatabase(final long handle) - throws RocksDBException; - private native long beginTransaction(final long handle, - final long writeOptionsHandle); - private native long beginTransaction(final long handle, - final long writeOptionsHandle, final long transactionOptionsHandle); - private native long beginTransaction_withOld(final long handle, - final long writeOptionsHandle, final long oldTransactionHandle); - private native long beginTransaction_withOld(final long handle, - final long writeOptionsHandle, final long transactionOptionsHandle, - final long oldTransactionHandle); - private native long getTransactionByName(final long handle, - final String name); - private native long[] getAllPreparedTransactions(final long handle); - private native Map getLockStatusData( - 
final long handle); - private native DeadlockPath[] getDeadlockInfoBuffer(final long handle); - private native void setDeadlockInfoBufferSize(final long handle, - final int targetSize); -} diff --git a/java/src/main/java/org/rocksdb/TransactionDBOptions.java b/java/src/main/java/org/rocksdb/TransactionDBOptions.java deleted file mode 100644 index 7f4296a7c..000000000 --- a/java/src/main/java/org/rocksdb/TransactionDBOptions.java +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public class TransactionDBOptions extends RocksObject { - - public TransactionDBOptions() { - super(newTransactionDBOptions()); - } - - /** - * Specifies the maximum number of keys that can be locked at the same time - * per column family. - * - * If the number of locked keys is greater than {@link #getMaxNumLocks()}, - * transaction writes (or GetForUpdate) will return an error. - * - * @return The maximum number of keys that can be locked - */ - public long getMaxNumLocks() { - assert(isOwningHandle()); - return getMaxNumLocks(nativeHandle_); - } - - /** - * Specifies the maximum number of keys that can be locked at the same time - * per column family. - * - * If the number of locked keys is greater than {@link #getMaxNumLocks()}, - * transaction writes (or GetForUpdate) will return an error. - * - * @param maxNumLocks The maximum number of keys that can be locked; - * If this value is not positive, no limit will be enforced. - * - * @return this TransactionDBOptions instance - */ - public TransactionDBOptions setMaxNumLocks(final long maxNumLocks) { - assert(isOwningHandle()); - setMaxNumLocks(nativeHandle_, maxNumLocks); - return this; - } - - /** - * The number of sub-tables per lock table (per column family) - * - * @return The number of sub-tables - */ - public long getNumStripes() { - assert(isOwningHandle()); - return getNumStripes(nativeHandle_); - } - - /** - * Increasing this value will increase the concurrency by dividing the lock - * table (per column family) into more sub-tables, each with their own - * separate mutex. - * - * Default: 16 - * - * @param numStripes The number of sub-tables - * - * @return this TransactionDBOptions instance - */ - public TransactionDBOptions setNumStripes(final long numStripes) { - assert(isOwningHandle()); - setNumStripes(nativeHandle_, numStripes); - return this; - } - - /** - * The default wait timeout in milliseconds when - * a transaction attempts to lock a key if not specified by - * {@link TransactionOptions#setLockTimeout(long)} - * - * If 0, no waiting is done if a lock cannot instantly be acquired. - * If negative, there is no timeout. - * - * @return the default wait timeout in milliseconds - */ - public long getTransactionLockTimeout() { - assert(isOwningHandle()); - return getTransactionLockTimeout(nativeHandle_); - } - - /** - * If positive, specifies the default wait timeout in milliseconds when - * a transaction attempts to lock a key if not specified by - * {@link TransactionOptions#setLockTimeout(long)} - * - * If 0, no waiting is done if a lock cannot instantly be acquired. - * If negative, there is no timeout. Not using a timeout is not recommended - * as it can lead to deadlocks. Currently, there is no deadlock-detection to - * recover from a deadlock. 
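For illustration, a hedged sketch that configures these TransactionDBOptions (including setDefaultLockTimeout, documented just below) and opens a TransactionDB with them. The path and numbers are arbitrary, and the snippet is assumed to live in a method that declares throws RocksDBException:

    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final TransactionDBOptions txnDbOptions = new TransactionDBOptions()
             .setMaxNumLocks(10_000)             // cap locked keys per column family
             .setNumStripes(16)                  // lock-table sub-tables (default 16)
             .setTransactionLockTimeout(1000)    // ms, fallback when TransactionOptions sets none
             .setDefaultLockTimeout(1000);       // ms, for writes made outside any transaction
         final TransactionDB txnDb =
             TransactionDB.open(options, txnDbOptions, "/tmp/txndb-example");
         final WriteOptions writeOptions = new WriteOptions();
         final Transaction txn = txnDb.beginTransaction(writeOptions)) {
      txn.put("key".getBytes(), "value".getBytes());
      txn.commit();
    }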
- * - * Default: 1000 - * - * @param transactionLockTimeout the default wait timeout in milliseconds - * - * @return this TransactionDBOptions instance - */ - public TransactionDBOptions setTransactionLockTimeout( - final long transactionLockTimeout) { - assert(isOwningHandle()); - setTransactionLockTimeout(nativeHandle_, transactionLockTimeout); - return this; - } - - /** - * The wait timeout in milliseconds when writing a key - * OUTSIDE of a transaction (ie by calling {@link RocksDB#put}, - * {@link RocksDB#merge}, {@link RocksDB#delete} or {@link RocksDB#write} - * directly). - * - * If 0, no waiting is done if a lock cannot instantly be acquired. - * If negative, there is no timeout and will block indefinitely when acquiring - * a lock. - * - * @return the timeout in milliseconds when writing a key OUTSIDE of a - * transaction - */ - public long getDefaultLockTimeout() { - assert(isOwningHandle()); - return getDefaultLockTimeout(nativeHandle_); - } - - /** - * If positive, specifies the wait timeout in milliseconds when writing a key - * OUTSIDE of a transaction (ie by calling {@link RocksDB#put}, - * {@link RocksDB#merge}, {@link RocksDB#delete} or {@link RocksDB#write} - * directly). - * - * If 0, no waiting is done if a lock cannot instantly be acquired. - * If negative, there is no timeout and will block indefinitely when acquiring - * a lock. - * - * Not using a timeout can lead to deadlocks. Currently, there - * is no deadlock-detection to recover from a deadlock. While DB writes - * cannot deadlock with other DB writes, they can deadlock with a transaction. - * A negative timeout should only be used if all transactions have a small - * expiration set. - * - * Default: 1000 - * - * @param defaultLockTimeout the timeout in milliseconds when writing a key - * OUTSIDE of a transaction - * @return this TransactionDBOptions instance - */ - public TransactionDBOptions setDefaultLockTimeout( - final long defaultLockTimeout) { - assert(isOwningHandle()); - setDefaultLockTimeout(nativeHandle_, defaultLockTimeout); - return this; - } - -// /** -// * If set, the {@link TransactionDB} will use this implementation of a mutex -// * and condition variable for all transaction locking instead of the default -// * mutex/condvar implementation. -// * -// * @param transactionDbMutexFactory the mutex factory for the transactions -// * -// * @return this TransactionDBOptions instance -// */ -// public TransactionDBOptions setCustomMutexFactory( -// final TransactionDBMutexFactory transactionDbMutexFactory) { -// -// } - - /** - * The policy for when to write the data into the DB. The default policy is to - * write only the committed data {@link TxnDBWritePolicy#WRITE_COMMITTED}. - * The data could be written before the commit phase. The DB then needs to - * provide the mechanisms to tell apart committed from uncommitted data. - * - * @return The write policy. - */ - public TxnDBWritePolicy getWritePolicy() { - assert(isOwningHandle()); - return TxnDBWritePolicy.getTxnDBWritePolicy(getWritePolicy(nativeHandle_)); - } - - /** - * The policy for when to write the data into the DB. The default policy is to - * write only the committed data {@link TxnDBWritePolicy#WRITE_COMMITTED}. - * The data could be written before the commit phase. The DB then needs to - * provide the mechanisms to tell apart committed from uncommitted data. - * - * @param writePolicy The write policy. 
- * - * @return this TransactionDBOptions instance - */ - public TransactionDBOptions setWritePolicy( - final TxnDBWritePolicy writePolicy) { - assert(isOwningHandle()); - setWritePolicy(nativeHandle_, writePolicy.getValue()); - return this; - } - - private native static long newTransactionDBOptions(); - private native long getMaxNumLocks(final long handle); - private native void setMaxNumLocks(final long handle, - final long maxNumLocks); - private native long getNumStripes(final long handle); - private native void setNumStripes(final long handle, final long numStripes); - private native long getTransactionLockTimeout(final long handle); - private native void setTransactionLockTimeout(final long handle, - final long transactionLockTimeout); - private native long getDefaultLockTimeout(final long handle); - private native void setDefaultLockTimeout(final long handle, - final long transactionLockTimeout); - private native byte getWritePolicy(final long handle); - private native void setWritePolicy(final long handle, final byte writePolicy); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/TransactionLogIterator.java b/java/src/main/java/org/rocksdb/TransactionLogIterator.java deleted file mode 100644 index 5d9ec58d7..000000000 --- a/java/src/main/java/org/rocksdb/TransactionLogIterator.java +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -/** - *

A TransactionLogIterator is used to iterate over the transactions in a db. - * One run of the iterator is continuous, i.e. the iterator will stop at the - * beginning of any gap in sequences. - */ -public class TransactionLogIterator extends RocksObject { - - /** - * An iterator is either positioned at a WriteBatch - * or not valid. This method returns true if the iterator - * is valid. Can read data from a valid iterator.
- * - * @return true if iterator position is valid. - */ - public boolean isValid() { - return isValid(nativeHandle_); - } - - /** - *

Moves the iterator to the next WriteBatch. - * REQUIRES: Valid() to be true. - */ - public void next() { - next(nativeHandle_); - } - - /** - *
Throws RocksDBException if something went wrong.
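Putting isValid(), next(), status() and getBatch() together, a sketch of replaying updates from a given sequence number. It assumes db is an open RocksDB whose WAL still contains startSequence, since RocksDB#getUpdatesSince(long) is the usual way to obtain this iterator:

    try (final TransactionLogIterator logIterator = db.getUpdatesSince(startSequence)) {
      while (logIterator.isValid()) {
        logIterator.status();   // throws RocksDBException if iteration hit an error
        final TransactionLogIterator.BatchResult result = logIterator.getBatch();
        System.out.println("sequence " + result.sequenceNumber()
            + " -> " + result.writeBatch().count() + " operations");
        logIterator.next();
      }
    }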

- * - * @throws org.rocksdb.RocksDBException if something went - * wrong in the underlying C++ code. - */ - public void status() throws RocksDBException { - status(nativeHandle_); - } - - /** - *

If iterator position is valid, return the current - * write_batch and the sequence number of the earliest - * transaction contained in the batch. - * - * ONLY use if Valid() is true and status() is OK.
- * - * @return {@link org.rocksdb.TransactionLogIterator.BatchResult} - * instance. - */ - public BatchResult getBatch() { - assert(isValid()); - return getBatch(nativeHandle_); - } - - /** - *

TransactionLogIterator constructor. - * - * @param nativeHandle address to native address. - */ - TransactionLogIterator(final long nativeHandle) { - super(nativeHandle); - } - - /** - * BatchResult represents a data structure returned - * by a TransactionLogIterator containing a sequence - * number and a {@link WriteBatch} instance.
- */ - public static final class BatchResult { - /** - *

Constructor of BatchResult class. - * - * @param sequenceNumber related to this BatchResult instance. - * @param nativeHandle to {@link org.rocksdb.WriteBatch} - * native instance. - */ - public BatchResult(final long sequenceNumber, - final long nativeHandle) { - sequenceNumber_ = sequenceNumber; - writeBatch_ = new WriteBatch(nativeHandle, true); - } - - /** - * Return sequence number related to this BatchResult.
- * - * @return Sequence number. - */ - public long sequenceNumber() { - return sequenceNumber_; - } - - /** - *

Return the contained {@link org.rocksdb.WriteBatch} - * instance.
- * - * @return {@link org.rocksdb.WriteBatch} instance. - */ - public WriteBatch writeBatch() { - return writeBatch_; - } - - private final long sequenceNumber_; - private final WriteBatch writeBatch_; - } - - @Override protected final native void disposeInternal(final long handle); - private native boolean isValid(long handle); - private native void next(long handle); - private native void status(long handle) - throws RocksDBException; - private native BatchResult getBatch(long handle); -} diff --git a/java/src/main/java/org/rocksdb/TransactionOptions.java b/java/src/main/java/org/rocksdb/TransactionOptions.java deleted file mode 100644 index 195fc85e4..000000000 --- a/java/src/main/java/org/rocksdb/TransactionOptions.java +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public class TransactionOptions extends RocksObject - implements TransactionalOptions { - - public TransactionOptions() { - super(newTransactionOptions()); - } - - @Override - public boolean isSetSnapshot() { - assert(isOwningHandle()); - return isSetSnapshot(nativeHandle_); - } - - @Override - public TransactionOptions setSetSnapshot(final boolean setSnapshot) { - assert(isOwningHandle()); - setSetSnapshot(nativeHandle_, setSnapshot); - return this; - } - - /** - * True means that before acquiring locks, this transaction will - * check if doing so will cause a deadlock. If so, it will return with - * {@link Status.Code#Busy}. The user should retry their transaction. - * - * @return true if a deadlock is detected. - */ - public boolean isDeadlockDetect() { - assert(isOwningHandle()); - return isDeadlockDetect(nativeHandle_); - } - - /** - * Setting to true means that before acquiring locks, this transaction will - * check if doing so will cause a deadlock. If so, it will return with - * {@link Status.Code#Busy}. The user should retry their transaction. - * - * @param deadlockDetect true if we should detect deadlocks. - * - * @return this TransactionOptions instance - */ - public TransactionOptions setDeadlockDetect(final boolean deadlockDetect) { - assert(isOwningHandle()); - setDeadlockDetect(nativeHandle_, deadlockDetect); - return this; - } - - /** - * The wait timeout in milliseconds when a transaction attempts to lock a key. - * - * If 0, no waiting is done if a lock cannot instantly be acquired. - * If negative, {@link TransactionDBOptions#getTransactionLockTimeout(long)} - * will be used - * - * @return the lock timeout in milliseconds - */ - public long getLockTimeout() { - assert(isOwningHandle()); - return getLockTimeout(nativeHandle_); - } - - /** - * If positive, specifies the wait timeout in milliseconds when - * a transaction attempts to lock a key. - * - * If 0, no waiting is done if a lock cannot instantly be acquired. - * If negative, {@link TransactionDBOptions#getTransactionLockTimeout(long)} - * will be used - * - * Default: -1 - * - * @param lockTimeout the lock timeout in milliseconds - * - * @return this TransactionOptions instance - */ - public TransactionOptions setLockTimeout(final long lockTimeout) { - assert(isOwningHandle()); - setLockTimeout(nativeHandle_, lockTimeout); - return this; - } - - /** - * Expiration duration in milliseconds. 
- * - * If non-negative, transactions that last longer than this many milliseconds - * will fail to commit. If not set, a forgotten transaction that is never - * committed, rolled back, or deleted will never relinquish any locks it - * holds. This could prevent keys from being written by other writers. - * - * @return expiration the expiration duration in milliseconds - */ - public long getExpiration() { - assert(isOwningHandle()); - return getExpiration(nativeHandle_); - } - - /** - * Expiration duration in milliseconds. - * - * If non-negative, transactions that last longer than this many milliseconds - * will fail to commit. If not set, a forgotten transaction that is never - * committed, rolled back, or deleted will never relinquish any locks it - * holds. This could prevent keys from being written by other writers. - * - * Default: -1 - * - * @param expiration the expiration duration in milliseconds - * - * @return this TransactionOptions instance - */ - public TransactionOptions setExpiration(final long expiration) { - assert(isOwningHandle()); - setExpiration(nativeHandle_, expiration); - return this; - } - - /** - * Gets the number of traversals to make during deadlock detection. - * - * @return the number of traversals to make during - * deadlock detection - */ - public long getDeadlockDetectDepth() { - return getDeadlockDetectDepth(nativeHandle_); - } - - /** - * Sets the number of traversals to make during deadlock detection. - * - * Default: 50 - * - * @param deadlockDetectDepth the number of traversals to make during - * deadlock detection - * - * @return this TransactionOptions instance - */ - public TransactionOptions setDeadlockDetectDepth( - final long deadlockDetectDepth) { - setDeadlockDetectDepth(nativeHandle_, deadlockDetectDepth); - return this; - } - - /** - * Get the maximum number of bytes that may be used for the write batch. - * - * @return the maximum number of bytes, 0 means no limit. - */ - public long getMaxWriteBatchSize() { - return getMaxWriteBatchSize(nativeHandle_); - } - - /** - * Set the maximum number of bytes that may be used for the write batch. - * - * @param maxWriteBatchSize the maximum number of bytes, 0 means no limit. 
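A hedged sketch of a per-transaction configuration built from the options above; txnDb and writeOptions are assumed to exist already and the values are arbitrary:

    try (final TransactionOptions txnOptions = new TransactionOptions()
             .setSetSnapshot(true)        // read from a consistent snapshot
             .setDeadlockDetect(true)     // fail with Busy instead of deadlocking
             .setLockTimeout(500)         // ms to wait for each key lock
             .setExpiration(60_000)       // give up on the transaction after 60 s
             .setMaxWriteBatchSize(0);    // 0 = no limit on pending write-batch bytes
         final Transaction txn = txnDb.beginTransaction(writeOptions, txnOptions)) {
      txn.put("key".getBytes(), "value".getBytes());
      txn.commit();
    }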
- * - * @return this TransactionOptions instance - */ - public TransactionOptions setMaxWriteBatchSize(final long maxWriteBatchSize) { - setMaxWriteBatchSize(nativeHandle_, maxWriteBatchSize); - return this; - } - - private native static long newTransactionOptions(); - private native boolean isSetSnapshot(final long handle); - private native void setSetSnapshot(final long handle, - final boolean setSnapshot); - private native boolean isDeadlockDetect(final long handle); - private native void setDeadlockDetect(final long handle, - final boolean deadlockDetect); - private native long getLockTimeout(final long handle); - private native void setLockTimeout(final long handle, final long lockTimeout); - private native long getExpiration(final long handle); - private native void setExpiration(final long handle, final long expiration); - private native long getDeadlockDetectDepth(final long handle); - private native void setDeadlockDetectDepth(final long handle, - final long deadlockDetectDepth); - private native long getMaxWriteBatchSize(final long handle); - private native void setMaxWriteBatchSize(final long handle, - final long maxWriteBatchSize); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/TransactionalDB.java b/java/src/main/java/org/rocksdb/TransactionalDB.java deleted file mode 100644 index 740181989..000000000 --- a/java/src/main/java/org/rocksdb/TransactionalDB.java +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -interface TransactionalDB> extends AutoCloseable { - /** - * Starts a new Transaction. - * - * Caller is responsible for calling {@link #close()} on the returned - * transaction when it is no longer needed. - * - * @param writeOptions Any write options for the transaction - * @return a new transaction - */ - Transaction beginTransaction(final WriteOptions writeOptions); - - /** - * Starts a new Transaction. - * - * Caller is responsible for calling {@link #close()} on the returned - * transaction when it is no longer needed. - * - * @param writeOptions Any write options for the transaction - * @param transactionOptions Any options for the transaction - * @return a new transaction - */ - Transaction beginTransaction(final WriteOptions writeOptions, - final T transactionOptions); - - /** - * Starts a new Transaction. - * - * Caller is responsible for calling {@link #close()} on the returned - * transaction when it is no longer needed. - * - * @param writeOptions Any write options for the transaction - * @param oldTransaction this Transaction will be reused instead of allocating - * a new one. This is an optimization to avoid extra allocations - * when repeatedly creating transactions. - * @return The oldTransaction which has been reinitialized as a new - * transaction - */ - Transaction beginTransaction(final WriteOptions writeOptions, - final Transaction oldTransaction); - - /** - * Starts a new Transaction. - * - * Caller is responsible for calling {@link #close()} on the returned - * transaction when it is no longer needed. 
- * - * @param writeOptions Any write options for the transaction - * @param transactionOptions Any options for the transaction - * @param oldTransaction this Transaction will be reused instead of allocating - * a new one. This is an optimization to avoid extra allocations - * when repeatedly creating transactions. - * @return The oldTransaction which has been reinitialized as a new - * transaction - */ - Transaction beginTransaction(final WriteOptions writeOptions, - final T transactionOptions, final Transaction oldTransaction); -} diff --git a/java/src/main/java/org/rocksdb/TransactionalOptions.java b/java/src/main/java/org/rocksdb/TransactionalOptions.java deleted file mode 100644 index d55ee900c..000000000 --- a/java/src/main/java/org/rocksdb/TransactionalOptions.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - - -interface TransactionalOptions> - extends AutoCloseable { - - /** - * True indicates snapshots will be set, just like if - * {@link Transaction#setSnapshot()} had been called - * - * @return whether a snapshot will be set - */ - boolean isSetSnapshot(); - - /** - * Setting the setSnapshot to true is the same as calling - * {@link Transaction#setSnapshot()}. - * - * Default: false - * - * @param setSnapshot Whether to set a snapshot - * - * @return this TransactionalOptions instance - */ - T setSetSnapshot(final boolean setSnapshot); -} diff --git a/java/src/main/java/org/rocksdb/TtlDB.java b/java/src/main/java/org/rocksdb/TtlDB.java deleted file mode 100644 index a7adaf4b2..000000000 --- a/java/src/main/java/org/rocksdb/TtlDB.java +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.List; - -/** - * Database with TTL support. - * - *

Use case: - * This API should be used to open the db when key-values inserted are - * meant to be removed from the db in a non-strict 'ttl' amount of time. - * Therefore, this guarantees that key-values inserted will remain in the - * db for >= ttl amount of time and the db will make efforts to remove the - * key-values as soon as possible after ttl seconds of their insertion. - * - * Behaviour: - * TTL is accepted in seconds. - * (int32_t)Timestamp(creation) is suffixed to values in Put internally. - * Expired TTL values are deleted in compaction only: (Timestamp+ttl<time_now). - * Get/Iterator may return expired entries (compaction not run on them yet). - * Different TTL may be used during different Opens. - * - * Example: - * Open1 at t=0 with ttl=4 and insert k1,k2, close at t=2. - * Open2 at t=3 with ttl=5. Now k1,k2 should be deleted at t>=5. - * - * read_only=true opens in the usual read-only mode. Compactions will not be - * triggered (neither manual nor automatic), so no expired entries are removed. - * - * Constraints: - * Not specifying/passing or non-positive TTL behaves like TTL = infinity. - * - * !!!WARNING!!! - * Calling DB::Open directly to re-open a db created by this API will get - * corrupt values (timestamp suffixed) and no ttl effect will be there - * during the second Open, so use this API consistently to open the db. - * Be careful when passing ttl with a small positive value because the - * whole database may be deleted in a small amount of time.
- */ -public class TtlDB extends RocksDB { - - /** - *

Opens a TtlDB. - * - * Database is opened in read-write mode without default TTL.
- * - * @param options {@link org.rocksdb.Options} instance. - * @param db_path path to database. - * - * @return TtlDB instance. - * - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public static TtlDB open(final Options options, final String db_path) - throws RocksDBException { - return open(options, db_path, 0, false); - } - - /** - *

Opens a TtlDB.

- * - * @param options {@link org.rocksdb.Options} instance. - * @param db_path path to database. - * @param ttl time to live for new entries. - * @param readOnly boolean value indicating if the db is - * opened read-only. - * - * @return TtlDB instance. - * - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - public static TtlDB open(final Options options, final String db_path, - final int ttl, final boolean readOnly) throws RocksDBException { - return new TtlDB(open(options.nativeHandle_, db_path, ttl, readOnly)); - } - - /** - *

Opens a TtlDB.

- * - * @param options {@link org.rocksdb.Options} instance. - * @param db_path path to database. - * @param columnFamilyDescriptors list of column family descriptors - * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances - * on open. - * @param ttlValues time to live values per column family handle - * @param readOnly boolean value indicating if database if db is - * opened read-only. - * - * @return TtlDB instance. - * - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - * @throws java.lang.IllegalArgumentException when there is not a ttl value - * per given column family handle. - */ - public static TtlDB open(final DBOptions options, final String db_path, - final List columnFamilyDescriptors, - final List columnFamilyHandles, - final List ttlValues, final boolean readOnly) - throws RocksDBException { - if (columnFamilyDescriptors.size() != ttlValues.size()) { - throw new IllegalArgumentException("There must be a ttl value per column" - + " family handle."); - } - - final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; - final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; - for (int i = 0; i < columnFamilyDescriptors.size(); i++) { - final ColumnFamilyDescriptor cfDescriptor = - columnFamilyDescriptors.get(i); - cfNames[i] = cfDescriptor.getName(); - cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; - } - - final int ttlVals[] = new int[ttlValues.size()]; - for(int i = 0; i < ttlValues.size(); i++) { - ttlVals[i] = ttlValues.get(i); - } - final long[] handles = openCF(options.nativeHandle_, db_path, - cfNames, cfOptionHandles, ttlVals, readOnly); - - final TtlDB ttlDB = new TtlDB(handles[0]); - for (int i = 1; i < handles.length; i++) { - columnFamilyHandles.add(new ColumnFamilyHandle(ttlDB, handles[i])); - } - return ttlDB; - } - - /** - *

Close the TtlDB instance and release resource.

- * - * This is similar to {@link #close()} except that it - * throws an exception if any error occurs. - * - * This will not fsync the WAL files. - * If syncing is required, the caller must first call {@link #syncWal()} - * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch - * with {@link WriteOptions#setSync(boolean)} set to true. - * - * See also {@link #close()}. - * - * @throws RocksDBException if an error occurs whilst closing. - */ - public void closeE() throws RocksDBException { - if (owningHandle_.compareAndSet(true, false)) { - try { - closeDatabase(nativeHandle_); - } finally { - disposeInternal(); - } - } - } - - /** - *

Close the TtlDB instance and release resource.

- * - * - * This will not fsync the WAL files. - * If syncing is required, the caller must first call {@link #syncWal()} - * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch - * with {@link WriteOptions#setSync(boolean)} set to true. - * - * See also {@link #close()}. - */ - @Override - public void close() { - if (owningHandle_.compareAndSet(true, false)) { - try { - closeDatabase(nativeHandle_); - } catch (final RocksDBException e) { - // silently ignore the error report - } finally { - disposeInternal(); - } - } - } - - /** - *

Creates a new ttl based column family with a name defined - * in given ColumnFamilyDescriptor and allocates a - * ColumnFamilyHandle within an internal structure. - * - * The ColumnFamilyHandle is automatically disposed with DB - * disposal.
- * - * @param columnFamilyDescriptor column family to be created. - * @param ttl TTL to set for this column family. - * - * @return {@link org.rocksdb.ColumnFamilyHandle} instance. - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - public ColumnFamilyHandle createColumnFamilyWithTtl( - final ColumnFamilyDescriptor columnFamilyDescriptor, - final int ttl) throws RocksDBException { - return new ColumnFamilyHandle(this, - createColumnFamilyWithTtl(nativeHandle_, - columnFamilyDescriptor.getName(), - columnFamilyDescriptor.getOptions().nativeHandle_, ttl)); - } - - /** - *

A protected constructor that will be used in the static - * factory method - * {@link #open(Options, String, int, boolean)} - * and - * {@link #open(DBOptions, String, java.util.List, java.util.List, - * java.util.List, boolean)}. - *

- * - * @param nativeHandle The native handle of the C++ TtlDB object - */ - protected TtlDB(final long nativeHandle) { - super(nativeHandle); - } - - @Override protected native void disposeInternal(final long handle); - - private native static long open(final long optionsHandle, - final String db_path, final int ttl, final boolean readOnly) - throws RocksDBException; - private native static long[] openCF(final long optionsHandle, - final String db_path, final byte[][] columnFamilyNames, - final long[] columnFamilyOptions, final int[] ttlValues, - final boolean readOnly) throws RocksDBException; - private native long createColumnFamilyWithTtl(final long handle, - final byte[] columnFamilyName, final long columnFamilyOptions, int ttl) - throws RocksDBException; - private native static void closeDatabase(final long handle) - throws RocksDBException; -} diff --git a/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java b/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java deleted file mode 100644 index 837ce6157..000000000 --- a/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -/** - * The transaction db write policy. - */ -public enum TxnDBWritePolicy { - /** - * Write only the committed data. - */ - WRITE_COMMITTED((byte)0x00), - - /** - * Write data after the prepare phase of 2pc. - */ - WRITE_PREPARED((byte)0x1), - - /** - * Write data before the prepare phase of 2pc. - */ - WRITE_UNPREPARED((byte)0x2); - - private byte value; - - TxnDBWritePolicy(final byte value) { - this.value = value; - } - - /** - *

Returns the byte value of the enumeration value.

- * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - *

Get the TxnDBWritePolicy enumeration value by - * passing the byte identifier to this method.

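A quick illustration of the byte round-trip described above; in practice the chosen policy is usually passed to TransactionDBOptions#setWritePolicy, which is assumed here rather than shown in this file:

```java
import org.rocksdb.TxnDBWritePolicy;

public class TxnDBWritePolicyExample {
  public static void main(String[] args) {
    final TxnDBWritePolicy policy = TxnDBWritePolicy.WRITE_PREPARED;
    // Persist the compact byte form, e.g. in a config file or over the wire...
    final byte id = policy.getValue();
    // ...and recover the enum constant from it later.
    final TxnDBWritePolicy restored = TxnDBWritePolicy.getTxnDBWritePolicy(id);
    System.out.println(restored); // WRITE_PREPARED
  }
}
```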
- * - * @param byteIdentifier of TxnDBWritePolicy. - * - * @return TxnDBWritePolicy instance. - * - * @throws IllegalArgumentException If TxnDBWritePolicy cannot be found for - * the provided byteIdentifier - */ - public static TxnDBWritePolicy getTxnDBWritePolicy(final byte byteIdentifier) { - for (final TxnDBWritePolicy txnDBWritePolicy : TxnDBWritePolicy.values()) { - if (txnDBWritePolicy.getValue() == byteIdentifier) { - return txnDBWritePolicy; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for TxnDBWritePolicy."); - } -} diff --git a/java/src/main/java/org/rocksdb/UInt64AddOperator.java b/java/src/main/java/org/rocksdb/UInt64AddOperator.java deleted file mode 100644 index cce9b298d..000000000 --- a/java/src/main/java/org/rocksdb/UInt64AddOperator.java +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Uint64AddOperator is a merge operator that accumlates a long - * integer value. - */ -public class UInt64AddOperator extends MergeOperator { - public UInt64AddOperator() { - super(newSharedUInt64AddOperator()); - } - - private native static long newSharedUInt64AddOperator(); - @Override protected final native void disposeInternal(final long handle); -} diff --git a/java/src/main/java/org/rocksdb/VectorMemTableConfig.java b/java/src/main/java/org/rocksdb/VectorMemTableConfig.java deleted file mode 100644 index fb1e7a948..000000000 --- a/java/src/main/java/org/rocksdb/VectorMemTableConfig.java +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -/** - * The config for vector memtable representation. - */ -public class VectorMemTableConfig extends MemTableConfig { - public static final int DEFAULT_RESERVED_SIZE = 0; - - /** - * VectorMemTableConfig constructor - */ - public VectorMemTableConfig() { - reservedSize_ = DEFAULT_RESERVED_SIZE; - } - - /** - * Set the initial size of the vector that will be used - * by the memtable created based on this config. - * - * @param size the initial size of the vector. - * @return the reference to the current config. - */ - public VectorMemTableConfig setReservedSize(final int size) { - reservedSize_ = size; - return this; - } - - /** - * Returns the initial size of the vector used by the memtable - * created based on this config. - * - * @return the initial size of the vector. - */ - public int reservedSize() { - return reservedSize_; - } - - @Override protected long newMemTableFactoryHandle() { - return newMemTableFactoryHandle(reservedSize_); - } - - private native long newMemTableFactoryHandle(long reservedSize) - throws IllegalArgumentException; - private int reservedSize_; -} diff --git a/java/src/main/java/org/rocksdb/WALRecoveryMode.java b/java/src/main/java/org/rocksdb/WALRecoveryMode.java deleted file mode 100644 index d8b9eeced..000000000 --- a/java/src/main/java/org/rocksdb/WALRecoveryMode.java +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -/** - * The WAL Recover Mode - */ -public enum WALRecoveryMode { - - /** - * Original levelDB recovery - * - * We tolerate incomplete record in trailing data on all logs - * Use case : This is legacy behavior (default) - */ - TolerateCorruptedTailRecords((byte)0x00), - - /** - * Recover from clean shutdown - * - * We don't expect to find any corruption in the WAL - * Use case : This is ideal for unit tests and rare applications that - * can require high consistency guarantee - */ - AbsoluteConsistency((byte)0x01), - - /** - * Recover to point-in-time consistency - * We stop the WAL playback on discovering WAL inconsistency - * Use case : Ideal for systems that have disk controller cache like - * hard disk, SSD without super capacitor that store related data - */ - PointInTimeRecovery((byte)0x02), - - /** - * Recovery after a disaster - * We ignore any corruption in the WAL and try to salvage as much data as - * possible - * Use case : Ideal for last ditch effort to recover data or systems that - * operate with low grade unrelated data - */ - SkipAnyCorruptedRecords((byte)0x03); - - private byte value; - - WALRecoveryMode(final byte value) { - this.value = value; - } - - /** - *

Returns the byte value of the enumeration value.

- * - * @return byte representation - */ - public byte getValue() { - return value; - } - - /** - *

Get the WALRecoveryMode enumeration value by - * passing the byte identifier to this method.

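The recovery modes above are normally selected through the DB options. A minimal sketch, assuming Options#setWalRecoveryMode and an illustrative path:

```java
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WALRecoveryMode;

public class WalRecoveryModeExample {
  public static void main(String[] args) throws RocksDBException {
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             // Stop WAL replay at the first sign of inconsistency instead of
             // tolerating a corrupted tail record (the default).
             .setWalRecoveryMode(WALRecoveryMode.PointInTimeRecovery);
         final RocksDB db = RocksDB.open(options, "/tmp/wal-recovery-example")) {
      db.put("k".getBytes(), "v".getBytes());
    }
  }
}
```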
- * - * @param byteIdentifier of WALRecoveryMode. - * - * @return WALRecoveryMode instance. - * - * @throws IllegalArgumentException If WALRecoveryMode cannot be found for the - * provided byteIdentifier - */ - public static WALRecoveryMode getWALRecoveryMode(final byte byteIdentifier) { - for (final WALRecoveryMode walRecoveryMode : WALRecoveryMode.values()) { - if (walRecoveryMode.getValue() == byteIdentifier) { - return walRecoveryMode; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for WALRecoveryMode."); - } -} diff --git a/java/src/main/java/org/rocksdb/WBWIRocksIterator.java b/java/src/main/java/org/rocksdb/WBWIRocksIterator.java deleted file mode 100644 index ce146eb3f..000000000 --- a/java/src/main/java/org/rocksdb/WBWIRocksIterator.java +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -public class WBWIRocksIterator - extends AbstractRocksIterator { - private final WriteEntry entry = new WriteEntry(); - - protected WBWIRocksIterator(final WriteBatchWithIndex wbwi, - final long nativeHandle) { - super(wbwi, nativeHandle); - } - - /** - * Get the current entry - * - * The WriteEntry is only valid - * until the iterator is repositioned. - * If you want to keep the WriteEntry across iterator - * movements, you must make a copy of its data! - * - * Note - This method is not thread-safe with respect to the WriteEntry - * as it performs a non-atomic update across the fields of the WriteEntry - * - * @return The WriteEntry of the current entry - */ - public WriteEntry entry() { - assert(isOwningHandle()); - final long[] ptrs = entry1(nativeHandle_); - - entry.type = WriteType.fromId((byte)ptrs[0]); - entry.key.resetNativeHandle(ptrs[1], ptrs[1] != 0); - entry.value.resetNativeHandle(ptrs[2], ptrs[2] != 0); - - return entry; - } - - @Override protected final native void disposeInternal(final long handle); - @Override final native boolean isValid0(long handle); - @Override final native void seekToFirst0(long handle); - @Override final native void seekToLast0(long handle); - @Override final native void next0(long handle); - @Override final native void prev0(long handle); - @Override final native void refresh0(final long handle) throws RocksDBException; - @Override final native void seek0(long handle, byte[] target, int targetLen); - @Override final native void seekForPrev0(long handle, byte[] target, int targetLen); - @Override final native void status0(long handle) throws RocksDBException; - @Override - final native void seekDirect0( - final long handle, final ByteBuffer target, final int targetOffset, final int targetLen); - @Override - final native void seekForPrevDirect0( - final long handle, final ByteBuffer target, final int targetOffset, final int targetLen); - @Override - final native void seekByteArray0( - final long handle, final byte[] target, final int targetOffset, final int targetLen); - @Override - final native void seekForPrevByteArray0( - final long handle, final byte[] target, final int targetOffset, final int targetLen); - - private native long[] entry1(final long handle); - - /** - * Enumeration of the Write operation - * that created the record in the Write Batch - */ - public enum WriteType { - PUT((byte)0x0), - MERGE((byte)0x1), - 
DELETE((byte)0x2), - SINGLE_DELETE((byte)0x3), - DELETE_RANGE((byte)0x4), - LOG((byte)0x5), - XID((byte)0x6); - - final byte id; - WriteType(final byte id) { - this.id = id; - } - - public static WriteType fromId(final byte id) { - for(final WriteType wt : WriteType.values()) { - if(id == wt.id) { - return wt; - } - } - throw new IllegalArgumentException("No WriteType with id=" + id); - } - } - - @Override - public void close() { - entry.close(); - super.close(); - } - - /** - * Represents an entry returned by - * {@link org.rocksdb.WBWIRocksIterator#entry()} - * - * It is worth noting that a WriteEntry with - * the type {@link org.rocksdb.WBWIRocksIterator.WriteType#DELETE} - * or {@link org.rocksdb.WBWIRocksIterator.WriteType#LOG} - * will not have a value. - */ - public static class WriteEntry implements AutoCloseable { - WriteType type = null; - final DirectSlice key; - final DirectSlice value; - - /** - * Intentionally private as this - * should only be instantiated in - * this manner by the outer WBWIRocksIterator - * class; The class members are then modified - * by calling {@link org.rocksdb.WBWIRocksIterator#entry()} - */ - private WriteEntry() { - key = new DirectSlice(); - value = new DirectSlice(); - } - - public WriteEntry(final WriteType type, final DirectSlice key, - final DirectSlice value) { - this.type = type; - this.key = key; - this.value = value; - } - - /** - * Returns the type of the Write Entry - * - * @return the WriteType of the WriteEntry - */ - public WriteType getType() { - return type; - } - - /** - * Returns the key of the Write Entry - * - * @return The slice containing the key - * of the WriteEntry - */ - public DirectSlice getKey() { - return key; - } - - /** - * Returns the value of the Write Entry - * - * @return The slice containing the value of - * the WriteEntry or null if the WriteEntry has - * no value - */ - public DirectSlice getValue() { - if(!value.isOwningHandle()) { - return null; //TODO(AR) migrate to JDK8 java.util.Optional#empty() - } else { - return value; - } - } - - /** - * Generates a hash code for the Write Entry. NOTE: The hash code is based - * on the string representation of the key, so it may not work correctly - * with exotic custom comparators. - * - * @return The hash code for the Write Entry - */ - @Override - public int hashCode() { - return (key == null) ? 0 : key.hashCode(); - } - - @Override - public boolean equals(final Object other) { - if(other == null) { - return false; - } else if (this == other) { - return true; - } else if(other instanceof WriteEntry) { - final WriteEntry otherWriteEntry = (WriteEntry)other; - return type.equals(otherWriteEntry.type) - && key.equals(otherWriteEntry.key) - && value.equals(otherWriteEntry.value); - } else { - return false; - } - } - - @Override - public void close() { - value.close(); - key.close(); - } - } -} diff --git a/java/src/main/java/org/rocksdb/WalFileType.java b/java/src/main/java/org/rocksdb/WalFileType.java deleted file mode 100644 index fed27ed11..000000000 --- a/java/src/main/java/org/rocksdb/WalFileType.java +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public enum WalFileType { - /** - * Indicates that WAL file is in archive directory. 
WAL files are moved from - * the main db directory to archive directory once they are not live and stay - * there until cleaned up. Files are cleaned depending on archive size - * (Options::WAL_size_limit_MB) and time since last cleaning - * (Options::WAL_ttl_seconds). - */ - kArchivedLogFile((byte)0x0), - - /** - * Indicates that WAL file is live and resides in the main db directory - */ - kAliveLogFile((byte)0x1); - - private final byte value; - - WalFileType(final byte value) { - this.value = value; - } - - /** - * Get the internal representation value. - * - * @return the internal representation value - */ - byte getValue() { - return value; - } - - /** - * Get the WalFileType from the internal representation value. - * - * @return the wal file type. - * - * @throws IllegalArgumentException if the value is unknown. - */ - static WalFileType fromValue(final byte value) { - for (final WalFileType walFileType : WalFileType.values()) { - if(walFileType.value == value) { - return walFileType; - } - } - - throw new IllegalArgumentException( - "Illegal value provided for WalFileType: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/WalFilter.java b/java/src/main/java/org/rocksdb/WalFilter.java deleted file mode 100644 index 37e36213a..000000000 --- a/java/src/main/java/org/rocksdb/WalFilter.java +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Map; - -/** - * WALFilter allows an application to inspect write-ahead-log (WAL) - * records or modify their processing on recovery. - */ -public interface WalFilter { - - /** - * Provide ColumnFamily->LogNumber map to filter - * so that filter can determine whether a log number applies to a given - * column family (i.e. that log hasn't been flushed to SST already for the - * column family). - * - * We also pass in name>id map as only name is known during - * recovery (as handles are opened post-recovery). - * while write batch callbacks happen in terms of column family id. - * - * @param cfLognumber column_family_id to lognumber map - * @param cfNameId column_family_name to column_family_id map - */ - void columnFamilyLogNumberMap(final Map cfLognumber, - final Map cfNameId); - - /** - * LogRecord is invoked for each log record encountered for all the logs - * during replay on logs on recovery. This method can be used to: - * * inspect the record (using the batch parameter) - * * ignoring current record - * (by returning WalProcessingOption::kIgnoreCurrentRecord) - * * reporting corrupted record - * (by returning WalProcessingOption::kCorruptedRecord) - * * stop log replay - * (by returning kStop replay) - please note that this implies - * discarding the logs from current record onwards. - * - * @param logNumber log number of the current log. - * Filter might use this to determine if the log - * record is applicable to a certain column family. - * @param logFileName log file name - only for informational purposes - * @param batch batch encountered in the log during recovery - * @param newBatch new batch to populate if filter wants to change - * the batch (for example to filter some records out, or alter some - * records). Please note that the new batch MUST NOT contain - * more records than original, else recovery would be failed. 
- * - * @return Processing option for the current record. - */ - LogRecordFoundResult logRecordFound(final long logNumber, - final String logFileName, final WriteBatch batch, - final WriteBatch newBatch); - - class LogRecordFoundResult { - public static LogRecordFoundResult CONTINUE_UNCHANGED = - new LogRecordFoundResult(WalProcessingOption.CONTINUE_PROCESSING, false); - - final WalProcessingOption walProcessingOption; - final boolean batchChanged; - - /** - * @param walProcessingOption the processing option - * @param batchChanged Whether batch was changed by the filter. - * It must be set to true if newBatch was populated, - * else newBatch has no effect. - */ - public LogRecordFoundResult(final WalProcessingOption walProcessingOption, - final boolean batchChanged) { - this.walProcessingOption = walProcessingOption; - this.batchChanged = batchChanged; - } - } - - /** - * Returns a name that identifies this WAL filter. - * The name will be printed to LOG file on start up for diagnosis. - * - * @return the name - */ - String name(); -} diff --git a/java/src/main/java/org/rocksdb/WalProcessingOption.java b/java/src/main/java/org/rocksdb/WalProcessingOption.java deleted file mode 100644 index 889602edc..000000000 --- a/java/src/main/java/org/rocksdb/WalProcessingOption.java +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public enum WalProcessingOption { - /** - * Continue processing as usual. - */ - CONTINUE_PROCESSING((byte)0x0), - - /** - * Ignore the current record but continue processing of log(s). - */ - IGNORE_CURRENT_RECORD((byte)0x1), - - /** - * Stop replay of logs and discard logs. - * Logs won't be replayed on subsequent recovery. - */ - STOP_REPLAY((byte)0x2), - - /** - * Corrupted record detected by filter. - */ - CORRUPTED_RECORD((byte)0x3); - - private final byte value; - - WalProcessingOption(final byte value) { - this.value = value; - } - - /** - * Get the internal representation. - * - * @return the internal representation. - */ - byte getValue() { - return value; - } - - public static WalProcessingOption fromValue(final byte value) { - for (final WalProcessingOption walProcessingOption : WalProcessingOption.values()) { - if (walProcessingOption.value == value) { - return walProcessingOption; - } - } - throw new IllegalArgumentException( - "Illegal value provided for WalProcessingOption: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/WriteBatch.java b/java/src/main/java/org/rocksdb/WriteBatch.java deleted file mode 100644 index 9b46108d0..000000000 --- a/java/src/main/java/org/rocksdb/WriteBatch.java +++ /dev/null @@ -1,396 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - * WriteBatch holds a collection of updates to apply atomically to a DB. - * - * The updates are applied in the order in which they are added - * to the WriteBatch. 
For example, the value of "key" will be "v3" - * after the following batch is written: - * - * batch.put("key", "v1"); - * batch.remove("key"); - * batch.put("key", "v2"); - * batch.put("key", "v3"); - * - * Multiple threads can invoke const methods on a WriteBatch without - * external synchronization, but if any of the threads may call a - * non-const method, all threads accessing the same WriteBatch must use - * external synchronization. - */ -public class WriteBatch extends AbstractWriteBatch { - /** - * Constructs a WriteBatch instance. - */ - public WriteBatch() { - this(0); - } - - /** - * Constructs a WriteBatch instance with a given size. - * - * @param reserved_bytes reserved size for WriteBatch - */ - public WriteBatch(final int reserved_bytes) { - super(newWriteBatch(reserved_bytes)); - } - - /** - * Constructs a WriteBatch instance from a serialized representation - * as returned by {@link #data()}. - * - * @param serialized the serialized representation. - */ - public WriteBatch(final byte[] serialized) { - super(newWriteBatch(serialized, serialized.length)); - } - - /** - * Support for iterating over the contents of a batch. - * - * @param handler A handler that is called back for each - * update present in the batch - * - * @throws RocksDBException If we cannot iterate over the batch - */ - public void iterate(final Handler handler) throws RocksDBException { - iterate(nativeHandle_, handler.nativeHandle_); - } - - /** - * Retrieve the serialized version of this batch. - * - * @return the serialized representation of this write batch. - * - * @throws RocksDBException if an error occurs whilst retrieving - * the serialized batch data. - */ - public byte[] data() throws RocksDBException { - return data(nativeHandle_); - } - - /** - * Retrieve data size of the batch. - * - * @return the serialized data size of the batch. - */ - public long getDataSize() { - return getDataSize(nativeHandle_); - } - - /** - * Returns true if Put will be called during Iterate. - * - * @return true if Put will be called during Iterate. - */ - public boolean hasPut() { - return hasPut(nativeHandle_); - } - - /** - * Returns true if Delete will be called during Iterate. - * - * @return true if Delete will be called during Iterate. - */ - public boolean hasDelete() { - return hasDelete(nativeHandle_); - } - - /** - * Returns true if SingleDelete will be called during Iterate. - * - * @return true if SingleDelete will be called during Iterate. - */ - public boolean hasSingleDelete() { - return hasSingleDelete(nativeHandle_); - } - - /** - * Returns true if DeleteRange will be called during Iterate. - * - * @return true if DeleteRange will be called during Iterate. - */ - public boolean hasDeleteRange() { - return hasDeleteRange(nativeHandle_); - } - - /** - * Returns true if Merge will be called during Iterate. - * - * @return true if Merge will be called during Iterate. - */ - public boolean hasMerge() { - return hasMerge(nativeHandle_); - } - - /** - * Returns true if MarkBeginPrepare will be called during Iterate. - * - * @return true if MarkBeginPrepare will be called during Iterate. - */ - public boolean hasBeginPrepare() { - return hasBeginPrepare(nativeHandle_); - } - - /** - * Returns true if MarkEndPrepare will be called during Iterate. - * - * @return true if MarkEndPrepare will be called during Iterate. - */ - public boolean hasEndPrepare() { - return hasEndPrepare(nativeHandle_); - } - - /** - * Returns true if MarkCommit will be called during Iterate. 
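The insertion-order semantics in the class comment above ("key" ends up as "v3") can be shown end to end. A minimal sketch with an illustrative path:

```java
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;

public class WriteBatchOrderingExample {
  public static void main(String[] args) throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/writebatch-example");
         final WriteBatch batch = new WriteBatch();
         final WriteOptions writeOptions = new WriteOptions()) {
      // Updates are applied in insertion order, so "key" ends up as "v3".
      batch.put("key".getBytes(), "v1".getBytes());
      batch.delete("key".getBytes());
      batch.put("key".getBytes(), "v2".getBytes());
      batch.put("key".getBytes(), "v3".getBytes());
      db.write(writeOptions, batch); // the whole batch is applied atomically
      System.out.println(new String(db.get("key".getBytes()))); // prints "v3"
    }
  }
}
```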
- * - * @return true if MarkCommit will be called during Iterate. - */ - public boolean hasCommit() { - return hasCommit(nativeHandle_); - } - - /** - * Returns true if MarkRollback will be called during Iterate. - * - * @return true if MarkRollback will be called during Iterate. - */ - public boolean hasRollback() { - return hasRollback(nativeHandle_); - } - - @Override - public WriteBatch getWriteBatch() { - return this; - } - - /** - * Marks this point in the WriteBatch as the last record to - * be inserted into the WAL, provided the WAL is enabled. - */ - public void markWalTerminationPoint() { - markWalTerminationPoint(nativeHandle_); - } - - /** - * Gets the WAL termination point. - * - * See {@link #markWalTerminationPoint()} - * - * @return the WAL termination point - */ - public SavePoint getWalTerminationPoint() { - return getWalTerminationPoint(nativeHandle_); - } - - @Override - WriteBatch getWriteBatch(final long handle) { - return this; - } - - /** - *

Private WriteBatch constructor which is used to construct - * WriteBatch instances from the C++ side. As the reference to this - * object is also managed from the C++ side, the handle will be disowned.

- * - * @param nativeHandle address of native instance. - */ - WriteBatch(final long nativeHandle) { - this(nativeHandle, false); - } - - /** - *

Private WriteBatch constructor which is used to construct - * WriteBatch instances.

- * - * @param nativeHandle address of native instance. - * @param owningNativeHandle whether to own this reference from the C++ side or not - */ - WriteBatch(final long nativeHandle, final boolean owningNativeHandle) { - super(nativeHandle); - if(!owningNativeHandle) - disOwnNativeHandle(); - } - - @Override protected final native void disposeInternal(final long handle); - @Override final native int count0(final long handle); - @Override final native void put(final long handle, final byte[] key, - final int keyLen, final byte[] value, final int valueLen); - @Override final native void put(final long handle, final byte[] key, - final int keyLen, final byte[] value, final int valueLen, - final long cfHandle); - @Override - final native void putDirect(final long handle, final ByteBuffer key, final int keyOffset, - final int keyLength, final ByteBuffer value, final int valueOffset, final int valueLength, - final long cfHandle); - @Override final native void merge(final long handle, final byte[] key, - final int keyLen, final byte[] value, final int valueLen); - @Override final native void merge(final long handle, final byte[] key, - final int keyLen, final byte[] value, final int valueLen, - final long cfHandle); - @Override final native void delete(final long handle, final byte[] key, - final int keyLen) throws RocksDBException; - @Override final native void delete(final long handle, final byte[] key, - final int keyLen, final long cfHandle) throws RocksDBException; - @Override final native void singleDelete(final long handle, final byte[] key, - final int keyLen) throws RocksDBException; - @Override final native void singleDelete(final long handle, final byte[] key, - final int keyLen, final long cfHandle) throws RocksDBException; - @Override - final native void deleteDirect(final long handle, final ByteBuffer key, final int keyOffset, - final int keyLength, final long cfHandle) throws RocksDBException; - @Override - final native void deleteRange(final long handle, final byte[] beginKey, final int beginKeyLen, - final byte[] endKey, final int endKeyLen); - @Override - final native void deleteRange(final long handle, final byte[] beginKey, final int beginKeyLen, - final byte[] endKey, final int endKeyLen, final long cfHandle); - @Override final native void putLogData(final long handle, - final byte[] blob, final int blobLen) throws RocksDBException; - @Override final native void clear0(final long handle); - @Override final native void setSavePoint0(final long handle); - @Override final native void rollbackToSavePoint0(final long handle); - @Override final native void popSavePoint(final long handle) throws RocksDBException; - @Override final native void setMaxBytes(final long nativeHandle, - final long maxBytes); - - private native static long newWriteBatch(final int reserved_bytes); - private native static long newWriteBatch(final byte[] serialized, - final int serializedLength); - private native void iterate(final long handle, final long handlerHandle) - throws RocksDBException; - private native byte[] data(final long nativeHandle) throws RocksDBException; - private native long getDataSize(final long nativeHandle); - private native boolean hasPut(final long nativeHandle); - private native boolean hasDelete(final long nativeHandle); - private native boolean hasSingleDelete(final long nativeHandle); - private native boolean hasDeleteRange(final long nativeHandle); - private native boolean hasMerge(final long nativeHandle); - private native boolean hasBeginPrepare(final long nativeHandle); - 
private native boolean hasEndPrepare(final long nativeHandle); - private native boolean hasCommit(final long nativeHandle); - private native boolean hasRollback(final long nativeHandle); - private native void markWalTerminationPoint(final long nativeHandle); - private native SavePoint getWalTerminationPoint(final long nativeHandle); - - /** - * Handler callback for iterating over the contents of a batch. - */ - public static abstract class Handler - extends RocksCallbackObject { - public Handler() { - super(null); - } - - @Override - protected long initializeNative(final long... nativeParameterHandles) { - return createNewHandler0(); - } - - public abstract void put(final int columnFamilyId, final byte[] key, - final byte[] value) throws RocksDBException; - public abstract void put(final byte[] key, final byte[] value); - public abstract void merge(final int columnFamilyId, final byte[] key, - final byte[] value) throws RocksDBException; - public abstract void merge(final byte[] key, final byte[] value); - public abstract void delete(final int columnFamilyId, final byte[] key) - throws RocksDBException; - public abstract void delete(final byte[] key); - public abstract void singleDelete(final int columnFamilyId, - final byte[] key) throws RocksDBException; - public abstract void singleDelete(final byte[] key); - public abstract void deleteRange(final int columnFamilyId, - final byte[] beginKey, final byte[] endKey) throws RocksDBException; - public abstract void deleteRange(final byte[] beginKey, - final byte[] endKey); - public abstract void logData(final byte[] blob); - public abstract void putBlobIndex(final int columnFamilyId, - final byte[] key, final byte[] value) throws RocksDBException; - public abstract void markBeginPrepare() throws RocksDBException; - public abstract void markEndPrepare(final byte[] xid) - throws RocksDBException; - public abstract void markNoop(final boolean emptyBatch) - throws RocksDBException; - public abstract void markRollback(final byte[] xid) - throws RocksDBException; - public abstract void markCommit(final byte[] xid) - throws RocksDBException; - public abstract void markCommitWithTimestamp(final byte[] xid, final byte[] ts) - throws RocksDBException; - - /** - * shouldContinue is called by the underlying iterator - * {@link WriteBatch#iterate(Handler)}. If it returns false, - * iteration is halted. Otherwise, it continues - * iterating. The default implementation always - * returns true. - * - * @return boolean value indicating if the - * iteration is halted. - */ - public boolean shouldContinue() { - return true; - } - - private native long createNewHandler0(); - } - - /** - * A structure for describing the save point in the Write Batch. - */ - public static class SavePoint { - private long size; - private long count; - private long contentFlags; - - public SavePoint(final long size, final long count, - final long contentFlags) { - this.size = size; - this.count = count; - this.contentFlags = contentFlags; - } - - public void clear() { - this.size = 0; - this.count = 0; - this.contentFlags = 0; - } - - /** - * Get the size of the serialized representation. - * - * @return the size of the serialized representation. - */ - public long getSize() { - return size; - } - - /** - * Get the number of elements. - * - * @return the number of elements. - */ - public long getCount() { - return count; - } - - /** - * Get the content flags. - * - * @return the content flags. 
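The has*() flags above refer to the callbacks invoked during iterate(Handler). A sketch of a Handler that logs puts and deletes and leaves the remaining callbacks empty; it assumes the abstract method set shown in this file:

```java
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteBatch;

public class WriteBatchHandlerExample {
  public static void main(String[] args) throws RocksDBException {
    RocksDB.loadLibrary(); // ensure the native library is loaded without opening a DB
    try (final WriteBatch batch = new WriteBatch()) {
      batch.put("a".getBytes(), "1".getBytes());
      batch.delete("b".getBytes());

      // A Handler receives one callback per record in the batch, in insertion order.
      try (final WriteBatch.Handler handler = new WriteBatch.Handler() {
        @Override public void put(final int cfId, final byte[] key, final byte[] value) {
          System.out.println("put cf=" + cfId + " " + new String(key) + "=" + new String(value));
        }
        @Override public void put(final byte[] key, final byte[] value) {
          System.out.println("put " + new String(key) + "=" + new String(value));
        }
        @Override public void merge(final int cfId, final byte[] key, final byte[] value) {}
        @Override public void merge(final byte[] key, final byte[] value) {}
        @Override public void delete(final int cfId, final byte[] key) {
          System.out.println("delete cf=" + cfId + " " + new String(key));
        }
        @Override public void delete(final byte[] key) {
          System.out.println("delete " + new String(key));
        }
        @Override public void singleDelete(final int cfId, final byte[] key) {}
        @Override public void singleDelete(final byte[] key) {}
        @Override public void deleteRange(final int cfId, final byte[] beginKey, final byte[] endKey) {}
        @Override public void deleteRange(final byte[] beginKey, final byte[] endKey) {}
        @Override public void logData(final byte[] blob) {}
        @Override public void putBlobIndex(final int cfId, final byte[] key, final byte[] value) {}
        @Override public void markBeginPrepare() {}
        @Override public void markEndPrepare(final byte[] xid) {}
        @Override public void markNoop(final boolean emptyBatch) {}
        @Override public void markRollback(final byte[] xid) {}
        @Override public void markCommit(final byte[] xid) {}
        @Override public void markCommitWithTimestamp(final byte[] xid, final byte[] ts) {}
      }) {
        batch.iterate(handler);
      }
    }
  }
}
```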
- */ - public long getContentFlags() { - return contentFlags; - } - - public boolean isCleared() { - return (size | count | contentFlags) == 0; - } - } -} diff --git a/java/src/main/java/org/rocksdb/WriteBatchInterface.java b/java/src/main/java/org/rocksdb/WriteBatchInterface.java deleted file mode 100644 index 92caa22b3..000000000 --- a/java/src/main/java/org/rocksdb/WriteBatchInterface.java +++ /dev/null @@ -1,283 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - *

Defines the interface for a Write Batch which - * holds a collection of updates to apply atomically to a DB.

- */ -public interface WriteBatchInterface { - - /** - * Returns the number of updates in the batch. - * - * @return number of items in WriteBatch - */ - int count(); - - /** - *

Store the mapping "key->value" in the database.

- * - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void put(byte[] key, byte[] value) throws RocksDBException; - - /** - *

Store the mapping "key->value" within the given column - * family.

- * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the specified key to be inserted. - * @param value the value associated with the specified key. - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void put(ColumnFamilyHandle columnFamilyHandle, byte[] key, byte[] value) - throws RocksDBException; - - /** - *

Store the mapping "key->value" in the database.

- * - * @param key the specified key to be inserted. It is using position and limit. - * Supports direct buffer only. - * @param value the value associated with the specified key. It is using position and limit. - * Supports direct buffer only. - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void put(final ByteBuffer key, final ByteBuffer value) throws RocksDBException; - - /** - *

Store the mapping "key->value" within the given column - * family.

- * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} - * instance - * @param key the specified key to be inserted. It is using position and limit. - * Supports direct buffer only. - * @param value the value associated with the specified key. It is using position and limit. - * Supports direct buffer only. - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void put(ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, final ByteBuffer value) - throws RocksDBException; - - /** - *

Merge "value" with the existing value of "key" in the database. - * "key->merge(existing, value)"

- * - * @param key the specified key to be merged. - * @param value the value to be merged with the current value for - * the specified key. - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void merge(byte[] key, byte[] value) throws RocksDBException; - - /** - *

Merge "value" with the existing value of "key" in given column family. - * "key->merge(existing, value)"

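Puts and merges recorded in a batch are applied atomically when the batch is written to the DB. A sketch using the bundled StringAppendOperator as the merge operator; the operator choice, path and keys are illustrative assumptions:

```java
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.StringAppendOperator;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;

public class BatchPutMergeExample {
  public static void main(String[] args) throws RocksDBException {
    try (final StringAppendOperator appendOperator = new StringAppendOperator();
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setMergeOperator(appendOperator);
         final RocksDB db = RocksDB.open(options, "/tmp/batch-put-merge-example");
         final WriteBatch batch = new WriteBatch();
         final WriteOptions writeOptions = new WriteOptions()) {
      batch.put("greeting".getBytes(), "hello".getBytes());
      // merge() records "key->merge(existing, value)"; with the string-append
      // operator the two values are concatenated when the key is read.
      batch.merge("greeting".getBytes(), "world".getBytes());
      db.write(writeOptions, batch);
      System.out.println(new String(db.get("greeting".getBytes()))); // e.g. "hello,world"
    }
  }
}
```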
- * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param key the specified key to be merged. - * @param value the value to be merged with the current value for - * the specified key. - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void merge(ColumnFamilyHandle columnFamilyHandle, byte[] key, byte[] value) - throws RocksDBException; - - /** - *

If the database contains a mapping for "key", erase it. Else do nothing.

- * - * @param key Key to delete within database - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void delete(byte[] key) throws RocksDBException; - - /** - *

If column family contains a mapping for "key", erase it. Else do nothing.

- * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param key Key to delete within database - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void delete(ColumnFamilyHandle columnFamilyHandle, byte[] key) throws RocksDBException; - - /** - *

If the database contains a mapping for "key", erase it. Else do nothing.

- * - * @param key Key to delete within database. It is using position and limit. - * Supports direct buffer only. - * - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void delete(final ByteBuffer key) throws RocksDBException; - - /** - *

If column family contains a mapping for "key", erase it. Else do nothing.

- * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param key Key to delete within database. It is using position and limit. - * Supports direct buffer only. - * - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void delete(ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key) - throws RocksDBException; - - /** - * Remove the database entry for {@code key}. Requires that the key exists - * and was not overwritten. It is not an error if the key did not exist - * in the database. - * - * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple - * times), then the result of calling SingleDelete() on this key is undefined. - * SingleDelete() only behaves correctly if there has been only one Put() - * for this key since the previous call to SingleDelete() for this key. - * - * This feature is currently an experimental performance optimization - * for a very specific workload. It is up to the caller to ensure that - * SingleDelete is only used for a key that is not deleted using Delete() or - * written using Merge(). Mixing SingleDelete operations with Deletes and - * Merges can result in undefined behavior. - * - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - @Experimental("Performance optimization for a very specific workload") - void singleDelete(final byte[] key) throws RocksDBException; - - /** - * Remove the database entry for {@code key}. Requires that the key exists - * and was not overwritten. It is not an error if the key did not exist - * in the database. - * - * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple - * times), then the result of calling SingleDelete() on this key is undefined. - * SingleDelete() only behaves correctly if there has been only one Put() - * for this key since the previous call to SingleDelete() for this key. - * - * This feature is currently an experimental performance optimization - * for a very specific workload. It is up to the caller to ensure that - * SingleDelete is only used for a key that is not deleted using Delete() or - * written using Merge(). Mixing SingleDelete operations with Deletes and - * Merges can result in undefined behavior. - * - * @param columnFamilyHandle The column family to delete the key from - * @param key Key to delete within database - * - * @throws RocksDBException thrown if error happens in underlying - * native library. - */ - @Experimental("Performance optimization for a very specific workload") - void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) - throws RocksDBException; - - /** - * Removes the database entries in the range ["beginKey", "endKey"), i.e., - * including "beginKey" and excluding "endKey". a non-OK status on error. It - * is not an error if no keys exist in the range ["beginKey", "endKey"). - * - * Delete the database entry (if any) for "key". Returns OK on success, and a - * non-OK status on error. It is not an error if "key" did not exist in the - * database. - * - * @param beginKey - * First key to delete within database (included) - * @param endKey - * Last key to delete within database (excluded) - * @throws RocksDBException thrown if error happens in underlying native library. 
- */ - void deleteRange(byte[] beginKey, byte[] endKey) throws RocksDBException; - - /** - * Removes the database entries in the range ["beginKey", "endKey"), i.e., - * including "beginKey" and excluding "endKey". a non-OK status on error. It - * is not an error if no keys exist in the range ["beginKey", "endKey"). - * - * Delete the database entry (if any) for "key". Returns OK on success, and a - * non-OK status on error. It is not an error if "key" did not exist in the - * database. - * - * @param columnFamilyHandle {@link ColumnFamilyHandle} instance - * @param beginKey - * First key to delete within database (included) - * @param endKey - * Last key to delete within database (excluded) - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void deleteRange(ColumnFamilyHandle columnFamilyHandle, byte[] beginKey, byte[] endKey) - throws RocksDBException; - - /** - * Append a blob of arbitrary size to the records in this batch. The blob will - * be stored in the transaction log but not in any other file. In particular, - * it will not be persisted to the SST files. When iterating over this - * WriteBatch, WriteBatch::Handler::LogData will be called with the contents - * of the blob as it is encountered. Blobs, puts, deletes, and merges will be - * encountered in the same order in thich they were inserted. The blob will - * NOT consume sequence number(s) and will NOT increase the count of the batch - * - * Example application: add timestamps to the transaction log for use in - * replication. - * - * @param blob binary object to be inserted - * @throws RocksDBException thrown if error happens in underlying native library. - */ - void putLogData(byte[] blob) throws RocksDBException; - - /** - * Clear all updates buffered in this batch - */ - void clear(); - - /** - * Records the state of the batch for future calls to RollbackToSavePoint(). - * May be called multiple times to set multiple save points. - */ - void setSavePoint(); - - /** - * Remove all entries in this batch (Put, Merge, Delete, PutLogData) since - * the most recent call to SetSavePoint() and removes the most recent save - * point. - * - * @throws RocksDBException if there is no previous call to SetSavePoint() - */ - void rollbackToSavePoint() throws RocksDBException; - - /** - * Pop the most recent save point. - * - * That is to say that it removes the last save point, - * which was set by {@link #setSavePoint()}. - * - * @throws RocksDBException If there is no previous call to - * {@link #setSavePoint()}, an exception with - * {@link Status.Code#NotFound} will be thrown. - */ - void popSavePoint() throws RocksDBException; - - /** - * Set the maximum size of the write batch. - * - * @param maxBytes the maximum size in bytes. - */ - void setMaxBytes(long maxBytes); - - /** - * Get the underlying Write Batch. - * - * @return the underlying WriteBatch. - */ - WriteBatch getWriteBatch(); -} diff --git a/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java b/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java deleted file mode 100644 index d85b8e3f7..000000000 --- a/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java +++ /dev/null @@ -1,361 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
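The deleteRange methods documented above remove the half-open key range [beginKey, endKey) when the batch is written. A sketch with illustrative keys and path; note that the WriteBatchWithIndex overrides further below are flagged as not yet supported, so this applies to a plain WriteBatch:

```java
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;

public class DeleteRangeBatchExample {
  public static void main(String[] args) throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/delete-range-example");
         final WriteBatch batch = new WriteBatch();
         final WriteOptions writeOptions = new WriteOptions()) {
      db.put("user:0001".getBytes(), "a".getBytes());
      db.put("user:0002".getBytes(), "b".getBytes());
      db.put("user:0003".getBytes(), "c".getBytes());
      // Removes keys in ["user:0001", "user:0003"): 0001 and 0002 go, 0003 stays.
      batch.deleteRange("user:0001".getBytes(), "user:0003".getBytes());
      db.write(writeOptions, batch);
      System.out.println(db.get("user:0003".getBytes()) != null); // true
    }
  }
}
```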
- -package org.rocksdb; - -import java.nio.ByteBuffer; - -/** - * Similar to {@link org.rocksdb.WriteBatch} but with a binary searchable - * index built for all the keys inserted. - * - * Calling put, merge, remove or putLogData calls the same function - * as with {@link org.rocksdb.WriteBatch} whilst also building an index. - * - * A user can call {@link org.rocksdb.WriteBatchWithIndex#newIterator()} to - * create an iterator over the write batch or - * {@link org.rocksdb.WriteBatchWithIndex#newIteratorWithBase(org.rocksdb.RocksIterator)} - * to get an iterator for the database with Read-Your-Own-Writes like capability - */ -public class WriteBatchWithIndex extends AbstractWriteBatch { - /** - * Creates a WriteBatchWithIndex where no bytes - * are reserved up-front, bytewise comparison is - * used for fallback key comparisons, - * and duplicate keys operations are retained - */ - public WriteBatchWithIndex() { - super(newWriteBatchWithIndex()); - } - - - /** - * Creates a WriteBatchWithIndex where no bytes - * are reserved up-front, bytewise comparison is - * used for fallback key comparisons, and duplicate key - * assignment is determined by the constructor argument - * - * @param overwriteKey if true, overwrite the key in the index when - * inserting a duplicate key, in this way an iterator will never - * show two entries with the same key. - */ - public WriteBatchWithIndex(final boolean overwriteKey) { - super(newWriteBatchWithIndex(overwriteKey)); - } - - /** - * Creates a WriteBatchWithIndex - * - * @param fallbackIndexComparator We fallback to this comparator - * to compare keys within a column family if we cannot determine - * the column family and so look up it's comparator. - * - * @param reservedBytes reserved bytes in underlying WriteBatch - * - * @param overwriteKey if true, overwrite the key in the index when - * inserting a duplicate key, in this way an iterator will never - * show two entries with the same key. - */ - public WriteBatchWithIndex( - final AbstractComparator - fallbackIndexComparator, final int reservedBytes, - final boolean overwriteKey) { - super(newWriteBatchWithIndex(fallbackIndexComparator.nativeHandle_, - fallbackIndexComparator.getComparatorType().getValue(), reservedBytes, - overwriteKey)); - } - - /** - *

Private WriteBatchWithIndex constructor which is used to construct - * WriteBatchWithIndex instances from the C++ side. As the reference to this - * object is also managed from the C++ side, the handle will be disowned.

- * - * @param nativeHandle address of native instance. - */ - WriteBatchWithIndex(final long nativeHandle) { - super(nativeHandle); - disOwnNativeHandle(); - } - - /** - * Create an iterator of a column family. User can call - * {@link org.rocksdb.RocksIteratorInterface#seek(byte[])} to - * search to the next entry of or after a key. Keys will be iterated in the - * order given by index_comparator. For multiple updates on the same key, - * each update will be returned as a separate entry, in the order of update - * time. - * - * @param columnFamilyHandle The column family to iterate over - * @return An iterator for the Write Batch contents, restricted to the column - * family - */ - public WBWIRocksIterator newIterator( - final ColumnFamilyHandle columnFamilyHandle) { - return new WBWIRocksIterator(this, iterator1(nativeHandle_, - columnFamilyHandle.nativeHandle_)); - } - - /** - * Create an iterator of the default column family. User can call - * {@link org.rocksdb.RocksIteratorInterface#seek(byte[])} to - * search to the next entry of or after a key. Keys will be iterated in the - * order given by index_comparator. For multiple updates on the same key, - * each update will be returned as a separate entry, in the order of update - * time. - * - * @return An iterator for the Write Batch contents - */ - public WBWIRocksIterator newIterator() { - return new WBWIRocksIterator(this, iterator0(nativeHandle_)); - } - - /** - * Provides Read-Your-Own-Writes like functionality by - * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} - * as a delta and baseIterator as a base - * - * Updating write batch with the current key of the iterator is not safe. - * We strongly recommend users not to do it. It will invalidate the current - * key() and value() of the iterator. This invalidation happens even before - * the write batch update finishes. The state may recover after Next() is - * called. - * - * @param columnFamilyHandle The column family to iterate over - * @param baseIterator The base iterator, - * e.g. {@link org.rocksdb.RocksDB#newIterator()} - * @return An iterator which shows a view comprised of both the database - * point-in-time from baseIterator and modifications made in this write batch. - */ - public RocksIterator newIteratorWithBase( - final ColumnFamilyHandle columnFamilyHandle, - final RocksIterator baseIterator) { - return newIteratorWithBase(columnFamilyHandle, baseIterator, null); - } - - /** - * Provides Read-Your-Own-Writes like functionality by - * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} - * as a delta and baseIterator as a base - * - * Updating write batch with the current key of the iterator is not safe. - * We strongly recommend users not to do it. It will invalidate the current - * key() and value() of the iterator. This invalidation happens even before - * the write batch update finishes. The state may recover after Next() is - * called. - * - * @param columnFamilyHandle The column family to iterate over - * @param baseIterator The base iterator, - * e.g. {@link org.rocksdb.RocksDB#newIterator()} - * @param readOptions the read options, or null - * @return An iterator which shows a view comprised of both the database - * point-in-time from baseIterator and modifications made in this write batch. 
- */ - public RocksIterator newIteratorWithBase(final ColumnFamilyHandle columnFamilyHandle, - final RocksIterator baseIterator, /* @Nullable */ final ReadOptions readOptions) { - final RocksIterator iterator = new RocksIterator(baseIterator.parent_, - iteratorWithBase(nativeHandle_, columnFamilyHandle.nativeHandle_, - baseIterator.nativeHandle_, readOptions == null ? 0 : readOptions.nativeHandle_)); - - // when the iterator is deleted it will also delete the baseIterator - baseIterator.disOwnNativeHandle(); - - return iterator; - } - - /** - * Provides Read-Your-Own-Writes like functionality by - * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} - * as a delta and baseIterator as a base. Operates on the default column - * family. - * - * @param baseIterator The base iterator, - * e.g. {@link org.rocksdb.RocksDB#newIterator()} - * @return An iterator which shows a view comprised of both the database - * point-in-timefrom baseIterator and modifications made in this write batch. - */ - public RocksIterator newIteratorWithBase(final RocksIterator baseIterator) { - return newIteratorWithBase(baseIterator.parent_.getDefaultColumnFamily(), baseIterator, null); - } - - /** - * Provides Read-Your-Own-Writes like functionality by - * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} - * as a delta and baseIterator as a base. Operates on the default column - * family. - * - * @param baseIterator The base iterator, - * e.g. {@link org.rocksdb.RocksDB#newIterator()} - * @param readOptions the read options, or null - * @return An iterator which shows a view comprised of both the database - * point-in-timefrom baseIterator and modifications made in this write batch. - */ - public RocksIterator newIteratorWithBase(final RocksIterator baseIterator, - /* @Nullable */ final ReadOptions readOptions) { - return newIteratorWithBase( - baseIterator.parent_.getDefaultColumnFamily(), baseIterator, readOptions); - } - - /** - * Similar to {@link RocksDB#get(ColumnFamilyHandle, byte[])} but will only - * read the key from this batch. - * - * @param columnFamilyHandle The column family to retrieve the value from - * @param options The database options to use - * @param key The key to read the value for - * - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException if the batch does not have enough data to resolve - * Merge operations, MergeInProgress status may be returned. - */ - public byte[] getFromBatch(final ColumnFamilyHandle columnFamilyHandle, - final DBOptions options, final byte[] key) throws RocksDBException { - return getFromBatch(nativeHandle_, options.nativeHandle_, - key, key.length, columnFamilyHandle.nativeHandle_); - } - - /** - * Similar to {@link RocksDB#get(byte[])} but will only - * read the key from this batch. - * - * @param options The database options to use - * @param key The key to read the value for - * - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException if the batch does not have enough data to resolve - * Merge operations, MergeInProgress status may be returned. 
- */ - public byte[] getFromBatch(final DBOptions options, final byte[] key) - throws RocksDBException { - return getFromBatch(nativeHandle_, options.nativeHandle_, key, key.length); - } - - /** - * Similar to {@link RocksDB#get(ColumnFamilyHandle, byte[])} but will also - * read writes from this batch. - * - * This function will query both this batch and the DB and then merge - * the results using the DB's merge operator (if the batch contains any - * merge requests). - * - * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is - * read from the DB but will NOT change which keys are read from the batch - * (the keys in this batch do not yet belong to any snapshot and will be - * fetched regardless). - * - * @param db The Rocks database - * @param columnFamilyHandle The column family to retrieve the value from - * @param options The read options to use - * @param key The key to read the value for - * - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. - * - * @throws RocksDBException if the value for the key cannot be read - */ - public byte[] getFromBatchAndDB(final RocksDB db, final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions options, final byte[] key) throws RocksDBException { - return getFromBatchAndDB(nativeHandle_, db.nativeHandle_, - options.nativeHandle_, key, key.length, - columnFamilyHandle.nativeHandle_); - } - - /** - * Similar to {@link RocksDB#get(byte[])} but will also - * read writes from this batch. - * - * This function will query both this batch and the DB and then merge - * the results using the DB's merge operator (if the batch contains any - * merge requests). - * - * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is - * read from the DB but will NOT change which keys are read from the batch - * (the keys in this batch do not yet belong to any snapshot and will be - * fetched regardless). - * - * @param db The Rocks database - * @param options The read options to use - * @param key The key to read the value for - * - * @return a byte array storing the value associated with the input key if - * any. null if it does not find the specified key. 
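A sketch of the read-your-own-writes pattern described above, combining getFromBatchAndDB with newIteratorWithBase; the keys and path are illustrative:

```java
import org.rocksdb.Options;
import org.rocksdb.ReadOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.RocksIterator;
import org.rocksdb.WriteBatchWithIndex;

public class ReadYourOwnWritesExample {
  public static void main(String[] args) throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/wbwi-example");
         final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true);
         final ReadOptions readOptions = new ReadOptions()) {
      db.put("committed".getBytes(), "on-disk".getBytes());
      wbwi.put("pending".getBytes(), "in-batch".getBytes());

      // Reads consult the uncommitted batch first, then fall back to the DB.
      final byte[] pending = wbwi.getFromBatchAndDB(db, readOptions, "pending".getBytes());
      System.out.println(new String(pending)); // "in-batch"

      // An iterator over the union of the DB and the uncommitted batch.
      try (final RocksIterator it =
               wbwi.newIteratorWithBase(db.newIterator(readOptions))) {
        for (it.seekToFirst(); it.isValid(); it.next()) {
          System.out.println(new String(it.key()) + " = " + new String(it.value()));
        }
      }
    }
  }
}
```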
- * - * @throws RocksDBException if the value for the key cannot be read - */ - public byte[] getFromBatchAndDB(final RocksDB db, final ReadOptions options, - final byte[] key) throws RocksDBException { - return getFromBatchAndDB(nativeHandle_, db.nativeHandle_, - options.nativeHandle_, key, key.length); - } - - @Override protected final native void disposeInternal(final long handle); - @Override final native int count0(final long handle); - @Override final native void put(final long handle, final byte[] key, - final int keyLen, final byte[] value, final int valueLen); - @Override final native void put(final long handle, final byte[] key, - final int keyLen, final byte[] value, final int valueLen, - final long cfHandle); - @Override - final native void putDirect(final long handle, final ByteBuffer key, final int keyOffset, - final int keyLength, final ByteBuffer value, final int valueOffset, final int valueLength, - final long cfHandle); - @Override final native void merge(final long handle, final byte[] key, - final int keyLen, final byte[] value, final int valueLen); - @Override final native void merge(final long handle, final byte[] key, - final int keyLen, final byte[] value, final int valueLen, - final long cfHandle); - @Override final native void delete(final long handle, final byte[] key, - final int keyLen) throws RocksDBException; - @Override final native void delete(final long handle, final byte[] key, - final int keyLen, final long cfHandle) throws RocksDBException; - @Override final native void singleDelete(final long handle, final byte[] key, - final int keyLen) throws RocksDBException; - @Override final native void singleDelete(final long handle, final byte[] key, - final int keyLen, final long cfHandle) throws RocksDBException; - @Override - final native void deleteDirect(final long handle, final ByteBuffer key, final int keyOffset, - final int keyLength, final long cfHandle) throws RocksDBException; - // DO NOT USE - `WriteBatchWithIndex::deleteRange` is not yet supported - @Override - final native void deleteRange(final long handle, final byte[] beginKey, final int beginKeyLen, - final byte[] endKey, final int endKeyLen); - // DO NOT USE - `WriteBatchWithIndex::deleteRange` is not yet supported - @Override - final native void deleteRange(final long handle, final byte[] beginKey, final int beginKeyLen, - final byte[] endKey, final int endKeyLen, final long cfHandle); - @Override final native void putLogData(final long handle, final byte[] blob, - final int blobLen) throws RocksDBException; - @Override final native void clear0(final long handle); - @Override final native void setSavePoint0(final long handle); - @Override final native void rollbackToSavePoint0(final long handle); - @Override final native void popSavePoint(final long handle) throws RocksDBException; - @Override final native void setMaxBytes(final long nativeHandle, - final long maxBytes); - @Override final native WriteBatch getWriteBatch(final long handle); - - private native static long newWriteBatchWithIndex(); - private native static long newWriteBatchWithIndex(final boolean overwriteKey); - private native static long newWriteBatchWithIndex( - final long fallbackIndexComparatorHandle, - final byte comparatorType, final int reservedBytes, - final boolean overwriteKey); - private native long iterator0(final long handle); - private native long iterator1(final long handle, final long cfHandle); - private native long iteratorWithBase(final long handle, final long cfHandle, - final long baseIteratorHandle, final 
long readOptionsHandle); - private native byte[] getFromBatch(final long handle, final long optHandle, - final byte[] key, final int keyLen); - private native byte[] getFromBatch(final long handle, final long optHandle, - final byte[] key, final int keyLen, final long cfHandle); - private native byte[] getFromBatchAndDB(final long handle, - final long dbHandle, final long readOptHandle, final byte[] key, - final int keyLen); - private native byte[] getFromBatchAndDB(final long handle, - final long dbHandle, final long readOptHandle, final byte[] key, - final int keyLen, final long cfHandle); -} diff --git a/java/src/main/java/org/rocksdb/WriteBufferManager.java b/java/src/main/java/org/rocksdb/WriteBufferManager.java deleted file mode 100644 index 8ec963958..000000000 --- a/java/src/main/java/org/rocksdb/WriteBufferManager.java +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Java wrapper over native write_buffer_manager class - */ -public class WriteBufferManager extends RocksObject { - static { - RocksDB.loadLibrary(); - } - - /** - * Construct a new instance of WriteBufferManager. - * - * Check - * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager - * for more details on when to use it - * - * @param bufferSizeBytes buffer size(in bytes) to use for native write_buffer_manager - * @param cache cache whose memory should be bounded by this write buffer manager - * @param allowStall if set true, it will enable stalling of writes when memory_usage() exceeds - * buffer_size. - * It will wait for flush to complete and memory usage to drop down. - */ - public WriteBufferManager( - final long bufferSizeBytes, final Cache cache, final boolean allowStall) { - super(newWriteBufferManager(bufferSizeBytes, cache.nativeHandle_, allowStall)); - this.allowStall_ = allowStall; - } - - public WriteBufferManager(final long bufferSizeBytes, final Cache cache){ - this(bufferSizeBytes, cache, false); - } - - public boolean allowStall() { - return allowStall_; - } - - private native static long newWriteBufferManager( - final long bufferSizeBytes, final long cacheHandle, final boolean allowStall); - - @Override - protected native void disposeInternal(final long handle); - - private boolean allowStall_; -} diff --git a/java/src/main/java/org/rocksdb/WriteOptions.java b/java/src/main/java/org/rocksdb/WriteOptions.java deleted file mode 100644 index 5a3ffa6c5..000000000 --- a/java/src/main/java/org/rocksdb/WriteOptions.java +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -/** - * Options that control write operations. - * - * Note that developers should call WriteOptions.dispose() to release the - * c++ side memory before a WriteOptions instance runs out of scope. - */ -public class WriteOptions extends RocksObject { - /** - * Construct WriteOptions instance. 
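For reference, a minimal sketch of how the WriteBufferManager removed above was typically wired into options. It assumes `import org.rocksdb.*;`, a loaded native library, and purely illustrative sizes and path:

    static void boundedMemtables() throws RocksDBException {
      // Bound total memtable memory to 64 MB, charged against a shared block cache.
      try (final Cache cache = new LRUCache(128 * SizeUnit.MB);
           final WriteBufferManager writeBufferManager =
               new WriteBufferManager(64 * SizeUnit.MB, cache);
           final Options options = new Options()
               .setCreateIfMissing(true)
               .setWriteBufferManager(writeBufferManager);
           final RocksDB db = RocksDB.open(options, "/tmp/wbm-example")) {
        db.put("k".getBytes(UTF_8), "v".getBytes(UTF_8));
      } // cache and write buffer manager must outlive the DB, hence one try block
    }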
- */ - public WriteOptions() { - super(newWriteOptions()); - - } - - // TODO(AR) consider ownership - WriteOptions(final long nativeHandle) { - super(nativeHandle); - disOwnNativeHandle(); - } - - /** - * Copy constructor for WriteOptions. - * - * NOTE: This does a shallow copy, which means comparator, merge_operator, compaction_filter, - * compaction_filter_factory and other pointers will be cloned! - * - * @param other The ColumnFamilyOptions to copy. - */ - public WriteOptions(WriteOptions other) { - super(copyWriteOptions(other.nativeHandle_)); - } - - - /** - * If true, the write will be flushed from the operating system - * buffer cache (by calling WritableFile::Sync()) before the write - * is considered complete. If this flag is true, writes will be - * slower. - * - * If this flag is false, and the machine crashes, some recent - * writes may be lost. Note that if it is just the process that - * crashes (i.e., the machine does not reboot), no writes will be - * lost even if sync==false. - * - * In other words, a DB write with sync==false has similar - * crash semantics as the "write()" system call. A DB write - * with sync==true has similar crash semantics to a "write()" - * system call followed by "fdatasync()". - * - * Default: false - * - * @param flag a boolean flag to indicate whether a write - * should be synchronized. - * @return the instance of the current WriteOptions. - */ - public WriteOptions setSync(final boolean flag) { - setSync(nativeHandle_, flag); - return this; - } - - /** - * If true, the write will be flushed from the operating system - * buffer cache (by calling WritableFile::Sync()) before the write - * is considered complete. If this flag is true, writes will be - * slower. - * - * If this flag is false, and the machine crashes, some recent - * writes may be lost. Note that if it is just the process that - * crashes (i.e., the machine does not reboot), no writes will be - * lost even if sync==false. - * - * In other words, a DB write with sync==false has similar - * crash semantics as the "write()" system call. A DB write - * with sync==true has similar crash semantics to a "write()" - * system call followed by "fdatasync()". - * - * @return boolean value indicating if sync is active. - */ - public boolean sync() { - return sync(nativeHandle_); - } - - /** - * If true, writes will not first go to the write ahead log, - * and the write may got lost after a crash. The backup engine - * relies on write-ahead logs to back up the memtable, so if - * you disable write-ahead logs, you must create backups with - * flush_before_backup=true to avoid losing unflushed memtable data. - * - * @param flag a boolean flag to specify whether to disable - * write-ahead-log on writes. - * @return the instance of the current WriteOptions. - */ - public WriteOptions setDisableWAL(final boolean flag) { - setDisableWAL(nativeHandle_, flag); - return this; - } - - /** - * If true, writes will not first go to the write ahead log, - * and the write may got lost after a crash. The backup engine - * relies on write-ahead logs to back up the memtable, so if - * you disable write-ahead logs, you must create backups with - * flush_before_backup=true to avoid losing unflushed memtable data. - * - * @return boolean value indicating if WAL is disabled. - */ - public boolean disableWAL() { - return disableWAL(nativeHandle_); - } - - /** - * If true and if user is trying to write to column families that don't exist - * (they were dropped), ignore the write (don't return an error). 
If there - * are multiple writes in a WriteBatch, other writes will succeed. - * - * Default: false - * - * @param ignoreMissingColumnFamilies true to ignore writes to column families - * which don't exist - * @return the instance of the current WriteOptions. - */ - public WriteOptions setIgnoreMissingColumnFamilies( - final boolean ignoreMissingColumnFamilies) { - setIgnoreMissingColumnFamilies(nativeHandle_, ignoreMissingColumnFamilies); - return this; - } - - /** - * If true and if user is trying to write to column families that don't exist - * (they were dropped), ignore the write (don't return an error). If there - * are multiple writes in a WriteBatch, other writes will succeed. - * - * Default: false - * - * @return true if writes to column families which don't exist are ignored - */ - public boolean ignoreMissingColumnFamilies() { - return ignoreMissingColumnFamilies(nativeHandle_); - } - - /** - * If true and we need to wait or sleep for the write request, fails - * immediately with {@link Status.Code#Incomplete}. - * - * @param noSlowdown true to fail write requests if we need to wait or sleep - * @return the instance of the current WriteOptions. - */ - public WriteOptions setNoSlowdown(final boolean noSlowdown) { - setNoSlowdown(nativeHandle_, noSlowdown); - return this; - } - - /** - * If true and we need to wait or sleep for the write request, fails - * immediately with {@link Status.Code#Incomplete}. - * - * @return true when write requests are failed if we need to wait or sleep - */ - public boolean noSlowdown() { - return noSlowdown(nativeHandle_); - } - - /** - * If true, this write request is of lower priority if compaction is - * behind. In the case that, {@link #noSlowdown()} == true, the request - * will be cancelled immediately with {@link Status.Code#Incomplete} returned. - * Otherwise, it will be slowed down. The slowdown value is determined by - * RocksDB to guarantee it introduces minimum impacts to high priority writes. - * - * Default: false - * - * @param lowPri true if the write request should be of lower priority than - * compactions which are behind. - * - * @return the instance of the current WriteOptions. - */ - public WriteOptions setLowPri(final boolean lowPri) { - setLowPri(nativeHandle_, lowPri); - return this; - } - - /** - * Returns true if this write request is of lower priority if compaction is - * behind. - * - * See {@link #setLowPri(boolean)}. - * - * @return true if this write request is of lower priority, false otherwise. - */ - public boolean lowPri() { - return lowPri(nativeHandle_); - } - - /** - * If true, this writebatch will maintain the last insert positions of each - * memtable as hints in concurrent write. It can improve write performance - * in concurrent writes if keys in one writebatch are sequential. In - * non-concurrent writes (when {@code concurrent_memtable_writes} is false) this - * option will be ignored. - * - * Default: false - * - * @return true if writebatch will maintain the last insert positions of each memtable as hints in - * concurrent write. - */ - public boolean memtableInsertHintPerBatch() { - return memtableInsertHintPerBatch(nativeHandle_); - } - - /** - * If true, this writebatch will maintain the last insert positions of each - * memtable as hints in concurrent write. It can improve write performance - * in concurrent writes if keys in one writebatch are sequential. In - * non-concurrent writes (when {@code concurrent_memtable_writes} is false) this - * option will be ignored. 
- * - * Default: false - * - * @param memtableInsertHintPerBatch true if writebatch should maintain the last insert positions - * of each memtable as hints in concurrent write. - * @return the instance of the current WriteOptions. - */ - public WriteOptions setMemtableInsertHintPerBatch(final boolean memtableInsertHintPerBatch) { - setMemtableInsertHintPerBatch(nativeHandle_, memtableInsertHintPerBatch); - return this; - } - - private native static long newWriteOptions(); - private native static long copyWriteOptions(long handle); - @Override protected final native void disposeInternal(final long handle); - - private native void setSync(long handle, boolean flag); - private native boolean sync(long handle); - private native void setDisableWAL(long handle, boolean flag); - private native boolean disableWAL(long handle); - private native void setIgnoreMissingColumnFamilies(final long handle, - final boolean ignoreMissingColumnFamilies); - private native boolean ignoreMissingColumnFamilies(final long handle); - private native void setNoSlowdown(final long handle, - final boolean noSlowdown); - private native boolean noSlowdown(final long handle); - private native void setLowPri(final long handle, final boolean lowPri); - private native boolean lowPri(final long handle); - private native boolean memtableInsertHintPerBatch(final long handle); - private native void setMemtableInsertHintPerBatch( - final long handle, final boolean memtableInsertHintPerBatch); -} diff --git a/java/src/main/java/org/rocksdb/WriteStallCondition.java b/java/src/main/java/org/rocksdb/WriteStallCondition.java deleted file mode 100644 index 98d9e2ce4..000000000 --- a/java/src/main/java/org/rocksdb/WriteStallCondition.java +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public enum WriteStallCondition { - DELAYED((byte) 0x0), - STOPPED((byte) 0x1), - NORMAL((byte) 0x2); - - private final byte value; - - WriteStallCondition(final byte value) { - this.value = value; - } - - /** - * Get the internal representation. - * - * @return the internal representation - */ - byte getValue() { - return value; - } - - /** - * Get the WriteStallCondition from the internal representation value. - * - * @return the flush reason. - * - * @throws IllegalArgumentException if the value is unknown. - */ - static WriteStallCondition fromValue(final byte value) { - for (final WriteStallCondition writeStallCondition : WriteStallCondition.values()) { - if (writeStallCondition.value == value) { - return writeStallCondition; - } - } - - throw new IllegalArgumentException("Illegal value provided for WriteStallCondition: " + value); - } -} diff --git a/java/src/main/java/org/rocksdb/WriteStallInfo.java b/java/src/main/java/org/rocksdb/WriteStallInfo.java deleted file mode 100644 index 4aef0eda9..000000000 --- a/java/src/main/java/org/rocksdb/WriteStallInfo.java +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
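A hedged sketch of the WriteOptions flags documented above in use. The helper name, keys, and the exact status check are illustrative; `Status.Code.Incomplete` is the code the javadoc above says a rejected no-slowdown write reports:

    static void writeWithOptions(final RocksDB db) throws RocksDBException {
      // Durable write: goes through the WAL and is synced before the call returns.
      try (final WriteOptions durable = new WriteOptions().setSync(true)) {
        db.put(durable, "k1".getBytes(UTF_8), "v1".getBytes(UTF_8));
      }
      // Fast, non-durable write: skips the WAL; data only survives once the memtable is flushed.
      try (final WriteOptions walDisabled = new WriteOptions().setDisableWAL(true)) {
        db.put(walDisabled, "k2".getBytes(UTF_8), "v2".getBytes(UTF_8));
      }
      // Fail immediately instead of stalling when RocksDB would throttle the writer.
      try (final WriteOptions noWait = new WriteOptions().setNoSlowdown(true)) {
        try {
          db.put(noWait, "k3".getBytes(UTF_8), "v3".getBytes(UTF_8));
        } catch (final RocksDBException e) {
          if (e.getStatus() != null && e.getStatus().getCode() == Status.Code.Incomplete) {
            // the write was rejected rather than delayed; retry later or shed load
          } else {
            throw e;
          }
        }
      }
    }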
- -package org.rocksdb; - -import java.util.Objects; - -public class WriteStallInfo { - private final String columnFamilyName; - private final WriteStallCondition currentCondition; - private final WriteStallCondition previousCondition; - - /** - * Access is package private as this will only be constructed from - * C++ via JNI and for testing. - */ - WriteStallInfo(final String columnFamilyName, final byte currentConditionValue, - final byte previousConditionValue) { - this.columnFamilyName = columnFamilyName; - this.currentCondition = WriteStallCondition.fromValue(currentConditionValue); - this.previousCondition = WriteStallCondition.fromValue(previousConditionValue); - } - - /** - * Get the name of the column family. - * - * @return the name of the column family. - */ - public String getColumnFamilyName() { - return columnFamilyName; - } - - /** - * Get the current state of the write controller. - * - * @return the current state. - */ - public WriteStallCondition getCurrentCondition() { - return currentCondition; - } - - /** - * Get the previous state of the write controller. - * - * @return the previous state. - */ - public WriteStallCondition getPreviousCondition() { - return previousCondition; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - WriteStallInfo that = (WriteStallInfo) o; - return Objects.equals(columnFamilyName, that.columnFamilyName) - && currentCondition == that.currentCondition && previousCondition == that.previousCondition; - } - - @Override - public int hashCode() { - return Objects.hash(columnFamilyName, currentCondition, previousCondition); - } - - @Override - public String toString() { - return "WriteStallInfo{" - + "columnFamilyName='" + columnFamilyName + '\'' + ", currentCondition=" + currentCondition - + ", previousCondition=" + previousCondition + '}'; - } -} diff --git a/java/src/main/java/org/rocksdb/util/ByteUtil.java b/java/src/main/java/org/rocksdb/util/ByteUtil.java deleted file mode 100644 index 5d64d5dcf..000000000 --- a/java/src/main/java/org/rocksdb/util/ByteUtil.java +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb.util; - -import java.nio.ByteBuffer; - -import static java.nio.charset.StandardCharsets.UTF_8; - -public class ByteUtil { - - /** - * Convert a String to a UTF-8 byte array. - * - * @param str the string - * - * @return the byte array. - */ - public static byte[] bytes(final String str) { - return str.getBytes(UTF_8); - } - - /** - * Compares the first {@code count} bytes of two areas of memory. Returns - * zero if they are the same, a value less than zero if {@code x} is - * lexically less than {@code y}, or a value greater than zero if {@code x} - * is lexically greater than {@code y}. Note that lexical order is determined - * as if comparing unsigned char arrays. - * - * Similar to memcmp.c. 
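WriteStallInfo and WriteStallCondition above are only observed through event callbacks. A sketch of consuming them, assuming the `onStallConditionsChanged` callback exposed by `org.rocksdb.AbstractEventListener` in recent RocksJava releases and an illustrative path:

    static void logStalls() throws RocksDBException {
      final AbstractEventListener stallLogger = new AbstractEventListener() {
        @Override
        public void onStallConditionsChanged(final WriteStallInfo info) {
          System.out.printf("CF %s moved from %s to %s%n",
              info.getColumnFamilyName(), info.getPreviousCondition(), info.getCurrentCondition());
        }
      };
      try (final Options options = new Options()
               .setCreateIfMissing(true)
               .setListeners(java.util.Collections.singletonList(stallLogger));
           final RocksDB db = RocksDB.open(options, "/tmp/stall-example")) {
        // ... write load that may drive a column family to DELAYED or STOPPED ...
      }
    }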
- * - * @param x the first value to compare with - * @param y the second value to compare against - * @param count the number of bytes to compare - * - * @return the result of the comparison - */ - public static int memcmp(final ByteBuffer x, final ByteBuffer y, - final int count) { - for (int idx = 0; idx < count; idx++) { - final int aa = x.get(idx) & 0xff; - final int bb = y.get(idx) & 0xff; - if (aa != bb) { - return aa - bb; - } - } - return 0; - } -} diff --git a/java/src/main/java/org/rocksdb/util/BytewiseComparator.java b/java/src/main/java/org/rocksdb/util/BytewiseComparator.java deleted file mode 100644 index 9561b0a31..000000000 --- a/java/src/main/java/org/rocksdb/util/BytewiseComparator.java +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb.util; - -import org.rocksdb.*; - -import java.nio.ByteBuffer; - -import static org.rocksdb.util.ByteUtil.memcmp; - -/** - * This is a Java Native implementation of the C++ - * equivalent BytewiseComparatorImpl using {@link Slice} - * - * The performance of Comparators implemented in Java is always - * less than their C++ counterparts due to the bridging overhead, - * as such you likely don't want to use this apart from benchmarking - * and you most likely instead wanted - * {@link org.rocksdb.BuiltinComparator#BYTEWISE_COMPARATOR} - */ -public final class BytewiseComparator extends AbstractComparator { - - public BytewiseComparator(final ComparatorOptions copt) { - super(copt); - } - - @Override - public String name() { - return "rocksdb.java.BytewiseComparator"; - } - - @Override - public int compare(final ByteBuffer a, final ByteBuffer b) { - return _compare(a, b); - } - - static int _compare(final ByteBuffer a, final ByteBuffer b) { - assert(a != null && b != null); - final int minLen = a.remaining() < b.remaining() ? - a.remaining() : b.remaining(); - int r = memcmp(a, b, minLen); - if (r == 0) { - if (a.remaining() < b.remaining()) { - r = -1; - } else if (a.remaining() > b.remaining()) { - r = +1; - } - } - return r; - } - - @Override - public void findShortestSeparator(final ByteBuffer start, - final ByteBuffer limit) { - // Find length of common prefix - final int minLength = Math.min(start.remaining(), limit.remaining()); - int diffIndex = 0; - while (diffIndex < minLength && - start.get(diffIndex) == limit.get(diffIndex)) { - diffIndex++; - } - - if (diffIndex >= minLength) { - // Do not shorten if one string is a prefix of the other - } else { - final int startByte = start.get(diffIndex) & 0xff; - final int limitByte = limit.get(diffIndex) & 0xff; - if (startByte >= limitByte) { - // Cannot shorten since limit is smaller than start or start is - // already the shortest possible. - return; - } - assert(startByte < limitByte); - - if (diffIndex < limit.remaining() - 1 || startByte + 1 < limitByte) { - start.put(diffIndex, (byte)((start.get(diffIndex) & 0xff) + 1)); - start.limit(diffIndex + 1); - } else { - // v - // A A 1 A A A - // A A 2 - // - // Incrementing the current byte will make start bigger than limit, we - // will skip this byte, and find the first non 0xFF byte in start and - // increment it. 
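A tiny illustration of the ByteUtil helpers removed above; the values are arbitrary:

    final ByteBuffer a = ByteBuffer.wrap(ByteUtil.bytes("abc"));
    final ByteBuffer b = ByteBuffer.wrap(ByteUtil.bytes("abd"));
    // memcmp compares the first `count` bytes as unsigned values
    assert ByteUtil.memcmp(a, b, 3) < 0;   // 'c' < 'd'
    assert ByteUtil.memcmp(a, b, 2) == 0;  // shared prefix "ab"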
- diffIndex++; - - while (diffIndex < start.remaining()) { - // Keep moving until we find the first non 0xFF byte to - // increment it - if ((start.get(diffIndex) & 0xff) < - 0xff) { - start.put(diffIndex, (byte)((start.get(diffIndex) & 0xff) + 1)); - start.limit(diffIndex + 1); - break; - } - diffIndex++; - } - } - assert(compare(start.duplicate(), limit.duplicate()) < 0); - } - } - - @Override - public void findShortSuccessor(final ByteBuffer key) { - // Find first character that can be incremented - final int n = key.remaining(); - for (int i = 0; i < n; i++) { - final int byt = key.get(i) & 0xff; - if (byt != 0xff) { - key.put(i, (byte)(byt + 1)); - key.limit(i+1); - return; - } - } - // *key is a run of 0xffs. Leave it alone. - } -} diff --git a/java/src/main/java/org/rocksdb/util/Environment.java b/java/src/main/java/org/rocksdb/util/Environment.java deleted file mode 100644 index 9ad51c7c7..000000000 --- a/java/src/main/java/org/rocksdb/util/Environment.java +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb.util; - -import java.io.File; -import java.io.IOException; - -public class Environment { - private static String OS = System.getProperty("os.name").toLowerCase(); - private static String ARCH = System.getProperty("os.arch").toLowerCase(); - private static String MUSL_ENVIRONMENT = System.getenv("ROCKSDB_MUSL_LIBC"); - - /** - * Will be lazily initialised by {@link #isMuslLibc()} instead of the previous static - * initialisation. The lazy initialisation prevents Windows from reporting suspicious behaviour of - * the JVM attempting IO on Unix paths. - */ - private static Boolean MUSL_LIBC = null; - - public static boolean isAarch64() { - return ARCH.contains("aarch64"); - } - - public static boolean isPowerPC() { - return ARCH.contains("ppc"); - } - - public static boolean isS390x() { - return ARCH.contains("s390x"); - } - - public static boolean isWindows() { - return (OS.contains("win")); - } - - public static boolean isFreeBSD() { - return (OS.contains("freebsd")); - } - - public static boolean isMac() { - return (OS.contains("mac")); - } - - public static boolean isAix() { - return OS.contains("aix"); - } - - public static boolean isUnix() { - return OS.contains("nix") || - OS.contains("nux"); - } - - /** - * Determine if the environment has a musl libc. - * - * @return true if the environment has a musl libc, false otherwise. - */ - public static boolean isMuslLibc() { - if (MUSL_LIBC == null) { - MUSL_LIBC = initIsMuslLibc(); - } - return MUSL_LIBC; - } - - /** - * Determine if the environment has a musl libc. - * - * The initialisation counterpart of {@link #isMuslLibc()}. - * - * Intentionally package-private for testing. - * - * @return true if the environment has a musl libc, false otherwise. 
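A minimal sketch of plugging the Java BytewiseComparator above into a database, with the native built-in the class javadoc recommends shown as the usual alternative; the path is illustrative:

    static void javaComparatorDb() throws RocksDBException {
      try (final ComparatorOptions copt = new ComparatorOptions();
           final BytewiseComparator javaComparator = new BytewiseComparator(copt);
           final Options options = new Options()
               .setCreateIfMissing(true)
               .setComparator(javaComparator); // Java implementation: slower, mainly for tests
           final RocksDB db = RocksDB.open(options, "/tmp/cmp-example")) {
        // the comparator must stay open for as long as the DB uses it
      }
      // In production the native built-in is normally preferred:
      //   options.setComparator(BuiltinComparator.BYTEWISE_COMPARATOR);
    }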
- */ - static boolean initIsMuslLibc() { - // consider explicit user setting from environment first - if ("true".equalsIgnoreCase(MUSL_ENVIRONMENT)) { - return true; - } - if ("false".equalsIgnoreCase(MUSL_ENVIRONMENT)) { - return false; - } - - // check if ldd indicates a muslc lib - try { - final Process p = - new ProcessBuilder("/usr/bin/env", "sh", "-c", "ldd /usr/bin/env | grep -q musl").start(); - if (p.waitFor() == 0) { - return true; - } - } catch (final IOException | InterruptedException e) { - // do nothing, and move on to the next check - } - - final File lib = new File("/lib"); - if (lib.exists() && lib.isDirectory() && lib.canRead()) { - // attempt the most likely musl libc name first - final String possibleMuslcLibName; - if (isPowerPC()) { - possibleMuslcLibName = "libc.musl-ppc64le.so.1"; - } else if (isAarch64()) { - possibleMuslcLibName = "libc.musl-aarch64.so.1"; - } else if (isS390x()) { - possibleMuslcLibName = "libc.musl-s390x.so.1"; - } else { - possibleMuslcLibName = "libc.musl-x86_64.so.1"; - } - final File possibleMuslcLib = new File(lib, possibleMuslcLibName); - if (possibleMuslcLib.exists() && possibleMuslcLib.canRead()) { - return true; - } - - // fallback to scanning for a musl libc - final File[] libFiles = lib.listFiles(); - if (libFiles == null) { - return false; - } - for (final File f : libFiles) { - if (f.getName().startsWith("libc.musl")) { - return true; - } - } - } - - return false; - } - - public static boolean isSolaris() { - return OS.contains("sunos"); - } - - public static boolean isOpenBSD() { - return (OS.contains("openbsd")); - } - - public static boolean is64Bit() { - if (ARCH.indexOf("sparcv9") >= 0) { - return true; - } - return (ARCH.indexOf("64") > 0); - } - - public static String getSharedLibraryName(final String name) { - return name + "jni"; - } - - public static String getSharedLibraryFileName(final String name) { - return appendLibOsSuffix("lib" + getSharedLibraryName(name), true); - } - - /** - * Get the name of the libc implementation - * - * @return the name of the implementation, - * or null if the default for that platform (e.g. glibc on Linux). - */ - public static /* @Nullable */ String getLibcName() { - if (isMuslLibc()) { - return "musl"; - } else { - return null; - } - } - - private static String getLibcPostfix() { - final String libcName = getLibcName(); - if (libcName == null) { - return ""; - } - return "-" + libcName; - } - - public static String getJniLibraryName(final String name) { - if (isUnix()) { - final String arch = is64Bit() ? "64" : "32"; - if (isPowerPC() || isAarch64()) { - return String.format("%sjni-linux-%s%s", name, ARCH, getLibcPostfix()); - } else if (isS390x()) { - return String.format("%sjni-linux-%s", name, ARCH); - } else { - return String.format("%sjni-linux%s%s", name, arch, getLibcPostfix()); - } - } else if (isMac()) { - if (is64Bit()) { - final String arch; - if (isAarch64()) { - arch = "arm64"; - } else { - arch = "x86_64"; - } - return String.format("%sjni-osx-%s", name, arch); - } else { - return String.format("%sjni-osx", name); - } - } else if (isFreeBSD()) { - return String.format("%sjni-freebsd%s", name, is64Bit() ? "64" : "32"); - } else if (isAix() && is64Bit()) { - return String.format("%sjni-aix64", name); - } else if (isSolaris()) { - final String arch = is64Bit() ? 
"64" : "32"; - return String.format("%sjni-solaris%s", name, arch); - } else if (isWindows() && is64Bit()) { - return String.format("%sjni-win64", name); - } else if (isOpenBSD()) { - return String.format("%sjni-openbsd%s", name, is64Bit() ? "64" : "32"); - } - - throw new UnsupportedOperationException(String.format("Cannot determine JNI library name for ARCH='%s' OS='%s' name='%s'", ARCH, OS, name)); - } - - public static /*@Nullable*/ String getFallbackJniLibraryName(final String name) { - if (isMac() && is64Bit()) { - return String.format("%sjni-osx", name); - } - return null; - } - - public static String getJniLibraryFileName(final String name) { - return appendLibOsSuffix("lib" + getJniLibraryName(name), false); - } - - public static /*@Nullable*/ String getFallbackJniLibraryFileName(final String name) { - final String fallbackJniLibraryName = getFallbackJniLibraryName(name); - if (fallbackJniLibraryName == null) { - return null; - } - return appendLibOsSuffix("lib" + fallbackJniLibraryName, false); - } - - private static String appendLibOsSuffix(final String libraryFileName, final boolean shared) { - if (isUnix() || isAix() || isSolaris() || isFreeBSD() || isOpenBSD()) { - return libraryFileName + ".so"; - } else if (isMac()) { - return libraryFileName + (shared ? ".dylib" : ".jnilib"); - } else if (isWindows()) { - return libraryFileName + ".dll"; - } - throw new UnsupportedOperationException(); - } - - public static String getJniLibraryExtension() { - if (isWindows()) { - return ".dll"; - } - return (isMac()) ? ".jnilib" : ".so"; - } -} diff --git a/java/src/main/java/org/rocksdb/util/IntComparator.java b/java/src/main/java/org/rocksdb/util/IntComparator.java deleted file mode 100644 index cc096cd14..000000000 --- a/java/src/main/java/org/rocksdb/util/IntComparator.java +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb.util; - -import org.rocksdb.AbstractComparator; -import org.rocksdb.ComparatorOptions; - -import java.nio.ByteBuffer; - -/** - * This is a Java implementation of a Comparator for Java int - * keys. - * - * This comparator assumes keys are (at least) four bytes, so - * the caller must guarantee that in accessing other APIs in - * combination with this comparator. - * - * The performance of Comparators implemented in Java is always - * less than their C++ counterparts due to the bridging overhead, - * as such you likely don't want to use this apart from benchmarking - * or testing. 
- */ -public final class IntComparator extends AbstractComparator { - - public IntComparator(final ComparatorOptions copt) { - super(copt); - } - - @Override - public String name() { - return "rocksdb.java.IntComparator"; - } - - @Override - public int compare(final ByteBuffer a, final ByteBuffer b) { - return compareIntKeys(a, b); - } - - /** - * Compares integer keys - * so that they are in ascending order - * - * @param a 4-bytes representing an integer key - * @param b 4-bytes representing an integer key - * - * @return negative if a < b, 0 if a == b, positive otherwise - */ - private final int compareIntKeys(final ByteBuffer a, final ByteBuffer b) { - final int iA = a.getInt(); - final int iB = b.getInt(); - - // protect against int key calculation overflow - final long diff = (long)iA - iB; - final int result; - if (diff < Integer.MIN_VALUE) { - result = Integer.MIN_VALUE; - } else if(diff > Integer.MAX_VALUE) { - result = Integer.MAX_VALUE; - } else { - result = (int)diff; - } - return result; - } -} diff --git a/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java b/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java deleted file mode 100644 index 4c06f80aa..000000000 --- a/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb.util; - -import org.rocksdb.AbstractComparator; -import org.rocksdb.BuiltinComparator; -import org.rocksdb.ComparatorOptions; -import org.rocksdb.Slice; - -import java.nio.ByteBuffer; - -/** - * This is a Java Native implementation of the C++ - * equivalent ReverseBytewiseComparatorImpl using {@link Slice} - * - * The performance of Comparators implemented in Java is always - * less than their C++ counterparts due to the bridging overhead, - * as such you likely don't want to use this apart from benchmarking - * and you most likely instead wanted - * {@link BuiltinComparator#REVERSE_BYTEWISE_COMPARATOR} - */ -public final class ReverseBytewiseComparator extends AbstractComparator { - - public ReverseBytewiseComparator(final ComparatorOptions copt) { - super(copt); - } - - @Override - public String name() { - return "rocksdb.java.ReverseBytewiseComparator"; - } - - @Override - public int compare(final ByteBuffer a, final ByteBuffer b) { - return -BytewiseComparator._compare(a, b); - } - - @Override - public void findShortestSeparator(final ByteBuffer start, - final ByteBuffer limit) { - // Find length of common prefix - final int minLength = Math.min(start.remaining(), limit.remaining()); - int diffIndex = 0; - while (diffIndex < minLength && - start.get(diffIndex) == limit.get(diffIndex)) { - diffIndex++; - } - - assert(diffIndex <= minLength); - if (diffIndex == minLength) { - // Do not shorten if one string is a prefix of the other - // - // We could handle cases like: - // V - // A A 2 X Y - // A A 2 - // in a similar way as BytewiseComparator::FindShortestSeparator(). - // We keep it simple by not implementing it. We can come back to it - // later when needed. 
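A sketch of using the IntComparator above with 4-byte keys. Big-endian encoding via `ByteBuffer.putInt` matches the `getInt` call in its compare method; the path and helper name are illustrative:

    static byte[] intKey(final int k) {
      // big-endian, as read back by IntComparator.compareIntKeys
      return ByteBuffer.allocate(4).putInt(k).array();
    }

    static void intKeyedDb() throws RocksDBException {
      try (final ComparatorOptions copt = new ComparatorOptions();
           final IntComparator intComparator = new IntComparator(copt);
           final Options options = new Options()
               .setCreateIfMissing(true)
               .setComparator(intComparator);
           final RocksDB db = RocksDB.open(options, "/tmp/int-cmp-example")) {
        db.put(intKey(-5), "minus five".getBytes(UTF_8));
        db.put(intKey(10), "ten".getBytes(UTF_8));
        // -5 now iterates before 10; raw bytewise order would sort it last (0xFF... prefix)
      }
    }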
- } else { - final int startByte = start.get(diffIndex) & 0xff; - final int limitByte = limit.get(diffIndex) & 0xff; - if (startByte > limitByte && diffIndex < start.remaining() - 1) { - // Case like - // V - // A A 3 A A - // A A 1 B B - // - // or - // v - // A A 2 A A - // A A 1 B B - // In this case "AA2" will be good. -//#ifndef NDEBUG -// std::string old_start = *start; -//#endif - start.limit(diffIndex + 1); -//#ifndef NDEBUG -// assert(old_start >= *start); -//#endif - assert(BytewiseComparator._compare(start.duplicate(), limit.duplicate()) > 0); - } - } - } -} diff --git a/java/src/main/java/org/rocksdb/util/SizeUnit.java b/java/src/main/java/org/rocksdb/util/SizeUnit.java deleted file mode 100644 index 0f717e8d4..000000000 --- a/java/src/main/java/org/rocksdb/util/SizeUnit.java +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb.util; - -public class SizeUnit { - public static final long KB = 1024L; - public static final long MB = KB * KB; - public static final long GB = KB * MB; - public static final long TB = KB * GB; - public static final long PB = KB * TB; - - private SizeUnit() {} -} diff --git a/java/src/test/java/org/rocksdb/AbstractTransactionTest.java b/java/src/test/java/org/rocksdb/AbstractTransactionTest.java deleted file mode 100644 index 46685f9fd..000000000 --- a/java/src/test/java/org/rocksdb/AbstractTransactionTest.java +++ /dev/null @@ -1,965 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.fail; - -/** - * Base class of {@link TransactionTest} and {@link OptimisticTransactionTest} - */ -public abstract class AbstractTransactionTest { - - protected final static byte[] TXN_TEST_COLUMN_FAMILY = "txn_test_cf" - .getBytes(); - - protected static final Random rand = PlatformRandomHelper. 
- getPlatformSpecificRandomFactory(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - public abstract DBContainer startDb() - throws RocksDBException; - - @Test - public void setSnapshot() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.setSnapshot(); - } - } - - @Test - public void setSnapshotOnNextOperation() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.setSnapshotOnNextOperation(); - txn.put("key1".getBytes(), "value1".getBytes()); - } - } - - @Test - public void setSnapshotOnNextOperation_transactionNotifier() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - - try(final TestTransactionNotifier notifier = new TestTransactionNotifier()) { - txn.setSnapshotOnNextOperation(notifier); - txn.put("key1".getBytes(), "value1".getBytes()); - - txn.setSnapshotOnNextOperation(notifier); - txn.put("key2".getBytes(), "value2".getBytes()); - - assertThat(notifier.getCreatedSnapshots().size()).isEqualTo(2); - } - } - } - - @Test - public void getSnapshot() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.setSnapshot(); - final Snapshot snapshot = txn.getSnapshot(); - assertThat(snapshot.isOwningHandle()).isFalse(); - } - } - - @Test - public void getSnapshot_null() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - final Snapshot snapshot = txn.getSnapshot(); - assertThat(snapshot).isNull(); - } - } - - @Test - public void clearSnapshot() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.setSnapshot(); - txn.clearSnapshot(); - } - } - - @Test - public void clearSnapshot_none() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.clearSnapshot(); - } - } - - @Test - public void commit() throws RocksDBException { - final byte k1[] = "rollback-key1".getBytes(UTF_8); - final byte v1[] = "rollback-value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb()) { - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - txn.commit(); - } - - try(final ReadOptions readOptions = new ReadOptions(); - final Transaction txn2 = dbContainer.beginTransaction()) { - assertThat(txn2.get(readOptions, k1)).isEqualTo(v1); - } - } - } - - @Test - public void rollback() throws RocksDBException { - final byte k1[] = "rollback-key1".getBytes(UTF_8); - final byte v1[] = "rollback-value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb()) { - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - txn.rollback(); - } - - try(final ReadOptions readOptions = new ReadOptions(); - final Transaction txn2 = dbContainer.beginTransaction()) { - assertThat(txn2.get(readOptions, k1)).isNull(); - } - } - } - - @Test - public void savePoint() throws RocksDBException { - final byte k1[] = "savePoint-key1".getBytes(UTF_8); - final byte v1[] = "savePoint-value1".getBytes(UTF_8); - final byte k2[] = "savePoint-key2".getBytes(UTF_8); - final byte v2[] = "savePoint-value2".getBytes(UTF_8); - - try(final DBContainer 
dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - - - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - - txn.setSavePoint(); - - txn.put(k2, v2); - - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - assertThat(txn.get(readOptions, k2)).isEqualTo(v2); - - txn.rollbackToSavePoint(); - - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - assertThat(txn.get(readOptions, k2)).isNull(); - - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - assertThat(txn2.get(readOptions, k1)).isEqualTo(v1); - assertThat(txn2.get(readOptions, k2)).isNull(); - } - } - } - - @Test - public void getPut_cf() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - assertThat(txn.get(testCf, readOptions, k1)).isNull(); - txn.put(testCf, k1, v1); - assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); - } - } - - @Test - public void getPut() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.get(readOptions, k1)).isNull(); - txn.put(k1, v1); - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - } - } - - @Test - public void multiGetPut_cf() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - final List cfList = Arrays.asList(testCf, testCf); - - assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(new byte[][] { null, null }); - - txn.put(testCf, keys[0], values[0]); - txn.put(testCf, keys[1], values[1]); - assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(values); - } - } - - @Test - public void multiGetPutAsList_cf() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - final List cfList = Arrays.asList(testCf, testCf); - - assertThat(txn.multiGetAsList(readOptions, cfList, Arrays.asList(keys))) - .containsExactly(null, null); - - txn.put(testCf, keys[0], values[0]); - txn.put(testCf, keys[1], values[1]); - assertThat(txn.multiGetAsList(readOptions, cfList, Arrays.asList(keys))) - .containsExactly(values); - } - } - - @Test - public void multiGetPut() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), 
"value2".getBytes(UTF_8)}; - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - - assertThat(txn.multiGet(readOptions, keys)).isEqualTo(new byte[][] { null, null }); - - txn.put(keys[0], values[0]); - txn.put(keys[1], values[1]); - assertThat(txn.multiGet(readOptions, keys)).isEqualTo(values); - } - } - - @Test - public void multiGetPutAsList() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.multiGetAsList(readOptions, Arrays.asList(keys))).containsExactly(null, null); - - txn.put(keys[0], values[0]); - txn.put(keys[1], values[1]); - assertThat(txn.multiGetAsList(readOptions, Arrays.asList(keys))).containsExactly(values); - } - } - - @Test - public void getForUpdate_cf() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isNull(); - txn.put(testCf, k1, v1); - assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); - } - } - - @Test - public void getForUpdate() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getForUpdate(readOptions, k1, true)).isNull(); - txn.put(k1, v1); - assertThat(txn.getForUpdate(readOptions, k1, true)).isEqualTo(v1); - } - } - - @Test - public void multiGetForUpdate_cf() throws RocksDBException { - final byte keys[][] = new byte[][] { - "key1".getBytes(UTF_8), - "key2".getBytes(UTF_8)}; - final byte values[][] = new byte[][] { - "value1".getBytes(UTF_8), - "value2".getBytes(UTF_8)}; - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - final List cfList = Arrays.asList(testCf, testCf); - - assertThat(txn.multiGetForUpdate(readOptions, cfList, keys)) - .isEqualTo(new byte[][] { null, null }); - - txn.put(testCf, keys[0], values[0]); - txn.put(testCf, keys[1], values[1]); - assertThat(txn.multiGetForUpdate(readOptions, cfList, keys)) - .isEqualTo(values); - } - } - - @Test - public void multiGetForUpdate() throws RocksDBException { - final byte keys[][] = new byte[][]{ - "key1".getBytes(UTF_8), - "key2".getBytes(UTF_8)}; - final byte values[][] = new byte[][]{ - "value1".getBytes(UTF_8), - "value2".getBytes(UTF_8)}; - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.multiGetForUpdate(readOptions, keys)).isEqualTo(new byte[][]{null, null}); - - txn.put(keys[0], values[0]); - txn.put(keys[1], values[1]); 
- assertThat(txn.multiGetForUpdate(readOptions, keys)).isEqualTo(values); - } - } - - @Test - public void getIterator() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - txn.put(k1, v1); - - try(final RocksIterator iterator = txn.getIterator(readOptions)) { - iterator.seek(k1); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo(k1); - assertThat(iterator.value()).isEqualTo(v1); - } - } - } - - @Test - public void getIterator_cf() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - txn.put(testCf, k1, v1); - - try(final RocksIterator iterator = txn.getIterator(readOptions, testCf)) { - iterator.seek(k1); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo(k1); - assertThat(iterator.value()).isEqualTo(v1); - } - } - } - - @Test - public void merge_cf() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - txn.merge(testCf, k1, v1); - } - } - - @Test - public void merge() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.merge(k1, v1); - } - } - - - @Test - public void delete_cf() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - txn.put(testCf, k1, v1); - assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); - - txn.delete(testCf, k1); - assertThat(txn.get(testCf, readOptions, k1)).isNull(); - } - } - - @Test - public void delete() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - - txn.delete(k1); - assertThat(txn.get(readOptions, k1)).isNull(); - } - } - - @Test - public void delete_parts_cf() throws RocksDBException { - final byte keyParts[][] = new byte[][] { - "ke".getBytes(UTF_8), - "y1".getBytes(UTF_8)}; - final byte valueParts[][] = new byte[][] { - "val".getBytes(UTF_8), - "ue1".getBytes(UTF_8)}; - final byte[] key = concat(keyParts); - final byte[] value = concat(valueParts); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = 
dbContainer.getTestColumnFamily(); - txn.put(testCf, keyParts, valueParts); - assertThat(txn.get(testCf, readOptions, key)).isEqualTo(value); - - txn.delete(testCf, keyParts); - - assertThat(txn.get(testCf, readOptions, key)) - .isNull(); - } - } - - @Test - public void delete_parts() throws RocksDBException { - final byte keyParts[][] = new byte[][] { - "ke".getBytes(UTF_8), - "y1".getBytes(UTF_8)}; - final byte valueParts[][] = new byte[][] { - "val".getBytes(UTF_8), - "ue1".getBytes(UTF_8)}; - final byte[] key = concat(keyParts); - final byte[] value = concat(valueParts); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - - txn.put(keyParts, valueParts); - - assertThat(txn.get(readOptions, key)).isEqualTo(value); - - txn.delete(keyParts); - - assertThat(txn.get(readOptions, key)).isNull(); - } - } - - @Test - public void getPutUntracked_cf() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - assertThat(txn.get(testCf, readOptions, k1)).isNull(); - txn.putUntracked(testCf, k1, v1); - assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); - } - } - - @Test - public void getPutUntracked() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.get(readOptions, k1)).isNull(); - txn.putUntracked(k1, v1); - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - } - } - - @Deprecated - @Test - public void multiGetPutUntracked_cf() throws RocksDBException { - final byte keys[][] = new byte[][] { - "key1".getBytes(UTF_8), - "key2".getBytes(UTF_8)}; - final byte values[][] = new byte[][] { - "value1".getBytes(UTF_8), - "value2".getBytes(UTF_8)}; - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - - final List cfList = Arrays.asList(testCf, testCf); - - assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(new byte[][] { null, null }); - txn.putUntracked(testCf, keys[0], values[0]); - txn.putUntracked(testCf, keys[1], values[1]); - assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(values); - } - } - - @Test - public void multiGetPutUntrackedAsList_cf() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - - final List cfList = Arrays.asList(testCf, testCf); - - assertThat(txn.multiGetAsList(readOptions, cfList, Arrays.asList(keys))) - .containsExactly(null, null); - txn.putUntracked(testCf, keys[0], values[0]); - txn.putUntracked(testCf, keys[1], values[1]); - 
assertThat(txn.multiGetAsList(readOptions, cfList, Arrays.asList(keys))) - .containsExactly(values); - } - } - - @Deprecated - @Test - public void multiGetPutUntracked() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - - assertThat(txn.multiGet(readOptions, keys)).isEqualTo(new byte[][] { null, null }); - txn.putUntracked(keys[0], values[0]); - txn.putUntracked(keys[1], values[1]); - assertThat(txn.multiGet(readOptions, keys)).isEqualTo(values); - } - } - - @Test - public void multiGetPutAsListUntracked() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.multiGetAsList(readOptions, Arrays.asList(keys))).containsExactly(null, null); - txn.putUntracked(keys[0], values[0]); - txn.putUntracked(keys[1], values[1]); - assertThat(txn.multiGetAsList(readOptions, Arrays.asList(keys))).containsExactly(values); - } - } - - @Test - public void mergeUntracked_cf() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - txn.mergeUntracked(testCf, k1, v1); - } - } - - @Test - public void mergeUntracked() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.mergeUntracked(k1, v1); - } - } - - @Test - public void deleteUntracked_cf() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - txn.put(testCf, k1, v1); - assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); - - txn.deleteUntracked(testCf, k1); - assertThat(txn.get(testCf, readOptions, k1)).isNull(); - } - } - - @Test - public void deleteUntracked() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - - txn.deleteUntracked(k1); - assertThat(txn.get(readOptions, k1)).isNull(); - } - } - - @Test - public void deleteUntracked_parts_cf() throws RocksDBException { - final byte keyParts[][] = new byte[][] { - "ke".getBytes(UTF_8), - "y1".getBytes(UTF_8)}; - final byte valueParts[][] = new byte[][] { - "val".getBytes(UTF_8), - "ue1".getBytes(UTF_8)}; - final byte[] key = concat(keyParts); - final 
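A short sketch of the save-point pattern the savePoint test above verifies, assuming `txn` is an open Transaction as in these tests; keys and values are illustrative:

    txn.put("step1".getBytes(UTF_8), "ok".getBytes(UTF_8));
    txn.setSavePoint();                 // mark a rollback point inside the transaction
    txn.put("step2".getBytes(UTF_8), "maybe".getBytes(UTF_8));
    txn.rollbackToSavePoint();          // undoes step2, keeps step1
    txn.commit();                       // only step1 is persisted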
byte[] value = concat(valueParts); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - txn.put(testCf, keyParts, valueParts); - assertThat(txn.get(testCf, readOptions, key)).isEqualTo(value); - - txn.deleteUntracked(testCf, keyParts); - assertThat(txn.get(testCf, readOptions, key)).isNull(); - } - } - - @Test - public void deleteUntracked_parts() throws RocksDBException { - final byte keyParts[][] = new byte[][] { - "ke".getBytes(UTF_8), - "y1".getBytes(UTF_8)}; - final byte valueParts[][] = new byte[][] { - "val".getBytes(UTF_8), - "ue1".getBytes(UTF_8)}; - final byte[] key = concat(keyParts); - final byte[] value = concat(valueParts); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.put(keyParts, valueParts); - assertThat(txn.get(readOptions, key)).isEqualTo(value); - - txn.deleteUntracked(keyParts); - assertThat(txn.get(readOptions, key)).isNull(); - } - } - - @Test - public void putLogData() throws RocksDBException { - final byte[] blob = "blobby".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.putLogData(blob); - } - } - - @Test - public void enabledDisableIndexing() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.disableIndexing(); - txn.enableIndexing(); - txn.disableIndexing(); - txn.enableIndexing(); - } - } - - @Test - public void numKeys() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - final byte k2[] = "key2".getBytes(UTF_8); - final byte v2[] = "value2".getBytes(UTF_8); - final byte k3[] = "key3".getBytes(UTF_8); - final byte v3[] = "value3".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - txn.put(k1, v1); - txn.put(testCf, k2, v2); - txn.merge(k3, v3); - txn.delete(testCf, k2); - - assertThat(txn.getNumKeys()).isEqualTo(3); - assertThat(txn.getNumPuts()).isEqualTo(2); - assertThat(txn.getNumMerges()).isEqualTo(1); - assertThat(txn.getNumDeletes()).isEqualTo(1); - } - } - - @Test - public void elapsedTime() throws RocksDBException, InterruptedException { - final long preStartTxnTime = System.currentTimeMillis(); - try (final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - Thread.sleep(2); - - final long txnElapsedTime = txn.getElapsedTime(); - assertThat(txnElapsedTime).isLessThan(System.currentTimeMillis() - preStartTxnTime); - assertThat(txnElapsedTime).isGreaterThan(0); - } - } - - @Test - public void getWriteBatch() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - - txn.put(k1, v1); - - final WriteBatchWithIndex writeBatch = txn.getWriteBatch(); - assertThat(writeBatch).isNotNull(); - assertThat(writeBatch.isOwningHandle()).isFalse(); - assertThat(writeBatch.count()).isEqualTo(1); - } - } - - @Test - public void setLockTimeout() throws RocksDBException { 
- try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - txn.setLockTimeout(1000); - } - } - - @Test - public void writeOptions() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final WriteOptions writeOptions = new WriteOptions() - .setDisableWAL(true) - .setSync(true); - final Transaction txn = dbContainer.beginTransaction(writeOptions)) { - - txn.put(k1, v1); - - WriteOptions txnWriteOptions = txn.getWriteOptions(); - assertThat(txnWriteOptions).isNotNull(); - assertThat(txnWriteOptions.isOwningHandle()).isFalse(); - assertThat(txnWriteOptions).isNotSameAs(writeOptions); - assertThat(txnWriteOptions.disableWAL()).isTrue(); - assertThat(txnWriteOptions.sync()).isTrue(); - - txn.setWriteOptions(txnWriteOptions.setSync(false)); - txnWriteOptions = txn.getWriteOptions(); - assertThat(txnWriteOptions).isNotNull(); - assertThat(txnWriteOptions.isOwningHandle()).isFalse(); - assertThat(txnWriteOptions).isNotSameAs(writeOptions); - assertThat(txnWriteOptions.disableWAL()).isTrue(); - assertThat(txnWriteOptions.sync()).isFalse(); - } - } - - @Test - public void undoGetForUpdate_cf() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isNull(); - txn.put(testCf, k1, v1); - assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); - txn.undoGetForUpdate(testCf, k1); - } - } - - @Test - public void undoGetForUpdate() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getForUpdate(readOptions, k1, true)).isNull(); - txn.put(k1, v1); - assertThat(txn.getForUpdate(readOptions, k1, true)).isEqualTo(v1); - txn.undoGetForUpdate(k1); - } - } - - @Test - public void rebuildFromWriteBatch() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - final byte k2[] = "key2".getBytes(UTF_8); - final byte v2[] = "value2".getBytes(UTF_8); - final byte k3[] = "key3".getBytes(UTF_8); - final byte v3[] = "value3".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions(); - final Transaction txn = dbContainer.beginTransaction()) { - - txn.put(k1, v1); - - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - assertThat(txn.getNumKeys()).isEqualTo(1); - - try(final WriteBatch writeBatch = new WriteBatch()) { - writeBatch.put(k2, v2); - writeBatch.put(k3, v3); - txn.rebuildFromWriteBatch(writeBatch); - - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - assertThat(txn.get(readOptions, k2)).isEqualTo(v2); - assertThat(txn.get(readOptions, k3)).isEqualTo(v3); - assertThat(txn.getNumKeys()).isEqualTo(3); - } - } - } - - @Test - public void getCommitTimeWriteBatch() throws RocksDBException { - final byte k1[] = "key1".getBytes(UTF_8); - final byte v1[] = "value1".getBytes(UTF_8); - - try(final DBContainer dbContainer = 
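The undoGetForUpdate cases above are easiest to read alongside the locking pattern they unwind. A hedged sketch of that pattern follows; txnDb stands for an already-open TransactionDB (not part of this test) and the key/value literals are arbitrary:

try (final ReadOptions readOptions = new ReadOptions();
     final WriteOptions writeOptions = new WriteOptions();
     final Transaction txn = txnDb.beginTransaction(writeOptions)) {
  final byte[] key = "key1".getBytes(UTF_8);
  // Read the key and take an exclusive lock so concurrent writers will conflict.
  final byte[] current = txn.getForUpdate(readOptions, key, true /* exclusive */);
  if (current == null) {
    // Nothing to modify: release the lock taken by getForUpdate and abandon the txn.
    txn.undoGetForUpdate(key);
    txn.rollback();
  } else {
    txn.put(key, "value2".getBytes(UTF_8));
    txn.commit();
  }
}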
startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - - txn.put(k1, v1); - final WriteBatch writeBatch = txn.getCommitTimeWriteBatch(); - - assertThat(writeBatch).isNotNull(); - assertThat(writeBatch.isOwningHandle()).isFalse(); - assertThat(writeBatch.count()).isEqualTo(0); - } - } - - @Test - public void logNumber() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getLogNumber()).isEqualTo(0); - final long logNumber = rand.nextLong(); - txn.setLogNumber(logNumber); - assertThat(txn.getLogNumber()).isEqualTo(logNumber); - } - } - - private static byte[] concat(final byte[][] bufs) { - int resultLength = 0; - for(final byte[] buf : bufs) { - resultLength += buf.length; - } - - final byte[] result = new byte[resultLength]; - int resultOffset = 0; - for(final byte[] buf : bufs) { - final int srcLength = buf.length; - System.arraycopy(buf, 0, result, resultOffset, srcLength); - resultOffset += srcLength; - } - - return result; - } - - private static class TestTransactionNotifier - extends AbstractTransactionNotifier { - private final List createdSnapshots = new ArrayList<>(); - - @Override - public void snapshotCreated(final Snapshot newSnapshot) { - createdSnapshots.add(newSnapshot); - } - - public List getCreatedSnapshots() { - return createdSnapshots; - } - } - - protected static abstract class DBContainer - implements AutoCloseable { - protected final WriteOptions writeOptions; - protected final List columnFamilyHandles; - protected final ColumnFamilyOptions columnFamilyOptions; - protected final DBOptions options; - - public DBContainer(final WriteOptions writeOptions, - final List columnFamilyHandles, - final ColumnFamilyOptions columnFamilyOptions, - final DBOptions options) { - this.writeOptions = writeOptions; - this.columnFamilyHandles = columnFamilyHandles; - this.columnFamilyOptions = columnFamilyOptions; - this.options = options; - } - - public abstract Transaction beginTransaction(); - - public abstract Transaction beginTransaction( - final WriteOptions writeOptions); - - public ColumnFamilyHandle getTestColumnFamily() { - return columnFamilyHandles.get(1); - } - - @Override - public abstract void close(); - } -} diff --git a/java/src/test/java/org/rocksdb/BackupEngineOptionsTest.java b/java/src/test/java/org/rocksdb/BackupEngineOptionsTest.java deleted file mode 100644 index 794bf04fb..000000000 --- a/java/src/test/java/org/rocksdb/BackupEngineOptionsTest.java +++ /dev/null @@ -1,300 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.Random; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -public class BackupEngineOptionsTest { - private final static String ARBITRARY_PATH = - System.getProperty("java.io.tmpdir"); - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public ExpectedException exception = ExpectedException.none(); - - public static final Random rand = PlatformRandomHelper. 
- getPlatformSpecificRandomFactory(); - - @Test - public void backupDir() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - assertThat(backupEngineOptions.backupDir()).isEqualTo(ARBITRARY_PATH); - } - } - - @Test - public void env() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - assertThat(backupEngineOptions.backupEnv()).isNull(); - - try(final Env env = new RocksMemEnv(Env.getDefault())) { - backupEngineOptions.setBackupEnv(env); - assertThat(backupEngineOptions.backupEnv()).isEqualTo(env); - } - } - } - - @Test - public void shareTableFiles() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - final boolean value = rand.nextBoolean(); - backupEngineOptions.setShareTableFiles(value); - assertThat(backupEngineOptions.shareTableFiles()).isEqualTo(value); - } - } - - @Test - public void infoLog() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - assertThat(backupEngineOptions.infoLog()).isNull(); - - try(final Options options = new Options(); - final Logger logger = new Logger(options){ - @Override - protected void log(InfoLogLevel infoLogLevel, String logMsg) { - - } - }) { - backupEngineOptions.setInfoLog(logger); - assertThat(backupEngineOptions.infoLog()).isEqualTo(logger); - } - } - } - - @Test - public void sync() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - final boolean value = rand.nextBoolean(); - backupEngineOptions.setSync(value); - assertThat(backupEngineOptions.sync()).isEqualTo(value); - } - } - - @Test - public void destroyOldData() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH);) { - final boolean value = rand.nextBoolean(); - backupEngineOptions.setDestroyOldData(value); - assertThat(backupEngineOptions.destroyOldData()).isEqualTo(value); - } - } - - @Test - public void backupLogFiles() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - final boolean value = rand.nextBoolean(); - backupEngineOptions.setBackupLogFiles(value); - assertThat(backupEngineOptions.backupLogFiles()).isEqualTo(value); - } - } - - @Test - public void backupRateLimit() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - final long value = Math.abs(rand.nextLong()); - backupEngineOptions.setBackupRateLimit(value); - assertThat(backupEngineOptions.backupRateLimit()).isEqualTo(value); - // negative will be mapped to 0 - backupEngineOptions.setBackupRateLimit(-1); - assertThat(backupEngineOptions.backupRateLimit()).isEqualTo(0); - } - } - - @Test - public void backupRateLimiter() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - assertThat(backupEngineOptions.backupEnv()).isNull(); - - try(final RateLimiter backupRateLimiter = - new RateLimiter(999)) { - backupEngineOptions.setBackupRateLimiter(backupRateLimiter); - assertThat(backupEngineOptions.backupRateLimiter()).isEqualTo(backupRateLimiter); - } - } - } - - @Test - public void restoreRateLimit() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - final long value = Math.abs(rand.nextLong()); - backupEngineOptions.setRestoreRateLimit(value); - assertThat(backupEngineOptions.restoreRateLimit()).isEqualTo(value); - // negative will be mapped 
to 0 - backupEngineOptions.setRestoreRateLimit(-1); - assertThat(backupEngineOptions.restoreRateLimit()).isEqualTo(0); - } - } - - @Test - public void restoreRateLimiter() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - assertThat(backupEngineOptions.backupEnv()).isNull(); - - try(final RateLimiter restoreRateLimiter = - new RateLimiter(911)) { - backupEngineOptions.setRestoreRateLimiter(restoreRateLimiter); - assertThat(backupEngineOptions.restoreRateLimiter()).isEqualTo(restoreRateLimiter); - } - } - } - - @Test - public void shareFilesWithChecksum() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - boolean value = rand.nextBoolean(); - backupEngineOptions.setShareFilesWithChecksum(value); - assertThat(backupEngineOptions.shareFilesWithChecksum()).isEqualTo(value); - } - } - - @Test - public void maxBackgroundOperations() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - final int value = rand.nextInt(); - backupEngineOptions.setMaxBackgroundOperations(value); - assertThat(backupEngineOptions.maxBackgroundOperations()).isEqualTo(value); - } - } - - @Test - public void callbackTriggerIntervalSize() { - try (final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH)) { - final long value = rand.nextLong(); - backupEngineOptions.setCallbackTriggerIntervalSize(value); - assertThat(backupEngineOptions.callbackTriggerIntervalSize()).isEqualTo(value); - } - } - - @Test - public void failBackupDirIsNull() { - exception.expect(IllegalArgumentException.class); - try (final BackupEngineOptions opts = new BackupEngineOptions(null)) { - //no-op - } - } - - @Test - public void failBackupDirIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.backupDir(); - } - } - - @Test - public void failSetShareTableFilesIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.setShareTableFiles(true); - } - } - - @Test - public void failShareTableFilesIfDisposed() { - try (BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.shareTableFiles(); - } - } - - @Test - public void failSetSyncIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.setSync(true); - } - } - - @Test - public void failSyncIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.sync(); - } - } - - @Test - public void failSetDestroyOldDataIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.setDestroyOldData(true); - } - } - - @Test - public void failDestroyOldDataIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.destroyOldData(); - } - } - - @Test - public void failSetBackupLogFilesIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.setBackupLogFiles(true); - } - } - - @Test - public void failBackupLogFilesIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.backupLogFiles(); - } - } - - @Test - public void failSetBackupRateLimitIfDisposed() { - try (final BackupEngineOptions options = 
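The individual setters exercised in this options test are normally chained when opening a BackupEngine. A small illustrative sketch, with the backup directory as a placeholder:

try (final BackupEngineOptions backupOptions =
         new BackupEngineOptions("/tmp/rocksdb-backups")
             .setShareTableFiles(true)
             .setShareFilesWithChecksum(true)
             .setSync(true)
             .setMaxBackgroundOperations(2);
     final BackupEngine backupEngine = BackupEngine.open(Env.getDefault(), backupOptions)) {
  // The engine is now ready for createNewBackup() / restoreDbFromLatestBackup() calls.
}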
setupUninitializedBackupEngineOptions(exception)) { - options.setBackupRateLimit(1); - } - } - - @Test - public void failBackupRateLimitIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.backupRateLimit(); - } - } - - @Test - public void failSetRestoreRateLimitIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.setRestoreRateLimit(1); - } - } - - @Test - public void failRestoreRateLimitIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.restoreRateLimit(); - } - } - - @Test - public void failSetShareFilesWithChecksumIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.setShareFilesWithChecksum(true); - } - } - - @Test - public void failShareFilesWithChecksumIfDisposed() { - try (final BackupEngineOptions options = setupUninitializedBackupEngineOptions(exception)) { - options.shareFilesWithChecksum(); - } - } - - private BackupEngineOptions setupUninitializedBackupEngineOptions(ExpectedException exception) { - final BackupEngineOptions backupEngineOptions = new BackupEngineOptions(ARBITRARY_PATH); - backupEngineOptions.close(); - exception.expect(AssertionError.class); - return backupEngineOptions; - } -} diff --git a/java/src/test/java/org/rocksdb/BackupEngineTest.java b/java/src/test/java/org/rocksdb/BackupEngineTest.java deleted file mode 100644 index 67145f846..000000000 --- a/java/src/test/java/org/rocksdb/BackupEngineTest.java +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.List; -import java.util.concurrent.ThreadLocalRandom; - -import static org.assertj.core.api.Assertions.assertThat; - -public class BackupEngineTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Rule - public TemporaryFolder backupFolder = new TemporaryFolder(); - - @Test - public void backupDb() throws RocksDBException { - // Open empty database. - try(final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - - // Fill database with some test values - prepareDatabase(db); - - // Create two backups - try (final BackupEngineOptions bopt = - new BackupEngineOptions(backupFolder.getRoot().getAbsolutePath()); - final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { - be.createNewBackup(db, false); - be.createNewBackup(db, true); - verifyNumberOfValidBackups(be, 2); - } - } - } - - @Test - public void deleteBackup() throws RocksDBException { - // Open empty database. 
- try(final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - // Fill database with some test values - prepareDatabase(db); - // Create two backups - try (final BackupEngineOptions bopt = - new BackupEngineOptions(backupFolder.getRoot().getAbsolutePath()); - final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { - be.createNewBackup(db, false); - be.createNewBackup(db, true); - final List backupInfo = - verifyNumberOfValidBackups(be, 2); - // Delete the first backup - be.deleteBackup(backupInfo.get(0).backupId()); - final List newBackupInfo = - verifyNumberOfValidBackups(be, 1); - - // The second backup must remain. - assertThat(newBackupInfo.get(0).backupId()). - isEqualTo(backupInfo.get(1).backupId()); - } - } - } - - @Test - public void purgeOldBackups() throws RocksDBException { - // Open empty database. - try(final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - // Fill database with some test values - prepareDatabase(db); - // Create four backups - try (final BackupEngineOptions bopt = - new BackupEngineOptions(backupFolder.getRoot().getAbsolutePath()); - final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { - be.createNewBackup(db, false); - be.createNewBackup(db, true); - be.createNewBackup(db, true); - be.createNewBackup(db, true); - final List backupInfo = - verifyNumberOfValidBackups(be, 4); - // Delete everything except the latest backup - be.purgeOldBackups(1); - final List newBackupInfo = - verifyNumberOfValidBackups(be, 1); - // The latest backup must remain. - assertThat(newBackupInfo.get(0).backupId()). - isEqualTo(backupInfo.get(3).backupId()); - } - } - } - - @Test - public void restoreLatestBackup() throws RocksDBException { - try(final Options opt = new Options().setCreateIfMissing(true)) { - // Open empty database. - RocksDB db = null; - try { - db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath()); - // Fill database with some test values - prepareDatabase(db); - - try (final BackupEngineOptions bopt = - new BackupEngineOptions(backupFolder.getRoot().getAbsolutePath()); - final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { - be.createNewBackup(db, true); - verifyNumberOfValidBackups(be, 1); - db.put("key1".getBytes(), "valueV2".getBytes()); - db.put("key2".getBytes(), "valueV2".getBytes()); - be.createNewBackup(db, true); - verifyNumberOfValidBackups(be, 2); - db.put("key1".getBytes(), "valueV3".getBytes()); - db.put("key2".getBytes(), "valueV3".getBytes()); - assertThat(new String(db.get("key1".getBytes()))).endsWith("V3"); - assertThat(new String(db.get("key2".getBytes()))).endsWith("V3"); - - db.close(); - db = null; - - verifyNumberOfValidBackups(be, 2); - // restore db from latest backup - try(final RestoreOptions ropts = new RestoreOptions(false)) { - be.restoreDbFromLatestBackup(dbFolder.getRoot().getAbsolutePath(), - dbFolder.getRoot().getAbsolutePath(), ropts); - } - - // Open database again. - db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath()); - - // Values must have suffix V2 because of restoring latest backup. 
- assertThat(new String(db.get("key1".getBytes()))).endsWith("V2"); - assertThat(new String(db.get("key2".getBytes()))).endsWith("V2"); - } - } finally { - if(db != null) { - db.close(); - } - } - } - } - - @Test - public void restoreFromBackup() - throws RocksDBException { - try(final Options opt = new Options().setCreateIfMissing(true)) { - RocksDB db = null; - try { - // Open empty database. - db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath()); - // Fill database with some test values - prepareDatabase(db); - try (final BackupEngineOptions bopt = - new BackupEngineOptions(backupFolder.getRoot().getAbsolutePath()); - final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { - be.createNewBackup(db, true); - verifyNumberOfValidBackups(be, 1); - db.put("key1".getBytes(), "valueV2".getBytes()); - db.put("key2".getBytes(), "valueV2".getBytes()); - be.createNewBackup(db, true); - verifyNumberOfValidBackups(be, 2); - db.put("key1".getBytes(), "valueV3".getBytes()); - db.put("key2".getBytes(), "valueV3".getBytes()); - assertThat(new String(db.get("key1".getBytes()))).endsWith("V3"); - assertThat(new String(db.get("key2".getBytes()))).endsWith("V3"); - - //close the database - db.close(); - db = null; - - //restore the backup - final List backupInfo = verifyNumberOfValidBackups(be, 2); - // restore db from first backup - be.restoreDbFromBackup(backupInfo.get(0).backupId(), - dbFolder.getRoot().getAbsolutePath(), - dbFolder.getRoot().getAbsolutePath(), - new RestoreOptions(false)); - // Open database again. - db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath()); - // Values must have suffix V2 because of restoring latest backup. - assertThat(new String(db.get("key1".getBytes()))).endsWith("V1"); - assertThat(new String(db.get("key2".getBytes()))).endsWith("V1"); - } - } finally { - if(db != null) { - db.close(); - } - } - } - } - - @Test - public void backupDbWithMetadata() throws RocksDBException { - // Open empty database. - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - // Fill database with some test values - prepareDatabase(db); - - // Create two backups - try (final BackupEngineOptions bopt = - new BackupEngineOptions(backupFolder.getRoot().getAbsolutePath()); - final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { - final String metadata = String.valueOf(ThreadLocalRandom.current().nextInt()); - be.createNewBackupWithMetadata(db, metadata, true); - final List backupInfoList = verifyNumberOfValidBackups(be, 1); - assertThat(backupInfoList.get(0).appMetadata()).isEqualTo(metadata); - } - } - } - - /** - * Verify backups. - * - * @param be {@link BackupEngine} instance. - * @param expectedNumberOfBackups numerical value - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. - */ - private List verifyNumberOfValidBackups(final BackupEngine be, - final int expectedNumberOfBackups) throws RocksDBException { - // Verify that backups exist - assertThat(be.getCorruptedBackups().length). - isEqualTo(0); - be.garbageCollect(); - final List backupInfo = be.getBackupInfo(); - assertThat(backupInfo.size()). - isEqualTo(expectedNumberOfBackups); - return backupInfo; - } - - /** - * Fill database with some test values. - * - * @param db {@link RocksDB} instance. - * @throws RocksDBException thrown if an error occurs within the native - * part of the library. 
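Stripped of the JUnit scaffolding, the backup-and-restore flow verified above reduces to roughly the following sketch; the paths are placeholders, and db is an open RocksDB instance which, as in the tests, must be closed before restoring into its own directory:

try (final BackupEngineOptions backupOptions = new BackupEngineOptions("/tmp/rocksdb-backups");
     final BackupEngine backupEngine = BackupEngine.open(Env.getDefault(), backupOptions)) {
  // true = flush the memtable before backing up so the backup captures the latest writes
  backupEngine.createNewBackup(db, true);

  // ... later, with the database closed ...
  try (final RestoreOptions restoreOptions = new RestoreOptions(false)) {
    backupEngine.restoreDbFromLatestBackup("/tmp/rocksdb-data", "/tmp/rocksdb-data", restoreOptions);
  }
}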
- */ - private void prepareDatabase(final RocksDB db) - throws RocksDBException { - db.put("key1".getBytes(), "valueV1".getBytes()); - db.put("key2".getBytes(), "valueV1".getBytes()); - } -} diff --git a/java/src/test/java/org/rocksdb/BlobOptionsTest.java b/java/src/test/java/org/rocksdb/BlobOptionsTest.java deleted file mode 100644 index fe3d9b246..000000000 --- a/java/src/test/java/org/rocksdb/BlobOptionsTest.java +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; - -import java.io.File; -import java.io.FilenameFilter; -import java.util.*; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class BlobOptionsTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - final int minBlobSize = 65536; - final int largeBlobSize = 65536 * 2; - - /** - * Count the files in the temporary folder which end with a particular suffix - * Used to query the state of a test database to check if it is as the test expects - * - * @param endsWith the suffix to match - * @return the number of files with a matching suffix - */ - @SuppressWarnings("CallToStringConcatCanBeReplacedByOperator") - private int countDBFiles(final String endsWith) { - return Objects - .requireNonNull(dbFolder.getRoot().list(new FilenameFilter() { - @Override - public boolean accept(File dir, String name) { - return name.endsWith(endsWith); - } - })) - .length; - } - - @SuppressWarnings("SameParameterValue") - private byte[] small_key(String suffix) { - return ("small_key_" + suffix).getBytes(UTF_8); - } - - @SuppressWarnings("SameParameterValue") - private byte[] small_value(String suffix) { - return ("small_value_" + suffix).getBytes(UTF_8); - } - - private byte[] large_key(String suffix) { - return ("large_key_" + suffix).getBytes(UTF_8); - } - - private byte[] large_value(String repeat) { - final byte[] large_value = ("" + repeat + "_" + largeBlobSize + "b").getBytes(UTF_8); - final byte[] large_buffer = new byte[largeBlobSize]; - for (int pos = 0; pos < largeBlobSize; pos += large_value.length) { - int numBytes = Math.min(large_value.length, large_buffer.length - pos); - System.arraycopy(large_value, 0, large_buffer, pos, numBytes); - } - return large_buffer; - } - - @Test - public void blobOptions() { - try (final Options options = new Options()) { - assertThat(options.enableBlobFiles()).isEqualTo(false); - assertThat(options.minBlobSize()).isEqualTo(0); - assertThat(options.blobCompressionType()).isEqualTo(CompressionType.NO_COMPRESSION); - assertThat(options.enableBlobGarbageCollection()).isEqualTo(false); - assertThat(options.blobFileSize()).isEqualTo(268435456L); - assertThat(options.blobGarbageCollectionAgeCutoff()).isEqualTo(0.25); - assertThat(options.blobGarbageCollectionForceThreshold()).isEqualTo(1.0); - assertThat(options.blobCompactionReadaheadSize()).isEqualTo(0); - assertThat(options.prepopulateBlobCache()) - .isEqualTo(PrepopulateBlobCache.PREPOPULATE_BLOB_DISABLE); - - 
assertThat(options.setEnableBlobFiles(true)).isEqualTo(options); - assertThat(options.setMinBlobSize(132768L)).isEqualTo(options); - assertThat(options.setBlobCompressionType(CompressionType.BZLIB2_COMPRESSION)) - .isEqualTo(options); - assertThat(options.setEnableBlobGarbageCollection(true)).isEqualTo(options); - assertThat(options.setBlobFileSize(132768L)).isEqualTo(options); - assertThat(options.setBlobGarbageCollectionAgeCutoff(0.89)).isEqualTo(options); - assertThat(options.setBlobGarbageCollectionForceThreshold(0.80)).isEqualTo(options); - assertThat(options.setBlobCompactionReadaheadSize(262144L)).isEqualTo(options); - assertThat(options.setBlobFileStartingLevel(0)).isEqualTo(options); - assertThat(options.setPrepopulateBlobCache(PrepopulateBlobCache.PREPOPULATE_BLOB_FLUSH_ONLY)) - .isEqualTo(options); - - assertThat(options.enableBlobFiles()).isEqualTo(true); - assertThat(options.minBlobSize()).isEqualTo(132768L); - assertThat(options.blobCompressionType()).isEqualTo(CompressionType.BZLIB2_COMPRESSION); - assertThat(options.enableBlobGarbageCollection()).isEqualTo(true); - assertThat(options.blobFileSize()).isEqualTo(132768L); - assertThat(options.blobGarbageCollectionAgeCutoff()).isEqualTo(0.89); - assertThat(options.blobGarbageCollectionForceThreshold()).isEqualTo(0.80); - assertThat(options.blobCompactionReadaheadSize()).isEqualTo(262144L); - assertThat(options.blobFileStartingLevel()).isEqualTo(0); - assertThat(options.prepopulateBlobCache()) - .isEqualTo(PrepopulateBlobCache.PREPOPULATE_BLOB_FLUSH_ONLY); - } - } - - @Test - public void blobColumnFamilyOptions() { - try (final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions()) { - assertThat(columnFamilyOptions.enableBlobFiles()).isEqualTo(false); - assertThat(columnFamilyOptions.minBlobSize()).isEqualTo(0); - assertThat(columnFamilyOptions.blobCompressionType()) - .isEqualTo(CompressionType.NO_COMPRESSION); - assertThat(columnFamilyOptions.enableBlobGarbageCollection()).isEqualTo(false); - assertThat(columnFamilyOptions.blobFileSize()).isEqualTo(268435456L); - assertThat(columnFamilyOptions.blobGarbageCollectionAgeCutoff()).isEqualTo(0.25); - assertThat(columnFamilyOptions.blobGarbageCollectionForceThreshold()).isEqualTo(1.0); - assertThat(columnFamilyOptions.blobCompactionReadaheadSize()).isEqualTo(0); - - assertThat(columnFamilyOptions.setEnableBlobFiles(true)).isEqualTo(columnFamilyOptions); - assertThat(columnFamilyOptions.setMinBlobSize(132768L)).isEqualTo(columnFamilyOptions); - assertThat(columnFamilyOptions.setBlobCompressionType(CompressionType.BZLIB2_COMPRESSION)) - .isEqualTo(columnFamilyOptions); - assertThat(columnFamilyOptions.setEnableBlobGarbageCollection(true)) - .isEqualTo(columnFamilyOptions); - assertThat(columnFamilyOptions.setBlobFileSize(132768L)).isEqualTo(columnFamilyOptions); - assertThat(columnFamilyOptions.setBlobGarbageCollectionAgeCutoff(0.89)) - .isEqualTo(columnFamilyOptions); - assertThat(columnFamilyOptions.setBlobGarbageCollectionForceThreshold(0.80)) - .isEqualTo(columnFamilyOptions); - assertThat(columnFamilyOptions.setBlobCompactionReadaheadSize(262144L)) - .isEqualTo(columnFamilyOptions); - assertThat(columnFamilyOptions.setBlobFileStartingLevel(0)).isEqualTo(columnFamilyOptions); - assertThat(columnFamilyOptions.setPrepopulateBlobCache( - PrepopulateBlobCache.PREPOPULATE_BLOB_DISABLE)) - .isEqualTo(columnFamilyOptions); - - assertThat(columnFamilyOptions.enableBlobFiles()).isEqualTo(true); - assertThat(columnFamilyOptions.minBlobSize()).isEqualTo(132768L); - 
assertThat(columnFamilyOptions.blobCompressionType()) - .isEqualTo(CompressionType.BZLIB2_COMPRESSION); - assertThat(columnFamilyOptions.enableBlobGarbageCollection()).isEqualTo(true); - assertThat(columnFamilyOptions.blobFileSize()).isEqualTo(132768L); - assertThat(columnFamilyOptions.blobGarbageCollectionAgeCutoff()).isEqualTo(0.89); - assertThat(columnFamilyOptions.blobGarbageCollectionForceThreshold()).isEqualTo(0.80); - assertThat(columnFamilyOptions.blobCompactionReadaheadSize()).isEqualTo(262144L); - assertThat(columnFamilyOptions.blobFileStartingLevel()).isEqualTo(0); - assertThat(columnFamilyOptions.prepopulateBlobCache()) - .isEqualTo(PrepopulateBlobCache.PREPOPULATE_BLOB_DISABLE); - } - } - - @Test - public void blobMutableColumnFamilyOptionsBuilder() { - final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder = - MutableColumnFamilyOptions.builder(); - builder.setEnableBlobFiles(true) - .setMinBlobSize(1024) - .setBlobFileSize(132768) - .setBlobCompressionType(CompressionType.BZLIB2_COMPRESSION) - .setEnableBlobGarbageCollection(true) - .setBlobGarbageCollectionAgeCutoff(0.89) - .setBlobGarbageCollectionForceThreshold(0.80) - .setBlobCompactionReadaheadSize(262144) - .setBlobFileStartingLevel(1) - .setPrepopulateBlobCache(PrepopulateBlobCache.PREPOPULATE_BLOB_FLUSH_ONLY); - - assertThat(builder.enableBlobFiles()).isEqualTo(true); - assertThat(builder.minBlobSize()).isEqualTo(1024); - assertThat(builder.blobFileSize()).isEqualTo(132768); - assertThat(builder.blobCompressionType()).isEqualTo(CompressionType.BZLIB2_COMPRESSION); - assertThat(builder.enableBlobGarbageCollection()).isEqualTo(true); - assertThat(builder.blobGarbageCollectionAgeCutoff()).isEqualTo(0.89); - assertThat(builder.blobGarbageCollectionForceThreshold()).isEqualTo(0.80); - assertThat(builder.blobCompactionReadaheadSize()).isEqualTo(262144); - assertThat(builder.blobFileStartingLevel()).isEqualTo(1); - assertThat(builder.prepopulateBlobCache()) - .isEqualTo(PrepopulateBlobCache.PREPOPULATE_BLOB_FLUSH_ONLY); - - builder.setEnableBlobFiles(false) - .setMinBlobSize(4096) - .setBlobFileSize(2048) - .setBlobCompressionType(CompressionType.LZ4_COMPRESSION) - .setEnableBlobGarbageCollection(false) - .setBlobGarbageCollectionAgeCutoff(0.91) - .setBlobGarbageCollectionForceThreshold(0.96) - .setBlobCompactionReadaheadSize(1024) - .setBlobFileStartingLevel(0) - .setPrepopulateBlobCache(PrepopulateBlobCache.PREPOPULATE_BLOB_DISABLE); - - assertThat(builder.enableBlobFiles()).isEqualTo(false); - assertThat(builder.minBlobSize()).isEqualTo(4096); - assertThat(builder.blobFileSize()).isEqualTo(2048); - assertThat(builder.blobCompressionType()).isEqualTo(CompressionType.LZ4_COMPRESSION); - assertThat(builder.enableBlobGarbageCollection()).isEqualTo(false); - assertThat(builder.blobGarbageCollectionAgeCutoff()).isEqualTo(0.91); - assertThat(builder.blobGarbageCollectionForceThreshold()).isEqualTo(0.96); - assertThat(builder.blobCompactionReadaheadSize()).isEqualTo(1024); - assertThat(builder.blobFileStartingLevel()).isEqualTo(0); - assertThat(builder.prepopulateBlobCache()) - .isEqualTo(PrepopulateBlobCache.PREPOPULATE_BLOB_DISABLE); - - final MutableColumnFamilyOptions options = builder.build(); - assertThat(options.getKeys()) - .isEqualTo(new String[] {"enable_blob_files", "min_blob_size", "blob_file_size", - "blob_compression_type", "enable_blob_garbage_collection", - "blob_garbage_collection_age_cutoff", "blob_garbage_collection_force_threshold", - "blob_compaction_readahead_size", 
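For orientation, a compact sketch of these blob options applied when actually opening a database; the 64 KiB threshold mirrors the minBlobSize constant used elsewhere in this test class, and the path is a placeholder:

try (final Options options = new Options()
         .setCreateIfMissing(true)
         .setEnableBlobFiles(true)
         .setMinBlobSize(64 * 1024)           // values >= 64 KiB are written to .blob files
         .setBlobCompressionType(CompressionType.LZ4_COMPRESSION)
         .setEnableBlobGarbageCollection(true);
     final RocksDB db = RocksDB.open(options, "/tmp/blob-sketch")) {
  db.put("big-key".getBytes(UTF_8), new byte[128 * 1024]); // stored as a blob after flush
  db.flush(new FlushOptions().setWaitForFlush(true));
}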
"blob_file_starting_level", - "prepopulate_blob_cache"}); - assertThat(options.getValues()) - .isEqualTo(new String[] {"false", "4096", "2048", "LZ4_COMPRESSION", "false", "0.91", - "0.96", "1024", "0", "PREPOPULATE_BLOB_DISABLE"}); - } - - /** - * Configure the default column family with BLOBs. - * Confirm that BLOBs are generated when appropriately-sized writes are flushed. - * - * @throws RocksDBException if a db access throws an exception - */ - @Test - public void testBlobWriteAboveThreshold() throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true) - .setMinBlobSize(minBlobSize) - .setEnableBlobFiles(true); - - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - db.put(small_key("default"), small_value("default")); - db.flush(new FlushOptions().setWaitForFlush(true)); - - // check there are no blobs in the database - assertThat(countDBFiles(".sst")).isEqualTo(1); - assertThat(countDBFiles(".blob")).isEqualTo(0); - - db.put(large_key("default"), large_value("default")); - db.flush(new FlushOptions().setWaitForFlush(true)); - - // wrote and flushed a value larger than the blobbing threshold - // check there is a single blob in the database - assertThat(countDBFiles(".sst")).isEqualTo(2); - assertThat(countDBFiles(".blob")).isEqualTo(1); - - assertThat(db.get(small_key("default"))).isEqualTo(small_value("default")); - assertThat(db.get(large_key("default"))).isEqualTo(large_value("default")); - - final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder fetchOptions = - db.getOptions(null); - assertThat(fetchOptions.minBlobSize()).isEqualTo(minBlobSize); - assertThat(fetchOptions.enableBlobFiles()).isEqualTo(true); - assertThat(fetchOptions.writeBufferSize()).isEqualTo(64 << 20); - } - } - - /** - * Configure 2 column families respectively with and without BLOBs. - * Confirm that BLOB files are generated (once the DB is flushed) only for the appropriate column - * family. 
- * - * @throws RocksDBException if a db access throws an exception - */ - @Test - public void testBlobWriteAboveThresholdCF() throws RocksDBException { - final ColumnFamilyOptions columnFamilyOptions0 = new ColumnFamilyOptions(); - final ColumnFamilyDescriptor columnFamilyDescriptor0 = - new ColumnFamilyDescriptor("default".getBytes(UTF_8), columnFamilyOptions0); - List<ColumnFamilyDescriptor> columnFamilyDescriptors = - Collections.singletonList(columnFamilyDescriptor0); - List<ColumnFamilyHandle> columnFamilyHandles = new ArrayList<>(); - - try (final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(dbOptions, dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - db.put(columnFamilyHandles.get(0), small_key("default"), small_value("default")); - db.flush(new FlushOptions().setWaitForFlush(true)); - - assertThat(countDBFiles(".blob")).isEqualTo(0); - - try (final ColumnFamilyOptions columnFamilyOptions1 = - new ColumnFamilyOptions().setMinBlobSize(minBlobSize).setEnableBlobFiles(true); - - final ColumnFamilyOptions columnFamilyOptions2 = - new ColumnFamilyOptions().setMinBlobSize(minBlobSize).setEnableBlobFiles(false)) { - final ColumnFamilyDescriptor columnFamilyDescriptor1 = - new ColumnFamilyDescriptor("column_family_1".getBytes(UTF_8), columnFamilyOptions1); - final ColumnFamilyDescriptor columnFamilyDescriptor2 = - new ColumnFamilyDescriptor("column_family_2".getBytes(UTF_8), columnFamilyOptions2); - - // Create the first column family with blob options - db.createColumnFamily(columnFamilyDescriptor1); - - // Create the second column family with non-blob options - db.createColumnFamily(columnFamilyDescriptor2); - } - } - - // Now re-open after auto-close - at this point the CF options we use are recognized.
- try (final ColumnFamilyOptions columnFamilyOptions1 = - new ColumnFamilyOptions().setMinBlobSize(minBlobSize).setEnableBlobFiles(true); - - final ColumnFamilyOptions columnFamilyOptions2 = - new ColumnFamilyOptions().setMinBlobSize(minBlobSize).setEnableBlobFiles(false)) { - assertThat(columnFamilyOptions1.enableBlobFiles()).isEqualTo(true); - assertThat(columnFamilyOptions1.minBlobSize()).isEqualTo(minBlobSize); - assertThat(columnFamilyOptions2.enableBlobFiles()).isEqualTo(false); - assertThat(columnFamilyOptions1.minBlobSize()).isEqualTo(minBlobSize); - - final ColumnFamilyDescriptor columnFamilyDescriptor1 = - new ColumnFamilyDescriptor("column_family_1".getBytes(UTF_8), columnFamilyOptions1); - final ColumnFamilyDescriptor columnFamilyDescriptor2 = - new ColumnFamilyDescriptor("column_family_2".getBytes(UTF_8), columnFamilyOptions2); - columnFamilyDescriptors = new ArrayList<>(); - columnFamilyDescriptors.add(columnFamilyDescriptor0); - columnFamilyDescriptors.add(columnFamilyDescriptor1); - columnFamilyDescriptors.add(columnFamilyDescriptor2); - columnFamilyHandles = new ArrayList<>(); - - assertThat(columnFamilyDescriptor1.getOptions().enableBlobFiles()).isEqualTo(true); - assertThat(columnFamilyDescriptor2.getOptions().enableBlobFiles()).isEqualTo(false); - - try (final DBOptions dbOptions = new DBOptions(); - final RocksDB db = RocksDB.open(dbOptions, dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder1 = - db.getOptions(columnFamilyHandles.get(1)); - assertThat(builder1.enableBlobFiles()).isEqualTo(true); - assertThat(builder1.minBlobSize()).isEqualTo(minBlobSize); - - final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder2 = - db.getOptions(columnFamilyHandles.get(2)); - assertThat(builder2.enableBlobFiles()).isEqualTo(false); - assertThat(builder2.minBlobSize()).isEqualTo(minBlobSize); - - db.put(columnFamilyHandles.get(1), large_key("column_family_1_k2"), - large_value("column_family_1_k2")); - db.flush(new FlushOptions().setWaitForFlush(true), columnFamilyHandles.get(1)); - assertThat(countDBFiles(".blob")).isEqualTo(1); - - db.put(columnFamilyHandles.get(2), large_key("column_family_2_k2"), - large_value("column_family_2_k2")); - db.flush(new FlushOptions().setWaitForFlush(true), columnFamilyHandles.get(2)); - assertThat(countDBFiles(".blob")).isEqualTo(1); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java b/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java deleted file mode 100644 index 005c8bc6d..000000000 --- a/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java +++ /dev/null @@ -1,416 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
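Unlike the fixed descriptor list used in the test above, applications often discover the existing column families before reopening. A rough sketch, with the path as a placeholder:

final String path = "/tmp/cf-sketch";
final List<ColumnFamilyDescriptor> descriptors = new ArrayList<>();
try (final Options options = new Options()) {
  for (final byte[] name : RocksDB.listColumnFamilies(options, path)) {
    descriptors.add(new ColumnFamilyDescriptor(name, new ColumnFamilyOptions()));
  }
}

final List<ColumnFamilyHandle> handles = new ArrayList<>();
try (final DBOptions dbOptions = new DBOptions();
     final RocksDB db = RocksDB.open(dbOptions, path, descriptors, handles)) {
  // handles.get(i) corresponds to descriptors.get(i)
}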
- -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.fail; - -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.stream.Stream; -import org.junit.ClassRule; -import org.junit.Ignore; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class BlockBasedTableConfigTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void cacheIndexAndFilterBlocks() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setCacheIndexAndFilterBlocks(true); - assertThat(blockBasedTableConfig.cacheIndexAndFilterBlocks()). - isTrue(); - } - - @Test - public void cacheIndexAndFilterBlocksWithHighPriority() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - assertThat(blockBasedTableConfig.cacheIndexAndFilterBlocksWithHighPriority()). - isTrue(); - blockBasedTableConfig.setCacheIndexAndFilterBlocksWithHighPriority(false); - assertThat(blockBasedTableConfig.cacheIndexAndFilterBlocksWithHighPriority()).isFalse(); - } - - @Test - public void pinL0FilterAndIndexBlocksInCache() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setPinL0FilterAndIndexBlocksInCache(true); - assertThat(blockBasedTableConfig.pinL0FilterAndIndexBlocksInCache()). - isTrue(); - } - - @Test - public void pinTopLevelIndexAndFilter() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setPinTopLevelIndexAndFilter(false); - assertThat(blockBasedTableConfig.pinTopLevelIndexAndFilter()). - isFalse(); - } - - @Test - public void indexType() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - assertThat(IndexType.values().length).isEqualTo(4); - blockBasedTableConfig.setIndexType(IndexType.kHashSearch); - assertThat(blockBasedTableConfig.indexType()).isEqualTo(IndexType.kHashSearch); - assertThat(IndexType.valueOf("kBinarySearch")).isNotNull(); - blockBasedTableConfig.setIndexType(IndexType.valueOf("kBinarySearch")); - assertThat(blockBasedTableConfig.indexType()).isEqualTo(IndexType.kBinarySearch); - } - - @Test - public void dataBlockIndexType() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setDataBlockIndexType(DataBlockIndexType.kDataBlockBinaryAndHash); - assertThat(blockBasedTableConfig.dataBlockIndexType()) - .isEqualTo(DataBlockIndexType.kDataBlockBinaryAndHash); - blockBasedTableConfig.setDataBlockIndexType(DataBlockIndexType.kDataBlockBinarySearch); - assertThat(blockBasedTableConfig.dataBlockIndexType()) - .isEqualTo(DataBlockIndexType.kDataBlockBinarySearch); - } - - @Test - public void checksumType() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - assertThat(ChecksumType.values().length).isEqualTo(5); - assertThat(ChecksumType.valueOf("kxxHash")). 
- isEqualTo(ChecksumType.kxxHash); - blockBasedTableConfig.setChecksumType(ChecksumType.kNoChecksum); - assertThat(blockBasedTableConfig.checksumType()).isEqualTo(ChecksumType.kNoChecksum); - blockBasedTableConfig.setChecksumType(ChecksumType.kxxHash); - assertThat(blockBasedTableConfig.checksumType()).isEqualTo(ChecksumType.kxxHash); - blockBasedTableConfig.setChecksumType(ChecksumType.kxxHash64); - assertThat(blockBasedTableConfig.checksumType()).isEqualTo(ChecksumType.kxxHash64); - blockBasedTableConfig.setChecksumType(ChecksumType.kXXH3); - assertThat(blockBasedTableConfig.checksumType()).isEqualTo(ChecksumType.kXXH3); - } - - @Test - public void jniPortal() throws Exception { - // Verifies that the JNI layer is correctly translating options. - // Since introspecting the options requires creating a database, the checks - // cover multiple options at the same time. - - final BlockBasedTableConfig tableConfig = new BlockBasedTableConfig(); - - tableConfig.setIndexType(IndexType.kBinarySearch); - tableConfig.setDataBlockIndexType(DataBlockIndexType.kDataBlockBinarySearch); - tableConfig.setChecksumType(ChecksumType.kNoChecksum); - try (final Options options = new Options().setTableFormatConfig(tableConfig)) { - String opts = getOptionAsString(options); - assertThat(opts).contains("index_type=kBinarySearch"); - assertThat(opts).contains("data_block_index_type=kDataBlockBinarySearch"); - assertThat(opts).contains("checksum=kNoChecksum"); - } - - tableConfig.setIndexType(IndexType.kHashSearch); - tableConfig.setDataBlockIndexType(DataBlockIndexType.kDataBlockBinaryAndHash); - tableConfig.setChecksumType(ChecksumType.kCRC32c); - try (final Options options = new Options().setTableFormatConfig(tableConfig)) { - options.useCappedPrefixExtractor(1); // Needed to use kHashSearch - String opts = getOptionAsString(options); - assertThat(opts).contains("index_type=kHashSearch"); - assertThat(opts).contains("data_block_index_type=kDataBlockBinaryAndHash"); - assertThat(opts).contains("checksum=kCRC32c"); - } - - tableConfig.setIndexType(IndexType.kTwoLevelIndexSearch); - tableConfig.setChecksumType(ChecksumType.kxxHash); - try (final Options options = new Options().setTableFormatConfig(tableConfig)) { - String opts = getOptionAsString(options); - assertThat(opts).contains("index_type=kTwoLevelIndexSearch"); - assertThat(opts).contains("checksum=kxxHash"); - } - - tableConfig.setIndexType(IndexType.kBinarySearchWithFirstKey); - tableConfig.setChecksumType(ChecksumType.kxxHash64); - try (final Options options = new Options().setTableFormatConfig(tableConfig)) { - String opts = getOptionAsString(options); - assertThat(opts).contains("index_type=kBinarySearchWithFirstKey"); - assertThat(opts).contains("checksum=kxxHash64"); - } - - tableConfig.setChecksumType(ChecksumType.kXXH3); - try (final Options options = new Options().setTableFormatConfig(tableConfig)) { - String opts = getOptionAsString(options); - assertThat(opts).contains("checksum=kXXH3"); - } - } - - private String getOptionAsString(Options options) throws Exception { - options.setCreateIfMissing(true); - String dbPath = dbFolder.getRoot().getAbsolutePath(); - String result; - try (final RocksDB db = RocksDB.open(options, dbPath); - final Stream pathStream = Files.walk(Paths.get(dbPath))) { - Path optionsPath = - pathStream - .filter(p -> p.getFileName().toString().startsWith("OPTIONS")) - .findAny() - .orElseThrow(() -> new AssertionError("Missing options file")); - byte[] optionsData = Files.readAllBytes(optionsPath); - result = new 
String(optionsData, StandardCharsets.UTF_8); - } - RocksDB.destroyDB(dbPath, options); - return result; - } - - @Test - public void noBlockCache() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setNoBlockCache(true); - assertThat(blockBasedTableConfig.noBlockCache()).isTrue(); - } - - @Test - public void blockCache() { - try ( - final Cache cache = new LRUCache(17 * 1024 * 1024); - final Options options = new Options().setTableFormatConfig( - new BlockBasedTableConfig().setBlockCache(cache))) { - assertThat(options.tableFactoryName()).isEqualTo("BlockBasedTable"); - } - } - - @Test - public void blockCacheIntegration() throws RocksDBException { - try (final Cache cache = new LRUCache(8 * 1024 * 1024); - final Statistics statistics = new Statistics()) { - for (int shard = 0; shard < 8; shard++) { - try (final Options options = - new Options() - .setCreateIfMissing(true) - .setStatistics(statistics) - .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache)); - final RocksDB db = - RocksDB.open(options, dbFolder.getRoot().getAbsolutePath() + "/" + shard)) { - final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); - final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); - - db.put(key, value); - db.flush(new FlushOptions()); - db.get(key); - - assertThat(statistics.getTickerCount(TickerType.BLOCK_CACHE_ADD)).isEqualTo(shard + 1); - } - } - } - } - - @Test - public void persistentCache() throws RocksDBException { - try (final DBOptions dbOptions = new DBOptions(). - setInfoLogLevel(InfoLogLevel.INFO_LEVEL). - setCreateIfMissing(true); - final Logger logger = new Logger(dbOptions) { - @Override - protected void log(final InfoLogLevel infoLogLevel, final String logMsg) { - System.out.println(infoLogLevel.name() + ": " + logMsg); - } - }) { - try (final PersistentCache persistentCache = - new PersistentCache(Env.getDefault(), dbFolder.getRoot().getPath(), 1024 * 1024 * 100, logger, false); - final Options options = new Options().setTableFormatConfig( - new BlockBasedTableConfig().setPersistentCache(persistentCache))) { - assertThat(options.tableFactoryName()).isEqualTo("BlockBasedTable"); - } - } - } - - @Test - public void blockSize() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setBlockSize(10); - assertThat(blockBasedTableConfig.blockSize()).isEqualTo(10); - } - - @Test - public void blockSizeDeviation() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setBlockSizeDeviation(12); - assertThat(blockBasedTableConfig.blockSizeDeviation()). - isEqualTo(12); - } - - @Test - public void blockRestartInterval() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setBlockRestartInterval(15); - assertThat(blockBasedTableConfig.blockRestartInterval()). - isEqualTo(15); - } - - @Test - public void indexBlockRestartInterval() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setIndexBlockRestartInterval(15); - assertThat(blockBasedTableConfig.indexBlockRestartInterval()). - isEqualTo(15); - } - - @Test - public void metadataBlockSize() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setMetadataBlockSize(1024); - assertThat(blockBasedTableConfig.metadataBlockSize()). 
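The cache sharing verified by blockCacheIntegration above is the usual way these pieces fit together in application code. A small sketch with arbitrarily chosen sizes and a placeholder path:

try (final Cache sharedCache = new LRUCache(64 * 1024 * 1024);   // one cache shared by many DBs
     final Filter bloomFilter = new BloomFilter(10);
     final Options options = new Options()
         .setCreateIfMissing(true)
         .setTableFormatConfig(new BlockBasedTableConfig()
             .setBlockCache(sharedCache)
             .setFilterPolicy(bloomFilter)
             .setCacheIndexAndFilterBlocks(true));
     final RocksDB db = RocksDB.open(options, "/tmp/bbt-sketch")) {
  // Reads on this database now go through sharedCache and consult the bloom filter.
}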
- isEqualTo(1024); - } - - @Test - public void partitionFilters() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setPartitionFilters(true); - assertThat(blockBasedTableConfig.partitionFilters()). - isTrue(); - } - - @Test - public void optimizeFiltersForMemory() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setOptimizeFiltersForMemory(true); - assertThat(blockBasedTableConfig.optimizeFiltersForMemory()).isTrue(); - } - - @Test - public void useDeltaEncoding() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setUseDeltaEncoding(false); - assertThat(blockBasedTableConfig.useDeltaEncoding()). - isFalse(); - } - - @Test - public void blockBasedTableWithFilterPolicy() { - try(final Options options = new Options() - .setTableFormatConfig(new BlockBasedTableConfig() - .setFilterPolicy(new BloomFilter(10)))) { - assertThat(options.tableFactoryName()). - isEqualTo("BlockBasedTable"); - } - } - - @Test - public void blockBasedTableWithoutFilterPolicy() { - try(final Options options = new Options().setTableFormatConfig( - new BlockBasedTableConfig().setFilterPolicy(null))) { - assertThat(options.tableFactoryName()). - isEqualTo("BlockBasedTable"); - } - } - - @Test - public void wholeKeyFiltering() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setWholeKeyFiltering(false); - assertThat(blockBasedTableConfig.wholeKeyFiltering()). - isFalse(); - } - - @Test - public void verifyCompression() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - assertThat(blockBasedTableConfig.verifyCompression()).isFalse(); - blockBasedTableConfig.setVerifyCompression(true); - assertThat(blockBasedTableConfig.verifyCompression()). - isTrue(); - } - - @Test - public void readAmpBytesPerBit() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setReadAmpBytesPerBit(2); - assertThat(blockBasedTableConfig.readAmpBytesPerBit()). - isEqualTo(2); - } - - @Test - public void formatVersion() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - for (int version = 0; version <= 5; version++) { - blockBasedTableConfig.setFormatVersion(version); - assertThat(blockBasedTableConfig.formatVersion()).isEqualTo(version); - } - } - - @Test(expected = AssertionError.class) - public void formatVersionFailNegative() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setFormatVersion(-1); - } - - @Test(expected = RocksDBException.class) - public void invalidFormatVersion() throws RocksDBException { - final BlockBasedTableConfig blockBasedTableConfig = - new BlockBasedTableConfig().setFormatVersion(99999); - - try (final Options options = new Options().setTableFormatConfig(blockBasedTableConfig); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - fail("Opening the database with an invalid format_version should have raised an exception"); - } - } - - @Test - public void enableIndexCompression() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setEnableIndexCompression(false); - assertThat(blockBasedTableConfig.enableIndexCompression()). 
- isFalse(); - } - - @Test - public void blockAlign() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setBlockAlign(true); - assertThat(blockBasedTableConfig.blockAlign()). - isTrue(); - } - - @Test - public void indexShortening() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setIndexShortening(IndexShorteningMode.kShortenSeparatorsAndSuccessor); - assertThat(blockBasedTableConfig.indexShortening()) - .isEqualTo(IndexShorteningMode.kShortenSeparatorsAndSuccessor); - } - - @Deprecated - @Test - public void hashIndexAllowCollision() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setHashIndexAllowCollision(false); - assertThat(blockBasedTableConfig.hashIndexAllowCollision()). - isTrue(); // NOTE: setHashIndexAllowCollision should do nothing! - } - - @Deprecated - @Test - public void blockCacheSize() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setBlockCacheSize(8 * 1024); - assertThat(blockBasedTableConfig.blockCacheSize()). - isEqualTo(8 * 1024); - } - - @Deprecated - @Test - public void blockCacheNumShardBits() { - final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); - blockBasedTableConfig.setCacheNumShardBits(5); - assertThat(blockBasedTableConfig.cacheNumShardBits()). - isEqualTo(5); - } - -} diff --git a/java/src/test/java/org/rocksdb/BuiltinComparatorTest.java b/java/src/test/java/org/rocksdb/BuiltinComparatorTest.java deleted file mode 100644 index e238ae07b..000000000 --- a/java/src/test/java/org/rocksdb/BuiltinComparatorTest.java +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import static org.assertj.core.api.Assertions.assertThat; - -public class BuiltinComparatorTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void builtinForwardComparator() - throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true) - .setComparator(BuiltinComparator.BYTEWISE_COMPARATOR); - final RocksDB rocksDb = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()) - ) { - rocksDb.put("abc1".getBytes(), "abc1".getBytes()); - rocksDb.put("abc2".getBytes(), "abc2".getBytes()); - rocksDb.put("abc3".getBytes(), "abc3".getBytes()); - - try(final RocksIterator rocksIterator = rocksDb.newIterator()) { - // Iterate over keys using a iterator - rocksIterator.seekToFirst(); - assertThat(rocksIterator.isValid()).isTrue(); - assertThat(rocksIterator.key()).isEqualTo( - "abc1".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc1".getBytes()); - rocksIterator.next(); - assertThat(rocksIterator.isValid()).isTrue(); - assertThat(rocksIterator.key()).isEqualTo( - "abc2".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc2".getBytes()); - rocksIterator.next(); - assertThat(rocksIterator.isValid()).isTrue(); - assertThat(rocksIterator.key()).isEqualTo( - "abc3".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc3".getBytes()); - rocksIterator.next(); - assertThat(rocksIterator.isValid()).isFalse(); - // Get last one - rocksIterator.seekToLast(); - assertThat(rocksIterator.isValid()).isTrue(); - assertThat(rocksIterator.key()).isEqualTo( - "abc3".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc3".getBytes()); - // Seek for abc - rocksIterator.seek("abc".getBytes()); - assertThat(rocksIterator.isValid()).isTrue(); - assertThat(rocksIterator.key()).isEqualTo( - "abc1".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc1".getBytes()); - } - } - } - - @Test - public void builtinReverseComparator() - throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true) - .setComparator(BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR); - final RocksDB rocksDb = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()) - ) { - - rocksDb.put("abc1".getBytes(), "abc1".getBytes()); - rocksDb.put("abc2".getBytes(), "abc2".getBytes()); - rocksDb.put("abc3".getBytes(), "abc3".getBytes()); - - try (final RocksIterator rocksIterator = rocksDb.newIterator()) { - // Iterate over keys using a iterator - rocksIterator.seekToFirst(); - assertThat(rocksIterator.isValid()).isTrue(); - assertThat(rocksIterator.key()).isEqualTo( - "abc3".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc3".getBytes()); - rocksIterator.next(); - assertThat(rocksIterator.isValid()).isTrue(); - assertThat(rocksIterator.key()).isEqualTo( - "abc2".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc2".getBytes()); - rocksIterator.next(); - assertThat(rocksIterator.isValid()).isTrue(); - assertThat(rocksIterator.key()).isEqualTo( - "abc1".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc1".getBytes()); - rocksIterator.next(); - assertThat(rocksIterator.isValid()).isFalse(); - // Get last one - rocksIterator.seekToLast(); - 
assertThat(rocksIterator.isValid()).isTrue(); - assertThat(rocksIterator.key()).isEqualTo( - "abc1".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc1".getBytes()); - // Will be invalid because abc is after abc1 - rocksIterator.seek("abc".getBytes()); - assertThat(rocksIterator.isValid()).isFalse(); - // Will be abc3 because the next one after abc999 - // is abc3 - rocksIterator.seek("abc999".getBytes()); - assertThat(rocksIterator.key()).isEqualTo( - "abc3".getBytes()); - assertThat(rocksIterator.value()).isEqualTo( - "abc3".getBytes()); - } - } - } - - @Test - public void builtinComparatorEnum(){ - assertThat(BuiltinComparator.BYTEWISE_COMPARATOR.ordinal()) - .isEqualTo(0); - assertThat( - BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR.ordinal()) - .isEqualTo(1); - assertThat(BuiltinComparator.values().length).isEqualTo(2); - assertThat(BuiltinComparator.valueOf("BYTEWISE_COMPARATOR")). - isEqualTo(BuiltinComparator.BYTEWISE_COMPARATOR); - } -} diff --git a/java/src/test/java/org/rocksdb/ByteBufferUnsupportedOperationTest.java b/java/src/test/java/org/rocksdb/ByteBufferUnsupportedOperationTest.java deleted file mode 100644 index f596f573f..000000000 --- a/java/src/test/java/org/rocksdb/ByteBufferUnsupportedOperationTest.java +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.nio.charset.StandardCharsets; -import java.util.*; -import java.util.concurrent.ConcurrentHashMap; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.ReverseBytewiseComparator; - -public class ByteBufferUnsupportedOperationTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - public static class Handler { - private final RocksDB database; - private final Map columnFamilies; - - public Handler(final String path, final Options options) throws RocksDBException { - RocksDB.destroyDB(path, options); - this.database = RocksDB.open(options, path); - this.columnFamilies = new ConcurrentHashMap<>(); - } - - public void addTable(final UUID streamID) throws RocksDBException { - final ColumnFamilyOptions tableOptions = new ColumnFamilyOptions(); - tableOptions.optimizeUniversalStyleCompaction(); - try (final ComparatorOptions comparatorOptions = new ComparatorOptions()) { - // comparatorOptions.setReusedSynchronisationType(ReusedSynchronisationType.ADAPTIVE_MUTEX); - tableOptions.setComparator(new ReverseBytewiseComparator(comparatorOptions)); - final ColumnFamilyDescriptor tableDescriptor = new ColumnFamilyDescriptor( - streamID.toString().getBytes(StandardCharsets.UTF_8), tableOptions); - final ColumnFamilyHandle tableHandle = database.createColumnFamily(tableDescriptor); - columnFamilies.put(streamID, tableHandle); - } - } - - public void updateAll(final List keyValuePairs, final UUID streamID) - throws RocksDBException { - final ColumnFamilyHandle currTable = columnFamilies.get(streamID); - try (final WriteBatch batchedWrite = new WriteBatch(); - final WriteOptions writeOptions = new WriteOptions()) { - for (final byte[][] pair : keyValuePairs) { - final byte[] keyBytes = pair[0]; - final 
byte[] valueBytes = pair[1]; - batchedWrite.put(currTable, keyBytes, valueBytes); - } - database.write(writeOptions, batchedWrite); - } - } - public boolean containsValue(final byte[] encodedValue, final UUID streamID) { - try (final RocksIterator iter = database.newIterator(columnFamilies.get(streamID))) { - iter.seekToFirst(); - while (iter.isValid()) { - final byte[] val = iter.value(); - if (Arrays.equals(val, encodedValue)) { - return true; - } - iter.next(); - } - } - return false; - } - - public void close() { - for (final ColumnFamilyHandle handle : columnFamilies.values()) { - handle.close(); - } - database.close(); - } - } - - private void inner(final int numRepeats) throws RocksDBException { - final Options opts = new Options(); - opts.setCreateIfMissing(true); - final Handler handler = new Handler("testDB", opts); - final UUID stream1 = UUID.randomUUID(); - - final List entries = new ArrayList<>(); - for (int i = 0; i < numRepeats; i++) { - final byte[] value = value(i); - final byte[] key = key(i); - entries.add(new byte[][] {key, value}); - } - handler.addTable(stream1); - handler.updateAll(entries, stream1); - - for (int i = 0; i < numRepeats; i++) { - final byte[] val = value(i); - final boolean hasValue = handler.containsValue(val, stream1); - if (!hasValue) { - throw new IllegalStateException("not has value " + i); - } - } - - handler.close(); - } - - private static byte[] key(final int i) { - return ("key" + i).getBytes(StandardCharsets.UTF_8); - } - - private static byte[] value(final int i) { - return ("value" + i).getBytes(StandardCharsets.UTF_8); - } - - @Test - public void unsupportedOperation() throws RocksDBException { - final int numRepeats = 1000; - final int repeatTest = 10; - - // the error is not always reproducible... let's try to increase the odds by repeating the main - // test body - for (int i = 0; i < repeatTest; i++) { - try { - inner(numRepeats); - } catch (final RuntimeException runtimeException) { - System.out.println("Exception on repeat " + i); - throw runtimeException; - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/BytewiseComparatorRegressionTest.java b/java/src/test/java/org/rocksdb/BytewiseComparatorRegressionTest.java deleted file mode 100644 index fe950362b..000000000 --- a/java/src/test/java/org/rocksdb/BytewiseComparatorRegressionTest.java +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.junit.Assert.assertArrayEquals; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.BytewiseComparator; - -/** - * This test confirms that the following issues were in fact resolved - * by a change made between 6.2.2 and 6.22.1, - * to wit {@link ...} - * which as part of its effect, changed the Java bytewise comparators. 
- * - * {@link ...} - * {@link ...} - */ -public class BytewiseComparatorRegressionTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Rule public TemporaryFolder temporarySSTFolder = new TemporaryFolder(); - - private final static byte[][] testData = {{10, -11, 13}, {10, 11, 12}, {10, 11, 14}}; - private final static byte[][] orderedData = {{10, 11, 12}, {10, 11, 14}, {10, -11, 13}}; - - /** - * {@link ...} - */ - @Test - public void testJavaComparator() throws RocksDBException { - final BytewiseComparator comparator = new BytewiseComparator(new ComparatorOptions()); - performTest(new Options().setCreateIfMissing(true).setComparator(comparator)); - } - - @Test - public void testDefaultComparator() throws RocksDBException { - performTest(new Options().setCreateIfMissing(true)); - } - - /** - * {@link ...} - */ - @Test - public void testCppComparator() throws RocksDBException { - performTest(new Options().setCreateIfMissing(true).setComparator( - BuiltinComparator.BYTEWISE_COMPARATOR)); - } - - private void performTest(final Options options) throws RocksDBException { - try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - for (final byte[] item : testData) { - db.put(item, item); - } - try (final RocksIterator iterator = db.newIterator()) { - iterator.seekToFirst(); - final ArrayList result = new ArrayList<>(); - while (iterator.isValid()) { - result.add(iterator.key()); - iterator.next(); - } - assertArrayEquals(orderedData, result.toArray()); - } - } - } - - private byte[] hexToByte(final String hexString) { - final byte[] bytes = new byte[hexString.length() / 2]; - if (bytes.length * 2 < hexString.length()) { - throw new RuntimeException("Hex string has odd length: " + hexString); - } - - for (int i = 0; i < bytes.length; i++) { - final int firstDigit = toDigit(hexString.charAt(i + i)); - final int secondDigit = toDigit(hexString.charAt(i + i + 1)); - bytes[i] = (byte) ((firstDigit << 4) + secondDigit); - } - - return bytes; - } - - private int toDigit(final char hexChar) { - final int digit = Character.digit(hexChar, 16); - if (digit == -1) { - throw new IllegalArgumentException("Invalid Hexadecimal Character: " + hexChar); - } - return digit; - } - - /** - * {@link ...} - * - * @throws RocksDBException if something goes wrong, or if the regression occurs - * @throws IOException if we can't make the temporary file - */ - @Test - public void testSST() throws RocksDBException, IOException { - final File tempSSTFile = temporarySSTFolder.newFile("test_file_with_weird_keys.sst"); - - final EnvOptions envOpts = new EnvOptions(); - final Options opts = new Options(); - opts.setComparator(new BytewiseComparator(new ComparatorOptions())); - final SstFileWriter writer = new SstFileWriter(envOpts, opts); - writer.open(tempSSTFile.getAbsolutePath()); - final byte[] gKey = - hexToByte("000000293030303030303030303030303030303030303032303736343730696E666F33"); - final byte[] wKey = - hexToByte("0000008d3030303030303030303030303030303030303030303437363433696e666f34"); - writer.put(new Slice(gKey), new Slice("dummyV1")); - writer.put(new Slice(wKey), new Slice("dummyV2")); - writer.finish(); - } -} diff --git a/java/src/test/java/org/rocksdb/CheckPointTest.java b/java/src/test/java/org/rocksdb/CheckPointTest.java deleted file mode 100644 index c2cc6fc62..000000000 --- 
a/java/src/test/java/org/rocksdb/CheckPointTest.java +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CheckPointTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Rule - public TemporaryFolder checkpointFolder = new TemporaryFolder(); - - @Test - public void checkPoint() throws RocksDBException { - try (final Options options = new Options(). - setCreateIfMissing(true)) { - - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - db.put("key".getBytes(), "value".getBytes()); - try (final Checkpoint checkpoint = Checkpoint.create(db)) { - checkpoint.createCheckpoint(checkpointFolder. - getRoot().getAbsolutePath() + "/snapshot1"); - db.put("key2".getBytes(), "value2".getBytes()); - checkpoint.createCheckpoint(checkpointFolder. - getRoot().getAbsolutePath() + "/snapshot2"); - } - } - - try (final RocksDB db = RocksDB.open(options, - checkpointFolder.getRoot().getAbsolutePath() + - "/snapshot1")) { - assertThat(new String(db.get("key".getBytes()))). - isEqualTo("value"); - assertThat(db.get("key2".getBytes())).isNull(); - } - - try (final RocksDB db = RocksDB.open(options, - checkpointFolder.getRoot().getAbsolutePath() + - "/snapshot2")) { - assertThat(new String(db.get("key".getBytes()))). - isEqualTo("value"); - assertThat(new String(db.get("key2".getBytes()))). - isEqualTo("value2"); - } - } - } - - @Test(expected = IllegalArgumentException.class) - public void failIfDbIsNull() { - try (final Checkpoint checkpoint = Checkpoint.create(null)) { - - } - } - - @Test(expected = IllegalStateException.class) - public void failIfDbNotInitialized() throws RocksDBException { - try (final RocksDB db = RocksDB.open( - dbFolder.getRoot().getAbsolutePath())) { - db.close(); - Checkpoint.create(db); - } - } - - @Test(expected = RocksDBException.class) - public void failWithIllegalPath() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final Checkpoint checkpoint = Checkpoint.create(db)) { - checkpoint.createCheckpoint("/Z:///:\\C:\\TZ/-"); - } - } -} diff --git a/java/src/test/java/org/rocksdb/ClockCacheTest.java b/java/src/test/java/org/rocksdb/ClockCacheTest.java deleted file mode 100644 index d1241ac75..000000000 --- a/java/src/test/java/org/rocksdb/ClockCacheTest.java +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
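The ClockCacheTest that follows only constructs and closes a ClockCache. As a point of reference, the sketch below shows how such a cache is typically wired into the block-based table options; the capacity, shard-bit and path values are illustrative assumptions, and the snippet is a fragment assumed to run where RocksDBException can propagate.

    // Sketch only: use a ClockCache as the block cache of a BlockBasedTableConfig.
    RocksDB.loadLibrary();
    try (final Cache cache = new ClockCache(64 * 1024 * 1024, 6, false)) {
      final BlockBasedTableConfig tableConfig = new BlockBasedTableConfig()
          .setBlockCache(cache); // the cache can be shared across several Options instances
      try (final Options options = new Options()
               .setCreateIfMissing(true)
               .setTableFormatConfig(tableConfig);
           final RocksDB db = RocksDB.open(options, "/tmp/clock-cache-sketch")) {
        db.put("key".getBytes(), "value".getBytes()); // subsequent reads of this key can be served from the block cache
      }
    }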
- -package org.rocksdb; - -import org.junit.Test; - -public class ClockCacheTest { - - static { - RocksDB.loadLibrary(); - } - - @Test - public void newClockCache() { - final long capacity = 1000; - final int numShardBits = 16; - final boolean strictCapacityLimit = true; - try(final Cache clockCache = new ClockCache(capacity, - numShardBits, strictCapacityLimit)) { - //no op - } - } -} diff --git a/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java b/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java deleted file mode 100644 index 7d7581048..000000000 --- a/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java +++ /dev/null @@ -1,714 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.*; -import org.junit.ClassRule; -import org.junit.Test; -import org.rocksdb.test.RemoveEmptyValueCompactionFilterFactory; - -public class ColumnFamilyOptionsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - public static final Random rand = PlatformRandomHelper. - getPlatformSpecificRandomFactory(); - - @Test - public void copyConstructor() { - ColumnFamilyOptions origOpts = new ColumnFamilyOptions(); - origOpts.setNumLevels(rand.nextInt(8)); - origOpts.setTargetFileSizeMultiplier(rand.nextInt(100)); - origOpts.setLevel0StopWritesTrigger(rand.nextInt(50)); - ColumnFamilyOptions copyOpts = new ColumnFamilyOptions(origOpts); - assertThat(origOpts.numLevels()).isEqualTo(copyOpts.numLevels()); - assertThat(origOpts.targetFileSizeMultiplier()).isEqualTo(copyOpts.targetFileSizeMultiplier()); - assertThat(origOpts.level0StopWritesTrigger()).isEqualTo(copyOpts.level0StopWritesTrigger()); - } - - @Test - public void getColumnFamilyOptionsFromProps() { - Properties properties = new Properties(); - properties.put("write_buffer_size", "112"); - properties.put("max_write_buffer_number", "13"); - - try (final ColumnFamilyOptions opt = ColumnFamilyOptions. - getColumnFamilyOptionsFromProps(properties)) { - // setup sample properties - assertThat(opt).isNotNull(); - assertThat(String.valueOf(opt.writeBufferSize())). - isEqualTo(properties.get("write_buffer_size")); - assertThat(String.valueOf(opt.maxWriteBufferNumber())). 
- isEqualTo(properties.get("max_write_buffer_number")); - } - } - - @Test - public void getColumnFamilyOptionsFromPropsWithIgnoreIllegalValue() { - // setup sample properties - final Properties properties = new Properties(); - properties.put("tomato", "1024"); - properties.put("burger", "2"); - properties.put("write_buffer_size", "112"); - properties.put("max_write_buffer_number", "13"); - - try (final ConfigOptions cfgOpts = new ConfigOptions().setIgnoreUnknownOptions(true); - final ColumnFamilyOptions opt = - ColumnFamilyOptions.getColumnFamilyOptionsFromProps(cfgOpts, properties)) { - // setup sample properties - assertThat(opt).isNotNull(); - assertThat(String.valueOf(opt.writeBufferSize())) - .isEqualTo(properties.get("write_buffer_size")); - assertThat(String.valueOf(opt.maxWriteBufferNumber())) - .isEqualTo(properties.get("max_write_buffer_number")); - } - } - - @Test - public void failColumnFamilyOptionsFromPropsWithIllegalValue() { - // setup sample properties - final Properties properties = new Properties(); - properties.put("tomato", "1024"); - properties.put("burger", "2"); - - try (final ColumnFamilyOptions opt = - ColumnFamilyOptions.getColumnFamilyOptionsFromProps(properties)) { - assertThat(opt).isNull(); - } - } - - @Test(expected = IllegalArgumentException.class) - public void failColumnFamilyOptionsFromPropsWithNullValue() { - try (final ColumnFamilyOptions opt = - ColumnFamilyOptions.getColumnFamilyOptionsFromProps(null)) { - } - } - - @Test(expected = IllegalArgumentException.class) - public void failColumnFamilyOptionsFromPropsWithEmptyProps() { - try (final ColumnFamilyOptions opt = - ColumnFamilyOptions.getColumnFamilyOptionsFromProps( - new Properties())) { - } - } - - @Test - public void writeBufferSize() throws RocksDBException { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setWriteBufferSize(longValue); - assertThat(opt.writeBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void maxWriteBufferNumber() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); - opt.setMaxWriteBufferNumber(intValue); - assertThat(opt.maxWriteBufferNumber()).isEqualTo(intValue); - } - } - - @Test - public void minWriteBufferNumberToMerge() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); - opt.setMinWriteBufferNumberToMerge(intValue); - assertThat(opt.minWriteBufferNumberToMerge()).isEqualTo(intValue); - } - } - - @Test - public void numLevels() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); - opt.setNumLevels(intValue); - assertThat(opt.numLevels()).isEqualTo(intValue); - } - } - - @Test - public void levelZeroFileNumCompactionTrigger() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); - opt.setLevelZeroFileNumCompactionTrigger(intValue); - assertThat(opt.levelZeroFileNumCompactionTrigger()).isEqualTo(intValue); - } - } - - @Test - public void levelZeroSlowdownWritesTrigger() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); - opt.setLevelZeroSlowdownWritesTrigger(intValue); - assertThat(opt.levelZeroSlowdownWritesTrigger()).isEqualTo(intValue); - } - } - - @Test - public void levelZeroStopWritesTrigger() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); 
- opt.setLevelZeroStopWritesTrigger(intValue); - assertThat(opt.levelZeroStopWritesTrigger()).isEqualTo(intValue); - } - } - - @Test - public void targetFileSizeBase() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setTargetFileSizeBase(longValue); - assertThat(opt.targetFileSizeBase()).isEqualTo(longValue); - } - } - - @Test - public void targetFileSizeMultiplier() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); - opt.setTargetFileSizeMultiplier(intValue); - assertThat(opt.targetFileSizeMultiplier()).isEqualTo(intValue); - } - } - - @Test - public void maxBytesForLevelBase() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setMaxBytesForLevelBase(longValue); - assertThat(opt.maxBytesForLevelBase()).isEqualTo(longValue); - } - } - - @Test - public void levelCompactionDynamicLevelBytes() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setLevelCompactionDynamicLevelBytes(boolValue); - assertThat(opt.levelCompactionDynamicLevelBytes()) - .isEqualTo(boolValue); - } - } - - @Test - public void maxBytesForLevelMultiplier() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final double doubleValue = rand.nextDouble(); - opt.setMaxBytesForLevelMultiplier(doubleValue); - assertThat(opt.maxBytesForLevelMultiplier()).isEqualTo(doubleValue); - } - } - - @Test - public void maxBytesForLevelMultiplierAdditional() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue1 = rand.nextInt(); - final int intValue2 = rand.nextInt(); - final int[] ints = new int[]{intValue1, intValue2}; - opt.setMaxBytesForLevelMultiplierAdditional(ints); - assertThat(opt.maxBytesForLevelMultiplierAdditional()).isEqualTo(ints); - } - } - - @Test - public void maxCompactionBytes() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setMaxCompactionBytes(longValue); - assertThat(opt.maxCompactionBytes()).isEqualTo(longValue); - } - } - - @Test - public void softPendingCompactionBytesLimit() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setSoftPendingCompactionBytesLimit(longValue); - assertThat(opt.softPendingCompactionBytesLimit()).isEqualTo(longValue); - } - } - - @Test - public void hardPendingCompactionBytesLimit() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setHardPendingCompactionBytesLimit(longValue); - assertThat(opt.hardPendingCompactionBytesLimit()).isEqualTo(longValue); - } - } - - @Test - public void level0FileNumCompactionTrigger() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); - opt.setLevel0FileNumCompactionTrigger(intValue); - assertThat(opt.level0FileNumCompactionTrigger()).isEqualTo(intValue); - } - } - - @Test - public void level0SlowdownWritesTrigger() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); - opt.setLevel0SlowdownWritesTrigger(intValue); - assertThat(opt.level0SlowdownWritesTrigger()).isEqualTo(intValue); - } - } - - @Test - public void level0StopWritesTrigger() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int 
intValue = rand.nextInt(); - opt.setLevel0StopWritesTrigger(intValue); - assertThat(opt.level0StopWritesTrigger()).isEqualTo(intValue); - } - } - - @Test - public void arenaBlockSize() throws RocksDBException { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setArenaBlockSize(longValue); - assertThat(opt.arenaBlockSize()).isEqualTo(longValue); - } - } - - @Test - public void disableAutoCompactions() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setDisableAutoCompactions(boolValue); - assertThat(opt.disableAutoCompactions()).isEqualTo(boolValue); - } - } - - @Test - public void maxSequentialSkipInIterations() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setMaxSequentialSkipInIterations(longValue); - assertThat(opt.maxSequentialSkipInIterations()).isEqualTo(longValue); - } - } - - @Test - public void inplaceUpdateSupport() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setInplaceUpdateSupport(boolValue); - assertThat(opt.inplaceUpdateSupport()).isEqualTo(boolValue); - } - } - - @Test - public void inplaceUpdateNumLocks() throws RocksDBException { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setInplaceUpdateNumLocks(longValue); - assertThat(opt.inplaceUpdateNumLocks()).isEqualTo(longValue); - } - } - - @Test - public void memtablePrefixBloomSizeRatio() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final double doubleValue = rand.nextDouble(); - opt.setMemtablePrefixBloomSizeRatio(doubleValue); - assertThat(opt.memtablePrefixBloomSizeRatio()).isEqualTo(doubleValue); - } - } - - @Test - public void experimentalMempurgeThreshold() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final double doubleValue = rand.nextDouble(); - opt.setExperimentalMempurgeThreshold(doubleValue); - assertThat(opt.experimentalMempurgeThreshold()).isEqualTo(doubleValue); - } - } - - @Test - public void memtableWholeKeyFiltering() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final boolean booleanValue = rand.nextBoolean(); - opt.setMemtableWholeKeyFiltering(booleanValue); - assertThat(opt.memtableWholeKeyFiltering()).isEqualTo(booleanValue); - } - } - - @Test - public void memtableHugePageSize() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setMemtableHugePageSize(longValue); - assertThat(opt.memtableHugePageSize()).isEqualTo(longValue); - } - } - - @Test - public void bloomLocality() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final int intValue = rand.nextInt(); - opt.setBloomLocality(intValue); - assertThat(opt.bloomLocality()).isEqualTo(intValue); - } - } - - @Test - public void maxSuccessiveMerges() throws RocksDBException { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final long longValue = rand.nextLong(); - opt.setMaxSuccessiveMerges(longValue); - assertThat(opt.maxSuccessiveMerges()).isEqualTo(longValue); - } - } - - @Test - public void optimizeFiltersForHits() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final boolean aBoolean = rand.nextBoolean(); - opt.setOptimizeFiltersForHits(aBoolean); - 
assertThat(opt.optimizeFiltersForHits()).isEqualTo(aBoolean); - } - } - - @Test - public void memTable() throws RocksDBException { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - opt.setMemTableConfig(new HashLinkedListMemTableConfig()); - assertThat(opt.memTableFactoryName()). - isEqualTo("HashLinkedListRepFactory"); - } - } - - @Test - public void comparator() throws RocksDBException { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - opt.setComparator(BuiltinComparator.BYTEWISE_COMPARATOR); - } - } - - @Test - public void linkageOfPrepMethods() { - try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { - options.optimizeUniversalStyleCompaction(); - options.optimizeUniversalStyleCompaction(4000); - options.optimizeLevelStyleCompaction(); - options.optimizeLevelStyleCompaction(3000); - options.optimizeForPointLookup(10); - options.optimizeForSmallDb(); - } - } - - @Test - public void shouldSetTestPrefixExtractor() { - try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { - options.useFixedLengthPrefixExtractor(100); - options.useFixedLengthPrefixExtractor(10); - } - } - - @Test - public void shouldSetTestCappedPrefixExtractor() { - try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { - options.useCappedPrefixExtractor(100); - options.useCappedPrefixExtractor(10); - } - } - - @Test - public void compressionTypes() { - try (final ColumnFamilyOptions columnFamilyOptions - = new ColumnFamilyOptions()) { - for (final CompressionType compressionType : - CompressionType.values()) { - columnFamilyOptions.setCompressionType(compressionType); - assertThat(columnFamilyOptions.compressionType()). - isEqualTo(compressionType); - assertThat(CompressionType.valueOf("NO_COMPRESSION")). - isEqualTo(CompressionType.NO_COMPRESSION); - } - } - } - - @Test - public void compressionPerLevel() { - try (final ColumnFamilyOptions columnFamilyOptions - = new ColumnFamilyOptions()) { - assertThat(columnFamilyOptions.compressionPerLevel()).isEmpty(); - List compressionTypeList = new ArrayList<>(); - for (int i = 0; i < columnFamilyOptions.numLevels(); i++) { - compressionTypeList.add(CompressionType.NO_COMPRESSION); - } - columnFamilyOptions.setCompressionPerLevel(compressionTypeList); - compressionTypeList = columnFamilyOptions.compressionPerLevel(); - for (CompressionType compressionType : compressionTypeList) { - assertThat(compressionType).isEqualTo( - CompressionType.NO_COMPRESSION); - } - } - } - - @Test - public void differentCompressionsPerLevel() { - try (final ColumnFamilyOptions columnFamilyOptions - = new ColumnFamilyOptions()) { - columnFamilyOptions.setNumLevels(3); - - assertThat(columnFamilyOptions.compressionPerLevel()).isEmpty(); - List compressionTypeList = new ArrayList<>(); - - compressionTypeList.add(CompressionType.BZLIB2_COMPRESSION); - compressionTypeList.add(CompressionType.SNAPPY_COMPRESSION); - compressionTypeList.add(CompressionType.LZ4_COMPRESSION); - - columnFamilyOptions.setCompressionPerLevel(compressionTypeList); - compressionTypeList = columnFamilyOptions.compressionPerLevel(); - - assertThat(compressionTypeList.size()).isEqualTo(3); - assertThat(compressionTypeList). 
- containsExactly( - CompressionType.BZLIB2_COMPRESSION, - CompressionType.SNAPPY_COMPRESSION, - CompressionType.LZ4_COMPRESSION); - - } - } - - @Test - public void bottommostCompressionType() { - try (final ColumnFamilyOptions columnFamilyOptions - = new ColumnFamilyOptions()) { - assertThat(columnFamilyOptions.bottommostCompressionType()) - .isEqualTo(CompressionType.DISABLE_COMPRESSION_OPTION); - - for (final CompressionType compressionType : CompressionType.values()) { - columnFamilyOptions.setBottommostCompressionType(compressionType); - assertThat(columnFamilyOptions.bottommostCompressionType()) - .isEqualTo(compressionType); - } - } - } - - @Test - public void bottommostCompressionOptions() { - try (final ColumnFamilyOptions columnFamilyOptions = - new ColumnFamilyOptions(); - final CompressionOptions bottommostCompressionOptions = - new CompressionOptions() - .setMaxDictBytes(123)) { - - columnFamilyOptions.setBottommostCompressionOptions( - bottommostCompressionOptions); - assertThat(columnFamilyOptions.bottommostCompressionOptions()) - .isEqualTo(bottommostCompressionOptions); - assertThat(columnFamilyOptions.bottommostCompressionOptions() - .maxDictBytes()).isEqualTo(123); - } - } - - @Test - public void compressionOptions() { - try (final ColumnFamilyOptions columnFamilyOptions - = new ColumnFamilyOptions(); - final CompressionOptions compressionOptions = new CompressionOptions() - .setMaxDictBytes(123)) { - - columnFamilyOptions.setCompressionOptions(compressionOptions); - assertThat(columnFamilyOptions.compressionOptions()) - .isEqualTo(compressionOptions); - assertThat(columnFamilyOptions.compressionOptions().maxDictBytes()) - .isEqualTo(123); - } - } - - @Test - public void compactionStyles() { - try (final ColumnFamilyOptions columnFamilyOptions - = new ColumnFamilyOptions()) { - for (final CompactionStyle compactionStyle : - CompactionStyle.values()) { - columnFamilyOptions.setCompactionStyle(compactionStyle); - assertThat(columnFamilyOptions.compactionStyle()). - isEqualTo(compactionStyle); - assertThat(CompactionStyle.valueOf("FIFO")). - isEqualTo(CompactionStyle.FIFO); - } - } - } - - @Test - public void maxTableFilesSizeFIFO() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - long longValue = rand.nextLong(); - // Size has to be positive - longValue = (longValue < 0) ? -longValue : longValue; - longValue = (longValue == 0) ? longValue + 1 : longValue; - opt.setMaxTableFilesSizeFIFO(longValue); - assertThat(opt.maxTableFilesSizeFIFO()). - isEqualTo(longValue); - } - } - - @Test - public void maxWriteBufferNumberToMaintain() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - int intValue = rand.nextInt(); - // Size has to be positive - intValue = (intValue < 0) ? -intValue : intValue; - intValue = (intValue == 0) ? intValue + 1 : intValue; - opt.setMaxWriteBufferNumberToMaintain(intValue); - assertThat(opt.maxWriteBufferNumberToMaintain()). - isEqualTo(intValue); - } - } - - @Test - public void compactionPriorities() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - for (final CompactionPriority compactionPriority : - CompactionPriority.values()) { - opt.setCompactionPriority(compactionPriority); - assertThat(opt.compactionPriority()). - isEqualTo(compactionPriority); - } - } - } - - @Test - public void reportBgIoStats() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final boolean booleanValue = true; - opt.setReportBgIoStats(booleanValue); - assertThat(opt.reportBgIoStats()). 
- isEqualTo(booleanValue); - } - } - - @Test - public void ttl() { - try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { - options.setTtl(1000 * 60); - assertThat(options.ttl()). - isEqualTo(1000 * 60); - } - } - - @Test - public void periodicCompactionSeconds() { - try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { - options.setPeriodicCompactionSeconds(1000 * 60); - assertThat(options.periodicCompactionSeconds()).isEqualTo(1000 * 60); - } - } - - @Test - public void compactionOptionsUniversal() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions(); - final CompactionOptionsUniversal optUni = new CompactionOptionsUniversal() - .setCompressionSizePercent(7)) { - opt.setCompactionOptionsUniversal(optUni); - assertThat(opt.compactionOptionsUniversal()). - isEqualTo(optUni); - assertThat(opt.compactionOptionsUniversal().compressionSizePercent()) - .isEqualTo(7); - } - } - - @Test - public void compactionOptionsFIFO() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions(); - final CompactionOptionsFIFO optFifo = new CompactionOptionsFIFO() - .setMaxTableFilesSize(2000)) { - opt.setCompactionOptionsFIFO(optFifo); - assertThat(opt.compactionOptionsFIFO()). - isEqualTo(optFifo); - assertThat(opt.compactionOptionsFIFO().maxTableFilesSize()) - .isEqualTo(2000); - } - } - - @Test - public void forceConsistencyChecks() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - final boolean booleanValue = true; - opt.setForceConsistencyChecks(booleanValue); - assertThat(opt.forceConsistencyChecks()). - isEqualTo(booleanValue); - } - } - - @Test - public void compactionFilter() { - try(final ColumnFamilyOptions options = new ColumnFamilyOptions(); - final RemoveEmptyValueCompactionFilter cf = new RemoveEmptyValueCompactionFilter()) { - options.setCompactionFilter(cf); - assertThat(options.compactionFilter()).isEqualTo(cf); - } - } - - @Test - public void compactionFilterFactory() { - try(final ColumnFamilyOptions options = new ColumnFamilyOptions(); - final RemoveEmptyValueCompactionFilterFactory cff = new RemoveEmptyValueCompactionFilterFactory()) { - options.setCompactionFilterFactory(cff); - assertThat(options.compactionFilterFactory()).isEqualTo(cff); - } - } - - @Test - public void compactionThreadLimiter() { - try (final ColumnFamilyOptions options = new ColumnFamilyOptions(); - final ConcurrentTaskLimiter compactionThreadLimiter = - new ConcurrentTaskLimiterImpl("name", 3)) { - options.setCompactionThreadLimiter(compactionThreadLimiter); - assertThat(options.compactionThreadLimiter()).isEqualTo(compactionThreadLimiter); - } - } - - @Test - public void oldDefaults() { - try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { - options.oldDefaults(4, 6); - assertEquals(4 << 20, options.writeBufferSize()); - assertThat(options.compactionPriority()).isEqualTo(CompactionPriority.ByCompensatedSize); - assertThat(options.targetFileSizeBase()).isEqualTo(2 * 1048576); - assertThat(options.maxBytesForLevelBase()).isEqualTo(10 * 1048576); - assertThat(options.softPendingCompactionBytesLimit()).isEqualTo(0); - assertThat(options.hardPendingCompactionBytesLimit()).isEqualTo(0); - assertThat(options.level0StopWritesTrigger()).isEqualTo(24); - } - } - - @Test - public void optimizeForSmallDbWithCache() { - try (final ColumnFamilyOptions options = new ColumnFamilyOptions(); - final Cache cache = new LRUCache(1024)) { - assertThat(options.optimizeForSmallDb(cache)).isEqualTo(options); - } - } - - @Test - public void 
cfPaths() throws IOException { - try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { - final List paths = Arrays.asList( - new DbPath(Paths.get("test1"), 2 << 25), new DbPath(Paths.get("/test2/path"), 2 << 25)); - assertThat(options.cfPaths()).isEqualTo(Collections.emptyList()); - assertThat(options.setCfPaths(paths)).isEqualTo(options); - assertThat(options.cfPaths()).isEqualTo(paths); - } - } -} diff --git a/java/src/test/java/org/rocksdb/ColumnFamilyTest.java b/java/src/test/java/org/rocksdb/ColumnFamilyTest.java deleted file mode 100644 index e98327d93..000000000 --- a/java/src/test/java/org/rocksdb/ColumnFamilyTest.java +++ /dev/null @@ -1,582 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.util.*; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class ColumnFamilyTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void columnFamilyDescriptorName() throws RocksDBException { - final byte[] cfName = "some_name".getBytes(UTF_8); - - try(final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()) { - final ColumnFamilyDescriptor cfDescriptor = - new ColumnFamilyDescriptor(cfName, cfOptions); - assertThat(cfDescriptor.getName()).isEqualTo(cfName); - } - } - - @Test - public void columnFamilyDescriptorOptions() throws RocksDBException { - final byte[] cfName = "some_name".getBytes(UTF_8); - - try(final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions() - .setCompressionType(CompressionType.BZLIB2_COMPRESSION)) { - final ColumnFamilyDescriptor cfDescriptor = - new ColumnFamilyDescriptor(cfName, cfOptions); - - assertThat(cfDescriptor.getOptions().compressionType()) - .isEqualTo(CompressionType.BZLIB2_COMPRESSION); - } - } - - @Test - public void listColumnFamilies() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - // Test listColumnFamilies - final List columnFamilyNames = RocksDB.listColumnFamilies(options, - dbFolder.getRoot().getAbsolutePath()); - assertThat(columnFamilyNames).isNotNull(); - assertThat(columnFamilyNames.size()).isGreaterThan(0); - assertThat(columnFamilyNames.size()).isEqualTo(1); - assertThat(new String(columnFamilyNames.get(0))).isEqualTo("default"); - } - } - - @Test - public void defaultColumnFamily() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - final ColumnFamilyHandle cfh = db.getDefaultColumnFamily(); - try { - assertThat(cfh).isNotNull(); - - assertThat(cfh.getName()).isEqualTo("default".getBytes(UTF_8)); - assertThat(cfh.getID()).isEqualTo(0); - assertThat(cfh.getDescriptor().getName()).isEqualTo("default".getBytes(UTF_8)); - - final byte[] key = 
"key".getBytes(); - final byte[] value = "value".getBytes(); - - db.put(cfh, key, value); - - final byte[] actualValue = db.get(cfh, key); - - assertThat(cfh).isNotNull(); - assertThat(actualValue).isEqualTo(value); - } finally { - cfh.close(); - } - } - } - - @Test - public void createColumnFamily() throws RocksDBException { - final byte[] cfName = "new_cf".getBytes(UTF_8); - final ColumnFamilyDescriptor cfDescriptor = new ColumnFamilyDescriptor(cfName, - new ColumnFamilyOptions()); - - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - - final ColumnFamilyHandle columnFamilyHandle = db.createColumnFamily(cfDescriptor); - - try { - assertThat(columnFamilyHandle.getName()).isEqualTo(cfName); - assertThat(columnFamilyHandle.getID()).isEqualTo(1); - - final ColumnFamilyDescriptor latestDescriptor = columnFamilyHandle.getDescriptor(); - assertThat(latestDescriptor.getName()).isEqualTo(cfName); - - final List columnFamilyNames = RocksDB.listColumnFamilies( - options, dbFolder.getRoot().getAbsolutePath()); - assertThat(columnFamilyNames).isNotNull(); - assertThat(columnFamilyNames.size()).isGreaterThan(0); - assertThat(columnFamilyNames.size()).isEqualTo(2); - assertThat(new String(columnFamilyNames.get(0))).isEqualTo("default"); - assertThat(new String(columnFamilyNames.get(1))).isEqualTo("new_cf"); - } finally { - columnFamilyHandle.close(); - } - } - } - - @Test - public void openWithColumnFamilies() throws RocksDBException { - final List cfNames = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes()) - ); - - final List columnFamilyHandleList = - new ArrayList<>(); - - // Test open database with column family names - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), cfNames, - columnFamilyHandleList)) { - assertThat(columnFamilyHandleList.size()).isEqualTo(2); - db.put("dfkey1".getBytes(), "dfvalue".getBytes()); - db.put(columnFamilyHandleList.get(0), "dfkey2".getBytes(), "dfvalue".getBytes()); - db.put(columnFamilyHandleList.get(1), "newcfkey1".getBytes(), "newcfvalue".getBytes()); - - String retVal = new String(db.get(columnFamilyHandleList.get(1), "newcfkey1".getBytes())); - assertThat(retVal).isEqualTo("newcfvalue"); - assertThat((db.get(columnFamilyHandleList.get(1), "dfkey1".getBytes()))).isNull(); - db.delete(columnFamilyHandleList.get(1), "newcfkey1".getBytes()); - assertThat((db.get(columnFamilyHandleList.get(1), "newcfkey1".getBytes()))).isNull(); - db.delete(columnFamilyHandleList.get(0), new WriteOptions(), "dfkey2".getBytes()); - assertThat(db.get(columnFamilyHandleList.get(0), new ReadOptions(), "dfkey2".getBytes())) - .isNull(); - } - } - - @Test - public void getWithOutValueAndCf() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY)); - final List columnFamilyHandleList = new ArrayList<>(); - - // Test open database with column family names - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList)) { - db.put( - columnFamilyHandleList.get(0), new WriteOptions(), "key1".getBytes(), 
"value".getBytes()); - db.put("key2".getBytes(), "12345678".getBytes()); - final byte[] outValue = new byte[5]; - // not found value - int getResult = db.get("keyNotFound".getBytes(), outValue); - assertThat(getResult).isEqualTo(RocksDB.NOT_FOUND); - // found value which fits in outValue - getResult = db.get(columnFamilyHandleList.get(0), "key1".getBytes(), outValue); - assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); - assertThat(outValue).isEqualTo("value".getBytes()); - // found value which fits partially - getResult = - db.get(columnFamilyHandleList.get(0), new ReadOptions(), "key2".getBytes(), outValue); - assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); - assertThat(outValue).isEqualTo("12345".getBytes()); - } - } - - @Test - public void createWriteDropColumnFamily() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList)) { - ColumnFamilyHandle tmpColumnFamilyHandle; - tmpColumnFamilyHandle = db.createColumnFamily( - new ColumnFamilyDescriptor("tmpCF".getBytes(), new ColumnFamilyOptions())); - db.put(tmpColumnFamilyHandle, "key".getBytes(), "value".getBytes()); - db.dropColumnFamily(tmpColumnFamilyHandle); - assertThat(tmpColumnFamilyHandle.isOwningHandle()).isTrue(); - } - } - - @Test - public void createWriteDropColumnFamilies() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList)) { - ColumnFamilyHandle tmpColumnFamilyHandle = null; - ColumnFamilyHandle tmpColumnFamilyHandle2 = null; - tmpColumnFamilyHandle = db.createColumnFamily( - new ColumnFamilyDescriptor("tmpCF".getBytes(), new ColumnFamilyOptions())); - tmpColumnFamilyHandle2 = db.createColumnFamily( - new ColumnFamilyDescriptor("tmpCF2".getBytes(), new ColumnFamilyOptions())); - db.put(tmpColumnFamilyHandle, "key".getBytes(), "value".getBytes()); - db.put(tmpColumnFamilyHandle2, "key".getBytes(), "value".getBytes()); - db.dropColumnFamilies(Arrays.asList(tmpColumnFamilyHandle, tmpColumnFamilyHandle2)); - assertThat(tmpColumnFamilyHandle.isOwningHandle()).isTrue(); - assertThat(tmpColumnFamilyHandle2.isOwningHandle()).isTrue(); - } - } - - @Test - public void writeBatch() throws RocksDBException { - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); - final ColumnFamilyOptions defaultCfOptions = new ColumnFamilyOptions() - .setMergeOperator(stringAppendOperator)) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, - defaultCfOptions), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = 
RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, columnFamilyHandleList); - final WriteBatch writeBatch = new WriteBatch(); - final WriteOptions writeOpt = new WriteOptions()) { - writeBatch.put("key".getBytes(), "value".getBytes()); - writeBatch.put(db.getDefaultColumnFamily(), "mergeKey".getBytes(), "merge".getBytes()); - writeBatch.merge(db.getDefaultColumnFamily(), "mergeKey".getBytes(), "merge".getBytes()); - writeBatch.put(columnFamilyHandleList.get(1), "newcfkey".getBytes(), "value".getBytes()); - writeBatch.put(columnFamilyHandleList.get(1), "newcfkey2".getBytes(), "value2".getBytes()); - writeBatch.delete("xyz".getBytes()); - writeBatch.delete(columnFamilyHandleList.get(1), "xyz".getBytes()); - db.write(writeOpt, writeBatch); - - assertThat(db.get(columnFamilyHandleList.get(1), "xyz".getBytes()) == null); - assertThat(new String(db.get(columnFamilyHandleList.get(1), "newcfkey".getBytes()))) - .isEqualTo("value"); - assertThat(new String(db.get(columnFamilyHandleList.get(1), "newcfkey2".getBytes()))) - .isEqualTo("value2"); - assertThat(new String(db.get("key".getBytes()))).isEqualTo("value"); - // check if key is merged - assertThat(new String(db.get(db.getDefaultColumnFamily(), "mergeKey".getBytes()))) - .isEqualTo("merge,merge"); - } - } - } - - @Test - public void iteratorOnColumnFamily() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, columnFamilyHandleList)) { - db.put(columnFamilyHandleList.get(1), "newcfkey".getBytes(), "value".getBytes()); - db.put(columnFamilyHandleList.get(1), "newcfkey2".getBytes(), "value2".getBytes()); - try (final RocksIterator rocksIterator = db.newIterator(columnFamilyHandleList.get(1))) { - rocksIterator.seekToFirst(); - Map refMap = new HashMap<>(); - refMap.put("newcfkey", "value"); - refMap.put("newcfkey2", "value2"); - int i = 0; - while (rocksIterator.isValid()) { - i++; - assertThat(refMap.get(new String(rocksIterator.key()))) - .isEqualTo(new String(rocksIterator.value())); - rocksIterator.next(); - } - assertThat(i).isEqualTo(2); - } - } - } - - @Test - public void multiGet() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, columnFamilyHandleList)) { - db.put(columnFamilyHandleList.get(0), "key".getBytes(), "value".getBytes()); - db.put(columnFamilyHandleList.get(1), "newcfkey".getBytes(), "value".getBytes()); - - final List keys = - Arrays.asList(new byte[][] {"key".getBytes(), "newcfkey".getBytes()}); - - List retValues = db.multiGetAsList(columnFamilyHandleList, keys); - assertThat(retValues.size()).isEqualTo(2); - assertThat(new String(retValues.get(0))).isEqualTo("value"); - assertThat(new String(retValues.get(1))).isEqualTo("value"); - retValues = db.multiGetAsList(new ReadOptions(), 
columnFamilyHandleList, keys); - assertThat(retValues.size()).isEqualTo(2); - assertThat(new String(retValues.get(0))).isEqualTo("value"); - assertThat(new String(retValues.get(1))).isEqualTo("value"); - } - } - - @Test - public void multiGetAsList() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, columnFamilyHandleList)) { - db.put(columnFamilyHandleList.get(0), "key".getBytes(), "value".getBytes()); - db.put(columnFamilyHandleList.get(1), "newcfkey".getBytes(), "value".getBytes()); - - final List keys = - Arrays.asList(new byte[][] {"key".getBytes(), "newcfkey".getBytes()}); - List retValues = db.multiGetAsList(columnFamilyHandleList, keys); - assertThat(retValues.size()).isEqualTo(2); - assertThat(new String(retValues.get(0))).isEqualTo("value"); - assertThat(new String(retValues.get(1))).isEqualTo("value"); - retValues = db.multiGetAsList(new ReadOptions(), columnFamilyHandleList, keys); - assertThat(retValues.size()).isEqualTo(2); - assertThat(new String(retValues.get(0))).isEqualTo("value"); - assertThat(new String(retValues.get(1))).isEqualTo("value"); - } - } - - @Test - public void properties() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, columnFamilyHandleList)) { - assertThat(db.getProperty("rocksdb.estimate-num-keys")).isNotNull(); - assertThat(db.getLongProperty(columnFamilyHandleList.get(0), "rocksdb.estimate-num-keys")) - .isGreaterThanOrEqualTo(0); - assertThat(db.getProperty("rocksdb.stats")).isNotNull(); - assertThat(db.getProperty(columnFamilyHandleList.get(0), "rocksdb.sstables")).isNotNull(); - assertThat(db.getProperty(columnFamilyHandleList.get(1), "rocksdb.estimate-num-keys")) - .isNotNull(); - assertThat(db.getProperty(columnFamilyHandleList.get(1), "rocksdb.stats")).isNotNull(); - assertThat(db.getProperty(columnFamilyHandleList.get(1), "rocksdb.sstables")).isNotNull(); - assertThat(db.getAggregatedLongProperty("rocksdb.estimate-num-keys")).isNotNull(); - assertThat(db.getAggregatedLongProperty("rocksdb.estimate-num-keys")) - .isGreaterThanOrEqualTo(0); - } - } - - - @Test - public void iterators() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList)) { - List iterators = null; - try { - iterators = db.newIterators(columnFamilyHandleList); - assertThat(iterators.size()).isEqualTo(2); - RocksIterator iter = iterators.get(0); - 
iter.seekToFirst(); - final Map defRefMap = new HashMap<>(); - defRefMap.put("dfkey1", "dfvalue"); - defRefMap.put("key", "value"); - while (iter.isValid()) { - assertThat(defRefMap.get(new String(iter.key()))). - isEqualTo(new String(iter.value())); - iter.next(); - } - // iterate over new_cf key/value pairs - final Map cfRefMap = new HashMap<>(); - cfRefMap.put("newcfkey", "value"); - cfRefMap.put("newcfkey2", "value2"); - iter = iterators.get(1); - iter.seekToFirst(); - while (iter.isValid()) { - assertThat(cfRefMap.get(new String(iter.key()))). - isEqualTo(new String(iter.value())); - iter.next(); - } - } finally { - if (iterators != null) { - for (final RocksIterator rocksIterator : iterators) { - rocksIterator.close(); - } - } - } - } - } - - @Test(expected = RocksDBException.class) - public void failPutDisposedCF() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, columnFamilyHandleList)) { - db.dropColumnFamily(columnFamilyHandleList.get(1)); - db.put(columnFamilyHandleList.get(1), "key".getBytes(), "value".getBytes()); - } - } - - @Test(expected = RocksDBException.class) - public void failRemoveDisposedCF() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, columnFamilyHandleList)) { - db.dropColumnFamily(columnFamilyHandleList.get(1)); - db.delete(columnFamilyHandleList.get(1), "key".getBytes()); - } - } - - @Test(expected = RocksDBException.class) - public void failGetDisposedCF() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList)) { - db.dropColumnFamily(columnFamilyHandleList.get(1)); - db.get(columnFamilyHandleList.get(1), "key".getBytes()); - } - } - - @Test(expected = RocksDBException.class) - public void failMultiGetWithoutCorrectNumberOfCF() throws RocksDBException { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList)) { - final List keys = new ArrayList<>(); - keys.add("key".getBytes()); - keys.add("newcfkey".getBytes()); - final List cfCustomList = new ArrayList<>(); - db.multiGetAsList(cfCustomList, keys); - } - } - - @Test - public void testByteCreateFolumnFamily() throws RocksDBException { - - try (final 
Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()) - ) { - final byte[] b0 = new byte[]{(byte) 0x00}; - final byte[] b1 = new byte[]{(byte) 0x01}; - final byte[] b2 = new byte[]{(byte) 0x02}; - db.createColumnFamily(new ColumnFamilyDescriptor(b0)); - db.createColumnFamily(new ColumnFamilyDescriptor(b1)); - final List families = - RocksDB.listColumnFamilies(options, dbFolder.getRoot().getAbsolutePath()); - assertThat(families).contains("default".getBytes(), b0, b1); - db.createColumnFamily(new ColumnFamilyDescriptor(b2)); - } - } - - @Test - public void testCFNamesWithZeroBytes() throws RocksDBException { - ColumnFamilyHandle cf1 = null, cf2 = null; - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()); - ) { - final byte[] b0 = new byte[] {0, 0}; - final byte[] b1 = new byte[] {0, 1}; - cf1 = db.createColumnFamily(new ColumnFamilyDescriptor(b0)); - cf2 = db.createColumnFamily(new ColumnFamilyDescriptor(b1)); - final List families = - RocksDB.listColumnFamilies(options, dbFolder.getRoot().getAbsolutePath()); - assertThat(families).contains("default".getBytes(), b0, b1); - } - } - - @Test - public void testCFNameSimplifiedChinese() throws RocksDBException { - ColumnFamilyHandle columnFamilyHandle = null; - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()); - ) { - final String simplifiedChinese = "\u7b80\u4f53\u5b57"; - columnFamilyHandle = - db.createColumnFamily(new ColumnFamilyDescriptor(simplifiedChinese.getBytes())); - - final List families = - RocksDB.listColumnFamilies(options, dbFolder.getRoot().getAbsolutePath()); - assertThat(families).contains("default".getBytes(), simplifiedChinese.getBytes()); - } - } - - @Test - public void testDestroyColumnFamilyHandle() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath());) { - final byte[] name1 = "cf1".getBytes(); - final byte[] name2 = "cf2".getBytes(); - final ColumnFamilyDescriptor desc1 = new ColumnFamilyDescriptor(name1); - final ColumnFamilyDescriptor desc2 = new ColumnFamilyDescriptor(name2); - final ColumnFamilyHandle cf1 = db.createColumnFamily(desc1); - final ColumnFamilyHandle cf2 = db.createColumnFamily(desc2); - assertTrue(cf1.isOwningHandle()); - assertTrue(cf2.isOwningHandle()); - assertFalse(cf1.isDefaultColumnFamily()); - db.destroyColumnFamilyHandle(cf1); - // At this point cf1 should not be used! - assertFalse(cf1.isOwningHandle()); - assertTrue(cf2.isOwningHandle()); - } - } -} diff --git a/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java b/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java deleted file mode 100644 index 57bf22b57..000000000 --- a/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.Test; -import org.rocksdb.CompactRangeOptions.BottommostLevelCompaction; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompactRangeOptionsTest { - - static { - RocksDB.loadLibrary(); - } - - @Test - public void exclusiveManualCompaction() { - CompactRangeOptions opt = new CompactRangeOptions(); - boolean value = false; - opt.setExclusiveManualCompaction(value); - assertThat(opt.exclusiveManualCompaction()).isEqualTo(value); - value = true; - opt.setExclusiveManualCompaction(value); - assertThat(opt.exclusiveManualCompaction()).isEqualTo(value); - } - - @Test - public void bottommostLevelCompaction() { - CompactRangeOptions opt = new CompactRangeOptions(); - BottommostLevelCompaction value = BottommostLevelCompaction.kSkip; - opt.setBottommostLevelCompaction(value); - assertThat(opt.bottommostLevelCompaction()).isEqualTo(value); - value = BottommostLevelCompaction.kForce; - opt.setBottommostLevelCompaction(value); - assertThat(opt.bottommostLevelCompaction()).isEqualTo(value); - value = BottommostLevelCompaction.kIfHaveCompactionFilter; - opt.setBottommostLevelCompaction(value); - assertThat(opt.bottommostLevelCompaction()).isEqualTo(value); - value = BottommostLevelCompaction.kForceOptimized; - opt.setBottommostLevelCompaction(value); - assertThat(opt.bottommostLevelCompaction()).isEqualTo(value); - } - - @Test - public void changeLevel() { - CompactRangeOptions opt = new CompactRangeOptions(); - boolean value = false; - opt.setChangeLevel(value); - assertThat(opt.changeLevel()).isEqualTo(value); - value = true; - opt.setChangeLevel(value); - assertThat(opt.changeLevel()).isEqualTo(value); - } - - @Test - public void targetLevel() { - CompactRangeOptions opt = new CompactRangeOptions(); - int value = 2; - opt.setTargetLevel(value); - assertThat(opt.targetLevel()).isEqualTo(value); - value = 3; - opt.setTargetLevel(value); - assertThat(opt.targetLevel()).isEqualTo(value); - } - - @Test - public void targetPathId() { - CompactRangeOptions opt = new CompactRangeOptions(); - int value = 2; - opt.setTargetPathId(value); - assertThat(opt.targetPathId()).isEqualTo(value); - value = 3; - opt.setTargetPathId(value); - assertThat(opt.targetPathId()).isEqualTo(value); - } - - @Test - public void allowWriteStall() { - CompactRangeOptions opt = new CompactRangeOptions(); - boolean value = false; - opt.setAllowWriteStall(value); - assertThat(opt.allowWriteStall()).isEqualTo(value); - value = true; - opt.setAllowWriteStall(value); - assertThat(opt.allowWriteStall()).isEqualTo(value); - } - - @Test - public void maxSubcompactions() { - CompactRangeOptions opt = new CompactRangeOptions(); - int value = 2; - opt.setMaxSubcompactions(value); - assertThat(opt.maxSubcompactions()).isEqualTo(value); - value = 3; - opt.setMaxSubcompactions(value); - assertThat(opt.maxSubcompactions()).isEqualTo(value); - } -} diff --git a/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java b/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java deleted file mode 100644 index 35a14eb54..000000000 --- a/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.test.RemoveEmptyValueCompactionFilterFactory; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompactionFilterFactoryTest { - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void columnFamilyOptions_setCompactionFilterFactory() - throws RocksDBException { - try(final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RemoveEmptyValueCompactionFilterFactory compactionFilterFactory - = new RemoveEmptyValueCompactionFilterFactory(); - final ColumnFamilyOptions new_cf_opts - = new ColumnFamilyOptions() - .setCompactionFilterFactory(compactionFilterFactory)) { - - final List cfNames = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts)); - - final List cfHandles = new ArrayList<>(); - - try (final RocksDB rocksDb = - RocksDB.open(options, dbFolder.getRoot().getAbsolutePath(), cfNames, cfHandles)) { - final byte[] key1 = "key1".getBytes(); - final byte[] key2 = "key2".getBytes(); - - final byte[] value1 = "value1".getBytes(); - final byte[] value2 = new byte[0]; - - rocksDb.put(cfHandles.get(1), key1, value1); - rocksDb.put(cfHandles.get(1), key2, value2); - - rocksDb.compactRange(cfHandles.get(1)); - - assertThat(rocksDb.get(cfHandles.get(1), key1)).isEqualTo(value1); - final boolean exists = rocksDb.keyMayExist(cfHandles.get(1), key2, null); - assertThat(exists).isFalse(); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/CompactionJobInfoTest.java b/java/src/test/java/org/rocksdb/CompactionJobInfoTest.java deleted file mode 100644 index c71b0da16..000000000 --- a/java/src/test/java/org/rocksdb/CompactionJobInfoTest.java +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompactionJobInfoTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void columnFamilyName() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.columnFamilyName()) - .isEmpty(); - } - } - - @Test - public void status() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.status().getCode()) - .isEqualTo(Status.Code.Ok); - } - } - - @Test - public void threadId() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.threadId()) - .isEqualTo(0); - } - } - - @Test - public void jobId() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.jobId()) - .isEqualTo(0); - } - } - - @Test - public void baseInputLevel() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.baseInputLevel()) - .isEqualTo(0); - } - } - - @Test - public void outputLevel() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.outputLevel()) - .isEqualTo(0); - } - } - - @Test - public void inputFiles() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.inputFiles()) - .isEmpty(); - } - } - - @Test - public void outputFiles() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.outputFiles()) - .isEmpty(); - } - } - - @Test - public void tableProperties() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.tableProperties()) - .isEmpty(); - } - } - - @Test - public void compactionReason() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.compactionReason()) - .isEqualTo(CompactionReason.kUnknown); - } - } - - @Test - public void compression() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.compression()) - .isEqualTo(CompressionType.NO_COMPRESSION); - } - } - - @Test - public void stats() { - try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { - assertThat(compactionJobInfo.stats()) - .isNotNull(); - } - } -} diff --git a/java/src/test/java/org/rocksdb/CompactionJobStatsTest.java b/java/src/test/java/org/rocksdb/CompactionJobStatsTest.java deleted file mode 100644 index 5c1eb2aab..000000000 --- a/java/src/test/java/org/rocksdb/CompactionJobStatsTest.java +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompactionJobStatsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void reset() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - compactionJobStats.reset(); - assertThat(compactionJobStats.elapsedMicros()).isEqualTo(0); - } - } - - @Test - public void add() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats(); - final CompactionJobStats otherCompactionJobStats = new CompactionJobStats()) { - compactionJobStats.add(otherCompactionJobStats); - } - } - - @Test - public void elapsedMicros() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.elapsedMicros()).isEqualTo(0); - } - } - - @Test - public void numInputRecords() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numInputRecords()).isEqualTo(0); - } - } - - @Test - public void numInputFiles() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numInputFiles()).isEqualTo(0); - } - } - - @Test - public void numInputFilesAtOutputLevel() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numInputFilesAtOutputLevel()).isEqualTo(0); - } - } - - @Test - public void numOutputRecords() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numOutputRecords()).isEqualTo(0); - } - } - - @Test - public void numOutputFiles() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numOutputFiles()).isEqualTo(0); - } - } - - @Test - public void isManualCompaction() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.isManualCompaction()).isFalse(); - } - } - - @Test - public void totalInputBytes() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.totalInputBytes()).isEqualTo(0); - } - } - - @Test - public void totalOutputBytes() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.totalOutputBytes()).isEqualTo(0); - } - } - - - @Test - public void numRecordsReplaced() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numRecordsReplaced()).isEqualTo(0); - } - } - - @Test - public void totalInputRawKeyBytes() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.totalInputRawKeyBytes()).isEqualTo(0); - } - } - - @Test - public void totalInputRawValueBytes() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.totalInputRawValueBytes()).isEqualTo(0); - } - } - - @Test - public void numInputDeletionRecords() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numInputDeletionRecords()).isEqualTo(0); - } - } - - @Test - public void numExpiredDeletionRecords() { - try (final CompactionJobStats compactionJobStats = new 
CompactionJobStats()) { - assertThat(compactionJobStats.numExpiredDeletionRecords()).isEqualTo(0); - } - } - - @Test - public void numCorruptKeys() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numCorruptKeys()).isEqualTo(0); - } - } - - @Test - public void fileWriteNanos() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.fileWriteNanos()).isEqualTo(0); - } - } - - @Test - public void fileRangeSyncNanos() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.fileRangeSyncNanos()).isEqualTo(0); - } - } - - @Test - public void fileFsyncNanos() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.fileFsyncNanos()).isEqualTo(0); - } - } - - @Test - public void filePrepareWriteNanos() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.filePrepareWriteNanos()).isEqualTo(0); - } - } - - @Test - public void smallestOutputKeyPrefix() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.smallestOutputKeyPrefix()).isEmpty(); - } - } - - @Test - public void largestOutputKeyPrefix() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.largestOutputKeyPrefix()).isEmpty(); - } - } - - @Test - public void numSingleDelFallthru() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numSingleDelFallthru()).isEqualTo(0); - } - } - - @Test - public void numSingleDelMismatch() { - try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { - assertThat(compactionJobStats.numSingleDelMismatch()).isEqualTo(0); - } - } -} diff --git a/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java b/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java deleted file mode 100644 index 841615e67..000000000 --- a/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompactionOptionsFIFOTest { - - static { - RocksDB.loadLibrary(); - } - - @Test - public void maxTableFilesSize() { - final long size = 500 * 1024 * 1026; - try (final CompactionOptionsFIFO opt = new CompactionOptionsFIFO()) { - opt.setMaxTableFilesSize(size); - assertThat(opt.maxTableFilesSize()).isEqualTo(size); - } - } - - @Test - public void allowCompaction() { - final boolean allowCompaction = true; - try (final CompactionOptionsFIFO opt = new CompactionOptionsFIFO()) { - opt.setAllowCompaction(allowCompaction); - assertThat(opt.allowCompaction()).isEqualTo(allowCompaction); - } - } -} diff --git a/java/src/test/java/org/rocksdb/CompactionOptionsTest.java b/java/src/test/java/org/rocksdb/CompactionOptionsTest.java deleted file mode 100644 index 9b7d79694..000000000 --- a/java/src/test/java/org/rocksdb/CompactionOptionsTest.java +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompactionOptionsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void compression() { - try (final CompactionOptions compactionOptions = new CompactionOptions()) { - assertThat(compactionOptions.compression()) - .isEqualTo(CompressionType.SNAPPY_COMPRESSION); - compactionOptions.setCompression(CompressionType.NO_COMPRESSION); - assertThat(compactionOptions.compression()) - .isEqualTo(CompressionType.NO_COMPRESSION); - } - } - - @Test - public void outputFileSizeLimit() { - final long mb250 = 1024 * 1024 * 250; - try (final CompactionOptions compactionOptions = new CompactionOptions()) { - assertThat(compactionOptions.outputFileSizeLimit()) - .isEqualTo(-1); - compactionOptions.setOutputFileSizeLimit(mb250); - assertThat(compactionOptions.outputFileSizeLimit()) - .isEqualTo(mb250); - } - } - - @Test - public void maxSubcompactions() { - try (final CompactionOptions compactionOptions = new CompactionOptions()) { - assertThat(compactionOptions.maxSubcompactions()) - .isEqualTo(0); - compactionOptions.setMaxSubcompactions(9); - assertThat(compactionOptions.maxSubcompactions()) - .isEqualTo(9); - } - } -} diff --git a/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java b/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java deleted file mode 100644 index 5e2d195b6..000000000 --- a/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompactionOptionsUniversalTest { - - static { - RocksDB.loadLibrary(); - } - - @Test - public void sizeRatio() { - final int sizeRatio = 4; - try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { - opt.setSizeRatio(sizeRatio); - assertThat(opt.sizeRatio()).isEqualTo(sizeRatio); - } - } - - @Test - public void minMergeWidth() { - final int minMergeWidth = 3; - try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { - opt.setMinMergeWidth(minMergeWidth); - assertThat(opt.minMergeWidth()).isEqualTo(minMergeWidth); - } - } - - @Test - public void maxMergeWidth() { - final int maxMergeWidth = Integer.MAX_VALUE - 1234; - try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { - opt.setMaxMergeWidth(maxMergeWidth); - assertThat(opt.maxMergeWidth()).isEqualTo(maxMergeWidth); - } - } - - @Test - public void maxSizeAmplificationPercent() { - final int maxSizeAmplificationPercent = 150; - try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { - opt.setMaxSizeAmplificationPercent(maxSizeAmplificationPercent); - assertThat(opt.maxSizeAmplificationPercent()).isEqualTo(maxSizeAmplificationPercent); - } - } - - @Test - public void compressionSizePercent() { - final int compressionSizePercent = 500; - try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { - opt.setCompressionSizePercent(compressionSizePercent); - assertThat(opt.compressionSizePercent()).isEqualTo(compressionSizePercent); - } - } - - @Test - public void stopStyle() { - final CompactionStopStyle stopStyle = CompactionStopStyle.CompactionStopStyleSimilarSize; - try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { - opt.setStopStyle(stopStyle); - assertThat(opt.stopStyle()).isEqualTo(stopStyle); - } - } - - @Test - public void allowTrivialMove() { - final boolean allowTrivialMove = true; - try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { - opt.setAllowTrivialMove(allowTrivialMove); - assertThat(opt.allowTrivialMove()).isEqualTo(allowTrivialMove); - } - } -} diff --git a/java/src/test/java/org/rocksdb/CompactionPriorityTest.java b/java/src/test/java/org/rocksdb/CompactionPriorityTest.java deleted file mode 100644 index b078e132f..000000000 --- a/java/src/test/java/org/rocksdb/CompactionPriorityTest.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompactionPriorityTest { - - @Test(expected = IllegalArgumentException.class) - public void failIfIllegalByteValueProvided() { - CompactionPriority.getCompactionPriority((byte) -1); - } - - @Test - public void getCompactionPriority() { - assertThat(CompactionPriority.getCompactionPriority( - CompactionPriority.OldestLargestSeqFirst.getValue())) - .isEqualTo(CompactionPriority.OldestLargestSeqFirst); - } - - @Test - public void valueOf() { - assertThat(CompactionPriority.valueOf("OldestSmallestSeqFirst")). 
- isEqualTo(CompactionPriority.OldestSmallestSeqFirst); - } -} diff --git a/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java b/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java deleted file mode 100644 index 4c8a20950..000000000 --- a/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompactionStopStyleTest { - - @Test(expected = IllegalArgumentException.class) - public void failIfIllegalByteValueProvided() { - CompactionStopStyle.getCompactionStopStyle((byte) -1); - } - - @Test - public void getCompactionStopStyle() { - assertThat(CompactionStopStyle.getCompactionStopStyle( - CompactionStopStyle.CompactionStopStyleTotalSize.getValue())) - .isEqualTo(CompactionStopStyle.CompactionStopStyleTotalSize); - } - - @Test - public void valueOf() { - assertThat(CompactionStopStyle.valueOf("CompactionStopStyleSimilarSize")). - isEqualTo(CompactionStopStyle.CompactionStopStyleSimilarSize); - } -} diff --git a/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java b/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java deleted file mode 100644 index 3e90b9f10..000000000 --- a/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class ComparatorOptionsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void reusedSynchronisationType() { - try(final ComparatorOptions copt = new ComparatorOptions()) { - - copt.setReusedSynchronisationType(ReusedSynchronisationType.MUTEX); - assertThat(copt.reusedSynchronisationType()) - .isEqualTo(ReusedSynchronisationType.MUTEX); - - copt.setReusedSynchronisationType(ReusedSynchronisationType.ADAPTIVE_MUTEX); - assertThat(copt.reusedSynchronisationType()) - .isEqualTo(ReusedSynchronisationType.ADAPTIVE_MUTEX); - - copt.setReusedSynchronisationType(ReusedSynchronisationType.THREAD_LOCAL); - assertThat(copt.reusedSynchronisationType()) - .isEqualTo(ReusedSynchronisationType.THREAD_LOCAL); - } - } - - @Test - public void useDirectBuffer() { - try(final ComparatorOptions copt = new ComparatorOptions()) { - copt.setUseDirectBuffer(true); - assertThat(copt.useDirectBuffer()).isTrue(); - - copt.setUseDirectBuffer(false); - assertThat(copt.useDirectBuffer()).isFalse(); - } - } - - @Test - public void maxReusedBufferSize() { - try(final ComparatorOptions copt = new ComparatorOptions()) { - copt.setMaxReusedBufferSize(12345); - assertThat(copt.maxReusedBufferSize()).isEqualTo(12345); - - copt.setMaxReusedBufferSize(-1); - assertThat(copt.maxReusedBufferSize()).isEqualTo(-1); - } - } -} diff --git a/java/src/test/java/org/rocksdb/CompressionOptionsTest.java b/java/src/test/java/org/rocksdb/CompressionOptionsTest.java deleted file mode 100644 index 116552c32..000000000 --- a/java/src/test/java/org/rocksdb/CompressionOptionsTest.java +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class CompressionOptionsTest { - - static { - RocksDB.loadLibrary(); - } - - @Test - public void windowBits() { - final int windowBits = 7; - try(final CompressionOptions opt = new CompressionOptions()) { - opt.setWindowBits(windowBits); - assertThat(opt.windowBits()).isEqualTo(windowBits); - } - } - - @Test - public void level() { - final int level = 6; - try(final CompressionOptions opt = new CompressionOptions()) { - opt.setLevel(level); - assertThat(opt.level()).isEqualTo(level); - } - } - - @Test - public void strategy() { - final int strategy = 2; - try(final CompressionOptions opt = new CompressionOptions()) { - opt.setStrategy(strategy); - assertThat(opt.strategy()).isEqualTo(strategy); - } - } - - @Test - public void maxDictBytes() { - final int maxDictBytes = 999; - try(final CompressionOptions opt = new CompressionOptions()) { - opt.setMaxDictBytes(maxDictBytes); - assertThat(opt.maxDictBytes()).isEqualTo(maxDictBytes); - } - } - - @Test - public void zstdMaxTrainBytes() { - final int zstdMaxTrainBytes = 999; - try(final CompressionOptions opt = new CompressionOptions()) { - opt.setZStdMaxTrainBytes(zstdMaxTrainBytes); - assertThat(opt.zstdMaxTrainBytes()).isEqualTo(zstdMaxTrainBytes); - } - } - - @Test - public void enabled() { - try(final CompressionOptions opt = new CompressionOptions()) { - assertThat(opt.enabled()).isFalse(); - opt.setEnabled(true); - assertThat(opt.enabled()).isTrue(); - } - } -} diff --git a/java/src/test/java/org/rocksdb/CompressionTypesTest.java b/java/src/test/java/org/rocksdb/CompressionTypesTest.java deleted file mode 100644 index e26cc0aca..000000000 --- a/java/src/test/java/org/rocksdb/CompressionTypesTest.java +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; - - -public class CompressionTypesTest { - @Test - public void getCompressionType() { - for (final CompressionType compressionType : CompressionType.values()) { - String libraryName = compressionType.getLibraryName(); - compressionType.equals(CompressionType.getCompressionType( - libraryName)); - } - } -} diff --git a/java/src/test/java/org/rocksdb/ConcurrentTaskLimiterTest.java b/java/src/test/java/org/rocksdb/ConcurrentTaskLimiterTest.java deleted file mode 100644 index 165f4f24c..000000000 --- a/java/src/test/java/org/rocksdb/ConcurrentTaskLimiterTest.java +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import static org.junit.Assert.assertEquals; - -import org.junit.After; -import org.junit.Before; -import org.junit.ClassRule; -import org.junit.Test; - -public class ConcurrentTaskLimiterTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - private static final String NAME = "name"; - - private ConcurrentTaskLimiter concurrentTaskLimiter; - - @Before - public void beforeTest() { - concurrentTaskLimiter = new ConcurrentTaskLimiterImpl(NAME, 3); - } - - @Test - public void name() { - assertEquals(NAME, concurrentTaskLimiter.name()); - } - - @Test - public void outstandingTask() { - assertEquals(0, concurrentTaskLimiter.outstandingTask()); - } - - @Test - public void setMaxOutstandingTask() { - assertEquals(concurrentTaskLimiter, concurrentTaskLimiter.setMaxOutstandingTask(4)); - assertEquals(0, concurrentTaskLimiter.outstandingTask()); - } - - @Test - public void resetMaxOutstandingTask() { - assertEquals(concurrentTaskLimiter, concurrentTaskLimiter.resetMaxOutstandingTask()); - assertEquals(0, concurrentTaskLimiter.outstandingTask()); - } - - @After - public void afterTest() { - concurrentTaskLimiter.close(); - } -} diff --git a/java/src/test/java/org/rocksdb/DBOptionsTest.java b/java/src/test/java/org/rocksdb/DBOptionsTest.java deleted file mode 100644 index d55ceebcf..000000000 --- a/java/src/test/java/org/rocksdb/DBOptionsTest.java +++ /dev/null @@ -1,904 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - -import java.nio.file.Paths; -import java.util.*; -import java.util.concurrent.atomic.AtomicBoolean; -import org.junit.ClassRule; -import org.junit.Test; - -public class DBOptionsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - public static final Random rand = PlatformRandomHelper. - getPlatformSpecificRandomFactory(); - - @Test - public void copyConstructor() { - DBOptions origOpts = new DBOptions(); - origOpts.setCreateIfMissing(rand.nextBoolean()); - origOpts.setAllow2pc(rand.nextBoolean()); - origOpts.setMaxBackgroundJobs(rand.nextInt(10)); - DBOptions copyOpts = new DBOptions(origOpts); - assertThat(origOpts.createIfMissing()).isEqualTo(copyOpts.createIfMissing()); - assertThat(origOpts.allow2pc()).isEqualTo(copyOpts.allow2pc()); - } - - @Test - public void getDBOptionsFromProps() { - // setup sample properties - final Properties properties = new Properties(); - properties.put("allow_mmap_reads", "true"); - properties.put("bytes_per_sync", "13"); - try(final DBOptions opt = DBOptions.getDBOptionsFromProps(properties)) { - assertThat(opt).isNotNull(); - assertThat(String.valueOf(opt.allowMmapReads())). - isEqualTo(properties.get("allow_mmap_reads")); - assertThat(String.valueOf(opt.bytesPerSync())). 
- isEqualTo(properties.get("bytes_per_sync")); - } - } - - @Test - public void failDBOptionsFromPropsWithIllegalValue() { - // setup sample properties - final Properties properties = new Properties(); - properties.put("tomato", "1024"); - properties.put("burger", "2"); - try(final DBOptions opt = DBOptions.getDBOptionsFromProps(properties)) { - assertThat(opt).isNull(); - } - } - - @Test(expected = IllegalArgumentException.class) - public void failDBOptionsFromPropsWithNullValue() { - try(final DBOptions opt = DBOptions.getDBOptionsFromProps(null)) { - //no-op - } - } - - @Test(expected = IllegalArgumentException.class) - public void failDBOptionsFromPropsWithEmptyProps() { - try(final DBOptions opt = DBOptions.getDBOptionsFromProps( - new Properties())) { - //no-op - } - } - - @Test - public void linkageOfPrepMethods() { - try (final DBOptions opt = new DBOptions()) { - opt.optimizeForSmallDb(); - } - } - - @Test - public void env() { - try (final DBOptions opt = new DBOptions(); - final Env env = Env.getDefault()) { - opt.setEnv(env); - assertThat(opt.getEnv()).isSameAs(env); - } - } - - @Test - public void setIncreaseParallelism() { - try(final DBOptions opt = new DBOptions()) { - final int threads = Runtime.getRuntime().availableProcessors() * 2; - opt.setIncreaseParallelism(threads); - } - } - - @Test - public void createIfMissing() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setCreateIfMissing(boolValue); - assertThat(opt.createIfMissing()).isEqualTo(boolValue); - } - } - - @Test - public void createMissingColumnFamilies() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setCreateMissingColumnFamilies(boolValue); - assertThat(opt.createMissingColumnFamilies()).isEqualTo(boolValue); - } - } - - @Test - public void errorIfExists() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setErrorIfExists(boolValue); - assertThat(opt.errorIfExists()).isEqualTo(boolValue); - } - } - - @Test - public void paranoidChecks() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setParanoidChecks(boolValue); - assertThat(opt.paranoidChecks()).isEqualTo(boolValue); - } - } - - @Test - public void maxTotalWalSize() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setMaxTotalWalSize(longValue); - assertThat(opt.maxTotalWalSize()).isEqualTo(longValue); - } - } - - @Test - public void maxOpenFiles() { - try(final DBOptions opt = new DBOptions()) { - final int intValue = rand.nextInt(); - opt.setMaxOpenFiles(intValue); - assertThat(opt.maxOpenFiles()).isEqualTo(intValue); - } - } - - @Test - public void maxFileOpeningThreads() { - try(final DBOptions opt = new DBOptions()) { - final int intValue = rand.nextInt(); - opt.setMaxFileOpeningThreads(intValue); - assertThat(opt.maxFileOpeningThreads()).isEqualTo(intValue); - } - } - - @Test - public void useFsync() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setUseFsync(boolValue); - assertThat(opt.useFsync()).isEqualTo(boolValue); - } - } - - @Test - public void dbPaths() { - final List dbPaths = new ArrayList<>(); - dbPaths.add(new DbPath(Paths.get("/a"), 10)); - dbPaths.add(new DbPath(Paths.get("/b"), 100)); - dbPaths.add(new DbPath(Paths.get("/c"), 1000)); - - try(final DBOptions opt = new DBOptions()) { - 
assertThat(opt.dbPaths()).isEqualTo(Collections.emptyList()); - - opt.setDbPaths(dbPaths); - - assertThat(opt.dbPaths()).isEqualTo(dbPaths); - } - } - - @Test - public void dbLogDir() { - try(final DBOptions opt = new DBOptions()) { - final String str = "path/to/DbLogDir"; - opt.setDbLogDir(str); - assertThat(opt.dbLogDir()).isEqualTo(str); - } - } - - @Test - public void walDir() { - try(final DBOptions opt = new DBOptions()) { - final String str = "path/to/WalDir"; - opt.setWalDir(str); - assertThat(opt.walDir()).isEqualTo(str); - } - } - - @Test - public void deleteObsoleteFilesPeriodMicros() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setDeleteObsoleteFilesPeriodMicros(longValue); - assertThat(opt.deleteObsoleteFilesPeriodMicros()).isEqualTo(longValue); - } - } - - @SuppressWarnings("deprecated") - @Test - public void maxBackgroundCompactions() { - try(final DBOptions opt = new DBOptions()) { - final int intValue = rand.nextInt(); - opt.setMaxBackgroundCompactions(intValue); - assertThat(opt.maxBackgroundCompactions()).isEqualTo(intValue); - } - } - - @Test - public void maxSubcompactions() { - try (final DBOptions opt = new DBOptions()) { - final int intValue = rand.nextInt(); - opt.setMaxSubcompactions(intValue); - assertThat(opt.maxSubcompactions()). - isEqualTo(intValue); - } - } - - @SuppressWarnings("deprecated") - @Test - public void maxBackgroundFlushes() { - try(final DBOptions opt = new DBOptions()) { - final int intValue = rand.nextInt(); - opt.setMaxBackgroundFlushes(intValue); - assertThat(opt.maxBackgroundFlushes()).isEqualTo(intValue); - } - } - - @Test - public void maxBackgroundJobs() { - try (final DBOptions opt = new DBOptions()) { - final int intValue = rand.nextInt(); - opt.setMaxBackgroundJobs(intValue); - assertThat(opt.maxBackgroundJobs()).isEqualTo(intValue); - } - } - - @Test - public void maxLogFileSize() throws RocksDBException { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setMaxLogFileSize(longValue); - assertThat(opt.maxLogFileSize()).isEqualTo(longValue); - } - } - - @Test - public void logFileTimeToRoll() throws RocksDBException { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setLogFileTimeToRoll(longValue); - assertThat(opt.logFileTimeToRoll()).isEqualTo(longValue); - } - } - - @Test - public void keepLogFileNum() throws RocksDBException { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setKeepLogFileNum(longValue); - assertThat(opt.keepLogFileNum()).isEqualTo(longValue); - } - } - - @Test - public void recycleLogFileNum() throws RocksDBException { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setRecycleLogFileNum(longValue); - assertThat(opt.recycleLogFileNum()).isEqualTo(longValue); - } - } - - @Test - public void maxManifestFileSize() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setMaxManifestFileSize(longValue); - assertThat(opt.maxManifestFileSize()).isEqualTo(longValue); - } - } - - @Test - public void tableCacheNumshardbits() { - try(final DBOptions opt = new DBOptions()) { - final int intValue = rand.nextInt(); - opt.setTableCacheNumshardbits(intValue); - assertThat(opt.tableCacheNumshardbits()).isEqualTo(intValue); - } - } - - @Test - public void walSizeLimitMB() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = 
rand.nextLong(); - opt.setWalSizeLimitMB(longValue); - assertThat(opt.walSizeLimitMB()).isEqualTo(longValue); - } - } - - @Test - public void walTtlSeconds() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setWalTtlSeconds(longValue); - assertThat(opt.walTtlSeconds()).isEqualTo(longValue); - } - } - - @Test - public void manifestPreallocationSize() throws RocksDBException { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setManifestPreallocationSize(longValue); - assertThat(opt.manifestPreallocationSize()).isEqualTo(longValue); - } - } - - @Test - public void useDirectReads() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setUseDirectReads(boolValue); - assertThat(opt.useDirectReads()).isEqualTo(boolValue); - } - } - - @Test - public void useDirectIoForFlushAndCompaction() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setUseDirectIoForFlushAndCompaction(boolValue); - assertThat(opt.useDirectIoForFlushAndCompaction()).isEqualTo(boolValue); - } - } - - @Test - public void allowFAllocate() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAllowFAllocate(boolValue); - assertThat(opt.allowFAllocate()).isEqualTo(boolValue); - } - } - - @Test - public void allowMmapReads() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAllowMmapReads(boolValue); - assertThat(opt.allowMmapReads()).isEqualTo(boolValue); - } - } - - @Test - public void allowMmapWrites() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAllowMmapWrites(boolValue); - assertThat(opt.allowMmapWrites()).isEqualTo(boolValue); - } - } - - @Test - public void isFdCloseOnExec() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setIsFdCloseOnExec(boolValue); - assertThat(opt.isFdCloseOnExec()).isEqualTo(boolValue); - } - } - - @Test - public void statsDumpPeriodSec() { - try(final DBOptions opt = new DBOptions()) { - final int intValue = rand.nextInt(); - opt.setStatsDumpPeriodSec(intValue); - assertThat(opt.statsDumpPeriodSec()).isEqualTo(intValue); - } - } - - @Test - public void statsPersistPeriodSec() { - try (final DBOptions opt = new DBOptions()) { - final int intValue = rand.nextInt(); - opt.setStatsPersistPeriodSec(intValue); - assertThat(opt.statsPersistPeriodSec()).isEqualTo(intValue); - } - } - - @Test - public void statsHistoryBufferSize() { - try (final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setStatsHistoryBufferSize(longValue); - assertThat(opt.statsHistoryBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void adviseRandomOnOpen() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAdviseRandomOnOpen(boolValue); - assertThat(opt.adviseRandomOnOpen()).isEqualTo(boolValue); - } - } - - @Test - public void dbWriteBufferSize() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setDbWriteBufferSize(longValue); - assertThat(opt.dbWriteBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void setWriteBufferManager() throws RocksDBException { - try (final DBOptions opt = new DBOptions(); - final Cache cache = new LRUCache(1 * 1024 * 1024); - final 
WriteBufferManager writeBufferManager = new WriteBufferManager(2000l, cache)) { - opt.setWriteBufferManager(writeBufferManager); - assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); - } - } - - @Test - public void setWriteBufferManagerWithZeroBufferSize() throws RocksDBException { - try (final DBOptions opt = new DBOptions(); - final Cache cache = new LRUCache(1 * 1024 * 1024); - final WriteBufferManager writeBufferManager = new WriteBufferManager(0l, cache)) { - opt.setWriteBufferManager(writeBufferManager); - assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); - } - } - - @Test - public void accessHintOnCompactionStart() { - try(final DBOptions opt = new DBOptions()) { - final AccessHint accessHint = AccessHint.SEQUENTIAL; - opt.setAccessHintOnCompactionStart(accessHint); - assertThat(opt.accessHintOnCompactionStart()).isEqualTo(accessHint); - } - } - - @Test - public void compactionReadaheadSize() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setCompactionReadaheadSize(longValue); - assertThat(opt.compactionReadaheadSize()).isEqualTo(longValue); - } - } - - @Test - public void randomAccessMaxBufferSize() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setRandomAccessMaxBufferSize(longValue); - assertThat(opt.randomAccessMaxBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void writableFileMaxBufferSize() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setWritableFileMaxBufferSize(longValue); - assertThat(opt.writableFileMaxBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void useAdaptiveMutex() { - try(final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setUseAdaptiveMutex(boolValue); - assertThat(opt.useAdaptiveMutex()).isEqualTo(boolValue); - } - } - - @Test - public void bytesPerSync() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setBytesPerSync(longValue); - assertThat(opt.bytesPerSync()).isEqualTo(longValue); - } - } - - @Test - public void walBytesPerSync() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setWalBytesPerSync(longValue); - assertThat(opt.walBytesPerSync()).isEqualTo(longValue); - } - } - - @Test - public void strictBytesPerSync() { - try (final DBOptions opt = new DBOptions()) { - assertThat(opt.strictBytesPerSync()).isFalse(); - opt.setStrictBytesPerSync(true); - assertThat(opt.strictBytesPerSync()).isTrue(); - } - } - - @Test - public void enableThreadTracking() { - try (final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setEnableThreadTracking(boolValue); - assertThat(opt.enableThreadTracking()).isEqualTo(boolValue); - } - } - - @Test - public void delayedWriteRate() { - try(final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setDelayedWriteRate(longValue); - assertThat(opt.delayedWriteRate()).isEqualTo(longValue); - } - } - - @Test - public void enablePipelinedWrite() { - try(final DBOptions opt = new DBOptions()) { - assertThat(opt.enablePipelinedWrite()).isFalse(); - opt.setEnablePipelinedWrite(true); - assertThat(opt.enablePipelinedWrite()).isTrue(); - } - } - - @Test - public void unordredWrite() { - try(final DBOptions opt = new DBOptions()) { - assertThat(opt.unorderedWrite()).isFalse(); - opt.setUnorderedWrite(true); - 
assertThat(opt.unorderedWrite()).isTrue(); - } - } - - @Test - public void allowConcurrentMemtableWrite() { - try (final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAllowConcurrentMemtableWrite(boolValue); - assertThat(opt.allowConcurrentMemtableWrite()).isEqualTo(boolValue); - } - } - - @Test - public void enableWriteThreadAdaptiveYield() { - try (final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setEnableWriteThreadAdaptiveYield(boolValue); - assertThat(opt.enableWriteThreadAdaptiveYield()).isEqualTo(boolValue); - } - } - - @Test - public void writeThreadMaxYieldUsec() { - try (final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setWriteThreadMaxYieldUsec(longValue); - assertThat(opt.writeThreadMaxYieldUsec()).isEqualTo(longValue); - } - } - - @Test - public void writeThreadSlowYieldUsec() { - try (final DBOptions opt = new DBOptions()) { - final long longValue = rand.nextLong(); - opt.setWriteThreadSlowYieldUsec(longValue); - assertThat(opt.writeThreadSlowYieldUsec()).isEqualTo(longValue); - } - } - - @Test - public void skipStatsUpdateOnDbOpen() { - try (final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setSkipStatsUpdateOnDbOpen(boolValue); - assertThat(opt.skipStatsUpdateOnDbOpen()).isEqualTo(boolValue); - } - } - - @Test - public void walRecoveryMode() { - try (final DBOptions opt = new DBOptions()) { - for (final WALRecoveryMode walRecoveryMode : WALRecoveryMode.values()) { - opt.setWalRecoveryMode(walRecoveryMode); - assertThat(opt.walRecoveryMode()).isEqualTo(walRecoveryMode); - } - } - } - - @Test - public void allow2pc() { - try (final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAllow2pc(boolValue); - assertThat(opt.allow2pc()).isEqualTo(boolValue); - } - } - - @Test - public void rowCache() { - try (final DBOptions opt = new DBOptions()) { - assertThat(opt.rowCache()).isNull(); - - try(final Cache lruCache = new LRUCache(1000)) { - opt.setRowCache(lruCache); - assertThat(opt.rowCache()).isEqualTo(lruCache); - } - - try(final Cache clockCache = new ClockCache(1000)) { - opt.setRowCache(clockCache); - assertThat(opt.rowCache()).isEqualTo(clockCache); - } - } - } - - @Test - public void walFilter() { - try (final DBOptions opt = new DBOptions()) { - assertThat(opt.walFilter()).isNull(); - - try (final AbstractWalFilter walFilter = new AbstractWalFilter() { - @Override - public void columnFamilyLogNumberMap( - final Map cfLognumber, - final Map cfNameId) { - // no-op - } - - @Override - public LogRecordFoundResult logRecordFound(final long logNumber, - final String logFileName, final WriteBatch batch, - final WriteBatch newBatch) { - return new LogRecordFoundResult( - WalProcessingOption.CONTINUE_PROCESSING, false); - } - - @Override - public String name() { - return "test-wal-filter"; - } - }) { - opt.setWalFilter(walFilter); - assertThat(opt.walFilter()).isEqualTo(walFilter); - } - } - } - - @Test - public void failIfOptionsFileError() { - try (final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setFailIfOptionsFileError(boolValue); - assertThat(opt.failIfOptionsFileError()).isEqualTo(boolValue); - } - } - - @Test - public void dumpMallocStats() { - try (final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setDumpMallocStats(boolValue); - 
assertThat(opt.dumpMallocStats()).isEqualTo(boolValue); - } - } - - @Test - public void avoidFlushDuringRecovery() { - try (final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAvoidFlushDuringRecovery(boolValue); - assertThat(opt.avoidFlushDuringRecovery()).isEqualTo(boolValue); - } - } - - @Test - public void avoidFlushDuringShutdown() { - try (final DBOptions opt = new DBOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAvoidFlushDuringShutdown(boolValue); - assertThat(opt.avoidFlushDuringShutdown()).isEqualTo(boolValue); - } - } - - @Test - public void allowIngestBehind() { - try (final DBOptions opt = new DBOptions()) { - assertThat(opt.allowIngestBehind()).isFalse(); - opt.setAllowIngestBehind(true); - assertThat(opt.allowIngestBehind()).isTrue(); - } - } - - @Test - public void twoWriteQueues() { - try (final DBOptions opt = new DBOptions()) { - assertThat(opt.twoWriteQueues()).isFalse(); - opt.setTwoWriteQueues(true); - assertThat(opt.twoWriteQueues()).isTrue(); - } - } - - @Test - public void manualWalFlush() { - try (final DBOptions opt = new DBOptions()) { - assertThat(opt.manualWalFlush()).isFalse(); - opt.setManualWalFlush(true); - assertThat(opt.manualWalFlush()).isTrue(); - } - } - - @Test - public void atomicFlush() { - try (final DBOptions opt = new DBOptions()) { - assertThat(opt.atomicFlush()).isFalse(); - opt.setAtomicFlush(true); - assertThat(opt.atomicFlush()).isTrue(); - } - } - - @Test - public void rateLimiter() { - try(final DBOptions options = new DBOptions(); - final DBOptions anotherOptions = new DBOptions(); - final RateLimiter rateLimiter = new RateLimiter(1000, 100 * 1000, 1)) { - options.setRateLimiter(rateLimiter); - // Test with parameter initialization - anotherOptions.setRateLimiter( - new RateLimiter(1000)); - } - } - - @Test - public void sstFileManager() throws RocksDBException { - try (final DBOptions options = new DBOptions(); - final SstFileManager sstFileManager = - new SstFileManager(Env.getDefault())) { - options.setSstFileManager(sstFileManager); - } - } - - @Test - public void statistics() { - try(final DBOptions options = new DBOptions()) { - final Statistics statistics = options.statistics(); - assertThat(statistics).isNull(); - } - - try(final Statistics statistics = new Statistics(); - final DBOptions options = new DBOptions().setStatistics(statistics); - final Statistics stats = options.statistics()) { - assertThat(stats).isNotNull(); - } - } - - @Test - public void avoidUnnecessaryBlockingIO() { - try (final DBOptions options = new DBOptions()) { - assertThat(options.avoidUnnecessaryBlockingIO()).isEqualTo(false); - assertThat(options.setAvoidUnnecessaryBlockingIO(true)).isEqualTo(options); - assertThat(options.avoidUnnecessaryBlockingIO()).isEqualTo(true); - } - } - - @Test - public void persistStatsToDisk() { - try (final DBOptions options = new DBOptions()) { - assertThat(options.persistStatsToDisk()).isEqualTo(false); - assertThat(options.setPersistStatsToDisk(true)).isEqualTo(options); - assertThat(options.persistStatsToDisk()).isEqualTo(true); - } - } - - @Test - public void writeDbidToManifest() { - try (final DBOptions options = new DBOptions()) { - assertThat(options.writeDbidToManifest()).isEqualTo(false); - assertThat(options.setWriteDbidToManifest(true)).isEqualTo(options); - assertThat(options.writeDbidToManifest()).isEqualTo(true); - } - } - - @Test - public void logReadaheadSize() { - try (final DBOptions options = new DBOptions()) { - 
assertThat(options.logReadaheadSize()).isEqualTo(0); - final int size = 1024 * 1024 * 100; - assertThat(options.setLogReadaheadSize(size)).isEqualTo(options); - assertThat(options.logReadaheadSize()).isEqualTo(size); - } - } - - @Test - public void bestEffortsRecovery() { - try (final DBOptions options = new DBOptions()) { - assertThat(options.bestEffortsRecovery()).isEqualTo(false); - assertThat(options.setBestEffortsRecovery(true)).isEqualTo(options); - assertThat(options.bestEffortsRecovery()).isEqualTo(true); - } - } - - @Test - public void maxBgerrorResumeCount() { - try (final DBOptions options = new DBOptions()) { - final int INT_MAX = 2147483647; - assertThat(options.maxBgerrorResumeCount()).isEqualTo(INT_MAX); - assertThat(options.setMaxBgErrorResumeCount(-1)).isEqualTo(options); - assertThat(options.maxBgerrorResumeCount()).isEqualTo(-1); - } - } - - @Test - public void bgerrorResumeRetryInterval() { - try (final DBOptions options = new DBOptions()) { - assertThat(options.bgerrorResumeRetryInterval()).isEqualTo(1000000); - final long newRetryInterval = 24 * 3600 * 1000000L; - assertThat(options.setBgerrorResumeRetryInterval(newRetryInterval)).isEqualTo(options); - assertThat(options.bgerrorResumeRetryInterval()).isEqualTo(newRetryInterval); - } - } - - @Test - public void maxWriteBatchGroupSizeBytes() { - try (final DBOptions options = new DBOptions()) { - assertThat(options.maxWriteBatchGroupSizeBytes()).isEqualTo(1024 * 1024); - final long size = 1024 * 1024 * 1024 * 10L; - assertThat(options.setMaxWriteBatchGroupSizeBytes(size)).isEqualTo(options); - assertThat(options.maxWriteBatchGroupSizeBytes()).isEqualTo(size); - } - } - - @Test - public void skipCheckingSstFileSizesOnDbOpen() { - try (final DBOptions options = new DBOptions()) { - assertThat(options.skipCheckingSstFileSizesOnDbOpen()).isEqualTo(false); - assertThat(options.setSkipCheckingSstFileSizesOnDbOpen(true)).isEqualTo(options); - assertThat(options.skipCheckingSstFileSizesOnDbOpen()).isEqualTo(true); - } - } - - @Test - public void eventListeners() { - final AtomicBoolean wasCalled1 = new AtomicBoolean(); - final AtomicBoolean wasCalled2 = new AtomicBoolean(); - try (final DBOptions options = new DBOptions(); - final AbstractEventListener el1 = - new AbstractEventListener() { - @Override - public void onTableFileDeleted(final TableFileDeletionInfo tableFileDeletionInfo) { - wasCalled1.set(true); - } - }; - final AbstractEventListener el2 = - new AbstractEventListener() { - @Override - public void onMemTableSealed(final MemTableInfo memTableInfo) { - wasCalled2.set(true); - } - }) { - assertThat(options.setListeners(Arrays.asList(el1, el2))).isEqualTo(options); - List<AbstractEventListener> listeners = options.listeners(); - assertEquals(el1, listeners.get(0)); - assertEquals(el2, listeners.get(1)); - options.setListeners(Collections.emptyList()); - listeners.get(0).onTableFileDeleted(null); - assertTrue(wasCalled1.get()); - listeners.get(1).onMemTableSealed(null); - assertTrue(wasCalled2.get()); - List<AbstractEventListener> listeners2 = options.listeners(); - assertNotNull(listeners2); - assertEquals(0, listeners2.size()); - } - } -} diff --git a/java/src/test/java/org/rocksdb/DefaultEnvTest.java b/java/src/test/java/org/rocksdb/DefaultEnvTest.java deleted file mode 100644 index 3fb563ecb..000000000 --- a/java/src/test/java/org/rocksdb/DefaultEnvTest.java +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.Collection; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; - -public class DefaultEnvTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void backgroundThreads() { - try (final Env defaultEnv = RocksEnv.getDefault()) { - defaultEnv.setBackgroundThreads(5, Priority.BOTTOM); - assertThat(defaultEnv.getBackgroundThreads(Priority.BOTTOM)).isEqualTo(5); - - defaultEnv.setBackgroundThreads(5); - assertThat(defaultEnv.getBackgroundThreads(Priority.LOW)).isEqualTo(5); - - defaultEnv.setBackgroundThreads(5, Priority.LOW); - assertThat(defaultEnv.getBackgroundThreads(Priority.LOW)).isEqualTo(5); - - defaultEnv.setBackgroundThreads(5, Priority.HIGH); - assertThat(defaultEnv.getBackgroundThreads(Priority.HIGH)).isEqualTo(5); - } - } - - @Test - public void threadPoolQueueLen() { - try (final Env defaultEnv = RocksEnv.getDefault()) { - assertThat(defaultEnv.getThreadPoolQueueLen(Priority.BOTTOM)).isEqualTo(0); - assertThat(defaultEnv.getThreadPoolQueueLen(Priority.LOW)).isEqualTo(0); - assertThat(defaultEnv.getThreadPoolQueueLen(Priority.HIGH)).isEqualTo(0); - } - } - - @Test - public void incBackgroundThreadsIfNeeded() { - try (final Env defaultEnv = RocksEnv.getDefault()) { - defaultEnv.incBackgroundThreadsIfNeeded(20, Priority.BOTTOM); - assertThat(defaultEnv.getBackgroundThreads(Priority.BOTTOM)).isGreaterThanOrEqualTo(20); - - defaultEnv.incBackgroundThreadsIfNeeded(20, Priority.LOW); - assertThat(defaultEnv.getBackgroundThreads(Priority.LOW)).isGreaterThanOrEqualTo(20); - - defaultEnv.incBackgroundThreadsIfNeeded(20, Priority.HIGH); - assertThat(defaultEnv.getBackgroundThreads(Priority.HIGH)).isGreaterThanOrEqualTo(20); - } - } - - @Test - public void lowerThreadPoolIOPriority() { - try (final Env defaultEnv = RocksEnv.getDefault()) { - defaultEnv.lowerThreadPoolIOPriority(Priority.BOTTOM); - - defaultEnv.lowerThreadPoolIOPriority(Priority.LOW); - - defaultEnv.lowerThreadPoolIOPriority(Priority.HIGH); - } - } - - @Test - public void lowerThreadPoolCPUPriority() { - try (final Env defaultEnv = RocksEnv.getDefault()) { - defaultEnv.lowerThreadPoolCPUPriority(Priority.BOTTOM); - - defaultEnv.lowerThreadPoolCPUPriority(Priority.LOW); - - defaultEnv.lowerThreadPoolCPUPriority(Priority.HIGH); - } - } - - @Test - public void threadList() throws RocksDBException { - try (final Env defaultEnv = RocksEnv.getDefault()) { - final Collection threadList = defaultEnv.getThreadList(); - assertThat(threadList.size()).isGreaterThan(0); - } - } - - @Test - public void threadList_integration() throws RocksDBException { - try (final Env env = RocksEnv.getDefault(); - final Options opt = new Options() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true) - .setEnv(env)) { - // open database - try (final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - - final List threadList = env.getThreadList(); - assertThat(threadList.size()).isGreaterThan(0); - } - } - } -} diff --git 
a/java/src/test/java/org/rocksdb/DirectSliceTest.java b/java/src/test/java/org/rocksdb/DirectSliceTest.java deleted file mode 100644 index 67385345c..000000000 --- a/java/src/test/java/org/rocksdb/DirectSliceTest.java +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import java.nio.ByteBuffer; - -import static org.assertj.core.api.Assertions.assertThat; - -public class DirectSliceTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void directSlice() { - try(final DirectSlice directSlice = new DirectSlice("abc"); - final DirectSlice otherSlice = new DirectSlice("abc")) { - assertThat(directSlice.toString()).isEqualTo("abc"); - // clear first slice - directSlice.clear(); - assertThat(directSlice.toString()).isEmpty(); - // get first char in otherslice - assertThat(otherSlice.get(0)).isEqualTo("a".getBytes()[0]); - // remove prefix - otherSlice.removePrefix(1); - assertThat(otherSlice.toString()).isEqualTo("bc"); - } - } - - @Test - public void directSliceWithByteBuffer() { - final byte[] data = "Some text".getBytes(); - final ByteBuffer buffer = ByteBuffer.allocateDirect(data.length + 1); - buffer.put(data); - buffer.put(data.length, (byte)0); - - try(final DirectSlice directSlice = new DirectSlice(buffer)) { - assertThat(directSlice.toString()).isEqualTo("Some text"); - } - } - - @Test - public void directSliceWithByteBufferAndLength() { - final byte[] data = "Some text".getBytes(); - final ByteBuffer buffer = ByteBuffer.allocateDirect(data.length); - buffer.put(data); - try(final DirectSlice directSlice = new DirectSlice(buffer, 4)) { - assertThat(directSlice.toString()).isEqualTo("Some"); - } - } - - @Test(expected = IllegalArgumentException.class) - public void directSliceInitWithoutDirectAllocation() { - final byte[] data = "Some text".getBytes(); - final ByteBuffer buffer = ByteBuffer.wrap(data); - try(final DirectSlice directSlice = new DirectSlice(buffer)) { - //no-op - } - } - - @Test(expected = IllegalArgumentException.class) - public void directSlicePrefixInitWithoutDirectAllocation() { - final byte[] data = "Some text".getBytes(); - final ByteBuffer buffer = ByteBuffer.wrap(data); - try(final DirectSlice directSlice = new DirectSlice(buffer, 4)) { - //no-op - } - } - - @Test - public void directSliceClear() { - try(final DirectSlice directSlice = new DirectSlice("abc")) { - assertThat(directSlice.toString()).isEqualTo("abc"); - directSlice.clear(); - assertThat(directSlice.toString()).isEmpty(); - directSlice.clear(); // make sure we don't double-free - } - } - - @Test - public void directSliceRemovePrefix() { - try(final DirectSlice directSlice = new DirectSlice("abc")) { - assertThat(directSlice.toString()).isEqualTo("abc"); - directSlice.removePrefix(1); - assertThat(directSlice.toString()).isEqualTo("bc"); - } - } -} diff --git a/java/src/test/java/org/rocksdb/EnvOptionsTest.java b/java/src/test/java/org/rocksdb/EnvOptionsTest.java deleted file mode 100644 index 0f3d8e234..000000000 --- a/java/src/test/java/org/rocksdb/EnvOptionsTest.java +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import java.util.Random; - -import static org.assertj.core.api.Assertions.assertThat; - -public class EnvOptionsTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); - - public static final Random rand = PlatformRandomHelper.getPlatformSpecificRandomFactory(); - - @Test - public void dbOptionsConstructor() { - final long compactionReadaheadSize = 4 * 1024 * 1024; - try (final DBOptions dbOptions = new DBOptions() - .setCompactionReadaheadSize(compactionReadaheadSize)) { - try (final EnvOptions envOptions = new EnvOptions(dbOptions)) { - assertThat(envOptions.compactionReadaheadSize()) - .isEqualTo(compactionReadaheadSize); - } - } - } - - @Test - public void useMmapReads() { - try (final EnvOptions envOptions = new EnvOptions()) { - final boolean boolValue = rand.nextBoolean(); - envOptions.setUseMmapReads(boolValue); - assertThat(envOptions.useMmapReads()).isEqualTo(boolValue); - } - } - - @Test - public void useMmapWrites() { - try (final EnvOptions envOptions = new EnvOptions()) { - final boolean boolValue = rand.nextBoolean(); - envOptions.setUseMmapWrites(boolValue); - assertThat(envOptions.useMmapWrites()).isEqualTo(boolValue); - } - } - - @Test - public void useDirectReads() { - try (final EnvOptions envOptions = new EnvOptions()) { - final boolean boolValue = rand.nextBoolean(); - envOptions.setUseDirectReads(boolValue); - assertThat(envOptions.useDirectReads()).isEqualTo(boolValue); - } - } - - @Test - public void useDirectWrites() { - try (final EnvOptions envOptions = new EnvOptions()) { - final boolean boolValue = rand.nextBoolean(); - envOptions.setUseDirectWrites(boolValue); - assertThat(envOptions.useDirectWrites()).isEqualTo(boolValue); - } - } - - @Test - public void allowFallocate() { - try (final EnvOptions envOptions = new EnvOptions()) { - final boolean boolValue = rand.nextBoolean(); - envOptions.setAllowFallocate(boolValue); - assertThat(envOptions.allowFallocate()).isEqualTo(boolValue); - } - } - - @Test - public void setFdCloexecs() { - try (final EnvOptions envOptions = new EnvOptions()) { - final boolean boolValue = rand.nextBoolean(); - envOptions.setSetFdCloexec(boolValue); - assertThat(envOptions.setFdCloexec()).isEqualTo(boolValue); - } - } - - @Test - public void bytesPerSync() { - try (final EnvOptions envOptions = new EnvOptions()) { - final long longValue = rand.nextLong(); - envOptions.setBytesPerSync(longValue); - assertThat(envOptions.bytesPerSync()).isEqualTo(longValue); - } - } - - @Test - public void fallocateWithKeepSize() { - try (final EnvOptions envOptions = new EnvOptions()) { - final boolean boolValue = rand.nextBoolean(); - envOptions.setFallocateWithKeepSize(boolValue); - assertThat(envOptions.fallocateWithKeepSize()).isEqualTo(boolValue); - } - } - - @Test - public void compactionReadaheadSize() { - try (final EnvOptions envOptions = new EnvOptions()) { - final int intValue = rand.nextInt(2147483647); - envOptions.setCompactionReadaheadSize(intValue); - assertThat(envOptions.compactionReadaheadSize()).isEqualTo(intValue); - } - } - - @Test - public void randomAccessMaxBufferSize() { - try (final EnvOptions envOptions = new EnvOptions()) { - final int intValue = rand.nextInt(2147483647); - 
envOptions.setRandomAccessMaxBufferSize(intValue); - assertThat(envOptions.randomAccessMaxBufferSize()).isEqualTo(intValue); - } - } - - @Test - public void writableFileMaxBufferSize() { - try (final EnvOptions envOptions = new EnvOptions()) { - final int intValue = rand.nextInt(2147483647); - envOptions.setWritableFileMaxBufferSize(intValue); - assertThat(envOptions.writableFileMaxBufferSize()).isEqualTo(intValue); - } - } - - @Test - public void rateLimiter() { - try (final EnvOptions envOptions = new EnvOptions(); - final RateLimiter rateLimiter1 = new RateLimiter(1000, 100 * 1000, 1)) { - envOptions.setRateLimiter(rateLimiter1); - assertThat(envOptions.rateLimiter()).isEqualTo(rateLimiter1); - - try(final RateLimiter rateLimiter2 = new RateLimiter(1000)) { - envOptions.setRateLimiter(rateLimiter2); - assertThat(envOptions.rateLimiter()).isEqualTo(rateLimiter2); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/EventListenerTest.java b/java/src/test/java/org/rocksdb/EventListenerTest.java deleted file mode 100644 index 93ea19c2f..000000000 --- a/java/src/test/java/org/rocksdb/EventListenerTest.java +++ /dev/null @@ -1,725 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.*; -import java.util.concurrent.atomic.AtomicBoolean; -import org.assertj.core.api.AbstractObjectAssert; -import org.assertj.core.api.ObjectAssert; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.AbstractEventListener.EnabledEventCallback; -import org.rocksdb.test.TestableEventListener; - -public class EventListenerTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - public static final Random rand = PlatformRandomHelper.getPlatformSpecificRandomFactory(); - - void flushDb(final AbstractEventListener el, final AtomicBoolean wasCbCalled) - throws RocksDBException { - try (final Options opt = - new Options().setCreateIfMissing(true).setListeners(Collections.singletonList(el)); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - assertThat(db).isNotNull(); - final byte[] value = new byte[24]; - rand.nextBytes(value); - db.put("testKey".getBytes(), value); - db.flush(new FlushOptions()); - assertThat(wasCbCalled.get()).isTrue(); - } - } - - @Test - public void onFlushCompleted() throws RocksDBException { - final AtomicBoolean wasCbCalled = new AtomicBoolean(); - final AbstractEventListener onFlushCompletedListener = new AbstractEventListener() { - @Override - public void onFlushCompleted(final RocksDB rocksDb, final FlushJobInfo flushJobInfo) { - assertThat(flushJobInfo.getColumnFamilyName()).isNotNull(); - assertThat(flushJobInfo.getFlushReason()).isEqualTo(FlushReason.MANUAL_FLUSH); - wasCbCalled.set(true); - } - }; - flushDb(onFlushCompletedListener, wasCbCalled); - } - - @Test - public void onFlushBegin() throws RocksDBException { - final AtomicBoolean wasCbCalled = new AtomicBoolean(); - final AbstractEventListener 
onFlushBeginListener = new AbstractEventListener() { - @Override - public void onFlushBegin(final RocksDB rocksDb, final FlushJobInfo flushJobInfo) { - assertThat(flushJobInfo.getColumnFamilyName()).isNotNull(); - assertThat(flushJobInfo.getFlushReason()).isEqualTo(FlushReason.MANUAL_FLUSH); - wasCbCalled.set(true); - } - }; - flushDb(onFlushBeginListener, wasCbCalled); - } - - void deleteTableFile(final AbstractEventListener el, final AtomicBoolean wasCbCalled) - throws RocksDBException { - try (final Options opt = - new Options().setCreateIfMissing(true).setListeners(Collections.singletonList(el)); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - assertThat(db).isNotNull(); - final byte[] value = new byte[24]; - rand.nextBytes(value); - db.put("testKey".getBytes(), value); - final RocksDB.LiveFiles liveFiles = db.getLiveFiles(); - assertThat(liveFiles).isNotNull(); - assertThat(liveFiles.files).isNotNull(); - assertThat(liveFiles.files.isEmpty()).isFalse(); - db.deleteFile(liveFiles.files.get(0)); - assertThat(wasCbCalled.get()).isTrue(); - } - } - - @Test - public void onTableFileDeleted() throws RocksDBException { - final AtomicBoolean wasCbCalled = new AtomicBoolean(); - final AbstractEventListener onTableFileDeletedListener = new AbstractEventListener() { - @Override - public void onTableFileDeleted(final TableFileDeletionInfo tableFileDeletionInfo) { - assertThat(tableFileDeletionInfo.getDbName()).isNotNull(); - wasCbCalled.set(true); - } - }; - deleteTableFile(onTableFileDeletedListener, wasCbCalled); - } - - void compactRange(final AbstractEventListener el, final AtomicBoolean wasCbCalled) - throws RocksDBException { - try (final Options opt = - new Options().setCreateIfMissing(true).setListeners(Collections.singletonList(el)); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - assertThat(db).isNotNull(); - final byte[] value = new byte[24]; - rand.nextBytes(value); - db.put("testKey".getBytes(), value); - db.compactRange(); - assertThat(wasCbCalled.get()).isTrue(); - } - } - - @Test - public void onCompactionBegin() throws RocksDBException { - final AtomicBoolean wasCbCalled = new AtomicBoolean(); - final AbstractEventListener onCompactionBeginListener = new AbstractEventListener() { - @Override - public void onCompactionBegin(final RocksDB db, final CompactionJobInfo compactionJobInfo) { - assertThat(compactionJobInfo.compactionReason()) - .isEqualTo(CompactionReason.kManualCompaction); - wasCbCalled.set(true); - } - }; - compactRange(onCompactionBeginListener, wasCbCalled); - } - - @Test - public void onCompactionCompleted() throws RocksDBException { - final AtomicBoolean wasCbCalled = new AtomicBoolean(); - final AbstractEventListener onCompactionCompletedListener = new AbstractEventListener() { - @Override - public void onCompactionCompleted( - final RocksDB db, final CompactionJobInfo compactionJobInfo) { - assertThat(compactionJobInfo.compactionReason()) - .isEqualTo(CompactionReason.kManualCompaction); - wasCbCalled.set(true); - } - }; - compactRange(onCompactionCompletedListener, wasCbCalled); - } - - @Test - public void onTableFileCreated() throws RocksDBException { - final AtomicBoolean wasCbCalled = new AtomicBoolean(); - final AbstractEventListener onTableFileCreatedListener = new AbstractEventListener() { - @Override - public void onTableFileCreated(final TableFileCreationInfo tableFileCreationInfo) { - assertThat(tableFileCreationInfo.getReason()).isEqualTo(TableFileCreationReason.FLUSH); - 
wasCbCalled.set(true); - } - }; - flushDb(onTableFileCreatedListener, wasCbCalled); - } - - @Test - public void onTableFileCreationStarted() throws RocksDBException { - final AtomicBoolean wasCbCalled = new AtomicBoolean(); - final AbstractEventListener onTableFileCreationStartedListener = new AbstractEventListener() { - @Override - public void onTableFileCreationStarted( - final TableFileCreationBriefInfo tableFileCreationBriefInfo) { - assertThat(tableFileCreationBriefInfo.getReason()).isEqualTo(TableFileCreationReason.FLUSH); - wasCbCalled.set(true); - } - }; - flushDb(onTableFileCreationStartedListener, wasCbCalled); - } - - void deleteColumnFamilyHandle(final AbstractEventListener el, final AtomicBoolean wasCbCalled) - throws RocksDBException { - try (final Options opt = - new Options().setCreateIfMissing(true).setListeners(Collections.singletonList(el)); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - assertThat(db).isNotNull(); - final byte[] value = new byte[24]; - rand.nextBytes(value); - db.put("testKey".getBytes(), value); - ColumnFamilyHandle columnFamilyHandle = db.getDefaultColumnFamily(); - columnFamilyHandle.close(); - assertThat(wasCbCalled.get()).isTrue(); - } - } - - @Test - public void onColumnFamilyHandleDeletionStarted() throws RocksDBException { - final AtomicBoolean wasCbCalled = new AtomicBoolean(); - final AbstractEventListener onColumnFamilyHandleDeletionStartedListener = - new AbstractEventListener() { - @Override - public void onColumnFamilyHandleDeletionStarted( - final ColumnFamilyHandle columnFamilyHandle) { - assertThat(columnFamilyHandle).isNotNull(); - wasCbCalled.set(true); - } - }; - deleteColumnFamilyHandle(onColumnFamilyHandleDeletionStartedListener, wasCbCalled); - } - - void ingestExternalFile(final AbstractEventListener el, final AtomicBoolean wasCbCalled) - throws RocksDBException { - try (final Options opt = - new Options().setCreateIfMissing(true).setListeners(Collections.singletonList(el)); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - assertThat(db).isNotNull(); - final String uuid = UUID.randomUUID().toString(); - final SstFileWriter sstFileWriter = new SstFileWriter(new EnvOptions(), opt); - final Path externalFilePath = Paths.get(db.getName(), uuid); - sstFileWriter.open(externalFilePath.toString()); - sstFileWriter.put("testKey".getBytes(), uuid.getBytes()); - sstFileWriter.finish(); - db.ingestExternalFile( - Collections.singletonList(externalFilePath.toString()), new IngestExternalFileOptions()); - assertThat(wasCbCalled.get()).isTrue(); - } - } - - @Test - public void onExternalFileIngested() throws RocksDBException { - final AtomicBoolean wasCbCalled = new AtomicBoolean(); - final AbstractEventListener onExternalFileIngestedListener = new AbstractEventListener() { - @Override - public void onExternalFileIngested( - final RocksDB db, final ExternalFileIngestionInfo externalFileIngestionInfo) { - assertThat(db).isNotNull(); - wasCbCalled.set(true); - } - }; - ingestExternalFile(onExternalFileIngestedListener, wasCbCalled); - } - - @Test - public void testAllCallbacksInvocation() { - final long TEST_LONG_VAL = -1; - // Expected test data objects - final Map userCollectedPropertiesTestData = - Collections.singletonMap("key", "value"); - final Map readablePropertiesTestData = Collections.singletonMap("key", "value"); - final TableProperties tablePropertiesTestData = new TableProperties(TEST_LONG_VAL, - TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, 
TEST_LONG_VAL, TEST_LONG_VAL, - TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, - TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, - TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, "columnFamilyName".getBytes(), - "filterPolicyName", "comparatorName", "mergeOperatorName", "prefixExtractorName", - "propertyCollectorsNames", "compressionName", userCollectedPropertiesTestData, - readablePropertiesTestData); - final FlushJobInfo flushJobInfoTestData = new FlushJobInfo(Integer.MAX_VALUE, - "testColumnFamily", "/file/path", TEST_LONG_VAL, Integer.MAX_VALUE, true, true, - TEST_LONG_VAL, TEST_LONG_VAL, tablePropertiesTestData, (byte) 0x0a); - final Status statusTestData = new Status(Status.Code.Incomplete, Status.SubCode.NoSpace, null); - final TableFileDeletionInfo tableFileDeletionInfoTestData = - new TableFileDeletionInfo("dbName", "/file/path", Integer.MAX_VALUE, statusTestData); - final TableFileCreationInfo tableFileCreationInfoTestData = - new TableFileCreationInfo(TEST_LONG_VAL, tablePropertiesTestData, statusTestData, "dbName", - "columnFamilyName", "/file/path", Integer.MAX_VALUE, (byte) 0x03); - final TableFileCreationBriefInfo tableFileCreationBriefInfoTestData = - new TableFileCreationBriefInfo( - "dbName", "columnFamilyName", "/file/path", Integer.MAX_VALUE, (byte) 0x03); - final MemTableInfo memTableInfoTestData = new MemTableInfo( - "columnFamilyName", TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL, TEST_LONG_VAL); - final FileOperationInfo fileOperationInfoTestData = new FileOperationInfo("/file/path", - TEST_LONG_VAL, TEST_LONG_VAL, 1_600_699_420_000_000_000L, 5_000_000_000L, statusTestData); - final WriteStallInfo writeStallInfoTestData = - new WriteStallInfo("columnFamilyName", (byte) 0x0, (byte) 0x1); - final ExternalFileIngestionInfo externalFileIngestionInfoTestData = - new ExternalFileIngestionInfo("columnFamilyName", "/external/file/path", - "/internal/file/path", TEST_LONG_VAL, tablePropertiesTestData); - - final CapturingTestableEventListener listener = new CapturingTestableEventListener() { - @Override - public void onFlushCompleted(final RocksDB db, final FlushJobInfo flushJobInfo) { - super.onFlushCompleted(db, flushJobInfo); - assertThat(flushJobInfo).isEqualTo(flushJobInfoTestData); - } - - @Override - public void onFlushBegin(final RocksDB db, final FlushJobInfo flushJobInfo) { - super.onFlushBegin(db, flushJobInfo); - assertThat(flushJobInfo).isEqualTo(flushJobInfoTestData); - } - - @Override - public void onTableFileDeleted(final TableFileDeletionInfo tableFileDeletionInfo) { - super.onTableFileDeleted(tableFileDeletionInfo); - assertThat(tableFileDeletionInfo).isEqualTo(tableFileDeletionInfoTestData); - } - - @Override - public void onCompactionBegin(final RocksDB db, final CompactionJobInfo compactionJobInfo) { - super.onCompactionBegin(db, compactionJobInfo); - assertThat(new String(compactionJobInfo.columnFamilyName(), StandardCharsets.UTF_8)) - .isEqualTo("compactionColumnFamily"); - assertThat(compactionJobInfo.status()).isEqualTo(statusTestData); - assertThat(compactionJobInfo.threadId()).isEqualTo(TEST_LONG_VAL); - assertThat(compactionJobInfo.jobId()).isEqualTo(Integer.MAX_VALUE); - assertThat(compactionJobInfo.baseInputLevel()).isEqualTo(Integer.MAX_VALUE); - assertThat(compactionJobInfo.outputLevel()).isEqualTo(Integer.MAX_VALUE); - assertThat(compactionJobInfo.inputFiles()) - .isEqualTo(Collections.singletonList("inputFile.sst")); - assertThat(compactionJobInfo.outputFiles()) 
- .isEqualTo(Collections.singletonList("outputFile.sst")); - assertThat(compactionJobInfo.tableProperties()) - .isEqualTo(Collections.singletonMap("tableProperties", tablePropertiesTestData)); - assertThat(compactionJobInfo.compactionReason()).isEqualTo(CompactionReason.kFlush); - assertThat(compactionJobInfo.compression()).isEqualTo(CompressionType.SNAPPY_COMPRESSION); - } - - @Override - public void onCompactionCompleted( - final RocksDB db, final CompactionJobInfo compactionJobInfo) { - super.onCompactionCompleted(db, compactionJobInfo); - assertThat(new String(compactionJobInfo.columnFamilyName())) - .isEqualTo("compactionColumnFamily"); - assertThat(compactionJobInfo.status()).isEqualTo(statusTestData); - assertThat(compactionJobInfo.threadId()).isEqualTo(TEST_LONG_VAL); - assertThat(compactionJobInfo.jobId()).isEqualTo(Integer.MAX_VALUE); - assertThat(compactionJobInfo.baseInputLevel()).isEqualTo(Integer.MAX_VALUE); - assertThat(compactionJobInfo.outputLevel()).isEqualTo(Integer.MAX_VALUE); - assertThat(compactionJobInfo.inputFiles()) - .isEqualTo(Collections.singletonList("inputFile.sst")); - assertThat(compactionJobInfo.outputFiles()) - .isEqualTo(Collections.singletonList("outputFile.sst")); - assertThat(compactionJobInfo.tableProperties()) - .isEqualTo(Collections.singletonMap("tableProperties", tablePropertiesTestData)); - assertThat(compactionJobInfo.compactionReason()).isEqualTo(CompactionReason.kFlush); - assertThat(compactionJobInfo.compression()).isEqualTo(CompressionType.SNAPPY_COMPRESSION); - } - - @Override - public void onTableFileCreated(final TableFileCreationInfo tableFileCreationInfo) { - super.onTableFileCreated(tableFileCreationInfo); - assertThat(tableFileCreationInfo).isEqualTo(tableFileCreationInfoTestData); - } - - @Override - public void onTableFileCreationStarted( - final TableFileCreationBriefInfo tableFileCreationBriefInfo) { - super.onTableFileCreationStarted(tableFileCreationBriefInfo); - assertThat(tableFileCreationBriefInfo).isEqualTo(tableFileCreationBriefInfoTestData); - } - - @Override - public void onMemTableSealed(final MemTableInfo memTableInfo) { - super.onMemTableSealed(memTableInfo); - assertThat(memTableInfo).isEqualTo(memTableInfoTestData); - } - - @Override - public void onColumnFamilyHandleDeletionStarted(final ColumnFamilyHandle columnFamilyHandle) { - super.onColumnFamilyHandleDeletionStarted(columnFamilyHandle); - } - - @Override - public void onExternalFileIngested( - final RocksDB db, final ExternalFileIngestionInfo externalFileIngestionInfo) { - super.onExternalFileIngested(db, externalFileIngestionInfo); - assertThat(externalFileIngestionInfo).isEqualTo(externalFileIngestionInfoTestData); - } - - @Override - public void onBackgroundError( - final BackgroundErrorReason backgroundErrorReason, final Status backgroundError) { - super.onBackgroundError(backgroundErrorReason, backgroundError); - } - - @Override - public void onStallConditionsChanged(final WriteStallInfo writeStallInfo) { - super.onStallConditionsChanged(writeStallInfo); - assertThat(writeStallInfo).isEqualTo(writeStallInfoTestData); - } - - @Override - public void onFileReadFinish(final FileOperationInfo fileOperationInfo) { - super.onFileReadFinish(fileOperationInfo); - assertThat(fileOperationInfo).isEqualTo(fileOperationInfoTestData); - } - - @Override - public void onFileWriteFinish(final FileOperationInfo fileOperationInfo) { - super.onFileWriteFinish(fileOperationInfo); - assertThat(fileOperationInfo).isEqualTo(fileOperationInfoTestData); - } - - @Override - public 
void onFileFlushFinish(final FileOperationInfo fileOperationInfo) { - super.onFileFlushFinish(fileOperationInfo); - assertThat(fileOperationInfo).isEqualTo(fileOperationInfoTestData); - } - - @Override - public void onFileSyncFinish(final FileOperationInfo fileOperationInfo) { - super.onFileSyncFinish(fileOperationInfo); - assertThat(fileOperationInfo).isEqualTo(fileOperationInfoTestData); - } - - @Override - public void onFileRangeSyncFinish(final FileOperationInfo fileOperationInfo) { - super.onFileRangeSyncFinish(fileOperationInfo); - assertThat(fileOperationInfo).isEqualTo(fileOperationInfoTestData); - } - - @Override - public void onFileTruncateFinish(final FileOperationInfo fileOperationInfo) { - super.onFileTruncateFinish(fileOperationInfo); - assertThat(fileOperationInfo).isEqualTo(fileOperationInfoTestData); - } - - @Override - public void onFileCloseFinish(final FileOperationInfo fileOperationInfo) { - super.onFileCloseFinish(fileOperationInfo); - assertThat(fileOperationInfo).isEqualTo(fileOperationInfoTestData); - } - - @Override - public boolean shouldBeNotifiedOnFileIO() { - super.shouldBeNotifiedOnFileIO(); - return false; - } - - @Override - public boolean onErrorRecoveryBegin( - final BackgroundErrorReason backgroundErrorReason, final Status backgroundError) { - super.onErrorRecoveryBegin(backgroundErrorReason, backgroundError); - assertThat(backgroundErrorReason).isEqualTo(BackgroundErrorReason.FLUSH); - assertThat(backgroundError).isEqualTo(statusTestData); - return true; - } - - @Override - public void onErrorRecoveryCompleted(final Status oldBackgroundError) { - super.onErrorRecoveryCompleted(oldBackgroundError); - assertThat(oldBackgroundError).isEqualTo(statusTestData); - } - }; - - // test action - listener.invokeAllCallbacks(); - - // assert - assertAllEventsCalled(listener); - - assertNoCallbackErrors(listener); - } - - @Test - public void testEnabledCallbacks() { - final EnabledEventCallback[] enabledEvents = { - EnabledEventCallback.ON_MEMTABLE_SEALED, EnabledEventCallback.ON_ERROR_RECOVERY_COMPLETED}; - - final CapturingTestableEventListener listener = - new CapturingTestableEventListener(enabledEvents); - - // test action - listener.invokeAllCallbacks(); - - // assert - assertEventsCalled(listener, enabledEvents); - } - - private static void assertAllEventsCalled( - final CapturingTestableEventListener capturingTestableEventListener) { - assertEventsCalled(capturingTestableEventListener, EnumSet.allOf(EnabledEventCallback.class)); - } - - private static void assertEventsCalled( - final CapturingTestableEventListener capturingTestableEventListener, - final EnabledEventCallback[] expected) { - assertEventsCalled(capturingTestableEventListener, EnumSet.copyOf(Arrays.asList(expected))); - } - - private static void assertNoCallbackErrors( - final CapturingTestableEventListener capturingTestableEventListener) { - for (AssertionError error : capturingTestableEventListener.capturedAssertionErrors) { - throw new Error("An assertion failed in callback", error); - } - } - - private static void assertEventsCalled( - final CapturingTestableEventListener capturingTestableEventListener, - final EnumSet expected) { - final ListenerEvents capturedEvents = capturingTestableEventListener.capturedListenerEvents; - - assertThat(capturedEvents.flushCompleted) - .isEqualTo(expected.contains(EnabledEventCallback.ON_FLUSH_COMPLETED)); - assertThat(capturedEvents.flushBegin) - .isEqualTo(expected.contains(EnabledEventCallback.ON_FLUSH_BEGIN)); - 
assertThat(capturedEvents.tableFileDeleted) - .isEqualTo(expected.contains(EnabledEventCallback.ON_TABLE_FILE_DELETED)); - assertThat(capturedEvents.compactionBegin) - .isEqualTo(expected.contains(EnabledEventCallback.ON_COMPACTION_BEGIN)); - assertThat(capturedEvents.compactionCompleted) - .isEqualTo(expected.contains(EnabledEventCallback.ON_COMPACTION_COMPLETED)); - assertThat(capturedEvents.tableFileCreated) - .isEqualTo(expected.contains(EnabledEventCallback.ON_TABLE_FILE_CREATED)); - assertThat(capturedEvents.tableFileCreationStarted) - .isEqualTo(expected.contains(EnabledEventCallback.ON_TABLE_FILE_CREATION_STARTED)); - assertThat(capturedEvents.memTableSealed) - .isEqualTo(expected.contains(EnabledEventCallback.ON_MEMTABLE_SEALED)); - assertThat(capturedEvents.columnFamilyHandleDeletionStarted) - .isEqualTo( - expected.contains(EnabledEventCallback.ON_COLUMN_FAMILY_HANDLE_DELETION_STARTED)); - assertThat(capturedEvents.externalFileIngested) - .isEqualTo(expected.contains(EnabledEventCallback.ON_EXTERNAL_FILE_INGESTED)); - assertThat(capturedEvents.backgroundError) - .isEqualTo(expected.contains(EnabledEventCallback.ON_BACKGROUND_ERROR)); - assertThat(capturedEvents.stallConditionsChanged) - .isEqualTo(expected.contains(EnabledEventCallback.ON_STALL_CONDITIONS_CHANGED)); - assertThat(capturedEvents.fileReadFinish) - .isEqualTo(expected.contains(EnabledEventCallback.ON_FILE_READ_FINISH)); - assertThat(capturedEvents.fileWriteFinish) - .isEqualTo(expected.contains(EnabledEventCallback.ON_FILE_WRITE_FINISH)); - assertThat(capturedEvents.fileFlushFinish) - .isEqualTo(expected.contains(EnabledEventCallback.ON_FILE_FLUSH_FINISH)); - assertThat(capturedEvents.fileSyncFinish) - .isEqualTo(expected.contains(EnabledEventCallback.ON_FILE_SYNC_FINISH)); - assertThat(capturedEvents.fileRangeSyncFinish) - .isEqualTo(expected.contains(EnabledEventCallback.ON_FILE_RANGE_SYNC_FINISH)); - assertThat(capturedEvents.fileTruncateFinish) - .isEqualTo(expected.contains(EnabledEventCallback.ON_FILE_TRUNCATE_FINISH)); - assertThat(capturedEvents.fileCloseFinish) - .isEqualTo(expected.contains(EnabledEventCallback.ON_FILE_CLOSE_FINISH)); - assertThat(capturedEvents.shouldBeNotifiedOnFileIO) - .isEqualTo(expected.contains(EnabledEventCallback.SHOULD_BE_NOTIFIED_ON_FILE_IO)); - assertThat(capturedEvents.errorRecoveryBegin) - .isEqualTo(expected.contains(EnabledEventCallback.ON_ERROR_RECOVERY_BEGIN)); - assertThat(capturedEvents.errorRecoveryCompleted) - .isEqualTo(expected.contains(EnabledEventCallback.ON_ERROR_RECOVERY_COMPLETED)); - assertThat(capturedEvents.errorRecoveryCompleted) - .isEqualTo(expected.contains(EnabledEventCallback.ON_ERROR_RECOVERY_COMPLETED)); - } - - /** - * Members are volatile as they may be written - * and read by different threads. 
- */ - private static class ListenerEvents { - volatile boolean flushCompleted; - volatile boolean flushBegin; - volatile boolean tableFileDeleted; - volatile boolean compactionBegin; - volatile boolean compactionCompleted; - volatile boolean tableFileCreated; - volatile boolean tableFileCreationStarted; - volatile boolean memTableSealed; - volatile boolean columnFamilyHandleDeletionStarted; - volatile boolean externalFileIngested; - volatile boolean backgroundError; - volatile boolean stallConditionsChanged; - volatile boolean fileReadFinish; - volatile boolean fileWriteFinish; - volatile boolean fileFlushFinish; - volatile boolean fileSyncFinish; - volatile boolean fileRangeSyncFinish; - volatile boolean fileTruncateFinish; - volatile boolean fileCloseFinish; - volatile boolean shouldBeNotifiedOnFileIO; - volatile boolean errorRecoveryBegin; - volatile boolean errorRecoveryCompleted; - } - - private static class CapturingObjectAssert<T> extends ObjectAssert<T> { - private final List<AssertionError> assertionErrors; - public CapturingObjectAssert(T t, List<AssertionError> assertionErrors) { - super(t); - this.assertionErrors = assertionErrors; - } - - @Override - public ObjectAssert<T> isEqualTo(Object other) { - try { - return super.isEqualTo(other); - } catch (AssertionError error) { - assertionErrors.add(error); - throw error; - } - } - - @Override - public ObjectAssert<T> isNotNull() { - try { - return super.isNotNull(); - } catch (AssertionError error) { - assertionErrors.add(error); - throw error; - } - } - } - - private static class CapturingTestableEventListener extends TestableEventListener { - final ListenerEvents capturedListenerEvents = new ListenerEvents(); - - final List<AssertionError> capturedAssertionErrors = new ArrayList<>(); - - protected <T> AbstractObjectAssert<?, T> assertThat(T actual) { - return new CapturingObjectAssert<>(actual, capturedAssertionErrors); - } - - public CapturingTestableEventListener() {} - - public CapturingTestableEventListener(final EnabledEventCallback...
enabledEventCallbacks) { - super(enabledEventCallbacks); - } - - @Override - public void onFlushCompleted(final RocksDB db, final FlushJobInfo flushJobInfo) { - capturedListenerEvents.flushCompleted = true; - } - - @Override - public void onFlushBegin(final RocksDB db, final FlushJobInfo flushJobInfo) { - capturedListenerEvents.flushBegin = true; - } - - @Override - public void onTableFileDeleted(final TableFileDeletionInfo tableFileDeletionInfo) { - capturedListenerEvents.tableFileDeleted = true; - } - - @Override - public void onCompactionBegin(final RocksDB db, final CompactionJobInfo compactionJobInfo) { - capturedListenerEvents.compactionBegin = true; - } - - @Override - public void onCompactionCompleted(final RocksDB db, final CompactionJobInfo compactionJobInfo) { - capturedListenerEvents.compactionCompleted = true; - } - - @Override - public void onTableFileCreated(final TableFileCreationInfo tableFileCreationInfo) { - capturedListenerEvents.tableFileCreated = true; - } - - @Override - public void onTableFileCreationStarted( - final TableFileCreationBriefInfo tableFileCreationBriefInfo) { - capturedListenerEvents.tableFileCreationStarted = true; - } - - @Override - public void onMemTableSealed(final MemTableInfo memTableInfo) { - capturedListenerEvents.memTableSealed = true; - } - - @Override - public void onColumnFamilyHandleDeletionStarted(final ColumnFamilyHandle columnFamilyHandle) { - capturedListenerEvents.columnFamilyHandleDeletionStarted = true; - } - - @Override - public void onExternalFileIngested( - final RocksDB db, final ExternalFileIngestionInfo externalFileIngestionInfo) { - capturedListenerEvents.externalFileIngested = true; - } - - @Override - public void onBackgroundError( - final BackgroundErrorReason backgroundErrorReason, final Status backgroundError) { - capturedListenerEvents.backgroundError = true; - } - - @Override - public void onStallConditionsChanged(final WriteStallInfo writeStallInfo) { - capturedListenerEvents.stallConditionsChanged = true; - } - - @Override - public void onFileReadFinish(final FileOperationInfo fileOperationInfo) { - capturedListenerEvents.fileReadFinish = true; - } - - @Override - public void onFileWriteFinish(final FileOperationInfo fileOperationInfo) { - capturedListenerEvents.fileWriteFinish = true; - } - - @Override - public void onFileFlushFinish(final FileOperationInfo fileOperationInfo) { - capturedListenerEvents.fileFlushFinish = true; - } - - @Override - public void onFileSyncFinish(final FileOperationInfo fileOperationInfo) { - capturedListenerEvents.fileSyncFinish = true; - } - - @Override - public void onFileRangeSyncFinish(final FileOperationInfo fileOperationInfo) { - capturedListenerEvents.fileRangeSyncFinish = true; - } - - @Override - public void onFileTruncateFinish(final FileOperationInfo fileOperationInfo) { - capturedListenerEvents.fileTruncateFinish = true; - } - - @Override - public void onFileCloseFinish(final FileOperationInfo fileOperationInfo) { - capturedListenerEvents.fileCloseFinish = true; - } - - @Override - public boolean shouldBeNotifiedOnFileIO() { - capturedListenerEvents.shouldBeNotifiedOnFileIO = true; - return false; - } - - @Override - public boolean onErrorRecoveryBegin( - final BackgroundErrorReason backgroundErrorReason, final Status backgroundError) { - capturedListenerEvents.errorRecoveryBegin = true; - return true; - } - - @Override - public void onErrorRecoveryCompleted(final Status oldBackgroundError) { - capturedListenerEvents.errorRecoveryCompleted = true; - } - } -} diff --git 
a/java/src/test/java/org/rocksdb/FilterTest.java b/java/src/test/java/org/rocksdb/FilterTest.java deleted file mode 100644 index dc5c19fbc..000000000 --- a/java/src/test/java/org/rocksdb/FilterTest.java +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -public class FilterTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void filter() { - // new Bloom filter - final BlockBasedTableConfig blockConfig = new BlockBasedTableConfig(); - try(final Options options = new Options()) { - - try(final Filter bloomFilter = new BloomFilter()) { - blockConfig.setFilterPolicy(bloomFilter); - options.setTableFormatConfig(blockConfig); - } - - try(final Filter bloomFilter = new BloomFilter(10)) { - blockConfig.setFilterPolicy(bloomFilter); - options.setTableFormatConfig(blockConfig); - } - - try(final Filter bloomFilter = new BloomFilter(10, false)) { - blockConfig.setFilterPolicy(bloomFilter); - options.setTableFormatConfig(blockConfig); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/FlushOptionsTest.java b/java/src/test/java/org/rocksdb/FlushOptionsTest.java deleted file mode 100644 index f90ae911d..000000000 --- a/java/src/test/java/org/rocksdb/FlushOptionsTest.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class FlushOptionsTest { - - @Test - public void waitForFlush() { - try (final FlushOptions flushOptions = new FlushOptions()) { - assertThat(flushOptions.waitForFlush()).isTrue(); - flushOptions.setWaitForFlush(false); - assertThat(flushOptions.waitForFlush()).isFalse(); - } - } - - @Test - public void allowWriteStall() { - try (final FlushOptions flushOptions = new FlushOptions()) { - assertThat(flushOptions.allowWriteStall()).isFalse(); - flushOptions.setAllowWriteStall(true); - assertThat(flushOptions.allowWriteStall()).isTrue(); - } - } -} diff --git a/java/src/test/java/org/rocksdb/FlushTest.java b/java/src/test/java/org/rocksdb/FlushTest.java deleted file mode 100644 index 1a354f4ce..000000000 --- a/java/src/test/java/org/rocksdb/FlushTest.java +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import static org.assertj.core.api.Assertions.assertThat; - -public class FlushTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void flush() throws RocksDBException { - try(final Options options = new Options() - .setCreateIfMissing(true) - .setMaxWriteBufferNumber(10) - .setMinWriteBufferNumberToMerge(10); - final WriteOptions wOpt = new WriteOptions() - .setDisableWAL(true); - final FlushOptions flushOptions = new FlushOptions() - .setWaitForFlush(true)) { - assertThat(flushOptions.waitForFlush()).isTrue(); - - try(final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - db.put(wOpt, "key1".getBytes(), "value1".getBytes()); - db.put(wOpt, "key2".getBytes(), "value2".getBytes()); - db.put(wOpt, "key3".getBytes(), "value3".getBytes()); - db.put(wOpt, "key4".getBytes(), "value4".getBytes()); - assertThat(db.getProperty("rocksdb.num-entries-active-mem-table")) - .isEqualTo("4"); - db.flush(flushOptions); - assertThat(db.getProperty("rocksdb.num-entries-active-mem-table")) - .isEqualTo("0"); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/InfoLogLevelTest.java b/java/src/test/java/org/rocksdb/InfoLogLevelTest.java deleted file mode 100644 index 12ee537d9..000000000 --- a/java/src/test/java/org/rocksdb/InfoLogLevelTest.java +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.Environment; - -import java.io.IOException; - -import static java.nio.file.Files.readAllBytes; -import static java.nio.file.Paths.get; -import static org.assertj.core.api.Assertions.assertThat; - -public class InfoLogLevelTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void testInfoLogLevel() throws RocksDBException, - IOException { - try (final RocksDB db = - RocksDB.open(dbFolder.getRoot().getAbsolutePath())) { - db.put("key".getBytes(), "value".getBytes()); - db.flush(new FlushOptions().setWaitForFlush(true)); - assertThat(getLogContentsWithoutHeader()).isNotEmpty(); - } - } - - @Test - public void testFatalLogLevel() throws RocksDBException, - IOException { - try (final Options options = new Options(). - setCreateIfMissing(true). - setInfoLogLevel(InfoLogLevel.FATAL_LEVEL); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - assertThat(options.infoLogLevel()). - isEqualTo(InfoLogLevel.FATAL_LEVEL); - db.put("key".getBytes(), "value".getBytes()); - // As InfoLogLevel is set to FATAL_LEVEL, here we expect the log - // content to be empty. - assertThat(getLogContentsWithoutHeader()).isEmpty(); - } - } - - @Test - public void testFatalLogLevelWithDBOptions() - throws RocksDBException, IOException { - try (final DBOptions dbOptions = new DBOptions(). - setInfoLogLevel(InfoLogLevel.FATAL_LEVEL); - final Options options = new Options(dbOptions, - new ColumnFamilyOptions()). 
- setCreateIfMissing(true); - final RocksDB db = - RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - assertThat(dbOptions.infoLogLevel()). - isEqualTo(InfoLogLevel.FATAL_LEVEL); - assertThat(options.infoLogLevel()). - isEqualTo(InfoLogLevel.FATAL_LEVEL); - db.put("key".getBytes(), "value".getBytes()); - assertThat(getLogContentsWithoutHeader()).isEmpty(); - } - } - - @Test(expected = IllegalArgumentException.class) - public void failIfIllegalByteValueProvided() { - InfoLogLevel.getInfoLogLevel((byte) -1); - } - - @Test - public void valueOf() { - assertThat(InfoLogLevel.valueOf("DEBUG_LEVEL")). - isEqualTo(InfoLogLevel.DEBUG_LEVEL); - } - - /** - * Read LOG file contents into String. - * - * @return LOG file contents as String. - * @throws IOException if file is not found. - */ - private String getLogContentsWithoutHeader() throws IOException { - final String separator = Environment.isWindows() ? - "\n" : System.getProperty("line.separator"); - final String[] lines = new String(readAllBytes(get( - dbFolder.getRoot().getAbsolutePath() + "/LOG"))).split(separator); - - int first_non_header = lines.length; - // Identify the last line of the header - for (int i = lines.length - 1; i >= 0; --i) { - if (lines[i].indexOf("DB pointer") >= 0) { - first_non_header = i + 1; - break; - } - } - StringBuilder builder = new StringBuilder(); - for (int i = first_non_header; i < lines.length; ++i) { - builder.append(lines[i]).append(separator); - } - return builder.toString(); - } -} diff --git a/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java b/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java deleted file mode 100644 index 230694615..000000000 --- a/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import java.util.Random; - -import static org.assertj.core.api.Assertions.assertThat; - -public class IngestExternalFileOptionsTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE - = new RocksNativeLibraryResource(); - - public static final Random rand = - PlatformRandomHelper.getPlatformSpecificRandomFactory(); - - @Test - public void createExternalSstFileInfoWithoutParameters() { - try (final IngestExternalFileOptions options = - new IngestExternalFileOptions()) { - assertThat(options).isNotNull(); - } - } - - @Test - public void createExternalSstFileInfoWithParameters() { - final boolean moveFiles = rand.nextBoolean(); - final boolean snapshotConsistency = rand.nextBoolean(); - final boolean allowGlobalSeqNo = rand.nextBoolean(); - final boolean allowBlockingFlush = rand.nextBoolean(); - try (final IngestExternalFileOptions options = - new IngestExternalFileOptions(moveFiles, snapshotConsistency, - allowGlobalSeqNo, allowBlockingFlush)) { - assertThat(options).isNotNull(); - assertThat(options.moveFiles()).isEqualTo(moveFiles); - assertThat(options.snapshotConsistency()).isEqualTo(snapshotConsistency); - assertThat(options.allowGlobalSeqNo()).isEqualTo(allowGlobalSeqNo); - assertThat(options.allowBlockingFlush()).isEqualTo(allowBlockingFlush); - } - } - - @Test - public void moveFiles() { - try (final IngestExternalFileOptions options = - new IngestExternalFileOptions()) { - final boolean moveFiles = rand.nextBoolean(); - options.setMoveFiles(moveFiles); - assertThat(options.moveFiles()).isEqualTo(moveFiles); - } - } - - @Test - public void snapshotConsistency() { - try (final IngestExternalFileOptions options = - new IngestExternalFileOptions()) { - final boolean snapshotConsistency = rand.nextBoolean(); - options.setSnapshotConsistency(snapshotConsistency); - assertThat(options.snapshotConsistency()).isEqualTo(snapshotConsistency); - } - } - - @Test - public void allowGlobalSeqNo() { - try (final IngestExternalFileOptions options = - new IngestExternalFileOptions()) { - final boolean allowGlobalSeqNo = rand.nextBoolean(); - options.setAllowGlobalSeqNo(allowGlobalSeqNo); - assertThat(options.allowGlobalSeqNo()).isEqualTo(allowGlobalSeqNo); - } - } - - @Test - public void allowBlockingFlush() { - try (final IngestExternalFileOptions options = - new IngestExternalFileOptions()) { - final boolean allowBlockingFlush = rand.nextBoolean(); - options.setAllowBlockingFlush(allowBlockingFlush); - assertThat(options.allowBlockingFlush()).isEqualTo(allowBlockingFlush); - } - } - - @Test - public void ingestBehind() { - try (final IngestExternalFileOptions options = - new IngestExternalFileOptions()) { - assertThat(options.ingestBehind()).isFalse(); - options.setIngestBehind(true); - assertThat(options.ingestBehind()).isTrue(); - } - } - - @Test - public void writeGlobalSeqno() { - try (final IngestExternalFileOptions options = - new IngestExternalFileOptions()) { - assertThat(options.writeGlobalSeqno()).isFalse(); - options.setWriteGlobalSeqno(true); - assertThat(options.writeGlobalSeqno()).isTrue(); - } - } -} diff --git a/java/src/test/java/org/rocksdb/KeyMayExistTest.java b/java/src/test/java/org/rocksdb/KeyMayExistTest.java deleted file mode 100644 index 3f3bec6ba..000000000 --- a/java/src/test/java/org/rocksdb/KeyMayExistTest.java +++ /dev/null @@ -1,528 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; - -import java.nio.BufferUnderflowException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.junit.*; -import org.junit.rules.ExpectedException; -import org.junit.rules.TemporaryFolder; - -public class KeyMayExistTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Rule public ExpectedException exceptionRule = ExpectedException.none(); - - List<ColumnFamilyDescriptor> cfDescriptors; - List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>(); - RocksDB db; - - // Slice key - int offset; - int len; - - byte[] sliceKey; - byte[] sliceValue; - - @Before - public void before() throws RocksDBException { - cfDescriptors = Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final DBOptions options = - new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); - - db = RocksDB.open( - options, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList); - - // Build the slice key - final StringBuilder builder = new StringBuilder("prefix"); - offset = builder.toString().length(); - builder.append("slice key 0"); - len = builder.toString().length() - offset; - builder.append("suffix"); - sliceKey = builder.toString().getBytes(UTF_8); - sliceValue = "slice value 0".getBytes(UTF_8); - } - - @After - public void after() { - for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { - columnFamilyHandle.close(); - } - db.close(); - } - - @Test - public void keyMayExist() throws RocksDBException { - assertThat(columnFamilyHandleList.size()).isEqualTo(2); - - // Standard key - db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); - - // Test without column family - final Holder<byte[]> holder = new Holder<>(); - boolean exists = db.keyMayExist("key".getBytes(UTF_8), holder); - assertThat(exists).isTrue(); - assertThat(holder.getValue()).isNotNull(); - assertThat(new String(holder.getValue(), UTF_8)).isEqualTo("value"); - - exists = db.keyMayExist("key".getBytes(UTF_8), null); - assertThat(exists).isTrue(); - } - - @Test - public void keyMayExistReadOptions() throws RocksDBException { - // Test without column family but with readOptions - try (final ReadOptions readOptions = new ReadOptions()) { - // Standard key - db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); - - // Slice key - db.put(sliceKey, offset, len, sliceValue, 0, sliceValue.length); - - final Holder<byte[]> holder = new Holder<>(); - boolean exists = db.keyMayExist(readOptions, "key".getBytes(UTF_8), holder); - assertThat(exists).isTrue(); - assertThat(holder.getValue()).isNotNull(); - assertThat(new String(holder.getValue(), UTF_8)).isEqualTo("value"); - - exists = db.keyMayExist(readOptions, "key".getBytes(UTF_8), null); - assertThat(exists).isTrue(); - - exists = db.keyMayExist(readOptions, sliceKey, offset, len, holder); - assertThat(exists).isTrue(); - assertThat(holder.getValue()).isNotNull(); - assertThat(holder.getValue()).isEqualTo(sliceValue); - - exists = db.keyMayExist(readOptions, sliceKey,
offset, len, null); - assertThat(exists).isTrue(); - } - } - - @Test - public void keyMayExistColumnFamily() throws RocksDBException { - // Standard key - db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); - - // Slice key - db.put(sliceKey, offset, len, sliceValue, 0, sliceValue.length); - - // Test slice key with column family - final Holder holder = new Holder<>(); - boolean exists = db.keyMayExist(columnFamilyHandleList.get(0), sliceKey, offset, len, holder); - assertThat(exists).isTrue(); - assertThat(holder.getValue()).isNotNull(); - assertThat(holder.getValue()).isEqualTo(sliceValue); - - exists = db.keyMayExist(columnFamilyHandleList.get(0), sliceKey, offset, len, null); - assertThat(exists).isTrue(); - } - - @Test - public void keyMayExistColumnFamilyReadOptions() throws RocksDBException { - // Standard key - db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); - - // Slice key - db.put(sliceKey, offset, len, sliceValue, 0, sliceValue.length); - - // Test slice key with column family and read options - final Holder holder = new Holder<>(); - try (final ReadOptions readOptions = new ReadOptions()) { - boolean exists = - db.keyMayExist(columnFamilyHandleList.get(0), readOptions, "key".getBytes(UTF_8), holder); - assertThat(exists).isTrue(); - assertThat(holder.getValue()).isNotNull(); - assertThat(new String(holder.getValue(), UTF_8)).isEqualTo("value"); - - exists = - db.keyMayExist(columnFamilyHandleList.get(0), readOptions, "key".getBytes(UTF_8), null); - assertThat(exists).isTrue(); - - // Test slice key with column family and read options - exists = - db.keyMayExist(columnFamilyHandleList.get(0), readOptions, sliceKey, offset, len, holder); - assertThat(exists).isTrue(); - assertThat(holder.getValue()).isNotNull(); - assertThat(holder.getValue()).isEqualTo(sliceValue); - - exists = - db.keyMayExist(columnFamilyHandleList.get(0), readOptions, sliceKey, offset, len, null); - assertThat(exists).isTrue(); - } - } - - @Test - public void keyMayExistSliceKey() throws RocksDBException { - assertThat(columnFamilyHandleList.size()).isEqualTo(2); - - // Standard key - db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); - - // Slice key - db.put(sliceKey, offset, len, sliceValue, 0, sliceValue.length); - - final Holder holder = new Holder<>(); - boolean exists = db.keyMayExist(sliceKey, offset, len, holder); - assertThat(exists).isTrue(); - assertThat(holder.getValue()).isNotNull(); - assertThat(holder.getValue()).isEqualTo(sliceValue); - - exists = db.keyMayExist(sliceKey, offset, len, null); - assertThat(exists).isTrue(); - - exists = db.keyMayExist("slice key".getBytes(UTF_8), null); - assertThat(exists).isFalse(); - - exists = db.keyMayExist("slice key 0".getBytes(UTF_8), null); - assertThat(exists).isTrue(); - - // Test with column family - exists = db.keyMayExist(columnFamilyHandleList.get(0), "key".getBytes(UTF_8), holder); - assertThat(exists).isTrue(); - assertThat(holder.getValue()).isNotNull(); - assertThat(new String(holder.getValue(), UTF_8)).isEqualTo("value"); - - exists = db.keyMayExist(columnFamilyHandleList.get(0), "key".getBytes(UTF_8), null); - assertThat(exists).isTrue(); - - // KeyMayExist in CF1 must return null value - exists = db.keyMayExist(columnFamilyHandleList.get(1), "key".getBytes(UTF_8), holder); - assertThat(exists).isFalse(); - assertThat(holder.getValue()).isNull(); - exists = db.keyMayExist(columnFamilyHandleList.get(1), "key".getBytes(UTF_8), null); - assertThat(exists).isFalse(); - - // slice key - exists = 
db.keyMayExist(columnFamilyHandleList.get(1), sliceKey, 1, 3, holder); - assertThat(exists).isFalse(); - assertThat(holder.getValue()).isNull(); - exists = db.keyMayExist(columnFamilyHandleList.get(1), sliceKey, 1, 3, null); - assertThat(exists).isFalse(); - } - - @Test - public void keyMayExistCF1() throws RocksDBException { - // Standard key - db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); - - // Slice key - db.put(sliceKey, offset, len, sliceValue, 0, sliceValue.length); - - // KeyMayExist in CF1 must return null value - final Holder holder = new Holder<>(); - boolean exists = db.keyMayExist(columnFamilyHandleList.get(1), "key".getBytes(UTF_8), holder); - assertThat(exists).isFalse(); - assertThat(holder.getValue()).isNull(); - exists = db.keyMayExist(columnFamilyHandleList.get(1), "key".getBytes(UTF_8), null); - assertThat(exists).isFalse(); - } - - @Test - public void keyMayExistCF1Slice() throws RocksDBException { - // Standard key - db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); - - // Slice key - db.put(sliceKey, offset, len, sliceValue, 0, sliceValue.length); - - // slice key - final Holder holder = new Holder<>(); - boolean exists = db.keyMayExist(columnFamilyHandleList.get(1), sliceKey, 1, 3, holder); - assertThat(exists).isFalse(); - assertThat(holder.getValue()).isNull(); - exists = db.keyMayExist(columnFamilyHandleList.get(1), sliceKey, 1, 3, null); - assertThat(exists).isFalse(); - } - - @Test - public void keyMayExistBB() throws RocksDBException { - // Standard key - db.put("keyBB".getBytes(UTF_8), "valueBB".getBytes(UTF_8)); - - final byte[] key = "keyBB".getBytes(UTF_8); - final byte[] value = "valueBB".getBytes(UTF_8); - - final ByteBuffer keyBuffer = ByteBuffer.allocateDirect(key.length); - keyBuffer.put(key, 0, key.length); - keyBuffer.flip(); - - assertThat(db.keyMayExist(keyBuffer)).isEqualTo(true); - - final ByteBuffer valueBuffer = ByteBuffer.allocateDirect(value.length + 24); - valueBuffer.position(12); - KeyMayExist keyMayExist = db.keyMayExist(keyBuffer, valueBuffer); - assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); - assertThat(keyMayExist.valueLength).isEqualTo(value.length); - assertThat(valueBuffer.position()).isEqualTo(12); - assertThat(valueBuffer.limit()).isEqualTo(12 + value.length); - byte[] valueGet = new byte[value.length]; - valueBuffer.get(valueGet); - assertThat(valueGet).isEqualTo(value); - - valueBuffer.limit(value.length + 24); - valueBuffer.position(25); - keyMayExist = db.keyMayExist(keyBuffer, valueBuffer); - assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); - assertThat(keyMayExist.valueLength).isEqualTo(value.length); - assertThat(valueBuffer.position()).isEqualTo(25); - assertThat(valueBuffer.limit()).isEqualTo(24 + value.length); - valueGet = new byte[value.length - 1]; - valueBuffer.get(valueGet); - assertThat(valueGet).isEqualTo(Arrays.copyOfRange(value, 0, value.length - 1)); - - exceptionRule.expect(BufferUnderflowException.class); - valueGet = new byte[value.length]; - valueBuffer.get(valueGet); - } - - @Test - public void keyMayExistBBReadOptions() throws RocksDBException { - // Standard key - db.put("keyBB".getBytes(UTF_8), "valueBB".getBytes(UTF_8)); - - final byte[] key = "keyBB".getBytes(UTF_8); - final byte[] value = "valueBB".getBytes(UTF_8); - - final ByteBuffer keyBuffer = ByteBuffer.allocateDirect(key.length); - keyBuffer.put(key, 0, key.length); - keyBuffer.flip(); - - try (final ReadOptions readOptions = new ReadOptions()) { - 
assertThat(db.keyMayExist(readOptions, keyBuffer)).isEqualTo(true); - - final ByteBuffer valueBuffer = ByteBuffer.allocateDirect(value.length + 24); - valueBuffer.position(12); - KeyMayExist keyMayExist = db.keyMayExist(readOptions, keyBuffer, valueBuffer); - assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); - assertThat(keyMayExist.valueLength).isEqualTo(value.length); - assertThat(valueBuffer.position()).isEqualTo(12); - assertThat(valueBuffer.limit()).isEqualTo(12 + value.length); - byte[] valueGet = new byte[value.length]; - valueBuffer.get(valueGet); - assertThat(valueGet).isEqualTo(value); - - valueBuffer.limit(value.length + 24); - valueBuffer.position(25); - keyMayExist = db.keyMayExist(readOptions, keyBuffer, valueBuffer); - assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); - assertThat(keyMayExist.valueLength).isEqualTo(value.length); - assertThat(valueBuffer.position()).isEqualTo(25); - assertThat(valueBuffer.limit()).isEqualTo(24 + value.length); - valueGet = new byte[value.length - 1]; - valueBuffer.get(valueGet); - assertThat(valueGet).isEqualTo(Arrays.copyOfRange(value, 0, value.length - 1)); - - exceptionRule.expect(BufferUnderflowException.class); - valueGet = new byte[value.length]; - valueBuffer.get(valueGet); - } - } - - @Test - public void keyMayExistBBNullValue() throws RocksDBException { - // Standard key - db.put("keyBB".getBytes(UTF_8), "valueBB".getBytes(UTF_8)); - - final byte[] key = "keyBB".getBytes(UTF_8); - - final ByteBuffer keyBuffer = ByteBuffer.allocateDirect(key.length); - keyBuffer.put(key, 0, key.length); - keyBuffer.flip(); - - exceptionRule.expect(AssertionError.class); - exceptionRule.expectMessage( - "value ByteBuffer parameter cannot be null. If you do not need the value, use a different version of the method"); - final KeyMayExist keyMayExist = db.keyMayExist(keyBuffer, null); - } - - @Test - public void keyMayExistBBCF() throws RocksDBException { - // Standard key - db.put(columnFamilyHandleList.get(0), "keyBBCF0".getBytes(UTF_8), "valueBBCF0".getBytes(UTF_8)); - db.put(columnFamilyHandleList.get(1), "keyBBCF1".getBytes(UTF_8), "valueBBCF1".getBytes(UTF_8)); - - // 0 is the default CF - byte[] key = "keyBBCF0".getBytes(UTF_8); - ByteBuffer keyBuffer = ByteBuffer.allocateDirect(key.length); - keyBuffer.put(key, 0, key.length); - keyBuffer.flip(); - - assertThat(db.keyMayExist(keyBuffer)).isEqualTo(true); - assertThat(db.keyMayExist(columnFamilyHandleList.get(1), keyBuffer)).isEqualTo(false); - assertThat(db.keyMayExist(columnFamilyHandleList.get(0), keyBuffer)).isEqualTo(true); - - // 1 is just a CF - key = "keyBBCF1".getBytes(UTF_8); - keyBuffer = ByteBuffer.allocateDirect(key.length); - keyBuffer.put(key, 0, key.length); - keyBuffer.flip(); - - assertThat(db.keyMayExist(keyBuffer)).isEqualTo(false); - assertThat(db.keyMayExist(columnFamilyHandleList.get(1), keyBuffer)).isEqualTo(true); - assertThat(db.keyMayExist(columnFamilyHandleList.get(0), keyBuffer)).isEqualTo(false); - - exceptionRule.expect(AssertionError.class); - exceptionRule.expectMessage( - "value ByteBuffer parameter cannot be null. 
If you do not need the value, use a different version of the method"); - final KeyMayExist keyMayExist = db.keyMayExist(columnFamilyHandleList.get(0), keyBuffer, null); - } - - @Test - public void keyMayExistBBCFReadOptions() throws RocksDBException { - // Standard key - db.put(columnFamilyHandleList.get(0), "keyBBCF0".getBytes(UTF_8), "valueBBCF0".getBytes(UTF_8)); - db.put(columnFamilyHandleList.get(1), "keyBBCF1".getBytes(UTF_8), "valueBBCF1".getBytes(UTF_8)); - - // 0 is the default CF - byte[] key = "keyBBCF0".getBytes(UTF_8); - ByteBuffer keyBuffer = ByteBuffer.allocateDirect(key.length); - keyBuffer.put(key, 0, key.length); - keyBuffer.flip(); - - try (final ReadOptions readOptions = new ReadOptions()) { - assertThat(db.keyMayExist(keyBuffer)).isEqualTo(true); - assertThat(db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer)) - .isEqualTo(false); - assertThat(db.keyMayExist(columnFamilyHandleList.get(0), readOptions, keyBuffer)) - .isEqualTo(true); - - // 1 is just a CF - key = "keyBBCF1".getBytes(UTF_8); - keyBuffer = ByteBuffer.allocateDirect(key.length); - keyBuffer.put(key, 0, key.length); - keyBuffer.flip(); - - assertThat(db.keyMayExist(readOptions, keyBuffer)).isEqualTo(false); - assertThat(db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer)) - .isEqualTo(true); - assertThat(db.keyMayExist(columnFamilyHandleList.get(0), readOptions, keyBuffer)) - .isEqualTo(false); - - exceptionRule.expect(AssertionError.class); - exceptionRule.expectMessage( - "value ByteBuffer parameter cannot be null. If you do not need the value, use a different version of the method"); - final KeyMayExist keyMayExist = - db.keyMayExist(columnFamilyHandleList.get(0), readOptions, keyBuffer, null); - } - } - - @Test - public void keyMayExistBBCFOffset() throws RocksDBException { - db.put(columnFamilyHandleList.get(1), "keyBBCF1".getBytes(UTF_8), "valueBBCF1".getBytes(UTF_8)); - - final byte[] key = "keyBBCF1".getBytes(UTF_8); - final byte[] value = "valueBBCF1".getBytes(UTF_8); - - final ByteBuffer keyBuffer = ByteBuffer.allocateDirect(key.length); - keyBuffer.put(key, 0, key.length); - keyBuffer.flip(); - - assertThat(db.keyMayExist(columnFamilyHandleList.get(1), keyBuffer)).isEqualTo(true); - - final ByteBuffer valueBuffer = ByteBuffer.allocateDirect(value.length + 24); - valueBuffer.position(12); - KeyMayExist keyMayExist = db.keyMayExist(columnFamilyHandleList.get(1), keyBuffer, valueBuffer); - assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); - assertThat(keyMayExist.valueLength).isEqualTo(value.length); - assertThat(valueBuffer.position()).isEqualTo(12); - assertThat(valueBuffer.limit()).isEqualTo(12 + value.length); - byte[] valueGet = new byte[value.length]; - valueBuffer.get(valueGet); - assertThat(valueGet).isEqualTo(value); - - valueBuffer.limit(value.length + 24); - valueBuffer.position(25); - keyMayExist = db.keyMayExist(columnFamilyHandleList.get(1), keyBuffer, valueBuffer); - assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); - assertThat(keyMayExist.valueLength).isEqualTo(value.length); - assertThat(valueBuffer.position()).isEqualTo(25); - assertThat(valueBuffer.limit()).isEqualTo(24 + value.length); - valueGet = new byte[value.length - 1]; - valueBuffer.get(valueGet); - assertThat(valueGet).isEqualTo(Arrays.copyOfRange(value, 0, value.length - 1)); - - exceptionRule.expect(BufferUnderflowException.class); - valueGet = new byte[value.length]; - valueBuffer.get(valueGet); - } - - 
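The tests in this file exercise keyMayExist across byte[], ByteBuffer, and column-family variants. As a minimal usage sketch (database path and class name are hypothetical), the Holder-based overload gives a fast, possibly false-positive membership check that can short-circuit a full get():

import static java.nio.charset.StandardCharsets.UTF_8;

import org.rocksdb.Holder;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class KeyMayExistSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/key-may-exist-db")) {
      db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8));

      // keyMayExist may report true for keys that are absent (false positive),
      // but it never reports false for a key that is present.
      final Holder<byte[]> valueHolder = new Holder<>();
      if (db.keyMayExist("key".getBytes(UTF_8), valueHolder)) {
        final byte[] value = valueHolder.getValue() != null
            ? valueHolder.getValue()          // value was already found in memory
            : db.get("key".getBytes(UTF_8));  // fall back to a full lookup
        System.out.println(new String(value, UTF_8));
      }
    }
  }
}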
@Test - public void keyMayExistBBCFOffsetReadOptions() throws RocksDBException { - db.put(columnFamilyHandleList.get(1), "keyBBCF1".getBytes(UTF_8), "valueBBCF1".getBytes(UTF_8)); - - final byte[] key = "keyBBCF1".getBytes(UTF_8); - final byte[] value = "valueBBCF1".getBytes(UTF_8); - - final ByteBuffer keyBuffer = ByteBuffer.allocateDirect(key.length); - keyBuffer.put(key, 0, key.length); - keyBuffer.flip(); - - try (final ReadOptions readOptions = new ReadOptions()) { - assertThat(db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer)) - .isEqualTo(true); - - final ByteBuffer valueBuffer = ByteBuffer.allocateDirect(value.length + 24); - valueBuffer.position(12); - KeyMayExist keyMayExist = - db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer, valueBuffer); - assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); - assertThat(keyMayExist.valueLength).isEqualTo(value.length); - assertThat(valueBuffer.position()).isEqualTo(12); - assertThat(valueBuffer.limit()).isEqualTo(12 + value.length); - byte[] valueGet = new byte[value.length]; - valueBuffer.get(valueGet); - assertThat(valueGet).isEqualTo(value); - - valueBuffer.limit(value.length + 24); - valueBuffer.position(25); - keyMayExist = - db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer, valueBuffer); - assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); - assertThat(keyMayExist.valueLength).isEqualTo(value.length); - assertThat(valueBuffer.position()).isEqualTo(25); - assertThat(valueBuffer.limit()).isEqualTo(24 + value.length); - valueGet = new byte[value.length - 1]; - valueBuffer.get(valueGet); - assertThat(valueGet).isEqualTo(Arrays.copyOfRange(value, 0, value.length - 1)); - - exceptionRule.expect(BufferUnderflowException.class); - valueGet = new byte[value.length]; - valueBuffer.get(valueGet); - } - } - - @Test - public void keyMayExistNonUnicodeString() throws RocksDBException { - final byte[] key = "key".getBytes(UTF_8); - final byte[] value = {(byte) 0x80}; // invalid unicode code-point - db.put(key, value); - - final byte[] buf = new byte[10]; - final int read = db.get(key, buf); - assertThat(read).isEqualTo(1); - assertThat(buf).startsWith(value); - - final Holder holder = new Holder<>(); - boolean exists = db.keyMayExist("key".getBytes(UTF_8), holder); - assertThat(exists).isTrue(); - assertThat(holder.getValue()).isNotNull(); - assertThat(holder.getValue()).isEqualTo(value); - - exists = db.keyMayExist("key".getBytes(UTF_8), null); - assertThat(exists).isTrue(); - } -} diff --git a/java/src/test/java/org/rocksdb/LRUCacheTest.java b/java/src/test/java/org/rocksdb/LRUCacheTest.java deleted file mode 100644 index 4d194e712..000000000 --- a/java/src/test/java/org/rocksdb/LRUCacheTest.java +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
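LRUCacheTest below only constructs a cache and reads its usage counters. In practice an LRUCache is usually shared as a block cache through BlockBasedTableConfig; a small sketch under assumed sizes and an assumed path:

import org.rocksdb.BlockBasedTableConfig;
import org.rocksdb.Cache;
import org.rocksdb.LRUCache;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class SharedBlockCacheSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    // 64 MiB cache, 6 shard bits, no strict capacity limit, 50% high-priority pool.
    try (final Cache blockCache = new LRUCache(64 * 1024 * 1024, 6, false, 0.5);
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(blockCache));
         final RocksDB db = RocksDB.open(options, "/tmp/lru-cache-db")) {
      db.put("key".getBytes(), "value".getBytes());
      System.out.println("block cache usage: " + blockCache.getUsage());
    }
  }
}

The same Cache instance can be passed to several Options objects so that multiple databases or column families draw from one memory budget.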
- -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import org.junit.ClassRule; -import org.junit.Test; - -public class LRUCacheTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void newLRUCache() { - final long capacity = 80000000; - final int numShardBits = 16; - final boolean strictCapacityLimit = true; - final double highPriPoolRatio = 0.5; - final double lowPriPoolRatio = 0.5; - try (final Cache lruCache = new LRUCache( - capacity, numShardBits, strictCapacityLimit, highPriPoolRatio, lowPriPoolRatio)) { - //no op - assertThat(lruCache.getUsage()).isGreaterThanOrEqualTo(0); - assertThat(lruCache.getPinnedUsage()).isGreaterThanOrEqualTo(0); - } - } -} diff --git a/java/src/test/java/org/rocksdb/LoggerTest.java b/java/src/test/java/org/rocksdb/LoggerTest.java deleted file mode 100644 index 5bc299f11..000000000 --- a/java/src/test/java/org/rocksdb/LoggerTest.java +++ /dev/null @@ -1,239 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.assertj.core.api.Assertions.assertThat; - -public class LoggerTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void customLogger() throws RocksDBException { - final AtomicInteger logMessageCounter = new AtomicInteger(); - try (final Options options = new Options(). - setInfoLogLevel(InfoLogLevel.DEBUG_LEVEL). - setCreateIfMissing(true); - final Logger logger = new Logger(options) { - // Create new logger with max log level passed by options - @Override - protected void log(InfoLogLevel infoLogLevel, String logMsg) { - assertThat(logMsg).isNotNull(); - assertThat(logMsg.length()).isGreaterThan(0); - logMessageCounter.incrementAndGet(); - } - } - ) { - // Set custom logger to options - options.setLogger(logger); - - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - // there should be more than zero received log messages in - // debug level. - assertThat(logMessageCounter.get()).isGreaterThan(0); - } - } - } - - @Test - public void warnLogger() throws RocksDBException { - final AtomicInteger logMessageCounter = new AtomicInteger(); - try (final Options options = new Options(). - setInfoLogLevel(InfoLogLevel.WARN_LEVEL). - setCreateIfMissing(true); - - final Logger logger = new Logger(options) { - // Create new logger with max log level passed by options - @Override - protected void log(InfoLogLevel infoLogLevel, String logMsg) { - assertThat(logMsg).isNotNull(); - assertThat(logMsg.length()).isGreaterThan(0); - logMessageCounter.incrementAndGet(); - } - } - ) { - - // Set custom logger to options - options.setLogger(logger); - - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - // there should be zero messages - // using warn level as log level. 
- assertThat(logMessageCounter.get()).isEqualTo(0); - } - } - } - - - @Test - public void fatalLogger() throws RocksDBException { - final AtomicInteger logMessageCounter = new AtomicInteger(); - try (final Options options = new Options(). - setInfoLogLevel(InfoLogLevel.FATAL_LEVEL). - setCreateIfMissing(true); - - final Logger logger = new Logger(options) { - // Create new logger with max log level passed by options - @Override - protected void log(InfoLogLevel infoLogLevel, String logMsg) { - assertThat(logMsg).isNotNull(); - assertThat(logMsg.length()).isGreaterThan(0); - logMessageCounter.incrementAndGet(); - } - } - ) { - - // Set custom logger to options - options.setLogger(logger); - - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - // there should be zero messages - // using fatal level as log level. - assertThat(logMessageCounter.get()).isEqualTo(0); - } - } - } - - @Test - public void dbOptionsLogger() throws RocksDBException { - final AtomicInteger logMessageCounter = new AtomicInteger(); - try (final DBOptions options = new DBOptions(). - setInfoLogLevel(InfoLogLevel.FATAL_LEVEL). - setCreateIfMissing(true); - final Logger logger = new Logger(options) { - // Create new logger with max log level passed by options - @Override - protected void log(InfoLogLevel infoLogLevel, String logMsg) { - assertThat(logMsg).isNotNull(); - assertThat(logMsg.length()).isGreaterThan(0); - logMessageCounter.incrementAndGet(); - } - } - ) { - // Set custom logger to options - options.setLogger(logger); - - final List cfDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY)); - final List cfHandles = new ArrayList<>(); - - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, cfHandles)) { - try { - // there should be zero messages - // using fatal level as log level. - assertThat(logMessageCounter.get()).isEqualTo(0); - } finally { - for (final ColumnFamilyHandle columnFamilyHandle : cfHandles) { - columnFamilyHandle.close(); - } - } - } - } - } - - @Test - public void setWarnLogLevel() { - final AtomicInteger logMessageCounter = new AtomicInteger(); - try (final Options options = new Options(). - setInfoLogLevel(InfoLogLevel.FATAL_LEVEL). - setCreateIfMissing(true); - final Logger logger = new Logger(options) { - // Create new logger with max log level passed by options - @Override - protected void log(InfoLogLevel infoLogLevel, String logMsg) { - assertThat(logMsg).isNotNull(); - assertThat(logMsg.length()).isGreaterThan(0); - logMessageCounter.incrementAndGet(); - } - } - ) { - assertThat(logger.infoLogLevel()). - isEqualTo(InfoLogLevel.FATAL_LEVEL); - logger.setInfoLogLevel(InfoLogLevel.WARN_LEVEL); - assertThat(logger.infoLogLevel()). - isEqualTo(InfoLogLevel.WARN_LEVEL); - } - } - - @Test - public void setInfoLogLevel() { - final AtomicInteger logMessageCounter = new AtomicInteger(); - try (final Options options = new Options(). - setInfoLogLevel(InfoLogLevel.FATAL_LEVEL). - setCreateIfMissing(true); - final Logger logger = new Logger(options) { - // Create new logger with max log level passed by options - @Override - protected void log(InfoLogLevel infoLogLevel, String logMsg) { - assertThat(logMsg).isNotNull(); - assertThat(logMsg.length()).isGreaterThan(0); - logMessageCounter.incrementAndGet(); - } - } - ) { - assertThat(logger.infoLogLevel()). 
- isEqualTo(InfoLogLevel.FATAL_LEVEL); - logger.setInfoLogLevel(InfoLogLevel.DEBUG_LEVEL); - assertThat(logger.infoLogLevel()). - isEqualTo(InfoLogLevel.DEBUG_LEVEL); - } - } - - @Test - public void changeLogLevelAtRuntime() throws RocksDBException { - final AtomicInteger logMessageCounter = new AtomicInteger(); - try (final Options options = new Options(). - setInfoLogLevel(InfoLogLevel.FATAL_LEVEL). - setCreateIfMissing(true); - - // Create new logger with max log level passed by options - final Logger logger = new Logger(options) { - @Override - protected void log(InfoLogLevel infoLogLevel, String logMsg) { - assertThat(logMsg).isNotNull(); - assertThat(logMsg.length()).isGreaterThan(0); - logMessageCounter.incrementAndGet(); - } - } - ) { - // Set custom logger to options - options.setLogger(logger); - - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - - // there should be zero messages - // using fatal level as log level. - assertThat(logMessageCounter.get()).isEqualTo(0); - - // change log level to debug level - logger.setInfoLogLevel(InfoLogLevel.DEBUG_LEVEL); - - db.put("key".getBytes(), "value".getBytes()); - db.flush(new FlushOptions().setWaitForFlush(true)); - - // messages shall be received due to previous actions. - assertThat(logMessageCounter.get()).isNotEqualTo(0); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/MemTableTest.java b/java/src/test/java/org/rocksdb/MemTableTest.java deleted file mode 100644 index 73ac589a9..000000000 --- a/java/src/test/java/org/rocksdb/MemTableTest.java +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class MemTableTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void hashSkipListMemTable() throws RocksDBException { - try(final Options options = new Options()) { - // Test HashSkipListMemTableConfig - HashSkipListMemTableConfig memTableConfig = - new HashSkipListMemTableConfig(); - assertThat(memTableConfig.bucketCount()). - isEqualTo(1000000); - memTableConfig.setBucketCount(2000000); - assertThat(memTableConfig.bucketCount()). - isEqualTo(2000000); - assertThat(memTableConfig.height()). - isEqualTo(4); - memTableConfig.setHeight(5); - assertThat(memTableConfig.height()). - isEqualTo(5); - assertThat(memTableConfig.branchingFactor()). - isEqualTo(4); - memTableConfig.setBranchingFactor(6); - assertThat(memTableConfig.branchingFactor()). - isEqualTo(6); - options.setMemTableConfig(memTableConfig); - } - } - - @Test - public void skipListMemTable() throws RocksDBException { - try(final Options options = new Options()) { - SkipListMemTableConfig skipMemTableConfig = - new SkipListMemTableConfig(); - assertThat(skipMemTableConfig.lookahead()). - isEqualTo(0); - skipMemTableConfig.setLookahead(20); - assertThat(skipMemTableConfig.lookahead()). 
- isEqualTo(20); - options.setMemTableConfig(skipMemTableConfig); - } - } - - @Test - public void hashLinkedListMemTable() throws RocksDBException { - try(final Options options = new Options()) { - HashLinkedListMemTableConfig hashLinkedListMemTableConfig = - new HashLinkedListMemTableConfig(); - assertThat(hashLinkedListMemTableConfig.bucketCount()). - isEqualTo(50000); - hashLinkedListMemTableConfig.setBucketCount(100000); - assertThat(hashLinkedListMemTableConfig.bucketCount()). - isEqualTo(100000); - assertThat(hashLinkedListMemTableConfig.hugePageTlbSize()). - isEqualTo(0); - hashLinkedListMemTableConfig.setHugePageTlbSize(1); - assertThat(hashLinkedListMemTableConfig.hugePageTlbSize()). - isEqualTo(1); - assertThat(hashLinkedListMemTableConfig. - bucketEntriesLoggingThreshold()). - isEqualTo(4096); - hashLinkedListMemTableConfig. - setBucketEntriesLoggingThreshold(200); - assertThat(hashLinkedListMemTableConfig. - bucketEntriesLoggingThreshold()). - isEqualTo(200); - assertThat(hashLinkedListMemTableConfig. - ifLogBucketDistWhenFlush()).isTrue(); - hashLinkedListMemTableConfig. - setIfLogBucketDistWhenFlush(false); - assertThat(hashLinkedListMemTableConfig. - ifLogBucketDistWhenFlush()).isFalse(); - assertThat(hashLinkedListMemTableConfig. - thresholdUseSkiplist()). - isEqualTo(256); - hashLinkedListMemTableConfig.setThresholdUseSkiplist(29); - assertThat(hashLinkedListMemTableConfig. - thresholdUseSkiplist()). - isEqualTo(29); - options.setMemTableConfig(hashLinkedListMemTableConfig); - } - } - - @Test - public void vectorMemTable() throws RocksDBException { - try(final Options options = new Options()) { - VectorMemTableConfig vectorMemTableConfig = - new VectorMemTableConfig(); - assertThat(vectorMemTableConfig.reservedSize()). - isEqualTo(0); - vectorMemTableConfig.setReservedSize(123); - assertThat(vectorMemTableConfig.reservedSize()). - isEqualTo(123); - options.setMemTableConfig(vectorMemTableConfig); - } - } -} diff --git a/java/src/test/java/org/rocksdb/MemoryUtilTest.java b/java/src/test/java/org/rocksdb/MemoryUtilTest.java deleted file mode 100644 index 1bea02379..000000000 --- a/java/src/test/java/org/rocksdb/MemoryUtilTest.java +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
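MemTableTest above checks the defaults and setters of the various memtable configs. Wiring one into an actual database is just a matter of passing it to Options, roughly as below; the lookahead value and path are arbitrary.

import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.SkipListMemTableConfig;

public class MemTableConfigSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    // A skip-list memtable with a small lookahead; the other configs
    // (HashSkipList, HashLinkedList, Vector) are applied the same way.
    final SkipListMemTableConfig memTableConfig =
        new SkipListMemTableConfig().setLookahead(20);
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             .setMemTableConfig(memTableConfig);
         final RocksDB db = RocksDB.open(options, "/tmp/memtable-config-db")) {
      db.put("key".getBytes(), "value".getBytes());
    }
  }
}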
- -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.nio.charset.StandardCharsets; -import java.util.*; - -import static org.assertj.core.api.Assertions.assertThat; - -public class MemoryUtilTest { - - private static final String MEMTABLE_SIZE = "rocksdb.size-all-mem-tables"; - private static final String UNFLUSHED_MEMTABLE_SIZE = "rocksdb.cur-size-all-mem-tables"; - private static final String TABLE_READERS = "rocksdb.estimate-table-readers-mem"; - - private final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); - private final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder1 = new TemporaryFolder(); - @Rule public TemporaryFolder dbFolder2 = new TemporaryFolder(); - - /** - * Test MemoryUtil.getApproximateMemoryUsageByType before and after a put + get - */ - @Test - public void getApproximateMemoryUsageByType() throws RocksDBException { - try (final Cache cache = new LRUCache(8 * 1024 * 1024); - final Options options = - new Options() - .setCreateIfMissing(true) - .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache)); - final FlushOptions flushOptions = - new FlushOptions().setWaitForFlush(true); - final RocksDB db = - RocksDB.open(options, dbFolder1.getRoot().getAbsolutePath())) { - - List dbs = new ArrayList<>(1); - dbs.add(db); - Set caches = new HashSet<>(1); - caches.add(cache); - Map usage = MemoryUtil.getApproximateMemoryUsageByType(dbs, caches); - - assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo( - db.getAggregatedLongProperty(MEMTABLE_SIZE)); - assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo( - db.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE)); - assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo( - db.getAggregatedLongProperty(TABLE_READERS)); - // TODO(peterd): disable block cache entry stats and check for 0 - assertThat(usage.get(MemoryUsageType.kCacheTotal)).isLessThan(1024); - - db.put(key, value); - db.flush(flushOptions); - db.get(key); - - usage = MemoryUtil.getApproximateMemoryUsageByType(dbs, caches); - assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isGreaterThan(0); - assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo( - db.getAggregatedLongProperty(MEMTABLE_SIZE)); - assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isGreaterThan(0); - assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo( - db.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE)); - assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isGreaterThan(0); - assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo( - db.getAggregatedLongProperty(TABLE_READERS)); - assertThat(usage.get(MemoryUsageType.kCacheTotal)).isGreaterThan(0); - - } - } - - /** - * Test MemoryUtil.getApproximateMemoryUsageByType with null inputs - */ - @Test - public void getApproximateMemoryUsageByTypeNulls() throws RocksDBException { - Map usage = MemoryUtil.getApproximateMemoryUsageByType(null, null); - - assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo(null); - assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo(null); - assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo(null); - assertThat(usage.get(MemoryUsageType.kCacheTotal)).isEqualTo(null); - } - 
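Outside of tests, the same MemoryUtil call is useful for periodic memory monitoring. A short helper, assuming an already-open db and its block cache are passed in:

import java.util.Collections;
import java.util.Map;
import org.rocksdb.Cache;
import org.rocksdb.MemoryUsageType;
import org.rocksdb.MemoryUtil;
import org.rocksdb.RocksDB;

public class MemoryReportSketch {
  // Prints a one-shot memory breakdown for a single db/cache pair.
  static void printMemoryUsage(final RocksDB db, final Cache cache) {
    final Map<MemoryUsageType, Long> usage = MemoryUtil.getApproximateMemoryUsageByType(
        Collections.singletonList(db), Collections.singleton(cache));
    System.out.println("memtables:     " + usage.get(MemoryUsageType.kMemTableTotal));
    System.out.println("unflushed:     " + usage.get(MemoryUsageType.kMemTableUnFlushed));
    System.out.println("table readers: " + usage.get(MemoryUsageType.kTableReadersTotal));
    System.out.println("block cache:   " + usage.get(MemoryUsageType.kCacheTotal));
  }
}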
- /** - * Test MemoryUtil.getApproximateMemoryUsageByType with two DBs and two caches - */ - @Test - public void getApproximateMemoryUsageByTypeMultiple() throws RocksDBException { - try (final Cache cache1 = new LRUCache(1 * 1024 * 1024); - final Options options1 = - new Options() - .setCreateIfMissing(true) - .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache1)); - final RocksDB db1 = - RocksDB.open(options1, dbFolder1.getRoot().getAbsolutePath()); - final Cache cache2 = new LRUCache(1 * 1024 * 1024); - final Options options2 = - new Options() - .setCreateIfMissing(true) - .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache2)); - final RocksDB db2 = - RocksDB.open(options2, dbFolder2.getRoot().getAbsolutePath()); - final FlushOptions flushOptions = - new FlushOptions().setWaitForFlush(true); - - ) { - List dbs = new ArrayList<>(1); - dbs.add(db1); - dbs.add(db2); - Set caches = new HashSet<>(1); - caches.add(cache1); - caches.add(cache2); - - for (RocksDB db: dbs) { - db.put(key, value); - db.flush(flushOptions); - db.get(key); - } - - Map usage = MemoryUtil.getApproximateMemoryUsageByType(dbs, caches); - assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo( - db1.getAggregatedLongProperty(MEMTABLE_SIZE) + db2.getAggregatedLongProperty(MEMTABLE_SIZE)); - assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo( - db1.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE) + db2.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE)); - assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo( - db1.getAggregatedLongProperty(TABLE_READERS) + db2.getAggregatedLongProperty(TABLE_READERS)); - assertThat(usage.get(MemoryUsageType.kCacheTotal)).isGreaterThan(0); - - } - } - -} diff --git a/java/src/test/java/org/rocksdb/MergeTest.java b/java/src/test/java/org/rocksdb/MergeTest.java deleted file mode 100644 index a840eb104..000000000 --- a/java/src/test/java/org/rocksdb/MergeTest.java +++ /dev/null @@ -1,465 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
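MergeTest below covers both the name-based (setMergeOperatorName) and instance-based (setMergeOperator) ways of configuring a merge operator, for the default column family and for additional ones. For reference, the smallest end-to-end use of the built-in stringappend operator looks roughly like this, with an illustrative path:

import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.StringAppendOperator;

public class MergeOperatorSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final StringAppendOperator stringAppend = new StringAppendOperator();
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setMergeOperator(stringAppend);
         final RocksDB db = RocksDB.open(options, "/tmp/merge-db")) {
      db.put("key".getBytes(), "aa".getBytes());
      db.merge("key".getBytes(), "bb".getBytes());
      // The default delimiter is ',' so this prints "aa,bb".
      System.out.println(new String(db.get("key".getBytes())));
    }
  }
}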
- -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class MergeTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void stringOption() - throws InterruptedException, RocksDBException { - try (final Options opt = new Options() - .setCreateIfMissing(true) - .setMergeOperatorName("stringappend"); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - // writing aa under key - db.put("key".getBytes(), "aa".getBytes()); - // merge bb under key - db.merge("key".getBytes(), "bb".getBytes()); - - final byte[] value = db.get("key".getBytes()); - final String strValue = new String(value); - assertThat(strValue).isEqualTo("aa,bb"); - } - } - - private byte[] longToByteArray(long l) { - ByteBuffer buf = ByteBuffer.allocate(Long.SIZE / Byte.SIZE).order(ByteOrder.LITTLE_ENDIAN); - buf.putLong(l); - return buf.array(); - } - - private long longFromByteArray(byte[] a) { - ByteBuffer buf = ByteBuffer.allocate(Long.SIZE / Byte.SIZE).order(ByteOrder.LITTLE_ENDIAN); - buf.put(a); - buf.flip(); - return buf.getLong(); - } - - @Test - public void uint64AddOption() - throws InterruptedException, RocksDBException { - try (final Options opt = new Options() - .setCreateIfMissing(true) - .setMergeOperatorName("uint64add"); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - // writing (long)100 under key - db.put("key".getBytes(), longToByteArray(100)); - // merge (long)1 under key - db.merge("key".getBytes(), longToByteArray(1)); - - final byte[] value = db.get("key".getBytes()); - final long longValue = longFromByteArray(value); - assertThat(longValue).isEqualTo(101); - } - } - - @Test - public void cFStringOption() - throws InterruptedException, RocksDBException { - - try (final ColumnFamilyOptions cfOpt1 = new ColumnFamilyOptions() - .setMergeOperatorName("stringappend"); - final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() - .setMergeOperatorName("stringappend") - ) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt2) - ); - - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions opt = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList)) { - try { - // writing aa under key - db.put(columnFamilyHandleList.get(1), - "cfkey".getBytes(), "aa".getBytes()); - // merge bb under key - db.merge(columnFamilyHandleList.get(1), - "cfkey".getBytes(), "bb".getBytes()); - - byte[] value = db.get(columnFamilyHandleList.get(1), - "cfkey".getBytes()); - String strValue = new String(value); - assertThat(strValue).isEqualTo("aa,bb"); - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandleList) { - handle.close(); - } - } - } - } - } - - @Test - public void cFUInt64AddOption() - throws InterruptedException, RocksDBException { - - try (final ColumnFamilyOptions cfOpt1 = new 
ColumnFamilyOptions() - .setMergeOperatorName("uint64add"); - final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() - .setMergeOperatorName("uint64add") - ) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt2) - ); - - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions opt = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList)) { - try { - // writing (long)100 under key - db.put(columnFamilyHandleList.get(1), - "cfkey".getBytes(), longToByteArray(100)); - // merge (long)157 under key - db.merge(columnFamilyHandleList.get(1), "cfkey".getBytes(), longToByteArray(157)); - - byte[] value = db.get(columnFamilyHandleList.get(1), - "cfkey".getBytes()); - long longValue = longFromByteArray(value); - assertThat(longValue).isEqualTo(257); - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandleList) { - handle.close(); - } - } - } - } - } - - @Test - public void operatorOption() - throws InterruptedException, RocksDBException { - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); - final Options opt = new Options() - .setCreateIfMissing(true) - .setMergeOperator(stringAppendOperator); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - // Writing aa under key - db.put("key".getBytes(), "aa".getBytes()); - - // Writing bb under key - db.merge("key".getBytes(), "bb".getBytes()); - - final byte[] value = db.get("key".getBytes()); - final String strValue = new String(value); - - assertThat(strValue).isEqualTo("aa,bb"); - } - } - - @Test - public void uint64AddOperatorOption() - throws InterruptedException, RocksDBException { - try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); - final Options opt = new Options() - .setCreateIfMissing(true) - .setMergeOperator(uint64AddOperator); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - // Writing (long)100 under key - db.put("key".getBytes(), longToByteArray(100)); - - // Writing (long)1 under key - db.merge("key".getBytes(), longToByteArray(1)); - - final byte[] value = db.get("key".getBytes()); - final long longValue = longFromByteArray(value); - - assertThat(longValue).isEqualTo(101); - } - } - - @Test - public void cFOperatorOption() - throws InterruptedException, RocksDBException { - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); - final ColumnFamilyOptions cfOpt1 = new ColumnFamilyOptions() - .setMergeOperator(stringAppendOperator); - final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() - .setMergeOperator(stringAppendOperator) - ) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), - new ColumnFamilyDescriptor("new_cf".getBytes(), cfOpt2) - ); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions opt = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList) - ) { - try { - // writing aa under key - db.put(columnFamilyHandleList.get(1), - "cfkey".getBytes(), "aa".getBytes()); - // merge bb under key - 
db.merge(columnFamilyHandleList.get(1), - "cfkey".getBytes(), "bb".getBytes()); - byte[] value = db.get(columnFamilyHandleList.get(1), - "cfkey".getBytes()); - String strValue = new String(value); - - // Test also with createColumnFamily - try (final ColumnFamilyOptions cfHandleOpts = - new ColumnFamilyOptions() - .setMergeOperator(stringAppendOperator); - final ColumnFamilyHandle cfHandle = - db.createColumnFamily( - new ColumnFamilyDescriptor("new_cf2".getBytes(), - cfHandleOpts)) - ) { - // writing xx under cfkey2 - db.put(cfHandle, "cfkey2".getBytes(), "xx".getBytes()); - // merge yy under cfkey2 - db.merge(cfHandle, new WriteOptions(), "cfkey2".getBytes(), - "yy".getBytes()); - value = db.get(cfHandle, "cfkey2".getBytes()); - String strValueTmpCf = new String(value); - - assertThat(strValue).isEqualTo("aa,bb"); - assertThat(strValueTmpCf).isEqualTo("xx,yy"); - } - } finally { - for (final ColumnFamilyHandle columnFamilyHandle : - columnFamilyHandleList) { - columnFamilyHandle.close(); - } - } - } - } - } - - @Test - public void cFUInt64AddOperatorOption() - throws InterruptedException, RocksDBException { - try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); - final ColumnFamilyOptions cfOpt1 = new ColumnFamilyOptions() - .setMergeOperator(uint64AddOperator); - final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() - .setMergeOperator(uint64AddOperator) - ) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), - new ColumnFamilyDescriptor("new_cf".getBytes(), cfOpt2) - ); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions opt = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - columnFamilyHandleList) - ) { - try { - // writing (long)100 under key - db.put(columnFamilyHandleList.get(1), - "cfkey".getBytes(), longToByteArray(100)); - // merge (long)1 under key - db.merge(columnFamilyHandleList.get(1), - "cfkey".getBytes(), longToByteArray(1)); - byte[] value = db.get(columnFamilyHandleList.get(1), - "cfkey".getBytes()); - long longValue = longFromByteArray(value); - - // Test also with createColumnFamily - try (final ColumnFamilyOptions cfHandleOpts = - new ColumnFamilyOptions() - .setMergeOperator(uint64AddOperator); - final ColumnFamilyHandle cfHandle = - db.createColumnFamily( - new ColumnFamilyDescriptor("new_cf2".getBytes(), - cfHandleOpts)) - ) { - // writing (long)200 under cfkey2 - db.put(cfHandle, "cfkey2".getBytes(), longToByteArray(200)); - // merge (long)50 under cfkey2 - db.merge(cfHandle, new WriteOptions(), "cfkey2".getBytes(), - longToByteArray(50)); - value = db.get(cfHandle, "cfkey2".getBytes()); - long longValueTmpCf = longFromByteArray(value); - - assertThat(longValue).isEqualTo(101); - assertThat(longValueTmpCf).isEqualTo(250); - } - } finally { - for (final ColumnFamilyHandle columnFamilyHandle : - columnFamilyHandleList) { - columnFamilyHandle.close(); - } - } - } - } - } - - @Test - public void operatorGcBehaviour() - throws RocksDBException { - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator()) { - try (final Options opt = new Options() - .setCreateIfMissing(true) - .setMergeOperator(stringAppendOperator); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - // test reuse - try (final Options opt = new Options() - 
.setMergeOperator(stringAppendOperator); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - // test param init - try (final StringAppendOperator stringAppendOperator2 = new StringAppendOperator(); - final Options opt = new Options() - .setMergeOperator(stringAppendOperator2); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - // test replace one with another merge operator instance - try (final Options opt = new Options() - .setMergeOperator(stringAppendOperator); - final StringAppendOperator newStringAppendOperator = new StringAppendOperator()) { - opt.setMergeOperator(newStringAppendOperator); - try (final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - } - } - } - - @Test - public void uint64AddOperatorGcBehaviour() - throws RocksDBException { - try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator()) { - try (final Options opt = new Options() - .setCreateIfMissing(true) - .setMergeOperator(uint64AddOperator); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - // test reuse - try (final Options opt = new Options() - .setMergeOperator(uint64AddOperator); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - // test param init - try (final UInt64AddOperator uint64AddOperator2 = new UInt64AddOperator(); - final Options opt = new Options() - .setMergeOperator(uint64AddOperator2); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - // test replace one with another merge operator instance - try (final Options opt = new Options() - .setMergeOperator(uint64AddOperator); - final UInt64AddOperator newUInt64AddOperator = new UInt64AddOperator()) { - opt.setMergeOperator(newUInt64AddOperator); - try (final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - } - } - } - - @Test - public void emptyStringAsStringAppendDelimiter() throws RocksDBException { - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(""); - final Options opt = - new Options().setCreateIfMissing(true).setMergeOperator(stringAppendOperator); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - db.put("key".getBytes(), "aa".getBytes()); - db.merge("key".getBytes(), "bb".getBytes()); - final byte[] value = db.get("key".getBytes()); - assertThat(new String(value)).isEqualTo("aabb"); - } - } - - @Test - public void multiCharStringAsStringAppendDelimiter() throws RocksDBException { - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator("<>"); - final Options opt = - new Options().setCreateIfMissing(true).setMergeOperator(stringAppendOperator); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - db.put("key".getBytes(), "aa".getBytes()); - db.merge("key".getBytes(), "bb".getBytes()); - final byte[] value = db.get("key".getBytes()); - assertThat(new String(value)).isEqualTo("aa<>bb"); - } - } - - @Test - public void emptyStringInSetMergeOperatorByName() { - try (final Options opt = new Options() - .setMergeOperatorName(""); - final ColumnFamilyOptions cOpt = new ColumnFamilyOptions() - .setMergeOperatorName("")) { - //no-op - } - } - - @Test(expected = IllegalArgumentException.class) - public void nullStringInSetMergeOperatorByNameOptions() { - try (final Options opt = new Options()) { - 
opt.setMergeOperatorName(null); - } - } - - @Test(expected = IllegalArgumentException.class) - public void - nullStringInSetMergeOperatorByNameColumnFamilyOptions() { - try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { - opt.setMergeOperatorName(null); - } - } -} diff --git a/java/src/test/java/org/rocksdb/MixedOptionsTest.java b/java/src/test/java/org/rocksdb/MixedOptionsTest.java deleted file mode 100644 index 4e17d04ef..000000000 --- a/java/src/test/java/org/rocksdb/MixedOptionsTest.java +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class MixedOptionsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void mixedOptionsTest(){ - // Set a table factory and check the names - try(final Filter bloomFilter = new BloomFilter(); - final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions() - .setTableFormatConfig( - new BlockBasedTableConfig().setFilterPolicy(bloomFilter)) - ) { - assertThat(cfOptions.tableFactoryName()).isEqualTo( - "BlockBasedTable"); - cfOptions.setTableFormatConfig(new PlainTableConfig()); - assertThat(cfOptions.tableFactoryName()).isEqualTo("PlainTable"); - // Initialize a dbOptions object from cf options and - // db options - try (final DBOptions dbOptions = new DBOptions(); - final Options options = new Options(dbOptions, cfOptions)) { - assertThat(options.tableFactoryName()).isEqualTo("PlainTable"); - // Free instances - } - } - - // Test Optimize for statements - try(final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()) { - cfOptions.optimizeUniversalStyleCompaction(); - cfOptions.optimizeLevelStyleCompaction(); - cfOptions.optimizeForPointLookup(1024); - try(final Options options = new Options()) { - options.optimizeLevelStyleCompaction(); - options.optimizeLevelStyleCompaction(400); - options.optimizeUniversalStyleCompaction(); - options.optimizeUniversalStyleCompaction(400); - options.optimizeForPointLookup(1024); - options.prepareForBulkLoad(); - } - } - } - - @Test - public void mixedOptionsEnvTest() { - try (final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(); - final DBOptions dbOptions = new DBOptions()) { - assertThat(dbOptions.getEnv()).isNotNull(); - assertThat(dbOptions.getEnv()).isSameAs(Env.getDefault()); - final Env memEnv = new RocksMemEnv(Env.getDefault()); - - try (final Options options = new Options(dbOptions, cfOptions)) { - assertThat(options.getEnv()).isSameAs(Env.getDefault()); - } - - dbOptions.setEnv(memEnv); - memEnv.setBackgroundThreads(4, Priority.LOW); - Env.getDefault().setBackgroundThreads(2, Priority.HIGH); - assertThat(dbOptions.getEnv().getBackgroundThreads(Priority.LOW)).isEqualTo(4); - assertThat(dbOptions.getEnv().getBackgroundThreads(Priority.HIGH)).isEqualTo(2); - assertThat(Env.getDefault().getBackgroundThreads(Priority.LOW)).isEqualTo(4); - assertThat(Env.getDefault().getBackgroundThreads(Priority.HIGH)).isEqualTo(2); - - try (final Options options = new Options(dbOptions, cfOptions)) { - assertThat(options.getEnv().getBackgroundThreads(Priority.LOW)).isEqualTo(4); - 
assertThat(options.getEnv().getBackgroundThreads(Priority.HIGH)).isEqualTo(2); - - assertThat(options.getEnv()).isNotSameAs(Env.getDefault()); - assertThat(options.getEnv()).isSameAs(memEnv); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/MultiColumnRegressionTest.java b/java/src/test/java/org/rocksdb/MultiColumnRegressionTest.java deleted file mode 100644 index cdfd9d3a9..000000000 --- a/java/src/test/java/org/rocksdb/MultiColumnRegressionTest.java +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -/** - * Test for changes made by - * transactional multiGet problem - * the tests here were previously broken by the nonsense removed by that change. - */ -@RunWith(Parameterized.class) -public class MultiColumnRegressionTest { - @Parameterized.Parameters - public static List data() { - return Arrays.asList(new Params(3, 100), new Params(3, 1000000)); - } - - public static class Params { - final int numColumns; - final int keySize; - - public Params(final int numColumns, final int keySize) { - this.numColumns = numColumns; - this.keySize = keySize; - } - } - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - private final Params params; - - public MultiColumnRegressionTest(final Params params) { - this.params = params; - } - - @Test - public void transactionDB() throws RocksDBException { - final List columnFamilyDescriptors = new ArrayList<>(); - for (int i = 0; i < params.numColumns; i++) { - StringBuilder sb = new StringBuilder(); - sb.append("cf" + i); - for (int j = 0; j < params.keySize; j++) sb.append("_cf"); - columnFamilyDescriptors.add(new ColumnFamilyDescriptor(sb.toString().getBytes())); - } - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - final List columnFamilyHandles = - db.createColumnFamilies(columnFamilyDescriptors); - } - - columnFamilyDescriptors.add(new ColumnFamilyDescriptor("default".getBytes())); - final List columnFamilyHandles = new ArrayList<>(); - try (final TransactionDB tdb = TransactionDB.open(new DBOptions().setCreateIfMissing(true), - new TransactionDBOptions(), dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - final WriteOptions writeOptions = new WriteOptions(); - try (Transaction transaction = tdb.beginTransaction(writeOptions)) { - for (int i = 0; i < params.numColumns; i++) { - transaction.put( - columnFamilyHandles.get(i), ("key" + i).getBytes(), ("value" + (i - 7)).getBytes()); - } - transaction.put("key".getBytes(), "value".getBytes()); - transaction.commit(); - } - for (ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) { - columnFamilyHandle.close(); - } - } - - final List columnFamilyHandles2 = new ArrayList<>(); - try (final TransactionDB tdb = TransactionDB.open(new DBOptions().setCreateIfMissing(true), - new TransactionDBOptions(), dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, 
columnFamilyHandles2)) { - try (Transaction transaction = tdb.beginTransaction(new WriteOptions())) { - final ReadOptions readOptions = new ReadOptions(); - for (int i = 0; i < params.numColumns; i++) { - final byte[] value = - transaction.get(columnFamilyHandles2.get(i), readOptions, ("key" + i).getBytes()); - assertThat(value).isEqualTo(("value" + (i - 7)).getBytes()); - } - transaction.commit(); - } - for (ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles2) { - columnFamilyHandle.close(); - } - } - } - - @Test - public void optimisticDB() throws RocksDBException { - final List columnFamilyDescriptors = new ArrayList<>(); - for (int i = 0; i < params.numColumns; i++) { - columnFamilyDescriptors.add(new ColumnFamilyDescriptor("default".getBytes())); - } - - columnFamilyDescriptors.add(new ColumnFamilyDescriptor("default".getBytes())); - final List columnFamilyHandles = new ArrayList<>(); - try (final OptimisticTransactionDB otdb = OptimisticTransactionDB.open( - new DBOptions().setCreateIfMissing(true), dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - try (Transaction transaction = otdb.beginTransaction(new WriteOptions())) { - for (int i = 0; i < params.numColumns; i++) { - transaction.put( - columnFamilyHandles.get(i), ("key" + i).getBytes(), ("value" + (i - 7)).getBytes()); - } - transaction.put("key".getBytes(), "value".getBytes()); - transaction.commit(); - } - for (ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) { - columnFamilyHandle.close(); - } - } - - final List columnFamilyHandles2 = new ArrayList<>(); - try (final OptimisticTransactionDB otdb = OptimisticTransactionDB.open( - new DBOptions().setCreateIfMissing(true), dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles2)) { - try (Transaction transaction = otdb.beginTransaction(new WriteOptions())) { - final ReadOptions readOptions = new ReadOptions(); - for (int i = 0; i < params.numColumns; i++) { - final byte[] value = - transaction.get(columnFamilyHandles2.get(i), readOptions, ("key" + i).getBytes()); - assertThat(value).isEqualTo(("value" + (i - 7)).getBytes()); - } - transaction.commit(); - } - for (ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles2) { - columnFamilyHandle.close(); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/MultiGetManyKeysTest.java b/java/src/test/java/org/rocksdb/MultiGetManyKeysTest.java deleted file mode 100644 index 90a13e1da..000000000 --- a/java/src/test/java/org/rocksdb/MultiGetManyKeysTest.java +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
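MultiGetManyKeysTest below stresses multiGetAsList with very large key counts and transactional variants. Basic usage is a list of keys in and a parallel list of values out, with null entries for misses; a minimal sketch with a hypothetical path:

import java.util.Arrays;
import java.util.List;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class MultiGetSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/multiget-db")) {
      db.put("k1".getBytes(), "v1".getBytes());
      db.put("k3".getBytes(), "v3".getBytes());

      final List<byte[]> keys = Arrays.asList("k1".getBytes(), "k2".getBytes(), "k3".getBytes());
      // Values are returned in key order; a missing key yields a null entry.
      final List<byte[]> values = db.multiGetAsList(keys);
      for (int i = 0; i < keys.size(); i++) {
        System.out.println(new String(keys.get(i)) + " -> "
            + (values.get(i) == null ? "<missing>" : new String(values.get(i))));
      }
    }
  }
}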
-package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.*; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class MultiGetManyKeysTest { - @Parameterized.Parameters - public static List data() { - return Arrays.asList(2, 3, 250, 60000, 70000, 150000, 750000); - } - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - private final int numKeys; - - public MultiGetManyKeysTest(final Integer numKeys) { - this.numKeys = numKeys; - } - - /** - * Test for multiGet problem - */ - @Test - public void multiGetAsListLarge() throws RocksDBException { - final List keys = generateRandomKeys(numKeys); - final Map keyValues = generateRandomKeyValues(keys, 10); - putKeysAndValues(keyValues); - - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - final List values = db.multiGetAsList(keys); - assertKeysAndValues(keys, keyValues, values); - } - } - - /** - * Test for transactional multiGet - * problem - */ - @Test - public void multiGetAsListLargeTransactional() throws RocksDBException { - final List keys = generateRandomKeys(numKeys); - final Map keyValues = generateRandomKeyValues(keys, 10); - putKeysAndValues(keyValues); - - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB txnDB = - TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath())) { - try (final Transaction transaction = txnDB.beginTransaction(new WriteOptions())) { - final List values = transaction.multiGetAsList(new ReadOptions(), keys); - assertKeysAndValues(keys, keyValues, values); - } - } - } - - /** - * Test for transactional multiGet - * problem - */ - @Test - public void multiGetForUpdateAsListLargeTransactional() throws RocksDBException { - final List keys = generateRandomKeys(numKeys); - final Map keyValues = generateRandomKeyValues(keys, 10); - putKeysAndValues(keyValues); - - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB txnDB = - TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath())) { - try (final Transaction transaction = txnDB.beginTransaction(new WriteOptions())) { - final List values = transaction.multiGetForUpdateAsList(new ReadOptions(), keys); - assertKeysAndValues(keys, keyValues, values); - } - } - } - - /** - * Test for transactional multiGet - * problem - */ - @Test - public void multiGetAsListLargeTransactionalCF() throws RocksDBException { - final List keys = generateRandomKeys(numKeys); - final Map keyValues = generateRandomKeyValues(keys, 10); - final ColumnFamilyDescriptor columnFamilyDescriptor = - new ColumnFamilyDescriptor("cfTest".getBytes()); - putKeysAndValues(columnFamilyDescriptor, keyValues); - - final List columnFamilyDescriptors = new ArrayList<>(); - columnFamilyDescriptors.add(columnFamilyDescriptor); - columnFamilyDescriptors.add(new ColumnFamilyDescriptor("default".getBytes())); - final List columnFamilyHandles = new ArrayList<>(); - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB txnDB = 
TransactionDB.open(new DBOptions(options), txnDbOptions, - dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { - final List columnFamilyHandlesForMultiGet = new ArrayList<>(numKeys); - for (int i = 0; i < numKeys; i++) - columnFamilyHandlesForMultiGet.add(columnFamilyHandles.get(0)); - try (final Transaction transaction = txnDB.beginTransaction(new WriteOptions())) { - final List values = - transaction.multiGetAsList(new ReadOptions(), columnFamilyHandlesForMultiGet, keys); - assertKeysAndValues(keys, keyValues, values); - } - for (ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) { - columnFamilyHandle.close(); - } - } - } - - /** - * Test for transactional multiGet - * problem - */ - @Test - public void multiGetForUpdateAsListLargeTransactionalCF() throws RocksDBException { - final List keys = generateRandomKeys(numKeys); - final Map keyValues = generateRandomKeyValues(keys, 10); - final ColumnFamilyDescriptor columnFamilyDescriptor = - new ColumnFamilyDescriptor("cfTest".getBytes()); - putKeysAndValues(columnFamilyDescriptor, keyValues); - - final List columnFamilyDescriptors = new ArrayList<>(); - columnFamilyDescriptors.add(columnFamilyDescriptor); - columnFamilyDescriptors.add(new ColumnFamilyDescriptor("default".getBytes())); - final List columnFamilyHandles = new ArrayList<>(); - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB txnDB = TransactionDB.open(new DBOptions(options), txnDbOptions, - dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { - final List columnFamilyHandlesForMultiGet = new ArrayList<>(numKeys); - for (int i = 0; i < numKeys; i++) - columnFamilyHandlesForMultiGet.add(columnFamilyHandles.get(0)); - try (final Transaction transaction = txnDB.beginTransaction(new WriteOptions())) { - final List values = transaction.multiGetForUpdateAsList( - new ReadOptions(), columnFamilyHandlesForMultiGet, keys); - assertKeysAndValues(keys, keyValues, values); - } - for (ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) { - columnFamilyHandle.close(); - } - } - } - - private List generateRandomKeys(final int numKeys) { - final Random rand = new Random(); - final List keys = new ArrayList<>(); - for (int i = 0; i < numKeys; i++) { - final byte[] key = new byte[4]; - rand.nextBytes(key); - keys.add(key); - } - return keys; - } - - private Map generateRandomKeyValues(final List keys, final int percent) { - final Random rand = new Random(); - final Map keyValues = new HashMap<>(); - for (int i = 0; i < numKeys; i++) { - if (rand.nextInt(100) < percent) { - final byte[] value = new byte[1024]; - rand.nextBytes(value); - keyValues.put(new Key(keys.get(i)), value); - } - } - return keyValues; - } - - private void putKeysAndValues(Map keyValues) throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - for (Map.Entry keyValue : keyValues.entrySet()) { - db.put(keyValue.getKey().get(), keyValue.getValue()); - } - } - } - - private void putKeysAndValues(ColumnFamilyDescriptor columnFamilyDescriptor, - Map keyValues) throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()); - final ColumnFamilyHandle columnFamilyHandle = - 
db.createColumnFamily(columnFamilyDescriptor)) { - for (Map.Entry keyValue : keyValues.entrySet()) { - db.put(columnFamilyHandle, keyValue.getKey().get(), keyValue.getValue()); - } - } - } - - private void assertKeysAndValues( - final List keys, final Map keyValues, final List values) { - assertThat(values.size()).isEqualTo(keys.size()); - for (int i = 0; i < numKeys; i++) { - final Key key = new Key(keys.get(i)); - final byte[] value = values.get(i); - if (keyValues.containsKey(key)) { - assertThat(value).isEqualTo(keyValues.get(key)); - } else { - assertThat(value).isNull(); - } - } - } - - static private class Key { - private final byte[] bytes; - public Key(byte[] bytes) { - this.bytes = bytes; - } - - public byte[] get() { - return this.bytes; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - Key key = (Key) o; - return Arrays.equals(bytes, key.bytes); - } - - @Override - public int hashCode() { - return Arrays.hashCode(bytes); - } - } -} diff --git a/java/src/test/java/org/rocksdb/MultiGetTest.java b/java/src/test/java/org/rocksdb/MultiGetTest.java deleted file mode 100644 index c391d81f6..000000000 --- a/java/src/test/java/org/rocksdb/MultiGetTest.java +++ /dev/null @@ -1,530 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.fail; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.TestUtil; - -public class MultiGetTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void putNThenMultiGet() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1ForKey1".getBytes()); - db.put("key2".getBytes(), "value2ForKey2".getBytes()); - db.put("key3".getBytes(), "value3ForKey3".getBytes()); - final List keys = - Arrays.asList("key1".getBytes(), "key2".getBytes(), "key3".getBytes()); - final List values = db.multiGetAsList(keys); - assertThat(values.size()).isEqualTo(keys.size()); - assertThat(values.get(0)).isEqualTo("value1ForKey1".getBytes()); - assertThat(values.get(1)).isEqualTo("value2ForKey2".getBytes()); - assertThat(values.get(2)).isEqualTo("value3ForKey3".getBytes()); - } - } - - @Test - public void putNThenMultiGetDirect() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1ForKey1".getBytes()); - db.put("key2".getBytes(), "value2ForKey2".getBytes()); - db.put("key3".getBytes(), "value3ForKey3".getBytes()); - - final List keys = new ArrayList<>(); - keys.add(ByteBuffer.allocateDirect(12).put("key1".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key2".getBytes())); 
- keys.add(ByteBuffer.allocateDirect(12).put("key3".getBytes())); - // Java8 and lower flip() returns Buffer not ByteBuffer, so can't chain above /\/\ - for (final ByteBuffer key : keys) { - key.flip(); - } - final List values = new ArrayList<>(); - for (int i = 0; i < keys.size(); i++) { - values.add(ByteBuffer.allocateDirect(24)); - } - - { - final List results = db.multiGetByteBuffers(keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.Ok); - - assertThat(results.get(0).requiredSize).isEqualTo("value1ForKey1".getBytes().length); - assertThat(results.get(1).requiredSize).isEqualTo("value2ForKey2".getBytes().length); - assertThat(results.get(2).requiredSize).isEqualTo("value3ForKey3".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(0).value)) - .isEqualTo("value1ForKey1".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(1).value)) - .isEqualTo("value2ForKey2".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(2).value)) - .isEqualTo("value3ForKey3".getBytes()); - } - - { - final List results = - db.multiGetByteBuffers(new ReadOptions(), keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.Ok); - - assertThat(results.get(0).requiredSize).isEqualTo("value1ForKey1".getBytes().length); - assertThat(results.get(1).requiredSize).isEqualTo("value2ForKey2".getBytes().length); - assertThat(results.get(2).requiredSize).isEqualTo("value3ForKey3".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(0).value)) - .isEqualTo("value1ForKey1".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(1).value)) - .isEqualTo("value2ForKey2".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(2).value)) - .isEqualTo("value3ForKey3".getBytes()); - } - } - } - - @Test - public void putNThenMultiGetDirectSliced() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1ForKey1".getBytes()); - db.put("key2".getBytes(), "value2ForKey2".getBytes()); - db.put("key3".getBytes(), "value3ForKey3".getBytes()); - - final List keys = new ArrayList<>(); - keys.add(ByteBuffer.allocateDirect(12).put("key2".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key3".getBytes())); - keys.add( - ByteBuffer.allocateDirect(12).put("prefix1".getBytes()).slice().put("key1".getBytes())); - // Java8 and lower flip() returns Buffer not ByteBuffer, so can't chain above /\/\ - for (final ByteBuffer key : keys) { - key.flip(); - } - final List values = new ArrayList<>(); - for (int i = 0; i < keys.size(); i++) { - values.add(ByteBuffer.allocateDirect(24)); - } - - { - final List results = db.multiGetByteBuffers(keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.Ok); - - assertThat(results.get(1).requiredSize).isEqualTo("value2ForKey2".getBytes().length); - assertThat(results.get(2).requiredSize).isEqualTo("value3ForKey3".getBytes().length); - 
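// Editor's sketch of the multiGetByteBuffers pattern these assertions rely on; the
// names and buffer sizes below are illustrative and not part of the original test.
// In the removed tests, keys and values are direct ByteBuffers, each key is flip()ed
// before the call, and one value buffer is supplied per key. Each ByteBufferGetStatus
// then carries a Status code, the full stored length in requiredSize, and the value
// buffer filled up to its capacity, so a value longer than the buffer comes back
// truncated while requiredSize still reports the real size.
//
//   final ByteBuffer key = ByteBuffer.allocateDirect(12).put("key1".getBytes());
//   key.flip();
//   final ByteBuffer value = ByteBuffer.allocateDirect(24);
//   final List<ByteBufferGetStatus> statuses =
//       db.multiGetByteBuffers(Arrays.asList(key), Arrays.asList(value));
//   if (statuses.get(0).requiredSize > value.capacity()) {
//     // re-issue the read with a larger value buffer
//   }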
assertThat(results.get(0).requiredSize).isEqualTo("value1ForKey1".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(0).value)) - .isEqualTo("value2ForKey2".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(1).value)) - .isEqualTo("value3ForKey3".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(2).value)) - .isEqualTo("value1ForKey1".getBytes()); - } - } - } - - @Test - public void putNThenMultiGetDirectBadValuesArray() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1ForKey1".getBytes()); - db.put("key2".getBytes(), "value2ForKey2".getBytes()); - db.put("key3".getBytes(), "value3ForKey3".getBytes()); - - final List keys = new ArrayList<>(); - keys.add(ByteBuffer.allocateDirect(12).put("key1".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key2".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key3".getBytes())); - // Java8 and lower flip() returns Buffer not ByteBuffer, so can't chain above /\/\ - for (final ByteBuffer key : keys) { - key.flip(); - } - - { - final List values = new ArrayList<>(); - for (int i = 0; i < keys.size(); i++) { - values.add(ByteBuffer.allocateDirect(24)); - } - - values.remove(0); - - try { - db.multiGetByteBuffers(keys, values); - fail("Expected exception when not enough value ByteBuffers supplied"); - } catch (final IllegalArgumentException e) { - assertThat(e.getMessage()).contains("For each key there must be a corresponding value"); - } - } - - { - final List values = new ArrayList<>(); - for (int i = 0; i < keys.size(); i++) { - values.add(ByteBuffer.allocateDirect(24)); - } - - values.add(ByteBuffer.allocateDirect(24)); - - try { - db.multiGetByteBuffers(keys, values); - fail("Expected exception when too many value ByteBuffers supplied"); - } catch (final IllegalArgumentException e) { - assertThat(e.getMessage()).contains("For each key there must be a corresponding value"); - } - } - } - } - - @Test - public void putNThenMultiGetDirectShortValueBuffers() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1ForKey1".getBytes()); - db.put("key2".getBytes(), "value2ForKey2".getBytes()); - db.put("key3".getBytes(), "value3ForKey3".getBytes()); - - final List keys = new ArrayList<>(); - keys.add(ByteBuffer.allocateDirect(12).put("key1".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key2".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key3".getBytes())); - // Java8 and lower flip() returns Buffer not ByteBuffer, so can't chain above /\/\ - for (final ByteBuffer key : keys) { - key.flip(); - } - - { - final List values = new ArrayList<>(); - for (int i = 0; i < keys.size(); i++) { - values.add(ByteBuffer.allocateDirect(4)); - } - - final List statii = db.multiGetByteBuffers(keys, values); - assertThat(statii.size()).isEqualTo(values.size()); - for (final ByteBufferGetStatus status : statii) { - assertThat(status.status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(status.requiredSize).isEqualTo("value3ForKey3".getBytes().length); - final ByteBuffer expected = - ByteBuffer.allocateDirect(24).put(Arrays.copyOf("valueX".getBytes(), 4)); - expected.flip(); - assertThat(status.value).isEqualTo(expected); - } - } - } - } - - @Test - public void 
putNThenMultiGetDirectNondefaultCF() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - final List cfDescriptors = new ArrayList<>(0); - cfDescriptors.add(new ColumnFamilyDescriptor("cf0".getBytes())); - cfDescriptors.add(new ColumnFamilyDescriptor("cf1".getBytes())); - cfDescriptors.add(new ColumnFamilyDescriptor("cf2".getBytes())); - - final List cf = db.createColumnFamilies(cfDescriptors); - - db.put(cf.get(0), "key1".getBytes(), "value1ForKey1".getBytes()); - db.put(cf.get(0), "key2".getBytes(), "value2ForKey2".getBytes()); - db.put(cf.get(0), "key3".getBytes(), "value3ForKey3".getBytes()); - - final List keys = new ArrayList<>(); - keys.add(ByteBuffer.allocateDirect(12).put("key1".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key2".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key3".getBytes())); - // Java8 and lower flip() returns Buffer not ByteBuffer, so can't chain above /\/\ - for (final ByteBuffer key : keys) { - key.flip(); - } - final List values = new ArrayList<>(); - for (int i = 0; i < keys.size(); i++) { - values.add(ByteBuffer.allocateDirect(24)); - } - - { - final List results = db.multiGetByteBuffers(keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.NotFound); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.NotFound); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.NotFound); - } - - { - final List columnFamilyHandles = new ArrayList<>(); - columnFamilyHandles.add(cf.get(0)); - final List results = - db.multiGetByteBuffers(columnFamilyHandles, keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.Ok); - - assertThat(results.get(0).requiredSize).isEqualTo("value1ForKey1".getBytes().length); - assertThat(results.get(1).requiredSize).isEqualTo("value2ForKey2".getBytes().length); - assertThat(results.get(2).requiredSize).isEqualTo("value3ForKey3".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(0).value)) - .isEqualTo("value1ForKey1".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(1).value)) - .isEqualTo("value2ForKey2".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(2).value)) - .isEqualTo("value3ForKey3".getBytes()); - } - - { - final List columnFamilyHandles = new ArrayList<>(); - columnFamilyHandles.add(cf.get(0)); - columnFamilyHandles.add(cf.get(0)); - columnFamilyHandles.add(cf.get(0)); - final List results = - db.multiGetByteBuffers(columnFamilyHandles, keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.Ok); - - assertThat(results.get(0).requiredSize).isEqualTo("value1ForKey1".getBytes().length); - assertThat(results.get(1).requiredSize).isEqualTo("value2ForKey2".getBytes().length); - assertThat(results.get(2).requiredSize).isEqualTo("value3ForKey3".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(0).value)) - .isEqualTo("value1ForKey1".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(1).value)) - .isEqualTo("value2ForKey2".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(2).value)) - 
.isEqualTo("value3ForKey3".getBytes()); - } - } - } - - @Test - public void putNThenMultiGetDirectCFParams() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1ForKey1".getBytes()); - db.put("key2".getBytes(), "value2ForKey2".getBytes()); - db.put("key3".getBytes(), "value3ForKey3".getBytes()); - - final List columnFamilyHandles = new ArrayList<>(); - columnFamilyHandles.add(db.getDefaultColumnFamily()); - columnFamilyHandles.add(db.getDefaultColumnFamily()); - - final List keys = new ArrayList<>(); - keys.add(ByteBuffer.allocateDirect(12).put("key1".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key2".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key3".getBytes())); - // Java8 and lower flip() returns Buffer not ByteBuffer, so can't chain above /\/\ - for (final ByteBuffer key : keys) { - key.flip(); - } - final List values = new ArrayList<>(); - for (int i = 0; i < keys.size(); i++) { - values.add(ByteBuffer.allocateDirect(24)); - } - try { - db.multiGetByteBuffers(columnFamilyHandles, keys, values); - fail("Expected exception when 2 column families supplied"); - } catch (final IllegalArgumentException e) { - assertThat(e.getMessage()).contains("Wrong number of ColumnFamilyHandle(s) supplied"); - } - - columnFamilyHandles.clear(); - columnFamilyHandles.add(db.getDefaultColumnFamily()); - final List results = - db.multiGetByteBuffers(columnFamilyHandles, keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.Ok); - - assertThat(results.get(0).requiredSize).isEqualTo("value1ForKey1".getBytes().length); - assertThat(results.get(1).requiredSize).isEqualTo("value2ForKey2".getBytes().length); - assertThat(results.get(2).requiredSize).isEqualTo("value3ForKey3".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(0).value)).isEqualTo("value1ForKey1".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(1).value)).isEqualTo("value2ForKey2".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(2).value)).isEqualTo("value3ForKey3".getBytes()); - } - } - - @Test - public void putNThenMultiGetDirectMixedCF() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - final List cfDescriptors = new ArrayList<>(); - cfDescriptors.add(new ColumnFamilyDescriptor("cf0".getBytes())); - cfDescriptors.add(new ColumnFamilyDescriptor("cf1".getBytes())); - cfDescriptors.add(new ColumnFamilyDescriptor("cf2".getBytes())); - cfDescriptors.add(new ColumnFamilyDescriptor("cf3".getBytes())); - - final List cf = db.createColumnFamilies(cfDescriptors); - - db.put(cf.get(1), "key1".getBytes(), "value1ForKey1".getBytes()); - db.put("key2".getBytes(), "value2ForKey2".getBytes()); - db.put(cf.get(3), "key3".getBytes(), "value3ForKey3".getBytes()); - - final List keys = new ArrayList<>(); - keys.add(ByteBuffer.allocateDirect(12).put("key1".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key2".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key3".getBytes())); - // Java8 and lower flip() returns Buffer not ByteBuffer, so can't chain above /\/\ - for (final ByteBuffer key : keys) { - key.flip(); - } - final List 
values = new ArrayList<>(); - for (int i = 0; i < keys.size(); i++) { - values.add(ByteBuffer.allocateDirect(24)); - } - - { - final List columnFamilyHandles = new ArrayList<>(); - columnFamilyHandles.add(db.getDefaultColumnFamily()); - - final List results = - db.multiGetByteBuffers(columnFamilyHandles, keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.NotFound); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.NotFound); - - assertThat(results.get(1).requiredSize).isEqualTo("value2ForKey2".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(1).value)) - .isEqualTo("value2ForKey2".getBytes()); - } - - { - final List columnFamilyHandles = new ArrayList<>(); - columnFamilyHandles.add(cf.get(1)); - - final List results = - db.multiGetByteBuffers(columnFamilyHandles, keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.NotFound); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.NotFound); - - assertThat(results.get(0).requiredSize).isEqualTo("value2ForKey2".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(0).value)) - .isEqualTo("value1ForKey1".getBytes()); - } - - { - final List columnFamilyHandles = new ArrayList<>(); - columnFamilyHandles.add(cf.get(1)); - columnFamilyHandles.add(db.getDefaultColumnFamily()); - columnFamilyHandles.add(cf.get(3)); - - final List results = - db.multiGetByteBuffers(columnFamilyHandles, keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.Ok); - - assertThat(results.get(0).requiredSize).isEqualTo("value1ForKey1".getBytes().length); - assertThat(results.get(1).requiredSize).isEqualTo("value2ForKey2".getBytes().length); - assertThat(results.get(2).requiredSize).isEqualTo("value3ForKey3".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(0).value)) - .isEqualTo("value1ForKey1".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(1).value)) - .isEqualTo("value2ForKey2".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(2).value)) - .isEqualTo("value3ForKey3".getBytes()); - } - - { - final List columnFamilyHandles = new ArrayList<>(); - columnFamilyHandles.add(db.getDefaultColumnFamily()); - columnFamilyHandles.add(cf.get(1)); - columnFamilyHandles.add(cf.get(3)); - - final List results = - db.multiGetByteBuffers(columnFamilyHandles, keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.NotFound); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.NotFound); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.Ok); - - assertThat(results.get(2).requiredSize).isEqualTo("value3ForKey3".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(2).value)) - .isEqualTo("value3ForKey3".getBytes()); - } - } - } - - @Test - public void putNThenMultiGetDirectTruncateCF() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - final List cfDescriptors = new ArrayList<>(); - cfDescriptors.add(new ColumnFamilyDescriptor("cf0".getBytes())); - - final List cf = db.createColumnFamilies(cfDescriptors); - - 
db.put(cf.get(0), "key1".getBytes(), "value1ForKey1".getBytes()); - db.put(cf.get(0), "key2".getBytes(), "value2ForKey2WithLotsOfTrailingGarbage".getBytes()); - db.put(cf.get(0), "key3".getBytes(), "value3ForKey3".getBytes()); - - final List keys = new ArrayList<>(); - keys.add(ByteBuffer.allocateDirect(12).put("key1".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key2".getBytes())); - keys.add(ByteBuffer.allocateDirect(12).put("key3".getBytes())); - // Java8 and lower flip() returns Buffer not ByteBuffer, so can't chain above /\/\ - for (final ByteBuffer key : keys) { - key.flip(); - } - final List values = new ArrayList<>(); - for (int i = 0; i < keys.size(); i++) { - values.add(ByteBuffer.allocateDirect(24)); - } - - { - final List columnFamilyHandles = new ArrayList<>(); - columnFamilyHandles.add(cf.get(0)); - final List results = - db.multiGetByteBuffers(columnFamilyHandles, keys, values); - - assertThat(results.get(0).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(1).status.getCode()).isEqualTo(Status.Code.Ok); - assertThat(results.get(2).status.getCode()).isEqualTo(Status.Code.Ok); - - assertThat(results.get(0).requiredSize).isEqualTo("value1ForKey1".getBytes().length); - assertThat(results.get(1).requiredSize) - .isEqualTo("value2ForKey2WithLotsOfTrailingGarbage".getBytes().length); - assertThat(results.get(2).requiredSize).isEqualTo("value3ForKey3".getBytes().length); - - assertThat(TestUtil.bufferBytes(results.get(0).value)) - .isEqualTo("value1ForKey1".getBytes()); - assertThat(TestUtil.bufferBytes(results.get(1).value)) - .isEqualTo("valu e2Fo rKey 2Wit hLot sOfT".replace(" ", "").getBytes()); - assertThat(TestUtil.bufferBytes(results.get(2).value)) - .isEqualTo("value3ForKey3".getBytes()); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java b/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java deleted file mode 100644 index b2b2599a7..000000000 --- a/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
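The MutableColumnFamilyOptionsTest removed below checks the builder, the "key=value;key=value" string rendering, and the parser for mutable column-family options. A minimal sketch of that round trip, with illustrative option values and assuming only the RocksJava classes used by the test:

import org.rocksdb.MutableColumnFamilyOptions;
import org.rocksdb.MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder;

public class MutableCfOptionsRoundTripSketch {
  public static void main(final String[] args) {
    // Build a mutable option set and render it to the string form used by RocksDB.
    final MutableColumnFamilyOptions options = MutableColumnFamilyOptions.builder()
        .setWriteBufferSize(64L * 1024 * 1024)
        .setDisableAutoCompactions(false)
        .build();
    final String rendered = options.toString();
    // e.g. "write_buffer_size=67108864;disable_auto_compactions=false"

    // Parse the same string form back into a builder, as the removed test does.
    final MutableColumnFamilyOptionsBuilder parsed = MutableColumnFamilyOptions.parse(rendered);
    assert parsed.writeBufferSize() == 64L * 1024 * 1024;
    assert !parsed.disableAutoCompactions();
  }
}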
-package org.rocksdb; - -import org.junit.Test; -import org.rocksdb.MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder; - -import java.util.NoSuchElementException; - -import static org.assertj.core.api.Assertions.assertThat; - -public class MutableColumnFamilyOptionsTest { - - @Test - public void builder() { - final MutableColumnFamilyOptionsBuilder builder = - MutableColumnFamilyOptions.builder(); - builder - .setWriteBufferSize(10) - .setInplaceUpdateNumLocks(5) - .setDisableAutoCompactions(true) - .setParanoidFileChecks(true); - - assertThat(builder.writeBufferSize()).isEqualTo(10); - assertThat(builder.inplaceUpdateNumLocks()).isEqualTo(5); - assertThat(builder.disableAutoCompactions()).isEqualTo(true); - assertThat(builder.paranoidFileChecks()).isEqualTo(true); - } - - @Test(expected = NoSuchElementException.class) - public void builder_getWhenNotSet() { - final MutableColumnFamilyOptionsBuilder builder = - MutableColumnFamilyOptions.builder(); - - builder.writeBufferSize(); - } - - @Test - public void builder_build() { - final MutableColumnFamilyOptions options = MutableColumnFamilyOptions - .builder() - .setWriteBufferSize(10) - .setParanoidFileChecks(true) - .build(); - - assertThat(options.getKeys().length).isEqualTo(2); - assertThat(options.getValues().length).isEqualTo(2); - assertThat(options.getKeys()[0]) - .isEqualTo( - MutableColumnFamilyOptions.MemtableOption.write_buffer_size.name()); - assertThat(options.getValues()[0]).isEqualTo("10"); - assertThat(options.getKeys()[1]) - .isEqualTo( - MutableColumnFamilyOptions.MiscOption.paranoid_file_checks.name()); - assertThat(options.getValues()[1]).isEqualTo("true"); - } - - @Test - public void mutableColumnFamilyOptions_toString() { - final String str = MutableColumnFamilyOptions.builder() - .setWriteBufferSize(10) - .setInplaceUpdateNumLocks(5) - .setDisableAutoCompactions(true) - .setParanoidFileChecks(true) - .setMaxBytesForLevelMultiplierAdditional(new int[] {2, 3, 5, 7, 11, 13}) - .build() - .toString(); - - assertThat(str).isEqualTo("write_buffer_size=10;inplace_update_num_locks=5;" - + "disable_auto_compactions=true;paranoid_file_checks=true;max_bytes_for_level_multiplier_additional=2:3:5:7:11:13"); - } - - @Test - public void mutableColumnFamilyOptions_parse() { - final String str = "write_buffer_size=10;inplace_update_num_locks=5;" - + "disable_auto_compactions=true;paranoid_file_checks=true;max_bytes_for_level_multiplier_additional=2:{3}:{5}:{7}:{11}:{13}"; - - final MutableColumnFamilyOptionsBuilder builder = - MutableColumnFamilyOptions.parse(str); - - assertThat(builder.writeBufferSize()).isEqualTo(10); - assertThat(builder.inplaceUpdateNumLocks()).isEqualTo(5); - assertThat(builder.disableAutoCompactions()).isEqualTo(true); - assertThat(builder.paranoidFileChecks()).isEqualTo(true); - assertThat(builder.maxBytesForLevelMultiplierAdditional()) - .isEqualTo(new int[] {2, 3, 5, 7, 11, 13}); - } - - /** - * Extended parsing test to deal with all the options which C++ may return. 
- * We have canned a set of options returned by {RocksDB#getOptions} - */ - @Test - public void mutableColumnFamilyOptions_parse_getOptions_output() { - final String optionsString = - "bottommost_compression=kDisableCompressionOption; sample_for_compression=0; " - + "blob_garbage_collection_age_cutoff=0.250000; blob_garbage_collection_force_threshold=0.800000;" - + "arena_block_size=1048576; enable_blob_garbage_collection=false; level0_stop_writes_trigger=36; min_blob_size=65536;" - + "blob_compaction_readahead_size=262144; blob_file_starting_level=5; prepopulate_blob_cache=kDisable;" - + "compaction_options_universal={allow_trivial_move=false;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;" - + "compression_size_percent=-1;max_size_amplification_percent=200;max_merge_width=4294967295;size_ratio=1;}; " - + "target_file_size_base=67108864; max_bytes_for_level_base=268435456; memtable_whole_key_filtering=false; " - + "soft_pending_compaction_bytes_limit=68719476736; blob_compression_type=kNoCompression; max_write_buffer_number=2; " - + "ttl=2592000; compaction_options_fifo={allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}; " - + "check_flush_compaction_key_order=true; max_successive_merges=0; inplace_update_num_locks=10000; " - + "bottommost_compression_opts={enabled=false;parallel_threads=1;zstd_max_train_bytes=0;max_dict_bytes=0;" - + "strategy=0;max_dict_buffer_bytes=0;level=32767;window_bits=-14;}; " - + "target_file_size_multiplier=1; max_bytes_for_level_multiplier_additional=5:{7}:{9}:{11}:{13}:{15}:{17}; " - + "enable_blob_files=true; level0_slowdown_writes_trigger=20; compression=kLZ4HCCompression; level0_file_num_compaction_trigger=4; " - + "blob_file_size=268435456; prefix_extractor=nullptr; max_bytes_for_level_multiplier=10.000000; write_buffer_size=67108864; " - + "disable_auto_compactions=false; max_compaction_bytes=1677721600; memtable_huge_page_size=0; " - + "compression_opts={enabled=false;parallel_threads=1;zstd_max_train_bytes=0;max_dict_bytes=0;strategy=0;max_dict_buffer_bytes=0;" - + "level=32767;window_bits=-14;}; " - + "hard_pending_compaction_bytes_limit=274877906944; periodic_compaction_seconds=0; paranoid_file_checks=true; " - + "memtable_prefix_bloom_size_ratio=7.500000; max_sequential_skip_in_iterations=8; report_bg_io_stats=true; " - + "compaction_pri=kMinOverlappingRatio; compaction_style=kCompactionStyleLevel; memtable_factory=SkipListFactory; " - + "comparator=leveldb.BytewiseComparator; bloom_locality=0; compaction_filter_factory=nullptr; " - + "min_write_buffer_number_to_merge=1; max_write_buffer_number_to_maintain=0; compaction_filter=nullptr; merge_operator=nullptr; " - + "num_levels=7; optimize_filters_for_hits=false; force_consistency_checks=true; table_factory=BlockBasedTable; " - + "max_write_buffer_size_to_maintain=0; memtable_insert_with_hint_prefix_extractor=nullptr; level_compaction_dynamic_level_bytes=false; " - + "inplace_update_support=false; experimental_mempurge_threshold=0.003"; - - MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder cf = - MutableColumnFamilyOptions.parse(optionsString, true); - - // Check the values from the parsed string which are column family options - assertThat(cf.blobGarbageCollectionAgeCutoff()).isEqualTo(0.25); - assertThat(cf.blobGarbageCollectionForceThreshold()).isEqualTo(0.80); - assertThat(cf.arenaBlockSize()).isEqualTo(1048576); - assertThat(cf.enableBlobGarbageCollection()).isEqualTo(false); - assertThat(cf.level0StopWritesTrigger()).isEqualTo(36); - 
assertThat(cf.minBlobSize()).isEqualTo(65536); - assertThat(cf.blobCompactionReadaheadSize()).isEqualTo(262144); - assertThat(cf.blobFileStartingLevel()).isEqualTo(5); - assertThat(cf.prepopulateBlobCache()).isEqualTo(PrepopulateBlobCache.PREPOPULATE_BLOB_DISABLE); - assertThat(cf.targetFileSizeBase()).isEqualTo(67108864); - assertThat(cf.maxBytesForLevelBase()).isEqualTo(268435456); - assertThat(cf.softPendingCompactionBytesLimit()).isEqualTo(68719476736L); - assertThat(cf.blobCompressionType()).isEqualTo(CompressionType.NO_COMPRESSION); - assertThat(cf.maxWriteBufferNumber()).isEqualTo(2); - assertThat(cf.ttl()).isEqualTo(2592000); - assertThat(cf.maxSuccessiveMerges()).isEqualTo(0); - assertThat(cf.inplaceUpdateNumLocks()).isEqualTo(10000); - assertThat(cf.targetFileSizeMultiplier()).isEqualTo(1); - assertThat(cf.maxBytesForLevelMultiplierAdditional()) - .isEqualTo(new int[] {5, 7, 9, 11, 13, 15, 17}); - assertThat(cf.enableBlobFiles()).isEqualTo(true); - assertThat(cf.level0SlowdownWritesTrigger()).isEqualTo(20); - assertThat(cf.compressionType()).isEqualTo(CompressionType.LZ4HC_COMPRESSION); - assertThat(cf.level0FileNumCompactionTrigger()).isEqualTo(4); - assertThat(cf.blobFileSize()).isEqualTo(268435456); - assertThat(cf.maxBytesForLevelMultiplier()).isEqualTo(10.0); - assertThat(cf.writeBufferSize()).isEqualTo(67108864); - assertThat(cf.disableAutoCompactions()).isEqualTo(false); - assertThat(cf.maxCompactionBytes()).isEqualTo(1677721600); - assertThat(cf.memtableHugePageSize()).isEqualTo(0); - assertThat(cf.hardPendingCompactionBytesLimit()).isEqualTo(274877906944L); - assertThat(cf.periodicCompactionSeconds()).isEqualTo(0); - assertThat(cf.paranoidFileChecks()).isEqualTo(true); - assertThat(cf.memtablePrefixBloomSizeRatio()).isEqualTo(7.5); - assertThat(cf.experimentalMempurgeThreshold()).isEqualTo(0.003); - assertThat(cf.maxSequentialSkipInIterations()).isEqualTo(8); - assertThat(cf.reportBgIoStats()).isEqualTo(true); - } -} diff --git a/java/src/test/java/org/rocksdb/MutableDBOptionsTest.java b/java/src/test/java/org/rocksdb/MutableDBOptionsTest.java deleted file mode 100644 index 063a8de38..000000000 --- a/java/src/test/java/org/rocksdb/MutableDBOptionsTest.java +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
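MutableDBOptionsTest, removed below, covers the same builder / string / parse pattern for DB-wide mutable options. A small sketch under the same assumptions, with illustrative values:

import org.rocksdb.MutableDBOptions;
import org.rocksdb.MutableDBOptions.MutableDBOptionsBuilder;

public class MutableDbOptionsRoundTripSketch {
  public static void main(final String[] args) {
    final MutableDBOptions options = MutableDBOptions.builder()
        .setMaxBackgroundJobs(4)
        .setBytesPerSync(1024L * 1024)
        .build();
    // Keys and values are rendered in the order they were set,
    // e.g. "max_background_jobs=4;bytes_per_sync=1048576".
    final String rendered = options.toString();

    final MutableDBOptionsBuilder parsed = MutableDBOptions.parse(rendered);
    assert parsed.maxBackgroundJobs() == 4;
    assert parsed.bytesPerSync() == 1024L * 1024;
  }
}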
-package org.rocksdb; - -import org.junit.Test; -import org.rocksdb.MutableDBOptions.MutableDBOptionsBuilder; - -import java.util.NoSuchElementException; - -import static org.assertj.core.api.Assertions.assertThat; - -public class MutableDBOptionsTest { - - @Test - public void builder() { - final MutableDBOptionsBuilder builder = - MutableDBOptions.builder(); - builder - .setBytesPerSync(1024 * 1024 * 7) - .setMaxBackgroundJobs(5) - .setAvoidFlushDuringShutdown(false); - - assertThat(builder.bytesPerSync()).isEqualTo(1024 * 1024 * 7); - assertThat(builder.maxBackgroundJobs()).isEqualTo(5); - assertThat(builder.avoidFlushDuringShutdown()).isEqualTo(false); - } - - @Test(expected = NoSuchElementException.class) - public void builder_getWhenNotSet() { - final MutableDBOptionsBuilder builder = - MutableDBOptions.builder(); - - builder.bytesPerSync(); - } - - @Test - public void builder_build() { - final MutableDBOptions options = MutableDBOptions - .builder() - .setBytesPerSync(1024 * 1024 * 7) - .setMaxBackgroundJobs(5) - .build(); - - assertThat(options.getKeys().length).isEqualTo(2); - assertThat(options.getValues().length).isEqualTo(2); - assertThat(options.getKeys()[0]) - .isEqualTo( - MutableDBOptions.DBOption.bytes_per_sync.name()); - assertThat(options.getValues()[0]).isEqualTo("7340032"); - assertThat(options.getKeys()[1]) - .isEqualTo( - MutableDBOptions.DBOption.max_background_jobs.name()); - assertThat(options.getValues()[1]).isEqualTo("5"); - } - - @Test - public void mutableDBOptions_toString() { - final String str = MutableDBOptions - .builder() - .setMaxOpenFiles(99) - .setDelayedWriteRate(789) - .setAvoidFlushDuringShutdown(true) - .setStrictBytesPerSync(true) - .build() - .toString(); - - assertThat(str).isEqualTo("max_open_files=99;delayed_write_rate=789;" - + "avoid_flush_during_shutdown=true;strict_bytes_per_sync=true"); - } - - @Test - public void mutableDBOptions_parse() { - final String str = "max_open_files=99;delayed_write_rate=789;" - + "avoid_flush_during_shutdown=true"; - - final MutableDBOptionsBuilder builder = - MutableDBOptions.parse(str); - - assertThat(builder.maxOpenFiles()).isEqualTo(99); - assertThat(builder.delayedWriteRate()).isEqualTo(789); - assertThat(builder.avoidFlushDuringShutdown()).isEqualTo(true); - } -} diff --git a/java/src/test/java/org/rocksdb/MutableOptionsGetSetTest.java b/java/src/test/java/org/rocksdb/MutableOptionsGetSetTest.java deleted file mode 100644 index 6db940619..000000000 --- a/java/src/test/java/org/rocksdb/MutableOptionsGetSetTest.java +++ /dev/null @@ -1,429 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class MutableOptionsGetSetTest { - final int minBlobSize = 65536; - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - /** - * Validate the round-trip of blob options into and out of the C++ core of RocksDB - * From CF options on CF Creation to {RocksDB#getOptions} - * Uses 2x column families with different values for their options. 
- * NOTE that some constraints are applied to the options in the C++ core, - * e.g. on {ColumnFamilyOptions#setMemtablePrefixBloomSizeRatio} - * - * @throws RocksDBException if the database throws an exception - */ - @Test - public void testGetMutableBlobOptionsAfterCreate() throws RocksDBException { - final ColumnFamilyOptions columnFamilyOptions0 = new ColumnFamilyOptions(); - final ColumnFamilyDescriptor columnFamilyDescriptor0 = - new ColumnFamilyDescriptor("default".getBytes(UTF_8), columnFamilyOptions0); - final List columnFamilyDescriptors = - Collections.singletonList(columnFamilyDescriptor0); - final List columnFamilyHandles = new ArrayList<>(); - - try (final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(dbOptions, dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - try (final ColumnFamilyOptions columnFamilyOptions1 = - new ColumnFamilyOptions() - .setMinBlobSize(minBlobSize) - .setEnableBlobFiles(true) - .setBlobGarbageCollectionAgeCutoff(0.25) - .setBlobGarbageCollectionForceThreshold(0.80) - .setBlobCompactionReadaheadSize(262144) - .setBlobFileStartingLevel(2) - .setArenaBlockSize(42) - .setMemtablePrefixBloomSizeRatio(0.17) - .setExperimentalMempurgeThreshold(0.005) - .setMemtableWholeKeyFiltering(false) - .setMemtableHugePageSize(3) - .setMaxSuccessiveMerges(4) - .setMaxWriteBufferNumber(12) - .setInplaceUpdateNumLocks(16) - .setDisableAutoCompactions(false) - .setSoftPendingCompactionBytesLimit(112) - .setHardPendingCompactionBytesLimit(280) - .setLevel0FileNumCompactionTrigger(200) - .setLevel0SlowdownWritesTrigger(312) - .setLevel0StopWritesTrigger(584) - .setMaxCompactionBytes(12) - .setTargetFileSizeBase(99) - .setTargetFileSizeMultiplier(112) - .setMaxSequentialSkipInIterations(50) - .setReportBgIoStats(true); - - final ColumnFamilyOptions columnFamilyOptions2 = - new ColumnFamilyOptions() - .setMinBlobSize(minBlobSize) - .setEnableBlobFiles(false) - .setArenaBlockSize(42) - .setMemtablePrefixBloomSizeRatio(0.236) - .setExperimentalMempurgeThreshold(0.247) - .setMemtableWholeKeyFiltering(true) - .setMemtableHugePageSize(8) - .setMaxSuccessiveMerges(12) - .setMaxWriteBufferNumber(22) - .setInplaceUpdateNumLocks(160) - .setDisableAutoCompactions(true) - .setSoftPendingCompactionBytesLimit(1124) - .setHardPendingCompactionBytesLimit(2800) - .setLevel0FileNumCompactionTrigger(2000) - .setLevel0SlowdownWritesTrigger(5840) - .setLevel0StopWritesTrigger(31200) - .setMaxCompactionBytes(112) - .setTargetFileSizeBase(999) - .setTargetFileSizeMultiplier(1120) - .setMaxSequentialSkipInIterations(24) - .setReportBgIoStats(true)) { - final ColumnFamilyDescriptor columnFamilyDescriptor1 = - new ColumnFamilyDescriptor("column_family_1".getBytes(UTF_8), columnFamilyOptions1); - final ColumnFamilyDescriptor columnFamilyDescriptor2 = - new ColumnFamilyDescriptor("column_family_2".getBytes(UTF_8), columnFamilyOptions2); - - // Create the column family with blob options - final ColumnFamilyHandle columnFamilyHandle1 = - db.createColumnFamily(columnFamilyDescriptor1); - final ColumnFamilyHandle columnFamilyHandle2 = - db.createColumnFamily(columnFamilyDescriptor2); - - // Check the getOptions() brings back the creation options for CF1 - final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder1 = - db.getOptions(columnFamilyHandle1); - assertThat(builder1.enableBlobFiles()).isEqualTo(true); - assertThat(builder1.blobGarbageCollectionAgeCutoff()).isEqualTo(0.25); - 
assertThat(builder1.blobGarbageCollectionForceThreshold()).isEqualTo(0.80); - assertThat(builder1.blobCompactionReadaheadSize()).isEqualTo(262144); - assertThat(builder1.blobFileStartingLevel()).isEqualTo(2); - assertThat(builder1.minBlobSize()).isEqualTo(minBlobSize); - assertThat(builder1.arenaBlockSize()).isEqualTo(42); - assertThat(builder1.memtablePrefixBloomSizeRatio()).isEqualTo(0.17); - assertThat(builder1.experimentalMempurgeThreshold()).isEqualTo(0.005); - assertThat(builder1.memtableWholeKeyFiltering()).isEqualTo(false); - assertThat(builder1.memtableHugePageSize()).isEqualTo(3); - assertThat(builder1.maxSuccessiveMerges()).isEqualTo(4); - assertThat(builder1.maxWriteBufferNumber()).isEqualTo(12); - assertThat(builder1.inplaceUpdateNumLocks()).isEqualTo(16); - assertThat(builder1.disableAutoCompactions()).isEqualTo(false); - assertThat(builder1.softPendingCompactionBytesLimit()).isEqualTo(112); - assertThat(builder1.hardPendingCompactionBytesLimit()).isEqualTo(280); - assertThat(builder1.level0FileNumCompactionTrigger()).isEqualTo(200); - assertThat(builder1.level0SlowdownWritesTrigger()).isEqualTo(312); - assertThat(builder1.level0StopWritesTrigger()).isEqualTo(584); - assertThat(builder1.maxCompactionBytes()).isEqualTo(12); - assertThat(builder1.targetFileSizeBase()).isEqualTo(99); - assertThat(builder1.targetFileSizeMultiplier()).isEqualTo(112); - assertThat(builder1.maxSequentialSkipInIterations()).isEqualTo(50); - assertThat(builder1.reportBgIoStats()).isEqualTo(true); - - // Check the getOptions() brings back the creation options for CF2 - final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder2 = - db.getOptions(columnFamilyHandle2); - assertThat(builder2.enableBlobFiles()).isEqualTo(false); - assertThat(builder2.minBlobSize()).isEqualTo(minBlobSize); - assertThat(builder2.arenaBlockSize()).isEqualTo(42); - assertThat(builder2.memtablePrefixBloomSizeRatio()).isEqualTo(0.236); - assertThat(builder2.experimentalMempurgeThreshold()).isEqualTo(0.247); - assertThat(builder2.memtableWholeKeyFiltering()).isEqualTo(true); - assertThat(builder2.memtableHugePageSize()).isEqualTo(8); - assertThat(builder2.maxSuccessiveMerges()).isEqualTo(12); - assertThat(builder2.maxWriteBufferNumber()).isEqualTo(22); - assertThat(builder2.inplaceUpdateNumLocks()).isEqualTo(160); - assertThat(builder2.disableAutoCompactions()).isEqualTo(true); - assertThat(builder2.softPendingCompactionBytesLimit()).isEqualTo(1124); - assertThat(builder2.hardPendingCompactionBytesLimit()).isEqualTo(2800); - assertThat(builder2.level0FileNumCompactionTrigger()).isEqualTo(2000); - assertThat(builder2.level0SlowdownWritesTrigger()).isEqualTo(5840); - assertThat(builder2.level0StopWritesTrigger()).isEqualTo(31200); - assertThat(builder2.maxCompactionBytes()).isEqualTo(112); - assertThat(builder2.targetFileSizeBase()).isEqualTo(999); - assertThat(builder2.targetFileSizeMultiplier()).isEqualTo(1120); - assertThat(builder2.maxSequentialSkipInIterations()).isEqualTo(24); - assertThat(builder2.reportBgIoStats()).isEqualTo(true); - } - } - } - - /** - * Validate the round-trip of blob options into and out of the C++ core of RocksDB - * From {RocksDB#setOptions} to {RocksDB#getOptions} - * Uses 2x column families with different values for their options. - * NOTE that some constraints are applied to the options in the C++ core, - * e.g. 
on {ColumnFamilyOptions#setMemtablePrefixBloomSizeRatio} - * - * @throws RocksDBException if a database access has an error - */ - @Test - public void testGetMutableBlobOptionsAfterSetCF() throws RocksDBException { - final ColumnFamilyOptions columnFamilyOptions0 = new ColumnFamilyOptions(); - final ColumnFamilyDescriptor columnFamilyDescriptor0 = - new ColumnFamilyDescriptor("default".getBytes(UTF_8), columnFamilyOptions0); - final List columnFamilyDescriptors = - Collections.singletonList(columnFamilyDescriptor0); - final List columnFamilyHandles = new ArrayList<>(); - - try (final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(dbOptions, dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - try (final ColumnFamilyOptions columnFamilyOptions1 = new ColumnFamilyOptions(); - - final ColumnFamilyOptions columnFamilyOptions2 = new ColumnFamilyOptions()) { - final ColumnFamilyDescriptor columnFamilyDescriptor1 = - new ColumnFamilyDescriptor("column_family_1".getBytes(UTF_8), columnFamilyOptions1); - final ColumnFamilyDescriptor columnFamilyDescriptor2 = - new ColumnFamilyDescriptor("column_family_2".getBytes(UTF_8), columnFamilyOptions2); - - // Create the column family with blob options - final ColumnFamilyHandle columnFamilyHandle1 = - db.createColumnFamily(columnFamilyDescriptor1); - final ColumnFamilyHandle columnFamilyHandle2 = - db.createColumnFamily(columnFamilyDescriptor2); - db.flush(new FlushOptions().setWaitForFlush(true)); - - final MutableColumnFamilyOptions - .MutableColumnFamilyOptionsBuilder mutableColumnFamilyOptions1 = - MutableColumnFamilyOptions.builder() - .setMinBlobSize(minBlobSize) - .setEnableBlobFiles(true) - .setBlobGarbageCollectionAgeCutoff(0.25) - .setBlobGarbageCollectionForceThreshold(0.80) - .setBlobCompactionReadaheadSize(262144) - .setBlobFileStartingLevel(3) - .setArenaBlockSize(42) - .setMemtablePrefixBloomSizeRatio(0.17) - .setExperimentalMempurgeThreshold(0.005) - .setMemtableWholeKeyFiltering(false) - .setMemtableHugePageSize(3) - .setMaxSuccessiveMerges(4) - .setMaxWriteBufferNumber(12) - .setInplaceUpdateNumLocks(16) - .setDisableAutoCompactions(false) - .setSoftPendingCompactionBytesLimit(112) - .setHardPendingCompactionBytesLimit(280) - .setLevel0FileNumCompactionTrigger(200) - .setLevel0SlowdownWritesTrigger(312) - .setLevel0StopWritesTrigger(584) - .setMaxCompactionBytes(12) - .setTargetFileSizeBase(99) - .setTargetFileSizeMultiplier(112); - db.setOptions(columnFamilyHandle1, mutableColumnFamilyOptions1.build()); - - // Check the getOptions() brings back the creation options for CF1 - final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder1 = - db.getOptions(columnFamilyHandle1); - assertThat(builder1.enableBlobFiles()).isEqualTo(true); - assertThat(builder1.blobGarbageCollectionAgeCutoff()).isEqualTo(0.25); - assertThat(builder1.blobGarbageCollectionForceThreshold()).isEqualTo(0.80); - assertThat(builder1.blobCompactionReadaheadSize()).isEqualTo(262144); - assertThat(builder1.blobFileStartingLevel()).isEqualTo(3); - assertThat(builder1.minBlobSize()).isEqualTo(minBlobSize); - assertThat(builder1.arenaBlockSize()).isEqualTo(42); - assertThat(builder1.memtablePrefixBloomSizeRatio()).isEqualTo(0.17); - assertThat(builder1.experimentalMempurgeThreshold()).isEqualTo(0.005); - assertThat(builder1.memtableWholeKeyFiltering()).isEqualTo(false); - assertThat(builder1.memtableHugePageSize()).isEqualTo(3); - 
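// Editor's note on the round trip under test: the pattern is
//   db.setOptions(columnFamilyHandle, MutableColumnFamilyOptions.builder(). ... .build());
//   final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder current =
//       db.getOptions(columnFamilyHandle);
// and the assertions check that getOptions() reflects what the C++ core stored. As the
// javadoc above notes, the core may constrain some requested values (for example
// memtable_prefix_bloom_size_ratio), so the value read back is not guaranteed to be
// identical to the value requested for every option.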
assertThat(builder1.maxSuccessiveMerges()).isEqualTo(4); - assertThat(builder1.maxWriteBufferNumber()).isEqualTo(12); - assertThat(builder1.inplaceUpdateNumLocks()).isEqualTo(16); - assertThat(builder1.disableAutoCompactions()).isEqualTo(false); - assertThat(builder1.softPendingCompactionBytesLimit()).isEqualTo(112); - assertThat(builder1.hardPendingCompactionBytesLimit()).isEqualTo(280); - assertThat(builder1.level0FileNumCompactionTrigger()).isEqualTo(200); - assertThat(builder1.level0SlowdownWritesTrigger()).isEqualTo(312); - assertThat(builder1.level0StopWritesTrigger()).isEqualTo(584); - assertThat(builder1.maxCompactionBytes()).isEqualTo(12); - assertThat(builder1.targetFileSizeBase()).isEqualTo(99); - assertThat(builder1.targetFileSizeMultiplier()).isEqualTo(112); - - final MutableColumnFamilyOptions - .MutableColumnFamilyOptionsBuilder mutableColumnFamilyOptions2 = - MutableColumnFamilyOptions.builder() - .setMinBlobSize(minBlobSize) - .setEnableBlobFiles(false) - .setArenaBlockSize(42) - .setMemtablePrefixBloomSizeRatio(0.236) - .setExperimentalMempurgeThreshold(0.247) - .setMemtableWholeKeyFiltering(true) - .setMemtableHugePageSize(8) - .setMaxSuccessiveMerges(12) - .setMaxWriteBufferNumber(22) - .setInplaceUpdateNumLocks(160) - .setDisableAutoCompactions(true) - .setSoftPendingCompactionBytesLimit(1124) - .setHardPendingCompactionBytesLimit(2800) - .setLevel0FileNumCompactionTrigger(2000) - .setLevel0SlowdownWritesTrigger(5840) - .setLevel0StopWritesTrigger(31200) - .setMaxCompactionBytes(112) - .setTargetFileSizeBase(999) - .setTargetFileSizeMultiplier(1120); - db.setOptions(columnFamilyHandle2, mutableColumnFamilyOptions2.build()); - - // Check the getOptions() brings back the creation options for CF2 - final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder2 = - db.getOptions(columnFamilyHandle2); - assertThat(builder2.enableBlobFiles()).isEqualTo(false); - assertThat(builder2.minBlobSize()).isEqualTo(minBlobSize); - assertThat(builder2.arenaBlockSize()).isEqualTo(42); - assertThat(builder2.memtablePrefixBloomSizeRatio()).isEqualTo(0.236); - assertThat(builder2.experimentalMempurgeThreshold()).isEqualTo(0.247); - assertThat(builder2.memtableWholeKeyFiltering()).isEqualTo(true); - assertThat(builder2.memtableHugePageSize()).isEqualTo(8); - assertThat(builder2.maxSuccessiveMerges()).isEqualTo(12); - assertThat(builder2.maxWriteBufferNumber()).isEqualTo(22); - assertThat(builder2.inplaceUpdateNumLocks()).isEqualTo(160); - assertThat(builder2.disableAutoCompactions()).isEqualTo(true); - assertThat(builder2.softPendingCompactionBytesLimit()).isEqualTo(1124); - assertThat(builder2.hardPendingCompactionBytesLimit()).isEqualTo(2800); - assertThat(builder2.level0FileNumCompactionTrigger()).isEqualTo(2000); - assertThat(builder2.level0SlowdownWritesTrigger()).isEqualTo(5840); - assertThat(builder2.level0StopWritesTrigger()).isEqualTo(31200); - assertThat(builder2.maxCompactionBytes()).isEqualTo(112); - assertThat(builder2.targetFileSizeBase()).isEqualTo(999); - assertThat(builder2.targetFileSizeMultiplier()).isEqualTo(1120); - } - } - } - - /** - * Validate the round-trip of blob options into and out of the C++ core of RocksDB - * From {RocksDB#setOptions} to {RocksDB#getOptions} - * Uses 2x column families with different values for their options. - * NOTE that some constraints are applied to the options in the C++ core, - * e.g. 
on {ColumnFamilyOptions#setMemtablePrefixBloomSizeRatio} - * - * @throws RocksDBException if a database access has an error - */ - @Test - public void testGetMutableBlobOptionsAfterSet() throws RocksDBException { - final ColumnFamilyOptions columnFamilyOptions0 = new ColumnFamilyOptions(); - final ColumnFamilyDescriptor columnFamilyDescriptor0 = - new ColumnFamilyDescriptor("default".getBytes(UTF_8), columnFamilyOptions0); - final List columnFamilyDescriptors = - Collections.singletonList(columnFamilyDescriptor0); - final List columnFamilyHandles = new ArrayList<>(); - - try (final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(dbOptions, dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - final MutableColumnFamilyOptions - .MutableColumnFamilyOptionsBuilder mutableColumnFamilyOptions = - MutableColumnFamilyOptions.builder() - .setMinBlobSize(minBlobSize) - .setEnableBlobFiles(true) - .setBlobGarbageCollectionAgeCutoff(0.25) - .setBlobGarbageCollectionForceThreshold(0.80) - .setBlobCompactionReadaheadSize(131072) - .setBlobFileStartingLevel(4) - .setArenaBlockSize(42) - .setMemtablePrefixBloomSizeRatio(0.17) - .setExperimentalMempurgeThreshold(0.005) - .setMemtableWholeKeyFiltering(false) - .setMemtableHugePageSize(3) - .setMaxSuccessiveMerges(4) - .setMaxWriteBufferNumber(12) - .setInplaceUpdateNumLocks(16) - .setDisableAutoCompactions(false) - .setSoftPendingCompactionBytesLimit(112) - .setHardPendingCompactionBytesLimit(280) - .setLevel0FileNumCompactionTrigger(200) - .setLevel0SlowdownWritesTrigger(312) - .setLevel0StopWritesTrigger(584) - .setMaxCompactionBytes(12) - .setTargetFileSizeBase(99) - .setTargetFileSizeMultiplier(112); - db.setOptions(mutableColumnFamilyOptions.build()); - - // Check the getOptions() brings back the creation options for CF1 - final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder1 = db.getOptions(); - assertThat(builder1.enableBlobFiles()).isEqualTo(true); - assertThat(builder1.blobGarbageCollectionAgeCutoff()).isEqualTo(0.25); - assertThat(builder1.blobGarbageCollectionForceThreshold()).isEqualTo(0.80); - assertThat(builder1.blobCompactionReadaheadSize()).isEqualTo(131072); - assertThat(builder1.blobFileStartingLevel()).isEqualTo(4); - assertThat(builder1.minBlobSize()).isEqualTo(minBlobSize); - assertThat(builder1.arenaBlockSize()).isEqualTo(42); - assertThat(builder1.memtablePrefixBloomSizeRatio()).isEqualTo(0.17); - assertThat(builder1.experimentalMempurgeThreshold()).isEqualTo(0.005); - assertThat(builder1.memtableWholeKeyFiltering()).isEqualTo(false); - assertThat(builder1.memtableHugePageSize()).isEqualTo(3); - assertThat(builder1.maxSuccessiveMerges()).isEqualTo(4); - assertThat(builder1.maxWriteBufferNumber()).isEqualTo(12); - assertThat(builder1.inplaceUpdateNumLocks()).isEqualTo(16); - assertThat(builder1.disableAutoCompactions()).isEqualTo(false); - assertThat(builder1.softPendingCompactionBytesLimit()).isEqualTo(112); - assertThat(builder1.hardPendingCompactionBytesLimit()).isEqualTo(280); - assertThat(builder1.level0FileNumCompactionTrigger()).isEqualTo(200); - assertThat(builder1.level0SlowdownWritesTrigger()).isEqualTo(312); - assertThat(builder1.level0StopWritesTrigger()).isEqualTo(584); - assertThat(builder1.maxCompactionBytes()).isEqualTo(12); - assertThat(builder1.targetFileSizeBase()).isEqualTo(99); - assertThat(builder1.targetFileSizeMultiplier()).isEqualTo(112); - } - } - - @Test - public void testGetMutableDBOptionsAfterSet() 
throws RocksDBException { - final ColumnFamilyOptions columnFamilyOptions0 = new ColumnFamilyOptions(); - final ColumnFamilyDescriptor columnFamilyDescriptor0 = - new ColumnFamilyDescriptor("default".getBytes(UTF_8), columnFamilyOptions0); - final List columnFamilyDescriptors = - Collections.singletonList(columnFamilyDescriptor0); - final List columnFamilyHandles = new ArrayList<>(); - - try (final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(dbOptions, dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - final MutableDBOptions.MutableDBOptionsBuilder mutableDBOptions = - MutableDBOptions.builder() - .setMaxBackgroundJobs(16) - .setAvoidFlushDuringShutdown(true) - .setWritableFileMaxBufferSize(2097152) - .setDelayedWriteRate(67108864) - .setMaxTotalWalSize(16777216) - .setDeleteObsoleteFilesPeriodMicros(86400000000L) - .setStatsDumpPeriodSec(1200) - .setStatsPersistPeriodSec(7200) - .setStatsHistoryBufferSize(6291456) - .setMaxOpenFiles(8) - .setBytesPerSync(4194304) - .setWalBytesPerSync(1048576) - .setStrictBytesPerSync(true) - .setCompactionReadaheadSize(1024); - - db.setDBOptions(mutableDBOptions.build()); - - final MutableDBOptions.MutableDBOptionsBuilder getBuilder = db.getDBOptions(); - assertThat(getBuilder.maxBackgroundJobs()).isEqualTo(16); // 4 - assertThat(getBuilder.avoidFlushDuringShutdown()).isEqualTo(true); // false - assertThat(getBuilder.writableFileMaxBufferSize()).isEqualTo(2097152); // 1048576 - assertThat(getBuilder.delayedWriteRate()).isEqualTo(67108864); // 16777216 - assertThat(getBuilder.maxTotalWalSize()).isEqualTo(16777216); - assertThat(getBuilder.deleteObsoleteFilesPeriodMicros()) - .isEqualTo(86400000000L); // 21600000000 - assertThat(getBuilder.statsDumpPeriodSec()).isEqualTo(1200); // 600 - assertThat(getBuilder.statsPersistPeriodSec()).isEqualTo(7200); // 600 - assertThat(getBuilder.statsHistoryBufferSize()).isEqualTo(6291456); // 1048576 - assertThat(getBuilder.maxOpenFiles()).isEqualTo(8); //-1 - assertThat(getBuilder.bytesPerSync()).isEqualTo(4194304); // 1048576 - assertThat(getBuilder.walBytesPerSync()).isEqualTo(1048576); // 0 - assertThat(getBuilder.strictBytesPerSync()).isEqualTo(true); // false - assertThat(getBuilder.compactionReadaheadSize()).isEqualTo(1024); // 0 - } - } -} diff --git a/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java b/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java deleted file mode 100644 index 970e58c0c..000000000 --- a/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.*; -import java.util.Comparator; - -import static org.junit.Assert.assertEquals; - -public class NativeComparatorWrapperTest { - static { - RocksDB.loadLibrary(); - } - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - private static final Random random = new Random(); - - @Test - public void rountrip() throws RocksDBException { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - final int ITERATIONS = 1_000; - - final String[] storedKeys = new String[ITERATIONS]; - try (final NativeStringComparatorWrapper comparator = new NativeStringComparatorWrapper(); - final Options opt = new Options() - .setCreateIfMissing(true) - .setComparator(comparator)) { - - // store random integer keys - try (final RocksDB db = RocksDB.open(opt, dbPath)) { - for (int i = 0; i < ITERATIONS; i++) { - final String strKey = randomString(); - final byte key[] = strKey.getBytes(); - // does key already exist (avoid duplicates) - if (i > 0 && db.get(key) != null) { - i--; // generate a different key - } else { - db.put(key, "value".getBytes()); - storedKeys[i] = strKey; - } - } - } - - // sort the stored keys into ascending alpha-numeric order - Arrays.sort(storedKeys, new Comparator() { - @Override - public int compare(final String o1, final String o2) { - return o1.compareTo(o2); - } - }); - - // re-open db and read from start to end - // string keys should be in ascending - // order - try (final RocksDB db = RocksDB.open(opt, dbPath); - final RocksIterator it = db.newIterator()) { - int count = 0; - for (it.seekToFirst(); it.isValid(); it.next()) { - final String strKey = new String(it.key()); - assertEquals(storedKeys[count++], strKey); - } - } - } - } - - private String randomString() { - final char[] chars = new char[12]; - for(int i = 0; i < 12; i++) { - final int letterCode = random.nextInt(24); - final char letter = (char) (((int) 'a') + letterCode); - chars[i] = letter; - } - return String.copyValueOf(chars); - } - - public static class NativeStringComparatorWrapper - extends NativeComparatorWrapper { - - @Override - protected long initializeNative(final long... nativeParameterHandles) { - return newStringComparator(); - } - - private native long newStringComparator(); - } -} diff --git a/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java b/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java deleted file mode 100644 index ab60081a0..000000000 --- a/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.Environment; - -import java.io.File; -import java.io.IOException; -import java.nio.file.*; - -import static org.assertj.core.api.Assertions.assertThat; - -public class NativeLibraryLoaderTest { - - @Rule - public TemporaryFolder temporaryFolder = new TemporaryFolder(); - - @Test - public void tempFolder() throws IOException { - NativeLibraryLoader.getInstance().loadLibraryFromJarToTemp( - temporaryFolder.getRoot().getAbsolutePath()); - final Path path = Paths.get(temporaryFolder.getRoot().getAbsolutePath(), - Environment.getJniLibraryFileName("rocksdb")); - assertThat(Files.exists(path)).isTrue(); - assertThat(Files.isReadable(path)).isTrue(); - } - - @Test - public void overridesExistingLibrary() throws IOException { - File first = NativeLibraryLoader.getInstance().loadLibraryFromJarToTemp( - temporaryFolder.getRoot().getAbsolutePath()); - NativeLibraryLoader.getInstance().loadLibraryFromJarToTemp( - temporaryFolder.getRoot().getAbsolutePath()); - assertThat(first.exists()).isTrue(); - } -} diff --git a/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java b/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java deleted file mode 100644 index 519b70b1d..000000000 --- a/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; - -public class OptimisticTransactionDBTest { - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void open() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final OptimisticTransactionDB otdb = OptimisticTransactionDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - assertThat(otdb).isNotNull(); - } - } - - @Test - public void open_columnFamilies() throws RocksDBException { - try(final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions myCfOpts = new ColumnFamilyOptions()) { - - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("myCf".getBytes(), myCfOpts)); - - final List columnFamilyHandles = new ArrayList<>(); - - try (final OptimisticTransactionDB otdb = OptimisticTransactionDB.open(dbOptions, - dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - try { - assertThat(otdb).isNotNull(); - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } - } - - @Test - public void beginTransaction() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final OptimisticTransactionDB otdb = OptimisticTransactionDB.open( - options, dbFolder.getRoot().getAbsolutePath()); - final WriteOptions writeOptions = new WriteOptions()) { - - try(final Transaction txn = 
otdb.beginTransaction(writeOptions)) { - assertThat(txn).isNotNull(); - } - } - } - - @Test - public void beginTransaction_transactionOptions() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final OptimisticTransactionDB otdb = OptimisticTransactionDB.open( - options, dbFolder.getRoot().getAbsolutePath()); - final WriteOptions writeOptions = new WriteOptions(); - final OptimisticTransactionOptions optimisticTxnOptions = - new OptimisticTransactionOptions()) { - - try(final Transaction txn = otdb.beginTransaction(writeOptions, - optimisticTxnOptions)) { - assertThat(txn).isNotNull(); - } - } - } - - @Test - public void beginTransaction_withOld() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final OptimisticTransactionDB otdb = OptimisticTransactionDB.open( - options, dbFolder.getRoot().getAbsolutePath()); - final WriteOptions writeOptions = new WriteOptions()) { - - try(final Transaction txn = otdb.beginTransaction(writeOptions)) { - final Transaction txnReused = otdb.beginTransaction(writeOptions, txn); - assertThat(txnReused).isSameAs(txn); - } - } - } - - @Test - public void beginTransaction_withOld_transactionOptions() - throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final OptimisticTransactionDB otdb = OptimisticTransactionDB.open( - options, dbFolder.getRoot().getAbsolutePath()); - final WriteOptions writeOptions = new WriteOptions(); - final OptimisticTransactionOptions optimisticTxnOptions = - new OptimisticTransactionOptions()) { - - try(final Transaction txn = otdb.beginTransaction(writeOptions)) { - final Transaction txnReused = otdb.beginTransaction(writeOptions, - optimisticTxnOptions, txn); - assertThat(txnReused).isSameAs(txn); - } - } - } - - @Test - public void baseDB() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final OptimisticTransactionDB otdb = OptimisticTransactionDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - assertThat(otdb).isNotNull(); - final RocksDB db = otdb.getBaseDB(); - assertThat(db).isNotNull(); - assertThat(db.isOwningHandle()).isFalse(); - } - } -} diff --git a/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java b/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java deleted file mode 100644 index ef656b958..000000000 --- a/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; -import org.rocksdb.util.BytewiseComparator; - -import java.util.Random; - -import static org.assertj.core.api.Assertions.assertThat; - -public class OptimisticTransactionOptionsTest { - - private static final Random rand = PlatformRandomHelper. 
- getPlatformSpecificRandomFactory(); - - @Test - public void setSnapshot() { - try (final OptimisticTransactionOptions opt = new OptimisticTransactionOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setSetSnapshot(boolValue); - assertThat(opt.isSetSnapshot()).isEqualTo(boolValue); - } - } - - @Test - public void comparator() { - try (final OptimisticTransactionOptions opt = new OptimisticTransactionOptions(); - final ComparatorOptions copt = new ComparatorOptions() - .setUseDirectBuffer(true); - final AbstractComparator comparator = new BytewiseComparator(copt)) { - opt.setComparator(comparator); - } - } -} diff --git a/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java b/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java deleted file mode 100644 index d2f92e1ff..000000000 --- a/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java +++ /dev/null @@ -1,446 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.*; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.junit.Test; - -public class OptimisticTransactionTest extends AbstractTransactionTest { - @Test - public void prepare_commit() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - txn.commit(); - } - - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v12); - txn.prepare(); - - failBecauseExceptionWasNotThrown(RocksDBException.class); - } catch (final RocksDBException e) { - assertThat(e.getMessage()) - .contains("Two phase commit not supported for optimistic transactions"); - } - } - } - - @Test - public void getForUpdate_cf_conflict() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(testCf, k1, v1); - assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); - - // NOTE: txn2 updates k1, during txn3 - txn2.put(testCf, k1, v12); - assertThat(txn2.get(testCf, readOptions, k1)).isEqualTo(v12); - txn2.commit(); - - try { - txn3.commit(); // should cause an exception! 
- } catch(final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" + - "transactions"); - } - } - - @Test - public void getForUpdate_conflict() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.getForUpdate(readOptions, k1, true)).isEqualTo(v1); - - // NOTE: txn2 updates k1, during txn3 - txn2.put(k1, v12); - assertThat(txn2.get(readOptions, k1)).isEqualTo(v12); - txn2.commit(); - - try { - txn3.commit(); // should cause an exception! - } catch(final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" + - "transactions"); - } - } - - @Deprecated - @Test - public void multiGetForUpdate_cf_conflict() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - final byte[] otherValue = "otherValue".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - final List cfList = Arrays.asList(testCf, testCf); - - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(testCf, keys[0], values[0]); - txn.put(testCf, keys[1], values[1]); - assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(values); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.multiGetForUpdate(readOptions, cfList, keys)) - .isEqualTo(values); - - // NOTE: txn2 updates k1, during txn3 - txn2.put(testCf, keys[0], otherValue); - assertThat(txn2.get(testCf, readOptions, keys[0])) - .isEqualTo(otherValue); - txn2.commit(); - - try { - txn3.commit(); // should cause an exception! 
- } catch(final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" + - "transactions"); - } - } - - @Test - public void multiGetAsListForUpdate_cf_conflict() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - final byte[] otherValue = "otherValue".getBytes(UTF_8); - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - final List cfList = Arrays.asList(testCf, testCf); - - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(testCf, keys[0], values[0]); - txn.put(testCf, keys[1], values[1]); - assertThat(txn.multiGetAsList(readOptions, cfList, Arrays.asList(keys))) - .containsExactly(values); - txn.commit(); - } - - try (final Transaction txn2 = dbContainer.beginTransaction()) { - try (final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.multiGetForUpdateAsList(readOptions, cfList, Arrays.asList(keys))) - .containsExactly(values); - - // NOTE: txn2 updates k1, during txn3 - txn2.put(testCf, keys[0], otherValue); - assertThat(txn2.get(testCf, readOptions, keys[0])).isEqualTo(otherValue); - txn2.commit(); - - try { - txn3.commit(); // should cause an exception! - } catch (final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" - + "transactions"); - } - } - - @Deprecated - @Test - public void multiGetForUpdate_conflict() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - final byte[] otherValue = "otherValue".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(keys[0], values[0]); - txn.put(keys[1], values[1]); - assertThat(txn.multiGet(readOptions, keys)).isEqualTo(values); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.multiGetForUpdate(readOptions, keys)) - .isEqualTo(values); - - // NOTE: txn2 updates k1, during txn3 - txn2.put(keys[0], otherValue); - assertThat(txn2.get(readOptions, keys[0])) - .isEqualTo(otherValue); - txn2.commit(); - - try { - txn3.commit(); // should cause an exception! 
- } catch(final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" + - "transactions"); - } - } - - @Test - public void multiGetasListForUpdate_conflict() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - final byte[] otherValue = "otherValue".getBytes(UTF_8); - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(keys[0], values[0]); - txn.put(keys[1], values[1]); - assertThat(txn.multiGetAsList(readOptions, Arrays.asList(keys))).containsExactly(values); - txn.commit(); - } - - try (final Transaction txn2 = dbContainer.beginTransaction()) { - try (final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.multiGetForUpdateAsList(readOptions, Arrays.asList(keys))) - .containsExactly(values); - - // NOTE: txn2 updates k1, during txn3 - txn2.put(keys[0], otherValue); - assertThat(txn2.get(readOptions, keys[0])).isEqualTo(otherValue); - txn2.commit(); - - try { - txn3.commit(); // should cause an exception! - } catch (final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" - + "transactions"); - } - } - - @Test - public void undoGetForUpdate_cf_conflict() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(testCf, k1, v1); - assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); - - // undo the getForUpdate - txn3.undoGetForUpdate(testCf, k1); - - // NOTE: txn2 updates k1, during txn3 - txn2.put(testCf, k1, v12); - assertThat(txn2.get(testCf, readOptions, k1)).isEqualTo(v12); - txn2.commit(); - - // should not cause an exception - // because we undid the getForUpdate above! 
- txn3.commit(); - } - } - } - } - - @Test - public void undoGetForUpdate_conflict() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.getForUpdate(readOptions, k1, true)).isEqualTo(v1); - - // undo the getForUpdate - txn3.undoGetForUpdate(k1); - - // NOTE: txn2 updates k1, during txn3 - txn2.put(k1, v12); - assertThat(txn2.get(readOptions, k1)).isEqualTo(v12); - txn2.commit(); - - // should not cause an exception - // because we undid the getForUpdate above! - txn3.commit(); - } - } - } - } - - @Test - public void name() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getName()).isEmpty(); - final String name = "my-transaction-" + rand.nextLong(); - - try { - txn.setName(name); - fail("Optimistic transactions cannot be named."); - } catch(final RocksDBException e) { - assertThat(e.getStatus().getCode()).isEqualTo(Status.Code.InvalidArgument); - } - } - } - - @Override - public OptimisticTransactionDBContainer startDb() - throws RocksDBException { - final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - - final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions(); - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor(TXN_TEST_COLUMN_FAMILY, - columnFamilyOptions)); - final List columnFamilyHandles = new ArrayList<>(); - - final OptimisticTransactionDB optimisticTxnDb; - try { - optimisticTxnDb = OptimisticTransactionDB.open( - options, dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles); - } catch(final RocksDBException e) { - columnFamilyOptions.close(); - options.close(); - throw e; - } - - final WriteOptions writeOptions = new WriteOptions(); - final OptimisticTransactionOptions optimisticTxnOptions = - new OptimisticTransactionOptions(); - - return new OptimisticTransactionDBContainer(optimisticTxnOptions, - writeOptions, columnFamilyHandles, optimisticTxnDb, columnFamilyOptions, - options); - } - - private static class OptimisticTransactionDBContainer - extends DBContainer { - - private final OptimisticTransactionOptions optimisticTxnOptions; - private final OptimisticTransactionDB optimisticTxnDb; - - public OptimisticTransactionDBContainer( - final OptimisticTransactionOptions optimisticTxnOptions, - final WriteOptions writeOptions, - final List columnFamilyHandles, - final OptimisticTransactionDB optimisticTxnDb, - final ColumnFamilyOptions columnFamilyOptions, - final DBOptions options) { - super(writeOptions, columnFamilyHandles, columnFamilyOptions, - options); - this.optimisticTxnOptions = optimisticTxnOptions; - this.optimisticTxnDb = optimisticTxnDb; - } - - @Override - public Transaction beginTransaction() { - return optimisticTxnDb.beginTransaction(writeOptions, - optimisticTxnOptions); - } - - @Override - public Transaction beginTransaction(final WriteOptions 
writeOptions) { - return optimisticTxnDb.beginTransaction(writeOptions, - optimisticTxnOptions); - } - - @Override - public void close() { - optimisticTxnOptions.close(); - writeOptions.close(); - for(final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) { - columnFamilyHandle.close(); - } - optimisticTxnDb.close(); - options.close(); - } - } -} diff --git a/java/src/test/java/org/rocksdb/OptionsTest.java b/java/src/test/java/org/rocksdb/OptionsTest.java deleted file mode 100644 index 129f1c39a..000000000 --- a/java/src/test/java/org/rocksdb/OptionsTest.java +++ /dev/null @@ -1,1492 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.*; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.*; -import java.util.concurrent.atomic.AtomicBoolean; -import org.junit.ClassRule; -import org.junit.Test; -import org.rocksdb.test.RemoveEmptyValueCompactionFilterFactory; - -public class OptionsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - public static final Random rand = PlatformRandomHelper. - getPlatformSpecificRandomFactory(); - - @Test - public void copyConstructor() { - Options origOpts = new Options(); - origOpts.setNumLevels(rand.nextInt(8)); - origOpts.setTargetFileSizeMultiplier(rand.nextInt(100)); - origOpts.setLevel0StopWritesTrigger(rand.nextInt(50)); - Options copyOpts = new Options(origOpts); - assertThat(origOpts.numLevels()).isEqualTo(copyOpts.numLevels()); - assertThat(origOpts.targetFileSizeMultiplier()).isEqualTo(copyOpts.targetFileSizeMultiplier()); - assertThat(origOpts.level0StopWritesTrigger()).isEqualTo(copyOpts.level0StopWritesTrigger()); - } - - @Test - public void setIncreaseParallelism() { - try (final Options opt = new Options()) { - final int threads = Runtime.getRuntime().availableProcessors() * 2; - opt.setIncreaseParallelism(threads); - } - } - - @Test - public void writeBufferSize() throws RocksDBException { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setWriteBufferSize(longValue); - assertThat(opt.writeBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void maxWriteBufferNumber() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setMaxWriteBufferNumber(intValue); - assertThat(opt.maxWriteBufferNumber()).isEqualTo(intValue); - } - } - - @Test - public void minWriteBufferNumberToMerge() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setMinWriteBufferNumberToMerge(intValue); - assertThat(opt.minWriteBufferNumberToMerge()).isEqualTo(intValue); - } - } - - @Test - public void numLevels() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setNumLevels(intValue); - assertThat(opt.numLevels()).isEqualTo(intValue); - } - } - - @Test - public void levelZeroFileNumCompactionTrigger() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setLevelZeroFileNumCompactionTrigger(intValue); - assertThat(opt.levelZeroFileNumCompactionTrigger()).isEqualTo(intValue); - } - } - - 
@Test - public void levelZeroSlowdownWritesTrigger() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setLevelZeroSlowdownWritesTrigger(intValue); - assertThat(opt.levelZeroSlowdownWritesTrigger()).isEqualTo(intValue); - } - } - - @Test - public void levelZeroStopWritesTrigger() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setLevelZeroStopWritesTrigger(intValue); - assertThat(opt.levelZeroStopWritesTrigger()).isEqualTo(intValue); - } - } - - @Test - public void targetFileSizeBase() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setTargetFileSizeBase(longValue); - assertThat(opt.targetFileSizeBase()).isEqualTo(longValue); - } - } - - @Test - public void targetFileSizeMultiplier() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setTargetFileSizeMultiplier(intValue); - assertThat(opt.targetFileSizeMultiplier()).isEqualTo(intValue); - } - } - - @Test - public void maxBytesForLevelBase() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setMaxBytesForLevelBase(longValue); - assertThat(opt.maxBytesForLevelBase()).isEqualTo(longValue); - } - } - - @Test - public void levelCompactionDynamicLevelBytes() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setLevelCompactionDynamicLevelBytes(boolValue); - assertThat(opt.levelCompactionDynamicLevelBytes()) - .isEqualTo(boolValue); - } - } - - @Test - public void maxBytesForLevelMultiplier() { - try (final Options opt = new Options()) { - final double doubleValue = rand.nextDouble(); - opt.setMaxBytesForLevelMultiplier(doubleValue); - assertThat(opt.maxBytesForLevelMultiplier()).isEqualTo(doubleValue); - } - } - - @Test - public void maxBytesForLevelMultiplierAdditional() { - try (final Options opt = new Options()) { - final int intValue1 = rand.nextInt(); - final int intValue2 = rand.nextInt(); - final int[] ints = new int[]{intValue1, intValue2}; - opt.setMaxBytesForLevelMultiplierAdditional(ints); - assertThat(opt.maxBytesForLevelMultiplierAdditional()).isEqualTo(ints); - } - } - - @Test - public void maxCompactionBytes() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setMaxCompactionBytes(longValue); - assertThat(opt.maxCompactionBytes()).isEqualTo(longValue); - } - } - - @Test - public void softPendingCompactionBytesLimit() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setSoftPendingCompactionBytesLimit(longValue); - assertThat(opt.softPendingCompactionBytesLimit()).isEqualTo(longValue); - } - } - - @Test - public void hardPendingCompactionBytesLimit() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setHardPendingCompactionBytesLimit(longValue); - assertThat(opt.hardPendingCompactionBytesLimit()).isEqualTo(longValue); - } - } - - @Test - public void level0FileNumCompactionTrigger() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setLevel0FileNumCompactionTrigger(intValue); - assertThat(opt.level0FileNumCompactionTrigger()).isEqualTo(intValue); - } - } - - @Test - public void level0SlowdownWritesTrigger() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setLevel0SlowdownWritesTrigger(intValue); - assertThat(opt.level0SlowdownWritesTrigger()).isEqualTo(intValue); 
- } - } - - @Test - public void level0StopWritesTrigger() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setLevel0StopWritesTrigger(intValue); - assertThat(opt.level0StopWritesTrigger()).isEqualTo(intValue); - } - } - - @Test - public void arenaBlockSize() throws RocksDBException { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setArenaBlockSize(longValue); - assertThat(opt.arenaBlockSize()).isEqualTo(longValue); - } - } - - @Test - public void disableAutoCompactions() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setDisableAutoCompactions(boolValue); - assertThat(opt.disableAutoCompactions()).isEqualTo(boolValue); - } - } - - @Test - public void maxSequentialSkipInIterations() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setMaxSequentialSkipInIterations(longValue); - assertThat(opt.maxSequentialSkipInIterations()).isEqualTo(longValue); - } - } - - @Test - public void inplaceUpdateSupport() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setInplaceUpdateSupport(boolValue); - assertThat(opt.inplaceUpdateSupport()).isEqualTo(boolValue); - } - } - - @Test - public void inplaceUpdateNumLocks() throws RocksDBException { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setInplaceUpdateNumLocks(longValue); - assertThat(opt.inplaceUpdateNumLocks()).isEqualTo(longValue); - } - } - - @Test - public void memtablePrefixBloomSizeRatio() { - try (final Options opt = new Options()) { - final double doubleValue = rand.nextDouble(); - opt.setMemtablePrefixBloomSizeRatio(doubleValue); - assertThat(opt.memtablePrefixBloomSizeRatio()).isEqualTo(doubleValue); - } - } - - @Test - public void experimentalMempurgeThreshold() { - try (final Options opt = new Options()) { - final double doubleValue = rand.nextDouble(); - opt.setExperimentalMempurgeThreshold(doubleValue); - assertThat(opt.experimentalMempurgeThreshold()).isEqualTo(doubleValue); - } - } - - @Test - public void memtableWholeKeyFiltering() { - try (final Options opt = new Options()) { - final boolean booleanValue = rand.nextBoolean(); - opt.setMemtableWholeKeyFiltering(booleanValue); - assertThat(opt.memtableWholeKeyFiltering()).isEqualTo(booleanValue); - } - } - - @Test - public void memtableHugePageSize() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setMemtableHugePageSize(longValue); - assertThat(opt.memtableHugePageSize()).isEqualTo(longValue); - } - } - - @Test - public void bloomLocality() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setBloomLocality(intValue); - assertThat(opt.bloomLocality()).isEqualTo(intValue); - } - } - - @Test - public void maxSuccessiveMerges() throws RocksDBException { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setMaxSuccessiveMerges(longValue); - assertThat(opt.maxSuccessiveMerges()).isEqualTo(longValue); - } - } - - @Test - public void optimizeFiltersForHits() { - try (final Options opt = new Options()) { - final boolean aBoolean = rand.nextBoolean(); - opt.setOptimizeFiltersForHits(aBoolean); - assertThat(opt.optimizeFiltersForHits()).isEqualTo(aBoolean); - } - } - - @Test - public void createIfMissing() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); 
- opt.setCreateIfMissing(boolValue); - assertThat(opt.createIfMissing()). - isEqualTo(boolValue); - } - } - - @Test - public void createMissingColumnFamilies() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setCreateMissingColumnFamilies(boolValue); - assertThat(opt.createMissingColumnFamilies()). - isEqualTo(boolValue); - } - } - - @Test - public void errorIfExists() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setErrorIfExists(boolValue); - assertThat(opt.errorIfExists()).isEqualTo(boolValue); - } - } - - @Test - public void paranoidChecks() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setParanoidChecks(boolValue); - assertThat(opt.paranoidChecks()). - isEqualTo(boolValue); - } - } - - @Test - public void maxTotalWalSize() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setMaxTotalWalSize(longValue); - assertThat(opt.maxTotalWalSize()). - isEqualTo(longValue); - } - } - - @Test - public void maxOpenFiles() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setMaxOpenFiles(intValue); - assertThat(opt.maxOpenFiles()).isEqualTo(intValue); - } - } - - @Test - public void maxFileOpeningThreads() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setMaxFileOpeningThreads(intValue); - assertThat(opt.maxFileOpeningThreads()).isEqualTo(intValue); - } - } - - @Test - public void useFsync() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setUseFsync(boolValue); - assertThat(opt.useFsync()).isEqualTo(boolValue); - } - } - - @Test - public void dbPaths() { - final List dbPaths = new ArrayList<>(); - dbPaths.add(new DbPath(Paths.get("/a"), 10)); - dbPaths.add(new DbPath(Paths.get("/b"), 100)); - dbPaths.add(new DbPath(Paths.get("/c"), 1000)); - - try (final Options opt = new Options()) { - assertThat(opt.dbPaths()).isEqualTo(Collections.emptyList()); - - opt.setDbPaths(dbPaths); - - assertThat(opt.dbPaths()).isEqualTo(dbPaths); - } - } - - @Test - public void dbLogDir() { - try (final Options opt = new Options()) { - final String str = "path/to/DbLogDir"; - opt.setDbLogDir(str); - assertThat(opt.dbLogDir()).isEqualTo(str); - } - } - - @Test - public void walDir() { - try (final Options opt = new Options()) { - final String str = "path/to/WalDir"; - opt.setWalDir(str); - assertThat(opt.walDir()).isEqualTo(str); - } - } - - @Test - public void deleteObsoleteFilesPeriodMicros() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setDeleteObsoleteFilesPeriodMicros(longValue); - assertThat(opt.deleteObsoleteFilesPeriodMicros()). - isEqualTo(longValue); - } - } - - @SuppressWarnings("deprecated") - @Test - public void maxBackgroundCompactions() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setMaxBackgroundCompactions(intValue); - assertThat(opt.maxBackgroundCompactions()). - isEqualTo(intValue); - } - } - - @Test - public void maxSubcompactions() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setMaxSubcompactions(intValue); - assertThat(opt.maxSubcompactions()). 
- isEqualTo(intValue); - } - } - - @SuppressWarnings("deprecated") - @Test - public void maxBackgroundFlushes() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setMaxBackgroundFlushes(intValue); - assertThat(opt.maxBackgroundFlushes()). - isEqualTo(intValue); - } - } - - @Test - public void maxBackgroundJobs() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setMaxBackgroundJobs(intValue); - assertThat(opt.maxBackgroundJobs()).isEqualTo(intValue); - } - } - - @Test - public void maxLogFileSize() throws RocksDBException { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setMaxLogFileSize(longValue); - assertThat(opt.maxLogFileSize()).isEqualTo(longValue); - } - } - - @Test - public void logFileTimeToRoll() throws RocksDBException { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setLogFileTimeToRoll(longValue); - assertThat(opt.logFileTimeToRoll()). - isEqualTo(longValue); - } - } - - @Test - public void keepLogFileNum() throws RocksDBException { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setKeepLogFileNum(longValue); - assertThat(opt.keepLogFileNum()).isEqualTo(longValue); - } - } - - @Test - public void recycleLogFileNum() throws RocksDBException { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setRecycleLogFileNum(longValue); - assertThat(opt.recycleLogFileNum()).isEqualTo(longValue); - } - } - - @Test - public void maxManifestFileSize() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setMaxManifestFileSize(longValue); - assertThat(opt.maxManifestFileSize()). - isEqualTo(longValue); - } - } - - @Test - public void tableCacheNumshardbits() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setTableCacheNumshardbits(intValue); - assertThat(opt.tableCacheNumshardbits()). - isEqualTo(intValue); - } - } - - @Test - public void walSizeLimitMB() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setWalSizeLimitMB(longValue); - assertThat(opt.walSizeLimitMB()).isEqualTo(longValue); - } - } - - @Test - public void walTtlSeconds() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setWalTtlSeconds(longValue); - assertThat(opt.walTtlSeconds()).isEqualTo(longValue); - } - } - - @Test - public void manifestPreallocationSize() throws RocksDBException { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setManifestPreallocationSize(longValue); - assertThat(opt.manifestPreallocationSize()). 
- isEqualTo(longValue); - } - } - - @Test - public void useDirectReads() { - try(final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setUseDirectReads(boolValue); - assertThat(opt.useDirectReads()).isEqualTo(boolValue); - } - } - - @Test - public void useDirectIoForFlushAndCompaction() { - try(final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setUseDirectIoForFlushAndCompaction(boolValue); - assertThat(opt.useDirectIoForFlushAndCompaction()).isEqualTo(boolValue); - } - } - - @Test - public void allowFAllocate() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAllowFAllocate(boolValue); - assertThat(opt.allowFAllocate()).isEqualTo(boolValue); - } - } - - @Test - public void allowMmapReads() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAllowMmapReads(boolValue); - assertThat(opt.allowMmapReads()).isEqualTo(boolValue); - } - } - - @Test - public void allowMmapWrites() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAllowMmapWrites(boolValue); - assertThat(opt.allowMmapWrites()).isEqualTo(boolValue); - } - } - - @Test - public void isFdCloseOnExec() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setIsFdCloseOnExec(boolValue); - assertThat(opt.isFdCloseOnExec()).isEqualTo(boolValue); - } - } - - @Test - public void statsDumpPeriodSec() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setStatsDumpPeriodSec(intValue); - assertThat(opt.statsDumpPeriodSec()).isEqualTo(intValue); - } - } - - @Test - public void statsPersistPeriodSec() { - try (final Options opt = new Options()) { - final int intValue = rand.nextInt(); - opt.setStatsPersistPeriodSec(intValue); - assertThat(opt.statsPersistPeriodSec()).isEqualTo(intValue); - } - } - - @Test - public void statsHistoryBufferSize() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setStatsHistoryBufferSize(longValue); - assertThat(opt.statsHistoryBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void adviseRandomOnOpen() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAdviseRandomOnOpen(boolValue); - assertThat(opt.adviseRandomOnOpen()).isEqualTo(boolValue); - } - } - - @Test - public void dbWriteBufferSize() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setDbWriteBufferSize(longValue); - assertThat(opt.dbWriteBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void setWriteBufferManager() throws RocksDBException { - try (final Options opt = new Options(); - final Cache cache = new LRUCache(1 * 1024 * 1024); - final WriteBufferManager writeBufferManager = new WriteBufferManager(2000l, cache)) { - opt.setWriteBufferManager(writeBufferManager); - assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); - } - } - - @Test - public void setWriteBufferManagerWithZeroBufferSize() throws RocksDBException { - try (final Options opt = new Options(); - final Cache cache = new LRUCache(1 * 1024 * 1024); - final WriteBufferManager writeBufferManager = new WriteBufferManager(0l, cache)) { - opt.setWriteBufferManager(writeBufferManager); - assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); - } - } - - @Test - public void 
setWriteBufferManagerWithAllowStall() throws RocksDBException { - try (final Options opt = new Options(); final Cache cache = new LRUCache(1 * 1024 * 1024); - final WriteBufferManager writeBufferManager = new WriteBufferManager(2000l, cache, true)) { - opt.setWriteBufferManager(writeBufferManager); - assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); - assertThat(opt.writeBufferManager().allowStall()).isEqualTo(true); - } - } - - @Test - public void accessHintOnCompactionStart() { - try (final Options opt = new Options()) { - final AccessHint accessHint = AccessHint.SEQUENTIAL; - opt.setAccessHintOnCompactionStart(accessHint); - assertThat(opt.accessHintOnCompactionStart()).isEqualTo(accessHint); - } - } - - @Test - public void compactionReadaheadSize() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setCompactionReadaheadSize(longValue); - assertThat(opt.compactionReadaheadSize()).isEqualTo(longValue); - } - } - - @Test - public void randomAccessMaxBufferSize() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setRandomAccessMaxBufferSize(longValue); - assertThat(opt.randomAccessMaxBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void writableFileMaxBufferSize() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setWritableFileMaxBufferSize(longValue); - assertThat(opt.writableFileMaxBufferSize()).isEqualTo(longValue); - } - } - - @Test - public void useAdaptiveMutex() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setUseAdaptiveMutex(boolValue); - assertThat(opt.useAdaptiveMutex()).isEqualTo(boolValue); - } - } - - @Test - public void bytesPerSync() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setBytesPerSync(longValue); - assertThat(opt.bytesPerSync()).isEqualTo(longValue); - } - } - - @Test - public void walBytesPerSync() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setWalBytesPerSync(longValue); - assertThat(opt.walBytesPerSync()).isEqualTo(longValue); - } - } - - @Test - public void strictBytesPerSync() { - try (final Options opt = new Options()) { - assertThat(opt.strictBytesPerSync()).isFalse(); - opt.setStrictBytesPerSync(true); - assertThat(opt.strictBytesPerSync()).isTrue(); - } - } - - @Test - public void enableThreadTracking() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setEnableThreadTracking(boolValue); - assertThat(opt.enableThreadTracking()).isEqualTo(boolValue); - } - } - - @Test - public void delayedWriteRate() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setDelayedWriteRate(longValue); - assertThat(opt.delayedWriteRate()).isEqualTo(longValue); - } - } - - @Test - public void enablePipelinedWrite() { - try(final Options opt = new Options()) { - assertThat(opt.enablePipelinedWrite()).isFalse(); - opt.setEnablePipelinedWrite(true); - assertThat(opt.enablePipelinedWrite()).isTrue(); - } - } - - @Test - public void unordredWrite() { - try(final Options opt = new Options()) { - assertThat(opt.unorderedWrite()).isFalse(); - opt.setUnorderedWrite(true); - assertThat(opt.unorderedWrite()).isTrue(); - } - } - - @Test - public void allowConcurrentMemtableWrite() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - 
opt.setAllowConcurrentMemtableWrite(boolValue); - assertThat(opt.allowConcurrentMemtableWrite()).isEqualTo(boolValue); - } - } - - @Test - public void enableWriteThreadAdaptiveYield() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setEnableWriteThreadAdaptiveYield(boolValue); - assertThat(opt.enableWriteThreadAdaptiveYield()).isEqualTo(boolValue); - } - } - - @Test - public void writeThreadMaxYieldUsec() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setWriteThreadMaxYieldUsec(longValue); - assertThat(opt.writeThreadMaxYieldUsec()).isEqualTo(longValue); - } - } - - @Test - public void writeThreadSlowYieldUsec() { - try (final Options opt = new Options()) { - final long longValue = rand.nextLong(); - opt.setWriteThreadSlowYieldUsec(longValue); - assertThat(opt.writeThreadSlowYieldUsec()).isEqualTo(longValue); - } - } - - @Test - public void skipStatsUpdateOnDbOpen() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setSkipStatsUpdateOnDbOpen(boolValue); - assertThat(opt.skipStatsUpdateOnDbOpen()).isEqualTo(boolValue); - } - } - - @Test - public void walRecoveryMode() { - try (final Options opt = new Options()) { - for (final WALRecoveryMode walRecoveryMode : WALRecoveryMode.values()) { - opt.setWalRecoveryMode(walRecoveryMode); - assertThat(opt.walRecoveryMode()).isEqualTo(walRecoveryMode); - } - } - } - - @Test - public void allow2pc() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAllow2pc(boolValue); - assertThat(opt.allow2pc()).isEqualTo(boolValue); - } - } - - @Test - public void rowCache() { - try (final Options opt = new Options()) { - assertThat(opt.rowCache()).isNull(); - - try(final Cache lruCache = new LRUCache(1000)) { - opt.setRowCache(lruCache); - assertThat(opt.rowCache()).isEqualTo(lruCache); - } - - try(final Cache clockCache = new ClockCache(1000)) { - opt.setRowCache(clockCache); - assertThat(opt.rowCache()).isEqualTo(clockCache); - } - } - } - - @Test - public void walFilter() { - try (final Options opt = new Options()) { - assertThat(opt.walFilter()).isNull(); - - try (final AbstractWalFilter walFilter = new AbstractWalFilter() { - @Override - public void columnFamilyLogNumberMap( - final Map cfLognumber, - final Map cfNameId) { - // no-op - } - - @Override - public LogRecordFoundResult logRecordFound(final long logNumber, - final String logFileName, final WriteBatch batch, - final WriteBatch newBatch) { - return new LogRecordFoundResult( - WalProcessingOption.CONTINUE_PROCESSING, false); - } - - @Override - public String name() { - return "test-wal-filter"; - } - }) { - opt.setWalFilter(walFilter); - assertThat(opt.walFilter()).isEqualTo(walFilter); - } - } - } - - @Test - public void failIfOptionsFileError() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setFailIfOptionsFileError(boolValue); - assertThat(opt.failIfOptionsFileError()).isEqualTo(boolValue); - } - } - - @Test - public void dumpMallocStats() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setDumpMallocStats(boolValue); - assertThat(opt.dumpMallocStats()).isEqualTo(boolValue); - } - } - - @Test - public void avoidFlushDuringRecovery() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAvoidFlushDuringRecovery(boolValue); - 
assertThat(opt.avoidFlushDuringRecovery()).isEqualTo(boolValue); - } - } - - @Test - public void avoidFlushDuringShutdown() { - try (final Options opt = new Options()) { - final boolean boolValue = rand.nextBoolean(); - opt.setAvoidFlushDuringShutdown(boolValue); - assertThat(opt.avoidFlushDuringShutdown()).isEqualTo(boolValue); - } - } - - - @Test - public void allowIngestBehind() { - try (final Options opt = new Options()) { - assertThat(opt.allowIngestBehind()).isFalse(); - opt.setAllowIngestBehind(true); - assertThat(opt.allowIngestBehind()).isTrue(); - } - } - - @Test - public void twoWriteQueues() { - try (final Options opt = new Options()) { - assertThat(opt.twoWriteQueues()).isFalse(); - opt.setTwoWriteQueues(true); - assertThat(opt.twoWriteQueues()).isTrue(); - } - } - - @Test - public void manualWalFlush() { - try (final Options opt = new Options()) { - assertThat(opt.manualWalFlush()).isFalse(); - opt.setManualWalFlush(true); - assertThat(opt.manualWalFlush()).isTrue(); - } - } - - @Test - public void atomicFlush() { - try (final Options opt = new Options()) { - assertThat(opt.atomicFlush()).isFalse(); - opt.setAtomicFlush(true); - assertThat(opt.atomicFlush()).isTrue(); - } - } - - @Test - public void env() { - try (final Options options = new Options(); - final Env env = Env.getDefault()) { - options.setEnv(env); - assertThat(options.getEnv()).isSameAs(env); - } - } - - @Test - public void linkageOfPrepMethods() { - try (final Options options = new Options()) { - options.optimizeUniversalStyleCompaction(); - options.optimizeUniversalStyleCompaction(4000); - options.optimizeLevelStyleCompaction(); - options.optimizeLevelStyleCompaction(3000); - options.optimizeForPointLookup(10); - options.optimizeForSmallDb(); - options.prepareForBulkLoad(); - } - } - - @Test - public void compressionTypes() { - try (final Options options = new Options()) { - for (final CompressionType compressionType : - CompressionType.values()) { - options.setCompressionType(compressionType); - assertThat(options.compressionType()). - isEqualTo(compressionType); - assertThat(CompressionType.valueOf("NO_COMPRESSION")). 
- isEqualTo(CompressionType.NO_COMPRESSION); - } - } - } - - @Test - public void prepopulateBlobCache() { - try (final Options options = new Options()) { - for (final PrepopulateBlobCache prepopulateBlobCache : PrepopulateBlobCache.values()) { - options.setPrepopulateBlobCache(prepopulateBlobCache); - assertThat(options.prepopulateBlobCache()).isEqualTo(prepopulateBlobCache); - assertThat(PrepopulateBlobCache.valueOf("PREPOPULATE_BLOB_DISABLE")) - .isEqualTo(PrepopulateBlobCache.PREPOPULATE_BLOB_DISABLE); - } - } - } - - @Test - public void compressionPerLevel() { - try (final Options options = new Options()) { - assertThat(options.compressionPerLevel()).isEmpty(); - List compressionTypeList = - new ArrayList<>(); - for (int i = 0; i < options.numLevels(); i++) { - compressionTypeList.add(CompressionType.NO_COMPRESSION); - } - options.setCompressionPerLevel(compressionTypeList); - compressionTypeList = options.compressionPerLevel(); - for (final CompressionType compressionType : compressionTypeList) { - assertThat(compressionType).isEqualTo( - CompressionType.NO_COMPRESSION); - } - } - } - - @Test - public void differentCompressionsPerLevel() { - try (final Options options = new Options()) { - options.setNumLevels(3); - - assertThat(options.compressionPerLevel()).isEmpty(); - List compressionTypeList = new ArrayList<>(); - - compressionTypeList.add(CompressionType.BZLIB2_COMPRESSION); - compressionTypeList.add(CompressionType.SNAPPY_COMPRESSION); - compressionTypeList.add(CompressionType.LZ4_COMPRESSION); - - options.setCompressionPerLevel(compressionTypeList); - compressionTypeList = options.compressionPerLevel(); - - assertThat(compressionTypeList.size()).isEqualTo(3); - assertThat(compressionTypeList). - containsExactly( - CompressionType.BZLIB2_COMPRESSION, - CompressionType.SNAPPY_COMPRESSION, - CompressionType.LZ4_COMPRESSION); - - } - } - - @Test - public void bottommostCompressionType() { - try (final Options options = new Options()) { - assertThat(options.bottommostCompressionType()) - .isEqualTo(CompressionType.DISABLE_COMPRESSION_OPTION); - - for (final CompressionType compressionType : CompressionType.values()) { - options.setBottommostCompressionType(compressionType); - assertThat(options.bottommostCompressionType()) - .isEqualTo(compressionType); - } - } - } - - @Test - public void bottommostCompressionOptions() { - try (final Options options = new Options(); - final CompressionOptions bottommostCompressionOptions = new CompressionOptions() - .setMaxDictBytes(123)) { - - options.setBottommostCompressionOptions(bottommostCompressionOptions); - assertThat(options.bottommostCompressionOptions()) - .isEqualTo(bottommostCompressionOptions); - assertThat(options.bottommostCompressionOptions().maxDictBytes()) - .isEqualTo(123); - } - } - - @Test - public void compressionOptions() { - try (final Options options = new Options(); - final CompressionOptions compressionOptions = new CompressionOptions() - .setMaxDictBytes(123)) { - - options.setCompressionOptions(compressionOptions); - assertThat(options.compressionOptions()) - .isEqualTo(compressionOptions); - assertThat(options.compressionOptions().maxDictBytes()) - .isEqualTo(123); - } - } - - @Test - public void compactionStyles() { - try (final Options options = new Options()) { - for (final CompactionStyle compactionStyle : - CompactionStyle.values()) { - options.setCompactionStyle(compactionStyle); - assertThat(options.compactionStyle()). - isEqualTo(compactionStyle); - assertThat(CompactionStyle.valueOf("FIFO")). 
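A compact sketch of the per-level compression setup that differentCompressionsPerLevel asserts; the three-level layout and codec choices are illustrative:

    // assumes: import java.util.Arrays; import java.util.List; import org.rocksdb.*;
    try (final Options options = new Options().setCreateIfMissing(true)) {
      options.setNumLevels(3);
      final List<CompressionType> perLevel = Arrays.asList(
          CompressionType.NO_COMPRESSION,       // L0: keep flushes and early compactions cheap
          CompressionType.SNAPPY_COMPRESSION,   // L1
          CompressionType.ZSTD_COMPRESSION);    // last level: favour ratio over speed
      options.setCompressionPerLevel(perLevel);
      options.setBottommostCompressionType(CompressionType.ZSTD_COMPRESSION);
    }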
- isEqualTo(CompactionStyle.FIFO); - } - } - } - - @Test - public void maxTableFilesSizeFIFO() { - try (final Options opt = new Options()) { - long longValue = rand.nextLong(); - // Size has to be positive - longValue = (longValue < 0) ? -longValue : longValue; - longValue = (longValue == 0) ? longValue + 1 : longValue; - opt.setMaxTableFilesSizeFIFO(longValue); - assertThat(opt.maxTableFilesSizeFIFO()). - isEqualTo(longValue); - } - } - - @Test - public void rateLimiter() { - try (final Options options = new Options(); - final Options anotherOptions = new Options(); - final RateLimiter rateLimiter = - new RateLimiter(1000, 100 * 1000, 1)) { - options.setRateLimiter(rateLimiter); - // Test with parameter initialization - anotherOptions.setRateLimiter( - new RateLimiter(1000)); - } - } - - @Test - public void sstFileManager() throws RocksDBException { - try (final Options options = new Options(); - final SstFileManager sstFileManager = - new SstFileManager(Env.getDefault())) { - options.setSstFileManager(sstFileManager); - } - } - - @Test - public void shouldSetTestPrefixExtractor() { - try (final Options options = new Options()) { - options.useFixedLengthPrefixExtractor(100); - options.useFixedLengthPrefixExtractor(10); - } - } - - @Test - public void shouldSetTestCappedPrefixExtractor() { - try (final Options options = new Options()) { - options.useCappedPrefixExtractor(100); - options.useCappedPrefixExtractor(10); - } - } - - @Test - public void shouldTestMemTableFactoryName() - throws RocksDBException { - try (final Options options = new Options()) { - options.setMemTableConfig(new VectorMemTableConfig()); - assertThat(options.memTableFactoryName()). - isEqualTo("VectorRepFactory"); - options.setMemTableConfig( - new HashLinkedListMemTableConfig()); - assertThat(options.memTableFactoryName()). - isEqualTo("HashLinkedListRepFactory"); - } - } - - @Test - public void statistics() { - try(final Options options = new Options()) { - final Statistics statistics = options.statistics(); - assertThat(statistics).isNull(); - } - - try(final Statistics statistics = new Statistics(); - final Options options = new Options().setStatistics(statistics); - final Statistics stats = options.statistics()) { - assertThat(stats).isNotNull(); - } - } - - @Test - public void maxWriteBufferNumberToMaintain() { - try (final Options options = new Options()) { - int intValue = rand.nextInt(); - // Size has to be positive - intValue = (intValue < 0) ? -intValue : intValue; - intValue = (intValue == 0) ? intValue + 1 : intValue; - options.setMaxWriteBufferNumberToMaintain(intValue); - assertThat(options.maxWriteBufferNumberToMaintain()). - isEqualTo(intValue); - } - } - - @Test - public void compactionPriorities() { - try (final Options options = new Options()) { - for (final CompactionPriority compactionPriority : - CompactionPriority.values()) { - options.setCompactionPriority(compactionPriority); - assertThat(options.compactionPriority()). - isEqualTo(compactionPriority); - } - } - } - - @Test - public void reportBgIoStats() { - try (final Options options = new Options()) { - final boolean booleanValue = true; - options.setReportBgIoStats(booleanValue); - assertThat(options.reportBgIoStats()). - isEqualTo(booleanValue); - } - } - - @Test - public void ttl() { - try (final Options options = new Options()) { - options.setTtl(1000 * 60); - assertThat(options.ttl()). 
- isEqualTo(1000 * 60); - } - } - - @Test - public void periodicCompactionSeconds() { - try (final Options options = new Options()) { - options.setPeriodicCompactionSeconds(1000 * 60); - assertThat(options.periodicCompactionSeconds()).isEqualTo(1000 * 60); - } - } - - @Test - public void compactionOptionsUniversal() { - try (final Options options = new Options(); - final CompactionOptionsUniversal optUni = new CompactionOptionsUniversal() - .setCompressionSizePercent(7)) { - options.setCompactionOptionsUniversal(optUni); - assertThat(options.compactionOptionsUniversal()). - isEqualTo(optUni); - assertThat(options.compactionOptionsUniversal().compressionSizePercent()) - .isEqualTo(7); - } - } - - @Test - public void compactionOptionsFIFO() { - try (final Options options = new Options(); - final CompactionOptionsFIFO optFifo = new CompactionOptionsFIFO() - .setMaxTableFilesSize(2000)) { - options.setCompactionOptionsFIFO(optFifo); - assertThat(options.compactionOptionsFIFO()). - isEqualTo(optFifo); - assertThat(options.compactionOptionsFIFO().maxTableFilesSize()) - .isEqualTo(2000); - } - } - - @Test - public void forceConsistencyChecks() { - try (final Options options = new Options()) { - final boolean booleanValue = true; - options.setForceConsistencyChecks(booleanValue); - assertThat(options.forceConsistencyChecks()). - isEqualTo(booleanValue); - } - } - - @Test - public void compactionFilter() { - try(final Options options = new Options(); - final RemoveEmptyValueCompactionFilter cf = new RemoveEmptyValueCompactionFilter()) { - options.setCompactionFilter(cf); - assertThat(options.compactionFilter()).isEqualTo(cf); - } - } - - @Test - public void compactionFilterFactory() { - try(final Options options = new Options(); - final RemoveEmptyValueCompactionFilterFactory cff = new RemoveEmptyValueCompactionFilterFactory()) { - options.setCompactionFilterFactory(cff); - assertThat(options.compactionFilterFactory()).isEqualTo(cff); - } - } - - @Test - public void compactionThreadLimiter() { - try (final Options options = new Options(); - final ConcurrentTaskLimiter compactionThreadLimiter = - new ConcurrentTaskLimiterImpl("name", 3)) { - options.setCompactionThreadLimiter(compactionThreadLimiter); - assertThat(options.compactionThreadLimiter()).isEqualTo(compactionThreadLimiter); - } - } - - @Test - public void oldDefaults() { - try (final Options options = new Options()) { - options.oldDefaults(4, 6); - assertThat(options.writeBufferSize()).isEqualTo(4 << 20); - assertThat(options.compactionPriority()).isEqualTo(CompactionPriority.ByCompensatedSize); - assertThat(options.targetFileSizeBase()).isEqualTo(2 * 1048576); - assertThat(options.maxBytesForLevelBase()).isEqualTo(10 * 1048576); - assertThat(options.softPendingCompactionBytesLimit()).isEqualTo(0); - assertThat(options.hardPendingCompactionBytesLimit()).isEqualTo(0); - assertThat(options.level0StopWritesTrigger()).isEqualTo(24); - } - } - - @Test - public void optimizeForSmallDbWithCache() { - try (final Options options = new Options(); final Cache cache = new LRUCache(1024)) { - assertThat(options.optimizeForSmallDb(cache)).isEqualTo(options); - } - } - - @Test - public void cfPaths() { - try (final Options options = new Options()) { - final List paths = Arrays.asList( - new DbPath(Paths.get("test1"), 2 << 25), new DbPath(Paths.get("/test2/path"), 2 << 25)); - assertThat(options.cfPaths()).isEqualTo(Collections.emptyList()); - assertThat(options.setCfPaths(paths)).isEqualTo(options); - assertThat(options.cfPaths()).isEqualTo(paths); - } - 
} - - @Test - public void avoidUnnecessaryBlockingIO() { - try (final Options options = new Options()) { - assertThat(options.avoidUnnecessaryBlockingIO()).isEqualTo(false); - assertThat(options.setAvoidUnnecessaryBlockingIO(true)).isEqualTo(options); - assertThat(options.avoidUnnecessaryBlockingIO()).isEqualTo(true); - } - } - - @Test - public void persistStatsToDisk() { - try (final Options options = new Options()) { - assertThat(options.persistStatsToDisk()).isEqualTo(false); - assertThat(options.setPersistStatsToDisk(true)).isEqualTo(options); - assertThat(options.persistStatsToDisk()).isEqualTo(true); - } - } - - @Test - public void writeDbidToManifest() { - try (final Options options = new Options()) { - assertThat(options.writeDbidToManifest()).isEqualTo(false); - assertThat(options.setWriteDbidToManifest(true)).isEqualTo(options); - assertThat(options.writeDbidToManifest()).isEqualTo(true); - } - } - - @Test - public void logReadaheadSize() { - try (final Options options = new Options()) { - assertThat(options.logReadaheadSize()).isEqualTo(0); - final int size = 1024 * 1024 * 100; - assertThat(options.setLogReadaheadSize(size)).isEqualTo(options); - assertThat(options.logReadaheadSize()).isEqualTo(size); - } - } - - @Test - public void bestEffortsRecovery() { - try (final Options options = new Options()) { - assertThat(options.bestEffortsRecovery()).isEqualTo(false); - assertThat(options.setBestEffortsRecovery(true)).isEqualTo(options); - assertThat(options.bestEffortsRecovery()).isEqualTo(true); - } - } - - @Test - public void maxBgerrorResumeCount() { - try (final Options options = new Options()) { - final int INT_MAX = 2147483647; - assertThat(options.maxBgerrorResumeCount()).isEqualTo(INT_MAX); - assertThat(options.setMaxBgErrorResumeCount(-1)).isEqualTo(options); - assertThat(options.maxBgerrorResumeCount()).isEqualTo(-1); - } - } - - @Test - public void bgerrorResumeRetryInterval() { - try (final Options options = new Options()) { - assertThat(options.bgerrorResumeRetryInterval()).isEqualTo(1000000); - final long newRetryInterval = 24 * 3600 * 1000000L; - assertThat(options.setBgerrorResumeRetryInterval(newRetryInterval)).isEqualTo(options); - assertThat(options.bgerrorResumeRetryInterval()).isEqualTo(newRetryInterval); - } - } - - @Test - public void maxWriteBatchGroupSizeBytes() { - try (final Options options = new Options()) { - assertThat(options.maxWriteBatchGroupSizeBytes()).isEqualTo(1024 * 1024); - final long size = 1024 * 1024 * 1024 * 10L; - assertThat(options.setMaxWriteBatchGroupSizeBytes(size)).isEqualTo(options); - assertThat(options.maxWriteBatchGroupSizeBytes()).isEqualTo(size); - } - } - - @Test - public void skipCheckingSstFileSizesOnDbOpen() { - try (final Options options = new Options()) { - assertThat(options.skipCheckingSstFileSizesOnDbOpen()).isEqualTo(false); - assertThat(options.setSkipCheckingSstFileSizesOnDbOpen(true)).isEqualTo(options); - assertThat(options.skipCheckingSstFileSizesOnDbOpen()).isEqualTo(true); - } - } - - @Test - public void eventListeners() { - final AtomicBoolean wasCalled1 = new AtomicBoolean(); - final AtomicBoolean wasCalled2 = new AtomicBoolean(); - try (final Options options = new Options(); - final AbstractEventListener el1 = - new AbstractEventListener() { - @Override - public void onTableFileDeleted(final TableFileDeletionInfo tableFileDeletionInfo) { - wasCalled1.set(true); - } - }; - final AbstractEventListener el2 = - new AbstractEventListener() { - @Override - public void onMemTableSealed(final MemTableInfo 
memTableInfo) { - wasCalled2.set(true); - } - }) { - assertThat(options.setListeners(Arrays.asList(el1, el2))).isEqualTo(options); - List listeners = options.listeners(); - assertEquals(el1, listeners.get(0)); - assertEquals(el2, listeners.get(1)); - options.setListeners(Collections.emptyList()); - listeners.get(0).onTableFileDeleted(null); - assertTrue(wasCalled1.get()); - listeners.get(1).onMemTableSealed(null); - assertTrue(wasCalled2.get()); - List listeners2 = options.listeners(); - assertNotNull(listeners2); - assertEquals(0, listeners2.size()); - } - } -} diff --git a/java/src/test/java/org/rocksdb/OptionsUtilTest.java b/java/src/test/java/org/rocksdb/OptionsUtilTest.java deleted file mode 100644 index 02bfc0025..000000000 --- a/java/src/test/java/org/rocksdb/OptionsUtilTest.java +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.*; - -import static org.assertj.core.api.Assertions.assertThat; - -public class OptionsUtilTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - enum TestAPI { LOAD_LATEST_OPTIONS, LOAD_OPTIONS_FROM_FILE } - - @Test - public void loadLatestOptions() throws RocksDBException { - verifyOptions(TestAPI.LOAD_LATEST_OPTIONS); - } - - @Test - public void loadOptionsFromFile() throws RocksDBException { - verifyOptions(TestAPI.LOAD_OPTIONS_FROM_FILE); - } - - @Test - public void getLatestOptionsFileName() throws RocksDBException { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, dbPath)) { - assertThat(db).isNotNull(); - } - - String fName = OptionsUtil.getLatestOptionsFileName(dbPath, Env.getDefault()); - assertThat(fName).isNotNull(); - assert(fName.startsWith("OPTIONS-") == true); - // System.out.println("latest options fileName: " + fName); - } - - private void verifyOptions(TestAPI apiType) throws RocksDBException { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - final Options options = new Options() - .setCreateIfMissing(true) - .setParanoidChecks(false) - .setMaxOpenFiles(478) - .setDelayedWriteRate(1234567L); - final ColumnFamilyOptions baseDefaultCFOpts = new ColumnFamilyOptions(); - final byte[] secondCFName = "new_cf".getBytes(); - final ColumnFamilyOptions baseSecondCFOpts = - new ColumnFamilyOptions() - .setWriteBufferSize(70 * 1024) - .setMaxWriteBufferNumber(7) - .setMaxBytesForLevelBase(53 * 1024 * 1024) - .setLevel0FileNumCompactionTrigger(3) - .setLevel0SlowdownWritesTrigger(51) - .setBottommostCompressionType(CompressionType.ZSTD_COMPRESSION); - - // Create a database with a new column family - try (final RocksDB db = RocksDB.open(options, dbPath)) { - assertThat(db).isNotNull(); - - // create column family - try (final ColumnFamilyHandle columnFamilyHandle = - db.createColumnFamily(new ColumnFamilyDescriptor(secondCFName, baseSecondCFOpts))) { - assert(columnFamilyHandle != null); - } - } - - // Read the options back and verify - DBOptions dbOptions = new 
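The verifyOptions helper below reads persisted options back from disk; a minimal sketch of that load path, assuming a database has already been opened at the (illustrative) path so an OPTIONS file exists and the enclosing method declares throws RocksDBException:

    // assumes: import java.util.ArrayList; import java.util.List; import org.rocksdb.*;
    final List<ColumnFamilyDescriptor> cfDescs = new ArrayList<>();
    try (final ConfigOptions configOptions =
             new ConfigOptions().setIgnoreUnknownOptions(false).setEnv(Env.getDefault());
         final DBOptions dbOptions = new DBOptions()) {
      OptionsUtil.loadLatestOptions(configOptions, "/path/to/db", dbOptions, cfDescs);
      // cfDescs.get(0) describes the default column family; further entries follow creation order
    }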
DBOptions(); - ConfigOptions configOptions = - new ConfigOptions().setIgnoreUnknownOptions(false).setInputStringsEscaped(true).setEnv( - Env.getDefault()); - final List cfDescs = new ArrayList<>(); - String path = dbPath; - if (apiType == TestAPI.LOAD_LATEST_OPTIONS) { - OptionsUtil.loadLatestOptions(configOptions, path, dbOptions, cfDescs); - } else if (apiType == TestAPI.LOAD_OPTIONS_FROM_FILE) { - path = dbPath + "/" + OptionsUtil.getLatestOptionsFileName(dbPath, Env.getDefault()); - OptionsUtil.loadOptionsFromFile(configOptions, path, dbOptions, cfDescs); - } - - assertThat(dbOptions.createIfMissing()).isEqualTo(options.createIfMissing()); - assertThat(dbOptions.paranoidChecks()).isEqualTo(options.paranoidChecks()); - assertThat(dbOptions.maxOpenFiles()).isEqualTo(options.maxOpenFiles()); - assertThat(dbOptions.delayedWriteRate()).isEqualTo(options.delayedWriteRate()); - - assertThat(cfDescs.size()).isEqualTo(2); - assertThat(cfDescs.get(0)).isNotNull(); - assertThat(cfDescs.get(1)).isNotNull(); - assertThat(cfDescs.get(0).getName()).isEqualTo(RocksDB.DEFAULT_COLUMN_FAMILY); - assertThat(cfDescs.get(1).getName()).isEqualTo(secondCFName); - - ColumnFamilyOptions defaultCFOpts = cfDescs.get(0).getOptions(); - assertThat(defaultCFOpts.writeBufferSize()).isEqualTo(baseDefaultCFOpts.writeBufferSize()); - assertThat(defaultCFOpts.maxWriteBufferNumber()) - .isEqualTo(baseDefaultCFOpts.maxWriteBufferNumber()); - assertThat(defaultCFOpts.maxBytesForLevelBase()) - .isEqualTo(baseDefaultCFOpts.maxBytesForLevelBase()); - assertThat(defaultCFOpts.level0FileNumCompactionTrigger()) - .isEqualTo(baseDefaultCFOpts.level0FileNumCompactionTrigger()); - assertThat(defaultCFOpts.level0SlowdownWritesTrigger()) - .isEqualTo(baseDefaultCFOpts.level0SlowdownWritesTrigger()); - assertThat(defaultCFOpts.bottommostCompressionType()) - .isEqualTo(baseDefaultCFOpts.bottommostCompressionType()); - - ColumnFamilyOptions secondCFOpts = cfDescs.get(1).getOptions(); - assertThat(secondCFOpts.writeBufferSize()).isEqualTo(baseSecondCFOpts.writeBufferSize()); - assertThat(secondCFOpts.maxWriteBufferNumber()) - .isEqualTo(baseSecondCFOpts.maxWriteBufferNumber()); - assertThat(secondCFOpts.maxBytesForLevelBase()) - .isEqualTo(baseSecondCFOpts.maxBytesForLevelBase()); - assertThat(secondCFOpts.level0FileNumCompactionTrigger()) - .isEqualTo(baseSecondCFOpts.level0FileNumCompactionTrigger()); - assertThat(secondCFOpts.level0SlowdownWritesTrigger()) - .isEqualTo(baseSecondCFOpts.level0SlowdownWritesTrigger()); - assertThat(secondCFOpts.bottommostCompressionType()) - .isEqualTo(baseSecondCFOpts.bottommostCompressionType()); - } -} diff --git a/java/src/test/java/org/rocksdb/PlainTableConfigTest.java b/java/src/test/java/org/rocksdb/PlainTableConfigTest.java deleted file mode 100644 index c813dbbb4..000000000 --- a/java/src/test/java/org/rocksdb/PlainTableConfigTest.java +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class PlainTableConfigTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void keySize() { - PlainTableConfig plainTableConfig = new PlainTableConfig(); - plainTableConfig.setKeySize(5); - assertThat(plainTableConfig.keySize()). - isEqualTo(5); - } - - @Test - public void bloomBitsPerKey() { - PlainTableConfig plainTableConfig = new PlainTableConfig(); - plainTableConfig.setBloomBitsPerKey(11); - assertThat(plainTableConfig.bloomBitsPerKey()). - isEqualTo(11); - } - - @Test - public void hashTableRatio() { - PlainTableConfig plainTableConfig = new PlainTableConfig(); - plainTableConfig.setHashTableRatio(0.95); - assertThat(plainTableConfig.hashTableRatio()). - isEqualTo(0.95); - } - - @Test - public void indexSparseness() { - PlainTableConfig plainTableConfig = new PlainTableConfig(); - plainTableConfig.setIndexSparseness(18); - assertThat(plainTableConfig.indexSparseness()). - isEqualTo(18); - } - - @Test - public void hugePageTlbSize() { - PlainTableConfig plainTableConfig = new PlainTableConfig(); - plainTableConfig.setHugePageTlbSize(1); - assertThat(plainTableConfig.hugePageTlbSize()). - isEqualTo(1); - } - - @Test - public void encodingType() { - PlainTableConfig plainTableConfig = new PlainTableConfig(); - plainTableConfig.setEncodingType(EncodingType.kPrefix); - assertThat(plainTableConfig.encodingType()).isEqualTo( - EncodingType.kPrefix); - } - - @Test - public void fullScanMode() { - PlainTableConfig plainTableConfig = new PlainTableConfig(); - plainTableConfig.setFullScanMode(true); - assertThat(plainTableConfig.fullScanMode()).isTrue(); } - - @Test - public void storeIndexInFile() { - PlainTableConfig plainTableConfig = new PlainTableConfig(); - plainTableConfig.setStoreIndexInFile(true); - assertThat(plainTableConfig.storeIndexInFile()). - isTrue(); - } - - @Test - public void plainTableConfig() { - try(final Options opt = new Options()) { - final PlainTableConfig plainTableConfig = new PlainTableConfig(); - opt.setTableFormatConfig(plainTableConfig); - assertThat(opt.tableFactoryName()).isEqualTo("PlainTable"); - } - } -} diff --git a/java/src/test/java/org/rocksdb/PlatformRandomHelper.java b/java/src/test/java/org/rocksdb/PlatformRandomHelper.java deleted file mode 100644 index 80ea4d197..000000000 --- a/java/src/test/java/org/rocksdb/PlatformRandomHelper.java +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Random; - -/** - * Helper class to get the appropriate Random class instance dependent - * on the current platform architecture (32bit vs 64bit) - */ -public class PlatformRandomHelper { - /** - * Determine if OS is 32-Bit/64-Bit - * - * @return boolean value indicating if operating system is 64 Bit. 
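A short sketch of attaching the PlainTableConfig exercised above to an Options instance; the key size, bloom bits and prefix length are illustrative values:

    // assumes: import org.rocksdb.*;
    try (final Options options = new Options().setCreateIfMissing(true)) {
      final PlainTableConfig plainTableConfig = new PlainTableConfig();
      plainTableConfig.setKeySize(16);          // fixed user-key length in bytes (0 = variable length)
      plainTableConfig.setBloomBitsPerKey(10);
      plainTableConfig.setHashTableRatio(0.75);
      options.setTableFormatConfig(plainTableConfig);
      options.useFixedLengthPrefixExtractor(8); // plain table is normally paired with a prefix extractor
      // options.tableFactoryName() now returns "PlainTable"
    }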
- */ - public static boolean isOs64Bit(){ - final boolean is64Bit; - if (System.getProperty("os.name").contains("Windows")) { - is64Bit = (System.getenv("ProgramFiles(x86)") != null); - } else { - is64Bit = (System.getProperty("os.arch").contains("64")); - } - return is64Bit; - } - - /** - * Factory to get a platform specific Random instance - * - * @return {@link java.util.Random} instance. - */ - public static Random getPlatformSpecificRandomFactory(){ - if (isOs64Bit()) { - return new Random(); - } - return new Random32Bit(); - } - - /** - * Random32Bit is a class which overrides {@code nextLong} to - * provide random numbers which fit in size_t. This workaround - * is necessary because there is no unsigned_int < Java 8 - */ - private static class Random32Bit extends Random { - @Override - public long nextLong(){ - return this.nextInt(Integer.MAX_VALUE); - } - } - - /** - * Utility class constructor - */ - private PlatformRandomHelper() { } -} diff --git a/java/src/test/java/org/rocksdb/PutMultiplePartsTest.java b/java/src/test/java/org/rocksdb/PutMultiplePartsTest.java deleted file mode 100644 index 471ef0728..000000000 --- a/java/src/test/java/org/rocksdb/PutMultiplePartsTest.java +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class PutMultiplePartsTest { - @Parameterized.Parameters - public static List data() { - return Arrays.asList(2, 3, 250, 20000); - } - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - private final int numParts; - - public PutMultiplePartsTest(final Integer numParts) { - this.numParts = numParts; - } - - @Test - public void putUntracked() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB txnDB = - TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath())) { - try (final Transaction transaction = txnDB.beginTransaction(new WriteOptions())) { - final byte[][] keys = generateItems("key", ":", numParts); - final byte[][] values = generateItems("value", "", numParts); - transaction.putUntracked(keys, values); - transaction.commit(); - } - txnDB.syncWal(); - } - - validateResults(); - } - - @Test - public void put() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB txnDB = - TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath())) { - try (final Transaction transaction = txnDB.beginTransaction(new WriteOptions())) { - final byte[][] keys = generateItems("key", ":", numParts); - final byte[][] values = generateItems("value", "", numParts); - transaction.put(keys, values); - transaction.commit(); - } - txnDB.syncWal(); - } - - validateResults(); - } - - @Test - public void putUntrackedCF() 
throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB txnDB = - TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath()); - final ColumnFamilyHandle columnFamilyHandle = - txnDB.createColumnFamily(new ColumnFamilyDescriptor("cfTest".getBytes()))) { - try (final Transaction transaction = txnDB.beginTransaction(new WriteOptions())) { - final byte[][] keys = generateItems("key", ":", numParts); - final byte[][] values = generateItems("value", "", numParts); - transaction.putUntracked(columnFamilyHandle, keys, values); - transaction.commit(); - } - txnDB.syncWal(); - } - - validateResultsCF(); - } - @Test - public void putCF() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB txnDB = - TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath()); - final ColumnFamilyHandle columnFamilyHandle = - txnDB.createColumnFamily(new ColumnFamilyDescriptor("cfTest".getBytes()))) { - try (final Transaction transaction = txnDB.beginTransaction(new WriteOptions())) { - final byte[][] keys = generateItems("key", ":", numParts); - final byte[][] values = generateItems("value", "", numParts); - transaction.put(columnFamilyHandle, keys, values); - transaction.commit(); - } - txnDB.syncWal(); - } - - validateResultsCF(); - } - - private void validateResults() throws RocksDBException { - try (final RocksDB db = RocksDB.open(new Options(), dbFolder.getRoot().getAbsolutePath())) { - final List keys = generateItemsAsList("key", ":", numParts); - final byte[][] values = generateItems("value", "", numParts); - - StringBuilder singleKey = new StringBuilder(); - for (int i = 0; i < numParts; i++) { - singleKey.append(new String(keys.get(i), StandardCharsets.UTF_8)); - } - final byte[] result = db.get(singleKey.toString().getBytes()); - StringBuilder singleValue = new StringBuilder(); - for (int i = 0; i < numParts; i++) { - singleValue.append(new String(values[i], StandardCharsets.UTF_8)); - } - assertThat(result).isEqualTo(singleValue.toString().getBytes()); - } - } - - private void validateResultsCF() throws RocksDBException { - final List columnFamilyDescriptors = new ArrayList<>(); - columnFamilyDescriptors.add(new ColumnFamilyDescriptor("cfTest".getBytes())); - columnFamilyDescriptors.add(new ColumnFamilyDescriptor("default".getBytes())); - final List columnFamilyHandles = new ArrayList<>(); - try (final RocksDB db = RocksDB.open(new DBOptions(), dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - final List keys = generateItemsAsList("key", ":", numParts); - final byte[][] values = generateItems("value", "", numParts); - - StringBuilder singleKey = new StringBuilder(); - for (int i = 0; i < numParts; i++) { - singleKey.append(new String(keys.get(i), StandardCharsets.UTF_8)); - } - final byte[] result = db.get(columnFamilyHandles.get(0), singleKey.toString().getBytes()); - StringBuilder singleValue = new StringBuilder(); - for (int i = 0; i < numParts; i++) { - singleValue.append(new String(values[i], StandardCharsets.UTF_8)); - } - assertThat(result).isEqualTo(singleValue.toString().getBytes()); - } - } - - private byte[][] generateItems(final String prefix, final String suffix, final int numItems) { - return generateItemsAsList(prefix, suffix, 
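The multi-part put used in these transaction tests concatenates the key parts into one key and the value parts into one value, which is what validateResults checks; a minimal sketch with an illustrative path and parts:

    // assumes: import org.rocksdb.*; enclosing method declares "throws RocksDBException"
    try (final Options options = new Options().setCreateIfMissing(true);
         final TransactionDBOptions txnDbOptions = new TransactionDBOptions();
         final TransactionDB txnDb =
             TransactionDB.open(options, txnDbOptions, "/tmp/txn-example");
         final WriteOptions writeOptions = new WriteOptions();
         final Transaction txn = txnDb.beginTransaction(writeOptions)) {
      final byte[][] keyParts = {"key".getBytes(), "0:".getBytes()};
      final byte[][] valueParts = {"value".getBytes(), "0".getBytes()};
      txn.put(keyParts, valueParts);   // stored as the single entry "key0:" -> "value0"
      txn.commit();
    }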
numItems).toArray(new byte[0][0]); - } - - private List generateItemsAsList( - final String prefix, final String suffix, final int numItems) { - final List items = new ArrayList<>(); - for (int i = 0; i < numItems; i++) { - items.add((prefix + i + suffix).getBytes()); - } - return items; - } -} diff --git a/java/src/test/java/org/rocksdb/RateLimiterTest.java b/java/src/test/java/org/rocksdb/RateLimiterTest.java deleted file mode 100644 index e7d6e6c49..000000000 --- a/java/src/test/java/org/rocksdb/RateLimiterTest.java +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.RateLimiter.*; - -public class RateLimiterTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void bytesPerSecond() { - try(final RateLimiter rateLimiter = - new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, - DEFAULT_FAIRNESS, DEFAULT_MODE, DEFAULT_AUTOTUNE)) { - assertThat(rateLimiter.getBytesPerSecond()).isGreaterThan(0); - rateLimiter.setBytesPerSecond(2000); - assertThat(rateLimiter.getBytesPerSecond()).isGreaterThan(0); - } - } - - @Test - public void getSingleBurstBytes() { - try(final RateLimiter rateLimiter = - new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, - DEFAULT_FAIRNESS, DEFAULT_MODE, DEFAULT_AUTOTUNE)) { - assertThat(rateLimiter.getSingleBurstBytes()).isEqualTo(100); - } - } - - @Test - public void getTotalBytesThrough() { - try(final RateLimiter rateLimiter = - new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, - DEFAULT_FAIRNESS, DEFAULT_MODE, DEFAULT_AUTOTUNE)) { - assertThat(rateLimiter.getTotalBytesThrough()).isEqualTo(0); - } - } - - @Test - public void getTotalRequests() { - try(final RateLimiter rateLimiter = - new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, - DEFAULT_FAIRNESS, DEFAULT_MODE, DEFAULT_AUTOTUNE)) { - assertThat(rateLimiter.getTotalRequests()).isEqualTo(0); - } - } - - @Test - public void autoTune() { - try(final RateLimiter rateLimiter = - new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, - DEFAULT_FAIRNESS, DEFAULT_MODE, true)) { - assertThat(rateLimiter.getBytesPerSecond()).isGreaterThan(0); - } - } -} diff --git a/java/src/test/java/org/rocksdb/ReadOnlyTest.java b/java/src/test/java/org/rocksdb/ReadOnlyTest.java deleted file mode 100644 index 5b40a5df1..000000000 --- a/java/src/test/java/org/rocksdb/ReadOnlyTest.java +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; - -public class ReadOnlyTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void readOnlyOpen() throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - db.put("key".getBytes(), "value".getBytes()); - } - try (final RocksDB db = RocksDB.openReadOnly(dbFolder.getRoot().getAbsolutePath())) { - assertThat("value").isEqualTo(new String(db.get("key".getBytes()))); - } - - try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { - final List cfDescriptors = new ArrayList<>(); - cfDescriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts)); - final List columnFamilyHandleList = new ArrayList<>(); - try (final RocksDB db = RocksDB.open( - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList)) { - columnFamilyHandleList.add( - db.createColumnFamily(new ColumnFamilyDescriptor("new_cf".getBytes(), cfOpts))); - columnFamilyHandleList.add( - db.createColumnFamily(new ColumnFamilyDescriptor("new_cf2".getBytes(), cfOpts))); - db.put(columnFamilyHandleList.get(2), "key2".getBytes(), "value2".getBytes()); - } - - columnFamilyHandleList.clear(); - try (final RocksDB db = RocksDB.openReadOnly( - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList)) { - assertThat(db.get("key2".getBytes())).isNull(); - assertThat(db.get(columnFamilyHandleList.get(0), "key2".getBytes())).isNull(); - } - - cfDescriptors.clear(); - cfDescriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts)); - cfDescriptors.add(new ColumnFamilyDescriptor("new_cf2".getBytes(), cfOpts)); - columnFamilyHandleList.clear(); - try (final RocksDB db = RocksDB.openReadOnly( - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList)) { - assertThat(new String(db.get(columnFamilyHandleList.get(1), "key2".getBytes()))) - .isEqualTo("value2"); - } - } - } - - @Test(expected = RocksDBException.class) - public void failToWriteInReadOnly() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - // no-op - } - } - - try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { - final List cfDescriptors = - Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts)); - - final List readOnlyColumnFamilyHandleList = new ArrayList<>(); - try (final RocksDB rDb = RocksDB.openReadOnly(dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, readOnlyColumnFamilyHandleList)) { - // test that put fails in readonly mode - rDb.put("key".getBytes(), "value".getBytes()); - } - } - } - - @Test(expected = RocksDBException.class) - public void failToCFWriteInReadOnly() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - try (final ColumnFamilyOptions cfOpts = new 
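A condensed sketch of the read-only pattern these tests cover: populate a database with a normal handle, then reopen it with openReadOnly, where any write attempt raises RocksDBException (the path is illustrative and the enclosing method is assumed to declare throws RocksDBException):

    // assumes: import org.rocksdb.*;
    final String dbPath = "/tmp/readonly-example";
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath)) {
      db.put("key".getBytes(), "value".getBytes());
    }
    try (final RocksDB readOnlyDb = RocksDB.openReadOnly(dbPath)) {
      final byte[] value = readOnlyDb.get("key".getBytes());   // reads are fine
      // readOnlyDb.put(...) or readOnlyDb.delete(...) would throw RocksDBException here
    }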
ColumnFamilyOptions()) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) - ); - final List readOnlyColumnFamilyHandleList = - new ArrayList<>(); - try (final RocksDB rDb = RocksDB.openReadOnly( - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - readOnlyColumnFamilyHandleList)) { - rDb.put(readOnlyColumnFamilyHandleList.get(0), "key".getBytes(), "value".getBytes()); - } - } - } - - @Test(expected = RocksDBException.class) - public void failToRemoveInReadOnly() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) - ); - - final List readOnlyColumnFamilyHandleList = - new ArrayList<>(); - - try (final RocksDB rDb = RocksDB.openReadOnly( - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - readOnlyColumnFamilyHandleList)) { - rDb.delete("key".getBytes()); - } - } - } - - @Test(expected = RocksDBException.class) - public void failToCFRemoveInReadOnly() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) - ); - - final List readOnlyColumnFamilyHandleList = - new ArrayList<>(); - try (final RocksDB rDb = RocksDB.openReadOnly( - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - readOnlyColumnFamilyHandleList)) { - rDb.delete(readOnlyColumnFamilyHandleList.get(0), - "key".getBytes()); - } - } - } - - @Test(expected = RocksDBException.class) - public void failToWriteBatchReadOnly() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) - ); - - final List readOnlyColumnFamilyHandleList = - new ArrayList<>(); - try (final RocksDB rDb = RocksDB.openReadOnly( - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - readOnlyColumnFamilyHandleList); - final WriteBatch wb = new WriteBatch(); - final WriteOptions wOpts = new WriteOptions()) { - wb.put("key".getBytes(), "value".getBytes()); - rDb.write(wOpts, wb); - } - } - } - - @Test(expected = RocksDBException.class) - public void failToCFWriteBatchReadOnly() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - //no-op - } - - try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) - ); - - final List readOnlyColumnFamilyHandleList = - new ArrayList<>(); - try (final RocksDB rDb = RocksDB.openReadOnly( - dbFolder.getRoot().getAbsolutePath(), cfDescriptors, - readOnlyColumnFamilyHandleList); - final WriteBatch wb = new WriteBatch(); - final WriteOptions wOpts 
= new WriteOptions()) { - wb.put(readOnlyColumnFamilyHandleList.get(0), "key".getBytes(), - "value".getBytes()); - rDb.write(wOpts, wb); - } - } - } - - @Test(expected = RocksDBException.class) - public void errorIfWalFileExists() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - // no-op - } - - try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { - final List cfDescriptors = - Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts)); - - final List readOnlyColumnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = new DBOptions(); - final RocksDB rDb = RocksDB.openReadOnly(options, dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, readOnlyColumnFamilyHandleList, true);) { - // no-op... should have raised an error as errorIfWalFileExists=true - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/ReadOptionsTest.java b/java/src/test/java/org/rocksdb/ReadOptionsTest.java deleted file mode 100644 index 156dd3730..000000000 --- a/java/src/test/java/org/rocksdb/ReadOptionsTest.java +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import java.util.Arrays; -import java.util.Random; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import static org.assertj.core.api.Assertions.assertThat; - -public class ReadOptionsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public ExpectedException exception = ExpectedException.none(); - - @Test - public void altConstructor() { - try (final ReadOptions opt = new ReadOptions(true, true)) { - assertThat(opt.verifyChecksums()).isTrue(); - assertThat(opt.fillCache()).isTrue(); - } - } - - @Test - public void copyConstructor() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setVerifyChecksums(false); - opt.setFillCache(false); - opt.setIterateUpperBound(buildRandomSlice()); - opt.setIterateLowerBound(buildRandomSlice()); - opt.setTimestamp(buildRandomSlice()); - opt.setIterStartTs(buildRandomSlice()); - try (final ReadOptions other = new ReadOptions(opt)) { - assertThat(opt.verifyChecksums()).isEqualTo(other.verifyChecksums()); - assertThat(opt.fillCache()).isEqualTo(other.fillCache()); - assertThat(Arrays.equals(opt.iterateUpperBound().data(), other.iterateUpperBound().data())).isTrue(); - assertThat(Arrays.equals(opt.iterateLowerBound().data(), other.iterateLowerBound().data())).isTrue(); - assertThat(Arrays.equals(opt.timestamp().data(), other.timestamp().data())).isTrue(); - assertThat(Arrays.equals(opt.iterStartTs().data(), other.iterStartTs().data())).isTrue(); - } - } - } - - @Test - public void verifyChecksum() { - try (final ReadOptions opt = new ReadOptions()) { - final Random rand = new Random(); - final boolean boolValue = rand.nextBoolean(); - opt.setVerifyChecksums(boolValue); - assertThat(opt.verifyChecksums()).isEqualTo(boolValue); - } - } - - @Test - public void fillCache() { - try (final ReadOptions opt = new ReadOptions()) { - final Random rand = new Random(); - final boolean boolValue = 
rand.nextBoolean(); - opt.setFillCache(boolValue); - assertThat(opt.fillCache()).isEqualTo(boolValue); - } - } - - @Test - public void tailing() { - try (final ReadOptions opt = new ReadOptions()) { - final Random rand = new Random(); - final boolean boolValue = rand.nextBoolean(); - opt.setTailing(boolValue); - assertThat(opt.tailing()).isEqualTo(boolValue); - } - } - - @Test - public void snapshot() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setSnapshot(null); - assertThat(opt.snapshot()).isNull(); - } - } - - @Test - public void readTier() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setReadTier(ReadTier.BLOCK_CACHE_TIER); - assertThat(opt.readTier()).isEqualTo(ReadTier.BLOCK_CACHE_TIER); - } - } - - @SuppressWarnings("deprecated") - @Test - public void managed() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setManaged(true); - assertThat(opt.managed()).isTrue(); - } - } - - @Test - public void totalOrderSeek() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setTotalOrderSeek(true); - assertThat(opt.totalOrderSeek()).isTrue(); - } - } - - @Test - public void prefixSameAsStart() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setPrefixSameAsStart(true); - assertThat(opt.prefixSameAsStart()).isTrue(); - } - } - - @Test - public void pinData() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setPinData(true); - assertThat(opt.pinData()).isTrue(); - } - } - - @Test - public void backgroundPurgeOnIteratorCleanup() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setBackgroundPurgeOnIteratorCleanup(true); - assertThat(opt.backgroundPurgeOnIteratorCleanup()).isTrue(); - } - } - - @Test - public void readaheadSize() { - try (final ReadOptions opt = new ReadOptions()) { - final Random rand = new Random(); - final int intValue = rand.nextInt(2147483647); - opt.setReadaheadSize(intValue); - assertThat(opt.readaheadSize()).isEqualTo(intValue); - } - } - - @Test - public void ignoreRangeDeletions() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setIgnoreRangeDeletions(true); - assertThat(opt.ignoreRangeDeletions()).isTrue(); - } - } - - @Test - public void iterateUpperBound() { - try (final ReadOptions opt = new ReadOptions()) { - Slice upperBound = buildRandomSlice(); - opt.setIterateUpperBound(upperBound); - assertThat(Arrays.equals(upperBound.data(), opt.iterateUpperBound().data())).isTrue(); - opt.setIterateUpperBound(null); - assertThat(opt.iterateUpperBound()).isNull(); - } - } - - @Test - public void iterateUpperBoundNull() { - try (final ReadOptions opt = new ReadOptions()) { - assertThat(opt.iterateUpperBound()).isNull(); - } - } - - @Test - public void iterateLowerBound() { - try (final ReadOptions opt = new ReadOptions()) { - Slice lowerBound = buildRandomSlice(); - opt.setIterateLowerBound(lowerBound); - assertThat(Arrays.equals(lowerBound.data(), opt.iterateLowerBound().data())).isTrue(); - opt.setIterateLowerBound(null); - assertThat(opt.iterateLowerBound()).isNull(); - } - } - - @Test - public void iterateLowerBoundNull() { - try (final ReadOptions opt = new ReadOptions()) { - assertThat(opt.iterateLowerBound()).isNull(); - } - } - - @Test - public void tableFilter() { - try (final ReadOptions opt = new ReadOptions(); - final AbstractTableFilter allTablesFilter = new AllTablesFilter()) { - opt.setTableFilter(allTablesFilter); - } - } - - @Test - public void autoPrefixMode() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setAutoPrefixMode(true); - 
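A small sketch of bounding an iterator with the ReadOptions setters exercised above; "db" is assumed to be an already-open RocksDB handle and the key bounds are illustrative:

    // assumes: import org.rocksdb.*;
    try (final Slice lower = new Slice("key1".getBytes());
         final Slice upper = new Slice("key9".getBytes());
         final ReadOptions readOptions = new ReadOptions()) {
      readOptions.setIterateLowerBound(lower);   // inclusive
      readOptions.setIterateUpperBound(upper);   // exclusive
      try (final RocksIterator it = db.newIterator(readOptions)) {
        for (it.seekToFirst(); it.isValid(); it.next()) {
          // only keys in ["key1", "key9") are visited
        }
      }
    }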
assertThat(opt.autoPrefixMode()).isTrue(); - } - } - - @Test - public void timestamp() { - try (final ReadOptions opt = new ReadOptions()) { - Slice timestamp = buildRandomSlice(); - opt.setTimestamp(timestamp); - assertThat(Arrays.equals(timestamp.data(), opt.timestamp().data())).isTrue(); - opt.setTimestamp(null); - assertThat(opt.timestamp()).isNull(); - } - } - - @Test - public void iterStartTs() { - try (final ReadOptions opt = new ReadOptions()) { - Slice itertStartTsSlice = buildRandomSlice(); - opt.setIterStartTs(itertStartTsSlice); - assertThat(Arrays.equals(itertStartTsSlice.data(), opt.iterStartTs().data())).isTrue(); - opt.setIterStartTs(null); - assertThat(opt.iterStartTs()).isNull(); - } - } - - @Test - public void deadline() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setDeadline(1999l); - assertThat(opt.deadline()).isEqualTo(1999l); - } - } - - @Test - public void ioTimeout() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setIoTimeout(34555l); - assertThat(opt.ioTimeout()).isEqualTo(34555l); - } - } - - @Test - public void valueSizeSoftLimit() { - try (final ReadOptions opt = new ReadOptions()) { - opt.setValueSizeSoftLimit(12134324l); - assertThat(opt.valueSizeSoftLimit()).isEqualTo(12134324l); - } - } - - @Test - public void failSetVerifyChecksumUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.setVerifyChecksums(true); - } - } - - @Test - public void failVerifyChecksumUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.verifyChecksums(); - } - } - - @Test - public void failSetFillCacheUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.setFillCache(true); - } - } - - @Test - public void failFillCacheUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.fillCache(); - } - } - - @Test - public void failSetTailingUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.setTailing(true); - } - } - - @Test - public void failTailingUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.tailing(); - } - } - - @Test - public void failSetSnapshotUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.setSnapshot(null); - } - } - - @Test - public void failSnapshotUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.snapshot(); - } - } - - @Test - public void failSetIterateUpperBoundUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.setIterateUpperBound(null); - } - } - - @Test - public void failIterateUpperBoundUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.iterateUpperBound(); - } - } - - @Test - public void failSetIterateLowerBoundUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.setIterateLowerBound(null); - } - } - - @Test - public void failIterateLowerBoundUninitialized() { - try (final ReadOptions readOptions = - setupUninitializedReadOptions(exception)) { - readOptions.iterateLowerBound(); - } - } - - private ReadOptions 
setupUninitializedReadOptions( - ExpectedException exception) { - final ReadOptions readOptions = new ReadOptions(); - readOptions.close(); - exception.expect(AssertionError.class); - return readOptions; - } - - private Slice buildRandomSlice() { - final Random rand = new Random(); - byte[] sliceBytes = new byte[rand.nextInt(100) + 1]; - rand.nextBytes(sliceBytes); - return new Slice(sliceBytes); - } - - private static class AllTablesFilter extends AbstractTableFilter { - @Override - public boolean filter(final TableProperties tableProperties) { - return true; - } - } -} diff --git a/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java b/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java deleted file mode 100644 index d3bd4ece7..000000000 --- a/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; - -import org.rocksdb.Status.Code; -import org.rocksdb.Status.SubCode; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.fail; - -public class RocksDBExceptionTest { - - @Test - public void exception() { - try { - raiseException(); - } catch(final RocksDBException e) { - assertThat(e.getStatus()).isNull(); - assertThat(e.getMessage()).isEqualTo("test message"); - return; - } - fail(); - } - - @Test - public void exceptionWithStatusCode() { - try { - raiseExceptionWithStatusCode(); - } catch(final RocksDBException e) { - assertThat(e.getStatus()).isNotNull(); - assertThat(e.getStatus().getCode()).isEqualTo(Code.NotSupported); - assertThat(e.getStatus().getSubCode()).isEqualTo(SubCode.None); - assertThat(e.getStatus().getState()).isNull(); - assertThat(e.getMessage()).isEqualTo("test message"); - return; - } - fail(); - } - - @Test - public void exceptionNoMsgWithStatusCode() { - try { - raiseExceptionNoMsgWithStatusCode(); - } catch(final RocksDBException e) { - assertThat(e.getStatus()).isNotNull(); - assertThat(e.getStatus().getCode()).isEqualTo(Code.NotSupported); - assertThat(e.getStatus().getSubCode()).isEqualTo(SubCode.None); - assertThat(e.getStatus().getState()).isNull(); - assertThat(e.getMessage()).isEqualTo(Code.NotSupported.name()); - return; - } - fail(); - } - - @Test - public void exceptionWithStatusCodeSubCode() { - try { - raiseExceptionWithStatusCodeSubCode(); - } catch(final RocksDBException e) { - assertThat(e.getStatus()).isNotNull(); - assertThat(e.getStatus().getCode()).isEqualTo(Code.TimedOut); - assertThat(e.getStatus().getSubCode()) - .isEqualTo(Status.SubCode.LockTimeout); - assertThat(e.getStatus().getState()).isNull(); - assertThat(e.getMessage()).isEqualTo("test message"); - return; - } - fail(); - } - - @Test - public void exceptionNoMsgWithStatusCodeSubCode() { - try { - raiseExceptionNoMsgWithStatusCodeSubCode(); - } catch(final RocksDBException e) { - assertThat(e.getStatus()).isNotNull(); - assertThat(e.getStatus().getCode()).isEqualTo(Code.TimedOut); - assertThat(e.getStatus().getSubCode()).isEqualTo(SubCode.LockTimeout); - assertThat(e.getStatus().getState()).isNull(); - assertThat(e.getMessage()).isEqualTo(Code.TimedOut.name() + - "(" + SubCode.LockTimeout.name() + ")"); - return; - } - fail(); - } - - @Test - public void exceptionWithStatusCodeState() { - try { - 
raiseExceptionWithStatusCodeState(); - } catch(final RocksDBException e) { - assertThat(e.getStatus()).isNotNull(); - assertThat(e.getStatus().getCode()).isEqualTo(Code.NotSupported); - assertThat(e.getStatus().getSubCode()).isEqualTo(SubCode.None); - assertThat(e.getStatus().getState()).isNotNull(); - assertThat(e.getMessage()).isEqualTo("test message"); - return; - } - fail(); - } - - private native void raiseException() throws RocksDBException; - private native void raiseExceptionWithStatusCode() throws RocksDBException; - private native void raiseExceptionNoMsgWithStatusCode() throws RocksDBException; - private native void raiseExceptionWithStatusCodeSubCode() - throws RocksDBException; - private native void raiseExceptionNoMsgWithStatusCodeSubCode() - throws RocksDBException; - private native void raiseExceptionWithStatusCodeState() - throws RocksDBException; -} diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java deleted file mode 100644 index 488dbafe8..000000000 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ /dev/null @@ -1,1695 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import org.junit.*; -import org.junit.rules.ExpectedException; -import org.junit.rules.TemporaryFolder; - -import java.nio.ByteBuffer; -import java.util.*; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.fail; - -public class RocksDBTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - public static final Random rand = PlatformRandomHelper. 
- getPlatformSpecificRandomFactory(); - - @Test - public void open() throws RocksDBException { - try (final RocksDB db = - RocksDB.open(dbFolder.getRoot().getAbsolutePath())) { - assertThat(db).isNotNull(); - } - } - - @Test - public void open_opt() throws RocksDBException { - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - assertThat(db).isNotNull(); - } - } - - @Test - public void openWhenOpen() throws RocksDBException { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - - try (final RocksDB db1 = RocksDB.open(dbPath)) { - try (final RocksDB db2 = RocksDB.open(dbPath)) { - fail("Should have thrown an exception when opening the same db twice"); - } catch (final RocksDBException e) { - assertThat(e.getStatus().getCode()).isEqualTo(Status.Code.IOError); - assertThat(e.getStatus().getSubCode()).isEqualTo(Status.SubCode.None); - assertThat(e.getStatus().getState()).contains("lock "); - } - } - } - - @Test - public void createColumnFamily() throws RocksDBException { - final byte[] col1Name = "col1".getBytes(UTF_8); - - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions() - ) { - try (final ColumnFamilyHandle col1 = - db.createColumnFamily(new ColumnFamilyDescriptor(col1Name, cfOpts))) { - assertThat(col1).isNotNull(); - assertThat(col1.getName()).isEqualTo(col1Name); - } - } - - final List cfHandles = new ArrayList<>(); - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath(), - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor(col1Name)), - cfHandles)) { - try { - assertThat(cfHandles.size()).isEqualTo(2); - assertThat(cfHandles.get(1)).isNotNull(); - assertThat(cfHandles.get(1).getName()).isEqualTo(col1Name); - } finally { - for (final ColumnFamilyHandle cfHandle : - cfHandles) { - cfHandle.close(); - } - } - } - } - - - @Test - public void createColumnFamilies() throws RocksDBException { - final byte[] col1Name = "col1".getBytes(UTF_8); - final byte[] col2Name = "col2".getBytes(UTF_8); - - List cfHandles; - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions() - ) { - cfHandles = - db.createColumnFamilies(cfOpts, Arrays.asList(col1Name, col2Name)); - try { - assertThat(cfHandles).isNotNull(); - assertThat(cfHandles.size()).isEqualTo(2); - assertThat(cfHandles.get(0).getName()).isEqualTo(col1Name); - assertThat(cfHandles.get(1).getName()).isEqualTo(col2Name); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - - cfHandles = new ArrayList<>(); - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath(), - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor(col1Name), - new ColumnFamilyDescriptor(col2Name)), - cfHandles)) { - try { - assertThat(cfHandles.size()).isEqualTo(3); - assertThat(cfHandles.get(1)).isNotNull(); - assertThat(cfHandles.get(1).getName()).isEqualTo(col1Name); - assertThat(cfHandles.get(2)).isNotNull(); - assertThat(cfHandles.get(2).getName()).isEqualTo(col2Name); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - } - - @Test - public void createColumnFamiliesfromDescriptors() throws RocksDBException { - final byte[] col1Name = "col1".getBytes(UTF_8); 
- final byte[] col2Name = "col2".getBytes(UTF_8); - - List cfHandles; - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions() - ) { - cfHandles = - db.createColumnFamilies(Arrays.asList( - new ColumnFamilyDescriptor(col1Name, cfOpts), - new ColumnFamilyDescriptor(col2Name, cfOpts))); - try { - assertThat(cfHandles).isNotNull(); - assertThat(cfHandles.size()).isEqualTo(2); - assertThat(cfHandles.get(0).getName()).isEqualTo(col1Name); - assertThat(cfHandles.get(1).getName()).isEqualTo(col2Name); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - - cfHandles = new ArrayList<>(); - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath(), - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor(col1Name), - new ColumnFamilyDescriptor(col2Name)), - cfHandles)) { - try { - assertThat(cfHandles.size()).isEqualTo(3); - assertThat(cfHandles.get(1)).isNotNull(); - assertThat(cfHandles.get(1).getName()).isEqualTo(col1Name); - assertThat(cfHandles.get(2)).isNotNull(); - assertThat(cfHandles.get(2).getName()).isEqualTo(col2Name); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - } - - @Test - public void put() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final WriteOptions opt = new WriteOptions(); final ReadOptions optr = new ReadOptions()) { - db.put("key1".getBytes(), "value".getBytes()); - db.put(opt, "key2".getBytes(), "12345678".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo( - "value".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo( - "12345678".getBytes()); - - ByteBuffer key = ByteBuffer.allocateDirect(12); - ByteBuffer value = ByteBuffer.allocateDirect(12); - key.position(4); - key.put("key3".getBytes()); - key.position(4).limit(8); - value.position(4); - value.put("val3".getBytes()); - value.position(4).limit(8); - - db.put(opt, key, value); - - assertThat(key.position()).isEqualTo(8); - assertThat(key.limit()).isEqualTo(8); - - assertThat(value.position()).isEqualTo(8); - assertThat(value.limit()).isEqualTo(8); - - key.position(4); - - ByteBuffer result = ByteBuffer.allocateDirect(12); - assertThat(db.get(optr, key, result)).isEqualTo(4); - assertThat(result.position()).isEqualTo(0); - assertThat(result.limit()).isEqualTo(4); - assertThat(key.position()).isEqualTo(8); - assertThat(key.limit()).isEqualTo(8); - - byte[] tmp = new byte[4]; - result.get(tmp); - assertThat(tmp).isEqualTo("val3".getBytes()); - - key.position(4); - - result.clear().position(9); - assertThat(db.get(optr, key, result)).isEqualTo(4); - assertThat(result.position()).isEqualTo(9); - assertThat(result.limit()).isEqualTo(12); - assertThat(key.position()).isEqualTo(8); - assertThat(key.limit()).isEqualTo(8); - byte[] tmp2 = new byte[3]; - result.get(tmp2); - assertThat(tmp2).isEqualTo("val".getBytes()); - - // put - Segment key3 = sliceSegment("key3"); - Segment key4 = sliceSegment("key4"); - Segment value0 = sliceSegment("value 0"); - Segment value1 = sliceSegment("value 1"); - db.put(key3.data, key3.offset, key3.len, value0.data, value0.offset, value0.len); - db.put(opt, key4.data, key4.offset, key4.len, value1.data, value1.offset, value1.len); - - // compare - Assert.assertTrue(value0.isSamePayload(db.get(key3.data, key3.offset, key3.len))); - 
Assert.assertTrue(value1.isSamePayload(db.get(key4.data, key4.offset, key4.len))); - } - } - - private static Segment sliceSegment(String key) { - ByteBuffer rawKey = ByteBuffer.allocate(key.length() + 4); - rawKey.put((byte)0); - rawKey.put((byte)0); - rawKey.put(key.getBytes()); - - return new Segment(rawKey.array(), 2, key.length()); - } - - private static class Segment { - final byte[] data; - final int offset; - final int len; - - public boolean isSamePayload(byte[] value) { - if (value == null) { - return false; - } - if (value.length != len) { - return false; - } - - for (int i = 0; i < value.length; i++) { - if (data[i + offset] != value[i]) { - return false; - } - } - - return true; - } - - public Segment(byte[] value, int offset, int len) { - this.data = value; - this.offset = offset; - this.len = len; - } - } - - @Test - public void write() throws RocksDBException { - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); - final Options options = new Options() - .setMergeOperator(stringAppendOperator) - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()); - final WriteOptions opts = new WriteOptions()) { - - try (final WriteBatch wb1 = new WriteBatch()) { - wb1.put("key1".getBytes(), "aa".getBytes()); - wb1.merge("key1".getBytes(), "bb".getBytes()); - - try (final WriteBatch wb2 = new WriteBatch()) { - wb2.put("key2".getBytes(), "xx".getBytes()); - wb2.merge("key2".getBytes(), "yy".getBytes()); - db.write(opts, wb1); - db.write(opts, wb2); - } - } - - assertThat(db.get("key1".getBytes())).isEqualTo( - "aa,bb".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo( - "xx,yy".getBytes()); - } - } - - @Test - public void getWithOutValue() throws RocksDBException { - try (final RocksDB db = - RocksDB.open(dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value".getBytes()); - db.put("key2".getBytes(), "12345678".getBytes()); - byte[] outValue = new byte[5]; - // not found value - int getResult = db.get("keyNotFound".getBytes(), outValue); - assertThat(getResult).isEqualTo(RocksDB.NOT_FOUND); - // found value which fits in outValue - getResult = db.get("key1".getBytes(), outValue); - assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); - assertThat(outValue).isEqualTo("value".getBytes()); - // found value which fits partially - getResult = db.get("key2".getBytes(), outValue); - assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); - assertThat(outValue).isEqualTo("12345".getBytes()); - } - } - - @Test - public void getWithOutValueReadOptions() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final ReadOptions rOpt = new ReadOptions()) { - db.put("key1".getBytes(), "value".getBytes()); - db.put("key2".getBytes(), "12345678".getBytes()); - byte[] outValue = new byte[5]; - // not found value - int getResult = db.get(rOpt, "keyNotFound".getBytes(), - outValue); - assertThat(getResult).isEqualTo(RocksDB.NOT_FOUND); - // found value which fits in outValue - getResult = db.get(rOpt, "key1".getBytes(), outValue); - assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); - assertThat(outValue).isEqualTo("value".getBytes()); - // found value which fits partially - getResult = db.get(rOpt, "key2".getBytes(), outValue); - assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); - assertThat(outValue).isEqualTo("12345".getBytes()); - } - } - - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - 
public void getOutOfArrayMaxSizeValue() throws RocksDBException { - final int numberOfValueSplits = 10; - final int splitSize = Integer.MAX_VALUE / numberOfValueSplits; - - Runtime runtime = Runtime.getRuntime(); - long neededMemory = ((long)(splitSize)) * (((long)numberOfValueSplits) + 3); - boolean isEnoughMemory = runtime.maxMemory() - runtime.totalMemory() > neededMemory; - Assume.assumeTrue(isEnoughMemory); - - final byte[] valueSplit = new byte[splitSize]; - final byte[] key = "key".getBytes(); - - thrown.expect(RocksDBException.class); - thrown.expectMessage("Requested array size exceeds VM limit"); - - // merge (numberOfValueSplits + 1) valueSplit's to get value size exceeding Integer.MAX_VALUE - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); - final Options opt = new Options() - .setCreateIfMissing(true) - .setMergeOperator(stringAppendOperator); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - db.put(key, valueSplit); - for (int i = 0; i < numberOfValueSplits; i++) { - db.merge(key, valueSplit); - } - db.get(key); - } - } - - @Test - public void multiGetAsList() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final ReadOptions rOpt = new ReadOptions()) { - db.put("key1".getBytes(), "value".getBytes()); - db.put("key2".getBytes(), "12345678".getBytes()); - List lookupKeys = new ArrayList<>(); - lookupKeys.add("key1".getBytes()); - lookupKeys.add("key2".getBytes()); - List results = db.multiGetAsList(lookupKeys); - assertThat(results).isNotNull(); - assertThat(results).hasSize(lookupKeys.size()); - assertThat(results). - containsExactly("value".getBytes(), "12345678".getBytes()); - // test same method with ReadOptions - results = db.multiGetAsList(rOpt, lookupKeys); - assertThat(results).isNotNull(); - assertThat(results). - contains("value".getBytes(), "12345678".getBytes()); - - // remove existing key - lookupKeys.remove(1); - // add non existing key - lookupKeys.add("key3".getBytes()); - results = db.multiGetAsList(lookupKeys); - assertThat(results).isNotNull(); - assertThat(results). 
- containsExactly("value".getBytes(), null); - // test same call with readOptions - results = db.multiGetAsList(rOpt, lookupKeys); - assertThat(results).isNotNull(); - assertThat(results).contains("value".getBytes()); - } - } - - @Test - public void merge() throws RocksDBException { - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); - final Options opt = new Options() - .setCreateIfMissing(true) - .setMergeOperator(stringAppendOperator); - final WriteOptions wOpt = new WriteOptions(); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath()) - ) { - db.put("key1".getBytes(), "value".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo( - "value".getBytes()); - // merge key1 with another value portion - db.merge("key1".getBytes(), "value2".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo( - "value,value2".getBytes()); - // merge key1 with another value portion - db.merge(wOpt, "key1".getBytes(), "value3".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo( - "value,value2,value3".getBytes()); - // merge on non existent key shall insert the value - db.merge(wOpt, "key2".getBytes(), "xxxx".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo( - "xxxx".getBytes()); - - Segment key3 = sliceSegment("key3"); - Segment key4 = sliceSegment("key4"); - Segment value0 = sliceSegment("value 0"); - Segment value1 = sliceSegment("value 1"); - - db.merge(key3.data, key3.offset, key3.len, value0.data, value0.offset, value0.len); - db.merge(wOpt, key4.data, key4.offset, key4.len, value1.data, value1.offset, value1.len); - - // compare - Assert.assertTrue(value0.isSamePayload(db.get(key3.data, key3.offset, key3.len))); - Assert.assertTrue(value1.isSamePayload(db.get(key4.data, key4.offset, key4.len))); - } - } - - @Test - public void delete() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final WriteOptions wOpt = new WriteOptions()) { - db.put("key1".getBytes(), "value".getBytes()); - db.put("key2".getBytes(), "12345678".getBytes()); - db.put("key3".getBytes(), "33".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo( - "value".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo( - "12345678".getBytes()); - assertThat(db.get("key3".getBytes())).isEqualTo("33".getBytes()); - db.delete("key1".getBytes()); - db.delete(wOpt, "key2".getBytes()); - ByteBuffer key = ByteBuffer.allocateDirect(16); - key.put("key3".getBytes()).flip(); - db.delete(wOpt, key); - assertThat(key.position()).isEqualTo(4); - assertThat(key.limit()).isEqualTo(4); - - assertThat(db.get("key1".getBytes())).isNull(); - assertThat(db.get("key2".getBytes())).isNull(); - - Segment key3 = sliceSegment("key3"); - Segment key4 = sliceSegment("key4"); - db.put("key3".getBytes(), "key3 value".getBytes()); - db.put("key4".getBytes(), "key4 value".getBytes()); - - db.delete(key3.data, key3.offset, key3.len); - db.delete(wOpt, key4.data, key4.offset, key4.len); - - assertThat(db.get("key3".getBytes())).isNull(); - assertThat(db.get("key4".getBytes())).isNull(); - } - } - - @Test - public void singleDelete() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final WriteOptions wOpt = new WriteOptions()) { - db.put("key1".getBytes(), "value".getBytes()); - db.put("key2".getBytes(), "12345678".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo( - "value".getBytes()); - 
assertThat(db.get("key2".getBytes())).isEqualTo( - "12345678".getBytes()); - db.singleDelete("key1".getBytes()); - db.singleDelete(wOpt, "key2".getBytes()); - assertThat(db.get("key1".getBytes())).isNull(); - assertThat(db.get("key2".getBytes())).isNull(); - } - } - - @Test - public void singleDelete_nonExisting() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final WriteOptions wOpt = new WriteOptions()) { - db.singleDelete("key1".getBytes()); - db.singleDelete(wOpt, "key2".getBytes()); - assertThat(db.get("key1".getBytes())).isNull(); - assertThat(db.get("key2".getBytes())).isNull(); - } - } - - @Test - public void deleteRange() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value".getBytes()); - db.put("key2".getBytes(), "12345678".getBytes()); - db.put("key3".getBytes(), "abcdefg".getBytes()); - db.put("key4".getBytes(), "xyz".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); - assertThat(db.get("key3".getBytes())).isEqualTo("abcdefg".getBytes()); - assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); - db.deleteRange("key2".getBytes(), "key4".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); - assertThat(db.get("key2".getBytes())).isNull(); - assertThat(db.get("key3".getBytes())).isNull(); - assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); - } - } - - @Test - public void getIntProperty() throws RocksDBException { - try ( - final Options options = new Options() - .setCreateIfMissing(true) - .setMaxWriteBufferNumber(10) - .setMinWriteBufferNumberToMerge(10); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()); - final WriteOptions wOpt = new WriteOptions().setDisableWAL(true) - ) { - db.put(wOpt, "key1".getBytes(), "value1".getBytes()); - db.put(wOpt, "key2".getBytes(), "value2".getBytes()); - db.put(wOpt, "key3".getBytes(), "value3".getBytes()); - db.put(wOpt, "key4".getBytes(), "value4".getBytes()); - assertThat(db.getLongProperty("rocksdb.num-entries-active-mem-table")) - .isGreaterThan(0); - assertThat(db.getLongProperty("rocksdb.cur-size-active-mem-table")) - .isGreaterThan(0); - } - } - - @Test - public void fullCompactRange() throws RocksDBException { - try (final Options opt = new Options(). - setCreateIfMissing(true). - setDisableAutoCompactions(true). - setCompactionStyle(CompactionStyle.LEVEL). - setNumLevels(4). - setWriteBufferSize(100 << 10). - setLevelZeroFileNumCompactionTrigger(3). - setTargetFileSizeBase(200 << 10). - setTargetFileSizeMultiplier(1). - setMaxBytesForLevelBase(500 << 10). - setMaxBytesForLevelMultiplier(1). - setDisableAutoCompactions(false); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - // fill database with key/value pairs - byte[] b = new byte[10000]; - for (int i = 0; i < 200; i++) { - rand.nextBytes(b); - db.put((String.valueOf(i)).getBytes(), b); - } - db.compactRange(); - } - } - - @Test - public void fullCompactRangeColumnFamily() - throws RocksDBException { - try ( - final DBOptions opt = new DBOptions(). - setCreateIfMissing(true). - setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions(). - setDisableAutoCompactions(true). - setCompactionStyle(CompactionStyle.LEVEL). - setNumLevels(4). 
- setWriteBufferSize(100 << 10). - setLevelZeroFileNumCompactionTrigger(3). - setTargetFileSizeBase(200 << 10). - setTargetFileSizeMultiplier(1). - setMaxBytesForLevelBase(500 << 10). - setMaxBytesForLevelMultiplier(1). - setDisableAutoCompactions(false) - ) { - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts)); - - // open database - final List columnFamilyHandles = new ArrayList<>(); - try (final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, - columnFamilyHandles)) { - try { - // fill database with key/value pairs - byte[] b = new byte[10000]; - for (int i = 0; i < 200; i++) { - rand.nextBytes(b); - db.put(columnFamilyHandles.get(1), - String.valueOf(i).getBytes(), b); - } - db.compactRange(columnFamilyHandles.get(1)); - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } - } - - @Test - public void compactRangeWithKeys() - throws RocksDBException { - try (final Options opt = new Options(). - setCreateIfMissing(true). - setDisableAutoCompactions(true). - setCompactionStyle(CompactionStyle.LEVEL). - setNumLevels(4). - setWriteBufferSize(100 << 10). - setLevelZeroFileNumCompactionTrigger(3). - setTargetFileSizeBase(200 << 10). - setTargetFileSizeMultiplier(1). - setMaxBytesForLevelBase(500 << 10). - setMaxBytesForLevelMultiplier(1). - setDisableAutoCompactions(false); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - // fill database with key/value pairs - byte[] b = new byte[10000]; - for (int i = 0; i < 200; i++) { - rand.nextBytes(b); - db.put((String.valueOf(i)).getBytes(), b); - } - db.compactRange("0".getBytes(), "201".getBytes()); - } - } - - @Test - public void compactRangeWithKeysReduce() - throws RocksDBException { - try ( - final Options opt = new Options(). - setCreateIfMissing(true). - setDisableAutoCompactions(true). - setCompactionStyle(CompactionStyle.LEVEL). - setNumLevels(4). - setWriteBufferSize(100 << 10). - setLevelZeroFileNumCompactionTrigger(3). - setTargetFileSizeBase(200 << 10). - setTargetFileSizeMultiplier(1). - setMaxBytesForLevelBase(500 << 10). - setMaxBytesForLevelMultiplier(1). - setDisableAutoCompactions(false); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - // fill database with key/value pairs - byte[] b = new byte[10000]; - for (int i = 0; i < 200; i++) { - rand.nextBytes(b); - db.put((String.valueOf(i)).getBytes(), b); - } - db.flush(new FlushOptions().setWaitForFlush(true)); - try (final CompactRangeOptions compactRangeOpts = new CompactRangeOptions() - .setChangeLevel(true) - .setTargetLevel(-1) - .setTargetPathId(0)) { - db.compactRange(null, "0".getBytes(), "201".getBytes(), - compactRangeOpts); - } - } - } - - @Test - public void compactRangeWithKeysColumnFamily() - throws RocksDBException { - try (final DBOptions opt = new DBOptions(). - setCreateIfMissing(true). - setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions(). - setDisableAutoCompactions(true). - setCompactionStyle(CompactionStyle.LEVEL). - setNumLevels(4). - setWriteBufferSize(100 << 10). - setLevelZeroFileNumCompactionTrigger(3). - setTargetFileSizeBase(200 << 10). - setTargetFileSizeMultiplier(1). - setMaxBytesForLevelBase(500 << 10). - setMaxBytesForLevelMultiplier(1). 
- setDisableAutoCompactions(false) - ) { - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) - ); - - // open database - final List columnFamilyHandles = - new ArrayList<>(); - try (final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, - columnFamilyHandles)) { - try { - // fill database with key/value pairs - byte[] b = new byte[10000]; - for (int i = 0; i < 200; i++) { - rand.nextBytes(b); - db.put(columnFamilyHandles.get(1), - String.valueOf(i).getBytes(), b); - } - db.compactRange(columnFamilyHandles.get(1), - "0".getBytes(), "201".getBytes()); - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } - } - - @Test - public void compactRangeWithKeysReduceColumnFamily() - throws RocksDBException { - try (final DBOptions opt = new DBOptions(). - setCreateIfMissing(true). - setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions(). - setDisableAutoCompactions(true). - setCompactionStyle(CompactionStyle.LEVEL). - setNumLevels(4). - setWriteBufferSize(100 << 10). - setLevelZeroFileNumCompactionTrigger(3). - setTargetFileSizeBase(200 << 10). - setTargetFileSizeMultiplier(1). - setMaxBytesForLevelBase(500 << 10). - setMaxBytesForLevelMultiplier(1). - setDisableAutoCompactions(false) - ) { - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) - ); - - final List columnFamilyHandles = new ArrayList<>(); - // open database - try (final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, - columnFamilyHandles)) { - try (final CompactRangeOptions compactRangeOpts = new CompactRangeOptions() - .setChangeLevel(true) - .setTargetLevel(-1) - .setTargetPathId(0)) { - // fill database with key/value pairs - byte[] b = new byte[10000]; - for (int i = 0; i < 200; i++) { - rand.nextBytes(b); - db.put(columnFamilyHandles.get(1), - String.valueOf(i).getBytes(), b); - } - db.compactRange(columnFamilyHandles.get(1), "0".getBytes(), - "201".getBytes(), compactRangeOpts); - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } - } - - @Test - public void compactRangeToLevel() - throws RocksDBException, InterruptedException { - final int NUM_KEYS_PER_L0_FILE = 100; - final int KEY_SIZE = 20; - final int VALUE_SIZE = 300; - final int L0_FILE_SIZE = - NUM_KEYS_PER_L0_FILE * (KEY_SIZE + VALUE_SIZE); - final int NUM_L0_FILES = 10; - final int TEST_SCALE = 5; - final int KEY_INTERVAL = 100; - try (final Options opt = new Options(). - setCreateIfMissing(true). - setCompactionStyle(CompactionStyle.LEVEL). - setNumLevels(5). - // a slightly bigger write buffer than L0 file - // so that we can ensure manual flush always - // go before background flush happens. - setWriteBufferSize(L0_FILE_SIZE * 2). - // Disable auto L0 -> L1 compaction - setLevelZeroFileNumCompactionTrigger(20). - setTargetFileSizeBase(L0_FILE_SIZE * 100). - setTargetFileSizeMultiplier(1). - // To disable auto compaction - setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100). - setMaxBytesForLevelMultiplier(2). 
- setDisableAutoCompactions(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath()) - ) { - // fill database with key/value pairs - byte[] value = new byte[VALUE_SIZE]; - int int_key = 0; - for (int round = 0; round < 5; ++round) { - int initial_key = int_key; - for (int f = 1; f <= NUM_L0_FILES; ++f) { - for (int i = 0; i < NUM_KEYS_PER_L0_FILE; ++i) { - int_key += KEY_INTERVAL; - rand.nextBytes(value); - - db.put(String.format("%020d", int_key).getBytes(), - value); - } - db.flush(new FlushOptions().setWaitForFlush(true)); - // Make sure we do create one more L0 files. - assertThat( - db.getProperty("rocksdb.num-files-at-level0")). - isEqualTo("" + f); - } - - // Compact all L0 files we just created - db.compactRange( - String.format("%020d", initial_key).getBytes(), - String.format("%020d", int_key - 1).getBytes()); - // Making sure there isn't any L0 files. - assertThat( - db.getProperty("rocksdb.num-files-at-level0")). - isEqualTo("0"); - // Making sure there are some L1 files. - // Here we only use != 0 instead of a specific number - // as we don't want the test make any assumption on - // how compaction works. - assertThat( - db.getProperty("rocksdb.num-files-at-level1")). - isNotEqualTo("0"); - // Because we only compacted those keys we issued - // in this round, there shouldn't be any L1 -> L2 - // compaction. So we expect zero L2 files here. - assertThat( - db.getProperty("rocksdb.num-files-at-level2")). - isEqualTo("0"); - } - } - } - - @Test - public void deleteFilesInRange() throws RocksDBException, InterruptedException { - final int KEY_SIZE = 20; - final int VALUE_SIZE = 1000; - final int FILE_SIZE = 64000; - final int NUM_FILES = 10; - - final int KEY_INTERVAL = 10000; - /* - * Intention of these options is to end up reliably with 10 files - * we will be deleting using deleteFilesInRange. - * It is writing roughly number of keys that will fit in 10 files (target size) - * It is writing interleaved so that files from memory on L0 will overlap - * Then compaction cleans everything and we should end up with 10 files - */ - try (final Options opt = new Options() - .setCreateIfMissing(true) - .setCompressionType(CompressionType.NO_COMPRESSION) - .setTargetFileSizeBase(FILE_SIZE) - .setWriteBufferSize(FILE_SIZE / 2) - .setDisableAutoCompactions(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - int records = FILE_SIZE / (KEY_SIZE + VALUE_SIZE); - - // fill database with key/value pairs - byte[] value = new byte[VALUE_SIZE]; - int key_init = 0; - for (int o = 0; o < NUM_FILES; ++o) { - int int_key = key_init++; - for (int i = 0; i < records; ++i) { - int_key += KEY_INTERVAL; - rand.nextBytes(value); - - db.put(String.format("%020d", int_key).getBytes(), value); - } - } - db.flush(new FlushOptions().setWaitForFlush(true)); - db.compactRange(); - // Make sure we do create one more L0 files. - assertThat(db.getProperty("rocksdb.num-files-at-level0")).isEqualTo("0"); - - // Should be 10, but we are OK with asserting +- 2 - int files = Integer.parseInt(db.getProperty("rocksdb.num-files-at-level1")); - assertThat(files).isBetween(8, 12); - - // Delete lower 60% (roughly). 
Result should be 5, but we are OK with asserting +- 2 - // Important is that we know something was deleted (JNI call did something) - // Exact assertions are done in C++ unit tests - db.deleteFilesInRanges(null, - Arrays.asList(null, String.format("%020d", records * KEY_INTERVAL * 6 / 10).getBytes()), - false); - files = Integer.parseInt(db.getProperty("rocksdb.num-files-at-level1")); - assertThat(files).isBetween(3, 7); - } - } - - @Test - public void compactRangeToLevelColumnFamily() - throws RocksDBException { - final int NUM_KEYS_PER_L0_FILE = 100; - final int KEY_SIZE = 20; - final int VALUE_SIZE = 300; - final int L0_FILE_SIZE = - NUM_KEYS_PER_L0_FILE * (KEY_SIZE + VALUE_SIZE); - final int NUM_L0_FILES = 10; - final int TEST_SCALE = 5; - final int KEY_INTERVAL = 100; - - try (final DBOptions opt = new DBOptions(). - setCreateIfMissing(true). - setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions(). - setCompactionStyle(CompactionStyle.LEVEL). - setNumLevels(5). - // a slightly bigger write buffer than L0 file - // so that we can ensure manual flush always - // go before background flush happens. - setWriteBufferSize(L0_FILE_SIZE * 2). - // Disable auto L0 -> L1 compaction - setLevelZeroFileNumCompactionTrigger(20). - setTargetFileSizeBase(L0_FILE_SIZE * 100). - setTargetFileSizeMultiplier(1). - // To disable auto compaction - setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100). - setMaxBytesForLevelMultiplier(2). - setDisableAutoCompactions(true) - ) { - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) - ); - - final List columnFamilyHandles = new ArrayList<>(); - // open database - try (final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, - columnFamilyHandles)) { - try { - // fill database with key/value pairs - byte[] value = new byte[VALUE_SIZE]; - int int_key = 0; - for (int round = 0; round < 5; ++round) { - int initial_key = int_key; - for (int f = 1; f <= NUM_L0_FILES; ++f) { - for (int i = 0; i < NUM_KEYS_PER_L0_FILE; ++i) { - int_key += KEY_INTERVAL; - rand.nextBytes(value); - - db.put(columnFamilyHandles.get(1), - String.format("%020d", int_key).getBytes(), - value); - } - db.flush(new FlushOptions().setWaitForFlush(true), - columnFamilyHandles.get(1)); - // Make sure we do create one more L0 files. - assertThat( - db.getProperty(columnFamilyHandles.get(1), - "rocksdb.num-files-at-level0")). - isEqualTo("" + f); - } - - // Compact all L0 files we just created - db.compactRange( - columnFamilyHandles.get(1), - String.format("%020d", initial_key).getBytes(), - String.format("%020d", int_key - 1).getBytes()); - // Making sure there isn't any L0 files. - assertThat( - db.getProperty(columnFamilyHandles.get(1), - "rocksdb.num-files-at-level0")). - isEqualTo("0"); - // Making sure there are some L1 files. - // Here we only use != 0 instead of a specific number - // as we don't want the test make any assumption on - // how compaction works. - assertThat( - db.getProperty(columnFamilyHandles.get(1), - "rocksdb.num-files-at-level1")). - isNotEqualTo("0"); - // Because we only compacted those keys we issued - // in this round, there shouldn't be any L1 -> L2 - // compaction. So we expect zero L2 files here. - assertThat( - db.getProperty(columnFamilyHandles.get(1), - "rocksdb.num-files-at-level2")). 
- isEqualTo("0"); - } - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } - } - - @Test - public void continueBackgroundWorkAfterCancelAllBackgroundWork() throws RocksDBException { - final int KEY_SIZE = 20; - final int VALUE_SIZE = 300; - try (final DBOptions opt = new DBOptions(). - setCreateIfMissing(true). - setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions() - ) { - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) - ); - - final List columnFamilyHandles = new ArrayList<>(); - // open the database - try (final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, - columnFamilyHandles)) { - try { - db.cancelAllBackgroundWork(true); - try { - db.put(new byte[KEY_SIZE], new byte[VALUE_SIZE]); - db.flush(new FlushOptions().setWaitForFlush(true)); - fail("Expected RocksDBException to be thrown if we attempt to trigger a flush after" + - " all background work is cancelled."); - } catch (RocksDBException ignored) { } - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } - } - - @Test - public void cancelAllBackgroundWorkTwice() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()) - ) { - // Cancel all background work synchronously - db.cancelAllBackgroundWork(true); - // Cancel all background work asynchronously - db.cancelAllBackgroundWork(false); - } - } - - @Test - public void pauseContinueBackgroundWork() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()) - ) { - db.pauseBackgroundWork(); - db.continueBackgroundWork(); - db.pauseBackgroundWork(); - db.continueBackgroundWork(); - } - } - - @Test - public void enableDisableFileDeletions() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()) - ) { - db.disableFileDeletions(); - db.enableFileDeletions(false); - db.disableFileDeletions(); - db.enableFileDeletions(true); - } - } - - @Test - public void setOptions() throws RocksDBException { - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions() - .setWriteBufferSize(4096)) { - - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts)); - - // open database - final List columnFamilyHandles = new ArrayList<>(); - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { - try { - final MutableColumnFamilyOptions mutableOptions = - MutableColumnFamilyOptions.builder() - .setWriteBufferSize(2048) - .build(); - - db.setOptions(columnFamilyHandles.get(1), mutableOptions); - - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } - } - - @Test - public void destroyDB() throws RocksDBException { - try (final 
Options options = new Options().setCreateIfMissing(true)) { - String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.put("key1".getBytes(), "value".getBytes()); - } - assertThat(dbFolder.getRoot().exists() && dbFolder.getRoot().listFiles().length != 0) - .isTrue(); - RocksDB.destroyDB(dbPath, options); - assertThat(dbFolder.getRoot().exists() && dbFolder.getRoot().listFiles().length != 0) - .isFalse(); - } - } - - @Test(expected = RocksDBException.class) - public void destroyDBFailIfOpen() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - // Fails as the db is open and locked. - RocksDB.destroyDB(dbPath, options); - } - } - } - - @Test - public void getApproximateSizes() throws RocksDBException { - final byte key1[] = "key1".getBytes(UTF_8); - final byte key2[] = "key2".getBytes(UTF_8); - final byte key3[] = "key3".getBytes(UTF_8); - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.put(key1, key1); - db.put(key2, key2); - db.put(key3, key3); - - final long[] sizes = db.getApproximateSizes( - Arrays.asList( - new Range(new Slice(key1), new Slice(key1)), - new Range(new Slice(key2), new Slice(key3)) - ), - SizeApproximationFlag.INCLUDE_FILES, - SizeApproximationFlag.INCLUDE_MEMTABLES); - - assertThat(sizes.length).isEqualTo(2); - assertThat(sizes[0]).isEqualTo(0); - assertThat(sizes[1]).isGreaterThanOrEqualTo(1); - } - } - } - - @Test - public void getApproximateMemTableStats() throws RocksDBException { - final byte key1[] = "key1".getBytes(UTF_8); - final byte key2[] = "key2".getBytes(UTF_8); - final byte key3[] = "key3".getBytes(UTF_8); - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.put(key1, key1); - db.put(key2, key2); - db.put(key3, key3); - - final RocksDB.CountAndSize stats = - db.getApproximateMemTableStats( - new Range(new Slice(key1), new Slice(key3))); - - assertThat(stats).isNotNull(); - assertThat(stats.count).isGreaterThan(1); - assertThat(stats.size).isGreaterThan(1); - } - } - } - - @Test - public void getApproximateMemTableStatsSingleKey() throws RocksDBException { - final byte key1[] = "key1".getBytes(UTF_8); - final byte key2[] = "key2".getBytes(UTF_8); - final byte key3[] = "key3".getBytes(UTF_8); - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.put(key1, key1); - - final RocksDB.CountAndSize stats = - db.getApproximateMemTableStats(new Range(new Slice(key1), new Slice(key3))); - - assertThat(stats).isNotNull(); - assertThat(stats.count).isEqualTo(1); - assertThat(stats.size).isGreaterThan(1); - } - } - } - - @Ignore("TODO(AR) re-enable when ready!") - @Test - public void compactFiles() throws RocksDBException { - final int kTestKeySize = 16; - final int kTestValueSize = 984; - final int kEntrySize = kTestKeySize + kTestValueSize; - final int kEntriesPerBuffer = 100; - final int writeBufferSize = kEntrySize * kEntriesPerBuffer; - final byte[] cfName = "pikachu".getBytes(UTF_8); - - try (final Options 
options = new Options() - .setCreateIfMissing(true) - .setWriteBufferSize(writeBufferSize) - .setCompactionStyle(CompactionStyle.LEVEL) - .setTargetFileSizeBase(writeBufferSize) - .setMaxBytesForLevelBase(writeBufferSize * 2) - .setLevel0StopWritesTrigger(2) - .setMaxBytesForLevelMultiplier(2) - .setCompressionType(CompressionType.NO_COMPRESSION) - .setMaxSubcompactions(4)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath); - final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(options)) { - db.createColumnFamily(new ColumnFamilyDescriptor(cfName, - cfOptions)).close(); - } - - try (final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(options)) { - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOptions), - new ColumnFamilyDescriptor(cfName, cfOptions) - ); - final List cfHandles = new ArrayList<>(); - try (final DBOptions dbOptions = new DBOptions(options); - final RocksDB db = RocksDB.open(dbOptions, dbPath, cfDescriptors, - cfHandles); - ) { - try (final FlushOptions flushOptions = new FlushOptions() - .setWaitForFlush(true) - .setAllowWriteStall(true); - final CompactionOptions compactionOptions = new CompactionOptions()) { - final Random rnd = new Random(301); - for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) { - final byte[] value = new byte[kTestValueSize]; - rnd.nextBytes(value); - db.put(cfHandles.get(1), Integer.toString(key).getBytes(UTF_8), - value); - } - db.flush(flushOptions, cfHandles); - - final RocksDB.LiveFiles liveFiles = db.getLiveFiles(); - final List compactedFiles = - db.compactFiles(compactionOptions, cfHandles.get(1), - liveFiles.files, 1, -1, null); - assertThat(compactedFiles).isNotEmpty(); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - } - } - } - - @Test - public void enableAutoCompaction() throws RocksDBException { - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true)) { - final List cfDescs = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY) - ); - final List cfHandles = new ArrayList<>(); - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { - try { - db.enableAutoCompaction(cfHandles); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - } - } - - @Test - public void numberLevels() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - assertThat(db.numberLevels()).isEqualTo(7); - } - } - } - - @Test - public void maxMemCompactionLevel() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - assertThat(db.maxMemCompactionLevel()).isEqualTo(0); - } - } - } - - @Test - public void level0StopWriteTrigger() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - assertThat(db.level0StopWriteTrigger()).isEqualTo(36); - } - } - } - - @Test - public void getName() throws 
RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - assertThat(db.getName()).isEqualTo(dbPath); - } - } - } - - @Test - public void getEnv() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - assertThat(db.getEnv()).isEqualTo(Env.getDefault()); - } - } - } - - @Test - public void flush() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath); - final FlushOptions flushOptions = new FlushOptions()) { - db.flush(flushOptions); - } - } - } - - @Test - public void flushWal() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.flushWal(true); - } - } - } - - @Test - public void syncWal() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.syncWal(); - } - } - } - - @Test - public void getLiveFiles() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - final RocksDB.LiveFiles livefiles = db.getLiveFiles(true); - assertThat(livefiles).isNotNull(); - assertThat(livefiles.manifestFileSize).isEqualTo(66); - assertThat(livefiles.files.size()).isEqualTo(3); - assertThat(livefiles.files.get(0)).isEqualTo("/CURRENT"); - assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000005"); - assertThat(livefiles.files.get(2)).isEqualTo("/OPTIONS-000007"); - } - } - } - - @Test - public void getSortedWalFiles() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.put("key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - final List logFiles = db.getSortedWalFiles(); - assertThat(logFiles).isNotNull(); - assertThat(logFiles.size()).isEqualTo(1); - assertThat(logFiles.get(0).type()) - .isEqualTo(WalFileType.kAliveLogFile); - } - } - } - - @Test - public void deleteFile() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.deleteFile("unknown"); - } - } - } - - @Test - public void getLiveFilesMetaData() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.put("key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - final List liveFilesMetaData - = db.getLiveFilesMetaData(); - assertThat(liveFilesMetaData).isEmpty(); - } - } - } - - @Test - public void getColumnFamilyMetaData() throws RocksDBException { - try (final 
DBOptions options = new DBOptions() - .setCreateIfMissing(true)) { - final List cfDescs = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY) - ); - final List cfHandles = new ArrayList<>(); - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { - db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - try { - final ColumnFamilyMetaData cfMetadata = - db.getColumnFamilyMetaData(cfHandles.get(0)); - assertThat(cfMetadata).isNotNull(); - assertThat(cfMetadata.name()).isEqualTo(RocksDB.DEFAULT_COLUMN_FAMILY); - assertThat(cfMetadata.levels().size()).isEqualTo(7); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - } - } - - @Test - public void verifyChecksum() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.verifyChecksum(); - } - } - } - - @Test - public void getPropertiesOfAllTables() throws RocksDBException { - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true)) { - final List cfDescs = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY) - ); - final List cfHandles = new ArrayList<>(); - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { - db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - try { - final Map properties = - db.getPropertiesOfAllTables(cfHandles.get(0)); - assertThat(properties).isNotNull(); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - } - } - - @Test - public void getPropertiesOfTablesInRange() throws RocksDBException { - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true)) { - final List cfDescs = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY) - ); - final List cfHandles = new ArrayList<>(); - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { - db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - db.put(cfHandles.get(0), "key2".getBytes(UTF_8), "value2".getBytes(UTF_8)); - db.put(cfHandles.get(0), "key3".getBytes(UTF_8), "value3".getBytes(UTF_8)); - try { - final Range range = new Range( - new Slice("key1".getBytes(UTF_8)), - new Slice("key3".getBytes(UTF_8))); - final Map properties = - db.getPropertiesOfTablesInRange( - cfHandles.get(0), Arrays.asList(range)); - assertThat(properties).isNotNull(); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - } - } - - @Test - public void suggestCompactRange() throws RocksDBException { - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true)) { - final List cfDescs = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY) - ); - final List cfHandles = new ArrayList<>(); - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { - db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - db.put(cfHandles.get(0), "key2".getBytes(UTF_8), "value2".getBytes(UTF_8)); - db.put(cfHandles.get(0), "key3".getBytes(UTF_8), 
"value3".getBytes(UTF_8)); - try { - final Range range = db.suggestCompactRange(cfHandles.get(0)); - assertThat(range).isNotNull(); - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - } - } - - @Test - public void promoteL0() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - db.promoteL0(2); - } - } - } - - @Test - public void startTrace() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true)) { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - try (final RocksDB db = RocksDB.open(options, dbPath)) { - final TraceOptions traceOptions = new TraceOptions(); - - try (final InMemoryTraceWriter traceWriter = new InMemoryTraceWriter()) { - db.startTrace(traceOptions, traceWriter); - - db.put("key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - - db.endTrace(); - - final List writes = traceWriter.getWrites(); - assertThat(writes.size()).isGreaterThan(0); - } - } - } - } - - @Test - public void setDBOptions() throws RocksDBException { - try (final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions() - .setWriteBufferSize(4096)) { - - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts)); - - // open database - final List columnFamilyHandles = new ArrayList<>(); - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { - try { - final MutableDBOptions mutableOptions = - MutableDBOptions.builder() - .setBytesPerSync(1024 * 1027 * 7) - .setAvoidFlushDuringShutdown(false) - .build(); - - db.setDBOptions(mutableOptions); - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } - } - - @Test - public void rocksdbVersion() { - final RocksDB.Version version = RocksDB.rocksdbVersion(); - assertThat(version).isNotNull(); - assertThat(version.getMajor()).isGreaterThan(1); - } - - private static class InMemoryTraceWriter extends AbstractTraceWriter { - private final List writes = new ArrayList<>(); - private volatile boolean closed = false; - - @Override - public void write(final Slice slice) { - if (closed) { - return; - } - final byte[] data = slice.data(); - final byte[] dataCopy = new byte[data.length]; - System.arraycopy(data, 0, dataCopy, 0, data.length); - writes.add(dataCopy); - } - - @Override - public void closeWriter() { - closed = true; - } - - @Override - public long getFileSize() { - long size = 0; - for (int i = 0; i < writes.size(); i++) { - size += writes.get(i).length; - } - return size; - } - - public List getWrites() { - return writes; - } - } -} diff --git a/java/src/test/java/org/rocksdb/RocksIteratorTest.java b/java/src/test/java/org/rocksdb/RocksIteratorTest.java deleted file mode 100644 index 2a13550b7..000000000 --- a/java/src/test/java/org/rocksdb/RocksIteratorTest.java +++ /dev/null @@ -1,289 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class RocksIteratorTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - private void validateByteBufferResult( - final int fetched, final ByteBuffer byteBuffer, final String expected) { - assertThat(fetched).isEqualTo(expected.length()); - assertThat(byteBuffer.position()).isEqualTo(0); - assertThat(byteBuffer.limit()).isEqualTo(Math.min(byteBuffer.remaining(), expected.length())); - final int bufferSpace = byteBuffer.remaining(); - final byte[] contents = new byte[bufferSpace]; - byteBuffer.get(contents, 0, bufferSpace); - assertThat(contents).isEqualTo( - expected.substring(0, bufferSpace).getBytes(StandardCharsets.UTF_8)); - } - - private void validateKey( - final RocksIterator iterator, final ByteBuffer byteBuffer, final String key) { - validateByteBufferResult(iterator.key(byteBuffer), byteBuffer, key); - } - - private void validateValue( - final RocksIterator iterator, final ByteBuffer byteBuffer, final String value) { - validateByteBufferResult(iterator.value(byteBuffer), byteBuffer, value); - } - - @Test - public void rocksIterator() throws RocksDBException { - try (final Options options = - new Options().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1".getBytes()); - db.put("key2".getBytes(), "value2".getBytes()); - - try (final RocksIterator iterator = db.newIterator()) { - iterator.seekToFirst(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - - validateKey(iterator, ByteBuffer.allocateDirect(2), "key1"); - validateKey(iterator, ByteBuffer.allocateDirect(2), "key0"); - validateKey(iterator, ByteBuffer.allocateDirect(4), "key1"); - validateKey(iterator, ByteBuffer.allocateDirect(5), "key1"); - validateValue(iterator, ByteBuffer.allocateDirect(2), "value2"); - validateValue(iterator, ByteBuffer.allocateDirect(2), "vasicu"); - validateValue(iterator, ByteBuffer.allocateDirect(8), "value1"); - - validateKey(iterator, ByteBuffer.allocate(2), "key1"); - validateKey(iterator, ByteBuffer.allocate(2), "key0"); - validateKey(iterator, ByteBuffer.allocate(4), "key1"); - validateKey(iterator, ByteBuffer.allocate(5), "key1"); - validateValue(iterator, ByteBuffer.allocate(2), "value1"); - validateValue(iterator, ByteBuffer.allocate(8), "value1"); - - iterator.next(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - assertThat(iterator.value()).isEqualTo("value2".getBytes()); - iterator.next(); - assertThat(iterator.isValid()).isFalse(); - iterator.seekToLast(); - iterator.prev(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - 
iterator.seekToFirst(); - iterator.seekToLast(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - assertThat(iterator.value()).isEqualTo("value2".getBytes()); - iterator.status(); - - { - final ByteBuffer key = ByteBuffer.allocate(12); - key.put("key1".getBytes()).flip(); - iterator.seek(key); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - assertThat(key.position()).isEqualTo(4); - assertThat(key.limit()).isEqualTo(4); - - validateValue(iterator, ByteBuffer.allocateDirect(12), "value1"); - validateValue(iterator, ByteBuffer.allocateDirect(4), "valu56"); - } - - { - final ByteBuffer key = ByteBuffer.allocate(12); - key.put("key2".getBytes()).flip(); - iterator.seekForPrev(key); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.value()).isEqualTo("value2".getBytes()); - assertThat(key.position()).isEqualTo(4); - assertThat(key.limit()).isEqualTo(4); - } - - { - final ByteBuffer key = ByteBuffer.allocate(12); - key.put("key1".getBytes()).flip(); - iterator.seek(key); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - assertThat(key.position()).isEqualTo(4); - assertThat(key.limit()).isEqualTo(4); - } - - { - // Check offsets of slice byte buffers - final ByteBuffer key0 = ByteBuffer.allocate(24); - key0.put("key2key2".getBytes()); - final ByteBuffer key = key0.slice(); - key.put("key1".getBytes()).flip(); - iterator.seek(key); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - assertThat(key.position()).isEqualTo(4); - assertThat(key.limit()).isEqualTo(4); - } - - { - // Check offsets of slice byte buffers - final ByteBuffer key0 = ByteBuffer.allocateDirect(24); - key0.put("key2key2".getBytes()); - final ByteBuffer key = key0.slice(); - key.put("key1".getBytes()).flip(); - iterator.seek(key); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - assertThat(key.position()).isEqualTo(4); - assertThat(key.limit()).isEqualTo(4); - } - - { - final ByteBuffer key = ByteBuffer.allocate(12); - key.put("key2".getBytes()).flip(); - iterator.seekForPrev(key); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.value()).isEqualTo("value2".getBytes()); - assertThat(key.position()).isEqualTo(4); - assertThat(key.limit()).isEqualTo(4); - } - } - } - } - - @Test - public void rocksIteratorSeekAndInsert() throws RocksDBException { - try (final Options options = - new Options().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1".getBytes()); - db.put("key2".getBytes(), "value2".getBytes()); - - try (final RocksIterator iterator = db.newIterator()) { - iterator.seek("key0".getBytes()); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - - iterator.seek("key1".getBytes()); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - - iterator.seek("key1.5".getBytes()); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - - iterator.seek("key2".getBytes()); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - - iterator.seek("key3".getBytes()); - 
assertThat(iterator.isValid()).isFalse(); - } - - try (final RocksIterator iterator = db.newIterator()) { - iterator.seekForPrev("key0".getBytes()); - assertThat(iterator.isValid()).isFalse(); - - iterator.seekForPrev("key1".getBytes()); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - - iterator.seekForPrev("key1.5".getBytes()); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - - iterator.seekForPrev("key2".getBytes()); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - - iterator.seekForPrev("key3".getBytes()); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - } - - try (final RocksIterator iterator = db.newIterator()) { - iterator.seekToFirst(); - assertThat(iterator.isValid()).isTrue(); - - byte[] lastKey; - do { - lastKey = iterator.key(); - iterator.next(); - } while (iterator.isValid()); - - db.put("key3".getBytes(), "value3".getBytes()); - assertThat(iterator.isValid()).isFalse(); - iterator.refresh(); - iterator.seek(lastKey); - assertThat(iterator.isValid()).isTrue(); - - iterator.next(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key3".getBytes()); - } - } - } - - @Test - public void rocksIteratorReleaseAfterCfClose() throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(options, - this.dbFolder.getRoot().getAbsolutePath())) { - db.put("key".getBytes(), "value".getBytes()); - - // Test case: release iterator after default CF close - try (final RocksIterator iterator = db.newIterator()) { - // In fact, calling close() on default CF has no effect - db.getDefaultColumnFamily().close(); - - iterator.seekToFirst(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key".getBytes()); - assertThat(iterator.value()).isEqualTo("value".getBytes()); - } - - // Test case: release iterator after custom CF close - final ColumnFamilyDescriptor cfd1 = new ColumnFamilyDescriptor("cf1".getBytes()); - final ColumnFamilyHandle cfHandle1 = db.createColumnFamily(cfd1); - db.put(cfHandle1, "key1".getBytes(), "value1".getBytes()); - - try (final RocksIterator iterator = db.newIterator(cfHandle1)) { - cfHandle1.close(); - - iterator.seekToFirst(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - } - - // Test case: release iterator after custom CF drop & close - final ColumnFamilyDescriptor cfd2 = new ColumnFamilyDescriptor("cf2".getBytes()); - final ColumnFamilyHandle cfHandle2 = db.createColumnFamily(cfd2); - db.put(cfHandle2, "key2".getBytes(), "value2".getBytes()); - - try (final RocksIterator iterator = db.newIterator(cfHandle2)) { - db.dropColumnFamily(cfHandle2); - cfHandle2.close(); - - iterator.seekToFirst(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - assertThat(iterator.value()).isEqualTo("value2".getBytes()); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/RocksMemEnvTest.java b/java/src/test/java/org/rocksdb/RocksMemEnvTest.java deleted file mode 100644 index cce0c61e0..000000000 --- a/java/src/test/java/org/rocksdb/RocksMemEnvTest.java +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) 2011-present, 
Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class RocksMemEnvTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void memEnvFillAndReopen() throws RocksDBException { - - final byte[][] keys = { - "aaa".getBytes(), - "bbb".getBytes(), - "ccc".getBytes() - }; - - final byte[][] values = { - "foo".getBytes(), - "bar".getBytes(), - "baz".getBytes() - }; - - try (final Env env = new RocksMemEnv(Env.getDefault()); - final Options options = new Options() - .setCreateIfMissing(true) - .setEnv(env); - final FlushOptions flushOptions = new FlushOptions() - .setWaitForFlush(true); - ) { - try (final RocksDB db = RocksDB.open(options, "/dir/db")) { - // write key/value pairs using MemEnv - for (int i = 0; i < keys.length; i++) { - db.put(keys[i], values[i]); - } - - // read key/value pairs using MemEnv - for (int i = 0; i < keys.length; i++) { - assertThat(db.get(keys[i])).isEqualTo(values[i]); - } - - // Check iterator access - try (final RocksIterator iterator = db.newIterator()) { - iterator.seekToFirst(); - for (int i = 0; i < keys.length; i++) { - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo(keys[i]); - assertThat(iterator.value()).isEqualTo(values[i]); - iterator.next(); - } - // reached end of database - assertThat(iterator.isValid()).isFalse(); - } - - // flush - db.flush(flushOptions); - - // read key/value pairs after flush using MemEnv - for (int i = 0; i < keys.length; i++) { - assertThat(db.get(keys[i])).isEqualTo(values[i]); - } - } - - options.setCreateIfMissing(false); - - // After reopen the values shall still be in the mem env. - // as long as the env is not freed. - try (final RocksDB db = RocksDB.open(options, "/dir/db")) { - // read key/value pairs using MemEnv - for (int i = 0; i < keys.length; i++) { - assertThat(db.get(keys[i])).isEqualTo(values[i]); - } - } - } - } - - @Test - public void multipleDatabaseInstances() throws RocksDBException { - // db - keys - final byte[][] keys = { - "aaa".getBytes(), - "bbb".getBytes(), - "ccc".getBytes() - }; - // otherDb - keys - final byte[][] otherKeys = { - "111".getBytes(), - "222".getBytes(), - "333".getBytes() - }; - // values - final byte[][] values = { - "foo".getBytes(), - "bar".getBytes(), - "baz".getBytes() - }; - - try (final Env env = new RocksMemEnv(Env.getDefault()); - final Options options = new Options().setCreateIfMissing(true).setEnv(env); - final RocksDB db = RocksDB.open(options, "/dir/db"); - final RocksDB otherDb = RocksDB.open(options, "/dir/otherDb")) { - // write key/value pairs using MemEnv - // to db and to otherDb. 
- for (int i = 0; i < keys.length; i++) { - db.put(keys[i], values[i]); - otherDb.put(otherKeys[i], values[i]); - } - - // verify key/value pairs after flush using MemEnv - for (int i = 0; i < keys.length; i++) { - // verify db - assertThat(db.get(otherKeys[i])).isNull(); - assertThat(db.get(keys[i])).isEqualTo(values[i]); - - // verify otherDb - assertThat(otherDb.get(keys[i])).isNull(); - assertThat(otherDb.get(otherKeys[i])).isEqualTo(values[i]); - } - } - } - - @Test(expected = RocksDBException.class) - public void createIfMissingFalse() throws RocksDBException { - try (final Env env = new RocksMemEnv(Env.getDefault()); - final Options options = new Options().setCreateIfMissing(false).setEnv(env); - final RocksDB db = RocksDB.open(options, "/db/dir")) { - // shall throw an exception because db dir does not - // exist. - } - } -} diff --git a/java/src/test/java/org/rocksdb/RocksNativeLibraryResource.java b/java/src/test/java/org/rocksdb/RocksNativeLibraryResource.java deleted file mode 100644 index 6116f2f92..000000000 --- a/java/src/test/java/org/rocksdb/RocksNativeLibraryResource.java +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.rules.ExternalResource; - -/** - * Resource to load the RocksDB JNI library. - */ -public class RocksNativeLibraryResource extends ExternalResource { - @Override - protected void before() { - RocksDB.loadLibrary(); - } -} diff --git a/java/src/test/java/org/rocksdb/SecondaryDBTest.java b/java/src/test/java/org/rocksdb/SecondaryDBTest.java deleted file mode 100644 index 557d4a47d..000000000 --- a/java/src/test/java/org/rocksdb/SecondaryDBTest.java +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
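// Illustrative sketch (not from the RocksDB sources): the in-memory Env pattern that the
// deleted RocksMemEnvTest above exercises. With a RocksMemEnv attached, the database path
// is virtual and all files live in memory for as long as the Env stays open. The class
// name and path below are hypothetical.
import org.rocksdb.*;

public class MemEnvSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Env env = new RocksMemEnv(Env.getDefault());
         final Options options = new Options().setCreateIfMissing(true).setEnv(env)) {
      try (final RocksDB db = RocksDB.open(options, "/in-memory/db")) {
        db.put("k".getBytes(), "v".getBytes());
      }
      // Reopening through the same Env still sees the data; it is lost once the Env closes.
      options.setCreateIfMissing(false);
      try (final RocksDB db = RocksDB.open(options, "/in-memory/db")) {
        System.out.println(new String(db.get("k".getBytes())));   // prints "v"
      }
    }
  }
}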
- -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.ArrayList; -import java.util.List; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class SecondaryDBTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Rule public TemporaryFolder secondaryDbFolder = new TemporaryFolder(); - - @Test - public void openAsSecondary() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1".getBytes()); - db.put("key2".getBytes(), "value2".getBytes()); - db.put("key3".getBytes(), "value3".getBytes()); - - // open secondary - try (final Options secondaryOptions = new Options(); - final RocksDB secondaryDb = - RocksDB.openAsSecondary(secondaryOptions, dbFolder.getRoot().getAbsolutePath(), - secondaryDbFolder.getRoot().getAbsolutePath())) { - assertThat(secondaryDb.get("key1".getBytes())).isEqualTo("value1".getBytes()); - assertThat(secondaryDb.get("key2".getBytes())).isEqualTo("value2".getBytes()); - assertThat(secondaryDb.get("key3".getBytes())).isEqualTo("value3".getBytes()); - - // write to primary - db.put("key4".getBytes(), "value4".getBytes()); - db.put("key5".getBytes(), "value5".getBytes()); - db.put("key6".getBytes(), "value6".getBytes()); - - // tell secondary to catch up - secondaryDb.tryCatchUpWithPrimary(); - - db.put("key7".getBytes(), "value7".getBytes()); - - // check secondary - assertThat(secondaryDb.get("key4".getBytes())).isEqualTo("value4".getBytes()); - assertThat(secondaryDb.get("key5".getBytes())).isEqualTo("value5".getBytes()); - assertThat(secondaryDb.get("key6".getBytes())).isEqualTo("value6".getBytes()); - - assertThat(secondaryDb.get("key7".getBytes())).isNull(); - } - } - } - - @Test - public void openAsSecondaryColumnFamilies() throws RocksDBException { - try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { - final List cfDescriptors = new ArrayList<>(); - cfDescriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts)); - cfDescriptors.add(new ColumnFamilyDescriptor("cf1".getBytes(), cfOpts)); - - final List cfHandles = new ArrayList<>(); - - try (final DBOptions options = - new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open( - options, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, cfHandles)) { - try { - final ColumnFamilyHandle cf1 = cfHandles.get(1); - - db.put(cf1, "key1".getBytes(), "value1".getBytes()); - db.put(cf1, "key2".getBytes(), "value2".getBytes()); - db.put(cf1, "key3".getBytes(), "value3".getBytes()); - - final List secondaryCfHandles = new ArrayList<>(); - - // open secondary - try (final DBOptions secondaryOptions = new DBOptions(); - final RocksDB secondaryDb = - RocksDB.openAsSecondary(secondaryOptions, dbFolder.getRoot().getAbsolutePath(), - secondaryDbFolder.getRoot().getAbsolutePath(), cfDescriptors, - secondaryCfHandles)) { - try { - final ColumnFamilyHandle secondaryCf1 = secondaryCfHandles.get(1); - - assertThat(secondaryDb.get(secondaryCf1, "key1".getBytes())) - .isEqualTo("value1".getBytes()); - assertThat(secondaryDb.get(secondaryCf1, "key2".getBytes())) - .isEqualTo("value2".getBytes()); - 
assertThat(secondaryDb.get(secondaryCf1, "key3".getBytes())) - .isEqualTo("value3".getBytes()); - - // write to primary - db.put(cf1, "key4".getBytes(), "value4".getBytes()); - db.put(cf1, "key5".getBytes(), "value5".getBytes()); - db.put(cf1, "key6".getBytes(), "value6".getBytes()); - - // tell secondary to catch up - secondaryDb.tryCatchUpWithPrimary(); - - db.put(cf1, "key7".getBytes(), "value7".getBytes()); - - // check secondary - assertThat(secondaryDb.get(secondaryCf1, "key4".getBytes())) - .isEqualTo("value4".getBytes()); - assertThat(secondaryDb.get(secondaryCf1, "key5".getBytes())) - .isEqualTo("value5".getBytes()); - assertThat(secondaryDb.get(secondaryCf1, "key6".getBytes())) - .isEqualTo("value6".getBytes()); - - assertThat(secondaryDb.get(secondaryCf1, "key7".getBytes())).isNull(); - - } finally { - for (final ColumnFamilyHandle secondaryCfHandle : secondaryCfHandles) { - secondaryCfHandle.close(); - } - } - } - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - } - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/SliceTest.java b/java/src/test/java/org/rocksdb/SliceTest.java deleted file mode 100644 index c65b01903..000000000 --- a/java/src/test/java/org/rocksdb/SliceTest.java +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class SliceTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void slice() { - try (final Slice slice = new Slice("testSlice")) { - assertThat(slice.empty()).isFalse(); - assertThat(slice.size()).isEqualTo(9); - assertThat(slice.data()).isEqualTo("testSlice".getBytes()); - } - - try (final Slice otherSlice = new Slice("otherSlice".getBytes())) { - assertThat(otherSlice.data()).isEqualTo("otherSlice".getBytes()); - } - - try (final Slice thirdSlice = new Slice("otherSlice".getBytes(), 5)) { - assertThat(thirdSlice.data()).isEqualTo("Slice".getBytes()); - } - } - - @Test - public void sliceClear() { - try (final Slice slice = new Slice("abc")) { - assertThat(slice.toString()).isEqualTo("abc"); - slice.clear(); - assertThat(slice.toString()).isEmpty(); - slice.clear(); // make sure we don't double-free - } - } - - @Test - public void sliceRemovePrefix() { - try (final Slice slice = new Slice("abc")) { - assertThat(slice.toString()).isEqualTo("abc"); - slice.removePrefix(1); - assertThat(slice.toString()).isEqualTo("bc"); - } - } - - @Test - public void sliceEquals() { - try (final Slice slice = new Slice("abc"); - final Slice slice2 = new Slice("abc")) { - assertThat(slice.equals(slice2)).isTrue(); - assertThat(slice.hashCode() == slice2.hashCode()).isTrue(); - } - } - - @Test - public void sliceStartWith() { - try (final Slice slice = new Slice("matchpoint"); - final Slice match = new Slice("mat"); - final Slice noMatch = new Slice("nomatch")) { - assertThat(slice.startsWith(match)).isTrue(); - assertThat(slice.startsWith(noMatch)).isFalse(); - } - } - - @Test - public void sliceToString() { - try (final Slice slice = new Slice("stringTest")) { - assertThat(slice.toString()).isEqualTo("stringTest"); - 
assertThat(slice.toString(true)).isNotEqualTo(""); - } - } -} diff --git a/java/src/test/java/org/rocksdb/SnapshotTest.java b/java/src/test/java/org/rocksdb/SnapshotTest.java deleted file mode 100644 index 11f0d560a..000000000 --- a/java/src/test/java/org/rocksdb/SnapshotTest.java +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import static org.assertj.core.api.Assertions.assertThat; - -public class SnapshotTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void snapshots() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - db.put("key".getBytes(), "value".getBytes()); - // Get new Snapshot of database - try (final Snapshot snapshot = db.getSnapshot()) { - assertThat(snapshot.getSequenceNumber()).isGreaterThan(0); - assertThat(snapshot.getSequenceNumber()).isEqualTo(1); - try (final ReadOptions readOptions = new ReadOptions()) { - // set snapshot in ReadOptions - readOptions.setSnapshot(snapshot); - - // retrieve key value pair - assertThat(new String(db.get("key".getBytes()))). - isEqualTo("value"); - // retrieve key value pair created before - // the snapshot was made - assertThat(new String(db.get(readOptions, - "key".getBytes()))).isEqualTo("value"); - // add new key/value pair - db.put("newkey".getBytes(), "newvalue".getBytes()); - // using no snapshot the latest db entries - // will be taken into account - assertThat(new String(db.get("newkey".getBytes()))). - isEqualTo("newvalue"); - // snapshopot was created before newkey - assertThat(db.get(readOptions, "newkey".getBytes())). - isNull(); - // Retrieve snapshot from read options - try (final Snapshot sameSnapshot = readOptions.snapshot()) { - readOptions.setSnapshot(sameSnapshot); - // results must be the same with new Snapshot - // instance using the same native pointer - assertThat(new String(db.get(readOptions, - "key".getBytes()))).isEqualTo("value"); - // update key value pair to newvalue - db.put("key".getBytes(), "newvalue".getBytes()); - // read with previously created snapshot will - // read previous version of key value pair - assertThat(new String(db.get(readOptions, - "key".getBytes()))).isEqualTo("value"); - // read for newkey using the snapshot must be - // null - assertThat(db.get(readOptions, "newkey".getBytes())). - isNull(); - // setting null to snapshot in ReadOptions leads - // to no Snapshot being used. 
- readOptions.setSnapshot(null); - assertThat(new String(db.get(readOptions, - "newkey".getBytes()))).isEqualTo("newvalue"); - // release Snapshot - db.releaseSnapshot(snapshot); - } - } - } - } - } - - @Test - public void iteratorWithSnapshot() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - db.put("key".getBytes(), "value".getBytes()); - - // Get new Snapshot of database - // set snapshot in ReadOptions - try (final Snapshot snapshot = db.getSnapshot(); - final ReadOptions readOptions = - new ReadOptions().setSnapshot(snapshot)) { - db.put("key2".getBytes(), "value2".getBytes()); - - // iterate over current state of db - try (final RocksIterator iterator = db.newIterator()) { - iterator.seekToFirst(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key".getBytes()); - iterator.next(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - iterator.next(); - assertThat(iterator.isValid()).isFalse(); - } - - // iterate using a snapshot - try (final RocksIterator snapshotIterator = - db.newIterator(readOptions)) { - snapshotIterator.seekToFirst(); - assertThat(snapshotIterator.isValid()).isTrue(); - assertThat(snapshotIterator.key()).isEqualTo("key".getBytes()); - snapshotIterator.next(); - assertThat(snapshotIterator.isValid()).isFalse(); - } - - // release Snapshot - db.releaseSnapshot(snapshot); - } - } - } - - @Test - public void iteratorWithSnapshotOnColumnFamily() throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - - db.put("key".getBytes(), "value".getBytes()); - - // Get new Snapshot of database - // set snapshot in ReadOptions - try (final Snapshot snapshot = db.getSnapshot(); - final ReadOptions readOptions = new ReadOptions() - .setSnapshot(snapshot)) { - db.put("key2".getBytes(), "value2".getBytes()); - - // iterate over current state of column family - try (final RocksIterator iterator = db.newIterator( - db.getDefaultColumnFamily())) { - iterator.seekToFirst(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key".getBytes()); - iterator.next(); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - iterator.next(); - assertThat(iterator.isValid()).isFalse(); - } - - // iterate using a snapshot on default column family - try (final RocksIterator snapshotIterator = db.newIterator( - db.getDefaultColumnFamily(), readOptions)) { - snapshotIterator.seekToFirst(); - assertThat(snapshotIterator.isValid()).isTrue(); - assertThat(snapshotIterator.key()).isEqualTo("key".getBytes()); - snapshotIterator.next(); - assertThat(snapshotIterator.isValid()).isFalse(); - - // release Snapshot - db.releaseSnapshot(snapshot); - } - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/SstFileManagerTest.java b/java/src/test/java/org/rocksdb/SstFileManagerTest.java deleted file mode 100644 index 2e136e820..000000000 --- a/java/src/test/java/org/rocksdb/SstFileManagerTest.java +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
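// Illustrative sketch (not from the RocksDB sources): the snapshot-read pattern that the
// deleted SnapshotTest above exercises. Reads through a ReadOptions carrying a Snapshot see
// the database as it was when the snapshot was taken, regardless of later writes. The class
// name and path below are hypothetical.
import org.rocksdb.*;

public class SnapshotSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/snapshot-sketch-db")) {
      db.put("key".getBytes(), "v1".getBytes());
      try (final Snapshot snapshot = db.getSnapshot();
           final ReadOptions readOptions = new ReadOptions().setSnapshot(snapshot)) {
        db.put("key".getBytes(), "v2".getBytes());        // newer write, invisible to the snapshot
        final byte[] old = db.get(readOptions, "key".getBytes());
        System.out.println(new String(old));              // prints "v1"
        db.releaseSnapshot(snapshot);                     // hand the snapshot back to the DB
      }
    }
  }
}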
- -package org.rocksdb; - -import org.junit.Test; - -import java.util.Collections; - -import static org.assertj.core.api.Assertions.*; - -public class SstFileManagerTest { - - @Test - public void maxAllowedSpaceUsage() throws RocksDBException { - try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) { - sstFileManager.setMaxAllowedSpaceUsage(1024 * 1024 * 64); - assertThat(sstFileManager.isMaxAllowedSpaceReached()).isFalse(); - assertThat(sstFileManager.isMaxAllowedSpaceReachedIncludingCompactions()).isFalse(); - } - } - - @Test - public void compactionBufferSize() throws RocksDBException { - try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) { - sstFileManager.setCompactionBufferSize(1024 * 1024 * 10); - assertThat(sstFileManager.isMaxAllowedSpaceReachedIncludingCompactions()).isFalse(); - } - } - - @Test - public void totalSize() throws RocksDBException { - try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) { - assertThat(sstFileManager.getTotalSize()).isEqualTo(0); - } - } - - @Test - public void trackedFiles() throws RocksDBException { - try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) { - assertThat(sstFileManager.getTrackedFiles()).isEqualTo(Collections.emptyMap()); - } - } - - @Test - public void deleteRateBytesPerSecond() throws RocksDBException { - try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) { - assertThat(sstFileManager.getDeleteRateBytesPerSecond()).isEqualTo(SstFileManager.RATE_BYTES_PER_SEC_DEFAULT); - final long ratePerSecond = 1024 * 1024 * 52; - sstFileManager.setDeleteRateBytesPerSecond(ratePerSecond); - assertThat(sstFileManager.getDeleteRateBytesPerSecond()).isEqualTo(ratePerSecond); - } - } - - @Test - public void maxTrashDBRatio() throws RocksDBException { - try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) { - assertThat(sstFileManager.getMaxTrashDBRatio()).isEqualTo(SstFileManager.MAX_TRASH_DB_RATION_DEFAULT); - final double trashRatio = 0.2; - sstFileManager.setMaxTrashDBRatio(trashRatio); - assertThat(sstFileManager.getMaxTrashDBRatio()).isEqualTo(trashRatio); - } - } -} diff --git a/java/src/test/java/org/rocksdb/SstFileReaderTest.java b/java/src/test/java/org/rocksdb/SstFileReaderTest.java deleted file mode 100644 index e29df99f2..000000000 --- a/java/src/test/java/org/rocksdb/SstFileReaderTest.java +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
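// Illustrative sketch (not from the RocksDB sources): the SstFileManager knobs that the
// deleted SstFileManagerTest above exercises. Attaching the manager to Options via
// setSstFileManager is an assumption of this sketch rather than something shown in the
// deleted test; class name and path are hypothetical.
import org.rocksdb.*;

public class SstFileManagerSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) {
      sstFileManager.setMaxAllowedSpaceUsage(64L * 1024 * 1024);     // cap SST space at 64 MiB
      sstFileManager.setDeleteRateBytesPerSecond(8L * 1024 * 1024);  // throttle file deletion
      try (final Options options = new Options()
               .setCreateIfMissing(true)
               .setSstFileManager(sstFileManager);
           final RocksDB db = RocksDB.open(options, "/tmp/sst-manager-sketch-db")) {
        db.put("k".getBytes(), "v".getBytes());
        db.flush(new FlushOptions().setWaitForFlush(true));
        System.out.println("tracked SST bytes: " + sstFileManager.getTotalSize());
      }
    }
  }
}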
- -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - -import java.io.File; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.rocksdb.util.ByteBufferAllocator; - -@RunWith(Parameterized.class) -public class SstFileReaderTest { - private static final String SST_FILE_NAME = "test.sst"; - - static class KeyValueWithOp { - KeyValueWithOp(final String key, final String value, final OpType opType) { - this.key = key; - this.value = value; - this.opType = opType; - } - - String getKey() { - return key; - } - - String getValue() { - return value; - } - - OpType getOpType() { - return opType; - } - - private final String key; - private final String value; - private final OpType opType; - } - - @Rule public TemporaryFolder parentFolder = new TemporaryFolder(); - - @Parameterized.Parameters(name = "{0}") - public static Iterable parameters() { - return Arrays.asList(new Object[][] { - {"direct", ByteBufferAllocator.DIRECT}, {"indirect", ByteBufferAllocator.HEAP}}); - } - - @Parameterized.Parameter(0) public String name; - - @Parameterized.Parameter(1) public ByteBufferAllocator byteBufferAllocator; - - enum OpType { PUT, PUT_BYTES, MERGE, MERGE_BYTES, DELETE, DELETE_BYTES } - - private File newSstFile(final List keyValues) - throws IOException, RocksDBException { - final EnvOptions envOptions = new EnvOptions(); - final StringAppendOperator stringAppendOperator = new StringAppendOperator(); - final Options options = new Options().setMergeOperator(stringAppendOperator); - final SstFileWriter sstFileWriter; - sstFileWriter = new SstFileWriter(envOptions, options); - - final File sstFile = parentFolder.newFile(SST_FILE_NAME); - try { - sstFileWriter.open(sstFile.getAbsolutePath()); - for (final KeyValueWithOp keyValue : keyValues) { - final Slice keySlice = new Slice(keyValue.getKey()); - final Slice valueSlice = new Slice(keyValue.getValue()); - final byte[] keyBytes = keyValue.getKey().getBytes(); - final byte[] valueBytes = keyValue.getValue().getBytes(); - switch (keyValue.getOpType()) { - case PUT: - sstFileWriter.put(keySlice, valueSlice); - break; - case PUT_BYTES: - sstFileWriter.put(keyBytes, valueBytes); - break; - case MERGE: - sstFileWriter.merge(keySlice, valueSlice); - break; - case MERGE_BYTES: - sstFileWriter.merge(keyBytes, valueBytes); - break; - case DELETE: - sstFileWriter.delete(keySlice); - break; - case DELETE_BYTES: - sstFileWriter.delete(keyBytes); - break; - default: - fail("Unsupported op type"); - } - keySlice.close(); - valueSlice.close(); - } - sstFileWriter.finish(); - } finally { - assertThat(sstFileWriter).isNotNull(); - sstFileWriter.close(); - options.close(); - envOptions.close(); - } - return sstFile; - } - - @Test - public void readSstFile() throws RocksDBException, IOException { - final List keyValues = new ArrayList<>(); - keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT)); - keyValues.add(new KeyValueWithOp("key2", "value2", OpType.PUT)); - keyValues.add(new KeyValueWithOp("key3", "value3", OpType.PUT)); - - final File sstFile = newSstFile(keyValues); - try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); - final Options options = - new 
Options().setCreateIfMissing(true).setMergeOperator(stringAppendOperator); - final SstFileReader reader = new SstFileReader(options)) { - // Open the sst file and iterator - reader.open(sstFile.getAbsolutePath()); - final ReadOptions readOptions = new ReadOptions(); - final SstFileReaderIterator iterator = reader.newIterator(readOptions); - - // Use the iterator to read sst file - iterator.seekToFirst(); - - // Verify Checksum - reader.verifyChecksum(); - - // Verify Table Properties - assertEquals(reader.getTableProperties().getNumEntries(), 3); - - // Check key and value - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - - final ByteBuffer byteBuffer = byteBufferAllocator.allocate(128); - byteBuffer.put("key1".getBytes()).flip(); - iterator.seek(byteBuffer); - assertThat(byteBuffer.position()).isEqualTo(4); - assertThat(byteBuffer.limit()).isEqualTo(4); - - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - - { - byteBuffer.clear(); - assertThat(iterator.key(byteBuffer)).isEqualTo("key1".getBytes().length); - final byte[] dst = new byte["key1".getBytes().length]; - byteBuffer.get(dst); - assertThat(new String(dst)).isEqualTo("key1"); - } - - { - byteBuffer.clear(); - byteBuffer.put("PREFIX".getBytes()); - final ByteBuffer slice = byteBuffer.slice(); - assertThat(iterator.key(byteBuffer)).isEqualTo("key1".getBytes().length); - final byte[] dst = new byte["key1".getBytes().length]; - slice.get(dst); - assertThat(new String(dst)).isEqualTo("key1"); - } - - { - byteBuffer.clear(); - assertThat(iterator.value(byteBuffer)).isEqualTo("value1".getBytes().length); - final byte[] dst = new byte["value1".getBytes().length]; - byteBuffer.get(dst); - assertThat(new String(dst)).isEqualTo("value1"); - } - - byteBuffer.clear(); - byteBuffer.put("key1point5".getBytes()).flip(); - iterator.seek(byteBuffer); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - assertThat(iterator.value()).isEqualTo("value2".getBytes()); - - byteBuffer.clear(); - byteBuffer.put("key1point5".getBytes()).flip(); - iterator.seekForPrev(byteBuffer); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - - byteBuffer.clear(); - byteBuffer.put("key2point5".getBytes()).flip(); - iterator.seek(byteBuffer); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key3".getBytes()); - assertThat(iterator.value()).isEqualTo("value3".getBytes()); - - byteBuffer.clear(); - byteBuffer.put("key2point5".getBytes()).flip(); - iterator.seekForPrev(byteBuffer); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key2".getBytes()); - assertThat(iterator.value()).isEqualTo("value2".getBytes()); - - byteBuffer.clear(); - byteBuffer.put("PREFIX".getBytes()); - final ByteBuffer slice = byteBuffer.slice(); - slice.put("key1point5".getBytes()).flip(); - iterator.seekForPrev(slice); - assertThat(iterator.isValid()).isTrue(); - assertThat(iterator.key()).isEqualTo("key1".getBytes()); - assertThat(iterator.value()).isEqualTo("value1".getBytes()); - } - } -} diff --git a/java/src/test/java/org/rocksdb/SstFileWriterTest.java b/java/src/test/java/org/rocksdb/SstFileWriterTest.java deleted file mode 100644 index 87165bfe1..000000000 --- 
a/java/src/test/java/org/rocksdb/SstFileWriterTest.java +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.fail; - -import java.io.File; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.BytewiseComparator; - -public class SstFileWriterTest { - private static final String SST_FILE_NAME = "test.sst"; - private static final String DB_DIRECTORY_NAME = "test_db"; - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE - = new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder parentFolder = new TemporaryFolder(); - - enum OpType { PUT, PUT_BYTES, PUT_DIRECT, MERGE, MERGE_BYTES, DELETE, DELETE_BYTES } - - static class KeyValueWithOp { - KeyValueWithOp(String key, String value, OpType opType) { - this.key = key; - this.value = value; - this.opType = opType; - } - - String getKey() { - return key; - } - - String getValue() { - return value; - } - - OpType getOpType() { - return opType; - } - - private final String key; - private final String value; - private final OpType opType; - }; - - private File newSstFile(final List keyValues, - boolean useJavaBytewiseComparator) throws IOException, RocksDBException { - final EnvOptions envOptions = new EnvOptions(); - final StringAppendOperator stringAppendOperator = new StringAppendOperator(); - final Options options = new Options().setMergeOperator(stringAppendOperator); - SstFileWriter sstFileWriter = null; - ComparatorOptions comparatorOptions = null; - BytewiseComparator comparator = null; - if (useJavaBytewiseComparator) { - comparatorOptions = new ComparatorOptions().setUseDirectBuffer(false); - comparator = new BytewiseComparator(comparatorOptions); - options.setComparator(comparator); - sstFileWriter = new SstFileWriter(envOptions, options); - } else { - sstFileWriter = new SstFileWriter(envOptions, options); - } - - final File sstFile = parentFolder.newFile(SST_FILE_NAME); - try { - sstFileWriter.open(sstFile.getAbsolutePath()); - assertThat(sstFileWriter.fileSize()).isEqualTo(0); - for (KeyValueWithOp keyValue : keyValues) { - Slice keySlice = new Slice(keyValue.getKey()); - Slice valueSlice = new Slice(keyValue.getValue()); - byte[] keyBytes = keyValue.getKey().getBytes(); - byte[] valueBytes = keyValue.getValue().getBytes(); - ByteBuffer keyDirect = ByteBuffer.allocateDirect(keyBytes.length); - keyDirect.put(keyBytes); - keyDirect.flip(); - ByteBuffer valueDirect = ByteBuffer.allocateDirect(valueBytes.length); - valueDirect.put(valueBytes); - valueDirect.flip(); - switch (keyValue.getOpType()) { - case PUT: - sstFileWriter.put(keySlice, valueSlice); - break; - case PUT_BYTES: - sstFileWriter.put(keyBytes, valueBytes); - break; - case PUT_DIRECT: - sstFileWriter.put(keyDirect, valueDirect); - assertThat(keyDirect.position()).isEqualTo(keyBytes.length); - assertThat(keyDirect.limit()).isEqualTo(keyBytes.length); - assertThat(valueDirect.position()).isEqualTo(valueBytes.length); - 
assertThat(valueDirect.limit()).isEqualTo(valueBytes.length); - break; - case MERGE: - sstFileWriter.merge(keySlice, valueSlice); - break; - case MERGE_BYTES: - sstFileWriter.merge(keyBytes, valueBytes); - break; - case DELETE: - sstFileWriter.delete(keySlice); - break; - case DELETE_BYTES: - sstFileWriter.delete(keyBytes); - break; - default: - fail("Unsupported op type"); - } - keySlice.close(); - valueSlice.close(); - } - sstFileWriter.finish(); - assertThat(sstFileWriter.fileSize()).isGreaterThan(100); - } finally { - assertThat(sstFileWriter).isNotNull(); - sstFileWriter.close(); - options.close(); - envOptions.close(); - if (comparatorOptions != null) { - comparatorOptions.close(); - } - if (comparator != null) { - comparator.close(); - } - } - return sstFile; - } - - @Test - public void generateSstFileWithJavaComparator() - throws RocksDBException, IOException { - final List keyValues = new ArrayList<>(); - keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT)); - keyValues.add(new KeyValueWithOp("key2", "value2", OpType.PUT)); - keyValues.add(new KeyValueWithOp("key3", "value3", OpType.MERGE)); - keyValues.add(new KeyValueWithOp("key4", "value4", OpType.MERGE)); - keyValues.add(new KeyValueWithOp("key5", "", OpType.DELETE)); - - newSstFile(keyValues, true); - } - - @Test - public void generateSstFileWithNativeComparator() - throws RocksDBException, IOException { - final List keyValues = new ArrayList<>(); - keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT)); - keyValues.add(new KeyValueWithOp("key2", "value2", OpType.PUT)); - keyValues.add(new KeyValueWithOp("key3", "value3", OpType.MERGE)); - keyValues.add(new KeyValueWithOp("key4", "value4", OpType.MERGE)); - keyValues.add(new KeyValueWithOp("key5", "", OpType.DELETE)); - - newSstFile(keyValues, false); - } - - @Test - public void ingestSstFile() throws RocksDBException, IOException { - final List keyValues = new ArrayList<>(); - keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT)); - keyValues.add(new KeyValueWithOp("key2", "value2", OpType.PUT_DIRECT)); - keyValues.add(new KeyValueWithOp("key3", "value3", OpType.PUT_BYTES)); - keyValues.add(new KeyValueWithOp("key4", "value4", OpType.MERGE)); - keyValues.add(new KeyValueWithOp("key5", "value5", OpType.MERGE_BYTES)); - keyValues.add(new KeyValueWithOp("key6", "", OpType.DELETE)); - keyValues.add(new KeyValueWithOp("key7", "", OpType.DELETE)); - - - final File sstFile = newSstFile(keyValues, false); - final File dbFolder = parentFolder.newFolder(DB_DIRECTORY_NAME); - try(final StringAppendOperator stringAppendOperator = - new StringAppendOperator(); - final Options options = new Options() - .setCreateIfMissing(true) - .setMergeOperator(stringAppendOperator); - final RocksDB db = RocksDB.open(options, dbFolder.getAbsolutePath()); - final IngestExternalFileOptions ingestExternalFileOptions = - new IngestExternalFileOptions()) { - db.ingestExternalFile(Arrays.asList(sstFile.getAbsolutePath()), - ingestExternalFileOptions); - - assertThat(db.get("key1".getBytes())).isEqualTo("value1".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo("value2".getBytes()); - assertThat(db.get("key3".getBytes())).isEqualTo("value3".getBytes()); - assertThat(db.get("key4".getBytes())).isEqualTo("value4".getBytes()); - assertThat(db.get("key5".getBytes())).isEqualTo("value5".getBytes()); - assertThat(db.get("key6".getBytes())).isEqualTo(null); - assertThat(db.get("key7".getBytes())).isEqualTo(null); - } - } - - @Test - public void ingestSstFile_cf() throws 
RocksDBException, IOException { - final List keyValues = new ArrayList<>(); - keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT)); - keyValues.add(new KeyValueWithOp("key2", "value2", OpType.PUT)); - keyValues.add(new KeyValueWithOp("key3", "value3", OpType.MERGE)); - keyValues.add(new KeyValueWithOp("key4", "", OpType.DELETE)); - - final File sstFile = newSstFile(keyValues, false); - final File dbFolder = parentFolder.newFolder(DB_DIRECTORY_NAME); - try(final StringAppendOperator stringAppendOperator = - new StringAppendOperator(); - final Options options = new Options() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true) - .setMergeOperator(stringAppendOperator); - final RocksDB db = RocksDB.open(options, dbFolder.getAbsolutePath()); - final IngestExternalFileOptions ingestExternalFileOptions = - new IngestExternalFileOptions()) { - - try(final ColumnFamilyOptions cf_opts = new ColumnFamilyOptions() - .setMergeOperator(stringAppendOperator); - final ColumnFamilyHandle cf_handle = db.createColumnFamily( - new ColumnFamilyDescriptor("new_cf".getBytes(), cf_opts))) { - - db.ingestExternalFile(cf_handle, - Arrays.asList(sstFile.getAbsolutePath()), - ingestExternalFileOptions); - - assertThat(db.get(cf_handle, - "key1".getBytes())).isEqualTo("value1".getBytes()); - assertThat(db.get(cf_handle, - "key2".getBytes())).isEqualTo("value2".getBytes()); - assertThat(db.get(cf_handle, - "key3".getBytes())).isEqualTo("value3".getBytes()); - assertThat(db.get(cf_handle, - "key4".getBytes())).isEqualTo(null); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/SstPartitionerTest.java b/java/src/test/java/org/rocksdb/SstPartitionerTest.java deleted file mode 100644 index 74816db93..000000000 --- a/java/src/test/java/org/rocksdb/SstPartitionerTest.java +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
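// Illustrative sketch (not from the RocksDB sources): the write-then-ingest flow that the
// deleted SstFileWriterTest above exercises. Keys must be added to the SstFileWriter in
// ascending order, and the finished file is handed to RocksDB with ingestExternalFile().
// The class name and paths below are hypothetical.
import java.util.Collections;
import org.rocksdb.*;

public class SstIngestSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    final String sstPath = "/tmp/ingest-sketch.sst";
    try (final EnvOptions envOptions = new EnvOptions();
         final Options options = new Options().setCreateIfMissing(true);
         final SstFileWriter writer = new SstFileWriter(envOptions, options)) {
      writer.open(sstPath);
      writer.put("key1".getBytes(), "value1".getBytes());   // keys in sorted order
      writer.put("key2".getBytes(), "value2".getBytes());
      writer.finish();
    }
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/ingest-sketch-db");
         final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()) {
      db.ingestExternalFile(Collections.singletonList(sstPath), ingestOptions);
      System.out.println(new String(db.get("key1".getBytes())));   // "value1"
    }
  }
}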
- -package org.rocksdb; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.List; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class SstPartitionerTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void sstFixedPrefix() throws RocksDBException { - try (SstPartitionerFixedPrefixFactory factory = new SstPartitionerFixedPrefixFactory(4); - final Options opt = - new Options().setCreateIfMissing(true).setSstPartitionerFactory(factory); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - // writing (long)100 under key - db.put("aaaa1".getBytes(), "A".getBytes()); - db.put("bbbb1".getBytes(), "B".getBytes()); - db.flush(new FlushOptions()); - - db.put("aaaa0".getBytes(), "A2".getBytes()); - db.put("aaaa2".getBytes(), "A2".getBytes()); - db.flush(new FlushOptions()); - - db.compactRange(); - - List metadata = db.getLiveFilesMetaData(); - assertThat(metadata.size()).isEqualTo(2); - } - } - - @Test - public void sstFixedPrefixFamily() throws RocksDBException { - final byte[] cfName = "new_cf".getBytes(UTF_8); - final ColumnFamilyDescriptor cfDescriptor = new ColumnFamilyDescriptor(cfName, - new ColumnFamilyOptions().setSstPartitionerFactory( - new SstPartitionerFixedPrefixFactory(4))); - - try (final Options opt = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { - final ColumnFamilyHandle columnFamilyHandle = db.createColumnFamily(cfDescriptor); - - // writing (long)100 under key - db.put(columnFamilyHandle, "aaaa1".getBytes(), "A".getBytes()); - db.put(columnFamilyHandle, "bbbb1".getBytes(), "B".getBytes()); - db.flush(new FlushOptions(), columnFamilyHandle); - - db.put(columnFamilyHandle, "aaaa0".getBytes(), "A2".getBytes()); - db.put(columnFamilyHandle, "aaaa2".getBytes(), "A2".getBytes()); - db.flush(new FlushOptions(), columnFamilyHandle); - - db.compactRange(columnFamilyHandle); - - List metadata = db.getLiveFilesMetaData(); - assertThat(metadata.size()).isEqualTo(2); - } - } -} diff --git a/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java b/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java deleted file mode 100644 index 36721c80d..000000000 --- a/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
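// Illustrative sketch (not from the RocksDB sources): the fixed-prefix SST partitioner
// set-up that the deleted SstPartitionerTest above exercises. With a 4-byte prefix
// partitioner, compaction splits output files whenever the first four key bytes change.
// The class name and path below are hypothetical.
import org.rocksdb.*;

public class SstPartitionerSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final SstPartitionerFixedPrefixFactory factory = new SstPartitionerFixedPrefixFactory(4);
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setSstPartitionerFactory(factory);
         final RocksDB db = RocksDB.open(options, "/tmp/partitioner-sketch-db")) {
      db.put("aaaa1".getBytes(), "A".getBytes());
      db.put("bbbb1".getBytes(), "B".getBytes());
      db.flush(new FlushOptions().setWaitForFlush(true));
      db.compactRange();
      // Two distinct 4-byte prefixes ("aaaa", "bbbb") end up in two separate SST files.
      System.out.println("live files: " + db.getLiveFilesMetaData().size());
    }
  }
}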
- -package org.rocksdb; - -import java.util.Collections; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import static org.assertj.core.api.Assertions.assertThat; - -public class StatisticsCollectorTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void statisticsCollector() - throws InterruptedException, RocksDBException { - try (final Statistics statistics = new Statistics(); - final Options opt = new Options() - .setStatistics(statistics) - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - - try(final Statistics stats = opt.statistics()) { - - final StatsCallbackMock callback = new StatsCallbackMock(); - final StatsCollectorInput statsInput = - new StatsCollectorInput(stats, callback); - - final StatisticsCollector statsCollector = new StatisticsCollector( - Collections.singletonList(statsInput), 100); - statsCollector.start(); - - Thread.sleep(1000); - - assertThat(callback.tickerCallbackCount).isGreaterThan(0); - assertThat(callback.histCallbackCount).isGreaterThan(0); - - statsCollector.shutDown(1000); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/StatisticsTest.java b/java/src/test/java/org/rocksdb/StatisticsTest.java deleted file mode 100644 index de92102ec..000000000 --- a/java/src/test/java/org/rocksdb/StatisticsTest.java +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
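// Illustrative sketch (not from the RocksDB sources): the polling pattern that the deleted
// StatisticsCollectorTest above exercises. A StatisticsCollector periodically reads a
// Statistics object and forwards ticker and histogram values to a callback. The class name,
// path, and sleep interval below are hypothetical.
import java.util.Collections;
import org.rocksdb.*;

public class StatsCollectorSketch {
  public static void main(final String[] args) throws RocksDBException, InterruptedException {
    RocksDB.loadLibrary();
    try (final Statistics statistics = new Statistics();
         final Options options = new Options().setCreateIfMissing(true).setStatistics(statistics);
         final RocksDB db = RocksDB.open(options, "/tmp/stats-collector-sketch-db")) {
      final StatisticsCollectorCallback callback = new StatisticsCollectorCallback() {
        @Override public void tickerCallback(final TickerType type, final long count) {
          System.out.println(type + " = " + count);
        }
        @Override public void histogramCallback(final HistogramType type, final HistogramData data) {
          System.out.println(type + " avg = " + data.getAverage());
        }
      };
      final StatisticsCollector collector = new StatisticsCollector(
          Collections.singletonList(new StatsCollectorInput(statistics, callback)), 100);
      collector.start();                       // poll every 100 ms
      db.put("k".getBytes(), "v".getBytes());
      db.get("k".getBytes());
      Thread.sleep(500);                       // let a few polling rounds run
      collector.shutDown(1000);                // stop with a 1 s grace period
    }
  }
}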
- -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.nio.charset.StandardCharsets; - -import static org.assertj.core.api.Assertions.assertThat; - -public class StatisticsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void statsLevel() throws RocksDBException { - final Statistics statistics = new Statistics(); - statistics.setStatsLevel(StatsLevel.ALL); - assertThat(statistics.statsLevel()).isEqualTo(StatsLevel.ALL); - } - - @Test - public void getTickerCount() throws RocksDBException { - try (final Statistics statistics = new Statistics(); - final Options opt = new Options() - .setStatistics(statistics) - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - - final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); - final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); - - db.put(key, value); - for(int i = 0; i < 10; i++) { - db.get(key); - } - - assertThat(statistics.getTickerCount(TickerType.BYTES_READ)).isGreaterThan(0); - } - } - - @Test - public void getAndResetTickerCount() throws RocksDBException { - try (final Statistics statistics = new Statistics(); - final Options opt = new Options() - .setStatistics(statistics) - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - - final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); - final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); - - db.put(key, value); - for(int i = 0; i < 10; i++) { - db.get(key); - } - - final long read = statistics.getAndResetTickerCount(TickerType.BYTES_READ); - assertThat(read).isGreaterThan(0); - - final long readAfterReset = statistics.getTickerCount(TickerType.BYTES_READ); - assertThat(readAfterReset).isLessThan(read); - } - } - - @Test - public void getHistogramData() throws RocksDBException { - try (final Statistics statistics = new Statistics(); - final Options opt = new Options() - .setStatistics(statistics) - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - - final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); - final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); - - db.put(key, value); - for(int i = 0; i < 10; i++) { - db.get(key); - } - - final HistogramData histogramData = statistics.getHistogramData(HistogramType.BYTES_PER_READ); - assertThat(histogramData).isNotNull(); - assertThat(histogramData.getAverage()).isGreaterThan(0); - assertThat(histogramData.getMedian()).isGreaterThan(0); - assertThat(histogramData.getPercentile95()).isGreaterThan(0); - assertThat(histogramData.getPercentile99()).isGreaterThan(0); - assertThat(histogramData.getStandardDeviation()).isEqualTo(0.00); - assertThat(histogramData.getMax()).isGreaterThan(0); - assertThat(histogramData.getCount()).isGreaterThan(0); - assertThat(histogramData.getSum()).isGreaterThan(0); - assertThat(histogramData.getMin()).isGreaterThan(0); - } - } - - @Test - public void getHistogramString() throws RocksDBException { - try (final Statistics statistics = new Statistics(); - final Options opt = new Options() - .setStatistics(statistics) - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - 
dbFolder.getRoot().getAbsolutePath())) { - - final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); - final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); - - for(int i = 0; i < 10; i++) { - db.put(key, value); - } - - assertThat(statistics.getHistogramString(HistogramType.BYTES_PER_WRITE)).isNotNull(); - } - } - - @Test - public void reset() throws RocksDBException { - try (final Statistics statistics = new Statistics(); - final Options opt = new Options() - .setStatistics(statistics) - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - - final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); - final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); - - db.put(key, value); - for(int i = 0; i < 10; i++) { - db.get(key); - } - - final long read = statistics.getTickerCount(TickerType.BYTES_READ); - assertThat(read).isGreaterThan(0); - - statistics.reset(); - - final long readAfterReset = statistics.getTickerCount(TickerType.BYTES_READ); - assertThat(readAfterReset).isLessThan(read); - } - } - - @Test - public void ToString() throws RocksDBException { - try (final Statistics statistics = new Statistics(); - final Options opt = new Options() - .setStatistics(statistics) - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath())) { - assertThat(statistics.toString()).isNotNull(); - } - } -} diff --git a/java/src/test/java/org/rocksdb/StatsCallbackMock.java b/java/src/test/java/org/rocksdb/StatsCallbackMock.java deleted file mode 100644 index af8db0caa..000000000 --- a/java/src/test/java/org/rocksdb/StatsCallbackMock.java +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -public class StatsCallbackMock implements StatisticsCollectorCallback { - public int tickerCallbackCount = 0; - public int histCallbackCount = 0; - - public void tickerCallback(TickerType tickerType, long tickerCount) { - tickerCallbackCount++; - } - - public void histogramCallback(HistogramType histType, - HistogramData histData) { - histCallbackCount++; - } -} diff --git a/java/src/test/java/org/rocksdb/TableFilterTest.java b/java/src/test/java/org/rocksdb/TableFilterTest.java deleted file mode 100644 index 2bd3b1798..000000000 --- a/java/src/test/java/org/rocksdb/TableFilterTest.java +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; - -public class TableFilterTest { - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void readOptions() throws RocksDBException { - try (final DBOptions opt = new DBOptions(). - setCreateIfMissing(true). 
- setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions() - ) { - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) - ); - - final List columnFamilyHandles = new ArrayList<>(); - - // open database - try (final RocksDB db = RocksDB.open(opt, - dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, - columnFamilyHandles)) { - - try (final CfNameCollectionTableFilter cfNameCollectingTableFilter = - new CfNameCollectionTableFilter(); - final FlushOptions flushOptions = - new FlushOptions().setWaitForFlush(true); - final ReadOptions readOptions = - new ReadOptions().setTableFilter(cfNameCollectingTableFilter)) { - - db.put(columnFamilyHandles.get(0), - "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - db.put(columnFamilyHandles.get(0), - "key2".getBytes(UTF_8), "value2".getBytes(UTF_8)); - db.put(columnFamilyHandles.get(0), - "key3".getBytes(UTF_8), "value3".getBytes(UTF_8)); - db.put(columnFamilyHandles.get(1), - "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - db.put(columnFamilyHandles.get(1), - "key2".getBytes(UTF_8), "value2".getBytes(UTF_8)); - db.put(columnFamilyHandles.get(1), - "key3".getBytes(UTF_8), "value3".getBytes(UTF_8)); - - db.flush(flushOptions, columnFamilyHandles); - - try (final RocksIterator iterator = - db.newIterator(columnFamilyHandles.get(0), readOptions)) { - iterator.seekToFirst(); - while (iterator.isValid()) { - iterator.key(); - iterator.value(); - iterator.next(); - } - } - - try (final RocksIterator iterator = - db.newIterator(columnFamilyHandles.get(1), readOptions)) { - iterator.seekToFirst(); - while (iterator.isValid()) { - iterator.key(); - iterator.value(); - iterator.next(); - } - } - - assertThat(cfNameCollectingTableFilter.cfNames.size()).isEqualTo(2); - assertThat(cfNameCollectingTableFilter.cfNames.get(0)) - .isEqualTo(RocksDB.DEFAULT_COLUMN_FAMILY); - assertThat(cfNameCollectingTableFilter.cfNames.get(1)) - .isEqualTo("new_cf".getBytes(UTF_8)); - } finally { - for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) { - columnFamilyHandle.close(); - } - } - } - } - } - - private static class CfNameCollectionTableFilter extends AbstractTableFilter { - private final List cfNames = new ArrayList<>(); - - @Override - public boolean filter(final TableProperties tableProperties) { - cfNames.add(tableProperties.getColumnFamilyName()); - return true; - } - } -} diff --git a/java/src/test/java/org/rocksdb/TimedEnvTest.java b/java/src/test/java/org/rocksdb/TimedEnvTest.java deleted file mode 100644 index c958f96b2..000000000 --- a/java/src/test/java/org/rocksdb/TimedEnvTest.java +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
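// Illustrative sketch (not from the RocksDB sources): the read-time table filter hook that
// the deleted TableFilterTest above exercises. A TableFilter attached to ReadOptions is
// consulted for every SST file an iterator would read, and returning false skips that file.
// The class names and path below are hypothetical.
import org.rocksdb.*;

public class TableFilterSketch {
  // Accepts every table but logs which column family each one belongs to.
  static class LoggingTableFilter extends AbstractTableFilter {
    @Override public boolean filter(final TableProperties tableProperties) {
      System.out.println("reading table of CF: "
          + new String(tableProperties.getColumnFamilyName()));
      return true;   // returning false would exclude the file from the read
    }
  }

  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/table-filter-sketch-db");
         final LoggingTableFilter tableFilter = new LoggingTableFilter();
         final ReadOptions readOptions = new ReadOptions().setTableFilter(tableFilter)) {
      db.put("k".getBytes(), "v".getBytes());
      db.flush(new FlushOptions().setWaitForFlush(true));   // force an SST file to exist
      try (final RocksIterator iterator = db.newIterator(readOptions)) {
        for (iterator.seekToFirst(); iterator.isValid(); iterator.next()) {
          // iterating triggers the filter callback for each candidate table
        }
      }
    }
  }
}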
- -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import static java.nio.charset.StandardCharsets.UTF_8; - -public class TimedEnvTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void construct() throws RocksDBException { - try (final Env env = new TimedEnv(Env.getDefault())) { - // no-op - } - } - - @Test - public void construct_integration() throws RocksDBException { - try (final Env env = new TimedEnv(Env.getDefault()); - final Options options = new Options() - .setCreateIfMissing(true) - .setEnv(env); - ) { - try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getPath())) { - db.put("key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java b/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java deleted file mode 100644 index 7eaa6b16c..000000000 --- a/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; - -import java.util.Random; - -import static org.assertj.core.api.Assertions.assertThat; - -public class TransactionDBOptionsTest { - - private static final Random rand = PlatformRandomHelper. - getPlatformSpecificRandomFactory(); - - @Test - public void maxNumLocks() { - try (final TransactionDBOptions opt = new TransactionDBOptions()) { - final long longValue = rand.nextLong(); - opt.setMaxNumLocks(longValue); - assertThat(opt.getMaxNumLocks()).isEqualTo(longValue); - } - } - - @Test - public void maxNumStripes() { - try (final TransactionDBOptions opt = new TransactionDBOptions()) { - final long longValue = rand.nextLong(); - opt.setNumStripes(longValue); - assertThat(opt.getNumStripes()).isEqualTo(longValue); - } - } - - @Test - public void transactionLockTimeout() { - try (final TransactionDBOptions opt = new TransactionDBOptions()) { - final long longValue = rand.nextLong(); - opt.setTransactionLockTimeout(longValue); - assertThat(opt.getTransactionLockTimeout()).isEqualTo(longValue); - } - } - - @Test - public void defaultLockTimeout() { - try (final TransactionDBOptions opt = new TransactionDBOptions()) { - final long longValue = rand.nextLong(); - opt.setDefaultLockTimeout(longValue); - assertThat(opt.getDefaultLockTimeout()).isEqualTo(longValue); - } - } - - @Test - public void writePolicy() { - try (final TransactionDBOptions opt = new TransactionDBOptions()) { - final TxnDBWritePolicy writePolicy = TxnDBWritePolicy.WRITE_UNPREPARED; // non-default - opt.setWritePolicy(writePolicy); - assertThat(opt.getWritePolicy()).isEqualTo(writePolicy); - } - } - -} diff --git a/java/src/test/java/org/rocksdb/TransactionDBTest.java b/java/src/test/java/org/rocksdb/TransactionDBTest.java deleted file mode 100644 index b0ea813ff..000000000 --- a/java/src/test/java/org/rocksdb/TransactionDBTest.java +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.*; - -import static org.assertj.core.api.Assertions.assertThat; -import static java.nio.charset.StandardCharsets.UTF_8; - -public class TransactionDBTest { - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void open() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, - dbFolder.getRoot().getAbsolutePath())) { - assertThat(tdb).isNotNull(); - } - } - - @Test - public void open_columnFamilies() throws RocksDBException { - try(final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final ColumnFamilyOptions myCfOpts = new ColumnFamilyOptions()) { - - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("myCf".getBytes(), myCfOpts)); - - final List columnFamilyHandles = new ArrayList<>(); - - try (final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB tdb = TransactionDB.open(dbOptions, txnDbOptions, - dbFolder.getRoot().getAbsolutePath(), - columnFamilyDescriptors, columnFamilyHandles)) { - try { - assertThat(tdb).isNotNull(); - } finally { - for (final ColumnFamilyHandle handle : columnFamilyHandles) { - handle.close(); - } - } - } - } - } - - @Test - public void beginTransaction() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, - dbFolder.getRoot().getAbsolutePath()); - final WriteOptions writeOptions = new WriteOptions()) { - - try(final Transaction txn = tdb.beginTransaction(writeOptions)) { - assertThat(txn).isNotNull(); - } - } - } - - @Test - public void beginTransaction_transactionOptions() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, - dbFolder.getRoot().getAbsolutePath()); - final WriteOptions writeOptions = new WriteOptions(); - final TransactionOptions txnOptions = new TransactionOptions()) { - - try(final Transaction txn = tdb.beginTransaction(writeOptions, - txnOptions)) { - assertThat(txn).isNotNull(); - } - } - } - - @Test - public void beginTransaction_withOld() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, - dbFolder.getRoot().getAbsolutePath()); - final WriteOptions writeOptions = new WriteOptions()) { - - try(final Transaction txn = tdb.beginTransaction(writeOptions)) { - final Transaction txnReused = tdb.beginTransaction(writeOptions, txn); - assertThat(txnReused).isSameAs(txn); - } - } - } - - @Test - public void beginTransaction_withOld_transactionOptions() - throws 
RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, - dbFolder.getRoot().getAbsolutePath()); - final WriteOptions writeOptions = new WriteOptions(); - final TransactionOptions txnOptions = new TransactionOptions()) { - - try(final Transaction txn = tdb.beginTransaction(writeOptions)) { - final Transaction txnReused = tdb.beginTransaction(writeOptions, - txnOptions, txn); - assertThat(txnReused).isSameAs(txn); - } - } - } - - @Test - public void lockStatusData() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, - dbFolder.getRoot().getAbsolutePath()); - final WriteOptions writeOptions = new WriteOptions(); - final ReadOptions readOptions = new ReadOptions()) { - - try (final Transaction txn = tdb.beginTransaction(writeOptions)) { - - final byte key[] = "key".getBytes(UTF_8); - final byte value[] = "value".getBytes(UTF_8); - - txn.put(key, value); - assertThat(txn.getForUpdate(readOptions, key, true)).isEqualTo(value); - - final Map lockStatus = - tdb.getLockStatusData(); - - assertThat(lockStatus.size()).isEqualTo(1); - final Set> entrySet = lockStatus.entrySet(); - final Map.Entry entry = entrySet.iterator().next(); - final long columnFamilyId = entry.getKey(); - assertThat(columnFamilyId).isEqualTo(0); - final TransactionDB.KeyLockInfo keyLockInfo = entry.getValue(); - assertThat(keyLockInfo.getKey()).isEqualTo(new String(key, UTF_8)); - assertThat(keyLockInfo.getTransactionIDs().length).isEqualTo(1); - assertThat(keyLockInfo.getTransactionIDs()[0]).isEqualTo(txn.getId()); - assertThat(keyLockInfo.isExclusive()).isTrue(); - } - } - } - - @Test - public void deadlockInfoBuffer() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, - dbFolder.getRoot().getAbsolutePath())) { - - // TODO(AR) can we cause a deadlock so that we can test the output here? - assertThat(tdb.getDeadlockInfoBuffer()).isEmpty(); - } - } - - @Test - public void setDeadlockInfoBufferSize() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, - dbFolder.getRoot().getAbsolutePath())) { - tdb.setDeadlockInfoBufferSize(123); - } - } -} diff --git a/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java b/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java deleted file mode 100644 index 3c4dff7bb..000000000 --- a/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
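The lockStatusData test above exercises TransactionDB's ability to report which keys are currently locked and by which transaction. A minimal standalone sketch of the same idea follows; it is not part of the test suite, the LockStatusSketch class name and the database path are illustrative, and it assumes the RocksDB JNI artifact is on the classpath.

import java.util.Map;
import org.rocksdb.*;

public class LockStatusSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final TransactionDBOptions txnDbOptions = new TransactionDBOptions();
         // illustrative path, not taken from the test suite
         final TransactionDB tdb =
             TransactionDB.open(options, txnDbOptions, "/tmp/lock-status-sketch");
         final WriteOptions writeOptions = new WriteOptions();
         final Transaction txn = tdb.beginTransaction(writeOptions)) {
      // Writing through the transaction takes an exclusive lock on the key
      // until the transaction commits or rolls back.
      txn.put("key".getBytes(), "value".getBytes());

      final Map<Long, TransactionDB.KeyLockInfo> lockStatus = tdb.getLockStatusData();
      for (final Map.Entry<Long, TransactionDB.KeyLockInfo> entry : lockStatus.entrySet()) {
        System.out.println("cf " + entry.getKey() + " -> key '" + entry.getValue().getKey()
            + "', exclusive=" + entry.getValue().isExclusive());
      }
      txn.commit();
    }
  }
}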
-package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import static org.assertj.core.api.Assertions.assertThat; - -public class TransactionLogIteratorTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void transactionLogIterator() throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath()); - final TransactionLogIterator transactionLogIterator = - db.getUpdatesSince(0)) { - //no-op - } - } - - @Test - public void getBatch() throws RocksDBException { - final int numberOfPuts = 5; - try (final Options options = new Options() - .setCreateIfMissing(true) - .setWalTtlSeconds(1000) - .setWalSizeLimitMB(10); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - - for (int i = 0; i < numberOfPuts; i++) { - db.put(String.valueOf(i).getBytes(), - String.valueOf(i).getBytes()); - } - db.flush(new FlushOptions().setWaitForFlush(true)); - - // the latest sequence number is 5 because 5 puts - // were written beforehand - assertThat(db.getLatestSequenceNumber()). - isEqualTo(numberOfPuts); - - // insert 5 writes into a cf - try (final ColumnFamilyHandle cfHandle = db.createColumnFamily( - new ColumnFamilyDescriptor("new_cf".getBytes()))) { - for (int i = 0; i < numberOfPuts; i++) { - db.put(cfHandle, String.valueOf(i).getBytes(), - String.valueOf(i).getBytes()); - } - // the latest sequence number is 10 because - // (5 + 5) puts were written beforehand - assertThat(db.getLatestSequenceNumber()). 
- isEqualTo(numberOfPuts + numberOfPuts); - - // Get updates since the beginning - try (final TransactionLogIterator transactionLogIterator = - db.getUpdatesSince(0)) { - assertThat(transactionLogIterator.isValid()).isTrue(); - transactionLogIterator.status(); - - // The first sequence number is 1 - final TransactionLogIterator.BatchResult batchResult = - transactionLogIterator.getBatch(); - assertThat(batchResult.sequenceNumber()).isEqualTo(1); - } - } - } - } - - @Test - public void transactionLogIteratorStallAtLastRecord() - throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true) - .setWalTtlSeconds(1000) - .setWalSizeLimitMB(10); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - - db.put("key1".getBytes(), "value1".getBytes()); - // Get updates since the beginning - try (final TransactionLogIterator transactionLogIterator = - db.getUpdatesSince(0)) { - transactionLogIterator.status(); - assertThat(transactionLogIterator.isValid()).isTrue(); - transactionLogIterator.next(); - assertThat(transactionLogIterator.isValid()).isFalse(); - transactionLogIterator.status(); - db.put("key2".getBytes(), "value2".getBytes()); - transactionLogIterator.next(); - transactionLogIterator.status(); - assertThat(transactionLogIterator.isValid()).isTrue(); - } - } - } - - @Test - public void transactionLogIteratorCheckAfterRestart() - throws RocksDBException { - final int numberOfKeys = 2; - try (final Options options = new Options() - .setCreateIfMissing(true) - .setWalTtlSeconds(1000) - .setWalSizeLimitMB(10)) { - - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - db.put("key1".getBytes(), "value1".getBytes()); - db.put("key2".getBytes(), "value2".getBytes()); - db.flush(new FlushOptions().setWaitForFlush(true)); - - } - - // reopen - try (final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - assertThat(db.getLatestSequenceNumber()).isEqualTo(numberOfKeys); - - try (final TransactionLogIterator transactionLogIterator = - db.getUpdatesSince(0)) { - for (int i = 0; i < numberOfKeys; i++) { - transactionLogIterator.status(); - assertThat(transactionLogIterator.isValid()).isTrue(); - transactionLogIterator.next(); - } - } - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/TransactionOptionsTest.java b/java/src/test/java/org/rocksdb/TransactionOptionsTest.java deleted file mode 100644 index add0439e0..000000000 --- a/java/src/test/java/org/rocksdb/TransactionOptionsTest.java +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; - -import java.util.Random; - -import static org.assertj.core.api.Assertions.assertThat; - -public class TransactionOptionsTest { - - private static final Random rand = PlatformRandomHelper. 
- getPlatformSpecificRandomFactory(); - - @Test - public void snapshot() { - try (final TransactionOptions opt = new TransactionOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setSetSnapshot(boolValue); - assertThat(opt.isSetSnapshot()).isEqualTo(boolValue); - } - } - - @Test - public void deadlockDetect() { - try (final TransactionOptions opt = new TransactionOptions()) { - final boolean boolValue = rand.nextBoolean(); - opt.setDeadlockDetect(boolValue); - assertThat(opt.isDeadlockDetect()).isEqualTo(boolValue); - } - } - - @Test - public void lockTimeout() { - try (final TransactionOptions opt = new TransactionOptions()) { - final long longValue = rand.nextLong(); - opt.setLockTimeout(longValue); - assertThat(opt.getLockTimeout()).isEqualTo(longValue); - } - } - - @Test - public void expiration() { - try (final TransactionOptions opt = new TransactionOptions()) { - final long longValue = rand.nextLong(); - opt.setExpiration(longValue); - assertThat(opt.getExpiration()).isEqualTo(longValue); - } - } - - @Test - public void deadlockDetectDepth() { - try (final TransactionOptions opt = new TransactionOptions()) { - final long longValue = rand.nextLong(); - opt.setDeadlockDetectDepth(longValue); - assertThat(opt.getDeadlockDetectDepth()).isEqualTo(longValue); - } - } - - @Test - public void maxWriteBatchSize() { - try (final TransactionOptions opt = new TransactionOptions()) { - final long longValue = rand.nextLong(); - opt.setMaxWriteBatchSize(longValue); - assertThat(opt.getMaxWriteBatchSize()).isEqualTo(longValue); - } - } -} diff --git a/java/src/test/java/org/rocksdb/TransactionTest.java b/java/src/test/java/org/rocksdb/TransactionTest.java deleted file mode 100644 index 8a3067de9..000000000 --- a/java/src/test/java/org/rocksdb/TransactionTest.java +++ /dev/null @@ -1,488 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.fail; - -public class TransactionTest extends AbstractTransactionTest { - - @Test - public void getForUpdate_cf_conflict() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(testCf, k1, v1); - assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); - - // NOTE: txn2 updates k1, during txn3 - try { - txn2.put(testCf, k1, v12); // should cause an exception! 
- } catch(final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" + - "transactions"); - } - } - - @Test - public void prepare_commit() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - txn.commit(); - } - - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.setName("txnPrepare1"); - txn.put(k1, v12); - txn.prepare(); - txn.commit(); - } - - try (final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.get(readOptions, k1)).isEqualTo(v12); - } - } - } - - @Test - public void prepare_rollback() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - txn.commit(); - } - - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.setName("txnPrepare1"); - txn.put(k1, v12); - txn.prepare(); - txn.rollback(); - } - - try (final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - } - } - } - - @Test - public void prepare_read_prepared_commit() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - txn.commit(); - } - - Transaction txnPrepare; - txnPrepare = dbContainer.beginTransaction(); - txnPrepare.setName("txnPrepare1"); - txnPrepare.put(k1, v12); - txnPrepare.prepare(); - - try (final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - } - - txnPrepare.commit(); - - try (final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.get(readOptions, k1)).isEqualTo(v12); - } - } - } - - @Test - public void prepare_read_prepared_rollback() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = "value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - txn.commit(); - } - - Transaction txnPrepare; - txnPrepare = dbContainer.beginTransaction(); - txnPrepare.setName("txnPrepare1"); - txnPrepare.put(k1, v12); - txnPrepare.prepare(); - - try (final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - } - - txnPrepare.rollback(); - - try (final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.get(readOptions, k1)).isEqualTo(v1); - } - } - } - - @Test - public void getForUpdate_conflict() throws RocksDBException { - final byte[] k1 = "key1".getBytes(UTF_8); - final byte[] v1 = 
"value1".getBytes(UTF_8); - final byte[] v12 = "value12".getBytes(UTF_8); - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(k1, v1); - assertThat(txn.getForUpdate(readOptions, k1, true)).isEqualTo(v1); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.getForUpdate(readOptions, k1, true)).isEqualTo(v1); - - // NOTE: txn2 updates k1, during txn3 - try { - txn2.put(k1, v12); // should cause an exception! - } catch(final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" + - "transactions"); - } - } - - @Test - public void multiGetForUpdate_cf_conflict() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - final byte[] otherValue = "otherValue".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - final List cfList = Arrays.asList(testCf, testCf); - - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(testCf, keys[0], values[0]); - txn.put(testCf, keys[1], values[1]); - assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(values); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.multiGetForUpdate(readOptions, cfList, keys)) - .isEqualTo(values); - - // NOTE: txn2 updates k1, during txn3 - try { - txn2.put(testCf, keys[0], otherValue); // should cause an exception! - } catch(final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" + - "transactions"); - } - } - - @Test - public void multiGetAsListForUpdate_cf_conflict() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - final byte[] otherValue = "otherValue".getBytes(UTF_8); - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - final List cfList = Arrays.asList(testCf, testCf); - - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(testCf, keys[0], values[0]); - txn.put(testCf, keys[1], values[1]); - assertThat(txn.multiGetAsList(readOptions, cfList, Arrays.asList(keys))) - .containsExactly(values); - txn.commit(); - } - - try (final Transaction txn2 = dbContainer.beginTransaction()) { - try (final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.multiGetForUpdateAsList(readOptions, cfList, Arrays.asList(keys))) - .containsExactly(values); - - // NOTE: txn2 updates k1, during txn3 - try { - txn2.put(testCf, keys[0], otherValue); // should cause an exception! 
- } catch (final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" - + "transactions"); - } - } - - @Test - public void multiGetForUpdate_conflict() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - final byte[] otherValue = "otherValue".getBytes(UTF_8); - - try(final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - try(final Transaction txn = dbContainer.beginTransaction()) { - txn.put(keys[0], values[0]); - txn.put(keys[1], values[1]); - assertThat(txn.multiGet(readOptions, keys)).isEqualTo(values); - txn.commit(); - } - - try(final Transaction txn2 = dbContainer.beginTransaction()) { - try(final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.multiGetForUpdate(readOptions, keys)) - .isEqualTo(values); - - // NOTE: txn2 updates k1, during txn3 - try { - txn2.put(keys[0], otherValue); // should cause an exception! - } catch (final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" - + "transactions"); - } - } - - @Test - public void multiGetAsListForUpdate_conflict() throws RocksDBException { - final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; - final byte[][] values = new byte[][] {"value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; - final byte[] otherValue = "otherValue".getBytes(UTF_8); - - try (final DBContainer dbContainer = startDb(); - final ReadOptions readOptions = new ReadOptions()) { - try (final Transaction txn = dbContainer.beginTransaction()) { - txn.put(keys[0], values[0]); - txn.put(keys[1], values[1]); - assertThat(txn.multiGetAsList(readOptions, Arrays.asList(keys))).containsExactly(values); - txn.commit(); - } - - try (final Transaction txn2 = dbContainer.beginTransaction()) { - try (final Transaction txn3 = dbContainer.beginTransaction()) { - assertThat(txn3.multiGetForUpdateAsList(readOptions, Arrays.asList(keys))) - .containsExactly(values); - - // NOTE: txn2 updates k1, during txn3 - try { - txn2.put(keys[0], otherValue); // should cause an exception! 
- } catch(final RocksDBException e) { - assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); - return; - } - } - } - - fail("Expected an exception for put after getForUpdate from conflicting" + - "transactions"); - } - } - - @Test - public void name() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getName()).isEmpty(); - final String name = "my-transaction-" + rand.nextLong(); - txn.setName(name); - assertThat(txn.getName()).isEqualTo(name); - } - } - - @Test - public void ID() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getID()).isGreaterThan(0); - } - } - - @Test - public void deadlockDetect() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.isDeadlockDetect()).isFalse(); - } - } - - @Test - public void waitingTxns() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getWaitingTxns().getTransactionIds().length).isEqualTo(0); - } - } - - @Test - public void state() throws RocksDBException { - try(final DBContainer dbContainer = startDb()) { - - try(final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getState()) - .isSameAs(Transaction.TransactionState.STARTED); - txn.commit(); - assertThat(txn.getState()) - .isSameAs(Transaction.TransactionState.COMMITTED); - } - - try(final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getState()) - .isSameAs(Transaction.TransactionState.STARTED); - txn.rollback(); - assertThat(txn.getState()) - .isSameAs(Transaction.TransactionState.STARTED); - } - } - } - - @Test - public void Id() throws RocksDBException { - try(final DBContainer dbContainer = startDb(); - final Transaction txn = dbContainer.beginTransaction()) { - assertThat(txn.getId()).isNotNull(); - } - } - - @Override - public TransactionDBContainer startDb() throws RocksDBException { - final DBOptions options = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true); - final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); - final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions(); - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor(TXN_TEST_COLUMN_FAMILY, - columnFamilyOptions)); - final List columnFamilyHandles = new ArrayList<>(); - - final TransactionDB txnDb; - try { - txnDb = TransactionDB.open(options, txnDbOptions, - dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, - columnFamilyHandles); - } catch(final RocksDBException e) { - columnFamilyOptions.close(); - txnDbOptions.close(); - options.close(); - throw e; - } - - final WriteOptions writeOptions = new WriteOptions(); - final TransactionOptions txnOptions = new TransactionOptions(); - - return new TransactionDBContainer(txnOptions, writeOptions, - columnFamilyHandles, txnDb, txnDbOptions, columnFamilyOptions, options); - } - - private static class TransactionDBContainer - extends DBContainer { - private final TransactionOptions txnOptions; - private final TransactionDB txnDb; - private final TransactionDBOptions txnDbOptions; - - public TransactionDBContainer( - final TransactionOptions txnOptions, 
final WriteOptions writeOptions, - final List columnFamilyHandles, - final TransactionDB txnDb, final TransactionDBOptions txnDbOptions, - final ColumnFamilyOptions columnFamilyOptions, - final DBOptions options) { - super(writeOptions, columnFamilyHandles, columnFamilyOptions, - options); - this.txnOptions = txnOptions; - this.txnDb = txnDb; - this.txnDbOptions = txnDbOptions; - } - - @Override - public Transaction beginTransaction() { - return txnDb.beginTransaction(writeOptions, txnOptions); - } - - @Override - public Transaction beginTransaction(final WriteOptions writeOptions) { - return txnDb.beginTransaction(writeOptions, txnOptions); - } - - @Override - public void close() { - txnOptions.close(); - writeOptions.close(); - for(final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) { - columnFamilyHandle.close(); - } - txnDb.close(); - txnDbOptions.close(); - options.close(); - } - } - -} diff --git a/java/src/test/java/org/rocksdb/TtlDBTest.java b/java/src/test/java/org/rocksdb/TtlDBTest.java deleted file mode 100644 index ffa15e768..000000000 --- a/java/src/test/java/org/rocksdb/TtlDBTest.java +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.TimeUnit; - -import static org.assertj.core.api.Assertions.assertThat; - -public class TtlDBTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void ttlDBOpen() throws RocksDBException, InterruptedException { - try (final Options options = new Options().setCreateIfMissing(true).setMaxCompactionBytes(0); - final TtlDB ttlDB = TtlDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - ttlDB.put("key".getBytes(), "value".getBytes()); - assertThat(ttlDB.get("key".getBytes())). - isEqualTo("value".getBytes()); - assertThat(ttlDB.get("key".getBytes())).isNotNull(); - } - } - - @Test - public void ttlDBOpenWithTtl() throws RocksDBException, InterruptedException { - try (final Options options = new Options().setCreateIfMissing(true).setMaxCompactionBytes(0); - final TtlDB ttlDB = TtlDB.open(options, dbFolder.getRoot().getAbsolutePath(), 1, false);) { - ttlDB.put("key".getBytes(), "value".getBytes()); - assertThat(ttlDB.get("key".getBytes())). 
- isEqualTo("value".getBytes()); - TimeUnit.SECONDS.sleep(2); - ttlDB.compactRange(); - assertThat(ttlDB.get("key".getBytes())).isNull(); - } - } - - @Test - public void ttlDbOpenWithColumnFamilies() throws RocksDBException, - InterruptedException { - final List cfNames = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes()) - ); - final List ttlValues = Arrays.asList(0, 1); - - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions dbOptions = new DBOptions() - .setCreateMissingColumnFamilies(true) - .setCreateIfMissing(true); - final TtlDB ttlDB = TtlDB.open(dbOptions, - dbFolder.getRoot().getAbsolutePath(), cfNames, - columnFamilyHandleList, ttlValues, false)) { - try { - ttlDB.put("key".getBytes(), "value".getBytes()); - assertThat(ttlDB.get("key".getBytes())). - isEqualTo("value".getBytes()); - ttlDB.put(columnFamilyHandleList.get(1), "key".getBytes(), - "value".getBytes()); - assertThat(ttlDB.get(columnFamilyHandleList.get(1), - "key".getBytes())).isEqualTo("value".getBytes()); - TimeUnit.SECONDS.sleep(2); - - ttlDB.compactRange(); - ttlDB.compactRange(columnFamilyHandleList.get(1)); - - assertThat(ttlDB.get("key".getBytes())).isNotNull(); - assertThat(ttlDB.get(columnFamilyHandleList.get(1), - "key".getBytes())).isNull(); - } finally { - for (final ColumnFamilyHandle columnFamilyHandle : - columnFamilyHandleList) { - columnFamilyHandle.close(); - } - } - } - } - - @Test - public void createTtlColumnFamily() throws RocksDBException, - InterruptedException { - try (final Options options = new Options().setCreateIfMissing(true); - final TtlDB ttlDB = TtlDB.open(options, - dbFolder.getRoot().getAbsolutePath()); - final ColumnFamilyHandle columnFamilyHandle = - ttlDB.createColumnFamilyWithTtl( - new ColumnFamilyDescriptor("new_cf".getBytes()), 1)) { - ttlDB.put(columnFamilyHandle, "key".getBytes(), - "value".getBytes()); - assertThat(ttlDB.get(columnFamilyHandle, "key".getBytes())). - isEqualTo("value".getBytes()); - TimeUnit.SECONDS.sleep(2); - ttlDB.compactRange(columnFamilyHandle); - assertThat(ttlDB.get(columnFamilyHandle, "key".getBytes())).isNull(); - } - } -} diff --git a/java/src/test/java/org/rocksdb/Types.java b/java/src/test/java/org/rocksdb/Types.java deleted file mode 100644 index c3c1de833..000000000 --- a/java/src/test/java/org/rocksdb/Types.java +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -/** - * Simple type conversion methods - * for use in tests - */ -public class Types { - - /** - * Convert first 4 bytes of a byte array to an int - * - * @param data The byte array - * - * @return An integer - */ - public static int byteToInt(final byte data[]) { - return (data[0] & 0xff) | - ((data[1] & 0xff) << 8) | - ((data[2] & 0xff) << 16) | - ((data[3] & 0xff) << 24); - } - - /** - * Convert an int to 4 bytes - * - * @param v The int - * - * @return A byte array containing 4 bytes - */ - public static byte[] intToByte(final int v) { - return new byte[] { - (byte)((v >>> 0) & 0xff), - (byte)((v >>> 8) & 0xff), - (byte)((v >>> 16) & 0xff), - (byte)((v >>> 24) & 0xff) - }; - } -} diff --git a/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java b/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java deleted file mode 100644 index ddc2a456f..000000000 --- a/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.text.MessageFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class VerifyChecksumsTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - - /** - * Class to factor out the specific DB operations within the test - */ - abstract static class Operations { - final int kv_count; - final List elements = new ArrayList<>(); - final List sortedElements = new ArrayList<>(); - - Operations(final int kv_count) { - this.kv_count = kv_count; - for (int i = 0; i < kv_count; i++) elements.add(MessageFormat.format("{0,number,#}", i)); - sortedElements.addAll(elements); - Collections.sort(sortedElements); - } - - void fill(final RocksDB db) throws RocksDBException { - for (int i = 0; i < kv_count; i++) { - final String key = MessageFormat.format("key{0}", elements.get(i)); - final String value = MessageFormat.format("value{0}", elements.get(i)); - // noinspection ObjectAllocationInLoop - db.put(key.getBytes(), value.getBytes()); - } - db.flush(new FlushOptions()); - } - - @SuppressWarnings("ObjectAllocationInLoop") - void get(final RocksDB db, final boolean verifyFlag) throws RocksDBException { - try (final ReadOptions readOptions = new ReadOptions()) { - readOptions.setReadaheadSize(32 * 1024); - readOptions.setFillCache(false); - readOptions.setVerifyChecksums(verifyFlag); - - for (int i = 0; i < kv_count / 10; i++) { - @SuppressWarnings("UnsecureRandomNumberGeneration") - final int index = Double.valueOf(Math.random() * kv_count).intValue(); - final String key = MessageFormat.format("key{0}", sortedElements.get(index)); - final String expectedValue = MessageFormat.format("value{0}", sortedElements.get(index)); - - final byte[] value = db.get(readOptions, key.getBytes()); - assertThat(value).isEqualTo(expectedValue.getBytes()); - } - } - } - - @SuppressWarnings("ObjectAllocationInLoop") - void multiGet(final RocksDB db, final boolean verifyFlag) throws 
RocksDBException { - try (final ReadOptions readOptions = new ReadOptions()) { - readOptions.setReadaheadSize(32 * 1024); - readOptions.setFillCache(false); - readOptions.setVerifyChecksums(verifyFlag); - - final List<byte[]> keys = new ArrayList<>(); - final List<String> expectedValues = new ArrayList<>(); - - for (int i = 0; i < kv_count / 10; i++) { - @SuppressWarnings("UnsecureRandomNumberGeneration") - final int index = Double.valueOf(Math.random() * kv_count).intValue(); - keys.add(MessageFormat.format("key{0}", sortedElements.get(index)).getBytes()); - - expectedValues.add(MessageFormat.format("value{0}", sortedElements.get(index))); - } - - final List<byte[]> values = db.multiGetAsList(readOptions, keys); - for (int i = 0; i < keys.size(); i++) { - assertThat(values.get(i)).isEqualTo(expectedValues.get(i).getBytes()); - } - } - } - - void iterate(final RocksDB db, final boolean verifyFlag) throws RocksDBException { - final ReadOptions readOptions = new ReadOptions(); - readOptions.setReadaheadSize(32 * 1024); - readOptions.setFillCache(false); - readOptions.setVerifyChecksums(verifyFlag); - int i = 0; - try (final RocksIterator rocksIterator = db.newIterator(readOptions)) { - rocksIterator.seekToFirst(); - rocksIterator.status(); - while (rocksIterator.isValid()) { - final byte[] key = rocksIterator.key(); - final byte[] value = rocksIterator.value(); - // noinspection ObjectAllocationInLoop - assertThat(key).isEqualTo( - (MessageFormat.format("key{0}", sortedElements.get(i))).getBytes()); - // noinspection ObjectAllocationInLoop - assertThat(value).isEqualTo( - (MessageFormat.format("value{0}", sortedElements.get(i))).getBytes()); - rocksIterator.next(); - rocksIterator.status(); - i++; - } - } - assertThat(i).isEqualTo(kv_count); - } - - abstract void performOperations(final RocksDB db, final boolean verifyFlag) - throws RocksDBException; - } - - private static final int KV_COUNT = 10000; - - /** - * Run some operations and count the TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT before and after - * It should GO UP when the read options have checksum verification turned on. - * It should REMAIN UNCHANGED when the read options have checksum verification turned off. - * As the read options refer only to the read operations, there are still a few checksums - * performed outside this (blocks are getting loaded for lots of reasons, not always directly due - * to reads) but this test provides a good enough proxy for whether the flag is being noticed. 
- * - * @param operations the DB reading operations to perform which affect the checksum stats - * - * @throws RocksDBException - */ - private void verifyChecksums(final Operations operations) throws RocksDBException { - final String dbPath = dbFolder.getRoot().getAbsolutePath(); - - // noinspection SingleStatementInBlock - try (final Statistics statistics = new Statistics(); - final Options options = new Options().setCreateIfMissing(true).setStatistics(statistics)) { - try (final RocksDB db = RocksDB.open(options, dbPath)) { - // 0 - System.out.println(MessageFormat.format( - "newly open {0}", statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT))); - operations.fill(db); - // - System.out.println(MessageFormat.format( - "flushed {0}", statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT))); - } - - // 2 - System.out.println(MessageFormat.format("closed-after-write {0}", - statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT))); - - for (final boolean verifyFlag : new boolean[] {false, true, false, true}) { - try (final RocksDB db = RocksDB.open(options, dbPath)) { - final long beforeOperationsCount = - statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT); - System.out.println(MessageFormat.format("re-opened {0}", beforeOperationsCount)); - operations.performOperations(db, verifyFlag); - final long afterOperationsCount = - statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT); - if (verifyFlag) { - // We don't need to be exact - we are checking that the checksums happen - // exactly how many depends on block size etc etc, so may not be entirely stable - System.out.println(MessageFormat.format("verify=true {0}", afterOperationsCount)); - assertThat(afterOperationsCount).isGreaterThan(beforeOperationsCount + 20); - } else { - System.out.println(MessageFormat.format("verify=false {0}", afterOperationsCount)); - assertThat(afterOperationsCount).isEqualTo(beforeOperationsCount); - } - } - } - } - } - - @Test - public void verifyChecksumsInIteration() throws RocksDBException { - // noinspection AnonymousInnerClassMayBeStatic - verifyChecksums(new Operations(KV_COUNT) { - @Override - void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException { - iterate(db, verifyFlag); - } - }); - } - - @Test - public void verifyChecksumsGet() throws RocksDBException { - // noinspection AnonymousInnerClassMayBeStatic - verifyChecksums(new Operations(KV_COUNT) { - @Override - void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException { - get(db, verifyFlag); - } - }); - } - - @Test - public void verifyChecksumsMultiGet() throws RocksDBException { - // noinspection AnonymousInnerClassMayBeStatic - verifyChecksums(new Operations(KV_COUNT) { - @Override - void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException { - multiGet(db, verifyFlag); - } - }); - } -} diff --git a/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java b/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java deleted file mode 100644 index 2a0133f6b..000000000 --- a/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
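As the comment above the verifyChecksums helper explains, whether reads recompute block checksums is controlled per ReadOptions, and the effect is visible through the BLOCK_CHECKSUM_COMPUTE_COUNT ticker. A minimal sketch of toggling the flag and watching the counter, assuming the RocksDB JNI artifact is available; the class name and path are illustrative, and exact counts will vary with block size and caching.

import org.rocksdb.*;

public class ChecksumCounterSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Statistics statistics = new Statistics();
         final Options options =
             new Options().setCreateIfMissing(true).setStatistics(statistics);
         // illustrative path
         final RocksDB db = RocksDB.open(options, "/tmp/checksum-sketch")) {
      db.put("key".getBytes(), "value".getBytes());
      db.flush(new FlushOptions().setWaitForFlush(true));

      for (final boolean verify : new boolean[] {false, true}) {
        final long before = statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT);
        // Reads that set verifyChecksums(true) recompute block checksums as blocks are loaded.
        try (final ReadOptions readOptions =
                 new ReadOptions().setVerifyChecksums(verify).setFillCache(false)) {
          db.get(readOptions, "key".getBytes());
        }
        final long after = statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT);
        System.out.println("verify=" + verify + " checksum computations: " + (after - before));
      }
    }
  }
}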
- -package org.rocksdb; - -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - - -public class WALRecoveryModeTest { - - @Test - public void getWALRecoveryMode() { - for (final WALRecoveryMode walRecoveryMode : WALRecoveryMode.values()) { - assertThat(WALRecoveryMode.getWALRecoveryMode(walRecoveryMode.getValue())) - .isEqualTo(walRecoveryMode); - } - } -} diff --git a/java/src/test/java/org/rocksdb/WalFilterTest.java b/java/src/test/java/org/rocksdb/WalFilterTest.java deleted file mode 100644 index adeb959d1..000000000 --- a/java/src/test/java/org/rocksdb/WalFilterTest.java +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.util.ByteUtil.bytes; -import static org.rocksdb.util.TestUtil.*; - -public class WalFilterTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void walFilter() throws RocksDBException { - // Create 3 batches with two keys each - final byte[][][] batchKeys = { - new byte[][] { - bytes("key1"), - bytes("key2") - }, - new byte[][] { - bytes("key3"), - bytes("key4") - }, - new byte[][] { - bytes("key5"), - bytes("key6") - } - - }; - - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor(bytes("pikachu")) - ); - final List cfHandles = new ArrayList<>(); - - // Test with all WAL processing options - for (final WalProcessingOption option : WalProcessingOption.values()) { - try (final Options options = optionsForLogIterTest(); - final DBOptions dbOptions = new DBOptions(options) - .setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open(dbOptions, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, cfHandles)) { - try (final WriteOptions writeOptions = new WriteOptions()) { - // Write given keys in given batches - for (int i = 0; i < batchKeys.length; i++) { - final WriteBatch batch = new WriteBatch(); - for (int j = 0; j < batchKeys[i].length; j++) { - batch.put(cfHandles.get(0), batchKeys[i][j], dummyString(1024)); - } - db.write(writeOptions, batch); - } - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - cfHandles.clear(); - } - } - - // Create a test filter that would apply wal_processing_option at the first - // record - final int applyOptionForRecordIndex = 1; - try (final TestableWalFilter walFilter = - new TestableWalFilter(option, applyOptionForRecordIndex)) { - - try (final Options options = optionsForLogIterTest(); - final DBOptions dbOptions = new DBOptions(options) - .setWalFilter(walFilter)) { - - try (final RocksDB db = RocksDB.open(dbOptions, - dbFolder.getRoot().getAbsolutePath(), - cfDescriptors, cfHandles)) { - - try { - assertThat(walFilter.logNumbers).isNotEmpty(); - assertThat(walFilter.logFileNames).isNotEmpty(); - } finally { - for (final 
ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - cfHandles.clear(); - } - } catch (final RocksDBException e) { - if (option != WalProcessingOption.CORRUPTED_RECORD) { - // exception is expected when CORRUPTED_RECORD! - throw e; - } - } - } - } - } - } - - - private static class TestableWalFilter extends AbstractWalFilter { - private final WalProcessingOption walProcessingOption; - private final int applyOptionForRecordIndex; - Map cfLognumber; - Map cfNameId; - final List logNumbers = new ArrayList<>(); - final List logFileNames = new ArrayList<>(); - private int currentRecordIndex = 0; - - public TestableWalFilter(final WalProcessingOption walProcessingOption, - final int applyOptionForRecordIndex) { - super(); - this.walProcessingOption = walProcessingOption; - this.applyOptionForRecordIndex = applyOptionForRecordIndex; - } - - @Override - public void columnFamilyLogNumberMap(final Map cfLognumber, - final Map cfNameId) { - this.cfLognumber = cfLognumber; - this.cfNameId = cfNameId; - } - - @Override - public LogRecordFoundResult logRecordFound( - final long logNumber, final String logFileName, final WriteBatch batch, - final WriteBatch newBatch) { - - logNumbers.add(logNumber); - logFileNames.add(logFileName); - - final WalProcessingOption optionToReturn; - if (currentRecordIndex == applyOptionForRecordIndex) { - optionToReturn = walProcessingOption; - } - else { - optionToReturn = WalProcessingOption.CONTINUE_PROCESSING; - } - - currentRecordIndex++; - - return new LogRecordFoundResult(optionToReturn, false); - } - - @Override - public String name() { - return "testable-wal-filter"; - } - } -} diff --git a/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java b/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java deleted file mode 100644 index 2826b128f..000000000 --- a/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb; - -import java.util.Arrays; -import java.util.List; - -import org.junit.ClassRule; -import org.junit.Test; -import org.rocksdb.util.CapturingWriteBatchHandler; -import org.rocksdb.util.CapturingWriteBatchHandler.Event; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.*; - - -public class WriteBatchHandlerTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Test - public void writeBatchHandler() throws RocksDBException { - // setup test data - final List testEvents = Arrays.asList( - new Event(DELETE, "k0".getBytes(), null), - new Event(PUT, "k1".getBytes(), "v1".getBytes()), - new Event(PUT, "k2".getBytes(), "v2".getBytes()), - new Event(PUT, "k3".getBytes(), "v3".getBytes()), - new Event(LOG, null, "log1".getBytes()), - new Event(MERGE, "k2".getBytes(), "v22".getBytes()), - new Event(DELETE, "k3".getBytes(), null) - ); - - // load test data to the write batch - try (final WriteBatch batch = new WriteBatch()) { - for (final Event testEvent : testEvents) { - switch (testEvent.action) { - - case PUT: - batch.put(testEvent.key, testEvent.value); - break; - - case MERGE: - batch.merge(testEvent.key, testEvent.value); - break; - - case DELETE: - batch.delete(testEvent.key); - break; - - case LOG: - batch.putLogData(testEvent.value); - break; - } - } - - // attempt to read test data back from the WriteBatch by iterating - // with a handler - try (final CapturingWriteBatchHandler handler = - new CapturingWriteBatchHandler()) { - batch.iterate(handler); - - // compare the results to the test data - final List actualEvents = - handler.getEvents(); - assertThat(testEvents.size()).isSameAs(actualEvents.size()); - - assertThat(testEvents).isEqualTo(actualEvents); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/WriteBatchTest.java b/java/src/test/java/org/rocksdb/WriteBatchTest.java deleted file mode 100644 index cc3ad26eb..000000000 --- a/java/src/test/java/org/rocksdb/WriteBatchTest.java +++ /dev/null @@ -1,528 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
-package org.rocksdb; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.DELETE; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.DELETE_RANGE; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.LOG; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.MERGE; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.PUT; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.SINGLE_DELETE; - -import java.io.UnsupportedEncodingException; -import java.nio.ByteBuffer; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.CapturingWriteBatchHandler; -import org.rocksdb.util.CapturingWriteBatchHandler.Event; -import org.rocksdb.util.WriteBatchGetter; - -/** - * This class mimics the db/write_batch_test.cc - * in the c++ rocksdb library. - */ -public class WriteBatchTest { - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void emptyWriteBatch() { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.count()).isEqualTo(0); - } - } - - @Test - public void multipleBatchOperations() - throws RocksDBException { - - final byte[] foo = "foo".getBytes(UTF_8); - final byte[] bar = "bar".getBytes(UTF_8); - final byte[] box = "box".getBytes(UTF_8); - final byte[] baz = "baz".getBytes(UTF_8); - final byte[] boo = "boo".getBytes(UTF_8); - final byte[] hoo = "hoo".getBytes(UTF_8); - final byte[] hello = "hello".getBytes(UTF_8); - - try (final WriteBatch batch = new WriteBatch()) { - batch.put(foo, bar); - batch.delete(box); - batch.put(baz, boo); - batch.merge(baz, hoo); - batch.singleDelete(foo); - batch.deleteRange(baz, foo); - batch.putLogData(hello); - - try(final CapturingWriteBatchHandler handler = - new CapturingWriteBatchHandler()) { - batch.iterate(handler); - - assertThat(handler.getEvents().size()).isEqualTo(7); - - assertThat(handler.getEvents().get(0)).isEqualTo(new Event(PUT, foo, bar)); - assertThat(handler.getEvents().get(1)).isEqualTo(new Event(DELETE, box, null)); - assertThat(handler.getEvents().get(2)).isEqualTo(new Event(PUT, baz, boo)); - assertThat(handler.getEvents().get(3)).isEqualTo(new Event(MERGE, baz, hoo)); - assertThat(handler.getEvents().get(4)).isEqualTo(new Event(SINGLE_DELETE, foo, null)); - assertThat(handler.getEvents().get(5)).isEqualTo(new Event(DELETE_RANGE, baz, foo)); - assertThat(handler.getEvents().get(6)).isEqualTo(new Event(LOG, null, hello)); - } - } - } - - @Test - public void multipleBatchOperationsDirect() - throws UnsupportedEncodingException, RocksDBException { - try (WriteBatch batch = new WriteBatch()) { - ByteBuffer key = ByteBuffer.allocateDirect(16); - ByteBuffer value = ByteBuffer.allocateDirect(16); - key.put("foo".getBytes("US-ASCII")).flip(); - value.put("bar".getBytes("US-ASCII")).flip(); - batch.put(key, value); - assertThat(key.position()).isEqualTo(3); - assertThat(key.limit()).isEqualTo(3); - assertThat(value.position()).isEqualTo(3); - assertThat(value.limit()).isEqualTo(3); - - key.clear(); - key.put("box".getBytes("US-ASCII")).flip(); - batch.delete(key); - assertThat(key.position()).isEqualTo(3); - assertThat(key.limit()).isEqualTo(3); - - batch.put("baz".getBytes("US-ASCII"), 
"boo".getBytes("US-ASCII")); - - WriteBatchTestInternalHelper.setSequence(batch, 100); - assertThat(WriteBatchTestInternalHelper.sequence(batch)).isNotNull().isEqualTo(100); - assertThat(batch.count()).isEqualTo(3); - assertThat(new String(getContents(batch), "US-ASCII")) - .isEqualTo("Put(baz, boo)@102" - + "Delete(box)@101" - + "Put(foo, bar)@100"); - } - } - - @Test - public void testAppendOperation() - throws RocksDBException { - try (final WriteBatch b1 = new WriteBatch(); - final WriteBatch b2 = new WriteBatch()) { - WriteBatchTestInternalHelper.setSequence(b1, 200); - WriteBatchTestInternalHelper.setSequence(b2, 300); - WriteBatchTestInternalHelper.append(b1, b2); - assertThat(getContents(b1).length).isEqualTo(0); - assertThat(b1.count()).isEqualTo(0); - b2.put("a".getBytes(UTF_8), "va".getBytes(UTF_8)); - WriteBatchTestInternalHelper.append(b1, b2); - assertThat("Put(a, va)@200".equals(new String(getContents(b1), - UTF_8))); - assertThat(b1.count()).isEqualTo(1); - b2.clear(); - b2.put("b".getBytes(UTF_8), "vb".getBytes(UTF_8)); - WriteBatchTestInternalHelper.append(b1, b2); - assertThat(("Put(a, va)@200" + - "Put(b, vb)@201") - .equals(new String(getContents(b1), UTF_8))); - assertThat(b1.count()).isEqualTo(2); - b2.delete("foo".getBytes(UTF_8)); - WriteBatchTestInternalHelper.append(b1, b2); - assertThat(("Put(a, va)@200" + - "Put(b, vb)@202" + - "Put(b, vb)@201" + - "Delete(foo)@203") - .equals(new String(getContents(b1), UTF_8))); - assertThat(b1.count()).isEqualTo(4); - } - } - - @Test - public void blobOperation() - throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - batch.put("k1".getBytes(UTF_8), "v1".getBytes(UTF_8)); - batch.put("k2".getBytes(UTF_8), "v2".getBytes(UTF_8)); - batch.put("k3".getBytes(UTF_8), "v3".getBytes(UTF_8)); - batch.putLogData("blob1".getBytes(UTF_8)); - batch.delete("k2".getBytes(UTF_8)); - batch.putLogData("blob2".getBytes(UTF_8)); - batch.merge("foo".getBytes(UTF_8), "bar".getBytes(UTF_8)); - assertThat(batch.count()).isEqualTo(5); - assertThat(("Merge(foo, bar)@4" + - "Put(k1, v1)@0" + - "Delete(k2)@3" + - "Put(k2, v2)@1" + - "Put(k3, v3)@2") - .equals(new String(getContents(batch), UTF_8))); - } - } - - @Test - public void savePoints() - throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - batch.put("k1".getBytes(UTF_8), "v1".getBytes(UTF_8)); - batch.put("k2".getBytes(UTF_8), "v2".getBytes(UTF_8)); - batch.put("k3".getBytes(UTF_8), "v3".getBytes(UTF_8)); - - assertThat(getFromWriteBatch(batch, "k1")).isEqualTo("v1"); - assertThat(getFromWriteBatch(batch, "k2")).isEqualTo("v2"); - assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3"); - - batch.setSavePoint(); - - batch.delete("k2".getBytes(UTF_8)); - batch.put("k3".getBytes(UTF_8), "v3-2".getBytes(UTF_8)); - - assertThat(getFromWriteBatch(batch, "k2")).isNull(); - assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3-2"); - - - batch.setSavePoint(); - - batch.put("k3".getBytes(UTF_8), "v3-3".getBytes(UTF_8)); - batch.put("k4".getBytes(UTF_8), "v4".getBytes(UTF_8)); - - assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3-3"); - assertThat(getFromWriteBatch(batch, "k4")).isEqualTo("v4"); - - - batch.rollbackToSavePoint(); - - assertThat(getFromWriteBatch(batch, "k2")).isNull(); - assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3-2"); - assertThat(getFromWriteBatch(batch, "k4")).isNull(); - - - batch.rollbackToSavePoint(); - - assertThat(getFromWriteBatch(batch, "k1")).isEqualTo("v1"); - assertThat(getFromWriteBatch(batch, 
"k2")).isEqualTo("v2"); - assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3"); - assertThat(getFromWriteBatch(batch, "k4")).isNull(); - } - } - - @Test - public void deleteRange() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final WriteBatch batch = new WriteBatch(); - final WriteOptions wOpt = new WriteOptions()) { - db.put("key1".getBytes(), "value".getBytes()); - db.put("key2".getBytes(), "12345678".getBytes()); - db.put("key3".getBytes(), "abcdefg".getBytes()); - db.put("key4".getBytes(), "xyz".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); - assertThat(db.get("key3".getBytes())).isEqualTo("abcdefg".getBytes()); - assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); - - batch.deleteRange("key2".getBytes(), "key4".getBytes()); - db.write(wOpt, batch); - - assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); - assertThat(db.get("key2".getBytes())).isNull(); - assertThat(db.get("key3".getBytes())).isNull(); - assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); - } - } - - @Test - public void restorePoints() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - - batch.put("k1".getBytes(), "v1".getBytes()); - batch.put("k2".getBytes(), "v2".getBytes()); - - batch.setSavePoint(); - - batch.put("k1".getBytes(), "123456789".getBytes()); - batch.delete("k2".getBytes()); - - batch.rollbackToSavePoint(); - - try(final CapturingWriteBatchHandler handler = new CapturingWriteBatchHandler()) { - batch.iterate(handler); - - assertThat(handler.getEvents().size()).isEqualTo(2); - assertThat(handler.getEvents().get(0)).isEqualTo(new Event(PUT, "k1".getBytes(), "v1".getBytes())); - assertThat(handler.getEvents().get(1)).isEqualTo(new Event(PUT, "k2".getBytes(), "v2".getBytes())); - } - } - } - - @Test(expected = RocksDBException.class) - public void restorePoints_withoutSavePoints() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - batch.rollbackToSavePoint(); - } - } - - @Test(expected = RocksDBException.class) - public void restorePoints_withoutSavePoints_nested() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - - batch.setSavePoint(); - batch.rollbackToSavePoint(); - - // without previous corresponding setSavePoint - batch.rollbackToSavePoint(); - } - } - - @Test - public void popSavePoint() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - - batch.put("k1".getBytes(), "v1".getBytes()); - batch.put("k2".getBytes(), "v2".getBytes()); - - batch.setSavePoint(); - - batch.put("k1".getBytes(), "123456789".getBytes()); - batch.delete("k2".getBytes()); - - batch.setSavePoint(); - - batch.popSavePoint(); - - batch.rollbackToSavePoint(); - - try(final CapturingWriteBatchHandler handler = new CapturingWriteBatchHandler()) { - batch.iterate(handler); - - assertThat(handler.getEvents().size()).isEqualTo(2); - assertThat(handler.getEvents().get(0)).isEqualTo(new Event(PUT, "k1".getBytes(), "v1".getBytes())); - assertThat(handler.getEvents().get(1)).isEqualTo(new Event(PUT, "k2".getBytes(), "v2".getBytes())); - } - } - } - - @Test(expected = RocksDBException.class) - public void popSavePoint_withoutSavePoints() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - batch.popSavePoint(); - } - } - - @Test(expected = RocksDBException.class) - public void 
popSavePoint_withoutSavePoints_nested() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - - batch.setSavePoint(); - batch.popSavePoint(); - - // without previous corresponding setSavePoint - batch.popSavePoint(); - } - } - - @Test - public void maxBytes() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - batch.setMaxBytes(19); - - batch.put("k1".getBytes(), "v1".getBytes()); - } - } - - @Test(expected = RocksDBException.class) - public void maxBytes_over() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - batch.setMaxBytes(1); - - batch.put("k1".getBytes(), "v1".getBytes()); - } - } - - @Test - public void data() throws RocksDBException { - try (final WriteBatch batch1 = new WriteBatch()) { - batch1.delete("k0".getBytes()); - batch1.put("k1".getBytes(), "v1".getBytes()); - batch1.put("k2".getBytes(), "v2".getBytes()); - batch1.put("k3".getBytes(), "v3".getBytes()); - batch1.putLogData("log1".getBytes()); - batch1.merge("k2".getBytes(), "v22".getBytes()); - batch1.delete("k3".getBytes()); - - final byte[] serialized = batch1.data(); - - try(final WriteBatch batch2 = new WriteBatch(serialized)) { - assertThat(batch2.count()).isEqualTo(batch1.count()); - - try(final CapturingWriteBatchHandler handler1 = new CapturingWriteBatchHandler()) { - batch1.iterate(handler1); - - try (final CapturingWriteBatchHandler handler2 = new CapturingWriteBatchHandler()) { - batch2.iterate(handler2); - - assertThat(handler1.getEvents().equals(handler2.getEvents())).isTrue(); - } - } - } - } - } - - @Test - public void dataSize() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - batch.put("k1".getBytes(), "v1".getBytes()); - - assertThat(batch.getDataSize()).isEqualTo(19); - } - } - - @Test - public void hasPut() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.hasPut()).isFalse(); - - batch.put("k1".getBytes(), "v1".getBytes()); - - assertThat(batch.hasPut()).isTrue(); - } - } - - @Test - public void hasDelete() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.hasDelete()).isFalse(); - - batch.delete("k1".getBytes()); - - assertThat(batch.hasDelete()).isTrue(); - } - } - - @Test - public void hasSingleDelete() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.hasSingleDelete()).isFalse(); - - batch.singleDelete("k1".getBytes()); - - assertThat(batch.hasSingleDelete()).isTrue(); - } - } - - @Test - public void hasDeleteRange() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.hasDeleteRange()).isFalse(); - - batch.deleteRange("k1".getBytes(), "k2".getBytes()); - - assertThat(batch.hasDeleteRange()).isTrue(); - } - } - - @Test - public void hasBeginPrepareRange() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.hasBeginPrepare()).isFalse(); - } - } - - @Test - public void hasEndPrepareRange() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.hasEndPrepare()).isFalse(); - } - } - - @Test - public void hasCommit() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.hasCommit()).isFalse(); - } - } - - @Test - public void hasRollback() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.hasRollback()).isFalse(); - } - } - - @Test - 
public void walTerminationPoint() throws RocksDBException { - try (final WriteBatch batch = new WriteBatch()) { - WriteBatch.SavePoint walTerminationPoint = batch.getWalTerminationPoint(); - assertThat(walTerminationPoint.isCleared()).isTrue(); - - batch.put("k1".getBytes(UTF_8), "v1".getBytes(UTF_8)); - - batch.markWalTerminationPoint(); - - walTerminationPoint = batch.getWalTerminationPoint(); - assertThat(walTerminationPoint.getSize()).isEqualTo(19); - assertThat(walTerminationPoint.getCount()).isEqualTo(1); - assertThat(walTerminationPoint.getContentFlags()).isEqualTo(2); - } - } - - @Test - public void getWriteBatch() { - try (final WriteBatch batch = new WriteBatch()) { - assertThat(batch.getWriteBatch()).isEqualTo(batch); - } - } - - static byte[] getContents(final WriteBatch wb) { - return getContents(wb.nativeHandle_); - } - - static String getFromWriteBatch(final WriteBatch wb, final String key) - throws RocksDBException { - final WriteBatchGetter getter = - new WriteBatchGetter(key.getBytes(UTF_8)); - wb.iterate(getter); - if(getter.getValue() != null) { - return new String(getter.getValue(), UTF_8); - } else { - return null; - } - } - - private static native byte[] getContents(final long writeBatchHandle); -} - -/** - * Package-private class which provides java api to access - * c++ WriteBatchInternal. - */ -class WriteBatchTestInternalHelper { - static void setSequence(final WriteBatch wb, final long sn) { - setSequence(wb.nativeHandle_, sn); - } - - static long sequence(final WriteBatch wb) { - return sequence(wb.nativeHandle_); - } - - static void append(final WriteBatch wb1, final WriteBatch wb2) { - append(wb1.nativeHandle_, wb2.nativeHandle_); - } - - private static native void setSequence(final long writeBatchHandle, - final long sn); - - private static native long sequence(final long writeBatchHandle); - - private static native void append(final long writeBatchHandle1, - final long writeBatchHandle2); -} diff --git a/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java b/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java deleted file mode 100644 index c5090dbce..000000000 --- a/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
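Earlier in this diff, WriteBatchTest exercises save points through setSavePoint(), rollbackToSavePoint() and popSavePoint(). A compact sketch of the rollback flow, with an illustrative class name and keys that are not taken from the deleted test, is:

import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteBatch;

public class SavePointSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final WriteBatch batch = new WriteBatch()) {
      batch.put("k1".getBytes(), "v1".getBytes());

      batch.setSavePoint();                    // remember the current end of the batch
      batch.put("k2".getBytes(), "v2".getBytes());
      batch.delete("k1".getBytes());

      batch.rollbackToSavePoint();             // drop everything staged after the save point
      System.out.println(batch.count());       // 1 - only the original put remains
    }
  }
}

As the *_withoutSavePoints tests above assert, rollbackToSavePoint() and popSavePoint() throw RocksDBException when no matching setSavePoint() call has been made.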
-package org.rocksdb; - -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; - -import java.nio.ByteBuffer; -import java.util.*; -import java.util.concurrent.*; - -@RunWith(Parameterized.class) -public class WriteBatchThreadedTest { - - @Parameters(name = "WriteBatchThreadedTest(threadCount={0})") - public static Iterable data() { - return Arrays.asList(new Integer[]{1, 10, 50, 100}); - } - - @Parameter - public int threadCount; - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - RocksDB db; - - @Before - public void setUp() throws Exception { - RocksDB.loadLibrary(); - final Options options = new Options() - .setCreateIfMissing(true) - .setIncreaseParallelism(32); - db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()); - assert (db != null); - } - - @After - public void tearDown() throws Exception { - if (db != null) { - db.close(); - } - } - - @Test - public void threadedWrites() throws InterruptedException, ExecutionException { - final List> callables = new ArrayList<>(); - for (int i = 0; i < 100; i++) { - final int offset = i * 100; - callables.add(new Callable() { - @Override - public Void call() throws RocksDBException { - try (final WriteBatch wb = new WriteBatch(); - final WriteOptions w_opt = new WriteOptions()) { - for (int i = offset; i < offset + 100; i++) { - wb.put(ByteBuffer.allocate(4).putInt(i).array(), "parallel rocks test".getBytes()); - } - db.write(w_opt, wb); - } - return null; - } - }); - } - - //submit the callables - final ExecutorService executorService = - Executors.newFixedThreadPool(threadCount); - try { - final ExecutorCompletionService completionService = - new ExecutorCompletionService<>(executorService); - final Set> futures = new HashSet<>(); - for (final Callable callable : callables) { - futures.add(completionService.submit(callable)); - } - - while (futures.size() > 0) { - final Future future = completionService.take(); - futures.remove(future); - - try { - future.get(); - } catch (final ExecutionException e) { - for (final Future f : futures) { - f.cancel(true); - } - - throw e; - } - } - } finally { - executorService.shutdown(); - executorService.awaitTermination(10, TimeUnit.SECONDS); - } - } -} diff --git a/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java b/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java deleted file mode 100644 index b0a0cdc0e..000000000 --- a/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java +++ /dev/null @@ -1,1068 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
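The WriteBatchWithIndexTest that begins here centres on "read your own writes": an indexed batch can serve reads for keys that are still only staged. A minimal sketch of that flow, using only calls that also appear in the deleted test (the class name, path and keys are illustrative), is:

import org.rocksdb.Options;
import org.rocksdb.ReadOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteBatchWithIndex;
import org.rocksdb.WriteOptions;

public class ReadYourOwnWritesSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/wbwi-sketch");  // illustrative path
         final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true);  // keep only the latest update per key
         final ReadOptions readOptions = new ReadOptions();
         final WriteOptions writeOptions = new WriteOptions()) {
      db.put("k1".getBytes(), "committed".getBytes());

      // Stage an update without writing it to the database yet.
      wbwi.put("k1".getBytes(), "staged".getBytes());

      // The indexed batch is consulted before the database, so the staged value wins.
      final byte[] value = wbwi.getFromBatchAndDB(db, readOptions, "k1".getBytes());
      System.out.println(new String(value));  // staged

      db.write(writeOptions, wbwi);  // commit the staged update
      System.out.println(new String(db.get("k1".getBytes())));  // staged
    }
  }
}

newIteratorWithBase(...), exercised at length in the test below, layers the same staged updates over a database iterator instead of a point lookup.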
- -package org.rocksdb; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.ByteBufferAllocator; - -public class WriteBatchWithIndexTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - @Test - public void readYourOwnWrites() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - - final byte[] k1 = "key1".getBytes(); - final byte[] v1 = "value1".getBytes(); - final byte[] k2 = "key2".getBytes(); - final byte[] v2 = "value2".getBytes(); - - db.put(k1, v1); - db.put(k2, v2); - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final RocksIterator base = db.newIterator(); - final RocksIterator it = wbwi.newIteratorWithBase(base)) { - it.seek(k1); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k1); - assertThat(it.value()).isEqualTo(v1); - - it.seek(k2); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k2); - assertThat(it.value()).isEqualTo(v2); - - //put data to the write batch and make sure we can read it. - final byte[] k3 = "key3".getBytes(); - final byte[] v3 = "value3".getBytes(); - wbwi.put(k3, v3); - it.seek(k3); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k3); - assertThat(it.value()).isEqualTo(v3); - - //update k2 in the write batch and check the value - final byte[] v2Other = "otherValue2".getBytes(); - wbwi.put(k2, v2Other); - it.seek(k2); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k2); - assertThat(it.value()).isEqualTo(v2Other); - - //delete k1 and make sure we can read back the write - wbwi.delete(k1); - it.seek(k1); - assertThat(it.key()).isNotEqualTo(k1); - - //reinsert k1 and make sure we see the new value - final byte[] v1Other = "otherValue1".getBytes(); - wbwi.put(k1, v1Other); - it.seek(k1); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k1); - assertThat(it.value()).isEqualTo(v1Other); - - //single remove k3 and make sure we can read back the write - wbwi.singleDelete(k3); - it.seek(k3); - assertThat(it.isValid()).isEqualTo(false); - - //reinsert k3 and make sure we see the new value - final byte[] v3Other = "otherValue3".getBytes(); - wbwi.put(k3, v3Other); - it.seek(k3); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k3); - assertThat(it.value()).isEqualTo(v3Other); - } - } - } - - @Test - public void readYourOwnWritesCf() throws RocksDBException { - final List cfNames = - Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - - final List columnFamilyHandleList = new ArrayList<>(); - - // Test open database with column family names - try (final DBOptions options = - new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open( - options, dbFolder.getRoot().getAbsolutePath(), cfNames, columnFamilyHandleList)) { - final ColumnFamilyHandle newCf = columnFamilyHandleList.get(1); - - try { - 
final byte[] k1 = "key1".getBytes(); - final byte[] v1 = "value1".getBytes(); - final byte[] k2 = "key2".getBytes(); - final byte[] v2 = "value2".getBytes(); - - db.put(newCf, k1, v1); - db.put(newCf, k2, v2); - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final ReadOptions readOptions = new ReadOptions(); - final RocksIterator base = db.newIterator(newCf, readOptions); - final RocksIterator it = wbwi.newIteratorWithBase(newCf, base, readOptions)) { - it.seek(k1); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k1); - assertThat(it.value()).isEqualTo(v1); - - it.seek(k2); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k2); - assertThat(it.value()).isEqualTo(v2); - - // put data to the write batch and make sure we can read it. - final byte[] k3 = "key3".getBytes(); - final byte[] v3 = "value3".getBytes(); - wbwi.put(newCf, k3, v3); - it.seek(k3); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k3); - assertThat(it.value()).isEqualTo(v3); - - // update k2 in the write batch and check the value - final byte[] v2Other = "otherValue2".getBytes(); - wbwi.put(newCf, k2, v2Other); - it.seek(k2); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k2); - assertThat(it.value()).isEqualTo(v2Other); - - // delete k1 and make sure we can read back the write - wbwi.delete(newCf, k1); - it.seek(k1); - assertThat(it.key()).isNotEqualTo(k1); - - // reinsert k1 and make sure we see the new value - final byte[] v1Other = "otherValue1".getBytes(); - wbwi.put(newCf, k1, v1Other); - it.seek(k1); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k1); - assertThat(it.value()).isEqualTo(v1Other); - - // single remove k3 and make sure we can read back the write - wbwi.singleDelete(newCf, k3); - it.seek(k3); - assertThat(it.isValid()).isEqualTo(false); - - // reinsert k3 and make sure we see the new value - final byte[] v3Other = "otherValue3".getBytes(); - wbwi.put(newCf, k3, v3Other); - it.seek(k3); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(k3); - assertThat(it.value()).isEqualTo(v3Other); - } - } finally { - for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { - columnFamilyHandle.close(); - } - } - } - } - - @Test - public void readYourOwnWritesCfIterDirectBB() throws RocksDBException { - readYourOwnWritesCfIterDirect(ByteBufferAllocator.DIRECT); - } - - @Test - public void readYourOwnWritesCfIterIndirectBB() throws RocksDBException { - readYourOwnWritesCfIterDirect(ByteBufferAllocator.HEAP); - } - - public void readYourOwnWritesCfIterDirect(final ByteBufferAllocator byteBufferAllocator) - throws RocksDBException { - final List cfNames = - Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - - final List columnFamilyHandleList = new ArrayList<>(); - - // Test open database with column family names - try (final DBOptions options = - new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open( - options, dbFolder.getRoot().getAbsolutePath(), cfNames, columnFamilyHandleList)) { - final ColumnFamilyHandle newCf = columnFamilyHandleList.get(1); - - try { - final byte[] kv1 = "key1".getBytes(); - final byte[] vv1 = "value1".getBytes(); - final ByteBuffer k1 = byteBufferAllocator.allocate(12); - k1.put(kv1); - final byte[] kv2 = "key2".getBytes(); - final byte[] vv2 = "value2".getBytes(); - final ByteBuffer k2 = 
byteBufferAllocator.allocate(12); - k2.put(kv2); - - db.put(newCf, kv1, vv1); - db.put(newCf, kv2, vv2); - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final ReadOptions readOptions = new ReadOptions(); - final RocksIterator base = db.newIterator(newCf, readOptions); - final RocksIterator it = wbwi.newIteratorWithBase(newCf, base, readOptions)) { - k1.flip(); - it.seek(k1); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv1); - assertThat(it.value()).isEqualTo(vv1); - - k2.flip(); - it.seek(k2); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv2); - assertThat(it.value()).isEqualTo(vv2); - - final byte[] kv1point5 = "key1point5".getBytes(); - final ByteBuffer k1point5 = byteBufferAllocator.allocate(12); - k1point5.put(kv1point5); - - k1point5.flip(); - it.seek(k1point5); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv2); - assertThat(it.value()).isEqualTo(vv2); - - k1point5.flip(); - it.seekForPrev(k1point5); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv1); - assertThat(it.value()).isEqualTo(vv1); - - // put data to the write batch and make sure we can read it. - final byte[] kv3 = "key3".getBytes(); - final ByteBuffer k3 = byteBufferAllocator.allocate(12); - k3.put(kv3); - final byte[] vv3 = "value3".getBytes(); - wbwi.put(newCf, kv3, vv3); - k3.flip(); - it.seek(k3); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv3); - assertThat(it.value()).isEqualTo(vv3); - - // update k2 in the write batch and check the value - final byte[] v2Other = "otherValue2".getBytes(); - wbwi.put(newCf, kv2, v2Other); - k2.flip(); - it.seek(k2); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv2); - assertThat(it.value()).isEqualTo(v2Other); - - // delete k1 and make sure we can read back the write - wbwi.delete(newCf, kv1); - k1.flip(); - it.seek(k1); - assertThat(it.key()).isNotEqualTo(kv1); - - // reinsert k1 and make sure we see the new value - final byte[] v1Other = "otherValue1".getBytes(); - wbwi.put(newCf, kv1, v1Other); - k1.flip(); - it.seek(k1); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv1); - assertThat(it.value()).isEqualTo(v1Other); - - // single remove k3 and make sure we can read back the write - wbwi.singleDelete(newCf, kv3); - k3.flip(); - it.seek(k3); - assertThat(it.isValid()).isEqualTo(false); - - // reinsert k3 and make sure we see the new value - final byte[] v3Other = "otherValue3".getBytes(); - wbwi.put(newCf, kv3, v3Other); - k3.flip(); - it.seek(k3); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv3); - assertThat(it.value()).isEqualTo(v3Other); - } - } finally { - for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { - columnFamilyHandle.close(); - } - } - } - } - - @Test - public void readYourOwnWritesCfIterIndirect() throws RocksDBException { - final List cfNames = - Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - - final List columnFamilyHandleList = new ArrayList<>(); - - // Test open database with column family names - try (final DBOptions options = - new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open( - options, dbFolder.getRoot().getAbsolutePath(), cfNames, columnFamilyHandleList)) { - final ColumnFamilyHandle newCf = columnFamilyHandleList.get(1); - - try { - final byte[] kv1 = 
"key1".getBytes(); - final byte[] vv1 = "value1".getBytes(); - final ByteBuffer k1 = ByteBuffer.allocate(12); - k1.put(kv1).flip(); - final byte[] kv2 = "key2".getBytes(); - final byte[] vv2 = "value2".getBytes(); - final ByteBuffer k2 = ByteBuffer.allocate(12); - k2.put(kv2).flip(); - - db.put(newCf, kv1, vv1); - db.put(newCf, kv2, vv2); - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final ReadOptions readOptions = new ReadOptions(); - final RocksIterator base = db.newIterator(newCf, readOptions); - final RocksIterator it = wbwi.newIteratorWithBase(newCf, base, readOptions)) { - it.seek(k1); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv1); - assertThat(it.value()).isEqualTo(vv1); - - it.seek(k2); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv2); - assertThat(it.value()).isEqualTo(vv2); - - // put data to the write batch and make sure we can read it. - final byte[] kv3 = "key3".getBytes(); - final ByteBuffer k3 = ByteBuffer.allocate(12); - k3.put(kv3); - final byte[] vv3 = "value3".getBytes(); - wbwi.put(newCf, kv3, vv3); - k3.flip(); - it.seek(k3); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv3); - assertThat(it.value()).isEqualTo(vv3); - - // update k2 in the write batch and check the value - final byte[] v2Other = "otherValue2".getBytes(); - wbwi.put(newCf, kv2, v2Other); - k2.flip(); - it.seek(k2); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv2); - assertThat(it.value()).isEqualTo(v2Other); - - // delete k1 and make sure we can read back the write - wbwi.delete(newCf, kv1); - k1.flip(); - it.seek(k1); - assertThat(it.key()).isNotEqualTo(kv1); - - // reinsert k1 and make sure we see the new value - final byte[] v1Other = "otherValue1".getBytes(); - wbwi.put(newCf, kv1, v1Other); - k1.flip(); - it.seek(k1); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv1); - assertThat(it.value()).isEqualTo(v1Other); - - // single remove k3 and make sure we can read back the write - wbwi.singleDelete(newCf, kv3); - k3.flip(); - it.seek(k3); - assertThat(it.isValid()).isEqualTo(false); - - // reinsert k3 and make sure we see the new value - final byte[] v3Other = "otherValue3".getBytes(); - wbwi.put(newCf, kv3, v3Other); - k3.flip(); - it.seek(k3); - assertThat(it.isValid()).isTrue(); - assertThat(it.key()).isEqualTo(kv3); - assertThat(it.value()).isEqualTo(v3Other); - } - } finally { - for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { - columnFamilyHandle.close(); - } - } - } - } - - @Test - public void writeBatchWithIndex() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - - final byte[] k1 = "key1".getBytes(); - final byte[] v1 = "value1".getBytes(); - final byte[] k2 = "key2".getBytes(); - final byte[] v2 = "value2".getBytes(); - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(); - final WriteOptions wOpt = new WriteOptions()) { - wbwi.put(k1, v1); - wbwi.put(k2, v2); - - db.write(wOpt, wbwi); - } - - assertThat(db.get(k1)).isEqualTo(v1); - assertThat(db.get(k2)).isEqualTo(v2); - } - } - - @Test - public void write_writeBatchWithIndexDirect() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - final ByteBuffer k1 = 
ByteBuffer.allocateDirect(16); - final ByteBuffer v1 = ByteBuffer.allocateDirect(16); - final ByteBuffer k2 = ByteBuffer.allocateDirect(16); - final ByteBuffer v2 = ByteBuffer.allocateDirect(16); - k1.put("key1".getBytes()).flip(); - v1.put("value1".getBytes()).flip(); - k2.put("key2".getBytes()).flip(); - v2.put("value2".getBytes()).flip(); - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - wbwi.put(k1, v1); - assertThat(k1.position()).isEqualTo(4); - assertThat(k1.limit()).isEqualTo(4); - assertThat(v1.position()).isEqualTo(6); - assertThat(v1.limit()).isEqualTo(6); - - wbwi.put(k2, v2); - - db.write(new WriteOptions(), wbwi); - } - - assertThat(db.get("key1".getBytes())).isEqualTo("value1".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo("value2".getBytes()); - } - } - - @Test - public void iterator() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true)) { - - final String k1 = "key1"; - final String v1 = "value1"; - final String k2 = "key2"; - final String v2 = "value2"; - final String k3 = "key3"; - final String v3 = "value3"; - final String k4 = "key4"; - final String k5 = "key5"; - final String v8 = "value8"; - final byte[] k1b = k1.getBytes(UTF_8); - final byte[] v1b = v1.getBytes(UTF_8); - final byte[] k2b = k2.getBytes(UTF_8); - final byte[] v2b = v2.getBytes(UTF_8); - final byte[] k3b = k3.getBytes(UTF_8); - final byte[] v3b = v3.getBytes(UTF_8); - final byte[] k4b = k4.getBytes(UTF_8); - final byte[] k5b = k5.getBytes(UTF_8); - final byte[] v8b = v8.getBytes(UTF_8); - - final String k1point5 = "key1point5"; - final String k2point5 = "key2point5"; - - // add put records - wbwi.put(k1b, v1b); - wbwi.put(k2b, v2b); - wbwi.put(k3b, v3b); - - // add a deletion record - wbwi.delete(k4b); - - // add a single deletion record - wbwi.singleDelete(k5b); - - // add a log record - wbwi.putLogData(v8b); - - final WBWIRocksIterator.WriteEntry[] expected = { - new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.PUT, - new DirectSlice(k1), new DirectSlice(v1)), - new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.PUT, - new DirectSlice(k2), new DirectSlice(v2)), - new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.PUT, - new DirectSlice(k3), new DirectSlice(v3)), - new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.DELETE, - new DirectSlice(k4), DirectSlice.NONE), - new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.SINGLE_DELETE, - new DirectSlice(k5), DirectSlice.NONE), - }; - - try (final WBWIRocksIterator it = wbwi.newIterator()) { - //direct access - seek to key offsets - final int[] testOffsets = {2, 0, 3, 4, 1}; - for (final int testOffset : testOffsets) { - final byte[] key = toArray(expected[testOffset].getKey().data()); - - it.seek(key); - assertThat(it.isValid()).isTrue(); - - final WBWIRocksIterator.WriteEntry entry = it.entry(); - assertThat(entry).isEqualTo(expected[testOffset]); - } - - for (final int testOffset : testOffsets) { - final byte[] key = toArray(expected[testOffset].getKey().data()); - - // Direct buffer seek - final ByteBuffer db = expected[testOffset].getKey().data(); - it.seek(db); - assertThat(db.position()).isEqualTo(key.length); - assertThat(it.isValid()).isTrue(); - - final WBWIRocksIterator.WriteEntry entry = it.entry(); - assertThat(entry).isEqualTo(expected[testOffset]); - } - - for (final int testOffset : testOffsets) { - final byte[] key = toArray(expected[testOffset].getKey().data()); - - // Direct buffer seek - final ByteBuffer 
db = expected[testOffset].getKey().data(); - it.seekForPrev(db); - assertThat(db.position()).isEqualTo(key.length); - assertThat(it.isValid()).isTrue(); - - final WBWIRocksIterator.WriteEntry entry = it.entry(); - assertThat(entry).isEqualTo(expected[testOffset]); - } - - for (final int testOffset : testOffsets) { - final byte[] key = toArray(expected[testOffset].getKey().data()); - - // Indirect buffer seek - final ByteBuffer db = ByteBuffer.allocate(key.length); - System.arraycopy(key, 0, db.array(), 0, key.length); - it.seek(db); - assertThat(db.position()).isEqualTo(key.length); - assertThat(it.isValid()).isTrue(); - - final WBWIRocksIterator.WriteEntry entry = it.entry(); - assertThat(entry).isEqualTo(expected[testOffset]); - } - - for (final int testOffset : testOffsets) { - final byte[] key = toArray(expected[testOffset].getKey().data()); - - // Indirect buffer seek for prev - final ByteBuffer db = ByteBuffer.allocate(key.length); - System.arraycopy(key, 0, db.array(), 0, key.length); - it.seekForPrev(db); - assertThat(db.position()).isEqualTo(key.length); - assertThat(it.isValid()).isTrue(); - - final WBWIRocksIterator.WriteEntry entry = it.entry(); - assertThat(entry).isEqualTo(expected[testOffset]); - } - - { - it.seekForPrev(k2point5.getBytes()); - assertThat(it.isValid()).isTrue(); - final WBWIRocksIterator.WriteEntry entry = it.entry(); - assertThat(entry).isEqualTo(expected[1]); - } - - { - it.seekForPrev(k1point5.getBytes()); - assertThat(it.isValid()).isTrue(); - final WBWIRocksIterator.WriteEntry entry = it.entry(); - assertThat(entry).isEqualTo(expected[0]); - } - - { - final ByteBuffer db = ByteBuffer.allocate(k2point5.length()); - db.put(k2point5.getBytes()); - db.flip(); - it.seekForPrev(db); - assertThat(it.isValid()).isTrue(); - final WBWIRocksIterator.WriteEntry entry = it.entry(); - assertThat(entry).isEqualTo(expected[1]); - } - - { - final ByteBuffer db = ByteBuffer.allocate(k1point5.length()); - db.put(k1point5.getBytes()); - db.flip(); - it.seekForPrev(db); - assertThat(it.isValid()).isTrue(); - final WBWIRocksIterator.WriteEntry entry = it.entry(); - assertThat(entry).isEqualTo(expected[0]); - } - - //forward iterative access - int i = 0; - for (it.seekToFirst(); it.isValid(); it.next()) { - assertThat(it.entry()).isEqualTo(expected[i++]); - } - - //reverse iterative access - i = expected.length - 1; - for (it.seekToLast(); it.isValid(); it.prev()) { - assertThat(it.entry()).isEqualTo(expected[i--]); - } - } - } - } - - @Test - public void zeroByteTests() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true)) { - final byte[] zeroByteValue = new byte[]{0, 0}; - //add zero byte value - wbwi.put(zeroByteValue, zeroByteValue); - - final ByteBuffer buffer = ByteBuffer.allocateDirect(zeroByteValue.length); - buffer.put(zeroByteValue); - - final WBWIRocksIterator.WriteEntry expected = - new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.PUT, - new DirectSlice(buffer, zeroByteValue.length), - new DirectSlice(buffer, zeroByteValue.length)); - - try (final WBWIRocksIterator it = wbwi.newIterator()) { - it.seekToFirst(); - final WBWIRocksIterator.WriteEntry actual = it.entry(); - assertThat(actual.equals(expected)).isTrue(); - assertThat(it.entry().hashCode() == expected.hashCode()).isTrue(); - } - } - } - - @Test - public void savePoints() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) 
{ - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final ReadOptions readOptions = new ReadOptions()) { - wbwi.put("k1".getBytes(), "v1".getBytes()); - wbwi.put("k2".getBytes(), "v2".getBytes()); - wbwi.put("k3".getBytes(), "v3".getBytes()); - - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k1")) - .isEqualTo("v1"); - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k2")) - .isEqualTo("v2"); - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) - .isEqualTo("v3"); - - - wbwi.setSavePoint(); - - wbwi.delete("k2".getBytes()); - wbwi.put("k3".getBytes(), "v3-2".getBytes()); - - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k2")) - .isNull(); - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) - .isEqualTo("v3-2"); - - - wbwi.setSavePoint(); - - wbwi.put("k3".getBytes(), "v3-3".getBytes()); - wbwi.put("k4".getBytes(), "v4".getBytes()); - - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) - .isEqualTo("v3-3"); - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k4")) - .isEqualTo("v4"); - - - wbwi.rollbackToSavePoint(); - - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k2")) - .isNull(); - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) - .isEqualTo("v3-2"); - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k4")) - .isNull(); - - - wbwi.rollbackToSavePoint(); - - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k1")) - .isEqualTo("v1"); - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k2")) - .isEqualTo("v2"); - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) - .isEqualTo("v3"); - assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k4")) - .isNull(); - } - } - } - - @Test - public void restorePoints() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - - wbwi.put("k1".getBytes(UTF_8), "v1".getBytes(UTF_8)); - wbwi.put("k2".getBytes(UTF_8), "v2".getBytes(UTF_8)); - - wbwi.setSavePoint(); - - wbwi.put("k1".getBytes(UTF_8), "123456789".getBytes(UTF_8)); - wbwi.delete("k2".getBytes(UTF_8)); - - wbwi.rollbackToSavePoint(); - - try(final DBOptions options = new DBOptions()) { - assertThat(wbwi.getFromBatch(options,"k1".getBytes(UTF_8))).isEqualTo("v1".getBytes()); - assertThat(wbwi.getFromBatch(options,"k2".getBytes(UTF_8))).isEqualTo("v2".getBytes()); - } - } - } - - @Test(expected = RocksDBException.class) - public void restorePoints_withoutSavePoints() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - wbwi.rollbackToSavePoint(); - } - } - - @Test(expected = RocksDBException.class) - public void restorePoints_withoutSavePoints_nested() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - - wbwi.setSavePoint(); - wbwi.rollbackToSavePoint(); - - // without previous corresponding setSavePoint - wbwi.rollbackToSavePoint(); - } - } - - @Test - public void popSavePoint() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - - wbwi.put("k1".getBytes(), "v1".getBytes()); - wbwi.put("k2".getBytes(), "v2".getBytes()); - - wbwi.setSavePoint(); - - wbwi.put("k1".getBytes(), "123456789".getBytes()); - wbwi.delete("k2".getBytes()); - - wbwi.setSavePoint(); - - wbwi.popSavePoint(); - - wbwi.rollbackToSavePoint(); - - try(final DBOptions options = new DBOptions()) { - 
assertThat(wbwi.getFromBatch(options,"k1".getBytes(UTF_8))).isEqualTo("v1".getBytes()); - assertThat(wbwi.getFromBatch(options,"k2".getBytes(UTF_8))).isEqualTo("v2".getBytes()); - } - } - } - - @Test(expected = RocksDBException.class) - public void popSavePoint_withoutSavePoints() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - wbwi.popSavePoint(); - } - } - - @Test(expected = RocksDBException.class) - public void popSavePoint_withoutSavePoints_nested() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - - wbwi.setSavePoint(); - wbwi.popSavePoint(); - - // without previous corresponding setSavePoint - wbwi.popSavePoint(); - } - } - - @Test - public void maxBytes() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - wbwi.setMaxBytes(19); - - wbwi.put("k1".getBytes(), "v1".getBytes()); - } - } - - @Test(expected = RocksDBException.class) - public void maxBytes_over() throws RocksDBException { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - wbwi.setMaxBytes(1); - - wbwi.put("k1".getBytes(), "v1".getBytes()); - } - } - - @Test - public void getWriteBatch() { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { - - final WriteBatch wb = wbwi.getWriteBatch(); - assertThat(wb).isNotNull(); - assertThat(wb.isOwningHandle()).isFalse(); - } - } - - private static String getFromWriteBatchWithIndex(final RocksDB db, - final ReadOptions readOptions, final WriteBatchWithIndex wbwi, - final String skey) { - final byte[] key = skey.getBytes(); - try (final RocksIterator baseIterator = db.newIterator(readOptions); - final RocksIterator iterator = wbwi.newIteratorWithBase(baseIterator)) { - iterator.seek(key); - - // Arrays.equals(key, iterator.key()) ensures an exact match in Rocks, - // instead of a nearest match - return iterator.isValid() && - Arrays.equals(key, iterator.key()) ? 
- new String(iterator.value()) : null; - } - } - - @Test - public void getFromBatch() throws RocksDBException { - final byte[] k1 = "k1".getBytes(); - final byte[] k2 = "k2".getBytes(); - final byte[] k3 = "k3".getBytes(); - final byte[] k4 = "k4".getBytes(); - - final byte[] v1 = "v1".getBytes(); - final byte[] v2 = "v2".getBytes(); - final byte[] v3 = "v3".getBytes(); - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final DBOptions dbOptions = new DBOptions()) { - wbwi.put(k1, v1); - wbwi.put(k2, v2); - wbwi.put(k3, v3); - - assertThat(wbwi.getFromBatch(dbOptions, k1)).isEqualTo(v1); - assertThat(wbwi.getFromBatch(dbOptions, k2)).isEqualTo(v2); - assertThat(wbwi.getFromBatch(dbOptions, k3)).isEqualTo(v3); - assertThat(wbwi.getFromBatch(dbOptions, k4)).isNull(); - - wbwi.delete(k2); - - assertThat(wbwi.getFromBatch(dbOptions, k2)).isNull(); - } - } - - @Test - public void getFromBatchAndDB() throws RocksDBException { - final byte[] k1 = "k1".getBytes(); - final byte[] k2 = "k2".getBytes(); - final byte[] k3 = "k3".getBytes(); - final byte[] k4 = "k4".getBytes(); - - final byte[] v1 = "v1".getBytes(); - final byte[] v2 = "v2".getBytes(); - final byte[] v3 = "v3".getBytes(); - final byte[] v4 = "v4".getBytes(); - - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, - dbFolder.getRoot().getAbsolutePath())) { - - db.put(k1, v1); - db.put(k2, v2); - db.put(k4, v4); - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final DBOptions dbOptions = new DBOptions(); - final ReadOptions readOptions = new ReadOptions()) { - - assertThat(wbwi.getFromBatch(dbOptions, k1)).isNull(); - assertThat(wbwi.getFromBatch(dbOptions, k2)).isNull(); - assertThat(wbwi.getFromBatch(dbOptions, k4)).isNull(); - - wbwi.put(k3, v3); - - assertThat(wbwi.getFromBatch(dbOptions, k3)).isEqualTo(v3); - - assertThat(wbwi.getFromBatchAndDB(db, readOptions, k1)).isEqualTo(v1); - assertThat(wbwi.getFromBatchAndDB(db, readOptions, k2)).isEqualTo(v2); - assertThat(wbwi.getFromBatchAndDB(db, readOptions, k3)).isEqualTo(v3); - assertThat(wbwi.getFromBatchAndDB(db, readOptions, k4)).isEqualTo(v4); - - wbwi.delete(k4); - - assertThat(wbwi.getFromBatchAndDB(db, readOptions, k4)).isNull(); - } - } - } - private byte[] toArray(final ByteBuffer buf) { - final byte[] ary = new byte[buf.remaining()]; - buf.get(ary); - return ary; - } - - @Test - public void deleteRange() throws RocksDBException { - try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); - final WriteBatch batch = new WriteBatch(); - final WriteOptions wOpt = new WriteOptions()) { - db.put("key1".getBytes(), "value".getBytes()); - db.put("key2".getBytes(), "12345678".getBytes()); - db.put("key3".getBytes(), "abcdefg".getBytes()); - db.put("key4".getBytes(), "xyz".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); - assertThat(db.get("key3".getBytes())).isEqualTo("abcdefg".getBytes()); - assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); - - batch.deleteRange("key2".getBytes(), "key4".getBytes()); - db.write(wOpt, batch); - - assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); - assertThat(db.get("key2".getBytes())).isNull(); - assertThat(db.get("key3".getBytes())).isNull(); - assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); - } - } - - @Test - public void iteratorWithBaseOverwriteTrue() throws 
RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final RocksIterator baseIter = db.newIterator(); - final RocksIterator wbwiIter = wbwi.newIteratorWithBase(baseIter)) { - assertThat(wbwiIter).isNotNull(); - assertThat(wbwiIter.nativeHandle_).isGreaterThan(0); - wbwiIter.status(); - } - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final RocksIterator baseIter = db.newIterator(); - final ReadOptions readOptions = new ReadOptions(); - final RocksIterator wbwiIter = wbwi.newIteratorWithBase(baseIter, readOptions)) { - assertThat(wbwiIter).isNotNull(); - assertThat(wbwiIter.nativeHandle_).isGreaterThan(0); - wbwiIter.status(); - } - } - - final List cfNames = - Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = - new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open( - options, dbFolder.getRoot().getAbsolutePath(), cfNames, columnFamilyHandleList)) { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final RocksIterator baseIter = db.newIterator(); - final RocksIterator wbwiIter = - wbwi.newIteratorWithBase(columnFamilyHandleList.get(1), baseIter)) { - assertThat(wbwiIter).isNotNull(); - assertThat(wbwiIter.nativeHandle_).isGreaterThan(0); - wbwiIter.status(); - } - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); - final RocksIterator baseIter = db.newIterator(); - final ReadOptions readOptions = new ReadOptions(); - final RocksIterator wbwiIter = - wbwi.newIteratorWithBase(columnFamilyHandleList.get(1), baseIter, readOptions)) { - assertThat(wbwiIter).isNotNull(); - assertThat(wbwiIter.nativeHandle_).isGreaterThan(0); - wbwiIter.status(); - } - } - } - - @Test - public void iteratorWithBaseOverwriteFalse() throws RocksDBException { - try (final Options options = new Options().setCreateIfMissing(true); - final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(false); - final RocksIterator baseIter = db.newIterator(); - final RocksIterator wbwiIter = wbwi.newIteratorWithBase(baseIter)) { - assertThat(wbwiIter).isNotNull(); - assertThat(wbwiIter.nativeHandle_).isGreaterThan(0); - wbwiIter.status(); - } - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(false); - final RocksIterator baseIter = db.newIterator(); - final ReadOptions readOptions = new ReadOptions(); - final RocksIterator wbwiIter = wbwi.newIteratorWithBase(baseIter, readOptions)) { - assertThat(wbwiIter).isNotNull(); - assertThat(wbwiIter.nativeHandle_).isGreaterThan(0); - wbwiIter.status(); - } - } - - final List cfNames = - Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes())); - final List columnFamilyHandleList = new ArrayList<>(); - try (final DBOptions options = - new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); - final RocksDB db = RocksDB.open( - options, dbFolder.getRoot().getAbsolutePath(), cfNames, columnFamilyHandleList)) { - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(false); - final RocksIterator baseIter = 
db.newIterator(); - final RocksIterator wbwiIter = - wbwi.newIteratorWithBase(columnFamilyHandleList.get(1), baseIter)) { - assertThat(wbwiIter).isNotNull(); - assertThat(wbwiIter.nativeHandle_).isGreaterThan(0); - wbwiIter.status(); - } - - try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(false); - final RocksIterator baseIter = db.newIterator(); - final ReadOptions readOptions = new ReadOptions(); - final RocksIterator wbwiIter = - wbwi.newIteratorWithBase(columnFamilyHandleList.get(1), baseIter, readOptions)) { - assertThat(wbwiIter).isNotNull(); - assertThat(wbwiIter.nativeHandle_).isGreaterThan(0); - wbwiIter.status(); - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/WriteOptionsTest.java b/java/src/test/java/org/rocksdb/WriteOptionsTest.java deleted file mode 100644 index 735677cb7..000000000 --- a/java/src/test/java/org/rocksdb/WriteOptionsTest.java +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.Random; -import org.junit.ClassRule; -import org.junit.Test; - -public class WriteOptionsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - public static final Random rand = PlatformRandomHelper. - getPlatformSpecificRandomFactory(); - - @Test - public void writeOptions() { - try (final WriteOptions writeOptions = new WriteOptions()) { - - writeOptions.setSync(true); - assertThat(writeOptions.sync()).isTrue(); - writeOptions.setSync(false); - assertThat(writeOptions.sync()).isFalse(); - - writeOptions.setDisableWAL(true); - assertThat(writeOptions.disableWAL()).isTrue(); - writeOptions.setDisableWAL(false); - assertThat(writeOptions.disableWAL()).isFalse(); - - - writeOptions.setIgnoreMissingColumnFamilies(true); - assertThat(writeOptions.ignoreMissingColumnFamilies()).isTrue(); - writeOptions.setIgnoreMissingColumnFamilies(false); - assertThat(writeOptions.ignoreMissingColumnFamilies()).isFalse(); - - writeOptions.setNoSlowdown(true); - assertThat(writeOptions.noSlowdown()).isTrue(); - writeOptions.setNoSlowdown(false); - assertThat(writeOptions.noSlowdown()).isFalse(); - - writeOptions.setLowPri(true); - assertThat(writeOptions.lowPri()).isTrue(); - writeOptions.setLowPri(false); - assertThat(writeOptions.lowPri()).isFalse(); - - writeOptions.setMemtableInsertHintPerBatch(true); - assertThat(writeOptions.memtableInsertHintPerBatch()).isTrue(); - writeOptions.setMemtableInsertHintPerBatch(false); - assertThat(writeOptions.memtableInsertHintPerBatch()).isFalse(); - } - } - - @Test - public void copyConstructor() { - WriteOptions origOpts = new WriteOptions(); - origOpts.setDisableWAL(rand.nextBoolean()); - origOpts.setIgnoreMissingColumnFamilies(rand.nextBoolean()); - origOpts.setSync(rand.nextBoolean()); - origOpts.setMemtableInsertHintPerBatch(true); - WriteOptions copyOpts = new WriteOptions(origOpts); - assertThat(origOpts.disableWAL()).isEqualTo(copyOpts.disableWAL()); - assertThat(origOpts.ignoreMissingColumnFamilies()).isEqualTo( - copyOpts.ignoreMissingColumnFamilies()); - assertThat(origOpts.sync()).isEqualTo(copyOpts.sync()); - assertThat(origOpts.memtableInsertHintPerBatch()) - 
.isEqualTo(copyOpts.memtableInsertHintPerBatch()); - } -} diff --git a/java/src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java b/java/src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java deleted file mode 100644 index c4e4f25a0..000000000 --- a/java/src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb.test; - -import org.rocksdb.AbstractCompactionFilter; -import org.rocksdb.AbstractCompactionFilterFactory; -import org.rocksdb.RemoveEmptyValueCompactionFilter; - -/** - * Simple CompactionFilterFactory class used in tests. Generates RemoveEmptyValueCompactionFilters. - */ -public class RemoveEmptyValueCompactionFilterFactory extends AbstractCompactionFilterFactory { - @Override - public RemoveEmptyValueCompactionFilter createCompactionFilter(final AbstractCompactionFilter.Context context) { - return new RemoveEmptyValueCompactionFilter(); - } - - @Override - public String name() { - return "RemoveEmptyValueCompactionFilterFactory"; - } -} diff --git a/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java b/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java deleted file mode 100644 index 42d3148ef..000000000 --- a/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.test; - -import org.junit.internal.JUnitSystem; -import org.junit.internal.RealSystem; -import org.junit.internal.TextListener; -import org.junit.runner.Description; -import org.junit.runner.JUnitCore; -import org.junit.runner.Result; -import org.junit.runner.notification.Failure; -import org.rocksdb.RocksDB; - -import java.io.PrintStream; -import java.text.DecimalFormat; -import java.text.NumberFormat; -import java.util.ArrayList; -import java.util.List; - -import static org.rocksdb.test.RocksJunitRunner.RocksJunitListener.Status.*; - -/** - * Custom Junit Runner to print also Test classes - * and executed methods to command prompt. - */ -public class RocksJunitRunner { - - /** - * Listener which overrides default functionality - * to print class and method to system out. 
- */ - static class RocksJunitListener extends TextListener { - - private final static NumberFormat secsFormat = - new DecimalFormat("###,###.###"); - - private final PrintStream writer; - - private String currentClassName = null; - private String currentMethodName = null; - private Status currentStatus = null; - private long currentTestsStartTime; - private int currentTestsCount = 0; - private int currentTestsIgnoredCount = 0; - private int currentTestsFailureCount = 0; - private int currentTestsErrorCount = 0; - - enum Status { - IGNORED, - FAILURE, - ERROR, - OK - } - - /** - * RocksJunitListener constructor - * - * @param system JUnitSystem - */ - public RocksJunitListener(final JUnitSystem system) { - this(system.out()); - } - - public RocksJunitListener(final PrintStream writer) { - super(writer); - this.writer = writer; - } - - @Override - public void testRunStarted(final Description description) { - writer.format("Starting RocksJava Tests...%n"); - - } - - @Override - public void testStarted(final Description description) { - if(currentClassName == null - || !currentClassName.equals(description.getClassName())) { - if(currentClassName != null) { - printTestsSummary(); - } else { - currentTestsStartTime = System.currentTimeMillis(); - } - writer.format("%nRunning: %s%n", description.getClassName()); - currentClassName = description.getClassName(); - } - currentMethodName = description.getMethodName(); - currentStatus = OK; - currentTestsCount++; - } - - private void printTestsSummary() { - // print summary of last test set - writer.format("Tests run: %d, Failures: %d, Errors: %d, Ignored: %d, Time elapsed: %s sec%n", - currentTestsCount, - currentTestsFailureCount, - currentTestsErrorCount, - currentTestsIgnoredCount, - formatSecs(System.currentTimeMillis() - currentTestsStartTime)); - - // reset counters - currentTestsCount = 0; - currentTestsFailureCount = 0; - currentTestsErrorCount = 0; - currentTestsIgnoredCount = 0; - currentTestsStartTime = System.currentTimeMillis(); - } - - private static String formatSecs(final double milliseconds) { - final double seconds = milliseconds / 1000; - return secsFormat.format(seconds); - } - - @Override - public void testFailure(final Failure failure) { - if (failure.getException() != null - && failure.getException() instanceof AssertionError) { - currentStatus = FAILURE; - currentTestsFailureCount++; - } else { - currentStatus = ERROR; - currentTestsErrorCount++; - } - } - - @Override - public void testIgnored(final Description description) { - currentStatus = IGNORED; - currentTestsIgnoredCount++; - } - - @Override - public void testFinished(final Description description) { - if(currentStatus == OK) { - writer.format("\t%s OK%n",currentMethodName); - } else { - writer.format(" [%s] %s%n", currentStatus.name(), currentMethodName); - } - } - - @Override - public void testRunFinished(final Result result) { - printTestsSummary(); - super.testRunFinished(result); - } - } - - /** - * Main method to execute tests - * - * @param args Test classes as String names - */ - public static void main(final String[] args){ - final JUnitCore runner = new JUnitCore(); - final JUnitSystem system = new RealSystem(); - runner.addListener(new RocksJunitListener(system)); - try { - final List> classes = new ArrayList<>(); - for (final String arg : args) { - classes.add(Class.forName(arg)); - } - final Class[] clazzes = classes.toArray(new Class[classes.size()]); - final Result result = runner.run(clazzes); - if(!result.wasSuccessful()) { - System.exit(-1); - } - } 
catch (final ClassNotFoundException e) { - e.printStackTrace(); - System.exit(-2); - } - } -} diff --git a/java/src/test/java/org/rocksdb/test/TestableEventListener.java b/java/src/test/java/org/rocksdb/test/TestableEventListener.java deleted file mode 100644 index 865ad5cf7..000000000 --- a/java/src/test/java/org/rocksdb/test/TestableEventListener.java +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.test; - -import org.rocksdb.AbstractEventListener; - -public class TestableEventListener extends AbstractEventListener { - public TestableEventListener() { - super(); - } - - public TestableEventListener(final EnabledEventCallback... enabledEventCallbacks) { - super(enabledEventCallbacks); - } - - public void invokeAllCallbacks() { - invokeAllCallbacks(nativeHandle_); - } - - private static native void invokeAllCallbacks(final long handle); -} diff --git a/java/src/test/java/org/rocksdb/util/ByteBufferAllocator.java b/java/src/test/java/org/rocksdb/util/ByteBufferAllocator.java deleted file mode 100644 index 8d7956cf2..000000000 --- a/java/src/test/java/org/rocksdb/util/ByteBufferAllocator.java +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb.util; - -import java.nio.ByteBuffer; - -public interface ByteBufferAllocator { - ByteBuffer allocate(int capacity); - - ByteBufferAllocator DIRECT = new DirectByteBufferAllocator(); - ByteBufferAllocator HEAP = new HeapByteBufferAllocator(); -} diff --git a/java/src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java b/java/src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java deleted file mode 100644 index fb7239c92..000000000 --- a/java/src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb.util; - -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; -import org.rocksdb.*; - -import java.nio.ByteBuffer; -import java.nio.file.FileSystems; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; - -/** - * Similar to {@link IntComparatorTest}, but uses {@link BytewiseComparator} - * which ensures the correct ordering of positive integers. 
- */ -@RunWith(Parameterized.class) -public class BytewiseComparatorIntTest { - - // test with 500 random positive integer keys - private static final int TOTAL_KEYS = 500; - private static final byte[][] keys = new byte[TOTAL_KEYS][4]; - - @BeforeClass - public static void prepareKeys() { - final ByteBuffer buf = ByteBuffer.allocate(4); - final Random random = new Random(); - for (int i = 0; i < TOTAL_KEYS; i++) { - final int ri = random.nextInt() & Integer.MAX_VALUE; // the & ensures positive integer - buf.putInt(ri); - buf.flip(); - final byte[] key = buf.array(); - - // does key already exist (avoid duplicates) - if (keyExists(key, i)) { - i--; // loop round and generate a different key - } else { - System.arraycopy(key, 0, keys[i], 0, 4); - } - } - } - - private static boolean keyExists(final byte[] key, final int limit) { - for (int j = 0; j < limit; j++) { - if (Arrays.equals(key, keys[j])) { - return true; - } - } - return false; - } - - @Parameters(name = "{0}") - public static Iterable parameters() { - return Arrays.asList(new Object[][] { - { "non-direct_reused64_mutex", false, 64, ReusedSynchronisationType.MUTEX }, - { "direct_reused64_mutex", true, 64, ReusedSynchronisationType.MUTEX }, - { "non-direct_reused64_adaptive-mutex", false, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, - { "direct_reused64_adaptive-mutex", true, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, - { "non-direct_reused64_thread-local", false, 64, ReusedSynchronisationType.THREAD_LOCAL }, - { "direct_reused64_thread-local", true, 64, ReusedSynchronisationType.THREAD_LOCAL }, - { "non-direct_noreuse", false, -1, null }, - { "direct_noreuse", true, -1, null } - }); - } - - @Parameter(0) - public String name; - - @Parameter(1) - public boolean useDirectBuffer; - - @Parameter(2) - public int maxReusedBufferSize; - - @Parameter(3) - public ReusedSynchronisationType reusedSynchronisationType; - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - - @Test - public void javaComparatorDefaultCf() throws RocksDBException { - try (final ComparatorOptions options = new ComparatorOptions() - .setUseDirectBuffer(useDirectBuffer) - .setMaxReusedBufferSize(maxReusedBufferSize) - // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used - .setReusedSynchronisationType(reusedSynchronisationType == null ? ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); - final BytewiseComparator comparator = new BytewiseComparator(options)) { - - // test the round-tripability of keys written and read with the Comparator - testRoundtrip(FileSystems.getDefault().getPath( - dbFolder.getRoot().getAbsolutePath()), comparator); - } - } - - @Test - public void javaComparatorNamedCf() throws RocksDBException { - try (final ComparatorOptions options = new ComparatorOptions() - .setUseDirectBuffer(useDirectBuffer) - .setMaxReusedBufferSize(maxReusedBufferSize) - // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used - .setReusedSynchronisationType(reusedSynchronisationType == null ? 
ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); - final BytewiseComparator comparator = new BytewiseComparator(options)) { - - // test the round-tripability of keys written and read with the Comparator - testRoundtripCf(FileSystems.getDefault().getPath( - dbFolder.getRoot().getAbsolutePath()), comparator); - } - } - - /** - * Test which stores random keys into the database - * using an {@link IntComparator} - * it then checks that these keys are read back in - * ascending order - * - * @param db_path A path where we can store database - * files temporarily - * - * @param comparator the comparator - * - * @throws RocksDBException if a database error happens. - */ - private void testRoundtrip(final Path db_path, - final AbstractComparator comparator) throws RocksDBException { - try (final Options opt = new Options() - .setCreateIfMissing(true) - .setComparator(comparator)) { - - // store TOTAL_KEYS into the db - try (final RocksDB db = RocksDB.open(opt, db_path.toString())) { - for (int i = 0; i < TOTAL_KEYS; i++) { - db.put(keys[i], "value".getBytes(UTF_8)); - } - } - - // re-open db and read from start to end - // integer keys should be in ascending - // order as defined by IntComparator - final ByteBuffer key = ByteBuffer.allocate(4); - try (final RocksDB db = RocksDB.open(opt, db_path.toString()); - final RocksIterator it = db.newIterator()) { - it.seekToFirst(); - int lastKey = Integer.MIN_VALUE; - int count = 0; - for (it.seekToFirst(); it.isValid(); it.next()) { - key.put(it.key()); - key.flip(); - final int thisKey = key.getInt(); - key.clear(); - assertThat(thisKey).isGreaterThan(lastKey); - lastKey = thisKey; - count++; - } - assertThat(count).isEqualTo(TOTAL_KEYS); - } - } - } - - /** - * Test which stores random keys into a column family - * in the database - * using an {@link IntComparator} - * it then checks that these keys are read back in - * ascending order - * - * @param db_path A path where we can store database - * files temporarily - * - * @param comparator the comparator - * - * @throws RocksDBException if a database error happens. 
- */ - private void testRoundtripCf(final Path db_path, - final AbstractComparator comparator) throws RocksDBException { - - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), - new ColumnFamilyOptions() - .setComparator(comparator)) - ); - - final List cfHandles = new ArrayList<>(); - - try (final DBOptions opt = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true)) { - - try (final RocksDB db = RocksDB.open(opt, db_path.toString(), - cfDescriptors, cfHandles)) { - try { - assertThat(cfDescriptors.size()).isEqualTo(2); - assertThat(cfHandles.size()).isEqualTo(2); - - for (int i = 0; i < TOTAL_KEYS; i++) { - db.put(cfHandles.get(1), keys[i], "value".getBytes(UTF_8)); - } - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - cfHandles.clear(); - } - } - - // re-open db and read from start to end - // integer keys should be in ascending - // order as defined by SimpleIntComparator - final ByteBuffer key = ByteBuffer.allocate(4); - try (final RocksDB db = RocksDB.open(opt, db_path.toString(), - cfDescriptors, cfHandles); - final RocksIterator it = db.newIterator(cfHandles.get(1))) { - try { - assertThat(cfDescriptors.size()).isEqualTo(2); - assertThat(cfHandles.size()).isEqualTo(2); - - it.seekToFirst(); - int lastKey = Integer.MIN_VALUE; - int count = 0; - for (it.seekToFirst(); it.isValid(); it.next()) { - key.put(it.key()); - key.flip(); - final int thisKey = key.getInt(); - key.clear(); - assertThat(thisKey).isGreaterThan(lastKey); - lastKey = thisKey; - count++; - } - - assertThat(count).isEqualTo(TOTAL_KEYS); - - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - cfHandles.clear(); - for (final ColumnFamilyDescriptor cfDescriptor : cfDescriptors) { - cfDescriptor.getOptions().close(); - } - } - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java b/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java deleted file mode 100644 index 69f2c282b..000000000 --- a/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java +++ /dev/null @@ -1,531 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
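BytewiseComparatorIntTest above works because, for non-negative 32-bit integers, the big-endian 4-byte encoding written by ByteBuffer.putInt sorts in numeric order under an unsigned lexicographic (bytewise) comparison; that is why prepareKeys() masks each random value with Integer.MAX_VALUE. The following is a minimal JDK-only sketch of that property, not part of the RocksJava sources, and it assumes Java 9+ for Arrays.compareUnsigned.

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Random;

public class BytewiseOrderSketch {
  // Encode a non-negative int as its 4-byte big-endian representation.
  static byte[] encode(final int value) {
    return ByteBuffer.allocate(4).putInt(value).array();
  }

  public static void main(final String[] args) {
    final Random random = new Random(301);
    final byte[][] keys = new byte[500][];
    for (int i = 0; i < keys.length; i++) {
      keys[i] = encode(random.nextInt() & Integer.MAX_VALUE); // non-negative, as in prepareKeys()
    }

    // Sort the encoded keys bytewise (unsigned lexicographic), as a BytewiseComparator would.
    Arrays.sort(keys, Arrays::compareUnsigned);

    // The decoded sequence must be non-decreasing, mirroring the assertion in testRoundtrip().
    int last = Integer.MIN_VALUE;
    for (final byte[] key : keys) {
      final int decoded = ByteBuffer.wrap(key).getInt();
      if (decoded < last) {
        throw new AssertionError("bytewise order diverged from numeric order");
      }
      last = decoded;
    }
    System.out.println("bytewise order matches numeric order for non-negative ints");
  }
}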
- -package org.rocksdb.util; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.rocksdb.*; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.file.*; -import java.util.*; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.*; -import static org.rocksdb.util.ByteUtil.bytes; - -/** - * This is a direct port of various C++ - * tests from db/comparator_db_test.cc - * and some code to adapt it to RocksJava - */ -public class BytewiseComparatorTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - private List source_strings = Arrays.asList("b", "d", "f", "h", "j", "l"); - private List interleaving_strings = Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"); - - /** - * Open the database using the C++ BytewiseComparatorImpl - * and test the results against our Java BytewiseComparator - */ - @Test - public void java_vs_cpp_bytewiseComparator() - throws IOException, RocksDBException { - for(int rand_seed = 301; rand_seed < 306; rand_seed++) { - final Path dbDir = - FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); - try(final RocksDB db = openDatabase(dbDir, - BuiltinComparator.BYTEWISE_COMPARATOR)) { - - final Random rnd = new Random(rand_seed); - try(final ComparatorOptions copt2 = new ComparatorOptions() - .setUseDirectBuffer(false); - final AbstractComparator comparator2 = new BytewiseComparator(copt2)) { - final java.util.Comparator jComparator = toJavaComparator(comparator2); - doRandomIterationTest( - db, - jComparator, - rnd, - 8, 100, 3 - ); - } - } - } - } - - /** - * Open the database using the Java BytewiseComparator - * and test the results against another Java BytewiseComparator - */ - @Test - public void java_vs_java_bytewiseComparator() - throws IOException, RocksDBException { - for(int rand_seed = 301; rand_seed < 306; rand_seed++) { - final Path dbDir = - FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); - try(final ComparatorOptions copt = new ComparatorOptions() - .setUseDirectBuffer(false); - final AbstractComparator comparator = new BytewiseComparator(copt); - final RocksDB db = openDatabase(dbDir, comparator)) { - - final Random rnd = new Random(rand_seed); - try(final ComparatorOptions copt2 = new ComparatorOptions() - .setUseDirectBuffer(false); - final AbstractComparator comparator2 = new BytewiseComparator(copt2)) { - final java.util.Comparator jComparator = toJavaComparator(comparator2); - doRandomIterationTest( - db, - jComparator, - rnd, - 8, 100, 3 - ); - } - } - } - } - - /** - * Open the database using the C++ BytewiseComparatorImpl - * and test the results against our Java DirectBytewiseComparator - */ - @Test - public void java_vs_cpp_directBytewiseComparator() - throws IOException, RocksDBException { - for(int rand_seed = 301; rand_seed < 306; rand_seed++) { - final Path dbDir = - FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); - try(final RocksDB db = openDatabase(dbDir, - BuiltinComparator.BYTEWISE_COMPARATOR)) { - - final Random rnd = new Random(rand_seed); - try(final ComparatorOptions copt2 = new ComparatorOptions() - .setUseDirectBuffer(true); - final AbstractComparator comparator2 = new BytewiseComparator(copt2)) { - final java.util.Comparator jComparator = 
toJavaComparator(comparator2); - doRandomIterationTest( - db, - jComparator, - rnd, - 8, 100, 3 - ); - } - } - } - } - - /** - * Open the database using the Java DirectBytewiseComparator - * and test the results against another Java DirectBytewiseComparator - */ - @Test - public void java_vs_java_directBytewiseComparator() - throws IOException, RocksDBException { - for(int rand_seed = 301; rand_seed < 306; rand_seed++) { - final Path dbDir = - FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); - try (final ComparatorOptions copt = new ComparatorOptions() - .setUseDirectBuffer(true); - final AbstractComparator comparator = new BytewiseComparator(copt); - final RocksDB db = openDatabase(dbDir, comparator)) { - - final Random rnd = new Random(rand_seed); - try(final ComparatorOptions copt2 = new ComparatorOptions() - .setUseDirectBuffer(true); - final AbstractComparator comparator2 = new BytewiseComparator(copt2)) { - final java.util.Comparator jComparator = toJavaComparator(comparator2); - doRandomIterationTest( - db, - jComparator, - rnd, - 8, 100, 3 - ); - } - } - } - } - - /** - * Open the database using the C++ ReverseBytewiseComparatorImpl - * and test the results against our Java ReverseBytewiseComparator - */ - @Test - public void java_vs_cpp_reverseBytewiseComparator() - throws IOException, RocksDBException { - for(int rand_seed = 301; rand_seed < 306; rand_seed++) { - final Path dbDir = - FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); - try(final RocksDB db = openDatabase(dbDir, - BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR)) { - - final Random rnd = new Random(rand_seed); - try(final ComparatorOptions copt2 = new ComparatorOptions() - .setUseDirectBuffer(false); - final AbstractComparator comparator2 = new ReverseBytewiseComparator(copt2)) { - final java.util.Comparator jComparator = toJavaComparator(comparator2); - doRandomIterationTest( - db, - jComparator, - rnd, - 8, 100, 3 - ); - } - } - } - } - - /** - * Open the database using the Java ReverseBytewiseComparator - * and test the results against another Java ReverseBytewiseComparator - */ - @Test - public void java_vs_java_reverseBytewiseComparator() - throws IOException, RocksDBException { - for(int rand_seed = 301; rand_seed < 306; rand_seed++) { - final Path dbDir = - FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); - try (final ComparatorOptions copt = new ComparatorOptions() - .setUseDirectBuffer(false); - final AbstractComparator comparator = new ReverseBytewiseComparator(copt); - final RocksDB db = openDatabase(dbDir, comparator)) { - - final Random rnd = new Random(rand_seed); - try(final ComparatorOptions copt2 = new ComparatorOptions() - .setUseDirectBuffer(false); - final AbstractComparator comparator2 = new ReverseBytewiseComparator(copt2)) { - final java.util.Comparator jComparator = toJavaComparator(comparator2); - doRandomIterationTest( - db, - jComparator, - rnd, - 8, 100, 3 - ); - } - } - } - } - - private void doRandomIterationTest( - final RocksDB db, final java.util.Comparator javaComparator, - final Random rnd, - final int num_writes, final int num_iter_ops, - final int num_trigger_flush) throws RocksDBException { - - final TreeMap map = new TreeMap<>(javaComparator); - - try (final FlushOptions flushOptions = new FlushOptions(); - final WriteOptions writeOptions = new WriteOptions()) { - for (int i = 0; i < num_writes; i++) { - if (num_trigger_flush > 0 && i != 0 && i % num_trigger_flush == 0) { - db.flush(flushOptions); - } - - 
final int type = rnd.nextInt(2); - final int index = rnd.nextInt(source_strings.size()); - final String key = source_strings.get(index); - switch (type) { - case 0: - // put - map.put(key, key); - db.put(writeOptions, bytes(key), bytes(key)); - break; - case 1: - // delete - if (map.containsKey(key)) { - map.remove(key); - } - db.delete(writeOptions, bytes(key)); - break; - - default: - fail("Should not be able to generate random outside range 1..2"); - } - } - } - - try (final ReadOptions readOptions = new ReadOptions(); - final RocksIterator iter = db.newIterator(readOptions)) { - final KVIter result_iter = new KVIter<>(map); - - boolean is_valid = false; - for (int i = 0; i < num_iter_ops; i++) { - // Random walk and make sure iter and result_iter returns the - // same key and value - final int type = rnd.nextInt(8); - iter.status(); - switch (type) { - case 0: - // Seek to First - iter.seekToFirst(); - result_iter.seekToFirst(); - break; - case 1: - // Seek to last - iter.seekToLast(); - result_iter.seekToLast(); - break; - case 2: { - // Seek to random (existing or non-existing) key - final int key_idx = rnd.nextInt(interleaving_strings.size()); - final String key = interleaving_strings.get(key_idx); - iter.seek(bytes(key)); - result_iter.seek(bytes(key)); - break; - } - case 3: { - // SeekForPrev to random (existing or non-existing) key - final int key_idx = rnd.nextInt(interleaving_strings.size()); - final String key = interleaving_strings.get(key_idx); - iter.seekForPrev(bytes(key)); - result_iter.seekForPrev(bytes(key)); - break; - } - case 4: - // Next - if (is_valid) { - iter.next(); - result_iter.next(); - } else { - continue; - } - break; - case 5: - // Prev - if (is_valid) { - iter.prev(); - result_iter.prev(); - } else { - continue; - } - break; - case 6: - // Refresh - iter.refresh(); - result_iter.refresh(); - iter.seekToFirst(); - result_iter.seekToFirst(); - break; - default: { - assert (type == 7); - final int key_idx = rnd.nextInt(source_strings.size()); - final String key = source_strings.get(key_idx); - final byte[] result = db.get(readOptions, bytes(key)); - if (!map.containsKey(key)) { - assertNull(result); - } else { - assertArrayEquals(bytes(map.get(key)), result); - } - break; - } - } - - assertEquals(result_iter.isValid(), iter.isValid()); - - is_valid = iter.isValid(); - - if (is_valid) { - assertArrayEquals(bytes(result_iter.key()), iter.key()); - - //note that calling value on a non-valid iterator from the Java API - //results in a SIGSEGV - assertArrayEquals(bytes(result_iter.value()), iter.value()); - } - } - } - } - - /** - * Open the database using a C++ Comparator - */ - private RocksDB openDatabase( - final Path dbDir, final BuiltinComparator cppComparator) - throws IOException, RocksDBException { - final Options options = new Options() - .setCreateIfMissing(true) - .setComparator(cppComparator); - return RocksDB.open(options, dbDir.toAbsolutePath().toString()); - } - - /** - * Open the database using a Java Comparator - */ - private RocksDB openDatabase( - final Path dbDir, - final AbstractComparator javaComparator) - throws IOException, RocksDBException { - final Options options = new Options() - .setCreateIfMissing(true) - .setComparator(javaComparator); - return RocksDB.open(options, dbDir.toAbsolutePath().toString()); - } - - private java.util.Comparator toJavaComparator( - final AbstractComparator rocksComparator) { - return new java.util.Comparator() { - @Override - public int compare(final String s1, final String s2) { - final ByteBuffer bufS1; 
- final ByteBuffer bufS2; - if (rocksComparator.usingDirectBuffers()) { - bufS1 = ByteBuffer.allocateDirect(s1.length()); - bufS2 = ByteBuffer.allocateDirect(s2.length()); - } else { - bufS1 = ByteBuffer.allocate(s1.length()); - bufS2 = ByteBuffer.allocate(s2.length()); - } - bufS1.put(bytes(s1)); - bufS1.flip(); - bufS2.put(bytes(s2)); - bufS2.flip(); - return rocksComparator.compare(bufS1, bufS2); - } - }; - } - - private static class KVIter implements RocksIteratorInterface { - - private final List> entries; - private final java.util.Comparator comparator; - private int offset = -1; - - private int lastPrefixMatchIdx = -1; - private int lastPrefixMatch = 0; - - public KVIter(final TreeMap map) { - this.entries = new ArrayList<>(); - entries.addAll(map.entrySet()); - this.comparator = map.comparator(); - } - - - @Override - public boolean isValid() { - return offset > -1 && offset < entries.size(); - } - - @Override - public void seekToFirst() { - offset = 0; - } - - @Override - public void seekToLast() { - offset = entries.size() - 1; - } - - @SuppressWarnings("unchecked") - @Override - public void seek(final byte[] target) { - for(offset = 0; offset < entries.size(); offset++) { - if(comparator.compare(entries.get(offset).getKey(), - (K)new String(target, UTF_8)) >= 0) { - return; - } - } - } - - @SuppressWarnings("unchecked") - @Override - public void seekForPrev(final byte[] target) { - for(offset = entries.size()-1; offset >= 0; offset--) { - if(comparator.compare(entries.get(offset).getKey(), - (K)new String(target, UTF_8)) <= 0) { - return; - } - } - } - - /** - * Is `a` a prefix of `b` - * - * @return The length of the matching prefix, or 0 if it is not a prefix - */ - private int isPrefix(final byte[] a, final byte[] b) { - if(b.length >= a.length) { - for(int i = 0; i < a.length; i++) { - if(a[i] != b[i]) { - return i; - } - } - return a.length; - } else { - return 0; - } - } - - @Override - public void next() { - if(offset < entries.size()) { - offset++; - } - } - - @Override - public void prev() { - if(offset >= 0) { - offset--; - } - } - - @Override - public void refresh() throws RocksDBException { - offset = -1; - } - - @Override - public void status() throws RocksDBException { - if(offset < 0 || offset >= entries.size()) { - throw new RocksDBException("Index out of bounds. Size is: " + - entries.size() + ", offset is: " + offset); - } - } - - @SuppressWarnings("unchecked") - public K key() { - if(!isValid()) { - if(entries.isEmpty()) { - return (K)""; - } else if(offset == -1){ - return entries.get(0).getKey(); - } else if(offset == entries.size()) { - return entries.get(offset - 1).getKey(); - } else { - return (K)""; - } - } else { - return entries.get(offset).getKey(); - } - } - - @SuppressWarnings("unchecked") - public V value() { - if(!isValid()) { - return (V)""; - } else { - return entries.get(offset).getValue(); - } - } - - @Override - public void seek(ByteBuffer target) { - throw new IllegalAccessError("Not implemented"); - } - - @Override - public void seekForPrev(ByteBuffer target) { - throw new IllegalAccessError("Not implemented"); - } - } -} diff --git a/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java b/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java deleted file mode 100644 index 8ea104332..000000000 --- a/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
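The toJavaComparator() helper in BytewiseComparatorTest above adapts a RocksJava AbstractComparator to java.util.Comparator by copying each String into heap or direct ByteBuffers (per usingDirectBuffers()) and delegating to compare(ByteBuffer, ByteBuffer). Below is a standalone sketch of the same adaptation; it assumes the RocksJava classes behave exactly as the test uses them (ComparatorOptions, BytewiseComparator, AbstractComparator.compare) and that RocksDB.loadLibrary() can locate the native library.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Comparator;
import org.rocksdb.AbstractComparator;
import org.rocksdb.ComparatorOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.util.BytewiseComparator;

public class JavaComparatorWrapperSketch {
  public static void main(final String[] args) {
    RocksDB.loadLibrary(); // native library must be available, as RocksNativeLibraryResource arranges in the tests

    try (final ComparatorOptions options = new ComparatorOptions().setUseDirectBuffer(false);
         final AbstractComparator comparator = new BytewiseComparator(options)) {
      // Adapt the RocksJava comparator to java.util.Comparator, as toJavaComparator() does for heap buffers.
      final Comparator<String> javaComparator = (s1, s2) -> {
        final byte[] b1 = s1.getBytes(StandardCharsets.UTF_8);
        final byte[] b2 = s2.getBytes(StandardCharsets.UTF_8);
        final ByteBuffer buf1 = ByteBuffer.allocate(b1.length).put(b1);
        final ByteBuffer buf2 = ByteBuffer.allocate(b2.length).put(b2);
        buf1.flip();
        buf2.flip();
        return comparator.compare(buf1, buf2);
      };

      System.out.println(javaComparator.compare("b", "d")); // expected to be negative
    }
  }
}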
-package org.rocksdb.util; - -import org.rocksdb.RocksDBException; -import org.rocksdb.WriteBatch; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; - -/** - * A simple WriteBatch Handler which adds a record - * of each event that it receives to a list - */ -public class CapturingWriteBatchHandler extends WriteBatch.Handler { - - private final List events = new ArrayList<>(); - - /** - * Returns a copy of the current events list - * - * @return a list of the events which have happened upto now - */ - public List getEvents() { - return new ArrayList<>(events); - } - - @Override - public void put(final int columnFamilyId, final byte[] key, - final byte[] value) { - events.add(new Event(Action.PUT, columnFamilyId, key, value)); - } - - @Override - public void put(final byte[] key, final byte[] value) { - events.add(new Event(Action.PUT, key, value)); - } - - @Override - public void merge(final int columnFamilyId, final byte[] key, - final byte[] value) { - events.add(new Event(Action.MERGE, columnFamilyId, key, value)); - } - - @Override - public void merge(final byte[] key, final byte[] value) { - events.add(new Event(Action.MERGE, key, value)); - } - - @Override - public void delete(final int columnFamilyId, final byte[] key) { - events.add(new Event(Action.DELETE, columnFamilyId, key, (byte[])null)); - } - - @Override - public void delete(final byte[] key) { - events.add(new Event(Action.DELETE, key, (byte[])null)); - } - - @Override - public void singleDelete(final int columnFamilyId, final byte[] key) { - events.add(new Event(Action.SINGLE_DELETE, - columnFamilyId, key, (byte[])null)); - } - - @Override - public void singleDelete(final byte[] key) { - events.add(new Event(Action.SINGLE_DELETE, key, (byte[])null)); - } - - @Override - public void deleteRange(final int columnFamilyId, final byte[] beginKey, - final byte[] endKey) { - events.add(new Event(Action.DELETE_RANGE, columnFamilyId, beginKey, - endKey)); - } - - @Override - public void deleteRange(final byte[] beginKey, final byte[] endKey) { - events.add(new Event(Action.DELETE_RANGE, beginKey, endKey)); - } - - @Override - public void logData(final byte[] blob) { - events.add(new Event(Action.LOG, (byte[])null, blob)); - } - - @Override - public void putBlobIndex(final int columnFamilyId, final byte[] key, - final byte[] value) { - events.add(new Event(Action.PUT_BLOB_INDEX, key, value)); - } - - @Override - public void markBeginPrepare() throws RocksDBException { - events.add(new Event(Action.MARK_BEGIN_PREPARE, (byte[])null, - (byte[])null)); - } - - @Override - public void markEndPrepare(final byte[] xid) throws RocksDBException { - events.add(new Event(Action.MARK_END_PREPARE, (byte[])null, - (byte[])null)); - } - - @Override - public void markNoop(final boolean emptyBatch) throws RocksDBException { - events.add(new Event(Action.MARK_NOOP, (byte[])null, (byte[])null)); - } - - @Override - public void markRollback(final byte[] xid) throws RocksDBException { - events.add(new Event(Action.MARK_ROLLBACK, (byte[])null, (byte[])null)); - } - - @Override - public void markCommit(final byte[] xid) throws RocksDBException { - events.add(new Event(Action.MARK_COMMIT, (byte[])null, (byte[])null)); - } - - @Override - public void markCommitWithTimestamp(final byte[] xid, final byte[] ts) throws RocksDBException { - events.add(new Event(Action.MARK_COMMIT_WITH_TIMESTAMP, (byte[]) null, (byte[]) null)); - } - - public static class Event { - public final Action action; - public final 
int columnFamilyId; - public final byte[] key; - public final byte[] value; - - public Event(final Action action, final byte[] key, final byte[] value) { - this(action, 0, key, value); - } - - public Event(final Action action, final int columnFamilyId, final byte[] key, - final byte[] value) { - this.action = action; - this.columnFamilyId = columnFamilyId; - this.key = key; - this.value = value; - } - - @Override - public boolean equals(final Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - final Event event = (Event) o; - return columnFamilyId == event.columnFamilyId && - action == event.action && - ((key == null && event.key == null) - || Arrays.equals(key, event.key)) && - ((value == null && event.value == null) - || Arrays.equals(value, event.value)); - } - - @Override - public int hashCode() { - int result = Objects.hash(action, columnFamilyId); - result = 31 * result + Arrays.hashCode(key); - result = 31 * result + Arrays.hashCode(value); - return result; - } - } - - /** - * Enumeration of Write Batch - * event actions - */ - public enum Action { - PUT, - MERGE, - DELETE, - SINGLE_DELETE, - DELETE_RANGE, - LOG, - PUT_BLOB_INDEX, - MARK_BEGIN_PREPARE, - MARK_END_PREPARE, - MARK_NOOP, - MARK_COMMIT, - MARK_ROLLBACK, - MARK_COMMIT_WITH_TIMESTAMP - } -} diff --git a/java/src/test/java/org/rocksdb/util/DirectByteBufferAllocator.java b/java/src/test/java/org/rocksdb/util/DirectByteBufferAllocator.java deleted file mode 100644 index d26fb578b..000000000 --- a/java/src/test/java/org/rocksdb/util/DirectByteBufferAllocator.java +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb.util; - -import java.nio.ByteBuffer; - -public final class DirectByteBufferAllocator implements ByteBufferAllocator { - DirectByteBufferAllocator(){}; - - @Override - public ByteBuffer allocate(final int capacity) { - return ByteBuffer.allocateDirect(capacity); - } -} diff --git a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java b/java/src/test/java/org/rocksdb/util/EnvironmentTest.java deleted file mode 100644 index ae340e06d..000000000 --- a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright (c) 2014, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
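CapturingWriteBatchHandler above records one Event per callback so a test can assert on the exact sequence of operations a WriteBatch replays. The usage sketch below assumes WriteBatch.iterate(Handler) replays the batch contents through the handler callbacks (the pattern this helper class exists to support) and that the native library has been loaded; it is illustrative, not one of the deleted tests.

import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.WriteBatch;
import org.rocksdb.util.CapturingWriteBatchHandler;

public class WriteBatchHandlerSketch {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();

    try (final WriteBatch batch = new WriteBatch();
         final CapturingWriteBatchHandler handler = new CapturingWriteBatchHandler()) {
      batch.put("k1".getBytes(), "v1".getBytes());
      batch.delete("k2".getBytes());

      // Replay the batch through the handler; each callback appends an Event to the list.
      batch.iterate(handler);

      for (final CapturingWriteBatchHandler.Event event : handler.getEvents()) {
        System.out.println(event.action + " cf=" + event.columnFamilyId);
      }
    }
  }
}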
-package org.rocksdb.util; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.hamcrest.Matchers.is; - -import java.lang.reflect.Field; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -public class EnvironmentTest { - private final static String ARCH_FIELD_NAME = "ARCH"; - private final static String OS_FIELD_NAME = "OS"; - - private final static String MUSL_ENVIRONMENT_FIELD_NAME = "MUSL_ENVIRONMENT"; - private final static String MUSL_LIBC_FIELD_NAME = "MUSL_LIBC"; - - private static String INITIAL_OS; - private static String INITIAL_ARCH; - private static String INITIAL_MUSL_ENVIRONMENT; - private static Boolean INITIAL_MUSL_LIBC; - - @BeforeClass - public static void saveState() { - INITIAL_ARCH = getEnvironmentClassField(ARCH_FIELD_NAME); - INITIAL_OS = getEnvironmentClassField(OS_FIELD_NAME); - INITIAL_MUSL_LIBC = getEnvironmentClassField(MUSL_LIBC_FIELD_NAME); - INITIAL_MUSL_ENVIRONMENT = getEnvironmentClassField(MUSL_ENVIRONMENT_FIELD_NAME); - } - - @Test - public void mac32() { - setEnvironmentClassFields("mac", "32"); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".jnilib"); - assertThat(Environment.getJniLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni-osx.jnilib"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni.dylib"); - } - - @Test - public void mac64_x86_64() { - setEnvironmentClassFields("mac", "x86_64"); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".jnilib"); - assertThat(Environment.getJniLibraryFileName("rocksdb")) - .isEqualTo("librocksdbjni-osx-x86_64.jnilib"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")) - .isEqualTo("librocksdbjni-osx.jnilib"); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni.dylib"); - } - - @Test - public void macAarch64() { - setEnvironmentClassFields("mac", "aarch64"); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()).isEqualTo(".jnilib"); - assertThat(Environment.getJniLibraryFileName("rocksdb")) - .isEqualTo("librocksdbjni-osx-arm64.jnilib"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")) - .isEqualTo("librocksdbjni-osx.jnilib"); - assertThat(Environment.getSharedLibraryFileName("rocksdb")).isEqualTo("librocksdbjni.dylib"); - } - - @Test - public void nix32() { - // Linux - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); - setEnvironmentClassFields("Linux", "32"); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".so"); - assertThat(Environment.getJniLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni-linux32.so"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni.so"); - // Linux musl-libc (Alpine) - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, true); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".so"); - assertThat(Environment.getJniLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni-linux32-musl.so"); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). 
- isEqualTo("librocksdbjni.so"); - // UNIX - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); - setEnvironmentClassFields("Unix", "32"); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".so"); - assertThat(Environment.getJniLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni-linux32.so"); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni.so"); - } - - @Test(expected = UnsupportedOperationException.class) - public void aix32() { - // AIX - setEnvironmentClassFields("aix", "32"); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".so"); - assertThat(Environment.getJniLibraryFileName("rocksdb")).isEqualTo("blah"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - } - - @Test - public void nix64() { - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); - setEnvironmentClassFields("Linux", "x64"); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".so"); - assertThat(Environment.getJniLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni-linux64.so"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni.so"); - // Linux musl-libc (Alpine) - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, true); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".so"); - assertThat(Environment.getJniLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni-linux64-musl.so"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni.so"); - // UNIX - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); - setEnvironmentClassFields("Unix", "x64"); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".so"); - assertThat(Environment.getJniLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni-linux64.so"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni.so"); - // AIX - setEnvironmentClassFields("aix", "x64"); - assertThat(Environment.isWindows()).isFalse(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".so"); - assertThat(Environment.getJniLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni-aix64.so"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni.so"); - } - - @Test - public void detectWindows(){ - setEnvironmentClassFields("win", "x64"); - assertThat(Environment.isWindows()).isTrue(); - } - - @Test - public void win64() { - setEnvironmentClassFields("win", "x64"); - assertThat(Environment.isWindows()).isTrue(); - assertThat(Environment.getJniLibraryExtension()). - isEqualTo(".dll"); - assertThat(Environment.getJniLibraryFileName("rocksdb")). - isEqualTo("librocksdbjni-win64.dll"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")). 
- isEqualTo("librocksdbjni.dll"); - } - - @Test - public void ppc64le() { - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); - setEnvironmentClassFields("Linux", "ppc64le"); - assertThat(Environment.isUnix()).isTrue(); - assertThat(Environment.isPowerPC()).isTrue(); - assertThat(Environment.is64Bit()).isTrue(); - assertThat(Environment.getJniLibraryExtension()).isEqualTo(".so"); - assertThat(Environment.getSharedLibraryName("rocksdb")).isEqualTo("rocksdbjni"); - assertThat(Environment.getJniLibraryName("rocksdb")).isEqualTo("rocksdbjni-linux-ppc64le"); - assertThat(Environment.getJniLibraryFileName("rocksdb")) - .isEqualTo("librocksdbjni-linux-ppc64le.so"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")).isEqualTo("librocksdbjni.so"); - // Linux musl-libc (Alpine) - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, true); - setEnvironmentClassFields("Linux", "ppc64le"); - assertThat(Environment.isUnix()).isTrue(); - assertThat(Environment.isPowerPC()).isTrue(); - assertThat(Environment.is64Bit()).isTrue(); - assertThat(Environment.getJniLibraryExtension()).isEqualTo(".so"); - assertThat(Environment.getSharedLibraryName("rocksdb")).isEqualTo("rocksdbjni"); - assertThat(Environment.getJniLibraryName("rocksdb")).isEqualTo("rocksdbjni-linux-ppc64le-musl"); - assertThat(Environment.getJniLibraryFileName("rocksdb")) - .isEqualTo("librocksdbjni-linux-ppc64le-musl.so"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")).isEqualTo("librocksdbjni.so"); - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); - } - - @Test - public void linuxArch64() { - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); - setEnvironmentClassFields("Linux", "aarch64"); - assertThat(Environment.isUnix()).isTrue(); - assertThat(Environment.isAarch64()).isTrue(); - assertThat(Environment.is64Bit()).isTrue(); - assertThat(Environment.getJniLibraryExtension()).isEqualTo(".so"); - assertThat(Environment.getSharedLibraryName("rocksdb")).isEqualTo("rocksdbjni"); - assertThat(Environment.getJniLibraryName("rocksdb")).isEqualTo("rocksdbjni-linux-aarch64"); - assertThat(Environment.getJniLibraryFileName("rocksdb")) - .isEqualTo("librocksdbjni-linux-aarch64.so"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")).isEqualTo("librocksdbjni.so"); - // Linux musl-libc (Alpine) - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, true); - setEnvironmentClassFields("Linux", "aarch64"); - assertThat(Environment.isUnix()).isTrue(); - assertThat(Environment.isAarch64()).isTrue(); - assertThat(Environment.is64Bit()).isTrue(); - assertThat(Environment.getJniLibraryExtension()).isEqualTo(".so"); - assertThat(Environment.getSharedLibraryName("rocksdb")).isEqualTo("rocksdbjni"); - assertThat(Environment.getJniLibraryName("rocksdb")).isEqualTo("rocksdbjni-linux-aarch64-musl"); - assertThat(Environment.getJniLibraryFileName("rocksdb")) - .isEqualTo("librocksdbjni-linux-aarch64-musl.so"); - assertThat(Environment.getFallbackJniLibraryFileName("rocksdb")).isNull(); - assertThat(Environment.getSharedLibraryFileName("rocksdb")).isEqualTo("librocksdbjni.so"); - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); - } - - @Test - public void resolveIsMuslLibc() { - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, null); - setEnvironmentClassFields("win", "anyarch"); - 
assertThat(Environment.isUnix()).isFalse(); - - // with user input, will resolve to true if set as true. Even on OSs that appear absurd for - // musl. Users choice - assertThat(Environment.initIsMuslLibc()).isFalse(); - setEnvironmentClassField(MUSL_ENVIRONMENT_FIELD_NAME, "true"); - assertThat(Environment.initIsMuslLibc()).isTrue(); - setEnvironmentClassField(MUSL_ENVIRONMENT_FIELD_NAME, "false"); - assertThat(Environment.initIsMuslLibc()).isFalse(); - } - - private void setEnvironmentClassFields(String osName, - String osArch) { - setEnvironmentClassField(OS_FIELD_NAME, osName); - setEnvironmentClassField(ARCH_FIELD_NAME, osArch); - } - - @AfterClass - public static void restoreState() { - setEnvironmentClassField(OS_FIELD_NAME, INITIAL_OS); - setEnvironmentClassField(ARCH_FIELD_NAME, INITIAL_ARCH); - setEnvironmentClassField(MUSL_ENVIRONMENT_FIELD_NAME, INITIAL_MUSL_ENVIRONMENT); - setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, INITIAL_MUSL_LIBC); - } - - @SuppressWarnings("unchecked") - private static T getEnvironmentClassField(String fieldName) { - final Field field; - try { - field = Environment.class.getDeclaredField(fieldName); - field.setAccessible(true); - /* Fails in JDK 13; and not needed unless fields are final - final Field modifiersField = Field.class.getDeclaredField("modifiers"); - modifiersField.setAccessible(true); - modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL); - */ - return (T)field.get(null); - } catch (final NoSuchFieldException | IllegalAccessException e) { - throw new RuntimeException(e); - } - } - - private static void setEnvironmentClassField(String fieldName, Object value) { - final Field field; - try { - field = Environment.class.getDeclaredField(fieldName); - field.setAccessible(true); - /* Fails in JDK 13; and not needed unless fields are final - final Field modifiersField = Field.class.getDeclaredField("modifiers"); - modifiersField.setAccessible(true); - modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL); - */ - field.set(null, value); - } catch (final NoSuchFieldException | IllegalAccessException e) { - throw new RuntimeException(e); - } - } -} diff --git a/java/src/test/java/org/rocksdb/util/HeapByteBufferAllocator.java b/java/src/test/java/org/rocksdb/util/HeapByteBufferAllocator.java deleted file mode 100644 index ad6b8f6f4..000000000 --- a/java/src/test/java/org/rocksdb/util/HeapByteBufferAllocator.java +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -package org.rocksdb.util; - -import java.nio.ByteBuffer; - -public final class HeapByteBufferAllocator implements ByteBufferAllocator { - HeapByteBufferAllocator(){}; - - @Override - public ByteBuffer allocate(final int capacity) { - return ByteBuffer.allocate(capacity); - } -} diff --git a/java/src/test/java/org/rocksdb/util/IntComparatorTest.java b/java/src/test/java/org/rocksdb/util/IntComparatorTest.java deleted file mode 100644 index dd3288513..000000000 --- a/java/src/test/java/org/rocksdb/util/IntComparatorTest.java +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
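EnvironmentTest above simulates different platforms by overwriting Environment's private static OS, ARCH and MUSL fields through reflection; the commented-out Field.modifiers hack is noted as unnecessary for non-final fields and broken from JDK 13 on. The sketch below shows the same pattern against a hypothetical Platform holder class; the class and field names are illustrative only and are not part of the RocksDB API.

import java.lang.reflect.Field;

public class StaticFieldOverrideSketch {
  // Hypothetical stand-in for a class with private static configuration fields.
  static final class Platform {
    private static String OS = System.getProperty("os.name");
  }

  // Overwrite a private static field by name, as EnvironmentTest does for Environment.OS/ARCH.
  static void setStaticField(final Class<?> clazz, final String fieldName, final Object value)
      throws ReflectiveOperationException {
    final Field field = clazz.getDeclaredField(fieldName);
    field.setAccessible(true); // sufficient for non-final fields; no Field.modifiers manipulation needed
    field.set(null, value);
  }

  public static void main(final String[] args) throws ReflectiveOperationException {
    setStaticField(Platform.class, "OS", "linux");
    System.out.println(Platform.OS); // prints "linux"
  }
}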
- -package org.rocksdb.util; - -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; -import org.rocksdb.*; - -import java.nio.ByteBuffer; -import java.nio.file.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; - -/** - * Tests for IntComparator, but more generally - * also for rocksdb::ComparatorJniCallback implementation. - */ -@RunWith(Parameterized.class) -public class IntComparatorTest { - - // test with 500 random integer keys - private static final int TOTAL_KEYS = 500; - private static final byte[][] keys = new byte[TOTAL_KEYS][4]; - - @BeforeClass - public static void prepareKeys() { - final ByteBuffer buf = ByteBuffer.allocate(4); - final Random random = new Random(); - for (int i = 0; i < TOTAL_KEYS; i++) { - final int ri = random.nextInt(); - buf.putInt(ri); - buf.flip(); - final byte[] key = buf.array(); - - // does key already exist (avoid duplicates) - if (keyExists(key, i)) { - i--; // loop round and generate a different key - } else { - System.arraycopy(key, 0, keys[i], 0, 4); - } - } - } - - private static boolean keyExists(final byte[] key, final int limit) { - for (int j = 0; j < limit; j++) { - if (Arrays.equals(key, keys[j])) { - return true; - } - } - return false; - } - - @Parameters(name = "{0}") - public static Iterable parameters() { - return Arrays.asList(new Object[][] { - { "non-direct_reused64_mutex", false, 64, ReusedSynchronisationType.MUTEX }, - { "direct_reused64_mutex", true, 64, ReusedSynchronisationType.MUTEX }, - { "non-direct_reused64_adaptive-mutex", false, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, - { "direct_reused64_adaptive-mutex", true, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, - { "non-direct_reused64_thread-local", false, 64, ReusedSynchronisationType.THREAD_LOCAL }, - { "direct_reused64_thread-local", true, 64, ReusedSynchronisationType.THREAD_LOCAL }, - { "non-direct_noreuse", false, -1, null }, - { "direct_noreuse", true, -1, null } - }); - } - - @Parameter(0) - public String name; - - @Parameter(1) - public boolean useDirectBuffer; - - @Parameter(2) - public int maxReusedBufferSize; - - @Parameter(3) - public ReusedSynchronisationType reusedSynchronisationType; - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - - @Test - public void javaComparatorDefaultCf() throws RocksDBException { - try (final ComparatorOptions options = new ComparatorOptions() - .setUseDirectBuffer(useDirectBuffer) - .setMaxReusedBufferSize(maxReusedBufferSize) - // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used - .setReusedSynchronisationType(reusedSynchronisationType == null ? 
ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); - final IntComparator comparator = new IntComparator(options)) { - - // test the round-tripability of keys written and read with the Comparator - testRoundtrip(FileSystems.getDefault().getPath( - dbFolder.getRoot().getAbsolutePath()), comparator); - } - } - - @Test - public void javaComparatorNamedCf() throws RocksDBException { - try (final ComparatorOptions options = new ComparatorOptions() - .setUseDirectBuffer(useDirectBuffer) - .setMaxReusedBufferSize(maxReusedBufferSize) - // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used - .setReusedSynchronisationType(reusedSynchronisationType == null ? ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); - final IntComparator comparator = new IntComparator(options)) { - - // test the round-tripability of keys written and read with the Comparator - testRoundtripCf(FileSystems.getDefault().getPath( - dbFolder.getRoot().getAbsolutePath()), comparator); - } - } - - /** - * Test which stores random keys into the database - * using an {@link IntComparator} - * it then checks that these keys are read back in - * ascending order - * - * @param db_path A path where we can store database - * files temporarily - * - * @param comparator the comparator - * - * @throws RocksDBException if a database error happens. - */ - private void testRoundtrip(final Path db_path, - final AbstractComparator comparator) throws RocksDBException { - try (final Options opt = new Options() - .setCreateIfMissing(true) - .setComparator(comparator)) { - - // store TOTAL_KEYS into the db - try (final RocksDB db = RocksDB.open(opt, db_path.toString())) { - for (int i = 0; i < TOTAL_KEYS; i++) { - db.put(keys[i], "value".getBytes(UTF_8)); - } - } - - // re-open db and read from start to end - // integer keys should be in ascending - // order as defined by IntComparator - final ByteBuffer key = ByteBuffer.allocate(4); - try (final RocksDB db = RocksDB.open(opt, db_path.toString()); - final RocksIterator it = db.newIterator()) { - it.seekToFirst(); - int lastKey = Integer.MIN_VALUE; - int count = 0; - for (it.seekToFirst(); it.isValid(); it.next()) { - key.put(it.key()); - key.flip(); - final int thisKey = key.getInt(); - key.clear(); - assertThat(thisKey).isGreaterThan(lastKey); - lastKey = thisKey; - count++; - } - assertThat(count).isEqualTo(TOTAL_KEYS); - } - } - } - - /** - * Test which stores random keys into a column family - * in the database - * using an {@link IntComparator} - * it then checks that these keys are read back in - * ascending order - * - * @param db_path A path where we can store database - * files temporarily - * - * @param comparator the comparator - * - * @throws RocksDBException if a database error happens. 
- */ - private void testRoundtripCf(final Path db_path, - final AbstractComparator comparator) throws RocksDBException { - - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), - new ColumnFamilyOptions() - .setComparator(comparator)) - ); - - final List cfHandles = new ArrayList<>(); - - try (final DBOptions opt = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true)) { - - try (final RocksDB db = RocksDB.open(opt, db_path.toString(), - cfDescriptors, cfHandles)) { - try { - assertThat(cfDescriptors.size()).isEqualTo(2); - assertThat(cfHandles.size()).isEqualTo(2); - - for (int i = 0; i < TOTAL_KEYS; i++) { - db.put(cfHandles.get(1), keys[i], "value".getBytes(UTF_8)); - } - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - cfHandles.clear(); - } - } - - // re-open db and read from start to end - // integer keys should be in ascending - // order as defined by SimpleIntComparator - final ByteBuffer key = ByteBuffer.allocate(4); - try (final RocksDB db = RocksDB.open(opt, db_path.toString(), - cfDescriptors, cfHandles); - final RocksIterator it = db.newIterator(cfHandles.get(1))) { - try { - assertThat(cfDescriptors.size()).isEqualTo(2); - assertThat(cfHandles.size()).isEqualTo(2); - - it.seekToFirst(); - int lastKey = Integer.MIN_VALUE; - int count = 0; - for (it.seekToFirst(); it.isValid(); it.next()) { - key.put(it.key()); - key.flip(); - final int thisKey = key.getInt(); - key.clear(); - assertThat(thisKey).isGreaterThan(lastKey); - lastKey = thisKey; - count++; - } - - assertThat(count).isEqualTo(TOTAL_KEYS); - - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - cfHandles.clear(); - for (final ColumnFamilyDescriptor cfDescriptor : cfDescriptors) { - cfDescriptor.getOptions().close(); - } - } - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/util/JNIComparatorTest.java b/java/src/test/java/org/rocksdb/util/JNIComparatorTest.java deleted file mode 100644 index a962b8d78..000000000 --- a/java/src/test/java/org/rocksdb/util/JNIComparatorTest.java +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright (c) Meta Platforms, Inc. and affiliates. -// -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
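Each of the parameterized comparator tests (IntComparatorTest above, and the Bytewise and ReverseBytewise variants) repeats the same ComparatorOptions setup, substituting ADAPTIVE_MUTEX when no ReusedSynchronisationType is supplied, because buffer reuse is disabled when maxReusedBufferSize <= 0 and the setting is then irrelevant. Below is a sketch of that setup factored into a helper; it assumes the fluent setters used in the tests, and the caller is expected to load the native library first and to close the returned options.

import org.rocksdb.ComparatorOptions;
import org.rocksdb.ReusedSynchronisationType;

public final class ComparatorOptionsSketch {
  private ComparatorOptionsSketch() {}

  // Mirror the options block repeated in javaComparatorDefaultCf()/javaComparatorNamedCf():
  // when reuse is disabled (maxReusedBufferSize <= 0) the synchronisation type is unused,
  // so ADAPTIVE_MUTEX is supplied as a harmless placeholder.
  public static ComparatorOptions forTest(final boolean useDirectBuffer,
      final int maxReusedBufferSize, final ReusedSynchronisationType reusedSynchronisationType) {
    return new ComparatorOptions()
        .setUseDirectBuffer(useDirectBuffer)
        .setMaxReusedBufferSize(maxReusedBufferSize)
        .setReusedSynchronisationType(reusedSynchronisationType == null
            ? ReusedSynchronisationType.ADAPTIVE_MUTEX
            : reusedSynchronisationType);
  }
}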
- -package org.rocksdb.util; - -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; -import org.rocksdb.*; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.file.*; -import java.util.Arrays; - -import static org.assertj.core.api.Assertions.assertThat; - -@RunWith(Parameterized.class) -public class JNIComparatorTest { - - @Parameters(name = "{0}") - public static Iterable parameters() { - return Arrays.asList(new Object[][] { - { "bytewise_non-direct", BuiltinComparator.BYTEWISE_COMPARATOR, false }, - { "bytewise_direct", BuiltinComparator.BYTEWISE_COMPARATOR, true }, - { "reverse-bytewise_non-direct", BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR, false }, - { "reverse-bytewise_direct", BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR, true }, - }); - } - - @Parameter(0) - public String name; - - @Parameter(1) - public BuiltinComparator builtinComparator; - - @Parameter(2) - public boolean useDirectBuffer; - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - private static final int MIN = Short.MIN_VALUE - 1; - private static final int MAX = Short.MAX_VALUE + 1; - - @Test - public void java_comparator_equals_cpp_comparator() throws RocksDBException, IOException { - final int[] javaKeys; - try (final ComparatorOptions comparatorOptions = new ComparatorOptions(); - final AbstractComparator comparator = builtinComparator == BuiltinComparator.BYTEWISE_COMPARATOR - ? 
new BytewiseComparator(comparatorOptions) - : new ReverseBytewiseComparator(comparatorOptions)) { - final Path javaDbDir = - FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); - storeWithJavaComparator(javaDbDir, comparator); - javaKeys = readAllWithJavaComparator(javaDbDir, comparator); - } - - final Path cppDbDir = - FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); - storeWithCppComparator(cppDbDir, builtinComparator); - final int[] cppKeys = - readAllWithCppComparator(cppDbDir, builtinComparator); - - assertThat(javaKeys).isEqualTo(cppKeys); - } - - private void storeWithJavaComparator(final Path dir, - final AbstractComparator comparator) throws RocksDBException { - final ByteBuffer buf = ByteBuffer.allocate(4); - try (final Options options = new Options() - .setCreateIfMissing(true) - .setComparator(comparator); - final RocksDB db = - RocksDB.open(options, dir.toAbsolutePath().toString())) { - for (int i = MIN; i < MAX; i++) { - buf.putInt(i); - buf.flip(); - - db.put(buf.array(), buf.array()); - - buf.clear(); - } - } - } - - private void storeWithCppComparator(final Path dir, - final BuiltinComparator builtinComparator) throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true) - .setComparator(builtinComparator); - final RocksDB db = - RocksDB.open(options, dir.toAbsolutePath().toString())) { - - final ByteBuffer buf = ByteBuffer.allocate(4); - for (int i = MIN; i < MAX; i++) { - buf.putInt(i); - buf.flip(); - - db.put(buf.array(), buf.array()); - - buf.clear(); - } - } - } - - private int[] readAllWithJavaComparator(final Path dir, - final AbstractComparator comparator) throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true) - .setComparator(comparator); - final RocksDB db = - RocksDB.open(options, dir.toAbsolutePath().toString())) { - - try (final RocksIterator it = db.newIterator()) { - it.seekToFirst(); - - final ByteBuffer buf = ByteBuffer.allocate(4); - final int[] keys = new int[MAX - MIN]; - int idx = 0; - while (it.isValid()) { - buf.put(it.key()); - buf.flip(); - - final int thisKey = buf.getInt(); - keys[idx++] = thisKey; - - buf.clear(); - - it.next(); - } - - return keys; - } - } - } - - private int[] readAllWithCppComparator(final Path dir, - final BuiltinComparator comparator) throws RocksDBException { - try (final Options options = new Options() - .setCreateIfMissing(true) - .setComparator(comparator); - final RocksDB db = - RocksDB.open(options, dir.toAbsolutePath().toString())) { - - try (final RocksIterator it = db.newIterator()) { - it.seekToFirst(); - - final ByteBuffer buf = ByteBuffer.allocate(4); - final int[] keys = new int[MAX - MIN]; - int idx = 0; - while (it.isValid()) { - buf.put(it.key()); - buf.flip(); - - final int thisKey = buf.getInt(); - keys[idx++] = thisKey; - - buf.clear(); - - it.next(); - } - - return keys; - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java b/java/src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java deleted file mode 100644 index ca08d9de1..000000000 --- a/java/src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java +++ /dev/null @@ -1,270 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
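JNIComparatorTest above iterates keys from Short.MIN_VALUE - 1 up to Short.MAX_VALUE + 1, so it also covers negative integers; under a plain bytewise comparator those do not come back in signed numeric order, because the sign bit makes their big-endian encodings sort after every non-negative key. That is why the test compares the Java and C++ key sequences against each other rather than against numeric order, and why the *IntTest classes restrict themselves to non-negative keys. A JDK-only sketch of the effect follows (Java 9+ for Arrays.compareUnsigned).

import java.nio.ByteBuffer;
import java.util.Arrays;

public class SignedKeyOrderSketch {
  // Encode an int as its 4-byte big-endian two's-complement representation.
  static byte[] encode(final int value) {
    return ByteBuffer.allocate(4).putInt(value).array();
  }

  public static void main(final String[] args) {
    // Bytewise (unsigned lexicographic) order: the set sign bit makes negative ints
    // compare greater than any non-negative int, so numeric order is not preserved.
    final int cmp = Arrays.compareUnsigned(encode(-1), encode(1));
    System.out.println(cmp > 0
        ? "-1 sorts after 1 under bytewise comparison"
        : "unexpected ordering");
  }
}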
- -package org.rocksdb.util; - -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; -import org.rocksdb.*; - -import java.nio.ByteBuffer; -import java.nio.file.FileSystems; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; - -/** - * Similar to {@link IntComparatorTest}, but uses - * {@link ReverseBytewiseComparator} which ensures the correct reverse - * ordering of positive integers. - */ -@RunWith(Parameterized.class) -public class ReverseBytewiseComparatorIntTest { - - // test with 500 random positive integer keys - private static final int TOTAL_KEYS = 500; - private static final byte[][] keys = new byte[TOTAL_KEYS][4]; - - @BeforeClass - public static void prepareKeys() { - final ByteBuffer buf = ByteBuffer.allocate(4); - final Random random = new Random(); - for (int i = 0; i < TOTAL_KEYS; i++) { - final int ri = random.nextInt() & Integer.MAX_VALUE; // the & ensures positive integer - buf.putInt(ri); - buf.flip(); - final byte[] key = buf.array(); - - // does key already exist (avoid duplicates) - if (keyExists(key, i)) { - i--; // loop round and generate a different key - } else { - System.arraycopy(key, 0, keys[i], 0, 4); - } - } - } - - private static boolean keyExists(final byte[] key, final int limit) { - for (int j = 0; j < limit; j++) { - if (Arrays.equals(key, keys[j])) { - return true; - } - } - return false; - } - - @Parameters(name = "{0}") - public static Iterable parameters() { - return Arrays.asList(new Object[][] { - { "non-direct_reused64_mutex", false, 64, ReusedSynchronisationType.MUTEX }, - { "direct_reused64_adaptive-mutex", true, 64, ReusedSynchronisationType.MUTEX }, - { "non-direct_reused64_adaptive-mutex", false, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, - { "direct_reused64_adaptive-mutex", true, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, - { "non-direct_reused64_adaptive-mutex", false, 64, ReusedSynchronisationType.THREAD_LOCAL }, - { "direct_reused64_adaptive-mutex", true, 64, ReusedSynchronisationType.THREAD_LOCAL }, - { "non-direct_noreuse", false, -1, null }, - { "direct_noreuse", true, -1, null } - }); - } - - @Parameter(0) - public String name; - - @Parameter(1) - public boolean useDirectBuffer; - - @Parameter(2) - public int maxReusedBufferSize; - - @Parameter(3) - public ReusedSynchronisationType reusedSynchronisationType; - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule - public TemporaryFolder dbFolder = new TemporaryFolder(); - - - @Test - public void javaComparatorDefaultCf() throws RocksDBException { - try (final ComparatorOptions options = new ComparatorOptions() - .setUseDirectBuffer(useDirectBuffer) - .setMaxReusedBufferSize(maxReusedBufferSize) - // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used - .setReusedSynchronisationType(reusedSynchronisationType == null ? 
ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); - final ReverseBytewiseComparator comparator = - new ReverseBytewiseComparator(options)) { - - // test the round-tripability of keys written and read with the Comparator - testRoundtrip(FileSystems.getDefault().getPath( - dbFolder.getRoot().getAbsolutePath()), comparator); - } - } - - @Test - public void javaComparatorNamedCf() throws RocksDBException { - try (final ComparatorOptions options = new ComparatorOptions() - .setUseDirectBuffer(useDirectBuffer) - .setMaxReusedBufferSize(maxReusedBufferSize) - // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used - .setReusedSynchronisationType(reusedSynchronisationType == null ? ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); - final ReverseBytewiseComparator comparator - = new ReverseBytewiseComparator(options)) { - - // test the round-tripability of keys written and read with the Comparator - testRoundtripCf(FileSystems.getDefault().getPath( - dbFolder.getRoot().getAbsolutePath()), comparator); - } - } - - /** - * Test which stores random keys into the database - * using an {@link IntComparator} - * it then checks that these keys are read back in - * ascending order - * - * @param db_path A path where we can store database - * files temporarily - * - * @param comparator the comparator - * - * @throws RocksDBException if a database error happens. - */ - private void testRoundtrip(final Path db_path, - final AbstractComparator comparator) throws RocksDBException { - try (final Options opt = new Options() - .setCreateIfMissing(true) - .setComparator(comparator)) { - - // store TOTAL_KEYS into the db - try (final RocksDB db = RocksDB.open(opt, db_path.toString())) { - for (int i = 0; i < TOTAL_KEYS; i++) { - db.put(keys[i], "value".getBytes(UTF_8)); - } - } - - // re-open db and read from start to end - // integer keys should be in descending - // order - final ByteBuffer key = ByteBuffer.allocate(4); - try (final RocksDB db = RocksDB.open(opt, db_path.toString()); - final RocksIterator it = db.newIterator()) { - it.seekToFirst(); - int lastKey = Integer.MAX_VALUE; - int count = 0; - for (it.seekToFirst(); it.isValid(); it.next()) { - key.put(it.key()); - key.flip(); - final int thisKey = key.getInt(); - key.clear(); - assertThat(thisKey).isLessThan(lastKey); - lastKey = thisKey; - count++; - } - assertThat(count).isEqualTo(TOTAL_KEYS); - } - } - } - - /** - * Test which stores random keys into a column family - * in the database - * using an {@link IntComparator} - * it then checks that these keys are read back in - * ascending order - * - * @param db_path A path where we can store database - * files temporarily - * - * @param comparator the comparator - * - * @throws RocksDBException if a database error happens. 
- */ - private void testRoundtripCf(final Path db_path, - final AbstractComparator comparator) throws RocksDBException { - - final List cfDescriptors = Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor("new_cf".getBytes(), - new ColumnFamilyOptions() - .setComparator(comparator)) - ); - - final List cfHandles = new ArrayList<>(); - - try (final DBOptions opt = new DBOptions() - .setCreateIfMissing(true) - .setCreateMissingColumnFamilies(true)) { - - try (final RocksDB db = RocksDB.open(opt, db_path.toString(), - cfDescriptors, cfHandles)) { - try { - assertThat(cfDescriptors.size()).isEqualTo(2); - assertThat(cfHandles.size()).isEqualTo(2); - - for (int i = 0; i < TOTAL_KEYS; i++) { - db.put(cfHandles.get(1), keys[i], "value".getBytes(UTF_8)); - } - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - cfHandles.clear(); - } - } - - // re-open db and read from start to end - // integer keys should be in descending - // order - final ByteBuffer key = ByteBuffer.allocate(4); - try (final RocksDB db = RocksDB.open(opt, db_path.toString(), - cfDescriptors, cfHandles); - final RocksIterator it = db.newIterator(cfHandles.get(1))) { - try { - assertThat(cfDescriptors.size()).isEqualTo(2); - assertThat(cfHandles.size()).isEqualTo(2); - - it.seekToFirst(); - int lastKey = Integer.MAX_VALUE; - int count = 0; - for (it.seekToFirst(); it.isValid(); it.next()) { - key.put(it.key()); - key.flip(); - final int thisKey = key.getInt(); - key.clear(); - assertThat(thisKey).isLessThan(lastKey); - lastKey = thisKey; - count++; - } - - assertThat(count).isEqualTo(TOTAL_KEYS); - - } finally { - for (final ColumnFamilyHandle cfHandle : cfHandles) { - cfHandle.close(); - } - cfHandles.clear(); - for (final ColumnFamilyDescriptor cfDescriptor : cfDescriptors) { - cfDescriptor.getOptions().close(); - } - } - } - } - } -} diff --git a/java/src/test/java/org/rocksdb/util/SizeUnitTest.java b/java/src/test/java/org/rocksdb/util/SizeUnitTest.java deleted file mode 100644 index 990aa5f47..000000000 --- a/java/src/test/java/org/rocksdb/util/SizeUnitTest.java +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; - -import org.junit.Test; - -import static org.assertj.core.api.Assertions.assertThat; - -public class SizeUnitTest { - - public static final long COMPUTATION_UNIT = 1024L; - - @Test - public void sizeUnit() { - assertThat(SizeUnit.KB).isEqualTo(COMPUTATION_UNIT); - assertThat(SizeUnit.MB).isEqualTo( - SizeUnit.KB * COMPUTATION_UNIT); - assertThat(SizeUnit.GB).isEqualTo( - SizeUnit.MB * COMPUTATION_UNIT); - assertThat(SizeUnit.TB).isEqualTo( - SizeUnit.GB * COMPUTATION_UNIT); - assertThat(SizeUnit.PB).isEqualTo( - SizeUnit.TB * COMPUTATION_UNIT); - } -} diff --git a/java/src/test/java/org/rocksdb/util/TestUtil.java b/java/src/test/java/org/rocksdb/util/TestUtil.java deleted file mode 100644 index e4f490c8e..000000000 --- a/java/src/test/java/org/rocksdb/util/TestUtil.java +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -package org.rocksdb.util; - -import static java.nio.charset.StandardCharsets.UTF_8; - -import java.nio.ByteBuffer; -import java.util.Random; -import org.rocksdb.CompactionPriority; -import org.rocksdb.Options; -import org.rocksdb.WALRecoveryMode; - -/** - * General test utilities. - */ -public class TestUtil { - - /** - * Get the options for log iteration tests. - * - * @return the options - */ - public static Options optionsForLogIterTest() { - return defaultOptions() - .setCreateIfMissing(true) - .setWalTtlSeconds(1000); - } - - /** - * Get the default options. - * - * @return the options - */ - public static Options defaultOptions() { - return new Options() - .setWriteBufferSize(4090 * 4096) - .setTargetFileSizeBase(2 * 1024 * 1024) - .setMaxBytesForLevelBase(10 * 1024 * 1024) - .setMaxOpenFiles(5000) - .setWalRecoveryMode(WALRecoveryMode.TolerateCorruptedTailRecords) - .setCompactionPriority(CompactionPriority.ByCompensatedSize); - } - - private static final Random random = new Random(); - - /** - * Generate a random string of bytes. - * - * @param len the length of the string to generate. - * - * @return the random string of bytes - */ - public static byte[] dummyString(final int len) { - final byte[] str = new byte[len]; - random.nextBytes(str); - return str; - } - - /** - * Copy a {@link ByteBuffer} into an array for shorthand ease of test coding - * @param byteBuffer the buffer to copy - * @return a {@link byte[]} containing the same bytes as the input - */ - public static byte[] bufferBytes(final ByteBuffer byteBuffer) { - final byte[] result = new byte[byteBuffer.limit()]; - byteBuffer.get(result); - return result; - } -} diff --git a/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java b/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java deleted file mode 100644 index 2efa16473..000000000 --- a/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-package org.rocksdb.util; - -import org.rocksdb.RocksDBException; -import org.rocksdb.WriteBatch; - -import java.util.Arrays; - -public class WriteBatchGetter extends WriteBatch.Handler { - - private int columnFamilyId = -1; - private final byte[] key; - private byte[] value; - - public WriteBatchGetter(final byte[] key) { - this.key = key; - } - - public byte[] getValue() { - return value; - } - - @Override - public void put(final int columnFamilyId, final byte[] key, - final byte[] value) { - if(Arrays.equals(this.key, key)) { - this.columnFamilyId = columnFamilyId; - this.value = value; - } - } - - @Override - public void put(final byte[] key, final byte[] value) { - if(Arrays.equals(this.key, key)) { - this.value = value; - } - } - - @Override - public void merge(final int columnFamilyId, final byte[] key, - final byte[] value) { - if(Arrays.equals(this.key, key)) { - this.columnFamilyId = columnFamilyId; - this.value = value; - } - } - - @Override - public void merge(final byte[] key, final byte[] value) { - if(Arrays.equals(this.key, key)) { - this.value = value; - } - } - - @Override - public void delete(final int columnFamilyId, final byte[] key) { - if(Arrays.equals(this.key, key)) { - this.columnFamilyId = columnFamilyId; - this.value = null; - } - } - - @Override - public void delete(final byte[] key) { - if(Arrays.equals(this.key, key)) { - this.value = null; - } - } - - @Override - public void singleDelete(final int columnFamilyId, final byte[] key) { - if(Arrays.equals(this.key, key)) { - this.columnFamilyId = columnFamilyId; - this.value = null; - } - } - - @Override - public void singleDelete(final byte[] key) { - if(Arrays.equals(this.key, key)) { - this.value = null; - } - } - - @Override - public void deleteRange(final int columnFamilyId, final byte[] beginKey, - final byte[] endKey) { - throw new UnsupportedOperationException(); - } - - @Override - public void deleteRange(final byte[] beginKey, final byte[] endKey) { - throw new UnsupportedOperationException(); - } - - @Override - public void logData(final byte[] blob) { - throw new UnsupportedOperationException(); - } - - @Override - public void putBlobIndex(final int columnFamilyId, final byte[] key, - final byte[] value) { - if(Arrays.equals(this.key, key)) { - this.columnFamilyId = columnFamilyId; - this.value = value; - } - } - - @Override - public void markBeginPrepare() throws RocksDBException { - throw new UnsupportedOperationException(); - } - - @Override - public void markEndPrepare(final byte[] xid) throws RocksDBException { - throw new UnsupportedOperationException(); - } - - @Override - public void markNoop(final boolean emptyBatch) throws RocksDBException { - throw new UnsupportedOperationException(); - } - - @Override - public void markRollback(final byte[] xid) throws RocksDBException { - throw new UnsupportedOperationException(); - } - - @Override - public void markCommit(final byte[] xid) throws RocksDBException { - throw new UnsupportedOperationException(); - } - - @Override - public void markCommitWithTimestamp(final byte[] xid, final byte[] ts) throws RocksDBException { - throw new UnsupportedOperationException(); - } -} diff --git a/java/understanding_options.md b/java/understanding_options.md deleted file mode 100644 index 0393aff4d..000000000 --- a/java/understanding_options.md +++ /dev/null @@ -1,79 +0,0 @@ -# How RocksDB Options and their Java Wrappers Work - -Options in RocksDB come in many different flavours. This is an attempt at a taxonomy and explanation. 
-
-## RocksDB Options
-
-Initially, I believe, RocksDB had only database options. I don't know if any of these were mutable. Column families came later. Read on to understand the terminology.
-
-So to begin, one sets up a collection of options and starts/creates a database with these options. That is a useful way to think about it because, from a Java point of view, despite making native calls to C++, the `API`s are just manipulating a native C++ configuration object. This object is just a record of configuration, and it must later be passed to the database (at create or open time) in order for the options to take effect.
-
-### Database versus Column Family
-
-The concept of the *column family* or `CF` is widespread within RocksDB. I think of it as a data namespace, though conveniently transactions can operate across these namespaces. There is a default column family, and when operations do not refer to a particular `CF`, they refer to the default.
-
-We raise this w.r.t. options because many options, perhaps most of those users encounter, are *column family options*. That is to say, they apply individually to a particular column family or to the default column family. Crucially, many, perhaps all, of these same options are also exposed as *database options*, where they act as the default for column families which do not set the option explicitly. Some database options are naturally database-wide; they apply to the operation of the database as a whole and make no sense applied to a column family.
-
-### Mutability
-
-There are two kinds of options:
-
-- Mutable options
-- Immutable options. We name these in contrast to the mutable ones, but they are usually referred to without qualification.
-
-Mutable options are those which can be changed on a running `RocksDB` instance. Immutable options can only be configured before the database is opened. Of course, the mutable options can also be configured at that time; the full set of options is a strict superset of the mutable options.
-
-Mutable options (whether column-family specific or database-wide) are manipulated at runtime with builders, so we have `MutableDBOptions.MutableDBOptionsBuilder` and `MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder`, which share tooling classes/hierarchy and maintain the relevant options as a `(key,value)` map.
-
-Mutable options are then passed to the live RocksDB using its `setOptions()` and `setDBOptions()` methods, and take effect immediately (depending on the semantics of the option). A short sketch of this is shown below, after the discussion of advanced options.
-
-### Advanced
-
-There are two classes of options:
-
-- Advanced options
-- Non-advanced options
-
-It's not clear to me what the conceptual distinction is between advanced and non-advanced options. However, the Java code takes care to reflect the distinction from the underlying C++.
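-
-As a concrete illustration of the Mutability section above, here is a minimal sketch of changing mutable column family options on a live database. It assumes an already-open `RocksDB` instance `db` and a `ColumnFamilyHandle` `cfHandle`; the method names (`MutableColumnFamilyOptions.builder()`, `RocksDB.setOptions()`, and the individual setters) are my reading of the Java API and should be checked against the javadoc of the version in use.
-
-```java
-import org.rocksdb.ColumnFamilyHandle;
-import org.rocksdb.MutableColumnFamilyOptions;
-import org.rocksdb.RocksDB;
-import org.rocksdb.RocksDBException;
-
-public class MutableOptionsSketch {
-  static void tuneColumnFamily(final RocksDB db, final ColumnFamilyHandle cfHandle)
-      throws RocksDBException {
-    // The builder maintains the chosen options as a (key,value) map under the hood.
-    final MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder builder =
-        MutableColumnFamilyOptions.builder()
-            .setWriteBufferSize(64 * 1024 * 1024)  // a mutable column family option
-            .setDisableAutoCompactions(false);     // another mutable column family option
-
-    // Apply to one column family of the running database; the change takes
-    // effect immediately, subject to the semantics of each option.
-    db.setOptions(cfHandle, builder.build());
-  }
-}
-```
-
-Database-wide mutable options work the same way through `MutableDBOptions.builder()` and `setDBOptions()`.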
-
-The advanced/non-advanced split leads to two separate type hierarchies within the column family options, one for each `class` of options; the `kind`s (mutable versus immutable) are represented by where an option appears within its hierarchy:
-
-```java
-interface ColumnFamilyOptionsInterface<T extends ColumnFamilyOptionsInterface<T>>
-    extends AdvancedColumnFamilyOptionsInterface<T>
-interface MutableColumnFamilyOptionsInterface<T extends MutableColumnFamilyOptionsInterface<T>>
-    extends AdvancedMutableColumnFamilyOptionsInterface<T>
-```
-
-And then there is ultimately a single concrete implementation class for CF options:
-
-```java
-class ColumnFamilyOptions extends RocksObject
-    implements ColumnFamilyOptionsInterface<ColumnFamilyOptions>,
-    MutableColumnFamilyOptionsInterface<ColumnFamilyOptions>
-```
-
-just as there is a single concrete implementation class for DB options:
-
-```java
-class DBOptions extends RocksObject
-    implements DBOptionsInterface<DBOptions>,
-    MutableDBOptionsInterface<DBOptions>
-```
-
-Interestingly, `DBOptionsInterface` doesn't extend `MutableDBOptionsInterface`, if only in order to disrupt our belief in consistent basic laws of the Universe.
-
-## Startup/Creation Options
-
-```java
-class Options extends RocksObject
-    implements DBOptionsInterface<Options>,
-    MutableDBOptionsInterface<Options>,
-    ColumnFamilyOptionsInterface<Options>,
-    MutableColumnFamilyOptionsInterface<Options>
-```
-
-### Example - Blob Options
-
-The `enable_blob_files` and `min_blob_size` options are per-column-family, and are mutable. The options also appear in the unqualified database options. So by initial configuration, we can set up a RocksDB database where, for every `(key,value)` with a value of size at least `min_blob_size`, the value is written (indirected) to a blob file. Blobs may share a blob file, subject to the configuration values set. Later, using the `MutableColumnFamilyOptionsInterface` of the `ColumnFamilyOptions`, we can choose to turn this off (`enable_blob_files=false`), or alter `min_blob_size`, for the default column family or any other column family. It seems to me that we cannot, though, mutate the column family options for all column families at once using the `setOptions()` mechanism, either for all existing column families or for all future column families; but perhaps we can do the latter on a re-`open()`/`create()`.
diff --git a/logging/auto_roll_logger_test.cc b/logging/auto_roll_logger_test.cc
deleted file mode 100644
index 3d0ec1763..000000000
--- a/logging/auto_roll_logger_test.cc
+++ /dev/null
@@ -1,731 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-//
-
-
-#include "logging/auto_roll_logger.h"
-
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "db/db_test_util.h"
-#include "env/emulated_clock.h"
-#include "logging/env_logger.h"
-#include "logging/logging.h"
-#include "port/port.h"
-#include "rocksdb/db.h"
-#include "rocksdb/file_system.h"
-#include "rocksdb/system_clock.h"
-#include "test_util/sync_point.h"
-#include "test_util/testharness.h"
-#include "test_util/testutil.h"
-
-namespace ROCKSDB_NAMESPACE {
-
-// In this test we only want to Log some simple log message with
-// no format. LogMessage() provides such a simple interface and
-// avoids the [format-security] warning which occurs when you
-// call ROCKS_LOG_INFO(logger, log_message) directly.
-namespace { -void LogMessage(Logger* logger, const char* message) { - ROCKS_LOG_INFO(logger, "%s", message); -} - -void LogMessage(const InfoLogLevel log_level, Logger* logger, - const char* message) { - Log(log_level, logger, "%s", message); -} -} // namespace - -class AutoRollLoggerTest : public testing::Test { - public: - static void InitTestDb() { - // TODO replace the `system` calls with Env/FileSystem APIs. -#ifdef OS_WIN - // Replace all slashes in the path so windows CompSpec does not - // become confused - std::string testDbDir(kTestDbDir); - std::replace_if( - testDbDir.begin(), testDbDir.end(), [](char ch) { return ch == '/'; }, - '\\'); - std::string deleteDbDirCmd = - "if exist " + testDbDir + " rd /s /q " + testDbDir; - ASSERT_TRUE(system(deleteDbDirCmd.c_str()) == 0); - - std::string testDir(kTestDir); - std::replace_if( - testDir.begin(), testDir.end(), [](char ch) { return ch == '/'; }, - '\\'); - std::string deleteCmd = "if exist " + testDir + " rd /s /q " + testDir; -#else - std::string deleteCmd = "rm -rf " + kTestDir + " " + kTestDbDir; -#endif - ASSERT_TRUE(system(deleteCmd.c_str()) == 0); - ASSERT_OK(Env::Default()->CreateDir(kTestDir)); - ASSERT_OK(Env::Default()->CreateDir(kTestDbDir)); - } - - void RollLogFileBySizeTest(AutoRollLogger* logger, size_t log_max_size, - const std::string& log_message); - void RollLogFileByTimeTest(const std::shared_ptr& fs, - const std::shared_ptr& sc, - AutoRollLogger* logger, size_t time, - const std::string& log_message); - // return list of files under kTestDir that contains "LOG" - std::vector GetLogFiles() { - std::vector ret; - std::vector files; - Status s = default_env->GetChildren(kTestDir, &files); - // Should call ASSERT_OK() here but it doesn't compile. It's not - // worth the time figuring out why. - EXPECT_TRUE(s.ok()); - for (const auto& f : files) { - if (f.find("LOG") != std::string::npos) { - ret.push_back(f); - } - } - return ret; - } - - // Delete all log files under kTestDir - void CleanupLogFiles() { - for (const std::string& f : GetLogFiles()) { - ASSERT_OK(default_env->DeleteFile(kTestDir + "/" + f)); - } - } - - void RollNTimesBySize(Logger* auto_roll_logger, size_t file_num, - size_t max_log_file_size) { - // Roll the log 4 times, and it will trim to 3 files. - std::string dummy_large_string; - dummy_large_string.assign(max_log_file_size, '='); - auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); - for (size_t i = 0; i < file_num + 1; i++) { - // Log enough bytes to trigger at least one roll. 
- LogMessage(auto_roll_logger, dummy_large_string.c_str()); - LogMessage(auto_roll_logger, ""); - } - } - - static const std::string kSampleMessage; - static const std::string kTestDir; - static const std::string kTestDbDir; - static const std::string kLogFile; - static Env* default_env; -}; - -const std::string AutoRollLoggerTest::kSampleMessage( - "this is the message to be written to the log file!!"); -const std::string AutoRollLoggerTest::kTestDir( - test::PerThreadDBPath("db_log_test")); -const std::string AutoRollLoggerTest::kTestDbDir( - test::PerThreadDBPath("db_log_test_db")); -const std::string AutoRollLoggerTest::kLogFile( - test::PerThreadDBPath("db_log_test") + "/LOG"); -Env* AutoRollLoggerTest::default_env = Env::Default(); - -void AutoRollLoggerTest::RollLogFileBySizeTest(AutoRollLogger* logger, - size_t log_max_size, - const std::string& log_message) { - logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); - ASSERT_EQ(InfoLogLevel::INFO_LEVEL, logger->GetInfoLogLevel()); - ASSERT_EQ(InfoLogLevel::INFO_LEVEL, - logger->TEST_inner_logger()->GetInfoLogLevel()); - // measure the size of each message, which is supposed - // to be equal or greater than log_message.size() - LogMessage(logger, log_message.c_str()); - size_t message_size = logger->GetLogFileSize(); - size_t current_log_size = message_size; - - // Test the cases when the log file will not be rolled. - while (current_log_size + message_size < log_max_size) { - LogMessage(logger, log_message.c_str()); - current_log_size += message_size; - ASSERT_EQ(current_log_size, logger->GetLogFileSize()); - } - - // Now the log file will be rolled - LogMessage(logger, log_message.c_str()); - // Since rotation is checked before actual logging, we need to - // trigger the rotation by logging another message. - LogMessage(logger, log_message.c_str()); - - ASSERT_TRUE(message_size == logger->GetLogFileSize()); -} - -void AutoRollLoggerTest::RollLogFileByTimeTest( - const std::shared_ptr& fs, - const std::shared_ptr& sc, AutoRollLogger* logger, size_t time, - const std::string& log_message) { - uint64_t expected_ctime; - uint64_t actual_ctime; - - uint64_t total_log_size; - EXPECT_OK(fs->GetFileSize(kLogFile, IOOptions(), &total_log_size, nullptr)); - expected_ctime = logger->TEST_ctime(); - logger->SetCallNowMicrosEveryNRecords(0); - - // -- Write to the log for several times, which is supposed - // to be finished before time. - for (int i = 0; i < 10; ++i) { - sc->SleepForMicroseconds(50000); - LogMessage(logger, log_message.c_str()); - EXPECT_OK(logger->GetStatus()); - // Make sure we always write to the same log file (by - // checking the create time); - - actual_ctime = logger->TEST_ctime(); - - // Also make sure the log size is increasing. - EXPECT_EQ(expected_ctime, actual_ctime); - EXPECT_GT(logger->GetLogFileSize(), total_log_size); - total_log_size = logger->GetLogFileSize(); - } - - // -- Make the log file expire - sc->SleepForMicroseconds(static_cast(time * 1000000)); - LogMessage(logger, log_message.c_str()); - - // At this time, the new log file should be created. 
- actual_ctime = logger->TEST_ctime(); - EXPECT_LT(expected_ctime, actual_ctime); - EXPECT_LT(logger->GetLogFileSize(), total_log_size); -} - -TEST_F(AutoRollLoggerTest, RollLogFileBySize) { - InitTestDb(); - size_t log_max_size = 1024 * 5; - size_t keep_log_file_num = 10; - - AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(), kTestDir, - "", log_max_size, 0, keep_log_file_num); - - RollLogFileBySizeTest(&logger, log_max_size, - kSampleMessage + ":RollLogFileBySize"); -} - -TEST_F(AutoRollLoggerTest, RollLogFileByTime) { - auto nsc = - std::make_shared(SystemClock::Default(), true); - - size_t time = 2; - size_t log_size = 1024 * 5; - size_t keep_log_file_num = 10; - - InitTestDb(); - // -- Test the existence of file during the server restart. - ASSERT_EQ(Status::NotFound(), default_env->FileExists(kLogFile)); - AutoRollLogger logger(default_env->GetFileSystem(), nsc, kTestDir, "", - log_size, time, keep_log_file_num); - ASSERT_OK(default_env->FileExists(kLogFile)); - - RollLogFileByTimeTest(default_env->GetFileSystem(), nsc, &logger, time, - kSampleMessage + ":RollLogFileByTime"); -} - -TEST_F(AutoRollLoggerTest, SetInfoLogLevel) { - InitTestDb(); - Options options; - options.info_log_level = InfoLogLevel::FATAL_LEVEL; - options.max_log_file_size = 1024; - std::shared_ptr logger; - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - auto* auto_roll_logger = dynamic_cast(logger.get()); - ASSERT_NE(nullptr, auto_roll_logger); - ASSERT_EQ(InfoLogLevel::FATAL_LEVEL, auto_roll_logger->GetInfoLogLevel()); - ASSERT_EQ(InfoLogLevel::FATAL_LEVEL, - auto_roll_logger->TEST_inner_logger()->GetInfoLogLevel()); - auto_roll_logger->SetInfoLogLevel(InfoLogLevel::DEBUG_LEVEL); - ASSERT_EQ(InfoLogLevel::DEBUG_LEVEL, auto_roll_logger->GetInfoLogLevel()); - ASSERT_EQ(InfoLogLevel::DEBUG_LEVEL, logger->GetInfoLogLevel()); - ASSERT_EQ(InfoLogLevel::DEBUG_LEVEL, - auto_roll_logger->TEST_inner_logger()->GetInfoLogLevel()); -} - -TEST_F(AutoRollLoggerTest, OpenLogFilesMultipleTimesWithOptionLog_max_size) { - // If only 'log_max_size' options is specified, then every time - // when rocksdb is restarted, a new empty log file will be created. - InitTestDb(); - // WORKAROUND: - // avoid complier's complaint of "comparison between signed - // and unsigned integer expressions" because literal 0 is - // treated as "singed". - size_t kZero = 0; - size_t log_size = 1024; - size_t keep_log_file_num = 10; - - AutoRollLogger* logger = - new AutoRollLogger(FileSystem::Default(), SystemClock::Default(), - kTestDir, "", log_size, 0, keep_log_file_num); - - LogMessage(logger, kSampleMessage.c_str()); - ASSERT_GT(logger->GetLogFileSize(), kZero); - delete logger; - - // reopens the log file and an empty log file will be created. 
- logger = new AutoRollLogger(FileSystem::Default(), SystemClock::Default(), - kTestDir, "", log_size, 0, 10); - ASSERT_EQ(logger->GetLogFileSize(), kZero); - delete logger; -} - -TEST_F(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) { - size_t time = 2, log_max_size = 1024 * 5; - size_t keep_log_file_num = 10; - - InitTestDb(); - - auto nsc = - std::make_shared(SystemClock::Default(), true); - AutoRollLogger logger(FileSystem::Default(), nsc, kTestDir, "", log_max_size, - time, keep_log_file_num); - - // Test the ability to roll by size - RollLogFileBySizeTest(&logger, log_max_size, - kSampleMessage + ":CompositeRollByTimeAndSizeLogger"); - - // Test the ability to roll by Time - RollLogFileByTimeTest(FileSystem::Default(), nsc, &logger, time, - kSampleMessage + ":CompositeRollByTimeAndSizeLogger"); -} - -#ifndef OS_WIN -// TODO: does not build for Windows because of EnvLogger use below. Need to -// port -TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) { - DBOptions options; - auto nsc = - std::make_shared(SystemClock::Default(), true); - std::unique_ptr nse(new CompositeEnvWrapper(Env::Default(), nsc)); - - std::shared_ptr logger; - - // Normal logger - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - ASSERT_TRUE(dynamic_cast(logger.get())); - - // Only roll by size - InitTestDb(); - options.max_log_file_size = 1024; - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - AutoRollLogger* auto_roll_logger = - dynamic_cast(logger.get()); - ASSERT_TRUE(auto_roll_logger); - RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size, - kSampleMessage + ":CreateLoggerFromOptions - size"); - - // Only roll by Time - options.env = nse.get(); - InitTestDb(); - options.max_log_file_size = 0; - options.log_file_time_to_roll = 2; - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - auto_roll_logger = dynamic_cast(logger.get()); - RollLogFileByTimeTest(options.env->GetFileSystem(), nsc, auto_roll_logger, - options.log_file_time_to_roll, - kSampleMessage + ":CreateLoggerFromOptions - time"); - - // roll by both Time and size - InitTestDb(); - options.max_log_file_size = 1024 * 5; - options.log_file_time_to_roll = 2; - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - auto_roll_logger = dynamic_cast(logger.get()); - RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size, - kSampleMessage + ":CreateLoggerFromOptions - both"); - RollLogFileByTimeTest(options.env->GetFileSystem(), nsc, auto_roll_logger, - options.log_file_time_to_roll, - kSampleMessage + ":CreateLoggerFromOptions - both"); - - // Set keep_log_file_num - { - const size_t kFileNum = 3; - InitTestDb(); - options.max_log_file_size = 512; - options.log_file_time_to_roll = 2; - options.keep_log_file_num = kFileNum; - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - auto_roll_logger = dynamic_cast(logger.get()); - - // Roll the log 4 times, and it will trim to 3 files. - std::string dummy_large_string; - dummy_large_string.assign(options.max_log_file_size, '='); - auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); - for (size_t i = 0; i < kFileNum + 1; i++) { - // Log enough bytes to trigger at least one roll. - LogMessage(auto_roll_logger, dummy_large_string.c_str()); - LogMessage(auto_roll_logger, ""); - } - - std::vector files = GetLogFiles(); - ASSERT_EQ(kFileNum, files.size()); - - CleanupLogFiles(); - } - - // Set keep_log_file_num and dbname is different from - // db_log_dir. 
- { - const size_t kFileNum = 3; - InitTestDb(); - options.max_log_file_size = 512; - options.log_file_time_to_roll = 2; - options.keep_log_file_num = kFileNum; - options.db_log_dir = kTestDir; - ASSERT_OK(CreateLoggerFromOptions(kTestDbDir, options, &logger)); - auto_roll_logger = dynamic_cast(logger.get()); - - // Roll the log 4 times, and it will trim to 3 files. - std::string dummy_large_string; - dummy_large_string.assign(options.max_log_file_size, '='); - auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); - for (size_t i = 0; i < kFileNum + 1; i++) { - // Log enough bytes to trigger at least one roll. - LogMessage(auto_roll_logger, dummy_large_string.c_str()); - LogMessage(auto_roll_logger, ""); - } - - std::vector files = GetLogFiles(); - ASSERT_EQ(kFileNum, files.size()); - for (const auto& f : files) { - ASSERT_TRUE(f.find("db_log_test_db") != std::string::npos); - } - - // Cleaning up those files. - CleanupLogFiles(); - } -} - -TEST_F(AutoRollLoggerTest, AutoDeleting) { - for (int attempt = 0; attempt < 2; attempt++) { - // In the first attemp, db_log_dir is not set, while in the - // second it is set. - std::string dbname = (attempt == 0) ? kTestDir : "/test/dummy/dir"; - std::string db_log_dir = (attempt == 0) ? "" : kTestDir; - - InitTestDb(); - const size_t kMaxFileSize = 512; - { - size_t log_num = 8; - AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(), - dbname, db_log_dir, kMaxFileSize, 0, log_num); - RollNTimesBySize(&logger, log_num, kMaxFileSize); - - ASSERT_EQ(log_num, GetLogFiles().size()); - } - // Shrink number of files - { - size_t log_num = 5; - AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(), - dbname, db_log_dir, kMaxFileSize, 0, log_num); - ASSERT_EQ(log_num, GetLogFiles().size()); - - RollNTimesBySize(&logger, 3, kMaxFileSize); - ASSERT_EQ(log_num, GetLogFiles().size()); - } - - // Increase number of files again. - { - size_t log_num = 7; - AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(), - dbname, db_log_dir, kMaxFileSize, 0, log_num); - ASSERT_EQ(6, GetLogFiles().size()); - - RollNTimesBySize(&logger, 3, kMaxFileSize); - ASSERT_EQ(log_num, GetLogFiles().size()); - } - - CleanupLogFiles(); - } -} - -TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) { - DBOptions options; - std::shared_ptr logger; - - InitTestDb(); - options.max_log_file_size = 1024 * 5; - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - AutoRollLogger* auto_roll_logger = - dynamic_cast(logger.get()); - ASSERT_TRUE(auto_roll_logger); - ROCKSDB_NAMESPACE::port::Thread flush_thread; - - // Notes: - // (1) Need to pin the old logger before beginning the roll, as rolling grabs - // the mutex, which would prevent us from accessing the old logger. This - // also marks flush_thread with AutoRollLogger::Flush:PinnedLogger. - // (2) New logger will be cut in AutoRollLogger::RollLogFile only when flush - // is completed and reference to pinned logger is released. - // (3) EnvLogger::Flush() happens in both threads but its SyncPoints only - // are enabled in flush_thread (the one pinning the old logger). 
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependencyAndMarkers( - {{"AutoRollLogger::Flush:PinnedLogger", - "AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit"}}, - {{"AutoRollLogger::Flush:PinnedLogger", "EnvLogger::Flush:Begin1"}, - {"AutoRollLogger::Flush:PinnedLogger", "EnvLogger::Flush:Begin2"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - flush_thread = port::Thread([&]() { auto_roll_logger->Flush(); }); - TEST_SYNC_POINT( - "AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit"); - RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size, - kSampleMessage + ":LogFlushWhileRolling"); - flush_thread.join(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -#endif // OS_WIN - -TEST_F(AutoRollLoggerTest, InfoLogLevel) { - InitTestDb(); - - size_t log_size = 8192; - size_t log_lines = 0; - // an extra-scope to force the AutoRollLogger to flush the log file when it - // becomes out of scope. - { - AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(), - kTestDir, "", log_size, 0, 10); - for (int log_level = InfoLogLevel::HEADER_LEVEL; - log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { - logger.SetInfoLogLevel((InfoLogLevel)log_level); - for (int log_type = InfoLogLevel::DEBUG_LEVEL; - log_type <= InfoLogLevel::HEADER_LEVEL; log_type++) { - // log messages with log level smaller than log_level will not be - // logged. - LogMessage((InfoLogLevel)log_type, &logger, kSampleMessage.c_str()); - } - log_lines += InfoLogLevel::HEADER_LEVEL - log_level + 1; - } - for (int log_level = InfoLogLevel::HEADER_LEVEL; - log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { - logger.SetInfoLogLevel((InfoLogLevel)log_level); - - // again, messages with level smaller than log_level will not be logged. - ROCKS_LOG_HEADER(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_DEBUG(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_INFO(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_WARN(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_ERROR(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_FATAL(&logger, "%s", kSampleMessage.c_str()); - log_lines += InfoLogLevel::HEADER_LEVEL - log_level + 1; - } - } - std::ifstream inFile(AutoRollLoggerTest::kLogFile.c_str()); - size_t lines = std::count(std::istreambuf_iterator(inFile), - std::istreambuf_iterator(), '\n'); - ASSERT_EQ(log_lines, lines); - inFile.close(); -} - -TEST_F(AutoRollLoggerTest, Close) { - InitTestDb(); - - size_t log_size = 8192; - size_t log_lines = 0; - AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(), kTestDir, - "", log_size, 0, 10); - for (int log_level = InfoLogLevel::HEADER_LEVEL; - log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { - logger.SetInfoLogLevel((InfoLogLevel)log_level); - for (int log_type = InfoLogLevel::DEBUG_LEVEL; - log_type <= InfoLogLevel::HEADER_LEVEL; log_type++) { - // log messages with log level smaller than log_level will not be - // logged. - LogMessage((InfoLogLevel)log_type, &logger, kSampleMessage.c_str()); - } - log_lines += InfoLogLevel::HEADER_LEVEL - log_level + 1; - } - for (int log_level = InfoLogLevel::HEADER_LEVEL; - log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { - logger.SetInfoLogLevel((InfoLogLevel)log_level); - - // again, messages with level smaller than log_level will not be logged. 
- ROCKS_LOG_HEADER(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_DEBUG(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_INFO(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_WARN(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_ERROR(&logger, "%s", kSampleMessage.c_str()); - ROCKS_LOG_FATAL(&logger, "%s", kSampleMessage.c_str()); - log_lines += InfoLogLevel::HEADER_LEVEL - log_level + 1; - } - ASSERT_EQ(logger.Close(), Status::OK()); - - std::ifstream inFile(AutoRollLoggerTest::kLogFile.c_str()); - size_t lines = std::count(std::istreambuf_iterator(inFile), - std::istreambuf_iterator(), '\n'); - ASSERT_EQ(log_lines, lines); - inFile.close(); -} - -// Test the logger Header function for roll over logs -// We expect the new logs creates as roll over to carry the headers specified -static std::vector GetOldFileNames(const std::string& path) { - std::vector ret; - - const std::string dirname = path.substr(/*start=*/0, path.find_last_of("/")); - const std::string fname = path.substr(path.find_last_of("/") + 1); - - std::vector children; - EXPECT_OK(Env::Default()->GetChildren(dirname, &children)); - - // We know that the old log files are named [path] - // Return all entities that match the pattern - for (auto& child : children) { - if (fname != child && child.find(fname) == 0) { - ret.push_back(dirname + "/" + child); - } - } - - return ret; -} - -TEST_F(AutoRollLoggerTest, LogHeaderTest) { - static const size_t MAX_HEADERS = 10; - static const size_t LOG_MAX_SIZE = 1024 * 5; - static const std::string HEADER_STR = "Log header line"; - - // test_num == 0 -> standard call to Header() - // test_num == 1 -> call to Log() with InfoLogLevel::HEADER_LEVEL - for (int test_num = 0; test_num < 2; test_num++) { - InitTestDb(); - - AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(), - kTestDir, /*db_log_dir=*/"", LOG_MAX_SIZE, - /*log_file_time_to_roll=*/0, - /*keep_log_file_num=*/10); - - if (test_num == 0) { - // Log some headers explicitly using Header() - for (size_t i = 0; i < MAX_HEADERS; i++) { - Header(&logger, "%s %" ROCKSDB_PRIszt, HEADER_STR.c_str(), i); - } - } else if (test_num == 1) { - // HEADER_LEVEL should make this behave like calling Header() - for (size_t i = 0; i < MAX_HEADERS; i++) { - ROCKS_LOG_HEADER(&logger, "%s %" ROCKSDB_PRIszt, HEADER_STR.c_str(), i); - } - } - - const std::string newfname = logger.TEST_log_fname(); - - // Log enough data to cause a roll over - int i = 0; - for (size_t iter = 0; iter < 2; iter++) { - while (logger.GetLogFileSize() < LOG_MAX_SIZE) { - Info(&logger, (kSampleMessage + ":LogHeaderTest line %d").c_str(), i); - ++i; - } - - Info(&logger, "Rollover"); - } - - // Flush the log for the latest file - LogFlush(&logger); - - const auto oldfiles = GetOldFileNames(newfname); - - ASSERT_EQ(oldfiles.size(), (size_t)2); - - for (auto& oldfname : oldfiles) { - // verify that the files rolled over - ASSERT_NE(oldfname, newfname); - // verify that the old log contains all the header logs - ASSERT_EQ(test::GetLinesCount(oldfname, HEADER_STR), MAX_HEADERS); - } - } -} - -TEST_F(AutoRollLoggerTest, LogFileExistence) { - ROCKSDB_NAMESPACE::DB* db; - ROCKSDB_NAMESPACE::Options options; -#ifdef OS_WIN - // Replace all slashes in the path so windows CompSpec does not - // become confused - std::string testDir(kTestDir); - std::replace_if( - testDir.begin(), testDir.end(), [](char ch) { return ch == '/'; }, '\\'); - std::string deleteCmd = "if exist " + testDir + " rd /s /q " + testDir; -#else - std::string deleteCmd = "rm -rf " + 
kTestDir; -#endif - ASSERT_EQ(system(deleteCmd.c_str()), 0); - options.max_log_file_size = 100 * 1024 * 1024; - options.create_if_missing = true; - ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kTestDir, &db)); - ASSERT_OK(default_env->FileExists(kLogFile)); - delete db; -} - -TEST_F(AutoRollLoggerTest, FileCreateFailure) { - Options options; - options.max_log_file_size = 100 * 1024 * 1024; - options.db_log_dir = "/a/dir/does/not/exist/at/all"; - - std::shared_ptr logger; - ASSERT_NOK(CreateLoggerFromOptions("", options, &logger)); - ASSERT_TRUE(!logger); -} - -TEST_F(AutoRollLoggerTest, RenameOnlyWhenExists) { - InitTestDb(); - SpecialEnv env(Env::Default()); - Options options; - options.env = &env; - - // Originally no LOG exists. Should not see a rename. - { - std::shared_ptr logger; - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - ASSERT_EQ(0, env.rename_count_); - } - - // Now a LOG exists. Create a new one should see a rename. - { - std::shared_ptr logger; - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - ASSERT_EQ(1, env.rename_count_); - } -} - -TEST_F(AutoRollLoggerTest, RenameError) { - InitTestDb(); - SpecialEnv env(Env::Default()); - env.rename_error_ = true; - Options options; - options.env = &env; - - // Originally no LOG exists. Should not be impacted by rename error. - { - std::shared_ptr logger; - ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); - ASSERT_TRUE(logger != nullptr); - } - - // Now a LOG exists. Rename error should cause failure. - { - std::shared_ptr logger; - ASSERT_NOK(CreateLoggerFromOptions(kTestDir, options, &logger)); - ASSERT_TRUE(logger == nullptr); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/logging/env_logger_test.cc b/logging/env_logger_test.cc deleted file mode 100644 index 467ab064f..000000000 --- a/logging/env_logger_test.cc +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// - -#include "logging/env_logger.h" - -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -// In this test we only want to Log some simple log message with -// no format. -void LogMessage(std::shared_ptr logger, const std::string& message) { - Log(logger, "%s", message.c_str()); -} - -// Helper method to write the message num_times in the given logger. 
-void WriteLogs(std::shared_ptr logger, const std::string& message, - int num_times) { - for (int ii = 0; ii < num_times; ++ii) { - LogMessage(logger, message); - } -} - -} // namespace - -class EnvLoggerTest : public testing::Test { - public: - Env* env_; - - EnvLoggerTest() : env_(Env::Default()) {} - - ~EnvLoggerTest() = default; - - std::shared_ptr CreateLogger() { - std::shared_ptr result; - assert(NewEnvLogger(kLogFile, env_, &result).ok()); - assert(result); - result->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); - return result; - } - - void DeleteLogFile() { ASSERT_OK(env_->DeleteFile(kLogFile)); } - - static const std::string kSampleMessage; - static const std::string kTestDir; - static const std::string kLogFile; -}; - -const std::string EnvLoggerTest::kSampleMessage = - "this is the message to be written to the log file!!"; -const std::string EnvLoggerTest::kLogFile = test::PerThreadDBPath("log_file"); - -TEST_F(EnvLoggerTest, EmptyLogFile) { - auto logger = CreateLogger(); - ASSERT_EQ(logger->Close(), Status::OK()); - - // Check the size of the log file. - uint64_t file_size; - ASSERT_EQ(env_->GetFileSize(kLogFile, &file_size), Status::OK()); - ASSERT_EQ(file_size, 0); - DeleteLogFile(); -} - -TEST_F(EnvLoggerTest, LogMultipleLines) { - auto logger = CreateLogger(); - - // Write multiple lines. - const int kNumIter = 10; - WriteLogs(logger, kSampleMessage, kNumIter); - - // Flush the logs. - logger->Flush(); - ASSERT_EQ(logger->Close(), Status::OK()); - - // Validate whether the log file has 'kNumIter' number of lines. - ASSERT_EQ(test::GetLinesCount(kLogFile, kSampleMessage), kNumIter); - DeleteLogFile(); -} - -TEST_F(EnvLoggerTest, Overwrite) { - { - auto logger = CreateLogger(); - - // Write multiple lines. - const int kNumIter = 10; - WriteLogs(logger, kSampleMessage, kNumIter); - - ASSERT_EQ(logger->Close(), Status::OK()); - - // Validate whether the log file has 'kNumIter' number of lines. - ASSERT_EQ(test::GetLinesCount(kLogFile, kSampleMessage), kNumIter); - } - - // Now reopen the file again. - { - auto logger = CreateLogger(); - - // File should be empty. - uint64_t file_size; - ASSERT_EQ(env_->GetFileSize(kLogFile, &file_size), Status::OK()); - ASSERT_EQ(file_size, 0); - ASSERT_EQ(logger->GetLogFileSize(), 0); - ASSERT_EQ(logger->Close(), Status::OK()); - } - DeleteLogFile(); -} - -TEST_F(EnvLoggerTest, Close) { - auto logger = CreateLogger(); - - // Write multiple lines. - const int kNumIter = 10; - WriteLogs(logger, kSampleMessage, kNumIter); - - ASSERT_EQ(logger->Close(), Status::OK()); - - // Validate whether the log file has 'kNumIter' number of lines. - ASSERT_EQ(test::GetLinesCount(kLogFile, kSampleMessage), kNumIter); - DeleteLogFile(); -} - -TEST_F(EnvLoggerTest, ConcurrentLogging) { - auto logger = CreateLogger(); - - const int kNumIter = 20; - std::function cb = [&]() { - WriteLogs(logger, kSampleMessage, kNumIter); - logger->Flush(); - }; - - // Write to the logs from multiple threads. - std::vector threads; - const int kNumThreads = 5; - // Create threads. - for (int ii = 0; ii < kNumThreads; ++ii) { - threads.push_back(port::Thread(cb)); - } - - // Wait for them to complete. - for (auto& th : threads) { - th.join(); - } - - ASSERT_EQ(logger->Close(), Status::OK()); - - // Verfiy the log file. 
- ASSERT_EQ(test::GetLinesCount(kLogFile, kSampleMessage), - kNumIter * kNumThreads); - DeleteLogFile(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/logging/event_logger_test.cc b/logging/event_logger_test.cc deleted file mode 100644 index 582f56ceb..000000000 --- a/logging/event_logger_test.cc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "logging/event_logger.h" - -#include - -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -class EventLoggerTest : public testing::Test {}; - -class StringLogger : public Logger { - public: - using Logger::Logv; - void Logv(const char* format, va_list ap) override { - vsnprintf(buffer_, sizeof(buffer_), format, ap); - } - char* buffer() { return buffer_; } - - private: - char buffer_[1000]; -}; - -TEST_F(EventLoggerTest, SimpleTest) { - StringLogger logger; - EventLogger event_logger(&logger); - event_logger.Log() << "id" << 5 << "event" - << "just_testing"; - std::string output(logger.buffer()); - ASSERT_TRUE(output.find("\"event\": \"just_testing\"") != std::string::npos); - ASSERT_TRUE(output.find("\"id\": 5") != std::string::npos); - ASSERT_TRUE(output.find("\"time_micros\"") != std::string::npos); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/memory/arena_test.cc b/memory/arena_test.cc deleted file mode 100644 index 21bf7ed62..000000000 --- a/memory/arena_test.cc +++ /dev/null @@ -1,295 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "memory/arena.h" - -#ifndef OS_WIN -#include -#endif -#include "port/port.h" -#include "test_util/testharness.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -const size_t kHugePageSize = 2 * 1024 * 1024; -} // namespace -class ArenaTest : public testing::Test {}; - -TEST_F(ArenaTest, Empty) { Arena arena0; } - -namespace { -bool CheckMemoryAllocated(size_t allocated, size_t expected) { - // The value returned by Arena::MemoryAllocatedBytes() may be greater than - // the requested memory. We choose a somewhat arbitrary upper bound of - // max_expected = expected * 1.1 to detect critical overallocation. 
- size_t max_expected = expected + expected / 10; - return allocated >= expected && allocated <= max_expected; -} - -void MemoryAllocatedBytesTest(size_t huge_page_size) { - const int N = 17; - size_t req_sz; // requested size - size_t bsz = 32 * 1024; // block size - size_t expected_memory_allocated; - - Arena arena(bsz, nullptr, huge_page_size); - - // requested size > quarter of a block: - // allocate requested size separately - req_sz = 12 * 1024; - for (int i = 0; i < N; i++) { - arena.Allocate(req_sz); - } - expected_memory_allocated = req_sz * N + Arena::kInlineSize; - ASSERT_PRED2(CheckMemoryAllocated, arena.MemoryAllocatedBytes(), - expected_memory_allocated); - - arena.Allocate(Arena::kInlineSize - 1); - - // requested size < quarter of a block: - // allocate a block with the default size, then try to use unused part - // of the block. So one new block will be allocated for the first - // Allocate(99) call. All the remaining calls won't lead to new allocation. - req_sz = 99; - for (int i = 0; i < N; i++) { - arena.Allocate(req_sz); - } - if (huge_page_size) { - ASSERT_TRUE( - CheckMemoryAllocated(arena.MemoryAllocatedBytes(), - expected_memory_allocated + bsz) || - CheckMemoryAllocated(arena.MemoryAllocatedBytes(), - expected_memory_allocated + huge_page_size)); - } else { - expected_memory_allocated += bsz; - ASSERT_PRED2(CheckMemoryAllocated, arena.MemoryAllocatedBytes(), - expected_memory_allocated); - } - - // requested size > size of a block: - // allocate requested size separately - expected_memory_allocated = arena.MemoryAllocatedBytes(); - req_sz = 8 * 1024 * 1024; - for (int i = 0; i < N; i++) { - arena.Allocate(req_sz); - } - expected_memory_allocated += req_sz * N; - ASSERT_PRED2(CheckMemoryAllocated, arena.MemoryAllocatedBytes(), - expected_memory_allocated); -} - -// Make sure we didn't count the allocate but not used memory space in -// Arena::ApproximateMemoryUsage() -static void ApproximateMemoryUsageTest(size_t huge_page_size) { - const size_t kBlockSize = 4096; - const size_t kEntrySize = kBlockSize / 8; - const size_t kZero = 0; - Arena arena(kBlockSize, nullptr, huge_page_size); - ASSERT_EQ(kZero, arena.ApproximateMemoryUsage()); - - // allocate inline bytes - const size_t kAlignUnit = alignof(max_align_t); - EXPECT_TRUE(arena.IsInInlineBlock()); - arena.AllocateAligned(kAlignUnit); - EXPECT_TRUE(arena.IsInInlineBlock()); - arena.AllocateAligned(Arena::kInlineSize / 2 - (2 * kAlignUnit)); - EXPECT_TRUE(arena.IsInInlineBlock()); - arena.AllocateAligned(Arena::kInlineSize / 2); - EXPECT_TRUE(arena.IsInInlineBlock()); - ASSERT_EQ(arena.ApproximateMemoryUsage(), Arena::kInlineSize - kAlignUnit); - ASSERT_PRED2(CheckMemoryAllocated, arena.MemoryAllocatedBytes(), - Arena::kInlineSize); - - auto num_blocks = kBlockSize / kEntrySize; - - // first allocation - arena.AllocateAligned(kEntrySize); - EXPECT_FALSE(arena.IsInInlineBlock()); - auto mem_usage = arena.MemoryAllocatedBytes(); - if (huge_page_size) { - ASSERT_TRUE( - CheckMemoryAllocated(mem_usage, kBlockSize + Arena::kInlineSize) || - CheckMemoryAllocated(mem_usage, huge_page_size + Arena::kInlineSize)); - } else { - ASSERT_PRED2(CheckMemoryAllocated, mem_usage, - kBlockSize + Arena::kInlineSize); - } - auto usage = arena.ApproximateMemoryUsage(); - ASSERT_LT(usage, mem_usage); - for (size_t i = 1; i < num_blocks; ++i) { - arena.AllocateAligned(kEntrySize); - ASSERT_EQ(mem_usage, arena.MemoryAllocatedBytes()); - ASSERT_EQ(arena.ApproximateMemoryUsage(), usage + kEntrySize); - 
EXPECT_FALSE(arena.IsInInlineBlock()); - usage = arena.ApproximateMemoryUsage(); - } - if (huge_page_size) { - ASSERT_TRUE(usage > mem_usage || - usage + huge_page_size - kBlockSize == mem_usage); - } else { - ASSERT_GT(usage, mem_usage); - } -} - -static void SimpleTest(size_t huge_page_size) { - std::vector> allocated; - Arena arena(Arena::kMinBlockSize, nullptr, huge_page_size); - const int N = 100000; - size_t bytes = 0; - Random rnd(301); - for (int i = 0; i < N; i++) { - size_t s; - if (i % (N / 10) == 0) { - s = i; - } else { - s = rnd.OneIn(4000) - ? rnd.Uniform(6000) - : (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20)); - } - if (s == 0) { - // Our arena disallows size 0 allocations. - s = 1; - } - char* r; - if (rnd.OneIn(10)) { - r = arena.AllocateAligned(s); - } else { - r = arena.Allocate(s); - } - - for (unsigned int b = 0; b < s; b++) { - // Fill the "i"th allocation with a known bit pattern - r[b] = i % 256; - } - bytes += s; - allocated.push_back(std::make_pair(s, r)); - ASSERT_GE(arena.ApproximateMemoryUsage(), bytes); - if (i > N / 10) { - ASSERT_LE(arena.ApproximateMemoryUsage(), bytes * 1.10); - } - } - for (unsigned int i = 0; i < allocated.size(); i++) { - size_t num_bytes = allocated[i].first; - const char* p = allocated[i].second; - for (unsigned int b = 0; b < num_bytes; b++) { - // Check the "i"th allocation for the known bit pattern - ASSERT_EQ(int(p[b]) & 0xff, (int)(i % 256)); - } - } -} -} // namespace - -TEST_F(ArenaTest, MemoryAllocatedBytes) { - MemoryAllocatedBytesTest(0); - MemoryAllocatedBytesTest(kHugePageSize); -} - -TEST_F(ArenaTest, ApproximateMemoryUsage) { - ApproximateMemoryUsageTest(0); - ApproximateMemoryUsageTest(kHugePageSize); -} - -TEST_F(ArenaTest, Simple) { - SimpleTest(0); - SimpleTest(kHugePageSize); -} - -// Number of minor page faults since last call -size_t PopMinorPageFaultCount() { -#ifdef RUSAGE_SELF - static long prev = 0; - struct rusage usage; - EXPECT_EQ(getrusage(RUSAGE_SELF, &usage), 0); - size_t rv = usage.ru_minflt - prev; - prev = usage.ru_minflt; - return rv; -#else - // Conservative - return SIZE_MAX; -#endif // RUSAGE_SELF -} - -TEST(MmapTest, AllocateLazyZeroed) { - // Doesn't have to be page aligned - constexpr size_t len = 1234567; - MemMapping m = MemMapping::AllocateLazyZeroed(len); - auto arr = static_cast(m.Get()); - - // Should generally work - ASSERT_NE(arr, nullptr); - - // Start counting page faults - PopMinorPageFaultCount(); - - // Access half of the allocation - size_t i = 0; - for (; i < len / 2; ++i) { - ASSERT_EQ(arr[i], 0); - arr[i] = static_cast(i & 255); - } - - // Appropriate page faults (maybe more) - size_t faults = PopMinorPageFaultCount(); - ASSERT_GE(faults, len / 2 / port::kPageSize); - - // Access rest of the allocation - for (; i < len; ++i) { - ASSERT_EQ(arr[i], 0); - arr[i] = static_cast(i & 255); - } - - // Appropriate page faults (maybe more) - faults = PopMinorPageFaultCount(); - ASSERT_GE(faults, len / 2 / port::kPageSize); - - // Verify data - for (i = 0; i < len; ++i) { - ASSERT_EQ(arr[i], static_cast(i & 255)); - } -} - -TEST_F(ArenaTest, UnmappedAllocation) { - // Verify that it's possible to get unmapped pages in large allocations, - // for memory efficiency and to ensure we don't accidentally waste time & - // space initializing the memory. - constexpr size_t kBlockSize = 2U << 20; - Arena arena(kBlockSize); - - // The allocator might give us back recycled memory for a while, but - // shouldn't last forever. 
- for (int i = 0;; ++i) { - char* p = arena.Allocate(kBlockSize); - - // Start counting page faults - PopMinorPageFaultCount(); - - // Overwrite the whole allocation - for (size_t j = 0; j < kBlockSize; ++j) { - p[j] = static_cast(j & 255); - } - - size_t faults = PopMinorPageFaultCount(); - if (faults >= kBlockSize * 3 / 4 / port::kPageSize) { - // Most of the access generated page faults => GOOD - break; - } - // Should have succeeded after enough tries - ASSERT_LT(i, 1000); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/memory/memory_allocator_test.cc b/memory/memory_allocator_test.cc deleted file mode 100644 index 6616e1c3b..000000000 --- a/memory/memory_allocator_test.cc +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// Copyright (c) 2019 Intel Corporation -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include - -#include "memory/jemalloc_nodump_allocator.h" -#include "memory/memkind_kmem_allocator.h" -#include "rocksdb/cache.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/options.h" -#include "table/block_based/block_based_table_factory.h" -#include "test_util/testharness.h" -#include "utilities/memory_allocators.h" - -namespace ROCKSDB_NAMESPACE { - -// TODO: the tests do not work in LITE mode due to relying on -// `CreateFromString()` to create non-default memory allocators. - -class MemoryAllocatorTest - : public testing::Test, - public ::testing::WithParamInterface> { - public: - MemoryAllocatorTest() { - std::tie(id_, supported_) = GetParam(); - Status s = - MemoryAllocator::CreateFromString(ConfigOptions(), id_, &allocator_); - EXPECT_EQ(supported_, s.ok()); - } - bool IsSupported() { return supported_; } - - std::shared_ptr allocator_; - std::string id_; - - private: - bool supported_; -}; - -TEST_P(MemoryAllocatorTest, Allocate) { - if (!IsSupported()) { - return; - } - void* p = allocator_->Allocate(1024); - ASSERT_NE(p, nullptr); - size_t size = allocator_->UsableSize(p, 1024); - ASSERT_GE(size, 1024); - allocator_->Deallocate(p); -} - -TEST_P(MemoryAllocatorTest, CreateAllocator) { - ConfigOptions config_options; - config_options.ignore_unknown_options = false; - config_options.ignore_unsupported_options = false; - std::shared_ptr orig, copy; - Status s = MemoryAllocator::CreateFromString(config_options, id_, &orig); - if (!IsSupported()) { - ASSERT_TRUE(s.IsNotSupported()); - } else { - ASSERT_OK(s); - ASSERT_NE(orig, nullptr); - std::string str = orig->ToString(config_options); - ASSERT_OK(MemoryAllocator::CreateFromString(config_options, str, ©)); - ASSERT_EQ(orig, copy); - } -} - -TEST_P(MemoryAllocatorTest, DatabaseBlockCache) { - if (!IsSupported()) { - // Check if a memory node is available for allocation - } - - // Create database with block cache using the MemoryAllocator - Options options; - std::string dbname = test::PerThreadDBPath("allocator_test"); - ASSERT_OK(DestroyDB(dbname, options)); - - options.create_if_missing = true; - BlockBasedTableOptions table_options; - auto cache = NewLRUCache(1024 * 1024, 6, false, 0.0, allocator_); - table_options.block_cache = cache; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DB* db = 
nullptr; - Status s = DB::Open(options, dbname, &db); - ASSERT_OK(s); - ASSERT_NE(db, nullptr); - ASSERT_LE(cache->GetUsage(), 104); // Cache will contain stats - - // Write 2kB (200 values, each 10 bytes) - int num_keys = 200; - WriteOptions wo; - std::string val = "0123456789"; - for (int i = 0; i < num_keys; i++) { - std::string key = std::to_string(i); - s = db->Put(wo, Slice(key), Slice(val)); - ASSERT_OK(s); - } - ASSERT_OK(db->Flush(FlushOptions())); // Flush all data from memtable so that - // reads are from block cache - - // Read and check block cache usage - ReadOptions ro; - std::string result; - for (int i = 0; i < num_keys; i++) { - std::string key = std::to_string(i); - s = db->Get(ro, key, &result); - ASSERT_OK(s); - ASSERT_EQ(result, val); - } - ASSERT_GT(cache->GetUsage(), 2000); - - // Close database - s = db->Close(); - ASSERT_OK(s); - delete db; - ASSERT_OK(DestroyDB(dbname, options)); -} - -class CreateMemoryAllocatorTest : public testing::Test { - public: - CreateMemoryAllocatorTest() { - config_options_.ignore_unknown_options = false; - config_options_.ignore_unsupported_options = false; - } - ConfigOptions config_options_; -}; - -TEST_F(CreateMemoryAllocatorTest, JemallocOptionsTest) { - std::shared_ptr allocator; - std::string id = std::string("id=") + JemallocNodumpAllocator::kClassName(); - Status s = MemoryAllocator::CreateFromString(config_options_, id, &allocator); - if (!JemallocNodumpAllocator::IsSupported()) { - ASSERT_NOK(s); - ROCKSDB_GTEST_BYPASS("JEMALLOC not supported"); - return; - } - ASSERT_OK(s); - ASSERT_NE(allocator, nullptr); - JemallocAllocatorOptions jopts; - auto opts = allocator->GetOptions(); - ASSERT_NE(opts, nullptr); - ASSERT_EQ(opts->limit_tcache_size, jopts.limit_tcache_size); - ASSERT_EQ(opts->tcache_size_lower_bound, jopts.tcache_size_lower_bound); - ASSERT_EQ(opts->tcache_size_upper_bound, jopts.tcache_size_upper_bound); - - ASSERT_NOK(MemoryAllocator::CreateFromString( - config_options_, - id + "; limit_tcache_size=true; tcache_size_lower_bound=4096; " - "tcache_size_upper_bound=1024", - &allocator)); - ASSERT_OK(MemoryAllocator::CreateFromString( - config_options_, - id + "; limit_tcache_size=false; tcache_size_lower_bound=4096; " - "tcache_size_upper_bound=1024", - &allocator)); - opts = allocator->GetOptions(); - ASSERT_NE(opts, nullptr); - ASSERT_EQ(opts->limit_tcache_size, false); - ASSERT_EQ(opts->tcache_size_lower_bound, 4096U); - ASSERT_EQ(opts->tcache_size_upper_bound, 1024U); - ASSERT_OK(MemoryAllocator::CreateFromString( - config_options_, - id + "; limit_tcache_size=true; tcache_size_upper_bound=4096; " - "tcache_size_lower_bound=1024", - &allocator)); - opts = allocator->GetOptions(); - ASSERT_NE(opts, nullptr); - ASSERT_EQ(opts->limit_tcache_size, true); - ASSERT_EQ(opts->tcache_size_lower_bound, 1024U); - ASSERT_EQ(opts->tcache_size_upper_bound, 4096U); -} - -TEST_F(CreateMemoryAllocatorTest, NewJemallocNodumpAllocator) { - JemallocAllocatorOptions jopts; - std::shared_ptr allocator; - - jopts.limit_tcache_size = true; - jopts.tcache_size_lower_bound = 2 * 1024; - jopts.tcache_size_upper_bound = 1024; - - ASSERT_NOK(NewJemallocNodumpAllocator(jopts, nullptr)); - Status s = NewJemallocNodumpAllocator(jopts, &allocator); - std::string msg; - if (!JemallocNodumpAllocator::IsSupported(&msg)) { - ASSERT_NOK(s); - ROCKSDB_GTEST_BYPASS("JEMALLOC not supported"); - return; - } - ASSERT_NOK(s); // Invalid options - ASSERT_EQ(allocator, nullptr); - - jopts.tcache_size_upper_bound = 4 * 1024; - 
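The option validation above hinges on one invariant: when limit_tcache_size is set, tcache_size_lower_bound must not exceed tcache_size_upper_bound. A minimal sketch of creating the allocator directly, assuming a jemalloc-enabled build and that the public rocksdb/memory_allocator.h header declares these types (as the field names in the test suggest):

```cpp
#include <memory>

#include "rocksdb/memory_allocator.h"

// Minimal sketch (assumptions noted above): with limit_tcache_size set,
// the lower bound must not exceed the upper bound or creation fails.
int main() {
  ROCKSDB_NAMESPACE::JemallocAllocatorOptions jopts;
  jopts.limit_tcache_size = true;
  jopts.tcache_size_lower_bound = 4 * 1024;
  jopts.tcache_size_upper_bound = 1024;  // invalid: lower > upper
  std::shared_ptr<ROCKSDB_NAMESPACE::MemoryAllocator> alloc;
  ROCKSDB_NAMESPACE::Status s =
      ROCKSDB_NAMESPACE::NewJemallocNodumpAllocator(jopts, &alloc);
  // s.ok() is false here; swapping the two bounds lets creation succeed
  // (or it returns NotSupported on builds without ROCKSDB_JEMALLOC).
  return s.ok() ? 1 : 0;
}
```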
ASSERT_OK(NewJemallocNodumpAllocator(jopts, &allocator)); - ASSERT_NE(allocator, nullptr); - auto opts = allocator->GetOptions(); - ASSERT_EQ(opts->tcache_size_upper_bound, jopts.tcache_size_upper_bound); - ASSERT_EQ(opts->tcache_size_lower_bound, jopts.tcache_size_lower_bound); - ASSERT_EQ(opts->limit_tcache_size, jopts.limit_tcache_size); - - jopts.limit_tcache_size = false; - ASSERT_OK(NewJemallocNodumpAllocator(jopts, &allocator)); - ASSERT_NE(allocator, nullptr); - opts = allocator->GetOptions(); - ASSERT_EQ(opts->tcache_size_upper_bound, jopts.tcache_size_upper_bound); - ASSERT_EQ(opts->tcache_size_lower_bound, jopts.tcache_size_lower_bound); - ASSERT_EQ(opts->limit_tcache_size, jopts.limit_tcache_size); -} - -INSTANTIATE_TEST_CASE_P(DefaultMemoryAllocator, MemoryAllocatorTest, - ::testing::Values(std::make_tuple( - DefaultMemoryAllocator::kClassName(), true))); -#ifdef MEMKIND -INSTANTIATE_TEST_CASE_P( - MemkindkMemAllocator, MemoryAllocatorTest, - ::testing::Values(std::make_tuple(MemkindKmemAllocator::kClassName(), - MemkindKmemAllocator::IsSupported()))); -#endif // MEMKIND - -#ifdef ROCKSDB_JEMALLOC -INSTANTIATE_TEST_CASE_P( - JemallocNodumpAllocator, MemoryAllocatorTest, - ::testing::Values(std::make_tuple(JemallocNodumpAllocator::kClassName(), - JemallocNodumpAllocator::IsSupported()))); -#endif // ROCKSDB_JEMALLOC - - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/memtable/inlineskiplist_test.cc b/memtable/inlineskiplist_test.cc deleted file mode 100644 index f85644064..000000000 --- a/memtable/inlineskiplist_test.cc +++ /dev/null @@ -1,664 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include "memtable/inlineskiplist.h" - -#include -#include - -#include "memory/concurrent_arena.h" -#include "rocksdb/env.h" -#include "test_util/testharness.h" -#include "util/hash.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -// Our test skip list stores 8-byte unsigned integers -using Key = uint64_t; - -static const char* Encode(const uint64_t* key) { - return reinterpret_cast(key); -} - -static Key Decode(const char* key) { - Key rv; - memcpy(&rv, key, sizeof(Key)); - return rv; -} - -struct TestComparator { - using DecodedType = Key; - - static DecodedType decode_key(const char* b) { return Decode(b); } - - int operator()(const char* a, const char* b) const { - if (Decode(a) < Decode(b)) { - return -1; - } else if (Decode(a) > Decode(b)) { - return +1; - } else { - return 0; - } - } - - int operator()(const char* a, const DecodedType b) const { - if (Decode(a) < b) { - return -1; - } else if (Decode(a) > b) { - return +1; - } else { - return 0; - } - } -}; - -using TestInlineSkipList = InlineSkipList; - -class InlineSkipTest : public testing::Test { - public: - void Insert(TestInlineSkipList* list, Key key) { - char* buf = list->AllocateKey(sizeof(Key)); - memcpy(buf, &key, sizeof(Key)); - list->Insert(buf); - keys_.insert(key); - } - - bool InsertWithHint(TestInlineSkipList* list, Key key, void** hint) { - char* buf = list->AllocateKey(sizeof(Key)); - memcpy(buf, &key, sizeof(Key)); - bool res = list->InsertWithHint(buf, hint); - keys_.insert(key); - return res; - } - - void Validate(TestInlineSkipList* list) { - // Check keys exist. - for (Key key : keys_) { - ASSERT_TRUE(list->Contains(Encode(&key))); - } - // Iterate over the list, make sure keys appears in order and no extra - // keys exist. - TestInlineSkipList::Iterator iter(list); - ASSERT_FALSE(iter.Valid()); - Key zero = 0; - iter.Seek(Encode(&zero)); - for (Key key : keys_) { - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(key, Decode(iter.key())); - iter.Next(); - } - ASSERT_FALSE(iter.Valid()); - // Validate the list is well-formed. 
- list->TEST_Validate(); - } - - private: - std::set keys_; -}; - -TEST_F(InlineSkipTest, Empty) { - Arena arena; - TestComparator cmp; - InlineSkipList list(cmp, &arena); - Key key = 10; - ASSERT_TRUE(!list.Contains(Encode(&key))); - - InlineSkipList::Iterator iter(&list); - ASSERT_TRUE(!iter.Valid()); - iter.SeekToFirst(); - ASSERT_TRUE(!iter.Valid()); - key = 100; - iter.Seek(Encode(&key)); - ASSERT_TRUE(!iter.Valid()); - iter.SeekForPrev(Encode(&key)); - ASSERT_TRUE(!iter.Valid()); - iter.SeekToLast(); - ASSERT_TRUE(!iter.Valid()); -} - -TEST_F(InlineSkipTest, InsertAndLookup) { - const int N = 2000; - const int R = 5000; - Random rnd(1000); - std::set keys; - ConcurrentArena arena; - TestComparator cmp; - InlineSkipList list(cmp, &arena); - for (int i = 0; i < N; i++) { - Key key = rnd.Next() % R; - if (keys.insert(key).second) { - char* buf = list.AllocateKey(sizeof(Key)); - memcpy(buf, &key, sizeof(Key)); - list.Insert(buf); - } - } - - for (Key i = 0; i < R; i++) { - if (list.Contains(Encode(&i))) { - ASSERT_EQ(keys.count(i), 1U); - } else { - ASSERT_EQ(keys.count(i), 0U); - } - } - - // Simple iterator tests - { - InlineSkipList::Iterator iter(&list); - ASSERT_TRUE(!iter.Valid()); - - uint64_t zero = 0; - iter.Seek(Encode(&zero)); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.begin()), Decode(iter.key())); - - uint64_t max_key = R - 1; - iter.SeekForPrev(Encode(&max_key)); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.rbegin()), Decode(iter.key())); - - iter.SeekToFirst(); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.begin()), Decode(iter.key())); - - iter.SeekToLast(); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.rbegin()), Decode(iter.key())); - } - - // Forward iteration test - for (Key i = 0; i < R; i++) { - InlineSkipList::Iterator iter(&list); - iter.Seek(Encode(&i)); - - // Compare against model iterator - std::set::iterator model_iter = keys.lower_bound(i); - for (int j = 0; j < 3; j++) { - if (model_iter == keys.end()) { - ASSERT_TRUE(!iter.Valid()); - break; - } else { - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*model_iter, Decode(iter.key())); - ++model_iter; - iter.Next(); - } - } - } - - // Backward iteration test - for (Key i = 0; i < R; i++) { - InlineSkipList::Iterator iter(&list); - iter.SeekForPrev(Encode(&i)); - - // Compare against model iterator - std::set::iterator model_iter = keys.upper_bound(i); - for (int j = 0; j < 3; j++) { - if (model_iter == keys.begin()) { - ASSERT_TRUE(!iter.Valid()); - break; - } else { - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*--model_iter, Decode(iter.key())); - iter.Prev(); - } - } - } -} - -TEST_F(InlineSkipTest, InsertWithHint_Sequential) { - const int N = 100000; - Arena arena; - TestComparator cmp; - TestInlineSkipList list(cmp, &arena); - void* hint = nullptr; - for (int i = 0; i < N; i++) { - Key key = i; - InsertWithHint(&list, key, &hint); - } - Validate(&list); -} - -TEST_F(InlineSkipTest, InsertWithHint_MultipleHints) { - const int N = 100000; - const int S = 100; - Random rnd(534); - Arena arena; - TestComparator cmp; - TestInlineSkipList list(cmp, &arena); - void* hints[S]; - Key last_key[S]; - for (int i = 0; i < S; i++) { - hints[i] = nullptr; - last_key[i] = 0; - } - for (int i = 0; i < N; i++) { - Key s = rnd.Uniform(S); - Key key = (s << 32) + (++last_key[s]); - InsertWithHint(&list, key, &hints[s]); - } - Validate(&list); -} - -TEST_F(InlineSkipTest, InsertWithHint_MultipleHintsRandom) { - const int N = 100000; - const int S = 100; - Random rnd(534); - Arena arena; - TestComparator cmp; - 
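The hint variants above group keys into streams so each stream can keep its own insertion hint; within a stream the keys are strictly increasing, which is what makes the cached splice position reusable. A small worked example of that key layout (illustrative only):

```cpp
#include <cassert>
#include <cstdint>

// Worked example of the per-stream key layout used by the hint tests above:
// the stream id sits in the high 32 bits and a per-stream counter in the low
// 32 bits, so keys within one stream are strictly increasing and each stream
// can profitably keep its own insertion hint.
int main() {
  uint64_t stream = 5;
  uint64_t counter = 3;                      // third insert in this stream
  uint64_t key = (stream << 32) + counter;
  assert((key >> 32) == 5);                  // stream recovered from high bits
  assert((key & 0xffffffffu) == 3);          // counter recovered from low bits
  return 0;
}
```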
TestInlineSkipList list(cmp, &arena); - void* hints[S]; - for (int i = 0; i < S; i++) { - hints[i] = nullptr; - } - for (int i = 0; i < N; i++) { - Key s = rnd.Uniform(S); - Key key = (s << 32) + rnd.Next(); - InsertWithHint(&list, key, &hints[s]); - } - Validate(&list); -} - -TEST_F(InlineSkipTest, InsertWithHint_CompatibleWithInsertWithoutHint) { - const int N = 100000; - const int S1 = 100; - const int S2 = 100; - Random rnd(534); - Arena arena; - TestComparator cmp; - TestInlineSkipList list(cmp, &arena); - std::unordered_set used; - Key with_hint[S1]; - Key without_hint[S2]; - void* hints[S1]; - for (int i = 0; i < S1; i++) { - hints[i] = nullptr; - while (true) { - Key s = rnd.Next(); - if (used.insert(s).second) { - with_hint[i] = s; - break; - } - } - } - for (int i = 0; i < S2; i++) { - while (true) { - Key s = rnd.Next(); - if (used.insert(s).second) { - without_hint[i] = s; - break; - } - } - } - for (int i = 0; i < N; i++) { - Key s = rnd.Uniform(S1 + S2); - if (s < S1) { - Key key = (with_hint[s] << 32) + rnd.Next(); - InsertWithHint(&list, key, &hints[s]); - } else { - Key key = (without_hint[s - S1] << 32) + rnd.Next(); - Insert(&list, key); - } - } - Validate(&list); -} - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -// We want to make sure that with a single writer and multiple -// concurrent readers (with no synchronization other than when a -// reader's iterator is created), the reader always observes all the -// data that was present in the skip list when the iterator was -// constructor. Because insertions are happening concurrently, we may -// also observe new values that were inserted since the iterator was -// constructed, but we should never miss any values that were present -// at iterator construction time. -// -// We generate multi-part keys: -// -// where: -// key is in range [0..K-1] -// gen is a generation number for key -// hash is hash(key,gen) -// -// The insertion code picks a random key, sets gen to be 1 + the last -// generation number inserted for that key, and sets hash to Hash(key,gen). -// -// At the beginning of a read, we snapshot the last inserted -// generation number for each key. We then iterate, including random -// calls to Next() and Seek(). For every key we encounter, we -// check that it is either expected given the initial snapshot or has -// been concurrently added since the iterator started. 
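Before the harness itself, a worked example of the <key, gen, hash> packing described above and implemented by MakeKey() below: key in the high bits, generation in the middle, and an 8-bit hash of the pair in the low byte so a reader can validate any key it observes. The hash function here is an illustrative stand-in, not the test's Hash() helper:

```cpp
#include <cassert>
#include <cstdint>

// Worked example of the <key, gen, hash> packing: key in bits 40+, generation
// in bits 8..39, and an 8-bit hash of the pair in the low byte. HashNumbers is
// a stand-in; only the packing layout matters here.
static uint64_t HashNumbers(uint64_t k, uint64_t g) {
  return (k * 31 + g) * 2654435761ULL;
}
static uint64_t MakeKey(uint64_t k, uint64_t g) {
  return (k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff);
}
static uint64_t KeyPart(uint64_t v) { return v >> 40; }
static uint64_t GenPart(uint64_t v) { return (v >> 8) & 0xffffffffu; }
static uint64_t HashPart(uint64_t v) { return v & 0xff; }

int main() {
  uint64_t v = MakeKey(3, 17);
  assert(KeyPart(v) == 3 && GenPart(v) == 17);
  // A reader validates every key it sees by re-deriving the hash byte.
  assert(HashPart(v) == (HashNumbers(3, 17) & 0xff));
  return 0;
}
```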
-class ConcurrentTest { - public: - static const uint32_t K = 8; - - private: - static uint64_t key(Key key) { return (key >> 40); } - static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; } - static uint64_t hash(Key key) { return key & 0xff; } - - static uint64_t HashNumbers(uint64_t k, uint64_t g) { - uint64_t data[2] = {k, g}; - return Hash(reinterpret_cast(data), sizeof(data), 0); - } - - static Key MakeKey(uint64_t k, uint64_t g) { - assert(sizeof(Key) == sizeof(uint64_t)); - assert(k <= K); // We sometimes pass K to seek to the end of the skiplist - assert(g <= 0xffffffffu); - return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff)); - } - - static bool IsValidKey(Key k) { - return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff); - } - - static Key RandomTarget(Random* rnd) { - switch (rnd->Next() % 10) { - case 0: - // Seek to beginning - return MakeKey(0, 0); - case 1: - // Seek to end - return MakeKey(K, 0); - default: - // Seek to middle - return MakeKey(rnd->Next() % K, 0); - } - } - - // Per-key generation - struct State { - std::atomic generation[K]; - void Set(int k, int v) { - generation[k].store(v, std::memory_order_release); - } - int Get(int k) { return generation[k].load(std::memory_order_acquire); } - - State() { - for (unsigned int k = 0; k < K; k++) { - Set(k, 0); - } - } - }; - - // Current state of the test - State current_; - - ConcurrentArena arena_; - - // InlineSkipList is not protected by mu_. We just use a single writer - // thread to modify it. - InlineSkipList list_; - - public: - ConcurrentTest() : list_(TestComparator(), &arena_) {} - - // REQUIRES: No concurrent calls to WriteStep or ConcurrentWriteStep - void WriteStep(Random* rnd) { - const uint32_t k = rnd->Next() % K; - const int g = current_.Get(k) + 1; - const Key new_key = MakeKey(k, g); - char* buf = list_.AllocateKey(sizeof(Key)); - memcpy(buf, &new_key, sizeof(Key)); - list_.Insert(buf); - current_.Set(k, g); - } - - // REQUIRES: No concurrent calls for the same k - void ConcurrentWriteStep(uint32_t k, bool use_hint = false) { - const int g = current_.Get(k) + 1; - const Key new_key = MakeKey(k, g); - char* buf = list_.AllocateKey(sizeof(Key)); - memcpy(buf, &new_key, sizeof(Key)); - if (use_hint) { - void* hint = nullptr; - list_.InsertWithHintConcurrently(buf, &hint); - delete[] reinterpret_cast(hint); - } else { - list_.InsertConcurrently(buf); - } - ASSERT_EQ(g, current_.Get(k) + 1); - current_.Set(k, g); - } - - void ReadStep(Random* rnd) { - // Remember the initial committed state of the skiplist. - State initial_state; - for (unsigned int k = 0; k < K; k++) { - initial_state.Set(k, current_.Get(k)); - } - - Key pos = RandomTarget(rnd); - InlineSkipList::Iterator iter(&list_); - iter.Seek(Encode(&pos)); - while (true) { - Key current; - if (!iter.Valid()) { - current = MakeKey(K, 0); - } else { - current = Decode(iter.key()); - ASSERT_TRUE(IsValidKey(current)) << current; - } - ASSERT_LE(pos, current) << "should not go backwards"; - - // Verify that everything in [pos,current) was not present in - // initial_state. - while (pos < current) { - ASSERT_LT(key(pos), K) << pos; - - // Note that generation 0 is never inserted, so it is ok if - // <*,0,*> is missing. 
- ASSERT_TRUE((gen(pos) == 0U) || - (gen(pos) > static_cast(initial_state.Get( - static_cast(key(pos)))))) - << "key: " << key(pos) << "; gen: " << gen(pos) - << "; initgen: " << initial_state.Get(static_cast(key(pos))); - - // Advance to next key in the valid key space - if (key(pos) < key(current)) { - pos = MakeKey(key(pos) + 1, 0); - } else { - pos = MakeKey(key(pos), gen(pos) + 1); - } - } - - if (!iter.Valid()) { - break; - } - - if (rnd->Next() % 2) { - iter.Next(); - pos = MakeKey(key(pos), gen(pos) + 1); - } else { - Key new_target = RandomTarget(rnd); - if (new_target > pos) { - pos = new_target; - iter.Seek(Encode(&new_target)); - } - } - } - } -}; -const uint32_t ConcurrentTest::K; - -// Simple test that does single-threaded testing of the ConcurrentTest -// scaffolding. -TEST_F(InlineSkipTest, ConcurrentReadWithoutThreads) { - ConcurrentTest test; - Random rnd(test::RandomSeed()); - for (int i = 0; i < 10000; i++) { - test.ReadStep(&rnd); - test.WriteStep(&rnd); - } -} - -TEST_F(InlineSkipTest, ConcurrentInsertWithoutThreads) { - ConcurrentTest test; - Random rnd(test::RandomSeed()); - for (int i = 0; i < 10000; i++) { - test.ReadStep(&rnd); - uint32_t base = rnd.Next(); - for (int j = 0; j < 4; ++j) { - test.ConcurrentWriteStep((base + j) % ConcurrentTest::K); - } - } -} - -class TestState { - public: - ConcurrentTest t_; - bool use_hint_; - int seed_; - std::atomic quit_flag_; - std::atomic next_writer_; - - enum ReaderState { STARTING, RUNNING, DONE }; - - explicit TestState(int s) - : seed_(s), - quit_flag_(false), - state_(STARTING), - pending_writers_(0), - state_cv_(&mu_) {} - - void Wait(ReaderState s) { - mu_.Lock(); - while (state_ != s) { - state_cv_.Wait(); - } - mu_.Unlock(); - } - - void Change(ReaderState s) { - mu_.Lock(); - state_ = s; - state_cv_.Signal(); - mu_.Unlock(); - } - - void AdjustPendingWriters(int delta) { - mu_.Lock(); - pending_writers_ += delta; - if (pending_writers_ == 0) { - state_cv_.Signal(); - } - mu_.Unlock(); - } - - void WaitForPendingWriters() { - mu_.Lock(); - while (pending_writers_ != 0) { - state_cv_.Wait(); - } - mu_.Unlock(); - } - - private: - port::Mutex mu_; - ReaderState state_; - int pending_writers_; - port::CondVar state_cv_; -}; - -static void ConcurrentReader(void* arg) { - TestState* state = reinterpret_cast(arg); - Random rnd(state->seed_); - int64_t reads = 0; - state->Change(TestState::RUNNING); - while (!state->quit_flag_.load(std::memory_order_acquire)) { - state->t_.ReadStep(&rnd); - ++reads; - } - state->Change(TestState::DONE); -} - -static void ConcurrentWriter(void* arg) { - TestState* state = reinterpret_cast(arg); - uint32_t k = state->next_writer_++ % ConcurrentTest::K; - state->t_.ConcurrentWriteStep(k, state->use_hint_); - state->AdjustPendingWriters(-1); -} - -static void RunConcurrentRead(int run) { - const int seed = test::RandomSeed() + (run * 100); - Random rnd(seed); - const int N = 1000; - const int kSize = 1000; - for (int i = 0; i < N; i++) { - if ((i % 100) == 0) { - fprintf(stderr, "Run %d of %d\n", i, N); - } - TestState state(seed + 1); - Env::Default()->SetBackgroundThreads(1); - Env::Default()->Schedule(ConcurrentReader, &state); - state.Wait(TestState::RUNNING); - for (int k = 0; k < kSize; ++k) { - state.t_.WriteStep(&rnd); - } - state.quit_flag_.store(true, std::memory_order_release); - state.Wait(TestState::DONE); - } -} - -static void RunConcurrentInsert(int run, bool use_hint = false, - int write_parallelism = 4) { - Env::Default()->SetBackgroundThreads(1 + write_parallelism, - 
Env::Priority::LOW); - const int seed = test::RandomSeed() + (run * 100); - Random rnd(seed); - const int N = 1000; - const int kSize = 1000; - for (int i = 0; i < N; i++) { - if ((i % 100) == 0) { - fprintf(stderr, "Run %d of %d\n", i, N); - } - TestState state(seed + 1); - state.use_hint_ = use_hint; - Env::Default()->Schedule(ConcurrentReader, &state); - state.Wait(TestState::RUNNING); - for (int k = 0; k < kSize; k += write_parallelism) { - state.next_writer_ = rnd.Next(); - state.AdjustPendingWriters(write_parallelism); - for (int p = 0; p < write_parallelism; ++p) { - Env::Default()->Schedule(ConcurrentWriter, &state); - } - state.WaitForPendingWriters(); - } - state.quit_flag_.store(true, std::memory_order_release); - state.Wait(TestState::DONE); - } -} - -TEST_F(InlineSkipTest, ConcurrentRead1) { RunConcurrentRead(1); } -TEST_F(InlineSkipTest, ConcurrentRead2) { RunConcurrentRead(2); } -TEST_F(InlineSkipTest, ConcurrentRead3) { RunConcurrentRead(3); } -TEST_F(InlineSkipTest, ConcurrentRead4) { RunConcurrentRead(4); } -TEST_F(InlineSkipTest, ConcurrentRead5) { RunConcurrentRead(5); } -TEST_F(InlineSkipTest, ConcurrentInsert1) { RunConcurrentInsert(1); } -TEST_F(InlineSkipTest, ConcurrentInsert2) { RunConcurrentInsert(2); } -TEST_F(InlineSkipTest, ConcurrentInsert3) { RunConcurrentInsert(3); } -TEST_F(InlineSkipTest, ConcurrentInsertWithHint1) { - RunConcurrentInsert(1, true); -} -TEST_F(InlineSkipTest, ConcurrentInsertWithHint2) { - RunConcurrentInsert(2, true); -} -TEST_F(InlineSkipTest, ConcurrentInsertWithHint3) { - RunConcurrentInsert(3, true); -} - -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/memtable/skiplist_test.cc b/memtable/skiplist_test.cc deleted file mode 100644 index a07088511..000000000 --- a/memtable/skiplist_test.cc +++ /dev/null @@ -1,387 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
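The RunConcurrentRead/RunConcurrentInsert drivers above coordinate the reader thread through the STARTING/RUNNING/DONE handshake in TestState. A self-contained analogue of that handshake using std:: primitives instead of port::Mutex/CondVar (illustrative, not the test code):

```cpp
#include <condition_variable>
#include <mutex>
#include <thread>

// Illustrative analogue of the TestState handshake: the main thread waits for
// the reader to reach RUNNING, performs its writes, sets the quit flag, then
// waits for DONE before joining.
enum class ReaderState { STARTING, RUNNING, DONE };

struct Harness {
  std::mutex mu;
  std::condition_variable cv;
  ReaderState state = ReaderState::STARTING;
  bool quit = false;

  void Change(ReaderState s) {
    std::lock_guard<std::mutex> l(mu);
    state = s;
    cv.notify_all();
  }
  void Wait(ReaderState s) {
    std::unique_lock<std::mutex> l(mu);
    cv.wait(l, [&] { return state == s; });
  }
};

int main() {
  Harness h;
  std::thread reader([&] {
    h.Change(ReaderState::RUNNING);
    while (true) {
      std::lock_guard<std::mutex> l(h.mu);
      if (h.quit) break;  // read steps would run here in the real test
    }
    h.Change(ReaderState::DONE);
  });
  h.Wait(ReaderState::RUNNING);
  {
    std::lock_guard<std::mutex> l(h.mu);  // writes happen while reader runs
    h.quit = true;
  }
  h.Wait(ReaderState::DONE);
  reader.join();
  return 0;
}
```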
- -#include "memtable/skiplist.h" - -#include - -#include "memory/arena.h" -#include "rocksdb/env.h" -#include "test_util/testharness.h" -#include "util/hash.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -using Key = uint64_t; - -struct TestComparator { - int operator()(const Key& a, const Key& b) const { - if (a < b) { - return -1; - } else if (a > b) { - return +1; - } else { - return 0; - } - } -}; - -class SkipTest : public testing::Test {}; - -TEST_F(SkipTest, Empty) { - Arena arena; - TestComparator cmp; - SkipList list(cmp, &arena); - ASSERT_TRUE(!list.Contains(10)); - - SkipList::Iterator iter(&list); - ASSERT_TRUE(!iter.Valid()); - iter.SeekToFirst(); - ASSERT_TRUE(!iter.Valid()); - iter.Seek(100); - ASSERT_TRUE(!iter.Valid()); - iter.SeekForPrev(100); - ASSERT_TRUE(!iter.Valid()); - iter.SeekToLast(); - ASSERT_TRUE(!iter.Valid()); -} - -TEST_F(SkipTest, InsertAndLookup) { - const int N = 2000; - const int R = 5000; - Random rnd(1000); - std::set keys; - Arena arena; - TestComparator cmp; - SkipList list(cmp, &arena); - for (int i = 0; i < N; i++) { - Key key = rnd.Next() % R; - if (keys.insert(key).second) { - list.Insert(key); - } - } - - for (int i = 0; i < R; i++) { - if (list.Contains(i)) { - ASSERT_EQ(keys.count(i), 1U); - } else { - ASSERT_EQ(keys.count(i), 0U); - } - } - - // Simple iterator tests - { - SkipList::Iterator iter(&list); - ASSERT_TRUE(!iter.Valid()); - - iter.Seek(0); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.begin()), iter.key()); - - iter.SeekForPrev(R - 1); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.rbegin()), iter.key()); - - iter.SeekToFirst(); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.begin()), iter.key()); - - iter.SeekToLast(); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.rbegin()), iter.key()); - } - - // Forward iteration test - for (int i = 0; i < R; i++) { - SkipList::Iterator iter(&list); - iter.Seek(i); - - // Compare against model iterator - std::set::iterator model_iter = keys.lower_bound(i); - for (int j = 0; j < 3; j++) { - if (model_iter == keys.end()) { - ASSERT_TRUE(!iter.Valid()); - break; - } else { - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*model_iter, iter.key()); - ++model_iter; - iter.Next(); - } - } - } - - // Backward iteration test - for (int i = 0; i < R; i++) { - SkipList::Iterator iter(&list); - iter.SeekForPrev(i); - - // Compare against model iterator - std::set::iterator model_iter = keys.upper_bound(i); - for (int j = 0; j < 3; j++) { - if (model_iter == keys.begin()) { - ASSERT_TRUE(!iter.Valid()); - break; - } else { - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*--model_iter, iter.key()); - iter.Prev(); - } - } - } -} - -// We want to make sure that with a single writer and multiple -// concurrent readers (with no synchronization other than when a -// reader's iterator is created), the reader always observes all the -// data that was present in the skip list when the iterator was -// constructor. Because insertions are happening concurrently, we may -// also observe new values that were inserted since the iterator was -// constructed, but we should never miss any values that were present -// at iterator construction time. -// -// We generate multi-part keys: -// -// where: -// key is in range [0..K-1] -// gen is a generation number for key -// hash is hash(key,gen) -// -// The insertion code picks a random key, sets gen to be 1 + the last -// generation number inserted for that key, and sets hash to Hash(key,gen). 
-// -// At the beginning of a read, we snapshot the last inserted -// generation number for each key. We then iterate, including random -// calls to Next() and Seek(). For every key we encounter, we -// check that it is either expected given the initial snapshot or has -// been concurrently added since the iterator started. -class ConcurrentTest { - private: - static const uint32_t K = 4; - - static uint64_t key(Key key) { return (key >> 40); } - static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; } - static uint64_t hash(Key key) { return key & 0xff; } - - static uint64_t HashNumbers(uint64_t k, uint64_t g) { - uint64_t data[2] = {k, g}; - return Hash(reinterpret_cast(data), sizeof(data), 0); - } - - static Key MakeKey(uint64_t k, uint64_t g) { - assert(sizeof(Key) == sizeof(uint64_t)); - assert(k <= K); // We sometimes pass K to seek to the end of the skiplist - assert(g <= 0xffffffffu); - return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff)); - } - - static bool IsValidKey(Key k) { - return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff); - } - - static Key RandomTarget(Random* rnd) { - switch (rnd->Next() % 10) { - case 0: - // Seek to beginning - return MakeKey(0, 0); - case 1: - // Seek to end - return MakeKey(K, 0); - default: - // Seek to middle - return MakeKey(rnd->Next() % K, 0); - } - } - - // Per-key generation - struct State { - std::atomic generation[K]; - void Set(int k, int v) { - generation[k].store(v, std::memory_order_release); - } - int Get(int k) { return generation[k].load(std::memory_order_acquire); } - - State() { - for (unsigned int k = 0; k < K; k++) { - Set(k, 0); - } - } - }; - - // Current state of the test - State current_; - - Arena arena_; - - // SkipList is not protected by mu_. We just use a single writer - // thread to modify it. - SkipList list_; - - public: - ConcurrentTest() : list_(TestComparator(), &arena_) {} - - // REQUIRES: External synchronization - void WriteStep(Random* rnd) { - const uint32_t k = rnd->Next() % K; - const int g = current_.Get(k) + 1; - const Key new_key = MakeKey(k, g); - list_.Insert(new_key); - current_.Set(k, g); - } - - void ReadStep(Random* rnd) { - // Remember the initial committed state of the skiplist. - State initial_state; - for (unsigned int k = 0; k < K; k++) { - initial_state.Set(k, current_.Get(k)); - } - - Key pos = RandomTarget(rnd); - SkipList::Iterator iter(&list_); - iter.Seek(pos); - while (true) { - Key current; - if (!iter.Valid()) { - current = MakeKey(K, 0); - } else { - current = iter.key(); - ASSERT_TRUE(IsValidKey(current)) << current; - } - ASSERT_LE(pos, current) << "should not go backwards"; - - // Verify that everything in [pos,current) was not present in - // initial_state. - while (pos < current) { - ASSERT_LT(key(pos), K) << pos; - - // Note that generation 0 is never inserted, so it is ok if - // <*,0,*> is missing. 
- ASSERT_TRUE((gen(pos) == 0U) || - (gen(pos) > static_cast(initial_state.Get( - static_cast(key(pos)))))) - << "key: " << key(pos) << "; gen: " << gen(pos) - << "; initgen: " << initial_state.Get(static_cast(key(pos))); - - // Advance to next key in the valid key space - if (key(pos) < key(current)) { - pos = MakeKey(key(pos) + 1, 0); - } else { - pos = MakeKey(key(pos), gen(pos) + 1); - } - } - - if (!iter.Valid()) { - break; - } - - if (rnd->Next() % 2) { - iter.Next(); - pos = MakeKey(key(pos), gen(pos) + 1); - } else { - Key new_target = RandomTarget(rnd); - if (new_target > pos) { - pos = new_target; - iter.Seek(new_target); - } - } - } - } -}; -const uint32_t ConcurrentTest::K; - -// Simple test that does single-threaded testing of the ConcurrentTest -// scaffolding. -TEST_F(SkipTest, ConcurrentWithoutThreads) { - ConcurrentTest test; - Random rnd(test::RandomSeed()); - for (int i = 0; i < 10000; i++) { - test.ReadStep(&rnd); - test.WriteStep(&rnd); - } -} - -class TestState { - public: - ConcurrentTest t_; - int seed_; - std::atomic quit_flag_; - - enum ReaderState { STARTING, RUNNING, DONE }; - - explicit TestState(int s) - : seed_(s), quit_flag_(false), state_(STARTING), state_cv_(&mu_) {} - - void Wait(ReaderState s) { - mu_.Lock(); - while (state_ != s) { - state_cv_.Wait(); - } - mu_.Unlock(); - } - - void Change(ReaderState s) { - mu_.Lock(); - state_ = s; - state_cv_.Signal(); - mu_.Unlock(); - } - - private: - port::Mutex mu_; - ReaderState state_; - port::CondVar state_cv_; -}; - -static void ConcurrentReader(void* arg) { - TestState* state = reinterpret_cast(arg); - Random rnd(state->seed_); - int64_t reads = 0; - state->Change(TestState::RUNNING); - while (!state->quit_flag_.load(std::memory_order_acquire)) { - state->t_.ReadStep(&rnd); - ++reads; - } - state->Change(TestState::DONE); -} - -static void RunConcurrent(int run) { - const int seed = test::RandomSeed() + (run * 100); - Random rnd(seed); - const int N = 1000; - const int kSize = 1000; - for (int i = 0; i < N; i++) { - if ((i % 100) == 0) { - fprintf(stderr, "Run %d of %d\n", i, N); - } - TestState state(seed + 1); - Env::Default()->SetBackgroundThreads(1); - Env::Default()->Schedule(ConcurrentReader, &state); - state.Wait(TestState::RUNNING); - for (int k = 0; k < kSize; k++) { - state.t_.WriteStep(&rnd); - } - state.quit_flag_.store(true, std::memory_order_release); - state.Wait(TestState::DONE); - } -} - -TEST_F(SkipTest, Concurrent1) { RunConcurrent(1); } -TEST_F(SkipTest, Concurrent2) { RunConcurrent(2); } -TEST_F(SkipTest, Concurrent3) { RunConcurrent(3); } -TEST_F(SkipTest, Concurrent4) { RunConcurrent(4); } -TEST_F(SkipTest, Concurrent5) { RunConcurrent(5); } - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/memtable/write_buffer_manager_test.cc b/memtable/write_buffer_manager_test.cc deleted file mode 100644 index c992d2eab..000000000 --- a/memtable/write_buffer_manager_test.cc +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
See the AUTHORS file for names of contributors. - -#include "rocksdb/write_buffer_manager.h" - -#include "rocksdb/advanced_cache.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { -class WriteBufferManagerTest : public testing::Test {}; - -const size_t kSizeDummyEntry = 256 * 1024; - -TEST_F(WriteBufferManagerTest, ShouldFlush) { - // A write buffer manager of size 10MB - std::unique_ptr wbf( - new WriteBufferManager(10 * 1024 * 1024)); - - wbf->ReserveMem(8 * 1024 * 1024); - ASSERT_FALSE(wbf->ShouldFlush()); - // 90% of the hard limit will hit the condition - wbf->ReserveMem(1 * 1024 * 1024); - ASSERT_TRUE(wbf->ShouldFlush()); - // Scheduling for freeing will release the condition - wbf->ScheduleFreeMem(1 * 1024 * 1024); - ASSERT_FALSE(wbf->ShouldFlush()); - - wbf->ReserveMem(2 * 1024 * 1024); - ASSERT_TRUE(wbf->ShouldFlush()); - - wbf->ScheduleFreeMem(4 * 1024 * 1024); - // 11MB total, 6MB mutable. hard limit still hit - ASSERT_TRUE(wbf->ShouldFlush()); - - wbf->ScheduleFreeMem(2 * 1024 * 1024); - // 11MB total, 4MB mutable. hard limit stills but won't flush because more - // than half data is already being flushed. - ASSERT_FALSE(wbf->ShouldFlush()); - - wbf->ReserveMem(4 * 1024 * 1024); - // 15 MB total, 8MB mutable. - ASSERT_TRUE(wbf->ShouldFlush()); - - wbf->FreeMem(7 * 1024 * 1024); - // 8MB total, 8MB mutable. - ASSERT_FALSE(wbf->ShouldFlush()); - - // change size: 8M limit, 7M mutable limit - wbf->SetBufferSize(8 * 1024 * 1024); - // 8MB total, 8MB mutable. - ASSERT_TRUE(wbf->ShouldFlush()); - - wbf->ScheduleFreeMem(2 * 1024 * 1024); - // 8MB total, 6MB mutable. - ASSERT_TRUE(wbf->ShouldFlush()); - - wbf->FreeMem(2 * 1024 * 1024); - // 6MB total, 6MB mutable. - ASSERT_FALSE(wbf->ShouldFlush()); - - wbf->ReserveMem(1 * 1024 * 1024); - // 7MB total, 7MB mutable. - ASSERT_FALSE(wbf->ShouldFlush()); - - wbf->ReserveMem(1 * 1024 * 1024); - // 8MB total, 8MB mutable. - ASSERT_TRUE(wbf->ShouldFlush()); - - wbf->ScheduleFreeMem(1 * 1024 * 1024); - wbf->FreeMem(1 * 1024 * 1024); - // 7MB total, 7MB mutable. 
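The expectations above can be summarized by two triggers, reconstructed here from the test's comments rather than from the WriteBufferManager source: flush once mutable memory reaches roughly 90% of the limit, or once total memory reaches the limit while at least half of it is still mutable. A sketch of that simplified model with the same numbers:

```cpp
#include <cstddef>

// Simplified model of the two flush triggers exercised above, derived from
// the test comments (not copied from the WriteBufferManager implementation).
static bool ShouldFlushModel(size_t mutable_mem, size_t total_mem,
                             size_t limit) {
  if (mutable_mem >= limit / 10 * 9) return true;  // ~90% of the hard limit
  return total_mem >= limit && mutable_mem >= limit / 2;
}

int main() {
  const size_t MB = 1024 * 1024;
  const size_t kLimit = 10 * MB;
  bool hit_90 = ShouldFlushModel(9 * MB, 9 * MB, kLimit);           // true
  bool after_schedule = ShouldFlushModel(8 * MB, 9 * MB, kLimit);   // false
  bool mostly_flushing = ShouldFlushModel(4 * MB, 11 * MB, kLimit); // false:
  // most of the 11 MB is already being flushed, so no new flush is needed.
  return (hit_90 && !after_schedule && !mostly_flushing) ? 0 : 1;
}
```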
- ASSERT_FALSE(wbf->ShouldFlush()); -} - -class ChargeWriteBufferTest : public testing::Test {}; - -TEST_F(ChargeWriteBufferTest, Basic) { - constexpr std::size_t kMetaDataChargeOverhead = 10000; - - LRUCacheOptions co; - // 1GB cache - co.capacity = 1024 * 1024 * 1024; - co.num_shard_bits = 4; - co.metadata_charge_policy = kDontChargeCacheMetadata; - std::shared_ptr cache = NewLRUCache(co); - // A write buffer manager of size 50MB - std::unique_ptr wbf( - new WriteBufferManager(50 * 1024 * 1024, cache)); - - // Allocate 333KB will allocate 512KB, memory_used_ = 333KB - wbf->ReserveMem(333 * 1024); - // 2 dummy entries are added for size 333 KB - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 2 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 2 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 2 * 256 * 1024 + kMetaDataChargeOverhead); - - // Allocate another 512KB, memory_used_ = 845KB - wbf->ReserveMem(512 * 1024); - // 2 more dummy entries are added for size 512 KB - // since ceil((memory_used_ - dummy_entries_in_cache_usage) % kSizeDummyEntry) - // = 2 - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 4 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 4 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + kMetaDataChargeOverhead); - - // Allocate another 10MB, memory_used_ = 11085KB - wbf->ReserveMem(10 * 1024 * 1024); - // 40 more entries are added for size 10 * 1024 * 1024 KB - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 44 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 44 * 256 * 1024 + kMetaDataChargeOverhead); - - // Free 1MB, memory_used_ = 10061KB - // It will not cause any change in cache cost - // since memory_used_ > dummy_entries_in_cache_usage * (3/4) - wbf->FreeMem(1 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 44 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 44 * 256 * 1024 + kMetaDataChargeOverhead); - ASSERT_FALSE(wbf->ShouldFlush()); - - // Allocate another 41MB, memory_used_ = 52045KB - wbf->ReserveMem(41 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 204 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 204 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), - 204 * 256 * 1024 + kMetaDataChargeOverhead); - ASSERT_TRUE(wbf->ShouldFlush()); - - ASSERT_TRUE(wbf->ShouldFlush()); - - // Schedule free 20MB, memory_used_ = 52045KB - // It will not cause any change in memory_used and cache cost - wbf->ScheduleFreeMem(20 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 204 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 204 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), - 204 * 256 * 1024 + kMetaDataChargeOverhead); - // Still need flush as the hard limit hits - ASSERT_TRUE(wbf->ShouldFlush()); - - // Free 20MB, memory_used_ = 31565KB - // It will releae 80 dummy entries from cache since - // since memory_used_ < dummy_entries_in_cache_usage * (3/4) - // and floor((dummy_entries_in_cache_usage - memory_used_) % kSizeDummyEntry) - // = 80 - wbf->FreeMem(20 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 124 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 124 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), - 124 * 256 * 1024 + kMetaDataChargeOverhead); - - ASSERT_FALSE(wbf->ShouldFlush()); - - // Free 16KB, memory_used_ = 31549KB - // It will not release any dummy entry since memory_used_ >= - // 
dummy_entries_in_cache_usage * (3/4) - wbf->FreeMem(16 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 124 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 124 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), - 124 * 256 * 1024 + kMetaDataChargeOverhead); - - // Free 20MB, memory_used_ = 11069KB - // It will releae 80 dummy entries from cache - // since memory_used_ < dummy_entries_in_cache_usage * (3/4) - // and floor((dummy_entries_in_cache_usage - memory_used_) % kSizeDummyEntry) - // = 80 - wbf->FreeMem(20 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 44 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 44 * 256 * 1024 + kMetaDataChargeOverhead); - - // Free 1MB, memory_used_ = 10045KB - // It will not cause any change in cache cost - // since memory_used_ > dummy_entries_in_cache_usage * (3/4) - wbf->FreeMem(1 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 44 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 44 * 256 * 1024 + kMetaDataChargeOverhead); - - // Reserve 512KB, memory_used_ = 10557KB - // It will not casue any change in cache cost - // since memory_used_ > dummy_entries_in_cache_usage * (3/4) - // which reflects the benefit of saving dummy entry insertion on memory - // reservation after delay decrease - wbf->ReserveMem(512 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 44 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 44 * 256 * 1024 + kMetaDataChargeOverhead); - - // Destroy write buffer manger should free everything - wbf.reset(); - ASSERT_EQ(cache->GetPinnedUsage(), 0); -} - -TEST_F(ChargeWriteBufferTest, BasicWithNoBufferSizeLimit) { - constexpr std::size_t kMetaDataChargeOverhead = 10000; - // 1GB cache - std::shared_ptr cache = NewLRUCache(1024 * 1024 * 1024, 4); - // A write buffer manager of size 256MB - std::unique_ptr wbf(new WriteBufferManager(0, cache)); - - // Allocate 10MB, memory_used_ = 10240KB - // It will allocate 40 dummy entries - wbf->ReserveMem(10 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 40 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 40 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 40 * 256 * 1024 + kMetaDataChargeOverhead); - - ASSERT_FALSE(wbf->ShouldFlush()); - - // Free 9MB, memory_used_ = 1024KB - // It will free 36 dummy entries - wbf->FreeMem(9 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 4 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 4 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + kMetaDataChargeOverhead); - - // Free 160KB gradually, memory_used_ = 864KB - // It will not cause any change - // since memory_used_ > dummy_entries_in_cache_usage * 3/4 - for (int i = 0; i < 40; i++) { - wbf->FreeMem(4 * 1024); - } - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 4 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 4 * 256 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + kMetaDataChargeOverhead); -} - -TEST_F(ChargeWriteBufferTest, BasicWithCacheFull) { - constexpr std::size_t kMetaDataChargeOverhead = 20000; - - // 12MB cache size with strict capacity - LRUCacheOptions lo; - lo.capacity = 12 * 1024 * 1024; - lo.num_shard_bits = 0; - lo.strict_capacity_limit = true; - std::shared_ptr cache = NewLRUCache(lo); - std::unique_ptr wbf(new WriteBufferManager(0, cache)); - - // 
Allocate 10MB, memory_used_ = 10240KB - wbf->ReserveMem(10 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 40 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 40 * kSizeDummyEntry); - ASSERT_LT(cache->GetPinnedUsage(), - 40 * kSizeDummyEntry + kMetaDataChargeOverhead); - - // Allocate 10MB, memory_used_ = 20480KB - // Some dummy entry insertion will fail due to full cache - wbf->ReserveMem(10 * 1024 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 40 * kSizeDummyEntry); - ASSERT_LE(cache->GetPinnedUsage(), 12 * 1024 * 1024); - ASSERT_LT(wbf->dummy_entries_in_cache_usage(), 80 * kSizeDummyEntry); - - // Free 15MB after encoutering cache full, memory_used_ = 5120KB - wbf->FreeMem(15 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 20 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 20 * kSizeDummyEntry); - ASSERT_LT(cache->GetPinnedUsage(), - 20 * kSizeDummyEntry + kMetaDataChargeOverhead); - - // Reserve 15MB, creating cache full again, memory_used_ = 20480KB - wbf->ReserveMem(15 * 1024 * 1024); - ASSERT_LE(cache->GetPinnedUsage(), 12 * 1024 * 1024); - ASSERT_LT(wbf->dummy_entries_in_cache_usage(), 80 * kSizeDummyEntry); - - // Increase capacity so next insert will fully succeed - cache->SetCapacity(40 * 1024 * 1024); - - // Allocate 10MB, memory_used_ = 30720KB - wbf->ReserveMem(10 * 1024 * 1024); - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 120 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 120 * kSizeDummyEntry); - ASSERT_LT(cache->GetPinnedUsage(), - 120 * kSizeDummyEntry + kMetaDataChargeOverhead); - - // Gradually release 20 MB - // It ended up sequentially releasing 32, 24, 18 dummy entries when - // memory_used_ decreases to 22528KB, 16384KB, 11776KB. - // In total, it releases 74 dummy entries - for (int i = 0; i < 40; i++) { - wbf->FreeMem(512 * 1024); - } - - ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 46 * kSizeDummyEntry); - ASSERT_GE(cache->GetPinnedUsage(), 46 * kSizeDummyEntry); - ASSERT_LT(cache->GetPinnedUsage(), - 46 * kSizeDummyEntry + kMetaDataChargeOverhead); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/microbench/CMakeLists.txt b/microbench/CMakeLists.txt deleted file mode 100644 index 483e97973..000000000 --- a/microbench/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -find_package(benchmark REQUIRED) -find_package(Threads REQUIRED) - -file(GLOB_RECURSE ALL_BENCH_CPP *.cc) -foreach(ONE_BENCH_CPP ${ALL_BENCH_CPP}) - get_filename_component(TARGET_NAME ${ONE_BENCH_CPP} NAME_WE) - add_executable(${TARGET_NAME} ${ONE_BENCH_CPP}) - target_link_libraries(${TARGET_NAME} ${ROCKSDB_LIB} benchmark::benchmark - ${CMAKE_THREAD_LIBS_INIT}) - # run benchmark like a test, if added, the benchmark tests could be run by `ctest -R Bench_` - # add_test(Bench_${TARGET_NAME} ${TARGET_NAME}) - list(APPEND ALL_BENCH_TARGETS ${TARGET_NAME}) -endforeach() -add_custom_target(microbench DEPENDS ${ALL_BENCH_TARGETS}) -add_custom_target(run_microbench - COMMAND for t in ${ALL_BENCH_TARGETS}\; do \.\/$$t \|\| exit 1\; done - DEPENDS ${ALL_BENCH_TARGETS}) diff --git a/microbench/README.md b/microbench/README.md deleted file mode 100644 index 290ca58d7..000000000 --- a/microbench/README.md +++ /dev/null @@ -1,60 +0,0 @@ -# RocksDB Micro-Benchmark - -## Overview - -RocksDB micro-benchmark is a set of tests for benchmarking a single component or simple DB operations. 
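Each .cc file matched by the CMake glob above is built into its own Google Benchmark binary linked against benchmark::benchmark. A minimal sketch of such a file (hypothetical example, not one of the real RocksDB microbenchmarks):

```cpp
#include <string>

#include <benchmark/benchmark.h>

// Hypothetical microbenchmark: the glob above would pick this file up and
// build it as a standalone binary; BENCHMARK_MAIN() supplies main().
static void BM_StringAppend(benchmark::State& state) {
  for (auto _ : state) {
    std::string s;
    for (int64_t i = 0; i < state.range(0); ++i) {
      s.append("x");
    }
    benchmark::DoNotOptimize(s);
  }
}
BENCHMARK(BM_StringAppend)->Arg(64)->Arg(1024);

BENCHMARK_MAIN();
```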
The test artificially generates input data and executes the same operation with it to collect and report performance metrics. As it's focusing on testing a single, well-defined operation, the result is more precise and reproducible, which also has its limitation of not representing a real production use case. The test author needs to carefully design the microbench to represent its true purpose. - -The tests are based on [Google Benchmark](https://github.com/google/benchmark) library, which provides a standard framework for writing benchmarks. - -## How to Run -### Prerequisite -Install the [Google Benchmark](https://github.com/google/benchmark) version `1.6.0` or above. - -*Note: Google Benchmark `1.6.x` is incompatible with previous versions like `1.5.x`, please make sure you're using the newer version.* - -### Build and Run -With `Makefile`: -```bash -$ DEBUG_LEVEL=0 make run_microbench -``` -Or with cmake: -```bash -$ mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_BENCHMARK -$ make run_microbench -``` - -*Note: Please run the benchmark code in release build.* -### Run Single Test -Example: -```bash -$ make db_basic_bench -$ ./db_basic_bench --benchmark_filter= -``` - -## Best Practices -#### * Use the Same Test Directory Setting as Unittest -Most of the Micro-benchmark tests use the same test directory setup as unittest, so it could be overridden by: -```bash -$ TEST_TMPDIR=/mydata/tmp/ ./db_basic_bench --benchmark_filter= -``` -Please also follow that when designing new tests. - -#### * Avoid Using Debug API -Even though micro-benchmark is a test, avoid using internal Debug API like TEST_WaitForRun() which is designed for unittest. As benchmark tests are designed for release build, don't use any of that. - -#### * Pay Attention to Local Optimization -As a micro-benchmark is focusing on a single component or area, make sure it is a key part for impacting the overall application performance. - -The compiler might be able to optimize the code that not the same way as the whole application, and if the test data input is simple and small, it may be able to all cached in CPU memory, which is leading to a wrong metric. Take these into consideration when designing the tests. - -#### * Names of user-defined counters/metrics has to be `[A-Za-z0-9_]` -It's a restriction of the metrics collecting and reporting system RocksDB is using internally. It will also help integrate with more systems. - -#### * Minimize the Metrics Variation -Try reducing the test result variation, one way to check that is running the test multiple times and check the CV (Coefficient of Variation) reported by gbenchmark. -```bash -$ ./db_basic_bench --benchmark_filter= --benchmark_repetitions=10 -... -_cv 3.2% -``` -RocksDB has background compaction jobs which may cause the test result to vary a lot. If the micro-benchmark is not purposely testing the operation while compaction is in progress, it should wait for the compaction to finish (`db_impl->WaitForCompact()`) or disable auto-compaction. diff --git a/microbench/db_basic_bench.cc b/microbench/db_basic_bench.cc deleted file mode 100644 index 6c70ad21d..000000000 --- a/microbench/db_basic_bench.cc +++ /dev/null @@ -1,1575 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifndef OS_WIN -#include -#endif // ! 
OS_WIN - -#include "benchmark/benchmark.h" -#include "db/db_impl/db_impl.h" -#include "rocksdb/db.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/options.h" -#include "table/block_based/block.h" -#include "table/block_based/block_builder.h" -#include "util/random.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -class KeyGenerator { - public: - // Generate next key - // buff: the caller needs to make sure there's enough space for generated key - // offset: to control the group of the key, 0 means normal key, 1 means - // non-existing key, 2 is reserved prefix_only: only return a prefix - Slice Next(char* buff, int8_t offset = 0, bool prefix_only = false) { - assert(max_key_ < std::numeric_limits::max() / - MULTIPLIER); // TODO: add large key support - - uint32_t k; - if (is_sequential_) { - assert(next_sequential_key_ < max_key_); - k = (next_sequential_key_ % max_key_) * MULTIPLIER + offset; - if (next_sequential_key_ + 1 == max_key_) { - next_sequential_key_ = 0; - } else { - next_sequential_key_++; - } - } else { - k = (rnd_->Next() % max_key_) * MULTIPLIER + offset; - } - // TODO: make sure the buff is large enough - memset(buff, 0, key_size_); - if (prefix_num_ > 0) { - uint32_t prefix = (k % prefix_num_) * MULTIPLIER + offset; - Encode(buff, prefix); - if (prefix_only) { - return {buff, prefix_size_}; - } - } - Encode(buff + prefix_size_, k); - return {buff, key_size_}; - } - - // use internal buffer for generated key, make sure there's only one caller in - // single thread - Slice Next() { return Next(buff_); } - - // user internal buffer for generated prefix - Slice NextPrefix() { - assert(prefix_num_ > 0); - return Next(buff_, 0, true); - } - - // helper function to get non exist key - Slice NextNonExist() { return Next(buff_, 1); } - - Slice MaxKey(char* buff) const { - memset(buff, 0xff, key_size_); - return {buff, key_size_}; - } - - Slice MinKey(char* buff) const { - memset(buff, 0, key_size_); - return {buff, key_size_}; - } - - // max_key: the max key that it could generate - // prefix_num: the max prefix number - // key_size: in bytes - explicit KeyGenerator(Random* rnd, uint64_t max_key = 100 * 1024 * 1024, - size_t prefix_num = 0, size_t key_size = 10) { - prefix_num_ = prefix_num; - key_size_ = key_size; - max_key_ = max_key; - rnd_ = rnd; - if (prefix_num > 0) { - prefix_size_ = 4; // TODO: support different prefix_size - } - } - - // generate sequential keys - explicit KeyGenerator(uint64_t max_key = 100 * 1024 * 1024, - size_t key_size = 10) { - key_size_ = key_size; - max_key_ = max_key; - rnd_ = nullptr; - is_sequential_ = true; - } - - private: - Random* rnd_; - size_t prefix_num_ = 0; - size_t prefix_size_ = 0; - size_t key_size_; - uint64_t max_key_; - bool is_sequential_ = false; - uint32_t next_sequential_key_ = 0; - char buff_[256] = {0}; - const int MULTIPLIER = 3; - - void static Encode(char* buf, uint32_t value) { - if (port::kLittleEndian) { - buf[0] = static_cast((value >> 24) & 0xff); - buf[1] = static_cast((value >> 16) & 0xff); - buf[2] = static_cast((value >> 8) & 0xff); - buf[3] = static_cast(value & 0xff); - } else { - memcpy(buf, &value, sizeof(value)); - } - } -}; - -static void SetupDB(benchmark::State& state, Options& options, - std::unique_ptr* db, - const std::string& test_name = "") { - options.create_if_missing = true; - auto env = Env::Default(); - std::string db_path; - Status s = env->GetTestDirectory(&db_path); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - std::string 
db_name = - db_path + kFilePathSeparator + test_name + std::to_string(getpid()); - DestroyDB(db_name, options); - - DB* db_ptr = nullptr; - s = DB::Open(options, db_name, &db_ptr); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - db->reset(db_ptr); -} - -static void TeardownDB(benchmark::State& state, const std::unique_ptr& db, - const Options& options, KeyGenerator& kg) { - char min_buff[256], max_buff[256]; - const Range r(kg.MinKey(min_buff), kg.MaxKey(max_buff)); - uint64_t size; - Status s = db->GetApproximateSizes(&r, 1, &size); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - state.counters["db_size"] = static_cast(size); - - std::string db_name = db->GetName(); - s = db->Close(); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - DestroyDB(db_name, options); -} - -static void DBOpen(benchmark::State& state) { - // create DB - std::unique_ptr db; - Options options; - SetupDB(state, options, &db, "DBOpen"); - - std::string db_name = db->GetName(); - db->Close(); - - options.create_if_missing = false; - - auto rnd = Random(123); - - for (auto _ : state) { - { - DB* db_ptr = nullptr; - Status s = DB::Open(options, db_name, &db_ptr); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - db.reset(db_ptr); - } - state.PauseTiming(); - auto wo = WriteOptions(); - Status s; - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 100; j++) { - s = db->Put(wo, rnd.RandomString(10), rnd.RandomString(100)); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - s = db->Flush(FlushOptions()); - } - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - s = db->Close(); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - state.ResumeTiming(); - } - DestroyDB(db_name, options); -} - -BENCHMARK(DBOpen)->Iterations(200); // specify iteration number as the db size - // is impacted by iteration number - -static void DBClose(benchmark::State& state) { - // create DB - std::unique_ptr db; - Options options; - SetupDB(state, options, &db, "DBClose"); - - std::string db_name = db->GetName(); - db->Close(); - - options.create_if_missing = false; - - auto rnd = Random(12345); - - for (auto _ : state) { - state.PauseTiming(); - { - DB* db_ptr = nullptr; - Status s = DB::Open(options, db_name, &db_ptr); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - db.reset(db_ptr); - } - auto wo = WriteOptions(); - Status s; - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 100; j++) { - s = db->Put(wo, rnd.RandomString(10), rnd.RandomString(100)); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - s = db->Flush(FlushOptions()); - } - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - state.ResumeTiming(); - s = db->Close(); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - DestroyDB(db_name, options); -} - -BENCHMARK(DBClose)->Iterations(200); // specify iteration number as the db size - // is impacted by iteration number - -static void DBPut(benchmark::State& state) { - auto compaction_style = static_cast(state.range(0)); - uint64_t max_data = state.range(1); - uint64_t per_key_size = state.range(2); - bool enable_statistics = state.range(3); - bool enable_wal = state.range(4); - uint64_t key_num = max_data / per_key_size; - - // setup DB - static std::unique_ptr db = nullptr; - Options options; - if (enable_statistics) { - options.statistics = CreateDBStatistics(); - } - options.compaction_style = compaction_style; - - 
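The benchmarks below draw their keys from the KeyGenerator defined earlier: a generated value is scaled by MULTIPLIER (3) and the offset selects the key group, so offset-1 keys can never equal any offset-0 key that was actually written. A small worked example of why NextNonExist() is a guaranteed miss:

```cpp
#include <cassert>
#include <cstdint>

// Worked example of the KeyGenerator offset scheme described above:
// offset 0 keys (the ones written) are always multiples of 3, offset 1 keys
// never are, so the two groups cannot collide.
int main() {
  const uint32_t kMultiplier = 3;
  const uint32_t max_key = 1000;
  uint32_t x = 12345 % max_key;             // whatever the RNG produced
  uint32_t existing = x * kMultiplier + 0;  // Next(): offset 0
  uint32_t missing = x * kMultiplier + 1;   // NextNonExist(): offset 1
  assert(existing % kMultiplier == 0);
  assert(missing % kMultiplier == 1);
  assert(existing != missing);
  return 0;
}
```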
auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, key_num); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "DBPut"); - } - - auto wo = WriteOptions(); - wo.disableWAL = !enable_wal; - - for (auto _ : state) { - state.PauseTiming(); - Slice key = kg.Next(); - std::string val = rnd.RandomString(static_cast(per_key_size)); - state.ResumeTiming(); - Status s = db->Put(wo, key, val); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - if (state.thread_index() == 0) { - auto db_full = static_cast_with_check(db.get()); - Status s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - if (enable_statistics) { - HistogramData histogram_data; - options.statistics->histogramData(DB_WRITE, &histogram_data); - state.counters["put_mean"] = histogram_data.average * std::milli::den; - state.counters["put_p95"] = histogram_data.percentile95 * std::milli::den; - state.counters["put_p99"] = histogram_data.percentile99 * std::milli::den; - } - - TeardownDB(state, db, options, kg); - } -} - -static void DBPutArguments(benchmark::internal::Benchmark* b) { - for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal, - kCompactionStyleFIFO}) { - for (int64_t max_data : {100l << 30}) { - for (int64_t per_key_size : {256, 1024}) { - for (bool enable_statistics : {false, true}) { - for (bool wal : {false, true}) { - b->Args( - {comp_style, max_data, per_key_size, enable_statistics, wal}); - } - } - } - } - } - b->ArgNames( - {"comp_style", "max_data", "per_key_size", "enable_statistics", "wal"}); -} - -static const uint64_t DBPutNum = 409600l; -BENCHMARK(DBPut)->Threads(1)->Iterations(DBPutNum)->Apply(DBPutArguments); -BENCHMARK(DBPut)->Threads(8)->Iterations(DBPutNum / 8)->Apply(DBPutArguments); - -static void ManualCompaction(benchmark::State& state) { - auto compaction_style = static_cast(state.range(0)); - uint64_t max_data = state.range(1); - uint64_t per_key_size = state.range(2); - bool enable_statistics = state.range(3); - uint64_t key_num = max_data / per_key_size; - - // setup DB - static std::unique_ptr db; - Options options; - if (enable_statistics) { - options.statistics = CreateDBStatistics(); - } - options.compaction_style = compaction_style; - // No auto compaction - options.disable_auto_compactions = true; - options.level0_file_num_compaction_trigger = (1 << 30); - options.level0_slowdown_writes_trigger = (1 << 30); - options.level0_stop_writes_trigger = (1 << 30); - options.soft_pending_compaction_bytes_limit = 0; - options.hard_pending_compaction_bytes_limit = 0; - - auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, key_num); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "ManualCompaction"); - } - - auto wo = WriteOptions(); - wo.disableWAL = true; - uint64_t flush_mod = key_num / 4; // at least generate 4 files for compaction - for (uint64_t i = 0; i < key_num; i++) { - Status s = db->Put(wo, kg.Next(), - rnd.RandomString(static_cast(per_key_size))); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - if (i + 1 % flush_mod == 0) { - s = db->Flush(FlushOptions()); - } - } - FlushOptions fo; - Status s = db->Flush(fo); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - std::vector files_meta; - db->GetLiveFilesMetaData(&files_meta); - std::vector files_before_compact; - files_before_compact.reserve(files_meta.size()); - for (const LiveFileMetaData& file : files_meta) { - 
files_before_compact.emplace_back(file.name); - } - - SetPerfLevel(kEnableTime); - get_perf_context()->EnablePerLevelPerfContext(); - get_perf_context()->Reset(); - CompactionOptions co; - for (auto _ : state) { - s = db->CompactFiles(co, files_before_compact, 1); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - if (state.thread_index() == 0) { - auto db_full = static_cast_with_check(db.get()); - s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - if (enable_statistics) { - HistogramData histogram_data; - options.statistics->histogramData(COMPACTION_TIME, &histogram_data); - state.counters["comp_time"] = histogram_data.average; - options.statistics->histogramData(COMPACTION_CPU_TIME, &histogram_data); - state.counters["comp_cpu_time"] = histogram_data.average; - options.statistics->histogramData(COMPACTION_OUTFILE_SYNC_MICROS, - &histogram_data); - state.counters["comp_outfile_sync"] = histogram_data.average; - - state.counters["comp_read"] = static_cast( - options.statistics->getTickerCount(COMPACT_READ_BYTES)); - state.counters["comp_write"] = static_cast( - options.statistics->getTickerCount(COMPACT_WRITE_BYTES)); - - state.counters["user_key_comparison_count"] = - static_cast(get_perf_context()->user_key_comparison_count); - state.counters["block_read_count"] = - static_cast(get_perf_context()->block_read_count); - state.counters["block_read_time"] = - static_cast(get_perf_context()->block_read_time); - state.counters["block_checksum_time"] = - static_cast(get_perf_context()->block_checksum_time); - state.counters["new_table_block_iter_nanos"] = - static_cast(get_perf_context()->new_table_block_iter_nanos); - state.counters["new_table_iterator_nanos"] = - static_cast(get_perf_context()->new_table_iterator_nanos); - state.counters["find_table_nanos"] = - static_cast(get_perf_context()->find_table_nanos); - } - - TeardownDB(state, db, options, kg); - } -} - -static void ManualCompactionArguments(benchmark::internal::Benchmark* b) { - for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal}) { - for (int64_t max_data : {32l << 20, 128l << 20}) { - for (int64_t per_key_size : {256, 1024}) { - for (bool enable_statistics : {false, true}) { - b->Args({comp_style, max_data, per_key_size, enable_statistics}); - } - } - } - } - b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics"}); -} - -BENCHMARK(ManualCompaction)->Iterations(1)->Apply(ManualCompactionArguments); - -static void ManualFlush(benchmark::State& state) { - uint64_t key_num = state.range(0); - uint64_t per_key_size = state.range(1); - bool enable_statistics = true; - - // setup DB - static std::unique_ptr db; - Options options; - if (enable_statistics) { - options.statistics = CreateDBStatistics(); - } - options.disable_auto_compactions = true; - options.level0_file_num_compaction_trigger = (1 << 30); - options.level0_slowdown_writes_trigger = (1 << 30); - options.level0_stop_writes_trigger = (1 << 30); - options.soft_pending_compaction_bytes_limit = 0; - options.hard_pending_compaction_bytes_limit = 0; - options.write_buffer_size = 2l << 30; // 2G to avoid auto flush - - auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, key_num); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "ManualFlush"); - } - - auto wo = WriteOptions(); - for (auto _ : state) { - state.PauseTiming(); - for (uint64_t i = 0; i < key_num; i++) { - Status s = db->Put(wo, kg.Next(), - 
rnd.RandomString(static_cast(per_key_size))); - } - FlushOptions fo; - state.ResumeTiming(); - Status s = db->Flush(fo); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - if (state.thread_index() == 0) { - auto db_full = static_cast_with_check(db.get()); - Status s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - if (enable_statistics) { - HistogramData histogram_data; - options.statistics->histogramData(FLUSH_TIME, &histogram_data); - state.counters["flush_time"] = histogram_data.average; - state.counters["flush_write_bytes"] = static_cast( - options.statistics->getTickerCount(FLUSH_WRITE_BYTES)); - } - - TeardownDB(state, db, options, kg); - } -} - -static void ManualFlushArguments(benchmark::internal::Benchmark* b) { - for (int64_t key_num : {1l << 10, 8l << 10, 64l << 10}) { - for (int64_t per_key_size : {256, 1024}) { - b->Args({key_num, per_key_size}); - } - } - b->ArgNames({"key_num", "per_key_size"}); -} - -BENCHMARK(ManualFlush)->Iterations(1)->Apply(ManualFlushArguments); - -static void DBGet(benchmark::State& state) { - auto compaction_style = static_cast(state.range(0)); - uint64_t max_data = state.range(1); - uint64_t per_key_size = state.range(2); - bool enable_statistics = state.range(3); - bool negative_query = state.range(4); - bool enable_filter = state.range(5); - bool mmap = state.range(6); - uint64_t key_num = max_data / per_key_size; - - // setup DB - static std::unique_ptr db; - Options options; - if (enable_statistics) { - options.statistics = CreateDBStatistics(); - } - if (mmap) { - options.allow_mmap_reads = true; - options.compression = kNoCompression; - } - options.compaction_style = compaction_style; - - BlockBasedTableOptions table_options; - if (enable_filter) { - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - } - if (mmap) { - table_options.no_block_cache = true; - table_options.block_restart_interval = 1; - } - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, key_num); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "DBGet"); - - // load db - auto wo = WriteOptions(); - wo.disableWAL = true; - for (uint64_t i = 0; i < key_num; i++) { - Status s = db->Put(wo, kg.Next(), - rnd.RandomString(static_cast(per_key_size))); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - FlushOptions fo; - Status s = db->Flush(fo); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - - auto db_full = static_cast_with_check(db.get()); - s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - } - - auto ro = ReadOptions(); - if (mmap) { - ro.verify_checksums = false; - } - size_t not_found = 0; - if (negative_query) { - for (auto _ : state) { - std::string val; - Status s = db->Get(ro, kg.NextNonExist(), &val); - if (s.IsNotFound()) { - not_found++; - } - } - } else { - for (auto _ : state) { - std::string val; - Status s = db->Get(ro, kg.Next(), &val); - if (s.IsNotFound()) { - not_found++; - } - } - } - - state.counters["neg_qu_pct"] = benchmark::Counter( - static_cast(not_found * 100), benchmark::Counter::kAvgIterations); - - if (state.thread_index() == 0) { - if (enable_statistics) { - HistogramData histogram_data; - options.statistics->histogramData(DB_GET, &histogram_data); - state.counters["get_mean"] = histogram_data.average * std::milli::den; - 
state.counters["get_p95"] = histogram_data.percentile95 * std::milli::den; - state.counters["get_p99"] = histogram_data.percentile99 * std::milli::den; - } - - TeardownDB(state, db, options, kg); - } -} - -static void DBGetArguments(benchmark::internal::Benchmark* b) { - for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal, - kCompactionStyleFIFO}) { - for (int64_t max_data : {128l << 20, 512l << 20}) { - for (int64_t per_key_size : {256, 1024}) { - for (bool enable_statistics : {false, true}) { - for (bool negative_query : {false, true}) { - for (bool enable_filter : {false, true}) { - for (bool mmap : {false, true}) { - b->Args({comp_style, max_data, per_key_size, enable_statistics, - negative_query, enable_filter, mmap}); - } - } - } - } - } - } - } - b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics", - "negative_query", "enable_filter", "mmap"}); -} - -static constexpr uint64_t kDBGetNum = 1l << 20; -BENCHMARK(DBGet)->Threads(1)->Iterations(kDBGetNum)->Apply(DBGetArguments); -BENCHMARK(DBGet)->Threads(8)->Iterations(kDBGetNum / 8)->Apply(DBGetArguments); - -static void SimpleGetWithPerfContext(benchmark::State& state) { - // setup DB - static std::unique_ptr db; - std::string db_name; - Options options; - options.create_if_missing = true; - options.arena_block_size = 8 << 20; - - auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, 1024); - - if (state.thread_index() == 0) { - auto env = Env::Default(); - std::string db_path; - Status s = env->GetTestDirectory(&db_path); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - db_name = db_path + "/simple_get_" + std::to_string(getpid()); - DestroyDB(db_name, options); - - { - DB* db_ptr = nullptr; - s = DB::Open(options, db_name, &db_ptr); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - db.reset(db_ptr); - } - // load db - auto wo = WriteOptions(); - wo.disableWAL = true; - for (uint64_t i = 0; i < 1024; i++) { - s = db->Put(wo, kg.Next(), rnd.RandomString(1024)); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - auto db_full = static_cast_with_check(db.get()); - s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - FlushOptions fo; - s = db->Flush(fo); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - auto ro = ReadOptions(); - size_t not_found = 0; - uint64_t user_key_comparison_count = 0; - uint64_t block_read_time = 0; - uint64_t block_checksum_time = 0; - uint64_t get_snapshot_time = 0; - uint64_t get_post_process_time = 0; - uint64_t get_from_output_files_time = 0; - uint64_t new_table_block_iter_nanos = 0; - uint64_t block_seek_nanos = 0; - uint64_t get_cpu_nanos = 0; - uint64_t get_from_table_nanos = 0; - SetPerfLevel(kEnableTime); - get_perf_context()->EnablePerLevelPerfContext(); - for (auto _ : state) { - std::string val; - get_perf_context()->Reset(); - Status s = db->Get(ro, kg.NextNonExist(), &val); - if (s.IsNotFound()) { - not_found++; - } - user_key_comparison_count += get_perf_context()->user_key_comparison_count; - block_read_time += get_perf_context()->block_read_time; - block_checksum_time += get_perf_context()->block_checksum_time; - get_snapshot_time += get_perf_context()->get_snapshot_time; - get_post_process_time += get_perf_context()->get_post_process_time; - get_from_output_files_time += - get_perf_context()->get_from_output_files_time; - new_table_block_iter_nanos += - 
get_perf_context()->new_table_block_iter_nanos; - block_seek_nanos += get_perf_context()->block_seek_nanos; - get_cpu_nanos += get_perf_context()->get_cpu_nanos; - get_from_table_nanos += - (*(get_perf_context()->level_to_perf_context))[0].get_from_table_nanos; - } - - state.counters["neg_qu_pct"] = benchmark::Counter( - static_cast(not_found * 100), benchmark::Counter::kAvgIterations); - state.counters["user_key_comparison_count"] = - benchmark::Counter(static_cast(user_key_comparison_count), - benchmark::Counter::kAvgIterations); - state.counters["block_read_time"] = benchmark::Counter( - static_cast(block_read_time), benchmark::Counter::kAvgIterations); - state.counters["block_checksum_time"] = - benchmark::Counter(static_cast(block_checksum_time), - benchmark::Counter::kAvgIterations); - state.counters["get_snapshot_time"] = - benchmark::Counter(static_cast(get_snapshot_time), - benchmark::Counter::kAvgIterations); - state.counters["get_post_process_time"] = - benchmark::Counter(static_cast(get_post_process_time), - benchmark::Counter::kAvgIterations); - state.counters["get_from_output_files_time"] = - benchmark::Counter(static_cast(get_from_output_files_time), - benchmark::Counter::kAvgIterations); - state.counters["new_table_block_iter_nanos"] = - benchmark::Counter(static_cast(new_table_block_iter_nanos), - benchmark::Counter::kAvgIterations); - state.counters["block_seek_nanos"] = - benchmark::Counter(static_cast(block_seek_nanos), - benchmark::Counter::kAvgIterations); - state.counters["get_cpu_nanos"] = benchmark::Counter( - static_cast(get_cpu_nanos), benchmark::Counter::kAvgIterations); - state.counters["get_from_table_nanos"] = - benchmark::Counter(static_cast(get_from_table_nanos), - benchmark::Counter::kAvgIterations); - - if (state.thread_index() == 0) { - TeardownDB(state, db, options, kg); - } -} - -BENCHMARK(SimpleGetWithPerfContext)->Iterations(1000000); - -static void DBGetMergeOperandsInMemtable(benchmark::State& state) { - const uint64_t kDataLen = 16 << 20; // 16MB - const uint64_t kValueLen = 64; - const uint64_t kNumEntries = kDataLen / kValueLen; - const uint64_t kNumEntriesPerKey = state.range(0); - const uint64_t kNumKeys = kNumEntries / kNumEntriesPerKey; - - // setup DB - static std::unique_ptr db; - - Options options; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - // Make memtable large enough that automatic flush will not be triggered. 
- options.write_buffer_size = 2 * kDataLen; - - KeyGenerator sequential_key_gen(kNumKeys); - auto rnd = Random(301 + state.thread_index()); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "DBGetMergeOperandsInMemtable"); - - // load db - auto write_opts = WriteOptions(); - write_opts.disableWAL = true; - for (uint64_t i = 0; i < kNumEntries; i++) { - Status s = db->Merge(write_opts, sequential_key_gen.Next(), - rnd.RandomString(static_cast(kValueLen))); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - } - - KeyGenerator random_key_gen(kNumKeys); - std::vector value_operands; - value_operands.resize(kNumEntriesPerKey); - GetMergeOperandsOptions get_merge_ops_opts; - get_merge_ops_opts.expected_max_number_of_operands = - static_cast(kNumEntriesPerKey); - for (auto _ : state) { - int num_value_operands = 0; - Status s = db->GetMergeOperands( - ReadOptions(), db->DefaultColumnFamily(), random_key_gen.Next(), - value_operands.data(), &get_merge_ops_opts, &num_value_operands); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - if (num_value_operands != static_cast(kNumEntriesPerKey)) { - state.SkipWithError("Unexpected number of merge operands found for key"); - } - for (auto& value_operand : value_operands) { - value_operand.Reset(); - } - } - - if (state.thread_index() == 0) { - TeardownDB(state, db, options, random_key_gen); - } -} - -static void DBGetMergeOperandsInSstFile(benchmark::State& state) { - const uint64_t kDataLen = 16 << 20; // 16MB - const uint64_t kValueLen = 64; - const uint64_t kNumEntries = kDataLen / kValueLen; - const uint64_t kNumEntriesPerKey = state.range(0); - const uint64_t kNumKeys = kNumEntries / kNumEntriesPerKey; - const bool kMmap = state.range(1); - - // setup DB - static std::unique_ptr db; - - BlockBasedTableOptions table_options; - if (kMmap) { - table_options.no_block_cache = true; - } else { - // Make block cache large enough that eviction will not be triggered. - table_options.block_cache = NewLRUCache(2 * kDataLen); - } - - Options options; - if (kMmap) { - options.allow_mmap_reads = true; - } - options.compression = kNoCompression; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - // Make memtable large enough that automatic flush will not be triggered. - options.write_buffer_size = 2 * kDataLen; - - KeyGenerator sequential_key_gen(kNumKeys); - auto rnd = Random(301 + state.thread_index()); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "DBGetMergeOperandsInBlockCache"); - - // load db - // - // Take a snapshot after each cycle of merges to ensure flush cannot - // merge any entries. - std::vector snapshots; - snapshots.resize(kNumEntriesPerKey); - auto write_opts = WriteOptions(); - write_opts.disableWAL = true; - for (uint64_t i = 0; i < kNumEntriesPerKey; i++) { - for (uint64_t j = 0; j < kNumKeys; j++) { - Status s = db->Merge(write_opts, sequential_key_gen.Next(), - rnd.RandomString(static_cast(kValueLen))); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - snapshots[i] = db->GetSnapshot(); - } - - // Flush to an L0 file; read back to prime the cache/mapped memory. 
- db->Flush(FlushOptions()); - for (uint64_t i = 0; i < kNumKeys; ++i) { - std::string value; - Status s = db->Get(ReadOptions(), sequential_key_gen.Next(), &value); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - if (state.thread_index() == 0) { - for (uint64_t i = 0; i < kNumEntriesPerKey; ++i) { - db->ReleaseSnapshot(snapshots[i]); - } - } - } - - KeyGenerator random_key_gen(kNumKeys); - std::vector value_operands; - value_operands.resize(kNumEntriesPerKey); - GetMergeOperandsOptions get_merge_ops_opts; - get_merge_ops_opts.expected_max_number_of_operands = - static_cast(kNumEntriesPerKey); - for (auto _ : state) { - int num_value_operands = 0; - ReadOptions read_opts; - read_opts.verify_checksums = false; - Status s = db->GetMergeOperands( - read_opts, db->DefaultColumnFamily(), random_key_gen.Next(), - value_operands.data(), &get_merge_ops_opts, &num_value_operands); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - if (num_value_operands != static_cast(kNumEntriesPerKey)) { - state.SkipWithError("Unexpected number of merge operands found for key"); - } - for (auto& value_operand : value_operands) { - value_operand.Reset(); - } - } - - if (state.thread_index() == 0) { - TeardownDB(state, db, options, random_key_gen); - } -} - -static void DBGetMergeOperandsInMemtableArguments( - benchmark::internal::Benchmark* b) { - for (int entries_per_key : {1, 32, 1024}) { - b->Args({entries_per_key}); - } - b->ArgNames({"entries_per_key"}); -} - -static void DBGetMergeOperandsInSstFileArguments( - benchmark::internal::Benchmark* b) { - for (int entries_per_key : {1, 32, 1024}) { - for (bool mmap : {false, true}) { - b->Args({entries_per_key, mmap}); - } - } - b->ArgNames({"entries_per_key", "mmap"}); -} - -BENCHMARK(DBGetMergeOperandsInMemtable) - ->Threads(1) - ->Apply(DBGetMergeOperandsInMemtableArguments); -BENCHMARK(DBGetMergeOperandsInMemtable) - ->Threads(8) - ->Apply(DBGetMergeOperandsInMemtableArguments); -BENCHMARK(DBGetMergeOperandsInSstFile) - ->Threads(1) - ->Apply(DBGetMergeOperandsInSstFileArguments); -BENCHMARK(DBGetMergeOperandsInSstFile) - ->Threads(8) - ->Apply(DBGetMergeOperandsInSstFileArguments); - -std::string GenerateKey(int primary_key, int secondary_key, int padding_size, - Random* rnd) { - char buf[50]; - char* p = &buf[0]; - snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key); - std::string k(p); - if (padding_size) { - k += rnd->RandomString(padding_size); - } - - return k; -} - -void GenerateRandomKVs(std::vector* keys, - std::vector* values, const int from, - const int len, const int step = 1, - const int padding_size = 0, - const int keys_share_prefix = 1) { - Random rnd(302); - - // generate different prefix - for (int i = from; i < from + len; i += step) { - // generating keys that share the prefix - for (int j = 0; j < keys_share_prefix; ++j) { - keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); - // 100 bytes values - values->emplace_back(rnd.RandomString(100)); - } - } -} - -// TODO: move it to different files, as it's testing an internal API -static void DataBlockSeek(benchmark::State& state) { - Random rnd(301); - Options options = Options(); - - BlockBuilder builder(16, true, false, - BlockBasedTableOptions::kDataBlockBinarySearch); - - int num_records = 500; - std::vector keys; - std::vector values; - - GenerateRandomKVs(&keys, &values, 0, num_records); - - for (int i = 0; i < num_records; i++) { - std::string ukey(keys[i] + "1"); - InternalKey ikey(ukey, 0, kTypeValue); - 
builder.Add(ikey.Encode().ToString(), values[i]); - } - - Slice rawblock = builder.Finish(); - - BlockContents contents; - contents.data = rawblock; - Block reader(std::move(contents)); - - SetPerfLevel(kEnableTime); - uint64_t total = 0; - for (auto _ : state) { - DataBlockIter* iter = reader.NewDataIterator(options.comparator, - kDisableGlobalSequenceNumber); - uint32_t index = rnd.Uniform(static_cast(num_records)); - std::string ukey(keys[index] + "1"); - InternalKey ikey(ukey, 0, kTypeValue); - get_perf_context()->Reset(); - bool may_exist = iter->SeekForGet(ikey.Encode().ToString()); - if (!may_exist) { - state.SkipWithError("key not found"); - } - total += get_perf_context()->block_seek_nanos; - delete iter; - } - state.counters["seek_ns"] = benchmark::Counter( - static_cast(total), benchmark::Counter::kAvgIterations); -} - -BENCHMARK(DataBlockSeek)->Iterations(1000000); - -static void IteratorSeek(benchmark::State& state) { - auto compaction_style = static_cast(state.range(0)); - uint64_t max_data = state.range(1); - uint64_t per_key_size = state.range(2); - bool enable_statistics = state.range(3); - bool negative_query = state.range(4); - bool enable_filter = state.range(5); - uint64_t key_num = max_data / per_key_size; - - // setup DB - static std::unique_ptr db; - Options options; - if (enable_statistics) { - options.statistics = CreateDBStatistics(); - } - options.compaction_style = compaction_style; - - if (enable_filter) { - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - } - - auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, key_num); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "IteratorSeek"); - - // load db - auto wo = WriteOptions(); - wo.disableWAL = true; - for (uint64_t i = 0; i < key_num; i++) { - Status s = db->Put(wo, kg.Next(), - rnd.RandomString(static_cast(per_key_size))); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - FlushOptions fo; - Status s = db->Flush(fo); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - - auto db_full = static_cast_with_check(db.get()); - s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - } - - for (auto _ : state) { - std::unique_ptr iter{nullptr}; - state.PauseTiming(); - if (!iter) { - iter.reset(db->NewIterator(ReadOptions())); - } - Slice key = negative_query ? 
kg.NextNonExist() : kg.Next(); - if (!iter->status().ok()) { - state.SkipWithError(iter->status().ToString().c_str()); - return; - } - state.ResumeTiming(); - iter->Seek(key); - } - - if (state.thread_index() == 0) { - TeardownDB(state, db, options, kg); - } -} - -static void IteratorSeekArguments(benchmark::internal::Benchmark* b) { - for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal, - kCompactionStyleFIFO}) { - for (int64_t max_data : {128l << 20, 512l << 20}) { - for (int64_t per_key_size : {256, 1024}) { - for (bool enable_statistics : {false, true}) { - for (bool negative_query : {false, true}) { - for (bool enable_filter : {false, true}) { - b->Args({comp_style, max_data, per_key_size, enable_statistics, - negative_query, enable_filter}); - } - } - } - } - } - } - b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics", - "negative_query", "enable_filter"}); -} - -static constexpr uint64_t kDBSeekNum = 10l << 10; -BENCHMARK(IteratorSeek) - ->Threads(1) - ->Iterations(kDBSeekNum) - ->Apply(IteratorSeekArguments); -BENCHMARK(IteratorSeek) - ->Threads(8) - ->Iterations(kDBSeekNum / 8) - ->Apply(IteratorSeekArguments); - -static void IteratorNext(benchmark::State& state) { - auto compaction_style = static_cast(state.range(0)); - uint64_t max_data = state.range(1); - uint64_t per_key_size = state.range(2); - uint64_t key_num = max_data / per_key_size; - - // setup DB - static std::unique_ptr db; - Options options; - options.compaction_style = compaction_style; - - auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, key_num); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "IteratorNext"); - // load db - auto wo = WriteOptions(); - wo.disableWAL = true; - for (uint64_t i = 0; i < key_num; i++) { - Status s = db->Put(wo, kg.Next(), - rnd.RandomString(static_cast(per_key_size))); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - FlushOptions fo; - Status s = db->Flush(fo); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - - auto db_full = static_cast_with_check(db.get()); - s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - } - - for (auto _ : state) { - std::unique_ptr iter{nullptr}; - state.PauseTiming(); - if (!iter) { - iter.reset(db->NewIterator(ReadOptions())); - } - while (!iter->Valid()) { - iter->Seek(kg.Next()); - if (!iter->status().ok()) { - state.SkipWithError(iter->status().ToString().c_str()); - } - } - state.ResumeTiming(); - iter->Next(); - } - - if (state.thread_index() == 0) { - TeardownDB(state, db, options, kg); - } -} - -static void IteratorNextArguments(benchmark::internal::Benchmark* b) { - for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal, - kCompactionStyleFIFO}) { - for (int64_t max_data : {128l << 20, 512l << 20}) { - for (int64_t per_key_size : {256, 1024}) { - b->Args({comp_style, max_data, per_key_size}); - } - } - } - b->ArgNames({"comp_style", "max_data", "per_key_size"}); -} -static constexpr uint64_t kIteratorNextNum = 10l << 10; -BENCHMARK(IteratorNext) - ->Iterations(kIteratorNextNum) - ->Apply(IteratorNextArguments); - -static void IteratorNextWithPerfContext(benchmark::State& state) { - // setup DB - static std::unique_ptr db; - Options options; - - auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, 1024); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "IteratorNextWithPerfContext"); - // load db - auto wo = 
WriteOptions(); - wo.disableWAL = true; - for (uint64_t i = 0; i < 1024; i++) { - Status s = db->Put(wo, kg.Next(), rnd.RandomString(1024)); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - auto db_full = static_cast_with_check(db.get()); - Status s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - FlushOptions fo; - s = db->Flush(fo); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - uint64_t user_key_comparison_count = 0; - uint64_t internal_key_skipped_count = 0; - uint64_t find_next_user_entry_time = 0; - uint64_t iter_next_cpu_nanos = 0; - - SetPerfLevel(kEnableTime); - get_perf_context()->EnablePerLevelPerfContext(); - - for (auto _ : state) { - std::unique_ptr iter{nullptr}; - state.PauseTiming(); - if (!iter) { - iter.reset(db->NewIterator(ReadOptions())); - } - while (!iter->Valid()) { - iter->Seek(kg.Next()); - if (!iter->status().ok()) { - state.SkipWithError(iter->status().ToString().c_str()); - } - } - get_perf_context()->Reset(); - state.ResumeTiming(); - - iter->Next(); - user_key_comparison_count += get_perf_context()->user_key_comparison_count; - internal_key_skipped_count += - get_perf_context()->internal_key_skipped_count; - find_next_user_entry_time += get_perf_context()->find_next_user_entry_time; - iter_next_cpu_nanos += get_perf_context()->iter_next_cpu_nanos; - } - - state.counters["user_key_comparison_count"] = - benchmark::Counter(static_cast(user_key_comparison_count), - benchmark::Counter::kAvgIterations); - state.counters["internal_key_skipped_count"] = - benchmark::Counter(static_cast(internal_key_skipped_count), - benchmark::Counter::kAvgIterations); - state.counters["find_next_user_entry_time"] = - benchmark::Counter(static_cast(find_next_user_entry_time), - benchmark::Counter::kAvgIterations); - state.counters["iter_next_cpu_nanos"] = - benchmark::Counter(static_cast(iter_next_cpu_nanos), - benchmark::Counter::kAvgIterations); - - if (state.thread_index() == 0) { - TeardownDB(state, db, options, kg); - } -} - -BENCHMARK(IteratorNextWithPerfContext)->Iterations(100000); - -static void IteratorPrev(benchmark::State& state) { - auto compaction_style = static_cast(state.range(0)); - uint64_t max_data = state.range(1); - uint64_t per_key_size = state.range(2); - uint64_t key_num = max_data / per_key_size; - - // setup DB - static std::unique_ptr db; - std::string db_name; - Options options; - options.compaction_style = compaction_style; - - auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, key_num); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "IteratorPrev"); - // load db - auto wo = WriteOptions(); - wo.disableWAL = true; - for (uint64_t i = 0; i < key_num; i++) { - Status s = db->Put(wo, kg.Next(), - rnd.RandomString(static_cast(per_key_size))); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - FlushOptions fo; - Status s = db->Flush(fo); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - - auto db_full = static_cast_with_check(db.get()); - s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - } - - for (auto _ : state) { - std::unique_ptr iter{nullptr}; - state.PauseTiming(); - if (!iter) { - iter.reset(db->NewIterator(ReadOptions())); - } - while (!iter->Valid()) { - iter->Seek(kg.Next()); - if (!iter->status().ok()) { - state.SkipWithError(iter->status().ToString().c_str()); - } - } - state.ResumeTiming(); - 
iter->Prev(); - } - - if (state.thread_index() == 0) { - TeardownDB(state, db, options, kg); - } -} - -static void IteratorPrevArguments(benchmark::internal::Benchmark* b) { - for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal, - kCompactionStyleFIFO}) { - for (int64_t max_data : {128l << 20, 512l << 20}) { - for (int64_t per_key_size : {256, 1024}) { - b->Args({comp_style, max_data, per_key_size}); - } - } - } - b->ArgNames({"comp_style", "max_data", "per_key_size"}); -} - -static constexpr uint64_t kIteratorPrevNum = 10l << 10; -BENCHMARK(IteratorPrev) - ->Iterations(kIteratorPrevNum) - ->Apply(IteratorPrevArguments); - -static void PrefixSeek(benchmark::State& state) { - auto compaction_style = static_cast(state.range(0)); - uint64_t max_data = state.range(1); - uint64_t per_key_size = state.range(2); - bool enable_statistics = state.range(3); - bool enable_filter = state.range(4); - uint64_t key_num = max_data / per_key_size; - - // setup DB - static std::unique_ptr db; - Options options; - if (enable_statistics) { - options.statistics = CreateDBStatistics(); - } - options.compaction_style = compaction_style; - options.prefix_extractor.reset(NewFixedPrefixTransform(4)); - - if (enable_filter) { - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - } - - auto rnd = Random(301 + state.thread_index()); - KeyGenerator kg(&rnd, key_num, key_num / 100); - - if (state.thread_index() == 0) { - SetupDB(state, options, &db, "PrefixSeek"); - - // load db - auto wo = WriteOptions(); - wo.disableWAL = true; - for (uint64_t i = 0; i < key_num; i++) { - Status s = db->Put(wo, kg.Next(), - rnd.RandomString(static_cast(per_key_size))); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - FlushOptions fo; - Status s = db->Flush(fo); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - - auto db_full = static_cast_with_check(db.get()); - s = db_full->WaitForCompact(true); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - } - - for (auto _ : state) { - std::unique_ptr iter{nullptr}; - state.PauseTiming(); - if (!iter) { - iter.reset(db->NewIterator(ReadOptions())); - } - state.ResumeTiming(); - iter->Seek(kg.NextPrefix()); - if (!iter->status().ok()) { - state.SkipWithError(iter->status().ToString().c_str()); - return; - } - } - - if (state.thread_index() == 0) { - TeardownDB(state, db, options, kg); - } -} - -static void PrefixSeekArguments(benchmark::internal::Benchmark* b) { - for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal, - kCompactionStyleFIFO}) { - for (int64_t max_data : {128l << 20, 512l << 20}) { - for (int64_t per_key_size : {256, 1024}) { - for (bool enable_statistics : {false, true}) { - for (bool enable_filter : {false, true}) { - b->Args({comp_style, max_data, per_key_size, enable_statistics, - enable_filter}); - } - } - } - } - } - b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics", - "enable_filter"}); -} - -static constexpr uint64_t kPrefixSeekNum = 10l << 10; -BENCHMARK(PrefixSeek)->Iterations(kPrefixSeekNum)->Apply(PrefixSeekArguments); -BENCHMARK(PrefixSeek) - ->Threads(8) - ->Iterations(kPrefixSeekNum / 8) - ->Apply(PrefixSeekArguments); - -// TODO: move it to different files, as it's testing an internal API -static void RandomAccessFileReaderRead(benchmark::State& state) { - bool enable_statistics = state.range(0); - 
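RandomAccessFileReaderRead exposes its single enable_statistics switch through the plain integer argument slot (registered further down with ->Arg(0)->Arg(1)->ArgName("enable_statistics")) rather than a custom Apply() generator like the DB benchmarks above. A minimal sketch of that registration style, using a hypothetical ToggleExample benchmark:

#include "benchmark/benchmark.h"

static void ToggleExample(benchmark::State& state) {
  // A single integer argument doubles as a boolean switch.
  bool enable_statistics = state.range(0) != 0;
  for (auto _ : state) {
    benchmark::DoNotOptimize(enable_statistics);
  }
}

// Two runs of the same benchmark, one per argument value; ArgName() only
// changes how the argument is labeled in the report.
BENCHMARK(ToggleExample)->Arg(0)->Arg(1)->ArgName("enable_statistics");
BENCHMARK_MAIN();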
constexpr int kFileNum = 10; - auto env = Env::Default(); - auto fs = env->GetFileSystem(); - std::string db_path; - Status s = env->GetTestDirectory(&db_path); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - return; - } - - // Setup multiple `RandomAccessFileReader`s with different parameters to be - // used for test - Random rand(301); - std::string fname_base = - db_path + kFilePathSeparator + "random-access-file-reader-read"; - std::vector> readers; - auto statistics_share = CreateDBStatistics(); - Statistics* statistics = enable_statistics ? statistics_share.get() : nullptr; - for (int i = 0; i < kFileNum; i++) { - std::string fname = fname_base + std::to_string(i); - std::string content = rand.RandomString(kDefaultPageSize); - std::unique_ptr tgt_file; - env->NewWritableFile(fname, &tgt_file, EnvOptions()); - tgt_file->Append(content); - tgt_file->Close(); - - std::unique_ptr f; - fs->NewRandomAccessFile(fname, FileOptions(), &f, nullptr); - int rand_num = rand.Next() % 3; - auto temperature = rand_num == 0 ? Temperature::kUnknown - : rand_num == 1 ? Temperature::kWarm - : Temperature::kCold; - readers.emplace_back(new RandomAccessFileReader( - std::move(f), fname, env->GetSystemClock().get(), nullptr, statistics, - 0, nullptr, nullptr, {}, temperature, rand_num == 1)); - } - - IOOptions io_options; - std::unique_ptr scratch(new char[2048]); - Slice result; - uint64_t idx = 0; - for (auto _ : state) { - s = readers[idx++ % kFileNum]->Read(io_options, 0, kDefaultPageSize / 3, - &result, scratch.get(), nullptr, - Env::IO_TOTAL); - if (!s.ok()) { - state.SkipWithError(s.ToString().c_str()); - } - } - - // clean up - for (int i = 0; i < kFileNum; i++) { - std::string fname = fname_base + std::to_string(i); - env->DeleteFile(fname); // ignore return, okay to fail cleanup - } -} - -BENCHMARK(RandomAccessFileReaderRead) - ->Iterations(1000000) - ->Arg(0) - ->Arg(1) - ->ArgName("enable_statistics"); - -} // namespace ROCKSDB_NAMESPACE - -BENCHMARK_MAIN(); diff --git a/microbench/ribbon_bench.cc b/microbench/ribbon_bench.cc deleted file mode 100644 index d0fb2ec9a..000000000 --- a/microbench/ribbon_bench.cc +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -// this is a simple micro-benchmark for compare ribbon filter vs. other filter -// for more comprehensive, please check the dedicate util/filter_bench. -#include "benchmark/benchmark.h" -#include "table/block_based/filter_policy_internal.h" -#include "table/block_based/mock_block_based_table.h" - -namespace ROCKSDB_NAMESPACE { - -struct KeyMaker { - explicit KeyMaker(size_t avg_size) - : smallest_size_(avg_size), - buf_size_(avg_size + 11), // pad to vary key size and alignment - buf_(new char[buf_size_]) { - memset(buf_.get(), 0, buf_size_); - assert(smallest_size_ > 8); - } - size_t smallest_size_; - size_t buf_size_; - std::unique_ptr buf_; - - // Returns a unique(-ish) key based on the given parameter values. Each - // call returns a Slice from the same buffer so previously returned - // Slices should be considered invalidated. 
- Slice Get(uint32_t filter_num, uint32_t val_num) const { - size_t start = val_num % 4; - size_t len = smallest_size_; - // To get range [avg_size - 2, avg_size + 2] - // use range [smallest_size, smallest_size + 4] - len += FastRange32((val_num >> 5) * 1234567891, 5); - char *data = buf_.get() + start; - // Populate key data such that all data makes it into a key of at - // least 8 bytes. We also don't want all the within-filter key - // variance confined to a contiguous 32 bits, because then a 32 bit - // hash function can "cheat" the false positive rate by - // approximating a perfect hash. - EncodeFixed32(data, val_num); - EncodeFixed32(data + 4, filter_num + val_num); - // ensure clearing leftovers from different alignment - EncodeFixed32(data + 8, 0); - return {data, len}; - } -}; - -// benchmark arguments: -// 0. filter impl (like filter_bench -impl) -// 1. filter config bits_per_key -// 2. average data key length -// 3. data entry number -static void CustomArguments(benchmark::internal::Benchmark *b) { - const auto kImplCount = - static_cast(BloomLikeFilterPolicy::GetAllFixedImpls().size()); - for (int filter_impl = 0; filter_impl < kImplCount; ++filter_impl) { - for (int bits_per_key : {10, 20}) { - for (int key_len_avg : {10, 100}) { - for (int64_t entry_num : {1 << 10, 1 << 20}) { - b->Args({filter_impl, bits_per_key, key_len_avg, entry_num}); - } - } - } - } - b->ArgNames({"filter_impl", "bits_per_key", "key_len_avg", "entry_num"}); -} - -static void FilterBuild(benchmark::State &state) { - // setup data - auto filter = BloomLikeFilterPolicy::Create( - BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)), - static_cast(state.range(1))); - auto tester = std::make_unique(filter); - KeyMaker km(state.range(2)); - std::unique_ptr owner; - const int64_t kEntryNum = state.range(3); - auto rnd = Random32(12345); - uint32_t filter_num = rnd.Next(); - // run the test - for (auto _ : state) { - std::unique_ptr builder(tester->GetBuilder()); - for (uint32_t i = 0; i < kEntryNum; i++) { - builder->AddKey(km.Get(filter_num, i)); - } - auto ret = builder->Finish(&owner); - state.counters["size"] = static_cast(ret.size()); - } -} -BENCHMARK(FilterBuild)->Apply(CustomArguments); - -static void FilterQueryPositive(benchmark::State &state) { - // setup data - auto filter = BloomLikeFilterPolicy::Create( - BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)), - static_cast(state.range(1))); - auto tester = std::make_unique(filter); - KeyMaker km(state.range(2)); - std::unique_ptr owner; - const int64_t kEntryNum = state.range(3); - auto rnd = Random32(12345); - uint32_t filter_num = rnd.Next(); - std::unique_ptr builder(tester->GetBuilder()); - for (uint32_t i = 0; i < kEntryNum; i++) { - builder->AddKey(km.Get(filter_num, i)); - } - auto data = builder->Finish(&owner); - std::unique_ptr reader{filter->GetFilterBitsReader(data)}; - - // run test - uint32_t i = 0; - for (auto _ : state) { - i++; - i = i % kEntryNum; - reader->MayMatch(km.Get(filter_num, i)); - } -} -BENCHMARK(FilterQueryPositive)->Apply(CustomArguments); - -static void FilterQueryNegative(benchmark::State &state) { - // setup data - auto filter = BloomLikeFilterPolicy::Create( - BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)), - static_cast(state.range(1))); - auto tester = std::make_unique(filter); - KeyMaker km(state.range(2)); - std::unique_ptr owner; - const int64_t kEntryNum = state.range(3); - auto rnd = Random32(12345); - uint32_t filter_num = rnd.Next(); - std::unique_ptr 
builder(tester->GetBuilder()); - for (uint32_t i = 0; i < kEntryNum; i++) { - builder->AddKey(km.Get(filter_num, i)); - } - auto data = builder->Finish(&owner); - std::unique_ptr reader{filter->GetFilterBitsReader(data)}; - - // run test - uint32_t i = 0; - double fp_cnt = 0; - for (auto _ : state) { - i++; - auto result = reader->MayMatch(km.Get(filter_num + 1, i)); - if (result) { - fp_cnt++; - } - } - state.counters["fp_pct"] = - benchmark::Counter(fp_cnt * 100, benchmark::Counter::kAvgIterations); -} -BENCHMARK(FilterQueryNegative)->Apply(CustomArguments); - -} // namespace ROCKSDB_NAMESPACE - -BENCHMARK_MAIN(); diff --git a/monitoring/histogram_test.cc b/monitoring/histogram_test.cc deleted file mode 100644 index 19e9f15d0..000000000 --- a/monitoring/histogram_test.cc +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#include "monitoring/histogram.h" - -#include - -#include "monitoring/histogram_windowing.h" -#include "rocksdb/system_clock.h" -#include "test_util/mock_time_env.h" -#include "test_util/testharness.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -class HistogramTest : public testing::Test {}; - -namespace { -const double kIota = 0.1; -const HistogramBucketMapper bucketMapper; -std::shared_ptr clock = - std::make_shared(SystemClock::Default()); -} // namespace - -void PopulateHistogram(Histogram& histogram, uint64_t low, uint64_t high, - uint64_t loop = 1) { - Random rnd(test::RandomSeed()); - for (; loop > 0; loop--) { - for (uint64_t i = low; i <= high; i++) { - histogram.Add(i); - // sleep a random microseconds [0-10) - clock->SleepForMicroseconds(rnd.Uniform(10)); - } - } - // make sure each data population at least take some time - clock->SleepForMicroseconds(1); -} - -void BasicOperation(Histogram& histogram) { - PopulateHistogram(histogram, 1, 110, 10); // fill up to bucket [70, 110) - - HistogramData data; - histogram.Data(&data); - - ASSERT_LE(fabs(histogram.Percentile(100.0) - 110.0), kIota); - ASSERT_LE(fabs(data.percentile99 - 108.9), kIota); // 99 * 110 / 100 - ASSERT_LE(fabs(data.percentile95 - 104.5), kIota); // 95 * 110 / 100 - ASSERT_LE(fabs(data.median - 55.0), kIota); // 50 * 110 / 100 - ASSERT_EQ(data.average, 55.5); // (1 + 110) / 2 -} - -void MergeHistogram(Histogram& histogram, Histogram& other) { - PopulateHistogram(histogram, 1, 100); - PopulateHistogram(other, 101, 250); - histogram.Merge(other); - - HistogramData data; - histogram.Data(&data); - - ASSERT_LE(fabs(histogram.Percentile(100.0) - 250.0), kIota); - ASSERT_LE(fabs(data.percentile99 - 247.5), kIota); // 99 * 250 / 100 - ASSERT_LE(fabs(data.percentile95 - 237.5), kIota); // 95 * 250 / 100 - ASSERT_LE(fabs(data.median - 125.0), kIota); // 50 * 250 / 100 - ASSERT_EQ(data.average, 125.5); // (1 + 250) / 2 -} - -void EmptyHistogram(Histogram& histogram) { - ASSERT_EQ(histogram.min(), bucketMapper.LastValue()); - ASSERT_EQ(histogram.max(), 0); - ASSERT_EQ(histogram.num(), 0); - ASSERT_EQ(histogram.Median(), 0.0); - ASSERT_EQ(histogram.Percentile(85.0), 0.0); - ASSERT_EQ(histogram.Average(), 0.0); - ASSERT_EQ(histogram.StandardDeviation(), 0.0); -} - -void ClearHistogram(Histogram& histogram) { - for (uint64_t i = 1; i <= 100; i++) { - histogram.Add(i); - } - histogram.Clear(); - ASSERT_TRUE(histogram.Empty()); - 
ASSERT_EQ(histogram.Median(), 0); - ASSERT_EQ(histogram.Percentile(85.0), 0); - ASSERT_EQ(histogram.Average(), 0); -} - -TEST_F(HistogramTest, BasicOperation) { - HistogramImpl histogram; - BasicOperation(histogram); - - HistogramWindowingImpl histogramWindowing; - BasicOperation(histogramWindowing); -} - -TEST_F(HistogramTest, BoundaryValue) { - HistogramImpl histogram; - // - both should be in [0, 1] bucket because we place values on bucket - // boundaries in the lower bucket. - // - all points are in [0, 1] bucket, so p50 will be 0.5 - // - the test cannot be written with a single point since histogram won't - // report percentiles lower than the min or greater than the max. - histogram.Add(0); - histogram.Add(1); - - ASSERT_LE(fabs(histogram.Percentile(50.0) - 0.5), kIota); -} - -TEST_F(HistogramTest, MergeHistogram) { - HistogramImpl histogram; - HistogramImpl other; - MergeHistogram(histogram, other); - - HistogramWindowingImpl histogramWindowing; - HistogramWindowingImpl otherWindowing; - MergeHistogram(histogramWindowing, otherWindowing); -} - -TEST_F(HistogramTest, EmptyHistogram) { - HistogramImpl histogram; - EmptyHistogram(histogram); - - HistogramWindowingImpl histogramWindowing; - EmptyHistogram(histogramWindowing); -} - -TEST_F(HistogramTest, ClearHistogram) { - HistogramImpl histogram; - ClearHistogram(histogram); - - HistogramWindowingImpl histogramWindowing; - ClearHistogram(histogramWindowing); -} - -TEST_F(HistogramTest, HistogramWindowingExpire) { - uint64_t num_windows = 3; - int micros_per_window = 1000000; - uint64_t min_num_per_window = 0; - - HistogramWindowingImpl histogramWindowing(num_windows, micros_per_window, - min_num_per_window); - histogramWindowing.TEST_UpdateClock(clock); - PopulateHistogram(histogramWindowing, 1, 1, 100); - clock->SleepForMicroseconds(micros_per_window); - ASSERT_EQ(histogramWindowing.num(), 100); - ASSERT_EQ(histogramWindowing.min(), 1); - ASSERT_EQ(histogramWindowing.max(), 1); - ASSERT_EQ(histogramWindowing.Average(), 1.0); - ASSERT_EQ(histogramWindowing.StandardDeviation(), 0.0); - - PopulateHistogram(histogramWindowing, 2, 2, 100); - clock->SleepForMicroseconds(micros_per_window); - ASSERT_EQ(histogramWindowing.num(), 200); - ASSERT_EQ(histogramWindowing.min(), 1); - ASSERT_EQ(histogramWindowing.max(), 2); - ASSERT_EQ(histogramWindowing.Average(), 1.5); - ASSERT_GT(histogramWindowing.StandardDeviation(), 0.0); - - PopulateHistogram(histogramWindowing, 3, 3, 100); - clock->SleepForMicroseconds(micros_per_window); - ASSERT_EQ(histogramWindowing.num(), 300); - ASSERT_EQ(histogramWindowing.min(), 1); - ASSERT_EQ(histogramWindowing.max(), 3); - ASSERT_EQ(histogramWindowing.Average(), 2.0); - ASSERT_GT(histogramWindowing.StandardDeviation(), 0.0); - - // dropping oldest window with value 1, remaining 2 ~ 4 - PopulateHistogram(histogramWindowing, 4, 4, 100); - clock->SleepForMicroseconds(micros_per_window); - ASSERT_EQ(histogramWindowing.num(), 300); - ASSERT_EQ(histogramWindowing.min(), 2); - ASSERT_EQ(histogramWindowing.max(), 4); - ASSERT_EQ(histogramWindowing.Average(), 3.0); - ASSERT_GT(histogramWindowing.StandardDeviation(), 0.0); - - // dropping oldest window with value 2, remaining 3 ~ 5 - PopulateHistogram(histogramWindowing, 5, 5, 100); - clock->SleepForMicroseconds(micros_per_window); - ASSERT_EQ(histogramWindowing.num(), 300); - ASSERT_EQ(histogramWindowing.min(), 3); - ASSERT_EQ(histogramWindowing.max(), 5); - ASSERT_EQ(histogramWindowing.Average(), 4.0); - ASSERT_GT(histogramWindowing.StandardDeviation(), 0.0); -} - 
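Each expiry step in the test above is plain sliding-window arithmetic: with three windows, the fourth population evicts the window holding the 1s, leaving 100 samples each of 2, 3 and 4 (num 300, min 2, max 4, average 3.0). A self-contained toy model of that bookkeeping (not the actual HistogramWindowingImpl, just the arithmetic the assertions check) could look like this:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <deque>
#include <limits>

// Per-window summary: count, sum, min, max.
struct WindowStats {
  uint64_t num = 0;
  double sum = 0.0;
  uint64_t min = std::numeric_limits<uint64_t>::max();
  uint64_t max = 0;
};

int main() {
  const size_t kNumWindows = 3;  // same window count as the test above
  std::deque<WindowStats> windows;

  // Four populations of 100 identical samples (values 1, 2, 3, 4), one per
  // window; once the deque is full the oldest window is expired.
  for (uint64_t value = 1; value <= 4; ++value) {
    WindowStats w;
    for (int i = 0; i < 100; ++i) {
      ++w.num;
      w.sum += static_cast<double>(value);
      w.min = std::min(w.min, value);
      w.max = std::max(w.max, value);
    }
    windows.push_back(w);
    if (windows.size() > kNumWindows) {
      windows.pop_front();  // drop the window holding the 1s
    }
  }

  // Aggregate over the surviving windows {2, 3, 4}.
  WindowStats total;
  for (const WindowStats& w : windows) {
    total.num += w.num;
    total.sum += w.sum;
    total.min = std::min(total.min, w.min);
    total.max = std::max(total.max, w.max);
  }
  assert(total.num == 300);
  assert(total.min == 2);
  assert(total.max == 4);
  assert(total.sum / total.num == 3.0);  // matches Average() == 3.0
  return 0;
}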
-TEST_F(HistogramTest, HistogramWindowingMerge) { - uint64_t num_windows = 3; - int micros_per_window = 1000000; - uint64_t min_num_per_window = 0; - - HistogramWindowingImpl histogramWindowing(num_windows, micros_per_window, - min_num_per_window); - HistogramWindowingImpl otherWindowing(num_windows, micros_per_window, - min_num_per_window); - histogramWindowing.TEST_UpdateClock(clock); - otherWindowing.TEST_UpdateClock(clock); - - PopulateHistogram(histogramWindowing, 1, 1, 100); - PopulateHistogram(otherWindowing, 1, 1, 100); - clock->SleepForMicroseconds(micros_per_window); - - PopulateHistogram(histogramWindowing, 2, 2, 100); - PopulateHistogram(otherWindowing, 2, 2, 100); - clock->SleepForMicroseconds(micros_per_window); - - PopulateHistogram(histogramWindowing, 3, 3, 100); - PopulateHistogram(otherWindowing, 3, 3, 100); - clock->SleepForMicroseconds(micros_per_window); - - histogramWindowing.Merge(otherWindowing); - ASSERT_EQ(histogramWindowing.num(), 600); - ASSERT_EQ(histogramWindowing.min(), 1); - ASSERT_EQ(histogramWindowing.max(), 3); - ASSERT_EQ(histogramWindowing.Average(), 2.0); - - // dropping oldest window with value 1, remaining 2 ~ 4 - PopulateHistogram(histogramWindowing, 4, 4, 100); - clock->SleepForMicroseconds(micros_per_window); - ASSERT_EQ(histogramWindowing.num(), 500); - ASSERT_EQ(histogramWindowing.min(), 2); - ASSERT_EQ(histogramWindowing.max(), 4); - - // dropping oldest window with value 2, remaining 3 ~ 5 - PopulateHistogram(histogramWindowing, 5, 5, 100); - clock->SleepForMicroseconds(micros_per_window); - ASSERT_EQ(histogramWindowing.num(), 400); - ASSERT_EQ(histogramWindowing.min(), 3); - ASSERT_EQ(histogramWindowing.max(), 5); -} - -TEST_F(HistogramTest, LargeStandardDeviation) { - HistogramImpl histogram; - PopulateHistogram(histogram, 1, 1000000); - ASSERT_LT(fabs(histogram.StandardDeviation() - 288675), 1); -} - -TEST_F(HistogramTest, LostUpdateStandardDeviation) { - HistogramImpl histogram; - PopulateHistogram(histogram, 100, 100, 100); - // Simulate a possible lost update (since they are not atomic) - histogram.TEST_GetStats().sum_squares_ -= 10000; - // Ideally zero, but should never be negative or NaN - ASSERT_GE(histogram.StandardDeviation(), 0.0); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/monitoring/iostats_context_test.cc b/monitoring/iostats_context_test.cc deleted file mode 100644 index 5fce33406..000000000 --- a/monitoring/iostats_context_test.cc +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -#include "rocksdb/iostats_context.h" - -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -TEST(IOStatsContextTest, ToString) { - get_iostats_context()->Reset(); - get_iostats_context()->bytes_read = 12345; - - std::string zero_included = get_iostats_context()->ToString(); - ASSERT_NE(std::string::npos, zero_included.find("= 0")); - ASSERT_NE(std::string::npos, zero_included.find("= 12345")); - - std::string zero_excluded = get_iostats_context()->ToString(true); - ASSERT_EQ(std::string::npos, zero_excluded.find("= 0")); - ASSERT_NE(std::string::npos, zero_excluded.find("= 12345")); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/monitoring/statistics_test.cc b/monitoring/statistics_test.cc deleted file mode 100644 index 98aae0c82..000000000 --- a/monitoring/statistics_test.cc +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// - -#include "rocksdb/statistics.h" - -#include "port/stack_trace.h" -#include "rocksdb/convenience.h" -#include "rocksdb/utilities/options_type.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -class StatisticsTest : public testing::Test {}; - -// Sanity check to make sure that contents and order of TickersNameMap -// match Tickers enum -TEST_F(StatisticsTest, SanityTickers) { - EXPECT_EQ(static_cast(Tickers::TICKER_ENUM_MAX), - TickersNameMap.size()); - - for (uint32_t t = 0; t < Tickers::TICKER_ENUM_MAX; t++) { - auto pair = TickersNameMap[static_cast(t)]; - ASSERT_EQ(pair.first, t) << "Miss match at " << pair.second; - } -} - -// Sanity check to make sure that contents and order of HistogramsNameMap -// match Tickers enum -TEST_F(StatisticsTest, SanityHistograms) { - EXPECT_EQ(static_cast(Histograms::HISTOGRAM_ENUM_MAX), - HistogramsNameMap.size()); - - for (uint32_t h = 0; h < Histograms::HISTOGRAM_ENUM_MAX; h++) { - auto pair = HistogramsNameMap[static_cast(h)]; - ASSERT_EQ(pair.first, h) << "Miss match at " << pair.second; - } -} - -TEST_F(StatisticsTest, NoNameStats) { - static std::unordered_map no_name_opt_info = { - {"inner", - OptionTypeInfo::AsCustomSharedPtr( - 0, OptionVerificationType::kByName, - OptionTypeFlags::kAllowNull | OptionTypeFlags::kCompareNever)}, - }; - - class DefaultNameStatistics : public Statistics { - public: - DefaultNameStatistics(const std::shared_ptr& stats = nullptr) - : inner(stats) { - RegisterOptions("", &inner, &no_name_opt_info); - } - - uint64_t getTickerCount(uint32_t /*tickerType*/) const override { - return 0; - } - void histogramData(uint32_t /*type*/, - HistogramData* const /*data*/) const override {} - void recordTick(uint32_t /*tickerType*/, uint64_t /*count*/) override {} - void setTickerCount(uint32_t /*tickerType*/, uint64_t /*count*/) override {} - uint64_t getAndResetTickerCount(uint32_t /*tickerType*/) override { - return 0; - } - std::shared_ptr inner; - }; - ConfigOptions options; - options.ignore_unsupported_options = false; - auto stats = std::make_shared(); - ASSERT_STREQ(stats->Name(), ""); - ASSERT_EQ("", stats->ToString( - options)); // A stats with no name with have no options... 
- ASSERT_OK(stats->ConfigureFromString(options, "inner=")); - ASSERT_EQ("", stats->ToString( - options)); // A stats with no name with have no options... - ASSERT_NE(stats->inner, nullptr); - ASSERT_NE("", stats->inner->ToString(options)); // ... even if it does... -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/monitoring/stats_history_test.cc b/monitoring/stats_history_test.cc deleted file mode 100644 index cfed7bad7..000000000 --- a/monitoring/stats_history_test.cc +++ /dev/null @@ -1,662 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -#include "rocksdb/stats_history.h" - -#include -#include -#include - -#include "db/column_family.h" -#include "db/db_impl/db_impl.h" -#include "db/db_test_util.h" -#include "db/periodic_task_scheduler.h" -#include "monitoring/persistent_stats_history.h" -#include "options/options_helper.h" -#include "port/stack_trace.h" -#include "rocksdb/cache.h" -#include "rocksdb/convenience.h" -#include "rocksdb/rate_limiter.h" -#include "test_util/mock_time_env.h" -#include "test_util/sync_point.h" -#include "test_util/testutil.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -class StatsHistoryTest : public DBTestBase { - public: - StatsHistoryTest() : DBTestBase("stats_history_test", /*env_do_fsync=*/true) { - mock_clock_ = std::make_shared(env_->GetSystemClock()); - mock_env_.reset(new CompositeEnvWrapper(env_, mock_clock_)); - } - - protected: - std::shared_ptr mock_clock_; - std::unique_ptr mock_env_; - - void SetUp() override { - mock_clock_->InstallTimedWaitFixCallback(); - SyncPoint::GetInstance()->SetCallBack( - "DBImpl::StartPeriodicTaskScheduler:Init", [&](void* arg) { - auto periodic_task_scheduler_ptr = - reinterpret_cast(arg); - periodic_task_scheduler_ptr->TEST_OverrideTimer(mock_clock_.get()); - }); - } -}; - -TEST_F(StatsHistoryTest, RunStatsDumpPeriodSec) { - constexpr int kPeriodSec = 5; - Options options; - options.create_if_missing = true; - options.stats_dump_period_sec = kPeriodSec; - options.env = mock_env_.get(); - int counter = 0; - SyncPoint::GetInstance()->SetCallBack("DBImpl::DumpStats:1", - [&](void* /*arg*/) { counter++; }); - Reopen(options); - ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_dump_period_sec); - - // Wait for the first stats persist to finish, as the initial delay could be - // different. 
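The cancel-through-SetOptions step exercised above works because both stats periods are mutable DBOptions. A minimal sketch against the public API, assuming a throwaway database path:

#include <cassert>

#include "rocksdb/db.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.stats_dump_period_sec = 5;     // dump stats to the info LOG every 5 seconds
  options.stats_persist_period_sec = 5;  // take an in-memory stats snapshot every 5 seconds

  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/stats_period_example", &db).ok());

  // Both periods are mutable, so the background jobs can be cancelled (period = 0)
  // or re-enabled later without reopening the database.
  assert(db->SetDBOptions({{"stats_dump_period_sec", "0"},
                           {"stats_persist_period_sec", "0"}})
             .ok());
  delete db;
  return 0;
}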
- dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); }); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - ASSERT_GE(counter, 1); - - // Test cancel job through SetOptions - ASSERT_OK(dbfull()->SetDBOptions({{"stats_dump_period_sec", "0"}})); - int old_val = counter; - for (int i = 1; i < 20; ++i) { - mock_clock_->MockSleepForSeconds(kPeriodSec); - } - ASSERT_EQ(counter, old_val); - Close(); -} - -// Test persistent stats background thread scheduling and cancelling -TEST_F(StatsHistoryTest, StatsPersistScheduling) { - constexpr int kPeriodSec = 5; - Options options; - options.create_if_missing = true; - options.stats_persist_period_sec = kPeriodSec; - options.env = mock_env_.get(); - int counter = 0; - SyncPoint::GetInstance()->SetCallBack("DBImpl::PersistStats:Entry", - [&](void* /*arg*/) { counter++; }); - Reopen(options); - ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_persist_period_sec); - - // Wait for the first stats persist to finish, as the initial delay could be - // different. - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); }); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - ASSERT_GE(counter, 1); - - // Test cancel job through SetOptions - ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}})); - int old_val = counter; - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec * 2); }); - ASSERT_EQ(counter, old_val); - - Close(); -} - -// Test enabling persistent stats for the first time -TEST_F(StatsHistoryTest, PersistentStatsFreshInstall) { - constexpr unsigned int kPeriodSec = 5; - Options options; - options.create_if_missing = true; - options.stats_persist_period_sec = 0; - options.env = mock_env_.get(); - int counter = 0; - SyncPoint::GetInstance()->SetCallBack("DBImpl::PersistStats:Entry", - [&](void* /*arg*/) { counter++; }); - Reopen(options); - ASSERT_OK(dbfull()->SetDBOptions( - {{"stats_persist_period_sec", std::to_string(kPeriodSec)}})); - ASSERT_EQ(kPeriodSec, dbfull()->GetDBOptions().stats_persist_period_sec); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - ASSERT_GE(counter, 1); - Close(); -} - -// TODO(Zhongyi): Move persistent stats related tests to a separate file -TEST_F(StatsHistoryTest, GetStatsHistoryInMemory) { - constexpr int kPeriodSec = 5; - Options options; - options.create_if_missing = true; - options.stats_persist_period_sec = kPeriodSec; - options.statistics = CreateDBStatistics(); - options.env = mock_env_.get(); - CreateColumnFamilies({"pikachu"}, options); - ASSERT_OK(Put("foo", "bar")); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - // make sure the first stats persist to finish - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); }); - - // Wait for stats persist to finish - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - - std::unique_ptr stats_iter; - ASSERT_OK( - db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - // disabled stats snapshots - ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}})); - size_t stats_count = 0; - for (; stats_iter->Valid(); stats_iter->Next()) { - auto stats_map = stats_iter->GetStatsMap(); - 
ASSERT_EQ(stats_iter->GetStatsTime(), mock_clock_->NowSeconds()); - stats_count += stats_map.size(); - } - ASSERT_GT(stats_count, 0); - // Wait a bit and verify no more stats are found - for (int i = 0; i < 10; ++i) { - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(1); }); - } - ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - size_t stats_count_new = 0; - for (; stats_iter->Valid(); stats_iter->Next()) { - stats_count_new += stats_iter->GetStatsMap().size(); - } - ASSERT_EQ(stats_count_new, stats_count); - Close(); -} - -TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) { - constexpr int kPeriodSec = 1; - Options options; - options.create_if_missing = true; - options.statistics = CreateDBStatistics(); - options.stats_persist_period_sec = kPeriodSec; - options.env = mock_env_.get(); - - CreateColumnFamilies({"pikachu"}, options); - ASSERT_OK(Put("foo", "bar")); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - // some random operation to populate statistics - ASSERT_OK(Delete("foo")); - ASSERT_OK(Put("sol", "sol")); - ASSERT_OK(Put("epic", "epic")); - ASSERT_OK(Put("ltd", "ltd")); - ASSERT_EQ("sol", Get("sol")); - ASSERT_EQ("epic", Get("epic")); - ASSERT_EQ("ltd", Get("ltd")); - Iterator* iterator = db_->NewIterator(ReadOptions()); - for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { - ASSERT_TRUE(iterator->key() == iterator->value()); - } - delete iterator; - ASSERT_OK(Flush()); - ASSERT_OK(Delete("sol")); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - // second round of ops - ASSERT_OK(Put("saigon", "saigon")); - ASSERT_OK(Put("noodle talk", "noodle talk")); - ASSERT_OK(Put("ping bistro", "ping bistro")); - iterator = db_->NewIterator(ReadOptions()); - for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { - ASSERT_TRUE(iterator->key() == iterator->value()); - } - delete iterator; - ASSERT_OK(Flush()); - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - - const int kIterations = 10; - for (int i = 0; i < kIterations; ++i) { - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - } - - std::unique_ptr stats_iter; - ASSERT_OK( - db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - size_t stats_count = 0; - int slice_count = 0; - for (; stats_iter->Valid(); stats_iter->Next()) { - slice_count++; - auto stats_map = stats_iter->GetStatsMap(); - stats_count += stats_map.size(); - } - size_t stats_history_size = dbfull()->TEST_EstimateInMemoryStatsHistorySize(); - ASSERT_GE(slice_count, kIterations - 1); - ASSERT_GE(stats_history_size, 15000); - // capping memory cost at 15000 bytes since one slice is around 10000~15000 - ASSERT_OK(dbfull()->SetDBOptions({{"stats_history_buffer_size", "15000"}})); - ASSERT_EQ(15000, dbfull()->GetDBOptions().stats_history_buffer_size); - - // Wait for stats persist to finish - for (int i = 0; i < kIterations; ++i) { - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - } - - ASSERT_OK( - db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - size_t stats_count_reopen = 0; - slice_count = 0; - for (; stats_iter->Valid(); stats_iter->Next()) { - slice_count++; - auto stats_map = stats_iter->GetStatsMap(); - stats_count_reopen += stats_map.size(); - } - size_t 
stats_history_size_reopen = - dbfull()->TEST_EstimateInMemoryStatsHistorySize(); - // only one slice can fit under the new stats_history_buffer_size - ASSERT_LT(slice_count, 2); - ASSERT_TRUE(stats_history_size_reopen < 15000 && - stats_history_size_reopen > 0); - ASSERT_TRUE(stats_count_reopen < stats_count && stats_count_reopen > 0); - Close(); - // TODO: may also want to verify stats timestamp to make sure we are purging - // the correct stats snapshot -} - -int countkeys(Iterator* iter) { - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - count++; - } - return count; -} - -TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) { - constexpr int kPeriodSec = 5; - Options options; - options.create_if_missing = true; - options.stats_persist_period_sec = kPeriodSec; - options.statistics = CreateDBStatistics(); - options.persist_stats_to_disk = true; - options.env = mock_env_.get(); - CreateColumnFamilies({"pikachu"}, options); - ASSERT_OK(Put("foo", "bar")); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ(Get("foo"), "bar"); - - // Wait for the first stats persist to finish, as the initial delay could be - // different. - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); }); - - // Wait for stats persist to finish - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - - auto iter = - db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); - int key_count1 = countkeys(iter); - delete iter; - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - iter = - db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); - int key_count2 = countkeys(iter); - delete iter; - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - iter = - db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); - int key_count3 = countkeys(iter); - delete iter; - ASSERT_GE(key_count2, key_count1); - ASSERT_GE(key_count3, key_count2); - ASSERT_EQ(key_count3 - key_count2, key_count2 - key_count1); - std::unique_ptr stats_iter; - ASSERT_OK( - db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - size_t stats_count = 0; - int slice_count = 0; - int non_zero_count = 0; - for (int i = 2; stats_iter->Valid(); stats_iter->Next(), i++) { - slice_count++; - auto stats_map = stats_iter->GetStatsMap(); - ASSERT_EQ(stats_iter->GetStatsTime(), kPeriodSec * i - 1); - for (auto& stat : stats_map) { - if (stat.second != 0) { - non_zero_count++; - } - } - stats_count += stats_map.size(); - } - ASSERT_EQ(slice_count, 3); - // 2 extra keys for format version - ASSERT_EQ(stats_count, key_count3 - 2); - // verify reopen will not cause data loss - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_OK( - db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - size_t stats_count_reopen = 0; - int slice_count_reopen = 0; - int non_zero_count_recover = 0; - for (; stats_iter->Valid(); stats_iter->Next()) { - slice_count_reopen++; - auto stats_map = stats_iter->GetStatsMap(); - for (auto& stat : stats_map) { - if (stat.second != 0) { - non_zero_count_recover++; - } - } - stats_count_reopen += stats_map.size(); - } - - ASSERT_EQ(non_zero_count, non_zero_count_recover); - ASSERT_EQ(slice_count, slice_count_reopen); - ASSERT_EQ(stats_count, 
stats_count_reopen); - Close(); -} - -// Test persisted stats matches the value found in options.statistics and -// the stats value retains after DB reopen -TEST_F(StatsHistoryTest, PersitentStatsVerifyValue) { - constexpr int kPeriodSec = 5; - Options options; - options.create_if_missing = true; - options.stats_persist_period_sec = kPeriodSec; - options.statistics = CreateDBStatistics(); - options.persist_stats_to_disk = true; - std::map stats_map_before; - ASSERT_TRUE(options.statistics->getTickerMap(&stats_map_before)); - options.env = mock_env_.get(); - CreateColumnFamilies({"pikachu"}, options); - ASSERT_OK(Put("foo", "bar")); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_EQ(Get("foo"), "bar"); - - // Wait for the first stats persist to finish, as the initial delay could be - // different. - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); }); - - // Wait for stats persist to finish - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - auto iter = - db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); - countkeys(iter); - delete iter; - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - iter = - db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); - countkeys(iter); - delete iter; - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - iter = - db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); - countkeys(iter); - delete iter; - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - - std::map stats_map_after; - ASSERT_TRUE(options.statistics->getTickerMap(&stats_map_after)); - std::unique_ptr stats_iter; - ASSERT_OK( - db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - std::string sample = "rocksdb.num.iterator.deleted"; - uint64_t recovered_value = 0; - for (int i = 2; stats_iter->Valid(); stats_iter->Next(), ++i) { - auto stats_map = stats_iter->GetStatsMap(); - ASSERT_EQ(stats_iter->GetStatsTime(), kPeriodSec * i - 1); - for (const auto& stat : stats_map) { - if (sample.compare(stat.first) == 0) { - recovered_value += stat.second; - } - } - } - ASSERT_EQ(recovered_value, stats_map_after[sample]); - - // test stats value retains after recovery - ReopenWithColumnFamilies({"default", "pikachu"}, options); - ASSERT_OK( - db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - uint64_t new_recovered_value = 0; - for (int i = 2; stats_iter->Valid(); stats_iter->Next(), i++) { - auto stats_map = stats_iter->GetStatsMap(); - ASSERT_EQ(stats_iter->GetStatsTime(), kPeriodSec * i - 1); - for (const auto& stat : stats_map) { - if (sample.compare(stat.first) == 0) { - new_recovered_value += stat.second; - } - } - } - ASSERT_EQ(recovered_value, new_recovered_value); - - // TODO(Zhongyi): also add test to read raw values from disk and verify - // correctness - Close(); -} - -// TODO(Zhongyi): add test for different format versions - -TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) { - constexpr int kPeriodSec = 5; - Options options; - options.create_if_missing = true; - options.stats_persist_period_sec = kPeriodSec; - options.statistics = CreateDBStatistics(); - options.persist_stats_to_disk = true; - options.env = mock_env_.get(); - 
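GetStatsHistory, used throughout these tests, is public API from rocksdb/stats_history.h. A minimal reading loop, assuming stats_persist_period_sec is set and a throwaway database path:

#include <cassert>
#include <cstdint>
#include <iostream>
#include <limits>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/statistics.h"
#include "rocksdb/stats_history.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();
  options.stats_persist_period_sec = 5;   // snapshot stats every 5 seconds
  options.persist_stats_to_disk = false;  // true would also write the hidden stats CF

  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/stats_history_example", &db).ok());

  // Each snapshot is keyed by the wall-clock second it was taken.
  std::unique_ptr<rocksdb::StatsHistoryIterator> it;
  assert(db->GetStatsHistory(0 /* start_time */,
                             std::numeric_limits<uint64_t>::max() /* end_time */, &it)
             .ok());
  for (; it->Valid(); it->Next()) {
    const auto& snapshot = it->GetStatsMap();  // std::map<std::string, uint64_t>
    std::cout << "t=" << it->GetStatsTime() << " entries=" << snapshot.size() << "\n";
  }
  delete db;
  return 0;
}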
ASSERT_OK(TryReopen(options)); - CreateColumnFamilies({"one", "two", "three"}, options); - ASSERT_OK(Put(1, "foo", "bar")); - ReopenWithColumnFamilies({"default", "one", "two", "three"}, options); - ASSERT_EQ(Get(2, "foo"), "bar"); - CreateColumnFamilies({"four"}, options); - ReopenWithColumnFamilies({"default", "one", "two", "three", "four"}, options); - ASSERT_EQ(Get(2, "foo"), "bar"); - - // make sure the first stats persist to finish - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); }); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - auto iter = - db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); - int key_count = countkeys(iter); - delete iter; - ASSERT_GE(key_count, 0); - uint64_t num_write_wal = 0; - std::string sample = "rocksdb.write.wal"; - std::unique_ptr stats_iter; - ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - for (; stats_iter->Valid(); stats_iter->Next()) { - auto stats_map = stats_iter->GetStatsMap(); - for (const auto& stat : stats_map) { - if (sample.compare(stat.first) == 0) { - num_write_wal += stat.second; - } - } - } - stats_iter.reset(); - ASSERT_EQ(num_write_wal, 1); - - options.persist_stats_to_disk = false; - ReopenWithColumnFamilies({"default", "one", "two", "three", "four"}, options); - int cf_count = 0; - for (auto cfd : *dbfull()->versions_->GetColumnFamilySet()) { - (void)cfd; - cf_count++; - } - // persistent stats cf will be implicitly opened even if - // persist_stats_to_disk is false - ASSERT_EQ(cf_count, 6); - ASSERT_EQ(Get(2, "foo"), "bar"); - - // attempt to create column family using same name, should fail - ColumnFamilyOptions cf_opts(options); - ColumnFamilyHandle* handle; - ASSERT_NOK(db_->CreateColumnFamily(cf_opts, kPersistentStatsColumnFamilyName, - &handle)); - - options.persist_stats_to_disk = true; - ReopenWithColumnFamilies({"default", "one", "two", "three", "four"}, options); - ASSERT_NOK(db_->CreateColumnFamily(cf_opts, kPersistentStatsColumnFamilyName, - &handle)); - // verify stats is not affected by prior failed CF creation - ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter)); - ASSERT_TRUE(stats_iter != nullptr); - num_write_wal = 0; - for (; stats_iter->Valid(); stats_iter->Next()) { - auto stats_map = stats_iter->GetStatsMap(); - for (const auto& stat : stats_map) { - if (sample.compare(stat.first) == 0) { - num_write_wal += stat.second; - } - } - } - ASSERT_EQ(num_write_wal, 1); - - Close(); - Destroy(options); -} - -TEST_F(StatsHistoryTest, PersistentStatsReadOnly) { - ASSERT_OK(Put("bar", "v2")); - Close(); - - auto options = CurrentOptions(); - options.stats_persist_period_sec = 5; - options.persist_stats_to_disk = true; - assert(options.env == env_); - ASSERT_OK(ReadOnlyReopen(options)); - ASSERT_EQ("v2", Get("bar")); - Close(); - - // Reopen and flush memtable. - ASSERT_OK(TryReopen(options)); - ASSERT_OK(Flush()); - Close(); - // Now check keys in read only mode. 
- ASSERT_OK(ReadOnlyReopen(options)); -} - -TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) { - constexpr int kPeriodSec = 5; - Options options; - options.create_if_missing = true; - options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb - options.stats_persist_period_sec = kPeriodSec; - options.statistics = CreateDBStatistics(); - options.persist_stats_to_disk = true; - options.env = mock_env_.get(); - CreateColumnFamilies({"pikachu"}, options); - ReopenWithColumnFamilies({"default", "pikachu"}, options); - - // Wait for the first stats persist to finish, as the initial delay could be - // different. - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); }); - - ColumnFamilyData* cfd_default = - static_cast(dbfull()->DefaultColumnFamily()) - ->cfd(); - ColumnFamilyData* cfd_stats = static_cast( - dbfull()->PersistentStatsColumnFamily()) - ->cfd(); - ColumnFamilyData* cfd_test = - static_cast(handles_[1])->cfd(); - - ASSERT_OK(Put("foo", "v0")); - ASSERT_OK(Put("bar", "v0")); - ASSERT_EQ("v0", Get("bar")); - ASSERT_EQ("v0", Get("foo")); - ASSERT_OK(Put(1, "Eevee", "v0")); - ASSERT_EQ("v0", Get(1, "Eevee")); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - // writing to all three cf, flush default cf - // LogNumbers: default: 16, stats: 10, pikachu: 5 - // Since in recovery process, cfd_stats column is created after WAL is - // created, synced and MANIFEST is persisted, its log number which depends on - // logfile_number_ will be different. Since "pikachu" is never flushed, thus - // its log_number should be the smallest of the three. - ASSERT_OK(Flush()); - ASSERT_LT(cfd_test->GetLogNumber(), cfd_stats->GetLogNumber()); - ASSERT_LT(cfd_test->GetLogNumber(), cfd_default->GetLogNumber()); - - ASSERT_OK(Put("foo1", "v1")); - ASSERT_OK(Put("bar1", "v1")); - ASSERT_EQ("v1", Get("bar1")); - ASSERT_EQ("v1", Get("foo1")); - ASSERT_OK(Put(1, "Vaporeon", "v1")); - ASSERT_EQ("v1", Get(1, "Vaporeon")); - // writing to default and test cf, flush test cf - // LogNumbers: default: 14, stats: 16, pikachu: 16 - ASSERT_OK(Flush(1)); - ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber()); - ASSERT_GT(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber()); - - ASSERT_OK(Put("foo2", "v2")); - ASSERT_OK(Put("bar2", "v2")); - ASSERT_EQ("v2", Get("bar2")); - ASSERT_EQ("v2", Get("foo2")); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - // writing to default and stats cf, flushing default cf - // LogNumbers: default: 19, stats: 19, pikachu: 19 - ASSERT_OK(Flush()); - ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber()); - ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber()); - - ASSERT_OK(Put("foo3", "v3")); - ASSERT_OK(Put("bar3", "v3")); - ASSERT_EQ("v3", Get("bar3")); - ASSERT_EQ("v3", Get("foo3")); - ASSERT_OK(Put(1, "Jolteon", "v3")); - ASSERT_EQ("v3", Get(1, "Jolteon")); - - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); }); - // writing to all three cf, flushing test cf - // LogNumbers: default: 19, stats: 19, pikachu: 22 - ASSERT_OK(Flush(1)); - ASSERT_LT(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber()); - ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber()); - Close(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return 
RUN_ALL_TESTS(); -} diff --git a/options/configurable_test.cc b/options/configurable_test.cc deleted file mode 100644 index a03d8f0a5..000000000 --- a/options/configurable_test.cc +++ /dev/null @@ -1,861 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "options/configurable_test.h" - -#include -#include -#include -#include - -#include "options/configurable_helper.h" -#include "options/options_helper.h" -#include "options/options_parser.h" -#include "rocksdb/configurable.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -#ifndef GFLAGS -bool FLAGS_enable_print = false; -#else -#include "util/gflags_compat.h" -using GFLAGS_NAMESPACE::ParseCommandLineFlags; -DEFINE_bool(enable_print, false, "Print options generated to console."); -#endif // GFLAGS - -namespace ROCKSDB_NAMESPACE { -namespace test { -class StringLogger : public Logger { - public: - using Logger::Logv; - void Logv(const char* format, va_list ap) override { - char buffer[1000]; - vsnprintf(buffer, sizeof(buffer), format, ap); - string_.append(buffer); - } - const std::string& str() const { return string_; } - void clear() { string_.clear(); } - - private: - std::string string_; -}; -static std::unordered_map struct_option_info = { - {"struct", OptionTypeInfo::Struct("struct", &simple_option_info, 0, - OptionVerificationType::kNormal, - OptionTypeFlags::kMutable)}, -}; - -static std::unordered_map imm_struct_option_info = - { - {"struct", OptionTypeInfo::Struct("struct", &simple_option_info, 0, - OptionVerificationType::kNormal, - OptionTypeFlags::kNone)}, -}; - -class SimpleConfigurable : public TestConfigurable { - public: - static SimpleConfigurable* Create( - const std::string& name = "simple", - int mode = TestConfigMode::kDefaultMode, - const std::unordered_map* map = - &simple_option_info) { - return new SimpleConfigurable(name, mode, map); - } - - SimpleConfigurable(const std::string& name, int mode, - const std::unordered_map* - map = &simple_option_info) - : TestConfigurable(name, mode, map) { - if ((mode & TestConfigMode::kUniqueMode) != 0) { - unique_.reset(SimpleConfigurable::Create("Unique" + name_)); - RegisterOptions(name_ + "Unique", &unique_, &unique_option_info); - } - if ((mode & TestConfigMode::kSharedMode) != 0) { - shared_.reset(SimpleConfigurable::Create("Shared" + name_)); - RegisterOptions(name_ + "Shared", &shared_, &shared_option_info); - } - if ((mode & TestConfigMode::kRawPtrMode) != 0) { - pointer_ = SimpleConfigurable::Create("Pointer" + name_); - RegisterOptions(name_ + "Pointer", &pointer_, &pointer_option_info); - } - } - -}; // End class SimpleConfigurable - -using ConfigTestFactoryFunc = std::function; - -class ConfigurableTest : public testing::Test { - public: - ConfigurableTest() { config_options_.invoke_prepare_options = false; } - - ConfigOptions config_options_; -}; - -TEST_F(ConfigurableTest, GetOptionsPtrTest) { - std::string opt_str; - std::unique_ptr configurable(SimpleConfigurable::Create()); - ASSERT_NE(configurable->GetOptions("simple"), nullptr); - ASSERT_EQ(configurable->GetOptions("bad-opt"), nullptr); -} - 
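ConfigureFromString, which these tests drive directly, is the machinery behind the public options-from-string helpers in rocksdb/convenience.h. A minimal sketch, assuming the ConfigOptions-based overload of GetDBOptionsFromString (option values are illustrative):

#include <cassert>

#include "rocksdb/convenience.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::ConfigOptions config_options;
  rocksdb::DBOptions base;
  rocksdb::DBOptions patched;

  // Same "name=value;name=value" syntax the Configurable tests exercise.
  rocksdb::Status s = rocksdb::GetDBOptionsFromString(
      config_options, base, "max_background_jobs=8;max_open_files=512", &patched);
  assert(s.ok());
  assert(patched.max_background_jobs == 8);
  assert(patched.max_open_files == 512);

  // Unknown names are rejected unless ConfigOptions::ignore_unknown_options is set.
  s = rocksdb::GetDBOptionsFromString(config_options, base, "not_an_option=1", &patched);
  assert(!s.ok());
  config_options.ignore_unknown_options = true;
  s = rocksdb::GetDBOptionsFromString(config_options, base, "not_an_option=1", &patched);
  assert(s.ok());
  return 0;
}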
-TEST_F(ConfigurableTest, ConfigureFromMapTest) { - std::unique_ptr configurable(SimpleConfigurable::Create()); - auto* opts = configurable->GetOptions("simple"); - ASSERT_OK(configurable->ConfigureFromMap(config_options_, {})); - ASSERT_NE(opts, nullptr); - std::unordered_map options_map = { - {"int", "1"}, {"bool", "true"}, {"string", "string"}}; - ASSERT_OK(configurable->ConfigureFromMap(config_options_, options_map)); - ASSERT_EQ(opts->i, 1); - ASSERT_EQ(opts->b, true); - ASSERT_EQ(opts->s, "string"); -} - -TEST_F(ConfigurableTest, ConfigureFromStringTest) { - std::unique_ptr configurable(SimpleConfigurable::Create()); - auto* opts = configurable->GetOptions("simple"); - ASSERT_OK(configurable->ConfigureFromString(config_options_, "")); - ASSERT_NE(opts, nullptr); - ASSERT_OK(configurable->ConfigureFromString(config_options_, - "int=1;bool=true;string=s")); - ASSERT_EQ(opts->i, 1); - ASSERT_EQ(opts->b, true); - ASSERT_EQ(opts->s, "s"); -} - -TEST_F(ConfigurableTest, ConfigureIgnoreTest) { - std::unique_ptr configurable(SimpleConfigurable::Create()); - std::unordered_map options_map = {{"unused", "u"}}; - ConfigOptions ignore = config_options_; - ignore.ignore_unknown_options = true; - ASSERT_NOK(configurable->ConfigureFromMap(config_options_, options_map)); - ASSERT_OK(configurable->ConfigureFromMap(ignore, options_map)); - ASSERT_NOK(configurable->ConfigureFromString(config_options_, "unused=u")); - ASSERT_OK(configurable->ConfigureFromString(ignore, "unused=u")); -} - -TEST_F(ConfigurableTest, ConfigureNestedOptionsTest) { - std::unique_ptr base, copy; - std::string opt_str; - std::string mismatch; - - base.reset(SimpleConfigurable::Create("simple", TestConfigMode::kAllOptMode)); - copy.reset(SimpleConfigurable::Create("simple", TestConfigMode::kAllOptMode)); - ASSERT_OK(base->ConfigureFromString(config_options_, - "shared={int=10; string=10};" - "unique={int=20; string=20};" - "pointer={int=30; string=30};")); - ASSERT_OK(base->GetOptionString(config_options_, &opt_str)); - ASSERT_OK(copy->ConfigureFromString(config_options_, opt_str)); - ASSERT_TRUE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - -TEST_F(ConfigurableTest, GetOptionsTest) { - std::unique_ptr simple; - - simple.reset( - SimpleConfigurable::Create("simple", TestConfigMode::kAllOptMode)); - int i = 11; - for (auto opt : {"", "shared.", "unique.", "pointer."}) { - std::string value; - std::string expected = std::to_string(i); - std::string opt_name = opt; - ASSERT_OK( - simple->ConfigureOption(config_options_, opt_name + "int", expected)); - ASSERT_OK(simple->GetOption(config_options_, opt_name + "int", &value)); - ASSERT_EQ(expected, value); - ASSERT_OK(simple->ConfigureOption(config_options_, opt_name + "string", - expected)); - ASSERT_OK(simple->GetOption(config_options_, opt_name + "string", &value)); - ASSERT_EQ(expected, value); - - ASSERT_NOK( - simple->ConfigureOption(config_options_, opt_name + "bad", expected)); - ASSERT_NOK(simple->GetOption(config_options_, "bad option", &value)); - ASSERT_TRUE(value.empty()); - i += 11; - } -} - -TEST_F(ConfigurableTest, ConfigureBadOptionsTest) { - std::unique_ptr configurable(SimpleConfigurable::Create()); - auto* opts = configurable->GetOptions("simple"); - ASSERT_NE(opts, nullptr); - ASSERT_OK(configurable->ConfigureOption(config_options_, "int", "42")); - ASSERT_EQ(opts->i, 42); - ASSERT_NOK(configurable->ConfigureOption(config_options_, "int", "fred")); - ASSERT_NOK(configurable->ConfigureOption(config_options_, "bool", "fred")); - ASSERT_NOK( - 
configurable->ConfigureFromString(config_options_, "int=33;unused=u")); - ASSERT_EQ(opts->i, 42); -} - -TEST_F(ConfigurableTest, InvalidOptionTest) { - std::unique_ptr configurable(SimpleConfigurable::Create()); - std::unordered_map options_map = { - {"bad-option", "bad"}}; - ASSERT_NOK(configurable->ConfigureFromMap(config_options_, options_map)); - ASSERT_NOK( - configurable->ConfigureFromString(config_options_, "bad-option=bad")); - ASSERT_NOK( - configurable->ConfigureOption(config_options_, "bad-option", "bad")); -} - -static std::unordered_map validated_option_info = { - {"validated", - {0, OptionType::kBoolean, OptionVerificationType::kNormal, - OptionTypeFlags::kNone}}, -}; -static std::unordered_map prepared_option_info = { - {"prepared", - {0, OptionType::kInt, OptionVerificationType::kNormal, - OptionTypeFlags::kMutable}}, -}; -static std::unordered_map - dont_prepare_option_info = { - {"unique", - {0, OptionType::kConfigurable, OptionVerificationType::kNormal, - (OptionTypeFlags::kUnique | OptionTypeFlags::kDontPrepare)}}, - -}; - -class ValidatedConfigurable : public SimpleConfigurable { - public: - ValidatedConfigurable(const std::string& name, unsigned char mode, - bool dont_prepare = false) - : SimpleConfigurable(name, TestConfigMode::kDefaultMode), - validated(false), - prepared(0) { - RegisterOptions("Validated", &validated, &validated_option_info); - RegisterOptions("Prepared", &prepared, &prepared_option_info); - if ((mode & TestConfigMode::kUniqueMode) != 0) { - unique_.reset(new ValidatedConfigurable( - "Unique" + name_, TestConfigMode::kDefaultMode, false)); - if (dont_prepare) { - RegisterOptions(name_ + "Unique", &unique_, &dont_prepare_option_info); - } else { - RegisterOptions(name_ + "Unique", &unique_, &unique_option_info); - } - } - } - - Status PrepareOptions(const ConfigOptions& config_options) override { - if (++prepared <= 0) { - return Status::InvalidArgument("Cannot prepare option"); - } else { - return SimpleConfigurable::PrepareOptions(config_options); - } - } - - Status ValidateOptions(const DBOptions& db_opts, - const ColumnFamilyOptions& cf_opts) const override { - if (!validated) { - return Status::InvalidArgument("Not Validated"); - } else { - return SimpleConfigurable::ValidateOptions(db_opts, cf_opts); - } - } - - private: - bool validated; - int prepared; -}; - -TEST_F(ConfigurableTest, ValidateOptionsTest) { - std::unique_ptr configurable( - new ValidatedConfigurable("validated", TestConfigMode::kDefaultMode)); - ColumnFamilyOptions cf_opts; - DBOptions db_opts; - ASSERT_OK( - configurable->ConfigureOption(config_options_, "validated", "false")); - ASSERT_NOK(configurable->ValidateOptions(db_opts, cf_opts)); - ASSERT_OK( - configurable->ConfigureOption(config_options_, "validated", "true")); - ASSERT_OK(configurable->ValidateOptions(db_opts, cf_opts)); -} - -TEST_F(ConfigurableTest, PrepareOptionsTest) { - std::unique_ptr c( - new ValidatedConfigurable("Simple", TestConfigMode::kUniqueMode, false)); - auto cp = c->GetOptions("Prepared"); - auto u = c->GetOptions>("SimpleUnique"); - auto up = u->get()->GetOptions("Prepared"); - config_options_.invoke_prepare_options = false; - - ASSERT_NE(cp, nullptr); - ASSERT_NE(up, nullptr); - ASSERT_EQ(*cp, 0); - ASSERT_EQ(*up, 0); - ASSERT_OK(c->ConfigureFromMap(config_options_, {})); - ASSERT_EQ(*cp, 0); - ASSERT_EQ(*up, 0); - config_options_.invoke_prepare_options = true; - ASSERT_OK(c->ConfigureFromMap(config_options_, {})); - ASSERT_EQ(*cp, 1); - ASSERT_EQ(*up, 1); - 
ASSERT_OK(c->ConfigureFromString(config_options_, "prepared=0")); - ASSERT_EQ(*up, 2); - ASSERT_EQ(*cp, 1); - - ASSERT_NOK(c->ConfigureFromString(config_options_, "prepared=-2")); - - c.reset( - new ValidatedConfigurable("Simple", TestConfigMode::kUniqueMode, true)); - cp = c->GetOptions("Prepared"); - u = c->GetOptions>("SimpleUnique"); - up = u->get()->GetOptions("Prepared"); - - ASSERT_OK(c->ConfigureFromString(config_options_, "prepared=0")); - ASSERT_EQ(*cp, 1); - ASSERT_EQ(*up, 0); -} - -TEST_F(ConfigurableTest, CopyObjectTest) { - class CopyConfigurable : public Configurable { - public: - CopyConfigurable() : prepared_(0), validated_(0) {} - Status PrepareOptions(const ConfigOptions& options) override { - prepared_++; - return Configurable::PrepareOptions(options); - } - Status ValidateOptions(const DBOptions& db_opts, - const ColumnFamilyOptions& cf_opts) const override { - validated_++; - return Configurable::ValidateOptions(db_opts, cf_opts); - } - int prepared_; - mutable int validated_; - }; - - CopyConfigurable c1; - ConfigOptions config_options; - Options options; - - ASSERT_OK(c1.PrepareOptions(config_options)); - ASSERT_OK(c1.ValidateOptions(options, options)); - ASSERT_EQ(c1.prepared_, 1); - ASSERT_EQ(c1.validated_, 1); - CopyConfigurable c2 = c1; - ASSERT_OK(c1.PrepareOptions(config_options)); - ASSERT_OK(c1.ValidateOptions(options, options)); - ASSERT_EQ(c2.prepared_, 1); - ASSERT_EQ(c2.validated_, 1); - ASSERT_EQ(c1.prepared_, 2); - ASSERT_EQ(c1.validated_, 2); -} - -TEST_F(ConfigurableTest, MutableOptionsTest) { - static std::unordered_map imm_option_info = { - {"imm", OptionTypeInfo::Struct("imm", &simple_option_info, 0, - OptionVerificationType::kNormal, - OptionTypeFlags::kNone)}, - }; - - class MutableConfigurable : public SimpleConfigurable { - public: - MutableConfigurable() - : SimpleConfigurable("mutable", TestConfigMode::kDefaultMode | - TestConfigMode::kUniqueMode | - TestConfigMode::kSharedMode) { - RegisterOptions("struct", &options_, &struct_option_info); - RegisterOptions("imm", &options_, &imm_option_info); - } - }; - MutableConfigurable mc; - ConfigOptions options = config_options_; - - ASSERT_OK(mc.ConfigureOption(options, "bool", "true")); - ASSERT_OK(mc.ConfigureOption(options, "int", "42")); - auto* opts = mc.GetOptions("mutable"); - ASSERT_NE(opts, nullptr); - ASSERT_EQ(opts->i, 42); - ASSERT_EQ(opts->b, true); - ASSERT_OK(mc.ConfigureOption(options, "struct", "{bool=false;}")); - ASSERT_OK(mc.ConfigureOption(options, "imm", "{int=55;}")); - - options.mutable_options_only = true; - - // Now only mutable options should be settable. 
- ASSERT_NOK(mc.ConfigureOption(options, "bool", "true")); - ASSERT_OK(mc.ConfigureOption(options, "int", "24")); - ASSERT_EQ(opts->i, 24); - ASSERT_EQ(opts->b, false); - ASSERT_NOK(mc.ConfigureFromString(options, "bool=false;int=33;")); - ASSERT_EQ(opts->i, 24); - ASSERT_EQ(opts->b, false); - - // Setting options through an immutable struct fails - ASSERT_NOK(mc.ConfigureOption(options, "imm", "{int=55;}")); - ASSERT_NOK(mc.ConfigureOption(options, "imm.int", "55")); - ASSERT_EQ(opts->i, 24); - ASSERT_EQ(opts->b, false); - - // Setting options through an mutable struct succeeds - ASSERT_OK(mc.ConfigureOption(options, "struct", "{int=44;}")); - ASSERT_EQ(opts->i, 44); - ASSERT_OK(mc.ConfigureOption(options, "struct.int", "55")); - ASSERT_EQ(opts->i, 55); - - // Setting nested immutable configurable options fail - ASSERT_NOK(mc.ConfigureOption(options, "shared", "{bool=true;}")); - ASSERT_NOK(mc.ConfigureOption(options, "shared.bool", "true")); - - // Setting nested mutable configurable options succeeds - ASSERT_OK(mc.ConfigureOption(options, "unique", "{bool=true}")); - ASSERT_OK(mc.ConfigureOption(options, "unique.bool", "true")); -} - -TEST_F(ConfigurableTest, DeprecatedOptionsTest) { - static std::unordered_map - deprecated_option_info = { - {"deprecated", - {offsetof(struct TestOptions, b), OptionType::kBoolean, - OptionVerificationType::kDeprecated, OptionTypeFlags::kNone}}}; - std::unique_ptr orig; - orig.reset(SimpleConfigurable::Create("simple", TestConfigMode::kDefaultMode, - &deprecated_option_info)); - auto* opts = orig->GetOptions("simple"); - ASSERT_NE(opts, nullptr); - opts->d = true; - ASSERT_OK(orig->ConfigureOption(config_options_, "deprecated", "false")); - ASSERT_TRUE(opts->d); - ASSERT_OK(orig->ConfigureFromString(config_options_, "deprecated=false")); - ASSERT_TRUE(opts->d); -} - -TEST_F(ConfigurableTest, AliasOptionsTest) { - static std::unordered_map alias_option_info = { - {"bool", - {offsetof(struct TestOptions, b), OptionType::kBoolean, - OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, - {"alias", - {offsetof(struct TestOptions, b), OptionType::kBoolean, - OptionVerificationType::kAlias, OptionTypeFlags::kNone, 0}}}; - std::unique_ptr orig; - orig.reset(SimpleConfigurable::Create("simple", TestConfigMode::kDefaultMode, - &alias_option_info)); - auto* opts = orig->GetOptions("simple"); - ASSERT_NE(opts, nullptr); - ASSERT_OK(orig->ConfigureOption(config_options_, "bool", "false")); - ASSERT_FALSE(opts->b); - ASSERT_OK(orig->ConfigureOption(config_options_, "alias", "true")); - ASSERT_TRUE(opts->b); - std::string opts_str; - ASSERT_OK(orig->GetOptionString(config_options_, &opts_str)); - ASSERT_EQ(opts_str.find("alias"), std::string::npos); - - ASSERT_OK(orig->ConfigureOption(config_options_, "bool", "false")); - ASSERT_FALSE(opts->b); - ASSERT_OK(orig->GetOption(config_options_, "alias", &opts_str)); - ASSERT_EQ(opts_str, "false"); -} - -TEST_F(ConfigurableTest, NestedUniqueConfigTest) { - std::unique_ptr simple; - simple.reset( - SimpleConfigurable::Create("Outer", TestConfigMode::kAllOptMode)); - const auto outer = simple->GetOptions("Outer"); - const auto unique = - simple->GetOptions>("OuterUnique"); - ASSERT_NE(outer, nullptr); - ASSERT_NE(unique, nullptr); - ASSERT_OK( - simple->ConfigureFromString(config_options_, "int=24;string=outer")); - ASSERT_OK(simple->ConfigureFromString(config_options_, - "unique={int=42;string=nested}")); - const auto inner = unique->get()->GetOptions("UniqueOuter"); - ASSERT_NE(inner, nullptr); - ASSERT_EQ(outer->i, 24); 
- ASSERT_EQ(outer->s, "outer"); - ASSERT_EQ(inner->i, 42); - ASSERT_EQ(inner->s, "nested"); -} - -TEST_F(ConfigurableTest, NestedSharedConfigTest) { - std::unique_ptr simple; - simple.reset(SimpleConfigurable::Create( - "Outer", TestConfigMode::kDefaultMode | TestConfigMode::kSharedMode)); - ASSERT_OK( - simple->ConfigureFromString(config_options_, "int=24;string=outer")); - ASSERT_OK(simple->ConfigureFromString(config_options_, - "shared={int=42;string=nested}")); - const auto outer = simple->GetOptions("Outer"); - const auto shared = - simple->GetOptions>("OuterShared"); - ASSERT_NE(outer, nullptr); - ASSERT_NE(shared, nullptr); - const auto inner = shared->get()->GetOptions("SharedOuter"); - ASSERT_NE(inner, nullptr); - ASSERT_EQ(outer->i, 24); - ASSERT_EQ(outer->s, "outer"); - ASSERT_EQ(inner->i, 42); - ASSERT_EQ(inner->s, "nested"); -} - -TEST_F(ConfigurableTest, NestedRawConfigTest) { - std::unique_ptr simple; - simple.reset(SimpleConfigurable::Create( - "Outer", TestConfigMode::kDefaultMode | TestConfigMode::kRawPtrMode)); - ASSERT_OK( - simple->ConfigureFromString(config_options_, "int=24;string=outer")); - ASSERT_OK(simple->ConfigureFromString(config_options_, - "pointer={int=42;string=nested}")); - const auto outer = simple->GetOptions("Outer"); - const auto pointer = simple->GetOptions("OuterPointer"); - ASSERT_NE(outer, nullptr); - ASSERT_NE(pointer, nullptr); - const auto inner = (*pointer)->GetOptions("PointerOuter"); - ASSERT_NE(inner, nullptr); - ASSERT_EQ(outer->i, 24); - ASSERT_EQ(outer->s, "outer"); - ASSERT_EQ(inner->i, 42); - ASSERT_EQ(inner->s, "nested"); -} - -TEST_F(ConfigurableTest, MatchesTest) { - std::string mismatch; - std::unique_ptr base, copy; - base.reset(SimpleConfigurable::Create( - "simple", TestConfigMode::kDefaultMode | TestConfigMode::kNestedMode)); - copy.reset(SimpleConfigurable::Create( - "simple", TestConfigMode::kDefaultMode | TestConfigMode::kNestedMode)); - ASSERT_OK(base->ConfigureFromString( - config_options_, - "int=11;string=outer;unique={int=22;string=u};shared={int=33;string=s}")); - ASSERT_OK(copy->ConfigureFromString( - config_options_, - "int=11;string=outer;unique={int=22;string=u};shared={int=33;string=s}")); - ASSERT_TRUE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_OK(base->ConfigureOption(config_options_, "shared", "int=44")); - ASSERT_FALSE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_EQ(mismatch, "shared.int"); - std::string c1value, c2value; - ASSERT_OK(base->GetOption(config_options_, mismatch, &c1value)); - ASSERT_OK(copy->GetOption(config_options_, mismatch, &c2value)); - ASSERT_NE(c1value, c2value); -} - -static Configurable* SimpleStructFactory() { - return SimpleConfigurable::Create( - "simple-struct", TestConfigMode::kDefaultMode, &struct_option_info); -} - -TEST_F(ConfigurableTest, ConfigureStructTest) { - std::unique_ptr base(SimpleStructFactory()); - std::unique_ptr copy(SimpleStructFactory()); - std::string opt_str, value; - std::string mismatch; - std::unordered_set names; - - ASSERT_OK( - base->ConfigureFromString(config_options_, "struct={int=10; string=10}")); - ASSERT_OK(base->GetOptionString(config_options_, &opt_str)); - ASSERT_OK(copy->ConfigureFromString(config_options_, opt_str)); - ASSERT_TRUE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_OK(base->GetOptionNames(config_options_, &names)); - ASSERT_EQ(names.size(), 1); - ASSERT_EQ(*(names.begin()), "struct"); - ASSERT_OK( - base->ConfigureFromString(config_options_, 
"struct={int=20; string=20}")); - ASSERT_OK(base->GetOption(config_options_, "struct", &value)); - ASSERT_OK(copy->ConfigureOption(config_options_, "struct", value)); - ASSERT_TRUE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); - - ASSERT_NOK(base->ConfigureFromString(config_options_, - "struct={int=10; string=10; bad=11}")); - ASSERT_OK(base->ConfigureOption(config_options_, "struct.int", "42")); - ASSERT_NOK(base->ConfigureOption(config_options_, "struct.bad", "42")); - ASSERT_NOK(base->GetOption(config_options_, "struct.bad", &value)); - ASSERT_OK(base->GetOption(config_options_, "struct.int", &value)); - ASSERT_EQ(value, "42"); -} - -TEST_F(ConfigurableTest, ConfigurableEnumTest) { - std::unique_ptr base, copy; - base.reset(SimpleConfigurable::Create("e", TestConfigMode::kEnumMode)); - copy.reset(SimpleConfigurable::Create("e", TestConfigMode::kEnumMode)); - - std::string opts_str; - std::string mismatch; - - ASSERT_OK(base->ConfigureFromString(config_options_, "enum=B")); - ASSERT_FALSE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_OK(base->GetOptionString(config_options_, &opts_str)); - ASSERT_OK(copy->ConfigureFromString(config_options_, opts_str)); - ASSERT_TRUE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_NOK(base->ConfigureOption(config_options_, "enum", "bad")); - ASSERT_NOK(base->ConfigureOption(config_options_, "unknown", "bad")); -} - -static std::unordered_map noserialize_option_info = - { - {"int", - {offsetof(struct TestOptions, i), OptionType::kInt, - OptionVerificationType::kNormal, OptionTypeFlags::kDontSerialize}}, -}; - -TEST_F(ConfigurableTest, TestNoSerialize) { - std::unique_ptr base; - base.reset(SimpleConfigurable::Create("c", TestConfigMode::kDefaultMode, - &noserialize_option_info)); - std::string opts_str, value; - ASSERT_OK(base->ConfigureFromString(config_options_, "int=10")); - ASSERT_OK(base->GetOptionString(config_options_, &opts_str)); - ASSERT_EQ(opts_str, ""); - ASSERT_NOK(base->GetOption(config_options_, "int", &value)); -} - -TEST_F(ConfigurableTest, TestNoCompare) { - std::unordered_map nocomp_option_info = { - {"int", - {offsetof(struct TestOptions, i), OptionType::kInt, - OptionVerificationType::kNormal, OptionTypeFlags::kCompareNever}}, - }; - std::unordered_map normal_option_info = { - {"int", - {offsetof(struct TestOptions, i), OptionType::kInt, - OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, - }; - - std::unique_ptr base, copy; - base.reset(SimpleConfigurable::Create("c", TestConfigMode::kDefaultMode, - &nocomp_option_info)); - copy.reset(SimpleConfigurable::Create("c", TestConfigMode::kDefaultMode, - &normal_option_info)); - ASSERT_OK(base->ConfigureFromString(config_options_, "int=10")); - ASSERT_OK(copy->ConfigureFromString(config_options_, "int=20")); - std::string bvalue, cvalue, mismatch; - ASSERT_OK(base->GetOption(config_options_, "int", &bvalue)); - ASSERT_OK(copy->GetOption(config_options_, "int", &cvalue)); - ASSERT_EQ(bvalue, "10"); - ASSERT_EQ(cvalue, "20"); - ASSERT_TRUE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_FALSE(copy->AreEquivalent(config_options_, base.get(), &mismatch)); -} - -TEST_F(ConfigurableTest, NullOptionMapTest) { - std::unique_ptr base; - std::unordered_set names; - std::string str; - - base.reset( - SimpleConfigurable::Create("c", TestConfigMode::kDefaultMode, nullptr)); - ASSERT_NOK(base->ConfigureFromString(config_options_, "int=10")); - ASSERT_NOK(base->ConfigureFromString(config_options_, "int=20")); - 
ASSERT_NOK(base->ConfigureOption(config_options_, "int", "20")); - ASSERT_NOK(base->GetOption(config_options_, "int", &str)); - ASSERT_NE(base->GetOptions("c"), nullptr); - ASSERT_OK(base->GetOptionNames(config_options_, &names)); - ASSERT_EQ(names.size(), 0UL); - ASSERT_OK(base->PrepareOptions(config_options_)); - ASSERT_OK(base->ValidateOptions(DBOptions(), ColumnFamilyOptions())); - std::unique_ptr copy; - copy.reset( - SimpleConfigurable::Create("c", TestConfigMode::kDefaultMode, nullptr)); - ASSERT_OK(base->GetOptionString(config_options_, &str)); - ASSERT_OK(copy->ConfigureFromString(config_options_, str)); - ASSERT_TRUE(base->AreEquivalent(config_options_, copy.get(), &str)); -} - -static std::unordered_map TestFactories = { - {"Simple", []() { return SimpleConfigurable::Create("simple"); }}, - {"Struct", []() { return SimpleStructFactory(); }}, - {"Unique", - []() { - return SimpleConfigurable::Create( - "simple", TestConfigMode::kSimpleMode | TestConfigMode::kUniqueMode); - }}, - {"Shared", - []() { - return SimpleConfigurable::Create( - "simple", TestConfigMode::kSimpleMode | TestConfigMode::kSharedMode); - }}, - {"Nested", - []() { - return SimpleConfigurable::Create( - "simple", TestConfigMode::kSimpleMode | TestConfigMode::kNestedMode); - }}, - {"Mutable", - []() { - return SimpleConfigurable::Create("simple", - TestConfigMode::kMutableMode | - TestConfigMode::kSimpleMode | - TestConfigMode::kNestedMode); - }}, - {"ThreeDeep", - []() { - Configurable* simple = SimpleConfigurable::Create( - "Simple", - TestConfigMode::kUniqueMode | TestConfigMode::kDefaultMode); - auto* unique = - simple->GetOptions>("SimpleUnique"); - unique->reset(SimpleConfigurable::Create( - "Child", - TestConfigMode::kUniqueMode | TestConfigMode::kDefaultMode)); - unique = unique->get()->GetOptions>( - "ChildUnique"); - unique->reset( - SimpleConfigurable::Create("Child", TestConfigMode::kDefaultMode)); - return simple; - }}, - {"DBOptions", - []() { - auto config = DBOptionsAsConfigurable(DBOptions()); - return config.release(); - }}, - {"CFOptions", - []() { - auto config = CFOptionsAsConfigurable(ColumnFamilyOptions()); - return config.release(); - }}, - {"BlockBased", []() { return NewBlockBasedTableFactory(); }}, -}; - -class ConfigurableParamTest : public ConfigurableTest, - virtual public ::testing::WithParamInterface< - std::pair> { - public: - ConfigurableParamTest() { - type_ = GetParam().first; - configuration_ = GetParam().second; - assert(TestFactories.find(type_) != TestFactories.end()); - object_.reset(CreateConfigurable()); - } - - Configurable* CreateConfigurable() { - const auto& iter = TestFactories.find(type_); - return (iter->second)(); - } - - void TestConfigureOptions(const ConfigOptions& opts); - std::string type_; - std::string configuration_; - std::unique_ptr object_; -}; - -void ConfigurableParamTest::TestConfigureOptions( - const ConfigOptions& config_options) { - std::unique_ptr base, copy; - std::unordered_set names; - std::string opt_str, mismatch; - - base.reset(CreateConfigurable()); - copy.reset(CreateConfigurable()); - - ASSERT_OK(base->ConfigureFromString(config_options, configuration_)); - ASSERT_OK(base->GetOptionString(config_options, &opt_str)); - ASSERT_OK(copy->ConfigureFromString(config_options, opt_str)); - ASSERT_OK(copy->GetOptionString(config_options, &opt_str)); - ASSERT_TRUE(base->AreEquivalent(config_options, copy.get(), &mismatch)); - - copy.reset(CreateConfigurable()); - ASSERT_OK(base->GetOptionNames(config_options, &names)); - std::unordered_map 
unused; - bool found_one = false; - for (auto name : names) { - std::string value; - Status s = base->GetOption(config_options, name, &value); - if (s.ok()) { - s = copy->ConfigureOption(config_options, name, value); - if (s.ok() || s.IsNotSupported()) { - found_one = true; - } else { - unused[name] = value; - } - } else { - ASSERT_TRUE(s.IsNotSupported()); - } - } - ASSERT_TRUE(found_one || names.empty()); - while (found_one && !unused.empty()) { - found_one = false; - for (auto iter = unused.begin(); iter != unused.end();) { - if (copy->ConfigureOption(config_options, iter->first, iter->second) - .ok()) { - found_one = true; - iter = unused.erase(iter); - } else { - ++iter; - } - } - } - ASSERT_EQ(0, unused.size()); - ASSERT_TRUE(base->AreEquivalent(config_options, copy.get(), &mismatch)); -} - -TEST_P(ConfigurableParamTest, GetDefaultOptionsTest) { - TestConfigureOptions(config_options_); -} - -TEST_P(ConfigurableParamTest, ConfigureFromPropsTest) { - std::string opt_str, mismatch; - std::unordered_set names; - std::unique_ptr copy(CreateConfigurable()); - - ASSERT_OK(object_->ConfigureFromString(config_options_, configuration_)); - config_options_.delimiter = "\n"; - ASSERT_OK(object_->GetOptionString(config_options_, &opt_str)); - std::istringstream iss(opt_str); - std::unordered_map copy_map; - std::string line; - for (int line_num = 0; std::getline(iss, line); line_num++) { - std::string name; - std::string value; - ASSERT_OK( - RocksDBOptionsParser::ParseStatement(&name, &value, line, line_num)); - copy_map[name] = value; - } - ASSERT_OK(copy->ConfigureFromMap(config_options_, copy_map)); - ASSERT_TRUE(object_->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - -INSTANTIATE_TEST_CASE_P( - ParamTest, ConfigurableParamTest, - testing::Values( - std::pair("Simple", - "int=42;bool=true;string=s"), - std::pair( - "Mutable", "int=42;unique={int=33;string=unique}"), - std::pair( - "Struct", "struct={int=33;bool=true;string=s;}"), - std::pair("Shared", - "int=33;bool=true;string=outer;" - "shared={int=42;string=shared}"), - std::pair("Unique", - "int=33;bool=true;string=outer;" - "unique={int=42;string=unique}"), - std::pair("Nested", - "int=11;bool=true;string=outer;" - "pointer={int=22;string=pointer};" - "unique={int=33;string=unique};" - "shared={int=44;string=shared}"), - std::pair("ThreeDeep", - "int=11;bool=true;string=outer;" - "unique={int=22;string=inner;" - "unique={int=33;string=unique}};"), - std::pair("DBOptions", - "max_background_jobs=100;" - "max_open_files=200;"), - std::pair("CFOptions", - "table_factory=BlockBasedTable;" - "disable_auto_compactions=true;"), - std::pair("BlockBased", - "block_size=1024;" - "no_block_cache=true;"))); - -} // namespace test -} // namespace ROCKSDB_NAMESPACE -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); -#ifdef GFLAGS - ParseCommandLineFlags(&argc, &argv, true); -#endif // GFLAGS - return RUN_ALL_TESTS(); -} diff --git a/options/customizable_test.cc b/options/customizable_test.cc deleted file mode 100644 index d18335410..000000000 --- a/options/customizable_test.cc +++ /dev/null @@ -1,2116 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "rocksdb/customizable.h" - -#include -#include -#include -#include -#include - -#include "db/db_test_util.h" -#include "memory/jemalloc_nodump_allocator.h" -#include "memory/memkind_kmem_allocator.h" -#include "options/options_helper.h" -#include "options/options_parser.h" -#include "port/stack_trace.h" -#include "rocksdb/convenience.h" -#include "rocksdb/env_encryption.h" -#include "rocksdb/file_checksum.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/flush_block_policy.h" -#include "rocksdb/memory_allocator.h" -#include "rocksdb/secondary_cache.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/sst_partitioner.h" -#include "rocksdb/statistics.h" -#include "rocksdb/utilities/customizable_util.h" -#include "rocksdb/utilities/object_registry.h" -#include "rocksdb/utilities/options_type.h" -#include "table/block_based/filter_policy_internal.h" -#include "table/block_based/flush_block_policy.h" -#include "table/mock_table.h" -#include "test_util/mock_time_env.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/file_checksum_helper.h" -#include "util/string_util.h" -#include "utilities/compaction_filters/remove_emptyvalue_compactionfilter.h" -#include "utilities/memory_allocators.h" -#include "utilities/merge_operators/bytesxor.h" -#include "utilities/merge_operators/sortlist.h" -#include "utilities/merge_operators/string_append/stringappend.h" -#include "utilities/merge_operators/string_append/stringappend2.h" - -#ifndef GFLAGS -bool FLAGS_enable_print = false; -#else -#include "util/gflags_compat.h" -using GFLAGS_NAMESPACE::ParseCommandLineFlags; -DEFINE_bool(enable_print, false, "Print options generated to console."); -#endif // GFLAGS - -namespace ROCKSDB_NAMESPACE { -namespace { -class StringLogger : public Logger { - public: - using Logger::Logv; - void Logv(const char* format, va_list ap) override { - char buffer[1000]; - vsnprintf(buffer, sizeof(buffer), format, ap); - string_.append(buffer); - } - const std::string& str() const { return string_; } - void clear() { string_.clear(); } - - private: - std::string string_; -}; - -class TestCustomizable : public Customizable { - public: - TestCustomizable(const std::string& name) : name_(name) {} - // Method to allow CheckedCast to work for this class - static const char* kClassName() { - return "TestCustomizable"; - } - - const char* Name() const override { return name_.c_str(); } - static const char* Type() { return "test.custom"; } - static Status CreateFromString(const ConfigOptions& opts, - const std::string& value, - std::unique_ptr* result); - static Status CreateFromString(const ConfigOptions& opts, - const std::string& value, - std::shared_ptr* result); - static Status CreateFromString(const ConfigOptions& opts, - const std::string& value, - TestCustomizable** result); - bool IsInstanceOf(const std::string& name) const override { - if (name == kClassName()) { - return true; - } else { - return Customizable::IsInstanceOf(name); - } - } - - protected: - const std::string name_; -}; - -struct AOptions { - static const char* kName() { return "A"; } - int i = 0; - bool b = false; -}; - -static std::unordered_map a_option_info = { - {"int", - {offsetof(struct AOptions, i), OptionType::kInt, - OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, - {"bool", - {offsetof(struct AOptions, b), OptionType::kBoolean, - 
OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, -}; - -class ACustomizable : public TestCustomizable { - public: - explicit ACustomizable(const std::string& id) - : TestCustomizable("A"), id_(id) { - RegisterOptions(&opts_, &a_option_info); - } - std::string GetId() const override { return id_; } - static const char* kClassName() { return "A"; } - - private: - AOptions opts_; - const std::string id_; -}; - -struct BOptions { - std::string s; - bool b = false; -}; - -static std::unordered_map b_option_info = { - {"string", - {offsetof(struct BOptions, s), OptionType::kString, - OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, - {"bool", - {offsetof(struct BOptions, b), OptionType::kBoolean, - OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, -}; - -class BCustomizable : public TestCustomizable { - private: - public: - explicit BCustomizable(const std::string& name) : TestCustomizable(name) { - RegisterOptions(name, &opts_, &b_option_info); - } - static const char* kClassName() { return "B"; } - - private: - BOptions opts_; -}; - -static int A_count = 0; -static int RegisterCustomTestObjects(ObjectLibrary& library, - const std::string& /*arg*/) { - library.AddFactory( - ObjectLibrary::PatternEntry("A", true).AddSeparator("_"), - [](const std::string& name, std::unique_ptr* guard, - std::string* /* msg */) { - guard->reset(new ACustomizable(name)); - A_count++; - return guard->get(); - }); - library.AddFactory( - "B", [](const std::string& name, std::unique_ptr* guard, - std::string* /* msg */) { - guard->reset(new BCustomizable(name)); - return guard->get(); - }); - - library.AddFactory( - "S", [](const std::string& name, - std::unique_ptr* /* guard */, - std::string* /* msg */) { return new BCustomizable(name); }); - size_t num_types; - return static_cast(library.GetFactoryCount(&num_types)); -} - -struct SimpleOptions { - static const char* kName() { return "simple"; } - bool b = true; - std::unique_ptr cu; - std::shared_ptr cs; - TestCustomizable* cp = nullptr; -}; - -static std::unordered_map simple_option_info = { - {"bool", - {offsetof(struct SimpleOptions, b), OptionType::kBoolean, - OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, - {"unique", - OptionTypeInfo::AsCustomUniquePtr( - offsetof(struct SimpleOptions, cu), OptionVerificationType::kNormal, - OptionTypeFlags::kAllowNull)}, - {"shared", - OptionTypeInfo::AsCustomSharedPtr( - offsetof(struct SimpleOptions, cs), OptionVerificationType::kNormal, - OptionTypeFlags::kAllowNull)}, - {"pointer", - OptionTypeInfo::AsCustomRawPtr( - offsetof(struct SimpleOptions, cp), OptionVerificationType::kNormal, - OptionTypeFlags::kAllowNull)}, -}; - -class SimpleConfigurable : public Configurable { - private: - SimpleOptions simple_; - - public: - SimpleConfigurable() { RegisterOptions(&simple_, &simple_option_info); } - - explicit SimpleConfigurable( - const std::unordered_map* map) { - RegisterOptions(&simple_, map); - } -}; - -static void GetMapFromProperties( - const std::string& props, - std::unordered_map* map) { - std::istringstream iss(props); - std::unordered_map copy_map; - std::string line; - map->clear(); - for (int line_num = 0; std::getline(iss, line); line_num++) { - std::string name; - std::string value; - ASSERT_OK( - RocksDBOptionsParser::ParseStatement(&name, &value, line, line_num)); - (*map)[name] = value; - } -} -} // namespace - -Status TestCustomizable::CreateFromString( - const ConfigOptions& config_options, const std::string& value, - std::shared_ptr* result) { - return 
LoadSharedObject(config_options, value, result); -} - -Status TestCustomizable::CreateFromString( - const ConfigOptions& config_options, const std::string& value, - std::unique_ptr* result) { - return LoadUniqueObject(config_options, value, result); -} - -Status TestCustomizable::CreateFromString(const ConfigOptions& config_options, - const std::string& value, - TestCustomizable** result) { - return LoadStaticObject(config_options, value, result); -} - -class CustomizableTest : public testing::Test { - public: - CustomizableTest() { - config_options_.invoke_prepare_options = false; - config_options_.registry->AddLibrary("CustomizableTest", - RegisterCustomTestObjects, ""); - } - - ConfigOptions config_options_; -}; - -// Tests that a Customizable can be created by: -// - a simple name -// - a XXX.id option -// - a property with a name -TEST_F(CustomizableTest, CreateByNameTest) { - ObjectLibrary::Default()->AddFactory( - ObjectLibrary::PatternEntry("TEST", false).AddSeparator("_"), - [](const std::string& name, std::unique_ptr* guard, - std::string* /* msg */) { - guard->reset(new TestCustomizable(name)); - return guard->get(); - }); - std::unique_ptr configurable(new SimpleConfigurable()); - SimpleOptions* simple = configurable->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_OK( - configurable->ConfigureFromString(config_options_, "unique={id=TEST_1}")); - ASSERT_NE(simple->cu, nullptr); - ASSERT_EQ(simple->cu->GetId(), "TEST_1"); - ASSERT_OK( - configurable->ConfigureFromString(config_options_, "unique.id=TEST_2")); - ASSERT_NE(simple->cu, nullptr); - ASSERT_EQ(simple->cu->GetId(), "TEST_2"); - ASSERT_OK( - configurable->ConfigureFromString(config_options_, "unique=TEST_3")); - ASSERT_NE(simple->cu, nullptr); - ASSERT_EQ(simple->cu->GetId(), "TEST_3"); -} - -TEST_F(CustomizableTest, ToStringTest) { - std::unique_ptr custom(new TestCustomizable("test")); - ASSERT_EQ(custom->ToString(config_options_), "test"); -} - -TEST_F(CustomizableTest, SimpleConfigureTest) { - std::unordered_map opt_map = { - {"unique", "id=A;int=1;bool=true"}, - {"shared", "id=B;string=s"}, - }; - std::unique_ptr configurable(new SimpleConfigurable()); - ASSERT_OK(configurable->ConfigureFromMap(config_options_, opt_map)); - SimpleOptions* simple = configurable->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_NE(simple->cu, nullptr); - ASSERT_EQ(simple->cu->GetId(), "A"); - std::string opt_str; - std::string mismatch; - ASSERT_OK(configurable->GetOptionString(config_options_, &opt_str)); - std::unique_ptr copy(new SimpleConfigurable()); - ASSERT_OK(copy->ConfigureFromString(config_options_, opt_str)); - ASSERT_TRUE( - configurable->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - -TEST_F(CustomizableTest, ConfigureFromPropsTest) { - std::unordered_map opt_map = { - {"unique.id", "A"}, {"unique.A.int", "1"}, {"unique.A.bool", "true"}, - {"shared.id", "B"}, {"shared.B.string", "s"}, - }; - std::unique_ptr configurable(new SimpleConfigurable()); - ASSERT_OK(configurable->ConfigureFromMap(config_options_, opt_map)); - SimpleOptions* simple = configurable->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_NE(simple->cu, nullptr); - ASSERT_EQ(simple->cu->GetId(), "A"); - std::string opt_str; - std::string mismatch; - config_options_.delimiter = "\n"; - std::unordered_map props; - ASSERT_OK(configurable->GetOptionString(config_options_, &opt_str)); - GetMapFromProperties(opt_str, &props); - std::unique_ptr copy(new SimpleConfigurable()); - ASSERT_OK(copy->ConfigureFromMap(config_options_, props)); - 
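The tests in this area all lean on the same flow: register a factory for a name pattern, then let a Configurable member be (re)created from a configuration string. A minimal sketch of that flow follows, reusing the helpers defined above; the angle-bracket template arguments were stripped in this rendering of the diff, so the `<TestCustomizable>` instantiations below are reconstructions, not verbatim source.

// Sketch: register a factory so ids of the form "TEST_<suffix>" resolve to
// TestCustomizable, then create the object through a SimpleConfigurable.
ObjectLibrary::Default()->AddFactory<TestCustomizable>(
    ObjectLibrary::PatternEntry("TEST", false).AddSeparator("_"),
    [](const std::string& name, std::unique_ptr<TestCustomizable>* guard,
       std::string* /* errmsg */) {
      guard->reset(new TestCustomizable(name));
      return guard->get();
    });

SimpleConfigurable holder;
// A nested struct, a dotted ".id" property, and a bare name all reach the
// same factory and replace the "unique" member with a new instance.
Status s = holder.ConfigureFromString(config_options_, "unique={id=TEST_1}");
s = holder.ConfigureFromString(config_options_, "unique.id=TEST_2");
s = holder.ConfigureFromString(config_options_, "unique=TEST_3");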
ASSERT_TRUE( - configurable->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - -TEST_F(CustomizableTest, ConfigureFromShortTest) { - std::unordered_map opt_map = { - {"unique.id", "A"}, {"unique.A.int", "1"}, {"unique.A.bool", "true"}, - {"shared.id", "B"}, {"shared.B.string", "s"}, - }; - std::unique_ptr configurable(new SimpleConfigurable()); - ASSERT_OK(configurable->ConfigureFromMap(config_options_, opt_map)); - SimpleOptions* simple = configurable->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_NE(simple->cu, nullptr); - ASSERT_EQ(simple->cu->GetId(), "A"); -} - -TEST_F(CustomizableTest, AreEquivalentOptionsTest) { - std::unordered_map opt_map = { - {"unique", "id=A;int=1;bool=true"}, - {"shared", "id=A;int=1;bool=true"}, - }; - std::string mismatch; - ConfigOptions config_options = config_options_; - std::unique_ptr c1(new SimpleConfigurable()); - std::unique_ptr c2(new SimpleConfigurable()); - ASSERT_OK(c1->ConfigureFromMap(config_options, opt_map)); - ASSERT_OK(c2->ConfigureFromMap(config_options, opt_map)); - ASSERT_TRUE(c1->AreEquivalent(config_options, c2.get(), &mismatch)); - SimpleOptions* simple = c1->GetOptions(); - ASSERT_TRUE( - simple->cu->AreEquivalent(config_options, simple->cs.get(), &mismatch)); - ASSERT_OK(simple->cu->ConfigureOption(config_options, "int", "2")); - ASSERT_FALSE( - simple->cu->AreEquivalent(config_options, simple->cs.get(), &mismatch)); - ASSERT_FALSE(c1->AreEquivalent(config_options, c2.get(), &mismatch)); - ConfigOptions loosely = config_options; - loosely.sanity_level = ConfigOptions::kSanityLevelLooselyCompatible; - ASSERT_TRUE(c1->AreEquivalent(loosely, c2.get(), &mismatch)); - ASSERT_TRUE(simple->cu->AreEquivalent(loosely, simple->cs.get(), &mismatch)); - - ASSERT_OK(c1->ConfigureOption(config_options, "shared", "id=B;string=3")); - ASSERT_TRUE(c1->AreEquivalent(loosely, c2.get(), &mismatch)); - ASSERT_FALSE(c1->AreEquivalent(config_options, c2.get(), &mismatch)); - ASSERT_FALSE(simple->cs->AreEquivalent(loosely, simple->cu.get(), &mismatch)); - simple->cs.reset(); - ASSERT_TRUE(c1->AreEquivalent(loosely, c2.get(), &mismatch)); - ASSERT_FALSE(c1->AreEquivalent(config_options, c2.get(), &mismatch)); -} - -// Tests that we can initialize a customizable from its options -TEST_F(CustomizableTest, ConfigureStandaloneCustomTest) { - std::unique_ptr base, copy; - const auto& registry = config_options_.registry; - ASSERT_OK(registry->NewUniqueObject("A", &base)); - ASSERT_OK(registry->NewUniqueObject("A", ©)); - ASSERT_OK(base->ConfigureFromString(config_options_, "int=33;bool=true")); - std::string opt_str; - std::string mismatch; - ASSERT_OK(base->GetOptionString(config_options_, &opt_str)); - ASSERT_OK(copy->ConfigureFromString(config_options_, opt_str)); - ASSERT_TRUE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); -} - -// Tests that we fail appropriately if the pattern is not registered -TEST_F(CustomizableTest, BadNameTest) { - config_options_.ignore_unsupported_options = false; - std::unique_ptr c1(new SimpleConfigurable()); - ASSERT_NOK( - c1->ConfigureFromString(config_options_, "unique.shared.id=bad name")); - config_options_.ignore_unsupported_options = true; - ASSERT_OK( - c1->ConfigureFromString(config_options_, "unique.shared.id=bad name")); -} - -// Tests that we fail appropriately if a bad option is passed to the underlying -// configurable -TEST_F(CustomizableTest, BadOptionTest) { - std::unique_ptr c1(new SimpleConfigurable()); - ConfigOptions ignore = config_options_; - ignore.ignore_unknown_options = 
true; - - ASSERT_NOK(c1->ConfigureFromString(config_options_, "A.int=11")); - ASSERT_NOK(c1->ConfigureFromString(config_options_, "shared={id=B;int=1}")); - ASSERT_OK(c1->ConfigureFromString(ignore, "shared={id=A;string=s}")); - ASSERT_NOK(c1->ConfigureFromString(config_options_, "B.int=11")); - ASSERT_OK(c1->ConfigureFromString(ignore, "B.int=11")); - ASSERT_NOK(c1->ConfigureFromString(config_options_, "A.string=s")); - ASSERT_OK(c1->ConfigureFromString(ignore, "A.string=s")); - // Test as detached - ASSERT_NOK( - c1->ConfigureFromString(config_options_, "shared.id=A;A.string=b}")); - ASSERT_OK(c1->ConfigureFromString(ignore, "shared.id=A;A.string=s}")); -} - -TEST_F(CustomizableTest, FailingFactoryTest) { - std::shared_ptr registry = ObjectRegistry::NewInstance(); - std::unique_ptr c1(new SimpleConfigurable()); - ConfigOptions ignore = config_options_; - - Status s; - ignore.registry->AddLibrary("failing")->AddFactory( - "failing", - [](const std::string& /*uri*/, - std::unique_ptr* /*guard */, std::string* errmsg) { - *errmsg = "Bad Factory"; - return nullptr; - }); - - // If we are ignoring unknown and unsupported options, will see - // different errors for failing versus missing - ignore.ignore_unknown_options = false; - ignore.ignore_unsupported_options = false; - s = c1->ConfigureFromString(ignore, "shared.id=failing"); - ASSERT_TRUE(s.IsInvalidArgument()); - s = c1->ConfigureFromString(ignore, "unique.id=failing"); - ASSERT_TRUE(s.IsInvalidArgument()); - s = c1->ConfigureFromString(ignore, "shared.id=missing"); - ASSERT_TRUE(s.IsNotSupported()); - s = c1->ConfigureFromString(ignore, "unique.id=missing"); - ASSERT_TRUE(s.IsNotSupported()); - - // If we are ignoring unsupported options, will see - // errors for failing but not missing - ignore.ignore_unknown_options = false; - ignore.ignore_unsupported_options = true; - s = c1->ConfigureFromString(ignore, "shared.id=failing"); - ASSERT_TRUE(s.IsInvalidArgument()); - s = c1->ConfigureFromString(ignore, "unique.id=failing"); - ASSERT_TRUE(s.IsInvalidArgument()); - - ASSERT_OK(c1->ConfigureFromString(ignore, "shared.id=missing")); - ASSERT_OK(c1->ConfigureFromString(ignore, "unique.id=missing")); - - // If we are ignoring unknown options, will see no errors - // for failing or missing - ignore.ignore_unknown_options = true; - ignore.ignore_unsupported_options = false; - ASSERT_OK(c1->ConfigureFromString(ignore, "shared.id=failing")); - ASSERT_OK(c1->ConfigureFromString(ignore, "unique.id=failing")); - ASSERT_OK(c1->ConfigureFromString(ignore, "shared.id=missing")); - ASSERT_OK(c1->ConfigureFromString(ignore, "unique.id=missing")); -} - -// Tests that different IDs lead to different objects -TEST_F(CustomizableTest, UniqueIdTest) { - std::unique_ptr base(new SimpleConfigurable()); - ASSERT_OK(base->ConfigureFromString(config_options_, - "unique={id=A_1;int=1;bool=true}")); - SimpleOptions* simple = base->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_NE(simple->cu, nullptr); - ASSERT_EQ(simple->cu->GetId(), std::string("A_1")); - std::string opt_str; - std::string mismatch; - ASSERT_OK(base->GetOptionString(config_options_, &opt_str)); - std::unique_ptr copy(new SimpleConfigurable()); - ASSERT_OK(copy->ConfigureFromString(config_options_, opt_str)); - ASSERT_TRUE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_OK(base->ConfigureFromString(config_options_, - "unique={id=A_2;int=1;bool=true}")); - ASSERT_FALSE(base->AreEquivalent(config_options_, copy.get(), &mismatch)); - ASSERT_EQ(simple->cu->GetId(), 
std::string("A_2")); -} - -TEST_F(CustomizableTest, IsInstanceOfTest) { - std::shared_ptr tc = std::make_shared("A_1"); - - ASSERT_EQ(tc->GetId(), std::string("A_1")); - ASSERT_TRUE(tc->IsInstanceOf("A")); - ASSERT_TRUE(tc->IsInstanceOf("TestCustomizable")); - ASSERT_FALSE(tc->IsInstanceOf("B")); - ASSERT_FALSE(tc->IsInstanceOf("A_1")); - ASSERT_EQ(tc->CheckedCast(), tc.get()); - ASSERT_EQ(tc->CheckedCast(), tc.get()); - ASSERT_EQ(tc->CheckedCast(), nullptr); - - tc.reset(new BCustomizable("B")); - ASSERT_TRUE(tc->IsInstanceOf("B")); - ASSERT_TRUE(tc->IsInstanceOf("TestCustomizable")); - ASSERT_FALSE(tc->IsInstanceOf("A")); - ASSERT_EQ(tc->CheckedCast(), tc.get()); - ASSERT_EQ(tc->CheckedCast(), tc.get()); - ASSERT_EQ(tc->CheckedCast(), nullptr); -} - -TEST_F(CustomizableTest, PrepareOptionsTest) { - static std::unordered_map p_option_info = { - {"can_prepare", - {0, OptionType::kBoolean, OptionVerificationType::kNormal, - OptionTypeFlags::kNone}}, - }; - - class PrepareCustomizable : public TestCustomizable { - public: - bool can_prepare_ = true; - - PrepareCustomizable() : TestCustomizable("P") { - RegisterOptions("Prepare", &can_prepare_, &p_option_info); - } - - Status PrepareOptions(const ConfigOptions& opts) override { - if (!can_prepare_) { - return Status::InvalidArgument("Cannot Prepare"); - } else { - return TestCustomizable::PrepareOptions(opts); - } - } - }; - - ObjectLibrary::Default()->AddFactory( - "P", - [](const std::string& /*name*/, std::unique_ptr* guard, - std::string* /* msg */) { - guard->reset(new PrepareCustomizable()); - return guard->get(); - }); - - std::unique_ptr base(new SimpleConfigurable()); - ConfigOptions prepared(config_options_); - prepared.invoke_prepare_options = true; - - ASSERT_OK(base->ConfigureFromString( - prepared, "unique=A_1; shared={id=B;string=s}; pointer.id=S")); - SimpleOptions* simple = base->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_NE(simple->cu, nullptr); - ASSERT_NE(simple->cs, nullptr); - ASSERT_NE(simple->cp, nullptr); - delete simple->cp; - base.reset(new SimpleConfigurable()); - ASSERT_OK(base->ConfigureFromString( - config_options_, "unique=A_1; shared={id=B;string=s}; pointer.id=S")); - - simple = base->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_NE(simple->cu, nullptr); - ASSERT_NE(simple->cs, nullptr); - ASSERT_NE(simple->cp, nullptr); - - ASSERT_OK(base->PrepareOptions(config_options_)); - delete simple->cp; - base.reset(new SimpleConfigurable()); - simple = base->GetOptions(); - ASSERT_NE(simple, nullptr); - - ASSERT_NOK( - base->ConfigureFromString(prepared, "unique={id=P; can_prepare=false}")); - ASSERT_EQ(simple->cu, nullptr); - - ASSERT_OK( - base->ConfigureFromString(prepared, "unique={id=P; can_prepare=true}")); - ASSERT_NE(simple->cu, nullptr); - - ASSERT_OK(base->ConfigureFromString(config_options_, - "unique={id=P; can_prepare=true}")); - ASSERT_NE(simple->cu, nullptr); - ASSERT_OK(simple->cu->PrepareOptions(prepared)); - - ASSERT_OK(base->ConfigureFromString(config_options_, - "unique={id=P; can_prepare=false}")); - ASSERT_NE(simple->cu, nullptr); - ASSERT_NOK(simple->cu->PrepareOptions(prepared)); -} - -namespace { -static std::unordered_map inner_option_info = { - {"inner", - OptionTypeInfo::AsCustomSharedPtr( - 0, OptionVerificationType::kNormal, OptionTypeFlags::kStringNameOnly)} -}; - -struct InnerOptions { - static const char* kName() { return "InnerOptions"; } - std::shared_ptr inner; -}; - -class InnerCustomizable : public Customizable { - public: - explicit InnerCustomizable(const 
std::shared_ptr& w) { - iopts_.inner = w; - RegisterOptions(&iopts_, &inner_option_info); - } - static const char* kClassName() { return "Inner"; } - const char* Name() const override { return kClassName(); } - - bool IsInstanceOf(const std::string& name) const override { - if (name == kClassName()) { - return true; - } else { - return Customizable::IsInstanceOf(name); - } - } - - protected: - const Customizable* Inner() const override { return iopts_.inner.get(); } - - private: - InnerOptions iopts_; -}; - -struct WrappedOptions1 { - static const char* kName() { return "WrappedOptions1"; } - int i = 42; -}; - -class WrappedCustomizable1 : public InnerCustomizable { - public: - explicit WrappedCustomizable1(const std::shared_ptr& w) - : InnerCustomizable(w) { - RegisterOptions(&wopts_, nullptr); - } - const char* Name() const override { return kClassName(); } - static const char* kClassName() { return "Wrapped1"; } - - private: - WrappedOptions1 wopts_; -}; - -struct WrappedOptions2 { - static const char* kName() { return "WrappedOptions2"; } - std::string s = "42"; -}; -class WrappedCustomizable2 : public InnerCustomizable { - public: - explicit WrappedCustomizable2(const std::shared_ptr& w) - : InnerCustomizable(w) {} - const void* GetOptionsPtr(const std::string& name) const override { - if (name == WrappedOptions2::kName()) { - return &wopts_; - } else { - return InnerCustomizable::GetOptionsPtr(name); - } - } - - const char* Name() const override { return kClassName(); } - static const char* kClassName() { return "Wrapped2"; } - - private: - WrappedOptions2 wopts_; -}; -} // namespace - -TEST_F(CustomizableTest, WrappedInnerTest) { - std::shared_ptr ac = - std::make_shared("A"); - - ASSERT_TRUE(ac->IsInstanceOf("A")); - ASSERT_TRUE(ac->IsInstanceOf("TestCustomizable")); - ASSERT_EQ(ac->CheckedCast(), ac.get()); - ASSERT_EQ(ac->CheckedCast(), nullptr); - ASSERT_EQ(ac->CheckedCast(), nullptr); - ASSERT_EQ(ac->CheckedCast(), nullptr); - std::shared_ptr wc1 = - std::make_shared(ac); - - ASSERT_TRUE(wc1->IsInstanceOf(WrappedCustomizable1::kClassName())); - ASSERT_EQ(wc1->CheckedCast(), wc1.get()); - ASSERT_EQ(wc1->CheckedCast(), nullptr); - ASSERT_EQ(wc1->CheckedCast(), wc1.get()); - ASSERT_EQ(wc1->CheckedCast(), ac.get()); - - std::shared_ptr wc2 = - std::make_shared(wc1); - ASSERT_TRUE(wc2->IsInstanceOf(WrappedCustomizable2::kClassName())); - ASSERT_EQ(wc2->CheckedCast(), wc2.get()); - ASSERT_EQ(wc2->CheckedCast(), wc1.get()); - ASSERT_EQ(wc2->CheckedCast(), wc2.get()); - ASSERT_EQ(wc2->CheckedCast(), ac.get()); -} - -TEST_F(CustomizableTest, CustomizableInnerTest) { - std::shared_ptr c = - std::make_shared(std::make_shared("a")); - std::shared_ptr wc1 = std::make_shared(c); - std::shared_ptr wc2 = std::make_shared(c); - auto inner = c->GetOptions(); - ASSERT_NE(inner, nullptr); - - auto aopts = c->GetOptions(); - ASSERT_NE(aopts, nullptr); - ASSERT_EQ(aopts, wc1->GetOptions()); - ASSERT_EQ(aopts, wc2->GetOptions()); - auto w1opts = wc1->GetOptions(); - ASSERT_NE(w1opts, nullptr); - ASSERT_EQ(c->GetOptions(), nullptr); - ASSERT_EQ(wc2->GetOptions(), nullptr); - - auto w2opts = wc2->GetOptions(); - ASSERT_NE(w2opts, nullptr); - ASSERT_EQ(c->GetOptions(), nullptr); - ASSERT_EQ(wc1->GetOptions(), nullptr); -} - -TEST_F(CustomizableTest, CopyObjectTest) { - class CopyCustomizable : public Customizable { - public: - CopyCustomizable() : prepared_(0), validated_(0) {} - const char* Name() const override { return "CopyCustomizable"; } - - Status PrepareOptions(const ConfigOptions& options) 
override { - prepared_++; - return Customizable::PrepareOptions(options); - } - Status ValidateOptions(const DBOptions& db_opts, - const ColumnFamilyOptions& cf_opts) const override { - validated_++; - return Customizable::ValidateOptions(db_opts, cf_opts); - } - int prepared_; - mutable int validated_; - }; - - CopyCustomizable c1; - ConfigOptions config_options; - Options options; - - ASSERT_OK(c1.PrepareOptions(config_options)); - ASSERT_OK(c1.ValidateOptions(options, options)); - ASSERT_EQ(c1.prepared_, 1); - ASSERT_EQ(c1.validated_, 1); - CopyCustomizable c2 = c1; - ASSERT_OK(c1.PrepareOptions(config_options)); - ASSERT_OK(c1.ValidateOptions(options, options)); - ASSERT_EQ(c2.prepared_, 1); - ASSERT_EQ(c2.validated_, 1); - ASSERT_EQ(c1.prepared_, 2); - ASSERT_EQ(c1.validated_, 2); -} - -TEST_F(CustomizableTest, TestStringDepth) { - ConfigOptions shallow = config_options_; - std::unique_ptr c( - new InnerCustomizable(std::make_shared("a"))); - std::string opt_str; - shallow.depth = ConfigOptions::Depth::kDepthShallow; - ASSERT_OK(c->GetOptionString(shallow, &opt_str)); - ASSERT_EQ(opt_str, "inner=a;"); - shallow.depth = ConfigOptions::Depth::kDepthDetailed; - ASSERT_OK(c->GetOptionString(shallow, &opt_str)); - ASSERT_NE(opt_str, "inner=a;"); -} - -// Tests that we only get a new customizable when it changes -TEST_F(CustomizableTest, NewUniqueCustomizableTest) { - std::unique_ptr base(new SimpleConfigurable()); - A_count = 0; - ASSERT_OK(base->ConfigureFromString(config_options_, - "unique={id=A_1;int=1;bool=true}")); - SimpleOptions* simple = base->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_NE(simple->cu, nullptr); - ASSERT_EQ(A_count, 1); // Created one A - ASSERT_OK(base->ConfigureFromString(config_options_, - "unique={id=A_1;int=1;bool=false}")); - ASSERT_EQ(A_count, 2); // Create another A_1 - ASSERT_OK(base->ConfigureFromString(config_options_, "unique={id=}")); - ASSERT_EQ(simple->cu, nullptr); - ASSERT_EQ(A_count, 2); - ASSERT_OK(base->ConfigureFromString(config_options_, - "unique={id=A_2;int=1;bool=false}")); - ASSERT_EQ(A_count, 3); // Created another A - ASSERT_OK(base->ConfigureFromString(config_options_, "unique.id=")); - ASSERT_EQ(simple->cu, nullptr); - ASSERT_OK(base->ConfigureFromString(config_options_, "unique=nullptr")); - ASSERT_EQ(simple->cu, nullptr); - ASSERT_OK(base->ConfigureFromString(config_options_, "unique.id=nullptr")); - ASSERT_EQ(simple->cu, nullptr); - ASSERT_EQ(A_count, 3); -} - -TEST_F(CustomizableTest, NewEmptyUniqueTest) { - std::unique_ptr base(new SimpleConfigurable()); - SimpleOptions* simple = base->GetOptions(); - ASSERT_EQ(simple->cu, nullptr); - simple->cu.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "unique={id=}")); - ASSERT_EQ(simple->cu, nullptr); - simple->cu.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "unique={id=nullptr}")); - ASSERT_EQ(simple->cu, nullptr); - simple->cu.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "unique.id=")); - ASSERT_EQ(simple->cu, nullptr); - simple->cu.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "unique=nullptr")); - ASSERT_EQ(simple->cu, nullptr); - simple->cu.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "unique.id=nullptr")); - ASSERT_EQ(simple->cu, nullptr); -} - -TEST_F(CustomizableTest, NewEmptySharedTest) { - std::unique_ptr base(new SimpleConfigurable()); - - SimpleOptions* simple 
= base->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_EQ(simple->cs, nullptr); - simple->cs.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "shared={id=}")); - ASSERT_NE(simple, nullptr); - ASSERT_EQ(simple->cs, nullptr); - simple->cs.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "shared={id=nullptr}")); - ASSERT_EQ(simple->cs, nullptr); - simple->cs.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "shared.id=")); - ASSERT_EQ(simple->cs, nullptr); - simple->cs.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "shared.id=nullptr")); - ASSERT_EQ(simple->cs, nullptr); - simple->cs.reset(new BCustomizable("B")); - - ASSERT_OK(base->ConfigureFromString(config_options_, "shared=nullptr")); - ASSERT_EQ(simple->cs, nullptr); -} - -TEST_F(CustomizableTest, NewEmptyStaticTest) { - std::unique_ptr base(new SimpleConfigurable()); - ASSERT_OK(base->ConfigureFromString(config_options_, "pointer={id=}")); - SimpleOptions* simple = base->GetOptions(); - ASSERT_NE(simple, nullptr); - ASSERT_EQ(simple->cp, nullptr); - ASSERT_OK(base->ConfigureFromString(config_options_, "pointer={id=nullptr}")); - ASSERT_EQ(simple->cp, nullptr); - - ASSERT_OK(base->ConfigureFromString(config_options_, "pointer=")); - ASSERT_EQ(simple->cp, nullptr); - ASSERT_OK(base->ConfigureFromString(config_options_, "pointer=nullptr")); - ASSERT_EQ(simple->cp, nullptr); - - ASSERT_OK(base->ConfigureFromString(config_options_, "pointer.id=")); - ASSERT_EQ(simple->cp, nullptr); - ASSERT_OK(base->ConfigureFromString(config_options_, "pointer.id=nullptr")); - ASSERT_EQ(simple->cp, nullptr); -} - -namespace { -static std::unordered_map vector_option_info = { - {"vector", - OptionTypeInfo::Vector>( - 0, OptionVerificationType::kNormal, - - OptionTypeFlags::kNone, - - OptionTypeInfo::AsCustomSharedPtr( - 0, OptionVerificationType::kNormal, OptionTypeFlags::kNone))}, -}; -class VectorConfigurable : public SimpleConfigurable { - public: - VectorConfigurable() { RegisterOptions("vector", &cv, &vector_option_info); } - std::vector> cv; -}; -} // namespace - -TEST_F(CustomizableTest, VectorConfigTest) { - VectorConfigurable orig, copy; - std::shared_ptr c1, c2; - ASSERT_OK(TestCustomizable::CreateFromString(config_options_, "A", &c1)); - ASSERT_OK(TestCustomizable::CreateFromString(config_options_, "B", &c2)); - orig.cv.push_back(c1); - orig.cv.push_back(c2); - ASSERT_OK(orig.ConfigureFromString(config_options_, "unique=A2")); - std::string opt_str, mismatch; - ASSERT_OK(orig.GetOptionString(config_options_, &opt_str)); - ASSERT_OK(copy.ConfigureFromString(config_options_, opt_str)); - ASSERT_TRUE(orig.AreEquivalent(config_options_, ©, &mismatch)); -} - -TEST_F(CustomizableTest, NoNameTest) { - // If Customizables are created without names, they are not - // part of the serialization (since they cannot be recreated) - VectorConfigurable orig, copy; - auto sopts = orig.GetOptions(); - auto copts = copy.GetOptions(); - sopts->cu.reset(new ACustomizable("")); - orig.cv.push_back(std::make_shared("")); - orig.cv.push_back(std::make_shared("A_1")); - std::string opt_str, mismatch; - ASSERT_OK(orig.GetOptionString(config_options_, &opt_str)); - ASSERT_OK(copy.ConfigureFromString(config_options_, opt_str)); - ASSERT_EQ(copy.cv.size(), 1U); - ASSERT_EQ(copy.cv[0]->GetId(), "A_1"); - ASSERT_EQ(copts->cu, nullptr); -} - - -TEST_F(CustomizableTest, IgnoreUnknownObjects) { - ConfigOptions ignore = 
config_options_; - std::shared_ptr shared; - std::unique_ptr unique; - TestCustomizable* pointer = nullptr; - ignore.ignore_unsupported_options = false; - ASSERT_NOK(LoadSharedObject(ignore, "Unknown", &shared)); - ASSERT_NOK(LoadUniqueObject(ignore, "Unknown", &unique)); - ASSERT_NOK(LoadStaticObject(ignore, "Unknown", &pointer)); - ASSERT_EQ(shared.get(), nullptr); - ASSERT_EQ(unique.get(), nullptr); - ASSERT_EQ(pointer, nullptr); - ignore.ignore_unsupported_options = true; - ASSERT_OK(LoadSharedObject(ignore, "Unknown", &shared)); - ASSERT_OK(LoadUniqueObject(ignore, "Unknown", &unique)); - ASSERT_OK(LoadStaticObject(ignore, "Unknown", &pointer)); - ASSERT_EQ(shared.get(), nullptr); - ASSERT_EQ(unique.get(), nullptr); - ASSERT_EQ(pointer, nullptr); - ASSERT_OK(LoadSharedObject(ignore, "id=Unknown", &shared)); - ASSERT_OK(LoadUniqueObject(ignore, "id=Unknown", &unique)); - ASSERT_OK(LoadStaticObject(ignore, "id=Unknown", &pointer)); - ASSERT_EQ(shared.get(), nullptr); - ASSERT_EQ(unique.get(), nullptr); - ASSERT_EQ(pointer, nullptr); - ASSERT_OK(LoadSharedObject(ignore, "id=Unknown;option=bad", - &shared)); - ASSERT_OK(LoadUniqueObject(ignore, "id=Unknown;option=bad", - &unique)); - ASSERT_OK(LoadStaticObject(ignore, "id=Unknown;option=bad", - &pointer)); - ASSERT_EQ(shared.get(), nullptr); - ASSERT_EQ(unique.get(), nullptr); - ASSERT_EQ(pointer, nullptr); -} - -TEST_F(CustomizableTest, URLFactoryTest) { - std::unique_ptr unique; - config_options_.registry->AddLibrary("URL")->AddFactory( - ObjectLibrary::PatternEntry("Z", false).AddSeparator(""), - [](const std::string& name, std::unique_ptr* guard, - std::string* /* msg */) { - guard->reset(new TestCustomizable(name)); - return guard->get(); - }); - - ConfigOptions ignore = config_options_; - ignore.ignore_unsupported_options = false; - ignore.ignore_unsupported_options = false; - ASSERT_OK(TestCustomizable::CreateFromString(ignore, "Z=1;x=y", &unique)); - ASSERT_NE(unique, nullptr); - ASSERT_EQ(unique->GetId(), "Z=1;x=y"); - ASSERT_OK(TestCustomizable::CreateFromString(ignore, "Z;x=y", &unique)); - ASSERT_NE(unique, nullptr); - ASSERT_EQ(unique->GetId(), "Z;x=y"); - unique.reset(); - ASSERT_OK(TestCustomizable::CreateFromString(ignore, "Z=1?x=y", &unique)); - ASSERT_NE(unique, nullptr); - ASSERT_EQ(unique->GetId(), "Z=1?x=y"); -} - -TEST_F(CustomizableTest, MutableOptionsTest) { - static std::unordered_map mutable_option_info = { - {"mutable", - OptionTypeInfo::AsCustomSharedPtr( - 0, OptionVerificationType::kNormal, OptionTypeFlags::kMutable)}}; - static std::unordered_map immutable_option_info = - {{"immutable", - OptionTypeInfo::AsCustomSharedPtr( - 0, OptionVerificationType::kNormal, OptionTypeFlags::kAllowNull)}}; - - class MutableCustomizable : public Customizable { - private: - std::shared_ptr mutable_; - std::shared_ptr immutable_; - - public: - MutableCustomizable() { - RegisterOptions("mutable", &mutable_, &mutable_option_info); - RegisterOptions("immutable", &immutable_, &immutable_option_info); - } - const char* Name() const override { return "MutableCustomizable"; } - }; - MutableCustomizable mc, mc2; - std::string mismatch; - std::string opt_str; - - ConfigOptions options = config_options_; - ASSERT_OK(mc.ConfigureOption(options, "mutable", "{id=B;}")); - options.mutable_options_only = true; - ASSERT_OK(mc.GetOptionString(options, &opt_str)); - ASSERT_OK(mc2.ConfigureFromString(options, opt_str)); - ASSERT_TRUE(mc.AreEquivalent(options, &mc2, &mismatch)); - - options.mutable_options_only = false; - 
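A condensed sketch of the mutable/immutable behaviour this test checks, reusing the test-local MutableCustomizable and the fixture's config_options_; as elsewhere, the reconstructed template arguments are assumptions because the diff rendering dropped them.

// Sketch: "mutable" was registered with OptionTypeFlags::kMutable, while
// "immutable" only allows null, so once mutable_options_only is set the
// immutable member can no longer be replaced but the mutable one still can.
MutableCustomizable mc;
ConfigOptions opts = config_options_;
Status s = mc.ConfigureOption(opts, "mutable", "{id=B;}");           // OK
s = mc.ConfigureOption(opts, "immutable", "{id=A; int=10}");         // OK
opts.mutable_options_only = true;
s = mc.ConfigureOption(opts, "immutable", "{id=B;}");                // rejected
s = mc.ConfigureOption(opts, "mutable", "{id=A}");                   // allowed
s = mc.ConfigureOption(opts, "mutable", "{int=22;bool=false}");      // allowed
std::string serialized;
s = mc.GetOptionString(opts, &serialized);  // only mutable options serialize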
ASSERT_OK(mc.ConfigureOption(options, "immutable", "{id=A; int=10}")); - auto* mm = mc.GetOptions>("mutable"); - auto* im = mc.GetOptions>("immutable"); - ASSERT_NE(mm, nullptr); - ASSERT_NE(mm->get(), nullptr); - ASSERT_NE(im, nullptr); - ASSERT_NE(im->get(), nullptr); - - // Now only deal with mutable options - options.mutable_options_only = true; - - // Setting nested immutable customizable options fails - ASSERT_NOK(mc.ConfigureOption(options, "immutable", "{id=B;}")); - ASSERT_NOK(mc.ConfigureOption(options, "immutable.id", "B")); - ASSERT_NOK(mc.ConfigureOption(options, "immutable.bool", "true")); - ASSERT_NOK(mc.ConfigureOption(options, "immutable", "bool=true")); - ASSERT_NOK(mc.ConfigureOption(options, "immutable", "{int=11;bool=true}")); - auto* im_a = im->get()->GetOptions("A"); - ASSERT_NE(im_a, nullptr); - ASSERT_EQ(im_a->i, 10); - ASSERT_EQ(im_a->b, false); - - // Setting nested mutable customizable options succeeds but the object did not - // change - ASSERT_OK(mc.ConfigureOption(options, "immutable.int", "11")); - ASSERT_EQ(im_a->i, 11); - ASSERT_EQ(im_a, im->get()->GetOptions("A")); - - // The mutable configurable itself can be changed - ASSERT_OK(mc.ConfigureOption(options, "mutable.id", "A")); - ASSERT_OK(mc.ConfigureOption(options, "mutable", "A")); - ASSERT_OK(mc.ConfigureOption(options, "mutable", "{id=A}")); - ASSERT_OK(mc.ConfigureOption(options, "mutable", "{bool=true}")); - - // The Nested options in the mutable object can be changed - ASSERT_OK(mc.ConfigureOption(options, "mutable", "{bool=true}")); - auto* mm_a = mm->get()->GetOptions("A"); - ASSERT_EQ(mm_a->b, true); - ASSERT_OK(mc.ConfigureOption(options, "mutable", "{int=22;bool=false}")); - mm_a = mm->get()->GetOptions("A"); - ASSERT_EQ(mm_a->i, 22); - ASSERT_EQ(mm_a->b, false); - - // Only the mutable options should get serialized - options.mutable_options_only = false; - ASSERT_OK(mc.GetOptionString(options, &opt_str)); - ASSERT_OK(mc.ConfigureOption(options, "immutable", "{id=B;}")); - options.mutable_options_only = true; - - ASSERT_OK(mc.GetOptionString(options, &opt_str)); - ASSERT_OK(mc2.ConfigureFromString(options, opt_str)); - ASSERT_TRUE(mc.AreEquivalent(options, &mc2, &mismatch)); - options.mutable_options_only = false; - ASSERT_FALSE(mc.AreEquivalent(options, &mc2, &mismatch)); - ASSERT_EQ(mismatch, "immutable"); -} - -TEST_F(CustomizableTest, CustomManagedObjects) { - std::shared_ptr object1, object2; - ASSERT_OK(LoadManagedObject( - config_options_, "id=A_1;int=1;bool=true", &object1)); - ASSERT_NE(object1, nullptr); - ASSERT_OK( - LoadManagedObject(config_options_, "A_1", &object2)); - ASSERT_EQ(object1, object2); - auto* opts = object2->GetOptions("A"); - ASSERT_NE(opts, nullptr); - ASSERT_EQ(opts->i, 1); - ASSERT_EQ(opts->b, true); - ASSERT_OK( - LoadManagedObject(config_options_, "A_2", &object2)); - ASSERT_NE(object1, object2); - object1.reset(); - ASSERT_OK(LoadManagedObject( - config_options_, "id=A_1;int=2;bool=false", &object1)); - opts = object1->GetOptions("A"); - ASSERT_NE(opts, nullptr); - ASSERT_EQ(opts->i, 2); - ASSERT_EQ(opts->b, false); -} - -TEST_F(CustomizableTest, CreateManagedObjects) { - class ManagedCustomizable : public Customizable { - public: - static const char* Type() { return "ManagedCustomizable"; } - static const char* kClassName() { return "Managed"; } - const char* Name() const override { return kClassName(); } - std::string GetId() const override { return id_; } - ManagedCustomizable() { id_ = GenerateIndividualId(); } - static Status CreateFromString( - const 
ConfigOptions& opts, const std::string& value, - std::shared_ptr* result) { - return LoadManagedObject(opts, value, result); - } - - private: - std::string id_; - }; - - config_options_.registry->AddLibrary("Managed") - ->AddFactory( - ObjectLibrary::PatternEntry::AsIndividualId( - ManagedCustomizable::kClassName()), - [](const std::string& /*name*/, - std::unique_ptr* guard, - std::string* /* msg */) { - guard->reset(new ManagedCustomizable()); - return guard->get(); - }); - - std::shared_ptr mc1, mc2, mc3, obj; - // Create a "deadbeef" customizable - std::string deadbeef = - std::string(ManagedCustomizable::kClassName()) + "@0xdeadbeef#0001"; - ASSERT_OK( - ManagedCustomizable::CreateFromString(config_options_, deadbeef, &mc1)); - // Create an object with the base/class name - ASSERT_OK(ManagedCustomizable::CreateFromString( - config_options_, ManagedCustomizable::kClassName(), &mc2)); - // Creating another with the base name returns a different object - ASSERT_OK(ManagedCustomizable::CreateFromString( - config_options_, ManagedCustomizable::kClassName(), &mc3)); - // At this point, there should be 4 managed objects (deadbeef, mc1, 2, and 3) - std::vector> objects; - ASSERT_OK(config_options_.registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 4U); - objects.clear(); - // Three separate object, none of them equal - ASSERT_NE(mc1, mc2); - ASSERT_NE(mc1, mc3); - ASSERT_NE(mc2, mc3); - - // Creating another object with "deadbeef" object - ASSERT_OK( - ManagedCustomizable::CreateFromString(config_options_, deadbeef, &obj)); - ASSERT_EQ(mc1, obj); - // Create another with the IDs of the instances - ASSERT_OK(ManagedCustomizable::CreateFromString(config_options_, mc1->GetId(), - &obj)); - ASSERT_EQ(mc1, obj); - ASSERT_OK(ManagedCustomizable::CreateFromString(config_options_, mc2->GetId(), - &obj)); - ASSERT_EQ(mc2, obj); - ASSERT_OK(ManagedCustomizable::CreateFromString(config_options_, mc3->GetId(), - &obj)); - ASSERT_EQ(mc3, obj); - - // Now get rid of deadbeef. 2 Objects left (m2+m3) - mc1.reset(); - ASSERT_EQ( - config_options_.registry->GetManagedObject(deadbeef), - nullptr); - ASSERT_OK(config_options_.registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 2U); - objects.clear(); - - // Associate deadbeef with #2 - ASSERT_OK(config_options_.registry->SetManagedObject(deadbeef, mc2)); - ASSERT_OK( - ManagedCustomizable::CreateFromString(config_options_, deadbeef, &obj)); - ASSERT_EQ(mc2, obj); - obj.reset(); - - // Get the ID of mc2 and then reset it. 1 Object left - std::string mc2id = mc2->GetId(); - mc2.reset(); - ASSERT_EQ( - config_options_.registry->GetManagedObject(mc2id), - nullptr); - ASSERT_OK(config_options_.registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 1U); - objects.clear(); - - // Create another object with the old mc2id. 
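The managed-object tests above hinge on the registry caching live objects by id, so loading a repeated id hands back the existing instance while an unseen id builds a new one. A minimal sketch of that behaviour, with the stripped template argument reconstructed as TestCustomizable:

// Sketch: LoadManagedObject reuses the instance already registered under an
// id and only constructs a new object for an id it has not seen (or whose
// previous object has been released).
std::shared_ptr<TestCustomizable> first, second, other;
Status s = LoadManagedObject<TestCustomizable>(
    config_options_, "id=A_1;int=1;bool=true", &first);
s = LoadManagedObject<TestCustomizable>(config_options_, "A_1", &second);
// first == second: the same managed object is returned for "A_1".
s = LoadManagedObject<TestCustomizable>(config_options_, "A_2", &other);
// other != first: a different id produces a distinct managed object.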
- ASSERT_OK( - ManagedCustomizable::CreateFromString(config_options_, mc2id, &mc2)); - ASSERT_OK( - ManagedCustomizable::CreateFromString(config_options_, mc2id, &obj)); - ASSERT_EQ(mc2, obj); - - // For good measure, create another deadbeef object - ASSERT_OK( - ManagedCustomizable::CreateFromString(config_options_, deadbeef, &mc1)); - ASSERT_OK( - ManagedCustomizable::CreateFromString(config_options_, deadbeef, &obj)); - ASSERT_EQ(mc1, obj); -} - - -namespace { -class TestSecondaryCache : public SecondaryCache { - public: - static const char* kClassName() { return "Test"; } - const char* Name() const override { return kClassName(); } - Status Insert(const Slice& /*key*/, Cache::ObjectPtr /*value*/, - const Cache::CacheItemHelper* /*helper*/) override { - return Status::NotSupported(); - } - std::unique_ptr Lookup( - const Slice& /*key*/, const Cache::CacheItemHelper* /*helper*/, - Cache::CreateContext* /*create_context*/, bool /*wait*/, - bool /*advise_erase*/, bool& kept_in_sec_cache) override { - kept_in_sec_cache = true; - return nullptr; - } - - bool SupportForceErase() const override { return false; } - - void Erase(const Slice& /*key*/) override {} - - // Wait for a collection of handles to become ready - void WaitAll(std::vector /*handles*/) override {} - - std::string GetPrintableOptions() const override { return ""; } -}; - -class TestStatistics : public StatisticsImpl { - public: - TestStatistics() : StatisticsImpl(nullptr) {} - const char* Name() const override { return kClassName(); } - static const char* kClassName() { return "Test"; } -}; - -class TestFlushBlockPolicyFactory : public FlushBlockPolicyFactory { - public: - TestFlushBlockPolicyFactory() {} - - static const char* kClassName() { return "TestFlushBlockPolicyFactory"; } - const char* Name() const override { return kClassName(); } - - FlushBlockPolicy* NewFlushBlockPolicy( - const BlockBasedTableOptions& /*table_options*/, - const BlockBuilder& /*data_block_builder*/) const override { - return nullptr; - } -}; - -class MockSliceTransform : public SliceTransform { - public: - const char* Name() const override { return kClassName(); } - static const char* kClassName() { return "Mock"; } - - Slice Transform(const Slice& /*key*/) const override { return Slice(); } - - bool InDomain(const Slice& /*key*/) const override { return false; } - - bool InRange(const Slice& /*key*/) const override { return false; } -}; - -class MockMemoryAllocator : public BaseMemoryAllocator { - public: - static const char* kClassName() { return "MockMemoryAllocator"; } - const char* Name() const override { return kClassName(); } -}; - -class MockEncryptionProvider : public EncryptionProvider { - public: - explicit MockEncryptionProvider(const std::string& id) : id_(id) {} - static const char* kClassName() { return "Mock"; } - const char* Name() const override { return kClassName(); } - size_t GetPrefixLength() const override { return 0; } - Status CreateNewPrefix(const std::string& /*fname*/, char* /*prefix*/, - size_t /*prefixLength*/) const override { - return Status::NotSupported(); - } - - Status AddCipher(const std::string& /*descriptor*/, const char* /*cipher*/, - size_t /*len*/, bool /*for_write*/) override { - return Status::NotSupported(); - } - - Status CreateCipherStream( - const std::string& /*fname*/, const EnvOptions& /*options*/, - Slice& /*prefix*/, - std::unique_ptr* /*result*/) override { - return Status::NotSupported(); - } - Status ValidateOptions(const DBOptions& db_opts, - const ColumnFamilyOptions& cf_opts) const 
override { - if (EndsWith(id_, "://test")) { - return EncryptionProvider::ValidateOptions(db_opts, cf_opts); - } else { - return Status::InvalidArgument("MockProvider not initialized"); - } - } - - private: - std::string id_; -}; - -class MockCipher : public BlockCipher { - public: - const char* Name() const override { return "Mock"; } - size_t BlockSize() override { return 0; } - Status Encrypt(char* /*data*/) override { return Status::NotSupported(); } - Status Decrypt(char* data) override { return Encrypt(data); } -}; - -class DummyFileSystem : public FileSystemWrapper { - public: - explicit DummyFileSystem(const std::shared_ptr& t) - : FileSystemWrapper(t) {} - static const char* kClassName() { return "DummyFileSystem"; } - const char* Name() const override { return kClassName(); } -}; - - - -class MockTablePropertiesCollectorFactory - : public TablePropertiesCollectorFactory { - private: - public: - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context /*context*/) override { - return nullptr; - } - static const char* kClassName() { return "Mock"; } - const char* Name() const override { return kClassName(); } -}; - -class MockSstPartitionerFactory : public SstPartitionerFactory { - public: - static const char* kClassName() { return "Mock"; } - const char* Name() const override { return kClassName(); } - std::unique_ptr CreatePartitioner( - const SstPartitioner::Context& /* context */) const override { - return nullptr; - } -}; - -class MockFileChecksumGenFactory : public FileChecksumGenFactory { - public: - static const char* kClassName() { return "Mock"; } - const char* Name() const override { return kClassName(); } - std::unique_ptr CreateFileChecksumGenerator( - const FileChecksumGenContext& /*context*/) override { - return nullptr; - } -}; - -class MockFilterPolicy : public FilterPolicy { - public: - static const char* kClassName() { return "MockFilterPolicy"; } - const char* Name() const override { return kClassName(); } - const char* CompatibilityName() const override { return Name(); } - FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext&) const override { - return nullptr; - } - FilterBitsReader* GetFilterBitsReader( - const Slice& /*contents*/) const override { - return nullptr; - } -}; - -static int RegisterLocalObjects(ObjectLibrary& library, - const std::string& /*arg*/) { - size_t num_types; - library.AddFactory( - mock::MockTableFactory::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new mock::MockTableFactory()); - return guard->get(); - }); - library.AddFactory( - OnFileDeletionListener::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new OnFileDeletionListener()); - return guard->get(); - }); - library.AddFactory( - FlushCounterListener::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new FlushCounterListener()); - return guard->get(); - }); - // Load any locally defined objects here - library.AddFactory( - MockSliceTransform::kClassName(), - [](const std::string& /*uri*/, - std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MockSliceTransform()); - return guard->get(); - }); - library.AddFactory( - TestStatistics::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new TestStatistics()); - return guard->get(); 
- }); - - library.AddFactory( - ObjectLibrary::PatternEntry(MockEncryptionProvider::kClassName(), true) - .AddSuffix("://test"), - [](const std::string& uri, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MockEncryptionProvider(uri)); - return guard->get(); - }); - library.AddFactory( - "Mock", - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MockCipher()); - return guard->get(); - }); - library.AddFactory( - MockMemoryAllocator::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MockMemoryAllocator()); - return guard->get(); - }); - library.AddFactory( - TestFlushBlockPolicyFactory::kClassName(), - [](const std::string& /*uri*/, - std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new TestFlushBlockPolicyFactory()); - return guard->get(); - }); - - library.AddFactory( - TestSecondaryCache::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new TestSecondaryCache()); - return guard->get(); - }); - - library.AddFactory( - DummyFileSystem::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new DummyFileSystem(nullptr)); - return guard->get(); - }); - - library.AddFactory( - MockSstPartitionerFactory::kClassName(), - [](const std::string& /*uri*/, - std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MockSstPartitionerFactory()); - return guard->get(); - }); - - library.AddFactory( - MockFileChecksumGenFactory::kClassName(), - [](const std::string& /*uri*/, - std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MockFileChecksumGenFactory()); - return guard->get(); - }); - - library.AddFactory( - MockTablePropertiesCollectorFactory::kClassName(), - [](const std::string& /*uri*/, - std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MockTablePropertiesCollectorFactory()); - return guard->get(); - }); - - library.AddFactory( - MockFilterPolicy::kClassName(), - [](const std::string& /*uri*/, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MockFilterPolicy()); - return guard->get(); - }); - - return static_cast(library.GetFactoryCount(&num_types)); -} -} // namespace - -class LoadCustomizableTest : public testing::Test { - public: - LoadCustomizableTest() { - config_options_.ignore_unsupported_options = false; - config_options_.invoke_prepare_options = false; - } - bool RegisterTests(const std::string& arg) { - config_options_.registry->AddLibrary("custom-tests", - test::RegisterTestObjects, arg); - config_options_.registry->AddLibrary("local-tests", RegisterLocalObjects, - arg); - return true; - } - - template - Status TestCreateStatic(const std::string& name, U** result, - bool delete_result = false) { - Status s = T::CreateFromString(config_options_, name, result); - if (s.ok()) { - EXPECT_NE(*result, nullptr); - EXPECT_TRUE(*result != nullptr && (*result)->IsInstanceOf(name)); - } - if (delete_result) { - delete *result; - *result = nullptr; - } - return s; - } - - template - std::shared_ptr ExpectCreateShared(const std::string& name, - std::shared_ptr* object) { - EXPECT_OK(T::CreateFromString(config_options_, name, object)); - EXPECT_NE(object->get(), nullptr); - EXPECT_TRUE(object->get()->IsInstanceOf(name)); - return *object; - } - - template - std::shared_ptr ExpectCreateShared(const std::string& 
name) { - std::shared_ptr result; - return ExpectCreateShared(name, &result); - } - - template - Status TestExpectedBuiltins( - const std::string& mock, const std::unordered_set& expected, - std::shared_ptr* object, std::vector* failed, - const std::function(const std::string&)>& alt = - nullptr) { - std::unordered_set factories = expected; - Status s = T::CreateFromString(config_options_, mock, object); - EXPECT_NOK(s); - std::vector builtins; - ObjectLibrary::Default()->GetFactoryNames(T::Type(), &builtins); - factories.insert(builtins.begin(), builtins.end()); - Status result; - int created = 0; - for (const auto& name : factories) { - created++; - s = T::CreateFromString(config_options_, name, object); - if (!s.ok() && alt != nullptr) { - for (const auto& alt_name : alt(name)) { - s = T::CreateFromString(config_options_, alt_name, object); - if (s.ok()) { - break; - } - } - } - if (!s.ok()) { - result = s; - failed->push_back(name); - } else { - EXPECT_NE(object->get(), nullptr); - EXPECT_TRUE(object->get()->IsInstanceOf(name)); - } - } - std::vector plugins; - ObjectRegistry::Default()->GetFactoryNames(T::Type(), &plugins); - if (plugins.size() > builtins.size()) { - for (const auto& name : plugins) { - if (factories.find(name) == factories.end()) { - created++; - s = T::CreateFromString(config_options_, name, object); - if (!s.ok() && alt != nullptr) { - for (const auto& alt_name : alt(name)) { - s = T::CreateFromString(config_options_, alt_name, object); - if (s.ok()) { - break; - } - } - } - if (!s.ok()) { - failed->push_back(name); - if (result.ok()) { - result = s; - } - printf("%s: Failed creating plugin[%s]: %s\n", T::Type(), - name.c_str(), s.ToString().c_str()); - } else if (object->get() == nullptr || - !object->get()->IsInstanceOf(name)) { - failed->push_back(name); - printf("%s: Invalid plugin[%s]\n", T::Type(), name.c_str()); - } - } - } - } - printf("%s: Created %d (expected+builtins+plugins %d+%d+%d) %d Failed\n", - T::Type(), created, (int)expected.size(), - (int)(factories.size() - expected.size()), - (int)(plugins.size() - builtins.size()), (int)failed->size()); - return result; - } - - template - Status TestSharedBuiltins(const std::string& mock, - const std::string& expected, - std::vector* failed = nullptr) { - std::unordered_set values; - if (!expected.empty()) { - values.insert(expected); - } - std::shared_ptr object; - if (failed != nullptr) { - return TestExpectedBuiltins(mock, values, &object, failed); - } else { - std::vector failures; - Status s = TestExpectedBuiltins(mock, values, &object, &failures); - EXPECT_EQ(0U, failures.size()); - return s; - } - } - - template - Status TestStaticBuiltins(const std::string& mock, U** object, - const std::unordered_set& expected, - std::vector* failed, - bool delete_objects = false) { - std::unordered_set factories = expected; - Status s = TestCreateStatic(mock, object, delete_objects); - EXPECT_NOK(s); - std::vector builtins; - ObjectLibrary::Default()->GetFactoryNames(T::Type(), &builtins); - factories.insert(builtins.begin(), builtins.end()); - int created = 0; - Status result; - for (const auto& name : factories) { - created++; - s = TestCreateStatic(name, object, delete_objects); - if (!s.ok()) { - result = s; - failed->push_back(name); - } - } - std::vector plugins; - ObjectRegistry::Default()->GetFactoryNames(T::Type(), &plugins); - if (plugins.size() > builtins.size()) { - for (const auto& name : plugins) { - if (factories.find(name) == factories.end()) { - created++; - s = T::CreateFromString(config_options_, 
name, object); - if (!s.ok() || *object == nullptr || - !((*object)->IsInstanceOf(name))) { - failed->push_back(name); - if (result.ok() && !s.ok()) { - result = s; - } - printf("%s: Failed creating plugin[%s]: %s\n", T::Type(), - name.c_str(), s.ToString().c_str()); - } - if (delete_objects) { - delete *object; - *object = nullptr; - } - } - } - } - printf("%s: Created %d (expected+builtins+plugins %d+%d+%d) %d Failed\n", - T::Type(), created, (int)expected.size(), - (int)(factories.size() - expected.size()), - (int)(plugins.size() - builtins.size()), (int)failed->size()); - return result; - } - - protected: - DBOptions db_opts_; - ColumnFamilyOptions cf_opts_; - ConfigOptions config_options_; -}; - -TEST_F(LoadCustomizableTest, LoadTableFactoryTest) { - ASSERT_OK( - TestSharedBuiltins(mock::MockTableFactory::kClassName(), - TableFactory::kBlockBasedTableName())); - std::string opts_str = "table_factory="; - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options_, cf_opts_, - opts_str + TableFactory::kBlockBasedTableName(), &cf_opts_)); - ASSERT_NE(cf_opts_.table_factory.get(), nullptr); - ASSERT_STREQ(cf_opts_.table_factory->Name(), - TableFactory::kBlockBasedTableName()); - if (RegisterTests("Test")) { - ExpectCreateShared(mock::MockTableFactory::kClassName()); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options_, cf_opts_, - opts_str + mock::MockTableFactory::kClassName(), &cf_opts_)); - ASSERT_NE(cf_opts_.table_factory.get(), nullptr); - ASSERT_STREQ(cf_opts_.table_factory->Name(), - mock::MockTableFactory::kClassName()); - } -} - -TEST_F(LoadCustomizableTest, LoadFileSystemTest) { - ASSERT_OK(TestSharedBuiltins(DummyFileSystem::kClassName(), - FileSystem::kDefaultName())); - if (RegisterTests("Test")) { - auto fs = ExpectCreateShared(DummyFileSystem::kClassName()); - ASSERT_FALSE(fs->IsInstanceOf(FileSystem::kDefaultName())); - } -} - -TEST_F(LoadCustomizableTest, LoadSecondaryCacheTest) { - ASSERT_OK( - TestSharedBuiltins(TestSecondaryCache::kClassName(), "")); - if (RegisterTests("Test")) { - ExpectCreateShared(TestSecondaryCache::kClassName()); - } -} - -TEST_F(LoadCustomizableTest, LoadSstPartitionerFactoryTest) { - ASSERT_OK(TestSharedBuiltins( - "Mock", SstPartitionerFixedPrefixFactory::kClassName())); - if (RegisterTests("Test")) { - ExpectCreateShared("Mock"); - } -} - -TEST_F(LoadCustomizableTest, LoadChecksumGenFactoryTest) { - ASSERT_OK(TestSharedBuiltins("Mock", "")); - if (RegisterTests("Test")) { - ExpectCreateShared("Mock"); - } -} - -TEST_F(LoadCustomizableTest, LoadTablePropertiesCollectorFactoryTest) { - ASSERT_OK(TestSharedBuiltins( - MockTablePropertiesCollectorFactory::kClassName(), "")); - if (RegisterTests("Test")) { - ExpectCreateShared( - MockTablePropertiesCollectorFactory::kClassName()); - } -} - -TEST_F(LoadCustomizableTest, LoadComparatorTest) { - const Comparator* bytewise = BytewiseComparator(); - const Comparator* reverse = ReverseBytewiseComparator(); - const Comparator* result = nullptr; - std::unordered_set expected = {bytewise->Name(), - reverse->Name()}; - std::vector failures; - ASSERT_OK(TestStaticBuiltins( - test::SimpleSuffixReverseComparator::kClassName(), &result, expected, - &failures)); - if (RegisterTests("Test")) { - ASSERT_OK(TestCreateStatic( - test::SimpleSuffixReverseComparator::kClassName(), &result)); - } -} - -TEST_F(LoadCustomizableTest, LoadSliceTransformFactoryTest) { - std::shared_ptr result; - std::vector failures; - std::unordered_set expected = {"rocksdb.Noop", "fixed", - "rocksdb.FixedPrefix", "capped", - 
"rocksdb.CappedPrefix"}; - ASSERT_OK(TestExpectedBuiltins( - "Mock", expected, &result, &failures, [](const std::string& name) { - std::vector names = {name + ":22", name + ".22"}; - return names; - })); - ASSERT_OK(SliceTransform::CreateFromString( - config_options_, "rocksdb.FixedPrefix.22", &result)); - ASSERT_NE(result.get(), nullptr); - ASSERT_TRUE(result->IsInstanceOf("fixed")); - ASSERT_OK(SliceTransform::CreateFromString( - config_options_, "rocksdb.CappedPrefix.22", &result)); - ASSERT_NE(result.get(), nullptr); - ASSERT_TRUE(result->IsInstanceOf("capped")); - if (RegisterTests("Test")) { - ExpectCreateShared("Mock", &result); - } -} - -TEST_F(LoadCustomizableTest, LoadStatisticsTest) { - ASSERT_OK(TestSharedBuiltins(TestStatistics::kClassName(), - "BasicStatistics")); - // Empty will create a default BasicStatistics - ASSERT_OK( - Statistics::CreateFromString(config_options_, "", &db_opts_.statistics)); - ASSERT_NE(db_opts_.statistics, nullptr); - ASSERT_STREQ(db_opts_.statistics->Name(), "BasicStatistics"); - - ASSERT_NOK(GetDBOptionsFromString(config_options_, db_opts_, - "statistics=Test", &db_opts_)); - ASSERT_OK(GetDBOptionsFromString(config_options_, db_opts_, - "statistics=BasicStatistics", &db_opts_)); - ASSERT_NE(db_opts_.statistics, nullptr); - ASSERT_STREQ(db_opts_.statistics->Name(), "BasicStatistics"); - - if (RegisterTests("test")) { - auto stats = ExpectCreateShared(TestStatistics::kClassName()); - - ASSERT_OK(GetDBOptionsFromString(config_options_, db_opts_, - "statistics=Test", &db_opts_)); - ASSERT_NE(db_opts_.statistics, nullptr); - ASSERT_STREQ(db_opts_.statistics->Name(), TestStatistics::kClassName()); - - ASSERT_OK(GetDBOptionsFromString( - config_options_, db_opts_, "statistics={id=Test;inner=BasicStatistics}", - &db_opts_)); - ASSERT_NE(db_opts_.statistics, nullptr); - ASSERT_STREQ(db_opts_.statistics->Name(), TestStatistics::kClassName()); - auto* inner = db_opts_.statistics->GetOptions>( - "StatisticsOptions"); - ASSERT_NE(inner, nullptr); - ASSERT_NE(inner->get(), nullptr); - ASSERT_STREQ(inner->get()->Name(), "BasicStatistics"); - - ASSERT_OK(Statistics::CreateFromString( - config_options_, "id=BasicStatistics;inner=Test", &stats)); - ASSERT_NE(stats, nullptr); - ASSERT_STREQ(stats->Name(), "BasicStatistics"); - inner = stats->GetOptions>("StatisticsOptions"); - ASSERT_NE(inner, nullptr); - ASSERT_NE(inner->get(), nullptr); - ASSERT_STREQ(inner->get()->Name(), TestStatistics::kClassName()); - } -} - -TEST_F(LoadCustomizableTest, LoadMemTableRepFactoryTest) { - std::unordered_set expected = { - SkipListFactory::kClassName(), - SkipListFactory::kNickName(), - }; - - std::vector failures; - std::shared_ptr factory; - Status s = TestExpectedBuiltins( - "SpecialSkipListFactory", expected, &factory, &failures); - // There is a "cuckoo" factory registered that we expect to fail. 
Ignore the - // error if this is the one - if (s.ok() || failures.size() > 1 || failures[0] != "cuckoo") { - ASSERT_OK(s); - } - if (RegisterTests("Test")) { - ExpectCreateShared("SpecialSkipListFactory"); - } -} - -TEST_F(LoadCustomizableTest, LoadMergeOperatorTest) { - std::shared_ptr result; - std::vector failed; - std::unordered_set expected = { - "put", "put_v1", "PutOperator", "uint64add", "UInt64AddOperator", - "max", "MaxOperator", - }; - expected.insert({ - StringAppendOperator::kClassName(), - StringAppendOperator::kNickName(), - StringAppendTESTOperator::kClassName(), - StringAppendTESTOperator::kNickName(), - SortList::kClassName(), - SortList::kNickName(), - BytesXOROperator::kClassName(), - BytesXOROperator::kNickName(), - }); - - ASSERT_OK(TestExpectedBuiltins("Changling", expected, &result, - &failed)); - if (RegisterTests("Test")) { - ExpectCreateShared("Changling"); - } -} - -TEST_F(LoadCustomizableTest, LoadCompactionFilterFactoryTest) { - ASSERT_OK(TestSharedBuiltins("Changling", "")); - if (RegisterTests("Test")) { - ExpectCreateShared("Changling"); - } -} - -TEST_F(LoadCustomizableTest, LoadCompactionFilterTest) { - const CompactionFilter* result = nullptr; - std::vector failures; - ASSERT_OK(TestStaticBuiltins("Changling", &result, {}, - &failures, true)); - if (RegisterTests("Test")) { - ASSERT_OK(TestCreateStatic("Changling", &result, true)); - } -} - -TEST_F(LoadCustomizableTest, LoadEventListenerTest) { - ASSERT_OK(TestSharedBuiltins( - OnFileDeletionListener::kClassName(), "")); - if (RegisterTests("Test")) { - ExpectCreateShared(OnFileDeletionListener::kClassName()); - ExpectCreateShared(FlushCounterListener::kClassName()); - } -} - -TEST_F(LoadCustomizableTest, LoadEncryptionProviderTest) { - std::vector failures; - std::shared_ptr result; - ASSERT_OK( - TestExpectedBuiltins("Mock", {}, &result, &failures)); - if (!failures.empty()) { - ASSERT_EQ(failures[0], "1://test"); - ASSERT_EQ(failures.size(), 1U); - } - - result = ExpectCreateShared("CTR"); - ASSERT_NOK(result->ValidateOptions(db_opts_, cf_opts_)); - ASSERT_OK(EncryptionProvider::CreateFromString(config_options_, "CTR://test", - &result)); - ASSERT_NE(result, nullptr); - ASSERT_STREQ(result->Name(), "CTR"); - ASSERT_OK(result->ValidateOptions(db_opts_, cf_opts_)); - - if (RegisterTests("Test")) { - ExpectCreateShared("Mock"); - ASSERT_OK(EncryptionProvider::CreateFromString(config_options_, - "Mock://test", &result)); - ASSERT_NE(result, nullptr); - ASSERT_STREQ(result->Name(), "Mock"); - ASSERT_OK(result->ValidateOptions(db_opts_, cf_opts_)); - } -} - -TEST_F(LoadCustomizableTest, LoadEncryptionCipherTest) { - ASSERT_OK(TestSharedBuiltins("Mock", "ROT13")); - if (RegisterTests("Test")) { - ExpectCreateShared("Mock"); - } -} - -TEST_F(LoadCustomizableTest, LoadSystemClockTest) { - ASSERT_OK(TestSharedBuiltins(MockSystemClock::kClassName(), - SystemClock::kDefaultName())); - if (RegisterTests("Test")) { - auto result = - ExpectCreateShared(MockSystemClock::kClassName()); - ASSERT_FALSE(result->IsInstanceOf(SystemClock::kDefaultName())); - } -} - -TEST_F(LoadCustomizableTest, LoadMemoryAllocatorTest) { - std::vector failures; - Status s = TestSharedBuiltins( - MockMemoryAllocator::kClassName(), DefaultMemoryAllocator::kClassName(), - &failures); - if (failures.empty()) { - ASSERT_OK(s); - } else { - ASSERT_NOK(s); - for (const auto& failure : failures) { - if (failure == JemallocNodumpAllocator::kClassName()) { - ASSERT_FALSE(JemallocNodumpAllocator::IsSupported()); - } else if (failure == 
MemkindKmemAllocator::kClassName()) { - ASSERT_FALSE(MemkindKmemAllocator::IsSupported()); - } else { - printf("BYPASSED: %s -- %s\n", failure.c_str(), s.ToString().c_str()); - } - } - } - if (RegisterTests("Test")) { - ExpectCreateShared(MockMemoryAllocator::kClassName()); - } -} - -TEST_F(LoadCustomizableTest, LoadFilterPolicyTest) { - const std::string kAutoBloom = BloomFilterPolicy::kClassName(); - const std::string kAutoRibbon = RibbonFilterPolicy::kClassName(); - - std::shared_ptr result; - std::vector failures; - std::unordered_set expected = { - ReadOnlyBuiltinFilterPolicy::kClassName(), - }; - - expected.insert({ - kAutoBloom, - BloomFilterPolicy::kNickName(), - kAutoRibbon, - RibbonFilterPolicy::kNickName(), - }); - ASSERT_OK(TestExpectedBuiltins( - "Mock", expected, &result, &failures, [](const std::string& name) { - std::vector names = {name + ":1.234"}; - return names; - })); - ASSERT_OK(FilterPolicy::CreateFromString( - config_options_, kAutoBloom + ":1.234:false", &result)); - ASSERT_NE(result.get(), nullptr); - ASSERT_TRUE(result->IsInstanceOf(kAutoBloom)); - ASSERT_OK(FilterPolicy::CreateFromString( - config_options_, kAutoBloom + ":1.234:false", &result)); - ASSERT_NE(result.get(), nullptr); - ASSERT_TRUE(result->IsInstanceOf(kAutoBloom)); - ASSERT_OK(FilterPolicy::CreateFromString(config_options_, - kAutoRibbon + ":1.234:-1", &result)); - ASSERT_NE(result.get(), nullptr); - ASSERT_TRUE(result->IsInstanceOf(kAutoRibbon)); - ASSERT_OK(FilterPolicy::CreateFromString(config_options_, - kAutoRibbon + ":1.234:56", &result)); - ASSERT_NE(result.get(), nullptr); - ASSERT_TRUE(result->IsInstanceOf(kAutoRibbon)); - - if (RegisterTests("Test")) { - ExpectCreateShared(MockFilterPolicy::kClassName(), &result); - } - - std::shared_ptr table; - - std::string table_opts = "id=BlockBasedTable; filter_policy="; - ASSERT_OK(TableFactory::CreateFromString(config_options_, - table_opts + "nullptr", &table)); - ASSERT_NE(table.get(), nullptr); - auto bbto = table->GetOptions(); - ASSERT_NE(bbto, nullptr); - ASSERT_EQ(bbto->filter_policy.get(), nullptr); - ASSERT_OK(TableFactory::CreateFromString( - config_options_, table_opts + ReadOnlyBuiltinFilterPolicy::kClassName(), - &table)); - bbto = table->GetOptions(); - ASSERT_NE(bbto, nullptr); - ASSERT_NE(bbto->filter_policy.get(), nullptr); - ASSERT_STREQ(bbto->filter_policy->Name(), - ReadOnlyBuiltinFilterPolicy::kClassName()); - ASSERT_OK(TableFactory::CreateFromString( - config_options_, table_opts + MockFilterPolicy::kClassName(), &table)); - bbto = table->GetOptions(); - ASSERT_NE(bbto, nullptr); - ASSERT_NE(bbto->filter_policy.get(), nullptr); - ASSERT_TRUE( - bbto->filter_policy->IsInstanceOf(MockFilterPolicy::kClassName())); -} - -TEST_F(LoadCustomizableTest, LoadFlushBlockPolicyFactoryTest) { - std::shared_ptr result; - std::shared_ptr table; - std::vector failed; - std::unordered_set expected = { - FlushBlockBySizePolicyFactory::kClassName(), - FlushBlockEveryKeyPolicyFactory::kClassName(), - }; - - ASSERT_OK(TestExpectedBuiltins( - TestFlushBlockPolicyFactory::kClassName(), expected, &result, &failed)); - - // An empty policy name creates a BySize policy - ASSERT_OK( - FlushBlockPolicyFactory::CreateFromString(config_options_, "", &result)); - ASSERT_NE(result, nullptr); - ASSERT_STREQ(result->Name(), FlushBlockBySizePolicyFactory::kClassName()); - - std::string table_opts = "id=BlockBasedTable; flush_block_policy_factory="; - ASSERT_OK(TableFactory::CreateFromString( - config_options_, - table_opts + 
FlushBlockEveryKeyPolicyFactory::kClassName(), &table)); - auto bbto = table->GetOptions(); - ASSERT_NE(bbto, nullptr); - ASSERT_NE(bbto->flush_block_policy_factory.get(), nullptr); - ASSERT_STREQ(bbto->flush_block_policy_factory->Name(), - FlushBlockEveryKeyPolicyFactory::kClassName()); - if (RegisterTests("Test")) { - ExpectCreateShared( - TestFlushBlockPolicyFactory::kClassName()); - ASSERT_OK(TableFactory::CreateFromString( - config_options_, table_opts + TestFlushBlockPolicyFactory::kClassName(), - &table)); - bbto = table->GetOptions(); - ASSERT_NE(bbto, nullptr); - ASSERT_NE(bbto->flush_block_policy_factory.get(), nullptr); - ASSERT_STREQ(bbto->flush_block_policy_factory->Name(), - TestFlushBlockPolicyFactory::kClassName()); - } -} - -} // namespace ROCKSDB_NAMESPACE -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); -#ifdef GFLAGS - ParseCommandLineFlags(&argc, &argv, true); -#endif // GFLAGS - return RUN_ALL_TESTS(); -} diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc deleted file mode 100644 index 020debf01..000000000 --- a/options/options_settable_test.cc +++ /dev/null @@ -1,627 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include - -#include "options/cf_options.h" -#include "options/db_options.h" -#include "options/options_helper.h" -#include "rocksdb/convenience.h" -#include "test_util/testharness.h" - -#ifndef GFLAGS -bool FLAGS_enable_print = false; -#else -#include "util/gflags_compat.h" -using GFLAGS_NAMESPACE::ParseCommandLineFlags; -DEFINE_bool(enable_print, false, "Print options generated to console."); -#endif // GFLAGS - -namespace ROCKSDB_NAMESPACE { - -// Verify options are settable from options strings. -// We take the approach that depends on compiler behavior that copy constructor -// won't touch implicit padding bytes, so that the test is fragile. -// As a result, we only run the tests to verify new fields in options are -// settable through string on limited platforms as it depends on behavior of -// compilers. -#if defined OS_LINUX || defined OS_WIN -#ifndef __clang__ -#ifndef ROCKSDB_UBSAN_RUN - -class OptionsSettableTest : public testing::Test { - public: - OptionsSettableTest() {} -}; - -const char kSpecialChar = 'z'; -using OffsetGap = std::vector>; - -void FillWithSpecialChar(char* start_ptr, size_t total_size, - const OffsetGap& excluded, - char special_char = kSpecialChar) { - size_t offset = 0; - // The excluded vector contains pairs of bytes, (first, second). - // The first bytes are all set to the special char (represented as 'c' below). - // The second bytes are simply skipped (padding bytes). - // ccccc[skipped]cccccccc[skiped]cccccccc[skipped] - for (auto& pair : excluded) { - std::memset(start_ptr + offset, special_char, pair.first - offset); - offset = pair.first + pair.second; - } - // The rest of the structure is filled with the special characters. 
- // ccccc[skipped]cccccccc[skiped]cccccccc[skipped]cccccccccccccccc - std::memset(start_ptr + offset, special_char, total_size - offset); -} - -int NumUnsetBytes(char* start_ptr, size_t total_size, - const OffsetGap& excluded) { - int total_unset_bytes_base = 0; - size_t offset = 0; - for (auto& pair : excluded) { - // The first part of the structure contains memory spaces that can be - // set (pair.first), and memory spaces that cannot be set (pair.second). - // Therefore total_unset_bytes_base only agregates bytes set to kSpecialChar - // in the pair.first bytes, but skips the pair.second bytes (padding bytes). - for (char* ptr = start_ptr + offset; ptr < start_ptr + pair.first; ptr++) { - if (*ptr == kSpecialChar) { - total_unset_bytes_base++; - } - } - offset = pair.first + pair.second; - } - // Then total_unset_bytes_base aggregates the bytes - // set to kSpecialChar in the rest of the structure - for (char* ptr = start_ptr + offset; ptr < start_ptr + total_size; ptr++) { - if (*ptr == kSpecialChar) { - total_unset_bytes_base++; - } - } - return total_unset_bytes_base; -} - -// Return true iff two structs are the same except excluded fields. -bool CompareBytes(char* start_ptr1, char* start_ptr2, size_t total_size, - const OffsetGap& excluded) { - size_t offset = 0; - for (auto& pair : excluded) { - for (; offset < pair.first; offset++) { - if (*(start_ptr1 + offset) != *(start_ptr2 + offset)) { - return false; - } - } - offset = pair.first + pair.second; - } - for (; offset < total_size; offset++) { - if (*(start_ptr1 + offset) != *(start_ptr2 + offset)) { - return false; - } - } - return true; -} - -// If the test fails, likely a new option is added to BlockBasedTableOptions -// but it cannot be set through GetBlockBasedTableOptionsFromString(), or the -// test is not updated accordingly. -// After adding an option, we need to make sure it is settable by -// GetBlockBasedTableOptionsFromString() and add the option to the input string -// passed to the GetBlockBasedTableOptionsFromString() in this test. -// If it is a complicated type, you also need to add the field to -// kBbtoExcluded, and maybe add customized verification for it. -TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) { - // Items in the form of . Need to be in ascending order - // and not overlapping. Need to update if new option to be excluded is added - // (e.g, pointer-type) - const OffsetGap kBbtoExcluded = { - {offsetof(struct BlockBasedTableOptions, flush_block_policy_factory), - sizeof(std::shared_ptr)}, - {offsetof(struct BlockBasedTableOptions, block_cache), - sizeof(std::shared_ptr)}, - {offsetof(struct BlockBasedTableOptions, persistent_cache), - sizeof(std::shared_ptr)}, - {offsetof(struct BlockBasedTableOptions, cache_usage_options), - sizeof(CacheUsageOptions)}, - {offsetof(struct BlockBasedTableOptions, filter_policy), - sizeof(std::shared_ptr)}, - }; - - // In this test, we catch a new option of BlockBasedTableOptions that is not - // settable through GetBlockBasedTableOptionsFromString(). - // We count padding bytes of the option struct, and assert it to be the same - // as unset bytes of an option struct initialized by - // GetBlockBasedTableOptionsFromString(). - - char* bbto_ptr = new char[sizeof(BlockBasedTableOptions)]; - - // Count padding bytes by setting all bytes in the memory to a special char, - // copy a well constructed struct to this memory and see how many special - // bytes left. 
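The counting trick described above is easier to see on a toy example. Below is a minimal standalone sketch using a hypothetical ToyOptions struct (not a RocksDB type); like the test itself, it assumes the compiler leaves padding bytes untouched when constructing the object in place, which is compiler-dependent and not guaranteed by the standard.

#include <cassert>
#include <cstddef>
#include <cstring>
#include <new>

struct ToyOptions {
  ToyOptions() {}       // user-provided ctor: value-init will not zero padding
  int first = 1;
  bool second = false;  // usually followed by padding bytes before 'third'
  long third = 2;
};

int main() {
  const char kSpecial = 'z';
  char* buf = new char[sizeof(ToyOptions)];
  std::memset(buf, kSpecial, sizeof(ToyOptions));

  // Construct in place: fields are written, padding (on the targeted
  // compilers) is left holding the special character.
  ToyOptions* opts = new (buf) ToyOptions();

  int untouched = 0;
  for (std::size_t i = 0; i < sizeof(ToyOptions); ++i) {
    if (buf[i] == kSpecial) {
      ++untouched;  // padding bytes, or fields that nothing ever wrote
    }
  }
  // This is the baseline count; repeating it after filling the struct from an
  // options string and seeing a larger number would expose a field that the
  // string parser never set.
  assert(untouched > 0);

  opts->~ToyOptions();
  delete[] buf;
  return 0;
}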
- BlockBasedTableOptions* bbto = new (bbto_ptr) BlockBasedTableOptions(); - FillWithSpecialChar(bbto_ptr, sizeof(BlockBasedTableOptions), kBbtoExcluded); - // It based on the behavior of compiler that padding bytes are not changed - // when copying the struct. It's prone to failure when compiler behavior - // changes. We verify there is unset bytes to detect the case. - *bbto = BlockBasedTableOptions(); - int unset_bytes_base = - NumUnsetBytes(bbto_ptr, sizeof(BlockBasedTableOptions), kBbtoExcluded); - ASSERT_GT(unset_bytes_base, 0); - bbto->~BlockBasedTableOptions(); - - // Construct the base option passed into - // GetBlockBasedTableOptionsFromString(). - bbto = new (bbto_ptr) BlockBasedTableOptions(); - FillWithSpecialChar(bbto_ptr, sizeof(BlockBasedTableOptions), kBbtoExcluded); - // This option is not setable: - bbto->use_delta_encoding = true; - - char* new_bbto_ptr = new char[sizeof(BlockBasedTableOptions)]; - BlockBasedTableOptions* new_bbto = - new (new_bbto_ptr) BlockBasedTableOptions(); - FillWithSpecialChar(new_bbto_ptr, sizeof(BlockBasedTableOptions), - kBbtoExcluded); - - // Need to update the option string if a new option is added. - ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - config_options.invoke_prepare_options = false; - config_options.ignore_unsupported_options = false; - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, *bbto, - "cache_index_and_filter_blocks=1;" - "cache_index_and_filter_blocks_with_high_priority=true;" - "metadata_cache_options={top_level_index_pinning=kFallback;" - "partition_pinning=kAll;" - "unpartitioned_pinning=kFlushedAndSimilar;};" - "pin_l0_filter_and_index_blocks_in_cache=1;" - "pin_top_level_index_and_filter=1;" - "index_type=kHashSearch;" - "data_block_index_type=kDataBlockBinaryAndHash;" - "index_shortening=kNoShortening;" - "data_block_hash_table_util_ratio=0.75;" - "checksum=kxxHash;no_block_cache=1;" - "block_cache=1M;block_cache_compressed=1k;block_size=1024;" - "block_size_deviation=8;block_restart_interval=4; " - "metadata_block_size=1024;" - "partition_filters=false;" - "optimize_filters_for_memory=true;" - "index_block_restart_interval=4;" - "filter_policy=bloomfilter:4:true;whole_key_filtering=1;detect_filter_" - "construct_corruption=false;" - "format_version=1;" - "verify_compression=true;read_amp_bytes_per_bit=0;" - "enable_index_compression=false;" - "block_align=true;" - "max_auto_readahead_size=0;" - "prepopulate_block_cache=kDisable;" - "initial_auto_readahead_size=0;" - "num_file_reads_for_auto_readahead=0", - new_bbto)); - - ASSERT_EQ(unset_bytes_base, - NumUnsetBytes(new_bbto_ptr, sizeof(BlockBasedTableOptions), - kBbtoExcluded)); - - ASSERT_TRUE(new_bbto->block_cache.get() != nullptr); - ASSERT_TRUE(new_bbto->filter_policy.get() != nullptr); - - bbto->~BlockBasedTableOptions(); - new_bbto->~BlockBasedTableOptions(); - - delete[] bbto_ptr; - delete[] new_bbto_ptr; -} - -// If the test fails, likely a new option is added to DBOptions -// but it cannot be set through GetDBOptionsFromString(), or the test is not -// updated accordingly. -// After adding an option, we need to make sure it is settable by -// GetDBOptionsFromString() and add the option to the input string passed to -// DBOptionsFromString()in this test. -// If it is a complicated type, you also need to add the field to -// kDBOptionsExcluded, and maybe add customized verification for it. 
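The test below leans on GetDBOptionsFromString() writing every recognized field and rejecting unknown names. As a reference, here is a minimal usage sketch of that call, with made-up option values and the same argument order used throughout this file:

#include <cassert>
#include <string>

#include "rocksdb/convenience.h"
#include "rocksdb/options.h"

int main() {
  ROCKSDB_NAMESPACE::ConfigOptions config_options;
  config_options.ignore_unknown_options = false;  // unknown names are errors

  ROCKSDB_NAMESPACE::DBOptions base;    // defaults
  ROCKSDB_NAMESPACE::DBOptions parsed;  // defaults overlaid with the string
  ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::GetDBOptionsFromString(
      config_options, base, "max_open_files=128;use_fsync=true", &parsed);
  assert(s.ok());
  assert(parsed.max_open_files == 128);
  assert(parsed.use_fsync);

  // A misspelled name fails the whole call rather than being silently
  // dropped, which is the property the settable test depends on.
  s = ROCKSDB_NAMESPACE::GetDBOptionsFromString(
      config_options, base, "max_open_filez=128", &parsed);
  assert(!s.ok());
  return 0;
}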
-TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { - const OffsetGap kDBOptionsExcluded = { - {offsetof(struct DBOptions, env), sizeof(Env*)}, - {offsetof(struct DBOptions, rate_limiter), - sizeof(std::shared_ptr)}, - {offsetof(struct DBOptions, sst_file_manager), - sizeof(std::shared_ptr)}, - {offsetof(struct DBOptions, info_log), sizeof(std::shared_ptr)}, - {offsetof(struct DBOptions, statistics), - sizeof(std::shared_ptr)}, - {offsetof(struct DBOptions, db_paths), sizeof(std::vector)}, - {offsetof(struct DBOptions, db_log_dir), sizeof(std::string)}, - {offsetof(struct DBOptions, wal_dir), sizeof(std::string)}, - {offsetof(struct DBOptions, write_buffer_manager), - sizeof(std::shared_ptr)}, - {offsetof(struct DBOptions, listeners), - sizeof(std::vector>)}, - {offsetof(struct DBOptions, row_cache), sizeof(std::shared_ptr)}, - {offsetof(struct DBOptions, wal_filter), sizeof(const WalFilter*)}, - {offsetof(struct DBOptions, file_checksum_gen_factory), - sizeof(std::shared_ptr)}, - {offsetof(struct DBOptions, db_host_id), sizeof(std::string)}, - {offsetof(struct DBOptions, checksum_handoff_file_types), - sizeof(FileTypeSet)}, - {offsetof(struct DBOptions, compaction_service), - sizeof(std::shared_ptr)}, - }; - - char* options_ptr = new char[sizeof(DBOptions)]; - - // Count padding bytes by setting all bytes in the memory to a special char, - // copy a well constructed struct to this memory and see how many special - // bytes left. - DBOptions* options = new (options_ptr) DBOptions(); - FillWithSpecialChar(options_ptr, sizeof(DBOptions), kDBOptionsExcluded); - // It based on the behavior of compiler that padding bytes are not changed - // when copying the struct. It's prone to failure when compiler behavior - // changes. We verify there is unset bytes to detect the case. - *options = DBOptions(); - int unset_bytes_base = - NumUnsetBytes(options_ptr, sizeof(DBOptions), kDBOptionsExcluded); - ASSERT_GT(unset_bytes_base, 0); - options->~DBOptions(); - - options = new (options_ptr) DBOptions(); - FillWithSpecialChar(options_ptr, sizeof(DBOptions), kDBOptionsExcluded); - - char* new_options_ptr = new char[sizeof(DBOptions)]; - DBOptions* new_options = new (new_options_ptr) DBOptions(); - FillWithSpecialChar(new_options_ptr, sizeof(DBOptions), kDBOptionsExcluded); - - // Need to update the option string if a new option is added. 
- ConfigOptions config_options(*options); - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - ASSERT_OK( - GetDBOptionsFromString(config_options, *options, - "wal_bytes_per_sync=4295048118;" - "delete_obsolete_files_period_micros=4294967758;" - "WAL_ttl_seconds=4295008036;" - "WAL_size_limit_MB=4295036161;" - "max_write_batch_group_size_bytes=1048576;" - "wal_dir=path/to/wal_dir;" - "db_write_buffer_size=2587;" - "max_subcompactions=64330;" - "table_cache_numshardbits=28;" - "max_open_files=72;" - "max_file_opening_threads=35;" - "max_background_jobs=8;" - "max_background_compactions=33;" - "use_fsync=true;" - "use_adaptive_mutex=false;" - "max_total_wal_size=4295005604;" - "compaction_readahead_size=0;" - "keep_log_file_num=4890;" - "skip_stats_update_on_db_open=false;" - "skip_checking_sst_file_sizes_on_db_open=false;" - "max_manifest_file_size=4295009941;" - "db_log_dir=path/to/db_log_dir;" - "writable_file_max_buffer_size=1048576;" - "paranoid_checks=true;" - "flush_verify_memtable_count=true;" - "track_and_verify_wals_in_manifest=true;" - "verify_sst_unique_id_in_manifest=true;" - "is_fd_close_on_exec=false;" - "bytes_per_sync=4295013613;" - "strict_bytes_per_sync=true;" - "enable_thread_tracking=false;" - "recycle_log_file_num=0;" - "create_missing_column_families=true;" - "log_file_time_to_roll=3097;" - "max_background_flushes=35;" - "create_if_missing=false;" - "error_if_exists=true;" - "delayed_write_rate=4294976214;" - "manifest_preallocation_size=1222;" - "allow_mmap_writes=false;" - "stats_dump_period_sec=70127;" - "stats_persist_period_sec=54321;" - "persist_stats_to_disk=true;" - "stats_history_buffer_size=14159;" - "allow_fallocate=true;" - "allow_mmap_reads=false;" - "use_direct_reads=false;" - "use_direct_io_for_flush_and_compaction=false;" - "max_log_file_size=4607;" - "random_access_max_buffer_size=1048576;" - "advise_random_on_open=true;" - "fail_if_options_file_error=false;" - "enable_pipelined_write=false;" - "unordered_write=false;" - "allow_concurrent_memtable_write=true;" - "wal_recovery_mode=kPointInTimeRecovery;" - "enable_write_thread_adaptive_yield=true;" - "write_thread_slow_yield_usec=5;" - "write_thread_max_yield_usec=1000;" - "access_hint_on_compaction_start=NONE;" - "info_log_level=DEBUG_LEVEL;" - "dump_malloc_stats=false;" - "allow_2pc=false;" - "avoid_flush_during_recovery=false;" - "avoid_flush_during_shutdown=false;" - "allow_ingest_behind=false;" - "concurrent_prepare=false;" - "two_write_queues=false;" - "manual_wal_flush=false;" - "wal_compression=kZSTD;" - "seq_per_batch=false;" - "atomic_flush=false;" - "avoid_unnecessary_blocking_io=false;" - "log_readahead_size=0;" - "write_dbid_to_manifest=false;" - "best_efforts_recovery=false;" - "max_bgerror_resume_count=2;" - "bgerror_resume_retry_interval=1000000;" - "db_host_id=hostname;" - "lowest_used_cache_tier=kNonVolatileBlockTier;" - "allow_data_in_errors=false;" - "enforce_single_del_contracts=false;", - new_options)); - - ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions), - kDBOptionsExcluded)); - - options->~DBOptions(); - new_options->~DBOptions(); - - delete[] options_ptr; - delete[] new_options_ptr; -} - -// If the test fails, likely a new option is added to ColumnFamilyOptions -// but it cannot be set through GetColumnFamilyOptionsFromString(), or the -// test is not updated accordingly. 
-// After adding an option, we need to make sure it is settable by -// GetColumnFamilyOptionsFromString() and add the option to the input -// string passed to GetColumnFamilyOptionsFromString() in this test. -// If it is a complicated type, you also need to add the field to -// kColumnFamilyOptionsExcluded, and maybe add customized verification -// for it. -TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { - // options in the excluded set need to appear in the same order as in - // ColumnFamilyOptions. - const OffsetGap kColumnFamilyOptionsExcluded = { - {offsetof(struct ColumnFamilyOptions, inplace_callback), - sizeof(UpdateStatus(*)(char*, uint32_t*, Slice, std::string*))}, - {offsetof(struct ColumnFamilyOptions, - memtable_insert_with_hint_prefix_extractor), - sizeof(std::shared_ptr)}, - {offsetof(struct ColumnFamilyOptions, compression_per_level), - sizeof(std::vector)}, - {offsetof(struct ColumnFamilyOptions, - max_bytes_for_level_multiplier_additional), - sizeof(std::vector)}, - {offsetof(struct ColumnFamilyOptions, memtable_factory), - sizeof(std::shared_ptr)}, - {offsetof(struct ColumnFamilyOptions, - table_properties_collector_factories), - sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)}, - {offsetof(struct ColumnFamilyOptions, preclude_last_level_data_seconds), - sizeof(uint64_t)}, - {offsetof(struct ColumnFamilyOptions, preserve_internal_time_seconds), - sizeof(uint64_t)}, - {offsetof(struct ColumnFamilyOptions, blob_cache), - sizeof(std::shared_ptr)}, - {offsetof(struct ColumnFamilyOptions, comparator), sizeof(Comparator*)}, - {offsetof(struct ColumnFamilyOptions, merge_operator), - sizeof(std::shared_ptr)}, - {offsetof(struct ColumnFamilyOptions, compaction_filter), - sizeof(const CompactionFilter*)}, - {offsetof(struct ColumnFamilyOptions, compaction_filter_factory), - sizeof(std::shared_ptr)}, - {offsetof(struct ColumnFamilyOptions, prefix_extractor), - sizeof(std::shared_ptr)}, - {offsetof(struct ColumnFamilyOptions, snap_refresh_nanos), - sizeof(uint64_t)}, - {offsetof(struct ColumnFamilyOptions, table_factory), - sizeof(std::shared_ptr)}, - {offsetof(struct ColumnFamilyOptions, cf_paths), - sizeof(std::vector)}, - {offsetof(struct ColumnFamilyOptions, compaction_thread_limiter), - sizeof(std::shared_ptr)}, - {offsetof(struct ColumnFamilyOptions, sst_partitioner_factory), - sizeof(std::shared_ptr)}, - }; - - char* options_ptr = new char[sizeof(ColumnFamilyOptions)]; - - // Count padding bytes by setting all bytes in the memory to a special char, - // copy a well constructed struct to this memory and see how many special - // bytes left. - FillWithSpecialChar(options_ptr, sizeof(ColumnFamilyOptions), - kColumnFamilyOptionsExcluded); - - // Invoke a user-defined constructor in the hope that it does not overwrite - // padding bytes. Note that previously we relied on the implicitly-defined - // copy-assignment operator (i.e., `*options = ColumnFamilyOptions();`) here, - // which did in fact modify padding bytes. 
- ColumnFamilyOptions* options = new (options_ptr) ColumnFamilyOptions(); - - int unset_bytes_base = NumUnsetBytes(options_ptr, sizeof(ColumnFamilyOptions), - kColumnFamilyOptionsExcluded); - ASSERT_GT(unset_bytes_base, 0); - options->~ColumnFamilyOptions(); - - options = new (options_ptr) ColumnFamilyOptions(); - FillWithSpecialChar(options_ptr, sizeof(ColumnFamilyOptions), - kColumnFamilyOptionsExcluded); - - // Following options are not settable through - // GetColumnFamilyOptionsFromString(): - options->compaction_options_universal = CompactionOptionsUniversal(); - options->num_levels = 42; // Initialize options for MutableCF - options->compaction_filter = nullptr; - options->sst_partitioner_factory = nullptr; - - char* new_options_ptr = new char[sizeof(ColumnFamilyOptions)]; - ColumnFamilyOptions* new_options = - new (new_options_ptr) ColumnFamilyOptions(); - FillWithSpecialChar(new_options_ptr, sizeof(ColumnFamilyOptions), - kColumnFamilyOptionsExcluded); - - // Need to update the option string if a new option is added. - ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, *options, - "compaction_filter_factory=mpudlojcujCompactionFilterFactory;" - "table_factory=PlainTable;" - "prefix_extractor=rocksdb.CappedPrefix.13;" - "comparator=leveldb.BytewiseComparator;" - "compression_per_level=kBZip2Compression:kBZip2Compression:" - "kBZip2Compression:kNoCompression:kZlibCompression:kBZip2Compression:" - "kSnappyCompression;" - "max_bytes_for_level_base=986;" - "bloom_locality=8016;" - "target_file_size_base=4294976376;" - "memtable_huge_page_size=2557;" - "max_successive_merges=5497;" - "max_sequential_skip_in_iterations=4294971408;" - "arena_block_size=1893;" - "target_file_size_multiplier=35;" - "min_write_buffer_number_to_merge=9;" - "max_write_buffer_number=84;" - "write_buffer_size=1653;" - "max_compaction_bytes=64;" - "ignore_max_compaction_bytes_for_input=true;" - "max_bytes_for_level_multiplier=60;" - "memtable_factory=SkipListFactory;" - "compression=kNoCompression;" - "compression_opts=5:6:7:8:9:10:true:11:false;" - "bottommost_compression_opts=4:5:6:7:8:9:true:10:true;" - "bottommost_compression=kDisableCompressionOption;" - "level0_stop_writes_trigger=33;" - "num_levels=99;" - "level0_slowdown_writes_trigger=22;" - "level0_file_num_compaction_trigger=14;" - "compaction_filter=urxcqstuwnCompactionFilter;" - "soft_pending_compaction_bytes_limit=0;" - "max_write_buffer_number_to_maintain=84;" - "max_write_buffer_size_to_maintain=2147483648;" - "merge_operator=aabcxehazrMergeOperator;" - "memtable_prefix_bloom_size_ratio=0.4642;" - "memtable_whole_key_filtering=true;" - "memtable_insert_with_hint_prefix_extractor=rocksdb.CappedPrefix.13;" - "check_flush_compaction_key_order=false;" - "paranoid_file_checks=true;" - "force_consistency_checks=true;" - "inplace_update_num_locks=7429;" - "experimental_mempurge_threshold=0.0001;" - "optimize_filters_for_hits=false;" - "level_compaction_dynamic_level_bytes=false;" - "level_compaction_dynamic_file_size=true;" - "inplace_update_support=false;" - "compaction_style=kCompactionStyleFIFO;" - "compaction_pri=kMinOverlappingRatio;" - "hard_pending_compaction_bytes_limit=0;" - "disable_auto_compactions=false;" - "report_bg_io_stats=true;" - "ttl=60;" - "periodic_compaction_seconds=3600;" - "sample_for_compression=0;" - "enable_blob_files=true;" - "min_blob_size=256;" - "blob_file_size=1000000;" - 
"blob_compression_type=kBZip2Compression;" - "enable_blob_garbage_collection=true;" - "blob_garbage_collection_age_cutoff=0.5;" - "blob_garbage_collection_force_threshold=0.75;" - "blob_compaction_readahead_size=262144;" - "blob_file_starting_level=1;" - "prepopulate_blob_cache=kDisable;" - "bottommost_temperature=kWarm;" - "last_level_temperature=kWarm;" - "preclude_last_level_data_seconds=86400;" - "preserve_internal_time_seconds=86400;" - "compaction_options_fifo={max_table_files_size=3;allow_" - "compaction=false;age_for_warm=1;};" - "blob_cache=1M;" - "memtable_protection_bytes_per_key=2;", - new_options)); - - ASSERT_NE(new_options->blob_cache.get(), nullptr); - - ASSERT_EQ(unset_bytes_base, - NumUnsetBytes(new_options_ptr, sizeof(ColumnFamilyOptions), - kColumnFamilyOptionsExcluded)); - - ColumnFamilyOptions rnd_filled_options = *new_options; - - options->~ColumnFamilyOptions(); - new_options->~ColumnFamilyOptions(); - - delete[] options_ptr; - delete[] new_options_ptr; - - // Test copying to mutabable and immutable options and copy back the mutable - // part. - const OffsetGap kMutableCFOptionsExcluded = { - {offsetof(struct MutableCFOptions, prefix_extractor), - sizeof(std::shared_ptr)}, - {offsetof(struct MutableCFOptions, - max_bytes_for_level_multiplier_additional), - sizeof(std::vector)}, - {offsetof(struct MutableCFOptions, compression_per_level), - sizeof(std::vector)}, - {offsetof(struct MutableCFOptions, max_file_size), - sizeof(std::vector)}, - }; - - // For all memory used for options, pre-fill every char. Otherwise, the - // padding bytes might be different so that byte-wise comparison doesn't - // general equal results even if objects are equal. - const char kMySpecialChar = 'x'; - char* mcfo1_ptr = new char[sizeof(MutableCFOptions)]; - FillWithSpecialChar(mcfo1_ptr, sizeof(MutableCFOptions), - kMutableCFOptionsExcluded, kMySpecialChar); - char* mcfo2_ptr = new char[sizeof(MutableCFOptions)]; - FillWithSpecialChar(mcfo2_ptr, sizeof(MutableCFOptions), - kMutableCFOptionsExcluded, kMySpecialChar); - - // A clean column family options is constructed after filling the same special - // char as the initial one. So that the padding bytes are the same. 
- char* cfo_clean_ptr = new char[sizeof(ColumnFamilyOptions)]; - FillWithSpecialChar(cfo_clean_ptr, sizeof(ColumnFamilyOptions), - kColumnFamilyOptionsExcluded); - rnd_filled_options.num_levels = 66; - ColumnFamilyOptions* cfo_clean = new (cfo_clean_ptr) ColumnFamilyOptions(); - - MutableCFOptions* mcfo1 = - new (mcfo1_ptr) MutableCFOptions(rnd_filled_options); - ColumnFamilyOptions cfo_back = BuildColumnFamilyOptions(*cfo_clean, *mcfo1); - MutableCFOptions* mcfo2 = new (mcfo2_ptr) MutableCFOptions(cfo_back); - - ASSERT_TRUE(CompareBytes(mcfo1_ptr, mcfo2_ptr, sizeof(MutableCFOptions), - kMutableCFOptionsExcluded)); - - cfo_clean->~ColumnFamilyOptions(); - mcfo1->~MutableCFOptions(); - mcfo2->~MutableCFOptions(); - delete[] mcfo1_ptr; - delete[] mcfo2_ptr; - delete[] cfo_clean_ptr; -} -#endif // !ROCKSDB_UBSAN_RUN -#endif // !__clang__ -#endif // OS_LINUX || OS_WIN - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); -#ifdef GFLAGS - ParseCommandLineFlags(&argc, &argv, true); -#endif // GFLAGS - return RUN_ALL_TESTS(); -} diff --git a/options/options_test.cc b/options/options_test.cc deleted file mode 100644 index 481259a9e..000000000 --- a/options/options_test.cc +++ /dev/null @@ -1,4976 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include -#include -#include -#include - -#include "cache/lru_cache.h" -#include "cache/sharded_cache.h" -#include "options/options_helper.h" -#include "options/options_parser.h" -#include "port/port.h" -#include "rocksdb/cache.h" -#include "rocksdb/convenience.h" -#include "rocksdb/file_checksum.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/utilities/leveldb_options.h" -#include "rocksdb/utilities/object_registry.h" -#include "rocksdb/utilities/options_type.h" -#include "table/block_based/filter_policy_internal.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/random.h" -#include "util/stderr_logger.h" -#include "util/string_util.h" -#include "utilities/merge_operators/bytesxor.h" -#include "utilities/merge_operators/sortlist.h" -#include "utilities/merge_operators/string_append/stringappend.h" -#include "utilities/merge_operators/string_append/stringappend2.h" - -#ifndef GFLAGS -bool FLAGS_enable_print = false; -#else -#include "util/gflags_compat.h" -using GFLAGS_NAMESPACE::ParseCommandLineFlags; -DEFINE_bool(enable_print, false, "Print options generated to console."); -#endif // GFLAGS - -namespace ROCKSDB_NAMESPACE { - -class OptionsTest : public testing::Test {}; - -class UnregisteredTableFactory : public TableFactory { - public: - UnregisteredTableFactory() {} - const char* Name() const override { return "Unregistered"; } - using TableFactory::NewTableReader; - Status NewTableReader(const ReadOptions&, const TableReaderOptions&, - std::unique_ptr&&, uint64_t, - std::unique_ptr*, bool) const override { - return Status::NotSupported(); - } - TableBuilder* NewTableBuilder(const TableBuilderOptions&, - WritableFileWriter*) const override { - return nullptr; - } -}; - -TEST_F(OptionsTest, GetOptionsFromMapTest) { - std::unordered_map cf_options_map = { - {"write_buffer_size", "1"}, - {"max_write_buffer_number", "2"}, - {"min_write_buffer_number_to_merge", "3"}, - {"max_write_buffer_number_to_maintain", "99"}, - {"max_write_buffer_size_to_maintain", "-99999"}, - {"compression", "kSnappyCompression"}, - {"compression_per_level", - "kNoCompression:" - "kSnappyCompression:" - "kZlibCompression:" - "kBZip2Compression:" - "kLZ4Compression:" - "kLZ4HCCompression:" - "kXpressCompression:" - "kZSTD:" - "kZSTDNotFinalCompression"}, - {"bottommost_compression", "kLZ4Compression"}, - {"bottommost_compression_opts", "5:6:7:8:10:true"}, - {"compression_opts", "4:5:6:7:8:2:true:100:false"}, - {"num_levels", "8"}, - {"level0_file_num_compaction_trigger", "8"}, - {"level0_slowdown_writes_trigger", "9"}, - {"level0_stop_writes_trigger", "10"}, - {"target_file_size_base", "12"}, - {"target_file_size_multiplier", "13"}, - {"max_bytes_for_level_base", "14"}, - {"level_compaction_dynamic_level_bytes", "true"}, - {"max_bytes_for_level_multiplier", "15.0"}, - {"max_bytes_for_level_multiplier_additional", "16:17:18"}, - {"max_compaction_bytes", "21"}, - {"hard_pending_compaction_bytes_limit", "211"}, - {"arena_block_size", "22"}, - {"disable_auto_compactions", "true"}, - {"compaction_style", "kCompactionStyleLevel"}, - {"compaction_pri", "kOldestSmallestSeqFirst"}, - {"verify_checksums_in_compaction", "false"}, - {"compaction_options_fifo", "23"}, - {"max_sequential_skip_in_iterations", "24"}, - {"inplace_update_support", "true"}, - {"report_bg_io_stats", "true"}, - {"compaction_measure_io_stats", "false"}, - {"purge_redundant_kvs_while_flush", "false"}, - {"inplace_update_num_locks", "25"}, - {"memtable_prefix_bloom_size_ratio", "0.26"}, - 
{"memtable_whole_key_filtering", "true"}, - {"memtable_huge_page_size", "28"}, - {"bloom_locality", "29"}, - {"max_successive_merges", "30"}, - {"min_partial_merge_operands", "31"}, - {"prefix_extractor", "fixed:31"}, - {"experimental_mempurge_threshold", "0.003"}, - {"optimize_filters_for_hits", "true"}, - {"enable_blob_files", "true"}, - {"min_blob_size", "1K"}, - {"blob_file_size", "1G"}, - {"blob_compression_type", "kZSTD"}, - {"enable_blob_garbage_collection", "true"}, - {"blob_garbage_collection_age_cutoff", "0.5"}, - {"blob_garbage_collection_force_threshold", "0.75"}, - {"blob_compaction_readahead_size", "256K"}, - {"blob_file_starting_level", "1"}, - {"prepopulate_blob_cache", "kDisable"}, - {"last_level_temperature", "kWarm"}, - }; - - std::unordered_map db_options_map = { - {"create_if_missing", "false"}, - {"create_missing_column_families", "true"}, - {"error_if_exists", "false"}, - {"paranoid_checks", "true"}, - {"track_and_verify_wals_in_manifest", "true"}, - {"verify_sst_unique_id_in_manifest", "true"}, - {"max_open_files", "32"}, - {"max_total_wal_size", "33"}, - {"use_fsync", "true"}, - {"db_log_dir", "/db_log_dir"}, - {"wal_dir", "/wal_dir"}, - {"delete_obsolete_files_period_micros", "34"}, - {"max_background_compactions", "35"}, - {"max_background_flushes", "36"}, - {"max_log_file_size", "37"}, - {"log_file_time_to_roll", "38"}, - {"keep_log_file_num", "39"}, - {"recycle_log_file_num", "5"}, - {"max_manifest_file_size", "40"}, - {"table_cache_numshardbits", "41"}, - {"WAL_ttl_seconds", "43"}, - {"WAL_size_limit_MB", "44"}, - {"manifest_preallocation_size", "45"}, - {"allow_mmap_reads", "true"}, - {"allow_mmap_writes", "false"}, - {"use_direct_reads", "false"}, - {"use_direct_io_for_flush_and_compaction", "false"}, - {"is_fd_close_on_exec", "true"}, - {"skip_log_error_on_recovery", "false"}, - {"stats_dump_period_sec", "46"}, - {"stats_persist_period_sec", "57"}, - {"persist_stats_to_disk", "false"}, - {"stats_history_buffer_size", "69"}, - {"advise_random_on_open", "true"}, - {"use_adaptive_mutex", "false"}, - {"compaction_readahead_size", "100"}, - {"random_access_max_buffer_size", "3145728"}, - {"writable_file_max_buffer_size", "314159"}, - {"bytes_per_sync", "47"}, - {"wal_bytes_per_sync", "48"}, - {"strict_bytes_per_sync", "true"}, - {"preserve_deletes", "false"}, - }; - - ColumnFamilyOptions base_cf_opt; - ColumnFamilyOptions new_cf_opt; - ConfigOptions exact, loose; - exact.input_strings_escaped = false; - exact.ignore_unknown_options = false; - exact.sanity_level = ConfigOptions::kSanityLevelExactMatch; - loose.sanity_level = ConfigOptions::kSanityLevelLooselyCompatible; - - loose.input_strings_escaped = false; - loose.ignore_unknown_options = true; - ASSERT_OK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, - &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 1U); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 2); - ASSERT_EQ(new_cf_opt.min_write_buffer_number_to_merge, 3); - ASSERT_EQ(new_cf_opt.max_write_buffer_number_to_maintain, 99); - ASSERT_EQ(new_cf_opt.max_write_buffer_size_to_maintain, -99999); - ASSERT_EQ(new_cf_opt.compression, kSnappyCompression); - ASSERT_EQ(new_cf_opt.compression_per_level.size(), 9U); - ASSERT_EQ(new_cf_opt.compression_per_level[0], kNoCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[1], kSnappyCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[2], kZlibCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[3], kBZip2Compression); - ASSERT_EQ(new_cf_opt.compression_per_level[4], 
kLZ4Compression); - ASSERT_EQ(new_cf_opt.compression_per_level[5], kLZ4HCCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[6], kXpressCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[7], kZSTD); - ASSERT_EQ(new_cf_opt.compression_per_level[8], kZSTDNotFinalCompression); - ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 4); - ASSERT_EQ(new_cf_opt.compression_opts.level, 5); - ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6); - ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u); - ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u); - ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 2u); - ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); - ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 100u); - ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false); - ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_bytes, 8u); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 10u); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, - CompressionOptions().parallel_threads); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, - CompressionOptions().use_zstd_dict_trainer); - ASSERT_EQ(new_cf_opt.num_levels, 8); - ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8); - ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9); - ASSERT_EQ(new_cf_opt.level0_stop_writes_trigger, 10); - ASSERT_EQ(new_cf_opt.target_file_size_base, static_cast(12)); - ASSERT_EQ(new_cf_opt.target_file_size_multiplier, 13); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_base, 14U); - ASSERT_EQ(new_cf_opt.level_compaction_dynamic_level_bytes, true); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier, 15.0); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional.size(), 3U); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[0], 16); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[1], 17); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[2], 18); - ASSERT_EQ(new_cf_opt.max_compaction_bytes, 21); - ASSERT_EQ(new_cf_opt.hard_pending_compaction_bytes_limit, 211); - ASSERT_EQ(new_cf_opt.arena_block_size, 22U); - ASSERT_EQ(new_cf_opt.disable_auto_compactions, true); - ASSERT_EQ(new_cf_opt.compaction_style, kCompactionStyleLevel); - ASSERT_EQ(new_cf_opt.compaction_pri, kOldestSmallestSeqFirst); - ASSERT_EQ(new_cf_opt.compaction_options_fifo.max_table_files_size, - static_cast(23)); - ASSERT_EQ(new_cf_opt.max_sequential_skip_in_iterations, - static_cast(24)); - ASSERT_EQ(new_cf_opt.inplace_update_support, true); - ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 25U); - ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_size_ratio, 0.26); - ASSERT_EQ(new_cf_opt.memtable_whole_key_filtering, true); - ASSERT_EQ(new_cf_opt.memtable_huge_page_size, 28U); - ASSERT_EQ(new_cf_opt.bloom_locality, 29U); - ASSERT_EQ(new_cf_opt.max_successive_merges, 30U); - ASSERT_TRUE(new_cf_opt.prefix_extractor != nullptr); - ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true); - ASSERT_EQ(new_cf_opt.prefix_extractor->AsString(), "rocksdb.FixedPrefix.31"); - ASSERT_EQ(new_cf_opt.experimental_mempurge_threshold, 0.003); - 
ASSERT_EQ(new_cf_opt.enable_blob_files, true); - ASSERT_EQ(new_cf_opt.min_blob_size, 1ULL << 10); - ASSERT_EQ(new_cf_opt.blob_file_size, 1ULL << 30); - ASSERT_EQ(new_cf_opt.blob_compression_type, kZSTD); - ASSERT_EQ(new_cf_opt.enable_blob_garbage_collection, true); - ASSERT_EQ(new_cf_opt.blob_garbage_collection_age_cutoff, 0.5); - ASSERT_EQ(new_cf_opt.blob_garbage_collection_force_threshold, 0.75); - ASSERT_EQ(new_cf_opt.blob_compaction_readahead_size, 262144); - ASSERT_EQ(new_cf_opt.blob_file_starting_level, 1); - ASSERT_EQ(new_cf_opt.prepopulate_blob_cache, PrepopulateBlobCache::kDisable); - ASSERT_EQ(new_cf_opt.last_level_temperature, Temperature::kWarm); - ASSERT_EQ(new_cf_opt.bottommost_temperature, Temperature::kWarm); - - cf_options_map["write_buffer_size"] = "hello"; - ASSERT_NOK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, - &new_cf_opt)); - ASSERT_OK( - RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - cf_options_map["write_buffer_size"] = "1"; - ASSERT_OK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, - &new_cf_opt)); - - cf_options_map["unknown_option"] = "1"; - ASSERT_NOK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, - &new_cf_opt)); - ASSERT_OK( - RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - // ignore_unknown_options=true;input_strings_escaped=false - ASSERT_OK(GetColumnFamilyOptionsFromMap(loose, base_cf_opt, cf_options_map, - &new_cf_opt)); - ASSERT_OK( - RocksDBOptionsParser::VerifyCFOptions(loose, base_cf_opt, new_cf_opt)); - ASSERT_NOK( - RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - DBOptions base_db_opt; - DBOptions new_db_opt; - ASSERT_OK( - GetDBOptionsFromMap(exact, base_db_opt, db_options_map, &new_db_opt)); - ASSERT_EQ(new_db_opt.create_if_missing, false); - ASSERT_EQ(new_db_opt.create_missing_column_families, true); - ASSERT_EQ(new_db_opt.error_if_exists, false); - ASSERT_EQ(new_db_opt.paranoid_checks, true); - ASSERT_EQ(new_db_opt.track_and_verify_wals_in_manifest, true); - ASSERT_EQ(new_db_opt.verify_sst_unique_id_in_manifest, true); - ASSERT_EQ(new_db_opt.max_open_files, 32); - ASSERT_EQ(new_db_opt.max_total_wal_size, static_cast(33)); - ASSERT_EQ(new_db_opt.use_fsync, true); - ASSERT_EQ(new_db_opt.db_log_dir, "/db_log_dir"); - ASSERT_EQ(new_db_opt.wal_dir, "/wal_dir"); - ASSERT_EQ(new_db_opt.delete_obsolete_files_period_micros, - static_cast(34)); - ASSERT_EQ(new_db_opt.max_background_compactions, 35); - ASSERT_EQ(new_db_opt.max_background_flushes, 36); - ASSERT_EQ(new_db_opt.max_log_file_size, 37U); - ASSERT_EQ(new_db_opt.log_file_time_to_roll, 38U); - ASSERT_EQ(new_db_opt.keep_log_file_num, 39U); - ASSERT_EQ(new_db_opt.recycle_log_file_num, 5U); - ASSERT_EQ(new_db_opt.max_manifest_file_size, static_cast(40)); - ASSERT_EQ(new_db_opt.table_cache_numshardbits, 41); - ASSERT_EQ(new_db_opt.WAL_ttl_seconds, static_cast(43)); - ASSERT_EQ(new_db_opt.WAL_size_limit_MB, static_cast(44)); - ASSERT_EQ(new_db_opt.manifest_preallocation_size, 45U); - ASSERT_EQ(new_db_opt.allow_mmap_reads, true); - ASSERT_EQ(new_db_opt.allow_mmap_writes, false); - ASSERT_EQ(new_db_opt.use_direct_reads, false); - ASSERT_EQ(new_db_opt.use_direct_io_for_flush_and_compaction, false); - ASSERT_EQ(new_db_opt.is_fd_close_on_exec, true); - ASSERT_EQ(new_db_opt.stats_dump_period_sec, 46U); - ASSERT_EQ(new_db_opt.stats_persist_period_sec, 57U); - ASSERT_EQ(new_db_opt.persist_stats_to_disk, false); - ASSERT_EQ(new_db_opt.stats_history_buffer_size, 69U); - 
ASSERT_EQ(new_db_opt.advise_random_on_open, true); - ASSERT_EQ(new_db_opt.use_adaptive_mutex, false); - ASSERT_EQ(new_db_opt.compaction_readahead_size, 100); - ASSERT_EQ(new_db_opt.random_access_max_buffer_size, 3145728); - ASSERT_EQ(new_db_opt.writable_file_max_buffer_size, 314159); - ASSERT_EQ(new_db_opt.bytes_per_sync, static_cast(47)); - ASSERT_EQ(new_db_opt.wal_bytes_per_sync, static_cast(48)); - ASSERT_EQ(new_db_opt.strict_bytes_per_sync, true); - - db_options_map["max_open_files"] = "hello"; - Status s = - GetDBOptionsFromMap(exact, base_db_opt, db_options_map, &new_db_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - - ASSERT_OK( - RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); - ASSERT_OK( - RocksDBOptionsParser::VerifyDBOptions(loose, base_db_opt, new_db_opt)); - - // unknow options should fail parsing without ignore_unknown_options = true - db_options_map["unknown_db_option"] = "1"; - s = GetDBOptionsFromMap(exact, base_db_opt, db_options_map, &new_db_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_OK( - RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); - - ASSERT_OK( - GetDBOptionsFromMap(loose, base_db_opt, db_options_map, &new_db_opt)); - ASSERT_OK( - RocksDBOptionsParser::VerifyDBOptions(loose, base_db_opt, new_db_opt)); - ASSERT_NOK( - RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); -} - -TEST_F(OptionsTest, GetColumnFamilyOptionsFromStringTest) { - ColumnFamilyOptions base_cf_opt; - ColumnFamilyOptions new_cf_opt; - ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - - base_cf_opt.table_factory.reset(); - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, "", - &new_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "write_buffer_size=5", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 5U); - ASSERT_TRUE(new_cf_opt.table_factory == nullptr); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "write_buffer_size=6;", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 6U); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, " write_buffer_size = 7 ", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 7U); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, " write_buffer_size = 8 ; ", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 8U); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=9;max_write_buffer_number=10", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 9U); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 10); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=11; max_write_buffer_number = 12 ;", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 11U); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 12); - // Wrong name "max_write_buffer_number_" - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=13;max_write_buffer_number_=14;", &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - // Comparator from object registry - std::string kCompName = "reverse_comp"; - ObjectLibrary::Default()->AddFactory( - kCompName, - [](const std::string& /*name*/, - std::unique_ptr* /*guard*/, - std::string* /* errmsg */) { return 
ReverseBytewiseComparator(); }); - - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "comparator=" + kCompName + ";", - &new_cf_opt)); - ASSERT_EQ(new_cf_opt.comparator, ReverseBytewiseComparator()); - - // MergeOperator from object registry - std::unique_ptr bxo(new BytesXOROperator()); - std::string kMoName = bxo->Name(); - - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "merge_operator=" + kMoName + ";", - &new_cf_opt)); - ASSERT_EQ(kMoName, std::string(new_cf_opt.merge_operator->Name())); - - // Wrong key/value pair - Status s = GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=13;max_write_buffer_number;", &new_cf_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - // Error Parsing value - s = GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=13;max_write_buffer_number=;", &new_cf_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - // Missing option name - s = GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "write_buffer_size=13; =100;", &new_cf_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - const uint64_t kilo = 1024UL; - const uint64_t mega = 1024 * kilo; - const uint64_t giga = 1024 * mega; - const uint64_t tera = 1024 * giga; - - // Units (k) - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "max_write_buffer_number=15K", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 15 * kilo); - // Units (m) - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "max_write_buffer_number=16m;inplace_update_num_locks=17M", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 16 * mega); - ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 17u * mega); - // Units (g) - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=18g;prefix_extractor=capped:8;" - "arena_block_size=19G", - &new_cf_opt)); - - ASSERT_EQ(new_cf_opt.write_buffer_size, 18 * giga); - ASSERT_EQ(new_cf_opt.arena_block_size, 19 * giga); - ASSERT_TRUE(new_cf_opt.prefix_extractor.get() != nullptr); - ASSERT_EQ(new_cf_opt.prefix_extractor->AsString(), "rocksdb.CappedPrefix.8"); - - // Units (t) - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "write_buffer_size=20t;arena_block_size=21T", - &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 20 * tera); - ASSERT_EQ(new_cf_opt.arena_block_size, 21 * tera); - - // Nested block based table options - // Empty - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={};arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - // Non-empty - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_cache=1M;block_size=4;};" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - // Last one - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - 
"write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_cache=1M;block_size=4;}", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - // Mismatch curly braces - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={{{block_size=4;};" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - // Unexpected chars after closing curly brace - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_size=4;}};" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_size=4;}xdfa;" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_size=4;}xdfa", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - // Invalid block based table option - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={xx_block_size=4;}", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "optimize_filters_for_hits=true", - &new_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "optimize_filters_for_hits=false", - &new_cf_opt)); - - ASSERT_NOK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "optimize_filters_for_hits=junk", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, - new_cf_opt)); - - // Nested plain table options - // Empty - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "plain_table_factory={};arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - ASSERT_EQ(std::string(new_cf_opt.table_factory->Name()), "PlainTable"); - // Non-empty - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "plain_table_factory={user_key_len=66;bloom_bits_per_key=20;};" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - ASSERT_EQ(std::string(new_cf_opt.table_factory->Name()), "PlainTable"); - - // memtable factory - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "memtable=skip_list:10;arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr); - ASSERT_EQ(std::string(new_cf_opt.memtable_factory->Name()), "SkipListFactory"); - ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory")); - - // blob cache - 
ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "blob_cache={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;high_pri_pool_ratio=0.5;};", - &new_cf_opt)); - ASSERT_NE(new_cf_opt.blob_cache, nullptr); - ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL); - ASSERT_EQ(static_cast(new_cf_opt.blob_cache.get()) - ->GetNumShardBits(), - 4); - ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true); - ASSERT_EQ(static_cast(new_cf_opt.blob_cache.get()) - ->GetHighPriPoolRatio(), - 0.5); -} - -TEST_F(OptionsTest, CompressionOptionsFromString) { - ColumnFamilyOptions base_cf_opt; - ColumnFamilyOptions new_cf_opt; - ConfigOptions config_options; - std::string opts_str; - config_options.ignore_unknown_options = false; - CompressionOptions dflt; - // Test with some optional values removed.... - ASSERT_OK( - GetColumnFamilyOptionsFromString(config_options, ColumnFamilyOptions(), - "compression_opts=3:4:5; " - "bottommost_compression_opts=4:5:6:7", - &base_cf_opt)); - ASSERT_EQ(base_cf_opt.compression_opts.window_bits, 3); - ASSERT_EQ(base_cf_opt.compression_opts.level, 4); - ASSERT_EQ(base_cf_opt.compression_opts.strategy, 5); - ASSERT_EQ(base_cf_opt.compression_opts.max_dict_bytes, dflt.max_dict_bytes); - ASSERT_EQ(base_cf_opt.compression_opts.zstd_max_train_bytes, - dflt.zstd_max_train_bytes); - ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads, - dflt.parallel_threads); - ASSERT_EQ(base_cf_opt.compression_opts.enabled, dflt.enabled); - ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer, - dflt.use_zstd_dict_trainer); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 4); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 5); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 6); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.max_dict_bytes, 7u); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, - dflt.zstd_max_train_bytes); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads, - dflt.parallel_threads); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, dflt.enabled); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, - dflt.use_zstd_dict_trainer); - - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, ColumnFamilyOptions(), - "compression_opts=4:5:6:7:8:9:true:10:false; " - "bottommost_compression_opts=5:6:7:8:9:false", - &base_cf_opt)); - ASSERT_EQ(base_cf_opt.compression_opts.window_bits, 4); - ASSERT_EQ(base_cf_opt.compression_opts.level, 5); - ASSERT_EQ(base_cf_opt.compression_opts.strategy, 6); - ASSERT_EQ(base_cf_opt.compression_opts.max_dict_bytes, 7u); - ASSERT_EQ(base_cf_opt.compression_opts.zstd_max_train_bytes, 8u); - ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads, 9u); - ASSERT_EQ(base_cf_opt.compression_opts.enabled, true); - ASSERT_EQ(base_cf_opt.compression_opts.max_dict_buffer_bytes, 10u); - ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer, false); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 5); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 6); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 7); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.max_dict_bytes, 8u); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 9u); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads, - dflt.parallel_threads); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, false); - 
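A note on the colon syntax exercised above, with a minimal standalone sketch (not part of the original test; it assumes only the public GetColumnFamilyOptionsFromString API): the fields are filled positionally as window_bits:level:strategy:max_dict_bytes:..., and any trailing fields left off keep their CompressionOptions defaults, which is exactly what the surrounding assertions check.

#include <cassert>
#include "rocksdb/convenience.h"  // GetColumnFamilyOptionsFromString, ConfigOptions
#include "rocksdb/options.h"
using namespace ROCKSDB_NAMESPACE;

// Sketch: only the first three positional fields are supplied; the remaining
// ones (max_dict_bytes, zstd_max_train_bytes, ...) stay at their defaults.
void CompressionOptsPositionalSketch() {
  ConfigOptions cfg;
  cfg.ignore_unknown_options = false;
  ColumnFamilyOptions base, out;
  Status s = GetColumnFamilyOptionsFromString(cfg, base,
                                              "compression_opts=3:4:5", &out);
  assert(s.ok());
  assert(out.compression_opts.window_bits == 3);
  assert(out.compression_opts.level == 4);
  assert(out.compression_opts.strategy == 5);
  assert(out.compression_opts.max_dict_bytes ==
         CompressionOptions().max_dict_bytes);
}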
ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, - dflt.use_zstd_dict_trainer); - - ASSERT_OK( - GetStringFromColumnFamilyOptions(config_options, base_cf_opt, &opts_str)); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, ColumnFamilyOptions(), opts_str, &new_cf_opt)); - ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 4); - ASSERT_EQ(new_cf_opt.compression_opts.level, 5); - ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6); - ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u); - ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u); - ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 9u); - ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); - ASSERT_EQ(base_cf_opt.compression_opts.max_dict_buffer_bytes, 10u); - ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer, false); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_bytes, 8u); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 9u); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, - dflt.parallel_threads); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false); - ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, - dflt.use_zstd_dict_trainer); - - // Test as struct values - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, ColumnFamilyOptions(), - "compression_opts={window_bits=5; level=6; strategy=7; max_dict_bytes=8;" - "zstd_max_train_bytes=9;parallel_threads=10;enabled=true;use_zstd_dict_" - "trainer=false}; " - "bottommost_compression_opts={window_bits=4; level=5; strategy=6;" - " max_dict_bytes=7;zstd_max_train_bytes=8;parallel_threads=9;" - "enabled=false;use_zstd_dict_trainer=true}; ", - &new_cf_opt)); - ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 5); - ASSERT_EQ(new_cf_opt.compression_opts.level, 6); - ASSERT_EQ(new_cf_opt.compression_opts.strategy, 7); - ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 8u); - ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 9u); - ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 10u); - ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); - ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 4); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 5); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 6); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_bytes, 7u); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 8u); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, 9u); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, true); - - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "compression_opts={window_bits=4; strategy=5;};" - "bottommost_compression_opts={level=6; strategy=7;}", - &new_cf_opt)); - ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 4); - ASSERT_EQ(new_cf_opt.compression_opts.strategy, 5); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7); - - ASSERT_EQ(new_cf_opt.compression_opts.level, - base_cf_opt.compression_opts.level); - 
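The brace-delimited struct form used just above names each field explicitly, and fields that are not mentioned are carried over from the base options rather than reset, which the surrounding comparisons against base_cf_opt confirm. A rough standalone sketch of that behavior (illustrative values, public API only):

#include <cassert>
#include "rocksdb/convenience.h"
#include "rocksdb/options.h"
using namespace ROCKSDB_NAMESPACE;

void CompressionOptsStructSketch() {
  ConfigOptions cfg;
  ColumnFamilyOptions base, out;
  base.compression_opts.level = 11;  // expected to survive the update below
  Status s = GetColumnFamilyOptionsFromString(
      cfg, base, "compression_opts={window_bits=4;strategy=5;}", &out);
  assert(s.ok());
  assert(out.compression_opts.window_bits == 4);  // overridden
  assert(out.compression_opts.strategy == 5);     // overridden
  assert(out.compression_opts.level == 11);       // inherited from base
}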
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, - base_cf_opt.compression_opts.max_dict_bytes); - ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, - base_cf_opt.compression_opts.zstd_max_train_bytes); - ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, - base_cf_opt.compression_opts.parallel_threads); - ASSERT_EQ(new_cf_opt.compression_opts.enabled, - base_cf_opt.compression_opts.enabled); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, - base_cf_opt.bottommost_compression_opts.window_bits); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_bytes, - base_cf_opt.bottommost_compression_opts.max_dict_bytes); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, - base_cf_opt.bottommost_compression_opts.zstd_max_train_bytes); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, - base_cf_opt.bottommost_compression_opts.parallel_threads); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, - base_cf_opt.bottommost_compression_opts.enabled); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, - base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer); - - // Test a few individual struct values - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "compression_opts.enabled=false; " - "bottommost_compression_opts.enabled=true; ", - &new_cf_opt)); - ASSERT_EQ(new_cf_opt.compression_opts.enabled, false); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true); - - // Now test some illegal values - ConfigOptions ignore; - ignore.ignore_unknown_options = true; - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, ColumnFamilyOptions(), - "compression_opts=5:6:7:8:9:x:false", &base_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString( - ignore, ColumnFamilyOptions(), "compression_opts=5:6:7:8:9:x:false", - &base_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, ColumnFamilyOptions(), - "compression_opts=1:2:3:4:5:6:true:8", &base_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString( - ignore, ColumnFamilyOptions(), "compression_opts=1:2:3:4:5:6:true:8", - &base_cf_opt)); - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, ColumnFamilyOptions(), - "compression_opts=1:2:3:4:5:6:true:8:9", &base_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString( - ignore, ColumnFamilyOptions(), "compression_opts=1:2:3:4:5:6:true:8:9", - &base_cf_opt)); - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, ColumnFamilyOptions(), "compression_opts={unknown=bad;}", - &base_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString(ignore, ColumnFamilyOptions(), - "compression_opts={unknown=bad;}", - &base_cf_opt)); - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, ColumnFamilyOptions(), "compression_opts.unknown=bad", - &base_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString(ignore, ColumnFamilyOptions(), - "compression_opts.unknown=bad", - &base_cf_opt)); -} - -TEST_F(OptionsTest, OldInterfaceTest) { - ColumnFamilyOptions base_cf_opt; - ColumnFamilyOptions new_cf_opt; - ConfigOptions exact; - ConfigOptions cf_config_options; - cf_config_options.input_strings_escaped = false; - cf_config_options.ignore_unknown_options = false; - ASSERT_OK(GetColumnFamilyOptionsFromString( - cf_config_options, base_cf_opt, - "write_buffer_size=18;prefix_extractor=capped:8;" - "arena_block_size=19", - &new_cf_opt)); - - ASSERT_EQ(new_cf_opt.write_buffer_size, 18); - ASSERT_EQ(new_cf_opt.arena_block_size, 19); 
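The "illegal values" block above pairs each malformed string with two ConfigOptions: a strict one that must fail and one with ignore_unknown_options=true that must succeed. A condensed standalone sketch of that contrast, using the same malformed nested field the test uses:

#include <cassert>
#include <string>
#include "rocksdb/convenience.h"
#include "rocksdb/options.h"
using namespace ROCKSDB_NAMESPACE;

void StrictVsLenientSketch() {
  ConfigOptions strict, lenient;
  strict.ignore_unknown_options = false;
  lenient.ignore_unknown_options = true;
  ColumnFamilyOptions out;
  const std::string bad = "compression_opts={unknown=bad;}";
  // Unknown nested field: rejected when strict, tolerated when lenient.
  assert(!GetColumnFamilyOptionsFromString(strict, ColumnFamilyOptions(), bad,
                                           &out)
              .ok());
  assert(GetColumnFamilyOptionsFromString(lenient, ColumnFamilyOptions(), bad,
                                          &out)
             .ok());
}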
- ASSERT_TRUE(new_cf_opt.prefix_extractor.get() != nullptr); - - // And with a bad option - ASSERT_NOK(GetColumnFamilyOptionsFromString( - cf_config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={xx_block_size=4;}", - &new_cf_opt)); - ASSERT_OK( - RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - std::unordered_map cf_options_map = { - {"write_buffer_size", "1"}, - {"max_write_buffer_number", "2"}, - {"min_write_buffer_number_to_merge", "3"}, - }; - ASSERT_OK(GetColumnFamilyOptionsFromMap(cf_config_options, base_cf_opt, - cf_options_map, &new_cf_opt)); - cf_options_map["unknown_option"] = "1"; - ASSERT_NOK(GetColumnFamilyOptionsFromMap(cf_config_options, base_cf_opt, - cf_options_map, &new_cf_opt)); - ASSERT_OK( - RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - cf_config_options.input_strings_escaped = true; - cf_config_options.ignore_unknown_options = true; - ASSERT_OK(GetColumnFamilyOptionsFromMap(cf_config_options, base_cf_opt, - cf_options_map, &new_cf_opt)); - - DBOptions base_db_opt; - DBOptions new_db_opt; - std::unordered_map db_options_map = { - {"create_if_missing", "false"}, - {"create_missing_column_families", "true"}, - {"error_if_exists", "false"}, - {"paranoid_checks", "true"}, - {"track_and_verify_wals_in_manifest", "true"}, - {"verify_sst_unique_id_in_manifest", "true"}, - {"max_open_files", "32"}, - }; - - ConfigOptions db_config_options(base_db_opt); - db_config_options.input_strings_escaped = false; - db_config_options.ignore_unknown_options = false; - ASSERT_OK(GetDBOptionsFromMap(db_config_options, base_db_opt, db_options_map, - &new_db_opt)); - ASSERT_EQ(new_db_opt.create_if_missing, false); - ASSERT_EQ(new_db_opt.create_missing_column_families, true); - ASSERT_EQ(new_db_opt.error_if_exists, false); - ASSERT_EQ(new_db_opt.paranoid_checks, true); - ASSERT_EQ(new_db_opt.track_and_verify_wals_in_manifest, true); - ASSERT_EQ(new_db_opt.verify_sst_unique_id_in_manifest, true); - ASSERT_EQ(new_db_opt.max_open_files, 32); - db_options_map["unknown_option"] = "1"; - Status s = GetDBOptionsFromMap(db_config_options, base_db_opt, db_options_map, - &new_db_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - - ASSERT_OK( - RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); - db_config_options.input_strings_escaped = true; - db_config_options.ignore_unknown_options = true; - ASSERT_OK(GetDBOptionsFromMap(db_config_options, base_db_opt, db_options_map, - &new_db_opt)); - db_config_options.input_strings_escaped = false; - db_config_options.ignore_unknown_options = false; - ASSERT_OK(GetDBOptionsFromString( - db_config_options, base_db_opt, - "create_if_missing=false;error_if_exists=false;max_open_files=42;", - &new_db_opt)); - ASSERT_EQ(new_db_opt.create_if_missing, false); - ASSERT_EQ(new_db_opt.error_if_exists, false); - ASSERT_EQ(new_db_opt.max_open_files, 42); - s = GetDBOptionsFromString( - db_config_options, base_db_opt, - "create_if_missing=false;error_if_exists=false;max_open_files=42;" - "unknown_option=1;", - &new_db_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_OK( - RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); -} - - -TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { - BlockBasedTableOptions table_opt; - BlockBasedTableOptions new_opt; - ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - 
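GetDBOptionsFromMap, used in the old-interface test above, accepts the same option names as the string form but pre-split into a map, which suits callers whose configuration already arrives as key/value pairs. A small standalone sketch (public API only; the values are arbitrary):

#include <cassert>
#include <string>
#include <unordered_map>
#include "rocksdb/convenience.h"
#include "rocksdb/options.h"
using namespace ROCKSDB_NAMESPACE;

void DBOptionsFromMapSketch() {
  ConfigOptions cfg;
  cfg.ignore_unknown_options = false;
  DBOptions base, out;
  std::unordered_map<std::string, std::string> m = {
      {"create_if_missing", "true"}, {"max_open_files", "64"}};
  assert(GetDBOptionsFromMap(cfg, base, m, &out).ok());
  assert(out.create_if_missing);
  assert(out.max_open_files == 64);
  // An unknown key turns the whole call into InvalidArgument when strict.
  m["no_such_option"] = "1";
  assert(GetDBOptionsFromMap(cfg, base, m, &out).IsInvalidArgument());
}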
config_options.ignore_unsupported_options = false; - - // make sure default values are overwritten by something else - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "cache_index_and_filter_blocks=1;index_type=kHashSearch;" - "checksum=kxxHash;" - "block_cache=1M;block_cache_compressed=1k;block_size=1024;" - "block_size_deviation=8;block_restart_interval=4;" - "format_version=5;whole_key_filtering=1;" - "filter_policy=bloomfilter:4.567:false;detect_filter_construct_" - "corruption=true;" - // A bug caused read_amp_bytes_per_bit to be a large integer in OPTIONS - // file generated by 6.10 to 6.14. Though bug is fixed in these releases, - // we need to handle the case of loading OPTIONS file generated before the - // fix. - "read_amp_bytes_per_bit=17179869185;", - &new_opt)); - ASSERT_TRUE(new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(new_opt.index_type, BlockBasedTableOptions::kHashSearch); - ASSERT_EQ(new_opt.checksum, ChecksumType::kxxHash); - ASSERT_TRUE(new_opt.block_cache != nullptr); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL); - ASSERT_EQ(new_opt.block_size, 1024UL); - ASSERT_EQ(new_opt.block_size_deviation, 8); - ASSERT_EQ(new_opt.block_restart_interval, 4); - ASSERT_EQ(new_opt.format_version, 5U); - ASSERT_EQ(new_opt.whole_key_filtering, true); - ASSERT_EQ(new_opt.detect_filter_construct_corruption, true); - ASSERT_TRUE(new_opt.filter_policy != nullptr); - auto bfp = new_opt.filter_policy->CheckedCast(); - ASSERT_NE(bfp, nullptr); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 4567); - EXPECT_EQ(bfp->GetWholeBitsPerKey(), 5); - // Verify that only the lower 32bits are stored in - // new_opt.read_amp_bytes_per_bit. - EXPECT_EQ(1U, new_opt.read_amp_bytes_per_bit); - - // unknown option - Status s = GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "cache_index_and_filter_blocks=1;index_type=kBinarySearch;" - "bad_option=1", - &new_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_EQ(static_cast(table_opt.cache_index_and_filter_blocks), - new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(table_opt.index_type, new_opt.index_type); - - // unrecognized index type - s = GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "cache_index_and_filter_blocks=1;index_type=kBinarySearchXX", &new_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_EQ(table_opt.cache_index_and_filter_blocks, - new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(table_opt.index_type, new_opt.index_type); - - // unrecognized checksum type - ASSERT_NOK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "cache_index_and_filter_blocks=1;checksum=kxxHashXX", &new_opt)); - ASSERT_EQ(table_opt.cache_index_and_filter_blocks, - new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(table_opt.index_type, new_opt.index_type); - - // unrecognized filter policy name - s = GetBlockBasedTableOptionsFromString(config_options, table_opt, - "filter_policy=bloomfilterxx:4:true", - &new_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - - // missing bits per key - s = GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=bloomfilter", &new_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - - // Used to be rejected, now accepted - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=bloomfilter:4", &new_opt)); - bfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); - 
EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); - - // use_block_based_builder=true now ignored in public API (same as false) - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=bloomfilter:4:true", &new_opt)); - bfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); - EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); - - // Test configuring using other internal names - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "filter_policy=rocksdb.internal.LegacyBloomFilter:3", &new_opt)); - auto builtin = - dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(builtin->GetId(), "rocksdb.internal.LegacyBloomFilter:3"); - - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "filter_policy=rocksdb.internal.FastLocalBloomFilter:1.234", &new_opt)); - builtin = - dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(builtin->GetId(), "rocksdb.internal.FastLocalBloomFilter:1.234"); - - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "filter_policy=rocksdb.internal.Standard128RibbonFilter:1.234", - &new_opt)); - builtin = - dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(builtin->GetId(), "rocksdb.internal.Standard128RibbonFilter:1.234"); - - // Ribbon filter policy (no Bloom hybrid) - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=ribbonfilter:5.678:-1;", - &new_opt)); - ASSERT_TRUE(new_opt.filter_policy != nullptr); - auto rfp = - dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(rfp->GetMillibitsPerKey(), 5678); - EXPECT_EQ(rfp->GetBloomBeforeLevel(), -1); - - // Ribbon filter policy (default Bloom hybrid) - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=ribbonfilter:6.789;", - &new_opt)); - ASSERT_TRUE(new_opt.filter_policy != nullptr); - rfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(rfp->GetMillibitsPerKey(), 6789); - EXPECT_EQ(rfp->GetBloomBeforeLevel(), 0); - - // Ribbon filter policy (custom Bloom hybrid) - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=ribbonfilter:6.789:5;", - &new_opt)); - ASSERT_TRUE(new_opt.filter_policy != nullptr); - rfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(rfp->GetMillibitsPerKey(), 6789); - EXPECT_EQ(rfp->GetBloomBeforeLevel(), 5); - - // Check block cache options are overwritten when specified - // in new format as a struct. - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "block_cache={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;high_pri_pool_ratio=0.5;};" - "block_cache_compressed={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;high_pri_pool_ratio=0.5;}", - &new_opt)); - ASSERT_TRUE(new_opt.block_cache != nullptr); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetNumShardBits(), - 4); - ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), true); - ASSERT_EQ(std::dynamic_pointer_cast( - new_opt.block_cache)->GetHighPriPoolRatio(), 0.5); - - // Set only block cache capacity. Check other values are - // reset to default values. 
- ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "block_cache={capacity=2M};" - "block_cache_compressed={capacity=2M}", - &new_opt)); - ASSERT_TRUE(new_opt.block_cache != nullptr); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 2*1024UL*1024UL); - // Default values - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetNumShardBits(), - GetDefaultCacheShardBits(new_opt.block_cache->GetCapacity())); - ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), false); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetHighPriPoolRatio(), - 0.5); - - // Set couple of block cache options. - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "block_cache={num_shard_bits=5;high_pri_pool_ratio=0.5;};" - "block_cache_compressed={num_shard_bits=5;" - "high_pri_pool_ratio=0.0;}", - &new_opt)); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 0); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetNumShardBits(), - 5); - ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), false); - ASSERT_EQ(std::dynamic_pointer_cast( - new_opt.block_cache)->GetHighPriPoolRatio(), 0.5); - - // Set couple of block cache options. - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "block_cache={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;};" - "block_cache_compressed={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;}", - &new_opt)); - ASSERT_TRUE(new_opt.block_cache != nullptr); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetNumShardBits(), - 4); - ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), true); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetHighPriPoolRatio(), - 0.5); - - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=rocksdb.BloomFilter:1.234", - &new_opt)); - ASSERT_TRUE(new_opt.filter_policy != nullptr); - ASSERT_TRUE( - new_opt.filter_policy->IsInstanceOf(BloomFilterPolicy::kClassName())); - ASSERT_TRUE( - new_opt.filter_policy->IsInstanceOf(BloomFilterPolicy::kNickName())); - - // Ribbon filter policy alternative name - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=rocksdb.RibbonFilter:6.789:5;", - &new_opt)); - ASSERT_TRUE(new_opt.filter_policy != nullptr); - ASSERT_TRUE( - new_opt.filter_policy->IsInstanceOf(RibbonFilterPolicy::kClassName())); - ASSERT_TRUE( - new_opt.filter_policy->IsInstanceOf(RibbonFilterPolicy::kNickName())); -} - - -TEST_F(OptionsTest, GetPlainTableOptionsFromString) { - PlainTableOptions table_opt; - PlainTableOptions new_opt; - ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - // make sure default values are overwritten by something else - ASSERT_OK(GetPlainTableOptionsFromString( - config_options, table_opt, - "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;" - "index_sparseness=8;huge_page_tlb_size=4;encoding_type=kPrefix;" - "full_scan_mode=true;store_index_in_file=true", - &new_opt)); - ASSERT_EQ(new_opt.user_key_len, 66u); - ASSERT_EQ(new_opt.bloom_bits_per_key, 20); - ASSERT_EQ(new_opt.hash_table_ratio, 0.5); - ASSERT_EQ(new_opt.index_sparseness, 8); - ASSERT_EQ(new_opt.huge_page_tlb_size, 4); - ASSERT_EQ(new_opt.encoding_type, EncodingType::kPrefix); - ASSERT_TRUE(new_opt.full_scan_mode); - ASSERT_TRUE(new_opt.store_index_in_file); 
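GetPlainTableOptionsFromString follows the same key=value grammar as the block-based variant exercised above. A trimmed standalone sketch with illustrative values only:

#include <cassert>
#include "rocksdb/convenience.h"
#include "rocksdb/table.h"  // PlainTableOptions
using namespace ROCKSDB_NAMESPACE;

void PlainTableFromStringSketch() {
  ConfigOptions cfg;
  cfg.ignore_unknown_options = false;
  PlainTableOptions base, out;
  Status s = GetPlainTableOptionsFromString(
      cfg, base, "user_key_len=32;bloom_bits_per_key=10", &out);
  assert(s.ok());
  assert(out.user_key_len == 32u);
  assert(out.bloom_bits_per_key == 10);
}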
- - // unknown option - Status s = GetPlainTableOptionsFromString( - config_options, table_opt, - "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;" - "bad_option=1", - &new_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - - // unrecognized EncodingType - s = GetPlainTableOptionsFromString( - config_options, table_opt, - "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;" - "encoding_type=kPrefixXX", - &new_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); -} - -TEST_F(OptionsTest, GetMemTableRepFactoryFromString) { - std::unique_ptr new_mem_factory = nullptr; - - ASSERT_OK(GetMemTableRepFactoryFromString("skip_list", &new_mem_factory)); - ASSERT_OK(GetMemTableRepFactoryFromString("skip_list:16", &new_mem_factory)); - ASSERT_STREQ(new_mem_factory->Name(), "SkipListFactory"); - ASSERT_NOK(GetMemTableRepFactoryFromString("skip_list:16:invalid_opt", - &new_mem_factory)); - - ASSERT_OK(GetMemTableRepFactoryFromString("prefix_hash", &new_mem_factory)); - ASSERT_OK(GetMemTableRepFactoryFromString("prefix_hash:1000", - &new_mem_factory)); - ASSERT_STREQ(new_mem_factory->Name(), "HashSkipListRepFactory"); - ASSERT_NOK(GetMemTableRepFactoryFromString("prefix_hash:1000:invalid_opt", - &new_mem_factory)); - - ASSERT_OK(GetMemTableRepFactoryFromString("hash_linkedlist", - &new_mem_factory)); - ASSERT_OK(GetMemTableRepFactoryFromString("hash_linkedlist:1000", - &new_mem_factory)); - ASSERT_EQ(std::string(new_mem_factory->Name()), "HashLinkListRepFactory"); - ASSERT_NOK(GetMemTableRepFactoryFromString("hash_linkedlist:1000:invalid_opt", - &new_mem_factory)); - - ASSERT_OK(GetMemTableRepFactoryFromString("vector", &new_mem_factory)); - ASSERT_OK(GetMemTableRepFactoryFromString("vector:1024", &new_mem_factory)); - ASSERT_EQ(std::string(new_mem_factory->Name()), "VectorRepFactory"); - ASSERT_NOK(GetMemTableRepFactoryFromString("vector:1024:invalid_opt", - &new_mem_factory)); - - ASSERT_NOK(GetMemTableRepFactoryFromString("cuckoo", &new_mem_factory)); - // CuckooHash memtable is already removed. 
- ASSERT_NOK(GetMemTableRepFactoryFromString("cuckoo:1024", &new_mem_factory)); - - ASSERT_NOK(GetMemTableRepFactoryFromString("bad_factory", &new_mem_factory)); -} - -TEST_F(OptionsTest, MemTableRepFactoryCreateFromString) { - std::unique_ptr new_mem_factory = nullptr; - ConfigOptions config_options; - config_options.ignore_unsupported_options = false; - config_options.ignore_unknown_options = false; - - ASSERT_OK(MemTableRepFactory::CreateFromString(config_options, "skip_list", - &new_mem_factory)); - ASSERT_OK(MemTableRepFactory::CreateFromString(config_options, "skip_list:16", - &new_mem_factory)); - ASSERT_STREQ(new_mem_factory->Name(), "SkipListFactory"); - ASSERT_TRUE(new_mem_factory->IsInstanceOf("skip_list")); - ASSERT_TRUE(new_mem_factory->IsInstanceOf("SkipListFactory")); - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, "skip_list:16:invalid_opt", &new_mem_factory)); - - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, "invalid_opt=10", &new_mem_factory)); - - // Test a reset - ASSERT_OK(MemTableRepFactory::CreateFromString(config_options, "", - &new_mem_factory)); - ASSERT_EQ(new_mem_factory, nullptr); - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, "invalid_opt=10", &new_mem_factory)); - - ASSERT_OK(MemTableRepFactory::CreateFromString( - config_options, "id=skip_list; lookahead=32", &new_mem_factory)); - ASSERT_OK(MemTableRepFactory::CreateFromString(config_options, "prefix_hash", - &new_mem_factory)); - ASSERT_OK(MemTableRepFactory::CreateFromString( - config_options, "prefix_hash:1000", &new_mem_factory)); - ASSERT_STREQ(new_mem_factory->Name(), "HashSkipListRepFactory"); - ASSERT_TRUE(new_mem_factory->IsInstanceOf("prefix_hash")); - ASSERT_TRUE(new_mem_factory->IsInstanceOf("HashSkipListRepFactory")); - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, "prefix_hash:1000:invalid_opt", &new_mem_factory)); - ASSERT_OK(MemTableRepFactory::CreateFromString( - config_options, - "id=prefix_hash; bucket_count=32; skiplist_height=64; " - "branching_factor=16", - &new_mem_factory)); - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, - "id=prefix_hash; bucket_count=32; skiplist_height=64; " - "branching_factor=16; invalid=unknown", - &new_mem_factory)); - - ASSERT_OK(MemTableRepFactory::CreateFromString( - config_options, "hash_linkedlist", &new_mem_factory)); - ASSERT_OK(MemTableRepFactory::CreateFromString( - config_options, "hash_linkedlist:1000", &new_mem_factory)); - ASSERT_STREQ(new_mem_factory->Name(), "HashLinkListRepFactory"); - ASSERT_TRUE(new_mem_factory->IsInstanceOf("hash_linkedlist")); - ASSERT_TRUE(new_mem_factory->IsInstanceOf("HashLinkListRepFactory")); - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, "hash_linkedlist:1000:invalid_opt", &new_mem_factory)); - ASSERT_OK(MemTableRepFactory::CreateFromString( - config_options, - "id=hash_linkedlist; bucket_count=32; threshold=64; huge_page_size=16; " - "logging_threshold=12; log_when_flash=true", - &new_mem_factory)); - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, - "id=hash_linkedlist; bucket_count=32; threshold=64; huge_page_size=16; " - "logging_threshold=12; log_when_flash=true; invalid=unknown", - &new_mem_factory)); - - ASSERT_OK(MemTableRepFactory::CreateFromString(config_options, "vector", - &new_mem_factory)); - ASSERT_OK(MemTableRepFactory::CreateFromString(config_options, "vector:1024", - &new_mem_factory)); - ASSERT_STREQ(new_mem_factory->Name(), "VectorRepFactory"); - 
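Alongside the legacy "name:arg" shorthand, MemTableRepFactory::CreateFromString accepts the property-style "id=...; key=value" form shown above. A reduced standalone sketch of that form (the lookahead value is arbitrary):

#include <cassert>
#include <memory>
#include "rocksdb/convenience.h"  // ConfigOptions
#include "rocksdb/memtablerep.h"
using namespace ROCKSDB_NAMESPACE;

void MemTableFactoryFromStringSketch() {
  ConfigOptions cfg;
  cfg.ignore_unknown_options = false;
  cfg.ignore_unsupported_options = false;
  std::unique_ptr<MemTableRepFactory> factory;
  Status s = MemTableRepFactory::CreateFromString(
      cfg, "id=skip_list; lookahead=32", &factory);
  assert(s.ok());
  assert(factory != nullptr);
  assert(factory->IsInstanceOf("skip_list"));
}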
ASSERT_TRUE(new_mem_factory->IsInstanceOf("vector")); - ASSERT_TRUE(new_mem_factory->IsInstanceOf("VectorRepFactory")); - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, "vector:1024:invalid_opt", &new_mem_factory)); - ASSERT_OK(MemTableRepFactory::CreateFromString( - config_options, "id=vector; count=42", &new_mem_factory)); - ASSERT_NOK(MemTableRepFactory::CreateFromString( - config_options, "id=vector; invalid=unknown", &new_mem_factory)); - ASSERT_NOK(MemTableRepFactory::CreateFromString(config_options, "cuckoo", - &new_mem_factory)); - // CuckooHash memtable is already removed. - ASSERT_NOK(MemTableRepFactory::CreateFromString(config_options, "cuckoo:1024", - &new_mem_factory)); - - ASSERT_NOK(MemTableRepFactory::CreateFromString(config_options, "bad_factory", - &new_mem_factory)); -} - -class CustomEnv : public EnvWrapper { - public: - explicit CustomEnv(Env* _target) : EnvWrapper(_target) {} - static const char* kClassName() { return "CustomEnv"; } - const char* Name() const override { return kClassName(); } -}; - -TEST_F(OptionsTest, GetOptionsFromStringTest) { - Options base_options, new_options; - ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - - base_options.write_buffer_size = 20; - base_options.min_write_buffer_number_to_merge = 15; - BlockBasedTableOptions block_based_table_options; - block_based_table_options.cache_index_and_filter_blocks = true; - base_options.table_factory.reset( - NewBlockBasedTableFactory(block_based_table_options)); - - // Register an Env with object registry. - ObjectLibrary::Default()->AddFactory( - CustomEnv::kClassName(), - [](const std::string& /*name*/, std::unique_ptr* /*env_guard*/, - std::string* /* errmsg */) { - static CustomEnv env(Env::Default()); - return &env; - }); - - ASSERT_OK(GetOptionsFromString( - config_options, base_options, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_cache=1M;block_size=4;};" - "compression_opts=4:5:6;create_if_missing=true;max_open_files=1;" - "bottommost_compression_opts=5:6:7;create_if_missing=true;max_open_files=" - "1;" - "rate_limiter_bytes_per_sec=1024;env=CustomEnv", - &new_options)); - - ASSERT_EQ(new_options.compression_opts.window_bits, 4); - ASSERT_EQ(new_options.compression_opts.level, 5); - ASSERT_EQ(new_options.compression_opts.strategy, 6); - ASSERT_EQ(new_options.compression_opts.max_dict_bytes, 0u); - ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u); - ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u); - ASSERT_EQ(new_options.compression_opts.enabled, false); - ASSERT_EQ(new_options.compression_opts.use_zstd_dict_trainer, true); - ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption); - ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5); - ASSERT_EQ(new_options.bottommost_compression_opts.level, 6); - ASSERT_EQ(new_options.bottommost_compression_opts.strategy, 7); - ASSERT_EQ(new_options.bottommost_compression_opts.max_dict_bytes, 0u); - ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u); - ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u); - ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false); - ASSERT_EQ(new_options.bottommost_compression_opts.use_zstd_dict_trainer, - true); - ASSERT_EQ(new_options.write_buffer_size, 10U); - ASSERT_EQ(new_options.max_write_buffer_number, 16); - const auto new_bbto = - 
new_options.table_factory->GetOptions(); - ASSERT_NE(new_bbto, nullptr); - ASSERT_EQ(new_bbto->block_cache->GetCapacity(), 1U << 20); - ASSERT_EQ(new_bbto->block_size, 4U); - // don't overwrite block based table options - ASSERT_TRUE(new_bbto->cache_index_and_filter_blocks); - - ASSERT_EQ(new_options.create_if_missing, true); - ASSERT_EQ(new_options.max_open_files, 1); - ASSERT_TRUE(new_options.rate_limiter.get() != nullptr); - Env* newEnv = new_options.env; - ASSERT_OK(Env::CreateFromString({}, CustomEnv::kClassName(), &newEnv)); - ASSERT_EQ(newEnv, new_options.env); - - config_options.ignore_unknown_options = false; - // Test a bad value for a DBOption returns a failure - base_options.dump_malloc_stats = false; - base_options.write_buffer_size = 1024; - Options bad_options = new_options; - Status s = GetOptionsFromString(config_options, base_options, - "create_if_missing=XX;dump_malloc_stats=true", - &bad_options); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_EQ(bad_options.dump_malloc_stats, false); - - bad_options = new_options; - s = GetOptionsFromString(config_options, base_options, - "write_buffer_size=XX;dump_malloc_stats=true", - &bad_options); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - - ASSERT_EQ(bad_options.dump_malloc_stats, false); - - // Test a bad value for a TableFactory Option returns a failure - bad_options = new_options; - s = GetOptionsFromString(config_options, base_options, - "write_buffer_size=16;dump_malloc_stats=true" - "block_based_table_factory={block_size=XX;};", - &bad_options); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_EQ(bad_options.dump_malloc_stats, false); - ASSERT_EQ(bad_options.write_buffer_size, 1024); - - config_options.ignore_unknown_options = true; - ASSERT_OK(GetOptionsFromString(config_options, base_options, - "create_if_missing=XX;dump_malloc_stats=true;" - "write_buffer_size=XX;" - "block_based_table_factory={block_size=XX;};", - &bad_options)); - ASSERT_EQ(bad_options.create_if_missing, base_options.create_if_missing); - ASSERT_EQ(bad_options.dump_malloc_stats, true); - ASSERT_EQ(bad_options.write_buffer_size, base_options.write_buffer_size); - - // Test the old interface - ASSERT_OK(GetOptionsFromString( - base_options, - "write_buffer_size=22;max_write_buffer_number=33;max_open_files=44;", - &new_options)); - ASSERT_EQ(new_options.write_buffer_size, 22U); - ASSERT_EQ(new_options.max_write_buffer_number, 33); - ASSERT_EQ(new_options.max_open_files, 44); -} - -TEST_F(OptionsTest, DBOptionsSerialization) { - Options base_options, new_options; - Random rnd(301); - ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - - // Phase 1: Make big change in base_options - test::RandomInitDBOptions(&base_options, &rnd); - - // Phase 2: obtain a string from base_option - std::string base_options_file_content; - ASSERT_OK(GetStringFromDBOptions(config_options, base_options, - &base_options_file_content)); - - // Phase 3: Set new_options from the derived string and expect - // new_options == base_options - ASSERT_OK(GetDBOptionsFromString(config_options, DBOptions(), - base_options_file_content, &new_options)); - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(config_options, base_options, - new_options)); -} - -TEST_F(OptionsTest, OptionsComposeDecompose) { - // build an Options from DBOptions + CFOptions, then decompose it to verify - // we get same constituent options. 
- DBOptions base_db_opts; - ColumnFamilyOptions base_cf_opts; - ConfigOptions - config_options; // Use default for ignore(false) and check (exact) - config_options.input_strings_escaped = false; - - Random rnd(301); - test::RandomInitDBOptions(&base_db_opts, &rnd); - test::RandomInitCFOptions(&base_cf_opts, base_db_opts, &rnd); - - Options base_opts(base_db_opts, base_cf_opts); - DBOptions new_db_opts(base_opts); - ColumnFamilyOptions new_cf_opts(base_opts); - - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(config_options, base_db_opts, - new_db_opts)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opts, - new_cf_opts)); - delete new_cf_opts.compaction_filter; -} - -TEST_F(OptionsTest, DBOptionsComposeImmutable) { - // Build a DBOptions from an Immutable/Mutable one and verify that - // we get same constituent options. - ConfigOptions config_options; - Random rnd(301); - DBOptions base_opts, new_opts; - test::RandomInitDBOptions(&base_opts, &rnd); - MutableDBOptions m_opts(base_opts); - ImmutableDBOptions i_opts(base_opts); - new_opts = BuildDBOptions(i_opts, m_opts); - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(config_options, base_opts, - new_opts)); -} - -TEST_F(OptionsTest, GetMutableDBOptions) { - Random rnd(228); - DBOptions base_opts; - std::string opts_str; - std::unordered_map opts_map; - ConfigOptions config_options; - - test::RandomInitDBOptions(&base_opts, &rnd); - ImmutableDBOptions i_opts(base_opts); - MutableDBOptions m_opts(base_opts); - MutableDBOptions new_opts; - ASSERT_OK(GetStringFromMutableDBOptions(config_options, m_opts, &opts_str)); - ASSERT_OK(StringToMap(opts_str, &opts_map)); - ASSERT_OK(GetMutableDBOptionsFromStrings(m_opts, opts_map, &new_opts)); - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions( - config_options, base_opts, BuildDBOptions(i_opts, new_opts))); -} - -TEST_F(OptionsTest, CFOptionsComposeImmutable) { - // Build a DBOptions from an Immutable/Mutable one and verify that - // we get same constituent options. 
- ConfigOptions config_options; - Random rnd(301); - ColumnFamilyOptions base_opts, new_opts; - DBOptions dummy; // Needed to create ImmutableCFOptions - test::RandomInitCFOptions(&base_opts, dummy, &rnd); - MutableCFOptions m_opts(base_opts); - ImmutableCFOptions i_opts(base_opts); - UpdateColumnFamilyOptions(i_opts, &new_opts); - UpdateColumnFamilyOptions(m_opts, &new_opts); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_opts, - new_opts)); - delete new_opts.compaction_filter; -} - -TEST_F(OptionsTest, GetMutableCFOptions) { - Random rnd(228); - ColumnFamilyOptions base, copy; - std::string opts_str; - std::unordered_map opts_map; - ConfigOptions config_options; - DBOptions dummy; // Needed to create ImmutableCFOptions - - test::RandomInitCFOptions(&base, dummy, &rnd); - ColumnFamilyOptions result; - MutableCFOptions m_opts(base), new_opts; - - ASSERT_OK(GetStringFromMutableCFOptions(config_options, m_opts, &opts_str)); - ASSERT_OK(StringToMap(opts_str, &opts_map)); - ASSERT_OK(GetMutableOptionsFromStrings(m_opts, opts_map, nullptr, &new_opts)); - UpdateColumnFamilyOptions(ImmutableCFOptions(base), ©); - UpdateColumnFamilyOptions(new_opts, ©); - - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base, copy)); - delete copy.compaction_filter; -} - -TEST_F(OptionsTest, ColumnFamilyOptionsSerialization) { - Options options; - ColumnFamilyOptions base_opt, new_opt; - Random rnd(302); - ConfigOptions config_options; - config_options.input_strings_escaped = false; - - // Phase 1: randomly assign base_opt - // custom type options - test::RandomInitCFOptions(&base_opt, options, &rnd); - - // Phase 2: obtain a string from base_opt - std::string base_options_file_content; - ASSERT_OK(GetStringFromColumnFamilyOptions(config_options, base_opt, - &base_options_file_content)); - - // Phase 3: Set new_opt from the derived string and expect - // new_opt == base_opt - ASSERT_OK( - GetColumnFamilyOptionsFromString(config_options, ColumnFamilyOptions(), - base_options_file_content, &new_opt)); - ASSERT_OK( - RocksDBOptionsParser::VerifyCFOptions(config_options, base_opt, new_opt)); - if (base_opt.compaction_filter) { - delete base_opt.compaction_filter; - } -} - -TEST_F(OptionsTest, CheckBlockBasedTableOptions) { - ColumnFamilyOptions cf_opts; - DBOptions db_opts; - ConfigOptions config_opts; - - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_opts, cf_opts, "prefix_extractor=capped:8", &cf_opts)); - ASSERT_OK(TableFactory::CreateFromString(config_opts, "BlockBasedTable", - &cf_opts.table_factory)); - ASSERT_NE(cf_opts.table_factory.get(), nullptr); - ASSERT_TRUE(cf_opts.table_factory->IsInstanceOf( - TableFactory::kBlockBasedTableName())); - auto bbto = cf_opts.table_factory->GetOptions(); - ASSERT_OK(cf_opts.table_factory->ConfigureFromString( - config_opts, - "block_cache={capacity=1M;num_shard_bits=4;};" - "block_size_deviation=101;" - "block_restart_interval=0;" - "index_block_restart_interval=5;" - "partition_filters=true;" - "index_type=kHashSearch;" - "no_block_cache=1;")); - ASSERT_NE(bbto, nullptr); - ASSERT_EQ(bbto->block_cache.get(), nullptr); - ASSERT_EQ(bbto->block_size_deviation, 0); - ASSERT_EQ(bbto->block_restart_interval, 1); - ASSERT_EQ(bbto->index_block_restart_interval, 1); - ASSERT_FALSE(bbto->partition_filters); - ASSERT_OK(TableFactory::CreateFromString(config_opts, "BlockBasedTable", - &cf_opts.table_factory)); - bbto = cf_opts.table_factory->GetOptions(); - - ASSERT_OK(cf_opts.table_factory->ConfigureFromString(config_opts, - 
"no_block_cache=0;")); - ASSERT_NE(bbto->block_cache.get(), nullptr); - ASSERT_OK(cf_opts.table_factory->ValidateOptions(db_opts, cf_opts)); -} - -TEST_F(OptionsTest, MutableTableOptions) { - ConfigOptions config_options; - std::shared_ptr bbtf; - bbtf.reset(NewBlockBasedTableFactory()); - auto bbto = bbtf->GetOptions(); - ASSERT_NE(bbto, nullptr); - ASSERT_OK(bbtf->ConfigureOption(config_options, "block_align", "true")); - ASSERT_OK(bbtf->ConfigureOption(config_options, "block_size", "1024")); - ASSERT_EQ(bbto->block_align, true); - ASSERT_EQ(bbto->block_size, 1024); - ASSERT_OK(bbtf->PrepareOptions(config_options)); - config_options.mutable_options_only = true; - ASSERT_OK(bbtf->ConfigureOption(config_options, "block_size", "1024")); - ASSERT_EQ(bbto->block_align, true); - ASSERT_NOK(bbtf->ConfigureOption(config_options, "block_align", "false")); - ASSERT_OK(bbtf->ConfigureOption(config_options, "block_size", "2048")); - ASSERT_EQ(bbto->block_align, true); - ASSERT_EQ(bbto->block_size, 2048); - - ColumnFamilyOptions cf_opts; - cf_opts.table_factory = bbtf; - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, cf_opts, "block_based_table_factory.block_align=false", - &cf_opts)); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, cf_opts, "block_based_table_factory.block_size=8192", - &cf_opts)); - ASSERT_EQ(bbto->block_align, true); - ASSERT_EQ(bbto->block_size, 8192); -} - -TEST_F(OptionsTest, MutableCFOptions) { - ConfigOptions config_options; - ColumnFamilyOptions cf_opts; - - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, cf_opts, - "paranoid_file_checks=true; block_based_table_factory.block_align=false; " - "block_based_table_factory.block_size=8192;", - &cf_opts)); - ASSERT_TRUE(cf_opts.paranoid_file_checks); - ASSERT_NE(cf_opts.table_factory.get(), nullptr); - const auto bbto = cf_opts.table_factory->GetOptions(); - ASSERT_NE(bbto, nullptr); - ASSERT_EQ(bbto->block_size, 8192); - ASSERT_EQ(bbto->block_align, false); - std::unordered_map unused_opts; - ASSERT_OK(GetColumnFamilyOptionsFromMap( - config_options, cf_opts, {{"paranoid_file_checks", "false"}}, &cf_opts)); - ASSERT_EQ(cf_opts.paranoid_file_checks, false); - - ASSERT_OK(GetColumnFamilyOptionsFromMap( - config_options, cf_opts, - {{"block_based_table_factory.block_size", "16384"}}, &cf_opts)); - ASSERT_EQ(bbto, cf_opts.table_factory->GetOptions()); - ASSERT_EQ(bbto->block_size, 16384); - - config_options.mutable_options_only = true; - // Force consistency checks is not mutable - ASSERT_NOK(GetColumnFamilyOptionsFromMap( - config_options, cf_opts, {{"force_consistency_checks", "true"}}, - &cf_opts)); - - // Attempt to change the table. It is not mutable, so this should fail and - // leave the original intact - ASSERT_NOK(GetColumnFamilyOptionsFromMap( - config_options, cf_opts, {{"table_factory", "PlainTable"}}, &cf_opts)); - ASSERT_NOK(GetColumnFamilyOptionsFromMap( - config_options, cf_opts, {{"table_factory.id", "PlainTable"}}, &cf_opts)); - ASSERT_NE(cf_opts.table_factory.get(), nullptr); - ASSERT_EQ(bbto, cf_opts.table_factory->GetOptions()); - - // Change the block size. 
Should update the value in the current table - ASSERT_OK(GetColumnFamilyOptionsFromMap( - config_options, cf_opts, - {{"block_based_table_factory.block_size", "8192"}}, &cf_opts)); - ASSERT_EQ(bbto, cf_opts.table_factory->GetOptions()); - ASSERT_EQ(bbto->block_size, 8192); - - // Attempt to turn off block cache fails, as this option is not mutable - ASSERT_NOK(GetColumnFamilyOptionsFromMap( - config_options, cf_opts, - {{"block_based_table_factory.no_block_cache", "true"}}, &cf_opts)); - ASSERT_EQ(bbto, cf_opts.table_factory->GetOptions()); - - // Attempt to change the block size via a config string/map. Should update - // the current value - ASSERT_OK(GetColumnFamilyOptionsFromMap( - config_options, cf_opts, - {{"block_based_table_factory", "{block_size=32768}"}}, &cf_opts)); - ASSERT_EQ(bbto, cf_opts.table_factory->GetOptions()); - ASSERT_EQ(bbto->block_size, 32768); - - // Attempt to change the block size and no cache through the map. Should - // fail, leaving the old values intact - ASSERT_NOK(GetColumnFamilyOptionsFromMap( - config_options, cf_opts, - {{"block_based_table_factory", - "{block_size=16384; no_block_cache=true}"}}, - &cf_opts)); - ASSERT_EQ(bbto, cf_opts.table_factory->GetOptions()); - ASSERT_EQ(bbto->block_size, 32768); -} - - -Status StringToMap( - const std::string& opts_str, - std::unordered_map* opts_map); - -TEST_F(OptionsTest, StringToMapTest) { - std::unordered_map opts_map; - // Regular options - ASSERT_OK(StringToMap("k1=v1;k2=v2;k3=v3", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_EQ(opts_map["k2"], "v2"); - ASSERT_EQ(opts_map["k3"], "v3"); - // Value with '=' - opts_map.clear(); - ASSERT_OK(StringToMap("k1==v1;k2=v2=;", &opts_map)); - ASSERT_EQ(opts_map["k1"], "=v1"); - ASSERT_EQ(opts_map["k2"], "v2="); - // Overwrriten option - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k1=v2;k3=v3", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v2"); - ASSERT_EQ(opts_map["k3"], "v3"); - // Empty value - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2=;k3=v3;k4=", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_TRUE(opts_map.find("k2") != opts_map.end()); - ASSERT_EQ(opts_map["k2"], ""); - ASSERT_EQ(opts_map["k3"], "v3"); - ASSERT_TRUE(opts_map.find("k4") != opts_map.end()); - ASSERT_EQ(opts_map["k4"], ""); - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2=;k3=v3;k4= ", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_TRUE(opts_map.find("k2") != opts_map.end()); - ASSERT_EQ(opts_map["k2"], ""); - ASSERT_EQ(opts_map["k3"], "v3"); - ASSERT_TRUE(opts_map.find("k4") != opts_map.end()); - ASSERT_EQ(opts_map["k4"], ""); - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2=;k3=", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_TRUE(opts_map.find("k2") != opts_map.end()); - ASSERT_EQ(opts_map["k2"], ""); - ASSERT_TRUE(opts_map.find("k3") != opts_map.end()); - ASSERT_EQ(opts_map["k3"], ""); - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2=;k3=;", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_TRUE(opts_map.find("k2") != opts_map.end()); - ASSERT_EQ(opts_map["k2"], ""); - ASSERT_TRUE(opts_map.find("k3") != opts_map.end()); - ASSERT_EQ(opts_map["k3"], ""); - // Regular nested options - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2={nk1=nv1;nk2=nv2};k3=v3", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_EQ(opts_map["k2"], "nk1=nv1;nk2=nv2"); - ASSERT_EQ(opts_map["k3"], "v3"); - // Multi-level nested options - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2={nk1=nv1;nk2={nnk1=nnk2}};" - 
"k3={nk1={nnk1={nnnk1=nnnv1;nnnk2;nnnv2}}};k4=v4", - &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_EQ(opts_map["k2"], "nk1=nv1;nk2={nnk1=nnk2}"); - ASSERT_EQ(opts_map["k3"], "nk1={nnk1={nnnk1=nnnv1;nnnk2;nnnv2}}"); - ASSERT_EQ(opts_map["k4"], "v4"); - // Garbage inside curly braces - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2={dfad=};k3={=};k4=v4", - &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_EQ(opts_map["k2"], "dfad="); - ASSERT_EQ(opts_map["k3"], "="); - ASSERT_EQ(opts_map["k4"], "v4"); - // Empty nested options - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2={};", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_EQ(opts_map["k2"], ""); - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2={{{{}}}{}{}};", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_EQ(opts_map["k2"], "{{{}}}{}{}"); - // With random spaces - opts_map.clear(); - ASSERT_OK(StringToMap(" k1 = v1 ; k2= {nk1=nv1; nk2={nnk1=nnk2}} ; " - "k3={ { } }; k4= v4 ", - &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_EQ(opts_map["k2"], "nk1=nv1; nk2={nnk1=nnk2}"); - ASSERT_EQ(opts_map["k3"], "{ }"); - ASSERT_EQ(opts_map["k4"], "v4"); - - // Empty key - ASSERT_NOK(StringToMap("k1=v1;k2=v2;=", &opts_map)); - ASSERT_NOK(StringToMap("=v1;k2=v2", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2v2;", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2=v2;fadfa", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2=v2;;", &opts_map)); - // Mismatch curly braces - ASSERT_NOK(StringToMap("k1=v1;k2={;k3=v3", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2={{};k3=v3", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2={}};k3=v3", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2={{}{}}};k3=v3", &opts_map)); - // However this is valid! - opts_map.clear(); - ASSERT_OK(StringToMap("k1=v1;k2=};k3=v3", &opts_map)); - ASSERT_EQ(opts_map["k1"], "v1"); - ASSERT_EQ(opts_map["k2"], "}"); - ASSERT_EQ(opts_map["k3"], "v3"); - - // Invalid chars after closing curly brace - ASSERT_NOK(StringToMap("k1=v1;k2={{}}{};k3=v3", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2={{}}cfda;k3=v3", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2={{}} cfda;k3=v3", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2={{}} cfda", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2={{}}{}", &opts_map)); - ASSERT_NOK(StringToMap("k1=v1;k2={{dfdl}adfa}{}", &opts_map)); -} - -TEST_F(OptionsTest, StringToMapRandomTest) { - std::unordered_map opts_map; - // Make sure segfault is not hit by semi-random strings - - std::vector bases = { - "a={aa={};tt={xxx={}}};c=defff", - "a={aa={};tt={xxx={}}};c=defff;d={{}yxx{}3{xx}}", - "abc={{}{}{}{{{}}}{{}{}{}{}{}{}{}"}; - - for (std::string base : bases) { - for (int rand_seed = 301; rand_seed < 401; rand_seed++) { - Random rnd(rand_seed); - for (int attempt = 0; attempt < 10; attempt++) { - std::string str = base; - // Replace random position to space - size_t pos = static_cast( - rnd.Uniform(static_cast(base.size()))); - str[pos] = ' '; - Status s = StringToMap(str, &opts_map); - ASSERT_TRUE(s.ok() || s.IsInvalidArgument()); - opts_map.clear(); - } - } - } - - // Random Construct a string - std::vector chars = {'{', '}', ' ', '=', ';', 'c'}; - for (int rand_seed = 301; rand_seed < 1301; rand_seed++) { - Random rnd(rand_seed); - int len = rnd.Uniform(30); - std::string str = ""; - for (int attempt = 0; attempt < len; attempt++) { - // Add a random character - size_t pos = static_cast( - rnd.Uniform(static_cast(chars.size()))); - str.append(1, chars[pos]); - } - Status s = StringToMap(str, 
&opts_map); - ASSERT_TRUE(s.ok() || s.IsInvalidArgument()); - s = StringToMap("name=" + str, &opts_map); - ASSERT_TRUE(s.ok() || s.IsInvalidArgument()); - opts_map.clear(); - } -} - -TEST_F(OptionsTest, GetStringFromCompressionType) { - std::string res; - - ASSERT_OK(GetStringFromCompressionType(&res, kNoCompression)); - ASSERT_EQ(res, "kNoCompression"); - - ASSERT_OK(GetStringFromCompressionType(&res, kSnappyCompression)); - ASSERT_EQ(res, "kSnappyCompression"); - - ASSERT_OK(GetStringFromCompressionType(&res, kDisableCompressionOption)); - ASSERT_EQ(res, "kDisableCompressionOption"); - - ASSERT_OK(GetStringFromCompressionType(&res, kLZ4Compression)); - ASSERT_EQ(res, "kLZ4Compression"); - - ASSERT_OK(GetStringFromCompressionType(&res, kZlibCompression)); - ASSERT_EQ(res, "kZlibCompression"); - - ASSERT_NOK( - GetStringFromCompressionType(&res, static_cast(-10))); -} - -TEST_F(OptionsTest, OnlyMutableDBOptions) { - std::string opt_str; - Random rnd(302); - ConfigOptions cfg_opts; - DBOptions db_opts; - DBOptions mdb_opts; - std::unordered_set m_names; - std::unordered_set a_names; - - test::RandomInitDBOptions(&db_opts, &rnd); - auto db_config = DBOptionsAsConfigurable(db_opts); - - // Get all of the DB Option names (mutable or not) - ASSERT_OK(db_config->GetOptionNames(cfg_opts, &a_names)); - - // Get only the mutable options from db_opts and set those in mdb_opts - cfg_opts.mutable_options_only = true; - - // Get only the Mutable DB Option names - ASSERT_OK(db_config->GetOptionNames(cfg_opts, &m_names)); - ASSERT_OK(GetStringFromDBOptions(cfg_opts, db_opts, &opt_str)); - ASSERT_OK(GetDBOptionsFromString(cfg_opts, mdb_opts, opt_str, &mdb_opts)); - std::string mismatch; - // Comparing only the mutable options, the two are equivalent - auto mdb_config = DBOptionsAsConfigurable(mdb_opts); - ASSERT_TRUE(mdb_config->AreEquivalent(cfg_opts, db_config.get(), &mismatch)); - ASSERT_TRUE(db_config->AreEquivalent(cfg_opts, mdb_config.get(), &mismatch)); - - ASSERT_GT(a_names.size(), m_names.size()); - for (const auto& n : m_names) { - std::string m, d; - ASSERT_OK(mdb_config->GetOption(cfg_opts, n, &m)); - ASSERT_OK(db_config->GetOption(cfg_opts, n, &d)); - ASSERT_EQ(m, d); - } - - cfg_opts.mutable_options_only = false; - // Comparing all of the options, the two are not equivalent - ASSERT_FALSE(mdb_config->AreEquivalent(cfg_opts, db_config.get(), &mismatch)); - ASSERT_FALSE(db_config->AreEquivalent(cfg_opts, mdb_config.get(), &mismatch)); - - // Make sure there are only mutable options being configured - ASSERT_OK(GetDBOptionsFromString(cfg_opts, DBOptions(), opt_str, &db_opts)); -} - -TEST_F(OptionsTest, OnlyMutableCFOptions) { - std::string opt_str; - Random rnd(302); - ConfigOptions cfg_opts; - DBOptions db_opts; - ColumnFamilyOptions mcf_opts; - ColumnFamilyOptions cf_opts; - std::unordered_set m_names; - std::unordered_set a_names; - - test::RandomInitCFOptions(&cf_opts, db_opts, &rnd); - cf_opts.comparator = ReverseBytewiseComparator(); - auto cf_config = CFOptionsAsConfigurable(cf_opts); - - // Get all of the CF Option names (mutable or not) - ASSERT_OK(cf_config->GetOptionNames(cfg_opts, &a_names)); - - // Get only the mutable options from cf_opts and set those in mcf_opts - cfg_opts.mutable_options_only = true; - // Get only the Mutable CF Option names - ASSERT_OK(cf_config->GetOptionNames(cfg_opts, &m_names)); - ASSERT_OK(GetStringFromColumnFamilyOptions(cfg_opts, cf_opts, &opt_str)); - ASSERT_OK( - GetColumnFamilyOptionsFromString(cfg_opts, mcf_opts, opt_str, &mcf_opts)); - std::string 
mismatch; - - auto mcf_config = CFOptionsAsConfigurable(mcf_opts); - // Comparing only the mutable options, the two are equivalent - ASSERT_TRUE(mcf_config->AreEquivalent(cfg_opts, cf_config.get(), &mismatch)); - ASSERT_TRUE(cf_config->AreEquivalent(cfg_opts, mcf_config.get(), &mismatch)); - - ASSERT_GT(a_names.size(), m_names.size()); - for (const auto& n : m_names) { - std::string m, d; - ASSERT_OK(mcf_config->GetOption(cfg_opts, n, &m)); - ASSERT_OK(cf_config->GetOption(cfg_opts, n, &d)); - ASSERT_EQ(m, d); - } - - cfg_opts.mutable_options_only = false; - // Comparing all of the options, the two are not equivalent - ASSERT_FALSE(mcf_config->AreEquivalent(cfg_opts, cf_config.get(), &mismatch)); - ASSERT_FALSE(cf_config->AreEquivalent(cfg_opts, mcf_config.get(), &mismatch)); - delete cf_opts.compaction_filter; - - // Make sure the options string contains only mutable options - ASSERT_OK(GetColumnFamilyOptionsFromString(cfg_opts, ColumnFamilyOptions(), - opt_str, &cf_opts)); - delete cf_opts.compaction_filter; -} - -TEST_F(OptionsTest, SstPartitionerTest) { - ConfigOptions cfg_opts; - ColumnFamilyOptions cf_opts, new_opt; - std::string opts_str, mismatch; - - ASSERT_OK(SstPartitionerFactory::CreateFromString( - cfg_opts, SstPartitionerFixedPrefixFactory::kClassName(), - &cf_opts.sst_partitioner_factory)); - ASSERT_NE(cf_opts.sst_partitioner_factory, nullptr); - ASSERT_STREQ(cf_opts.sst_partitioner_factory->Name(), - SstPartitionerFixedPrefixFactory::kClassName()); - ASSERT_NOK(GetColumnFamilyOptionsFromString( - cfg_opts, ColumnFamilyOptions(), - std::string("sst_partitioner_factory={id=") + - SstPartitionerFixedPrefixFactory::kClassName() + "; unknown=10;}", - &cf_opts)); - ASSERT_OK(GetColumnFamilyOptionsFromString( - cfg_opts, ColumnFamilyOptions(), - std::string("sst_partitioner_factory={id=") + - SstPartitionerFixedPrefixFactory::kClassName() + "; length=10;}", - &cf_opts)); - ASSERT_NE(cf_opts.sst_partitioner_factory, nullptr); - ASSERT_STREQ(cf_opts.sst_partitioner_factory->Name(), - SstPartitionerFixedPrefixFactory::kClassName()); - ASSERT_OK(GetStringFromColumnFamilyOptions(cfg_opts, cf_opts, &opts_str)); - ASSERT_OK( - GetColumnFamilyOptionsFromString(cfg_opts, cf_opts, opts_str, &new_opt)); - ASSERT_NE(new_opt.sst_partitioner_factory, nullptr); - ASSERT_STREQ(new_opt.sst_partitioner_factory->Name(), - SstPartitionerFixedPrefixFactory::kClassName()); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(cfg_opts, cf_opts, new_opt)); - ASSERT_TRUE(cf_opts.sst_partitioner_factory->AreEquivalent( - cfg_opts, new_opt.sst_partitioner_factory.get(), &mismatch)); -} - -TEST_F(OptionsTest, FileChecksumGenFactoryTest) { - ConfigOptions cfg_opts; - DBOptions db_opts, new_opt; - std::string opts_str, mismatch; - auto factory = GetFileChecksumGenCrc32cFactory(); - - cfg_opts.ignore_unsupported_options = false; - - ASSERT_OK(GetStringFromDBOptions(cfg_opts, db_opts, &opts_str)); - ASSERT_OK(GetDBOptionsFromString(cfg_opts, db_opts, opts_str, &new_opt)); - - ASSERT_NE(factory, nullptr); - ASSERT_OK(FileChecksumGenFactory::CreateFromString( - cfg_opts, factory->Name(), &db_opts.file_checksum_gen_factory)); - ASSERT_NE(db_opts.file_checksum_gen_factory, nullptr); - ASSERT_STREQ(db_opts.file_checksum_gen_factory->Name(), factory->Name()); - ASSERT_NOK(GetDBOptionsFromString( - cfg_opts, DBOptions(), "file_checksum_gen_factory=unknown", &db_opts)); - ASSERT_OK(GetDBOptionsFromString( - cfg_opts, DBOptions(), - std::string("file_checksum_gen_factory=") + factory->Name(), &db_opts)); - 
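Pluggable factories such as the SST partitioner above are configured through a nested struct whose "id" selects the implementation and whose remaining keys become that object's options. A compact standalone sketch that reuses the factory's own kClassName() so the id string is not hard-coded (the length value is arbitrary):

#include <cassert>
#include <string>
#include "rocksdb/convenience.h"
#include "rocksdb/options.h"
#include "rocksdb/sst_partitioner.h"
using namespace ROCKSDB_NAMESPACE;

void SstPartitionerFromStringSketch() {
  ConfigOptions cfg;
  ColumnFamilyOptions out;
  std::string opt_str = std::string("sst_partitioner_factory={id=") +
                        SstPartitionerFixedPrefixFactory::kClassName() +
                        "; length=4;}";
  Status s = GetColumnFamilyOptionsFromString(cfg, ColumnFamilyOptions(),
                                              opt_str, &out);
  assert(s.ok());
  assert(out.sst_partitioner_factory != nullptr);
}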
ASSERT_NE(db_opts.file_checksum_gen_factory, nullptr); - ASSERT_STREQ(db_opts.file_checksum_gen_factory->Name(), factory->Name()); - - ASSERT_OK(GetStringFromDBOptions(cfg_opts, db_opts, &opts_str)); - ASSERT_OK(GetDBOptionsFromString(cfg_opts, db_opts, opts_str, &new_opt)); - ASSERT_NE(new_opt.file_checksum_gen_factory, nullptr); - ASSERT_STREQ(new_opt.file_checksum_gen_factory->Name(), factory->Name()); - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(cfg_opts, db_opts, new_opt)); - ASSERT_TRUE(factory->AreEquivalent( - cfg_opts, new_opt.file_checksum_gen_factory.get(), &mismatch)); - ASSERT_TRUE(db_opts.file_checksum_gen_factory->AreEquivalent( - cfg_opts, new_opt.file_checksum_gen_factory.get(), &mismatch)); -} - -class TestTablePropertiesCollectorFactory - : public TablePropertiesCollectorFactory { - private: - std::string id_; - - public: - explicit TestTablePropertiesCollectorFactory(const std::string& id) - : id_(id) {} - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context /*context*/) override { - return nullptr; - } - static const char* kClassName() { return "TestCollector"; } - const char* Name() const override { return kClassName(); } - std::string GetId() const override { - return std::string(kClassName()) + ":" + id_; - } -}; - -TEST_F(OptionsTest, OptionTablePropertiesTest) { - ConfigOptions cfg_opts; - ColumnFamilyOptions orig, copy; - orig.table_properties_collector_factories.push_back( - std::make_shared<TestTablePropertiesCollectorFactory>("1")); - orig.table_properties_collector_factories.push_back( - std::make_shared<TestTablePropertiesCollectorFactory>("2")); - - // Push two TablePropertiesCollectorFactories then create a new - // ColumnFamilyOptions based on those settings. The copy should - // have no properties but still match the original - std::string opts_str; - ASSERT_OK(GetStringFromColumnFamilyOptions(cfg_opts, orig, &opts_str)); - ASSERT_OK(GetColumnFamilyOptionsFromString(cfg_opts, orig, opts_str, &copy)); - ASSERT_EQ(copy.table_properties_collector_factories.size(), 0); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(cfg_opts, orig, copy)); - - // Now register a TablePropertiesCollectorFactory - // Repeat the experiment.
The copy should have the same - // properties as the original - cfg_opts.registry->AddLibrary("collector") - ->AddFactory<TablePropertiesCollectorFactory>( - ObjectLibrary::PatternEntry( - TestTablePropertiesCollectorFactory::kClassName(), false) - .AddSeparator(":"), - [](const std::string& name, - std::unique_ptr<TablePropertiesCollectorFactory>* guard, - std::string* /* errmsg */) { - std::string id = name.substr( - strlen(TestTablePropertiesCollectorFactory::kClassName()) + 1); - guard->reset(new TestTablePropertiesCollectorFactory(id)); - return guard->get(); - }); - - ASSERT_OK(GetColumnFamilyOptionsFromString(cfg_opts, orig, opts_str, &copy)); - ASSERT_EQ(copy.table_properties_collector_factories.size(), 2); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(cfg_opts, orig, copy)); -} - -TEST_F(OptionsTest, ConvertOptionsTest) { - LevelDBOptions leveldb_opt; - Options converted_opt = ConvertOptions(leveldb_opt); - - ASSERT_EQ(converted_opt.create_if_missing, leveldb_opt.create_if_missing); - ASSERT_EQ(converted_opt.error_if_exists, leveldb_opt.error_if_exists); - ASSERT_EQ(converted_opt.paranoid_checks, leveldb_opt.paranoid_checks); - ASSERT_EQ(converted_opt.env, leveldb_opt.env); - ASSERT_EQ(converted_opt.info_log.get(), leveldb_opt.info_log); - ASSERT_EQ(converted_opt.write_buffer_size, leveldb_opt.write_buffer_size); - ASSERT_EQ(converted_opt.max_open_files, leveldb_opt.max_open_files); - ASSERT_EQ(converted_opt.compression, leveldb_opt.compression); - - std::shared_ptr<TableFactory> table_factory = converted_opt.table_factory; - const auto table_opt = table_factory->GetOptions<BlockBasedTableOptions>(); - ASSERT_NE(table_opt, nullptr); - - ASSERT_EQ(table_opt->block_cache->GetCapacity(), 8UL << 20); - ASSERT_EQ(table_opt->block_size, leveldb_opt.block_size); - ASSERT_EQ(table_opt->block_restart_interval, - leveldb_opt.block_restart_interval); - ASSERT_EQ(table_opt->filter_policy.get(), leveldb_opt.filter_policy); -} -class TestEventListener : public EventListener { - private: - std::string id_; - - public: - explicit TestEventListener(const std::string& id) : id_("Test" + id) {} - const char* Name() const override { return id_.c_str(); } -}; - -static std::unordered_map<std::string, OptionTypeInfo> - test_listener_option_info = { - {"s", - {0, OptionType::kString, OptionVerificationType::kNormal, - OptionTypeFlags::kNone}}, - -}; - -class TestConfigEventListener : public TestEventListener { - private: - std::string s_; - - public: - explicit TestConfigEventListener(const std::string& id) - : TestEventListener("Config" + id) { - s_ = id; - RegisterOptions("Test", &s_, &test_listener_option_info); - } -}; - -static int RegisterTestEventListener(ObjectLibrary& library, - const std::string& arg) { - library.AddFactory<EventListener>( - "Test" + arg, - [](const std::string& name, std::unique_ptr<EventListener>* guard, - std::string* /* errmsg */) { - guard->reset(new TestEventListener(name.substr(4))); - return guard->get(); - }); - library.AddFactory<EventListener>( - "TestConfig" + arg, - [](const std::string& name, std::unique_ptr<EventListener>* guard, - std::string* /* errmsg */) { - guard->reset(new TestConfigEventListener(name.substr(10))); - return guard->get(); - }); - return 1; -} -TEST_F(OptionsTest, OptionsListenerTest) { - DBOptions orig, copy; - orig.listeners.push_back(std::make_shared<TestEventListener>("1")); - orig.listeners.push_back(std::make_shared<TestEventListener>("2")); - orig.listeners.push_back(std::make_shared<TestEventListener>("")); - orig.listeners.push_back(std::make_shared<TestConfigEventListener>("1")); - orig.listeners.push_back(std::make_shared<TestConfigEventListener>("2")); - orig.listeners.push_back(std::make_shared<TestConfigEventListener>("")); - ConfigOptions config_opts(orig); - config_opts.registry->AddLibrary("listener", RegisterTestEventListener, "1"); -
std::string opts_str; - ASSERT_OK(GetStringFromDBOptions(config_opts, orig, &opts_str)); - ASSERT_OK(GetDBOptionsFromString(config_opts, orig, opts_str, &copy)); - ASSERT_OK(GetStringFromDBOptions(config_opts, copy, &opts_str)); - ASSERT_EQ( - copy.listeners.size(), - 2); // The Test{Config}1 Listeners could be loaded but not the others - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(config_opts, orig, copy)); -} - -const static std::string kCustomEnvName = "Custom"; -const static std::string kCustomEnvProp = "env=" + kCustomEnvName; - -static int RegisterCustomEnv(ObjectLibrary& library, const std::string& arg) { - library.AddFactory<Env>( - arg, [](const std::string& /*name*/, std::unique_ptr<Env>* /*env_guard*/, - std::string* /* errmsg */) { - static CustomEnv env(Env::Default()); - return &env; - }); - return 1; -} - -// This test suite tests the old APIs into the Configure options methods. -// Once those APIs are officially deprecated, this test suite can be deleted. -class OptionsOldApiTest : public testing::Test {}; - -TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { - std::unordered_map<std::string, std::string> cf_options_map = { - {"write_buffer_size", "1"}, - {"max_write_buffer_number", "2"}, - {"min_write_buffer_number_to_merge", "3"}, - {"max_write_buffer_number_to_maintain", "99"}, - {"max_write_buffer_size_to_maintain", "-99999"}, - {"compression", "kSnappyCompression"}, - {"compression_per_level", - "kNoCompression:" - "kSnappyCompression:" - "kZlibCompression:" - "kBZip2Compression:" - "kLZ4Compression:" - "kLZ4HCCompression:" - "kXpressCompression:" - "kZSTD:" - "kZSTDNotFinalCompression"}, - {"bottommost_compression", "kLZ4Compression"}, - {"bottommost_compression_opts", "5:6:7:8:9:true"}, - {"compression_opts", "4:5:6:7:8:9:true:10:false"}, - {"num_levels", "8"}, - {"level0_file_num_compaction_trigger", "8"}, - {"level0_slowdown_writes_trigger", "9"}, - {"level0_stop_writes_trigger", "10"}, - {"target_file_size_base", "12"}, - {"target_file_size_multiplier", "13"}, - {"max_bytes_for_level_base", "14"}, - {"level_compaction_dynamic_level_bytes", "true"}, - {"level_compaction_dynamic_file_size", "true"}, - {"max_bytes_for_level_multiplier", "15.0"}, - {"max_bytes_for_level_multiplier_additional", "16:17:18"}, - {"max_compaction_bytes", "21"}, - {"soft_rate_limit", "1.1"}, - {"hard_rate_limit", "2.1"}, - {"rate_limit_delay_max_milliseconds", "100"}, - {"hard_pending_compaction_bytes_limit", "211"}, - {"arena_block_size", "22"}, - {"disable_auto_compactions", "true"}, - {"compaction_style", "kCompactionStyleLevel"}, - {"compaction_pri", "kOldestSmallestSeqFirst"}, - {"verify_checksums_in_compaction", "false"}, - {"compaction_options_fifo", "23"}, - {"max_sequential_skip_in_iterations", "24"}, - {"inplace_update_support", "true"}, - {"report_bg_io_stats", "true"}, - {"compaction_measure_io_stats", "false"}, - {"purge_redundant_kvs_while_flush", "false"}, - {"inplace_update_num_locks", "25"}, - {"memtable_prefix_bloom_size_ratio", "0.26"}, - {"memtable_whole_key_filtering", "true"}, - {"memtable_huge_page_size", "28"}, - {"bloom_locality", "29"}, - {"max_successive_merges", "30"}, - {"min_partial_merge_operands", "31"}, - {"prefix_extractor", "fixed:31"}, - {"experimental_mempurge_threshold", "0.003"}, - {"optimize_filters_for_hits", "true"}, - {"enable_blob_files", "true"}, - {"min_blob_size", "1K"}, - {"blob_file_size", "1G"}, - {"blob_compression_type", "kZSTD"}, - {"enable_blob_garbage_collection", "true"}, - {"blob_garbage_collection_age_cutoff", "0.5"}, - {"blob_garbage_collection_force_threshold",
"0.75"}, - {"blob_compaction_readahead_size", "256K"}, - {"blob_file_starting_level", "1"}, - {"prepopulate_blob_cache", "kDisable"}, - {"last_level_temperature", "kWarm"}, - }; - - std::unordered_map db_options_map = { - {"create_if_missing", "false"}, - {"create_missing_column_families", "true"}, - {"error_if_exists", "false"}, - {"paranoid_checks", "true"}, - {"track_and_verify_wals_in_manifest", "true"}, - {"verify_sst_unique_id_in_manifest", "true"}, - {"max_open_files", "32"}, - {"max_total_wal_size", "33"}, - {"use_fsync", "true"}, - {"db_log_dir", "/db_log_dir"}, - {"wal_dir", "/wal_dir"}, - {"delete_obsolete_files_period_micros", "34"}, - {"max_background_compactions", "35"}, - {"max_background_flushes", "36"}, - {"max_log_file_size", "37"}, - {"log_file_time_to_roll", "38"}, - {"keep_log_file_num", "39"}, - {"recycle_log_file_num", "5"}, - {"max_manifest_file_size", "40"}, - {"table_cache_numshardbits", "41"}, - {"WAL_ttl_seconds", "43"}, - {"WAL_size_limit_MB", "44"}, - {"manifest_preallocation_size", "45"}, - {"allow_mmap_reads", "true"}, - {"allow_mmap_writes", "false"}, - {"use_direct_reads", "false"}, - {"use_direct_io_for_flush_and_compaction", "false"}, - {"is_fd_close_on_exec", "true"}, - {"skip_log_error_on_recovery", "false"}, - {"stats_dump_period_sec", "46"}, - {"stats_persist_period_sec", "57"}, - {"persist_stats_to_disk", "false"}, - {"stats_history_buffer_size", "69"}, - {"advise_random_on_open", "true"}, - {"use_adaptive_mutex", "false"}, - {"compaction_readahead_size", "100"}, - {"random_access_max_buffer_size", "3145728"}, - {"writable_file_max_buffer_size", "314159"}, - {"bytes_per_sync", "47"}, - {"wal_bytes_per_sync", "48"}, - {"strict_bytes_per_sync", "true"}, - {"preserve_deletes", "false"}, - }; - - ColumnFamilyOptions base_cf_opt; - ColumnFamilyOptions new_cf_opt; - ConfigOptions cf_config_options; - cf_config_options.ignore_unknown_options = false; - cf_config_options.input_strings_escaped = false; - ASSERT_OK(GetColumnFamilyOptionsFromMap(cf_config_options, base_cf_opt, - cf_options_map, &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 1U); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 2); - ASSERT_EQ(new_cf_opt.min_write_buffer_number_to_merge, 3); - ASSERT_EQ(new_cf_opt.max_write_buffer_number_to_maintain, 99); - ASSERT_EQ(new_cf_opt.max_write_buffer_size_to_maintain, -99999); - ASSERT_EQ(new_cf_opt.compression, kSnappyCompression); - ASSERT_EQ(new_cf_opt.compression_per_level.size(), 9U); - ASSERT_EQ(new_cf_opt.compression_per_level[0], kNoCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[1], kSnappyCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[2], kZlibCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[3], kBZip2Compression); - ASSERT_EQ(new_cf_opt.compression_per_level[4], kLZ4Compression); - ASSERT_EQ(new_cf_opt.compression_per_level[5], kLZ4HCCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[6], kXpressCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[7], kZSTD); - ASSERT_EQ(new_cf_opt.compression_per_level[8], kZSTDNotFinalCompression); - ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 4); - ASSERT_EQ(new_cf_opt.compression_opts.level, 5); - ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6); - ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u); - ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u); - ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 9u); - ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); - 
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 10u); - ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false); - ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_bytes, 8u); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 9u); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, - CompressionOptions().parallel_threads); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_buffer_bytes, - CompressionOptions().max_dict_buffer_bytes); - ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, - CompressionOptions().use_zstd_dict_trainer); - ASSERT_EQ(new_cf_opt.num_levels, 8); - ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8); - ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9); - ASSERT_EQ(new_cf_opt.level0_stop_writes_trigger, 10); - ASSERT_EQ(new_cf_opt.target_file_size_base, static_cast(12)); - ASSERT_EQ(new_cf_opt.target_file_size_multiplier, 13); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_base, 14U); - ASSERT_EQ(new_cf_opt.level_compaction_dynamic_level_bytes, true); - ASSERT_EQ(new_cf_opt.level_compaction_dynamic_file_size, true); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier, 15.0); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional.size(), 3U); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[0], 16); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[1], 17); - ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[2], 18); - ASSERT_EQ(new_cf_opt.max_compaction_bytes, 21); - ASSERT_EQ(new_cf_opt.hard_pending_compaction_bytes_limit, 211); - ASSERT_EQ(new_cf_opt.arena_block_size, 22U); - ASSERT_EQ(new_cf_opt.disable_auto_compactions, true); - ASSERT_EQ(new_cf_opt.compaction_style, kCompactionStyleLevel); - ASSERT_EQ(new_cf_opt.compaction_pri, kOldestSmallestSeqFirst); - ASSERT_EQ(new_cf_opt.compaction_options_fifo.max_table_files_size, - static_cast(23)); - ASSERT_EQ(new_cf_opt.max_sequential_skip_in_iterations, - static_cast(24)); - ASSERT_EQ(new_cf_opt.inplace_update_support, true); - ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 25U); - ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_size_ratio, 0.26); - ASSERT_EQ(new_cf_opt.memtable_whole_key_filtering, true); - ASSERT_EQ(new_cf_opt.memtable_huge_page_size, 28U); - ASSERT_EQ(new_cf_opt.bloom_locality, 29U); - ASSERT_EQ(new_cf_opt.max_successive_merges, 30U); - ASSERT_TRUE(new_cf_opt.prefix_extractor != nullptr); - ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true); - ASSERT_EQ(new_cf_opt.prefix_extractor->AsString(), "rocksdb.FixedPrefix.31"); - ASSERT_EQ(new_cf_opt.experimental_mempurge_threshold, 0.003); - ASSERT_EQ(new_cf_opt.enable_blob_files, true); - ASSERT_EQ(new_cf_opt.min_blob_size, 1ULL << 10); - ASSERT_EQ(new_cf_opt.blob_file_size, 1ULL << 30); - ASSERT_EQ(new_cf_opt.blob_compression_type, kZSTD); - ASSERT_EQ(new_cf_opt.enable_blob_garbage_collection, true); - ASSERT_EQ(new_cf_opt.blob_garbage_collection_age_cutoff, 0.5); - ASSERT_EQ(new_cf_opt.blob_garbage_collection_force_threshold, 0.75); - ASSERT_EQ(new_cf_opt.blob_compaction_readahead_size, 262144); - ASSERT_EQ(new_cf_opt.blob_file_starting_level, 1); - 
ASSERT_EQ(new_cf_opt.prepopulate_blob_cache, PrepopulateBlobCache::kDisable); - ASSERT_EQ(new_cf_opt.last_level_temperature, Temperature::kWarm); - ASSERT_EQ(new_cf_opt.bottommost_temperature, Temperature::kWarm); - - cf_options_map["write_buffer_size"] = "hello"; - ASSERT_NOK(GetColumnFamilyOptionsFromMap(cf_config_options, base_cf_opt, - cf_options_map, &new_cf_opt)); - ConfigOptions exact, loose; - exact.sanity_level = ConfigOptions::kSanityLevelExactMatch; - loose.sanity_level = ConfigOptions::kSanityLevelLooselyCompatible; - - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - cf_options_map["write_buffer_size"] = "1"; - ASSERT_OK(GetColumnFamilyOptionsFromMap(cf_config_options, base_cf_opt, - cf_options_map, &new_cf_opt)); - - cf_options_map["unknown_option"] = "1"; - ASSERT_NOK(GetColumnFamilyOptionsFromMap(cf_config_options, base_cf_opt, - cf_options_map, &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - cf_config_options.input_strings_escaped = false; - cf_config_options.ignore_unknown_options = true; - ASSERT_OK(GetColumnFamilyOptionsFromMap(cf_config_options, base_cf_opt, - cf_options_map, &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - loose, base_cf_opt, new_cf_opt, nullptr /* new_opt_map */)); - ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( - exact /* default for VerifyCFOptions */, base_cf_opt, new_cf_opt, nullptr)); - - DBOptions base_db_opt; - DBOptions new_db_opt; - ConfigOptions db_config_options(base_db_opt); - db_config_options.input_strings_escaped = false; - db_config_options.ignore_unknown_options = false; - ASSERT_OK(GetDBOptionsFromMap(db_config_options, base_db_opt, db_options_map, - &new_db_opt)); - ASSERT_EQ(new_db_opt.create_if_missing, false); - ASSERT_EQ(new_db_opt.create_missing_column_families, true); - ASSERT_EQ(new_db_opt.error_if_exists, false); - ASSERT_EQ(new_db_opt.paranoid_checks, true); - ASSERT_EQ(new_db_opt.track_and_verify_wals_in_manifest, true); - ASSERT_EQ(new_db_opt.max_open_files, 32); - ASSERT_EQ(new_db_opt.max_total_wal_size, static_cast(33)); - ASSERT_EQ(new_db_opt.use_fsync, true); - ASSERT_EQ(new_db_opt.db_log_dir, "/db_log_dir"); - ASSERT_EQ(new_db_opt.wal_dir, "/wal_dir"); - ASSERT_EQ(new_db_opt.delete_obsolete_files_period_micros, - static_cast(34)); - ASSERT_EQ(new_db_opt.max_background_compactions, 35); - ASSERT_EQ(new_db_opt.max_background_flushes, 36); - ASSERT_EQ(new_db_opt.max_log_file_size, 37U); - ASSERT_EQ(new_db_opt.log_file_time_to_roll, 38U); - ASSERT_EQ(new_db_opt.keep_log_file_num, 39U); - ASSERT_EQ(new_db_opt.recycle_log_file_num, 5U); - ASSERT_EQ(new_db_opt.max_manifest_file_size, static_cast(40)); - ASSERT_EQ(new_db_opt.table_cache_numshardbits, 41); - ASSERT_EQ(new_db_opt.WAL_ttl_seconds, static_cast(43)); - ASSERT_EQ(new_db_opt.WAL_size_limit_MB, static_cast(44)); - ASSERT_EQ(new_db_opt.manifest_preallocation_size, 45U); - ASSERT_EQ(new_db_opt.allow_mmap_reads, true); - ASSERT_EQ(new_db_opt.allow_mmap_writes, false); - ASSERT_EQ(new_db_opt.use_direct_reads, false); - ASSERT_EQ(new_db_opt.use_direct_io_for_flush_and_compaction, false); - ASSERT_EQ(new_db_opt.is_fd_close_on_exec, true); - ASSERT_EQ(new_db_opt.stats_dump_period_sec, 46U); - ASSERT_EQ(new_db_opt.stats_persist_period_sec, 57U); - ASSERT_EQ(new_db_opt.persist_stats_to_disk, false); - ASSERT_EQ(new_db_opt.stats_history_buffer_size, 69U); - ASSERT_EQ(new_db_opt.advise_random_on_open, true); - ASSERT_EQ(new_db_opt.use_adaptive_mutex, false); - 
ASSERT_EQ(new_db_opt.compaction_readahead_size, 100); - ASSERT_EQ(new_db_opt.random_access_max_buffer_size, 3145728); - ASSERT_EQ(new_db_opt.writable_file_max_buffer_size, 314159); - ASSERT_EQ(new_db_opt.bytes_per_sync, static_cast<uint64_t>(47)); - ASSERT_EQ(new_db_opt.wal_bytes_per_sync, static_cast<uint64_t>(48)); - ASSERT_EQ(new_db_opt.strict_bytes_per_sync, true); - - db_options_map["max_open_files"] = "hello"; - ASSERT_NOK(GetDBOptionsFromMap(db_config_options, base_db_opt, db_options_map, - &new_db_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(loose, base_db_opt, new_db_opt)); - - // unknown options should fail parsing without ignore_unknown_options = true - db_options_map["unknown_db_option"] = "1"; - ASSERT_NOK(GetDBOptionsFromMap(db_config_options, base_db_opt, db_options_map, - &new_db_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); - - db_config_options.input_strings_escaped = false; - db_config_options.ignore_unknown_options = true; - ASSERT_OK(GetDBOptionsFromMap(db_config_options, base_db_opt, db_options_map, - &new_db_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(loose, base_db_opt, new_db_opt)); - ASSERT_NOK(RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); -} - -TEST_F(OptionsOldApiTest, GetColumnFamilyOptionsFromStringTest) { - ColumnFamilyOptions base_cf_opt; - ColumnFamilyOptions new_cf_opt; - base_cf_opt.table_factory.reset(); - ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, "", - &new_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "write_buffer_size=5", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 5U); - ASSERT_TRUE(new_cf_opt.table_factory == nullptr); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "write_buffer_size=6;", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 6U); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, " write_buffer_size = 7 ", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 7U); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, " write_buffer_size = 8 ; ", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 8U); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=9;max_write_buffer_number=10", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 9U); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 10); - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=11; max_write_buffer_number = 12 ;", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 11U); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 12); - // Wrong name "max_write_buffer_number_" - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=13;max_write_buffer_number_=14;", &new_cf_opt)); - ConfigOptions exact; - exact.sanity_level = ConfigOptions::kSanityLevelExactMatch; - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - // Comparator from object registry - std::string kCompName = "reverse_comp"; - ObjectLibrary::Default()->AddFactory<const Comparator>( - kCompName, - [](const std::string& /*name*/, - std::unique_ptr<const Comparator>* /*guard*/, - std::string* /* errmsg */) { return
ReverseBytewiseComparator(); }); - - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "comparator=" + kCompName + ";", - &new_cf_opt)); - ASSERT_EQ(new_cf_opt.comparator, ReverseBytewiseComparator()); - - // MergeOperator from object registry - std::unique_ptr bxo(new BytesXOROperator()); - std::string kMoName = bxo->Name(); - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "merge_operator=" + kMoName + ";", - &new_cf_opt)); - ASSERT_EQ(kMoName, std::string(new_cf_opt.merge_operator->Name())); - - // Wrong key/value pair - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=13;max_write_buffer_number;", &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - // Error Paring value - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=13;max_write_buffer_number=;", &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - // Missing option name - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "write_buffer_size=13; =100;", &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - const uint64_t kilo = 1024UL; - const uint64_t mega = 1024 * kilo; - const uint64_t giga = 1024 * mega; - const uint64_t tera = 1024 * giga; - - // Units (k) - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "max_write_buffer_number=15K", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 15 * kilo); - // Units (m) - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "max_write_buffer_number=16m;inplace_update_num_locks=17M", &new_cf_opt)); - ASSERT_EQ(new_cf_opt.max_write_buffer_number, 16 * mega); - ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 17u * mega); - // Units (g) - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=18g;prefix_extractor=capped:8;" - "arena_block_size=19G", - &new_cf_opt)); - - ASSERT_EQ(new_cf_opt.write_buffer_size, 18 * giga); - ASSERT_EQ(new_cf_opt.arena_block_size, 19 * giga); - ASSERT_TRUE(new_cf_opt.prefix_extractor.get() != nullptr); - ASSERT_EQ(new_cf_opt.prefix_extractor->AsString(), "rocksdb.CappedPrefix.8"); - - // Units (t) - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, "write_buffer_size=20t;arena_block_size=21T", - &new_cf_opt)); - ASSERT_EQ(new_cf_opt.write_buffer_size, 20 * tera); - ASSERT_EQ(new_cf_opt.arena_block_size, 21 * tera); - - // Nested block based table options - // Empty - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={};arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - // Non-empty - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_cache=1M;block_size=4;};" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - // Last one - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_cache=1M;block_size=4;}", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - // Mismatch curly braces - 
ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={{{block_size=4;};" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - // Unexpected chars after closing curly brace - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_size=4;}};" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_size=4;}xdfa;" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_size=4;}xdfa", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - // Invalid block based table option - ASSERT_NOK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={xx_block_size=4;}", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "optimize_filters_for_hits=true", - &new_cf_opt)); - ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "optimize_filters_for_hits=false", - &new_cf_opt)); - - ASSERT_NOK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, - "optimize_filters_for_hits=junk", - &new_cf_opt)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); - - // Nested plain table options - // Empty - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "plain_table_factory={};arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - ASSERT_EQ(std::string(new_cf_opt.table_factory->Name()), "PlainTable"); - // Non-empty - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "plain_table_factory={user_key_len=66;bloom_bits_per_key=20;};" - "arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.table_factory != nullptr); - ASSERT_EQ(std::string(new_cf_opt.table_factory->Name()), "PlainTable"); - - // memtable factory - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "write_buffer_size=10;max_write_buffer_number=16;" - "memtable=skip_list:10;arena_block_size=1024", - &new_cf_opt)); - ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr); - ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory")); - - // blob cache - ASSERT_OK(GetColumnFamilyOptionsFromString( - config_options, base_cf_opt, - "blob_cache={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;high_pri_pool_ratio=0.5;};", - &new_cf_opt)); - ASSERT_NE(new_cf_opt.blob_cache, nullptr); - ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL); - ASSERT_EQ(static_cast(new_cf_opt.blob_cache.get()) - 
->GetNumShardBits(), - 4); - ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true); - ASSERT_EQ(static_cast(new_cf_opt.blob_cache.get()) - ->GetHighPriPoolRatio(), - 0.5); -} - -TEST_F(OptionsTest, SliceTransformCreateFromString) { - std::shared_ptr transform = nullptr; - ConfigOptions config_options; - config_options.ignore_unsupported_options = false; - config_options.ignore_unknown_options = false; - - ASSERT_OK( - SliceTransform::CreateFromString(config_options, "fixed:31", &transform)); - ASSERT_NE(transform, nullptr); - ASSERT_FALSE(transform->IsInstanceOf("capped")); - ASSERT_TRUE(transform->IsInstanceOf("fixed")); - ASSERT_TRUE(transform->IsInstanceOf("rocksdb.FixedPrefix")); - ASSERT_EQ(transform->GetId(), "rocksdb.FixedPrefix.31"); - ASSERT_OK(SliceTransform::CreateFromString( - config_options, "rocksdb.FixedPrefix.42", &transform)); - ASSERT_NE(transform, nullptr); - ASSERT_EQ(transform->GetId(), "rocksdb.FixedPrefix.42"); - - ASSERT_OK(SliceTransform::CreateFromString(config_options, "capped:16", - &transform)); - ASSERT_NE(transform, nullptr); - ASSERT_FALSE(transform->IsInstanceOf("fixed")); - ASSERT_TRUE(transform->IsInstanceOf("capped")); - ASSERT_TRUE(transform->IsInstanceOf("rocksdb.CappedPrefix")); - ASSERT_EQ(transform->GetId(), "rocksdb.CappedPrefix.16"); - ASSERT_OK(SliceTransform::CreateFromString( - config_options, "rocksdb.CappedPrefix.42", &transform)); - ASSERT_NE(transform, nullptr); - ASSERT_EQ(transform->GetId(), "rocksdb.CappedPrefix.42"); - - ASSERT_OK(SliceTransform::CreateFromString(config_options, "rocksdb.Noop", - &transform)); - ASSERT_NE(transform, nullptr); - - ASSERT_NOK(SliceTransform::CreateFromString(config_options, - "fixed:21:invalid", &transform)); - ASSERT_NOK(SliceTransform::CreateFromString(config_options, - "capped:21:invalid", &transform)); - ASSERT_NOK( - SliceTransform::CreateFromString(config_options, "fixed", &transform)); - ASSERT_NOK( - SliceTransform::CreateFromString(config_options, "capped", &transform)); - ASSERT_NOK( - SliceTransform::CreateFromString(config_options, "fixed:", &transform)); - ASSERT_NOK( - SliceTransform::CreateFromString(config_options, "capped:", &transform)); - ASSERT_NOK(SliceTransform::CreateFromString( - config_options, "rocksdb.FixedPrefix:42", &transform)); - ASSERT_NOK(SliceTransform::CreateFromString( - config_options, "rocksdb.CappedPrefix:42", &transform)); - ASSERT_NOK(SliceTransform::CreateFromString( - config_options, "rocksdb.FixedPrefix", &transform)); - ASSERT_NOK(SliceTransform::CreateFromString( - config_options, "rocksdb.CappedPrefix", &transform)); - ASSERT_NOK(SliceTransform::CreateFromString( - config_options, "rocksdb.FixedPrefix.", &transform)); - ASSERT_NOK(SliceTransform::CreateFromString( - config_options, "rocksdb.CappedPrefix.", &transform)); - ASSERT_NOK( - SliceTransform::CreateFromString(config_options, "invalid", &transform)); - - ASSERT_OK(SliceTransform::CreateFromString( - config_options, "rocksdb.CappedPrefix.11", &transform)); - ASSERT_NE(transform, nullptr); - ASSERT_EQ(transform->GetId(), "rocksdb.CappedPrefix.11"); - ASSERT_TRUE(transform->IsInstanceOf("capped")); - ASSERT_TRUE(transform->IsInstanceOf("capped:11")); - ASSERT_TRUE(transform->IsInstanceOf("rocksdb.CappedPrefix")); - ASSERT_TRUE(transform->IsInstanceOf("rocksdb.CappedPrefix.11")); - ASSERT_FALSE(transform->IsInstanceOf("fixed")); - ASSERT_FALSE(transform->IsInstanceOf("fixed:11")); - ASSERT_FALSE(transform->IsInstanceOf("rocksdb.FixedPrefix")); - 
ASSERT_FALSE(transform->IsInstanceOf("rocksdb.FixedPrefix.11")); - - ASSERT_OK(SliceTransform::CreateFromString( - config_options, "rocksdb.FixedPrefix.11", &transform)); - ASSERT_TRUE(transform->IsInstanceOf("fixed")); - ASSERT_TRUE(transform->IsInstanceOf("fixed:11")); - ASSERT_TRUE(transform->IsInstanceOf("rocksdb.FixedPrefix")); - ASSERT_TRUE(transform->IsInstanceOf("rocksdb.FixedPrefix.11")); - ASSERT_FALSE(transform->IsInstanceOf("capped")); - ASSERT_FALSE(transform->IsInstanceOf("capped:11")); - ASSERT_FALSE(transform->IsInstanceOf("rocksdb.CappedPrefix")); - ASSERT_FALSE(transform->IsInstanceOf("rocksdb.CappedPrefix.11")); -} - -TEST_F(OptionsOldApiTest, GetBlockBasedTableOptionsFromString) { - BlockBasedTableOptions table_opt; - BlockBasedTableOptions new_opt; - ConfigOptions config_options; - config_options.input_strings_escaped = false; - config_options.ignore_unknown_options = false; - config_options.invoke_prepare_options = false; - config_options.ignore_unsupported_options = false; - - // make sure default values are overwritten by something else - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "cache_index_and_filter_blocks=1;index_type=kHashSearch;" - "checksum=kxxHash;no_block_cache=1;" - "block_cache=1M;block_cache_compressed=1k;block_size=1024;" - "block_size_deviation=8;block_restart_interval=4;" - "format_version=5;whole_key_filtering=1;" - "filter_policy=bloomfilter:4.567:false;", - &new_opt)); - ASSERT_TRUE(new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(new_opt.index_type, BlockBasedTableOptions::kHashSearch); - ASSERT_EQ(new_opt.checksum, ChecksumType::kxxHash); - ASSERT_TRUE(new_opt.no_block_cache); - ASSERT_TRUE(new_opt.block_cache != nullptr); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL); - ASSERT_EQ(new_opt.block_size, 1024UL); - ASSERT_EQ(new_opt.block_size_deviation, 8); - ASSERT_EQ(new_opt.block_restart_interval, 4); - ASSERT_EQ(new_opt.format_version, 5U); - ASSERT_EQ(new_opt.whole_key_filtering, true); - ASSERT_TRUE(new_opt.filter_policy != nullptr); - const BloomFilterPolicy* bfp = - dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 4567); - EXPECT_EQ(bfp->GetWholeBitsPerKey(), 5); - - // unknown option - ASSERT_NOK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "cache_index_and_filter_blocks=1;index_type=kBinarySearch;" - "bad_option=1", - &new_opt)); - ASSERT_EQ(static_cast(table_opt.cache_index_and_filter_blocks), - new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(table_opt.index_type, new_opt.index_type); - - // unrecognized index type - ASSERT_NOK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "cache_index_and_filter_blocks=1;index_type=kBinarySearchXX", &new_opt)); - ASSERT_EQ(table_opt.cache_index_and_filter_blocks, - new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(table_opt.index_type, new_opt.index_type); - - // unrecognized checksum type - ASSERT_NOK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "cache_index_and_filter_blocks=1;checksum=kxxHashXX", &new_opt)); - ASSERT_EQ(table_opt.cache_index_and_filter_blocks, - new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(table_opt.index_type, new_opt.index_type); - - // unrecognized filter policy name - ASSERT_NOK( - GetBlockBasedTableOptionsFromString(config_options, table_opt, - "cache_index_and_filter_blocks=1;" - "filter_policy=bloomfilterxx:4:true", - &new_opt)); - ASSERT_EQ(table_opt.cache_index_and_filter_blocks, - 
new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy); - - // Used to be rejected, now accepted - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=bloomfilter:4", &new_opt)); - bfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); - EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); - - // Check block cache options are overwritten when specified - // in new format as a struct. - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "block_cache={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;high_pri_pool_ratio=0.5;};" - "block_cache_compressed={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;high_pri_pool_ratio=0.5;}", - &new_opt)); - ASSERT_TRUE(new_opt.block_cache != nullptr); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetNumShardBits(), - 4); - ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), true); - ASSERT_EQ(std::dynamic_pointer_cast( - new_opt.block_cache)->GetHighPriPoolRatio(), 0.5); - - // Set only block cache capacity. Check other values are - // reset to default values. - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "block_cache={capacity=2M};" - "block_cache_compressed={capacity=2M}", - &new_opt)); - ASSERT_TRUE(new_opt.block_cache != nullptr); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 2*1024UL*1024UL); - // Default values - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetNumShardBits(), - GetDefaultCacheShardBits(new_opt.block_cache->GetCapacity())); - ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), false); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetHighPriPoolRatio(), - 0.5); - - // Set couple of block cache options. - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "block_cache={num_shard_bits=5;high_pri_pool_ratio=0.5;};" - "block_cache_compressed={num_shard_bits=5;" - "high_pri_pool_ratio=0.0;}", - &new_opt)); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 0); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetNumShardBits(), - 5); - ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), false); - ASSERT_EQ(std::dynamic_pointer_cast( - new_opt.block_cache)->GetHighPriPoolRatio(), 0.5); - - // Set couple of block cache options. 
- ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, - "block_cache={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;};" - "block_cache_compressed={capacity=1M;num_shard_bits=4;" - "strict_capacity_limit=true;}", - &new_opt)); - ASSERT_TRUE(new_opt.block_cache != nullptr); - ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetNumShardBits(), - 4); - ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), true); - ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) - ->GetHighPriPoolRatio(), - 0.5); -} - -TEST_F(OptionsOldApiTest, GetPlainTableOptionsFromString) { - PlainTableOptions table_opt; - PlainTableOptions new_opt; - // make sure default values are overwritten by something else - ConfigOptions config_options_from_string; - config_options_from_string.input_strings_escaped = false; - config_options_from_string.ignore_unknown_options = false; - config_options_from_string.invoke_prepare_options = false; - ASSERT_OK(GetPlainTableOptionsFromString( - config_options_from_string, table_opt, - "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;" - "index_sparseness=8;huge_page_tlb_size=4;encoding_type=kPrefix;" - "full_scan_mode=true;store_index_in_file=true", - &new_opt)); - ASSERT_EQ(new_opt.user_key_len, 66u); - ASSERT_EQ(new_opt.bloom_bits_per_key, 20); - ASSERT_EQ(new_opt.hash_table_ratio, 0.5); - ASSERT_EQ(new_opt.index_sparseness, 8); - ASSERT_EQ(new_opt.huge_page_tlb_size, 4); - ASSERT_EQ(new_opt.encoding_type, EncodingType::kPrefix); - ASSERT_TRUE(new_opt.full_scan_mode); - ASSERT_TRUE(new_opt.store_index_in_file); - - std::unordered_map opt_map; - ASSERT_OK(StringToMap( - "user_key_len=55;bloom_bits_per_key=10;huge_page_tlb_size=8;", &opt_map)); - ConfigOptions config_options_from_map; - config_options_from_map.input_strings_escaped = false; - config_options_from_map.ignore_unknown_options = false; - ASSERT_OK(GetPlainTableOptionsFromMap(config_options_from_map, table_opt, - opt_map, &new_opt)); - ASSERT_EQ(new_opt.user_key_len, 55u); - ASSERT_EQ(new_opt.bloom_bits_per_key, 10); - ASSERT_EQ(new_opt.huge_page_tlb_size, 8); - - // unknown option - ASSERT_NOK(GetPlainTableOptionsFromString( - config_options_from_string, table_opt, - "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;" - "bad_option=1", - &new_opt)); - - // unrecognized EncodingType - ASSERT_NOK(GetPlainTableOptionsFromString( - config_options_from_string, table_opt, - "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;" - "encoding_type=kPrefixXX", - &new_opt)); -} - -TEST_F(OptionsOldApiTest, GetOptionsFromStringTest) { - Options base_options, new_options; - base_options.write_buffer_size = 20; - base_options.min_write_buffer_number_to_merge = 15; - BlockBasedTableOptions block_based_table_options; - block_based_table_options.cache_index_and_filter_blocks = true; - base_options.table_factory.reset( - NewBlockBasedTableFactory(block_based_table_options)); - - // Register an Env with object registry. 
- ObjectLibrary::Default()->AddFactory( - "CustomEnvDefault", - [](const std::string& /*name*/, std::unique_ptr* /*env_guard*/, - std::string* /* errmsg */) { - static CustomEnv env(Env::Default()); - return &env; - }); - - ASSERT_OK(GetOptionsFromString( - base_options, - "write_buffer_size=10;max_write_buffer_number=16;" - "block_based_table_factory={block_cache=1M;block_size=4;};" - "compression_opts=4:5:6;create_if_missing=true;max_open_files=1;" - "bottommost_compression_opts=5:6:7;create_if_missing=true;max_open_files=" - "1;" - "rate_limiter_bytes_per_sec=1024;env=CustomEnvDefault", - &new_options)); - - ASSERT_EQ(new_options.compression_opts.window_bits, 4); - ASSERT_EQ(new_options.compression_opts.level, 5); - ASSERT_EQ(new_options.compression_opts.strategy, 6); - ASSERT_EQ(new_options.compression_opts.max_dict_bytes, 0u); - ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u); - ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u); - ASSERT_EQ(new_options.compression_opts.enabled, false); - ASSERT_EQ(new_options.compression_opts.use_zstd_dict_trainer, true); - ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption); - ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5); - ASSERT_EQ(new_options.bottommost_compression_opts.level, 6); - ASSERT_EQ(new_options.bottommost_compression_opts.strategy, 7); - ASSERT_EQ(new_options.bottommost_compression_opts.max_dict_bytes, 0u); - ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u); - ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u); - ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false); - ASSERT_EQ(new_options.bottommost_compression_opts.use_zstd_dict_trainer, - true); - ASSERT_EQ(new_options.write_buffer_size, 10U); - ASSERT_EQ(new_options.max_write_buffer_number, 16); - - auto new_block_based_table_options = - new_options.table_factory->GetOptions(); - ASSERT_NE(new_block_based_table_options, nullptr); - ASSERT_EQ(new_block_based_table_options->block_cache->GetCapacity(), - 1U << 20); - ASSERT_EQ(new_block_based_table_options->block_size, 4U); - // don't overwrite block based table options - ASSERT_TRUE(new_block_based_table_options->cache_index_and_filter_blocks); - - ASSERT_EQ(new_options.create_if_missing, true); - ASSERT_EQ(new_options.max_open_files, 1); - ASSERT_TRUE(new_options.rate_limiter.get() != nullptr); - Env* newEnv = new_options.env; - ASSERT_OK(Env::CreateFromString({}, "CustomEnvDefault", &newEnv)); - ASSERT_EQ(newEnv, new_options.env); -} - -TEST_F(OptionsOldApiTest, DBOptionsSerialization) { - Options base_options, new_options; - Random rnd(301); - - // Phase 1: Make big change in base_options - test::RandomInitDBOptions(&base_options, &rnd); - - // Phase 2: obtain a string from base_option - std::string base_options_file_content; - ASSERT_OK(GetStringFromDBOptions(&base_options_file_content, base_options)); - - // Phase 3: Set new_options from the derived string and expect - // new_options == base_options - const DBOptions base_db_options; - ConfigOptions db_config_options(base_db_options); - db_config_options.input_strings_escaped = false; - db_config_options.ignore_unknown_options = false; - ASSERT_OK(GetDBOptionsFromString(db_config_options, base_db_options, - base_options_file_content, &new_options)); - ConfigOptions verify_db_config_options; - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(verify_db_config_options, - base_options, new_options)); -} - -TEST_F(OptionsOldApiTest, 
ColumnFamilyOptionsSerialization) { - Options options; - ColumnFamilyOptions base_opt, new_opt; - Random rnd(302); - // Phase 1: randomly assign base_opt - // custom type options - test::RandomInitCFOptions(&base_opt, options, &rnd); - - // Phase 2: obtain a string from base_opt - std::string base_options_file_content; - ASSERT_OK( - GetStringFromColumnFamilyOptions(&base_options_file_content, base_opt)); - - // Phase 3: Set new_opt from the derived string and expect - // new_opt == base_opt - ConfigOptions cf_config_options; - cf_config_options.input_strings_escaped = false; - cf_config_options.ignore_unknown_options = false; - ASSERT_OK( - GetColumnFamilyOptionsFromString(cf_config_options, ColumnFamilyOptions(), - base_options_file_content, &new_opt)); - ConfigOptions verify_cf_config_options; - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(verify_cf_config_options, - base_opt, new_opt)); - if (base_opt.compaction_filter) { - delete base_opt.compaction_filter; - } -} - -class OptionsParserTest : public testing::Test { - public: - OptionsParserTest() { fs_.reset(new test::StringFS(FileSystem::Default())); } - - protected: - std::shared_ptr fs_; -}; - -TEST_F(OptionsParserTest, Comment) { - DBOptions db_opt; - db_opt.max_open_files = 12345; - db_opt.max_background_flushes = 301; - db_opt.max_total_wal_size = 1024; - ColumnFamilyOptions cf_opt; - - std::string options_file_content = - "# This is a testing option string.\n" - "# Currently we only support \"#\" styled comment.\n" - "\n" - "[Version]\n" - " rocksdb_version=3.14.0\n" - " options_file_version=1\n" - "[ DBOptions ]\n" - " # note that we don't support space around \"=\"\n" - " max_open_files=12345;\n" - " max_background_flushes=301 # comment after a statement is fine\n" - " # max_background_flushes=1000 # this line would be ignored\n" - " # max_background_compactions=2000 # so does this one\n" - " max_total_wal_size=1024 # keep_log_file_num=1000\n" - "[CFOptions \"default\"] # column family must be specified\n" - " # in the correct order\n" - " # if a section is blank, we will use the default\n"; - - const std::string kTestFileName = "test-rocksdb-options.ini"; - ASSERT_OK(fs_->WriteToNewFile(kTestFileName, options_file_content)); - RocksDBOptionsParser parser; - ASSERT_OK( - parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */)); - - ConfigOptions exact; - exact.input_strings_escaped = false; - exact.sanity_level = ConfigOptions::kSanityLevelExactMatch; - ASSERT_OK( - RocksDBOptionsParser::VerifyDBOptions(exact, *parser.db_opt(), db_opt)); - ASSERT_EQ(parser.NumColumnFamilies(), 1U); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - exact, *parser.GetCFOptions("default"), cf_opt)); -} - -TEST_F(OptionsParserTest, ExtraSpace) { - std::string options_file_content = - "# This is a testing option string.\n" - "# Currently we only support \"#\" styled comment.\n" - "\n" - "[ Version ]\n" - " rocksdb_version = 3.14.0 \n" - " options_file_version=1 # some comment\n" - "[DBOptions ] # some comment\n" - "max_open_files=12345 \n" - " max_background_flushes = 301 \n" - " max_total_wal_size = 1024 # keep_log_file_num=1000\n" - " [CFOptions \"default\" ]\n" - " # if a section is blank, we will use the default\n"; - - const std::string kTestFileName = "test-rocksdb-options.ini"; - ASSERT_OK(fs_->WriteToNewFile(kTestFileName, options_file_content)); - RocksDBOptionsParser parser; - ASSERT_OK( - parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */)); -} - -TEST_F(OptionsParserTest, MissingDBOptions) { 
- std::string options_file_content = - "# This is a testing option string.\n" - "# Currently we only support \"#\" styled comment.\n" - "\n" - "[Version]\n" - " rocksdb_version=3.14.0\n" - " options_file_version=1\n" - "[CFOptions \"default\"]\n" - " # if a section is blank, we will use the default\n"; - - const std::string kTestFileName = "test-rocksdb-options.ini"; - ASSERT_OK(fs_->WriteToNewFile(kTestFileName, options_file_content)); - RocksDBOptionsParser parser; - ASSERT_NOK( - parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */)); - ; -} - -TEST_F(OptionsParserTest, DoubleDBOptions) { - DBOptions db_opt; - db_opt.max_open_files = 12345; - db_opt.max_background_flushes = 301; - db_opt.max_total_wal_size = 1024; - ColumnFamilyOptions cf_opt; - - std::string options_file_content = - "# This is a testing option string.\n" - "# Currently we only support \"#\" styled comment.\n" - "\n" - "[Version]\n" - " rocksdb_version=3.14.0\n" - " options_file_version=1\n" - "[DBOptions]\n" - " max_open_files=12345\n" - " max_background_flushes=301\n" - " max_total_wal_size=1024 # keep_log_file_num=1000\n" - "[DBOptions]\n" - "[CFOptions \"default\"]\n" - " # if a section is blank, we will use the default\n"; - - const std::string kTestFileName = "test-rocksdb-options.ini"; - ASSERT_OK(fs_->WriteToNewFile(kTestFileName, options_file_content)); - RocksDBOptionsParser parser; - ASSERT_NOK( - parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */)); -} - -TEST_F(OptionsParserTest, NoDefaultCFOptions) { - DBOptions db_opt; - db_opt.max_open_files = 12345; - db_opt.max_background_flushes = 301; - db_opt.max_total_wal_size = 1024; - ColumnFamilyOptions cf_opt; - - std::string options_file_content = - "# This is a testing option string.\n" - "# Currently we only support \"#\" styled comment.\n" - "\n" - "[Version]\n" - " rocksdb_version=3.14.0\n" - " options_file_version=1\n" - "[DBOptions]\n" - " max_open_files=12345\n" - " max_background_flushes=301\n" - " max_total_wal_size=1024 # keep_log_file_num=1000\n" - "[CFOptions \"something_else\"]\n" - " # if a section is blank, we will use the default\n"; - - const std::string kTestFileName = "test-rocksdb-options.ini"; - ASSERT_OK(fs_->WriteToNewFile(kTestFileName, options_file_content)); - RocksDBOptionsParser parser; - ASSERT_NOK( - parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */)); -} - -TEST_F(OptionsParserTest, DefaultCFOptionsMustBeTheFirst) { - DBOptions db_opt; - db_opt.max_open_files = 12345; - db_opt.max_background_flushes = 301; - db_opt.max_total_wal_size = 1024; - ColumnFamilyOptions cf_opt; - - std::string options_file_content = - "# This is a testing option string.\n" - "# Currently we only support \"#\" styled comment.\n" - "\n" - "[Version]\n" - " rocksdb_version=3.14.0\n" - " options_file_version=1\n" - "[DBOptions]\n" - " max_open_files=12345\n" - " max_background_flushes=301\n" - " max_total_wal_size=1024 # keep_log_file_num=1000\n" - "[CFOptions \"something_else\"]\n" - " # if a section is blank, we will use the default\n" - "[CFOptions \"default\"]\n" - " # if a section is blank, we will use the default\n"; - - const std::string kTestFileName = "test-rocksdb-options.ini"; - ASSERT_OK(fs_->WriteToNewFile(kTestFileName, options_file_content)); - RocksDBOptionsParser parser; - ASSERT_NOK( - parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */)); -} - -TEST_F(OptionsParserTest, DuplicateCFOptions) { - DBOptions db_opt; - db_opt.max_open_files = 12345; - 
db_opt.max_background_flushes = 301; - db_opt.max_total_wal_size = 1024; - ColumnFamilyOptions cf_opt; - - std::string options_file_content = - "# This is a testing option string.\n" - "# Currently we only support \"#\" styled comment.\n" - "\n" - "[Version]\n" - " rocksdb_version=3.14.0\n" - " options_file_version=1\n" - "[DBOptions]\n" - " max_open_files=12345\n" - " max_background_flushes=301\n" - " max_total_wal_size=1024 # keep_log_file_num=1000\n" - "[CFOptions \"default\"]\n" - "[CFOptions \"something_else\"]\n" - "[CFOptions \"something_else\"]\n"; - - const std::string kTestFileName = "test-rocksdb-options.ini"; - ASSERT_OK(fs_->WriteToNewFile(kTestFileName, options_file_content)); - RocksDBOptionsParser parser; - ASSERT_NOK( - parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */)); -} - -TEST_F(OptionsParserTest, IgnoreUnknownOptions) { - for (int case_id = 0; case_id < 5; case_id++) { - DBOptions db_opt; - db_opt.max_open_files = 12345; - db_opt.max_background_flushes = 301; - db_opt.max_total_wal_size = 1024; - ColumnFamilyOptions cf_opt; - - std::string version_string; - bool should_ignore = true; - if (case_id == 0) { - // same version - should_ignore = false; - version_string = std::to_string(ROCKSDB_MAJOR) + "." + - std::to_string(ROCKSDB_MINOR) + ".0"; - } else if (case_id == 1) { - // higher minor version - should_ignore = true; - version_string = std::to_string(ROCKSDB_MAJOR) + "." + - std::to_string(ROCKSDB_MINOR + 1) + ".0"; - } else if (case_id == 2) { - // higher major version. - should_ignore = true; - version_string = std::to_string(ROCKSDB_MAJOR + 1) + ".0.0"; - } else if (case_id == 3) { - // lower minor version -#if ROCKSDB_MINOR == 0 - continue; -#else - version_string = std::to_string(ROCKSDB_MAJOR) + "." + - std::to_string(ROCKSDB_MINOR - 1) + ".0"; - should_ignore = false; -#endif - } else { - // lower major version - should_ignore = false; - version_string = std::to_string(ROCKSDB_MAJOR - 1) + "." 
+ - std::to_string(ROCKSDB_MINOR) + ".0"; - } - - std::string options_file_content = - "# This is a testing option string.\n" - "# Currently we only support \"#\" styled comment.\n" - "\n" - "[Version]\n" - " rocksdb_version=" + - version_string + - "\n" - " options_file_version=1\n" - "[DBOptions]\n" - " max_open_files=12345\n" - " max_background_flushes=301\n" - " max_total_wal_size=1024 # keep_log_file_num=1000\n" - " unknown_db_option1=321\n" - " unknown_db_option2=false\n" - "[CFOptions \"default\"]\n" - " unknown_cf_option1=hello\n" - "[CFOptions \"something_else\"]\n" - " unknown_cf_option2=world\n" - " # if a section is blank, we will use the default\n"; - - const std::string kTestFileName = "test-rocksdb-options.ini"; - auto s = fs_->FileExists(kTestFileName, IOOptions(), nullptr); - ASSERT_TRUE(s.ok() || s.IsNotFound()); - if (s.ok()) { - ASSERT_OK(fs_->DeleteFile(kTestFileName, IOOptions(), nullptr)); - } - ASSERT_OK(fs_->WriteToNewFile(kTestFileName, options_file_content)); - RocksDBOptionsParser parser; - ASSERT_NOK(parser.Parse(kTestFileName, fs_.get(), false, - 4096 /* readahead_size */)); - if (should_ignore) { - ASSERT_OK(parser.Parse(kTestFileName, fs_.get(), - true /* ignore_unknown_options */, - 4096 /* readahead_size */)); - } else { - ASSERT_NOK(parser.Parse(kTestFileName, fs_.get(), - true /* ignore_unknown_options */, - 4096 /* readahead_size */)); - } - } -} - -TEST_F(OptionsParserTest, ParseVersion) { - DBOptions db_opt; - db_opt.max_open_files = 12345; - db_opt.max_background_flushes = 301; - db_opt.max_total_wal_size = 1024; - ColumnFamilyOptions cf_opt; - - std::string file_template = - "# This is a testing option string.\n" - "# Currently we only support \"#\" styled comment.\n" - "\n" - "[Version]\n" - " rocksdb_version=3.13.1\n" - " options_file_version=%s\n" - "[DBOptions]\n" - "[CFOptions \"default\"]\n"; - const int kLength = 1000; - char buffer[kLength]; - RocksDBOptionsParser parser; - - const std::vector invalid_versions = { - "a.b.c", "3.2.2b", "3.-12", "3. 1", // only digits and dots are allowed - "1.2.3.4", - "1.2.3" // can only contains at most one dot. 
- "0", // options_file_version must be at least one - "3..2", - ".", ".1.2", // must have at least one digit before each dot - "1.2.", "1.", "2.34."}; // must have at least one digit after each dot - for (auto iv : invalid_versions) { - snprintf(buffer, kLength - 1, file_template.c_str(), iv.c_str()); - - parser.Reset(); - ASSERT_OK(fs_->WriteToNewFile(iv, buffer)); - ASSERT_NOK(parser.Parse(iv, fs_.get(), false, 0 /* readahead_size */)); - } - - const std::vector valid_versions = { - "1.232", "100", "3.12", "1", "12.3 ", " 1.25 "}; - for (auto vv : valid_versions) { - snprintf(buffer, kLength - 1, file_template.c_str(), vv.c_str()); - parser.Reset(); - ASSERT_OK(fs_->WriteToNewFile(vv, buffer)); - ASSERT_OK(parser.Parse(vv, fs_.get(), false, 0 /* readahead_size */)); - } -} - -void VerifyCFPointerTypedOptions( - ColumnFamilyOptions* base_cf_opt, const ColumnFamilyOptions* new_cf_opt, - const std::unordered_map* new_cf_opt_map) { - std::string name_buffer; - ConfigOptions config_options; - config_options.input_strings_escaped = false; - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, *base_cf_opt, - *new_cf_opt, new_cf_opt_map)); - - // change the name of merge operator back-and-forth - { - auto* merge_operator = base_cf_opt->merge_operator - ->CheckedCast(); - if (merge_operator != nullptr) { - name_buffer = merge_operator->Name(); - // change the name and expect non-ok status - merge_operator->SetName("some-other-name"); - ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - // change the name back and expect ok status - merge_operator->SetName(name_buffer); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - } - } - - // change the name of the compaction filter factory back-and-forth - { - auto* compaction_filter_factory = - base_cf_opt->compaction_filter_factory - ->CheckedCast(); - if (compaction_filter_factory != nullptr) { - name_buffer = compaction_filter_factory->Name(); - // change the name and expect non-ok status - compaction_filter_factory->SetName("some-other-name"); - ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - // change the name back and expect ok status - compaction_filter_factory->SetName(name_buffer); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - } - } - - // test by setting compaction_filter to nullptr - { - auto* tmp_compaction_filter = base_cf_opt->compaction_filter; - if (tmp_compaction_filter != nullptr) { - base_cf_opt->compaction_filter = nullptr; - // set compaction_filter to nullptr and expect non-ok status - ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - // set the value back and expect ok status - base_cf_opt->compaction_filter = tmp_compaction_filter; - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - } - } - - // test by setting table_factory to nullptr - { - auto tmp_table_factory = base_cf_opt->table_factory; - if (tmp_table_factory != nullptr) { - base_cf_opt->table_factory.reset(); - // set table_factory to nullptr and expect non-ok status - ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - // set the value back and expect ok status - base_cf_opt->table_factory = tmp_table_factory; 
- ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - } - } - - // test by setting memtable_factory to nullptr - { - auto tmp_memtable_factory = base_cf_opt->memtable_factory; - if (tmp_memtable_factory != nullptr) { - base_cf_opt->memtable_factory.reset(); - // set memtable_factory to nullptr and expect non-ok status - ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - // set the value back and expect ok status - base_cf_opt->memtable_factory = tmp_memtable_factory; - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); - } - } -} - -TEST_F(OptionsParserTest, Readahead) { - DBOptions base_db_opt; - std::vector base_cf_opts; - base_cf_opts.emplace_back(); - base_cf_opts.emplace_back(); - - std::string one_mb_string = std::string(1024 * 1024, 'x'); - std::vector cf_names = {"default", one_mb_string}; - const std::string kOptionsFileName = "test-persisted-options.ini"; - - ASSERT_OK(PersistRocksDBOptions(base_db_opt, cf_names, base_cf_opts, - kOptionsFileName, fs_.get())); - - uint64_t file_size = 0; - ASSERT_OK( - fs_->GetFileSize(kOptionsFileName, IOOptions(), &file_size, nullptr)); - assert(file_size > 0); - - RocksDBOptionsParser parser; - - fs_->num_seq_file_read_ = 0; - size_t readahead_size = 128 * 1024; - - ASSERT_OK(parser.Parse(kOptionsFileName, fs_.get(), false, readahead_size)); - ASSERT_EQ(fs_->num_seq_file_read_.load(), - (file_size - 1) / readahead_size + 1); - - fs_->num_seq_file_read_.store(0); - readahead_size = 1024 * 1024; - ASSERT_OK(parser.Parse(kOptionsFileName, fs_.get(), false, readahead_size)); - ASSERT_EQ(fs_->num_seq_file_read_.load(), - (file_size - 1) / readahead_size + 1); - - // Tiny readahead. 8 KB is read each time. - fs_->num_seq_file_read_.store(0); - ASSERT_OK( - parser.Parse(kOptionsFileName, fs_.get(), false, 1 /* readahead_size */)); - ASSERT_GE(fs_->num_seq_file_read_.load(), file_size / (8 * 1024)); - ASSERT_LT(fs_->num_seq_file_read_.load(), file_size / (8 * 1024) * 2); - - // Disable readahead means 512KB readahead. - fs_->num_seq_file_read_.store(0); - ASSERT_OK( - parser.Parse(kOptionsFileName, fs_.get(), false, 0 /* readahead_size */)); - ASSERT_GE(fs_->num_seq_file_read_.load(), (file_size - 1) / (512 * 1024) + 1); -} - -TEST_F(OptionsParserTest, DumpAndParse) { - DBOptions base_db_opt; - std::vector base_cf_opts; - std::vector cf_names = {"default", "cf1", "cf2", "cf3", - "c:f:4:4:4" - "p\\i\\k\\a\\chu\\\\\\", - "###rocksdb#1-testcf#2###"}; - const int num_cf = static_cast(cf_names.size()); - Random rnd(302); - test::RandomInitDBOptions(&base_db_opt, &rnd); - base_db_opt.db_log_dir += "/#odd #but #could #happen #path #/\\\\#OMG"; - - BlockBasedTableOptions special_bbto; - special_bbto.cache_index_and_filter_blocks = true; - special_bbto.block_size = 999999; - - for (int c = 0; c < num_cf; ++c) { - ColumnFamilyOptions cf_opt; - Random cf_rnd(0xFB + c); - test::RandomInitCFOptions(&cf_opt, base_db_opt, &cf_rnd); - if (c < 4) { - cf_opt.prefix_extractor.reset(test::RandomSliceTransform(&rnd, c)); - } - if (c < 3) { - cf_opt.table_factory.reset(test::RandomTableFactory(&rnd, c)); - } else if (c == 4) { - cf_opt.table_factory.reset(NewBlockBasedTableFactory(special_bbto)); - } else if (c == 5) { - // A table factory that doesn't support deserialization should be - // supported. 
- cf_opt.table_factory.reset(new UnregisteredTableFactory()); - } - base_cf_opts.emplace_back(cf_opt); - } - - const std::string kOptionsFileName = "test-persisted-options.ini"; - // Use default for escaped(true), unknown(false) and check (exact) - ConfigOptions config_options; - ASSERT_OK(PersistRocksDBOptions(base_db_opt, cf_names, base_cf_opts, - kOptionsFileName, fs_.get())); - - RocksDBOptionsParser parser; - ASSERT_OK(parser.Parse(config_options, kOptionsFileName, fs_.get())); - - // Make sure block-based table factory options was deserialized correctly - std::shared_ptr ttf = (*parser.cf_opts())[4].table_factory; - ASSERT_EQ(TableFactory::kBlockBasedTableName(), std::string(ttf->Name())); - const auto parsed_bbto = ttf->GetOptions(); - ASSERT_NE(parsed_bbto, nullptr); - ASSERT_EQ(special_bbto.block_size, parsed_bbto->block_size); - ASSERT_EQ(special_bbto.cache_index_and_filter_blocks, - parsed_bbto->cache_index_and_filter_blocks); - - ASSERT_OK(RocksDBOptionsParser::VerifyRocksDBOptionsFromFile( - config_options, base_db_opt, cf_names, base_cf_opts, kOptionsFileName, - fs_.get())); - - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions( - config_options, *parser.db_opt(), base_db_opt)); - for (int c = 0; c < num_cf; ++c) { - const auto* cf_opt = parser.GetCFOptions(cf_names[c]); - ASSERT_NE(cf_opt, nullptr); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - config_options, base_cf_opts[c], *cf_opt, - &(parser.cf_opt_maps()->at(c)))); - } - - // Further verify pointer-typed options - for (int c = 0; c < num_cf; ++c) { - const auto* cf_opt = parser.GetCFOptions(cf_names[c]); - ASSERT_NE(cf_opt, nullptr); - VerifyCFPointerTypedOptions(&base_cf_opts[c], cf_opt, - &(parser.cf_opt_maps()->at(c))); - } - - ASSERT_EQ(parser.GetCFOptions("does not exist"), nullptr); - - base_db_opt.max_open_files++; - ASSERT_NOK(RocksDBOptionsParser::VerifyRocksDBOptionsFromFile( - config_options, base_db_opt, cf_names, base_cf_opts, kOptionsFileName, - fs_.get())); - - for (int c = 0; c < num_cf; ++c) { - if (base_cf_opts[c].compaction_filter) { - delete base_cf_opts[c].compaction_filter; - } - } -} - -TEST_F(OptionsParserTest, DifferentDefault) { - const std::string kOptionsFileName = "test-persisted-options.ini"; - - ColumnFamilyOptions cf_level_opts; - ASSERT_EQ(CompactionPri::kMinOverlappingRatio, cf_level_opts.compaction_pri); - cf_level_opts.OptimizeLevelStyleCompaction(); - - ColumnFamilyOptions cf_univ_opts; - cf_univ_opts.OptimizeUniversalStyleCompaction(); - - ASSERT_OK(PersistRocksDBOptions(DBOptions(), {"default", "universal"}, - {cf_level_opts, cf_univ_opts}, - kOptionsFileName, fs_.get())); - - RocksDBOptionsParser parser; - ASSERT_OK(parser.Parse(kOptionsFileName, fs_.get(), false, - 4096 /* readahead_size */)); - - { - Options old_default_opts; - old_default_opts.OldDefaults(); - ASSERT_EQ(10 * 1048576, old_default_opts.max_bytes_for_level_base); - ASSERT_EQ(5000, old_default_opts.max_open_files); - ASSERT_EQ(2 * 1024U * 1024U, old_default_opts.delayed_write_rate); - ASSERT_EQ(WALRecoveryMode::kTolerateCorruptedTailRecords, - old_default_opts.wal_recovery_mode); - } - { - Options old_default_opts; - old_default_opts.OldDefaults(4, 6); - ASSERT_EQ(10 * 1048576, old_default_opts.max_bytes_for_level_base); - ASSERT_EQ(5000, old_default_opts.max_open_files); - } - { - Options old_default_opts; - old_default_opts.OldDefaults(4, 7); - ASSERT_NE(10 * 1048576, old_default_opts.max_bytes_for_level_base); - ASSERT_NE(4, old_default_opts.table_cache_numshardbits); - ASSERT_EQ(5000, 
old_default_opts.max_open_files); - ASSERT_EQ(2 * 1024U * 1024U, old_default_opts.delayed_write_rate); - } - { - ColumnFamilyOptions old_default_cf_opts; - old_default_cf_opts.OldDefaults(); - ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base); - ASSERT_EQ(4 << 20, old_default_cf_opts.write_buffer_size); - ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base); - ASSERT_EQ(0, old_default_cf_opts.soft_pending_compaction_bytes_limit); - ASSERT_EQ(0, old_default_cf_opts.hard_pending_compaction_bytes_limit); - ASSERT_EQ(CompactionPri::kByCompensatedSize, - old_default_cf_opts.compaction_pri); - } - { - ColumnFamilyOptions old_default_cf_opts; - old_default_cf_opts.OldDefaults(4, 6); - ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base); - ASSERT_EQ(CompactionPri::kByCompensatedSize, - old_default_cf_opts.compaction_pri); - } - { - ColumnFamilyOptions old_default_cf_opts; - old_default_cf_opts.OldDefaults(4, 7); - ASSERT_NE(2 * 1048576, old_default_cf_opts.target_file_size_base); - ASSERT_EQ(CompactionPri::kByCompensatedSize, - old_default_cf_opts.compaction_pri); - } - { - Options old_default_opts; - old_default_opts.OldDefaults(5, 1); - ASSERT_EQ(2 * 1024U * 1024U, old_default_opts.delayed_write_rate); - } - { - Options old_default_opts; - old_default_opts.OldDefaults(5, 2); - ASSERT_EQ(16 * 1024U * 1024U, old_default_opts.delayed_write_rate); - ASSERT_TRUE(old_default_opts.compaction_pri == - CompactionPri::kByCompensatedSize); - } - { - Options old_default_opts; - old_default_opts.OldDefaults(5, 18); - ASSERT_TRUE(old_default_opts.compaction_pri == - CompactionPri::kByCompensatedSize); - } - - Options small_opts; - small_opts.OptimizeForSmallDb(); - ASSERT_EQ(2 << 20, small_opts.write_buffer_size); - ASSERT_EQ(5000, small_opts.max_open_files); -} - -class OptionsSanityCheckTest : public OptionsParserTest, - public ::testing::WithParamInterface { - protected: - ConfigOptions config_options_; - - public: - OptionsSanityCheckTest() { - config_options_.ignore_unknown_options = false; - config_options_.ignore_unsupported_options = GetParam(); - config_options_.input_strings_escaped = true; - } - - protected: - Status SanityCheckOptions(const DBOptions& db_opts, - const ColumnFamilyOptions& cf_opts, - ConfigOptions::SanityLevel level) { - config_options_.sanity_level = level; - return RocksDBOptionsParser::VerifyRocksDBOptionsFromFile( - config_options_, db_opts, {"default"}, {cf_opts}, kOptionsFileName, - fs_.get()); - } - - Status SanityCheckCFOptions(const ColumnFamilyOptions& cf_opts, - ConfigOptions::SanityLevel level) { - return SanityCheckOptions(DBOptions(), cf_opts, level); - } - - void SanityCheckCFOptions(const ColumnFamilyOptions& opts, bool exact) { - ASSERT_OK(SanityCheckCFOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); - if (exact) { - ASSERT_OK( - SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - } else { - ASSERT_NOK( - SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - } - } - - Status SanityCheckDBOptions(const DBOptions& db_opts, - ConfigOptions::SanityLevel level) { - return SanityCheckOptions(db_opts, ColumnFamilyOptions(), level); - } - - void SanityCheckDBOptions(const DBOptions& opts, bool exact) { - ASSERT_OK(SanityCheckDBOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckDBOptions(opts, ConfigOptions::kSanityLevelNone)); - if (exact) { - ASSERT_OK( - 
SanityCheckDBOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - } else { - ASSERT_NOK( - SanityCheckDBOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - } - } - - Status PersistOptions(const DBOptions& db_opts, - const ColumnFamilyOptions& cf_opts) { - Status s = fs_->DeleteFile(kOptionsFileName, IOOptions(), nullptr); - if (!s.ok()) { - return s; - } - return PersistRocksDBOptions(db_opts, {"default"}, {cf_opts}, - kOptionsFileName, fs_.get()); - } - - Status PersistCFOptions(const ColumnFamilyOptions& cf_opts) { - return PersistOptions(DBOptions(), cf_opts); - } - - Status PersistDBOptions(const DBOptions& db_opts) { - return PersistOptions(db_opts, ColumnFamilyOptions()); - } - - const std::string kOptionsFileName = "OPTIONS"; -}; - -TEST_P(OptionsSanityCheckTest, CFOptionsSanityCheck) { - ColumnFamilyOptions opts; - Random rnd(301); - - // default ColumnFamilyOptions - { - ASSERT_OK(PersistCFOptions(opts)); - ASSERT_OK( - SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - } - - // prefix_extractor - { - // Okay to change prefix_extractor form nullptr to non-nullptr - ASSERT_EQ(opts.prefix_extractor.get(), nullptr); - opts.prefix_extractor.reset(NewCappedPrefixTransform(10)); - ASSERT_OK(SanityCheckCFOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); - - // persist the change - ASSERT_OK(PersistCFOptions(opts)); - ASSERT_OK( - SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - - // use same prefix extractor but with different parameter - opts.prefix_extractor.reset(NewCappedPrefixTransform(15)); - // expect pass only in - // ConfigOptions::kSanityLevelLooselyCompatible - ASSERT_NOK( - SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - ASSERT_OK(SanityCheckCFOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); - - // repeat the test with FixedPrefixTransform - opts.prefix_extractor.reset(NewFixedPrefixTransform(10)); - ASSERT_NOK( - SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - ASSERT_OK(SanityCheckCFOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); - - // persist the change of prefix_extractor - ASSERT_OK(PersistCFOptions(opts)); - ASSERT_OK( - SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - - // use same prefix extractor but with different parameter - opts.prefix_extractor.reset(NewFixedPrefixTransform(15)); - // expect pass only in - // ConfigOptions::kSanityLevelLooselyCompatible - SanityCheckCFOptions(opts, false); - - // Change prefix extractor from non-nullptr to nullptr - opts.prefix_extractor.reset(); - // expect pass as it's safe to change prefix_extractor - // from non-null to null - ASSERT_OK(SanityCheckCFOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); - } - // persist the change - ASSERT_OK(PersistCFOptions(opts)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - - // table_factory - { - for (int tb = 0; tb <= 2; ++tb) { - // change the table factory - opts.table_factory.reset(test::RandomTableFactory(&rnd, tb)); - ASSERT_NOK(SanityCheckCFOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); - - // 
persist the change - ASSERT_OK(PersistCFOptions(opts)); - ASSERT_OK( - SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - } - } - - // merge_operator - { - // Test when going from nullptr -> merge operator - opts.merge_operator.reset(test::RandomMergeOperator(&rnd)); - ASSERT_OK(SanityCheckCFOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); - - // persist the change - ASSERT_OK(PersistCFOptions(opts)); - SanityCheckCFOptions(opts, config_options_.ignore_unsupported_options); - - for (int test = 0; test < 5; ++test) { - // change the merge operator - opts.merge_operator.reset(test::RandomMergeOperator(&rnd)); - ASSERT_NOK(SanityCheckCFOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); - - // persist the change - ASSERT_OK(PersistCFOptions(opts)); - SanityCheckCFOptions(opts, config_options_.ignore_unsupported_options); - } - - // Test when going from merge operator -> nullptr - opts.merge_operator = nullptr; - ASSERT_NOK(SanityCheckCFOptions( - opts, ConfigOptions::kSanityLevelLooselyCompatible)); - ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); - - // persist the change - ASSERT_OK(PersistCFOptions(opts)); - SanityCheckCFOptions(opts, true); - } - - // compaction_filter - { - for (int test = 0; test < 5; ++test) { - // change the compaction filter - opts.compaction_filter = test::RandomCompactionFilter(&rnd); - SanityCheckCFOptions(opts, false); - - // persist the change - ASSERT_OK(PersistCFOptions(opts)); - SanityCheckCFOptions(opts, config_options_.ignore_unsupported_options); - delete opts.compaction_filter; - opts.compaction_filter = nullptr; - } - } - - // compaction_filter_factory - { - for (int test = 0; test < 5; ++test) { - // change the compaction filter factory - opts.compaction_filter_factory.reset( - test::RandomCompactionFilterFactory(&rnd)); - SanityCheckCFOptions(opts, false); - - // persist the change - ASSERT_OK(PersistCFOptions(opts)); - SanityCheckCFOptions(opts, config_options_.ignore_unsupported_options); - } - } -} - -TEST_P(OptionsSanityCheckTest, DBOptionsSanityCheck) { - DBOptions opts; - Random rnd(301); - - // default DBOptions - { - ASSERT_OK(PersistDBOptions(opts)); - ASSERT_OK( - SanityCheckDBOptions(opts, ConfigOptions::kSanityLevelExactMatch)); - } - - // File checksum generator - { - class MockFileChecksumGenFactory : public FileChecksumGenFactory { - public: - static const char* kClassName() { return "Mock"; } - const char* Name() const override { return kClassName(); } - std::unique_ptr CreateFileChecksumGenerator( - const FileChecksumGenContext& /*context*/) override { - return nullptr; - } - }; - - // Okay to change file_checksum_gen_factory form nullptr to non-nullptr - ASSERT_EQ(opts.file_checksum_gen_factory.get(), nullptr); - opts.file_checksum_gen_factory.reset(new MockFileChecksumGenFactory()); - - // persist the change - ASSERT_OK(PersistDBOptions(opts)); - SanityCheckDBOptions(opts, config_options_.ignore_unsupported_options); - - // Change file_checksum_gen_factory from non-nullptr to nullptr - opts.file_checksum_gen_factory.reset(); - // expect pass as it's safe to change file_checksum_gen_factory - // from non-null to null - SanityCheckDBOptions(opts, false); - } - // persist the change - ASSERT_OK(PersistDBOptions(opts)); - ASSERT_OK(SanityCheckDBOptions(opts, ConfigOptions::kSanityLevelExactMatch)); -} - -namespace { 
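The sanity-check tests above drive RocksDBOptionsParser::VerifyRocksDBOptionsFromFile through the three sanity levels. As a rough, hedged sketch of the same flow outside the test fixture — the file path, the default-constructed options, and the internal header path are assumptions, not taken from the test — persisting and then verifying options could look like this:

    // Hedged sketch (not part of the deleted test): persist current options,
    // then verify a candidate set against the file at a chosen sanity level.
    #include <cassert>
    #include <string>
    #include "options/options_parser.h"  // internal header path, assumed
    #include "rocksdb/convenience.h"
    #include "rocksdb/file_system.h"
    #include "rocksdb/options.h"

    int main() {
      using namespace ROCKSDB_NAMESPACE;
      DBOptions db_opts;
      ColumnFamilyOptions cf_opts;
      FileSystem* fs = FileSystem::Default().get();
      const std::string kFile = "/tmp/OPTIONS-sanity-sketch";  // placeholder

      // Same helper the test fixture uses to write an OPTIONS file.
      Status s =
          PersistRocksDBOptions(db_opts, {"default"}, {cf_opts}, kFile, fs);
      assert(s.ok());

      ConfigOptions cfg;
      cfg.sanity_level = ConfigOptions::kSanityLevelLooselyCompatible;
      // Passes while the in-memory options stay loosely compatible with the
      // file; ConfigOptions::kSanityLevelExactMatch would demand equality.
      s = RocksDBOptionsParser::VerifyRocksDBOptionsFromFile(
          cfg, db_opts, {"default"}, {cf_opts}, kFile, fs);
      assert(s.ok());
      return 0;
    }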
-bool IsEscapedString(const std::string& str) { - for (size_t i = 0; i < str.size(); ++i) { - if (str[i] == '\\') { - // since we already handle those two consecutive '\'s in - // the next if-then branch, any '\' appear at the end - // of an escaped string in such case is not valid. - if (i == str.size() - 1) { - return false; - } - if (str[i + 1] == '\\') { - // if there're two consecutive '\'s, skip the second one. - i++; - continue; - } - switch (str[i + 1]) { - case ':': - case '\\': - case '#': - continue; - default: - // if true, '\' together with str[i + 1] is not a valid escape. - if (UnescapeChar(str[i + 1]) == str[i + 1]) { - return false; - } - } - } else if (isSpecialChar(str[i]) && (i == 0 || str[i - 1] != '\\')) { - return false; - } - } - return true; -} -} // namespace - -TEST_F(OptionsParserTest, IntegerParsing) { - ASSERT_EQ(ParseUint64("18446744073709551615"), 18446744073709551615U); - ASSERT_EQ(ParseUint32("4294967295"), 4294967295U); - ASSERT_EQ(ParseSizeT("18446744073709551615"), 18446744073709551615U); - ASSERT_EQ(ParseInt64("9223372036854775807"), 9223372036854775807); - ASSERT_EQ(ParseInt64("-9223372036854775808"), - std::numeric_limits::min()); - ASSERT_EQ(ParseInt32("2147483647"), 2147483647); - ASSERT_EQ(ParseInt32("-2147483648"), std::numeric_limits::min()); - ASSERT_EQ(ParseInt("-32767"), -32767); - ASSERT_EQ(ParseDouble("-1.234567"), -1.234567); -} - -TEST_F(OptionsParserTest, EscapeOptionString) { - ASSERT_EQ(UnescapeOptionString( - "This is a test string with \\# \\: and \\\\ escape chars."), - "This is a test string with # : and \\ escape chars."); - - ASSERT_EQ( - EscapeOptionString("This is a test string with # : and \\ escape chars."), - "This is a test string with \\# \\: and \\\\ escape chars."); - - std::string readible_chars = - "A String like this \"1234567890-=_)(*&^%$#@!ertyuiop[]{POIU" - "YTREWQasdfghjkl;':LKJHGFDSAzxcvbnm,.?>" - " -void TestOptInfo(const ConfigOptions& config_options, OptionType opt_type, - T* base, T* comp) { - std::string result; - OptionTypeInfo opt_info(0, opt_type); - ASSERT_FALSE(opt_info.AreEqual(config_options, "base", base, comp, &result)); - ASSERT_EQ(result, "base"); - ASSERT_NE(*base, *comp); - TestAndCompareOption(config_options, opt_info, "base", base, comp); - ASSERT_EQ(*base, *comp); -} - -class OptionTypeInfoTest : public testing::Test {}; - -TEST_F(OptionTypeInfoTest, BasicTypes) { - ConfigOptions config_options; - { - bool a = true, b = false; - TestOptInfo(config_options, OptionType::kBoolean, &a, &b); - } - { - int a = 100, b = 200; - TestOptInfo(config_options, OptionType::kInt, &a, &b); - } - { - int32_t a = 100, b = 200; - TestOptInfo(config_options, OptionType::kInt32T, &a, &b); - } - { - int64_t a = 100, b = 200; - TestOptInfo(config_options, OptionType::kInt64T, &a, &b); - } - { - unsigned int a = 100, b = 200; - TestOptInfo(config_options, OptionType::kUInt, &a, &b); - } - { - uint32_t a = 100, b = 200; - TestOptInfo(config_options, OptionType::kUInt32T, &a, &b); - } - { - uint64_t a = 100, b = 200; - TestOptInfo(config_options, OptionType::kUInt64T, &a, &b); - } - { - size_t a = 100, b = 200; - TestOptInfo(config_options, OptionType::kSizeT, &a, &b); - } - { - std::string a = "100", b = "200"; - TestOptInfo(config_options, OptionType::kString, &a, &b); - } - { - double a = 1.0, b = 2.0; - TestOptInfo(config_options, OptionType::kDouble, &a, &b); - } -} - -TEST_F(OptionTypeInfoTest, TestInvalidArgs) { - ConfigOptions config_options; - bool b; - int i; - int32_t i32; - int64_t i64; - unsigned int u; - 
int32_t u32; - int64_t u64; - size_t sz; - double d; - - ASSERT_NOK(OptionTypeInfo(0, OptionType::kBoolean) - .Parse(config_options, "b", "x", &b)); - ASSERT_NOK( - OptionTypeInfo(0, OptionType::kInt).Parse(config_options, "b", "x", &i)); - ASSERT_NOK(OptionTypeInfo(0, OptionType::kInt32T) - .Parse(config_options, "b", "x", &i32)); - ASSERT_NOK(OptionTypeInfo(0, OptionType::kInt64T) - .Parse(config_options, "b", "x", &i64)); - ASSERT_NOK( - OptionTypeInfo(0, OptionType::kUInt).Parse(config_options, "b", "x", &u)); - ASSERT_NOK(OptionTypeInfo(0, OptionType::kUInt32T) - .Parse(config_options, "b", "x", &u32)); - ASSERT_NOK(OptionTypeInfo(0, OptionType::kUInt64T) - .Parse(config_options, "b", "x", &u64)); - ASSERT_NOK(OptionTypeInfo(0, OptionType::kSizeT) - .Parse(config_options, "b", "x", &sz)); - ASSERT_NOK(OptionTypeInfo(0, OptionType::kDouble) - .Parse(config_options, "b", "x", &d)); - - // Don't know how to convert Unknowns to anything else - ASSERT_NOK(OptionTypeInfo(0, OptionType::kUnknown) - .Parse(config_options, "b", "x", &d)); - - // Verify that if the parse function throws an exception, it is also trapped - OptionTypeInfo func_info(0, OptionType::kUnknown, - OptionVerificationType::kNormal, - OptionTypeFlags::kNone, - [](const ConfigOptions&, const std::string&, - const std::string& value, void* addr) { - auto ptr = static_cast(addr); - *ptr = ParseInt(value); - return Status::OK(); - }); - ASSERT_OK(func_info.Parse(config_options, "b", "1", &i)); - ASSERT_NOK(func_info.Parse(config_options, "b", "x", &i)); -} - -TEST_F(OptionTypeInfoTest, TestParseFunc) { - OptionTypeInfo opt_info(0, OptionType::kUnknown, - OptionVerificationType::kNormal, - OptionTypeFlags::kNone); - opt_info.SetParseFunc([](const ConfigOptions& /*opts*/, - const std::string& name, const std::string& value, - void* addr) { - auto ptr = static_cast(addr); - if (name == "Oops") { - return Status::InvalidArgument(value); - } else { - *ptr = value + " " + name; - return Status::OK(); - } - }); - ConfigOptions config_options; - std::string base; - ASSERT_OK(opt_info.Parse(config_options, "World", "Hello", &base)); - ASSERT_EQ(base, "Hello World"); - ASSERT_NOK(opt_info.Parse(config_options, "Oops", "Hello", &base)); -} - -TEST_F(OptionTypeInfoTest, TestSerializeFunc) { - OptionTypeInfo opt_info(0, OptionType::kString, - OptionVerificationType::kNormal, - OptionTypeFlags::kNone); - opt_info.SetSerializeFunc([](const ConfigOptions& /*opts*/, - const std::string& name, const void* /*addr*/, - std::string* value) { - if (name == "Oops") { - return Status::InvalidArgument(name); - } else { - *value = name; - return Status::OK(); - } - }); - ConfigOptions config_options; - std::string base; - std::string value; - ASSERT_OK(opt_info.Serialize(config_options, "Hello", &base, &value)); - ASSERT_EQ(value, "Hello"); - ASSERT_NOK(opt_info.Serialize(config_options, "Oops", &base, &value)); -} - -TEST_F(OptionTypeInfoTest, TestEqualsFunc) { - OptionTypeInfo opt_info(0, OptionType::kInt, OptionVerificationType::kNormal, - OptionTypeFlags::kNone); - opt_info.SetEqualsFunc([](const ConfigOptions& /*opts*/, - const std::string& name, const void* addr1, - const void* addr2, std::string* mismatch) { - auto i1 = *(static_cast(addr1)); - auto i2 = *(static_cast(addr2)); - if (name == "LT") { - return i1 < i2; - } else if (name == "GT") { - return i1 > i2; - } else if (name == "EQ") { - return i1 == i2; - } else { - *mismatch = name + "???"; - return false; - } - }); - - ConfigOptions config_options; - int int1 = 100; - int int2 = 200; - 
std::string mismatch; - ASSERT_TRUE(opt_info.AreEqual(config_options, "LT", &int1, &int2, &mismatch)); - ASSERT_EQ(mismatch, ""); - ASSERT_FALSE( - opt_info.AreEqual(config_options, "GT", &int1, &int2, &mismatch)); - ASSERT_EQ(mismatch, "GT"); - ASSERT_FALSE( - opt_info.AreEqual(config_options, "NO", &int1, &int2, &mismatch)); - ASSERT_EQ(mismatch, "NO???"); -} - -TEST_F(OptionTypeInfoTest, TestPrepareFunc) { - OptionTypeInfo opt_info(0, OptionType::kInt, OptionVerificationType::kNormal, - OptionTypeFlags::kNone); - opt_info.SetPrepareFunc( - [](const ConfigOptions& /*opts*/, const std::string& name, void* addr) { - auto i1 = static_cast(addr); - if (name == "x2") { - *i1 *= 2; - } else if (name == "/2") { - *i1 /= 2; - } else { - return Status::InvalidArgument("Bad Argument", name); - } - return Status::OK(); - }); - ConfigOptions config_options; - int int1 = 100; - ASSERT_OK(opt_info.Prepare(config_options, "x2", &int1)); - ASSERT_EQ(int1, 200); - ASSERT_OK(opt_info.Prepare(config_options, "/2", &int1)); - ASSERT_EQ(int1, 100); - ASSERT_NOK(opt_info.Prepare(config_options, "??", &int1)); - ASSERT_EQ(int1, 100); -} -TEST_F(OptionTypeInfoTest, TestValidateFunc) { - OptionTypeInfo opt_info(0, OptionType::kSizeT, - OptionVerificationType::kNormal, - OptionTypeFlags::kNone); - opt_info.SetValidateFunc([](const DBOptions& db_opts, - const ColumnFamilyOptions& cf_opts, - const std::string& name, const void* addr) { - const auto sz = static_cast(addr); - bool is_valid = false; - if (name == "keep_log_file_num") { - is_valid = (*sz == db_opts.keep_log_file_num); - } else if (name == "write_buffer_size") { - is_valid = (*sz == cf_opts.write_buffer_size); - } - if (is_valid) { - return Status::OK(); - } else { - return Status::InvalidArgument("Mismatched value", name); - } - }); - ConfigOptions config_options; - DBOptions db_options; - ColumnFamilyOptions cf_options; - - ASSERT_OK(opt_info.Validate(db_options, cf_options, "keep_log_file_num", - &db_options.keep_log_file_num)); - ASSERT_OK(opt_info.Validate(db_options, cf_options, "write_buffer_size", - &cf_options.write_buffer_size)); - ASSERT_NOK(opt_info.Validate(db_options, cf_options, "keep_log_file_num", - &cf_options.write_buffer_size)); - ASSERT_NOK(opt_info.Validate(db_options, cf_options, "write_buffer_size", - &db_options.keep_log_file_num)); -} - -TEST_F(OptionTypeInfoTest, TestOptionFlags) { - OptionTypeInfo opt_none(0, OptionType::kString, - OptionVerificationType::kNormal, - OptionTypeFlags::kDontSerialize); - OptionTypeInfo opt_never(0, OptionType::kString, - OptionVerificationType::kNormal, - OptionTypeFlags::kCompareNever); - OptionTypeInfo opt_alias(0, OptionType::kString, - OptionVerificationType::kAlias, - OptionTypeFlags::kNone); - OptionTypeInfo opt_deprecated(0, OptionType::kString, - OptionVerificationType::kDeprecated, - OptionTypeFlags::kNone); - ConfigOptions config_options; - std::string opts_str; - std::string base = "base"; - std::string comp = "comp"; - - // If marked string none, the serialization returns not supported - ASSERT_NOK(opt_none.Serialize(config_options, "None", &base, &opts_str)); - // If marked never compare, they match even when they do not - ASSERT_TRUE(opt_never.AreEqual(config_options, "Never", &base, &comp, &base)); - ASSERT_FALSE(opt_none.AreEqual(config_options, "Never", &base, &comp, &base)); - - // An alias can change the value via parse, but does nothing on serialize on - // match - std::string result; - ASSERT_OK(opt_alias.Parse(config_options, "Alias", "Alias", &base)); - 
ASSERT_OK(opt_alias.Serialize(config_options, "Alias", &base, &result)); - ASSERT_TRUE( - opt_alias.AreEqual(config_options, "Alias", &base, &comp, &result)); - ASSERT_EQ(base, "Alias"); - ASSERT_NE(base, comp); - - // Deprecated options do nothing on any of the commands - ASSERT_OK(opt_deprecated.Parse(config_options, "Alias", "Deprecated", &base)); - ASSERT_OK(opt_deprecated.Serialize(config_options, "Alias", &base, &result)); - ASSERT_TRUE( - opt_deprecated.AreEqual(config_options, "Alias", &base, &comp, &result)); - ASSERT_EQ(base, "Alias"); - ASSERT_NE(base, comp); -} - -TEST_F(OptionTypeInfoTest, TestCustomEnum) { - enum TestEnum { kA, kB, kC }; - std::unordered_map enum_map = { - {"A", TestEnum::kA}, - {"B", TestEnum::kB}, - {"C", TestEnum::kC}, - }; - OptionTypeInfo opt_info = OptionTypeInfo::Enum(0, &enum_map); - TestEnum e1, e2; - ConfigOptions config_options; - std::string result, mismatch; - - e2 = TestEnum::kA; - - ASSERT_OK(opt_info.Parse(config_options, "", "B", &e1)); - ASSERT_OK(opt_info.Serialize(config_options, "", &e1, &result)); - ASSERT_EQ(e1, TestEnum::kB); - ASSERT_EQ(result, "B"); - - ASSERT_FALSE(opt_info.AreEqual(config_options, "Enum", &e1, &e2, &mismatch)); - ASSERT_EQ(mismatch, "Enum"); - - TestParseAndCompareOption(config_options, opt_info, "", "C", &e1, &e2); - ASSERT_EQ(e2, TestEnum::kC); - - ASSERT_NOK(opt_info.Parse(config_options, "", "D", &e1)); - ASSERT_EQ(e1, TestEnum::kC); -} - -TEST_F(OptionTypeInfoTest, TestBuiltinEnum) { - ConfigOptions config_options; - for (auto iter : OptionsHelper::compaction_style_string_map) { - CompactionStyle e1, e2; - TestParseAndCompareOption(config_options, - OptionTypeInfo(0, OptionType::kCompactionStyle), - "CompactionStyle", iter.first, &e1, &e2); - ASSERT_EQ(e1, iter.second); - } - for (auto iter : OptionsHelper::compaction_pri_string_map) { - CompactionPri e1, e2; - TestParseAndCompareOption(config_options, - OptionTypeInfo(0, OptionType::kCompactionPri), - "CompactionPri", iter.first, &e1, &e2); - ASSERT_EQ(e1, iter.second); - } - for (auto iter : OptionsHelper::compression_type_string_map) { - CompressionType e1, e2; - TestParseAndCompareOption(config_options, - OptionTypeInfo(0, OptionType::kCompressionType), - "CompressionType", iter.first, &e1, &e2); - ASSERT_EQ(e1, iter.second); - } - for (auto iter : OptionsHelper::compaction_stop_style_string_map) { - CompactionStopStyle e1, e2; - TestParseAndCompareOption( - config_options, OptionTypeInfo(0, OptionType::kCompactionStopStyle), - "CompactionStopStyle", iter.first, &e1, &e2); - ASSERT_EQ(e1, iter.second); - } - for (auto iter : OptionsHelper::checksum_type_string_map) { - ChecksumType e1, e2; - TestParseAndCompareOption(config_options, - OptionTypeInfo(0, OptionType::kChecksumType), - "CheckSumType", iter.first, &e1, &e2); - ASSERT_EQ(e1, iter.second); - } - for (auto iter : OptionsHelper::encoding_type_string_map) { - EncodingType e1, e2; - TestParseAndCompareOption(config_options, - OptionTypeInfo(0, OptionType::kEncodingType), - "EncodingType", iter.first, &e1, &e2); - ASSERT_EQ(e1, iter.second); - } -} - -TEST_F(OptionTypeInfoTest, TestStruct) { - struct Basic { - int i = 42; - std::string s = "Hello"; - }; - - struct Extended { - int j = 11; - Basic b; - }; - - std::unordered_map basic_type_map = { - {"i", {offsetof(struct Basic, i), OptionType::kInt}}, - {"s", {offsetof(struct Basic, s), OptionType::kString}}, - }; - OptionTypeInfo basic_info = OptionTypeInfo::Struct( - "b", &basic_type_map, 0, OptionVerificationType::kNormal, - 
OptionTypeFlags::kMutable); - - std::unordered_map extended_type_map = { - {"j", {offsetof(struct Extended, j), OptionType::kInt}}, - {"b", OptionTypeInfo::Struct( - "b", &basic_type_map, offsetof(struct Extended, b), - OptionVerificationType::kNormal, OptionTypeFlags::kNone)}, - {"m", OptionTypeInfo::Struct( - "m", &basic_type_map, offsetof(struct Extended, b), - OptionVerificationType::kNormal, OptionTypeFlags::kMutable)}, - }; - OptionTypeInfo extended_info = OptionTypeInfo::Struct( - "e", &extended_type_map, 0, OptionVerificationType::kNormal, - OptionTypeFlags::kMutable); - Extended e1, e2; - ConfigOptions config_options; - std::string mismatch; - TestParseAndCompareOption(config_options, basic_info, "b", "{i=33;s=33}", - &e1.b, &e2.b); - ASSERT_EQ(e1.b.i, 33); - ASSERT_EQ(e1.b.s, "33"); - - TestParseAndCompareOption(config_options, basic_info, "b.i", "44", &e1.b, - &e2.b); - ASSERT_EQ(e1.b.i, 44); - - TestParseAndCompareOption(config_options, basic_info, "i", "55", &e1.b, - &e2.b); - ASSERT_EQ(e1.b.i, 55); - - e1.b.i = 0; - - ASSERT_FALSE( - basic_info.AreEqual(config_options, "b", &e1.b, &e2.b, &mismatch)); - ASSERT_EQ(mismatch, "b.i"); - mismatch.clear(); - ASSERT_FALSE( - basic_info.AreEqual(config_options, "b.i", &e1.b, &e2.b, &mismatch)); - ASSERT_EQ(mismatch, "b.i"); - mismatch.clear(); - ASSERT_FALSE( - basic_info.AreEqual(config_options, "i", &e1.b, &e2.b, &mismatch)); - ASSERT_EQ(mismatch, "b.i"); - mismatch.clear(); - - e1 = e2; - ASSERT_NOK(basic_info.Parse(config_options, "b", "{i=33;s=33;j=44}", &e1.b)); - ASSERT_NOK(basic_info.Parse(config_options, "b.j", "44", &e1.b)); - ASSERT_NOK(basic_info.Parse(config_options, "j", "44", &e1.b)); - - TestParseAndCompareOption(config_options, extended_info, "e", - "b={i=55;s=55}; j=22;", &e1, &e2); - ASSERT_EQ(e1.b.i, 55); - ASSERT_EQ(e1.j, 22); - ASSERT_EQ(e1.b.s, "55"); - TestParseAndCompareOption(config_options, extended_info, "e.b", - "{i=66;s=66;}", &e1, &e2); - ASSERT_EQ(e1.b.i, 66); - ASSERT_EQ(e1.j, 22); - ASSERT_EQ(e1.b.s, "66"); - TestParseAndCompareOption(config_options, extended_info, "e.b.i", "77", &e1, - &e2); - ASSERT_EQ(e1.b.i, 77); - ASSERT_EQ(e1.j, 22); - ASSERT_EQ(e1.b.s, "66"); -} - -TEST_F(OptionTypeInfoTest, TestArrayType) { - OptionTypeInfo array_info = OptionTypeInfo::Array( - 0, OptionVerificationType::kNormal, OptionTypeFlags::kNone, - {0, OptionType::kString}); - std::array array1, array2; - std::string mismatch; - - ConfigOptions config_options; - TestParseAndCompareOption(config_options, array_info, "v", "a:b:c:d", &array1, - &array2); - - ASSERT_EQ(array1.size(), 4); - ASSERT_EQ(array1[0], "a"); - ASSERT_EQ(array1[1], "b"); - ASSERT_EQ(array1[2], "c"); - ASSERT_EQ(array1[3], "d"); - array1[3] = "e"; - ASSERT_FALSE( - array_info.AreEqual(config_options, "v", &array1, &array2, &mismatch)); - ASSERT_EQ(mismatch, "v"); - - // Test vectors with inner brackets - TestParseAndCompareOption(config_options, array_info, "v", "a:{b}:c:d", - &array1, &array2); - ASSERT_EQ(array1.size(), 4); - ASSERT_EQ(array1[0], "a"); - ASSERT_EQ(array1[1], "b"); - ASSERT_EQ(array1[2], "c"); - ASSERT_EQ(array1[3], "d"); - - std::array array3, array4; - OptionTypeInfo bar_info = OptionTypeInfo::Array( - 0, OptionVerificationType::kNormal, OptionTypeFlags::kNone, - {0, OptionType::kString}, '|'); - TestParseAndCompareOption(config_options, bar_info, "v", "x|y|z", &array3, - &array4); - - // Test arrays with inner array - TestParseAndCompareOption(config_options, bar_info, "v", - "a|{b1|b2}|{c1|c2|{d1|d2}}", &array3, &array4, - 
false); - ASSERT_EQ(array3.size(), 3); - ASSERT_EQ(array3[0], "a"); - ASSERT_EQ(array3[1], "b1|b2"); - ASSERT_EQ(array3[2], "c1|c2|{d1|d2}"); - - TestParseAndCompareOption(config_options, bar_info, "v", - "{a1|a2}|{b1|{c1|c2}}|d1", &array3, &array4, true); - ASSERT_EQ(array3.size(), 3); - ASSERT_EQ(array3[0], "a1|a2"); - ASSERT_EQ(array3[1], "b1|{c1|c2}"); - ASSERT_EQ(array3[2], "d1"); - - // Test invalid input: less element than requested - auto s = bar_info.Parse(config_options, "opt_name1", "a1|a2", &array3); - ASSERT_TRUE(s.IsInvalidArgument()); - - // Test invalid input: more element than requested - s = bar_info.Parse(config_options, "opt_name2", "a1|b|c1|d3", &array3); - ASSERT_TRUE(s.IsInvalidArgument()); -} - -TEST_F(OptionTypeInfoTest, TestVectorType) { - OptionTypeInfo vec_info = OptionTypeInfo::Vector( - 0, OptionVerificationType::kNormal, OptionTypeFlags::kNone, - {0, OptionType::kString}); - std::vector vec1, vec2; - std::string mismatch; - - ConfigOptions config_options; - TestParseAndCompareOption(config_options, vec_info, "v", "a:b:c:d", &vec1, - &vec2); - ASSERT_EQ(vec1.size(), 4); - ASSERT_EQ(vec1[0], "a"); - ASSERT_EQ(vec1[1], "b"); - ASSERT_EQ(vec1[2], "c"); - ASSERT_EQ(vec1[3], "d"); - vec1[3] = "e"; - ASSERT_FALSE(vec_info.AreEqual(config_options, "v", &vec1, &vec2, &mismatch)); - ASSERT_EQ(mismatch, "v"); - - // Test vectors with inner brackets - TestParseAndCompareOption(config_options, vec_info, "v", "a:{b}:c:d", &vec1, - &vec2); - ASSERT_EQ(vec1.size(), 4); - ASSERT_EQ(vec1[0], "a"); - ASSERT_EQ(vec1[1], "b"); - ASSERT_EQ(vec1[2], "c"); - ASSERT_EQ(vec1[3], "d"); - - OptionTypeInfo bar_info = OptionTypeInfo::Vector( - 0, OptionVerificationType::kNormal, OptionTypeFlags::kNone, - {0, OptionType::kString}, '|'); - TestParseAndCompareOption(config_options, vec_info, "v", "x|y|z", &vec1, - &vec2); - // Test vectors with inner vector - TestParseAndCompareOption(config_options, bar_info, "v", - "a|{b1|b2}|{c1|c2|{d1|d2}}", &vec1, &vec2, false); - ASSERT_EQ(vec1.size(), 3); - ASSERT_EQ(vec1[0], "a"); - ASSERT_EQ(vec1[1], "b1|b2"); - ASSERT_EQ(vec1[2], "c1|c2|{d1|d2}"); - - TestParseAndCompareOption(config_options, bar_info, "v", - "{a1|a2}|{b1|{c1|c2}}|d1", &vec1, &vec2, true); - ASSERT_EQ(vec1.size(), 3); - ASSERT_EQ(vec1[0], "a1|a2"); - ASSERT_EQ(vec1[1], "b1|{c1|c2}"); - ASSERT_EQ(vec1[2], "d1"); - - TestParseAndCompareOption(config_options, bar_info, "v", "{a1}", &vec1, &vec2, - false); - ASSERT_EQ(vec1.size(), 1); - ASSERT_EQ(vec1[0], "a1"); - - TestParseAndCompareOption(config_options, bar_info, "v", "{a1|a2}|{b1|b2}", - &vec1, &vec2, true); - ASSERT_EQ(vec1.size(), 2); - ASSERT_EQ(vec1[0], "a1|a2"); - ASSERT_EQ(vec1[1], "b1|b2"); -} - -TEST_F(OptionTypeInfoTest, TestStaticType) { - struct SimpleOptions { - size_t size = 0; - bool verify = true; - }; - - static std::unordered_map type_map = { - {"size", {offsetof(struct SimpleOptions, size), OptionType::kSizeT}}, - {"verify", - {offsetof(struct SimpleOptions, verify), OptionType::kBoolean}}, - }; - - ConfigOptions config_options; - SimpleOptions opts, copy; - opts.size = 12345; - opts.verify = false; - std::string str, mismatch; - - ASSERT_OK( - OptionTypeInfo::SerializeType(config_options, type_map, &opts, &str)); - ASSERT_FALSE(OptionTypeInfo::TypesAreEqual(config_options, type_map, &opts, - ©, &mismatch)); - ASSERT_OK(OptionTypeInfo::ParseType(config_options, str, type_map, ©)); - ASSERT_TRUE(OptionTypeInfo::TypesAreEqual(config_options, type_map, &opts, - ©, &mismatch)); -} - -class ConfigOptionsTest : public 
testing::Test {}; - -TEST_F(ConfigOptionsTest, EnvFromConfigOptions) { - ConfigOptions config_options; - DBOptions db_opts; - Options opts; - Env* mem_env = NewMemEnv(Env::Default()); - config_options.registry->AddLibrary("custom-env", RegisterCustomEnv, - kCustomEnvName); - - config_options.env = mem_env; - // First test that we can get the env as expected - ASSERT_OK(GetDBOptionsFromString(config_options, DBOptions(), kCustomEnvProp, - &db_opts)); - ASSERT_OK( - GetOptionsFromString(config_options, Options(), kCustomEnvProp, &opts)); - ASSERT_NE(config_options.env, db_opts.env); - ASSERT_EQ(opts.env, db_opts.env); - Env* custom_env = db_opts.env; - - // Now try a "bad" env" and check that nothing changed - config_options.ignore_unsupported_options = true; - ASSERT_OK( - GetDBOptionsFromString(config_options, db_opts, "env=unknown", &db_opts)); - ASSERT_OK(GetOptionsFromString(config_options, opts, "env=unknown", &opts)); - ASSERT_EQ(config_options.env, mem_env); - ASSERT_EQ(db_opts.env, custom_env); - ASSERT_EQ(opts.env, db_opts.env); - - // Now try a "bad" env" ignoring unknown objects - config_options.ignore_unsupported_options = false; - ASSERT_NOK( - GetDBOptionsFromString(config_options, db_opts, "env=unknown", &db_opts)); - ASSERT_EQ(config_options.env, mem_env); - ASSERT_EQ(db_opts.env, custom_env); - ASSERT_EQ(opts.env, db_opts.env); - - delete mem_env; -} -TEST_F(ConfigOptionsTest, MergeOperatorFromString) { - ConfigOptions config_options; - std::shared_ptr merge_op; - - ASSERT_OK(MergeOperator::CreateFromString(config_options, "put", &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("put")); - ASSERT_STREQ(merge_op->Name(), "PutOperator"); - - ASSERT_OK( - MergeOperator::CreateFromString(config_options, "put_v1", &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("PutOperator")); - - ASSERT_OK( - MergeOperator::CreateFromString(config_options, "uint64add", &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("uint64add")); - ASSERT_STREQ(merge_op->Name(), "UInt64AddOperator"); - - ASSERT_OK(MergeOperator::CreateFromString(config_options, "max", &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("max")); - ASSERT_STREQ(merge_op->Name(), "MaxOperator"); - - ASSERT_OK( - MergeOperator::CreateFromString(config_options, "bytesxor", &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("bytesxor")); - ASSERT_STREQ(merge_op->Name(), BytesXOROperator::kClassName()); - - ASSERT_OK( - MergeOperator::CreateFromString(config_options, "sortlist", &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("sortlist")); - ASSERT_STREQ(merge_op->Name(), SortList::kClassName()); - - ASSERT_OK(MergeOperator::CreateFromString(config_options, "stringappend", - &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("stringappend")); - ASSERT_STREQ(merge_op->Name(), StringAppendOperator::kClassName()); - auto delimiter = merge_op->GetOptions("Delimiter"); - ASSERT_NE(delimiter, nullptr); - ASSERT_EQ(*delimiter, ","); - - ASSERT_OK(MergeOperator::CreateFromString(config_options, "stringappendtest", - &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("stringappendtest")); - ASSERT_STREQ(merge_op->Name(), StringAppendTESTOperator::kClassName()); - delimiter = merge_op->GetOptions("Delimiter"); - ASSERT_NE(delimiter, nullptr); - ASSERT_EQ(*delimiter, ","); - - 
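The assertions above resolve each built-in merge operator purely from its registered name and inspect its Delimiter option. As a hedged illustration of the same factory entry point in application code — only the operator id is carried over from the test; everything else below is an assumed example:

    // Minimal sketch: build a merge operator from a string id and attach it
    // to ColumnFamilyOptions so DB::Merge() can use it on that column family.
    #include <cassert>
    #include <memory>
    #include "rocksdb/convenience.h"   // ConfigOptions
    #include "rocksdb/merge_operator.h"
    #include "rocksdb/options.h"

    int main() {
      ROCKSDB_NAMESPACE::ConfigOptions config_options;
      std::shared_ptr<ROCKSDB_NAMESPACE::MergeOperator> merge_op;
      // "uint64add" is one of the built-in ids exercised by the test above.
      ROCKSDB_NAMESPACE::Status s =
          ROCKSDB_NAMESPACE::MergeOperator::CreateFromString(
              config_options, "uint64add", &merge_op);
      assert(s.ok() && merge_op != nullptr);

      ROCKSDB_NAMESPACE::ColumnFamilyOptions cf_opts;
      cf_opts.merge_operator = merge_op;
      return 0;
    }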
ASSERT_OK(MergeOperator::CreateFromString( - config_options, "id=stringappend; delimiter=||", &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("stringappend")); - ASSERT_STREQ(merge_op->Name(), StringAppendOperator::kClassName()); - delimiter = merge_op->GetOptions("Delimiter"); - ASSERT_NE(delimiter, nullptr); - ASSERT_EQ(*delimiter, "||"); - - ASSERT_OK(MergeOperator::CreateFromString( - config_options, "id=stringappendtest; delimiter=&&", &merge_op)); - ASSERT_NE(merge_op, nullptr); - ASSERT_TRUE(merge_op->IsInstanceOf("stringappendtest")); - ASSERT_STREQ(merge_op->Name(), StringAppendTESTOperator::kClassName()); - delimiter = merge_op->GetOptions("Delimiter"); - ASSERT_NE(delimiter, nullptr); - ASSERT_EQ(*delimiter, "&&"); - - std::shared_ptr copy; - std::string mismatch; - std::string opts_str = merge_op->ToString(config_options); - - ASSERT_OK(MergeOperator::CreateFromString(config_options, opts_str, ©)); - ASSERT_TRUE(merge_op->AreEquivalent(config_options, copy.get(), &mismatch)); - ASSERT_NE(copy, nullptr); - delimiter = copy->GetOptions("Delimiter"); - ASSERT_NE(delimiter, nullptr); - ASSERT_EQ(*delimiter, "&&"); -} - -TEST_F(ConfigOptionsTest, ConfiguringOptionsDoesNotRevertRateLimiterBandwidth) { - // Regression test for bug where rate limiter's dynamically set bandwidth - // could be silently reverted when configuring an options structure with an - // existing `rate_limiter`. - Options base_options; - base_options.rate_limiter.reset( - NewGenericRateLimiter(1 << 20 /* rate_bytes_per_sec */)); - Options copy_options(base_options); - - base_options.rate_limiter->SetBytesPerSecond(2 << 20); - ASSERT_EQ(2 << 20, base_options.rate_limiter->GetBytesPerSecond()); - - ASSERT_OK(GetOptionsFromString(base_options, "", ©_options)); - ASSERT_EQ(2 << 20, base_options.rate_limiter->GetBytesPerSecond()); -} - -INSTANTIATE_TEST_CASE_P(OptionsSanityCheckTest, OptionsSanityCheckTest, - ::testing::Bool()); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); -#ifdef GFLAGS - ParseCommandLineFlags(&argc, &argv, true); -#endif // GFLAGS - return RUN_ALL_TESTS(); -} diff --git a/table/block_fetcher_test.cc b/table/block_fetcher_test.cc deleted file mode 100644 index 6d983f9b7..000000000 --- a/table/block_fetcher_test.cc +++ /dev/null @@ -1,519 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
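The regression test above (ConfiguringOptionsDoesNotRevertRateLimiterBandwidth) pins down the behavior that re-configuring an Options struct which shares a rate limiter must not undo a runtime SetBytesPerSecond() call. A hedged sketch of that contract from an application's point of view — the byte rates are arbitrary example values:

    // Sketch of the guarded behavior: string-based reconfiguration leaves a
    // shared rate limiter's dynamically adjusted bandwidth untouched.
    #include <cassert>
    #include "rocksdb/convenience.h"   // GetOptionsFromString
    #include "rocksdb/options.h"
    #include "rocksdb/rate_limiter.h"

    int main() {
      ROCKSDB_NAMESPACE::Options base;
      base.rate_limiter.reset(
          ROCKSDB_NAMESPACE::NewGenericRateLimiter(1 << 20 /* 1 MB/s */));
      ROCKSDB_NAMESPACE::Options copy(base);  // shares the same limiter

      base.rate_limiter->SetBytesPerSecond(2 << 20);  // raised at runtime

      // Reconfigure from an (empty) options string, as the test does.
      ROCKSDB_NAMESPACE::Status s =
          ROCKSDB_NAMESPACE::GetOptionsFromString(base, "", &copy);
      assert(s.ok());
      // The shared limiter keeps the bandwidth set via SetBytesPerSecond().
      assert(base.rate_limiter->GetBytesPerSecond() == 2 << 20);
      return 0;
    }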
- -#include "table/block_fetcher.h" - -#include "db/table_properties_collector.h" -#include "file/file_util.h" -#include "options/options_helper.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/db.h" -#include "rocksdb/file_system.h" -#include "table/block_based/binary_search_index_reader.h" -#include "table/block_based/block_based_table_builder.h" -#include "table/block_based/block_based_table_factory.h" -#include "table/block_based/block_based_table_reader.h" -#include "table/format.h" -#include "test_util/testharness.h" -#include "utilities/memory_allocators.h" - -namespace ROCKSDB_NAMESPACE { -namespace { -struct MemcpyStats { - int num_stack_buf_memcpy; - int num_heap_buf_memcpy; - int num_compressed_buf_memcpy; -}; - -struct BufAllocationStats { - int num_heap_buf_allocations; - int num_compressed_buf_allocations; -}; - -struct TestStats { - MemcpyStats memcpy_stats; - BufAllocationStats buf_allocation_stats; -}; - -class BlockFetcherTest : public testing::Test { - public: - enum class Mode { - kBufferedRead = 0, - kBufferedMmap, - kDirectRead, - kNumModes, - }; - // use NumModes as array size to avoid "size of array '...' has non-integral - // type" errors. - const static int NumModes = static_cast(Mode::kNumModes); - - protected: - void SetUp() override { - SetupSyncPointsToMockDirectIO(); - test_dir_ = test::PerThreadDBPath("block_fetcher_test"); - env_ = Env::Default(); - fs_ = FileSystem::Default(); - ASSERT_OK(fs_->CreateDir(test_dir_, IOOptions(), nullptr)); - } - - void TearDown() override { EXPECT_OK(DestroyDir(env_, test_dir_)); } - - void AssertSameBlock(const std::string& block1, const std::string& block2) { - ASSERT_EQ(block1, block2); - } - - // Creates a table with kv pairs (i, i) where i ranges from 0 to 9, inclusive. - void CreateTable(const std::string& table_name, - const CompressionType& compression_type) { - std::unique_ptr writer; - NewFileWriter(table_name, &writer); - - // Create table builder. - ImmutableOptions ioptions(options_); - InternalKeyComparator comparator(options_.comparator); - ColumnFamilyOptions cf_options(options_); - MutableCFOptions moptions(cf_options); - IntTblPropCollectorFactories factories; - std::unique_ptr table_builder(table_factory_.NewTableBuilder( - TableBuilderOptions(ioptions, moptions, comparator, &factories, - compression_type, CompressionOptions(), - 0 /* column_family_id */, kDefaultColumnFamilyName, - -1 /* level */), - writer.get())); - - // Build table. - for (int i = 0; i < 9; i++) { - std::string key = ToInternalKey(std::to_string(i)); - // Append "00000000" to string value to enhance compression ratio - std::string value = "00000000" + std::to_string(i); - table_builder->Add(key, value); - } - ASSERT_OK(table_builder->Finish()); - } - - void FetchIndexBlock(const std::string& table_name, - CountedMemoryAllocator* heap_buf_allocator, - CountedMemoryAllocator* compressed_buf_allocator, - MemcpyStats* memcpy_stats, BlockContents* index_block, - std::string* result) { - FileOptions fopt(options_); - std::unique_ptr file; - NewFileReader(table_name, fopt, &file); - - // Get handle of the index block. 
- Footer footer; - ReadFooter(file.get(), &footer); - const BlockHandle& index_handle = footer.index_handle(); - - CompressionType compression_type; - FetchBlock(file.get(), index_handle, BlockType::kIndex, - false /* compressed */, false /* do_uncompress */, - heap_buf_allocator, compressed_buf_allocator, index_block, - memcpy_stats, &compression_type); - ASSERT_EQ(compression_type, CompressionType::kNoCompression); - result->assign(index_block->data.ToString()); - } - - // Fetches the first data block in both direct IO and non-direct IO mode. - // - // compressed: whether the data blocks are compressed; - // do_uncompress: whether the data blocks should be uncompressed on fetching. - // compression_type: the expected compression type. - // - // Expects: - // Block contents are the same. - // Bufferr allocation and memory copy statistics are expected. - void TestFetchDataBlock( - const std::string& table_name_prefix, bool compressed, bool do_uncompress, - std::array expected_stats_by_mode) { - for (CompressionType compression_type : GetSupportedCompressions()) { - bool do_compress = compression_type != kNoCompression; - if (compressed != do_compress) continue; - std::string compression_type_str = - CompressionTypeToString(compression_type); - - std::string table_name = table_name_prefix + compression_type_str; - CreateTable(table_name, compression_type); - - CompressionType expected_compression_type_after_fetch = - (compressed && !do_uncompress) ? compression_type : kNoCompression; - - BlockContents blocks[NumModes]; - std::string block_datas[NumModes]; - MemcpyStats memcpy_stats[NumModes]; - CountedMemoryAllocator heap_buf_allocators[NumModes]; - CountedMemoryAllocator compressed_buf_allocators[NumModes]; - for (int i = 0; i < NumModes; ++i) { - SetMode(static_cast(i)); - FetchFirstDataBlock(table_name, compressed, do_uncompress, - expected_compression_type_after_fetch, - &heap_buf_allocators[i], - &compressed_buf_allocators[i], &blocks[i], - &block_datas[i], &memcpy_stats[i]); - } - - for (int i = 0; i < NumModes - 1; ++i) { - AssertSameBlock(block_datas[i], block_datas[i + 1]); - } - - // Check memcpy and buffer allocation statistics. - for (int i = 0; i < NumModes; ++i) { - const TestStats& expected_stats = expected_stats_by_mode[i]; - - ASSERT_EQ(memcpy_stats[i].num_stack_buf_memcpy, - expected_stats.memcpy_stats.num_stack_buf_memcpy); - ASSERT_EQ(memcpy_stats[i].num_heap_buf_memcpy, - expected_stats.memcpy_stats.num_heap_buf_memcpy); - ASSERT_EQ(memcpy_stats[i].num_compressed_buf_memcpy, - expected_stats.memcpy_stats.num_compressed_buf_memcpy); - - if (kXpressCompression == compression_type) { - // XPRESS allocates memory internally, thus does not support for - // custom allocator verification - continue; - } else { - ASSERT_EQ( - heap_buf_allocators[i].GetNumAllocations(), - expected_stats.buf_allocation_stats.num_heap_buf_allocations); - ASSERT_EQ(compressed_buf_allocators[i].GetNumAllocations(), - expected_stats.buf_allocation_stats - .num_compressed_buf_allocations); - - // The allocated buffers are not deallocated until - // the block content is deleted. 
- ASSERT_EQ(heap_buf_allocators[i].GetNumDeallocations(), 0); - ASSERT_EQ(compressed_buf_allocators[i].GetNumDeallocations(), 0); - blocks[i].allocation.reset(); - ASSERT_EQ( - heap_buf_allocators[i].GetNumDeallocations(), - expected_stats.buf_allocation_stats.num_heap_buf_allocations); - ASSERT_EQ(compressed_buf_allocators[i].GetNumDeallocations(), - expected_stats.buf_allocation_stats - .num_compressed_buf_allocations); - } - } - } - } - - void SetMode(Mode mode) { - switch (mode) { - case Mode::kBufferedRead: - options_.use_direct_reads = false; - options_.allow_mmap_reads = false; - break; - case Mode::kBufferedMmap: - options_.use_direct_reads = false; - options_.allow_mmap_reads = true; - break; - case Mode::kDirectRead: - options_.use_direct_reads = true; - options_.allow_mmap_reads = false; - break; - case Mode::kNumModes: - assert(false); - } - } - - private: - std::string test_dir_; - Env* env_; - std::shared_ptr fs_; - BlockBasedTableFactory table_factory_; - Options options_; - - std::string Path(const std::string& fname) { return test_dir_ + "/" + fname; } - - void WriteToFile(const std::string& content, const std::string& filename) { - std::unique_ptr f; - ASSERT_OK(fs_->NewWritableFile(Path(filename), FileOptions(), &f, nullptr)); - ASSERT_OK(f->Append(content, IOOptions(), nullptr)); - ASSERT_OK(f->Close(IOOptions(), nullptr)); - } - - void NewFileWriter(const std::string& filename, - std::unique_ptr* writer) { - std::string path = Path(filename); - FileOptions file_options; - ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), path, - file_options, writer, nullptr)); - } - - void NewFileReader(const std::string& filename, const FileOptions& opt, - std::unique_ptr* reader) { - std::string path = Path(filename); - std::unique_ptr f; - ASSERT_OK(fs_->NewRandomAccessFile(path, opt, &f, nullptr)); - reader->reset(new RandomAccessFileReader(std::move(f), path, - env_->GetSystemClock().get())); - } - - void NewTableReader(const ImmutableOptions& ioptions, - const FileOptions& foptions, - const InternalKeyComparator& comparator, - const std::string& table_name, - std::unique_ptr* table) { - std::unique_ptr file; - NewFileReader(table_name, foptions, &file); - - uint64_t file_size = 0; - ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size)); - - std::unique_ptr table_reader; - ReadOptions ro; - const auto* table_options = - table_factory_.GetOptions(); - ASSERT_NE(table_options, nullptr); - ASSERT_OK(BlockBasedTable::Open(ro, ioptions, EnvOptions(), *table_options, - comparator, std::move(file), file_size, - &table_reader)); - - table->reset(reinterpret_cast(table_reader.release())); - } - - std::string ToInternalKey(const std::string& key) { - InternalKey internal_key(key, 0, ValueType::kTypeValue); - return internal_key.Encode().ToString(); - } - - void ReadFooter(RandomAccessFileReader* file, Footer* footer) { - uint64_t file_size = 0; - ASSERT_OK(env_->GetFileSize(file->file_name(), &file_size)); - IOOptions opts; - ASSERT_OK(ReadFooterFromFile(opts, file, *fs_, - nullptr /* prefetch_buffer */, file_size, - footer, kBlockBasedTableMagicNumber)); - } - - // NOTE: compression_type returns the compression type of the fetched block - // contents, so if the block is fetched and uncompressed, then it's - // kNoCompression. 
- void FetchBlock(RandomAccessFileReader* file, const BlockHandle& block, - BlockType block_type, bool compressed, bool do_uncompress, - MemoryAllocator* heap_buf_allocator, - MemoryAllocator* compressed_buf_allocator, - BlockContents* contents, MemcpyStats* stats, - CompressionType* compression_type) { - ImmutableOptions ioptions(options_); - ReadOptions roptions; - PersistentCacheOptions persistent_cache_options; - Footer footer; - ReadFooter(file, &footer); - std::unique_ptr<BlockFetcher> fetcher(new BlockFetcher( - file, nullptr /* prefetch_buffer */, footer, roptions, block, contents, - ioptions, do_uncompress, compressed, block_type, - UncompressionDict::GetEmptyDict(), persistent_cache_options, - heap_buf_allocator, compressed_buf_allocator)); - - ASSERT_OK(fetcher->ReadBlockContents()); - - stats->num_stack_buf_memcpy = fetcher->TEST_GetNumStackBufMemcpy(); - stats->num_heap_buf_memcpy = fetcher->TEST_GetNumHeapBufMemcpy(); - stats->num_compressed_buf_memcpy = - fetcher->TEST_GetNumCompressedBufMemcpy(); - - *compression_type = fetcher->get_compression_type(); - } - - // NOTE: expected_compression_type is the expected compression - // type of the fetched block content; if the block is uncompressed, - // then the expected compression type is kNoCompression. - void FetchFirstDataBlock(const std::string& table_name, bool compressed, - bool do_uncompress, - CompressionType expected_compression_type, - MemoryAllocator* heap_buf_allocator, - MemoryAllocator* compressed_buf_allocator, - BlockContents* block, std::string* result, - MemcpyStats* memcpy_stats) { - ImmutableOptions ioptions(options_); - InternalKeyComparator comparator(options_.comparator); - FileOptions foptions(options_); - - // Get block handle for the first data block. - std::unique_ptr<BlockBasedTable> table; - NewTableReader(ioptions, foptions, comparator, table_name, &table); - - std::unique_ptr<BlockBasedTable::IndexReader> index_reader; - ReadOptions ro; - ASSERT_OK(BinarySearchIndexReader::Create( - table.get(), ro, nullptr /* prefetch_buffer */, false /* use_cache */, - false /* prefetch */, false /* pin */, nullptr /* lookup_context */, - &index_reader)); - - std::unique_ptr<InternalIteratorBase<IndexValue>> iter( - index_reader->NewIterator( - ReadOptions(), false /* disable_prefix_seek */, nullptr /* iter */, - nullptr /* get_context */, nullptr /* lookup_context */)); - ASSERT_OK(iter->status()); - iter->SeekToFirst(); - BlockHandle first_block_handle = iter->value().handle; - - // Fetch first data block. - std::unique_ptr<RandomAccessFileReader> file; - NewFileReader(table_name, foptions, &file); - CompressionType compression_type; - FetchBlock(file.get(), first_block_handle, BlockType::kData, compressed, - do_uncompress, heap_buf_allocator, compressed_buf_allocator, - block, memcpy_stats, &compression_type); - ASSERT_EQ(compression_type, expected_compression_type); - result->assign(block->data.ToString()); - } -}; - -// Skip the following tests in lite mode since direct I/O is unsupported. - -// Fetch index block under both direct IO and non-direct IO. -// Expects: -// the index block contents are the same for both read modes.
-TEST_F(BlockFetcherTest, FetchIndexBlock) { - for (CompressionType compression : GetSupportedCompressions()) { - std::string table_name = - "FetchIndexBlock" + CompressionTypeToString(compression); - CreateTable(table_name, compression); - - CountedMemoryAllocator allocator; - MemcpyStats memcpy_stats; - BlockContents indexes[NumModes]; - std::string index_datas[NumModes]; - for (int i = 0; i < NumModes; ++i) { - SetMode(static_cast(i)); - FetchIndexBlock(table_name, &allocator, &allocator, &memcpy_stats, - &indexes[i], &index_datas[i]); - } - for (int i = 0; i < NumModes - 1; ++i) { - AssertSameBlock(index_datas[i], index_datas[i + 1]); - } - } -} - -// Data blocks are not compressed, -// fetch data block under direct IO, mmap IO,and non-direct IO. -// Expects: -// 1. in non-direct IO mode, allocate a heap buffer and memcpy the block -// into the buffer; -// 2. in direct IO mode, allocate a heap buffer and memcpy from the -// direct IO buffer to the heap buffer. -TEST_F(BlockFetcherTest, FetchUncompressedDataBlock) { - TestStats expected_non_mmap_stats = { - { - 0 /* num_stack_buf_memcpy */, - 1 /* num_heap_buf_memcpy */, - 0 /* num_compressed_buf_memcpy */, - }, - { - 1 /* num_heap_buf_allocations */, - 0 /* num_compressed_buf_allocations */, - }}; - TestStats expected_mmap_stats = {{ - 0 /* num_stack_buf_memcpy */, - 0 /* num_heap_buf_memcpy */, - 0 /* num_compressed_buf_memcpy */, - }, - { - 0 /* num_heap_buf_allocations */, - 0 /* num_compressed_buf_allocations */, - }}; - std::array expected_stats_by_mode{{ - expected_non_mmap_stats /* kBufferedRead */, - expected_mmap_stats /* kBufferedMmap */, - expected_non_mmap_stats /* kDirectRead */, - }}; - TestFetchDataBlock("FetchUncompressedDataBlock", false, false, - expected_stats_by_mode); -} - -// Data blocks are compressed, -// fetch data block under both direct IO and non-direct IO, -// but do not uncompress. -// Expects: -// 1. in non-direct IO mode, allocate a compressed buffer and memcpy the block -// into the buffer; -// 2. in direct IO mode, allocate a compressed buffer and memcpy from the -// direct IO buffer to the compressed buffer. -TEST_F(BlockFetcherTest, FetchCompressedDataBlock) { - TestStats expected_non_mmap_stats = { - { - 0 /* num_stack_buf_memcpy */, - 0 /* num_heap_buf_memcpy */, - 1 /* num_compressed_buf_memcpy */, - }, - { - 0 /* num_heap_buf_allocations */, - 1 /* num_compressed_buf_allocations */, - }}; - TestStats expected_mmap_stats = {{ - 0 /* num_stack_buf_memcpy */, - 0 /* num_heap_buf_memcpy */, - 0 /* num_compressed_buf_memcpy */, - }, - { - 0 /* num_heap_buf_allocations */, - 0 /* num_compressed_buf_allocations */, - }}; - std::array expected_stats_by_mode{{ - expected_non_mmap_stats /* kBufferedRead */, - expected_mmap_stats /* kBufferedMmap */, - expected_non_mmap_stats /* kDirectRead */, - }}; - TestFetchDataBlock("FetchCompressedDataBlock", true, false, - expected_stats_by_mode); -} - -// Data blocks are compressed, -// fetch and uncompress data block under both direct IO and non-direct IO. -// Expects: -// 1. in non-direct IO mode, since the block is small, so it's first memcpyed -// to the stack buffer, then a heap buffer is allocated and the block is -// uncompressed into the heap. -// 2. in direct IO mode mode, allocate a heap buffer, then directly uncompress -// and memcpy from the direct IO buffer to the heap buffer. 
-TEST_F(BlockFetcherTest, FetchAndUncompressCompressedDataBlock) { - TestStats expected_buffered_read_stats = { - { - 1 /* num_stack_buf_memcpy */, - 1 /* num_heap_buf_memcpy */, - 0 /* num_compressed_buf_memcpy */, - }, - { - 1 /* num_heap_buf_allocations */, - 0 /* num_compressed_buf_allocations */, - }}; - TestStats expected_mmap_stats = {{ - 0 /* num_stack_buf_memcpy */, - 1 /* num_heap_buf_memcpy */, - 0 /* num_compressed_buf_memcpy */, - }, - { - 1 /* num_heap_buf_allocations */, - 0 /* num_compressed_buf_allocations */, - }}; - TestStats expected_direct_read_stats = { - { - 0 /* num_stack_buf_memcpy */, - 1 /* num_heap_buf_memcpy */, - 0 /* num_compressed_buf_memcpy */, - }, - { - 1 /* num_heap_buf_allocations */, - 0 /* num_compressed_buf_allocations */, - }}; - std::array expected_stats_by_mode{{ - expected_buffered_read_stats, - expected_mmap_stats, - expected_direct_read_stats, - }}; - TestFetchDataBlock("FetchAndUncompressCompressedDataBlock", true, true, - expected_stats_by_mode); -} - - -} // namespace -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/table/cleanable_test.cc b/table/cleanable_test.cc deleted file mode 100644 index b58eb7dc6..000000000 --- a/table/cleanable_test.cc +++ /dev/null @@ -1,390 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "rocksdb/cleanable.h" - -#include - -#include - -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/iostats_context.h" -#include "rocksdb/perf_context.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -class CleanableTest : public testing::Test {}; - -// Use this to keep track of the cleanups that were actually performed -void Multiplier(void* arg1, void* arg2) { - int* res = reinterpret_cast(arg1); - int* num = reinterpret_cast(arg2); - *res *= *num; -} - -// the first Cleanup is on stack and the rest on heap, so test with both cases -TEST_F(CleanableTest, Register) { - int n2 = 2, n3 = 3; - int res = 1; - { Cleanable c1; } - // ~Cleanable - ASSERT_EQ(1, res); - - res = 1; - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; - } - // ~Cleanable - ASSERT_EQ(2, res); - - res = 1; - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; - c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; - } - // ~Cleanable - ASSERT_EQ(6, res); - - // Test the Reset does cleanup - res = 1; - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; - c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; - c1.Reset(); - ASSERT_EQ(6, res); - } - // ~Cleanable - ASSERT_EQ(6, res); - - // Test Clenable is usable after Reset - res = 1; - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; - c1.Reset(); - ASSERT_EQ(2, res); - c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; - } - // ~Cleanable - ASSERT_EQ(6, res); -} - -// the first Cleanup is on stack and the rest on heap, -// so test all the combinations of them -TEST_F(CleanableTest, Delegation) { - int n2 = 2, n3 = 3, n5 = 5, n7 = 7; - int res = 1; - { - Cleanable c2; - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, 
&n2); // res = 2; - c1.DelegateCleanupsTo(&c2); - } - // ~Cleanable - ASSERT_EQ(1, res); - } - // ~Cleanable - ASSERT_EQ(2, res); - - res = 1; - { - Cleanable c2; - { - Cleanable c1; - c1.DelegateCleanupsTo(&c2); - } - // ~Cleanable - ASSERT_EQ(1, res); - } - // ~Cleanable - ASSERT_EQ(1, res); - - res = 1; - { - Cleanable c2; - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; - c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; - c1.DelegateCleanupsTo(&c2); - } - // ~Cleanable - ASSERT_EQ(1, res); - } - // ~Cleanable - ASSERT_EQ(6, res); - - res = 1; - { - Cleanable c2; - c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; - c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; - c1.DelegateCleanupsTo(&c2); // res = 2 * 3 * 5; - } - // ~Cleanable - ASSERT_EQ(1, res); - } - // ~Cleanable - ASSERT_EQ(30, res); - - res = 1; - { - Cleanable c2; - c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; - c2.RegisterCleanup(Multiplier, &res, &n7); // res = 5 * 7; - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; - c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; - c1.DelegateCleanupsTo(&c2); // res = 2 * 3 * 5 * 7; - } - // ~Cleanable - ASSERT_EQ(1, res); - } - // ~Cleanable - ASSERT_EQ(210, res); - - res = 1; - { - Cleanable c2; - c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; - c2.RegisterCleanup(Multiplier, &res, &n7); // res = 5 * 7; - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; - c1.DelegateCleanupsTo(&c2); // res = 2 * 5 * 7; - } - // ~Cleanable - ASSERT_EQ(1, res); - } - // ~Cleanable - ASSERT_EQ(70, res); - - res = 1; - { - Cleanable c2; - c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; - c2.RegisterCleanup(Multiplier, &res, &n7); // res = 5 * 7; - { - Cleanable c1; - c1.DelegateCleanupsTo(&c2); // res = 5 * 7; - } - // ~Cleanable - ASSERT_EQ(1, res); - } - // ~Cleanable - ASSERT_EQ(35, res); - - res = 1; - { - Cleanable c2; - c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; - { - Cleanable c1; - c1.DelegateCleanupsTo(&c2); // res = 5; - } - // ~Cleanable - ASSERT_EQ(1, res); - } - // ~Cleanable - ASSERT_EQ(5, res); -} - -static void ReleaseStringHeap(void* s, void*) { - delete reinterpret_cast(s); -} - -class PinnableSlice4Test : public PinnableSlice { - public: - void TestStringIsRegistered(std::string* s) { - ASSERT_TRUE(cleanup_.function == ReleaseStringHeap); - ASSERT_EQ(cleanup_.arg1, s); - ASSERT_EQ(cleanup_.arg2, nullptr); - ASSERT_EQ(cleanup_.next, nullptr); - } -}; - -// Putting the PinnableSlice tests here due to similarity to Cleanable tests -TEST_F(CleanableTest, PinnableSlice) { - int n2 = 2; - int res = 1; - const std::string const_str = "123"; - - { - res = 1; - PinnableSlice4Test value; - Slice slice(const_str); - value.PinSlice(slice, Multiplier, &res, &n2); - std::string str; - str.assign(value.data(), value.size()); - ASSERT_EQ(const_str, str); - } - // ~Cleanable - ASSERT_EQ(2, res); - - { - res = 1; - PinnableSlice4Test value; - Slice slice(const_str); - { - Cleanable c1; - c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; - value.PinSlice(slice, &c1); - } - // ~Cleanable - ASSERT_EQ(1, res); // cleanups must have be delegated to value - std::string str; - str.assign(value.data(), value.size()); - ASSERT_EQ(const_str, str); - } - // ~Cleanable - ASSERT_EQ(2, res); - - { - PinnableSlice4Test value; - Slice slice(const_str); - value.PinSelf(slice); - 
std::string str; - str.assign(value.data(), value.size()); - ASSERT_EQ(const_str, str); - } - - { - PinnableSlice4Test value; - std::string* self_str_ptr = value.GetSelf(); - self_str_ptr->assign(const_str); - value.PinSelf(); - std::string str; - str.assign(value.data(), value.size()); - ASSERT_EQ(const_str, str); - } -} - -static void Decrement(void* intptr, void*) { --*static_cast(intptr); } - -// Allow unit testing moved-from data -template -void MarkInitializedForClangAnalyze(T& t) { - // No net effect, but confuse analyzer. (Published advice doesn't work.) - char* p = reinterpret_cast(&t); - std::swap(*p, *p); -} - -TEST_F(CleanableTest, SharedWrapCleanables) { - int val = 5; - Cleanable c1, c2; - c1.RegisterCleanup(&Decrement, &val, nullptr); - c1.RegisterCleanup(&Decrement, &val, nullptr); - ASSERT_TRUE(c1.HasCleanups()); - ASSERT_FALSE(c2.HasCleanups()); - - SharedCleanablePtr scp1; - ASSERT_EQ(scp1.get(), nullptr); - - // No-ops - scp1.RegisterCopyWith(&c2); - scp1.MoveAsCleanupTo(&c2); - - ASSERT_FALSE(c2.HasCleanups()); - c2.RegisterCleanup(&Decrement, &val, nullptr); - c2.RegisterCleanup(&Decrement, &val, nullptr); - c2.RegisterCleanup(&Decrement, &val, nullptr); - - scp1.Allocate(); - ASSERT_NE(scp1.get(), nullptr); - ASSERT_FALSE(scp1->HasCleanups()); - - // Copy ctor (alias scp2 = scp1) - SharedCleanablePtr scp2{scp1}; - ASSERT_EQ(scp1.get(), scp2.get()); - - c1.DelegateCleanupsTo(&*scp1); - ASSERT_TRUE(scp1->HasCleanups()); - ASSERT_TRUE(scp2->HasCleanups()); - ASSERT_FALSE(c1.HasCleanups()); - - SharedCleanablePtr scp3; - ASSERT_EQ(scp3.get(), nullptr); - - // Copy operator (alias scp3 = scp2 = scp1) - scp3 = scp2; - - // Make scp2 point elsewhere - scp2.Allocate(); - c2.DelegateCleanupsTo(&*scp2); - - ASSERT_EQ(val, 5); - // Move operator, invoke old c2 cleanups - scp2 = std::move(scp1); - ASSERT_EQ(val, 2); - MarkInitializedForClangAnalyze(scp1); - ASSERT_EQ(scp1.get(), nullptr); - - // Move ctor - { - SharedCleanablePtr scp4{std::move(scp3)}; - MarkInitializedForClangAnalyze(scp3); - ASSERT_EQ(scp3.get(), nullptr); - ASSERT_EQ(scp4.get(), scp2.get()); - - scp2.Reset(); - ASSERT_EQ(val, 2); - // invoke old c1 cleanups - } - ASSERT_EQ(val, 0); -} - -TEST_F(CleanableTest, CleanableWrapShared) { - int val = 5; - SharedCleanablePtr scp1, scp2; - scp1.Allocate(); - scp1->RegisterCleanup(&Decrement, &val, nullptr); - scp1->RegisterCleanup(&Decrement, &val, nullptr); - - scp2.Allocate(); - scp2->RegisterCleanup(&Decrement, &val, nullptr); - scp2->RegisterCleanup(&Decrement, &val, nullptr); - scp2->RegisterCleanup(&Decrement, &val, nullptr); - - { - Cleanable c1; - { - Cleanable c2, c3; - scp1.RegisterCopyWith(&c1); - scp1.MoveAsCleanupTo(&c2); - ASSERT_TRUE(c1.HasCleanups()); - ASSERT_TRUE(c2.HasCleanups()); - ASSERT_EQ(scp1.get(), nullptr); - scp2.MoveAsCleanupTo(&c3); - ASSERT_TRUE(c3.HasCleanups()); - ASSERT_EQ(scp2.get(), nullptr); - c2.Reset(); - ASSERT_FALSE(c2.HasCleanups()); - ASSERT_EQ(val, 5); - // invoke cleanups from scp2 - } - ASSERT_EQ(val, 2); - // invoke cleanups from scp1 - } - ASSERT_EQ(val, 0); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/table/merger_test.cc b/table/merger_test.cc deleted file mode 100644 index 71dc798e5..000000000 --- a/table/merger_test.cc +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include -#include - -#include "table/merging_iterator.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/random.h" -#include "util/vector_iterator.h" - -namespace ROCKSDB_NAMESPACE { - -class MergerTest : public testing::Test { - public: - MergerTest() - : icomp_(BytewiseComparator()), - rnd_(3), - merging_iterator_(nullptr), - single_iterator_(nullptr) {} - ~MergerTest() override = default; - std::vector GenerateStrings(size_t len, int string_len) { - std::vector ret; - - for (size_t i = 0; i < len; ++i) { - InternalKey ik(rnd_.HumanReadableString(string_len), 0, - ValueType::kTypeValue); - ret.push_back(ik.Encode().ToString(false)); - } - return ret; - } - - void AssertEquivalence() { - auto a = merging_iterator_.get(); - auto b = single_iterator_.get(); - if (!a->Valid()) { - ASSERT_TRUE(!b->Valid()); - } else { - ASSERT_TRUE(b->Valid()); - ASSERT_EQ(b->key().ToString(), a->key().ToString()); - ASSERT_EQ(b->value().ToString(), a->value().ToString()); - } - } - - void SeekToRandom() { - InternalKey ik(rnd_.HumanReadableString(5), 0, ValueType::kTypeValue); - Seek(ik.Encode().ToString(false)); - } - - void Seek(std::string target) { - merging_iterator_->Seek(target); - single_iterator_->Seek(target); - } - - void SeekToFirst() { - merging_iterator_->SeekToFirst(); - single_iterator_->SeekToFirst(); - } - - void SeekToLast() { - merging_iterator_->SeekToLast(); - single_iterator_->SeekToLast(); - } - - void Next(int times) { - for (int i = 0; i < times && merging_iterator_->Valid(); ++i) { - AssertEquivalence(); - merging_iterator_->Next(); - single_iterator_->Next(); - } - AssertEquivalence(); - } - - void Prev(int times) { - for (int i = 0; i < times && merging_iterator_->Valid(); ++i) { - AssertEquivalence(); - merging_iterator_->Prev(); - single_iterator_->Prev(); - } - AssertEquivalence(); - } - - void NextAndPrev(int times) { - for (int i = 0; i < times && merging_iterator_->Valid(); ++i) { - AssertEquivalence(); - if (rnd_.OneIn(2)) { - merging_iterator_->Prev(); - single_iterator_->Prev(); - } else { - merging_iterator_->Next(); - single_iterator_->Next(); - } - } - AssertEquivalence(); - } - - void Generate(size_t num_iterators, size_t strings_per_iterator, - int letters_per_string) { - std::vector small_iterators; - for (size_t i = 0; i < num_iterators; ++i) { - auto strings = GenerateStrings(strings_per_iterator, letters_per_string); - small_iterators.push_back(new VectorIterator(strings, strings, &icomp_)); - all_keys_.insert(all_keys_.end(), strings.begin(), strings.end()); - } - - merging_iterator_.reset( - NewMergingIterator(&icomp_, &small_iterators[0], - static_cast(small_iterators.size()))); - single_iterator_.reset(new VectorIterator(all_keys_, all_keys_, &icomp_)); - } - - InternalKeyComparator icomp_; - Random rnd_; - std::unique_ptr merging_iterator_; - std::unique_ptr single_iterator_; - std::vector all_keys_; -}; - -TEST_F(MergerTest, SeekToRandomNextTest) { - Generate(1000, 50, 50); - for (int i = 0; i < 10; ++i) { - SeekToRandom(); - AssertEquivalence(); - Next(50000); - } -} - -TEST_F(MergerTest, SeekToRandomNextSmallStringsTest) { - Generate(1000, 50, 2); - for (int i = 0; i < 10; ++i) { - SeekToRandom(); - AssertEquivalence(); - Next(50000); - } -} - -TEST_F(MergerTest, SeekToRandomPrevTest) { - Generate(1000, 50, 50); - for (int i 
= 0; i < 10; ++i) { - SeekToRandom(); - AssertEquivalence(); - Prev(50000); - } -} - -TEST_F(MergerTest, SeekToRandomRandomTest) { - Generate(200, 50, 50); - for (int i = 0; i < 3; ++i) { - SeekToRandom(); - AssertEquivalence(); - NextAndPrev(5000); - } -} - -TEST_F(MergerTest, SeekToFirstTest) { - Generate(1000, 50, 50); - for (int i = 0; i < 10; ++i) { - SeekToFirst(); - AssertEquivalence(); - Next(50000); - } -} - -TEST_F(MergerTest, SeekToLastTest) { - Generate(1000, 50, 50); - for (int i = 0; i < 10; ++i) { - SeekToLast(); - AssertEquivalence(); - Prev(50000); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/table/sst_file_reader_test.cc b/table/sst_file_reader_test.cc deleted file mode 100644 index ba81d7815..000000000 --- a/table/sst_file_reader_test.cc +++ /dev/null @@ -1,423 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - - -#include "rocksdb/sst_file_reader.h" - -#include - -#include "port/stack_trace.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/sst_file_writer.h" -#include "table/sst_file_writer_collectors.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -std::string EncodeAsString(uint64_t v) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08" PRIu64, v); - return std::string(buf); -} - -std::string EncodeAsUint64(uint64_t v) { - std::string dst; - PutFixed64(&dst, v); - return dst; -} - -class SstFileReaderTest : public testing::Test { - public: - SstFileReaderTest() { - options_.merge_operator = MergeOperators::CreateUInt64AddOperator(); - sst_name_ = test::PerThreadDBPath("sst_file"); - - Env* base_env = Env::Default(); - EXPECT_OK( - test::CreateEnvFromSystem(ConfigOptions(), &base_env, &env_guard_)); - EXPECT_NE(nullptr, base_env); - env_ = base_env; - options_.env = env_; - } - - ~SstFileReaderTest() { - Status s = env_->DeleteFile(sst_name_); - EXPECT_OK(s); - } - - void CreateFile(const std::string& file_name, - const std::vector& keys) { - SstFileWriter writer(soptions_, options_); - ASSERT_OK(writer.Open(file_name)); - for (size_t i = 0; i + 2 < keys.size(); i += 3) { - ASSERT_OK(writer.Put(keys[i], keys[i])); - ASSERT_OK(writer.Merge(keys[i + 1], EncodeAsUint64(i + 1))); - ASSERT_OK(writer.Delete(keys[i + 2])); - } - ASSERT_OK(writer.Finish()); - } - - void CheckFile(const std::string& file_name, - const std::vector& keys, - bool check_global_seqno = false) { - ReadOptions ropts; - SstFileReader reader(options_); - ASSERT_OK(reader.Open(file_name)); - ASSERT_OK(reader.VerifyChecksum()); - std::unique_ptr iter(reader.NewIterator(ropts)); - iter->SeekToFirst(); - for (size_t i = 0; i + 2 < keys.size(); i += 3) { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(keys[i]), 0); - ASSERT_EQ(iter->value().compare(keys[i]), 0); - iter->Next(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(keys[i + 1]), 0); - ASSERT_EQ(iter->value().compare(EncodeAsUint64(i + 1)), 0); - iter->Next(); - } - ASSERT_FALSE(iter->Valid()); - if (check_global_seqno) { - auto properties = reader.GetTableProperties(); - ASSERT_TRUE(properties); - 
std::string hostname; - ASSERT_OK(env_->GetHostNameString(&hostname)); - ASSERT_EQ(properties->db_host_id, hostname); - auto& user_properties = properties->user_collected_properties; - ASSERT_TRUE( - user_properties.count(ExternalSstFilePropertyNames::kGlobalSeqno)); - } - } - - void CreateFileAndCheck(const std::vector& keys) { - CreateFile(sst_name_, keys); - CheckFile(sst_name_, keys); - } - - protected: - Options options_; - EnvOptions soptions_; - std::string sst_name_; - std::shared_ptr env_guard_; - Env* env_; -}; - -const uint64_t kNumKeys = 100; - -TEST_F(SstFileReaderTest, Basic) { - std::vector keys; - for (uint64_t i = 0; i < kNumKeys; i++) { - keys.emplace_back(EncodeAsString(i)); - } - CreateFileAndCheck(keys); -} - -TEST_F(SstFileReaderTest, Uint64Comparator) { - options_.comparator = test::Uint64Comparator(); - std::vector keys; - for (uint64_t i = 0; i < kNumKeys; i++) { - keys.emplace_back(EncodeAsUint64(i)); - } - CreateFileAndCheck(keys); -} - -TEST_F(SstFileReaderTest, ReadOptionsOutOfScope) { - // Repro a bug where the SstFileReader depended on its configured ReadOptions - // outliving it. - options_.comparator = test::Uint64Comparator(); - std::vector keys; - for (uint64_t i = 0; i < kNumKeys; i++) { - keys.emplace_back(EncodeAsUint64(i)); - } - CreateFile(sst_name_, keys); - - SstFileReader reader(options_); - ASSERT_OK(reader.Open(sst_name_)); - std::unique_ptr iter; - { - // Make sure ReadOptions go out of scope ASAP so we know the iterator - // operations do not depend on it. - ReadOptions ropts; - iter.reset(reader.NewIterator(ropts)); - } - iter->SeekToFirst(); - while (iter->Valid()) { - iter->Next(); - } -} - -TEST_F(SstFileReaderTest, ReadFileWithGlobalSeqno) { - std::vector keys; - for (uint64_t i = 0; i < kNumKeys; i++) { - keys.emplace_back(EncodeAsString(i)); - } - // Generate a SST file. - CreateFile(sst_name_, keys); - - // Ingest the file into a db, to assign it a global sequence number. - Options options; - options.create_if_missing = true; - std::string db_name = test::PerThreadDBPath("test_db"); - DB* db; - ASSERT_OK(DB::Open(options, db_name, &db)); - // Bump sequence number. - ASSERT_OK(db->Put(WriteOptions(), keys[0], "foo")); - ASSERT_OK(db->Flush(FlushOptions())); - // Ingest the file. - IngestExternalFileOptions ingest_options; - ingest_options.write_global_seqno = true; - ASSERT_OK(db->IngestExternalFile({sst_name_}, ingest_options)); - std::vector live_files; - uint64_t manifest_file_size = 0; - ASSERT_OK(db->GetLiveFiles(live_files, &manifest_file_size)); - // Get the ingested file. - std::string ingested_file; - for (auto& live_file : live_files) { - if (live_file.substr(live_file.size() - 4, std::string::npos) == ".sst") { - if (ingested_file.empty() || ingested_file < live_file) { - ingested_file = live_file; - } - } - } - ASSERT_FALSE(ingested_file.empty()); - delete db; - - // Verify the file can be open and read by SstFileReader. - CheckFile(db_name + ingested_file, keys, true /* check_global_seqno */); - - // Cleanup. - ASSERT_OK(DestroyDB(db_name, options)); -} - -TEST_F(SstFileReaderTest, TimestampSizeMismatch) { - SstFileWriter writer(soptions_, options_); - - ASSERT_OK(writer.Open(sst_name_)); - - // Comparator is not timestamp-aware; calls to APIs taking timestamps should - // fail. 
- ASSERT_NOK(writer.Put("key", EncodeAsUint64(100), "value")); - ASSERT_NOK(writer.Delete("another_key", EncodeAsUint64(200))); -} - -class SstFileReaderTimestampTest : public testing::Test { - public: - SstFileReaderTimestampTest() { - Env* env = Env::Default(); - EXPECT_OK(test::CreateEnvFromSystem(ConfigOptions(), &env, &env_guard_)); - EXPECT_NE(nullptr, env); - - options_.env = env; - - options_.comparator = test::BytewiseComparatorWithU64TsWrapper(); - - sst_name_ = test::PerThreadDBPath("sst_file_ts"); - } - - ~SstFileReaderTimestampTest() { - EXPECT_OK(options_.env->DeleteFile(sst_name_)); - } - - struct KeyValueDesc { - KeyValueDesc(std::string k, std::string ts, std::string v) - : key(std::move(k)), timestamp(std::move(ts)), value(std::move(v)) {} - - std::string key; - std::string timestamp; - std::string value; - }; - - struct InputKeyValueDesc : public KeyValueDesc { - InputKeyValueDesc(std::string k, std::string ts, std::string v, bool is_del, - bool use_contig_buf) - : KeyValueDesc(std::move(k), std::move(ts), std::move(v)), - is_delete(is_del), - use_contiguous_buffer(use_contig_buf) {} - - bool is_delete = false; - bool use_contiguous_buffer = false; - }; - - struct OutputKeyValueDesc : public KeyValueDesc { - OutputKeyValueDesc(std::string k, std::string ts, std::string v) - : KeyValueDesc(std::move(k), std::string(ts), std::string(v)) {} - }; - - void CreateFile(const std::vector& descs) { - SstFileWriter writer(soptions_, options_); - - ASSERT_OK(writer.Open(sst_name_)); - - for (const auto& desc : descs) { - if (desc.is_delete) { - if (desc.use_contiguous_buffer) { - std::string key_with_ts(desc.key + desc.timestamp); - ASSERT_OK(writer.Delete(Slice(key_with_ts.data(), desc.key.size()), - Slice(key_with_ts.data() + desc.key.size(), - desc.timestamp.size()))); - } else { - ASSERT_OK(writer.Delete(desc.key, desc.timestamp)); - } - } else { - if (desc.use_contiguous_buffer) { - std::string key_with_ts(desc.key + desc.timestamp); - ASSERT_OK(writer.Put(Slice(key_with_ts.data(), desc.key.size()), - Slice(key_with_ts.data() + desc.key.size(), - desc.timestamp.size()), - desc.value)); - } else { - ASSERT_OK(writer.Put(desc.key, desc.timestamp, desc.value)); - } - } - } - - ASSERT_OK(writer.Finish()); - } - - void CheckFile(const std::string& timestamp, - const std::vector& descs) { - SstFileReader reader(options_); - - ASSERT_OK(reader.Open(sst_name_)); - ASSERT_OK(reader.VerifyChecksum()); - - Slice ts_slice(timestamp); - - ReadOptions read_options; - read_options.timestamp = &ts_slice; - - std::unique_ptr iter(reader.NewIterator(read_options)); - iter->SeekToFirst(); - - for (const auto& desc : descs) { - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key(), desc.key); - ASSERT_EQ(iter->timestamp(), desc.timestamp); - ASSERT_EQ(iter->value(), desc.value); - - iter->Next(); - } - - ASSERT_FALSE(iter->Valid()); - } - - protected: - std::shared_ptr env_guard_; - Options options_; - EnvOptions soptions_; - std::string sst_name_; -}; - -TEST_F(SstFileReaderTimestampTest, Basic) { - std::vector input_descs; - - for (uint64_t k = 0; k < kNumKeys; k += 4) { - // A Put with key k, timestamp k that gets overwritten by a subsequent Put - // with timestamp (k + 1). Note that the comparator uses descending order - // for the timestamp part, so we add the later Put first. 
- input_descs.emplace_back( - /* key */ EncodeAsString(k), /* timestamp */ EncodeAsUint64(k + 1), - /* value */ EncodeAsString(k * 2), /* is_delete */ false, - /* use_contiguous_buffer */ false); - input_descs.emplace_back( - /* key */ EncodeAsString(k), /* timestamp */ EncodeAsUint64(k), - /* value */ EncodeAsString(k * 3), /* is_delete */ false, - /* use_contiguous_buffer */ true); - - // A Put with key (k + 2), timestamp (k + 2) that gets cancelled out by a - // Delete with timestamp (k + 3). Note that the comparator uses descending - // order for the timestamp part, so we add the Delete first. - input_descs.emplace_back(/* key */ EncodeAsString(k + 2), - /* timestamp */ EncodeAsUint64(k + 3), - /* value */ std::string(), /* is_delete */ true, - /* use_contiguous_buffer */ (k % 8) == 0); - input_descs.emplace_back( - /* key */ EncodeAsString(k + 2), /* timestamp */ EncodeAsUint64(k + 2), - /* value */ EncodeAsString(k * 5), /* is_delete */ false, - /* use_contiguous_buffer */ (k % 8) != 0); - } - - CreateFile(input_descs); - - // Note: below, we check the results as of each timestamp in the range, - // updating the expected result as needed. - std::vector output_descs; - - for (uint64_t ts = 0; ts < kNumKeys; ++ts) { - const uint64_t k = ts - (ts % 4); - - switch (ts % 4) { - case 0: // Initial Put for key k - output_descs.emplace_back(/* key */ EncodeAsString(k), - /* timestamp */ EncodeAsUint64(ts), - /* value */ EncodeAsString(k * 3)); - break; - - case 1: // Second Put for key k - assert(output_descs.back().key == EncodeAsString(k)); - assert(output_descs.back().timestamp == EncodeAsUint64(ts - 1)); - assert(output_descs.back().value == EncodeAsString(k * 3)); - output_descs.back().timestamp = EncodeAsUint64(ts); - output_descs.back().value = EncodeAsString(k * 2); - break; - - case 2: // Put for key (k + 2) - output_descs.emplace_back(/* key */ EncodeAsString(k + 2), - /* timestamp */ EncodeAsUint64(ts), - /* value */ EncodeAsString(k * 5)); - break; - - case 3: // Delete for key (k + 2) - assert(output_descs.back().key == EncodeAsString(k + 2)); - assert(output_descs.back().timestamp == EncodeAsUint64(ts - 1)); - assert(output_descs.back().value == EncodeAsString(k * 5)); - output_descs.pop_back(); - break; - } - - CheckFile(EncodeAsUint64(ts), output_descs); - } -} - -TEST_F(SstFileReaderTimestampTest, TimestampsOutOfOrder) { - SstFileWriter writer(soptions_, options_); - - ASSERT_OK(writer.Open(sst_name_)); - - // Note: KVs that have the same user key disregarding timestamps should be in - // descending order of timestamps. - ASSERT_OK(writer.Put("key", EncodeAsUint64(1), "value1")); - ASSERT_NOK(writer.Put("key", EncodeAsUint64(2), "value2")); -} - -TEST_F(SstFileReaderTimestampTest, TimestampSizeMismatch) { - SstFileWriter writer(soptions_, options_); - - ASSERT_OK(writer.Open(sst_name_)); - - // Comparator expects 64-bit timestamps; timestamps with other sizes as well - // as calls to the timestamp-less APIs should be rejected. 
- ASSERT_NOK(writer.Put("key", "not_an_actual_64_bit_timestamp", "value")); - ASSERT_NOK(writer.Delete("another_key", "timestamp_of_unexpected_size")); - - ASSERT_NOK(writer.Put("key_without_timestamp", "value")); - ASSERT_NOK(writer.Merge("another_key_missing_a_timestamp", "merge_operand")); - ASSERT_NOK(writer.Delete("yet_another_key_still_no_timestamp")); - ASSERT_NOK(writer.DeleteRange("begin_key_timestamp_absent", - "end_key_with_a_complete_lack_of_timestamps")); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/table/table_test.cc b/table/table_test.cc deleted file mode 100644 index df9e508f5..000000000 --- a/table/table_test.cc +++ /dev/null @@ -1,5625 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "rocksdb/table.h" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "cache/lru_cache.h" -#include "db/db_test_util.h" -#include "db/dbformat.h" -#include "db/memtable.h" -#include "db/write_batch_internal.h" -#include "memtable/stl_wrappers.h" -#include "monitoring/statistics.h" -#include "options/options_helper.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/cache.h" -#include "rocksdb/compression_type.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/file_checksum.h" -#include "rocksdb/file_system.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/iterator.h" -#include "rocksdb/memtablerep.h" -#include "rocksdb/options.h" -#include "rocksdb/perf_context.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/statistics.h" -#include "rocksdb/table_properties.h" -#include "rocksdb/trace_record.h" -#include "rocksdb/unique_id.h" -#include "rocksdb/write_buffer_manager.h" -#include "table/block_based/block.h" -#include "table/block_based/block_based_table_builder.h" -#include "table/block_based/block_based_table_factory.h" -#include "table/block_based/block_based_table_reader.h" -#include "table/block_based/block_builder.h" -#include "table/block_based/filter_policy_internal.h" -#include "table/block_based/flush_block_policy.h" -#include "table/block_fetcher.h" -#include "table/format.h" -#include "table/get_context.h" -#include "table/internal_iterator.h" -#include "table/meta_blocks.h" -#include "table/plain/plain_table_factory.h" -#include "table/scoped_arena_iterator.h" -#include "table/sst_file_writer_collectors.h" -#include "table/unique_id_impl.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/coding_lean.h" -#include "util/compression.h" -#include "util/file_checksum_helper.h" -#include "util/random.h" -#include "util/string_util.h" -#include "utilities/memory_allocators.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -extern const uint64_t 
kLegacyBlockBasedTableMagicNumber; -extern const uint64_t kLegacyPlainTableMagicNumber; -extern const uint64_t kBlockBasedTableMagicNumber; -extern const uint64_t kPlainTableMagicNumber; - -namespace { - -const std::string kDummyValue(10000, 'o'); - -// DummyPropertiesCollector used to test BlockBasedTableProperties -class DummyPropertiesCollector : public TablePropertiesCollector { - public: - const char* Name() const override { return "DummyPropertiesCollector"; } - - Status Finish(UserCollectedProperties* /*properties*/) override { - return Status::OK(); - } - - Status Add(const Slice& /*user_key*/, const Slice& /*value*/) override { - return Status::OK(); - } - - UserCollectedProperties GetReadableProperties() const override { - return UserCollectedProperties{}; - } -}; - -class DummyPropertiesCollectorFactory1 - : public TablePropertiesCollectorFactory { - public: - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context /*context*/) override { - return new DummyPropertiesCollector(); - } - const char* Name() const override { - return "DummyPropertiesCollectorFactory1"; - } -}; - -class DummyPropertiesCollectorFactory2 - : public TablePropertiesCollectorFactory { - public: - TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context /*context*/) override { - return new DummyPropertiesCollector(); - } - const char* Name() const override { - return "DummyPropertiesCollectorFactory2"; - } -}; - -// Return reverse of "key". -// Used to test non-lexicographic comparators. -std::string Reverse(const Slice& key) { - auto rev = key.ToString(); - std::reverse(rev.begin(), rev.end()); - return rev; -} - -class ReverseKeyComparator : public Comparator { - public: - const char* Name() const override { - return "rocksdb.ReverseBytewiseComparator"; - } - - int Compare(const Slice& a, const Slice& b) const override { - return BytewiseComparator()->Compare(Reverse(a), Reverse(b)); - } - - void FindShortestSeparator(std::string* start, - const Slice& limit) const override { - std::string s = Reverse(*start); - std::string l = Reverse(limit); - BytewiseComparator()->FindShortestSeparator(&s, l); - *start = Reverse(s); - } - - void FindShortSuccessor(std::string* key) const override { - std::string s = Reverse(*key); - BytewiseComparator()->FindShortSuccessor(&s); - *key = Reverse(s); - } -}; - -ReverseKeyComparator reverse_key_comparator; - -void Increment(const Comparator* cmp, std::string* key) { - if (cmp == BytewiseComparator()) { - key->push_back('\0'); - } else { - assert(cmp == &reverse_key_comparator); - std::string rev = Reverse(*key); - rev.push_back('\0'); - *key = Reverse(rev); - } -} - -const auto kUnknownColumnFamily = - TablePropertiesCollectorFactory::Context::kUnknownColumnFamily; - -} // namespace - -// Helper class for tests to unify the interface between -// BlockBuilder/TableBuilder and Block/Table. -class Constructor { - public: - explicit Constructor(const Comparator* cmp) - : data_(stl_wrappers::LessOfComparator(cmp)) {} - virtual ~Constructor() {} - - void Add(const std::string& key, const Slice& value) { - data_[key] = value.ToString(); - } - - // Finish constructing the data structure with all the keys that have - // been added so far. 
Returns the keys in sorted order in "*keys" - // and stores the key/value pairs in "*kvmap" - void Finish(const Options& options, const ImmutableOptions& ioptions, - const MutableCFOptions& moptions, - const BlockBasedTableOptions& table_options, - const InternalKeyComparator& internal_comparator, - std::vector* keys, stl_wrappers::KVMap* kvmap) { - last_internal_comparator_ = &internal_comparator; - *kvmap = data_; - keys->clear(); - for (const auto& kv : data_) { - keys->push_back(kv.first); - } - data_.clear(); - Status s = FinishImpl(options, ioptions, moptions, table_options, - internal_comparator, *kvmap); - ASSERT_TRUE(s.ok()) << s.ToString(); - } - - // Construct the data structure from the data in "data" - virtual Status FinishImpl(const Options& options, - const ImmutableOptions& ioptions, - const MutableCFOptions& moptions, - const BlockBasedTableOptions& table_options, - const InternalKeyComparator& internal_comparator, - const stl_wrappers::KVMap& data) = 0; - - virtual InternalIterator* NewIterator( - const SliceTransform* prefix_extractor = nullptr) const = 0; - - virtual const stl_wrappers::KVMap& data() { return data_; } - - virtual bool IsArenaMode() const { return false; } - - virtual DB* db() const { return nullptr; } // Overridden in DBConstructor - - virtual bool AnywayDeleteIterator() const { return false; } - - protected: - const InternalKeyComparator* last_internal_comparator_; - - private: - stl_wrappers::KVMap data_; -}; - -// A helper class that converts internal format keys into user keys -class KeyConvertingIterator : public InternalIterator { - public: - explicit KeyConvertingIterator(InternalIterator* iter, - bool arena_mode = false) - : iter_(iter), arena_mode_(arena_mode) {} - ~KeyConvertingIterator() override { - if (arena_mode_) { - iter_->~InternalIterator(); - } else { - delete iter_; - } - } - bool Valid() const override { return iter_->Valid() && status_.ok(); } - void Seek(const Slice& target) override { - ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue); - std::string encoded; - AppendInternalKey(&encoded, ikey); - iter_->Seek(encoded); - } - void SeekForPrev(const Slice& target) override { - ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue); - std::string encoded; - AppendInternalKey(&encoded, ikey); - iter_->SeekForPrev(encoded); - } - void SeekToFirst() override { iter_->SeekToFirst(); } - void SeekToLast() override { iter_->SeekToLast(); } - void Next() override { iter_->Next(); } - void Prev() override { iter_->Prev(); } - IterBoundCheck UpperBoundCheckResult() override { - return iter_->UpperBoundCheckResult(); - } - - Slice key() const override { - assert(Valid()); - ParsedInternalKey parsed_key; - Status pik_status = - ParseInternalKey(iter_->key(), &parsed_key, true /* log_err_key */); - if (!pik_status.ok()) { - status_ = pik_status; - return Slice(status_.getState()); - } - return parsed_key.user_key; - } - - Slice value() const override { return iter_->value(); } - Status status() const override { - return status_.ok() ? iter_->status() : status_; - } - - private: - mutable Status status_; - InternalIterator* iter_; - bool arena_mode_; - - // No copying allowed - KeyConvertingIterator(const KeyConvertingIterator&); - void operator=(const KeyConvertingIterator&); -}; - -// `BlockConstructor` APIs always accept/return user keys. 
-class BlockConstructor : public Constructor { - public: - explicit BlockConstructor(const Comparator* cmp) - : Constructor(cmp), comparator_(cmp), block_(nullptr) {} - ~BlockConstructor() override { delete block_; } - Status FinishImpl(const Options& /*options*/, - const ImmutableOptions& /*ioptions*/, - const MutableCFOptions& /*moptions*/, - const BlockBasedTableOptions& table_options, - const InternalKeyComparator& /*internal_comparator*/, - const stl_wrappers::KVMap& kv_map) override { - delete block_; - block_ = nullptr; - BlockBuilder builder(table_options.block_restart_interval); - - for (const auto& kv : kv_map) { - // `DataBlockIter` assumes it reads only internal keys. `BlockConstructor` - // clients provide user keys, so we need to convert to internal key format - // before writing the data block. - ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue); - std::string encoded; - AppendInternalKey(&encoded, ikey); - builder.Add(encoded, kv.second); - } - // Open the block - data_ = builder.Finish().ToString(); - BlockContents contents; - contents.data = data_; - block_ = new Block(std::move(contents)); - return Status::OK(); - } - InternalIterator* NewIterator( - const SliceTransform* /*prefix_extractor*/) const override { - // `DataBlockIter` returns the internal keys it reads. - // `KeyConvertingIterator` converts them to user keys before they are - // exposed to the `BlockConstructor` clients. - return new KeyConvertingIterator( - block_->NewDataIterator(comparator_, kDisableGlobalSequenceNumber)); - } - - private: - const Comparator* comparator_; - std::string data_; - Block* block_; - - BlockConstructor(); -}; - -class TableConstructor : public Constructor { - public: - explicit TableConstructor(const Comparator* cmp, - bool convert_to_internal_key = false, - int level = -1, SequenceNumber largest_seqno = 0) - : Constructor(cmp), - largest_seqno_(largest_seqno), - convert_to_internal_key_(convert_to_internal_key), - level_(level) { - env_ = ROCKSDB_NAMESPACE::Env::Default(); - } - ~TableConstructor() override { Reset(); } - - Status FinishImpl(const Options& options, const ImmutableOptions& ioptions, - const MutableCFOptions& moptions, - const BlockBasedTableOptions& /*table_options*/, - const InternalKeyComparator& internal_comparator, - const stl_wrappers::KVMap& kv_map) override { - Reset(); - soptions.use_mmap_reads = ioptions.allow_mmap_reads; - std::unique_ptr sink(new test::StringSink()); - file_writer_.reset(new WritableFileWriter( - std::move(sink), "" /* don't care */, FileOptions())); - std::unique_ptr builder; - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - - if (largest_seqno_ != 0) { - // Pretend that it's an external file written by SstFileWriter. 
- int_tbl_prop_collector_factories.emplace_back( - new SstFileWriterPropertiesCollectorFactory(2 /* version */, - 0 /* global_seqno*/)); - } - - std::string column_family_name; - builder.reset(ioptions.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, internal_comparator, - &int_tbl_prop_collector_factories, - options.compression, options.compression_opts, - kUnknownColumnFamily, column_family_name, level_), - file_writer_.get())); - - for (const auto& kv : kv_map) { - if (convert_to_internal_key_) { - ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue); - std::string encoded; - AppendInternalKey(&encoded, ikey); - builder->Add(encoded, kv.second); - } else { - builder->Add(kv.first, kv.second); - } - EXPECT_OK(builder->status()); - } - Status s = builder->Finish(); - EXPECT_OK(file_writer_->Flush()); - EXPECT_TRUE(s.ok()) << s.ToString(); - - EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize()); - - // Open the table - file_num_ = cur_file_num_++; - - return Reopen(ioptions, moptions); - } - - InternalIterator* NewIterator( - const SliceTransform* prefix_extractor) const override { - InternalIterator* iter = table_reader_->NewIterator( - read_options_, prefix_extractor, /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized); - if (convert_to_internal_key_) { - return new KeyConvertingIterator(iter); - } else { - return iter; - } - } - - uint64_t ApproximateOffsetOf(const Slice& key) const { - if (convert_to_internal_key_) { - InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); - const Slice skey = ikey.Encode(); - return table_reader_->ApproximateOffsetOf( - skey, TableReaderCaller::kUncategorized); - } - return table_reader_->ApproximateOffsetOf( - key, TableReaderCaller::kUncategorized); - } - - virtual Status Reopen(const ImmutableOptions& ioptions, - const MutableCFOptions& moptions) { - std::unique_ptr source(new test::StringSource( - TEST_GetSink()->contents(), file_num_, ioptions.allow_mmap_reads)); - - file_reader_.reset(new RandomAccessFileReader(std::move(source), "test")); - return ioptions.table_factory->NewTableReader( - TableReaderOptions(ioptions, moptions.prefix_extractor, soptions, - *last_internal_comparator_, /*skip_filters*/ false, - /*immortal*/ false, false, level_, - &block_cache_tracer_, moptions.write_buffer_size, "", - file_num_, kNullUniqueId64x2, largest_seqno_), - std::move(file_reader_), TEST_GetSink()->contents().size(), - &table_reader_); - } - - virtual TableReader* GetTableReader() { return table_reader_.get(); } - - bool AnywayDeleteIterator() const override { - return convert_to_internal_key_; - } - - void ResetTableReader() { table_reader_.reset(); } - - bool ConvertToInternalKey() { return convert_to_internal_key_; } - - test::StringSink* TEST_GetSink() { - return static_cast(file_writer_->writable_file()); - } - - BlockCacheTracer block_cache_tracer_; - - private: - void Reset() { - file_num_ = 0; - table_reader_.reset(); - file_writer_.reset(); - file_reader_.reset(); - } - - const ReadOptions read_options_; - uint64_t file_num_; - std::unique_ptr file_writer_; - std::unique_ptr file_reader_; - std::unique_ptr table_reader_; - SequenceNumber largest_seqno_; - bool convert_to_internal_key_; - int level_; - - TableConstructor(); - - static uint64_t cur_file_num_; - EnvOptions soptions; - Env* env_; -}; -uint64_t TableConstructor::cur_file_num_ = 1; - -class MemTableConstructor : public Constructor { - public: - explicit MemTableConstructor(const Comparator* cmp, 
WriteBufferManager* wb) - : Constructor(cmp), - internal_comparator_(cmp), - write_buffer_manager_(wb), - table_factory_(new SkipListFactory) { - options_.memtable_factory = table_factory_; - ImmutableOptions ioptions(options_); - memtable_ = - new MemTable(internal_comparator_, ioptions, MutableCFOptions(options_), - wb, kMaxSequenceNumber, 0 /* column_family_id */); - memtable_->Ref(); - } - ~MemTableConstructor() override { delete memtable_->Unref(); } - Status FinishImpl(const Options&, const ImmutableOptions& ioptions, - const MutableCFOptions& /*moptions*/, - const BlockBasedTableOptions& /*table_options*/, - const InternalKeyComparator& /*internal_comparator*/, - const stl_wrappers::KVMap& kv_map) override { - delete memtable_->Unref(); - ImmutableOptions mem_ioptions(ioptions); - memtable_ = new MemTable(internal_comparator_, mem_ioptions, - MutableCFOptions(options_), write_buffer_manager_, - kMaxSequenceNumber, 0 /* column_family_id */); - memtable_->Ref(); - int seq = 1; - for (const auto& kv : kv_map) { - Status s = memtable_->Add(seq, kTypeValue, kv.first, kv.second, - nullptr /* kv_prot_info */); - if (!s.ok()) { - return s; - } - seq++; - } - return Status::OK(); - } - InternalIterator* NewIterator( - const SliceTransform* /*prefix_extractor*/) const override { - return new KeyConvertingIterator( - memtable_->NewIterator(ReadOptions(), &arena_), true); - } - - bool AnywayDeleteIterator() const override { return true; } - - bool IsArenaMode() const override { return true; } - - private: - mutable Arena arena_; - InternalKeyComparator internal_comparator_; - Options options_; - WriteBufferManager* write_buffer_manager_; - MemTable* memtable_; - std::shared_ptr table_factory_; -}; - -class InternalIteratorFromIterator : public InternalIterator { - public: - explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {} - bool Valid() const override { return it_->Valid(); } - void Seek(const Slice& target) override { it_->Seek(target); } - void SeekForPrev(const Slice& target) override { it_->SeekForPrev(target); } - void SeekToFirst() override { it_->SeekToFirst(); } - void SeekToLast() override { it_->SeekToLast(); } - void Next() override { it_->Next(); } - void Prev() override { it_->Prev(); } - Slice key() const override { return it_->key(); } - Slice value() const override { return it_->value(); } - Status status() const override { return it_->status(); } - - private: - std::unique_ptr it_; -}; - -class DBConstructor : public Constructor { - public: - explicit DBConstructor(const Comparator* cmp) - : Constructor(cmp), comparator_(cmp) { - db_ = nullptr; - NewDB(); - } - ~DBConstructor() override { delete db_; } - Status FinishImpl(const Options& /*options*/, - const ImmutableOptions& /*ioptions*/, - const MutableCFOptions& /*moptions*/, - const BlockBasedTableOptions& /*table_options*/, - const InternalKeyComparator& /*internal_comparator*/, - const stl_wrappers::KVMap& kv_map) override { - delete db_; - db_ = nullptr; - NewDB(); - for (const auto& kv : kv_map) { - WriteBatch batch; - EXPECT_OK(batch.Put(kv.first, kv.second)); - EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok()); - } - return Status::OK(); - } - - InternalIterator* NewIterator( - const SliceTransform* /*prefix_extractor*/) const override { - return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions())); - } - - DB* db() const override { return db_; } - - private: - void NewDB() { - std::string name = test::PerThreadDBPath("table_testdb"); - - Options options; - options.comparator = 
comparator_; - Status status = DestroyDB(name, options); - ASSERT_TRUE(status.ok()) << status.ToString(); - - options.create_if_missing = true; - options.error_if_exists = true; - options.write_buffer_size = 10000; // Something small to force merging - status = DB::Open(options, name, &db_); - ASSERT_TRUE(status.ok()) << status.ToString(); - } - - const Comparator* comparator_; - DB* db_; -}; - -enum TestType { - BLOCK_BASED_TABLE_TEST, - PLAIN_TABLE_SEMI_FIXED_PREFIX, - PLAIN_TABLE_FULL_STR_PREFIX, - PLAIN_TABLE_TOTAL_ORDER, - BLOCK_TEST, - MEMTABLE_TEST, - DB_TEST -}; - -struct TestArgs { - TestType type; - bool reverse_compare; - int restart_interval; - CompressionType compression; - uint32_t compression_parallel_threads; - uint32_t format_version; - bool use_mmap; -}; - -std::ostream& operator<<(std::ostream& os, const TestArgs& args) { - os << "type: " << args.type << " reverse_compare: " << args.reverse_compare - << " restart_interval: " << args.restart_interval - << " compression: " << args.compression - << " compression_parallel_threads: " << args.compression_parallel_threads - << " format_version: " << args.format_version - << " use_mmap: " << args.use_mmap; - - return os; -} - -static std::vector GenerateArgList() { - std::vector test_args; - std::vector test_types = {BLOCK_BASED_TABLE_TEST, - PLAIN_TABLE_SEMI_FIXED_PREFIX, - PLAIN_TABLE_FULL_STR_PREFIX, - PLAIN_TABLE_TOTAL_ORDER, - BLOCK_TEST, - MEMTABLE_TEST, - DB_TEST}; - std::vector reverse_compare_types = {false, true}; - std::vector restart_intervals = {16, 1, 1024}; - std::vector compression_parallel_threads = {1, 4}; - - // Only add compression if it is supported - std::vector> compression_types; - compression_types.emplace_back(kNoCompression, false); - if (Snappy_Supported()) { - compression_types.emplace_back(kSnappyCompression, false); - } - if (Zlib_Supported()) { - compression_types.emplace_back(kZlibCompression, false); - compression_types.emplace_back(kZlibCompression, true); - } - if (BZip2_Supported()) { - compression_types.emplace_back(kBZip2Compression, false); - compression_types.emplace_back(kBZip2Compression, true); - } - if (LZ4_Supported()) { - compression_types.emplace_back(kLZ4Compression, false); - compression_types.emplace_back(kLZ4Compression, true); - compression_types.emplace_back(kLZ4HCCompression, false); - compression_types.emplace_back(kLZ4HCCompression, true); - } - if (XPRESS_Supported()) { - compression_types.emplace_back(kXpressCompression, false); - compression_types.emplace_back(kXpressCompression, true); - } - if (ZSTD_Supported()) { - compression_types.emplace_back(kZSTD, false); - compression_types.emplace_back(kZSTD, true); - } - - for (auto test_type : test_types) { - for (auto reverse_compare : reverse_compare_types) { - if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX || - test_type == PLAIN_TABLE_FULL_STR_PREFIX || - test_type == PLAIN_TABLE_TOTAL_ORDER) { - // Plain table doesn't use restart index or compression. 
- TestArgs one_arg; - one_arg.type = test_type; - one_arg.reverse_compare = reverse_compare; - one_arg.restart_interval = restart_intervals[0]; - one_arg.compression = compression_types[0].first; - one_arg.compression_parallel_threads = 1; - one_arg.format_version = 0; - one_arg.use_mmap = true; - test_args.push_back(one_arg); - one_arg.use_mmap = false; - test_args.push_back(one_arg); - continue; - } - - for (auto restart_interval : restart_intervals) { - for (auto compression_type : compression_types) { - for (auto num_threads : compression_parallel_threads) { - TestArgs one_arg; - one_arg.type = test_type; - one_arg.reverse_compare = reverse_compare; - one_arg.restart_interval = restart_interval; - one_arg.compression = compression_type.first; - one_arg.compression_parallel_threads = num_threads; - one_arg.format_version = compression_type.second ? 2 : 1; - one_arg.use_mmap = false; - test_args.push_back(one_arg); - } - } - } - } - } - return test_args; -} - -// In order to make all tests run for plain table format, including -// those operating on empty keys, create a new prefix transformer which -// return fixed prefix if the slice is not shorter than the prefix length, -// and the full slice if it is shorter. -class FixedOrLessPrefixTransform : public SliceTransform { - private: - const size_t prefix_len_; - - public: - explicit FixedOrLessPrefixTransform(size_t prefix_len) - : prefix_len_(prefix_len) {} - - const char* Name() const override { return "rocksdb.FixedPrefix"; } - - Slice Transform(const Slice& src) const override { - assert(InDomain(src)); - if (src.size() < prefix_len_) { - return src; - } - return Slice(src.data(), prefix_len_); - } - - bool InDomain(const Slice& /*src*/) const override { return true; } - - bool InRange(const Slice& dst) const override { - return (dst.size() <= prefix_len_); - } - bool FullLengthEnabled(size_t* /*len*/) const override { return false; } -}; - -class HarnessTest : public testing::Test { - public: - explicit HarnessTest(const TestArgs& args) - : args_(args), - ioptions_(options_), - moptions_(options_), - write_buffer_(options_.db_write_buffer_size), - support_prev_(true), - only_support_prefix_seek_(false) { - options_.compression = args_.compression; - options_.compression_opts.parallel_threads = - args_.compression_parallel_threads; - // Use shorter block size for tests to exercise block boundary - // conditions more. 
- if (args_.reverse_compare) { - options_.comparator = &reverse_key_comparator; - } - - internal_comparator_.reset( - new test::PlainInternalKeyComparator(options_.comparator)); - - options_.allow_mmap_reads = args_.use_mmap; - switch (args_.type) { - case BLOCK_BASED_TABLE_TEST: - table_options_.flush_block_policy_factory.reset( - new FlushBlockBySizePolicyFactory()); - table_options_.block_size = 256; - table_options_.block_restart_interval = args_.restart_interval; - table_options_.index_block_restart_interval = args_.restart_interval; - table_options_.format_version = args_.format_version; - options_.table_factory.reset( - new BlockBasedTableFactory(table_options_)); - constructor_.reset(new TableConstructor( - options_.comparator, true /* convert_to_internal_key_ */)); - internal_comparator_.reset( - new InternalKeyComparator(options_.comparator)); - break; - - case PLAIN_TABLE_SEMI_FIXED_PREFIX: - support_prev_ = false; - only_support_prefix_seek_ = true; - options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2)); - options_.table_factory.reset(NewPlainTableFactory()); - constructor_.reset(new TableConstructor( - options_.comparator, true /* convert_to_internal_key_ */)); - internal_comparator_.reset( - new InternalKeyComparator(options_.comparator)); - break; - case PLAIN_TABLE_FULL_STR_PREFIX: - support_prev_ = false; - only_support_prefix_seek_ = true; - options_.prefix_extractor.reset(NewNoopTransform()); - options_.table_factory.reset(NewPlainTableFactory()); - constructor_.reset(new TableConstructor( - options_.comparator, true /* convert_to_internal_key_ */)); - internal_comparator_.reset( - new InternalKeyComparator(options_.comparator)); - break; - case PLAIN_TABLE_TOTAL_ORDER: - support_prev_ = false; - only_support_prefix_seek_ = false; - options_.prefix_extractor = nullptr; - - { - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = kPlainTableVariableLength; - plain_table_options.bloom_bits_per_key = 0; - plain_table_options.hash_table_ratio = 0; - - options_.table_factory.reset( - NewPlainTableFactory(plain_table_options)); - } - constructor_.reset(new TableConstructor( - options_.comparator, true /* convert_to_internal_key_ */)); - internal_comparator_.reset( - new InternalKeyComparator(options_.comparator)); - break; - case BLOCK_TEST: - table_options_.block_size = 256; - options_.table_factory.reset( - new BlockBasedTableFactory(table_options_)); - constructor_.reset(new BlockConstructor(options_.comparator)); - break; - case MEMTABLE_TEST: - table_options_.block_size = 256; - options_.table_factory.reset( - new BlockBasedTableFactory(table_options_)); - constructor_.reset( - new MemTableConstructor(options_.comparator, &write_buffer_)); - break; - case DB_TEST: - table_options_.block_size = 256; - options_.table_factory.reset( - new BlockBasedTableFactory(table_options_)); - constructor_.reset(new DBConstructor(options_.comparator)); - break; - } - ioptions_ = ImmutableOptions(options_); - moptions_ = MutableCFOptions(options_); - } - - void Add(const std::string& key, const std::string& value) { - constructor_->Add(key, value); - } - - void Test(Random* rnd) { - std::vector keys; - stl_wrappers::KVMap data; - constructor_->Finish(options_, ioptions_, moptions_, table_options_, - *internal_comparator_, &keys, &data); - - TestForwardScan(keys, data); - if (support_prev_) { - TestBackwardScan(keys, data); - } - TestRandomAccess(rnd, keys, data); - } - - void TestForwardScan(const std::vector& /*keys*/, - const stl_wrappers::KVMap& 
data) { - InternalIterator* iter = constructor_->NewIterator(); - ASSERT_TRUE(!iter->Valid()); - iter->SeekToFirst(); - ASSERT_OK(iter->status()); - for (stl_wrappers::KVMap::const_iterator model_iter = data.begin(); - model_iter != data.end(); ++model_iter) { - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - iter->Next(); - ASSERT_OK(iter->status()); - } - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { - iter->~InternalIterator(); - } else { - delete iter; - } - } - - void TestBackwardScan(const std::vector& /*keys*/, - const stl_wrappers::KVMap& data) { - InternalIterator* iter = constructor_->NewIterator(); - ASSERT_TRUE(!iter->Valid()); - iter->SeekToLast(); - ASSERT_OK(iter->status()); - for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin(); - model_iter != data.rend(); ++model_iter) { - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - iter->Prev(); - ASSERT_OK(iter->status()); - } - ASSERT_TRUE(!iter->Valid()); - ASSERT_OK(iter->status()); - if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { - iter->~InternalIterator(); - } else { - delete iter; - } - } - - void TestRandomAccess(Random* rnd, const std::vector& keys, - const stl_wrappers::KVMap& data) { - static const bool kVerbose = false; - InternalIterator* iter = constructor_->NewIterator(); - ASSERT_TRUE(!iter->Valid()); - stl_wrappers::KVMap::const_iterator model_iter = data.begin(); - if (kVerbose) fprintf(stderr, "---\n"); - for (int i = 0; i < 200; i++) { - const int toss = rnd->Uniform(support_prev_ ? 5 : 3); - switch (toss) { - case 0: { - if (iter->Valid()) { - if (kVerbose) fprintf(stderr, "Next\n"); - iter->Next(); - ASSERT_OK(iter->status()); - ++model_iter; - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - } - break; - } - - case 1: { - if (kVerbose) fprintf(stderr, "SeekToFirst\n"); - iter->SeekToFirst(); - ASSERT_OK(iter->status()); - model_iter = data.begin(); - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - break; - } - - case 2: { - std::string key = PickRandomKey(rnd, keys); - model_iter = data.lower_bound(key); - if (kVerbose) - fprintf(stderr, "Seek '%s'\n", EscapeString(key).c_str()); - iter->Seek(Slice(key)); - ASSERT_OK(iter->status()); - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - break; - } - - case 3: { - if (iter->Valid()) { - if (kVerbose) fprintf(stderr, "Prev\n"); - iter->Prev(); - ASSERT_OK(iter->status()); - if (model_iter == data.begin()) { - model_iter = data.end(); // Wrap around to invalid value - } else { - --model_iter; - } - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - } - break; - } - - case 4: { - if (kVerbose) fprintf(stderr, "SeekToLast\n"); - iter->SeekToLast(); - ASSERT_OK(iter->status()); - if (keys.empty()) { - model_iter = data.end(); - } else { - std::string last = data.rbegin()->first; - model_iter = data.lower_bound(last); - } - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - break; - } - } - } - if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { - iter->~InternalIterator(); - } else { - delete iter; - } - } - - std::string ToString(const stl_wrappers::KVMap& data, - const stl_wrappers::KVMap::const_iterator& it) { - if (it == data.end()) { - return "END"; - } else { - return "'" + it->first + "->" + it->second + "'"; - } - } - - std::string ToString(const stl_wrappers::KVMap& data, - const stl_wrappers::KVMap::const_reverse_iterator& it) { - if (it == 
data.rend()) { - return "END"; - } else { - return "'" + it->first + "->" + it->second + "'"; - } - } - - std::string ToString(const InternalIterator* it) { - if (!it->Valid()) { - return "END"; - } else { - return "'" + it->key().ToString() + "->" + it->value().ToString() + "'"; - } - } - - std::string PickRandomKey(Random* rnd, const std::vector& keys) { - if (keys.empty()) { - return "foo"; - } else { - const int index = rnd->Uniform(static_cast(keys.size())); - std::string result = keys[index]; - switch (rnd->Uniform(support_prev_ ? 3 : 1)) { - case 0: - // Return an existing key - break; - case 1: { - // Attempt to return something smaller than an existing key - if (result.size() > 0 && result[result.size() - 1] > '\0' && - (!only_support_prefix_seek_ || - options_.prefix_extractor->Transform(result).size() < - result.size())) { - result[result.size() - 1]--; - } - break; - } - case 2: { - // Return something larger than an existing key - Increment(options_.comparator, &result); - break; - } - } - return result; - } - } - - // Returns nullptr if not running against a DB - DB* db() const { return constructor_->db(); } - - private: - TestArgs args_; - Options options_; - ImmutableOptions ioptions_; - MutableCFOptions moptions_; - BlockBasedTableOptions table_options_; - std::unique_ptr constructor_; - WriteBufferManager write_buffer_; - bool support_prev_; - bool only_support_prefix_seek_; - std::shared_ptr internal_comparator_; -}; - -class ParameterizedHarnessTest : public HarnessTest, - public testing::WithParamInterface { - public: - ParameterizedHarnessTest() : HarnessTest(GetParam()) {} -}; - -INSTANTIATE_TEST_CASE_P(TableTest, ParameterizedHarnessTest, - ::testing::ValuesIn(GenerateArgList())); - -class DBHarnessTest : public HarnessTest { - public: - DBHarnessTest() - : HarnessTest(TestArgs{DB_TEST, /* reverse_compare */ false, - /* restart_interval */ 16, kNoCompression, - /* compression_parallel_threads */ 1, - /* format_version */ 0, /* use_mmap */ false}) {} -}; - -static bool Between(uint64_t val, uint64_t low, uint64_t high) { - bool result = (val >= low) && (val <= high); - if (!result) { - fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", - (unsigned long long)(val), (unsigned long long)(low), - (unsigned long long)(high)); - } - return result; -} - -// Tests against all kinds of tables -class TableTest : public testing::Test { - public: - const InternalKeyComparator& GetPlainInternalComparator( - const Comparator* comp) { - if (!plain_internal_comparator) { - plain_internal_comparator.reset( - new test::PlainInternalKeyComparator(comp)); - } - return *plain_internal_comparator; - } - void IndexTest(BlockBasedTableOptions table_options); - - private: - std::unique_ptr plain_internal_comparator; -}; - -class GeneralTableTest : public TableTest {}; -class BlockBasedTableTestBase : public TableTest {}; -class BlockBasedTableTest - : public BlockBasedTableTestBase, - virtual public ::testing::WithParamInterface { - public: - BlockBasedTableTest() : format_(GetParam()) { - env_ = ROCKSDB_NAMESPACE::Env::Default(); - } - - BlockBasedTableOptions GetBlockBasedTableOptions() { - BlockBasedTableOptions options; - options.format_version = format_; - return options; - } - - void SetupTracingTest(TableConstructor* c) { - test_path_ = test::PerThreadDBPath("block_based_table_tracing_test"); - EXPECT_OK(env_->CreateDir(test_path_)); - trace_file_path_ = test_path_ + "/block_cache_trace_file"; - - BlockCacheTraceWriterOptions trace_writer_opt; - BlockCacheTraceOptions 
trace_opt; - std::unique_ptr trace_writer; - EXPECT_OK(NewFileTraceWriter(env_, EnvOptions(), trace_file_path_, - &trace_writer)); - std::unique_ptr block_cache_trace_writer = - NewBlockCacheTraceWriter(env_->GetSystemClock().get(), trace_writer_opt, - std::move(trace_writer)); - ASSERT_NE(block_cache_trace_writer, nullptr); - // Always return Status::OK(). - assert(c->block_cache_tracer_ - .StartTrace(trace_opt, std::move(block_cache_trace_writer)) - .ok()); - - { - std::string user_key = "k01"; - InternalKey internal_key(user_key, 0, kTypeValue); - std::string encoded_key = internal_key.Encode().ToString(); - c->Add(encoded_key, kDummyValue); - } - { - std::string user_key = "k02"; - InternalKey internal_key(user_key, 0, kTypeValue); - std::string encoded_key = internal_key.Encode().ToString(); - c->Add(encoded_key, kDummyValue); - } - } - - void VerifyBlockAccessTrace( - TableConstructor* c, - const std::vector& expected_records) { - c->block_cache_tracer_.EndTrace(); - - { - std::unique_ptr trace_reader; - Status s = NewFileTraceReader(env_, EnvOptions(), trace_file_path_, - &trace_reader); - EXPECT_OK(s); - BlockCacheTraceReader reader(std::move(trace_reader)); - BlockCacheTraceHeader header; - EXPECT_OK(reader.ReadHeader(&header)); - uint32_t index = 0; - while (s.ok()) { - BlockCacheTraceRecord access; - s = reader.ReadAccess(&access); - if (!s.ok()) { - break; - } - ASSERT_LT(index, expected_records.size()); - EXPECT_NE("", access.block_key); - EXPECT_EQ(access.block_type, expected_records[index].block_type); - EXPECT_GT(access.block_size, 0); - EXPECT_EQ(access.caller, expected_records[index].caller); - EXPECT_EQ(access.no_insert, expected_records[index].no_insert); - EXPECT_EQ(access.is_cache_hit, expected_records[index].is_cache_hit); - // Get - if (access.caller == TableReaderCaller::kUserGet) { - EXPECT_EQ(access.referenced_key, - expected_records[index].referenced_key); - EXPECT_EQ(access.get_id, expected_records[index].get_id); - EXPECT_EQ(access.get_from_user_specified_snapshot, - expected_records[index].get_from_user_specified_snapshot); - if (access.block_type == TraceType::kBlockTraceDataBlock) { - EXPECT_GT(access.referenced_data_size, 0); - EXPECT_GT(access.num_keys_in_block, 0); - EXPECT_EQ(access.referenced_key_exist_in_block, - expected_records[index].referenced_key_exist_in_block); - } - } else { - EXPECT_EQ(access.referenced_key, ""); - EXPECT_EQ(access.get_id, 0); - EXPECT_FALSE(access.get_from_user_specified_snapshot); - EXPECT_EQ(access.referenced_data_size, 0); - EXPECT_EQ(access.num_keys_in_block, 0); - EXPECT_FALSE(access.referenced_key_exist_in_block); - } - index++; - } - EXPECT_EQ(index, expected_records.size()); - } - EXPECT_OK(env_->DeleteFile(trace_file_path_)); - EXPECT_OK(env_->DeleteDir(test_path_)); - } - - protected: - uint64_t IndexUncompressedHelper(bool indexCompress); - - private: - uint32_t format_; - Env* env_; - std::string trace_file_path_; - std::string test_path_; -}; -class PlainTableTest : public TableTest {}; -class TablePropertyTest : public testing::Test {}; -class BBTTailPrefetchTest : public TableTest {}; - -// The helper class to test the file checksum -class FileChecksumTestHelper { - public: - FileChecksumTestHelper(bool convert_to_internal_key = false) - : convert_to_internal_key_(convert_to_internal_key) {} - ~FileChecksumTestHelper() {} - - void CreateWritableFile() { - sink_ = new test::StringSink(); - std::unique_ptr holder(sink_); - file_writer_.reset(new WritableFileWriter( - std::move(holder), "" /* don't care */, 
FileOptions())); - } - - void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) { - if (file_writer_ != nullptr) { - file_writer_->TEST_SetFileChecksumGenerator(checksum_generator); - } else { - delete checksum_generator; - } - } - - WritableFileWriter* GetFileWriter() { return file_writer_.get(); } - - Status ResetTableBuilder(std::unique_ptr&& builder) { - assert(builder != nullptr); - table_builder_ = std::move(builder); - return Status::OK(); - } - - void AddKVtoKVMap(int num_entries) { - Random rnd(test::RandomSeed()); - for (int i = 0; i < num_entries; i++) { - std::string v = rnd.RandomString(100); - kv_map_[test::RandomKey(&rnd, 20)] = v; - } - } - - Status WriteKVAndFlushTable() { - for (const auto& kv : kv_map_) { - if (convert_to_internal_key_) { - ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue); - std::string encoded; - AppendInternalKey(&encoded, ikey); - table_builder_->Add(encoded, kv.second); - } else { - table_builder_->Add(kv.first, kv.second); - } - EXPECT_TRUE(table_builder_->status().ok()); - } - Status s = table_builder_->Finish(); - EXPECT_OK(file_writer_->Flush()); - EXPECT_OK(s); - - EXPECT_EQ(sink_->contents().size(), table_builder_->FileSize()); - return s; - } - - std::string GetFileChecksum() { - EXPECT_OK(file_writer_->Close()); - return table_builder_->GetFileChecksum(); - } - - const char* GetFileChecksumFuncName() { - return table_builder_->GetFileChecksumFuncName(); - } - - Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator, - std::string* checksum) { - assert(file_checksum_generator != nullptr); - cur_file_num_ = checksum_file_num_++; - test::StringSink* ss_rw = - static_cast(file_writer_->writable_file()); - std::unique_ptr source( - new test::StringSource(ss_rw->contents())); - file_reader_.reset(new RandomAccessFileReader(std::move(source), "test")); - - std::unique_ptr scratch(new char[2048]); - Slice result; - uint64_t offset = 0; - Status s; - s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(), - nullptr, Env::IO_TOTAL /* rate_limiter_priority */); - if (!s.ok()) { - return s; - } - while (result.size() != 0) { - file_checksum_generator->Update(scratch.get(), result.size()); - offset += static_cast(result.size()); - s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(), - nullptr, - Env::IO_TOTAL /* rate_limiter_priority */); - if (!s.ok()) { - return s; - } - } - EXPECT_EQ(offset, static_cast(table_builder_->FileSize())); - file_checksum_generator->Finalize(); - *checksum = file_checksum_generator->GetChecksum(); - return Status::OK(); - } - - private: - bool convert_to_internal_key_; - uint64_t cur_file_num_; - std::unique_ptr file_writer_; - std::unique_ptr file_reader_; - std::unique_ptr table_builder_; - stl_wrappers::KVMap kv_map_; - test::StringSink* sink_ = nullptr; - - static uint64_t checksum_file_num_; -}; - -uint64_t FileChecksumTestHelper::checksum_file_num_ = 1; - -INSTANTIATE_TEST_CASE_P(FormatVersions, BlockBasedTableTest, - testing::ValuesIn(test::kFooterFormatVersionsToTest)); - -// This test serves as the living tutorial for the prefix scan of user collected -// properties. 
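The lower_bound-plus-prefix-compare idiom exercised below works on any lexicographically ordered map, not just UserCollectedProperties; a minimal standalone sketch (hypothetical keys, standard library only):

    #include <iostream>
    #include <map>
    #include <string>

    int main() {
      std::map<std::string, std::string> props{
          {"num.111.1", "1"}, {"num.111.2", "2"}, {"num.333.1", "1"}};
      const std::string prefix = "num.111";
      // lower_bound() lands on the first key >= prefix; all keys sharing the
      // prefix follow contiguously because the map is sorted.
      for (auto pos = props.lower_bound(prefix);
           pos != props.end() &&
           pos->first.compare(0, prefix.size(), prefix) == 0;
           ++pos) {
        std::cout << pos->first << " -> " << pos->second << "\n";
      }
      return 0;
    }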
-TEST_F(TablePropertyTest, PrefixScanTest) { - UserCollectedProperties props{ - {"num.111.1", "1"}, {"num.111.2", "2"}, {"num.111.3", "3"}, - {"num.333.1", "1"}, {"num.333.2", "2"}, {"num.333.3", "3"}, - {"num.555.1", "1"}, {"num.555.2", "2"}, {"num.555.3", "3"}, - }; - - // prefixes that exist - for (const std::string prefix : {"num.111", "num.333", "num.555"}) { - int num = 0; - for (auto pos = props.lower_bound(prefix); - pos != props.end() && - pos->first.compare(0, prefix.size(), prefix) == 0; - ++pos) { - ++num; - auto key = prefix + "." + std::to_string(num); - ASSERT_EQ(key, pos->first); - ASSERT_EQ(std::to_string(num), pos->second); - } - ASSERT_EQ(3, num); - } - - // prefixes that don't exist - for (const std::string prefix : - {"num.000", "num.222", "num.444", "num.666"}) { - auto pos = props.lower_bound(prefix); - ASSERT_TRUE(pos == props.end() || - pos->first.compare(0, prefix.size(), prefix) != 0); - } -} - -namespace { -struct TestIds { - UniqueId64x3 internal_id; - UniqueId64x3 external_id; -}; - -inline bool operator==(const TestIds& lhs, const TestIds& rhs) { - return lhs.internal_id == rhs.internal_id && - lhs.external_id == rhs.external_id; -} - -std::ostream& operator<<(std::ostream& os, const TestIds& ids) { - return os << std::hex << "{{{ 0x" << ids.internal_id[0] << "U, 0x" - << ids.internal_id[1] << "U, 0x" << ids.internal_id[2] - << "U }}, {{ 0x" << ids.external_id[0] << "U, 0x" - << ids.external_id[1] << "U, 0x" << ids.external_id[2] << "U }}}"; -} - -TestIds GetUniqueId(TableProperties* tp, std::unordered_set* seen, - const std::string& db_id, const std::string& db_session_id, - uint64_t file_number) { - // First test session id logic - if (db_session_id.size() == 20) { - uint64_t upper; - uint64_t lower; - EXPECT_OK(DecodeSessionId(db_session_id, &upper, &lower)); - EXPECT_EQ(EncodeSessionId(upper, lower), db_session_id); - } - - // Get external using public API - tp->db_id = db_id; - tp->db_session_id = db_session_id; - tp->orig_file_number = file_number; - TestIds t; - { - std::string euid; - EXPECT_OK(GetExtendedUniqueIdFromTableProperties(*tp, &euid)); - EXPECT_EQ(euid.size(), 24U); - t.external_id[0] = DecodeFixed64(&euid[0]); - t.external_id[1] = DecodeFixed64(&euid[8]); - t.external_id[2] = DecodeFixed64(&euid[16]); - - std::string uid; - EXPECT_OK(GetUniqueIdFromTableProperties(*tp, &uid)); - EXPECT_EQ(uid.size(), 16U); - EXPECT_EQ(uid, euid.substr(0, 16)); - EXPECT_EQ(t.external_id[0], DecodeFixed64(&uid[0])); - EXPECT_EQ(t.external_id[1], DecodeFixed64(&uid[8])); - } - // All these should be effectively random - EXPECT_TRUE(seen->insert(t.external_id[0]).second); - EXPECT_TRUE(seen->insert(t.external_id[1]).second); - EXPECT_TRUE(seen->insert(t.external_id[2]).second); - - // Get internal with internal API - EXPECT_OK(GetSstInternalUniqueId(db_id, db_session_id, file_number, - &t.internal_id)); - EXPECT_NE(t.internal_id, kNullUniqueId64x3); - - // Verify relationship - UniqueId64x3 tmp = t.internal_id; - InternalUniqueIdToExternal(&tmp); - EXPECT_EQ(tmp, t.external_id); - ExternalUniqueIdToInternal(&tmp); - EXPECT_EQ(tmp, t.internal_id); - - // And 128-bit internal version - UniqueId64x2 tmp2{}; - EXPECT_OK(GetSstInternalUniqueId(db_id, db_session_id, file_number, &tmp2)); - EXPECT_NE(tmp2, kNullUniqueId64x2); - - EXPECT_EQ(tmp2[0], t.internal_id[0]); - EXPECT_EQ(tmp2[1], t.internal_id[1]); - InternalUniqueIdToExternal(&tmp2); - EXPECT_EQ(tmp2[0], t.external_id[0]); - EXPECT_EQ(tmp2[1], t.external_id[1]); - ExternalUniqueIdToInternal(&tmp2); - 
EXPECT_EQ(tmp2[0], t.internal_id[0]); - EXPECT_EQ(tmp2[1], t.internal_id[1]); - - return t; -} -} // namespace - -TEST_F(TablePropertyTest, UniqueIdsSchemaAndQuality) { - // To ensure the computation only depends on the expected entries, we set - // the rest randomly - TableProperties tp; - TEST_SetRandomTableProperties(&tp); - - // DB id is normally RFC-4122 - const std::string db_id1 = "7265b6eb-4e42-4aec-86a4-0dc5e73a228d"; - // Allow other forms of DB id - const std::string db_id2 = "1728000184588763620"; - const std::string db_id3 = "x"; - - // DB session id is normally 20 chars in base-36, but 13 to 24 chars - // is ok, roughly 64 to 128 bits. - const std::string ses_id1 = "ABCDEFGHIJ0123456789"; - // Same trailing 13 digits - const std::string ses_id2 = "HIJ0123456789"; - const std::string ses_id3 = "0123ABCDEFGHIJ0123456789"; - // Different trailing 12 digits - const std::string ses_id4 = "ABCDEFGH888888888888"; - // And change length - const std::string ses_id5 = "ABCDEFGHIJ012"; - const std::string ses_id6 = "ABCDEFGHIJ0123456789ABCD"; - - using T = TestIds; - std::unordered_set seen; - // Establish a stable schema for the unique IDs. These values must not - // change for existing table files. - // (Note: parens needed for macro parsing, extra braces needed for some - // compilers.) - EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id1, ses_id1, 1), - T({{{0x61d7dcf415d9cf19U, 0x160d77aae90757fdU, 0x907f41dfd90724ffU}}, - {{0xf0bd230365df7464U, 0xca089303f3648eb4U, 0x4b44f7e7324b2817U}}})); - // Only change internal_id[1] with file number - EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id1, ses_id1, 2), - T({{{0x61d7dcf415d9cf19U, 0x160d77aae90757feU, 0x907f41dfd90724ffU}}, - {{0xf13fdf7adcfebb6dU, 0x97cd2226cc033ea2U, 0x198c438182091f0eU}}})); - EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id1, ses_id1, 123456789), - T({{{0x61d7dcf415d9cf19U, 0x160d77aaee5c9ae9U, 0x907f41dfd90724ffU}}, - {{0x81fbcebe1ac6c4f0U, 0x6b14a64cfdc0f1c4U, 0x7d8fb6eaf18edbb3U}}})); - // Change internal_id[1] and internal_id[2] with db_id - EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id2, ses_id1, 1), - T({{{0x61d7dcf415d9cf19U, 0xf89c471f572f0d25U, 0x1f0f2a5eb0e6257eU}}, - {{0x7f1d01d453616991U, 0x32ddf2afec804ab2U, 0xd10a1ee2f0c7d9c1U}}})); - EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id3, ses_id1, 1), - T({{{0x61d7dcf415d9cf19U, 0xfed297a8154a57d0U, 0x8b931b9cdebd9e8U}}, - {{0x62b2f43183f6894bU, 0x897ff2b460eefad1U, 0xf4ec189fb2d15e04U}}})); - // Keeping same last 13 digits of ses_id keeps same internal_id[0] - EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id1, ses_id2, 1), - T({{{0x61d7dcf415d9cf19U, 0x5f6cc4fa2d528c8U, 0x7b70845d5bfb5446U}}, - {{0x96d1c83ffcc94266U, 0x82663eac0ec6e14aU, 0x94a88b49678b77f6U}}})); - EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id1, ses_id3, 1), - T({{{0x61d7dcf415d9cf19U, 0xfc7232879db37ea2U, 0xc0378d74ea4c89cdU}}, - {{0xdf2ef57e98776905U, 0xda5b31c987da833bU, 0x79c1b4bd0a9e760dU}}})); - // Changing last 12 digits of ses_id only changes internal_id[0] - // (vs. db_id1, ses_id1, 1) - EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id1, ses_id4, 1), - T({{{0x4f07cc0d003a83a8U, 0x160d77aae90757fdU, 0x907f41dfd90724ffU}}, - {{0xbcf85336a9f71f04U, 0x4f2949e2f3adb60dU, 0x9ca0def976abfa10U}}})); - // ses_id can change everything. 
- EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id1, ses_id5, 1), - T({{{0x94b8768e43f87ce6U, 0xc2559653ac4e7c93U, 0xde6dff6bbb1223U}}, - {{0x5a9537af681817fbU, 0x1afcd1fecaead5eaU, 0x767077ad9ebe0008U}}})); - EXPECT_EQ( - GetUniqueId(&tp, &seen, db_id1, ses_id6, 1), - T({{{0x43cfb0ffa3b710edU, 0x263c580426406a1bU, 0xfacc91379a80d29dU}}, - {{0xfa90547d84cb1cdbU, 0x2afe99c641992d4aU, 0x205b7f7b60e51cc2U}}})); - - // Now verify more thoroughly that any small change in inputs completely - // changes external unique id. - // (Relying on 'seen' checks etc. in GetUniqueId) - std::string db_id = "00000000-0000-0000-0000-000000000000"; - std::string ses_id = "000000000000000000000000"; - uint64_t file_num = 1; - // change db_id - for (size_t i = 0; i < db_id.size(); ++i) { - if (db_id[i] == '-') { - continue; - } - for (char alt : std::string("123456789abcdef")) { - db_id[i] = alt; - GetUniqueId(&tp, &seen, db_id, ses_id, file_num); - } - db_id[i] = '0'; - } - // change ses_id - for (size_t i = 0; i < ses_id.size(); ++i) { - for (char alt : std::string("123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")) { - ses_id[i] = alt; - GetUniqueId(&tp, &seen, db_id, ses_id, file_num); - } - ses_id[i] = '0'; - } - // change file_num - for (int i = 1; i < 64; ++i) { - GetUniqueId(&tp, &seen, db_id, ses_id, file_num << i); - } - - // Verify that "all zeros" in first 128 bits is equivalent for internal and - // external IDs. This way, as long as we avoid "all zeros" in internal IDs, - // we avoid it in external IDs. - { - UniqueId64x3 id1{{0, 0, Random::GetTLSInstance()->Next64()}}; - UniqueId64x3 id2 = id1; - InternalUniqueIdToExternal(&id1); - EXPECT_EQ(id1, id2); - ExternalUniqueIdToInternal(&id2); - EXPECT_EQ(id1, id2); - } -} - -namespace { -void SetGoodTableProperties(TableProperties* tp) { - // To ensure the computation only depends on the expected entries, we set - // the rest randomly - TEST_SetRandomTableProperties(tp); - tp->db_id = "7265b6eb-4e42-4aec-86a4-0dc5e73a228d"; - tp->db_session_id = "ABCDEFGHIJ0123456789"; - tp->orig_file_number = 1; -} -} // namespace - -TEST_F(TablePropertyTest, UniqueIdHumanStrings) { - TableProperties tp; - SetGoodTableProperties(&tp); - - std::string tmp; - EXPECT_OK(GetExtendedUniqueIdFromTableProperties(tp, &tmp)); - EXPECT_EQ(tmp, - (std::string{{'\x64', '\x74', '\xdf', '\x65', '\x03', '\x23', - '\xbd', '\xf0', '\xb4', '\x8e', '\x64', '\xf3', - '\x03', '\x93', '\x08', '\xca', '\x17', '\x28', - '\x4b', '\x32', '\xe7', '\xf7', '\x44', '\x4b'}})); - EXPECT_EQ(UniqueIdToHumanString(tmp), - "6474DF650323BDF0-B48E64F3039308CA-17284B32E7F7444B"); - - EXPECT_OK(GetUniqueIdFromTableProperties(tp, &tmp)); - EXPECT_EQ(UniqueIdToHumanString(tmp), "6474DF650323BDF0-B48E64F3039308CA"); - - // including zero padding - tmp = std::string(24U, '\0'); - tmp[15] = '\x12'; - tmp[23] = '\xAB'; - EXPECT_EQ(UniqueIdToHumanString(tmp), - "0000000000000000-0000000000000012-00000000000000AB"); - - // And shortened - tmp = std::string(20U, '\0'); - tmp[5] = '\x12'; - tmp[10] = '\xAB'; - tmp[17] = '\xEF'; - EXPECT_EQ(UniqueIdToHumanString(tmp), - "0000000000120000-0000AB0000000000-00EF0000"); - - tmp.resize(16); - EXPECT_EQ(UniqueIdToHumanString(tmp), "0000000000120000-0000AB0000000000"); - - tmp.resize(11); - EXPECT_EQ(UniqueIdToHumanString(tmp), "0000000000120000-0000AB"); - - tmp.resize(6); - EXPECT_EQ(UniqueIdToHumanString(tmp), "000000000012"); - - // Also internal IDs to human string - UniqueId64x3 euid = {12345, 678, 9}; - EXPECT_EQ(InternalUniqueIdToHumanString(&euid), "{12345,678,9}"); - - UniqueId64x2 
uid = {1234, 567890}; - EXPECT_EQ(InternalUniqueIdToHumanString(&uid), "{1234,567890}"); -} - -TEST_F(TablePropertyTest, UniqueIdsFailure) { - TableProperties tp; - std::string tmp; - - // Missing DB id - SetGoodTableProperties(&tp); - tp.db_id = ""; - EXPECT_TRUE(GetUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); - EXPECT_TRUE( - GetExtendedUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); - - // Missing session id - SetGoodTableProperties(&tp); - tp.db_session_id = ""; - EXPECT_TRUE(GetUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); - EXPECT_TRUE( - GetExtendedUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); - - // Missing file number - SetGoodTableProperties(&tp); - tp.orig_file_number = 0; - EXPECT_TRUE(GetUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); - EXPECT_TRUE( - GetExtendedUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); -} - -// This test include all the basic checks except those for index size and block -// size, which will be conducted in separated unit tests. -TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) { - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - - c.Add("a1", "val1"); - c.Add("b2", "val2"); - c.Add("c3", "val3"); - c.Add("d4", "val4"); - c.Add("e5", "val5"); - c.Add("f6", "val6"); - c.Add("g7", "val7"); - c.Add("h8", "val8"); - c.Add("j9", "val9"); - uint64_t diff_internal_user_bytes = 9 * 8; // 8 is seq size, 9 k-v totally - - std::vector keys; - stl_wrappers::KVMap kvmap; - Options options; - options.compression = kNoCompression; - options.statistics = CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kAll); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.block_restart_interval = 1; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0); - - auto& props = *c.GetTableReader()->GetTableProperties(); - ASSERT_EQ(kvmap.size(), props.num_entries); - - auto raw_key_size = kvmap.size() * 2ul; - auto raw_value_size = kvmap.size() * 4ul; - - ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size); - ASSERT_EQ(raw_value_size, props.raw_value_size); - ASSERT_EQ(1ul, props.num_data_blocks); - ASSERT_EQ("", props.filter_policy_name); // no filter policy is used - - // Verify data size. 
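The data-size check below rebuilds the single data block with a BlockBuilder and then adds BlockBasedTable::kBlockTrailerSize, because every block on disk is followed by a 5-byte trailer: one compression-type byte plus a 4-byte checksum computed over the block contents and that compression byte. A hedged sketch of building such a trailer with the builtin-checksum helpers used later in this file (illustrative only; real tables write the trailer through the table builder):

    // Sketch: 5-byte block trailer = [compression type][4-byte little-endian checksum].
    std::string block_contents = "example";  // stand-in for the raw block bytes
    char trailer[5];
    trailer[0] = kNoCompression;  // compression type byte is part of the checksum input
    uint32_t checksum = ComputeBuiltinChecksumWithLastByte(
        kCRC32c, block_contents.data(), block_contents.size(), trailer[0]);
    EncodeFixed32(trailer + 1, checksum);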
- BlockBuilder block_builder(1); - for (const auto& item : kvmap) { - block_builder.Add(item.first, item.second); - } - Slice content = block_builder.Finish(); - ASSERT_EQ(content.size() + BlockBasedTable::kBlockTrailerSize + - diff_internal_user_bytes, - props.data_size); - c.ResetTableReader(); -} - -#ifdef SNAPPY -uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) { - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - constexpr size_t kNumKeys = 10000; - - for (size_t k = 0; k < kNumKeys; ++k) { - c.Add("key" + std::to_string(k), "val" + std::to_string(k)); - } - - std::vector keys; - stl_wrappers::KVMap kvmap; - Options options; - options.compression = kSnappyCompression; - options.statistics = CreateDBStatistics(); - options.statistics->set_stats_level(StatsLevel::kAll); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.block_restart_interval = 1; - table_options.enable_index_compression = compressed; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - c.ResetTableReader(); - return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED); -} -TEST_P(BlockBasedTableTest, IndexUncompressed) { - uint64_t tbl1_compressed_cnt = IndexUncompressedHelper(true); - uint64_t tbl2_compressed_cnt = IndexUncompressedHelper(false); - // tbl1_compressed_cnt should include 1 index block - EXPECT_EQ(tbl2_compressed_cnt + 1, tbl1_compressed_cnt); -} -#endif // SNAPPY - -TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) { - TableConstructor c(&reverse_key_comparator); - std::vector keys; - stl_wrappers::KVMap kvmap; - - { - Options options; - options.compression = CompressionType::kNoCompression; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - - auto& props = *c.GetTableReader()->GetTableProperties(); - - // Default comparator - ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name); - // No merge operator - ASSERT_EQ("nullptr", props.merge_operator_name); - // No prefix extractor - ASSERT_EQ("nullptr", props.prefix_extractor_name); - // No property collectors - ASSERT_EQ("[]", props.property_collectors_names); - // No filter policy is used - ASSERT_EQ("", props.filter_policy_name); - // Compression type == that set: - ASSERT_EQ("NoCompression", props.compression_name); - c.ResetTableReader(); - } - - { - Options options; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.comparator = &reverse_key_comparator; - options.merge_operator = MergeOperators::CreateUInt64AddOperator(); - options.prefix_extractor.reset(NewNoopTransform()); - options.table_properties_collector_factories.emplace_back( - new DummyPropertiesCollectorFactory1()); - options.table_properties_collector_factories.emplace_back( - new DummyPropertiesCollectorFactory2()); - - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - 
GetPlainInternalComparator(options.comparator), &keys, &kvmap); - - auto& props = *c.GetTableReader()->GetTableProperties(); - - ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name); - ASSERT_EQ("UInt64AddOperator", props.merge_operator_name); - ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name); - ASSERT_EQ( - "[DummyPropertiesCollectorFactory1,DummyPropertiesCollectorFactory2]", - props.property_collectors_names); - ASSERT_EQ("", props.filter_policy_name); // no filter policy is used - c.ResetTableReader(); - } -} - -TEST_P(BlockBasedTableTest, RangeDelBlock) { - TableConstructor c(BytewiseComparator()); - std::vector keys = {"1pika", "2chu"}; - std::vector vals = {"p", "c"}; - - std::vector expected_tombstones = { - {"1pika", "2chu", 0}, - {"2chu", "c", 1}, - {"2chu", "c", 0}, - {"c", "p", 0}, - }; - - for (int i = 0; i < 2; i++) { - RangeTombstone t(keys[i], vals[i], i); - std::pair p = t.Serialize(); - c.Add(p.first.Encode().ToString(), p.second); - } - - std::vector sorted_keys; - stl_wrappers::KVMap kvmap; - Options options; - options.compression = kNoCompression; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.block_restart_interval = 1; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - std::unique_ptr internal_cmp( - new InternalKeyComparator(options.comparator)); - c.Finish(options, ioptions, moptions, table_options, *internal_cmp, - &sorted_keys, &kvmap); - - for (int j = 0; j < 2; ++j) { - std::unique_ptr iter( - c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions())); - if (j > 0) { - // For second iteration, delete the table reader object and verify the - // iterator can still access its metablock's range tombstones. 
- c.ResetTableReader(); - } - ASSERT_FALSE(iter->Valid()); - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - for (size_t i = 0; i < expected_tombstones.size(); i++) { - ASSERT_TRUE(iter->Valid()); - ParsedInternalKey parsed_key; - ASSERT_OK( - ParseInternalKey(iter->key(), &parsed_key, true /* log_err_key */)); - RangeTombstone t(parsed_key, iter->value()); - const auto& expected_t = expected_tombstones[i]; - ASSERT_EQ(t.start_key_, expected_t.start_key_); - ASSERT_EQ(t.end_key_, expected_t.end_key_); - ASSERT_EQ(t.seq_, expected_t.seq_); - iter->Next(); - } - ASSERT_TRUE(!iter->Valid()); - } -} - -TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) { - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - c.Add("a1", "val1"); - std::vector keys; - stl_wrappers::KVMap kvmap; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - Options options; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - auto& props = *c.GetTableReader()->GetTableProperties(); - ASSERT_EQ(table_options.filter_policy->Name(), props.filter_policy_name); - c.ResetTableReader(); -} - -// -// BlockBasedTableTest::PrefetchTest -// -void AssertKeysInCache(BlockBasedTable* table_reader, - const std::vector& keys_in_cache, - const std::vector& keys_not_in_cache, - bool convert = false) { - if (convert) { - for (auto key : keys_in_cache) { - InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); - ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); - } - for (auto key : keys_not_in_cache) { - InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); - ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); - } - } else { - for (auto key : keys_in_cache) { - ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key)); - } - for (auto key : keys_not_in_cache) { - ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key)); - } - } -} - -void PrefetchRange(TableConstructor* c, Options* opt, - BlockBasedTableOptions* table_options, const char* key_begin, - const char* key_end, - const std::vector& keys_in_cache, - const std::vector& keys_not_in_cache, - const Status expected_status = Status::OK()) { - // reset the cache and reopen the table - table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4); - opt->table_factory.reset(NewBlockBasedTableFactory(*table_options)); - const ImmutableOptions ioptions2(*opt); - const MutableCFOptions moptions(*opt); - ASSERT_OK(c->Reopen(ioptions2, moptions)); - - // prefetch - auto* table_reader = dynamic_cast(c->GetTableReader()); - Status s; - std::unique_ptr begin, end; - std::unique_ptr i_begin, i_end; - if (key_begin != nullptr) { - if (c->ConvertToInternalKey()) { - i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue)); - begin.reset(new Slice(i_begin->Encode())); - } else { - begin.reset(new Slice(key_begin)); - } - } - if (key_end != nullptr) { - if (c->ConvertToInternalKey()) { - i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue)); - end.reset(new Slice(i_end->Encode())); - } else { - end.reset(new Slice(key_end)); - } - } - s = table_reader->Prefetch(begin.get(), end.get()); - - ASSERT_TRUE(s.code() == expected_status.code()); - - // assert our 
expectation in cache warmup - AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache, - c->ConvertToInternalKey()); - c->ResetTableReader(); -} - -TEST_P(BlockBasedTableTest, PrefetchTest) { - // The purpose of this test is to test the prefetching operation built into - // BlockBasedTable. - Options opt; - std::unique_ptr ikc; - ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); - opt.compression = kNoCompression; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.block_size = 1024; - // big enough so we don't ever lose cached values. - table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4); - opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - c.Add("k01", "hello"); - c.Add("k02", "hello2"); - c.Add("k03", std::string(10000, 'x')); - c.Add("k04", std::string(200000, 'x')); - c.Add("k05", std::string(300000, 'x')); - c.Add("k06", "hello3"); - c.Add("k07", std::string(100000, 'x')); - std::vector keys; - stl_wrappers::KVMap kvmap; - const ImmutableOptions ioptions(opt); - const MutableCFOptions moptions(opt); - c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); - c.ResetTableReader(); - - // We get the following data spread : - // - // Data block Index - // ======================== - // [ k01 k02 k03 ] k03 - // [ k04 ] k04 - // [ k05 ] k05 - // [ k06 k07 ] k07 - - // Simple - PrefetchRange(&c, &opt, &table_options, - /*key_range=*/"k01", "k05", - /*keys_in_cache=*/{"k01", "k02", "k03", "k04", "k05"}, - /*keys_not_in_cache=*/{"k06", "k07"}); - PrefetchRange(&c, &opt, &table_options, "k01", "k01", {"k01", "k02", "k03"}, - {"k04", "k05", "k06", "k07"}); - // odd - PrefetchRange(&c, &opt, &table_options, "a", "z", - {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); - PrefetchRange(&c, &opt, &table_options, "k00", "k00", {"k01", "k02", "k03"}, - {"k04", "k05", "k06", "k07"}); - // Edge cases - PrefetchRange(&c, &opt, &table_options, "k00", "k06", - {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); - PrefetchRange(&c, &opt, &table_options, "k00", "zzz", - {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); - // null keys - PrefetchRange(&c, &opt, &table_options, nullptr, nullptr, - {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); - PrefetchRange(&c, &opt, &table_options, "k04", nullptr, - {"k04", "k05", "k06", "k07"}, {"k01", "k02", "k03"}); - PrefetchRange(&c, &opt, &table_options, nullptr, "k05", - {"k01", "k02", "k03", "k04", "k05"}, {"k06", "k07"}); - // invalid - PrefetchRange(&c, &opt, &table_options, "k06", "k00", {}, {}, - Status::InvalidArgument(Slice("k06 "), Slice("k07"))); - c.ResetTableReader(); -} - -TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) { - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - for (int i = 0; i <= 4; ++i) { - Options options; - // Make each key/value an individual block - table_options.block_size = 64; - switch (i) { - case 0: - // Binary search index - table_options.index_type = BlockBasedTableOptions::kBinarySearch; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - break; - case 1: - // Hash search index - table_options.index_type = BlockBasedTableOptions::kHashSearch; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(NewFixedPrefixTransform(4)); - break; - case 2: - // Hash search index with filter policy - table_options.index_type = 
BlockBasedTableOptions::kHashSearch; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(NewFixedPrefixTransform(4)); - break; - case 3: - // Two-level index - table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - break; - case 4: - // Binary search with first key - table_options.index_type = - BlockBasedTableOptions::kBinarySearchWithFirstKey; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - break; - } - - TableConstructor c(BytewiseComparator(), - true /* convert_to_internal_key_ */); - c.Add("aaaa1", std::string('a', 56)); - c.Add("bbaa1", std::string('a', 56)); - c.Add("cccc1", std::string('a', 56)); - c.Add("bbbb1", std::string('a', 56)); - c.Add("baaa1", std::string('a', 56)); - c.Add("abbb1", std::string('a', 56)); - c.Add("cccc2", std::string('a', 56)); - std::vector keys; - stl_wrappers::KVMap kvmap; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - auto props = c.GetTableReader()->GetTableProperties(); - ASSERT_EQ(7u, props->num_data_blocks); - auto* reader = c.GetTableReader(); - ReadOptions ro; - ro.total_order_seek = true; - std::unique_ptr iter(reader->NewIterator( - ro, moptions.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized)); - - iter->Seek(InternalKey("b", 0, kTypeValue).Encode()); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString()); - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString()); - - iter->Seek(InternalKey("bb", 0, kTypeValue).Encode()); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString()); - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString()); - - iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode()); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString()); - iter->Next(); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString()); - } -} - -TEST_P(BlockBasedTableTest, NoopTransformSeek) { - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - - Options options; - options.comparator = BytewiseComparator(); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(NewNoopTransform()); - - TableConstructor c(options.comparator); - // To tickle the PrefixMayMatch bug it is important that the - // user-key is a single byte so that the index key exactly matches - // the user-key. 
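Seek targets in these tests are wrapped in InternalKey(...).Encode() and results unwrapped with ExtractUserKey() because table iterators operate on internal keys: the user key followed by an 8-byte footer packing the sequence number and value type. A hedged sketch of that round trip (in the context of this test file, assuming <cassert>; the key literal is illustrative):

    InternalKey ik("a", /*sequence=*/1, kTypeValue);
    Slice encoded = ik.Encode();                    // "a" + 8 footer bytes: (seq << 8) | type
    assert(encoded.size() == 1 + 8);
    assert(ExtractUserKey(encoded) == Slice("a"));  // strips the 8-byte footer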
- InternalKey key("a", 1, kTypeValue); - c.Add(key.Encode().ToString(), "b"); - std::vector keys; - stl_wrappers::KVMap kvmap; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - const InternalKeyComparator internal_comparator(options.comparator); - c.Finish(options, ioptions, moptions, table_options, internal_comparator, - &keys, &kvmap); - - auto* reader = c.GetTableReader(); - for (int i = 0; i < 2; ++i) { - ReadOptions ro; - ro.total_order_seek = (i == 0); - std::unique_ptr iter(reader->NewIterator( - ro, moptions.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized)); - - iter->Seek(key.Encode()); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString()); - } -} - -TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) { - // if DB is opened with a prefix extractor of a different name, - // prefix bloom is skipped when read the file - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.filter_policy.reset(NewBloomFilterPolicy(2)); - table_options.whole_key_filtering = false; - - Options options; - options.comparator = BytewiseComparator(); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - - TableConstructor c(options.comparator); - InternalKey key("abcdefghijk", 1, kTypeValue); - c.Add(key.Encode().ToString(), "test"); - std::vector keys; - stl_wrappers::KVMap kvmap; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - const InternalKeyComparator internal_comparator(options.comparator); - c.Finish(options, ioptions, moptions, table_options, internal_comparator, - &keys, &kvmap); - // TODO(Zhongyi): update test to use MutableCFOptions - options.prefix_extractor.reset(NewFixedPrefixTransform(9)); - const ImmutableOptions new_ioptions(options); - const MutableCFOptions new_moptions(options); - ASSERT_OK(c.Reopen(new_ioptions, new_moptions)); - auto reader = c.GetTableReader(); - ReadOptions read_options; - std::unique_ptr db_iter(reader->NewIterator( - read_options, new_moptions.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized)); - - // Test point lookup - // only one kv - for (auto& kv : kvmap) { - db_iter->Seek(kv.first); - ASSERT_TRUE(db_iter->Valid()); - ASSERT_OK(db_iter->status()); - ASSERT_EQ(db_iter->key(), kv.first); - ASSERT_EQ(db_iter->value(), kv.second); - } -} - -TEST_P(BlockBasedTableTest, BadChecksumType) { - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - - Options options; - options.comparator = BytewiseComparator(); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - - TableConstructor c(options.comparator); - InternalKey key("abc", 1, kTypeValue); - c.Add(key.Encode().ToString(), "test"); - std::vector keys; - stl_wrappers::KVMap kvmap; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - const InternalKeyComparator internal_comparator(options.comparator); - c.Finish(options, ioptions, moptions, table_options, internal_comparator, - &keys, &kvmap); - - // Corrupt checksum type (123 is invalid) - auto& sink = *c.TEST_GetSink(); - size_t len = sink.contents_.size(); - ASSERT_EQ(sink.contents_[len - Footer::kNewVersionsEncodedLength], - table_options.checksum); - sink.contents_[len - Footer::kNewVersionsEncodedLength] = char{123}; - - // 
(Re-)Open table file with bad checksum type - const ImmutableOptions new_ioptions(options); - const MutableCFOptions new_moptions(options); - Status s = c.Reopen(new_ioptions, new_moptions); - ASSERT_NOK(s); - // "test" is file name - ASSERT_EQ(s.ToString(), - "Corruption: Corrupt or unsupported checksum type: 123 in test"); -} - -class BuiltinChecksumTest : public testing::Test, - public testing::WithParamInterface {}; - -INSTANTIATE_TEST_CASE_P(SupportedChecksums, BuiltinChecksumTest, - testing::ValuesIn(GetSupportedChecksums())); - -namespace { -std::string ChecksumAsString(const std::string& data, - ChecksumType checksum_type) { - uint32_t v = ComputeBuiltinChecksum(checksum_type, data.data(), data.size()); - - // Verify consistency with other function - if (data.size() >= 1) { - EXPECT_EQ(v, ComputeBuiltinChecksumWithLastByte( - checksum_type, data.data(), data.size() - 1, data.back())); - } - // Little endian as in file - std::array raw_bytes; - EncodeFixed32(raw_bytes.data(), v); - return Slice(raw_bytes.data(), raw_bytes.size()).ToString(/*hex*/ true); -} - -std::string ChecksumAsString(std::string* data, char new_last_byte, - ChecksumType checksum_type) { - data->back() = new_last_byte; - return ChecksumAsString(*data, checksum_type); -} -} // namespace - -// Make sure that checksum values don't change in later versions, even if -// consistent within current version. -TEST_P(BuiltinChecksumTest, ChecksumSchemas) { - // Trailing 'x' chars will be replaced by compression type. Specifically, - // the first byte of a block trailer is compression type, which is part of - // the checksum input. This test does not deal with storing or parsing - // checksums from the trailer (next 4 bytes of trailer). - std::string b0 = "x"; - std::string b1 = "This is a short block!x"; - std::string b2; - for (int i = 0; i < 100; ++i) { - b2.append("This is a long block!"); - } - b2.append("x"); - - std::string empty; - - char ct1 = kNoCompression; - char ct2 = kSnappyCompression; - char ct3 = kZSTD; - - ChecksumType t = GetParam(); - switch (t) { - case kNoChecksum: - EXPECT_EQ(ChecksumAsString(empty, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "00000000"); - break; - case kCRC32c: - EXPECT_EQ(ChecksumAsString(empty, t), "D8EA82A2"); - EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "D28F2549"); - EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "052B2843"); - EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "46F8F711"); - EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "583F0355"); - EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "2F9B0A57"); - EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "ECE7DA1D"); - EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "943EF0AB"); - EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "43A2EDB1"); - EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "00E53D63"); - break; - case kxxHash: - EXPECT_EQ(ChecksumAsString(empty, t), "055DCC02"); - EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "3EB065CF"); - EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "31F79238"); - EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "320D2E00"); - EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "4A2E5FB0"); - 
EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "0BD9F652"); - EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "B4107E50"); - EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "20F4D4BA"); - EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "8F1A1F99"); - EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "A191A338"); - break; - case kxxHash64: - EXPECT_EQ(ChecksumAsString(empty, t), "99E9D851"); - EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "682705DB"); - EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "30E7211B"); - EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "B7BB58E8"); - EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "B74655EF"); - EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "B6C8BBBE"); - EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "AED9E3B4"); - EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "0D4999FE"); - EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "F5932423"); - EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "6B31BAB1"); - break; - case kXXH3: - EXPECT_EQ(ChecksumAsString(empty, t), "00000000"); - EXPECT_EQ(ChecksumAsString(&b0, ct1, t), "C294D338"); - EXPECT_EQ(ChecksumAsString(&b0, ct2, t), "1B174353"); - EXPECT_EQ(ChecksumAsString(&b0, ct3, t), "2D0E20C8"); - EXPECT_EQ(ChecksumAsString(&b1, ct1, t), "B37FB5E6"); - EXPECT_EQ(ChecksumAsString(&b1, ct2, t), "6AFC258D"); - EXPECT_EQ(ChecksumAsString(&b1, ct3, t), "5CE54616"); - EXPECT_EQ(ChecksumAsString(&b2, ct1, t), "FA2D482E"); - EXPECT_EQ(ChecksumAsString(&b2, ct2, t), "23AED845"); - EXPECT_EQ(ChecksumAsString(&b2, ct3, t), "15B7BBDE"); - break; - default: - // Force this test to be updated on new ChecksumTypes - assert(false); - break; - } -} - -TEST_P(BuiltinChecksumTest, ChecksumZeroInputs) { - // Verify that no reasonably sized "all zeros" inputs produce "all zeros" - // output. Otherwise, "wiped" data could appear to be well-formed. - // Assuming essentially random assignment of output values, the likelihood - // of encountering checksum == 0 for an input not specifically crafted is - // 1 in 4 billion. - if (GetParam() == kNoChecksum) { - return; - } - // "Thorough" case is too slow for continouous testing - bool thorough = getenv("ROCKSDB_THOROUGH_CHECKSUM_TEST") != nullptr; - // Verified through 10M - size_t kMaxZerosLen = thorough ? 10000000 : 20000; - std::string zeros(kMaxZerosLen, '\0'); - - for (size_t len = 0; len < kMaxZerosLen; ++len) { - if (thorough && (len & 0xffffU) == 0) { - fprintf(stderr, "t=%u len=%u\n", (unsigned)GetParam(), (unsigned)len); - } - uint32_t v = ComputeBuiltinChecksum(GetParam(), zeros.data(), len); - if (v == 0U) { - // One exception case: - if (GetParam() == kXXH3 && len == 0) { - // This is not a big deal because assuming the block length is known - // from the block handle, which comes from a checksum-verified block, - // there is nothing to corrupt in a zero-length block. And when there - // is a block trailer with compression byte (as in block-based table), - // zero length checksummed data never arises. 
- continue; - } - // Only compute this on failure - SCOPED_TRACE("len=" + std::to_string(len)); - ASSERT_NE(v, 0U); - } - } -} - -void AddInternalKey(TableConstructor* c, const std::string& prefix, - std::string value = "v", int /*suffix_len*/ = 800) { - static Random rnd(1023); - InternalKey k(prefix + rnd.RandomString(800), 0, kTypeValue); - c->Add(k.Encode().ToString(), value); -} - -void TableTest::IndexTest(BlockBasedTableOptions table_options) { - TableConstructor c(BytewiseComparator()); - - // keys with prefix length 3, make sure the key/value is big enough to fill - // one block - AddInternalKey(&c, "0015"); - AddInternalKey(&c, "0035"); - - AddInternalKey(&c, "0054"); - AddInternalKey(&c, "0055"); - - AddInternalKey(&c, "0056"); - AddInternalKey(&c, "0057"); - - AddInternalKey(&c, "0058"); - AddInternalKey(&c, "0075"); - - AddInternalKey(&c, "0076"); - AddInternalKey(&c, "0095"); - - std::vector keys; - stl_wrappers::KVMap kvmap; - Options options; - options.prefix_extractor.reset(NewFixedPrefixTransform(3)); - table_options.block_size = 1700; - table_options.block_cache = NewLRUCache(1024, 4); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - std::unique_ptr comparator( - new InternalKeyComparator(BytewiseComparator())); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, - &kvmap); - auto reader = c.GetTableReader(); - - auto props = reader->GetTableProperties(); - ASSERT_EQ(5u, props->num_data_blocks); - - // TODO(Zhongyi): update test to use MutableCFOptions - ReadOptions read_options; - std::unique_ptr index_iter(reader->NewIterator( - read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized)); - - // -- Find keys do not exist, but have common prefix. - std::vector prefixes = {"001", "003", "005", "007", "009"}; - std::vector lower_bound = { - keys[0], keys[1], keys[2], keys[7], keys[9], - }; - - // find the lower bound of the prefix - for (size_t i = 0; i < prefixes.size(); ++i) { - index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode()); - ASSERT_OK(index_iter->status()); - ASSERT_TRUE(index_iter->Valid()); - - // seek the first element in the block - ASSERT_EQ(lower_bound[i], index_iter->key().ToString()); - ASSERT_EQ("v", index_iter->value().ToString()); - } - - // find the upper bound of prefixes - std::vector upper_bound = { - keys[1], - keys[2], - keys[7], - keys[9], - }; - - // find existing keys - for (const auto& item : kvmap) { - auto ukey = ExtractUserKey(item.first).ToString(); - index_iter->Seek(ukey); - - // ASSERT_OK(regular_iter->status()); - ASSERT_OK(index_iter->status()); - - // ASSERT_TRUE(regular_iter->Valid()); - ASSERT_TRUE(index_iter->Valid()); - - ASSERT_EQ(item.first, index_iter->key().ToString()); - ASSERT_EQ(item.second, index_iter->value().ToString()); - } - - for (size_t i = 0; i < prefixes.size(); ++i) { - // the key is greater than any existing keys. 
- auto key = prefixes[i] + "9"; - index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode()); - - ASSERT_TRUE(index_iter->status().ok() || index_iter->status().IsNotFound()); - ASSERT_TRUE(!index_iter->status().IsNotFound() || !index_iter->Valid()); - if (i == prefixes.size() - 1) { - // last key - ASSERT_TRUE(!index_iter->Valid()); - } else { - ASSERT_TRUE(index_iter->Valid()); - // seek the first element in the block - ASSERT_EQ(upper_bound[i], index_iter->key().ToString()); - ASSERT_EQ("v", index_iter->value().ToString()); - } - } - - // find keys with prefix that don't match any of the existing prefixes. - std::vector non_exist_prefixes = {"002", "004", "006", "008"}; - for (const auto& prefix : non_exist_prefixes) { - index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode()); - // regular_iter->Seek(prefix); - - ASSERT_OK(index_iter->status()); - // Seek to non-existing prefixes should yield either invalid, or a - // key with prefix greater than the target. - if (index_iter->Valid()) { - Slice ukey = ExtractUserKey(index_iter->key()); - Slice ukey_prefix = options.prefix_extractor->Transform(ukey); - ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0); - } - } - for (const auto& prefix : non_exist_prefixes) { - index_iter->SeekForPrev(InternalKey(prefix, 0, kTypeValue).Encode()); - // regular_iter->Seek(prefix); - - ASSERT_OK(index_iter->status()); - // SeekForPrev to non-existing prefixes should yield either invalid, or a - // key with prefix smaller than the target. - if (index_iter->Valid()) { - Slice ukey = ExtractUserKey(index_iter->key()); - Slice ukey_prefix = options.prefix_extractor->Transform(ukey); - ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) > 0); - } - } - - { - // Test the reseek case. It should impact the partitioned index more. - ReadOptions ro; - ro.total_order_seek = true; - std::unique_ptr index_iter2(reader->NewIterator( - ro, moptions.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized)); - - // Things to cover in partitioned index: - // 1. Both Seek() and SeekToLast() have an optimization to avoid - // re-seeking the leaf index block if it remains the same one, and - // they reuse the same variable. - // 2. When Next() or Prev() is called, the block moves, so the - // optimization should kick in only with the current one.
- index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode()); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->SeekToLast(); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode()); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->SeekToLast(); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); - index_iter2->Prev(); - ASSERT_TRUE(index_iter2->Valid()); - index_iter2->Prev(); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode()); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); - index_iter2->Prev(); - ASSERT_TRUE(index_iter2->Valid()); - index_iter2->Prev(); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->SeekToLast(); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode()); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->Prev(); - ASSERT_TRUE(index_iter2->Valid()); - index_iter2->Prev(); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->Seek(InternalKey("0075", 0, kTypeValue).Encode()); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->Next(); - ASSERT_TRUE(index_iter2->Valid()); - index_iter2->Next(); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); - - index_iter2->SeekToLast(); - ASSERT_TRUE(index_iter2->Valid()); - ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); - } - - c.ResetTableReader(); -} - -TEST_P(BlockBasedTableTest, BinaryIndexTest) { - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.index_type = BlockBasedTableOptions::kBinarySearch; - IndexTest(table_options); -} - -TEST_P(BlockBasedTableTest, HashIndexTest) { - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.index_type = BlockBasedTableOptions::kHashSearch; - IndexTest(table_options); -} - -TEST_P(BlockBasedTableTest, PartitionIndexTest) { - const int max_index_keys = 5; - const int est_max_index_key_value_size = 32; - const int est_max_index_size = max_index_keys * est_max_index_key_value_size; - for (int i = 1; i <= est_max_index_size + 1; i++) { - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; - table_options.metadata_block_size = i; - IndexTest(table_options); - } -} - -TEST_P(BlockBasedTableTest, IndexSeekOptimizationIncomplete) { - std::unique_ptr comparator( - new InternalKeyComparator(BytewiseComparator())); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - Options options; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - - TableConstructor c(BytewiseComparator()); - AddInternalKey(&c, "pika"); - 
- std::vector keys; - stl_wrappers::KVMap kvmap; - c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, - &kvmap); - ASSERT_EQ(1, keys.size()); - - auto reader = c.GetTableReader(); - ReadOptions ropt; - ropt.read_tier = ReadTier::kBlockCacheTier; - std::unique_ptr iter(reader->NewIterator( - ropt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized)); - - auto ikey = [](Slice user_key) { - return InternalKey(user_key, 0, kTypeValue).Encode().ToString(); - }; - - iter->Seek(ikey("pika")); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsIncomplete()); - - // This used to crash at some point. - iter->Seek(ikey("pika")); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->status().IsIncomplete()); -} - -TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey1) { - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey; - IndexTest(table_options); -} - -class CustomFlushBlockPolicy : public FlushBlockPolicyFactory, - public FlushBlockPolicy { - public: - explicit CustomFlushBlockPolicy(std::vector keys_per_block) - : keys_per_block_(keys_per_block) {} - - const char* Name() const override { return "CustomFlushBlockPolicy"; } - - FlushBlockPolicy* NewFlushBlockPolicy(const BlockBasedTableOptions&, - const BlockBuilder&) const override { - return new CustomFlushBlockPolicy(keys_per_block_); - } - - bool Update(const Slice&, const Slice&) override { - if (keys_in_current_block_ >= keys_per_block_.at(current_block_idx_)) { - ++current_block_idx_; - keys_in_current_block_ = 1; - return true; - } - - ++keys_in_current_block_; - return false; - } - - std::vector keys_per_block_; - - int current_block_idx_ = 0; - int keys_in_current_block_ = 0; -}; - -TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey2) { - for (int use_first_key = 0; use_first_key < 2; ++use_first_key) { - SCOPED_TRACE("use_first_key = " + std::to_string(use_first_key)); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.index_type = - use_first_key ? BlockBasedTableOptions::kBinarySearchWithFirstKey - : BlockBasedTableOptions::kBinarySearch; - table_options.block_cache = NewLRUCache(10000); // fits all blocks - table_options.index_shortening = - BlockBasedTableOptions::IndexShorteningMode::kNoShortening; - table_options.flush_block_policy_factory = - std::make_shared(std::vector{2, 1, 3, 2}); - Options options; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - options.statistics = CreateDBStatistics(); - Statistics* stats = options.statistics.get(); - std::unique_ptr comparator( - new InternalKeyComparator(BytewiseComparator())); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - - TableConstructor c(BytewiseComparator()); - - // Block 0. - AddInternalKey(&c, "aaaa", "v0"); - AddInternalKey(&c, "aaac", "v1"); - - // Block 1. - AddInternalKey(&c, "aaca", "v2"); - - // Block 2. - AddInternalKey(&c, "caaa", "v3"); - AddInternalKey(&c, "caac", "v4"); - AddInternalKey(&c, "caae", "v5"); - - // Block 3. - AddInternalKey(&c, "ccaa", "v6"); - AddInternalKey(&c, "ccac", "v7"); - - // Write the file. 
- std::vector keys; - stl_wrappers::KVMap kvmap; - c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, - &kvmap); - ASSERT_EQ(8, keys.size()); - - auto reader = c.GetTableReader(); - auto props = reader->GetTableProperties(); - ASSERT_EQ(4u, props->num_data_blocks); - ReadOptions read_options; - std::unique_ptr iter(reader->NewIterator( - read_options, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized, - /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true)); - - // Shouldn't have read data blocks before iterator is seeked. - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - auto ikey = [](Slice user_key) { - return InternalKey(user_key, 0, kTypeValue).Encode().ToString(); - }; - - // Seek to a key between blocks. If index contains first key, we shouldn't - // read any data blocks until value is requested. - iter->Seek(ikey("aaba")); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[2], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 0 : 1, - stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - ASSERT_TRUE(iter->PrepareValue()); - EXPECT_EQ("v2", iter->value().ToString()); - EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Seek to the middle of a block. The block should be read right away. - iter->Seek(ikey("caab")); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[4], iter->key().ToString()); - EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - ASSERT_TRUE(iter->PrepareValue()); - EXPECT_EQ("v4", iter->value().ToString()); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Seek to just before the same block and don't access value. - // The iterator should keep pinning the block contents. - iter->Seek(ikey("baaa")); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[3], iter->key().ToString()); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Seek to the same block again to check that the block is still pinned. - iter->Seek(ikey("caae")); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[5], iter->key().ToString()); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - ASSERT_TRUE(iter->PrepareValue()); - EXPECT_EQ("v5", iter->value().ToString()); - EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Step forward and fall through to the next block. Don't access value. - iter->Next(); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[6], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 2 : 3, - stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Step forward again. Block should be read. - iter->Next(); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[7], iter->key().ToString()); - EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - ASSERT_TRUE(iter->PrepareValue()); - EXPECT_EQ("v7", iter->value().ToString()); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Step forward and reach the end. - iter->Next(); - EXPECT_FALSE(iter->Valid()); - EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Seek to a single-key block and step forward without accessing value. 
- iter->Seek(ikey("aaca")); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[2], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 0 : 1, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - iter->Next(); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[3], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 1 : 2, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - ASSERT_TRUE(iter->PrepareValue()); - EXPECT_EQ("v3", iter->value().ToString()); - EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - - // Seek between blocks and step back without accessing value. - iter->Seek(ikey("aaca")); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[2], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 2 : 3, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - - iter->Prev(); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[1], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 2 : 3, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - // All blocks are in cache now, there'll be no more misses ever. - EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - ASSERT_TRUE(iter->PrepareValue()); - EXPECT_EQ("v1", iter->value().ToString()); - - // Next into the next block again. - iter->Next(); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[2], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 2 : 4, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Seek to first and step back without accessing value. - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[0], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 2 : 5, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - iter->Prev(); - EXPECT_FALSE(iter->Valid()); - EXPECT_EQ(use_first_key ? 2 : 5, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - // Do some SeekForPrev() and SeekToLast() just to cover all methods. - iter->SeekForPrev(ikey("caad")); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[4], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 3 : 6, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - ASSERT_TRUE(iter->PrepareValue()); - EXPECT_EQ("v4", iter->value().ToString()); - EXPECT_EQ(use_first_key ? 3 : 6, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - iter->SeekToLast(); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(keys[7], iter->key().ToString()); - EXPECT_EQ(use_first_key ? 4 : 7, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - ASSERT_TRUE(iter->PrepareValue()); - EXPECT_EQ("v7", iter->value().ToString()); - EXPECT_EQ(use_first_key ? 
4 : 7, - stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - - EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - - c.ResetTableReader(); - } -} - -TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKeyGlobalSeqno) { - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey; - table_options.block_cache = NewLRUCache(10000); - Options options; - options.statistics = CreateDBStatistics(); - Statistics* stats = options.statistics.get(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - std::unique_ptr comparator( - new InternalKeyComparator(BytewiseComparator())); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - - TableConstructor c(BytewiseComparator(), /* convert_to_internal_key */ false, - /* level */ -1, /* largest_seqno */ 42); - - c.Add(InternalKey("b", 0, kTypeValue).Encode().ToString(), "x"); - c.Add(InternalKey("c", 0, kTypeValue).Encode().ToString(), "y"); - - std::vector keys; - stl_wrappers::KVMap kvmap; - c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, - &kvmap); - ASSERT_EQ(2, keys.size()); - - auto reader = c.GetTableReader(); - auto props = reader->GetTableProperties(); - ASSERT_EQ(1u, props->num_data_blocks); - ReadOptions read_options; - std::unique_ptr iter(reader->NewIterator( - read_options, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized, - /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true)); - - iter->Seek(InternalKey("a", 0, kTypeValue).Encode().ToString()); - ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(), - iter->key().ToString()); - EXPECT_NE(keys[0], iter->key().ToString()); - // Key should have been served from index, without reading data blocks. - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - - ASSERT_TRUE(iter->PrepareValue()); - EXPECT_EQ("x", iter->value().ToString()); - EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); - EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); - EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(), - iter->key().ToString()); - - c.ResetTableReader(); -} - -// It's very hard to figure out the index block size of a table accurately. -// To make sure we get the index size, we just make sure that as the key number -// grows, the index block size also grows. -TEST_P(BlockBasedTableTest, IndexSizeStat) { - uint64_t last_index_size = 0; - - // We need to use random keys since purely human-readable text - // may compress well, resulting in an insignificant change of index - // block size. - Random rnd(test::RandomSeed()); - std::vector keys; - - for (int i = 0; i < 100; ++i) { - keys.push_back(rnd.RandomString(10000)); - } - - // Each time we load one more key into the table, the table index block - // size is expected to be larger than last time's.
- for (size_t i = 1; i < keys.size(); ++i) { - TableConstructor c(BytewiseComparator(), - true /* convert_to_internal_key_ */); - for (size_t j = 0; j < i; ++j) { - c.Add(keys[j], "val"); - } - - std::vector ks; - stl_wrappers::KVMap kvmap; - Options options; - options.compression = kNoCompression; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.block_restart_interval = 1; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &ks, &kvmap); - auto index_size = c.GetTableReader()->GetTableProperties()->index_size; - ASSERT_GT(index_size, last_index_size); - last_index_size = index_size; - c.ResetTableReader(); - } -} - -TEST_P(BlockBasedTableTest, NumBlockStat) { - Random rnd(test::RandomSeed()); - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - Options options; - options.compression = kNoCompression; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.block_restart_interval = 1; - table_options.block_size = 1000; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - for (int i = 0; i < 10; ++i) { - // the key/val are slightly smaller than block size, so that each block - // holds roughly one key/value pair. - c.Add(rnd.RandomString(900), "val"); - } - - std::vector ks; - stl_wrappers::KVMap kvmap; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &ks, &kvmap); - ASSERT_EQ(kvmap.size(), - c.GetTableReader()->GetTableProperties()->num_data_blocks); - c.ResetTableReader(); -} - -TEST_P(BlockBasedTableTest, TracingGetTest) { - TableConstructor c(BytewiseComparator()); - Options options; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - options.create_if_missing = true; - table_options.block_cache = NewLRUCache(1024 * 1024, 0); - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - SetupTracingTest(&c); - std::vector keys; - stl_wrappers::KVMap kvmap; - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - std::string user_key = "k01"; - InternalKey internal_key(user_key, 0, kTypeValue); - std::string encoded_key = internal_key.Encode().ToString(); - for (uint32_t i = 1; i <= 2; i++) { - PinnableSlice value; - GetContext get_context(options.comparator, nullptr, nullptr, nullptr, - GetContext::kNotFound, user_key, &value, nullptr, - nullptr, nullptr, true, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr, /*tracing_get_id=*/i); - get_perf_context()->Reset(); - ASSERT_OK(c.GetTableReader()->Get(ReadOptions(), encoded_key, &get_context, - moptions.prefix_extractor.get())); - ASSERT_EQ(get_context.State(), GetContext::kFound); - ASSERT_EQ(value.ToString(), kDummyValue); - } - - // Verify traces. - std::vector expected_records; - // The first two records should be prefetching index and filter blocks. 
- BlockCacheTraceRecord record; - record.block_type = TraceType::kBlockTraceIndexBlock; - record.caller = TableReaderCaller::kPrefetch; - record.is_cache_hit = false; - record.no_insert = false; - expected_records.push_back(record); - record.block_type = TraceType::kBlockTraceFilterBlock; - expected_records.push_back(record); - // Then we should have three records for one index, one filter, and one data - // block access. - record.get_id = 1; - record.block_type = TraceType::kBlockTraceFilterBlock; - record.caller = TableReaderCaller::kUserGet; - record.get_from_user_specified_snapshot = false; - record.referenced_key = encoded_key; - record.referenced_key_exist_in_block = true; - record.is_cache_hit = true; - expected_records.push_back(record); - record.block_type = TraceType::kBlockTraceIndexBlock; - expected_records.push_back(record); - record.is_cache_hit = false; - record.block_type = TraceType::kBlockTraceDataBlock; - expected_records.push_back(record); - // The second get should all observe cache hits. - record.is_cache_hit = true; - record.get_id = 2; - record.block_type = TraceType::kBlockTraceFilterBlock; - record.caller = TableReaderCaller::kUserGet; - record.get_from_user_specified_snapshot = false; - record.referenced_key = encoded_key; - expected_records.push_back(record); - record.block_type = TraceType::kBlockTraceIndexBlock; - expected_records.push_back(record); - record.block_type = TraceType::kBlockTraceDataBlock; - expected_records.push_back(record); - VerifyBlockAccessTrace(&c, expected_records); - c.ResetTableReader(); -} - -TEST_P(BlockBasedTableTest, TracingApproximateOffsetOfTest) { - TableConstructor c(BytewiseComparator()); - Options options; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - options.create_if_missing = true; - table_options.block_cache = NewLRUCache(1024 * 1024, 0); - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - SetupTracingTest(&c); - std::vector keys; - stl_wrappers::KVMap kvmap; - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - for (uint32_t i = 1; i <= 2; i++) { - std::string user_key = "k01"; - InternalKey internal_key(user_key, 0, kTypeValue); - std::string encoded_key = internal_key.Encode().ToString(); - c.GetTableReader()->ApproximateOffsetOf( - encoded_key, TableReaderCaller::kUserApproximateSize); - } - // Verify traces. - std::vector expected_records; - // The first two records should be prefetching index and filter blocks. - BlockCacheTraceRecord record; - record.block_type = TraceType::kBlockTraceIndexBlock; - record.caller = TableReaderCaller::kPrefetch; - record.is_cache_hit = false; - record.no_insert = false; - expected_records.push_back(record); - record.block_type = TraceType::kBlockTraceFilterBlock; - expected_records.push_back(record); - // Then we should have two records for only index blocks. 
- record.block_type = TraceType::kBlockTraceIndexBlock; - record.caller = TableReaderCaller::kUserApproximateSize; - record.is_cache_hit = true; - expected_records.push_back(record); - expected_records.push_back(record); - VerifyBlockAccessTrace(&c, expected_records); - c.ResetTableReader(); -} - -TEST_P(BlockBasedTableTest, TracingIterator) { - TableConstructor c(BytewiseComparator()); - Options options; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - options.create_if_missing = true; - table_options.block_cache = NewLRUCache(1024 * 1024, 0); - table_options.cache_index_and_filter_blocks = true; - table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - SetupTracingTest(&c); - std::vector keys; - stl_wrappers::KVMap kvmap; - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - - for (uint32_t i = 1; i <= 2; i++) { - ReadOptions read_options; - std::unique_ptr iter(c.GetTableReader()->NewIterator( - read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUserIterator)); - iter->SeekToFirst(); - while (iter->Valid()) { - iter->key(); - iter->value(); - iter->Next(); - } - ASSERT_OK(iter->status()); - iter.reset(); - } - - // Verify traces. - std::vector expected_records; - // The first two records should be prefetching index and filter blocks. - BlockCacheTraceRecord record; - record.block_type = TraceType::kBlockTraceIndexBlock; - record.caller = TableReaderCaller::kPrefetch; - record.is_cache_hit = false; - record.no_insert = false; - expected_records.push_back(record); - record.block_type = TraceType::kBlockTraceFilterBlock; - expected_records.push_back(record); - // Then we should have three records for index and two data block access. - record.block_type = TraceType::kBlockTraceIndexBlock; - record.caller = TableReaderCaller::kUserIterator; - record.is_cache_hit = true; - expected_records.push_back(record); - record.block_type = TraceType::kBlockTraceDataBlock; - record.is_cache_hit = false; - expected_records.push_back(record); - expected_records.push_back(record); - // When we iterate this file for the second time, we should observe all cache - // hits. - record.block_type = TraceType::kBlockTraceIndexBlock; - record.is_cache_hit = true; - expected_records.push_back(record); - record.block_type = TraceType::kBlockTraceDataBlock; - expected_records.push_back(record); - expected_records.push_back(record); - VerifyBlockAccessTrace(&c, expected_records); - c.ResetTableReader(); -} - -// A simple tool that takes the snapshot of block cache statistics. 
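The helper class defined next only packages reads of the block cache tickers that the public Statistics interface already exposes. As a minimal standalone sketch (not part of the deleted test file; the main() scaffold and variable names are illustrative, while CreateDBStatistics(), getTickerCount() and the BLOCK_CACHE_* tickers are the statistics API these tests already use), this is the underlying mechanism: tickers are monotonically increasing counters, so copying them before and after an operation and diffing the copies tells a test exactly how many index and data block hits and misses that operation caused.

#include <cstdint>
#include <cstdio>
#include <memory>

#include "rocksdb/statistics.h"

int main() {
  using namespace ROCKSDB_NAMESPACE;
  // In the tests, this object is installed via options.statistics and is
  // bumped by the table reader on every block cache lookup.
  std::shared_ptr<Statistics> stats = CreateDBStatistics();
  // A "snapshot" is nothing more than a copy of the current ticker values.
  const uint64_t index_miss = stats->getTickerCount(BLOCK_CACHE_INDEX_MISS);
  const uint64_t index_hit = stats->getTickerCount(BLOCK_CACHE_INDEX_HIT);
  const uint64_t data_miss = stats->getTickerCount(BLOCK_CACHE_DATA_MISS);
  const uint64_t data_hit = stats->getTickerCount(BLOCK_CACHE_DATA_HIT);
  std::printf("index miss/hit %llu/%llu, data miss/hit %llu/%llu\n",
              (unsigned long long)index_miss, (unsigned long long)index_hit,
              (unsigned long long)data_miss, (unsigned long long)data_hit);
  return 0;
}

The class that follows wraps the same reads together with assertion helpers so individual tests can state their expectations as exact hit/miss counts.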
-class BlockCachePropertiesSnapshot { - public: - explicit BlockCachePropertiesSnapshot(Statistics* statistics) { - block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS); - block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT); - index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS); - index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT); - data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS); - data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT); - filter_block_cache_miss = - statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS); - filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT); - block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ); - block_cache_bytes_write = - statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE); - } - - void AssertIndexBlockStat(int64_t expected_index_block_cache_miss, - int64_t expected_index_block_cache_hit) { - ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss); - ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit); - } - - void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss, - int64_t expected_filter_block_cache_hit) { - ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss); - ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit); - } - - // Check if the fetched props matches the expected ones. - // TODO(kailiu) Use this only when you disabled filter policy! - void AssertEqual(int64_t expected_index_block_cache_miss, - int64_t expected_index_block_cache_hit, - int64_t expected_data_block_cache_miss, - int64_t expected_data_block_cache_hit) const { - ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss); - ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit); - ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss); - ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit); - ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss, - block_cache_miss); - ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit, - block_cache_hit); - } - - int64_t GetCacheBytesRead() { return block_cache_bytes_read; } - - int64_t GetCacheBytesWrite() { return block_cache_bytes_write; } - - private: - int64_t block_cache_miss = 0; - int64_t block_cache_hit = 0; - int64_t index_block_cache_miss = 0; - int64_t index_block_cache_hit = 0; - int64_t data_block_cache_miss = 0; - int64_t data_block_cache_hit = 0; - int64_t filter_block_cache_miss = 0; - int64_t filter_block_cache_hit = 0; - int64_t block_cache_bytes_read = 0; - int64_t block_cache_bytes_write = 0; -}; - -// Make sure, by default, index/filter blocks were pre-loaded (meaning we won't -// use block cache to store them). 
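The two tests below exercise the option behind that comment, BlockBasedTableOptions::cache_index_and_filter_blocks. The following is a hedged configuration sketch of the two modes, not taken from the deleted file: the helper name MakeTableOptions and the literal sizes are illustrative, while the option itself, NewLRUCache, NewBloomFilterPolicy and NewBlockBasedTableFactory are the public API these tests already use.

#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

// Hypothetical helper: build Options that either pre-load index/filter
// blocks into the table reader (flag == false, the default) or route them
// through the block cache.
ROCKSDB_NAMESPACE::Options MakeTableOptions(bool index_and_filter_in_cache) {
  using namespace ROCKSDB_NAMESPACE;
  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(/*capacity=*/2048,
                                          /*num_shard_bits=*/2);
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
  // The knob under test: with false, index/filter blocks are read once at
  // open time and held by the table reader, so the BLOCK_CACHE_INDEX_* and
  // BLOCK_CACHE_FILTER_* tickers stay at zero; with true, they are fetched
  // through (and charged to) the block cache instead.
  table_options.cache_index_and_filter_blocks = index_and_filter_in_cache;
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  return options;
}

BlockCacheDisabledTest below asserts the default (pre-loaded) behavior; FilterBlockInBlockCache then enables the option and checks the cache tickers instead.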
-TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) { - Options options; - options.create_if_missing = true; - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.block_cache = NewLRUCache(1024, 4); - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - std::vector keys; - stl_wrappers::KVMap kvmap; - - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - c.Add("key", "value"); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - - // preloading filter/index blocks is enabled. - auto reader = dynamic_cast(c.GetTableReader()); - ASSERT_FALSE(reader->TEST_FilterBlockInCache()); - ASSERT_FALSE(reader->TEST_IndexBlockInCache()); - - { - // nothing happens in the beginning - BlockCachePropertiesSnapshot props(options.statistics.get()); - props.AssertIndexBlockStat(0, 0); - props.AssertFilterBlockStat(0, 0); - } - - { - GetContext get_context(options.comparator, nullptr, nullptr, nullptr, - GetContext::kNotFound, Slice(), nullptr, nullptr, - nullptr, nullptr, true, nullptr, nullptr); - // a hack just to trigger BlockBasedTable::GetFilter. - ASSERT_OK(reader->Get(ReadOptions(), "non-exist-key", &get_context, - moptions.prefix_extractor.get())); - BlockCachePropertiesSnapshot props(options.statistics.get()); - props.AssertIndexBlockStat(0, 0); - props.AssertFilterBlockStat(0, 0); - } -} - -// Due to the difficulties of the interaction between statistics, this test -// only tests the case where the index block is put into the block cache. -TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) { - // -- Table construction - Options options; - options.create_if_missing = true; - options.statistics = CreateDBStatistics(); - - // Enable the cache for index/filter blocks - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - LRUCacheOptions co; - co.capacity = 2048; - co.num_shard_bits = 2; - co.metadata_charge_policy = kDontChargeCacheMetadata; - table_options.block_cache = NewLRUCache(co); - table_options.cache_index_and_filter_blocks = true; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - std::vector keys; - stl_wrappers::KVMap kvmap; - - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - c.Add("key", "value"); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - // preloading filter/index blocks is prohibited. - auto* reader = dynamic_cast(c.GetTableReader()); - ASSERT_FALSE(reader->TEST_FilterBlockInCache()); - ASSERT_TRUE(reader->TEST_IndexBlockInCache()); - - // -- PART 1: Open with regular block cache. - // Since block_cache is disabled, no cache activities will be involved. - std::unique_ptr iter; - - int64_t last_cache_bytes_read = 0; - // At first, no block will be accessed. - { - BlockCachePropertiesSnapshot props(options.statistics.get()); - // index will be added to block cache.
- props.AssertEqual(1, // index block miss - 0, 0, 0); - ASSERT_EQ(props.GetCacheBytesRead(), 0); - ASSERT_EQ(props.GetCacheBytesWrite(), - static_cast(table_options.block_cache->GetUsage())); - last_cache_bytes_read = props.GetCacheBytesRead(); - } - - // Only index block will be accessed - { - iter.reset(c.NewIterator(moptions.prefix_extractor.get())); - BlockCachePropertiesSnapshot props(options.statistics.get()); - // NOTE: to help better highlight the "detla" of each ticker, I use - // + to indicate the increment of changed - // value; other numbers remain the same. - props.AssertEqual(1, 0 + 1, // index block hit - 0, 0); - // Cache hit, bytes read from cache should increase - ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read); - ASSERT_EQ(props.GetCacheBytesWrite(), - static_cast(table_options.block_cache->GetUsage())); - last_cache_bytes_read = props.GetCacheBytesRead(); - } - - // Only data block will be accessed - { - iter->SeekToFirst(); - ASSERT_OK(iter->status()); - BlockCachePropertiesSnapshot props(options.statistics.get()); - props.AssertEqual(1, 1, 0 + 1, // data block miss - 0); - // Cache miss, Bytes read from cache should not change - ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read); - ASSERT_EQ(props.GetCacheBytesWrite(), - static_cast(table_options.block_cache->GetUsage())); - last_cache_bytes_read = props.GetCacheBytesRead(); - } - - // Data block will be in cache - { - iter.reset(c.NewIterator(moptions.prefix_extractor.get())); - iter->SeekToFirst(); - ASSERT_OK(iter->status()); - BlockCachePropertiesSnapshot props(options.statistics.get()); - props.AssertEqual(1, 1 + 1, /* index block hit */ - 1, 0 + 1 /* data block hit */); - // Cache hit, bytes read from cache should increase - ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read); - ASSERT_EQ(props.GetCacheBytesWrite(), - static_cast(table_options.block_cache->GetUsage())); - } - // release the iterator so that the block cache can reset correctly. - iter.reset(); - - c.ResetTableReader(); - - // -- PART 2: Open with very small block cache - // In this test, no block will ever get hit since the block cache is - // too small to fit even one entry. - table_options.block_cache = NewLRUCache(1, 4); - options.statistics = CreateDBStatistics(); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - const ImmutableOptions ioptions2(options); - const MutableCFOptions moptions2(options); - ASSERT_OK(c.Reopen(ioptions2, moptions2)); - { - BlockCachePropertiesSnapshot props(options.statistics.get()); - props.AssertEqual(1, // index block miss - 0, 0, 0); - // Cache miss, Bytes read from cache should not change - ASSERT_EQ(props.GetCacheBytesRead(), 0); - } - - { - // Both index and data block get accessed. - // It first cache index block then data block. But since the cache size - // is only 1, index block will be purged after data block is inserted. - iter.reset(c.NewIterator(moptions2.prefix_extractor.get())); - BlockCachePropertiesSnapshot props(options.statistics.get()); - props.AssertEqual(1 + 1, // index block miss - 0, 0, // data block miss - 0); - // Cache hit, bytes read from cache should increase - ASSERT_EQ(props.GetCacheBytesRead(), 0); - } - - { - // SeekToFirst() accesses data block. With similar reason, we expect data - // block's cache miss. 
- iter->SeekToFirst(); - ASSERT_OK(iter->status()); - BlockCachePropertiesSnapshot props(options.statistics.get()); - props.AssertEqual(2, 0, 0 + 1, // data block miss - 0); - // Cache miss, Bytes read from cache should not change - ASSERT_EQ(props.GetCacheBytesRead(), 0); - } - iter.reset(); - c.ResetTableReader(); - - // -- PART 3: Open table with bloom filter enabled but not in SST file - table_options.block_cache = NewLRUCache(4096, 4); - table_options.cache_index_and_filter_blocks = false; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - TableConstructor c3(BytewiseComparator()); - std::string user_key = "k01"; - InternalKey internal_key(user_key, 0, kTypeValue); - c3.Add(internal_key.Encode().ToString(), "hello"); - ImmutableOptions ioptions3(options); - MutableCFOptions moptions3(options); - // Generate table without filter policy - c3.Finish(options, ioptions3, moptions3, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - c3.ResetTableReader(); - - // Open table with filter policy - table_options.filter_policy.reset(NewBloomFilterPolicy(1)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - options.statistics = CreateDBStatistics(); - ImmutableOptions ioptions4(options); - MutableCFOptions moptions4(options); - ASSERT_OK(c3.Reopen(ioptions4, moptions4)); - reader = dynamic_cast(c3.GetTableReader()); - ASSERT_FALSE(reader->TEST_FilterBlockInCache()); - PinnableSlice value; - GetContext get_context(options.comparator, nullptr, nullptr, nullptr, - GetContext::kNotFound, user_key, &value, nullptr, - nullptr, nullptr, true, nullptr, nullptr); - ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context, - moptions4.prefix_extractor.get())); - ASSERT_STREQ(value.data(), "hello"); - BlockCachePropertiesSnapshot props(options.statistics.get()); - props.AssertFilterBlockStat(0, 0); - c3.ResetTableReader(); -} - -void ValidateBlockSizeDeviation(int value, int expected) { - BlockBasedTableOptions table_options; - table_options.block_size_deviation = value; - BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options); - - const BlockBasedTableOptions* normalized_table_options = - factory->GetOptions(); - ASSERT_EQ(normalized_table_options->block_size_deviation, expected); - - delete factory; -} - -void ValidateBlockRestartInterval(int value, int expected) { - BlockBasedTableOptions table_options; - table_options.block_restart_interval = value; - BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options); - - const BlockBasedTableOptions* normalized_table_options = - factory->GetOptions(); - ASSERT_EQ(normalized_table_options->block_restart_interval, expected); - - delete factory; -} - -TEST_P(BlockBasedTableTest, InvalidOptions) { - // invalid values for block_size_deviation (<0 or >100) are silently set to 0 - ValidateBlockSizeDeviation(-10, 0); - ValidateBlockSizeDeviation(-1, 0); - ValidateBlockSizeDeviation(0, 0); - ValidateBlockSizeDeviation(1, 1); - ValidateBlockSizeDeviation(99, 99); - ValidateBlockSizeDeviation(100, 100); - ValidateBlockSizeDeviation(101, 0); - ValidateBlockSizeDeviation(1000, 0); - - // invalid values for block_restart_interval (<1) are silently set to 1 - ValidateBlockRestartInterval(-10, 1); - ValidateBlockRestartInterval(-1, 1); - ValidateBlockRestartInterval(0, 1); - ValidateBlockRestartInterval(1, 1); - ValidateBlockRestartInterval(2, 2); - ValidateBlockRestartInterval(1000, 1000); -} - -TEST_P(BlockBasedTableTest, 
BlockReadCountTest) { - // bloom_filter_type = 1 -- full filter using use_block_based_builder=false - // bloom_filter_type = 2 -- full filter using use_block_based_builder=true - // because of API change to hide block-based filter - for (int bloom_filter_type = 1; bloom_filter_type <= 2; ++bloom_filter_type) { - for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2; - ++index_and_filter_in_cache) { - Options options; - options.create_if_missing = true; - - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.block_cache = NewLRUCache(1, 0); - table_options.cache_index_and_filter_blocks = index_and_filter_in_cache; - table_options.filter_policy.reset( - NewBloomFilterPolicy(10, bloom_filter_type == 2)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - std::vector keys; - stl_wrappers::KVMap kvmap; - - TableConstructor c(BytewiseComparator()); - std::string user_key = "k04"; - InternalKey internal_key(user_key, 0, kTypeValue); - std::string encoded_key = internal_key.Encode().ToString(); - c.Add(encoded_key, "hello"); - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - // Generate table with filter policy - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - auto reader = c.GetTableReader(); - PinnableSlice value; - { - GetContext get_context(options.comparator, nullptr, nullptr, nullptr, - GetContext::kNotFound, user_key, &value, nullptr, - nullptr, nullptr, true, nullptr, nullptr); - get_perf_context()->Reset(); - ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context, - moptions.prefix_extractor.get())); - if (index_and_filter_in_cache) { - // data, index and filter block - ASSERT_EQ(get_perf_context()->block_read_count, 3); - ASSERT_EQ(get_perf_context()->index_block_read_count, 1); - ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); - } else { - // just the data block - ASSERT_EQ(get_perf_context()->block_read_count, 1); - } - ASSERT_EQ(get_context.State(), GetContext::kFound); - ASSERT_STREQ(value.data(), "hello"); - } - - // Get non-existing key - user_key = "does-not-exist"; - internal_key = InternalKey(user_key, 0, kTypeValue); - encoded_key = internal_key.Encode().ToString(); - - value.Reset(); - { - GetContext get_context(options.comparator, nullptr, nullptr, nullptr, - GetContext::kNotFound, user_key, &value, nullptr, - nullptr, nullptr, true, nullptr, nullptr); - get_perf_context()->Reset(); - ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context, - moptions.prefix_extractor.get())); - ASSERT_EQ(get_context.State(), GetContext::kNotFound); - } - - if (index_and_filter_in_cache) { - if (bloom_filter_type == 0) { - // with block-based, we read index and then the filter - ASSERT_EQ(get_perf_context()->block_read_count, 2); - ASSERT_EQ(get_perf_context()->index_block_read_count, 1); - ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); - } else { - // with full-filter, we read filter first and then we stop - ASSERT_EQ(get_perf_context()->block_read_count, 1); - ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); - } - } else { - // filter is already in memory and it figures out that the key doesn't - // exist - ASSERT_EQ(get_perf_context()->block_read_count, 0); - } - } - } -} - -TEST_P(BlockBasedTableTest, BlockCacheLeak) { - // Check that when we reopen a table we don't lose access to blocks already - // in the cache. 
This test checks whether the Table actually makes use of the - // unique ID from the file. - - Options opt; - std::unique_ptr ikc; - ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); - opt.compression = kNoCompression; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.block_size = 1024; - // big enough so we don't ever lose cached values. - table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4); - opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - c.Add("k01", "hello"); - c.Add("k02", "hello2"); - c.Add("k03", std::string(10000, 'x')); - c.Add("k04", std::string(200000, 'x')); - c.Add("k05", std::string(300000, 'x')); - c.Add("k06", "hello3"); - c.Add("k07", std::string(100000, 'x')); - std::vector keys; - stl_wrappers::KVMap kvmap; - const ImmutableOptions ioptions(opt); - const MutableCFOptions moptions(opt); - c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); - - std::unique_ptr iter( - c.NewIterator(moptions.prefix_extractor.get())); - iter->SeekToFirst(); - while (iter->Valid()) { - iter->key(); - iter->value(); - iter->Next(); - } - ASSERT_OK(iter->status()); - iter.reset(); - - const ImmutableOptions ioptions1(opt); - const MutableCFOptions moptions1(opt); - ASSERT_OK(c.Reopen(ioptions1, moptions1)); - auto table_reader = dynamic_cast(c.GetTableReader()); - for (const std::string& key : keys) { - InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); - ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); - } - c.ResetTableReader(); - - // rerun with different block cache - table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4); - opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); - const ImmutableOptions ioptions2(opt); - const MutableCFOptions moptions2(opt); - ASSERT_OK(c.Reopen(ioptions2, moptions2)); - table_reader = dynamic_cast(c.GetTableReader()); - for (const std::string& key : keys) { - InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); - ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); - } - c.ResetTableReader(); -} - -TEST_P(BlockBasedTableTest, MemoryAllocator) { - auto default_memory_allocator = std::make_shared(); - auto custom_memory_allocator = - std::make_shared(default_memory_allocator); - { - Options opt; - std::unique_ptr ikc; - ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); - opt.compression = kNoCompression; - BlockBasedTableOptions table_options; - table_options.block_size = 1024; - LRUCacheOptions lruOptions; - lruOptions.memory_allocator = custom_memory_allocator; - lruOptions.capacity = 16 * 1024 * 1024; - lruOptions.num_shard_bits = 4; - table_options.block_cache = NewLRUCache(std::move(lruOptions)); - opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - TableConstructor c(BytewiseComparator(), - true /* convert_to_internal_key_ */); - c.Add("k01", "hello"); - c.Add("k02", "hello2"); - c.Add("k03", std::string(10000, 'x')); - c.Add("k04", std::string(200000, 'x')); - c.Add("k05", std::string(300000, 'x')); - c.Add("k06", "hello3"); - c.Add("k07", std::string(100000, 'x')); - std::vector keys; - stl_wrappers::KVMap kvmap; - const ImmutableOptions ioptions(opt); - const MutableCFOptions moptions(opt); - c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); - - std::unique_ptr iter( - c.NewIterator(moptions.prefix_extractor.get())); - iter->SeekToFirst(); - 
while (iter->Valid()) { - iter->key(); - iter->value(); - iter->Next(); - } - ASSERT_OK(iter->status()); - } - - // out of scope, block cache should have been deleted, all allocations - // deallocated - EXPECT_EQ(custom_memory_allocator->GetNumAllocations(), - custom_memory_allocator->GetNumDeallocations()); - // make sure that allocations actually happened through the cache allocator - EXPECT_GT(custom_memory_allocator->GetNumAllocations(), 0); -} - -// Test the file checksum of block based table -TEST_P(BlockBasedTableTest, NoFileChecksum) { - Options options; - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - std::unique_ptr comparator( - new InternalKeyComparator(BytewiseComparator())); - int level = 0; - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - std::string column_family_name; - - FileChecksumTestHelper f(true); - f.CreateWritableFile(); - std::unique_ptr builder; - builder.reset(ioptions.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, *comparator, - &int_tbl_prop_collector_factories, - options.compression, options.compression_opts, - kUnknownColumnFamily, column_family_name, level), - f.GetFileWriter())); - ASSERT_OK(f.ResetTableBuilder(std::move(builder))); - f.AddKVtoKVMap(1000); - ASSERT_OK(f.WriteKVAndFlushTable()); - ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName); - ASSERT_STREQ(f.GetFileChecksum().c_str(), kUnknownFileChecksum); -} - -TEST_P(BlockBasedTableTest, Crc32cFileChecksum) { - FileChecksumGenCrc32cFactory* file_checksum_gen_factory = - new FileChecksumGenCrc32cFactory(); - Options options; - options.file_checksum_gen_factory.reset(file_checksum_gen_factory); - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - std::unique_ptr comparator( - new InternalKeyComparator(BytewiseComparator())); - int level = 0; - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - std::string column_family_name; - - FileChecksumGenContext gen_context; - gen_context.file_name = "db/tmp"; - std::unique_ptr checksum_crc32c_gen1 = - options.file_checksum_gen_factory->CreateFileChecksumGenerator( - gen_context); - FileChecksumTestHelper f(true); - f.CreateWritableFile(); - f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); - std::unique_ptr builder; - builder.reset(ioptions.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, *comparator, - &int_tbl_prop_collector_factories, - options.compression, options.compression_opts, - kUnknownColumnFamily, column_family_name, level), - f.GetFileWriter())); - ASSERT_OK(f.ResetTableBuilder(std::move(builder))); - f.AddKVtoKVMap(1000); - ASSERT_OK(f.WriteKVAndFlushTable()); - ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); - - std::unique_ptr checksum_crc32c_gen2 = - options.file_checksum_gen_factory->CreateFileChecksumGenerator( - gen_context); - std::string checksum; - ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum)); - ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); - - // Unit test the generator itself for schema stability - std::unique_ptr checksum_crc32c_gen3 = - options.file_checksum_gen_factory->CreateFileChecksumGenerator( - gen_context); - const char data[] = "here is some data"; - checksum_crc32c_gen3->Update(data, sizeof(data)); - checksum_crc32c_gen3->Finalize(); - checksum = 
checksum_crc32c_gen3->GetChecksum(); - ASSERT_STREQ(checksum.c_str(), "\345\245\277\110"); -} - -TEST_F(PlainTableTest, BasicPlainTableProperties) { - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 8; - plain_table_options.bloom_bits_per_key = 8; - plain_table_options.hash_table_ratio = 0; - - PlainTableFactory factory(plain_table_options); - std::unique_ptr sink(new test::StringSink()); - std::unique_ptr file_writer(new WritableFileWriter( - std::move(sink), "" /* don't care */, FileOptions())); - Options options; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - InternalKeyComparator ikc(options.comparator); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - std::string column_family_name; - int unknown_level = -1; - std::unique_ptr builder(factory.NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, - &int_tbl_prop_collector_factories, kNoCompression, - CompressionOptions(), kUnknownColumnFamily, - column_family_name, unknown_level), - file_writer.get())); - - for (char c = 'a'; c <= 'z'; ++c) { - std::string key(8, c); - key.append("\1 "); // PlainTable expects internal key structure - std::string value(28, c + 42); - builder->Add(key, value); - } - ASSERT_OK(builder->Finish()); - ASSERT_OK(file_writer->Flush()); - - test::StringSink* ss = - static_cast(file_writer->writable_file()); - std::unique_ptr source( - new test::StringSource(ss->contents(), 72242, true)); - std::unique_ptr file_reader( - new RandomAccessFileReader(std::move(source), "test")); - - std::unique_ptr props; - auto s = ReadTableProperties(file_reader.get(), ss->contents().size(), - kPlainTableMagicNumber, ioptions, &props); - ASSERT_OK(s); - - ASSERT_EQ(0ul, props->index_size); - ASSERT_EQ(0ul, props->filter_size); - ASSERT_EQ(16ul * 26, props->raw_key_size); - ASSERT_EQ(28ul * 26, props->raw_value_size); - ASSERT_EQ(26ul, props->num_entries); - ASSERT_EQ(1ul, props->num_data_blocks); -} - -TEST_F(PlainTableTest, NoFileChecksum) { - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 20; - plain_table_options.bloom_bits_per_key = 8; - plain_table_options.hash_table_ratio = 0; - PlainTableFactory factory(plain_table_options); - - Options options; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - InternalKeyComparator ikc(options.comparator); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - std::string column_family_name; - int unknown_level = -1; - FileChecksumTestHelper f(true); - f.CreateWritableFile(); - - std::unique_ptr builder(factory.NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, - &int_tbl_prop_collector_factories, kNoCompression, - CompressionOptions(), kUnknownColumnFamily, - column_family_name, unknown_level), - f.GetFileWriter())); - ASSERT_OK(f.ResetTableBuilder(std::move(builder))); - f.AddKVtoKVMap(1000); - ASSERT_OK(f.WriteKVAndFlushTable()); - ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName); - EXPECT_EQ(f.GetFileChecksum(), kUnknownFileChecksum); -} - -TEST_F(PlainTableTest, Crc32cFileChecksum) { - PlainTableOptions plain_table_options; - plain_table_options.user_key_len = 20; - plain_table_options.bloom_bits_per_key = 8; - plain_table_options.hash_table_ratio = 0; - PlainTableFactory factory(plain_table_options); - - FileChecksumGenCrc32cFactory* file_checksum_gen_factory = - new FileChecksumGenCrc32cFactory(); - Options options; - 
options.file_checksum_gen_factory.reset(file_checksum_gen_factory); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - InternalKeyComparator ikc(options.comparator); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - std::string column_family_name; - int unknown_level = -1; - - FileChecksumGenContext gen_context; - gen_context.file_name = "db/tmp"; - std::unique_ptr checksum_crc32c_gen1 = - options.file_checksum_gen_factory->CreateFileChecksumGenerator( - gen_context); - FileChecksumTestHelper f(true); - f.CreateWritableFile(); - f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); - - std::unique_ptr builder(factory.NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, - &int_tbl_prop_collector_factories, kNoCompression, - CompressionOptions(), kUnknownColumnFamily, - column_family_name, unknown_level), - f.GetFileWriter())); - ASSERT_OK(f.ResetTableBuilder(std::move(builder))); - f.AddKVtoKVMap(1000); - ASSERT_OK(f.WriteKVAndFlushTable()); - ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); - - std::unique_ptr checksum_crc32c_gen2 = - options.file_checksum_gen_factory->CreateFileChecksumGenerator( - gen_context); - std::string checksum; - ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum)); - EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); -} - - -TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) { - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - c.Add("k01", "hello"); - c.Add("k02", "hello2"); - c.Add("k03", std::string(10000, 'x')); - c.Add("k04", std::string(200000, 'x')); - c.Add("k05", std::string(300000, 'x')); - c.Add("k06", "hello3"); - c.Add("k07", std::string(100000, 'x')); - std::vector keys; - stl_wrappers::KVMap kvmap; - Options options; - options.db_host_id = ""; - test::PlainInternalKeyComparator internal_comparator(options.comparator); - options.compression = kNoCompression; - BlockBasedTableOptions table_options; - table_options.block_size = 1024; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, internal_comparator, - &keys, &kvmap); - - ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000)); - // k04 and k05 will be in two consecutive blocks, the index is - // an arbitrary slice between k04 and k05, either before or after k04a - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000)); - c.ResetTableReader(); -} - -static void DoCompressionTest(CompressionType comp) { - Random rnd(301); - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - std::string tmp; - c.Add("k01", "hello"); - c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); - c.Add("k03", "hello3"); - c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); - std::vector keys; - stl_wrappers::KVMap kvmap; - Options options; - 
test::PlainInternalKeyComparator ikc(options.comparator); - options.compression = comp; - BlockBasedTableOptions table_options; - table_options.block_size = 1024; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap); - - ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3525)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3525)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7075)); - c.ResetTableReader(); -} - -TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) { - std::vector compression_state; - if (!Snappy_Supported()) { - fprintf(stderr, "skipping snappy compression tests\n"); - } else { - compression_state.push_back(kSnappyCompression); - } - - if (!Zlib_Supported()) { - fprintf(stderr, "skipping zlib compression tests\n"); - } else { - compression_state.push_back(kZlibCompression); - } - - // TODO(kailiu) DoCompressionTest() doesn't work with BZip2. - /* - if (!BZip2_Supported()) { - fprintf(stderr, "skipping bzip2 compression tests\n"); - } else { - compression_state.push_back(kBZip2Compression); - } - */ - - if (!LZ4_Supported()) { - fprintf(stderr, "skipping lz4 and lz4hc compression tests\n"); - } else { - compression_state.push_back(kLZ4Compression); - compression_state.push_back(kLZ4HCCompression); - } - - if (!XPRESS_Supported()) { - fprintf(stderr, "skipping xpress and xpress compression tests\n"); - } else { - compression_state.push_back(kXpressCompression); - } - - for (auto state : compression_state) { - DoCompressionTest(state); - } -} - -TEST_F(GeneralTableTest, ApproximateKeyAnchors) { - Random rnd(301); - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - std::string tmp; - for (int i = 1000; i < 9000; i++) { - c.Add(std::to_string(i), rnd.RandomString(2000)); - } - std::vector keys; - stl_wrappers::KVMap kvmap; - Options options; - InternalKeyComparator ikc(options.comparator); - options.compression = kNoCompression; - BlockBasedTableOptions table_options; - table_options.block_size = 4096; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap); - - std::vector anchors; - ASSERT_OK(c.GetTableReader()->ApproximateKeyAnchors(ReadOptions(), anchors)); - // The target is 128 anchors. But in reality it can be slightly more or fewer. - ASSERT_GT(anchors.size(), 120); - ASSERT_LT(anchors.size(), 140); - - // We have around 8000 keys. With 128 anchors, in average 62.5 keys per - // anchor. Here we take a rough range and estimate the distance between - // anchors is between 50 and 100. - // Total data size is about 18,000,000, so each anchor range is about - // 140,625. We also take a rough range. - int prev_num = 1000; - // Non-last anchor - for (size_t i = 0; i + 1 < anchors.size(); i++) { - auto& anchor = anchors[i]; - ASSERT_GT(anchor.range_size, 100000); - ASSERT_LT(anchor.range_size, 200000); - - // Key might be shortened, so fill 0 in the end if it is the case. 
- std::string key_cpy = anchor.user_key; - key_cpy.append(4 - key_cpy.size(), '0'); - int num = std::stoi(key_cpy); - ASSERT_GT(num - prev_num, 50); - ASSERT_LT(num - prev_num, 100); - prev_num = num; - } - - ASSERT_EQ("8999", anchors.back().user_key); - ASSERT_LT(anchors.back().range_size, 200000); - - c.ResetTableReader(); -} - -#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) -TEST_P(ParameterizedHarnessTest, RandomizedHarnessTest) { - Random rnd(test::RandomSeed() + 5); - for (int num_entries = 0; num_entries < 2000; - num_entries += (num_entries < 50 ? 1 : 200)) { - for (int e = 0; e < num_entries; e++) { - Add(test::RandomKey(&rnd, rnd.Skewed(4)), - rnd.RandomString(rnd.Skewed(5))); - } - Test(&rnd); - } -} - -TEST_F(DBHarnessTest, RandomizedLongDB) { - Random rnd(test::RandomSeed()); - int num_entries = 100000; - for (int e = 0; e < num_entries; e++) { - std::string v; - Add(test::RandomKey(&rnd, rnd.Skewed(4)), rnd.RandomString(rnd.Skewed(5))); - } - Test(&rnd); - - // We must have created enough data to force merging - int files = 0; - for (int level = 0; level < db()->NumberLevels(); level++) { - std::string value; - char name[100]; - snprintf(name, sizeof(name), "rocksdb.num-files-at-level%d", level); - ASSERT_TRUE(db()->GetProperty(name, &value)); - files += atoi(value.c_str()); - } - ASSERT_GT(files, 0); -} -#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) - -class MemTableTest : public testing::Test { - public: - MemTableTest() { - InternalKeyComparator cmp(BytewiseComparator()); - auto table_factory = std::make_shared(); - options_.memtable_factory = table_factory; - ImmutableOptions ioptions(options_); - wb_ = new WriteBufferManager(options_.db_write_buffer_size); - memtable_ = new MemTable(cmp, ioptions, MutableCFOptions(options_), wb_, - kMaxSequenceNumber, 0 /* column_family_id */); - memtable_->Ref(); - } - - ~MemTableTest() { - delete memtable_->Unref(); - delete wb_; - } - - MemTable* GetMemTable() { return memtable_; } - - private: - MemTable* memtable_; - Options options_; - WriteBufferManager* wb_; -}; - -TEST_F(MemTableTest, Simple) { - WriteBatch batch; - WriteBatchInternal::SetSequence(&batch, 100); - ASSERT_OK(batch.Put(std::string("k1"), std::string("v1"))); - ASSERT_OK(batch.Put(std::string("k2"), std::string("v2"))); - ASSERT_OK(batch.Put(std::string("k3"), std::string("v3"))); - ASSERT_OK(batch.Put(std::string("largekey"), std::string("vlarge"))); - ASSERT_OK(batch.DeleteRange(std::string("chi"), std::string("xigua"))); - ASSERT_OK(batch.DeleteRange(std::string("begin"), std::string("end"))); - ColumnFamilyMemTablesDefault cf_mems_default(GetMemTable()); - ASSERT_TRUE( - WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr, nullptr) - .ok()); - - for (int i = 0; i < 2; ++i) { - Arena arena; - ScopedArenaIterator arena_iter_guard; - std::unique_ptr iter_guard; - InternalIterator* iter; - if (i == 0) { - iter = GetMemTable()->NewIterator(ReadOptions(), &arena); - arena_iter_guard.set(iter); - } else { - iter = GetMemTable()->NewRangeTombstoneIterator( - ReadOptions(), kMaxSequenceNumber /* read_seq */, - false /* immutable_memtable */); - iter_guard.reset(iter); - } - if (iter == nullptr) { - continue; - } - iter->SeekToFirst(); - while (iter->Valid()) { - fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(), - iter->value().ToString().c_str()); - iter->Next(); - } - } -} - -// Test the empty key -TEST_P(ParameterizedHarnessTest, SimpleEmptyKey) { - Random 
rnd(test::RandomSeed() + 1); - Add("", "v"); - Test(&rnd); -} - -TEST_P(ParameterizedHarnessTest, SimpleSingle) { - Random rnd(test::RandomSeed() + 2); - Add("abc", "v"); - Test(&rnd); -} - -TEST_P(ParameterizedHarnessTest, SimpleMulti) { - Random rnd(test::RandomSeed() + 3); - Add("abc", "v"); - Add("abcd", "v"); - Add("ac", "v2"); - Test(&rnd); -} - -TEST_P(ParameterizedHarnessTest, SimpleSpecialKey) { - Random rnd(test::RandomSeed() + 4); - Add("\xff\xff", "v3"); - Test(&rnd); -} - -TEST(TableTest, FooterTests) { - Random* r = Random::GetTLSInstance(); - uint64_t data_size = (uint64_t{1} << r->Uniform(40)) + r->Uniform(100); - uint64_t index_size = r->Uniform(1000000000); - uint64_t metaindex_size = r->Uniform(1000000); - // 5 == block trailer size - BlockHandle index(data_size + 5, index_size); - BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size); - uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5; - { - // legacy block based - FooterBuilder footer; - footer.Build(kBlockBasedTableMagicNumber, /* format_version */ 0, - footer_offset, kCRC32c, meta_index, index); - Footer decoded_footer; - ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); - ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); - ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c); - ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); - ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); - ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); - ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); - ASSERT_EQ(decoded_footer.format_version(), 0U); - ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U); - // Ensure serialized with legacy magic - ASSERT_EQ( - DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8), - kLegacyBlockBasedTableMagicNumber); - } - // block based, various checksums, various versions - for (auto t : GetSupportedChecksums()) { - for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) { - FooterBuilder footer; - footer.Build(kBlockBasedTableMagicNumber, fv, footer_offset, t, - meta_index, index); - Footer decoded_footer; - ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); - ASSERT_EQ(decoded_footer.table_magic_number(), - kBlockBasedTableMagicNumber); - ASSERT_EQ(decoded_footer.checksum_type(), t); - ASSERT_EQ(decoded_footer.metaindex_handle().offset(), - meta_index.offset()); - ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); - ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); - ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); - ASSERT_EQ(decoded_footer.format_version(), fv); - ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U); - } - } - - { - // legacy plain table - FooterBuilder footer; - footer.Build(kPlainTableMagicNumber, /* format_version */ 0, footer_offset, - kNoChecksum, meta_index); - Footer decoded_footer; - ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); - ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); - ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c); - ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); - ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); - ASSERT_EQ(decoded_footer.index_handle().offset(), 0U); - ASSERT_EQ(decoded_footer.index_handle().size(), 0U); - ASSERT_EQ(decoded_footer.format_version(), 0U); - 
ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U); - // Ensure serialized with legacy magic - ASSERT_EQ( - DecodeFixed64(footer.GetSlice().data() + footer.GetSlice().size() - 8), - kLegacyPlainTableMagicNumber); - } - { - // xxhash plain table (not currently used) - FooterBuilder footer; - footer.Build(kPlainTableMagicNumber, /* format_version */ 1, footer_offset, - kxxHash, meta_index); - Footer decoded_footer; - ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); - ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); - ASSERT_EQ(decoded_footer.checksum_type(), kxxHash); - ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); - ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); - ASSERT_EQ(decoded_footer.index_handle().offset(), 0U); - ASSERT_EQ(decoded_footer.index_handle().size(), 0U); - ASSERT_EQ(decoded_footer.format_version(), 1U); - ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U); - } -} - -class IndexBlockRestartIntervalTest - : public TableTest, - public ::testing::WithParamInterface> { - public: - static std::vector> GetRestartValues() { - return {{-1, false}, {0, false}, {1, false}, {8, false}, - {16, false}, {32, false}, {-1, true}, {0, true}, - {1, true}, {8, true}, {16, true}, {32, true}}; - } -}; - -INSTANTIATE_TEST_CASE_P( - IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest, - ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues())); - -TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) { - const int kKeysInTable = 10000; - const int kKeySize = 100; - const int kValSize = 500; - - const int index_block_restart_interval = std::get<0>(GetParam()); - const bool value_delta_encoding = std::get<1>(GetParam()); - - Options options; - BlockBasedTableOptions table_options; - table_options.block_size = 64; // small block size to get big index block - table_options.index_block_restart_interval = index_block_restart_interval; - if (value_delta_encoding) { - table_options.format_version = 4; - } else { - table_options.format_version = 3; - } - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - - TableConstructor c(BytewiseComparator()); - static Random rnd(301); - for (int i = 0; i < kKeysInTable; i++) { - InternalKey k(rnd.RandomString(kKeySize), 0, kTypeValue); - c.Add(k.Encode().ToString(), rnd.RandomString(kValSize)); - } - - std::vector keys; - stl_wrappers::KVMap kvmap; - std::unique_ptr comparator( - new InternalKeyComparator(BytewiseComparator())); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, - &kvmap); - auto reader = c.GetTableReader(); - - ReadOptions read_options; - std::unique_ptr db_iter(reader->NewIterator( - read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized)); - - // Test point lookup - for (auto& kv : kvmap) { - db_iter->Seek(kv.first); - - ASSERT_TRUE(db_iter->Valid()); - ASSERT_OK(db_iter->status()); - ASSERT_EQ(db_iter->key(), kv.first); - ASSERT_EQ(db_iter->value(), kv.second); - } - - // Test iterating - auto kv_iter = kvmap.begin(); - for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { - ASSERT_EQ(db_iter->key(), kv_iter->first); - ASSERT_EQ(db_iter->value(), kv_iter->second); - kv_iter++; - } - ASSERT_EQ(kv_iter, kvmap.end()); - c.ResetTableReader(); -} - -class PrefixTest : public testing::Test { - public: - 
PrefixTest() : testing::Test() {} - ~PrefixTest() override {} -}; - -namespace { -// A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest -class TestPrefixExtractor : public ROCKSDB_NAMESPACE::SliceTransform { - public: - ~TestPrefixExtractor() override{}; - const char* Name() const override { return "TestPrefixExtractor"; } - - ROCKSDB_NAMESPACE::Slice Transform( - const ROCKSDB_NAMESPACE::Slice& src) const override { - assert(IsValid(src)); - return ROCKSDB_NAMESPACE::Slice(src.data(), 3); - } - - bool InDomain(const ROCKSDB_NAMESPACE::Slice& src) const override { - return IsValid(src); - } - - bool InRange(const ROCKSDB_NAMESPACE::Slice& /*dst*/) const override { - return true; - } - - bool IsValid(const ROCKSDB_NAMESPACE::Slice& src) const { - if (src.size() != 4) { - return false; - } - if (src[0] != '[') { - return false; - } - if (src[1] < '0' || src[1] > '9') { - return false; - } - if (src[2] != ']') { - return false; - } - if (src[3] < '0' || src[3] > '9') { - return false; - } - return true; - } -}; -} // namespace - -TEST_F(PrefixTest, PrefixAndWholeKeyTest) { - ROCKSDB_NAMESPACE::Options options; - options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleUniversal; - options.num_levels = 20; - options.create_if_missing = true; - options.optimize_filters_for_hits = false; - options.target_file_size_base = 268435456; - options.prefix_extractor = std::make_shared(); - ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto; - bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); - bbto.block_size = 262144; - bbto.whole_key_filtering = true; - - const std::string kDBPath = test::PerThreadDBPath("table_prefix_test"); - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - ASSERT_OK(DestroyDB(kDBPath, options)); - ROCKSDB_NAMESPACE::DB* db; - ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db)); - - // Create a bunch of keys with 10 filters. - for (int i = 0; i < 10; i++) { - std::string prefix = "[" + std::to_string(i) + "]"; - for (int j = 0; j < 10; j++) { - std::string key = prefix + std::to_string(j); - ASSERT_OK(db->Put(ROCKSDB_NAMESPACE::WriteOptions(), key, "1")); - } - } - - // Trigger compaction. - ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - delete db; - // In the second round, turn whole_key_filtering off and expect - // rocksdb still works. -} - -/* - * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in - * the SST file any more. Instead, RocksDB deduces global_seqno from the - * MANIFEST while reading from an SST. Therefore, it's not possible to test the - * functionality of global_seqno in a single, isolated unit test without the - * involvement of Version, VersionSet, etc. 
- */ -TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { - BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); - test::StringSink* sink = new test::StringSink(); - std::unique_ptr holder(sink); - std::unique_ptr file_writer(new WritableFileWriter( - std::move(holder), "" /* don't care */, FileOptions())); - Options options; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - InternalKeyComparator ikc(options.comparator); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - int_tbl_prop_collector_factories.emplace_back( - new SstFileWriterPropertiesCollectorFactory(2 /* version */, - 0 /* global_seqno*/)); - std::string column_family_name; - std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, - &int_tbl_prop_collector_factories, kNoCompression, - CompressionOptions(), kUnknownColumnFamily, - column_family_name, -1), - file_writer.get())); - - for (char c = 'a'; c <= 'z'; ++c) { - std::string key(8, c); - std::string value = key; - InternalKey ik(key, 0, kTypeValue); - - builder->Add(ik.Encode(), value); - } - ASSERT_OK(builder->Finish()); - ASSERT_OK(file_writer->Flush()); - - test::RandomRWStringSink ss_rw(sink); - uint32_t version; - uint64_t global_seqno; - uint64_t global_seqno_offset; - - // Helper function to get version, global_seqno, global_seqno_offset - std::function GetVersionAndGlobalSeqno = [&]() { - std::unique_ptr source( - new test::StringSource(ss_rw.contents(), 73342, true)); - std::unique_ptr file_reader( - new RandomAccessFileReader(std::move(source), "")); - - std::unique_ptr props; - ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(), - kBlockBasedTableMagicNumber, ioptions, - &props)); - - UserCollectedProperties user_props = props->user_collected_properties; - version = DecodeFixed32( - user_props[ExternalSstFilePropertyNames::kVersion].c_str()); - global_seqno = DecodeFixed64( - user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str()); - global_seqno_offset = props->external_sst_file_global_seqno_offset; - }; - - // Helper function to update the value of the global seqno in the file - std::function SetGlobalSeqno = [&](uint64_t val) { - std::string new_global_seqno; - PutFixed64(&new_global_seqno, val); - - ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno, IOOptions(), - nullptr)); - }; - - // Helper function to get the contents of the table InternalIterator - std::unique_ptr table_reader; - const ReadOptions read_options; - std::function GetTableInternalIter = [&]() { - std::unique_ptr source( - new test::StringSource(ss_rw.contents(), 73342, true)); - std::unique_ptr file_reader( - new RandomAccessFileReader(std::move(source), "")); - - options.table_factory->NewTableReader( - TableReaderOptions(ioptions, moptions.prefix_extractor, EnvOptions(), - ikc), - std::move(file_reader), ss_rw.contents().size(), &table_reader); - - return table_reader->NewIterator( - read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized); - }; - - GetVersionAndGlobalSeqno(); - ASSERT_EQ(2u, version); - ASSERT_EQ(0u, global_seqno); - - InternalIterator* iter = GetTableInternalIter(); - char current_c = 'a'; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ParsedInternalKey pik; - ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); - - ASSERT_EQ(pik.type, 
ValueType::kTypeValue); - ASSERT_EQ(pik.sequence, 0); - ASSERT_EQ(pik.user_key, iter->value()); - ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c)); - current_c++; - } - ASSERT_EQ(current_c, 'z' + 1); - delete iter; - - // Update global sequence number to 10 - SetGlobalSeqno(10); - GetVersionAndGlobalSeqno(); - ASSERT_EQ(2u, version); - ASSERT_EQ(10u, global_seqno); - - iter = GetTableInternalIter(); - current_c = 'a'; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ParsedInternalKey pik; - ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); - - ASSERT_EQ(pik.type, ValueType::kTypeValue); - ASSERT_EQ(pik.sequence, 10); - ASSERT_EQ(pik.user_key, iter->value()); - ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c)); - current_c++; - } - ASSERT_EQ(current_c, 'z' + 1); - - // Verify Seek - for (char c = 'a'; c <= 'z'; c++) { - std::string k = std::string(8, c); - InternalKey ik(k, 10, kValueTypeForSeek); - iter->Seek(ik.Encode()); - ASSERT_TRUE(iter->Valid()); - - ParsedInternalKey pik; - ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); - - ASSERT_EQ(pik.type, ValueType::kTypeValue); - ASSERT_EQ(pik.sequence, 10); - ASSERT_EQ(pik.user_key.ToString(), k); - ASSERT_EQ(iter->value().ToString(), k); - } - delete iter; - - // Update global sequence number to 3 - SetGlobalSeqno(3); - GetVersionAndGlobalSeqno(); - ASSERT_EQ(2u, version); - ASSERT_EQ(3u, global_seqno); - - iter = GetTableInternalIter(); - current_c = 'a'; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ParsedInternalKey pik; - ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); - - ASSERT_EQ(pik.type, ValueType::kTypeValue); - ASSERT_EQ(pik.sequence, 3); - ASSERT_EQ(pik.user_key, iter->value()); - ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c)); - current_c++; - } - ASSERT_EQ(current_c, 'z' + 1); - - // Verify Seek - for (char c = 'a'; c <= 'z'; c++) { - std::string k = std::string(8, c); - // seqno=4 is less than 3 so we still should get our key - InternalKey ik(k, 4, kValueTypeForSeek); - iter->Seek(ik.Encode()); - ASSERT_TRUE(iter->Valid()); - - ParsedInternalKey pik; - ASSERT_OK(ParseInternalKey(iter->key(), &pik, true /* log_err_key */)); - - ASSERT_EQ(pik.type, ValueType::kTypeValue); - ASSERT_EQ(pik.sequence, 3); - ASSERT_EQ(pik.user_key.ToString(), k); - ASSERT_EQ(iter->value().ToString(), k); - } - - delete iter; -} - -TEST_P(BlockBasedTableTest, BlockAlignTest) { - BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); - bbto.block_align = true; - test::StringSink* sink = new test::StringSink(); - std::unique_ptr holder(sink); - std::unique_ptr file_writer(new WritableFileWriter( - std::move(holder), "" /* don't care */, FileOptions())); - Options options; - options.compression = kNoCompression; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - InternalKeyComparator ikc(options.comparator); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - std::string column_family_name; - std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, - &int_tbl_prop_collector_factories, kNoCompression, - CompressionOptions(), kUnknownColumnFamily, - column_family_name, -1), - file_writer.get())); - - for (int i = 1; i <= 10000; ++i) { - std::ostringstream ostr; - ostr << std::setfill('0') << std::setw(5) << i; - std::string key = ostr.str(); - 
std::string value = "val"; - InternalKey ik(key, 0, kTypeValue); - - builder->Add(ik.Encode(), value); - } - ASSERT_OK(builder->Finish()); - ASSERT_OK(file_writer->Flush()); - - std::unique_ptr source( - new test::StringSource(sink->contents(), 73342, false)); - std::unique_ptr file_reader( - new RandomAccessFileReader(std::move(source), "test")); - // Helper function to get version, global_seqno, global_seqno_offset - std::function VerifyBlockAlignment = [&]() { - std::unique_ptr props; - ASSERT_OK(ReadTableProperties(file_reader.get(), sink->contents().size(), - kBlockBasedTableMagicNumber, ioptions, - &props)); - - uint64_t data_block_size = props->data_size / props->num_data_blocks; - ASSERT_EQ(data_block_size, 4096); - ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks); - }; - - VerifyBlockAlignment(); - - // The below block of code verifies that we can read back the keys. Set - // block_align to false when creating the reader to ensure we can flip between - // the two modes without any issues - std::unique_ptr table_reader; - bbto.block_align = false; - Options options2; - options2.table_factory.reset(NewBlockBasedTableFactory(bbto)); - ImmutableOptions ioptions2(options2); - const MutableCFOptions moptions2(options2); - - ASSERT_OK(ioptions.table_factory->NewTableReader( - TableReaderOptions(ioptions2, moptions2.prefix_extractor, EnvOptions(), - GetPlainInternalComparator(options2.comparator)), - std::move(file_reader), sink->contents().size(), &table_reader)); - - ReadOptions read_options; - std::unique_ptr db_iter(table_reader->NewIterator( - read_options, moptions2.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized)); - - int expected_key = 1; - for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { - std::ostringstream ostr; - ostr << std::setfill('0') << std::setw(5) << expected_key++; - std::string key = ostr.str(); - std::string value = "val"; - - ASSERT_OK(db_iter->status()); - ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key); - ASSERT_EQ(db_iter->value().ToString(), value); - } - expected_key--; - ASSERT_EQ(expected_key, 10000); - table_reader.reset(); -} - -TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { - BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); - bbto.block_align = true; - test::StringSink* sink = new test::StringSink(); - std::unique_ptr holder(sink); - std::unique_ptr file_writer(new WritableFileWriter( - std::move(holder), "" /* don't care */, FileOptions())); - - Options options; - options.compression = kNoCompression; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - InternalKeyComparator ikc(options.comparator); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - std::string column_family_name; - - std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, - &int_tbl_prop_collector_factories, kNoCompression, - CompressionOptions(), kUnknownColumnFamily, - column_family_name, -1), - file_writer.get())); - - for (int i = 1; i <= 10000; ++i) { - std::ostringstream ostr; - ostr << std::setfill('0') << std::setw(5) << i; - std::string key = ostr.str(); - std::string value = "val"; - InternalKey ik(key, 0, kTypeValue); - - builder->Add(ik.Encode(), value); - } - ASSERT_OK(builder->Finish()); - ASSERT_OK(file_writer->Flush()); - - std::unique_ptr source( - new 
test::StringSource(sink->contents(), 73342, true)); - std::unique_ptr file_reader( - new RandomAccessFileReader(std::move(source), "test")); - - { - RandomAccessFileReader* file = file_reader.get(); - uint64_t file_size = sink->contents().size(); - - Footer footer; - IOOptions opts; - ASSERT_OK(ReadFooterFromFile(opts, file, *FileSystem::Default(), - nullptr /* prefetch_buffer */, file_size, - &footer, kBlockBasedTableMagicNumber)); - - auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type, - BlockContents* contents) { - ReadOptions read_options; - read_options.verify_checksums = false; - PersistentCacheOptions cache_options; - - BlockFetcher block_fetcher( - file, nullptr /* prefetch_buffer */, footer, read_options, handle, - contents, ioptions, false /* decompress */, - false /*maybe_compressed*/, block_type, - UncompressionDict::GetEmptyDict(), cache_options); - - ASSERT_OK(block_fetcher.ReadBlockContents()); - }; - - // -- Read metaindex block - auto metaindex_handle = footer.metaindex_handle(); - BlockContents metaindex_contents; - - BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex, - &metaindex_contents); - Block metaindex_block(std::move(metaindex_contents)); - - std::unique_ptr meta_iter(metaindex_block.NewDataIterator( - BytewiseComparator(), kDisableGlobalSequenceNumber)); - - // -- Read properties block - BlockHandle properties_handle; - ASSERT_OK(FindOptionalMetaBlock(meta_iter.get(), kPropertiesBlockName, - &properties_handle)); - ASSERT_FALSE(properties_handle.IsNull()); - BlockContents properties_contents; - BlockFetchHelper(properties_handle, BlockType::kProperties, - &properties_contents); - Block properties_block(std::move(properties_contents)); - - ASSERT_EQ(properties_block.NumRestarts(), 1u); - } -} - -TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) { - // The properties meta-block should come at the end since we always need to - // read it when opening a file, unlike index/filter/other meta-blocks, which - // are sometimes read depending on the user's configuration. This ordering - // allows us to do a small readahead on the end of the file to read properties - // and meta-index blocks with one I/O. 
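The property-block placement described in the comment above is also visible through the public SST file API: table properties are read whenever a file is opened. The following is a minimal sketch using rocksdb::SstFileWriter and rocksdb::SstFileReader rather than this test's internal harness; the file path and the printed fields are illustrative assumptions, not values taken from the test.

#include <cassert>
#include <iostream>
#include <memory>
#include <string>

#include "rocksdb/options.h"
#include "rocksdb/sst_file_reader.h"
#include "rocksdb/sst_file_writer.h"
#include "rocksdb/table_properties.h"

int main() {
  rocksdb::Options options;

  // Write a tiny SST file (the path is a placeholder for this sketch).
  rocksdb::SstFileWriter writer(rocksdb::EnvOptions(), options);
  const std::string path = "/tmp/properties_demo.sst";
  assert(writer.Open(path).ok());
  assert(writer.Put("a1", "val1").ok());
  assert(writer.Put("b2", "val2").ok());
  assert(writer.Finish().ok());

  // Opening the file reads the footer, metaindex and properties blocks;
  // keeping the properties block near the tail lets that happen with a
  // small readahead at the end of the file.
  rocksdb::SstFileReader reader(options);
  assert(reader.Open(path).ok());
  std::shared_ptr<const rocksdb::TableProperties> props =
      reader.GetTableProperties();
  std::cout << "entries=" << props->num_entries
            << " data_size=" << props->data_size << std::endl;
  return 0;
}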
- TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - c.Add("a1", "val1"); - c.Add("b2", "val2"); - c.Add("c3", "val3"); - c.Add("d4", "val4"); - c.Add("e5", "val5"); - c.Add("f6", "val6"); - c.Add("g7", "val7"); - c.Add("h8", "val8"); - c.Add("j9", "val9"); - - // write an SST file - Options options; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.filter_policy.reset(NewBloomFilterPolicy( - 8 /* bits_per_key */, false /* use_block_based_filter */)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - std::vector keys; - stl_wrappers::KVMap kvmap; - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - - // get file reader - test::StringSink* table_sink = c.TEST_GetSink(); - std::unique_ptr source(new test::StringSource( - table_sink->contents(), 0 /* unique_id */, false /* allow_mmap_reads */)); - - std::unique_ptr table_reader( - new RandomAccessFileReader(std::move(source), "test")); - size_t table_size = table_sink->contents().size(); - - // read footer - Footer footer; - IOOptions opts; - ASSERT_OK(ReadFooterFromFile(opts, table_reader.get(), *FileSystem::Default(), - nullptr /* prefetch_buffer */, table_size, - &footer, kBlockBasedTableMagicNumber)); - - // read metaindex - auto metaindex_handle = footer.metaindex_handle(); - BlockContents metaindex_contents; - PersistentCacheOptions pcache_opts; - BlockFetcher block_fetcher( - table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(), - metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, - false /*maybe_compressed*/, BlockType::kMetaIndex, - UncompressionDict::GetEmptyDict(), pcache_opts, - nullptr /*memory_allocator*/); - ASSERT_OK(block_fetcher.ReadBlockContents()); - Block metaindex_block(std::move(metaindex_contents)); - - // verify properties block comes last - std::unique_ptr metaindex_iter{ - metaindex_block.NewMetaIterator()}; - uint64_t max_offset = 0; - std::string key_at_max_offset; - for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid(); - metaindex_iter->Next()) { - BlockHandle handle; - Slice value = metaindex_iter->value(); - ASSERT_OK(handle.DecodeFrom(&value)); - if (handle.offset() > max_offset) { - max_offset = handle.offset(); - key_at_max_offset = metaindex_iter->key().ToString(); - } - } - ASSERT_EQ(kPropertiesBlockName, key_at_max_offset); - // index handle is stored in footer rather than metaindex block, so need - // separate logic to verify it comes before properties block. 
- ASSERT_GT(max_offset, footer.index_handle().offset()); - c.ResetTableReader(); -} - -TEST_P(BlockBasedTableTest, SeekMetaBlocks) { - TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); - c.Add("foo_a1", "val1"); - c.Add("foo_b2", "val2"); - c.Add("foo_c3", "val3"); - c.Add("foo_d4", "val4"); - c.Add("foo_e5", "val5"); - c.Add("foo_f6", "val6"); - c.Add("foo_g7", "val7"); - c.Add("foo_h8", "val8"); - c.Add("foo_j9", "val9"); - - // write an SST file - Options options; - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.index_type = BlockBasedTableOptions::kHashSearch; - table_options.filter_policy.reset(NewBloomFilterPolicy( - 8 /* bits_per_key */, false /* use_block_based_filter */)); - options.prefix_extractor.reset(NewFixedPrefixTransform(4)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - std::vector keys; - stl_wrappers::KVMap kvmap; - c.Finish(options, ioptions, moptions, table_options, - GetPlainInternalComparator(options.comparator), &keys, &kvmap); - - // get file reader - test::StringSink* table_sink = c.TEST_GetSink(); - std::unique_ptr source(new test::StringSource( - table_sink->contents(), 0 /* unique_id */, false /* allow_mmap_reads */)); - - std::unique_ptr table_reader( - new RandomAccessFileReader(std::move(source), "test")); - size_t table_size = table_sink->contents().size(); - - // read footer - Footer footer; - IOOptions opts; - ASSERT_OK(ReadFooterFromFile(opts, table_reader.get(), *FileSystem::Default(), - nullptr /* prefetch_buffer */, table_size, - &footer, kBlockBasedTableMagicNumber)); - - // read metaindex - auto metaindex_handle = footer.metaindex_handle(); - BlockContents metaindex_contents; - PersistentCacheOptions pcache_opts; - BlockFetcher block_fetcher( - table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(), - metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, - false /*maybe_compressed*/, BlockType::kMetaIndex, - UncompressionDict::GetEmptyDict(), pcache_opts, - nullptr /*memory_allocator*/); - ASSERT_OK(block_fetcher.ReadBlockContents()); - Block metaindex_block(std::move(metaindex_contents)); - - // verify properties block comes last - std::unique_ptr metaindex_iter( - metaindex_block.NewMetaIterator()); - bool has_hash_prefixes = false; - bool has_hash_metadata = false; - for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid(); - metaindex_iter->Next()) { - if (metaindex_iter->key().ToString() == kHashIndexPrefixesBlock) { - has_hash_prefixes = true; - } else if (metaindex_iter->key().ToString() == - kHashIndexPrefixesMetadataBlock) { - has_hash_metadata = true; - } - } - if (has_hash_metadata) { - metaindex_iter->Seek(kHashIndexPrefixesMetadataBlock); - ASSERT_TRUE(metaindex_iter->Valid()); - ASSERT_EQ(kHashIndexPrefixesMetadataBlock, - metaindex_iter->key().ToString()); - } - if (has_hash_prefixes) { - metaindex_iter->Seek(kHashIndexPrefixesBlock); - ASSERT_TRUE(metaindex_iter->Valid()); - ASSERT_EQ(kHashIndexPrefixesBlock, metaindex_iter->key().ToString()); - } - c.ResetTableReader(); -} - -TEST_P(BlockBasedTableTest, BadOptions) { - ROCKSDB_NAMESPACE::Options options; - options.compression = kNoCompression; - BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); - bbto.block_size = 4000; - bbto.block_align = true; - - const std::string kDBPath = - test::PerThreadDBPath("block_based_table_bad_options_test"); - 
options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - ASSERT_OK(DestroyDB(kDBPath, options)); - ROCKSDB_NAMESPACE::DB* db; - ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db)); - - bbto.block_size = 4096; - options.compression = kSnappyCompression; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db)); -} - -TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) { - TailPrefetchStats tpstats; - ASSERT_EQ(0, tpstats.GetSuggestedPrefetchSize()); - tpstats.RecordEffectiveSize(size_t{1000}); - tpstats.RecordEffectiveSize(size_t{1005}); - tpstats.RecordEffectiveSize(size_t{1002}); - ASSERT_EQ(1005, tpstats.GetSuggestedPrefetchSize()); - - // One single super large value shouldn't influence much - tpstats.RecordEffectiveSize(size_t{1002000}); - tpstats.RecordEffectiveSize(size_t{999}); - ASSERT_LE(1005, tpstats.GetSuggestedPrefetchSize()); - ASSERT_GT(1200, tpstats.GetSuggestedPrefetchSize()); - - // Only history of 32 is kept - for (int i = 0; i < 32; i++) { - tpstats.RecordEffectiveSize(size_t{100}); - } - ASSERT_EQ(100, tpstats.GetSuggestedPrefetchSize()); - - // 16 large values and 16 small values. The result should be closer - // to the small value as the algorithm. - for (int i = 0; i < 16; i++) { - tpstats.RecordEffectiveSize(size_t{1000}); - } - tpstats.RecordEffectiveSize(size_t{10}); - tpstats.RecordEffectiveSize(size_t{20}); - for (int i = 0; i < 6; i++) { - tpstats.RecordEffectiveSize(size_t{100}); - } - ASSERT_LE(80, tpstats.GetSuggestedPrefetchSize()); - ASSERT_GT(200, tpstats.GetSuggestedPrefetchSize()); -} - -TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) { - TailPrefetchStats tpstats; - FilePrefetchBuffer buffer(0 /* readahead_size */, 0 /* max_readahead_size */, - false /* enable */, true /* track_min_offset */); - IOOptions opts; - buffer.TryReadFromCache(opts, nullptr /* reader */, 500 /* offset */, - 10 /* n */, nullptr /* result */, - nullptr /* status */, - Env::IO_TOTAL /* rate_limiter_priority */); - buffer.TryReadFromCache(opts, nullptr /* reader */, 480 /* offset */, - 10 /* n */, nullptr /* result */, - nullptr /* status */, - Env::IO_TOTAL /* rate_limiter_priority */); - buffer.TryReadFromCache(opts, nullptr /* reader */, 490 /* offset */, - 10 /* n */, nullptr /* result */, - nullptr /* status */, - Env::IO_TOTAL /* rate_limiter_priority */); - ASSERT_EQ(480, buffer.min_offset_read()); -} - -TEST_P(BlockBasedTableTest, DataBlockHashIndex) { - const int kNumKeys = 500; - const int kKeySize = 8; - const int kValSize = 40; - - BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); - table_options.data_block_index_type = - BlockBasedTableOptions::kDataBlockBinaryAndHash; - - Options options; - options.comparator = BytewiseComparator(); - - options.table_factory.reset(new BlockBasedTableFactory(table_options)); - - TableConstructor c(options.comparator); - - static Random rnd(1048); - for (int i = 0; i < kNumKeys; i++) { - // padding one "0" to mark existent keys. 
- std::string random_key(rnd.RandomString(kKeySize - 1) + "1"); - InternalKey k(random_key, 0, kTypeValue); - c.Add(k.Encode().ToString(), rnd.RandomString(kValSize)); - } - - std::vector keys; - stl_wrappers::KVMap kvmap; - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - const InternalKeyComparator internal_comparator(options.comparator); - c.Finish(options, ioptions, moptions, table_options, internal_comparator, - &keys, &kvmap); - - auto reader = c.GetTableReader(); - - std::unique_ptr seek_iter; - ReadOptions read_options; - seek_iter.reset(reader->NewIterator( - read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized)); - for (int i = 0; i < 2; ++i) { - ReadOptions ro; - // for every kv, we seek using two method: Get() and Seek() - // Get() will use the SuffixIndexHash in Block. For non-existent key it - // will invalidate the iterator - // Seek() will use the default BinarySeek() in Block. So for non-existent - // key it will land at the closest key that is large than target. - - // Search for existent keys - for (auto& kv : kvmap) { - if (i == 0) { - // Search using Seek() - seek_iter->Seek(kv.first); - ASSERT_OK(seek_iter->status()); - ASSERT_TRUE(seek_iter->Valid()); - ASSERT_EQ(seek_iter->key(), kv.first); - ASSERT_EQ(seek_iter->value(), kv.second); - } else { - // Search using Get() - PinnableSlice value; - std::string user_key = ExtractUserKey(kv.first).ToString(); - GetContext get_context(options.comparator, nullptr, nullptr, nullptr, - GetContext::kNotFound, user_key, &value, nullptr, - nullptr, nullptr, true, nullptr, nullptr); - ASSERT_OK(reader->Get(ro, kv.first, &get_context, - moptions.prefix_extractor.get())); - ASSERT_EQ(get_context.State(), GetContext::kFound); - ASSERT_EQ(value, Slice(kv.second)); - value.Reset(); - } - } - - // Search for non-existent keys - for (auto& kv : kvmap) { - std::string user_key = ExtractUserKey(kv.first).ToString(); - user_key.back() = '0'; // make it non-existent key - InternalKey internal_key(user_key, 0, kTypeValue); - std::string encoded_key = internal_key.Encode().ToString(); - if (i == 0) { // Search using Seek() - seek_iter->Seek(encoded_key); - ASSERT_OK(seek_iter->status()); - if (seek_iter->Valid()) { - ASSERT_TRUE(BytewiseComparator()->Compare( - user_key, ExtractUserKey(seek_iter->key())) < 0); - } - } else { // Search using Get() - PinnableSlice value; - GetContext get_context(options.comparator, nullptr, nullptr, nullptr, - GetContext::kNotFound, user_key, &value, nullptr, - nullptr, nullptr, true, nullptr, nullptr); - ASSERT_OK(reader->Get(ro, encoded_key, &get_context, - moptions.prefix_extractor.get())); - ASSERT_EQ(get_context.State(), GetContext::kNotFound); - value.Reset(); - } - } - } -} - -// BlockBasedTableIterator should invalidate itself and return -// OutOfBound()=true immediately after Seek(), to allow LevelIterator -// filter out corresponding level. 
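The clipping behavior described in the comment above is exposed to applications through ReadOptions::iterate_upper_bound. Below is a minimal sketch against a regular DB instead of a bare table reader; the database path and keys are illustrative assumptions.

#include <cassert>
#include <iostream>
#include <memory>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  // Placeholder path for this sketch.
  assert(rocksdb::DB::Open(options, "/tmp/upper_bound_demo", &db).ok());
  assert(db->Put(rocksdb::WriteOptions(), "bar", "v").ok());
  assert(db->Put(rocksdb::WriteOptions(), "foo", "v").ok());

  // The upper bound is exclusive, so iteration stops before "foo".
  std::string ub = "foo";
  rocksdb::Slice ub_slice(ub);
  rocksdb::ReadOptions read_opts;
  read_opts.iterate_upper_bound = &ub_slice;

  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(read_opts));
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    std::cout << it->key().ToString() << std::endl;  // prints only "bar"
  }
  assert(it->status().ok());
  it.reset();
  delete db;
  return 0;
}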
-TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) { - TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/); - c.Add("foo", "v1"); - std::vector keys; - stl_wrappers::KVMap kvmap; - Options options; - BlockBasedTableOptions table_opt(GetBlockBasedTableOptions()); - options.table_factory.reset(NewBlockBasedTableFactory(table_opt)); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_opt, - GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap); - auto* reader = c.GetTableReader(); - ReadOptions read_opt; - std::string upper_bound = "bar"; - Slice upper_bound_slice(upper_bound); - read_opt.iterate_upper_bound = &upper_bound_slice; - std::unique_ptr iter; - iter.reset(new KeyConvertingIterator(reader->NewIterator( - read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized))); - iter->SeekToFirst(); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound); - iter.reset(new KeyConvertingIterator(reader->NewIterator( - read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized))); - iter->Seek("foo"); - ASSERT_FALSE(iter->Valid()); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound); -} - -// BlockBasedTableIterator should invalidate itself and return -// OutOfBound()=true after Next(), if it finds current index key is no smaller -// than upper bound, unless it is pointing to the last data block. -TEST_P(BlockBasedTableTest, OutOfBoundOnNext) { - TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/); - c.Add("bar", "v"); - c.Add("foo", "v"); - std::vector keys; - stl_wrappers::KVMap kvmap; - Options options; - BlockBasedTableOptions table_opt(GetBlockBasedTableOptions()); - table_opt.flush_block_policy_factory = - std::make_shared(); - options.table_factory.reset(NewBlockBasedTableFactory(table_opt)); - const ImmutableOptions ioptions(options); - const MutableCFOptions moptions(options); - c.Finish(options, ioptions, moptions, table_opt, - GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap); - auto* reader = c.GetTableReader(); - ReadOptions read_opt; - std::string ub1 = "bar_after"; - Slice ub_slice1(ub1); - read_opt.iterate_upper_bound = &ub_slice1; - std::unique_ptr iter; - iter.reset(new KeyConvertingIterator(reader->NewIterator( - read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized))); - iter->Seek("bar"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("bar", iter->key()); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_TRUE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound); - std::string ub2 = "foo_after"; - Slice ub_slice2(ub2); - read_opt.iterate_upper_bound = &ub_slice2; - iter.reset(new KeyConvertingIterator(reader->NewIterator( - read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, - /*skip_filters=*/false, TableReaderCaller::kUncategorized))); - iter->Seek("foo"); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key()); - iter->Next(); - ASSERT_FALSE(iter->Valid()); - ASSERT_FALSE(iter->UpperBoundCheckResult() == IterBoundCheck::kOutOfBound); -} - -class ChargeCompressionDictionaryBuildingBufferTest - : public BlockBasedTableTestBase {}; -TEST_F(ChargeCompressionDictionaryBuildingBufferTest, Basic) { 
- constexpr std::size_t kSizeDummyEntry = 256 * 1024; - constexpr std::size_t kMetaDataChargeOverhead = 10000; - constexpr std::size_t kCacheCapacity = 8 * 1024 * 1024; - constexpr std::size_t kMaxDictBytes = 1024; - constexpr std::size_t kMaxDictBufferBytes = 1024; - - for (CacheEntryRoleOptions::Decision - charge_compression_dictionary_building_buffer : - {CacheEntryRoleOptions::Decision::kEnabled, - CacheEntryRoleOptions::Decision::kDisabled}) { - BlockBasedTableOptions table_options; - LRUCacheOptions lo; - lo.capacity = kCacheCapacity; - lo.num_shard_bits = 0; // 2^0 shard - lo.strict_capacity_limit = true; - std::shared_ptr<Cache> cache(NewLRUCache(lo)); - table_options.block_cache = cache; - table_options.flush_block_policy_factory = - std::make_shared<FlushBlockEveryKeyPolicyFactory>(); - table_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kCompressionDictionaryBuildingBuffer, - {/*.charged = */ charge_compression_dictionary_building_buffer}}); - Options options; - options.compression = kSnappyCompression; - options.compression_opts.max_dict_bytes = kMaxDictBytes; - options.compression_opts.max_dict_buffer_bytes = kMaxDictBufferBytes; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - test::StringSink* sink = new test::StringSink(); - std::unique_ptr<FSWritableFile> holder(sink); - std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter( - std::move(holder), "test_file_name", FileOptions())); - - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - InternalKeyComparator ikc(options.comparator); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - - std::unique_ptr<TableBuilder> builder( - options.table_factory->NewTableBuilder( - TableBuilderOptions( - ioptions, moptions, ikc, &int_tbl_prop_collector_factories, - kSnappyCompression, options.compression_opts, - kUnknownColumnFamily, "test_cf", -1 /* level */), - file_writer.get())); - - std::string key1 = "key1"; - std::string value1 = "val1"; - InternalKey ik1(key1, 0 /* sequence number */, kTypeValue); - // Adding the first key won't trigger a flush by FlushBlockEveryKeyPolicy, - // therefore won't trigger any data block's buffering - builder->Add(ik1.Encode(), value1); - ASSERT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); - - std::string key2 = "key2"; - std::string value2 = "val2"; - InternalKey ik2(key2, 1 /* sequence number */, kTypeValue); - // Adding the second key will trigger a flush of the last data block (the - // one containing key1 and value1) by FlushBlockEveryKeyPolicy and hence - // trigger buffering of that data block. 
- builder->Add(ik2.Encode(), value2); - // Cache charging will increase for last buffered data block (the one - // containing key1 and value1) since the buffer limit is not exceeded after - // that buffering and the cache will not be full after this reservation - if (charge_compression_dictionary_building_buffer == - CacheEntryRoleOptions::Decision::kEnabled) { - EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry); - EXPECT_LT(cache->GetPinnedUsage(), - 1 * kSizeDummyEntry + kMetaDataChargeOverhead); - } else { - EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); - } - - ASSERT_OK(builder->Finish()); - EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); - } -} - -TEST_F(ChargeCompressionDictionaryBuildingBufferTest, - BasicWithBufferLimitExceed) { - constexpr std::size_t kSizeDummyEntry = 256 * 1024; - constexpr std::size_t kMetaDataChargeOverhead = 10000; - constexpr std::size_t kCacheCapacity = 8 * 1024 * 1024; - constexpr std::size_t kMaxDictBytes = 1024; - constexpr std::size_t kMaxDictBufferBytes = 2 * kSizeDummyEntry; - - // `CacheEntryRoleOptions::charged` is enabled by default for - // CacheEntryRole::kCompressionDictionaryBuildingBuffer - BlockBasedTableOptions table_options; - LRUCacheOptions lo; - lo.capacity = kCacheCapacity; - lo.num_shard_bits = 0; // 2^0 shard - lo.strict_capacity_limit = true; - std::shared_ptr cache(NewLRUCache(lo)); - table_options.block_cache = cache; - table_options.flush_block_policy_factory = - std::make_shared(); - - Options options; - options.compression = kSnappyCompression; - options.compression_opts.max_dict_bytes = kMaxDictBytes; - options.compression_opts.max_dict_buffer_bytes = kMaxDictBufferBytes; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - test::StringSink* sink = new test::StringSink(); - std::unique_ptr holder(sink); - std::unique_ptr file_writer(new WritableFileWriter( - std::move(holder), "test_file_name", FileOptions())); - - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - InternalKeyComparator ikc(options.comparator); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - - std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, - &int_tbl_prop_collector_factories, kSnappyCompression, - options.compression_opts, kUnknownColumnFamily, - "test_cf", -1 /* level */), - file_writer.get())); - - std::string key1 = "key1"; - std::string value1(kSizeDummyEntry, '0'); - InternalKey ik1(key1, 0 /* sequnce number */, kTypeValue); - // Adding the first key won't trigger a flush by FlushBlockEveryKeyPolicy - // therefore won't trigger any data block's buffering - builder->Add(ik1.Encode(), value1); - ASSERT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); - - std::string key2 = "key2"; - std::string value2(kSizeDummyEntry, '0'); - InternalKey ik2(key2, 1 /* sequnce number */, kTypeValue); - // Adding the second key will trigger a flush of the last data block (the one - // containing key1 and value1) by FlushBlockEveryKeyPolicy and hence trigger - // buffering of the last data block. 
- builder->Add(ik2.Encode(), value2); - // Cache charging will increase for last buffered data block (the one - // containing key1 and value1) since the buffer limit is not exceeded after - // the buffering and the cache will not be full after this reservation - EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry); - EXPECT_LT(cache->GetPinnedUsage(), - 2 * kSizeDummyEntry + kMetaDataChargeOverhead); - - std::string key3 = "key3"; - std::string value3 = "val3"; - InternalKey ik3(key3, 2 /* sequnce number */, kTypeValue); - // Adding the third key will trigger a flush of the last data block (the one - // containing key2 and value2) by FlushBlockEveryKeyPolicy and hence trigger - // buffering of the last data block. - builder->Add(ik3.Encode(), value3); - // Cache charging will decrease since the buffer limit is now exceeded - // after the last buffering and EnterUnbuffered() is triggered - EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); - - ASSERT_OK(builder->Finish()); - EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); -} - -TEST_F(ChargeCompressionDictionaryBuildingBufferTest, BasicWithCacheFull) { - constexpr std::size_t kSizeDummyEntry = 256 * 1024; - constexpr std::size_t kMetaDataChargeOverhead = 10000; - // A small kCacheCapacity is chosen so that increase cache charging for - // buffering two data blocks, each containing key1/value1, key2/a big - // value2, will cause cache full - constexpr std::size_t kCacheCapacity = - 1 * kSizeDummyEntry + kSizeDummyEntry / 2; - constexpr std::size_t kMaxDictBytes = 1024; - // A big kMaxDictBufferBytes is chosen so that adding a big key value pair - // (key2, value2) won't exceed the buffer limit - constexpr std::size_t kMaxDictBufferBytes = 1024 * 1024 * 1024; - - // `CacheEntryRoleOptions::charged` is enabled by default for - // CacheEntryRole::kCompressionDictionaryBuildingBuffer - BlockBasedTableOptions table_options; - LRUCacheOptions lo; - lo.capacity = kCacheCapacity; - lo.num_shard_bits = 0; // 2^0 shard - lo.strict_capacity_limit = true; - std::shared_ptr cache(NewLRUCache(lo)); - table_options.block_cache = cache; - table_options.flush_block_policy_factory = - std::make_shared(); - - Options options; - options.compression = kSnappyCompression; - options.compression_opts.max_dict_bytes = kMaxDictBytes; - options.compression_opts.max_dict_buffer_bytes = kMaxDictBufferBytes; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - test::StringSink* sink = new test::StringSink(); - std::unique_ptr holder(sink); - std::unique_ptr file_writer(new WritableFileWriter( - std::move(holder), "test_file_name", FileOptions())); - - ImmutableOptions ioptions(options); - MutableCFOptions moptions(options); - InternalKeyComparator ikc(options.comparator); - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - - std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, - &int_tbl_prop_collector_factories, kSnappyCompression, - options.compression_opts, kUnknownColumnFamily, - "test_cf", -1 /* level */), - file_writer.get())); - - std::string key1 = "key1"; - std::string value1 = "val1"; - InternalKey ik1(key1, 0 /* sequnce number */, kTypeValue); - // Adding the first key won't trigger a flush by FlushBlockEveryKeyPolicy - // therefore won't trigger any data block's buffering - builder->Add(ik1.Encode(), value1); - ASSERT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); - - std::string key2 = "key2"; - std::string value2(kSizeDummyEntry, '0'); - 
InternalKey ik2(key2, 1 /* sequnce number */, kTypeValue); - // Adding the second key will trigger a flush of the last data block (the one - // containing key1 and value1) by FlushBlockEveryKeyPolicy and hence trigger - // buffering of the last data block. - builder->Add(ik2.Encode(), value2); - // Cache charging will increase for the last buffered data block (the one - // containing key1 and value1) since the buffer limit is not exceeded after - // the buffering and the cache will not be full after this reservation - EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry); - EXPECT_LT(cache->GetPinnedUsage(), - 1 * kSizeDummyEntry + kMetaDataChargeOverhead); - - std::string key3 = "key3"; - std::string value3 = "value3"; - InternalKey ik3(key3, 2 /* sequnce number */, kTypeValue); - // Adding the third key will trigger a flush of the last data block (the one - // containing key2 and value2) by FlushBlockEveryKeyPolicy and hence trigger - // buffering of the last data block. - builder->Add(ik3.Encode(), value3); - // Cache charging will decrease since the cache is now full after - // increasing reservation for the last buffered block and EnterUnbuffered() is - // triggered - EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); - - ASSERT_OK(builder->Finish()); - EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry); -} - -class CacheUsageOptionsOverridesTest : public DBTestBase { - public: - CacheUsageOptionsOverridesTest() - : DBTestBase("cache_usage_options_overrides_test", - /*env_do_fsync=*/false) {} -}; - -TEST_F(CacheUsageOptionsOverridesTest, SanitizeAndValidateOptions) { - // To test `cache_usage_options.options_overrides` is sanitized - // where `cache_usage_options.options` is used when there is no entry in - // `cache_usage_options.options_overrides` - Options options; - options.create_if_missing = true; - BlockBasedTableOptions table_options = BlockBasedTableOptions(); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Destroy(options); - Status s = TryReopen(options); - EXPECT_TRUE(s.ok()); - const auto* sanitized_table_options = - options.table_factory->GetOptions(); - const auto sanitized_options_overrides = - sanitized_table_options->cache_usage_options.options_overrides; - EXPECT_EQ(sanitized_options_overrides.size(), kNumCacheEntryRoles); - for (auto options_overrides_iter = sanitized_options_overrides.cbegin(); - options_overrides_iter != sanitized_options_overrides.cend(); - ++options_overrides_iter) { - CacheEntryRoleOptions role_options = options_overrides_iter->second; - CacheEntryRoleOptions default_options = - sanitized_table_options->cache_usage_options.options; - EXPECT_TRUE(role_options == default_options); - } - Destroy(options); - - // To test option validation on unsupported CacheEntryRole - table_options = BlockBasedTableOptions(); - table_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kDataBlock, - {/*.charged = */ CacheEntryRoleOptions::Decision::kDisabled}}); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Destroy(options); - s = TryReopen(options); - EXPECT_TRUE(s.IsNotSupported()); - EXPECT_TRUE( - s.ToString().find("Enable/Disable CacheEntryRoleOptions::charged") != - std::string::npos); - EXPECT_TRUE( - s.ToString().find(kCacheEntryRoleToCamelString[static_cast( - CacheEntryRole::kDataBlock)]) != std::string::npos); - Destroy(options); - - // To test option validation on existence of block cache - table_options = BlockBasedTableOptions(); - table_options.no_block_cache 
= true; - table_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kFilterConstruction, - {/*.charged = */ CacheEntryRoleOptions::Decision::kEnabled}}); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - Destroy(options); - s = TryReopen(options); - EXPECT_TRUE(s.IsInvalidArgument()); - EXPECT_TRUE(s.ToString().find("Enable CacheEntryRoleOptions::charged") != - std::string::npos); - EXPECT_TRUE( - s.ToString().find(kCacheEntryRoleToCamelString[static_cast( - CacheEntryRole::kFilterConstruction)]) != std::string::npos); - EXPECT_TRUE(s.ToString().find("block cache is disabled") != - std::string::npos); - Destroy(options); -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/test_util/testutil_test.cc b/test_util/testutil_test.cc deleted file mode 100644 index 41f26e389..000000000 --- a/test_util/testutil_test.cc +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "test_util/testutil.h" - -#include "file/file_util.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -void CreateFile(Env* env, const std::string& path) { - std::unique_ptr f; - ASSERT_OK(env->NewWritableFile(path, &f, EnvOptions())); - f->Close(); -} - -TEST(TestUtil, DestroyDirRecursively) { - auto env = Env::Default(); - // test_util/file - // /dir - // /dir/file - std::string test_dir = test::PerThreadDBPath("test_util"); - ASSERT_OK(env->CreateDir(test_dir)); - CreateFile(env, test_dir + "/file"); - ASSERT_OK(env->CreateDir(test_dir + "/dir")); - CreateFile(env, test_dir + "/dir/file"); - - ASSERT_OK(DestroyDir(env, test_dir)); - auto s = env->FileExists(test_dir); - ASSERT_TRUE(s.IsNotFound()); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/tools/db_bench_tool_test.cc b/tools/db_bench_tool_test.cc deleted file mode 100644 index a30c65065..000000000 --- a/tools/db_bench_tool_test.cc +++ /dev/null @@ -1,342 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
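For reference alongside the SanitizeAndValidateOptions test above, the following standalone sketch shows how a caller could opt a cache-entry role into cache charging through BlockBasedTableOptions::cache_usage_options. It reuses only the fields and enumerators that appear in the test; the cache size, database path, and header locations are assumptions, not taken from the deleted file.

#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/table.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  BlockBasedTableOptions table_options;
  // A block cache must be present; charging against a disabled block cache
  // is rejected with InvalidArgument, as the test above asserts.
  table_options.block_cache = NewLRUCache(64 << 20);
  // Charge filter-construction memory against the block cache.
  table_options.cache_usage_options.options_overrides.insert(
      {CacheEntryRole::kFilterConstruction,
       {/*.charged = */ CacheEntryRoleOptions::Decision::kEnabled}});

  Options options;
  options.create_if_missing = true;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  DB* db = nullptr;
  Status s = DB::Open(options, "/tmp/cache_charging_sketch", &db);
  // An override for an unsupported role (e.g. kDataBlock) would instead
  // surface here as a NotSupported status, per the test above.
  if (s.ok()) {
    delete db;
  }
  return s.ok() ? 0 : 1;
}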
- -#include "rocksdb/db_bench_tool.h" - -#include "db/db_impl/db_impl.h" -#include "options/options_parser.h" -#include "rocksdb/utilities/options_util.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/random.h" - -#ifdef GFLAGS -#include "util/gflags_compat.h" - -namespace ROCKSDB_NAMESPACE { -namespace { -static const int kMaxArgCount = 100; -static const size_t kArgBufferSize = 100000; -} // namespace - -class DBBenchTest : public testing::Test { - public: - DBBenchTest() : rnd_(0xFB) { - test_path_ = test::PerThreadDBPath("db_bench_test"); - Env::Default()->CreateDir(test_path_); - db_path_ = test_path_ + "/db"; - wal_path_ = test_path_ + "/wal"; - } - - ~DBBenchTest() { - // DestroyDB(db_path_, Options()); - } - - void ResetArgs() { - argc_ = 0; - cursor_ = 0; - memset(arg_buffer_, 0, kArgBufferSize); - } - - void AppendArgs(const std::vector& args) { - for (const auto& arg : args) { - ASSERT_LE(cursor_ + arg.size() + 1, kArgBufferSize); - ASSERT_LE(argc_ + 1, kMaxArgCount); - snprintf(arg_buffer_ + cursor_, arg.size() + 1, "%s", arg.c_str()); - - argv_[argc_++] = arg_buffer_ + cursor_; - cursor_ += arg.size() + 1; - } - } - - // Gets the default options for this test/db_bench. - // Note that db_bench may change some of the default option values and that - // the database might as well. The options changed by db_bench are - // specified here; the ones by the DB are set via SanitizeOptions - Options GetDefaultOptions(CompactionStyle style = kCompactionStyleLevel, - int levels = 7) const { - Options opt; - - opt.create_if_missing = true; - opt.max_open_files = 256; - opt.max_background_compactions = 10; - opt.dump_malloc_stats = true; // db_bench uses a different default - opt.compaction_style = style; - opt.num_levels = levels; - opt.compression = kNoCompression; - opt.arena_block_size = 8388608; - - return SanitizeOptions(db_path_, opt); - } - - void RunDbBench(const std::string& options_file_name) { - AppendArgs({"./db_bench", "--benchmarks=fillseq", "--use_existing_db=0", - "--num=1000", "--compression_type=none", - std::string(std::string("--db=") + db_path_).c_str(), - std::string(std::string("--wal_dir=") + wal_path_).c_str(), - std::string(std::string("--options_file=") + options_file_name) - .c_str()}); - ASSERT_EQ(0, db_bench_tool(argc(), argv())); - } - - void VerifyOptions(const Options& opt) { - DBOptions loaded_db_opts; - ConfigOptions config_opts; - config_opts.ignore_unknown_options = false; - config_opts.input_strings_escaped = true; - config_opts.env = Env::Default(); - std::vector cf_descs; - ASSERT_OK( - LoadLatestOptions(config_opts, db_path_, &loaded_db_opts, &cf_descs)); - - ConfigOptions exact; - exact.input_strings_escaped = false; - exact.sanity_level = ConfigOptions::kSanityLevelExactMatch; - ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(exact, DBOptions(opt), - loaded_db_opts)); - ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( - exact, ColumnFamilyOptions(opt), cf_descs[0].options)); - - // check with the default rocksdb options and expect failure - ASSERT_NOK(RocksDBOptionsParser::VerifyDBOptions(exact, DBOptions(), - loaded_db_opts)); - ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( - exact, ColumnFamilyOptions(), cf_descs[0].options)); - } - - char** argv() { return argv_; } - - int argc() { return argc_; } - - std::string db_path_; - std::string test_path_; - std::string wal_path_; - - char arg_buffer_[kArgBufferSize]; - char* argv_[kMaxArgCount]; - int argc_ = 0; - int cursor_ = 0; - Random rnd_; -}; - 
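The AppendArgs helper in the fixture above packs every argument into one flat character buffer so that the char* pointers handed to db_bench_tool stay valid for the lifetime of the test. A self-contained sketch of the same packing technique follows; PackArgs and the buffer sizes are illustrative names, not part of the deleted file.

#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

// Copy each argument (including its terminating NUL) into `buffer` and record
// a pointer to it in `argv`. Returns false if either capacity would overflow.
static bool PackArgs(const std::vector<std::string>& args, char* buffer,
                     size_t buffer_size, char** argv, int max_argc, int* argc) {
  size_t cursor = 0;
  *argc = 0;
  for (const auto& arg : args) {
    if (cursor + arg.size() + 1 > buffer_size || *argc + 1 > max_argc) {
      return false;
    }
    std::memcpy(buffer + cursor, arg.c_str(), arg.size() + 1);
    argv[(*argc)++] = buffer + cursor;
    cursor += arg.size() + 1;
  }
  return true;
}

int main() {
  char buffer[4096];
  char* argv[16];
  int argc = 0;
  bool ok = PackArgs({"./db_bench", "--benchmarks=fillseq", "--num=1000"},
                     buffer, sizeof(buffer), argv, 16, &argc);
  for (int i = 0; ok && i < argc; ++i) {
    std::printf("argv[%d] = %s\n", i, argv[i]);
  }
  return ok ? 0 : 1;
}

Compared to the snprintf call in AppendArgs, memcpy of size + 1 bytes copies the terminator explicitly; both leave each argument NUL-terminated inside the shared buffer.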
-namespace {} // namespace - -TEST_F(DBBenchTest, OptionsFile) { - const std::string kOptionsFileName = test_path_ + "/OPTIONS_test"; - Options opt = GetDefaultOptions(); - ASSERT_OK(PersistRocksDBOptions(DBOptions(opt), {"default"}, - {ColumnFamilyOptions(opt)}, kOptionsFileName, - opt.env->GetFileSystem().get())); - - // override the following options as db_bench will not take these - // options from the options file - opt.wal_dir = wal_path_; - - RunDbBench(kOptionsFileName); - opt.delayed_write_rate = 16 * 1024 * 1024; // Set by SanitizeOptions - - VerifyOptions(opt); -} - -TEST_F(DBBenchTest, OptionsFileUniversal) { - const std::string kOptionsFileName = test_path_ + "/OPTIONS_test"; - - Options opt = GetDefaultOptions(kCompactionStyleUniversal, 1); - - ASSERT_OK(PersistRocksDBOptions(DBOptions(opt), {"default"}, - {ColumnFamilyOptions(opt)}, kOptionsFileName, - opt.env->GetFileSystem().get())); - - // override the following options as db_bench will not take these - // options from the options file - opt.wal_dir = wal_path_; - RunDbBench(kOptionsFileName); - - VerifyOptions(opt); -} - -TEST_F(DBBenchTest, OptionsFileMultiLevelUniversal) { - const std::string kOptionsFileName = test_path_ + "/OPTIONS_test"; - - Options opt = GetDefaultOptions(kCompactionStyleUniversal, 12); - - ASSERT_OK(PersistRocksDBOptions(DBOptions(opt), {"default"}, - {ColumnFamilyOptions(opt)}, kOptionsFileName, - opt.env->GetFileSystem().get())); - - // override the following options as db_bench will not take these - // options from the options file - opt.wal_dir = wal_path_; - - RunDbBench(kOptionsFileName); - VerifyOptions(opt); -} - -const std::string options_file_content = R"OPTIONS_FILE( -[Version] - rocksdb_version=4.3.1 - options_file_version=1.1 - -[DBOptions] - wal_bytes_per_sync=1048576 - delete_obsolete_files_period_micros=0 - WAL_ttl_seconds=0 - WAL_size_limit_MB=0 - db_write_buffer_size=0 - max_subcompactions=1 - table_cache_numshardbits=4 - max_open_files=-1 - max_file_opening_threads=10 - max_background_compactions=5 - use_fsync=false - use_adaptive_mutex=false - max_total_wal_size=18446744073709551615 - compaction_readahead_size=0 - keep_log_file_num=10 - skip_stats_update_on_db_open=false - max_manifest_file_size=18446744073709551615 - db_log_dir= - writable_file_max_buffer_size=1048576 - paranoid_checks=true - is_fd_close_on_exec=true - bytes_per_sync=1048576 - enable_thread_tracking=true - recycle_log_file_num=0 - create_missing_column_families=false - log_file_time_to_roll=0 - max_background_flushes=1 - create_if_missing=true - error_if_exists=false - delayed_write_rate=1048576 - manifest_preallocation_size=4194304 - allow_mmap_reads=false - allow_mmap_writes=false - use_direct_reads=false - use_direct_io_for_flush_and_compaction=false - stats_dump_period_sec=600 - allow_fallocate=true - max_log_file_size=83886080 - random_access_max_buffer_size=1048576 - advise_random_on_open=true - dump_malloc_stats=true - -[CFOptions "default"] - compaction_filter_factory=nullptr - table_factory=BlockBasedTable - prefix_extractor=nullptr - comparator=leveldb.BytewiseComparator - compression_per_level= - max_bytes_for_level_base=104857600 - bloom_locality=0 - target_file_size_base=10485760 - memtable_huge_page_size=0 - max_successive_merges=1000 - max_sequential_skip_in_iterations=8 - arena_block_size=52428800 - target_file_size_multiplier=1 - source_compaction_factor=1 - min_write_buffer_number_to_merge=1 - max_write_buffer_number=2 - write_buffer_size=419430400 - max_grandparent_overlap_factor=10 - 
max_bytes_for_level_multiplier=10 - memtable_factory=SkipListFactory - compression=kNoCompression - min_partial_merge_operands=2 - level0_stop_writes_trigger=100 - num_levels=1 - level0_slowdown_writes_trigger=50 - level0_file_num_compaction_trigger=10 - expanded_compaction_factor=25 - max_write_buffer_number_to_maintain=0 - max_write_buffer_size_to_maintain=0 - verify_checksums_in_compaction=true - merge_operator=nullptr - memtable_prefix_bloom_bits=0 - memtable_whole_key_filtering=true - paranoid_file_checks=false - inplace_update_num_locks=10000 - optimize_filters_for_hits=false - level_compaction_dynamic_level_bytes=false - inplace_update_support=false - compaction_style=kCompactionStyleUniversal - memtable_prefix_bloom_probes=6 - filter_deletes=false - hard_pending_compaction_bytes_limit=0 - disable_auto_compactions=false - compaction_measure_io_stats=false - enable_blob_files=true - min_blob_size=16 - blob_file_size=10485760 - blob_compression_type=kNoCompression - enable_blob_garbage_collection=true - blob_garbage_collection_age_cutoff=0.5 - blob_garbage_collection_force_threshold=0.75 - blob_compaction_readahead_size=262144 - blob_file_starting_level=0 - prepopulate_blob_cache=kDisable; - -[TableOptions/BlockBasedTable "default"] - format_version=0 - skip_table_builder_flush=false - cache_index_and_filter_blocks=false - flush_block_policy_factory=FlushBlockBySizePolicyFactory - index_type=kBinarySearch - whole_key_filtering=true - checksum=kCRC32c - no_block_cache=false - block_size=32768 - block_size_deviation=10 - block_restart_interval=16 - filter_policy=rocksdb.BuiltinBloomFilter -)OPTIONS_FILE"; - -TEST_F(DBBenchTest, OptionsFileFromFile) { - const std::string kOptionsFileName = test_path_ + "/OPTIONS_flash"; - std::unique_ptr writable; - ASSERT_OK(Env::Default()->NewWritableFile(kOptionsFileName, &writable, - EnvOptions())); - ASSERT_OK(writable->Append(options_file_content)); - ASSERT_OK(writable->Close()); - - DBOptions db_opt; - ConfigOptions config_opt; - config_opt.ignore_unknown_options = false; - config_opt.input_strings_escaped = true; - config_opt.env = Env::Default(); - std::vector cf_descs; - ASSERT_OK( - LoadOptionsFromFile(config_opt, kOptionsFileName, &db_opt, &cf_descs)); - Options opt(db_opt, cf_descs[0].options); - opt.create_if_missing = true; - - // override the following options as db_bench will not take these - // options from the options file - opt.wal_dir = wal_path_; - - RunDbBench(kOptionsFileName); - - VerifyOptions(SanitizeOptions(db_path_, opt)); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); - return RUN_ALL_TESTS(); -} - -#else - -int main(int argc, char** argv) { - printf("Skip db_bench_tool_test as the required library GFLAG is missing."); -} -#endif // #ifdef GFLAGS diff --git a/tools/db_sanity_test.cc b/tools/db_sanity_test.cc deleted file mode 100644 index f40be5ae2..000000000 --- a/tools/db_sanity_test.cc +++ /dev/null @@ -1,296 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
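The OptionsFileFromFile test above feeds a hand-written OPTIONS file through LoadOptionsFromFile before launching db_bench against it. The following minimal sketch isolates just that loading step; the file path and the fields printed are illustrative, and only the options_util API already used by the test is relied on.

#include <cstdio>
#include <vector>

#include "rocksdb/convenience.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/utilities/options_util.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  ConfigOptions config_opts;
  config_opts.ignore_unknown_options = false;
  config_opts.input_strings_escaped = true;
  config_opts.env = Env::Default();

  DBOptions db_opts;
  std::vector<ColumnFamilyDescriptor> cf_descs;
  Status s = LoadOptionsFromFile(config_opts, "/tmp/OPTIONS_example", &db_opts,
                                 &cf_descs);
  if (!s.ok()) {
    std::fprintf(stderr, "load failed: %s\n", s.ToString().c_str());
    return 1;
  }
  for (const auto& cf : cf_descs) {
    std::fprintf(stdout, "column family %s: write_buffer_size=%zu\n",
                 cf.name.c_str(), cf.options.write_buffer_size);
  }
  return 0;
}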
- -#include -#include -#include -#include - -#include "port/port.h" -#include "rocksdb/comparator.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/options.h" -#include "rocksdb/slice.h" -#include "rocksdb/slice_transform.h" -#include "rocksdb/status.h" -#include "rocksdb/table.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class SanityTest { - public: - explicit SanityTest(const std::string& path) - : env_(Env::Default()), path_(path) { - env_->CreateDirIfMissing(path); - } - virtual ~SanityTest() {} - - virtual std::string Name() const = 0; - virtual Options GetOptions() const = 0; - - Status Create() { - Options options = GetOptions(); - options.create_if_missing = true; - std::string dbname = path_ + Name(); - Status s = DestroyDB(dbname, options); - if (!s.ok()) { - return s; - } - DB* db = nullptr; - s = DB::Open(options, dbname, &db); - std::unique_ptr db_guard(db); - if (!s.ok()) { - return s; - } - for (int i = 0; i < 1000000; ++i) { - std::string k = "key" + std::to_string(i); - std::string v = "value" + std::to_string(i); - s = db->Put(WriteOptions(), Slice(k), Slice(v)); - if (!s.ok()) { - return s; - } - } - return db->Flush(FlushOptions()); - } - Status Verify() { - DB* db = nullptr; - std::string dbname = path_ + Name(); - Status s = DB::Open(GetOptions(), dbname, &db); - std::unique_ptr db_guard(db); - if (!s.ok()) { - return s; - } - for (int i = 0; i < 1000000; ++i) { - std::string k = "key" + std::to_string(i); - std::string v = "value" + std::to_string(i); - std::string result; - s = db->Get(ReadOptions(), Slice(k), &result); - if (!s.ok()) { - return s; - } - if (result != v) { - return Status::Corruption("Unexpected value for key " + k); - } - } - return Status::OK(); - } - - private: - Env* env_; - std::string const path_; -}; - -class SanityTestBasic : public SanityTest { - public: - explicit SanityTestBasic(const std::string& path) : SanityTest(path) {} - virtual Options GetOptions() const override { - Options options; - options.create_if_missing = true; - return options; - } - virtual std::string Name() const override { return "Basic"; } -}; - -class SanityTestSpecialComparator : public SanityTest { - public: - explicit SanityTestSpecialComparator(const std::string& path) - : SanityTest(path) { - options_.comparator = new NewComparator(); - } - ~SanityTestSpecialComparator() { delete options_.comparator; } - virtual Options GetOptions() const override { return options_; } - virtual std::string Name() const override { return "SpecialComparator"; } - - private: - class NewComparator : public Comparator { - public: - virtual const char* Name() const override { - return "rocksdb.NewComparator"; - } - virtual int Compare(const Slice& a, const Slice& b) const override { - return BytewiseComparator()->Compare(a, b); - } - virtual void FindShortestSeparator(std::string* s, - const Slice& l) const override { - BytewiseComparator()->FindShortestSeparator(s, l); - } - virtual void FindShortSuccessor(std::string* key) const override { - BytewiseComparator()->FindShortSuccessor(key); - } - }; - Options options_; -}; - -class SanityTestZlibCompression : public SanityTest { - public: - explicit SanityTestZlibCompression(const std::string& path) - : SanityTest(path) { - options_.compression = kZlibCompression; - } - virtual Options GetOptions() const override { return options_; } - virtual std::string Name() const override { return "ZlibCompression"; } - - private: - Options options_; -}; - -class 
SanityTestZlibCompressionVersion2 : public SanityTest { - public: - explicit SanityTestZlibCompressionVersion2(const std::string& path) - : SanityTest(path) { - options_.compression = kZlibCompression; - BlockBasedTableOptions table_options; -#if ROCKSDB_MAJOR > 3 || (ROCKSDB_MAJOR == 3 && ROCKSDB_MINOR >= 10) - table_options.format_version = 2; -#endif - options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); - } - virtual Options GetOptions() const override { return options_; } - virtual std::string Name() const override { - return "ZlibCompressionVersion2"; - } - - private: - Options options_; -}; - -class SanityTestLZ4Compression : public SanityTest { - public: - explicit SanityTestLZ4Compression(const std::string& path) - : SanityTest(path) { - options_.compression = kLZ4Compression; - } - virtual Options GetOptions() const override { return options_; } - virtual std::string Name() const override { return "LZ4Compression"; } - - private: - Options options_; -}; - -class SanityTestLZ4HCCompression : public SanityTest { - public: - explicit SanityTestLZ4HCCompression(const std::string& path) - : SanityTest(path) { - options_.compression = kLZ4HCCompression; - } - virtual Options GetOptions() const override { return options_; } - virtual std::string Name() const override { return "LZ4HCCompression"; } - - private: - Options options_; -}; - -class SanityTestZSTDCompression : public SanityTest { - public: - explicit SanityTestZSTDCompression(const std::string& path) - : SanityTest(path) { - options_.compression = kZSTD; - } - virtual Options GetOptions() const override { return options_; } - virtual std::string Name() const override { return "ZSTDCompression"; } - - private: - Options options_; -}; - -class SanityTestPlainTableFactory : public SanityTest { - public: - explicit SanityTestPlainTableFactory(const std::string& path) - : SanityTest(path) { - options_.table_factory.reset(NewPlainTableFactory()); - options_.prefix_extractor.reset(NewFixedPrefixTransform(2)); - options_.allow_mmap_reads = true; - } - ~SanityTestPlainTableFactory() {} - virtual Options GetOptions() const override { return options_; } - virtual std::string Name() const override { return "PlainTable"; } - - private: - Options options_; -}; - -class SanityTestBloomFilter : public SanityTest { - public: - explicit SanityTestBloomFilter(const std::string& path) : SanityTest(path) { - BlockBasedTableOptions table_options; - table_options.filter_policy.reset(NewBloomFilterPolicy(10)); - options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); - } - ~SanityTestBloomFilter() {} - virtual Options GetOptions() const override { return options_; } - virtual std::string Name() const override { return "BloomFilter"; } - - private: - Options options_; -}; - -namespace { -bool RunSanityTests(const std::string& command, const std::string& path) { - bool result = true; -// Suppress false positive clang static anaylzer warnings. 
-#ifndef __clang_analyzer__ - std::vector sanity_tests = { - new SanityTestBasic(path), - new SanityTestSpecialComparator(path), - new SanityTestZlibCompression(path), - new SanityTestZlibCompressionVersion2(path), - new SanityTestLZ4Compression(path), - new SanityTestLZ4HCCompression(path), - new SanityTestZSTDCompression(path), - new SanityTestPlainTableFactory(path), - new SanityTestBloomFilter(path)}; - - if (command == "create") { - fprintf(stderr, "Creating...\n"); - } else { - fprintf(stderr, "Verifying...\n"); - } - for (auto sanity_test : sanity_tests) { - Status s; - fprintf(stderr, "%s -- ", sanity_test->Name().c_str()); - if (command == "create") { - s = sanity_test->Create(); - } else { - assert(command == "verify"); - s = sanity_test->Verify(); - } - fprintf(stderr, "%s\n", s.ToString().c_str()); - if (!s.ok()) { - fprintf(stderr, "FAIL\n"); - result = false; - } - - delete sanity_test; - } -#endif // __clang_analyzer__ - return result; -} -} // namespace - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - std::string path, command; - bool ok = (argc == 3); - if (ok) { - path = std::string(argv[1]); - command = std::string(argv[2]); - ok = (command == "create" || command == "verify"); - } - if (!ok) { - fprintf(stderr, "Usage: %s [create|verify] \n", argv[0]); - exit(1); - } - if (path.back() != '/') { - path += "/"; - } - - bool sanity_ok = ROCKSDB_NAMESPACE::RunSanityTests(command, path); - - return sanity_ok ? 0 : 1; -} diff --git a/tools/io_tracer_parser_test.cc b/tools/io_tracer_parser_test.cc deleted file mode 100644 index 8e1fb72df..000000000 --- a/tools/io_tracer_parser_test.cc +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
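Each sanity test above is a create/verify round trip: write a fixed key set under one configuration, then reopen with the same configuration and check every value. A standalone sketch of that round trip is below; the database path is illustrative, and kSnappyCompression is only an example configuration (it assumes a Snappy-enabled build), not one of the configurations in the deleted harness.

#include <memory>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  const std::string dbname = "/tmp/sanity_round_trip";
  Options options;
  options.create_if_missing = true;
  options.compression = kSnappyCompression;  // configuration under test

  {  // Create phase: write and flush a known key set.
    DB* db = nullptr;
    if (!DB::Open(options, dbname, &db).ok()) return 1;
    std::unique_ptr<DB> guard(db);
    for (int i = 0; i < 1000; ++i) {
      if (!db->Put(WriteOptions(), "key" + std::to_string(i),
                   "value" + std::to_string(i))
               .ok()) {
        return 1;
      }
    }
    if (!db->Flush(FlushOptions()).ok()) return 1;
  }

  // Verify phase: reopen and confirm every value survived.
  DB* db = nullptr;
  if (!DB::Open(options, dbname, &db).ok()) return 1;
  std::unique_ptr<DB> guard(db);
  for (int i = 0; i < 1000; ++i) {
    std::string result;
    if (!db->Get(ReadOptions(), "key" + std::to_string(i), &result).ok() ||
        result != "value" + std::to_string(i)) {
      return 1;
    }
  }
  return 0;
}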
-// - -#ifndef GFLAGS -#include -int main() { - fprintf(stderr, "Please install gflags to run io_tracer_parser_test\n"); - return 0; -} -#else - -#include -#include - -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/status.h" -#include "rocksdb/trace_reader_writer.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "tools/io_tracer_parser_tool.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -const int kMaxArgCount = 100; -const size_t kArgBufferSize = 100000; -} // namespace - -class IOTracerParserTest : public testing::Test { - public: - IOTracerParserTest() { - test_path_ = test::PerThreadDBPath("io_tracer_parser_test"); - env_ = ROCKSDB_NAMESPACE::Env::Default(); - EXPECT_OK(env_->CreateDirIfMissing(test_path_)); - trace_file_path_ = test_path_ + "/io_trace_file"; - dbname_ = test_path_ + "/db"; - Options options; - options.create_if_missing = true; - EXPECT_OK(DB::Open(options, dbname_, &db_)); - } - - ~IOTracerParserTest() { - if (env_->FileExists(trace_file_path_).ok()) { - EXPECT_OK(env_->DeleteFile(trace_file_path_)); - } - if (db_ != nullptr) { - Options options; - options.env = env_; - delete db_; - db_ = nullptr; - EXPECT_OK(DestroyDB(dbname_, options)); - } - EXPECT_OK(env_->DeleteDir(test_path_)); - } - - void GenerateIOTrace() { - WriteOptions write_opt; - TraceOptions trace_opt; - std::unique_ptr trace_writer; - - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - - ASSERT_OK(db_->StartIOTrace(trace_opt, std::move(trace_writer))); - - for (int i = 0; i < 10; i++) { - ASSERT_OK(db_->Put(write_opt, "key_" + std::to_string(i), - "value_" + std::to_string(i))); - ASSERT_OK(db_->Flush(FlushOptions())); - } - - ASSERT_OK(db_->EndIOTrace()); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - - void RunIOTracerParserTool() { - std::vector params = {"./io_tracer_parser", - "-io_trace_file=" + trace_file_path_}; - - char arg_buffer[kArgBufferSize]; - char* argv[kMaxArgCount]; - int argc = 0; - int cursor = 0; - for (const auto& arg : params) { - ASSERT_LE(cursor + arg.size() + 1, kArgBufferSize); - ASSERT_LE(argc + 1, kMaxArgCount); - - snprintf(arg_buffer + cursor, arg.size() + 1, "%s", arg.c_str()); - - argv[argc++] = arg_buffer + cursor; - cursor += static_cast(arg.size()) + 1; - } - ASSERT_EQ(0, ROCKSDB_NAMESPACE::io_tracer_parser(argc, argv)); - } - - DB* db_; - Env* env_; - EnvOptions env_options_; - std::string trace_file_path_; - std::string output_file_; - std::string test_path_; - std::string dbname_; -}; - -TEST_F(IOTracerParserTest, InvalidArguments) { - { - std::vector params = {"./io_tracer_parser"}; - char arg_buffer[kArgBufferSize]; - char* argv[kMaxArgCount]; - int argc = 0; - int cursor = 0; - for (const auto& arg : params) { - ASSERT_LE(cursor + arg.size() + 1, kArgBufferSize); - ASSERT_LE(argc + 1, kMaxArgCount); - - snprintf(arg_buffer + cursor, arg.size() + 1, "%s", arg.c_str()); - - argv[argc++] = arg_buffer + cursor; - cursor += static_cast(arg.size()) + 1; - } - ASSERT_EQ(1, ROCKSDB_NAMESPACE::io_tracer_parser(argc, argv)); - } -} - -TEST_F(IOTracerParserTest, DumpAndParseIOTraceRecords) { - GenerateIOTrace(); - RunIOTracerParserTool(); -} - -TEST_F(IOTracerParserTest, NoRecordingAfterEndIOTrace) { - uint64_t file_size = 0; - // Generate IO trace records and parse them. 
- { - GenerateIOTrace(); - RunIOTracerParserTool(); - ASSERT_OK(env_->GetFileSize(trace_file_path_, &file_size)); - } - // Once DB::EndIOTrace is invoked in GenerateIOTrace(), no new records should - // be appended. - { - WriteOptions write_opt; - for (int i = 10; i < 20; i++) { - ASSERT_OK(db_->Put(write_opt, "key_" + std::to_string(i), - "value_" + std::to_string(i))); - ASSERT_OK(db_->Flush(FlushOptions())); - } - } - - uint64_t new_file_size = 0; - ASSERT_OK(env_->GetFileSize(trace_file_path_, &new_file_size)); - ASSERT_EQ(file_size, new_file_size); -} - -TEST_F(IOTracerParserTest, NoRecordingBeforeStartIOTrace) { - { - WriteOptions write_opt; - for (int i = 10; i < 20; i++) { - ASSERT_OK(db_->Put(write_opt, "key_" + std::to_string(i), - "value_" + std::to_string(i))); - ASSERT_OK(db_->Flush(FlushOptions())); - } - // IO trace file doesn't exist - ASSERT_NOK(env_->FileExists(trace_file_path_)); - } - // Generate IO trace records and parse them. - { - GenerateIOTrace(); - RunIOTracerParserTool(); - } -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} -#endif // GFLAGS diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc deleted file mode 100644 index c5b4115d1..000000000 --- a/tools/ldb_cmd_test.cc +++ /dev/null @@ -1,1216 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#include "rocksdb/utilities/ldb_cmd.h" - -#include - -#include "db/db_test_util.h" -#include "db/version_edit.h" -#include "db/version_set.h" -#include "env/composite_env_wrapper.h" -#include "file/filename.h" -#include "port/stack_trace.h" -#include "rocksdb/advanced_options.h" -#include "rocksdb/convenience.h" -#include "rocksdb/db.h" -#include "rocksdb/file_checksum.h" -#include "rocksdb/file_system.h" -#include "rocksdb/utilities/options_util.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/file_checksum_helper.h" -#include "util/random.h" - -using std::map; -using std::string; -using std::vector; - -namespace ROCKSDB_NAMESPACE { - -class LdbCmdTest : public testing::Test { - public: - LdbCmdTest() : testing::Test() {} - - Env* TryLoadCustomOrDefaultEnv() { - Env* env = Env::Default(); - EXPECT_OK(test::CreateEnvFromSystem(ConfigOptions(), &env, &env_guard_)); - return env; - } - - private: - std::shared_ptr env_guard_; -}; - -TEST_F(LdbCmdTest, HelpAndVersion) { - Options o; - o.env = TryLoadCustomOrDefaultEnv(); - LDBOptions lo; - static const char* help[] = {"./ldb", "--help"}; - ASSERT_EQ(0, LDBCommandRunner::RunCommand(2, help, o, lo, nullptr)); - static const char* version[] = {"./ldb", "--version"}; - ASSERT_EQ(0, LDBCommandRunner::RunCommand(2, version, o, lo, nullptr)); - static const char* bad[] = {"./ldb", "--not_an_option"}; - ASSERT_NE(0, LDBCommandRunner::RunCommand(2, bad, o, lo, nullptr)); -} - -TEST_F(LdbCmdTest, HexToString) { - // map input to expected outputs. 
- // odd number of "hex" half bytes doesn't make sense - map> inputMap = { - {"0x07", {7}}, {"0x5050", {80, 80}}, {"0xFF", {-1}}, - {"0x1234", {18, 52}}, {"0xaaAbAC", {-86, -85, -84}}, {"0x1203", {18, 3}}, - }; - - for (const auto& inPair : inputMap) { - auto actual = ROCKSDB_NAMESPACE::LDBCommand::HexToString(inPair.first); - auto expected = inPair.second; - for (unsigned int i = 0; i < actual.length(); i++) { - EXPECT_EQ(expected[i], static_cast((signed char)actual[i])); - } - auto reverse = ROCKSDB_NAMESPACE::LDBCommand::StringToHex(actual); - EXPECT_STRCASEEQ(inPair.first.c_str(), reverse.c_str()); - } -} - -TEST_F(LdbCmdTest, HexToStringBadInputs) { - const vector badInputs = { - "0xZZ", "123", "0xx5", "0x111G", "0x123", "Ox12", "0xT", "0x1Q1", - }; - for (const auto& badInput : badInputs) { - try { - ROCKSDB_NAMESPACE::LDBCommand::HexToString(badInput); - std::cerr << "Should fail on bad hex value: " << badInput << "\n"; - FAIL(); - } catch (...) { - } - } -} - -TEST_F(LdbCmdTest, MemEnv) { - Env* base_env = TryLoadCustomOrDefaultEnv(); - std::unique_ptr env(NewMemEnv(base_env)); - Options opts; - opts.env = env.get(); - opts.create_if_missing = true; - - DB* db = nullptr; - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - WriteOptions wopts; - for (int i = 0; i < 100; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08d", i); - ASSERT_OK(db->Put(wopts, buf, buf)); - } - FlushOptions fopts; - fopts.wait = true; - ASSERT_OK(db->Flush(fopts)); - - delete db; - - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "dump_live_files"; - char* argv[] = {arg1, arg2, arg3}; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); -} - -class FileChecksumTestHelper { - private: - Options options_; - DB* db_; - std::string dbname_; - - Status VerifyChecksum(LiveFileMetaData& file_meta) { - std::string cur_checksum; - std::string checksum_func_name; - - Status s; - EnvOptions soptions; - std::unique_ptr file_reader; - std::string file_path = dbname_ + "/" + file_meta.name; - s = options_.env->NewSequentialFile(file_path, &file_reader, soptions); - if (!s.ok()) { - return s; - } - std::unique_ptr scratch(new char[2048]); - Slice result; - FileChecksumGenFactory* file_checksum_gen_factory = - options_.file_checksum_gen_factory.get(); - if (file_checksum_gen_factory == nullptr) { - cur_checksum = kUnknownFileChecksum; - checksum_func_name = kUnknownFileChecksumFuncName; - } else { - FileChecksumGenContext gen_context; - gen_context.file_name = file_meta.name; - std::unique_ptr file_checksum_gen = - file_checksum_gen_factory->CreateFileChecksumGenerator(gen_context); - checksum_func_name = file_checksum_gen->Name(); - s = file_reader->Read(2048, &result, scratch.get()); - if (!s.ok()) { - return s; - } - while (result.size() != 0) { - file_checksum_gen->Update(scratch.get(), result.size()); - s = file_reader->Read(2048, &result, scratch.get()); - if (!s.ok()) { - return s; - } - } - file_checksum_gen->Finalize(); - cur_checksum = file_checksum_gen->GetChecksum(); - } - - std::string stored_checksum = file_meta.file_checksum; - std::string stored_checksum_func_name = file_meta.file_checksum_func_name; - if ((cur_checksum != stored_checksum) || - (checksum_func_name != stored_checksum_func_name)) { - return Status::Corruption( - "Checksum does not match! 
The file: " + file_meta.name + - ", checksum name: " + stored_checksum_func_name + " and checksum " + - stored_checksum + ". However, expected checksum name: " + - checksum_func_name + " and checksum " + cur_checksum); - } - return Status::OK(); - } - - public: - FileChecksumTestHelper(Options& options, DB* db, std::string db_name) - : options_(options), db_(db), dbname_(db_name) {} - ~FileChecksumTestHelper() {} - - // Verify the checksum information in Manifest. - Status VerifyChecksumInManifest( - const std::vector& live_files) { - // Step 1: verify if the dbname_ is correct - if (dbname_.back() != '/') { - dbname_.append("/"); - } - - // Step 2, get the the checksum information by recovering the VersionSet - // from Manifest. - std::unique_ptr checksum_list(NewFileChecksumList()); - EnvOptions sopt; - std::shared_ptr tc(NewLRUCache(options_.max_open_files - 10, - options_.table_cache_numshardbits)); - options_.db_paths.emplace_back(dbname_, 0); - options_.num_levels = 64; - WriteController wc(options_.delayed_write_rate); - WriteBufferManager wb(options_.db_write_buffer_size); - ImmutableDBOptions immutable_db_options(options_); - VersionSet versions(dbname_, &immutable_db_options, sopt, tc.get(), &wb, - &wc, nullptr, nullptr, "", ""); - std::vector cf_name_list; - Status s; - s = versions.ListColumnFamilies(&cf_name_list, dbname_, - immutable_db_options.fs.get()); - if (s.ok()) { - std::vector cf_list; - for (const auto& name : cf_name_list) { - fprintf(stdout, "cf_name: %s", name.c_str()); - cf_list.emplace_back(name, ColumnFamilyOptions(options_)); - } - s = versions.Recover(cf_list, true); - } - if (s.ok()) { - s = versions.GetLiveFilesChecksumInfo(checksum_list.get()); - } - if (!s.ok()) { - return s; - } - - // Step 3 verify the checksum - if (live_files.size() != checksum_list->size()) { - return Status::Corruption("The number of files does not match!"); - } - for (size_t i = 0; i < live_files.size(); i++) { - std::string stored_checksum = ""; - std::string stored_func_name = ""; - s = checksum_list->SearchOneFileChecksum( - live_files[i].file_number, &stored_checksum, &stored_func_name); - if (s.IsNotFound()) { - return s; - } - if (live_files[i].file_checksum != stored_checksum || - live_files[i].file_checksum_func_name != stored_func_name) { - return Status::Corruption( - "Checksum does not match! The file: " + - std::to_string(live_files[i].file_number) + - ". In Manifest, checksum name: " + stored_func_name + - " and checksum " + stored_checksum + - ". However, expected checksum name: " + - live_files[i].file_checksum_func_name + " and checksum " + - live_files[i].file_checksum); - } - } - return Status::OK(); - } - - // Verify the checksum of each file by recalculting the checksum and - // comparing it with the one being generated when a SST file is created. 
- Status VerifyEachFileChecksum() { - assert(db_ != nullptr); - EXPECT_OK(db_->DisableFileDeletions()); - std::vector live_files; - db_->GetLiveFilesMetaData(&live_files); - Status cs; - for (auto a_file : live_files) { - cs = VerifyChecksum(a_file); - if (!cs.ok()) { - break; - } - } - EXPECT_OK(db_->EnableFileDeletions()); - return cs; - } -}; - -TEST_F(LdbCmdTest, DumpFileChecksumNoChecksum) { - Env* base_env = TryLoadCustomOrDefaultEnv(); - std::unique_ptr env(NewMemEnv(base_env)); - Options opts; - opts.env = env.get(); - opts.create_if_missing = true; - - DB* db = nullptr; - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - WriteOptions wopts; - FlushOptions fopts; - fopts.wait = true; - Random rnd(test::RandomSeed()); - for (int i = 0; i < 200; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08d", i); - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, buf, v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 100; i < 300; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08d", i); - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, buf, v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 200; i < 400; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08d", i); - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, buf, v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 300; i < 400; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08d", i); - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, buf, v)); - } - ASSERT_OK(db->Flush(fopts)); - ASSERT_OK(db->Close()); - delete db; - - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "file_checksum_dump"; - char arg4[] = "--hex"; - char* argv[] = {arg1, arg2, arg3, arg4}; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - ASSERT_OK(DB::Open(opts, dbname, &db)); - - // Verify each sst file checksum value and checksum name - FileChecksumTestHelper fct_helper(opts, db, dbname); - ASSERT_OK(fct_helper.VerifyEachFileChecksum()); - - // Manually trigger compaction - char b_buf[16]; - snprintf(b_buf, sizeof(b_buf), "%08d", 0); - char e_buf[16]; - snprintf(e_buf, sizeof(e_buf), "%08d", 399); - Slice begin(b_buf); - Slice end(e_buf); - CompactRangeOptions options; - ASSERT_OK(db->CompactRange(options, &begin, &end)); - // Verify each sst file checksum after compaction - FileChecksumTestHelper fct_helper_ac(opts, db, dbname); - ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum()); - - ASSERT_OK(db->Close()); - delete db; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - ASSERT_OK(DB::Open(opts, dbname, &db)); - - // Verify the checksum information in memory is the same as that in Manifest; - std::vector live_files; - db->GetLiveFilesMetaData(&live_files); - delete db; - ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files)); -} - -TEST_F(LdbCmdTest, BlobDBDumpFileChecksumNoChecksum) { - Env* base_env = TryLoadCustomOrDefaultEnv(); - std::unique_ptr env(NewMemEnv(base_env)); - Options opts; - opts.env = env.get(); - opts.create_if_missing = true; - opts.enable_blob_files = true; - - DB* db = nullptr; - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - WriteOptions wopts; - FlushOptions fopts; - fopts.wait = true; - Random rnd(test::RandomSeed()); - for (int i = 0; i < 200; i++) { 
- std::ostringstream oss; - oss << std::setfill('0') << std::setw(8) << std::fixed << i; - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, oss.str(), v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 100; i < 300; i++) { - std::ostringstream oss; - oss << std::setfill('0') << std::setw(8) << std::fixed << i; - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, oss.str(), v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 200; i < 400; i++) { - std::ostringstream oss; - oss << std::setfill('0') << std::setw(8) << std::fixed << i; - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, oss.str(), v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 300; i < 400; i++) { - std::ostringstream oss; - oss << std::setfill('0') << std::setw(8) << std::fixed << i; - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, oss.str(), v)); - } - ASSERT_OK(db->Flush(fopts)); - ASSERT_OK(db->Close()); - delete db; - - char arg1[] = "./ldb"; - std::string arg2_str = "--db=" + dbname; - char arg3[] = "file_checksum_dump"; - char arg4[] = "--hex"; - char* argv[] = {arg1, const_cast(arg2_str.c_str()), arg3, arg4}; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - ASSERT_OK(DB::Open(opts, dbname, &db)); - - // Verify each sst and blob file checksum value and checksum name - FileChecksumTestHelper fct_helper(opts, db, dbname); - ASSERT_OK(fct_helper.VerifyEachFileChecksum()); - - // Manually trigger compaction - std::ostringstream oss_b_buf; - oss_b_buf << std::setfill('0') << std::setw(8) << std::fixed << 0; - std::ostringstream oss_e_buf; - oss_e_buf << std::setfill('0') << std::setw(8) << std::fixed << 399; - std::string b_buf = oss_b_buf.str(); - std::string e_buf = oss_e_buf.str(); - Slice begin(b_buf); - Slice end(e_buf); - - CompactRangeOptions options; - ASSERT_OK(db->CompactRange(options, &begin, &end)); - // Verify each sst file checksum after compaction - FileChecksumTestHelper fct_helper_ac(opts, db, dbname); - ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum()); - - ASSERT_OK(db->Close()); - delete db; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); -} - -TEST_F(LdbCmdTest, DumpFileChecksumCRC32) { - Env* base_env = TryLoadCustomOrDefaultEnv(); - std::unique_ptr env(NewMemEnv(base_env)); - Options opts; - opts.env = env.get(); - opts.create_if_missing = true; - opts.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); - - DB* db = nullptr; - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - WriteOptions wopts; - FlushOptions fopts; - fopts.wait = true; - Random rnd(test::RandomSeed()); - for (int i = 0; i < 100; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08d", i); - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, buf, v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 50; i < 150; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08d", i); - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, buf, v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 100; i < 200; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08d", i); - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, buf, v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 150; i < 250; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%08d", i); - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, buf, v)); - } - 
ASSERT_OK(db->Flush(fopts)); - ASSERT_OK(db->Close()); - delete db; - - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "file_checksum_dump"; - char arg4[] = "--hex"; - char* argv[] = {arg1, arg2, arg3, arg4}; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - ASSERT_OK(DB::Open(opts, dbname, &db)); - - // Verify each sst file checksum value and checksum name - FileChecksumTestHelper fct_helper(opts, db, dbname); - ASSERT_OK(fct_helper.VerifyEachFileChecksum()); - - // Manually trigger compaction - char b_buf[16]; - snprintf(b_buf, sizeof(b_buf), "%08d", 0); - char e_buf[16]; - snprintf(e_buf, sizeof(e_buf), "%08d", 249); - Slice begin(b_buf); - Slice end(e_buf); - CompactRangeOptions options; - ASSERT_OK(db->CompactRange(options, &begin, &end)); - // Verify each sst file checksum after compaction - FileChecksumTestHelper fct_helper_ac(opts, db, dbname); - ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum()); - - ASSERT_OK(db->Close()); - delete db; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - ASSERT_OK(DB::Open(opts, dbname, &db)); - - // Verify the checksum information in memory is the same as that in Manifest; - std::vector live_files; - db->GetLiveFilesMetaData(&live_files); - ASSERT_OK(fct_helper_ac.VerifyChecksumInManifest(live_files)); - - ASSERT_OK(db->Close()); - delete db; -} - -TEST_F(LdbCmdTest, BlobDBDumpFileChecksumCRC32) { - Env* base_env = TryLoadCustomOrDefaultEnv(); - std::unique_ptr env(NewMemEnv(base_env)); - Options opts; - opts.env = env.get(); - opts.create_if_missing = true; - opts.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); - opts.enable_blob_files = true; - - DB* db = nullptr; - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - WriteOptions wopts; - FlushOptions fopts; - fopts.wait = true; - Random rnd(test::RandomSeed()); - for (int i = 0; i < 100; i++) { - std::ostringstream oss; - oss << std::setfill('0') << std::setw(8) << std::fixed << i; - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, oss.str(), v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 50; i < 150; i++) { - std::ostringstream oss; - oss << std::setfill('0') << std::setw(8) << std::fixed << i; - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, oss.str(), v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 100; i < 200; i++) { - std::ostringstream oss; - oss << std::setfill('0') << std::setw(8) << std::fixed << i; - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, oss.str(), v)); - } - ASSERT_OK(db->Flush(fopts)); - for (int i = 150; i < 250; i++) { - std::ostringstream oss; - oss << std::setfill('0') << std::setw(8) << std::fixed << i; - std::string v = rnd.RandomString(100); - ASSERT_OK(db->Put(wopts, oss.str(), v)); - } - ASSERT_OK(db->Flush(fopts)); - ASSERT_OK(db->Close()); - delete db; - - char arg1[] = "./ldb"; - std::string arg2_str = "--db=" + dbname; - char arg3[] = "file_checksum_dump"; - char arg4[] = "--hex"; - char* argv[] = {arg1, const_cast(arg2_str.c_str()), arg3, arg4}; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - ASSERT_OK(DB::Open(opts, dbname, &db)); - - // Verify each sst and blob file checksum value and checksum name - FileChecksumTestHelper fct_helper(opts, db, dbname); - ASSERT_OK(fct_helper.VerifyEachFileChecksum()); - - // Manually 
trigger compaction - std::ostringstream oss_b_buf; - oss_b_buf << std::setfill('0') << std::setw(8) << std::fixed << 0; - std::ostringstream oss_e_buf; - oss_e_buf << std::setfill('0') << std::setw(8) << std::fixed << 249; - std::string b_buf = oss_b_buf.str(); - std::string e_buf = oss_e_buf.str(); - Slice begin(b_buf); - Slice end(e_buf); - - CompactRangeOptions options; - ASSERT_OK(db->CompactRange(options, &begin, &end)); - // Verify each sst file checksum after compaction - FileChecksumTestHelper fct_helper_ac(opts, db, dbname); - ASSERT_OK(fct_helper_ac.VerifyEachFileChecksum()); - - ASSERT_OK(db->Close()); - delete db; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); -} - -TEST_F(LdbCmdTest, OptionParsing) { - // test parsing flags - Options opts; - opts.env = TryLoadCustomOrDefaultEnv(); - { - std::vector args; - args.push_back("scan"); - args.push_back("--ttl"); - args.push_back("--timestamp"); - LDBCommand* command = ROCKSDB_NAMESPACE::LDBCommand::InitFromCmdLineArgs( - args, opts, LDBOptions(), nullptr); - const std::vector flags = command->TEST_GetFlags(); - EXPECT_EQ(flags.size(), 2); - EXPECT_EQ(flags[0], "ttl"); - EXPECT_EQ(flags[1], "timestamp"); - delete command; - } - // test parsing options which contains equal sign in the option value - { - std::vector args; - args.push_back("scan"); - args.push_back("--db=/dev/shm/ldbtest/"); - args.push_back( - "--from='abcd/efg/hijk/lmn/" - "opq:__rst.uvw.xyz?a=3+4+bcd+efghi&jk=lm_no&pq=rst-0&uv=wx-8&yz=a&bcd_" - "ef=gh.ijk'"); - LDBCommand* command = ROCKSDB_NAMESPACE::LDBCommand::InitFromCmdLineArgs( - args, opts, LDBOptions(), nullptr); - const std::map option_map = - command->TEST_GetOptionMap(); - EXPECT_EQ(option_map.at("db"), "/dev/shm/ldbtest/"); - EXPECT_EQ(option_map.at("from"), - "'abcd/efg/hijk/lmn/" - "opq:__rst.uvw.xyz?a=3+4+bcd+efghi&jk=lm_no&pq=rst-0&uv=wx-8&yz=" - "a&bcd_ef=gh.ijk'"); - delete command; - } -} - -TEST_F(LdbCmdTest, ListFileTombstone) { - Env* base_env = TryLoadCustomOrDefaultEnv(); - std::unique_ptr env(NewMemEnv(base_env)); - Options opts; - opts.env = env.get(); - opts.create_if_missing = true; - - DB* db = nullptr; - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - WriteOptions wopts; - ASSERT_OK(db->Put(wopts, "foo", "1")); - ASSERT_OK(db->Put(wopts, "bar", "2")); - - FlushOptions fopts; - fopts.wait = true; - ASSERT_OK(db->Flush(fopts)); - - ASSERT_OK(db->DeleteRange(wopts, db->DefaultColumnFamily(), "foo", "foo2")); - ASSERT_OK(db->DeleteRange(wopts, db->DefaultColumnFamily(), "bar", "foo2")); - ASSERT_OK(db->Flush(fopts)); - - delete db; - - { - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "list_file_range_deletes"; - char* argv[] = {arg1, arg2, arg3}; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ListFileRangeDeletesCommand::DoCommand:BeforePrint", [&](void* arg) { - std::string* out_str = reinterpret_cast(arg); - - // Count number of tombstones printed - int num_tb = 0; - const std::string kFingerprintStr = "start: "; - auto offset = out_str->find(kFingerprintStr); - while (offset != std::string::npos) { - num_tb++; - offset = - out_str->find(kFingerprintStr, offset + kFingerprintStr.size()); - } - EXPECT_EQ(2, num_tb); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_EQ( - 0, LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); - - 
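The SyncPoint callback above counts tombstones by scanning the command's printed output for the "start: " marker. The same counting loop, lifted out as a plain function for clarity (the function name and sample text are illustrative):

#include <cstdio>
#include <string>

// Count non-overlapping occurrences of `marker` in `text`, exactly as the
// callback above does with std::string::find.
static int CountOccurrences(const std::string& text,
                            const std::string& marker) {
  int count = 0;
  auto offset = text.find(marker);
  while (offset != std::string::npos) {
    ++count;
    offset = text.find(marker, offset + marker.size());
  }
  return count;
}

int main() {
  const std::string dump =
      "start: 626172 end: 666f6f32\nstart: 666f6f end: 666f6f32\n";
  std::printf("%d tombstones\n", CountOccurrences(dump, "start: "));  // prints 2
  return 0;
}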
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } - - // Test the case of limiting tombstones - { - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "list_file_range_deletes"; - char arg4[] = "--max_keys=1"; - char* argv[] = {arg1, arg2, arg3, arg4}; - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "ListFileRangeDeletesCommand::DoCommand:BeforePrint", [&](void* arg) { - std::string* out_str = reinterpret_cast(arg); - - // Count number of tombstones printed - int num_tb = 0; - const std::string kFingerprintStr = "start: "; - auto offset = out_str->find(kFingerprintStr); - while (offset != std::string::npos) { - num_tb++; - offset = - out_str->find(kFingerprintStr, offset + kFingerprintStr.size()); - } - EXPECT_EQ(1, num_tb); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_EQ( - 0, LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(LdbCmdTest, DisableConsistencyChecks) { - Env* base_env = TryLoadCustomOrDefaultEnv(); - std::unique_ptr env(NewMemEnv(base_env)); - Options opts; - opts.env = env.get(); - opts.create_if_missing = true; - - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - - { - DB* db = nullptr; - ASSERT_OK(DB::Open(opts, dbname, &db)); - - WriteOptions wopts; - FlushOptions fopts; - fopts.wait = true; - - ASSERT_OK(db->Put(wopts, "foo1", "1")); - ASSERT_OK(db->Put(wopts, "bar1", "2")); - ASSERT_OK(db->Flush(fopts)); - - ASSERT_OK(db->Put(wopts, "foo2", "3")); - ASSERT_OK(db->Put(wopts, "bar2", "4")); - ASSERT_OK(db->Flush(fopts)); - - delete db; - } - - { - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "checkconsistency"; - char* argv[] = {arg1, arg2, arg3}; - - SyncPoint::GetInstance()->SetCallBack( - "Version::PrepareAppend:forced_check", [&](void* arg) { - bool* forced = reinterpret_cast(arg); - ASSERT_TRUE(*forced); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_EQ( - 0, LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); - - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - } - { - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "scan"; - char* argv[] = {arg1, arg2, arg3}; - - SyncPoint::GetInstance()->SetCallBack( - "Version::PrepareAppend:forced_check", [&](void* arg) { - bool* forced = reinterpret_cast(arg); - ASSERT_TRUE(*forced); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_EQ( - 0, LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); - - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - } - { - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "scan"; - char arg4[] = "--disable_consistency_checks"; - char* argv[] = {arg1, arg2, arg3, arg4}; - - SyncPoint::GetInstance()->SetCallBack( - "ColumnFamilyData::ColumnFamilyData", [&](void* arg) { - ColumnFamilyOptions* cfo = - reinterpret_cast(arg); - ASSERT_FALSE(cfo->force_consistency_checks); - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_EQ( 
- 0, LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - } -} - -TEST_F(LdbCmdTest, TestBadDbPath) { - Env* base_env = TryLoadCustomOrDefaultEnv(); - std::unique_ptr env(NewMemEnv(base_env)); - Options opts; - opts.env = env.get(); - opts.create_if_missing = true; - - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s/.no_such_dir", dbname.c_str()); - char arg3[1024]; - snprintf(arg3, sizeof(arg3), "create_column_family"); - char arg4[] = "bad cf"; - char* argv[] = {arg1, arg2, arg3, arg4}; - - ASSERT_EQ(1, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - snprintf(arg3, sizeof(arg3), "drop_column_family"); - ASSERT_EQ(1, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); -} -namespace { -class WrappedEnv : public EnvWrapper { - public: - explicit WrappedEnv(Env* t) : EnvWrapper(t) {} - static const char* kClassName() { return "WrappedEnv"; } - const char* Name() const override { return kClassName(); } -}; -} // namespace -TEST_F(LdbCmdTest, LoadCFOptionsAndOverride) { - // Env* base_env = TryLoadCustomOrDefaultEnv(); - // std::unique_ptr env(NewMemEnv(base_env)); - std::unique_ptr env(new WrappedEnv(Env::Default())); - Options opts; - opts.env = env.get(); - opts.create_if_missing = true; - - DB* db = nullptr; - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - ASSERT_OK(DestroyDB(dbname, opts)); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - ColumnFamilyHandle* cf_handle; - ColumnFamilyOptions cf_opts; - cf_opts.num_levels = 20; - ASSERT_OK(db->CreateColumnFamily(cf_opts, "cf1", &cf_handle)); - - delete cf_handle; - delete db; - - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "put"; - char arg4[] = "key1"; - char arg5[] = "value1"; - char arg6[] = "--try_load_options"; - char arg7[] = "--column_family=cf1"; - char arg8[] = "--write_buffer_size=268435456"; - char* argv[] = {arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8}; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(8, argv, opts, LDBOptions(), nullptr)); - - ConfigOptions config_opts; - Options options; - std::vector column_families; - config_opts.env = env.get(); - ASSERT_OK(LoadLatestOptions(config_opts, dbname, &options, &column_families)); - ASSERT_EQ(column_families.size(), 2); - ASSERT_EQ(options.num_levels, opts.num_levels); - ASSERT_EQ(column_families[1].options.num_levels, cf_opts.num_levels); - ASSERT_EQ(column_families[1].options.write_buffer_size, 268435456); -} - -TEST_F(LdbCmdTest, UnsafeRemoveSstFile) { - Options opts; - opts.level0_file_num_compaction_trigger = 10; - opts.create_if_missing = true; - - DB* db = nullptr; - std::string dbname = test::PerThreadDBPath(Env::Default(), "ldb_cmd_test"); - ASSERT_OK(DestroyDB(dbname, opts)); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - // Create three SST files - for (size_t i = 0; i < 3; ++i) { - ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), std::to_string(i))); - ASSERT_OK(db->Flush(FlushOptions())); - } - - // Determine which is the "middle" one - std::vector sst_files; - db->GetLiveFilesMetaData(&sst_files); - - std::vector numbers; - for (auto& f : sst_files) { - numbers.push_back(f.file_number); - } - ASSERT_EQ(numbers.size(), 3); - std::sort(numbers.begin(), numbers.end()); - uint64_t to_remove = numbers[1]; - 
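The file number chosen above comes from GetLiveFilesMetaData, which reports one entry per live SST file. A self-contained sketch of that enumeration step (database path and key count are illustrative; statuses are checked but not otherwise handled):

#include <algorithm>
#include <cinttypes>
#include <cstdio>
#include <string>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/metadata.h"
#include "rocksdb/options.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  Options opts;
  opts.create_if_missing = true;
  // Keep automatic compaction from merging the freshly flushed files,
  // mirroring the trigger used in the test above.
  opts.level0_file_num_compaction_trigger = 10;

  DB* db = nullptr;
  if (!DB::Open(opts, "/tmp/live_files_sketch", &db).ok()) return 1;

  // One flush per Put yields one SST file per key.
  for (int i = 0; i < 3; ++i) {
    if (!db->Put(WriteOptions(), std::to_string(i), std::to_string(i)).ok() ||
        !db->Flush(FlushOptions()).ok()) {
      delete db;
      return 1;
    }
  }

  std::vector<LiveFileMetaData> live_files;
  db->GetLiveFilesMetaData(&live_files);

  std::vector<uint64_t> numbers;
  for (const auto& f : live_files) {
    numbers.push_back(f.file_number);
  }
  std::sort(numbers.begin(), numbers.end());
  if (!numbers.empty()) {
    // numbers[numbers.size() / 2] is the "middle" file targeted by
    // unsafe_remove_sst_file in the test above.
    std::printf("middle file number: %" PRIu64 "\n",
                numbers[numbers.size() / 2]);
  }
  delete db;
  return 0;
}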
- // Close for unsafe_remove_sst_file - delete db; - db = nullptr; - - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "unsafe_remove_sst_file"; - char arg4[20]; - snprintf(arg4, sizeof(arg4), "%" PRIu64, to_remove); - char* argv[] = {arg1, arg2, arg3, arg4}; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - // Re-open, and verify with Get that middle file is gone - ASSERT_OK(DB::Open(opts, dbname, &db)); - - std::string val; - ASSERT_OK(db->Get(ReadOptions(), "0", &val)); - ASSERT_EQ(val, "0"); - - ASSERT_OK(db->Get(ReadOptions(), "2", &val)); - ASSERT_EQ(val, "2"); - - ASSERT_TRUE(db->Get(ReadOptions(), "1", &val).IsNotFound()); - - // Now with extra CF, two more files - ColumnFamilyHandle* cf_handle; - ColumnFamilyOptions cf_opts; - ASSERT_OK(db->CreateColumnFamily(cf_opts, "cf1", &cf_handle)); - for (size_t i = 3; i < 5; ++i) { - ASSERT_OK(db->Put(WriteOptions(), cf_handle, std::to_string(i), - std::to_string(i))); - ASSERT_OK(db->Flush(FlushOptions(), cf_handle)); - } - - // Determine which is the "last" one - sst_files.clear(); - db->GetLiveFilesMetaData(&sst_files); - - numbers.clear(); - for (auto& f : sst_files) { - numbers.push_back(f.file_number); - } - ASSERT_EQ(numbers.size(), 4); - std::sort(numbers.begin(), numbers.end()); - to_remove = numbers.back(); - - // Close for unsafe_remove_sst_file - delete cf_handle; - delete db; - db = nullptr; - - snprintf(arg4, sizeof(arg4), "%" PRIu64, to_remove); - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - std::vector cfds = {{kDefaultColumnFamilyName, opts}, - {"cf1", cf_opts}}; - std::vector handles; - ASSERT_OK(DB::Open(opts, dbname, cfds, &handles, &db)); - - ASSERT_OK(db->Get(ReadOptions(), handles[1], "3", &val)); - ASSERT_EQ(val, "3"); - - ASSERT_TRUE(db->Get(ReadOptions(), handles[1], "4", &val).IsNotFound()); - - ASSERT_OK(db->Get(ReadOptions(), handles[0], "0", &val)); - ASSERT_EQ(val, "0"); - - // Determine which is the "first" one (most likely to be opened in recovery) - sst_files.clear(); - db->GetLiveFilesMetaData(&sst_files); - - numbers.clear(); - for (auto& f : sst_files) { - numbers.push_back(f.file_number); - } - ASSERT_EQ(numbers.size(), 3); - std::sort(numbers.begin(), numbers.end()); - to_remove = numbers.front(); - - // This time physically delete the file before unsafe_remove - { - std::string f = dbname + "/" + MakeTableFileName(to_remove); - ASSERT_OK(Env::Default()->DeleteFile(f)); - } - - // Close for unsafe_remove_sst_file - for (auto& h : handles) { - delete h; - } - delete db; - db = nullptr; - - snprintf(arg4, sizeof(arg4), "%" PRIu64, to_remove); - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - ASSERT_OK(DB::Open(opts, dbname, cfds, &handles, &db)); - - ASSERT_OK(db->Get(ReadOptions(), handles[1], "3", &val)); - ASSERT_EQ(val, "3"); - - ASSERT_TRUE(db->Get(ReadOptions(), handles[0], "0", &val).IsNotFound()); - - for (auto& h : handles) { - delete h; - } - delete db; -} - -TEST_F(LdbCmdTest, FileTemperatureUpdateManifest) { - auto test_fs = std::make_shared(FileSystem::Default()); - std::unique_ptr env(new CompositeEnvWrapper(Env::Default(), test_fs)); - Options opts; - opts.bottommost_temperature = Temperature::kWarm; - opts.level0_file_num_compaction_trigger = 10; - opts.create_if_missing = true; - opts.env = env.get(); - - DB* db = nullptr; - std::string dbname = test::PerThreadDBPath(env.get(), "ldb_cmd_test"); - 
ASSERT_OK(DestroyDB(dbname, opts)); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - std::array<Temperature, 5> kTestTemps = { - Temperature::kCold, Temperature::kWarm, Temperature::kHot, - Temperature::kWarm, Temperature::kCold}; - std::map<uint64_t, Temperature> number_to_temp; - for (size_t i = 0; i < kTestTemps.size(); ++i) { - ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), std::to_string(i))); - ASSERT_OK(db->Flush(FlushOptions())); - - std::map<uint64_t, Temperature> current_temps; - test_fs->CopyCurrentSstFileTemperatures(&current_temps); - for (auto e : current_temps) { - if (e.second == Temperature::kUnknown) { - test_fs->OverrideSstFileTemperature(e.first, kTestTemps[i]); - number_to_temp[e.first] = kTestTemps[i]; - } - } - } - - // Close & reopen - delete db; - db = nullptr; - test_fs->PopRequestedSstFileTemperatures(); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - for (size_t i = 0; i < kTestTemps.size(); ++i) { - std::string val; - ASSERT_OK(db->Get(ReadOptions(), std::to_string(i), &val)); - ASSERT_EQ(val, std::to_string(i)); - } - - // Still all unknown - std::vector<std::pair<uint64_t, Temperature>> requests; - test_fs->PopRequestedSstFileTemperatures(&requests); - ASSERT_EQ(requests.size(), kTestTemps.size()); - for (auto& r : requests) { - ASSERT_EQ(r.second, Temperature::kUnknown); - } - - // Close for update_manifest - delete db; - db = nullptr; - - char arg1[] = "./ldb"; - char arg2[1024]; - snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); - char arg3[] = "update_manifest"; - char arg4[] = "--update_temperatures"; - char* argv[] = {arg1, arg2, arg3, arg4}; - - ASSERT_EQ(0, - LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); - - // Re-open, get, and verify manifest temps (based on request) - test_fs->PopRequestedSstFileTemperatures(); - ASSERT_OK(DB::Open(opts, dbname, &db)); - - for (size_t i = 0; i < kTestTemps.size(); ++i) { - std::string val; - ASSERT_OK(db->Get(ReadOptions(), std::to_string(i), &val)); - ASSERT_EQ(val, std::to_string(i)); - } - - requests.clear(); - test_fs->PopRequestedSstFileTemperatures(&requests); - ASSERT_EQ(requests.size(), kTestTemps.size()); - for (auto& r : requests) { - ASSERT_EQ(r.second, number_to_temp[r.first]); - } - delete db; -} - -TEST_F(LdbCmdTest, RenameDbAndLoadOptions) { - Env* env = TryLoadCustomOrDefaultEnv(); - Options opts; - opts.env = env; - opts.create_if_missing = false; - - std::string old_dbname = test::PerThreadDBPath(env, "ldb_cmd_test"); - std::string new_dbname = old_dbname + "_2"; - ASSERT_OK(DestroyDB(old_dbname, opts)); - ASSERT_OK(DestroyDB(new_dbname, opts)); - - char old_arg[1024]; - snprintf(old_arg, sizeof(old_arg), "--db=%s", old_dbname.c_str()); - char new_arg[1024]; - snprintf(new_arg, sizeof(new_arg), "--db=%s", new_dbname.c_str()); - const char* argv1[] = {"./ldb", - old_arg, - "put", - "key1", - "value1", - "--try_load_options", - "--create_if_missing"}; - - const char* argv2[] = {"./ldb", old_arg, "get", "key1", "--try_load_options"}; - const char* argv3[] = {"./ldb", new_arg, "put", - "key2", "value2", "--try_load_options"}; - - const char* argv4[] = {"./ldb", new_arg, "get", "key1", "--try_load_options"}; - const char* argv5[] = {"./ldb", new_arg, "get", "key2", "--try_load_options"}; - - ASSERT_EQ( - 0, LDBCommandRunner::RunCommand(7, argv1, opts, LDBOptions(), nullptr)); - ASSERT_EQ( - 0, LDBCommandRunner::RunCommand(5, argv2, opts, LDBOptions(), nullptr)); - ConfigOptions config_opts; - Options options; - std::vector<ColumnFamilyDescriptor> column_families; - config_opts.env = env; - ASSERT_OK( - LoadLatestOptions(config_opts, old_dbname, &options, &column_families)); -
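The --try_load_options path exercised above goes through the options_util API. Outside the test harness the same lookup might look like this minimal sketch (the function name, printing, and error handling are illustrative, and it assumes ROCKSDB_NAMESPACE plus "rocksdb/utilities/options_util.h"):

  #include <cstdio>
  #include <string>
  #include <vector>
  #include "rocksdb/utilities/options_util.h"

  // Load the most recent OPTIONS file persisted in a DB directory.
  Status ShowPersistedOptions(Env* env, const std::string& dbname) {
    ConfigOptions config_opts;
    config_opts.env = env;
    DBOptions db_opts;
    std::vector<ColumnFamilyDescriptor> cf_descs;
    Status s = LoadLatestOptions(config_opts, dbname, &db_opts, &cf_descs);
    if (s.ok()) {
      fprintf(stdout, "wal_dir=\"%s\", %zu column families\n",
              db_opts.wal_dir.c_str(), cf_descs.size());
    }
    return s;
  }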
ASSERT_EQ(options.wal_dir, ""); - - ASSERT_OK(env->RenameFile(old_dbname, new_dbname)); - ASSERT_NE( - 0, LDBCommandRunner::RunCommand(6, argv1, opts, LDBOptions(), nullptr)); - ASSERT_NE( - 0, LDBCommandRunner::RunCommand(5, argv2, opts, LDBOptions(), nullptr)); - ASSERT_EQ( - 0, LDBCommandRunner::RunCommand(6, argv3, opts, LDBOptions(), nullptr)); - ASSERT_EQ( - 0, LDBCommandRunner::RunCommand(5, argv4, opts, LDBOptions(), nullptr)); - ASSERT_EQ( - 0, LDBCommandRunner::RunCommand(5, argv5, opts, LDBOptions(), nullptr)); - ASSERT_OK(DestroyDB(new_dbname, opts)); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/tools/reduce_levels_test.cc b/tools/reduce_levels_test.cc deleted file mode 100644 index 97f8030b7..000000000 --- a/tools/reduce_levels_test.cc +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// - - -#include "db/db_impl/db_impl.h" -#include "db/version_set.h" -#include "rocksdb/db.h" -#include "rocksdb/utilities/ldb_cmd.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "tools/ldb_cmd_impl.h" -#include "util/cast_util.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class ReduceLevelTest : public testing::Test { - public: - ReduceLevelTest() { - dbname_ = test::PerThreadDBPath("db_reduce_levels_test"); - EXPECT_OK(DestroyDB(dbname_, Options())); - db_ = nullptr; - } - - Status OpenDB(bool create_if_missing, int levels); - - Status Put(const std::string& k, const std::string& v) { - return db_->Put(WriteOptions(), k, v); - } - - std::string Get(const std::string& k) { - ReadOptions options; - std::string result; - Status s = db_->Get(options, k, &result); - if (s.IsNotFound()) { - result = "NOT_FOUND"; - } else if (!s.ok()) { - result = s.ToString(); - } - return result; - } - - Status Flush() { - if (db_ == nullptr) { - return Status::InvalidArgument("DB not opened."); - } - DBImpl* db_impl = static_cast_with_check(db_); - return db_impl->TEST_FlushMemTable(); - } - - void MoveL0FileToLevel(int level) { - DBImpl* db_impl = static_cast_with_check(db_); - for (int i = 0; i < level; ++i) { - ASSERT_OK(db_impl->TEST_CompactRange(i, nullptr, nullptr)); - } - } - - void CloseDB() { - if (db_ != nullptr) { - delete db_; - db_ = nullptr; - } - } - - bool ReduceLevels(int target_level); - - int FilesOnLevel(int level) { - std::string property; - EXPECT_TRUE(db_->GetProperty( - "rocksdb.num-files-at-level" + std::to_string(level), &property)); - return atoi(property.c_str()); - } - - private: - std::string dbname_; - DB* db_; -}; - -Status ReduceLevelTest::OpenDB(bool create_if_missing, int num_levels) { - ROCKSDB_NAMESPACE::Options opt; - opt.num_levels = num_levels; - opt.create_if_missing = create_if_missing; - ROCKSDB_NAMESPACE::Status st = - ROCKSDB_NAMESPACE::DB::Open(opt, dbname_, &db_); - if (!st.ok()) { - fprintf(stderr, "Can't open the db:%s\n", st.ToString().c_str()); - } - return st; -} - -bool ReduceLevelTest::ReduceLevels(int target_level) { - std::vector args = - ROCKSDB_NAMESPACE::ReduceDBLevelsCommand::PrepareArgs( - dbname_, target_level, false); - LDBCommand* level_reducer = 
LDBCommand::InitFromCmdLineArgs( - args, Options(), LDBOptions(), nullptr, LDBCommand::SelectCommand); - level_reducer->Run(); - bool is_succeed = level_reducer->GetExecuteState().IsSucceed(); - delete level_reducer; - return is_succeed; -} - -TEST_F(ReduceLevelTest, Last_Level) { - ASSERT_OK(OpenDB(true, 4)); - ASSERT_OK(Put("aaaa", "11111")); - ASSERT_OK(Flush()); - MoveL0FileToLevel(3); - ASSERT_EQ(FilesOnLevel(3), 1); - CloseDB(); - - ASSERT_TRUE(ReduceLevels(3)); - ASSERT_OK(OpenDB(true, 3)); - ASSERT_EQ(FilesOnLevel(2), 1); - CloseDB(); - - ASSERT_TRUE(ReduceLevels(2)); - ASSERT_OK(OpenDB(true, 2)); - ASSERT_EQ(FilesOnLevel(1), 1); - CloseDB(); -} - -TEST_F(ReduceLevelTest, Top_Level) { - ASSERT_OK(OpenDB(true, 5)); - ASSERT_OK(Put("aaaa", "11111")); - ASSERT_OK(Flush()); - ASSERT_EQ(FilesOnLevel(0), 1); - CloseDB(); - - ASSERT_TRUE(ReduceLevels(4)); - ASSERT_OK(OpenDB(true, 4)); - CloseDB(); - - ASSERT_TRUE(ReduceLevels(3)); - ASSERT_OK(OpenDB(true, 3)); - CloseDB(); - - ASSERT_TRUE(ReduceLevels(2)); - ASSERT_OK(OpenDB(true, 2)); - CloseDB(); -} - -TEST_F(ReduceLevelTest, All_Levels) { - ASSERT_OK(OpenDB(true, 5)); - ASSERT_OK(Put("a", "a11111")); - ASSERT_OK(Flush()); - MoveL0FileToLevel(4); - ASSERT_EQ(FilesOnLevel(4), 1); - CloseDB(); - - ASSERT_OK(OpenDB(true, 5)); - ASSERT_OK(Put("b", "b11111")); - ASSERT_OK(Flush()); - MoveL0FileToLevel(3); - ASSERT_EQ(FilesOnLevel(3), 1); - ASSERT_EQ(FilesOnLevel(4), 1); - CloseDB(); - - ASSERT_OK(OpenDB(true, 5)); - ASSERT_OK(Put("c", "c11111")); - ASSERT_OK(Flush()); - MoveL0FileToLevel(2); - ASSERT_EQ(FilesOnLevel(2), 1); - ASSERT_EQ(FilesOnLevel(3), 1); - ASSERT_EQ(FilesOnLevel(4), 1); - CloseDB(); - - ASSERT_OK(OpenDB(true, 5)); - ASSERT_OK(Put("d", "d11111")); - ASSERT_OK(Flush()); - MoveL0FileToLevel(1); - ASSERT_EQ(FilesOnLevel(1), 1); - ASSERT_EQ(FilesOnLevel(2), 1); - ASSERT_EQ(FilesOnLevel(3), 1); - ASSERT_EQ(FilesOnLevel(4), 1); - CloseDB(); - - ASSERT_TRUE(ReduceLevels(4)); - ASSERT_OK(OpenDB(true, 4)); - ASSERT_EQ("a11111", Get("a")); - ASSERT_EQ("b11111", Get("b")); - ASSERT_EQ("c11111", Get("c")); - ASSERT_EQ("d11111", Get("d")); - CloseDB(); - - ASSERT_TRUE(ReduceLevels(3)); - ASSERT_OK(OpenDB(true, 3)); - ASSERT_EQ("a11111", Get("a")); - ASSERT_EQ("b11111", Get("b")); - ASSERT_EQ("c11111", Get("c")); - ASSERT_EQ("d11111", Get("d")); - CloseDB(); - - ASSERT_TRUE(ReduceLevels(2)); - ASSERT_OK(OpenDB(true, 2)); - ASSERT_EQ("a11111", Get("a")); - ASSERT_EQ("b11111", Get("b")); - ASSERT_EQ("c11111", Get("c")); - ASSERT_EQ("d11111", Get("d")); - CloseDB(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc deleted file mode 100644 index 29d11d4da..000000000 --- a/tools/sst_dump_test.cc +++ /dev/null @@ -1,471 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include - -#include "file/random_access_file_reader.h" -#include "port/stack_trace.h" -#include "rocksdb/convenience.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/sst_dump_tool.h" -#include "table/block_based/block_based_table_factory.h" -#include "table/table_builder.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -const uint32_t kOptLength = 1024; - -namespace { -static std::string MakeKey(int i) { - char buf[100]; - snprintf(buf, sizeof(buf), "k_%04d", i); - InternalKey key(std::string(buf), 0, ValueType::kTypeValue); - return key.Encode().ToString(); -} - -static std::string MakeKeyWithTimeStamp(int i, uint64_t ts) { - char buf[100]; - snprintf(buf, sizeof(buf), "k_%04d", i); - return test::KeyStr(ts, std::string(buf), /*seq=*/0, kTypeValue); -} - -static std::string MakeValue(int i) { - char buf[100]; - snprintf(buf, sizeof(buf), "v_%04d", i); - InternalKey key(std::string(buf), 0, ValueType::kTypeValue); - return key.Encode().ToString(); -} - -void cleanup(const Options& opts, const std::string& file_name) { - Env* env = opts.env; - ASSERT_OK(env->DeleteFile(file_name)); - std::string outfile_name = file_name.substr(0, file_name.length() - 4); - outfile_name.append("_dump.txt"); - env->DeleteFile(outfile_name).PermitUncheckedError(); -} -} // namespace - -// Test for sst dump tool "raw" mode -class SSTDumpToolTest : public testing::Test { - std::string test_dir_; - Env* env_; - std::shared_ptr env_guard_; - - public: - SSTDumpToolTest() : env_(Env::Default()) { - EXPECT_OK(test::CreateEnvFromSystem(ConfigOptions(), &env_, &env_guard_)); - test_dir_ = test::PerThreadDBPath(env_, "sst_dump_test_db"); - Status s = env_->CreateDirIfMissing(test_dir_); - EXPECT_OK(s); - } - - ~SSTDumpToolTest() override { - if (getenv("KEEP_DB")) { - fprintf(stdout, "Data is still at %s\n", test_dir_.c_str()); - } else { - EXPECT_OK(env_->DeleteDir(test_dir_)); - } - } - - Env* env() { return env_; } - - std::string MakeFilePath(const std::string& file_name) const { - std::string path(test_dir_); - path.append("/").append(file_name); - return path; - } - - template - void PopulateCommandArgs(const std::string& file_path, const char* command, - char* (&usage)[N]) const { - for (int i = 0; i < static_cast(N); ++i) { - usage[i] = new char[kOptLength]; - } - snprintf(usage[0], kOptLength, "./sst_dump"); - snprintf(usage[1], kOptLength, "%s", command); - snprintf(usage[2], kOptLength, "--file=%s", file_path.c_str()); - } - - void createSST(const Options& opts, const std::string& file_name) { - Env* test_env = opts.env; - FileOptions file_options(opts); - ReadOptions read_options; - const ImmutableOptions imoptions(opts); - const MutableCFOptions moptions(opts); - ROCKSDB_NAMESPACE::InternalKeyComparator ikc(opts.comparator); - std::unique_ptr tb; - - IntTblPropCollectorFactories int_tbl_prop_collector_factories; - std::unique_ptr file_writer; - ASSERT_OK(WritableFileWriter::Create(test_env->GetFileSystem(), file_name, - file_options, &file_writer, nullptr)); - - std::string column_family_name; - int unknown_level = -1; - tb.reset(opts.table_factory->NewTableBuilder( - TableBuilderOptions( - imoptions, moptions, ikc, &int_tbl_prop_collector_factories, - CompressionType::kNoCompression, CompressionOptions(), - TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, - column_family_name, unknown_level), - file_writer.get())); - - // Populate slightly more than 1K keys - uint32_t num_keys = kNumKey; - const char* comparator_name = 
ikc.user_comparator()->Name(); - if (strcmp(comparator_name, ReverseBytewiseComparator()->Name()) == 0) { - for (int32_t i = num_keys; i >= 0; i--) { - tb->Add(MakeKey(i), MakeValue(i)); - } - } else if (strcmp(comparator_name, - test::BytewiseComparatorWithU64TsWrapper()->Name()) == - 0) { - for (uint32_t i = 0; i < num_keys; i++) { - tb->Add(MakeKeyWithTimeStamp(i, 100 + i), MakeValue(i)); - } - } else { - for (uint32_t i = 0; i < num_keys; i++) { - tb->Add(MakeKey(i), MakeValue(i)); - } - } - ASSERT_OK(tb->Finish()); - ASSERT_OK(file_writer->Close()); - } - - protected: - constexpr static int kNumKey = 1024; -}; - -constexpr int SSTDumpToolTest::kNumKey; - -TEST_F(SSTDumpToolTest, HelpAndVersion) { - Options opts; - opts.env = env(); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - - static const char* help[] = {"./sst_dump", "--help"}; - ASSERT_TRUE(!tool.Run(2, help, opts)); - static const char* version[] = {"./sst_dump", "--version"}; - ASSERT_TRUE(!tool.Run(2, version, opts)); - static const char* bad[] = {"./sst_dump", "--not_an_option"}; - ASSERT_TRUE(tool.Run(2, bad, opts)); -} - -TEST_F(SSTDumpToolTest, EmptyFilter) { - Options opts; - opts.env = env(); - std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); - - char* usage[3]; - PopulateCommandArgs(file_path, "--command=raw", usage); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - cleanup(opts, file_path); - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, SstDumpReverseBytewiseComparator) { - Options opts; - opts.env = env(); - opts.comparator = ReverseBytewiseComparator(); - BlockBasedTableOptions table_opts; - table_opts.filter_policy.reset( - ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false)); - opts.table_factory.reset(new BlockBasedTableFactory(table_opts)); - std::string file_path = - MakeFilePath("rocksdb_sst_reverse_bytewise_comparator.sst"); - createSST(opts, file_path); - - char* usage[3]; - PopulateCommandArgs(file_path, "--command=raw", usage); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - cleanup(opts, file_path); - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, SstDumpComparatorWithU64Ts) { - Options opts; - opts.env = env(); - opts.comparator = test::BytewiseComparatorWithU64TsWrapper(); - BlockBasedTableOptions table_opts; - table_opts.filter_policy.reset( - ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false)); - opts.table_factory.reset(new BlockBasedTableFactory(table_opts)); - std::string file_path = - MakeFilePath("rocksdb_sst_comparator_with_u64_ts.sst"); - createSST(opts, file_path); - - char* usage[3]; - PopulateCommandArgs(file_path, "--command=raw", usage); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - cleanup(opts, file_path); - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, FilterBlock) { - Options opts; - opts.env = env(); - BlockBasedTableOptions table_opts; - table_opts.filter_policy.reset( - ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, true)); - opts.table_factory.reset(new BlockBasedTableFactory(table_opts)); - std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); - - char* usage[3]; - PopulateCommandArgs(file_path, "--command=raw", usage); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - cleanup(opts, file_path); - for (int i = 0; i < 3; i++) { - delete[] 
usage[i]; - } -} - -TEST_F(SSTDumpToolTest, FullFilterBlock) { - Options opts; - opts.env = env(); - BlockBasedTableOptions table_opts; - table_opts.filter_policy.reset( - ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false)); - opts.table_factory.reset(new BlockBasedTableFactory(table_opts)); - std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); - - char* usage[3]; - PopulateCommandArgs(file_path, "--command=raw", usage); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - cleanup(opts, file_path); - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, GetProperties) { - Options opts; - opts.env = env(); - BlockBasedTableOptions table_opts; - table_opts.filter_policy.reset( - ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false)); - opts.table_factory.reset(new BlockBasedTableFactory(table_opts)); - std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); - - char* usage[3]; - PopulateCommandArgs(file_path, "--show_properties", usage); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - cleanup(opts, file_path); - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, CompressedSizes) { - Options opts; - opts.env = env(); - BlockBasedTableOptions table_opts; - table_opts.filter_policy.reset( - ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false)); - opts.table_factory.reset(new BlockBasedTableFactory(table_opts)); - std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); - - char* usage[3]; - PopulateCommandArgs(file_path, "--command=recompress", usage); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - cleanup(opts, file_path); - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, MemEnv) { - std::unique_ptr mem_env(NewMemEnv(env())); - Options opts; - opts.env = mem_env.get(); - std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); - - char* usage[3]; - PopulateCommandArgs(file_path, "--command=verify_checksum", usage); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - cleanup(opts, file_path); - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, ReadaheadSize) { - Options opts; - opts.env = env(); - std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); - - char* usage[4]; - PopulateCommandArgs(file_path, "--command=verify", usage); - snprintf(usage[3], kOptLength, "--readahead_size=4000000"); - - int num_reads = 0; - SyncPoint::GetInstance()->SetCallBack("RandomAccessFileReader::Read", - [&](void*) { num_reads++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(4, usage, opts)); - - // The file is approximately 10MB. Readahead is 4MB. - // We usually need 3 reads + one metadata read. - // One extra read is needed before opening the file for metadata. 
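The expected count asserted just below follows from simple arithmetic on the numbers in the comment; a back-of-envelope check (the ~10 MB figure is the comment's approximation, not an exact file size):

  #include <cstdint>

  // ceil(file_size / readahead) data reads, plus one metadata read after the
  // file is opened and one extra read issued before opening it.
  constexpr uint64_t kApproxFileSize = 10ull << 20;     // ~10 MB, per the comment
  constexpr uint64_t kReadahead = 4000000;              // --readahead_size above
  constexpr uint64_t kDataReads =
      (kApproxFileSize + kReadahead - 1) / kReadahead;  // == 3
  static_assert(kDataReads + 1 /* metadata */ + 1 /* pre-open */ == 5,
                "matches ASSERT_EQ(5, num_reads)");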
- ASSERT_EQ(5, num_reads); - - SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - - cleanup(opts, file_path); - for (int i = 0; i < 4; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, NoSstFile) { - Options opts; - opts.env = env(); - std::string file_path = MakeFilePath("no_such_file.sst"); - char* usage[3]; - PopulateCommandArgs(file_path, "", usage); - ROCKSDB_NAMESPACE::SSTDumpTool tool; - for (const auto& command : - {"--command=check", "--command=dump", "--command=raw", - "--command=verify", "--command=recompress", "--command=verify_checksum", - "--show_properties"}) { - snprintf(usage[1], kOptLength, "%s", command); - ASSERT_TRUE(tool.Run(3, usage, opts)); - } - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, ValidSSTPath) { - Options opts; - opts.env = env(); - char* usage[3]; - PopulateCommandArgs("", "", usage); - SSTDumpTool tool; - std::string file_not_exists = MakeFilePath("file_not_exists.sst"); - std::string sst_file = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, sst_file); - std::string text_file = MakeFilePath("text_file"); - ASSERT_OK(WriteStringToFile(opts.env, "Hello World!", text_file)); - std::string fake_sst = MakeFilePath("fake_sst.sst"); - ASSERT_OK(WriteStringToFile(opts.env, "Not an SST file!", fake_sst)); - - for (const auto& command_arg : {"--command=verify", "--command=identify"}) { - snprintf(usage[1], kOptLength, "%s", command_arg); - - snprintf(usage[2], kOptLength, "--file=%s", file_not_exists.c_str()); - ASSERT_TRUE(tool.Run(3, usage, opts)); - - snprintf(usage[2], kOptLength, "--file=%s", sst_file.c_str()); - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - snprintf(usage[2], kOptLength, "--file=%s", text_file.c_str()); - ASSERT_TRUE(tool.Run(3, usage, opts)); - - snprintf(usage[2], kOptLength, "--file=%s", fake_sst.c_str()); - ASSERT_TRUE(tool.Run(3, usage, opts)); - } - ASSERT_OK(opts.env->DeleteFile(sst_file)); - ASSERT_OK(opts.env->DeleteFile(text_file)); - ASSERT_OK(opts.env->DeleteFile(fake_sst)); - - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -TEST_F(SSTDumpToolTest, RawOutput) { - Options opts; - opts.env = env(); - std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); - - char* usage[3]; - PopulateCommandArgs(file_path, "--command=raw", usage); - - ROCKSDB_NAMESPACE::SSTDumpTool tool; - ASSERT_TRUE(!tool.Run(3, usage, opts)); - - const std::string raw_path = MakeFilePath("rocksdb_sst_test_dump.txt"); - std::ifstream raw_file(raw_path); - - std::string tp; - bool is_data_block = false; - int key_count = 0; - while (getline(raw_file, tp)) { - if (tp.find("Data Block #") != std::string::npos) { - is_data_block = true; - } - - if (is_data_block && tp.find("HEX") != std::string::npos) { - key_count++; - } - } - - ASSERT_EQ(kNumKey, key_count); - - raw_file.close(); - - cleanup(opts, file_path); - for (int i = 0; i < 3; i++) { - delete[] usage[i]; - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - RegisterCustomObjects(argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/tools/trace_analyzer_test.cc b/tools/trace_analyzer_test.cc deleted file mode 100644 index 81dc4f2cc..000000000 --- a/tools/trace_analyzer_test.cc +++ /dev/null @@ -1,880 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef GFLAGS -#include -int main() { - fprintf(stderr, "Please install gflags to run trace_analyzer test\n"); - return 0; -} -#else - -#include -#include -#include -#include -#include - -#include "db/db_test_util.h" -#include "file/line_file_reader.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/status.h" -#include "rocksdb/trace_reader_writer.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "tools/trace_analyzer_tool.h" -#include "trace_replay/trace_replay.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -static const int kMaxArgCount = 100; -static const size_t kArgBufferSize = 100000; -} // namespace - -// Note that, the QPS part verification of the analyzing result is not robost -// enough and causes the failure in some rare cases. Disable them temporally and -// wait for future refactor. - -// The helper functions for the test -class TraceAnalyzerTest : public testing::Test { - public: - TraceAnalyzerTest() : rnd_(0xFB) { - // test_path_ = test::TmpDir() + "trace_analyzer_test"; - test_path_ = test::PerThreadDBPath("trace_analyzer_test"); - env_ = ROCKSDB_NAMESPACE::Env::Default(); - env_->CreateDir(test_path_).PermitUncheckedError(); - dbname_ = test_path_ + "/db"; - } - - ~TraceAnalyzerTest() override {} - - void GenerateTrace(std::string trace_path) { - Options options; - options.create_if_missing = true; - options.merge_operator = MergeOperators::CreatePutOperator(); - Slice upper_bound("a"); - Slice lower_bound("abce"); - ReadOptions ro; - ro.iterate_upper_bound = &upper_bound; - ro.iterate_lower_bound = &lower_bound; - WriteOptions wo; - TraceOptions trace_opt; - DB* db_ = nullptr; - std::string value; - std::unique_ptr trace_writer; - Iterator* single_iter = nullptr; - - ASSERT_OK( - NewFileTraceWriter(env_, env_options_, trace_path, &trace_writer)); - ASSERT_OK(DB::Open(options, dbname_, &db_)); - ASSERT_OK(db_->StartTrace(trace_opt, std::move(trace_writer))); - - WriteBatch batch; - ASSERT_OK(batch.Put("a", "aaaaaaaaa")); - ASSERT_OK(batch.Merge("b", "aaaaaaaaaaaaaaaaaaaa")); - ASSERT_OK(batch.Delete("c")); - ASSERT_OK(batch.SingleDelete("d")); - ASSERT_OK(batch.DeleteRange("e", "f")); - ASSERT_OK(db_->Write(wo, &batch)); - std::vector keys; - keys.push_back("a"); - keys.push_back("b"); - keys.push_back("df"); - keys.push_back("gege"); - keys.push_back("hjhjhj"); - std::vector values; - std::vector ss = db_->MultiGet(ro, keys, &values); - ASSERT_GE(ss.size(), 0); - ASSERT_OK(ss[0]); - ASSERT_NOK(ss[2]); - std::vector cfs(2, db_->DefaultColumnFamily()); - std::vector values2(keys.size()); - db_->MultiGet(ro, 2, cfs.data(), keys.data(), values2.data(), ss.data(), - false); - ASSERT_OK(ss[0]); - db_->MultiGet(ro, db_->DefaultColumnFamily(), 2, keys.data() + 3, - values2.data(), ss.data(), false); - ASSERT_OK(db_->Get(ro, "a", &value)); - - single_iter = db_->NewIterator(ro); - single_iter->Seek("a"); - ASSERT_OK(single_iter->status()); - single_iter->SeekForPrev("b"); - ASSERT_OK(single_iter->status()); - delete single_iter; - std::this_thread::sleep_for(std::chrono::seconds(1)); - - db_->Get(ro, "g", 
&value).PermitUncheckedError(); - - ASSERT_OK(db_->EndTrace()); - - ASSERT_OK(env_->FileExists(trace_path)); - - std::unique_ptr whole_f; - std::string whole_path = test_path_ + "/0.txt"; - ASSERT_OK(env_->NewWritableFile(whole_path, &whole_f, env_options_)); - std::string whole_str = "0x61\n0x62\n0x63\n0x64\n0x65\n0x66\n"; - ASSERT_OK(whole_f->Append(whole_str)); - delete db_; - ASSERT_OK(DestroyDB(dbname_, options)); - } - - void RunTraceAnalyzer(const std::vector& args) { - char arg_buffer[kArgBufferSize]; - char* argv[kMaxArgCount]; - int argc = 0; - int cursor = 0; - - for (const auto& arg : args) { - ASSERT_LE(cursor + arg.size() + 1, kArgBufferSize); - ASSERT_LE(argc + 1, kMaxArgCount); - snprintf(arg_buffer + cursor, arg.size() + 1, "%s", arg.c_str()); - - argv[argc++] = arg_buffer + cursor; - cursor += static_cast(arg.size()) + 1; - } - - ASSERT_EQ(0, ROCKSDB_NAMESPACE::trace_analyzer_tool(argc, argv)); - } - - void CheckFileContent(const std::vector& cnt, - std::string file_path, bool full_content) { - const auto& fs = env_->GetFileSystem(); - FileOptions fopts(env_options_); - - ASSERT_OK(fs->FileExists(file_path, fopts.io_options, nullptr)); - std::unique_ptr file; - ASSERT_OK(fs->NewSequentialFile(file_path, fopts, &file, nullptr)); - - LineFileReader lf_reader(std::move(file), file_path, - 4096 /* filereadahead_size */); - - std::vector result; - std::string line; - while ( - lf_reader.ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)) { - result.push_back(line); - } - - ASSERT_OK(lf_reader.GetStatus()); - - size_t min_size = std::min(cnt.size(), result.size()); - for (size_t i = 0; i < min_size; i++) { - if (full_content) { - ASSERT_EQ(result[i], cnt[i]); - } else { - ASSERT_EQ(result[i][0], cnt[i][0]); - } - } - - return; - } - - void AnalyzeTrace(std::vector& paras_diff, - std::string output_path, std::string trace_path) { - std::vector paras = {"./trace_analyzer", - "-convert_to_human_readable_trace", - "-output_key_stats", - "-output_access_count_stats", - "-output_prefix=test", - "-output_prefix_cut=1", - "-output_time_series", - "-output_value_distribution", - "-output_qps_stats", - "-no_key", - "-no_print"}; - for (auto& para : paras_diff) { - paras.push_back(para); - } - Status s = env_->FileExists(trace_path); - if (!s.ok()) { - GenerateTrace(trace_path); - } - ASSERT_OK(env_->CreateDir(output_path)); - RunTraceAnalyzer(paras); - } - - ROCKSDB_NAMESPACE::Env* env_; - EnvOptions env_options_; - std::string test_path_; - std::string dbname_; - Random rnd_; -}; - -TEST_F(TraceAnalyzerTest, Get) { - std::string trace_path = test_path_ + "/trace"; - std::string output_path = test_path_ + "/get"; - std::string file_path; - std::vector paras = { - "-analyze_get=true", "-analyze_put=false", - "-analyze_delete=false", "-analyze_single_delete=false", - "-analyze_range_delete=false", "-analyze_iterator=false", - "-analyze_multiget=false"}; - paras.push_back("-output_dir=" + output_path); - paras.push_back("-trace_path=" + trace_path); - paras.push_back("-key_space_dir=" + test_path_); - AnalyzeTrace(paras, output_path, trace_path); - - // check the key_stats file - std::vector k_stats = {"0 10 0 1 1.000000", "0 10 1 1 1.000000"}; - file_path = output_path + "/test-get-0-accessed_key_stats.txt"; - CheckFileContent(k_stats, file_path, true); - - // Check the access count distribution - std::vector k_dist = {"access_count: 1 num: 2"}; - file_path = output_path + "/test-get-0-accessed_key_count_distribution.txt"; - CheckFileContent(k_dist, file_path, true); - - // Check 
the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "8", - "8", "8", "8", "8", "8", "8", - "8", "8", "0", "6", "7", "0"}; - file_path = output_path + "/test-human_readable_trace.txt"; - CheckFileContent(k_sequence, file_path, false); - - // Check the prefix - std::vector k_prefix = {"0 0 0 0.000000 0.000000 0x30", - "1 1 1 1.000000 1.000000 0x61"}; - file_path = output_path + "/test-get-0-accessed_key_prefix_cut.txt"; - CheckFileContent(k_prefix, file_path, true); - - // Check the time series - std::vector k_series = {"0 1533000630 0", "0 1533000630 1"}; - file_path = output_path + "/test-get-0-time_series.txt"; - CheckFileContent(k_series, file_path, false); - - // Check the accessed key in whole key space - std::vector k_whole_access = {"0 1"}; - file_path = output_path + "/test-get-0-whole_key_stats.txt"; - CheckFileContent(k_whole_access, file_path, true); - - // Check the whole key prefix cut - std::vector k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", - "3 0x64", "4 0x65", "5 0x66"}; - file_path = output_path + "/test-get-0-whole_key_prefix_cut.txt"; - CheckFileContent(k_whole_prefix, file_path, true); - - /* - // Check the overall qps - std::vector all_qps = {"1 0 0 0 0 0 0 0 0 1"}; - file_path = output_path + "/test-qps_stats.txt"; - CheckFileContent(all_qps, file_path, true); - - // Check the qps of get - std::vector get_qps = {"1"}; - file_path = output_path + "/test-get-0-qps_stats.txt"; - CheckFileContent(get_qps, file_path, true); - - // Check the top k qps prefix cut - std::vector top_qps = {"At time: 0 with QPS: 1", - "The prefix: 0x61 Access count: 1"}; - file_path = output_path + "/test-get-0-accessed_top_k_qps_prefix_cut.txt"; - CheckFileContent(top_qps, file_path, true); - */ -} - -// Test analyzing of Put -TEST_F(TraceAnalyzerTest, Put) { - std::string trace_path = test_path_ + "/trace"; - std::string output_path = test_path_ + "/put"; - std::string file_path; - std::vector paras = { - "-analyze_get=false", "-analyze_put=true", - "-analyze_delete=false", "-analyze_single_delete=false", - "-analyze_range_delete=false", "-analyze_iterator=false", - "-analyze_multiget=false"}; - paras.push_back("-output_dir=" + output_path); - paras.push_back("-trace_path=" + trace_path); - paras.push_back("-key_space_dir=" + test_path_); - AnalyzeTrace(paras, output_path, trace_path); - - // check the key_stats file - std::vector k_stats = {"0 9 0 1 1.000000"}; - file_path = output_path + "/test-put-0-accessed_key_stats.txt"; - CheckFileContent(k_stats, file_path, true); - - // Check the access count distribution - std::vector k_dist = {"access_count: 1 num: 1"}; - file_path = output_path + "/test-put-0-accessed_key_count_distribution.txt"; - CheckFileContent(k_dist, file_path, true); - - // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "8", - "8", "8", "8", "8", "8", "8", - "8", "8", "0", "6", "7", "0"}; - file_path = output_path + "/test-human_readable_trace.txt"; - CheckFileContent(k_sequence, file_path, false); - - // Check the prefix - std::vector k_prefix = {"0 0 0 0.000000 0.000000 0x30"}; - file_path = output_path + "/test-put-0-accessed_key_prefix_cut.txt"; - CheckFileContent(k_prefix, file_path, true); - - // Check the time series - std::vector k_series = {"1 1533056278 0"}; - file_path = output_path + "/test-put-0-time_series.txt"; - CheckFileContent(k_series, file_path, false); - - // Check the accessed key in whole key space - std::vector k_whole_access = {"0 1"}; - file_path = output_path + 
"/test-put-0-whole_key_stats.txt"; - CheckFileContent(k_whole_access, file_path, true); - - // Check the whole key prefix cut - std::vector k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", - "3 0x64", "4 0x65", "5 0x66"}; - file_path = output_path + "/test-put-0-whole_key_prefix_cut.txt"; - CheckFileContent(k_whole_prefix, file_path, true); - - // Check the overall qps - std::vector all_qps = {"0 1 0 0 0 0 0 0 0 0 1"}; - file_path = output_path + "/test-qps_stats.txt"; - CheckFileContent(all_qps, file_path, true); - - /* - // Check the qps of Put - std::vector get_qps = {"1"}; - file_path = output_path + "/test-put-0-qps_stats.txt"; - CheckFileContent(get_qps, file_path, true); - - // Check the top k qps prefix cut - std::vector top_qps = {"At time: 0 with QPS: 1", - "The prefix: 0x61 Access count: 1"}; - file_path = output_path + "/test-put-0-accessed_top_k_qps_prefix_cut.txt"; - CheckFileContent(top_qps, file_path, true); - - // Check the value size distribution - std::vector value_dist = { - "Number_of_value_size_between 0 and 16 is: 1"}; - file_path = output_path + "/test-put-0-accessed_value_size_distribution.txt"; - CheckFileContent(value_dist, file_path, true); - */ -} - -// Test analyzing of delete -TEST_F(TraceAnalyzerTest, Delete) { - std::string trace_path = test_path_ + "/trace"; - std::string output_path = test_path_ + "/delete"; - std::string file_path; - std::vector paras = { - "-analyze_get=false", "-analyze_put=false", - "-analyze_delete=true", "-analyze_single_delete=false", - "-analyze_range_delete=false", "-analyze_iterator=false", - "-analyze_multiget=false"}; - paras.push_back("-output_dir=" + output_path); - paras.push_back("-trace_path=" + trace_path); - paras.push_back("-key_space_dir=" + test_path_); - AnalyzeTrace(paras, output_path, trace_path); - - // check the key_stats file - std::vector k_stats = {"0 10 0 1 1.000000"}; - file_path = output_path + "/test-delete-0-accessed_key_stats.txt"; - CheckFileContent(k_stats, file_path, true); - - // Check the access count distribution - std::vector k_dist = {"access_count: 1 num: 1"}; - file_path = - output_path + "/test-delete-0-accessed_key_count_distribution.txt"; - CheckFileContent(k_dist, file_path, true); - - // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "8", - "8", "8", "8", "8", "8", "8", - "8", "8", "0", "6", "7", "0"}; - file_path = output_path + "/test-human_readable_trace.txt"; - CheckFileContent(k_sequence, file_path, false); - - // Check the prefix - std::vector k_prefix = {"0 0 0 0.000000 0.000000 0x30"}; - file_path = output_path + "/test-delete-0-accessed_key_prefix_cut.txt"; - CheckFileContent(k_prefix, file_path, true); - - // Check the time series - std::vector k_series = {"2 1533000630 0"}; - file_path = output_path + "/test-delete-0-time_series.txt"; - CheckFileContent(k_series, file_path, false); - - // Check the accessed key in whole key space - std::vector k_whole_access = {"2 1"}; - file_path = output_path + "/test-delete-0-whole_key_stats.txt"; - CheckFileContent(k_whole_access, file_path, true); - - // Check the whole key prefix cut - std::vector k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", - "3 0x64", "4 0x65", "5 0x66"}; - file_path = output_path + "/test-delete-0-whole_key_prefix_cut.txt"; - CheckFileContent(k_whole_prefix, file_path, true); - - /* - // Check the overall qps - std::vector all_qps = {"0 0 1 0 0 0 0 0 0 1"}; - file_path = output_path + "/test-qps_stats.txt"; - CheckFileContent(all_qps, file_path, true); - - // Check the qps of Delete - 
std::vector get_qps = {"1"}; - file_path = output_path + "/test-delete-0-qps_stats.txt"; - CheckFileContent(get_qps, file_path, true); - - // Check the top k qps prefix cut - std::vector top_qps = {"At time: 0 with QPS: 1", - "The prefix: 0x63 Access count: 1"}; - file_path = output_path + "/test-delete-0-accessed_top_k_qps_prefix_cut.txt"; - CheckFileContent(top_qps, file_path, true); - */ -} - -// Test analyzing of Merge -TEST_F(TraceAnalyzerTest, Merge) { - std::string trace_path = test_path_ + "/trace"; - std::string output_path = test_path_ + "/merge"; - std::string file_path; - std::vector paras = { - "-analyze_get=false", "-analyze_put=false", - "-analyze_delete=false", "-analyze_merge=true", - "-analyze_single_delete=false", "-analyze_range_delete=false", - "-analyze_iterator=false", "-analyze_multiget=false"}; - paras.push_back("-output_dir=" + output_path); - paras.push_back("-trace_path=" + trace_path); - paras.push_back("-key_space_dir=" + test_path_); - AnalyzeTrace(paras, output_path, trace_path); - - // check the key_stats file - std::vector k_stats = {"0 20 0 1 1.000000"}; - file_path = output_path + "/test-merge-0-accessed_key_stats.txt"; - CheckFileContent(k_stats, file_path, true); - - // Check the access count distribution - std::vector k_dist = {"access_count: 1 num: 1"}; - file_path = output_path + "/test-merge-0-accessed_key_count_distribution.txt"; - CheckFileContent(k_dist, file_path, true); - - // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "8", - "8", "8", "8", "8", "8", "8", - "8", "8", "0", "6", "7", "0"}; - file_path = output_path + "/test-human_readable_trace.txt"; - CheckFileContent(k_sequence, file_path, false); - - // Check the prefix - std::vector k_prefix = {"0 0 0 0.000000 0.000000 0x30"}; - file_path = output_path + "/test-merge-0-accessed_key_prefix_cut.txt"; - CheckFileContent(k_prefix, file_path, true); - - // Check the time series - std::vector k_series = {"5 1533000630 0"}; - file_path = output_path + "/test-merge-0-time_series.txt"; - CheckFileContent(k_series, file_path, false); - - // Check the accessed key in whole key space - std::vector k_whole_access = {"1 1"}; - file_path = output_path + "/test-merge-0-whole_key_stats.txt"; - CheckFileContent(k_whole_access, file_path, true); - - // Check the whole key prefix cut - std::vector k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", - "3 0x64", "4 0x65", "5 0x66"}; - file_path = output_path + "/test-merge-0-whole_key_prefix_cut.txt"; - CheckFileContent(k_whole_prefix, file_path, true); - - /* - // Check the overall qps - std::vector all_qps = {"0 0 0 0 0 1 0 0 0 1"}; - file_path = output_path + "/test-qps_stats.txt"; - CheckFileContent(all_qps, file_path, true); - - // Check the qps of Merge - std::vector get_qps = {"1"}; - file_path = output_path + "/test-merge-0-qps_stats.txt"; - CheckFileContent(get_qps, file_path, true); - - // Check the top k qps prefix cut - std::vector top_qps = {"At time: 0 with QPS: 1", - "The prefix: 0x62 Access count: 1"}; - file_path = output_path + "/test-merge-0-accessed_top_k_qps_prefix_cut.txt"; - CheckFileContent(top_qps, file_path, true); - */ - - // Check the value size distribution - std::vector value_dist = { - "Number_of_value_size_between 0 and 24 is: 1"}; - file_path = - output_path + "/test-merge-0-accessed_value_size_distribution.txt"; - CheckFileContent(value_dist, file_path, true); -} - -// Test analyzing of SingleDelete -TEST_F(TraceAnalyzerTest, SingleDelete) { - std::string trace_path = test_path_ + "/trace"; - 
std::string output_path = test_path_ + "/single_delete"; - std::string file_path; - std::vector paras = { - "-analyze_get=false", "-analyze_put=false", - "-analyze_delete=false", "-analyze_merge=false", - "-analyze_single_delete=true", "-analyze_range_delete=false", - "-analyze_iterator=false", "-analyze_multiget=false"}; - paras.push_back("-output_dir=" + output_path); - paras.push_back("-trace_path=" + trace_path); - paras.push_back("-key_space_dir=" + test_path_); - AnalyzeTrace(paras, output_path, trace_path); - - // check the key_stats file - std::vector k_stats = {"0 10 0 1 1.000000"}; - file_path = output_path + "/test-single_delete-0-accessed_key_stats.txt"; - CheckFileContent(k_stats, file_path, true); - - // Check the access count distribution - std::vector k_dist = {"access_count: 1 num: 1"}; - file_path = - output_path + "/test-single_delete-0-accessed_key_count_distribution.txt"; - CheckFileContent(k_dist, file_path, true); - - // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "8", - "8", "8", "8", "8", "8", "8", - "8", "8", "0", "6", "7", "0"}; - file_path = output_path + "/test-human_readable_trace.txt"; - CheckFileContent(k_sequence, file_path, false); - - // Check the prefix - std::vector k_prefix = {"0 0 0 0.000000 0.000000 0x30"}; - file_path = output_path + "/test-single_delete-0-accessed_key_prefix_cut.txt"; - CheckFileContent(k_prefix, file_path, true); - - // Check the time series - std::vector k_series = {"3 1533000630 0"}; - file_path = output_path + "/test-single_delete-0-time_series.txt"; - CheckFileContent(k_series, file_path, false); - - // Check the accessed key in whole key space - std::vector k_whole_access = {"3 1"}; - file_path = output_path + "/test-single_delete-0-whole_key_stats.txt"; - CheckFileContent(k_whole_access, file_path, true); - - // Check the whole key prefix cut - std::vector k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", - "3 0x64", "4 0x65", "5 0x66"}; - file_path = output_path + "/test-single_delete-0-whole_key_prefix_cut.txt"; - CheckFileContent(k_whole_prefix, file_path, true); - - /* - // Check the overall qps - std::vector all_qps = {"0 0 0 1 0 0 0 0 0 1"}; - file_path = output_path + "/test-qps_stats.txt"; - CheckFileContent(all_qps, file_path, true); - - // Check the qps of SingleDelete - std::vector get_qps = {"1"}; - file_path = output_path + "/test-single_delete-0-qps_stats.txt"; - CheckFileContent(get_qps, file_path, true); - - // Check the top k qps prefix cut - std::vector top_qps = {"At time: 0 with QPS: 1", - "The prefix: 0x64 Access count: 1"}; - file_path = - output_path + "/test-single_delete-0-accessed_top_k_qps_prefix_cut.txt"; - CheckFileContent(top_qps, file_path, true); - */ -} - -// Test analyzing of delete -TEST_F(TraceAnalyzerTest, DeleteRange) { - std::string trace_path = test_path_ + "/trace"; - std::string output_path = test_path_ + "/range_delete"; - std::string file_path; - std::vector paras = { - "-analyze_get=false", "-analyze_put=false", - "-analyze_delete=false", "-analyze_merge=false", - "-analyze_single_delete=false", "-analyze_range_delete=true", - "-analyze_iterator=false", "-analyze_multiget=false"}; - paras.push_back("-output_dir=" + output_path); - paras.push_back("-trace_path=" + trace_path); - paras.push_back("-key_space_dir=" + test_path_); - AnalyzeTrace(paras, output_path, trace_path); - - // check the key_stats file - std::vector k_stats = {"0 10 0 1 1.000000", "0 10 1 1 1.000000"}; - file_path = output_path + "/test-range_delete-0-accessed_key_stats.txt"; - 
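For reference, the flag lists assembled by AnalyzeTrace() feed straight into the analyzer's entry point; a minimal direct invocation might look like the following sketch (paths are placeholders, and it assumes the test's include of "tools/trace_analyzer_tool.h"):

  // Sketch: run the analyzer on an existing trace with only Get analysis on.
  int AnalyzeGetTrace(const std::string& trace_path, const std::string& out_dir,
                      const std::string& key_space_dir) {
    std::vector<std::string> args = {"./trace_analyzer",
                                     "-analyze_get=true",
                                     "-output_dir=" + out_dir,
                                     "-trace_path=" + trace_path,
                                     "-key_space_dir=" + key_space_dir};
    std::vector<char*> argv;
    for (auto& a : args) {
      argv.push_back(&a[0]);
    }
    return ROCKSDB_NAMESPACE::trace_analyzer_tool(static_cast<int>(argv.size()),
                                                  argv.data());
  }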
CheckFileContent(k_stats, file_path, true); - - // Check the access count distribution - std::vector k_dist = {"access_count: 1 num: 2"}; - file_path = - output_path + "/test-range_delete-0-accessed_key_count_distribution.txt"; - CheckFileContent(k_dist, file_path, true); - - // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "8", - "8", "8", "8", "8", "8", "8", - "8", "8", "0", "6", "7", "0"}; - file_path = output_path + "/test-human_readable_trace.txt"; - CheckFileContent(k_sequence, file_path, false); - - // Check the prefix - std::vector k_prefix = {"0 0 0 0.000000 0.000000 0x30", - "1 1 1 1.000000 1.000000 0x65"}; - file_path = output_path + "/test-range_delete-0-accessed_key_prefix_cut.txt"; - CheckFileContent(k_prefix, file_path, true); - - // Check the time series - std::vector k_series = {"4 1533000630 0", "4 1533060100 1"}; - file_path = output_path + "/test-range_delete-0-time_series.txt"; - CheckFileContent(k_series, file_path, false); - - // Check the accessed key in whole key space - std::vector k_whole_access = {"4 1", "5 1"}; - file_path = output_path + "/test-range_delete-0-whole_key_stats.txt"; - CheckFileContent(k_whole_access, file_path, true); - - // Check the whole key prefix cut - std::vector k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", - "3 0x64", "4 0x65", "5 0x66"}; - file_path = output_path + "/test-range_delete-0-whole_key_prefix_cut.txt"; - CheckFileContent(k_whole_prefix, file_path, true); - - /* - // Check the overall qps - std::vector all_qps = {"0 0 0 0 2 0 0 0 0 2"}; - file_path = output_path + "/test-qps_stats.txt"; - CheckFileContent(all_qps, file_path, true); - - // Check the qps of DeleteRange - std::vector get_qps = {"2"}; - file_path = output_path + "/test-range_delete-0-qps_stats.txt"; - CheckFileContent(get_qps, file_path, true); - - // Check the top k qps prefix cut - std::vector top_qps = {"At time: 0 with QPS: 2", - "The prefix: 0x65 Access count: 1", - "The prefix: 0x66 Access count: 1"}; - file_path = - output_path + "/test-range_delete-0-accessed_top_k_qps_prefix_cut.txt"; - CheckFileContent(top_qps, file_path, true); - */ -} - -// Test analyzing of Iterator -TEST_F(TraceAnalyzerTest, Iterator) { - std::string trace_path = test_path_ + "/trace"; - std::string output_path = test_path_ + "/iterator"; - std::string file_path; - std::vector paras = { - "-analyze_get=false", "-analyze_put=false", - "-analyze_delete=false", "-analyze_merge=false", - "-analyze_single_delete=false", "-analyze_range_delete=false", - "-analyze_iterator=true", "-analyze_multiget=false"}; - paras.push_back("-output_dir=" + output_path); - paras.push_back("-trace_path=" + trace_path); - paras.push_back("-key_space_dir=" + test_path_); - AnalyzeTrace(paras, output_path, trace_path); - - // Check the output of Seek - // check the key_stats file - std::vector k_stats = {"0 10 0 1 1.000000"}; - file_path = output_path + "/test-iterator_Seek-0-accessed_key_stats.txt"; - CheckFileContent(k_stats, file_path, true); - - // Check the access count distribution - std::vector k_dist = {"access_count: 1 num: 1"}; - file_path = - output_path + "/test-iterator_Seek-0-accessed_key_count_distribution.txt"; - CheckFileContent(k_dist, file_path, true); - - // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "8", - "8", "8", "8", "8", "8", "8", - "8", "8", "0", "6", "7", "0"}; - file_path = output_path + "/test-human_readable_trace.txt"; - CheckFileContent(k_sequence, file_path, false); - - // Check the prefix - std::vector 
k_prefix = {"0 0 0 0.000000 0.000000 0x30"}; - file_path = output_path + "/test-iterator_Seek-0-accessed_key_prefix_cut.txt"; - CheckFileContent(k_prefix, file_path, true); - - // Check the time series - std::vector k_series = {"6 1 0"}; - file_path = output_path + "/test-iterator_Seek-0-time_series.txt"; - CheckFileContent(k_series, file_path, false); - - // Check the accessed key in whole key space - std::vector k_whole_access = {"0 1"}; - file_path = output_path + "/test-iterator_Seek-0-whole_key_stats.txt"; - CheckFileContent(k_whole_access, file_path, true); - - // Check the whole key prefix cut - std::vector k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", - "3 0x64", "4 0x65", "5 0x66"}; - file_path = output_path + "/test-iterator_Seek-0-whole_key_prefix_cut.txt"; - CheckFileContent(k_whole_prefix, file_path, true); - - /* - // Check the overall qps - std::vector all_qps = {"0 0 0 0 0 0 1 1 0 2"}; - file_path = output_path + "/test-qps_stats.txt"; - CheckFileContent(all_qps, file_path, true); - - // Check the qps of Iterator_Seek - std::vector get_qps = {"1"}; - file_path = output_path + "/test-iterator_Seek-0-qps_stats.txt"; - CheckFileContent(get_qps, file_path, true); - - // Check the top k qps prefix cut - std::vector top_qps = {"At time: 0 with QPS: 1", - "The prefix: 0x61 Access count: 1"}; - file_path = - output_path + "/test-iterator_Seek-0-accessed_top_k_qps_prefix_cut.txt"; - CheckFileContent(top_qps, file_path, true); - */ - - // Check the output of SeekForPrev - // check the key_stats file - k_stats = {"0 10 0 1 1.000000"}; - file_path = - output_path + "/test-iterator_SeekForPrev-0-accessed_key_stats.txt"; - CheckFileContent(k_stats, file_path, true); - - // Check the access count distribution - k_dist = {"access_count: 1 num: 1"}; - file_path = - output_path + - "/test-iterator_SeekForPrev-0-accessed_key_count_distribution.txt"; - CheckFileContent(k_dist, file_path, true); - - // Check the prefix - k_prefix = {"0 0 0 0.000000 0.000000 0x30"}; - file_path = - output_path + "/test-iterator_SeekForPrev-0-accessed_key_prefix_cut.txt"; - CheckFileContent(k_prefix, file_path, true); - - // Check the time series - k_series = {"7 0 0"}; - file_path = output_path + "/test-iterator_SeekForPrev-0-time_series.txt"; - CheckFileContent(k_series, file_path, false); - - // Check the accessed key in whole key space - k_whole_access = {"1 1"}; - file_path = output_path + "/test-iterator_SeekForPrev-0-whole_key_stats.txt"; - CheckFileContent(k_whole_access, file_path, true); - - // Check the whole key prefix cut - k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", "3 0x64", "4 0x65", "5 0x66"}; - file_path = - output_path + "/test-iterator_SeekForPrev-0-whole_key_prefix_cut.txt"; - CheckFileContent(k_whole_prefix, file_path, true); - - /* - // Check the qps of Iterator_SeekForPrev - get_qps = {"1"}; - file_path = output_path + "/test-iterator_SeekForPrev-0-qps_stats.txt"; - CheckFileContent(get_qps, file_path, true); - - // Check the top k qps prefix cut - top_qps = {"At time: 0 with QPS: 1", "The prefix: 0x62 Access count: 1"}; - file_path = output_path + - "/test-iterator_SeekForPrev-0-accessed_top_k_qps_prefix_cut.txt"; - CheckFileContent(top_qps, file_path, true); - */ -} - -// Test analyzing of multiget -TEST_F(TraceAnalyzerTest, MultiGet) { - std::string trace_path = test_path_ + "/trace"; - std::string output_path = test_path_ + "/multiget"; - std::string file_path; - std::vector paras = { - "-analyze_get=false", "-analyze_put=false", - "-analyze_delete=false", "-analyze_merge=false", 
- "-analyze_single_delete=false", "-analyze_range_delete=true", - "-analyze_iterator=false", "-analyze_multiget=true"}; - paras.push_back("-output_dir=" + output_path); - paras.push_back("-trace_path=" + trace_path); - paras.push_back("-key_space_dir=" + test_path_); - AnalyzeTrace(paras, output_path, trace_path); - - // check the key_stats file - std::vector k_stats = {"0 10 0 2 1.000000", "0 10 1 2 1.000000", - "0 10 2 1 1.000000", "0 10 3 2 1.000000", - "0 10 4 2 1.000000"}; - file_path = output_path + "/test-multiget-0-accessed_key_stats.txt"; - CheckFileContent(k_stats, file_path, true); - - // Check the access count distribution - std::vector k_dist = {"access_count: 1 num: 1", - "access_count: 2 num: 4"}; - file_path = - output_path + "/test-multiget-0-accessed_key_count_distribution.txt"; - CheckFileContent(k_dist, file_path, true); - - // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "8", - "8", "8", "8", "8", "8", "8", - "8", "8", "0", "6", "7", "0"}; - file_path = output_path + "/test-human_readable_trace.txt"; - CheckFileContent(k_sequence, file_path, false); - - // Check the prefix - std::vector k_prefix = { - "0 0 0 0.000000 0.000000 0x30", "1 2 1 2.000000 1.000000 0x61", - "2 2 1 2.000000 1.000000 0x62", "3 1 1 1.000000 1.000000 0x64", - "4 2 1 2.000000 1.000000 0x67"}; - file_path = output_path + "/test-multiget-0-accessed_key_prefix_cut.txt"; - CheckFileContent(k_prefix, file_path, true); - - // Check the time series - std::vector k_series = {"8 0 0", "8 0 1", "8 0 2", - "8 0 3", "8 0 4", "8 0 0", - "8 0 1", "8 0 3", "8 0 4"}; - file_path = output_path + "/test-multiget-0-time_series.txt"; - CheckFileContent(k_series, file_path, false); - - // Check the accessed key in whole key space - std::vector k_whole_access = {"0 2", "1 2"}; - file_path = output_path + "/test-multiget-0-whole_key_stats.txt"; - CheckFileContent(k_whole_access, file_path, true); - - // Check the whole key prefix cut - std::vector k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", - "3 0x64", "4 0x65", "5 0x66"}; - file_path = output_path + "/test-multiget-0-whole_key_prefix_cut.txt"; - CheckFileContent(k_whole_prefix, file_path, true); - - /* - // Check the overall qps. We have 3 MultiGet queries and it requested 9 keys - // in total - std::vector all_qps = {"0 0 0 0 2 0 0 0 9 11"}; - file_path = output_path + "/test-qps_stats.txt"; - CheckFileContent(all_qps, file_path, true); - - // Check the qps of DeleteRange - std::vector get_qps = {"9"}; - file_path = output_path + "/test-multiget-0-qps_stats.txt"; - CheckFileContent(get_qps, file_path, true); - - // Check the top k qps prefix cut - std::vector top_qps = { - "At time: 0 with QPS: 9", "The prefix: 0x61 Access count: 2", - "The prefix: 0x62 Access count: 2", "The prefix: 0x64 Access count: 1", - "The prefix: 0x67 Access count: 2", "The prefix: 0x68 Access count: 2"}; - file_path = - output_path + "/test-multiget-0-accessed_top_k_qps_prefix_cut.txt"; - CheckFileContent(top_qps, file_path, true); - */ -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} -#endif // GFLAG diff --git a/trace_replay/block_cache_tracer_test.cc b/trace_replay/block_cache_tracer_test.cc deleted file mode 100644 index f9d0773bf..000000000 --- a/trace_replay/block_cache_tracer_test.cc +++ /dev/null @@ -1,421 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "trace_replay/block_cache_tracer.h" - -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/status.h" -#include "rocksdb/trace_reader_writer.h" -#include "rocksdb/trace_record.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -const uint64_t kBlockSize = 1024; -const std::string kBlockKeyPrefix = "test-block-"; -const uint32_t kCFId = 0; -const uint32_t kLevel = 1; -const uint64_t kSSTFDNumber = 100; -const std::string kRefKeyPrefix = "test-get-"; -const uint64_t kNumKeysInBlock = 1024; -const uint64_t kReferencedDataSize = 10; -} // namespace - -class BlockCacheTracerTest : public testing::Test { - public: - BlockCacheTracerTest() { - test_path_ = test::PerThreadDBPath("block_cache_tracer_test"); - env_ = ROCKSDB_NAMESPACE::Env::Default(); - clock_ = env_->GetSystemClock().get(); - EXPECT_OK(env_->CreateDir(test_path_)); - trace_file_path_ = test_path_ + "/block_cache_trace"; - } - - ~BlockCacheTracerTest() override { - EXPECT_OK(env_->DeleteFile(trace_file_path_)); - EXPECT_OK(env_->DeleteDir(test_path_)); - } - - TableReaderCaller GetCaller(uint32_t key_id) { - uint32_t n = key_id % 5; - switch (n) { - case 0: - return TableReaderCaller::kPrefetch; - case 1: - return TableReaderCaller::kCompaction; - case 2: - return TableReaderCaller::kUserGet; - case 3: - return TableReaderCaller::kUserMultiGet; - case 4: - return TableReaderCaller::kUserIterator; - } - assert(false); - return TableReaderCaller::kMaxBlockCacheLookupCaller; - } - - void WriteBlockAccess(BlockCacheTraceWriter* writer, uint32_t from_key_id, - TraceType block_type, uint32_t nblocks) { - assert(writer); - for (uint32_t i = 0; i < nblocks; i++) { - uint32_t key_id = from_key_id + i; - BlockCacheTraceRecord record; - record.block_type = block_type; - record.block_size = kBlockSize + key_id; - record.block_key = (kBlockKeyPrefix + std::to_string(key_id)); - record.access_timestamp = clock_->NowMicros(); - record.cf_id = kCFId; - record.cf_name = kDefaultColumnFamilyName; - record.caller = GetCaller(key_id); - record.level = kLevel; - record.sst_fd_number = kSSTFDNumber + key_id; - record.is_cache_hit = false; - record.no_insert = false; - // Provide get_id for all callers. The writer should only write get_id - // when the caller is either GET or MGET. - record.get_id = key_id + 1; - record.get_from_user_specified_snapshot = true; - // Provide these fields for all block types. - // The writer should only write these fields for data blocks and the - // caller is either GET or MGET. 
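The rule the two comments above describe, and that VerifyAccess() re-checks when reading the trace back, can be summarized as a pair of predicates. The helper names below are purely illustrative, not part of the tracer's API; the enums are the ones already used in this test:

  // Illustrative only: which records carry get_id / referenced_key, and which
  // additionally carry the data-block-only fields.
  inline bool CarriesGetFields(TableReaderCaller caller) {
    return caller == TableReaderCaller::kUserGet ||
           caller == TableReaderCaller::kUserMultiGet;
  }
  inline bool CarriesDataBlockFields(TraceType type, TableReaderCaller caller) {
    return type == TraceType::kBlockTraceDataBlock && CarriesGetFields(caller);
  }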
- record.referenced_key = (kRefKeyPrefix + std::to_string(key_id)); - record.referenced_key_exist_in_block = true; - record.num_keys_in_block = kNumKeysInBlock; - record.referenced_data_size = kReferencedDataSize + key_id; - ASSERT_OK(writer->WriteBlockAccess( - record, record.block_key, record.cf_name, record.referenced_key)); - } - } - - BlockCacheTraceRecord GenerateAccessRecord() { - uint32_t key_id = 0; - BlockCacheTraceRecord record; - record.block_type = TraceType::kBlockTraceDataBlock; - record.block_size = kBlockSize; - record.block_key = kBlockKeyPrefix + std::to_string(key_id); - record.access_timestamp = clock_->NowMicros(); - record.cf_id = kCFId; - record.cf_name = kDefaultColumnFamilyName; - record.caller = GetCaller(key_id); - record.level = kLevel; - record.sst_fd_number = kSSTFDNumber + key_id; - record.is_cache_hit = false; - record.no_insert = false; - record.referenced_key = kRefKeyPrefix + std::to_string(key_id); - record.referenced_key_exist_in_block = true; - record.num_keys_in_block = kNumKeysInBlock; - return record; - } - - void VerifyAccess(BlockCacheTraceReader* reader, uint32_t from_key_id, - TraceType block_type, uint32_t nblocks) { - assert(reader); - for (uint32_t i = 0; i < nblocks; i++) { - uint32_t key_id = from_key_id + i; - BlockCacheTraceRecord record; - ASSERT_OK(reader->ReadAccess(&record)); - ASSERT_EQ(block_type, record.block_type); - ASSERT_EQ(kBlockSize + key_id, record.block_size); - ASSERT_EQ(kBlockKeyPrefix + std::to_string(key_id), record.block_key); - ASSERT_EQ(kCFId, record.cf_id); - ASSERT_EQ(kDefaultColumnFamilyName, record.cf_name); - ASSERT_EQ(GetCaller(key_id), record.caller); - ASSERT_EQ(kLevel, record.level); - ASSERT_EQ(kSSTFDNumber + key_id, record.sst_fd_number); - ASSERT_FALSE(record.is_cache_hit); - ASSERT_FALSE(record.no_insert); - if (record.caller == TableReaderCaller::kUserGet || - record.caller == TableReaderCaller::kUserMultiGet) { - ASSERT_EQ(key_id + 1, record.get_id); - ASSERT_TRUE(record.get_from_user_specified_snapshot); - ASSERT_EQ(kRefKeyPrefix + std::to_string(key_id), - record.referenced_key); - } else { - ASSERT_EQ(BlockCacheTraceHelper::kReservedGetId, record.get_id); - ASSERT_FALSE(record.get_from_user_specified_snapshot); - ASSERT_EQ("", record.referenced_key); - } - if (block_type == TraceType::kBlockTraceDataBlock && - (record.caller == TableReaderCaller::kUserGet || - record.caller == TableReaderCaller::kUserMultiGet)) { - ASSERT_TRUE(record.referenced_key_exist_in_block); - ASSERT_EQ(kNumKeysInBlock, record.num_keys_in_block); - ASSERT_EQ(kReferencedDataSize + key_id, record.referenced_data_size); - continue; - } - ASSERT_FALSE(record.referenced_key_exist_in_block); - ASSERT_EQ(0, record.num_keys_in_block); - ASSERT_EQ(0, record.referenced_data_size); - } - } - - Env* env_; - SystemClock* clock_; - EnvOptions env_options_; - std::string trace_file_path_; - std::string test_path_; -}; - -TEST_F(BlockCacheTracerTest, AtomicWriteBeforeStartTrace) { - BlockCacheTraceRecord record = GenerateAccessRecord(); - { - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - BlockCacheTracer writer; - // The record should be written to the trace_file since StartTrace is not - // called. - ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name, - record.referenced_key)); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - { - // Verify trace file contains nothing. 
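// Note on the block above: BlockCacheTracer::WriteBlockAccess() is a no-op
// until StartTrace() has been called, so although NewFileTraceWriter() creates
// the file, it stays empty; the failing ReadHeader() that follows is how the
// test observes that. Illustrative standalone check using the same types as
// this file (sketch only; error handling elided):
bool BlockCacheTraceIsEmpty(Env* env, const EnvOptions& opts,
                            const std::string& path) {
  std::unique_ptr<TraceReader> trace_reader;
  if (!NewFileTraceReader(env, opts, path, &trace_reader).ok()) {
    return false;  // could not even open the file
  }
  BlockCacheTraceReader reader(std::move(trace_reader));
  BlockCacheTraceHeader header;
  return !reader.ReadHeader(&header).ok();  // no header => nothing was traced
}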
- std::unique_ptr trace_reader; - ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, - &trace_reader)); - BlockCacheTraceReader reader(std::move(trace_reader)); - BlockCacheTraceHeader header; - ASSERT_NOK(reader.ReadHeader(&header)); - } -} - -TEST_F(BlockCacheTracerTest, AtomicWrite) { - BlockCacheTraceRecord record = GenerateAccessRecord(); - { - BlockCacheTraceWriterOptions trace_writer_opt; - BlockCacheTraceOptions trace_opt; - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - std::unique_ptr block_cache_trace_writer = - NewBlockCacheTraceWriter(env_->GetSystemClock().get(), trace_writer_opt, - std::move(trace_writer)); - ASSERT_NE(block_cache_trace_writer, nullptr); - BlockCacheTracer writer; - ASSERT_OK( - writer.StartTrace(trace_opt, std::move(block_cache_trace_writer))); - ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name, - record.referenced_key)); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - { - // Verify trace file contains one record. - std::unique_ptr trace_reader; - ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, - &trace_reader)); - BlockCacheTraceReader reader(std::move(trace_reader)); - BlockCacheTraceHeader header; - ASSERT_OK(reader.ReadHeader(&header)); - ASSERT_EQ(kMajorVersion, static_cast(header.rocksdb_major_version)); - ASSERT_EQ(kMinorVersion, static_cast(header.rocksdb_minor_version)); - VerifyAccess(&reader, 0, TraceType::kBlockTraceDataBlock, 1); - ASSERT_NOK(reader.ReadAccess(&record)); - } -} - -TEST_F(BlockCacheTracerTest, ConsecutiveStartTrace) { - BlockCacheTraceWriterOptions trace_writer_opt; - BlockCacheTraceOptions trace_opt; - std::unique_ptr trace_writer; - ASSERT_OK( - NewFileTraceWriter(env_, env_options_, trace_file_path_, &trace_writer)); - std::unique_ptr block_cache_trace_writer = - NewBlockCacheTraceWriter(env_->GetSystemClock().get(), trace_writer_opt, - std::move(trace_writer)); - ASSERT_NE(block_cache_trace_writer, nullptr); - BlockCacheTracer writer; - ASSERT_OK(writer.StartTrace(trace_opt, std::move(block_cache_trace_writer))); - ASSERT_NOK(writer.StartTrace(trace_opt, std::move(block_cache_trace_writer))); - ASSERT_OK(env_->FileExists(trace_file_path_)); -} - -TEST_F(BlockCacheTracerTest, AtomicNoWriteAfterEndTrace) { - BlockCacheTraceRecord record = GenerateAccessRecord(); - { - BlockCacheTraceWriterOptions trace_writer_opt; - BlockCacheTraceOptions trace_opt; - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - std::unique_ptr block_cache_trace_writer = - NewBlockCacheTraceWriter(env_->GetSystemClock().get(), trace_writer_opt, - std::move(trace_writer)); - ASSERT_NE(block_cache_trace_writer, nullptr); - BlockCacheTracer writer; - ASSERT_OK( - writer.StartTrace(trace_opt, std::move(block_cache_trace_writer))); - ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name, - record.referenced_key)); - writer.EndTrace(); - // Write the record again. This time the record should not be written since - // EndTrace is called. - ASSERT_OK(writer.WriteBlockAccess(record, record.block_key, record.cf_name, - record.referenced_key)); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - { - // Verify trace file contains one record. 
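// ReadAccess() returns a non-OK status once the trace is exhausted, which is
// how each of these read scopes detects end-of-file. Hedged sketch of a drain
// helper built on that convention (name is illustrative, not from the test):
size_t CountBlockAccessRecords(BlockCacheTraceReader* reader) {
  size_t n = 0;
  BlockCacheTraceRecord rec;
  while (reader->ReadAccess(&rec).ok()) {
    ++n;
  }
  return n;  // e.g. 1 for the AtomicWrite / AtomicNoWriteAfterEndTrace traces
}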
- std::unique_ptr trace_reader; - ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, - &trace_reader)); - BlockCacheTraceReader reader(std::move(trace_reader)); - BlockCacheTraceHeader header; - ASSERT_OK(reader.ReadHeader(&header)); - ASSERT_EQ(kMajorVersion, static_cast(header.rocksdb_major_version)); - ASSERT_EQ(kMinorVersion, static_cast(header.rocksdb_minor_version)); - VerifyAccess(&reader, 0, TraceType::kBlockTraceDataBlock, 1); - ASSERT_NOK(reader.ReadAccess(&record)); - } -} - -TEST_F(BlockCacheTracerTest, NextGetId) { - BlockCacheTracer writer; - { - BlockCacheTraceWriterOptions trace_writer_opt; - BlockCacheTraceOptions trace_opt; - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - std::unique_ptr block_cache_trace_writer = - NewBlockCacheTraceWriter(env_->GetSystemClock().get(), trace_writer_opt, - std::move(trace_writer)); - ASSERT_NE(block_cache_trace_writer, nullptr); - // next get id should always return 0 before we call StartTrace. - ASSERT_EQ(0, writer.NextGetId()); - ASSERT_EQ(0, writer.NextGetId()); - ASSERT_OK( - writer.StartTrace(trace_opt, std::move(block_cache_trace_writer))); - ASSERT_EQ(1, writer.NextGetId()); - ASSERT_EQ(2, writer.NextGetId()); - writer.EndTrace(); - // next get id should return 0. - ASSERT_EQ(0, writer.NextGetId()); - } - - // Start trace again and next get id should return 1. - { - BlockCacheTraceWriterOptions trace_writer_opt; - BlockCacheTraceOptions trace_opt; - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - std::unique_ptr block_cache_trace_writer = - NewBlockCacheTraceWriter(env_->GetSystemClock().get(), trace_writer_opt, - std::move(trace_writer)); - ASSERT_NE(block_cache_trace_writer, nullptr); - ASSERT_OK( - writer.StartTrace(trace_opt, std::move(block_cache_trace_writer))); - ASSERT_EQ(1, writer.NextGetId()); - } -} - -TEST_F(BlockCacheTracerTest, MixedBlocks) { - { - // Generate a trace file containing a mix of blocks. - BlockCacheTraceWriterOptions trace_writer_opt; - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - std::unique_ptr block_cache_trace_writer = - NewBlockCacheTraceWriter(env_->GetSystemClock().get(), trace_writer_opt, - std::move(trace_writer)); - ASSERT_NE(block_cache_trace_writer, nullptr); - ASSERT_OK(block_cache_trace_writer->WriteHeader()); - // Write blocks of different types. - WriteBlockAccess(block_cache_trace_writer.get(), 0, - TraceType::kBlockTraceUncompressionDictBlock, 10); - WriteBlockAccess(block_cache_trace_writer.get(), 10, - TraceType::kBlockTraceDataBlock, 10); - WriteBlockAccess(block_cache_trace_writer.get(), 20, - TraceType::kBlockTraceFilterBlock, 10); - WriteBlockAccess(block_cache_trace_writer.get(), 30, - TraceType::kBlockTraceIndexBlock, 10); - WriteBlockAccess(block_cache_trace_writer.get(), 40, - TraceType::kBlockTraceRangeDeletionBlock, 10); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - - { - // Verify trace file is generated correctly. - std::unique_ptr trace_reader; - ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, - &trace_reader)); - BlockCacheTraceReader reader(std::move(trace_reader)); - BlockCacheTraceHeader header; - ASSERT_OK(reader.ReadHeader(&header)); - ASSERT_EQ(kMajorVersion, static_cast(header.rocksdb_major_version)); - ASSERT_EQ(kMinorVersion, static_cast(header.rocksdb_minor_version)); - // Read blocks. 
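// NextGetId() semantics exercised above: it returns 0 whenever tracing is off,
// and a fresh, monotonically increasing id per call while a trace is active
// (restarting a trace restarts the sequence at 1). Illustrative sketch of
// stamping every block access belonging to one Get with the same id
// (hypothetical helper, not part of the original test):
void TraceGetBlockAccesses(BlockCacheTracer* tracer,
                           std::vector<BlockCacheTraceRecord>& recs) {
  const uint64_t get_id = tracer->NextGetId();  // 0 means tracing is disabled
  for (BlockCacheTraceRecord& rec : recs) {
    rec.get_id = get_id;  // all lookups of this Get share one id
    tracer->WriteBlockAccess(rec, rec.block_key, rec.cf_name,
                             rec.referenced_key)
        .PermitUncheckedError();  // the tests above ASSERT_OK() instead
  }
}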
- VerifyAccess(&reader, 0, TraceType::kBlockTraceUncompressionDictBlock, 10); - VerifyAccess(&reader, 10, TraceType::kBlockTraceDataBlock, 10); - VerifyAccess(&reader, 20, TraceType::kBlockTraceFilterBlock, 10); - VerifyAccess(&reader, 30, TraceType::kBlockTraceIndexBlock, 10); - VerifyAccess(&reader, 40, TraceType::kBlockTraceRangeDeletionBlock, 10); - // Read one more record should report an error. - BlockCacheTraceRecord record; - ASSERT_NOK(reader.ReadAccess(&record)); - } -} - -TEST_F(BlockCacheTracerTest, HumanReadableTrace) { - BlockCacheTraceRecord record = GenerateAccessRecord(); - record.get_id = 1; - record.referenced_key = ""; - record.caller = TableReaderCaller::kUserGet; - record.get_from_user_specified_snapshot = true; - record.referenced_data_size = kReferencedDataSize; - PutFixed32(&record.referenced_key, 111); - PutLengthPrefixedSlice(&record.referenced_key, "get_key"); - PutFixed64(&record.referenced_key, 2 << 8); - PutLengthPrefixedSlice(&record.block_key, "block_key"); - PutVarint64(&record.block_key, 333); - { - // Generate a human readable trace file. - BlockCacheHumanReadableTraceWriter writer; - ASSERT_OK(writer.NewWritableFile(trace_file_path_, env_)); - ASSERT_OK(writer.WriteHumanReadableTraceRecord(record, 1, 1)); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - { - BlockCacheHumanReadableTraceReader reader(trace_file_path_); - BlockCacheTraceHeader header; - BlockCacheTraceRecord read_record; - ASSERT_OK(reader.ReadHeader(&header)); - ASSERT_OK(reader.ReadAccess(&read_record)); - ASSERT_EQ(TraceType::kBlockTraceDataBlock, read_record.block_type); - ASSERT_EQ(kBlockSize, read_record.block_size); - ASSERT_EQ(kCFId, read_record.cf_id); - ASSERT_EQ(kDefaultColumnFamilyName, read_record.cf_name); - ASSERT_EQ(TableReaderCaller::kUserGet, read_record.caller); - ASSERT_EQ(kLevel, read_record.level); - ASSERT_EQ(kSSTFDNumber, read_record.sst_fd_number); - ASSERT_FALSE(read_record.is_cache_hit); - ASSERT_FALSE(read_record.no_insert); - ASSERT_EQ(1, read_record.get_id); - ASSERT_TRUE(read_record.get_from_user_specified_snapshot); - ASSERT_TRUE(read_record.referenced_key_exist_in_block); - ASSERT_EQ(kNumKeysInBlock, read_record.num_keys_in_block); - ASSERT_EQ(kReferencedDataSize, read_record.referenced_data_size); - ASSERT_EQ(record.block_key.size(), read_record.block_key.size()); - ASSERT_EQ(record.referenced_key.size(), record.referenced_key.size()); - ASSERT_EQ(112, BlockCacheTraceHelper::GetTableId(read_record)); - ASSERT_EQ(3, BlockCacheTraceHelper::GetSequenceNumber(read_record)); - ASSERT_EQ(333, BlockCacheTraceHelper::GetBlockOffsetInFile(read_record)); - // Read again should fail. - ASSERT_NOK(reader.ReadAccess(&read_record)); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/trace_replay/io_tracer_test.cc b/trace_replay/io_tracer_test.cc deleted file mode 100644 index be3af4fb3..000000000 --- a/trace_replay/io_tracer_test.cc +++ /dev/null @@ -1,353 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
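// Aside on the HumanReadableTrace test above: the keys it hand-builds use
// RocksDB's util/coding helpers, which the assertions then decode again
// (table id, sequence number, block offset). Quick reference sketch of the
// same calls on a scratch buffer:
std::string key;
PutFixed32(&key, 111);                     // fixed 4-byte little-endian value
PutLengthPrefixedSlice(&key, "get_key");   // varint32 length, then the bytes
PutFixed64(&key, 2 << 8);                  // fixed 8-byte little-endian value
PutVarint64(&key, 333);                    // 1-10 bytes, depending on magnitude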
- -#include "trace_replay/io_tracer.h" - -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/status.h" -#include "rocksdb/trace_reader_writer.h" -#include "rocksdb/trace_record.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -namespace { -const std::string kDummyFile = "/dummy/file"; - -} // namespace - -class IOTracerTest : public testing::Test { - public: - IOTracerTest() { - test_path_ = test::PerThreadDBPath("io_tracer_test"); - env_ = ROCKSDB_NAMESPACE::Env::Default(); - clock_ = env_->GetSystemClock().get(); - EXPECT_OK(env_->CreateDir(test_path_)); - trace_file_path_ = test_path_ + "/io_trace"; - } - - ~IOTracerTest() override { - EXPECT_OK(env_->DeleteFile(trace_file_path_)); - EXPECT_OK(env_->DeleteDir(test_path_)); - } - - std::string GetFileOperation(uint64_t id) { - id = id % 4; - switch (id) { - case 0: - return "CreateDir"; - case 1: - return "GetChildren"; - case 2: - return "FileSize"; - case 3: - return "DeleteDir"; - default: - assert(false); - } - return ""; - } - - void WriteIOOp(IOTraceWriter* writer, uint64_t nrecords) { - assert(writer); - for (uint64_t i = 0; i < nrecords; i++) { - IOTraceRecord record; - record.io_op_data = 0; - record.trace_type = TraceType::kIOTracer; - record.io_op_data |= (1 << IOTraceOp::kIOLen); - record.io_op_data |= (1 << IOTraceOp::kIOOffset); - record.file_operation = GetFileOperation(i); - record.io_status = IOStatus::OK().ToString(); - record.file_name = kDummyFile + std::to_string(i); - record.len = i; - record.offset = i + 20; - EXPECT_OK(writer->WriteIOOp(record, nullptr)); - } - } - - void VerifyIOOp(IOTraceReader* reader, uint32_t nrecords) { - assert(reader); - for (uint32_t i = 0; i < nrecords; i++) { - IOTraceRecord record; - ASSERT_OK(reader->ReadIOOp(&record)); - ASSERT_EQ(record.file_operation, GetFileOperation(i)); - ASSERT_EQ(record.io_status, IOStatus::OK().ToString()); - ASSERT_EQ(record.len, i); - ASSERT_EQ(record.offset, i + 20); - } - } - - Env* env_; - SystemClock* clock_; - EnvOptions env_options_; - std::string trace_file_path_; - std::string test_path_; -}; - -TEST_F(IOTracerTest, MultipleRecordsWithDifferentIOOpOptions) { - std::string file_name = kDummyFile + std::to_string(5); - { - TraceOptions trace_opt; - std::unique_ptr trace_writer; - - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - IOTracer writer; - ASSERT_OK(writer.StartIOTrace(clock_, trace_opt, std::move(trace_writer))); - - // Write general record. - IOTraceRecord record0(0, TraceType::kIOTracer, 0 /*io_op_data*/, - GetFileOperation(0), 155 /*latency*/, - IOStatus::OK().ToString(), file_name); - writer.WriteIOOp(record0, nullptr); - - // Write record with FileSize. - uint64_t io_op_data = 0; - io_op_data |= (1 << IOTraceOp::kIOFileSize); - IOTraceRecord record1(0, TraceType::kIOTracer, io_op_data, - GetFileOperation(1), 10 /*latency*/, - IOStatus::OK().ToString(), file_name, - 256 /*file_size*/); - writer.WriteIOOp(record1, nullptr); - - // Write record with Length. - io_op_data = 0; - io_op_data |= (1 << IOTraceOp::kIOLen); - IOTraceRecord record2(0, TraceType::kIOTracer, io_op_data, - GetFileOperation(2), 10 /*latency*/, - IOStatus::OK().ToString(), file_name, 100 /*length*/, - 200 /*offset*/); - writer.WriteIOOp(record2, nullptr); - - // Write record with Length and offset. 
- io_op_data = 0; - io_op_data |= (1 << IOTraceOp::kIOLen); - io_op_data |= (1 << IOTraceOp::kIOOffset); - IOTraceRecord record3(0, TraceType::kIOTracer, io_op_data, - GetFileOperation(3), 10 /*latency*/, - IOStatus::OK().ToString(), file_name, 120 /*length*/, - 17 /*offset*/); - writer.WriteIOOp(record3, nullptr); - - // Write record with offset. - io_op_data = 0; - io_op_data |= (1 << IOTraceOp::kIOOffset); - IOTraceRecord record4(0, TraceType::kIOTracer, io_op_data, - GetFileOperation(4), 10 /*latency*/, - IOStatus::OK().ToString(), file_name, 13 /*length*/, - 50 /*offset*/); - writer.WriteIOOp(record4, nullptr); - - // Write record with IODebugContext. - io_op_data = 0; - IODebugContext dbg; - dbg.SetRequestId("request_id_1"); - IOTraceRecord record5(0, TraceType::kIOTracer, io_op_data, - GetFileOperation(5), 10 /*latency*/, - IOStatus::OK().ToString(), file_name); - writer.WriteIOOp(record5, &dbg); - - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - { - // Verify trace file is generated correctly. - std::unique_ptr trace_reader; - ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, - &trace_reader)); - IOTraceReader reader(std::move(trace_reader)); - IOTraceHeader header; - ASSERT_OK(reader.ReadHeader(&header)); - ASSERT_EQ(kMajorVersion, static_cast(header.rocksdb_major_version)); - ASSERT_EQ(kMinorVersion, static_cast(header.rocksdb_minor_version)); - - // Read general record. - IOTraceRecord record0; - ASSERT_OK(reader.ReadIOOp(&record0)); - ASSERT_EQ(record0.file_operation, GetFileOperation(0)); - ASSERT_EQ(record0.latency, 155); - ASSERT_EQ(record0.file_name, file_name); - - // Read record with FileSize. - IOTraceRecord record1; - ASSERT_OK(reader.ReadIOOp(&record1)); - ASSERT_EQ(record1.file_size, 256); - ASSERT_EQ(record1.len, 0); - ASSERT_EQ(record1.offset, 0); - - // Read record with Length. - IOTraceRecord record2; - ASSERT_OK(reader.ReadIOOp(&record2)); - ASSERT_EQ(record2.len, 100); - ASSERT_EQ(record2.file_size, 0); - ASSERT_EQ(record2.offset, 0); - - // Read record with Length and offset. - IOTraceRecord record3; - ASSERT_OK(reader.ReadIOOp(&record3)); - ASSERT_EQ(record3.len, 120); - ASSERT_EQ(record3.file_size, 0); - ASSERT_EQ(record3.offset, 17); - - // Read record with offset. - IOTraceRecord record4; - ASSERT_OK(reader.ReadIOOp(&record4)); - ASSERT_EQ(record4.len, 0); - ASSERT_EQ(record4.file_size, 0); - ASSERT_EQ(record4.offset, 50); - - IOTraceRecord record5; - ASSERT_OK(reader.ReadIOOp(&record5)); - ASSERT_EQ(record5.len, 0); - ASSERT_EQ(record5.file_size, 0); - ASSERT_EQ(record5.offset, 0); - ASSERT_EQ(record5.request_id, "request_id_1"); - // Read one more record and it should report error. - IOTraceRecord record6; - ASSERT_NOK(reader.ReadIOOp(&record6)); - } -} - -TEST_F(IOTracerTest, AtomicWrite) { - std::string file_name = kDummyFile + std::to_string(0); - { - IOTraceRecord record(0, TraceType::kIOTracer, 0 /*io_op_data*/, - GetFileOperation(0), 10 /*latency*/, - IOStatus::OK().ToString(), file_name); - TraceOptions trace_opt; - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - IOTracer writer; - ASSERT_OK(writer.StartIOTrace(clock_, trace_opt, std::move(trace_writer))); - writer.WriteIOOp(record, nullptr); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - { - // Verify trace file contains one record. 
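// The IODebugContext* argument of WriteIOOp() is optional (most records above
// pass nullptr); when provided, its request id is serialized with the record
// and comes back as IOTraceRecord::request_id on the read side (record5 in
// the previous test). Illustrative use, assuming an IOTracer named tracer:
IODebugContext dbg;
dbg.SetRequestId("request_id_1");
// tracer.WriteIOOp(record, &dbg);  // nullptr would mean "no debug context"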
- std::unique_ptr trace_reader; - ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, - &trace_reader)); - IOTraceReader reader(std::move(trace_reader)); - IOTraceHeader header; - ASSERT_OK(reader.ReadHeader(&header)); - ASSERT_EQ(kMajorVersion, static_cast(header.rocksdb_major_version)); - ASSERT_EQ(kMinorVersion, static_cast(header.rocksdb_minor_version)); - // Read record and verify data. - IOTraceRecord access_record; - ASSERT_OK(reader.ReadIOOp(&access_record)); - ASSERT_EQ(access_record.file_operation, GetFileOperation(0)); - ASSERT_EQ(access_record.io_status, IOStatus::OK().ToString()); - ASSERT_EQ(access_record.file_name, file_name); - ASSERT_NOK(reader.ReadIOOp(&access_record)); - } -} - -TEST_F(IOTracerTest, AtomicWriteBeforeStartTrace) { - std::string file_name = kDummyFile + std::to_string(0); - { - IOTraceRecord record(0, TraceType::kIOTracer, 0 /*io_op_data*/, - GetFileOperation(0), 0, IOStatus::OK().ToString(), - file_name); - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - IOTracer writer; - // The record should not be written to the trace_file since StartIOTrace is - // not called. - writer.WriteIOOp(record, nullptr); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - { - // Verify trace file contains nothing. - std::unique_ptr trace_reader; - ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, - &trace_reader)); - IOTraceReader reader(std::move(trace_reader)); - IOTraceHeader header; - ASSERT_NOK(reader.ReadHeader(&header)); - } -} - -TEST_F(IOTracerTest, AtomicNoWriteAfterEndTrace) { - std::string file_name = kDummyFile + std::to_string(0); - { - uint64_t io_op_data = 0; - io_op_data |= (1 << IOTraceOp::kIOFileSize); - IOTraceRecord record( - 0, TraceType::kIOTracer, io_op_data, GetFileOperation(2), 0 /*latency*/, - IOStatus::OK().ToString(), file_name, 10 /*file_size*/); - TraceOptions trace_opt; - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - IOTracer writer; - ASSERT_OK(writer.StartIOTrace(clock_, trace_opt, std::move(trace_writer))); - writer.WriteIOOp(record, nullptr); - writer.EndIOTrace(); - // Write the record again. This time the record should not be written since - // EndIOTrace is called. - writer.WriteIOOp(record, nullptr); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - { - // Verify trace file contains one record. - std::unique_ptr trace_reader; - ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, - &trace_reader)); - IOTraceReader reader(std::move(trace_reader)); - IOTraceHeader header; - ASSERT_OK(reader.ReadHeader(&header)); - ASSERT_EQ(kMajorVersion, static_cast(header.rocksdb_major_version)); - ASSERT_EQ(kMinorVersion, static_cast(header.rocksdb_minor_version)); - - IOTraceRecord access_record; - ASSERT_OK(reader.ReadIOOp(&access_record)); - ASSERT_EQ(access_record.file_operation, GetFileOperation(2)); - ASSERT_EQ(access_record.io_status, IOStatus::OK().ToString()); - ASSERT_EQ(access_record.file_size, 10); - // No more record. 
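// Structural note on these tests: the write phase and the read phase live in
// separate braced scopes, so the IOTracer / TraceWriter are destroyed (and the
// trace file closed and flushed) before any reader opens it. Skeleton of that
// structure:
//   {
//     // create TraceWriter, StartIOTrace(), WriteIOOp()...
//   }  // writer goes out of scope; file is complete on disk
//   {
//     // create IOTraceReader, ReadHeader(), ReadIOOp() until non-OK
//   }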
- ASSERT_NOK(reader.ReadIOOp(&access_record)); - } -} - -TEST_F(IOTracerTest, AtomicMultipleWrites) { - { - TraceOptions trace_opt; - std::unique_ptr trace_writer; - ASSERT_OK(NewFileTraceWriter(env_, env_options_, trace_file_path_, - &trace_writer)); - IOTraceWriter writer(clock_, trace_opt, std::move(trace_writer)); - ASSERT_OK(writer.WriteHeader()); - // Write 10 records - WriteIOOp(&writer, 10); - ASSERT_OK(env_->FileExists(trace_file_path_)); - } - - { - // Verify trace file is generated correctly. - std::unique_ptr trace_reader; - ASSERT_OK(NewFileTraceReader(env_, env_options_, trace_file_path_, - &trace_reader)); - IOTraceReader reader(std::move(trace_reader)); - IOTraceHeader header; - ASSERT_OK(reader.ReadHeader(&header)); - ASSERT_EQ(kMajorVersion, static_cast(header.rocksdb_major_version)); - ASSERT_EQ(kMinorVersion, static_cast(header.rocksdb_minor_version)); - // Read 10 records. - VerifyIOOp(&reader, 10); - // Read one more and record and it should report error. - IOTraceRecord record; - ASSERT_NOK(reader.ReadIOOp(&record)); - } -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/util/autovector_test.cc b/util/autovector_test.cc deleted file mode 100644 index b75a0fa2a..000000000 --- a/util/autovector_test.cc +++ /dev/null @@ -1,324 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "util/autovector.h" - -#include -#include -#include -#include - -#include "rocksdb/env.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/string_util.h" - -using std::cout; -using std::endl; - -namespace ROCKSDB_NAMESPACE { - -class AutoVectorTest : public testing::Test {}; -const unsigned long kSize = 8; - -namespace { -template -void AssertAutoVectorOnlyInStack(autovector* vec, bool result) { - ASSERT_EQ(vec->only_in_stack(), result); -} -} // namespace - -TEST_F(AutoVectorTest, PushBackAndPopBack) { - autovector vec; - ASSERT_TRUE(vec.empty()); - ASSERT_EQ(0ul, vec.size()); - - for (size_t i = 0; i < 1000 * kSize; ++i) { - vec.push_back(i); - ASSERT_TRUE(!vec.empty()); - if (i < kSize) { - AssertAutoVectorOnlyInStack(&vec, true); - } else { - AssertAutoVectorOnlyInStack(&vec, false); - } - ASSERT_EQ(i + 1, vec.size()); - ASSERT_EQ(i, vec[i]); - ASSERT_EQ(i, vec.at(i)); - } - - size_t size = vec.size(); - while (size != 0) { - vec.pop_back(); - // will always be in heap - AssertAutoVectorOnlyInStack(&vec, false); - ASSERT_EQ(--size, vec.size()); - } - - ASSERT_TRUE(vec.empty()); -} - -TEST_F(AutoVectorTest, EmplaceBack) { - using ValType = std::pair; - autovector vec; - - for (size_t i = 0; i < 1000 * kSize; ++i) { - vec.emplace_back(i, std::to_string(i + 123)); - ASSERT_TRUE(!vec.empty()); - if (i < kSize) { - AssertAutoVectorOnlyInStack(&vec, true); - } else { - AssertAutoVectorOnlyInStack(&vec, false); - } - - ASSERT_EQ(i + 1, vec.size()); - ASSERT_EQ(i, vec[i].first); - ASSERT_EQ(std::to_string(i + 123), vec[i].second); - } - - vec.clear(); - ASSERT_TRUE(vec.empty()); - AssertAutoVectorOnlyInStack(&vec, false); -} - -TEST_F(AutoVectorTest, Resize) { - autovector vec; - - vec.resize(kSize); - AssertAutoVectorOnlyInStack(&vec, true); - for (size_t i = 0; i < kSize; ++i) { 
- vec[i] = i; - } - - vec.resize(kSize * 2); - AssertAutoVectorOnlyInStack(&vec, false); - for (size_t i = 0; i < kSize; ++i) { - ASSERT_EQ(vec[i], i); - } - for (size_t i = 0; i < kSize; ++i) { - vec[i + kSize] = i; - } - - vec.resize(1); - ASSERT_EQ(1U, vec.size()); -} - -namespace { -void AssertEqual(const autovector& a, - const autovector& b) { - ASSERT_EQ(a.size(), b.size()); - ASSERT_EQ(a.empty(), b.empty()); - ASSERT_EQ(a.only_in_stack(), b.only_in_stack()); - for (size_t i = 0; i < a.size(); ++i) { - ASSERT_EQ(a[i], b[i]); - } -} -} // namespace - -TEST_F(AutoVectorTest, CopyAndAssignment) { - // Test both heap-allocated and stack-allocated cases. - for (auto size : {kSize / 2, kSize * 1000}) { - autovector vec; - for (size_t i = 0; i < size; ++i) { - vec.push_back(i); - } - - { - autovector other; - other = vec; - AssertEqual(other, vec); - } - - { - autovector other(vec); - AssertEqual(other, vec); - } - } -} - -TEST_F(AutoVectorTest, Iterators) { - autovector vec; - for (size_t i = 0; i < kSize * 1000; ++i) { - vec.push_back(std::to_string(i)); - } - - // basic operator test - ASSERT_EQ(vec.front(), *vec.begin()); - ASSERT_EQ(vec.back(), *(vec.end() - 1)); - ASSERT_TRUE(vec.begin() < vec.end()); - - // non-const iterator - size_t index = 0; - for (const auto& item : vec) { - ASSERT_EQ(vec[index++], item); - } - - index = vec.size() - 1; - for (auto pos = vec.rbegin(); pos != vec.rend(); ++pos) { - ASSERT_EQ(vec[index--], *pos); - } - - // const iterator - const auto& cvec = vec; - index = 0; - for (const auto& item : cvec) { - ASSERT_EQ(cvec[index++], item); - } - - index = vec.size() - 1; - for (auto pos = cvec.rbegin(); pos != cvec.rend(); ++pos) { - ASSERT_EQ(cvec[index--], *pos); - } - - // forward and backward - auto pos = vec.begin(); - while (pos != vec.end()) { - auto old_val = *pos; - auto old = pos++; - // HACK: make sure -> works - ASSERT_TRUE(!old->empty()); - ASSERT_EQ(old_val, *old); - ASSERT_TRUE(pos == vec.end() || old_val != *pos); - } - - pos = vec.begin(); - for (size_t i = 0; i < vec.size(); i += 2) { - // Cannot use ASSERT_EQ since that macro depends on iostream serialization - ASSERT_TRUE(pos + 2 - 2 == pos); - pos += 2; - ASSERT_TRUE(pos >= vec.begin()); - ASSERT_TRUE(pos <= vec.end()); - - size_t diff = static_cast(pos - vec.begin()); - ASSERT_EQ(i + 2, diff); - } -} - -namespace { -std::vector GetTestKeys(size_t size) { - std::vector keys; - keys.resize(size); - - int index = 0; - for (auto& key : keys) { - key = "item-" + std::to_string(index++); - } - return keys; -} -} // namespace - -template -void BenchmarkVectorCreationAndInsertion( - std::string name, size_t ops, size_t item_size, - const std::vector& items) { - auto env = Env::Default(); - - int index = 0; - auto start_time = env->NowNanos(); - auto ops_remaining = ops; - while (ops_remaining--) { - TVector v; - for (size_t i = 0; i < item_size; ++i) { - v.push_back(items[index++]); - } - } - auto elapsed = env->NowNanos() - start_time; - cout << "created " << ops << " " << name << " instances:\n\t" - << "each was inserted with " << item_size << " elements\n\t" - << "total time elapsed: " << elapsed << " (ns)" << endl; -} - -template -size_t BenchmarkSequenceAccess(std::string name, size_t ops, size_t elem_size) { - TVector v; - for (const auto& item : GetTestKeys(elem_size)) { - v.push_back(item); - } - auto env = Env::Default(); - - auto ops_remaining = ops; - auto start_time = env->NowNanos(); - size_t total = 0; - while (ops_remaining--) { - auto end = v.end(); - for (auto pos = v.begin(); 
pos != end; ++pos) { - total += pos->size(); - } - } - auto elapsed = env->NowNanos() - start_time; - cout << "performed " << ops << " sequence access against " << name << "\n\t" - << "size: " << elem_size << "\n\t" - << "total time elapsed: " << elapsed << " (ns)" << endl; - // HACK avoid compiler's optimization to ignore total - return total; -} - -// This test case only reports the performance between std::vector -// and autovector. We chose string for comparison because in most -// of our use cases we used std::vector. -TEST_F(AutoVectorTest, PerfBench) { - // We run same operations for kOps times in order to get a more fair result. - size_t kOps = 100000; - - // Creation and insertion test - // Test the case when there is: - // * no element inserted: internal array of std::vector may not really get - // initialize. - // * one element inserted: internal array of std::vector must have - // initialized. - // * kSize elements inserted. This shows the most time we'll spend if we - // keep everything in stack. - // * 2 * kSize elements inserted. The internal vector of - // autovector must have been initialized. - cout << "=====================================================" << endl; - cout << "Creation and Insertion Test (value type: std::string)" << endl; - cout << "=====================================================" << endl; - - // pre-generated unique keys - auto string_keys = GetTestKeys(kOps * 2 * kSize); - for (auto insertions : {0ul, 1ul, kSize / 2, kSize, 2 * kSize}) { - BenchmarkVectorCreationAndInsertion>( - "std::vector", kOps, insertions, string_keys); - BenchmarkVectorCreationAndInsertion>( - "autovector", kOps, insertions, string_keys); - cout << "-----------------------------------" << endl; - } - - cout << "=====================================================" << endl; - cout << "Creation and Insertion Test (value type: uint64_t)" << endl; - cout << "=====================================================" << endl; - - // pre-generated unique keys - std::vector int_keys(kOps * 2 * kSize); - for (size_t i = 0; i < kOps * 2 * kSize; ++i) { - int_keys[i] = i; - } - for (auto insertions : {0ul, 1ul, kSize / 2, kSize, 2 * kSize}) { - BenchmarkVectorCreationAndInsertion>( - "std::vector", kOps, insertions, int_keys); - BenchmarkVectorCreationAndInsertion>( - "autovector", kOps, insertions, int_keys); - cout << "-----------------------------------" << endl; - } - - // Sequence Access Test - cout << "=====================================================" << endl; - cout << "Sequence Access Test" << endl; - cout << "=====================================================" << endl; - for (auto elem_size : {kSize / 2, kSize, 2 * kSize}) { - BenchmarkSequenceAccess>("std::vector", kOps, - elem_size); - BenchmarkSequenceAccess>("autovector", kOps, - elem_size); - cout << "-----------------------------------" << endl; - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/util/bloom_test.cc b/util/bloom_test.cc deleted file mode 100644 index 06dd1de06..000000000 --- a/util/bloom_test.cc +++ /dev/null @@ -1,1175 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. 
All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef GFLAGS -#include -int main() { - fprintf(stderr, "Please install gflags to run this test... Skipping...\n"); - return 0; -} -#else - -#include -#include -#include - -#include "cache/cache_entry_roles.h" -#include "cache/cache_reservation_manager.h" -#include "memory/arena.h" -#include "port/jemalloc_helper.h" -#include "rocksdb/filter_policy.h" -#include "table/block_based/filter_policy_internal.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/gflags_compat.h" -#include "util/hash.h" - -using GFLAGS_NAMESPACE::ParseCommandLineFlags; - -// The test is not fully designed for bits_per_key other than 10, but with -// this parameter you can easily explore the behavior of other bits_per_key. -// See also filter_bench. -DEFINE_int32(bits_per_key, 10, ""); - -namespace ROCKSDB_NAMESPACE { - -namespace { -const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName(); -const std::string kFastLocalBloom = - test::FastLocalBloomFilterPolicy::kClassName(); -const std::string kStandard128Ribbon = - test::Standard128RibbonFilterPolicy::kClassName(); -} // namespace - -static const int kVerbose = 1; - -static Slice Key(int i, char* buffer) { - std::string s; - PutFixed32(&s, static_cast(i)); - memcpy(buffer, s.c_str(), sizeof(i)); - return Slice(buffer, sizeof(i)); -} - -static int NextLength(int length) { - if (length < 10) { - length += 1; - } else if (length < 100) { - length += 10; - } else if (length < 1000) { - length += 100; - } else { - length += 1000; - } - return length; -} - -class FullBloomTest : public testing::TestWithParam { - protected: - BlockBasedTableOptions table_options_; - - private: - std::shared_ptr& policy_; - std::unique_ptr bits_builder_; - std::unique_ptr bits_reader_; - std::unique_ptr buf_; - size_t filter_size_; - - public: - FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) { - ResetPolicy(); - } - - BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() { - // Throws on bad cast - return dynamic_cast(bits_builder_.get()); - } - - const BloomLikeFilterPolicy* GetBloomLikeFilterPolicy() { - // Throws on bad cast - return &dynamic_cast(*policy_); - } - - void Reset() { - bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext( - FilterBuildingContext(table_options_))); - bits_reader_.reset(nullptr); - buf_.reset(nullptr); - filter_size_ = 0; - } - - void ResetPolicy(double bits_per_key) { - policy_ = BloomLikeFilterPolicy::Create(GetParam(), bits_per_key); - Reset(); - } - - void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); } - - void Add(const Slice& s) { bits_builder_->AddKey(s); } - - void OpenRaw(const Slice& s) { - bits_reader_.reset(policy_->GetFilterBitsReader(s)); - } - - void Build() { - Slice filter = bits_builder_->Finish(&buf_); - bits_reader_.reset(policy_->GetFilterBitsReader(filter)); - filter_size_ = filter.size(); - } - - size_t FilterSize() const { return filter_size_; } - - Slice FilterData() { return Slice(buf_.get(), filter_size_); } - - int GetNumProbesFromFilterData() { - assert(filter_size_ >= 5); - int8_t raw_num_probes = static_cast(buf_.get()[filter_size_ - 5]); - if (raw_num_probes == -1) { // New bloom filter marker - return static_cast(buf_.get()[filter_size_ - 3]); - } else { - return raw_num_probes; - } - } - - int GetRibbonSeedFromFilterData() { - assert(filter_size_ >= 5); - 
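// GetNumProbesFromFilterData() above (and GetRibbonSeedFromFilterData() just
// below) decode the 5-byte metadata trailer that the built-in filters append:
// byte [size - 5] is either a legacy num_probes value, -1 for the newer Bloom
// format (num_probes then sits at [size - 3]), or -2 for Standard128 Ribbon
// (its seed at [size - 4]). Illustrative dispatch on that marker (the enum
// name is made up for this sketch):
enum class BuiltinFilterKind { kLegacyBloom, kFastLocalBloom, kRibbon };
inline BuiltinFilterKind KindFromFilterData(const char* data, size_t size) {
  assert(size >= 5);
  int8_t marker = static_cast<int8_t>(data[size - 5]);
  if (marker == -1) return BuiltinFilterKind::kFastLocalBloom;
  if (marker == -2) return BuiltinFilterKind::kRibbon;
  return BuiltinFilterKind::kLegacyBloom;  // raw legacy num_probes (1..30)
}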
// Check for ribbon marker - assert(-2 == static_cast(buf_.get()[filter_size_ - 5])); - return static_cast(buf_.get()[filter_size_ - 4]); - } - - bool Matches(const Slice& s) { - if (bits_reader_ == nullptr) { - Build(); - } - return bits_reader_->MayMatch(s); - } - - // Provides a kind of fingerprint on the Bloom filter's - // behavior, for reasonbly high FP rates. - uint64_t PackedMatches() { - char buffer[sizeof(int)]; - uint64_t result = 0; - for (int i = 0; i < 64; i++) { - if (Matches(Key(i + 12345, buffer))) { - result |= uint64_t{1} << i; - } - } - return result; - } - - // Provides a kind of fingerprint on the Bloom filter's - // behavior, for lower FP rates. - std::string FirstFPs(int count) { - char buffer[sizeof(int)]; - std::string rv; - int fp_count = 0; - for (int i = 0; i < 1000000; i++) { - // Pack four match booleans into each hexadecimal digit - if (Matches(Key(i + 1000000, buffer))) { - ++fp_count; - rv += std::to_string(i); - if (fp_count == count) { - break; - } - rv += ','; - } - } - return rv; - } - - double FalsePositiveRate() { - char buffer[sizeof(int)]; - int result = 0; - for (int i = 0; i < 10000; i++) { - if (Matches(Key(i + 1000000000, buffer))) { - result++; - } - } - return result / 10000.0; - } -}; - -TEST_P(FullBloomTest, FilterSize) { - // In addition to checking the consistency of space computation, we are - // checking that denoted and computed doubles are interpreted as expected - // as bits_per_key values. - bool some_computed_less_than_denoted = false; - // Note: to avoid unproductive configurations, bits_per_key < 0.5 is rounded - // down to 0 (no filter), and 0.5 <= bits_per_key < 1.0 is rounded up to 1 - // bit per key (1000 millibits). Also, enforced maximum is 100 bits per key - // (100000 millibits). - for (auto bpk : std::vector >{{-HUGE_VAL, 0}, - {-INFINITY, 0}, - {0.0, 0}, - {0.499, 0}, - {0.5, 1000}, - {1.234, 1234}, - {3.456, 3456}, - {9.5, 9500}, - {10.0, 10000}, - {10.499, 10499}, - {21.345, 21345}, - {99.999, 99999}, - {1234.0, 100000}, - {HUGE_VAL, 100000}, - {INFINITY, 100000}, - {NAN, 100000}}) { - ResetPolicy(bpk.first); - auto bfp = GetBloomLikeFilterPolicy(); - EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey()); - EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey()); - - double computed = bpk.first; - // This transforms e.g. 9.5 -> 9.499999999999998, which we still - // round to 10 for whole bits per key. - computed += 0.5; - computed /= 1234567.0; - computed *= 1234567.0; - computed -= 0.5; - some_computed_less_than_denoted |= (computed < bpk.first); - ResetPolicy(computed); - bfp = GetBloomLikeFilterPolicy(); - EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey()); - EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey()); - - auto bits_builder = GetBuiltinFilterBitsBuilder(); - if (bpk.second == 0) { - ASSERT_EQ(bits_builder, nullptr); - continue; - } - - size_t n = 1; - size_t space = 0; - for (; n < 1000000; n += 1 + n / 1000) { - // Ensure consistency between CalculateSpace and ApproximateNumEntries - space = bits_builder->CalculateSpace(n); - size_t n2 = bits_builder->ApproximateNumEntries(space); - EXPECT_GE(n2, n); - size_t space2 = bits_builder->CalculateSpace(n2); - if (n > 12000 && GetParam() == kStandard128Ribbon) { - // TODO(peterd): better approximation? 
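// The loop above asserts that CalculateSpace() and ApproximateNumEntries()
// round-trip: the space computed for n entries must admit at least n entries,
// and recomputing space for that larger count must land back on the same size
// (Ribbon is allowed a tiny slack, handled right below). Condensed sketch of
// the invariant (helper name is illustrative):
void CheckSpaceRoundTrip(BuiltinFilterBitsBuilder* builder, size_t n) {
  size_t space = builder->CalculateSpace(n);
  size_t n2 = builder->ApproximateNumEntries(space);
  assert(n2 >= n);
  assert(builder->CalculateSpace(n2) == space);  // exact for Bloom variants
}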
- EXPECT_GE(space2, space); - EXPECT_LE(space2 * 0.998, space * 1.0); - } else { - EXPECT_EQ(space2, space); - } - } - // Until size_t overflow - for (; n < (n + n / 3); n += n / 3) { - // Ensure space computation is not overflowing; capped is OK - size_t space2 = bits_builder->CalculateSpace(n); - EXPECT_GE(space2, space); - space = space2; - } - } - // Check that the compiler hasn't optimized our computation into nothing - EXPECT_TRUE(some_computed_less_than_denoted); - ResetPolicy(); -} - -TEST_P(FullBloomTest, FullEmptyFilter) { - // Empty filter is not match, at this level - ASSERT_TRUE(!Matches("hello")); - ASSERT_TRUE(!Matches("world")); -} - -TEST_P(FullBloomTest, FullSmall) { - Add("hello"); - Add("world"); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - ASSERT_TRUE(!Matches("x")); - ASSERT_TRUE(!Matches("foo")); -} - -TEST_P(FullBloomTest, FullVaryingLengths) { - char buffer[sizeof(int)]; - - // Count number of filters that significantly exceed the false positive rate - int mediocre_filters = 0; - int good_filters = 0; - - for (int length = 1; length <= 10000; length = NextLength(length)) { - Reset(); - for (int i = 0; i < length; i++) { - Add(Key(i, buffer)); - } - Build(); - - EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) + - CACHE_LINE_SIZE * 2 + 5)); - - // All added keys must match - for (int i = 0; i < length; i++) { - ASSERT_TRUE(Matches(Key(i, buffer))) - << "Length " << length << "; key " << i; - } - - // Check false positive rate - double rate = FalsePositiveRate(); - if (kVerbose >= 1) { - fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", - rate * 100.0, length, static_cast(FilterSize())); - } - if (FLAGS_bits_per_key == 10) { - EXPECT_LE(rate, 0.02); // Must not be over 2% - if (rate > 0.0125) { - mediocre_filters++; // Allowed, but not too often - } else { - good_filters++; - } - } - } - if (kVerbose >= 1) { - fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters, - mediocre_filters); - } - EXPECT_LE(mediocre_filters, good_filters / 5); -} - -TEST_P(FullBloomTest, OptimizeForMemory) { - char buffer[sizeof(int)]; - for (bool offm : {true, false}) { - table_options_.optimize_filters_for_memory = offm; - ResetPolicy(); - Random32 rnd(12345); - uint64_t total_size = 0; - uint64_t total_mem = 0; - int64_t total_keys = 0; - double total_fp_rate = 0; - constexpr int nfilters = 100; - for (int i = 0; i < nfilters; ++i) { - int nkeys = static_cast(rnd.Uniformish(10000)) + 100; - Reset(); - for (int j = 0; j < nkeys; ++j) { - Add(Key(j, buffer)); - } - Build(); - size_t size = FilterData().size(); - total_size += size; - // optimize_filters_for_memory currently depends on malloc_usable_size - // but we run the rest of the test to ensure no bad behavior without it. -#ifdef ROCKSDB_MALLOC_USABLE_SIZE - size = malloc_usable_size(const_cast(FilterData().data())); -#endif // ROCKSDB_MALLOC_USABLE_SIZE - total_mem += size; - total_keys += nkeys; - total_fp_rate += FalsePositiveRate(); - } - if (FLAGS_bits_per_key == 10) { - EXPECT_LE(total_fp_rate / double{nfilters}, 0.011); - EXPECT_GE(total_fp_rate / double{nfilters}, - CACHE_LINE_SIZE >= 256 ? 0.007 : 0.008); - } - - int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8; - if (GetParam() == kStandard128Ribbon) { - // ~ 30% savings vs. 
Bloom filter - ex_min_total_size = 7 * ex_min_total_size / 10; - } - EXPECT_GE(static_cast(total_size), ex_min_total_size); - - int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5); - if (GetParam() == kLegacyBloom) { - // this config can add extra cache line to make odd number - blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE; - } - - EXPECT_GE(total_mem, total_size); - - // optimize_filters_for_memory not implemented with legacy Bloom - if (offm && GetParam() != kLegacyBloom) { - // This value can include a small extra penalty for kExtraPadding - fprintf(stderr, "Internal fragmentation (optimized): %g%%\n", - (total_mem - total_size) * 100.0 / total_size); - // Less than 1% internal fragmentation - EXPECT_LE(total_mem, total_size * 101 / 100); - // Up to 2% storage penalty - EXPECT_LE(static_cast(total_size), - ex_min_total_size * 102 / 100 + blocked_bloom_overhead); - } else { - fprintf(stderr, "Internal fragmentation (not optimized): %g%%\n", - (total_mem - total_size) * 100.0 / total_size); - // TODO: add control checks for more allocators? -#ifdef ROCKSDB_JEMALLOC - fprintf(stderr, "Jemalloc detected? %d\n", HasJemalloc()); - if (HasJemalloc()) { -#ifdef ROCKSDB_MALLOC_USABLE_SIZE - // More than 5% internal fragmentation - EXPECT_GE(total_mem, total_size * 105 / 100); -#endif // ROCKSDB_MALLOC_USABLE_SIZE - } -#endif // ROCKSDB_JEMALLOC - // No storage penalty, just usual overhead - EXPECT_LE(static_cast(total_size), - ex_min_total_size + blocked_bloom_overhead); - } - } -} - -class ChargeFilterConstructionTest : public testing::Test {}; -TEST_F(ChargeFilterConstructionTest, RibbonFilterFallBackOnLargeBanding) { - constexpr std::size_t kCacheCapacity = - 8 * CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize(); - constexpr std::size_t num_entries_for_cache_full = kCacheCapacity / 8; - - for (CacheEntryRoleOptions::Decision charge_filter_construction_mem : - {CacheEntryRoleOptions::Decision::kEnabled, - CacheEntryRoleOptions::Decision::kDisabled}) { - bool will_fall_back = charge_filter_construction_mem == - CacheEntryRoleOptions::Decision::kEnabled; - - BlockBasedTableOptions table_options; - table_options.cache_usage_options.options_overrides.insert( - {CacheEntryRole::kFilterConstruction, - {/*.charged = */ charge_filter_construction_mem}}); - LRUCacheOptions lo; - lo.capacity = kCacheCapacity; - lo.num_shard_bits = 0; // 2^0 shard - lo.strict_capacity_limit = true; - std::shared_ptr cache(NewLRUCache(lo)); - table_options.block_cache = cache; - table_options.filter_policy = - BloomLikeFilterPolicy::Create(kStandard128Ribbon, FLAGS_bits_per_key); - FilterBuildingContext ctx(table_options); - std::unique_ptr filter_bits_builder( - table_options.filter_policy->GetBuilderWithContext(ctx)); - - char key_buffer[sizeof(int)]; - for (std::size_t i = 0; i < num_entries_for_cache_full; ++i) { - filter_bits_builder->AddKey(Key(static_cast(i), key_buffer)); - } - - std::unique_ptr buf; - Slice filter = filter_bits_builder->Finish(&buf); - - // To verify Ribbon Filter fallbacks to Bloom Filter properly - // based on cache charging result - // See BloomFilterPolicy::GetBloomBitsReader re: metadata - // -1 = Marker for newer Bloom implementations - // -2 = Marker for Standard128 Ribbon - if (will_fall_back) { - EXPECT_EQ(filter.data()[filter.size() - 5], static_cast(-1)); - } else { - EXPECT_EQ(filter.data()[filter.size() - 5], static_cast(-2)); - } - - if (charge_filter_construction_mem == - CacheEntryRoleOptions::Decision::kEnabled) { - 
const size_t dummy_entry_num = static_cast(std::ceil( - filter.size() * 1.0 / - CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize())); - EXPECT_GE( - cache->GetPinnedUsage(), - dummy_entry_num * - CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()); - EXPECT_LT( - cache->GetPinnedUsage(), - (dummy_entry_num + 1) * - CacheReservationManagerImpl< - CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()); - } else { - EXPECT_EQ(cache->GetPinnedUsage(), 0); - } - } -} - -namespace { -inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128, - uint32_t for256) { - (void)for64; - (void)for128; - (void)for256; -#if CACHE_LINE_SIZE == 64 - return for64; -#elif CACHE_LINE_SIZE == 128 - return for128; -#elif CACHE_LINE_SIZE == 256 - return for256; -#else -#error "CACHE_LINE_SIZE unknown or unrecognized" -#endif -} -} // namespace - -// Ensure the implementation doesn't accidentally change in an -// incompatible way. This test doesn't check the reading side -// (FirstFPs/PackedMatches) for LegacyBloom because it requires the -// ability to read filters generated using other cache line sizes. -// See RawSchema. -TEST_P(FullBloomTest, Schema) { -#define EXPECT_EQ_Bloom(a, b) \ - { \ - if (GetParam() != kStandard128Ribbon) { \ - EXPECT_EQ(a, b); \ - } \ - } -#define EXPECT_EQ_Ribbon(a, b) \ - { \ - if (GetParam() == kStandard128Ribbon) { \ - EXPECT_EQ(a, b); \ - } \ - } -#define EXPECT_EQ_FastBloom(a, b) \ - { \ - if (GetParam() == kFastLocalBloom) { \ - EXPECT_EQ(a, b); \ - } \ - } -#define EXPECT_EQ_LegacyBloom(a, b) \ - { \ - if (GetParam() == kLegacyBloom) { \ - EXPECT_EQ(a, b); \ - } \ - } -#define EXPECT_EQ_NotLegacy(a, b) \ - { \ - if (GetParam() != kLegacyBloom) { \ - EXPECT_EQ(a, b); \ - } \ - } - - char buffer[sizeof(int)]; - - // First do a small number of keys, where Ribbon config will fall back on - // fast Bloom filter and generate the same data - ResetPolicy(5); // num_probes = 3 - for (int key = 0; key < 87; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ(GetNumProbesFromFilterData(), 3); - - EXPECT_EQ_NotLegacy(BloomHash(FilterData()), 4130687756U); - - EXPECT_EQ_NotLegacy("31,38,40,43,61,83,86,112,125,131", FirstFPs(10)); - - // Now use enough keys so that changing bits / key by 1 is guaranteed to - // change number of allocated cache lines. So keys > max cache line bits. 
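// The Schema test pins down the on-disk format with behavioral fingerprints
// rather than raw byte dumps: BloomHash(FilterData()) hashes the serialized
// filter, and FirstFPs(n) lists the first n false positives from a fixed
// probe sequence, so any unintended change to hashing or layout shifts these
// golden values. Sketch of such a probe (mirrors the fixture's FirstFPs();
// function name is illustrative):
std::string FingerprintFalsePositives(FilterBitsReader* reader, int want) {
  char buffer[sizeof(int)];
  std::string fps;
  for (int i = 0; i < 1000000 && want > 0; ++i) {
    if (reader->MayMatch(Key(i + 1000000, buffer))) {
      fps += std::to_string(i) + ",";
      --want;
    }
  }
  return fps;
}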
- - // Note that the first attempted Ribbon seed is determined by the hash - // of the first key added (for pseudorandomness in practice, determinism in - // testing) - - ResetPolicy(2); // num_probes = 1 - for (int key = 0; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 1); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(1567096579, 1964771444, 2659542661U)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3817481309U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1705851228U); - - EXPECT_EQ_FastBloom("11,13,17,25,29,30,35,37,45,53", FirstFPs(10)); - EXPECT_EQ_Ribbon("3,8,10,17,19,20,23,28,31,32", FirstFPs(10)); - - ResetPolicy(3); // num_probes = 2 - for (int key = 0; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 2); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(2707206547U, 2571983456U, 218344685)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2807269961U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1095342358U); - - EXPECT_EQ_FastBloom("4,15,17,24,27,28,29,53,63,70", FirstFPs(10)); - EXPECT_EQ_Ribbon("3,17,20,28,32,33,36,43,49,54", FirstFPs(10)); - - ResetPolicy(5); // num_probes = 3 - for (int key = 0; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 3); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(515748486, 94611728, 2436112214U)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 204628445U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3971337699U); - - EXPECT_EQ_FastBloom("15,24,29,39,53,87,89,100,103,104", FirstFPs(10)); - EXPECT_EQ_Ribbon("3,33,36,43,67,70,76,78,84,102", FirstFPs(10)); - - ResetPolicy(8); // num_probes = 5 - for (int key = 0; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 5); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(1302145999, 2811644657U, 756553699)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 355564975U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3651449053U); - - EXPECT_EQ_FastBloom("16,60,66,126,220,238,244,256,265,287", FirstFPs(10)); - EXPECT_EQ_Ribbon("33,187,203,296,300,322,411,419,547,582", FirstFPs(10)); - - ResetPolicy(9); // num_probes = 6 - for (int key = 0; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(2092755149, 661139132, 1182970461)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2137566013U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1005676675U); - - EXPECT_EQ_FastBloom("156,367,791,872,945,1015,1139,1159,1265", FirstFPs(9)); - EXPECT_EQ_Ribbon("33,187,203,296,411,419,604,612,615,619", FirstFPs(10)); - - ResetPolicy(11); // num_probes = 7 - for (int key = 0; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 7); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(3755609649U, 1812694762, 1449142939)); - 
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2561502687U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3129900846U); - - EXPECT_EQ_FastBloom("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10)); - EXPECT_EQ_Ribbon("411,419,623,665,727,794,955,1052,1323,1330", FirstFPs(10)); - - // This used to be 9 probes, but 8 is a better choice for speed, - // especially with SIMD groups of 8 probes, with essentially no - // change in FP rate. - // FP rate @ 9 probes, old Bloom: 0.4321% - // FP rate @ 9 probes, new Bloom: 0.1846% - // FP rate @ 8 probes, new Bloom: 0.1843% - ResetPolicy(14); // num_probes = 8 (new), 9 (old) - for (int key = 0; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 9); - EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 8); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(178861123, 379087593, 2574136516U)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3709876890U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1855638875U); - - EXPECT_EQ_FastBloom("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9)); - EXPECT_EQ_Ribbon("665,727,1323,1755,3866,4232,4442,4492,4736", FirstFPs(9)); - - // This used to be 11 probes, but 9 is a better choice for speed - // AND accuracy. - // FP rate @ 11 probes, old Bloom: 0.3571% - // FP rate @ 11 probes, new Bloom: 0.0884% - // FP rate @ 9 probes, new Bloom: 0.0843% - ResetPolicy(16); // num_probes = 9 (new), 11 (old) - for (int key = 0; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 11); - EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 9); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(1129406313, 3049154394U, 1727750964)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 1087138490U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 459379967U); - - EXPECT_EQ_FastBloom("3299,3611,3916,6620,7822,8079,8482,8942", FirstFPs(8)); - EXPECT_EQ_Ribbon("727,1323,1755,4442,4736,5386,6974,7154,8222", FirstFPs(9)); - - ResetPolicy(10); // num_probes = 6, but different memory ratio vs. 
9 - for (int key = 0; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(1478976371, 2910591341U, 1182970461)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2498541272U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1273231667U); - - EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9)); - EXPECT_EQ_Ribbon("296,411,419,612,619,623,630,665,686,727", FirstFPs(10)); - - ResetPolicy(10); - for (int key = /*CHANGED*/ 1; key < 2087; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), /*CHANGED*/ 184); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2058382345U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3007790572U); - - EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9)); - EXPECT_EQ_Ribbon("33,152,383,497,589,633,737,781,911,990", FirstFPs(10)); - - ResetPolicy(10); - for (int key = 1; key < /*CHANGED*/ 2088; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - SelectByCacheLineSize(2885052954U, 769447944, 4175124908U)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 23699164U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1942323379U); - - EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9)); - EXPECT_EQ_Ribbon("33,95,360,589,737,911,990,1048,1081,1414", FirstFPs(10)); - - // With new fractional bits_per_key, check that we are rounding to - // whole bits per key for old Bloom filters but fractional for - // new Bloom filter. - ResetPolicy(9.5); - for (int key = 1; key < 2088; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3166884174U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1148258663U); - - EXPECT_EQ_FastBloom("126,156,367,444,458,791,813,976,1015", FirstFPs(9)); - EXPECT_EQ_Ribbon("33,54,95,360,589,693,737,911,990,1048", FirstFPs(10)); - - ResetPolicy(10.499); - for (int key = 1; key < 2088; key++) { - Add(Key(key, buffer)); - } - Build(); - EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 6); - EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 7); - EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184); - - EXPECT_EQ_LegacyBloom( - BloomHash(FilterData()), - /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U)); - EXPECT_EQ_FastBloom(BloomHash(FilterData()), 4098502778U); - EXPECT_EQ_Ribbon(BloomHash(FilterData()), 792138188U); - - EXPECT_EQ_FastBloom("16,236,240,472,1015,1045,1111,1409,1465", FirstFPs(9)); - EXPECT_EQ_Ribbon("33,95,360,589,737,990,1048,1081,1414,1643", FirstFPs(10)); - - ResetPolicy(); -} - -// A helper class for testing custom or corrupt filter bits as read by -// built-in FilterBitsReaders. -struct RawFilterTester { - // Buffer, from which we always return a tail Slice, so the - // last five bytes are always the metadata bytes. 
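// RawFilterTester (below) hand-crafts filter blobs so the tests can feed
// arbitrary or corrupt metadata to the built-in FilterBitsReaders. The
// trailer it writes mirrors the real writers: 1 byte of num_probes (or a
// negative format marker) followed by 4 bytes of num_lines encoded with
// EncodeFixed32. Illustrative construction of such a trailer by hand:
//   char meta[5];
//   meta[0] = static_cast<char>(6);  // 6 probes, legacy Bloom layout
//   EncodeFixed32(meta + 1, 4);      // 4 cache lines of filter data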
- std::array data_{}; - // Points five bytes from the end - char* metadata_ptr_; - - RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {} - - Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines, - uint32_t num_probes) { - metadata_ptr_[0] = static_cast(num_probes); - EncodeFixed32(metadata_ptr_ + 1, num_lines); - uint32_t len = len_without_metadata + /*metadata*/ 5; - assert(len <= data_.size()); - return Slice(metadata_ptr_ - len_without_metadata, len); - } - - Slice Reset(uint32_t len_without_metadata, uint32_t num_lines, - uint32_t num_probes, bool fill_ones) { - data_.fill(fill_ones ? 0xff : 0); - return ResetNoFill(len_without_metadata, num_lines, num_probes); - } - - Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines, - uint32_t num_probes) { - for (uint32_t i = 0; i < data_.size(); ++i) { - data_[i] = static_cast(0x7b7b >> (i % 7)); - } - return ResetNoFill(len_without_metadata, num_lines, num_probes); - } -}; - -TEST_P(FullBloomTest, RawSchema) { - RawFilterTester cft; - // Legacy Bloom configurations - // Two probes, about 3/4 bits set: ~50% "FP" rate - // One 256-byte cache line. - OpenRaw(cft.ResetWeirdFill(256, 1, 2)); - EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches()); - - // Two 128-byte cache lines. - OpenRaw(cft.ResetWeirdFill(256, 2, 2)); - EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches()); - - // Four 64-byte cache lines. - OpenRaw(cft.ResetWeirdFill(256, 4, 2)); - EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches()); - - // Fast local Bloom configurations (marker 255 -> -1) - // Two probes, about 3/4 bits set: ~50% "FP" rate - // Four 64-byte cache lines. - OpenRaw(cft.ResetWeirdFill(256, 2U << 8, 255)); - EXPECT_EQ(uint64_t{9957045189927952471U}, PackedMatches()); - - // Ribbon configurations (marker 254 -> -2) - - // Even though the builder never builds configurations this - // small (preferring Bloom), we can test that the configuration - // can be read, for possible future-proofing. 
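// A small hedged sketch (not from the original file) of the 5-byte metadata
// trailer that RawFilterTester appends and the built-in FilterBitsReaders
// parse: one byte that is either a probe count or a schema marker (255 for
// fast local Bloom, 254 for Ribbon), followed by a little-endian fixed32
// holding num_lines or a schema-specific config word. The helper assumes
// `filter`/`len` describe the whole slice returned by Reset*() and len >= 5.
#include <cstddef>
#include <cstdint>
#include <cstring>

struct RawFilterTrailer {
  uint8_t probes_or_marker;  // e.g. 6, or 255 / 254 for the newer schemas
  uint32_t lines_or_config;
};

static RawFilterTrailer ParseRawFilterTrailer(const char* filter, size_t len) {
  RawFilterTrailer t{};
  const char* meta = filter + len - 5;
  t.probes_or_marker = static_cast<uint8_t>(meta[0]);
  // The real code uses DecodeFixed32(); a plain memcpy matches it on the
  // little-endian hosts these tests run on.
  uint32_t v = 0;
  std::memcpy(&v, meta + 1, sizeof(v));
  t.lines_or_config = v;
  return t;
}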
- - // 256 slots, one result column = 32 bytes (2 blocks, seed 0) - // ~50% FP rate: - // 0b0101010111110101010000110000011011011111100100001110010011101010 - OpenRaw(cft.ResetWeirdFill(32, 2U << 8, 254)); - EXPECT_EQ(uint64_t{6193930559317665002U}, PackedMatches()); - - // 256 slots, three-to-four result columns = 112 bytes - // ~ 1 in 10 FP rate: - // 0b0000000000100000000000000000000001000001000000010000101000000000 - OpenRaw(cft.ResetWeirdFill(112, 2U << 8, 254)); - EXPECT_EQ(uint64_t{9007200345328128U}, PackedMatches()); -} - -TEST_P(FullBloomTest, CorruptFilters) { - RawFilterTester cft; - - for (bool fill : {false, true}) { - // Legacy Bloom configurations - // Good filter bits - returns same as fill - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill)); - ASSERT_EQ(fill, Matches("hello")); - ASSERT_EQ(fill, Matches("world")); - - // Good filter bits - returns same as fill - OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill)); - ASSERT_EQ(fill, Matches("hello")); - ASSERT_EQ(fill, Matches("world")); - - // Good filter bits - returns same as fill - // 256 is unusual but legal cache line size - OpenRaw(cft.Reset(256 * 3, 3, 6, fill)); - ASSERT_EQ(fill, Matches("hello")); - ASSERT_EQ(fill, Matches("world")); - - // Good filter bits - returns same as fill - // 30 should be max num_probes - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill)); - ASSERT_EQ(fill, Matches("hello")); - ASSERT_EQ(fill, Matches("world")); - - // Good filter bits - returns same as fill - // 1 should be min num_probes - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill)); - ASSERT_EQ(fill, Matches("hello")); - ASSERT_EQ(fill, Matches("world")); - - // Type 1 trivial filter bits - returns true as if FP by zero probes - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - // Type 2 trivial filter bits - returns false as if built from zero keys - OpenRaw(cft.Reset(0, 0, 6, fill)); - ASSERT_FALSE(Matches("hello")); - ASSERT_FALSE(Matches("world")); - - // Type 2 trivial filter bits - returns false as if built from zero keys - OpenRaw(cft.Reset(0, 37, 6, fill)); - ASSERT_FALSE(Matches("hello")); - ASSERT_FALSE(Matches("world")); - - // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes - OpenRaw(cft.Reset(0, 0, 0, fill)); - ASSERT_FALSE(Matches("hello")); - ASSERT_FALSE(Matches("world")); - - // Bad filter bits - returns true for safety - // No solution to 0 * x == CACHE_LINE_SIZE - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - // Bad filter bits - returns true for safety - // Can't have 3 * x == 4 for integer x - OpenRaw(cft.Reset(4, 3, 6, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - // Bad filter bits - returns true for safety - // 97 bytes is not a power of two, so not a legal cache line size - OpenRaw(cft.Reset(97 * 3, 3, 6, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - // Bad filter bits - returns true for safety - // 65 bytes is not a power of two, so not a legal cache line size - OpenRaw(cft.Reset(65 * 3, 3, 6, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - // Bad filter bits - returns false as if built from zero keys - // < 5 bytes overall means missing even metadata - OpenRaw(cft.Reset(static_cast(-1), 3, 6, fill)); - ASSERT_FALSE(Matches("hello")); - ASSERT_FALSE(Matches("world")); - - OpenRaw(cft.Reset(static_cast(-5), 3, 6, fill)); - ASSERT_FALSE(Matches("hello")); - 
ASSERT_FALSE(Matches("world")); - - // Dubious filter bits - returns same as fill (for now) - // 31 is not a useful num_probes, nor generated by RocksDB unless directly - // using filter bits API without BloomFilterPolicy. - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill)); - ASSERT_EQ(fill, Matches("hello")); - ASSERT_EQ(fill, Matches("world")); - - // Dubious filter bits - returns same as fill (for now) - // Similar, with 127, largest positive char - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill)); - ASSERT_EQ(fill, Matches("hello")); - ASSERT_EQ(fill, Matches("world")); - - // Dubious filter bits - returns true (for now) - // num_probes set to 128 / -128, lowest negative char - // NB: Bug in implementation interprets this as negative and has same - // effect as zero probes, but effectively reserves negative char values - // for future use. - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - // Dubious filter bits - returns true (for now) - // Similar, with 253 / -3 - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 253, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - // ######################################################### - // Fast local Bloom configurations (marker 255 -> -1) - // Good config with six probes - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 6U << 8, 255, fill)); - ASSERT_EQ(fill, Matches("hello")); - ASSERT_EQ(fill, Matches("world")); - - // Becomes bad/reserved config (always true) if any other byte set - OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | 1U, 255, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 16), 255, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 24), 255, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - // Good config, max 30 probes - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 30U << 8, 255, fill)); - ASSERT_EQ(fill, Matches("hello")); - ASSERT_EQ(fill, Matches("world")); - - // Bad/reserved config (always true) if more than 30 - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 31U << 8, 255, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 33U << 8, 255, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 66U << 8, 255, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - OpenRaw(cft.Reset(CACHE_LINE_SIZE, 130U << 8, 255, fill)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - } - - // ######################################################### - // Ribbon configurations (marker 254 -> -2) - // ("fill" doesn't work to detect good configurations, we just - // have to rely on TN probability) - - // Good: 2 blocks * 16 bytes / segment * 4 columns = 128 bytes - // seed = 123 - OpenRaw(cft.Reset(128, (2U << 8) + 123U, 254, false)); - ASSERT_FALSE(Matches("hello")); - ASSERT_FALSE(Matches("world")); - - // Good: 2 blocks * 16 bytes / segment * 8 columns = 256 bytes - OpenRaw(cft.Reset(256, (2U << 8) + 123U, 254, false)); - ASSERT_FALSE(Matches("hello")); - ASSERT_FALSE(Matches("world")); - - // Surprisingly OK: 5000 blocks (640,000 slots) in only 1024 bits - // -> average close to 0 columns - OpenRaw(cft.Reset(128, (5000U << 8) + 123U, 254, false)); - // *Almost* all FPs - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); 
- // Need many queries to find a "true negative" - for (int i = 0; Matches(std::to_string(i)); ++i) { - ASSERT_LT(i, 1000); - } - - // Bad: 1 block not allowed (for implementation detail reasons) - OpenRaw(cft.Reset(128, (1U << 8) + 123U, 254, false)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - - // Bad: 0 blocks not allowed - OpenRaw(cft.Reset(128, (0U << 8) + 123U, 254, false)); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); -} - -INSTANTIATE_TEST_CASE_P(Full, FullBloomTest, - testing::Values(kLegacyBloom, kFastLocalBloom, - kStandard128Ribbon)); - -static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) { - union { - uint64_t key_value = 0; - char key_bytes[8]; - }; - - const unsigned kNumKeys = 1000; - - Slice key_slice{key_bytes, 8}; - for (key_value = 0; key_value < kNumKeys; ++key_value) { - builder->AddKey(key_slice); - } - - std::unique_ptr buf; - auto filter = builder->Finish(&buf); - return filter.size() * /*bits per byte*/ 8 / (1.0 * kNumKeys); -} - -static void SetTestingLevel(int levelish, FilterBuildingContext* ctx) { - if (levelish == -1) { - // Flush is treated as level -1 for this option but actually level 0 - ctx->level_at_creation = 0; - ctx->reason = TableFileCreationReason::kFlush; - } else { - ctx->level_at_creation = levelish; - ctx->reason = TableFileCreationReason::kCompaction; - } -} - -TEST(RibbonTest, RibbonTestLevelThreshold) { - BlockBasedTableOptions opts; - FilterBuildingContext ctx(opts); - // A few settings - for (CompactionStyle cs : {kCompactionStyleLevel, kCompactionStyleUniversal, - kCompactionStyleFIFO, kCompactionStyleNone}) { - ctx.compaction_style = cs; - for (int bloom_before_level : {-1, 0, 1, 10}) { - std::vector > policies; - policies.emplace_back(NewRibbonFilterPolicy(10, bloom_before_level)); - - if (bloom_before_level == 0) { - // Also test new API default - policies.emplace_back(NewRibbonFilterPolicy(10)); - } - - for (std::unique_ptr& policy : policies) { - // Claim to be generating filter for this level - SetTestingLevel(bloom_before_level, &ctx); - - std::unique_ptr builder{ - policy->GetBuilderWithContext(ctx)}; - - // Must be Ribbon (more space efficient than 10 bits per key) - ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); - - if (bloom_before_level >= 0) { - // Claim to be generating filter for previous level - SetTestingLevel(bloom_before_level - 1, &ctx); - - builder.reset(policy->GetBuilderWithContext(ctx)); - - if (cs == kCompactionStyleLevel || cs == kCompactionStyleUniversal) { - // Level is considered. - // Must be Bloom (~ 10 bits per key) - ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9); - } else { - // Level is ignored under non-traditional compaction styles. 
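// A hedged usage sketch (not part of the original test) of the policy this
// test exercises: NewRibbonFilterPolicy(bloom_equivalent_bits_per_key,
// bloom_before_level) keeps Bloom filters for levels below the threshold,
// where build speed matters more, and switches to the more space-efficient
// Ribbon filters at and above it, matching the bits-per-key assertions here.
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

static void ConfigureRibbonPolicySketch(ROCKSDB_NAMESPACE::Options* options) {
  ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto;
  // ~10 bits/key Bloom-equivalent accuracy; levels < 1 (flush output / L0)
  // get Bloom, deeper levels get Ribbon.
  bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewRibbonFilterPolicy(10, 1));
  options->table_factory.reset(
      ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(bbto));
}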
- // Must be Ribbon (more space efficient than 10 bits per key) - ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); - } - } - - // Like SST file writer - ctx.level_at_creation = -1; - ctx.reason = TableFileCreationReason::kMisc; - - builder.reset(policy->GetBuilderWithContext(ctx)); - - // Must be Ribbon (more space efficient than 10 bits per key) - ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); - } - } - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - ParseCommandLineFlags(&argc, &argv, true); - - return RUN_ALL_TESTS(); -} - -#endif // GFLAGS diff --git a/util/coding_test.cc b/util/coding_test.cc deleted file mode 100644 index 79dd7b82e..000000000 --- a/util/coding_test.cc +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/coding.h" - -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -class Coding {}; -TEST(Coding, Fixed16) { - std::string s; - for (uint16_t v = 0; v < 0xFFFF; v++) { - PutFixed16(&s, v); - } - - const char* p = s.data(); - for (uint16_t v = 0; v < 0xFFFF; v++) { - uint16_t actual = DecodeFixed16(p); - ASSERT_EQ(v, actual); - p += sizeof(uint16_t); - } -} - -TEST(Coding, Fixed32) { - std::string s; - for (uint32_t v = 0; v < 100000; v++) { - PutFixed32(&s, v); - } - - const char* p = s.data(); - for (uint32_t v = 0; v < 100000; v++) { - uint32_t actual = DecodeFixed32(p); - ASSERT_EQ(v, actual); - p += sizeof(uint32_t); - } -} - -TEST(Coding, Fixed64) { - std::string s; - for (int power = 0; power <= 63; power++) { - uint64_t v = static_cast(1) << power; - PutFixed64(&s, v - 1); - PutFixed64(&s, v + 0); - PutFixed64(&s, v + 1); - } - - const char* p = s.data(); - for (int power = 0; power <= 63; power++) { - uint64_t v = static_cast(1) << power; - uint64_t actual = 0; - actual = DecodeFixed64(p); - ASSERT_EQ(v - 1, actual); - p += sizeof(uint64_t); - - actual = DecodeFixed64(p); - ASSERT_EQ(v + 0, actual); - p += sizeof(uint64_t); - - actual = DecodeFixed64(p); - ASSERT_EQ(v + 1, actual); - p += sizeof(uint64_t); - } -} - -// Test that encoding routines generate little-endian encodings -TEST(Coding, EncodingOutput) { - std::string dst; - PutFixed32(&dst, 0x04030201); - ASSERT_EQ(4U, dst.size()); - ASSERT_EQ(0x01, static_cast(dst[0])); - ASSERT_EQ(0x02, static_cast(dst[1])); - ASSERT_EQ(0x03, static_cast(dst[2])); - ASSERT_EQ(0x04, static_cast(dst[3])); - - dst.clear(); - PutFixed64(&dst, 0x0807060504030201ull); - ASSERT_EQ(8U, dst.size()); - ASSERT_EQ(0x01, static_cast(dst[0])); - ASSERT_EQ(0x02, static_cast(dst[1])); - ASSERT_EQ(0x03, static_cast(dst[2])); - ASSERT_EQ(0x04, static_cast(dst[3])); - ASSERT_EQ(0x05, static_cast(dst[4])); - ASSERT_EQ(0x06, static_cast(dst[5])); - ASSERT_EQ(0x07, static_cast(dst[6])); - ASSERT_EQ(0x08, static_cast(dst[7])); -} - -TEST(Coding, Varint32) { - std::string s; - for (uint32_t i = 0; i < (32 * 32); i++) { - uint32_t v = (i / 32) << (i % 32); - PutVarint32(&s, v); - } - - const char* p = s.data(); - 
const char* limit = p + s.size(); - for (uint32_t i = 0; i < (32 * 32); i++) { - uint32_t expected = (i / 32) << (i % 32); - uint32_t actual = 0; - const char* start = p; - p = GetVarint32Ptr(p, limit, &actual); - ASSERT_TRUE(p != nullptr); - ASSERT_EQ(expected, actual); - ASSERT_EQ(VarintLength(actual), p - start); - } - ASSERT_EQ(p, s.data() + s.size()); -} - -TEST(Coding, Varint64) { - // Construct the list of values to check - std::vector values; - // Some special values - values.push_back(0); - values.push_back(100); - values.push_back(~static_cast(0)); - values.push_back(~static_cast(0) - 1); - for (uint32_t k = 0; k < 64; k++) { - // Test values near powers of two - const uint64_t power = 1ull << k; - values.push_back(power); - values.push_back(power - 1); - values.push_back(power + 1); - }; - - std::string s; - for (unsigned int i = 0; i < values.size(); i++) { - PutVarint64(&s, values[i]); - } - - const char* p = s.data(); - const char* limit = p + s.size(); - for (unsigned int i = 0; i < values.size(); i++) { - ASSERT_TRUE(p < limit); - uint64_t actual = 0; - const char* start = p; - p = GetVarint64Ptr(p, limit, &actual); - ASSERT_TRUE(p != nullptr); - ASSERT_EQ(values[i], actual); - ASSERT_EQ(VarintLength(actual), p - start); - } - ASSERT_EQ(p, limit); -} - -TEST(Coding, Varint32Overflow) { - uint32_t result; - std::string input("\x81\x82\x83\x84\x85\x11"); - ASSERT_TRUE(GetVarint32Ptr(input.data(), input.data() + input.size(), - &result) == nullptr); -} - -TEST(Coding, Varint32Truncation) { - uint32_t large_value = (1u << 31) + 100; - std::string s; - PutVarint32(&s, large_value); - uint32_t result; - for (unsigned int len = 0; len + 1 < s.size(); len++) { - ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + len, &result) == nullptr); - } - ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + s.size(), &result) != - nullptr); - ASSERT_EQ(large_value, result); -} - -TEST(Coding, Varint64Overflow) { - uint64_t result; - std::string input("\x81\x82\x83\x84\x85\x81\x82\x83\x84\x85\x11"); - ASSERT_TRUE(GetVarint64Ptr(input.data(), input.data() + input.size(), - &result) == nullptr); -} - -TEST(Coding, Varint64Truncation) { - uint64_t large_value = (1ull << 63) + 100ull; - std::string s; - PutVarint64(&s, large_value); - uint64_t result; - for (unsigned int len = 0; len + 1 < s.size(); len++) { - ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + len, &result) == nullptr); - } - ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + s.size(), &result) != - nullptr); - ASSERT_EQ(large_value, result); -} - -TEST(Coding, Strings) { - std::string s; - PutLengthPrefixedSlice(&s, Slice("")); - PutLengthPrefixedSlice(&s, Slice("foo")); - PutLengthPrefixedSlice(&s, Slice("bar")); - PutLengthPrefixedSlice(&s, Slice(std::string(200, 'x'))); - - Slice input(s); - Slice v; - ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); - ASSERT_EQ("", v.ToString()); - ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); - ASSERT_EQ("foo", v.ToString()); - ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); - ASSERT_EQ("bar", v.ToString()); - ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); - ASSERT_EQ(std::string(200, 'x'), v.ToString()); - ASSERT_EQ("", input.ToString()); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/util/crc32c_test.cc b/util/crc32c_test.cc deleted file mode 100644 index 715d63e2d..000000000 --- a/util/crc32c_test.cc +++ /dev/null @@ -1,213 +0,0 @@ 
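// A standalone sketch (not from the deleted coding_test.cc above) of the
// 7-bits-per-byte varint format its Varint32 tests exercise: each byte
// carries 7 payload bits and the high bit means "more bytes follow", so a
// uint32_t needs at most 5 bytes. That is why Varint32Overflow feeds six
// continuation bytes and expects GetVarint32Ptr to return nullptr.
#include <cstdint>
#include <string>

static void SketchPutVarint32(std::string* dst, uint32_t v) {
  while (v >= 0x80) {
    dst->push_back(static_cast<char>((v & 0x7f) | 0x80));
    v >>= 7;
  }
  dst->push_back(static_cast<char>(v));
}

// Returns the position just past the decoded value, or nullptr on truncated
// or overlong input, mirroring the behavior the tests assert.
static const char* SketchGetVarint32(const char* p, const char* limit,
                                     uint32_t* value) {
  uint32_t result = 0;
  for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) {
    uint32_t byte = static_cast<uint8_t>(*p++);
    if (byte & 0x80) {
      result |= (byte & 0x7f) << shift;
    } else {
      result |= byte << shift;
      *value = result;
      return p;
    }
  }
  return nullptr;
}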
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -#include "util/crc32c.h" - -#include "test_util/testharness.h" -#include "util/coding.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { -namespace crc32c { - -class CRC {}; - -// Tests for 3-way crc32c algorithm. We need these tests because it uses -// different lookup tables than the original Fast_CRC32 -const unsigned int BUFFER_SIZE = 512 * 1024 * sizeof(uint64_t); -char buffer[BUFFER_SIZE]; - -struct ExpectedResult { - size_t offset; - size_t length; - uint32_t crc32c; -}; - -ExpectedResult expectedResults[] = { - // Zero-byte input - {0, 0, ~0U}, - // Small aligned inputs to test special cases in SIMD implementations - {8, 1, 1543413366}, - {8, 2, 523493126}, - {8, 3, 1560427360}, - {8, 4, 3422504776}, - {8, 5, 447841138}, - {8, 6, 3910050499}, - {8, 7, 3346241981}, - // Small unaligned inputs - {9, 1, 3855826643}, - {10, 2, 560880875}, - {11, 3, 1479707779}, - {12, 4, 2237687071}, - {13, 5, 4063855784}, - {14, 6, 2553454047}, - {15, 7, 1349220140}, - // Larger inputs to test leftover chunks at the end of aligned blocks - {8, 8, 627613930}, - {8, 9, 2105929409}, - {8, 10, 2447068514}, - {8, 11, 863807079}, - {8, 12, 292050879}, - {8, 13, 1411837737}, - {8, 14, 2614515001}, - {8, 15, 3579076296}, - {8, 16, 2897079161}, - {8, 17, 675168386}, - // // Much larger inputs - {0, BUFFER_SIZE, 2096790750}, - {1, BUFFER_SIZE / 2, 3854797577}, - -}; - -TEST(CRC, StandardResults) { - // Original Fast_CRC32 tests. - // From rfc3720 section B.4. - char buf[32]; - - memset(buf, 0, sizeof(buf)); - ASSERT_EQ(0x8a9136aaU, Value(buf, sizeof(buf))); - - memset(buf, 0xff, sizeof(buf)); - ASSERT_EQ(0x62a8ab43U, Value(buf, sizeof(buf))); - - for (int i = 0; i < 32; i++) { - buf[i] = static_cast(i); - } - ASSERT_EQ(0x46dd794eU, Value(buf, sizeof(buf))); - - for (int i = 0; i < 32; i++) { - buf[i] = static_cast(31 - i); - } - ASSERT_EQ(0x113fdb5cU, Value(buf, sizeof(buf))); - - unsigned char data[48] = { - 0x01, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, - 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x18, 0x28, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }; - ASSERT_EQ(0xd9963a56, Value(reinterpret_cast(data), sizeof(data))); - - // 3-Way Crc32c tests ported from folly. 
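// A hedged usage sketch (not from the original file) of the property the
// stitching and Crc32cCombine tests below rely on: Extend() continues a
// running crc32c across chunks, so checksumming a buffer piecewise gives the
// same result as a single Value() call over the whole buffer. Assumes
// chunk > 0.
#include <cstddef>
#include "util/crc32c.h"

static uint32_t ChunkedCrc32cSketch(const char* data, size_t len,
                                    size_t chunk) {
  size_t first = len < chunk ? len : chunk;
  uint32_t crc = ROCKSDB_NAMESPACE::crc32c::Value(data, first);
  for (size_t off = first; off < len; off += chunk) {
    size_t n = (len - off < chunk) ? (len - off) : chunk;
    crc = ROCKSDB_NAMESPACE::crc32c::Extend(crc, data + off, n);
  }
  return crc;  // equals Value(data, len)
}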
- // Test 1: single computation - for (auto expected : expectedResults) { - uint32_t result = Value(buffer + expected.offset, expected.length); - EXPECT_EQ(~expected.crc32c, result); - } - - // Test 2: stitching two computations - for (auto expected : expectedResults) { - size_t partialLength = expected.length / 2; - uint32_t partialChecksum = Value(buffer + expected.offset, partialLength); - uint32_t result = - Extend(partialChecksum, buffer + expected.offset + partialLength, - expected.length - partialLength); - EXPECT_EQ(~expected.crc32c, result); - } -} - -TEST(CRC, Values) { ASSERT_NE(Value("a", 1), Value("foo", 3)); } - -TEST(CRC, Extend) { - ASSERT_EQ(Value("hello world", 11), Extend(Value("hello ", 6), "world", 5)); -} - -TEST(CRC, Mask) { - uint32_t crc = Value("foo", 3); - ASSERT_NE(crc, Mask(crc)); - ASSERT_NE(crc, Mask(Mask(crc))); - ASSERT_EQ(crc, Unmask(Mask(crc))); - ASSERT_EQ(crc, Unmask(Unmask(Mask(Mask(crc))))); -} - -TEST(CRC, Crc32cCombineBasicTest) { - uint32_t crc1 = Value("hello ", 6); - uint32_t crc2 = Value("world", 5); - uint32_t crc3 = Value("hello world", 11); - uint32_t crc1_2_combine = Crc32cCombine(crc1, crc2, 5); - ASSERT_EQ(crc3, crc1_2_combine); -} - -TEST(CRC, Crc32cCombineOrderMattersTest) { - uint32_t crc1 = Value("hello ", 6); - uint32_t crc2 = Value("world", 5); - uint32_t crc3 = Value("hello world", 11); - uint32_t crc2_1_combine = Crc32cCombine(crc2, crc1, 6); - ASSERT_NE(crc3, crc2_1_combine); -} - -TEST(CRC, Crc32cCombineFullCoverTest) { - int scale = 4 * 1024; - Random rnd(test::RandomSeed()); - int size_1 = 1024 * 1024; - std::string s1 = rnd.RandomBinaryString(size_1); - uint32_t crc1 = Value(s1.data(), size_1); - for (int i = 0; i < scale; i++) { - int size_2 = i; - std::string s2 = rnd.RandomBinaryString(size_2); - uint32_t crc2 = Value(s2.data(), s2.size()); - uint32_t crc1_2 = Extend(crc1, s2.data(), s2.size()); - uint32_t crc1_2_combine = Crc32cCombine(crc1, crc2, size_2); - ASSERT_EQ(crc1_2, crc1_2_combine); - } -} - -TEST(CRC, Crc32cCombineBigSizeTest) { - Random rnd(test::RandomSeed()); - int size_1 = 1024 * 1024; - std::string s1 = rnd.RandomBinaryString(size_1); - uint32_t crc1 = Value(s1.data(), size_1); - int size_2 = 16 * 1024 * 1024 - 1; - std::string s2 = rnd.RandomBinaryString(size_2); - uint32_t crc2 = Value(s2.data(), s2.size()); - uint32_t crc1_2 = Extend(crc1, s2.data(), s2.size()); - uint32_t crc1_2_combine = Crc32cCombine(crc1, crc2, size_2); - ASSERT_EQ(crc1_2, crc1_2_combine); -} - -} // namespace crc32c -} // namespace ROCKSDB_NAMESPACE - -// copied from folly -const uint64_t FNV_64_HASH_START = 14695981039346656037ULL; -inline uint64_t fnv64_buf(const void* buf, size_t n, - uint64_t hash = FNV_64_HASH_START) { - // forcing signed char, since other platforms can use unsigned - const signed char* char_buf = reinterpret_cast(buf); - - for (size_t i = 0; i < n; ++i) { - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + - (hash << 8) + (hash << 40); - hash ^= char_buf[i]; - } - return hash; -} - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - - // Populate a buffer with a deterministic pattern - // on which to compute checksums - - const uint8_t* src = (uint8_t*)ROCKSDB_NAMESPACE::crc32c::buffer; - uint64_t* dst = (uint64_t*)ROCKSDB_NAMESPACE::crc32c::buffer; - const uint64_t* end = - (const uint64_t*)(ROCKSDB_NAMESPACE::crc32c::buffer + - ROCKSDB_NAMESPACE::crc32c::BUFFER_SIZE); - *dst++ = 0; - while (dst < end) { - 
ROCKSDB_NAMESPACE::EncodeFixed64( - reinterpret_cast(dst), - fnv64_buf((const char*)src, sizeof(uint64_t))); - dst++; - src += sizeof(uint64_t); - } - - return RUN_ALL_TESTS(); -} diff --git a/util/defer_test.cc b/util/defer_test.cc deleted file mode 100644 index 0e98f68b6..000000000 --- a/util/defer_test.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "util/defer.h" - -#include "port/port.h" -#include "port/stack_trace.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -class DeferTest {}; - -TEST(DeferTest, BlockScope) { - int v = 1; - { - Defer defer([&v]() { v *= 2; }); - } - ASSERT_EQ(2, v); -} - -TEST(DeferTest, FunctionScope) { - int v = 1; - auto f = [&v]() { - Defer defer([&v]() { v *= 2; }); - v = 2; - }; - f(); - ASSERT_EQ(4, v); -} - -TEST(SaveAndRestoreTest, BlockScope) { - int v = 1; - { - SaveAndRestore sr(&v); - ASSERT_EQ(v, 1); - v = 2; - ASSERT_EQ(v, 2); - } - ASSERT_EQ(v, 1); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/util/dynamic_bloom_test.cc b/util/dynamic_bloom_test.cc deleted file mode 100644 index 925c5479a..000000000 --- a/util/dynamic_bloom_test.cc +++ /dev/null @@ -1,325 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifndef GFLAGS -#include -int main() { - fprintf(stderr, "Please install gflags to run this test... Skipping...\n"); - return 0; -} -#else - -#include -#include -#include -#include -#include -#include -#include - -#include "dynamic_bloom.h" -#include "memory/arena.h" -#include "port/port.h" -#include "rocksdb/system_clock.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/gflags_compat.h" -#include "util/stop_watch.h" - -using GFLAGS_NAMESPACE::ParseCommandLineFlags; - -DEFINE_int32(bits_per_key, 10, ""); -DEFINE_int32(num_probes, 6, ""); -DEFINE_bool(enable_perf, false, ""); - -namespace ROCKSDB_NAMESPACE { - -struct KeyMaker { - uint64_t a; - uint64_t b; - - // Sequential, within a hash function block - inline Slice Seq(uint64_t i) { - a = i; - return Slice(reinterpret_cast(&a), sizeof(a)); - } - // Not quite sequential, varies across hash function blocks - inline Slice Nonseq(uint64_t i) { - a = i; - b = i * 123; - return Slice(reinterpret_cast(this), sizeof(*this)); - } - inline Slice Key(uint64_t i, bool nonseq) { - return nonseq ? 
Nonseq(i) : Seq(i); - } -}; - -class DynamicBloomTest : public testing::Test {}; - -TEST_F(DynamicBloomTest, EmptyFilter) { - Arena arena; - DynamicBloom bloom1(&arena, 100, 2); - ASSERT_TRUE(!bloom1.MayContain("hello")); - ASSERT_TRUE(!bloom1.MayContain("world")); - - DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 2); - ASSERT_TRUE(!bloom2.MayContain("hello")); - ASSERT_TRUE(!bloom2.MayContain("world")); -} - -TEST_F(DynamicBloomTest, Small) { - Arena arena; - DynamicBloom bloom1(&arena, 100, 2); - bloom1.Add("hello"); - bloom1.Add("world"); - ASSERT_TRUE(bloom1.MayContain("hello")); - ASSERT_TRUE(bloom1.MayContain("world")); - ASSERT_TRUE(!bloom1.MayContain("x")); - ASSERT_TRUE(!bloom1.MayContain("foo")); - - DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 2); - bloom2.Add("hello"); - bloom2.Add("world"); - ASSERT_TRUE(bloom2.MayContain("hello")); - ASSERT_TRUE(bloom2.MayContain("world")); - ASSERT_TRUE(!bloom2.MayContain("x")); - ASSERT_TRUE(!bloom2.MayContain("foo")); -} - -TEST_F(DynamicBloomTest, SmallConcurrentAdd) { - Arena arena; - DynamicBloom bloom1(&arena, 100, 2); - bloom1.AddConcurrently("hello"); - bloom1.AddConcurrently("world"); - ASSERT_TRUE(bloom1.MayContain("hello")); - ASSERT_TRUE(bloom1.MayContain("world")); - ASSERT_TRUE(!bloom1.MayContain("x")); - ASSERT_TRUE(!bloom1.MayContain("foo")); - - DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 2); - bloom2.AddConcurrently("hello"); - bloom2.AddConcurrently("world"); - ASSERT_TRUE(bloom2.MayContain("hello")); - ASSERT_TRUE(bloom2.MayContain("world")); - ASSERT_TRUE(!bloom2.MayContain("x")); - ASSERT_TRUE(!bloom2.MayContain("foo")); -} - -static uint32_t NextNum(uint32_t num) { - if (num < 10) { - num += 1; - } else if (num < 100) { - num += 10; - } else if (num < 1000) { - num += 100; - } else { - num = num * 26 / 10; - } - return num; -} - -TEST_F(DynamicBloomTest, VaryingLengths) { - KeyMaker km; - - // Count number of filters that significantly exceed the false positive rate - int mediocre_filters = 0; - int good_filters = 0; - uint32_t num_probes = static_cast(FLAGS_num_probes); - - fprintf(stderr, "bits_per_key: %d num_probes: %d\n", FLAGS_bits_per_key, - num_probes); - - // NB: FP rate impact of 32-bit hash is noticeable starting around 10M keys. - // But that effect is hidden if using sequential keys (unique hashes). - for (bool nonseq : {false, true}) { - const uint32_t max_num = FLAGS_enable_perf ? 40000000 : 400000; - for (uint32_t num = 1; num <= max_num; num = NextNum(num)) { - uint32_t bloom_bits = 0; - Arena arena; - bloom_bits = num * FLAGS_bits_per_key; - DynamicBloom bloom(&arena, bloom_bits, num_probes); - for (uint64_t i = 0; i < num; i++) { - bloom.Add(km.Key(i, nonseq)); - ASSERT_TRUE(bloom.MayContain(km.Key(i, nonseq))); - } - - // All added keys must match - for (uint64_t i = 0; i < num; i++) { - ASSERT_TRUE(bloom.MayContain(km.Key(i, nonseq))); - } - - // Check false positive rate - int result = 0; - for (uint64_t i = 0; i < 30000; i++) { - if (bloom.MayContain(km.Key(i + 1000000000, nonseq))) { - result++; - } - } - double rate = result / 30000.0; - - fprintf(stderr, - "False positives (%s keys): " - "%5.2f%% @ num = %6u, bloom_bits = %6u\n", - nonseq ? 
"nonseq" : "seq", rate * 100.0, num, bloom_bits); - - if (rate > 0.0125) - mediocre_filters++; // Allowed, but not too often - else - good_filters++; - } - } - - fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters, - mediocre_filters); - ASSERT_LE(mediocre_filters, good_filters / 25); -} - -TEST_F(DynamicBloomTest, perf) { - KeyMaker km; - StopWatchNano timer(SystemClock::Default().get()); - uint32_t num_probes = static_cast(FLAGS_num_probes); - - if (!FLAGS_enable_perf) { - return; - } - - for (uint32_t m = 1; m <= 8; ++m) { - Arena arena; - const uint32_t num_keys = m * 8 * 1024 * 1024; - fprintf(stderr, "testing %" PRIu32 "M keys\n", m * 8); - - DynamicBloom std_bloom(&arena, num_keys * 10, num_probes); - - timer.Start(); - for (uint64_t i = 1; i <= num_keys; ++i) { - std_bloom.Add(km.Seq(i)); - } - - uint64_t elapsed = timer.ElapsedNanos(); - fprintf(stderr, "dynamic bloom, avg add latency %3g\n", - static_cast(elapsed) / num_keys); - - uint32_t count = 0; - timer.Start(); - for (uint64_t i = 1; i <= num_keys; ++i) { - if (std_bloom.MayContain(km.Seq(i))) { - ++count; - } - } - ASSERT_EQ(count, num_keys); - elapsed = timer.ElapsedNanos(); - assert(count > 0); - fprintf(stderr, "dynamic bloom, avg query latency %3g\n", - static_cast(elapsed) / count); - } -} - -TEST_F(DynamicBloomTest, concurrent_with_perf) { - uint32_t num_probes = static_cast(FLAGS_num_probes); - - uint32_t m_limit = FLAGS_enable_perf ? 8 : 1; - - uint32_t num_threads = 4; - std::vector threads; - - // NB: Uses sequential keys for speed, but that hides the FP rate - // impact of 32-bit hash, which is noticeable starting around 10M keys - // when they vary across hashing blocks. - for (uint32_t m = 1; m <= m_limit; ++m) { - Arena arena; - const uint32_t num_keys = m * 8 * 1024 * 1024; - fprintf(stderr, "testing %" PRIu32 "M keys\n", m * 8); - - DynamicBloom std_bloom(&arena, num_keys * 10, num_probes); - - std::atomic elapsed(0); - - std::function adder([&](size_t t) { - KeyMaker km; - StopWatchNano timer(SystemClock::Default().get()); - timer.Start(); - for (uint64_t i = 1 + t; i <= num_keys; i += num_threads) { - std_bloom.AddConcurrently(km.Seq(i)); - } - elapsed += timer.ElapsedNanos(); - }); - for (size_t t = 0; t < num_threads; ++t) { - threads.emplace_back(adder, t); - } - while (threads.size() > 0) { - threads.back().join(); - threads.pop_back(); - } - - fprintf(stderr, - "dynamic bloom, avg parallel add latency %3g" - " nanos/key\n", - static_cast(elapsed) / num_threads / num_keys); - - elapsed = 0; - std::function hitter([&](size_t t) { - KeyMaker km; - StopWatchNano timer(SystemClock::Default().get()); - timer.Start(); - for (uint64_t i = 1 + t; i <= num_keys; i += num_threads) { - bool f = std_bloom.MayContain(km.Seq(i)); - ASSERT_TRUE(f); - } - elapsed += timer.ElapsedNanos(); - }); - for (size_t t = 0; t < num_threads; ++t) { - threads.emplace_back(hitter, t); - } - while (threads.size() > 0) { - threads.back().join(); - threads.pop_back(); - } - - fprintf(stderr, - "dynamic bloom, avg parallel hit latency %3g" - " nanos/key\n", - static_cast(elapsed) / num_threads / num_keys); - - elapsed = 0; - std::atomic false_positives(0); - std::function misser([&](size_t t) { - KeyMaker km; - StopWatchNano timer(SystemClock::Default().get()); - timer.Start(); - for (uint64_t i = num_keys + 1 + t; i <= 2 * num_keys; i += num_threads) { - bool f = std_bloom.MayContain(km.Seq(i)); - if (f) { - ++false_positives; - } - } - elapsed += timer.ElapsedNanos(); - }); - for (size_t t = 0; t < num_threads; ++t) { - 
threads.emplace_back(misser, t); - } - while (threads.size() > 0) { - threads.back().join(); - threads.pop_back(); - } - - fprintf(stderr, - "dynamic bloom, avg parallel miss latency %3g" - " nanos/key, %f%% false positive rate\n", - static_cast(elapsed) / num_threads / num_keys, - false_positives.load() * 100.0 / num_keys); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char **argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - ParseCommandLineFlags(&argc, &argv, true); - - return RUN_ALL_TESTS(); -} - -#endif // GFLAGS diff --git a/util/file_reader_writer_test.cc b/util/file_reader_writer_test.cc deleted file mode 100644 index 68776612b..000000000 --- a/util/file_reader_writer_test.cc +++ /dev/null @@ -1,1058 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#include -#include - -#include "db/db_test_util.h" -#include "env/mock_env.h" -#include "file/line_file_reader.h" -#include "file/random_access_file_reader.h" -#include "file/read_write_util.h" -#include "file/readahead_raf.h" -#include "file/sequence_file_reader.h" -#include "file/writable_file_writer.h" -#include "rocksdb/file_system.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/crc32c.h" -#include "util/random.h" -#include "utilities/fault_injection_fs.h" - -namespace ROCKSDB_NAMESPACE { - -class WritableFileWriterTest : public testing::Test {}; - -constexpr uint32_t kMb = static_cast(1) << 20; - -TEST_F(WritableFileWriterTest, RangeSync) { - class FakeWF : public FSWritableFile { - public: - explicit FakeWF() : size_(0), last_synced_(0) {} - ~FakeWF() override {} - - using FSWritableFile::Append; - IOStatus Append(const Slice& data, const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - size_ += data.size(); - return IOStatus::OK(); - } - IOStatus Truncate(uint64_t /*size*/, const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus Close(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - EXPECT_GE(size_, last_synced_ + kMb); - EXPECT_LT(size_, last_synced_ + 2 * kMb); - // Make sure random writes generated enough writes. 
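// A hedged sketch (not part of the original test) of the knob the surrounding
// RangeSync test exercises: when bytes_per_sync is set, WritableFileWriter
// periodically issues RangeSync() on page-aligned ranges roughly every
// bytes_per_sync bytes (the FakeWF in this test asserts 4096-aligned offsets
// and that syncing trails the write position by about 1MB), rather than
// leaving all dirty pages to be flushed at Sync()/Close(). The test sets the
// option on EnvOptions; the same knob is exposed on Options as below.
#include "rocksdb/options.h"

static void EnableIncrementalSyncSketch(ROCKSDB_NAMESPACE::Options* options) {
  options->bytes_per_sync = 1024 * 1024;      // SST and other writable files
  options->wal_bytes_per_sync = 1024 * 1024;  // WAL files
}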
- EXPECT_GT(size_, 10 * kMb); - return IOStatus::OK(); - } - IOStatus Flush(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus Sync(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus Fsync(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - void SetIOPriority(Env::IOPriority /*pri*/) override {} - uint64_t GetFileSize(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return size_; - } - void GetPreallocationStatus(size_t* /*block_size*/, - size_t* /*last_allocated_block*/) override {} - size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { - return 0; - } - IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) override { - return IOStatus::OK(); - } - - protected: - IOStatus Allocate(uint64_t /*offset*/, uint64_t /*len*/, - const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus RangeSync(uint64_t offset, uint64_t nbytes, - const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - EXPECT_EQ(offset % 4096, 0u); - EXPECT_EQ(nbytes % 4096, 0u); - - EXPECT_EQ(offset, last_synced_); - last_synced_ = offset + nbytes; - EXPECT_GE(size_, last_synced_ + kMb); - if (size_ > 2 * kMb) { - EXPECT_LT(size_, last_synced_ + 2 * kMb); - } - return IOStatus::OK(); - } - - uint64_t size_; - uint64_t last_synced_; - }; - - EnvOptions env_options; - env_options.bytes_per_sync = kMb; - std::unique_ptr wf(new FakeWF); - std::unique_ptr writer( - new WritableFileWriter(std::move(wf), "" /* don't care */, env_options)); - Random r(301); - Status s; - std::unique_ptr large_buf(new char[10 * kMb]); - for (int i = 0; i < 1000; i++) { - int skew_limit = (i < 700) ? 10 : 15; - uint32_t num = r.Skewed(skew_limit) * 100 + r.Uniform(100); - s = writer->Append(Slice(large_buf.get(), num)); - ASSERT_OK(s); - - // Flush in a chance of 1/10. 
- if (r.Uniform(10) == 0) { - s = writer->Flush(); - ASSERT_OK(s); - } - } - s = writer->Close(); - ASSERT_OK(s); -} - -TEST_F(WritableFileWriterTest, IncrementalBuffer) { - class FakeWF : public FSWritableFile { - public: - explicit FakeWF(std::string* _file_data, bool _use_direct_io, - bool _no_flush) - : file_data_(_file_data), - use_direct_io_(_use_direct_io), - no_flush_(_no_flush) {} - ~FakeWF() override {} - - using FSWritableFile::Append; - IOStatus Append(const Slice& data, const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - file_data_->append(data.data(), data.size()); - size_ += data.size(); - return IOStatus::OK(); - } - using FSWritableFile::PositionedAppend; - IOStatus PositionedAppend(const Slice& data, uint64_t pos, - const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - EXPECT_TRUE(pos % 512 == 0); - EXPECT_TRUE(data.size() % 512 == 0); - file_data_->resize(pos); - file_data_->append(data.data(), data.size()); - size_ += data.size(); - return IOStatus::OK(); - } - - IOStatus Truncate(uint64_t size, const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - file_data_->resize(size); - return IOStatus::OK(); - } - IOStatus Close(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus Flush(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus Sync(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus Fsync(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - void SetIOPriority(Env::IOPriority /*pri*/) override {} - uint64_t GetFileSize(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return size_; - } - void GetPreallocationStatus(size_t* /*block_size*/, - size_t* /*last_allocated_block*/) override {} - size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { - return 0; - } - IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) override { - return IOStatus::OK(); - } - bool use_direct_io() const override { return use_direct_io_; } - - std::string* file_data_; - bool use_direct_io_; - bool no_flush_; - size_t size_ = 0; - }; - - Random r(301); - const int kNumAttempts = 50; - for (int attempt = 0; attempt < kNumAttempts; attempt++) { - bool no_flush = (attempt % 3 == 0); - EnvOptions env_options; - env_options.writable_file_max_buffer_size = - (attempt < kNumAttempts / 2) ? 512 * 1024 : 700 * 1024; - std::string actual; - std::unique_ptr wf(new FakeWF(&actual, - attempt % 2 == 1, - no_flush)); - std::unique_ptr writer(new WritableFileWriter( - std::move(wf), "" /* don't care */, env_options)); - - std::string target; - for (int i = 0; i < 20; i++) { - uint32_t num = r.Skewed(16) * 100 + r.Uniform(100); - std::string random_string = r.RandomString(num); - ASSERT_OK(writer->Append(Slice(random_string.c_str(), num))); - target.append(random_string.c_str(), num); - - // In some attempts, flush in a chance of 1/10. 
- if (!no_flush && r.Uniform(10) == 0) { - ASSERT_OK(writer->Flush()); - } - } - ASSERT_OK(writer->Flush()); - ASSERT_OK(writer->Close()); - ASSERT_EQ(target.size(), actual.size()); - ASSERT_EQ(target, actual); - } -} - -TEST_F(WritableFileWriterTest, BufferWithZeroCapacityDirectIO) { - EnvOptions env_opts; - env_opts.use_direct_writes = true; - env_opts.writable_file_max_buffer_size = 0; - { - std::unique_ptr writer; - const Status s = - WritableFileWriter::Create(FileSystem::Default(), /*fname=*/"dont_care", - FileOptions(env_opts), &writer, - /*dbg=*/nullptr); - ASSERT_TRUE(s.IsInvalidArgument()); - } -} - -class DBWritableFileWriterTest : public DBTestBase { - public: - DBWritableFileWriterTest() - : DBTestBase("db_secondary_cache_test", /*env_do_fsync=*/true) { - fault_fs_.reset(new FaultInjectionTestFS(env_->GetFileSystem())); - fault_env_.reset(new CompositeEnvWrapper(env_, fault_fs_)); - } - - std::shared_ptr fault_fs_; - std::unique_ptr fault_env_; -}; - -TEST_F(DBWritableFileWriterTest, AppendWithChecksum) { - FileOptions file_options = FileOptions(); - Options options = GetDefaultOptions(); - options.create_if_missing = true; - DestroyAndReopen(options); - std::string fname = dbname_ + "/test_file"; - std::unique_ptr writable_file_ptr; - ASSERT_OK(fault_fs_->NewWritableFile(fname, file_options, &writable_file_ptr, - /*dbg*/ nullptr)); - std::unique_ptr file; - file.reset(new TestFSWritableFile( - fname, file_options, std::move(writable_file_ptr), fault_fs_.get())); - std::unique_ptr file_writer; - ImmutableOptions ioptions(options); - file_writer.reset(new WritableFileWriter( - std::move(file), fname, file_options, SystemClock::Default().get(), - nullptr, ioptions.stats, ioptions.listeners, - ioptions.file_checksum_gen_factory.get(), true, true)); - - Random rnd(301); - std::string data = rnd.RandomString(1000); - uint32_t data_crc32c = crc32c::Value(data.c_str(), data.size()); - fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); - ASSERT_OK(file_writer->Flush()); - Random size_r(47); - for (int i = 0; i < 2000; i++) { - data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); - data_crc32c = crc32c::Value(data.c_str(), data.size()); - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); - - data = rnd.RandomString((static_cast(size_r.Next()) % 97)); - ASSERT_OK(file_writer->Append(Slice(data.c_str()))); - ASSERT_OK(file_writer->Flush()); - } - ASSERT_OK(file_writer->Close()); - Destroy(options); -} - -TEST_F(DBWritableFileWriterTest, AppendVerifyNoChecksum) { - FileOptions file_options = FileOptions(); - Options options = GetDefaultOptions(); - options.create_if_missing = true; - DestroyAndReopen(options); - std::string fname = dbname_ + "/test_file"; - std::unique_ptr writable_file_ptr; - ASSERT_OK(fault_fs_->NewWritableFile(fname, file_options, &writable_file_ptr, - /*dbg*/ nullptr)); - std::unique_ptr file; - file.reset(new TestFSWritableFile( - fname, file_options, std::move(writable_file_ptr), fault_fs_.get())); - std::unique_ptr file_writer; - ImmutableOptions ioptions(options); - // Enable checksum handoff for this file, but do not enable buffer checksum. 
- // So Append with checksum logic will not be triggered - file_writer.reset(new WritableFileWriter( - std::move(file), fname, file_options, SystemClock::Default().get(), - nullptr, ioptions.stats, ioptions.listeners, - ioptions.file_checksum_gen_factory.get(), true, false)); - - Random rnd(301); - std::string data = rnd.RandomString(1000); - uint32_t data_crc32c = crc32c::Value(data.c_str(), data.size()); - fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); - ASSERT_OK(file_writer->Flush()); - Random size_r(47); - for (int i = 0; i < 1000; i++) { - data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); - data_crc32c = crc32c::Value(data.c_str(), data.size()); - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); - - data = rnd.RandomString((static_cast(size_r.Next()) % 97)); - ASSERT_OK(file_writer->Append(Slice(data.c_str()))); - ASSERT_OK(file_writer->Flush()); - } - ASSERT_OK(file_writer->Close()); - Destroy(options); -} - -TEST_F(DBWritableFileWriterTest, AppendWithChecksumRateLimiter) { - FileOptions file_options = FileOptions(); - file_options.rate_limiter = nullptr; - Options options = GetDefaultOptions(); - options.create_if_missing = true; - DestroyAndReopen(options); - std::string fname = dbname_ + "/test_file"; - std::unique_ptr writable_file_ptr; - ASSERT_OK(fault_fs_->NewWritableFile(fname, file_options, &writable_file_ptr, - /*dbg*/ nullptr)); - std::unique_ptr file; - file.reset(new TestFSWritableFile( - fname, file_options, std::move(writable_file_ptr), fault_fs_.get())); - std::unique_ptr file_writer; - ImmutableOptions ioptions(options); - // Enable checksum handoff for this file, but do not enable buffer checksum. - // So Append with checksum logic will not be triggered - file_writer.reset(new WritableFileWriter( - std::move(file), fname, file_options, SystemClock::Default().get(), - nullptr, ioptions.stats, ioptions.listeners, - ioptions.file_checksum_gen_factory.get(), true, true)); - fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - - Random rnd(301); - std::string data; - uint32_t data_crc32c; - uint64_t start = fault_env_->NowMicros(); - Random size_r(47); - uint64_t bytes_written = 0; - for (int i = 0; i < 100; i++) { - data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); - data_crc32c = crc32c::Value(data.c_str(), data.size()); - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); - bytes_written += static_cast(data.size()); - - data = rnd.RandomString((static_cast(size_r.Next()) % 97)); - ASSERT_OK(file_writer->Append(Slice(data.c_str()))); - ASSERT_OK(file_writer->Flush()); - bytes_written += static_cast(data.size()); - } - uint64_t elapsed = fault_env_->NowMicros() - start; - double raw_rate = bytes_written * 1000000.0 / elapsed; - ASSERT_OK(file_writer->Close()); - - // Set the rate-limiter - FileOptions file_options1 = FileOptions(); - file_options1.rate_limiter = - NewGenericRateLimiter(static_cast(0.5 * raw_rate)); - fname = dbname_ + "/test_file_1"; - std::unique_ptr writable_file_ptr1; - ASSERT_OK(fault_fs_->NewWritableFile(fname, file_options1, - &writable_file_ptr1, - /*dbg*/ nullptr)); - file.reset(new TestFSWritableFile( - fname, file_options1, std::move(writable_file_ptr1), fault_fs_.get())); - // Enable checksum handoff for this file, but do not enable buffer checksum. 
- // So Append with checksum logic will not be triggered - file_writer.reset(new WritableFileWriter( - std::move(file), fname, file_options1, SystemClock::Default().get(), - nullptr, ioptions.stats, ioptions.listeners, - ioptions.file_checksum_gen_factory.get(), true, true)); - - for (int i = 0; i < 1000; i++) { - data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); - data_crc32c = crc32c::Value(data.c_str(), data.size()); - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); - - data = rnd.RandomString((static_cast(size_r.Next()) % 97)); - ASSERT_OK(file_writer->Append(Slice(data.c_str()))); - ASSERT_OK(file_writer->Flush()); - } - ASSERT_OK(file_writer->Close()); - if (file_options1.rate_limiter != nullptr) { - delete file_options1.rate_limiter; - } - - Destroy(options); -} - -TEST_F(WritableFileWriterTest, AppendStatusReturn) { - class FakeWF : public FSWritableFile { - public: - explicit FakeWF() : use_direct_io_(false), io_error_(false) {} - - bool use_direct_io() const override { return use_direct_io_; } - - using FSWritableFile::Append; - IOStatus Append(const Slice& /*data*/, const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - if (io_error_) { - return IOStatus::IOError("Fake IO error"); - } - return IOStatus::OK(); - } - using FSWritableFile::PositionedAppend; - IOStatus PositionedAppend(const Slice& /*data*/, uint64_t, - const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - if (io_error_) { - return IOStatus::IOError("Fake IO error"); - } - return IOStatus::OK(); - } - IOStatus Close(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus Flush(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus Sync(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - void Setuse_direct_io(bool val) { use_direct_io_ = val; } - void SetIOError(bool val) { io_error_ = val; } - - protected: - bool use_direct_io_; - bool io_error_; - }; - std::unique_ptr wf(new FakeWF()); - wf->Setuse_direct_io(true); - std::unique_ptr writer( - new WritableFileWriter(std::move(wf), "" /* don't care */, EnvOptions())); - - ASSERT_OK(writer->Append(std::string(2 * kMb, 'a'))); - - // Next call to WritableFile::Append() should fail - FakeWF* fwf = static_cast(writer->writable_file()); - fwf->SetIOError(true); - ASSERT_NOK(writer->Append(std::string(2 * kMb, 'b'))); -} - -class ReadaheadRandomAccessFileTest - : public testing::Test, - public testing::WithParamInterface { - public: - static std::vector GetReadaheadSizeList() { - return {1lu << 12, 1lu << 16}; - } - void SetUp() override { - readahead_size_ = GetParam(); - scratch_.reset(new char[2 * readahead_size_]); - ResetSourceStr(); - } - ReadaheadRandomAccessFileTest() : control_contents_() {} - std::string Read(uint64_t offset, size_t n) { - Slice result; - Status s = test_read_holder_->Read(offset, n, IOOptions(), &result, - scratch_.get(), nullptr); - EXPECT_TRUE(s.ok() || s.IsInvalidArgument()); - return std::string(result.data(), result.size()); - } - void ResetSourceStr(const std::string& str = "") { - std::unique_ptr sink( - new test::StringSink(&control_contents_)); - std::unique_ptr write_holder(new WritableFileWriter( - std::move(sink), "" /* don't care */, FileOptions())); - Status s = write_holder->Append(Slice(str)); - EXPECT_OK(s); - s = write_holder->Flush(); - EXPECT_OK(s); - std::unique_ptr read_holder( - new 
test::StringSource(control_contents_)); - test_read_holder_ = - NewReadaheadRandomAccessFile(std::move(read_holder), readahead_size_); - } - size_t GetReadaheadSize() const { return readahead_size_; } - - private: - size_t readahead_size_; - Slice control_contents_; - std::unique_ptr test_read_holder_; - std::unique_ptr scratch_; -}; - -TEST_P(ReadaheadRandomAccessFileTest, EmptySourceStr) { - ASSERT_EQ("", Read(0, 1)); - ASSERT_EQ("", Read(0, 0)); - ASSERT_EQ("", Read(13, 13)); -} - -TEST_P(ReadaheadRandomAccessFileTest, SourceStrLenLessThanReadaheadSize) { - std::string str = "abcdefghijklmnopqrs"; - ResetSourceStr(str); - ASSERT_EQ(str.substr(3, 4), Read(3, 4)); - ASSERT_EQ(str.substr(0, 3), Read(0, 3)); - ASSERT_EQ(str, Read(0, str.size())); - ASSERT_EQ(str.substr(7, std::min(static_cast(str.size()) - 7, 30)), - Read(7, 30)); - ASSERT_EQ("", Read(100, 100)); -} - -TEST_P(ReadaheadRandomAccessFileTest, SourceStrLenGreaterThanReadaheadSize) { - Random rng(42); - for (int k = 0; k < 100; ++k) { - size_t strLen = k * GetReadaheadSize() + - rng.Uniform(static_cast(GetReadaheadSize())); - std::string str = rng.HumanReadableString(static_cast(strLen)); - ResetSourceStr(str); - for (int test = 1; test <= 100; ++test) { - size_t offset = rng.Uniform(static_cast(strLen)); - size_t n = rng.Uniform(static_cast(GetReadaheadSize())); - ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), - Read(offset, n)); - } - } -} - -TEST_P(ReadaheadRandomAccessFileTest, ReadExceedsReadaheadSize) { - Random rng(7); - size_t strLen = 4 * GetReadaheadSize() + - rng.Uniform(static_cast(GetReadaheadSize())); - std::string str = rng.HumanReadableString(static_cast(strLen)); - ResetSourceStr(str); - for (int test = 1; test <= 100; ++test) { - size_t offset = rng.Uniform(static_cast(strLen)); - size_t n = - GetReadaheadSize() + rng.Uniform(static_cast(GetReadaheadSize())); - ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), - Read(offset, n)); - } -} - -INSTANTIATE_TEST_CASE_P( - EmptySourceStr, ReadaheadRandomAccessFileTest, - ::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList())); -INSTANTIATE_TEST_CASE_P( - SourceStrLenLessThanReadaheadSize, ReadaheadRandomAccessFileTest, - ::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList())); -INSTANTIATE_TEST_CASE_P( - SourceStrLenGreaterThanReadaheadSize, ReadaheadRandomAccessFileTest, - ::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList())); -INSTANTIATE_TEST_CASE_P( - ReadExceedsReadaheadSize, ReadaheadRandomAccessFileTest, - ::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList())); - -class ReadaheadSequentialFileTest : public testing::Test, - public testing::WithParamInterface { - public: - static std::vector GetReadaheadSizeList() { - return {1lu << 8, 1lu << 12, 1lu << 16, 1lu << 18}; - } - void SetUp() override { - readahead_size_ = GetParam(); - scratch_.reset(new char[2 * readahead_size_]); - ResetSourceStr(); - } - ReadaheadSequentialFileTest() {} - std::string Read(size_t n) { - Slice result; - Status s = test_read_holder_->Read( - n, &result, scratch_.get(), Env::IO_TOTAL /* rate_limiter_priority*/); - EXPECT_TRUE(s.ok() || s.IsInvalidArgument()); - return std::string(result.data(), result.size()); - } - void Skip(size_t n) { test_read_holder_->Skip(n); } - void ResetSourceStr(const std::string& str = "") { - auto read_holder = std::unique_ptr( - new test::SeqStringSource(str, &seq_read_count_)); - test_read_holder_.reset(new 
SequentialFileReader(std::move(read_holder), - "test", readahead_size_)); - } - size_t GetReadaheadSize() const { return readahead_size_; } - - private: - size_t readahead_size_; - std::unique_ptr test_read_holder_; - std::unique_ptr scratch_; - std::atomic seq_read_count_; -}; - -TEST_P(ReadaheadSequentialFileTest, EmptySourceStr) { - ASSERT_EQ("", Read(0)); - ASSERT_EQ("", Read(1)); - ASSERT_EQ("", Read(13)); -} - -TEST_P(ReadaheadSequentialFileTest, SourceStrLenLessThanReadaheadSize) { - std::string str = "abcdefghijklmnopqrs"; - ResetSourceStr(str); - ASSERT_EQ(str.substr(0, 3), Read(3)); - ASSERT_EQ(str.substr(3, 1), Read(1)); - ASSERT_EQ(str.substr(4), Read(str.size())); - ASSERT_EQ("", Read(100)); -} - -TEST_P(ReadaheadSequentialFileTest, SourceStrLenGreaterThanReadaheadSize) { - Random rng(42); - for (int s = 0; s < 1; ++s) { - for (int k = 0; k < 100; ++k) { - size_t strLen = k * GetReadaheadSize() + - rng.Uniform(static_cast(GetReadaheadSize())); - std::string str = rng.HumanReadableString(static_cast(strLen)); - ResetSourceStr(str); - size_t offset = 0; - for (int test = 1; test <= 100; ++test) { - size_t n = rng.Uniform(static_cast(GetReadaheadSize())); - if (s && test % 2) { - Skip(n); - } else { - ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), Read(n)); - } - offset = std::min(offset + n, strLen); - } - } - } -} - -TEST_P(ReadaheadSequentialFileTest, ReadExceedsReadaheadSize) { - Random rng(42); - for (int s = 0; s < 1; ++s) { - for (int k = 0; k < 100; ++k) { - size_t strLen = k * GetReadaheadSize() + - rng.Uniform(static_cast(GetReadaheadSize())); - std::string str = rng.HumanReadableString(static_cast(strLen)); - ResetSourceStr(str); - size_t offset = 0; - for (int test = 1; test <= 100; ++test) { - size_t n = GetReadaheadSize() + - rng.Uniform(static_cast(GetReadaheadSize())); - if (s && test % 2) { - Skip(n); - } else { - ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), Read(n)); - } - offset = std::min(offset + n, strLen); - } - } - } -} - -INSTANTIATE_TEST_CASE_P( - EmptySourceStr, ReadaheadSequentialFileTest, - ::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList())); -INSTANTIATE_TEST_CASE_P( - SourceStrLenLessThanReadaheadSize, ReadaheadSequentialFileTest, - ::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList())); -INSTANTIATE_TEST_CASE_P( - SourceStrLenGreaterThanReadaheadSize, ReadaheadSequentialFileTest, - ::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList())); -INSTANTIATE_TEST_CASE_P( - ReadExceedsReadaheadSize, ReadaheadSequentialFileTest, - ::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList())); - -namespace { -std::string GenerateLine(int n) { - std::string rv; - // Multiples of 17 characters per line, for likely bad buffer alignment - for (int i = 0; i < n; ++i) { - rv.push_back(static_cast('0' + (i % 10))); - rv.append("xxxxxxxxxxxxxxxx"); - } - return rv; -} -} // namespace - -TEST(LineFileReaderTest, LineFileReaderTest) { - const int nlines = 1000; - - std::unique_ptr mem_env(MockEnv::Create(Env::Default())); - std::shared_ptr fs = mem_env->GetFileSystem(); - // Create an input file - { - std::unique_ptr file; - ASSERT_OK( - fs->NewWritableFile("testfile", FileOptions(), &file, /*dbg*/ nullptr)); - - for (int i = 0; i < nlines; ++i) { - std::string line = GenerateLine(i); - line.push_back('\n'); - ASSERT_OK(file->Append(line, IOOptions(), /*dbg*/ nullptr)); - } - } - - // Verify with no I/O errors - { - std::unique_ptr reader; - 
ASSERT_OK(LineFileReader::Create(fs, "testfile", FileOptions(), &reader, - nullptr /* dbg */, - nullptr /* rate_limiter */)); - std::string line; - int count = 0; - while (reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)) { - ASSERT_EQ(line, GenerateLine(count)); - ++count; - ASSERT_EQ(static_cast(reader->GetLineNumber()), count); - } - ASSERT_OK(reader->GetStatus()); - ASSERT_EQ(count, nlines); - ASSERT_EQ(static_cast(reader->GetLineNumber()), count); - // And still - ASSERT_FALSE( - reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)); - ASSERT_OK(reader->GetStatus()); - ASSERT_EQ(static_cast(reader->GetLineNumber()), count); - } - - // Verify with injected I/O error - { - std::unique_ptr reader; - ASSERT_OK(LineFileReader::Create(fs, "testfile", FileOptions(), &reader, - nullptr /* dbg */, - nullptr /* rate_limiter */)); - std::string line; - int count = 0; - // Read part way through the file - while (count < nlines / 4) { - ASSERT_TRUE( - reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)); - ASSERT_EQ(line, GenerateLine(count)); - ++count; - ASSERT_EQ(static_cast(reader->GetLineNumber()), count); - } - ASSERT_OK(reader->GetStatus()); - - // Inject error - int callback_count = 0; - SyncPoint::GetInstance()->SetCallBack( - "MemFile::Read:IOStatus", [&](void* arg) { - IOStatus* status = static_cast(arg); - *status = IOStatus::Corruption("test"); - ++callback_count; - }); - SyncPoint::GetInstance()->EnableProcessing(); - - while (reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)) { - ASSERT_EQ(line, GenerateLine(count)); - ++count; - ASSERT_EQ(static_cast(reader->GetLineNumber()), count); - } - ASSERT_TRUE(reader->GetStatus().IsCorruption()); - ASSERT_LT(count, nlines / 2); - ASSERT_EQ(callback_count, 1); - - // Still get error & no retry - ASSERT_FALSE( - reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)); - ASSERT_TRUE(reader->GetStatus().IsCorruption()); - ASSERT_EQ(callback_count, 1); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - } -} - -class IOErrorEventListener : public EventListener { - public: - IOErrorEventListener() { notify_error_.store(0); } - - void OnIOError(const IOErrorInfo& io_error_info) override { - notify_error_++; - EXPECT_FALSE(io_error_info.file_path.empty()); - EXPECT_FALSE(io_error_info.io_status.ok()); - } - - size_t NotifyErrorCount() { return notify_error_; } - - bool ShouldBeNotifiedOnFileIO() override { return true; } - - private: - std::atomic notify_error_; -}; - -TEST_F(DBWritableFileWriterTest, IOErrorNotification) { - class FakeWF : public FSWritableFile { - public: - explicit FakeWF() : io_error_(false) { - file_append_errors_.store(0); - file_flush_errors_.store(0); - } - - using FSWritableFile::Append; - IOStatus Append(const Slice& /*data*/, const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - if (io_error_) { - file_append_errors_++; - return IOStatus::IOError("Fake IO error"); - } - return IOStatus::OK(); - } - - using FSWritableFile::PositionedAppend; - IOStatus PositionedAppend(const Slice& /*data*/, uint64_t, - const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - if (io_error_) { - return IOStatus::IOError("Fake IO error"); - } - return IOStatus::OK(); - } - IOStatus Close(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - IOStatus Flush(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - if (io_error_) { - 
file_flush_errors_++; - return IOStatus::IOError("Fake IO error"); - } - return IOStatus::OK(); - } - IOStatus Sync(const IOOptions& /*options*/, - IODebugContext* /*dbg*/) override { - return IOStatus::OK(); - } - - void SetIOError(bool val) { io_error_ = val; } - - void CheckCounters(int file_append_errors, int file_flush_errors) { - ASSERT_EQ(file_append_errors, file_append_errors_); - ASSERT_EQ(file_flush_errors_, file_flush_errors); - } - - protected: - bool io_error_; - std::atomic file_append_errors_; - std::atomic file_flush_errors_; - }; - - FileOptions file_options = FileOptions(); - Options options = GetDefaultOptions(); - options.create_if_missing = true; - IOErrorEventListener* listener = new IOErrorEventListener(); - options.listeners.emplace_back(listener); - - DestroyAndReopen(options); - ImmutableOptions ioptions(options); - - std::string fname = dbname_ + "/test_file"; - std::unique_ptr writable_file_ptr(new FakeWF); - - std::unique_ptr file_writer; - writable_file_ptr->SetIOError(true); - - file_writer.reset(new WritableFileWriter( - std::move(writable_file_ptr), fname, file_options, - SystemClock::Default().get(), nullptr, ioptions.stats, ioptions.listeners, - ioptions.file_checksum_gen_factory.get(), true, true)); - - FakeWF* fwf = static_cast(file_writer->writable_file()); - - fwf->SetIOError(true); - ASSERT_NOK(file_writer->Append(std::string(2 * kMb, 'a'))); - fwf->CheckCounters(1, 0); - ASSERT_EQ(listener->NotifyErrorCount(), 1); - - file_writer->reset_seen_error(); - fwf->SetIOError(true); - ASSERT_NOK(file_writer->Flush()); - fwf->CheckCounters(1, 1); - ASSERT_EQ(listener->NotifyErrorCount(), 2); - - /* No error generation */ - file_writer->reset_seen_error(); - fwf->SetIOError(false); - ASSERT_OK(file_writer->Append(std::string(2 * kMb, 'b'))); - ASSERT_EQ(listener->NotifyErrorCount(), 2); - fwf->CheckCounters(1, 1); -} - -class WritableFileWriterIOPriorityTest : public testing::Test { - protected: - // This test is to check whether the rate limiter priority can be passed - // correctly from WritableFileWriter functions to FSWritableFile functions. - - void SetUp() override { - // When op_rate_limiter_priority parameter in WritableFileWriter functions - // is the default (Env::IO_TOTAL). 
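    // (Concretely: the FakeWF below is constructed with Env::IO_HIGH and
    // asserts that every IOOptions it receives carries
    // rate_limiter_priority == IO_HIGH, i.e. with the per-op priority left at
    // its Env::IO_TOTAL default the writer is expected to fall back to the
    // file's own SetIOPriority() value.)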
- std::unique_ptr wf{new FakeWF(Env::IO_HIGH)}; - FileOptions file_options; - writer_.reset(new WritableFileWriter(std::move(wf), "" /* don't care */, - file_options)); - } - - class FakeWF : public FSWritableFile { - public: - explicit FakeWF(Env::IOPriority io_priority) { SetIOPriority(io_priority); } - ~FakeWF() override {} - - IOStatus Append(const Slice& /*data*/, const IOOptions& options, - IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - IOStatus Append(const Slice& data, const IOOptions& options, - const DataVerificationInfo& /* verification_info */, - IODebugContext* dbg) override { - return Append(data, options, dbg); - } - IOStatus PositionedAppend(const Slice& /*data*/, uint64_t /*offset*/, - const IOOptions& options, - IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - IOStatus PositionedAppend( - const Slice& /* data */, uint64_t /* offset */, - const IOOptions& options, - const DataVerificationInfo& /* verification_info */, - IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - IOStatus Truncate(uint64_t /*size*/, const IOOptions& options, - IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - IOStatus Close(const IOOptions& options, IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - IOStatus Flush(const IOOptions& options, IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - IOStatus Sync(const IOOptions& options, IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - IOStatus Fsync(const IOOptions& options, IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - uint64_t GetFileSize(const IOOptions& options, - IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return 0; - } - void GetPreallocationStatus(size_t* /*block_size*/, - size_t* /*last_allocated_block*/) override {} - size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { - return 0; - } - IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) override { - return IOStatus::OK(); - } - - IOStatus Allocate(uint64_t /*offset*/, uint64_t /*len*/, - const IOOptions& options, - IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - IOStatus RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/, - const IOOptions& options, - IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - return IOStatus::OK(); - } - - void PrepareWrite(size_t /*offset*/, size_t /*len*/, - const IOOptions& options, - IODebugContext* /*dbg*/) override { - EXPECT_EQ(options.rate_limiter_priority, io_priority_); - } - - bool IsSyncThreadSafe() const override { return true; } - }; - - std::unique_ptr writer_; -}; - -TEST_F(WritableFileWriterIOPriorityTest, Append) { - ASSERT_OK(writer_->Append(Slice("abc"))); -} - -TEST_F(WritableFileWriterIOPriorityTest, Pad) { ASSERT_OK(writer_->Pad(500)); } - -TEST_F(WritableFileWriterIOPriorityTest, Flush) { ASSERT_OK(writer_->Flush()); } - 
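The contract this fixture pins down -- the writer stamping an IO priority into the per-call IOOptions it hands to the underlying FSWritableFile -- can be sketched without any RocksDB types. Everything below (IoPri, MiniOptions, MiniFile, MiniWriter) is invented for illustration; in particular the "explicit per-op priority wins" rule in MiniWriter is an assumption of the sketch, not something the surrounding tests assert.

#include <cassert>
#include <string>

enum class IoPri { kTotal, kLow, kHigh };  // kTotal means "no explicit choice"

struct MiniOptions {
  IoPri rate_limiter_priority = IoPri::kTotal;
};

// Stand-in for the lower-level file: it just records the priority it saw.
struct MiniFile {
  IoPri file_priority = IoPri::kTotal;
  IoPri last_seen = IoPri::kTotal;
  void Append(const std::string& /*data*/, const MiniOptions& opts) {
    last_seen = opts.rate_limiter_priority;
  }
};

// Stand-in for the higher-level writer: forward an explicit per-op priority,
// otherwise fall back to the file's own priority.
struct MiniWriter {
  explicit MiniWriter(MiniFile* f) : file_(f) {}
  void Append(const std::string& data, IoPri op_pri = IoPri::kTotal) {
    MiniOptions opts;
    opts.rate_limiter_priority =
        (op_pri == IoPri::kTotal) ? file_->file_priority : op_pri;
    file_->Append(data, opts);
  }
  MiniFile* file_;
};

int main() {
  MiniFile f;
  f.file_priority = IoPri::kHigh;
  MiniWriter w(&f);
  w.Append("abc");               // default op priority -> file's IO_HIGH
  assert(f.last_seen == IoPri::kHigh);
  w.Append("abc", IoPri::kLow);  // explicit op priority is forwarded as-is
  assert(f.last_seen == IoPri::kLow);
  return 0;
}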
-TEST_F(WritableFileWriterIOPriorityTest, Close) { ASSERT_OK(writer_->Close()); } - -TEST_F(WritableFileWriterIOPriorityTest, Sync) { - ASSERT_OK(writer_->Sync(false)); - ASSERT_OK(writer_->Sync(true)); -} - -TEST_F(WritableFileWriterIOPriorityTest, SyncWithoutFlush) { - ASSERT_OK(writer_->SyncWithoutFlush(false)); - ASSERT_OK(writer_->SyncWithoutFlush(true)); -} - -TEST_F(WritableFileWriterIOPriorityTest, BasicOp) { - EnvOptions env_options; - env_options.bytes_per_sync = kMb; - std::unique_ptr wf(new FakeWF(Env::IO_HIGH)); - std::unique_ptr writer( - new WritableFileWriter(std::move(wf), "" /* don't care */, env_options)); - Random r(301); - Status s; - std::unique_ptr large_buf(new char[10 * kMb]); - for (int i = 0; i < 1000; i++) { - int skew_limit = (i < 700) ? 10 : 15; - uint32_t num = r.Skewed(skew_limit) * 100 + r.Uniform(100); - s = writer->Append(Slice(large_buf.get(), num)); - ASSERT_OK(s); - - // Flush in a chance of 1/10. - if (r.Uniform(10) == 0) { - s = writer->Flush(); - ASSERT_OK(s); - } - } - s = writer->Close(); - ASSERT_OK(s); -} -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/util/filelock_test.cc b/util/filelock_test.cc deleted file mode 100644 index 69947a732..000000000 --- a/util/filelock_test.cc +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -#include - -#include "rocksdb/env.h" -#include "rocksdb/status.h" -#ifdef __FreeBSD__ -#include -#include -#endif -#include - -#include "test_util/testharness.h" -#include "util/coding.h" -#include "util/string_util.h" - -namespace ROCKSDB_NAMESPACE { - -class LockTest : public testing::Test { - public: - static LockTest* current_; - std::string file_; - ROCKSDB_NAMESPACE::Env* env_; - - LockTest() - : file_(test::PerThreadDBPath("db_testlock_file")), - env_(ROCKSDB_NAMESPACE::Env::Default()) { - current_ = this; - } - - ~LockTest() override {} - - Status LockFile(FileLock** db_lock) { return env_->LockFile(file_, db_lock); } - - Status UnlockFile(FileLock* db_lock) { return env_->UnlockFile(db_lock); } - - bool AssertFileIsLocked() { - return CheckFileLock(/* lock_expected = */ true); - } - - bool AssertFileIsNotLocked() { - return CheckFileLock(/* lock_expected = */ false); - } - - bool CheckFileLock(bool lock_expected) { - // We need to fork to check the fcntl lock as we need - // to open and close the file from a different process - // to avoid either releasing the lock on close, or not - // contending for it when requesting a lock. - -#ifdef OS_WIN - - // WaitForSingleObject and GetExitCodeProcess can do what waitpid does. 
- // TODO - implement on Windows - return true; - -#else - - pid_t pid = fork(); - if (0 == pid) { - // child process - int exit_val = EXIT_FAILURE; - int fd = open(file_.c_str(), O_RDWR | O_CREAT, 0644); - if (fd < 0) { - // could not open file, could not check if it was locked - fprintf(stderr, "Open on on file %s failed.\n", file_.c_str()); - exit(exit_val); - } - - struct flock f; - memset(&f, 0, sizeof(f)); - f.l_type = (F_WRLCK); - f.l_whence = SEEK_SET; - f.l_start = 0; - f.l_len = 0; // Lock/unlock entire file - int value = fcntl(fd, F_SETLK, &f); - if (value == -1) { - if (lock_expected) { - exit_val = EXIT_SUCCESS; - } - } else { - if (!lock_expected) { - exit_val = EXIT_SUCCESS; - } - } - close(fd); // lock is released for child process - exit(exit_val); - } else if (pid > 0) { - // parent process - int status; - while (-1 == waitpid(pid, &status, 0)) - ; - if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { - // child process exited with non success status - return false; - } else { - return true; - } - } else { - fprintf(stderr, "Fork failed\n"); - return false; - } - return false; - -#endif - } -}; -LockTest* LockTest::current_; - -TEST_F(LockTest, LockBySameThread) { - FileLock* lock1; - FileLock* lock2; - - // acquire a lock on a file - ASSERT_OK(LockFile(&lock1)); - - // check the file is locked - ASSERT_TRUE(AssertFileIsLocked()); - - // re-acquire the lock on the same file. This should fail. - Status s = LockFile(&lock2); - ASSERT_TRUE(s.IsIOError()); -#ifndef OS_WIN - // Validate that error message contains current thread ID. - ASSERT_TRUE(s.ToString().find(std::to_string( - Env::Default()->GetThreadID())) != std::string::npos); -#endif - - // check the file is locked - ASSERT_TRUE(AssertFileIsLocked()); - - // release the lock - ASSERT_OK(UnlockFile(lock1)); - - // check the file is not locked - ASSERT_TRUE(AssertFileIsNotLocked()); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/util/hash_test.cc b/util/hash_test.cc deleted file mode 100644 index 72112b044..000000000 --- a/util/hash_test.cc +++ /dev/null @@ -1,853 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include "util/hash.h" - -#include -#include -#include - -#include "test_util/testharness.h" -#include "util/coding.h" -#include "util/coding_lean.h" -#include "util/hash128.h" -#include "util/math.h" -#include "util/math128.h" - -using ROCKSDB_NAMESPACE::BijectiveHash2x64; -using ROCKSDB_NAMESPACE::BijectiveUnhash2x64; -using ROCKSDB_NAMESPACE::DecodeFixed64; -using ROCKSDB_NAMESPACE::EncodeFixed32; -using ROCKSDB_NAMESPACE::EndianSwapValue; -using ROCKSDB_NAMESPACE::GetSliceHash64; -using ROCKSDB_NAMESPACE::Hash; -using ROCKSDB_NAMESPACE::Hash128; -using ROCKSDB_NAMESPACE::Hash2x64; -using ROCKSDB_NAMESPACE::Hash64; -using ROCKSDB_NAMESPACE::Lower32of64; -using ROCKSDB_NAMESPACE::Lower64of128; -using ROCKSDB_NAMESPACE::ReverseBits; -using ROCKSDB_NAMESPACE::Slice; -using ROCKSDB_NAMESPACE::Unsigned128; -using ROCKSDB_NAMESPACE::Upper32of64; -using ROCKSDB_NAMESPACE::Upper64of128; - -// The hash algorithm is part of the file format, for example for the Bloom -// filters. Test that the hash values are stable for a set of random strings of -// varying lengths. -TEST(HashTest, Values) { - constexpr uint32_t kSeed = 0xbc9f1d34; // Same as BloomHash. - - EXPECT_EQ(Hash("", 0, kSeed), 3164544308u); - EXPECT_EQ(Hash("\x08", 1, kSeed), 422599524u); - EXPECT_EQ(Hash("\x17", 1, kSeed), 3168152998u); - EXPECT_EQ(Hash("\x9a", 1, kSeed), 3195034349u); - EXPECT_EQ(Hash("\x1c", 1, kSeed), 2651681383u); - EXPECT_EQ(Hash("\x4d\x76", 2, kSeed), 2447836956u); - EXPECT_EQ(Hash("\x52\xd5", 2, kSeed), 3854228105u); - EXPECT_EQ(Hash("\x91\xf7", 2, kSeed), 31066776u); - EXPECT_EQ(Hash("\xd6\x27", 2, kSeed), 1806091603u); - EXPECT_EQ(Hash("\x30\x46\x0b", 3, kSeed), 3808221797u); - EXPECT_EQ(Hash("\x56\xdc\xd6", 3, kSeed), 2157698265u); - EXPECT_EQ(Hash("\xd4\x52\x33", 3, kSeed), 1721992661u); - EXPECT_EQ(Hash("\x6a\xb5\xf4", 3, kSeed), 2469105222u); - EXPECT_EQ(Hash("\x67\x53\x81\x1c", 4, kSeed), 118283265u); - EXPECT_EQ(Hash("\x69\xb8\xc0\x88", 4, kSeed), 3416318611u); - EXPECT_EQ(Hash("\x1e\x84\xaf\x2d", 4, kSeed), 3315003572u); - EXPECT_EQ(Hash("\x46\xdc\x54\xbe", 4, kSeed), 447346355u); - EXPECT_EQ(Hash("\xd0\x7a\x6e\xea\x56", 5, kSeed), 4255445370u); - EXPECT_EQ(Hash("\x86\x83\xd5\xa4\xd8", 5, kSeed), 2390603402u); - EXPECT_EQ(Hash("\xb7\x46\xbb\x77\xce", 5, kSeed), 2048907743u); - EXPECT_EQ(Hash("\x6c\xa8\xbc\xe5\x99", 5, kSeed), 2177978500u); - EXPECT_EQ(Hash("\x5c\x5e\xe1\xa0\x73\x81", 6, kSeed), 1036846008u); - EXPECT_EQ(Hash("\x08\x5d\x73\x1c\xe5\x2e", 6, kSeed), 229980482u); - EXPECT_EQ(Hash("\x42\xfb\xf2\x52\xb4\x10", 6, kSeed), 3655585422u); - EXPECT_EQ(Hash("\x73\xe1\xff\x56\x9c\xce", 6, kSeed), 3502708029u); - EXPECT_EQ(Hash("\x5c\xbe\x97\x75\x54\x9a\x52", 7, kSeed), 815120748u); - EXPECT_EQ(Hash("\x16\x82\x39\x49\x88\x2b\x36", 7, kSeed), 3056033698u); - EXPECT_EQ(Hash("\x59\x77\xf0\xa7\x24\xf4\x78", 7, kSeed), 587205227u); - EXPECT_EQ(Hash("\xd3\xa5\x7c\x0e\xc0\x02\x07", 7, kSeed), 2030937252u); - EXPECT_EQ(Hash("\x31\x1b\x98\x75\x96\x22\xd3\x9a", 8, kSeed), 469635402u); - EXPECT_EQ(Hash("\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 8, kSeed), 3530274698u); - EXPECT_EQ(Hash("\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 8, kSeed), 1974545809u); - EXPECT_EQ(Hash("\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 8, kSeed), 3563570120u); - EXPECT_EQ(Hash("\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 9, kSeed), - 2706087434u); - EXPECT_EQ(Hash("\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 9, kSeed), - 1534654151u); - EXPECT_EQ(Hash("\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 9, kSeed), - 2355554696u); - 
EXPECT_EQ(Hash("\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 9, kSeed), - 1400800912u); - EXPECT_EQ(Hash("\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 10, kSeed), - 3420325137u); - EXPECT_EQ(Hash("\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 10, kSeed), - 3427803584u); - EXPECT_EQ(Hash("\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 10, kSeed), - 1152407945u); - EXPECT_EQ(Hash("\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 10, kSeed), - 3382479516u); -} - -// The hash algorithm is part of the file format, for example for the Bloom -// filters. -TEST(HashTest, Hash64Misc) { - constexpr uint32_t kSeed = 0; // Same as GetSliceHash64 - - for (char fill : {'\0', 'a', '1', '\xff'}) { - const size_t max_size = 1000; - const std::string str(max_size, fill); - - for (size_t size = 0; size <= max_size; ++size) { - uint64_t here = Hash64(str.data(), size, kSeed); - - // Must be same as unseeded Hash64 and GetSliceHash64 - EXPECT_EQ(here, Hash64(str.data(), size)); - EXPECT_EQ(here, GetSliceHash64(Slice(str.data(), size))); - - // Upper and Lower must reconstruct hash - EXPECT_EQ(here, (uint64_t{Upper32of64(here)} << 32) | Lower32of64(here)); - EXPECT_EQ(here, (uint64_t{Upper32of64(here)} << 32) + Lower32of64(here)); - EXPECT_EQ(here, (uint64_t{Upper32of64(here)} << 32) ^ Lower32of64(here)); - - // Seed changes hash value (with high probability) - for (uint64_t var_seed = 1; var_seed != 0; var_seed <<= 1) { - EXPECT_NE(here, Hash64(str.data(), size, var_seed)); - } - - // Size changes hash value (with high probability) - size_t max_smaller_by = std::min(size_t{30}, size); - for (size_t smaller_by = 1; smaller_by <= max_smaller_by; ++smaller_by) { - EXPECT_NE(here, Hash64(str.data(), size - smaller_by, kSeed)); - } - } - } -} - -// Test that hash values are "non-trivial" for "trivial" inputs -TEST(HashTest, Hash64Trivial) { - // Thorough test too slow for regression testing - constexpr bool thorough = false; - - // For various seeds, make sure hash of empty string is not zero. - constexpr uint64_t max_seed = thorough ? 0x1000000 : 0x10000; - for (uint64_t seed = 0; seed < max_seed; ++seed) { - uint64_t here = Hash64("", 0, seed); - EXPECT_NE(Lower32of64(here), 0u); - EXPECT_NE(Upper32of64(here), 0u); - } - - // For standard seed, make sure hash of small strings are not zero - constexpr uint32_t kSeed = 0; // Same as GetSliceHash64 - char input[4]; - constexpr int max_len = thorough ? 3 : 2; - for (int len = 1; len <= max_len; ++len) { - for (uint32_t i = 0; (i >> (len * 8)) == 0; ++i) { - EncodeFixed32(input, i); - uint64_t here = Hash64(input, len, kSeed); - EXPECT_NE(Lower32of64(here), 0u); - EXPECT_NE(Upper32of64(here), 0u); - } - } -} - -// Test that the hash values are stable for a set of random strings of -// varying small lengths. 
-TEST(HashTest, Hash64SmallValueSchema) { - constexpr uint32_t kSeed = 0; // Same as GetSliceHash64 - - EXPECT_EQ(Hash64("", 0, kSeed), uint64_t{5999572062939766020u}); - EXPECT_EQ(Hash64("\x08", 1, kSeed), uint64_t{583283813901344696u}); - EXPECT_EQ(Hash64("\x17", 1, kSeed), uint64_t{16175549975585474943u}); - EXPECT_EQ(Hash64("\x9a", 1, kSeed), uint64_t{16322991629225003903u}); - EXPECT_EQ(Hash64("\x1c", 1, kSeed), uint64_t{13269285487706833447u}); - EXPECT_EQ(Hash64("\x4d\x76", 2, kSeed), uint64_t{6859542833406258115u}); - EXPECT_EQ(Hash64("\x52\xd5", 2, kSeed), uint64_t{4919611532550636959u}); - EXPECT_EQ(Hash64("\x91\xf7", 2, kSeed), uint64_t{14199427467559720719u}); - EXPECT_EQ(Hash64("\xd6\x27", 2, kSeed), uint64_t{12292689282614532691u}); - EXPECT_EQ(Hash64("\x30\x46\x0b", 3, kSeed), uint64_t{11404699285340020889u}); - EXPECT_EQ(Hash64("\x56\xdc\xd6", 3, kSeed), uint64_t{12404347133785524237u}); - EXPECT_EQ(Hash64("\xd4\x52\x33", 3, kSeed), uint64_t{15853805298481534034u}); - EXPECT_EQ(Hash64("\x6a\xb5\xf4", 3, kSeed), uint64_t{16863488758399383382u}); - EXPECT_EQ(Hash64("\x67\x53\x81\x1c", 4, kSeed), - uint64_t{9010661983527562386u}); - EXPECT_EQ(Hash64("\x69\xb8\xc0\x88", 4, kSeed), - uint64_t{6611781377647041447u}); - EXPECT_EQ(Hash64("\x1e\x84\xaf\x2d", 4, kSeed), - uint64_t{15290969111616346501u}); - EXPECT_EQ(Hash64("\x46\xdc\x54\xbe", 4, kSeed), - uint64_t{7063754590279313623u}); - EXPECT_EQ(Hash64("\xd0\x7a\x6e\xea\x56", 5, kSeed), - uint64_t{6384167718754869899u}); - EXPECT_EQ(Hash64("\x86\x83\xd5\xa4\xd8", 5, kSeed), - uint64_t{16874407254108011067u}); - EXPECT_EQ(Hash64("\xb7\x46\xbb\x77\xce", 5, kSeed), - uint64_t{16809880630149135206u}); - EXPECT_EQ(Hash64("\x6c\xa8\xbc\xe5\x99", 5, kSeed), - uint64_t{1249038833153141148u}); - EXPECT_EQ(Hash64("\x5c\x5e\xe1\xa0\x73\x81", 6, kSeed), - uint64_t{17358142495308219330u}); - EXPECT_EQ(Hash64("\x08\x5d\x73\x1c\xe5\x2e", 6, kSeed), - uint64_t{4237646583134806322u}); - EXPECT_EQ(Hash64("\x42\xfb\xf2\x52\xb4\x10", 6, kSeed), - uint64_t{4373664924115234051u}); - EXPECT_EQ(Hash64("\x73\xe1\xff\x56\x9c\xce", 6, kSeed), - uint64_t{12012981210634596029u}); - EXPECT_EQ(Hash64("\x5c\xbe\x97\x75\x54\x9a\x52", 7, kSeed), - uint64_t{5716522398211028826u}); - EXPECT_EQ(Hash64("\x16\x82\x39\x49\x88\x2b\x36", 7, kSeed), - uint64_t{15604531309862565013u}); - EXPECT_EQ(Hash64("\x59\x77\xf0\xa7\x24\xf4\x78", 7, kSeed), - uint64_t{8601330687345614172u}); - EXPECT_EQ(Hash64("\xd3\xa5\x7c\x0e\xc0\x02\x07", 7, kSeed), - uint64_t{8088079329364056942u}); - EXPECT_EQ(Hash64("\x31\x1b\x98\x75\x96\x22\xd3\x9a", 8, kSeed), - uint64_t{9844314944338447628u}); - EXPECT_EQ(Hash64("\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 8, kSeed), - uint64_t{10973293517982163143u}); - EXPECT_EQ(Hash64("\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 8, kSeed), - uint64_t{9986007080564743219u}); - EXPECT_EQ(Hash64("\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 8, kSeed), - uint64_t{1729303145008254458u}); - EXPECT_EQ(Hash64("\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 9, kSeed), - uint64_t{13253403748084181481u}); - EXPECT_EQ(Hash64("\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 9, kSeed), - uint64_t{7768754303876232188u}); - EXPECT_EQ(Hash64("\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 9, kSeed), - uint64_t{12439346786701492u}); - EXPECT_EQ(Hash64("\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 9, kSeed), - uint64_t{10841838338450144690u}); - EXPECT_EQ(Hash64("\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 10, kSeed), - uint64_t{12883919702069153152u}); - EXPECT_EQ(Hash64("\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 10, kSeed), - 
uint64_t{12692903507676842188u}); - EXPECT_EQ(Hash64("\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 10, kSeed), - uint64_t{6540985900674032620u}); - EXPECT_EQ(Hash64("\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 10, kSeed), - uint64_t{10551812464348219044u}); -} - -std::string Hash64TestDescriptor(const char *repeat, size_t limit) { - const char *mod61_encode = - "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - - std::string input; - while (input.size() < limit) { - input.append(repeat); - } - std::string rv; - for (size_t i = 0; i < limit; ++i) { - uint64_t h = GetSliceHash64(Slice(input.data(), i)); - rv.append(1, mod61_encode[static_cast(h % 61)]); - } - return rv; -} - -// XXPH3 changes its algorithm for various sizes up through 250 bytes, so -// we need to check the stability of larger sizes also. -TEST(HashTest, Hash64LargeValueSchema) { - // Each of these derives a "descriptor" from the hash values for all - // lengths up to 430. - // Note that "c" is common for the zero-length string. - EXPECT_EQ( - Hash64TestDescriptor("foo", 430), - "cRhyWsY67B6klRA1udmOuiYuX7IthyGBKqbeosz2hzVglWCmQx8nEdnpkvPfYX56Up2OWOTV" - "lTzfAoYwvtqKzjD8E9xttR2unelbXbIV67NUe6bOO23BxaSFRcA3njGu5cUWfgwOqNoTsszp" - "uPvKRP6qaUR5VdoBkJUCFIefd7edlNK5mv6JYWaGdwxehg65hTkTmjZoPKxTZo4PLyzbL9U4" - "xt12ITSfeP2MfBHuLI2z2pDlBb44UQKVMx27LEoAHsdLp3WfWfgH3sdRBRCHm33UxCM4QmE2" - "xJ7gqSvNwTeH7v9GlC8zWbGroyD3UVNeShMLx29O7tH1biemLULwAHyIw8zdtLMDpEJ8m2ic" - "l6Lb4fDuuFNAs1GCVUthjK8CV8SWI8Rsz5THSwn5CGhpqUwSZcFknjwWIl5rNCvDxXJqYr"); - // Note that "1EeRk" is common for "Rocks" - EXPECT_EQ( - Hash64TestDescriptor("Rocks", 430), - "c1EeRkrzgOYWLA8PuhJrwTePJewoB44WdXYDfhbk3ZxTqqg25WlPExDl7IKIQLJvnA6gJxxn" - "9TCSLkFGfJeXehaSS1GBqWSzfhEH4VXiXIUCuxJXxtKXcSC6FrNIQGTZbYDiUOLD6Y5inzrF" - "9etwQhXUBanw55xAUdNMFQAm2GjJ6UDWp2mISLiMMkLjANWMKLaZMqaFLX37qB4MRO1ooVRv" - "zSvaNRSCLxlggQCasQq8icWjzf3HjBlZtU6pd4rkaUxSzHqmo9oM5MghbU5Rtxg8wEfO7lVN" - "5wdMONYecslQTwjZUpO1K3LDf3K3XK6sUXM6ShQQ3RHmMn2acB4YtTZ3QQcHYJSOHn2DuWpa" - "Q8RqzX5lab92YmOLaCdOHq1BPsM7SIBzMdLgePNsJ1vvMALxAaoDUHPxoFLO2wx18IXnyX"); - EXPECT_EQ( - Hash64TestDescriptor("RocksDB", 430), - "c1EeRkukbkb28wLTahwD2sfUhZzaBEnF8SVrxnPVB6A7b8CaAl3UKsDZISF92GSq2wDCukOq" - "Jgrsp7A3KZhDiLW8dFXp8UPqPxMCRlMdZeVeJ2dJxrmA6cyt99zkQFj7ELbut6jAeVqARFnw" - "fnWVXOsaLrq7bDCbMcns2DKvTaaqTCLMYxI7nhtLpFN1jR755FRQFcOzrrDbh7QhypjdvlYw" - "cdAMSZgp9JMHxbM23wPSuH6BOFgxejz35PScZfhDPvTOxIy1jc3MZsWrMC3P324zNolO7JdW" - "CX2I5UDKjjaEJfxbgVgJIXxtQGlmj2xkO5sPpjULQV4X2HlY7FQleJ4QRaJIB4buhCA4vUTF" - "eMFlxCIYUpTCsal2qsmnGOWa8WCcefrohMjDj1fjzSvSaQwlpyR1GZHF2uPOoQagiCpHpm"); -} - -TEST(HashTest, Hash128Misc) { - constexpr uint32_t kSeed = 0; // Same as GetSliceHash128 - - for (char fill : {'\0', 'a', '1', '\xff', 'e'}) { - const size_t max_size = 1000; - std::string str(max_size, fill); - - if (fill == 'e') { - // Use different characters to check endianness handling - for (size_t i = 0; i < str.size(); ++i) { - str[i] += static_cast(i); - } - } - - for (size_t size = 0; size <= max_size; ++size) { - Unsigned128 here = Hash128(str.data(), size, kSeed); - - // Must be same as unseeded Hash128 and GetSliceHash128 - EXPECT_EQ(here, Hash128(str.data(), size)); - EXPECT_EQ(here, GetSliceHash128(Slice(str.data(), size))); - { - uint64_t hi, lo; - Hash2x64(str.data(), size, &hi, &lo); - EXPECT_EQ(Lower64of128(here), lo); - EXPECT_EQ(Upper64of128(here), hi); - } - if (size == 16) { - const uint64_t in_hi = DecodeFixed64(str.data() + 8); - const uint64_t in_lo = DecodeFixed64(str.data()); - uint64_t hi, lo; - 
BijectiveHash2x64(in_hi, in_lo, &hi, &lo); - EXPECT_EQ(Lower64of128(here), lo); - EXPECT_EQ(Upper64of128(here), hi); - uint64_t un_hi, un_lo; - BijectiveUnhash2x64(hi, lo, &un_hi, &un_lo); - EXPECT_EQ(in_lo, un_lo); - EXPECT_EQ(in_hi, un_hi); - } - - // Upper and Lower must reconstruct hash - EXPECT_EQ(here, - (Unsigned128{Upper64of128(here)} << 64) | Lower64of128(here)); - EXPECT_EQ(here, - (Unsigned128{Upper64of128(here)} << 64) ^ Lower64of128(here)); - - // Seed changes hash value (with high probability) - for (uint64_t var_seed = 1; var_seed != 0; var_seed <<= 1) { - Unsigned128 seeded = Hash128(str.data(), size, var_seed); - EXPECT_NE(here, seeded); - // Must match seeded Hash2x64 - { - uint64_t hi, lo; - Hash2x64(str.data(), size, var_seed, &hi, &lo); - EXPECT_EQ(Lower64of128(seeded), lo); - EXPECT_EQ(Upper64of128(seeded), hi); - } - if (size == 16) { - const uint64_t in_hi = DecodeFixed64(str.data() + 8); - const uint64_t in_lo = DecodeFixed64(str.data()); - uint64_t hi, lo; - BijectiveHash2x64(in_hi, in_lo, var_seed, &hi, &lo); - EXPECT_EQ(Lower64of128(seeded), lo); - EXPECT_EQ(Upper64of128(seeded), hi); - uint64_t un_hi, un_lo; - BijectiveUnhash2x64(hi, lo, var_seed, &un_hi, &un_lo); - EXPECT_EQ(in_lo, un_lo); - EXPECT_EQ(in_hi, un_hi); - } - } - - // Size changes hash value (with high probability) - size_t max_smaller_by = std::min(size_t{30}, size); - for (size_t smaller_by = 1; smaller_by <= max_smaller_by; ++smaller_by) { - EXPECT_NE(here, Hash128(str.data(), size - smaller_by, kSeed)); - } - } - } -} - -// Test that hash values are "non-trivial" for "trivial" inputs -TEST(HashTest, Hash128Trivial) { - // Thorough test too slow for regression testing - constexpr bool thorough = false; - - // For various seeds, make sure hash of empty string is not zero. - constexpr uint64_t max_seed = thorough ? 0x1000000 : 0x10000; - for (uint64_t seed = 0; seed < max_seed; ++seed) { - Unsigned128 here = Hash128("", 0, seed); - EXPECT_NE(Lower64of128(here), 0u); - EXPECT_NE(Upper64of128(here), 0u); - } - - // For standard seed, make sure hash of small strings are not zero - constexpr uint32_t kSeed = 0; // Same as GetSliceHash128 - char input[4]; - constexpr int max_len = thorough ? 3 : 2; - for (int len = 1; len <= max_len; ++len) { - for (uint32_t i = 0; (i >> (len * 8)) == 0; ++i) { - EncodeFixed32(input, i); - Unsigned128 here = Hash128(input, len, kSeed); - EXPECT_NE(Lower64of128(here), 0u); - EXPECT_NE(Upper64of128(here), 0u); - } - } -} - -std::string Hash128TestDescriptor(const char *repeat, size_t limit) { - const char *mod61_encode = - "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - - std::string input; - while (input.size() < limit) { - input.append(repeat); - } - std::string rv; - for (size_t i = 0; i < limit; ++i) { - auto h = GetSliceHash128(Slice(input.data(), i)); - uint64_t h2 = Upper64of128(h) + Lower64of128(h); - rv.append(1, mod61_encode[static_cast(h2 % 61)]); - } - return rv; -} - -// XXH3 changes its algorithm for various sizes up through 250 bytes, so -// we need to check the stability of larger sizes also. -TEST(HashTest, Hash128ValueSchema) { - // Each of these derives a "descriptor" from the hash values for all - // lengths up to 430. - // Note that "b" is common for the zero-length string. 
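  // (Character i of a descriptor encodes GetSliceHash128 of the first i bytes
  // of the repeated pattern, so character 0 always encodes the empty slice
  // and is therefore identical across patterns -- "b" here, just as "c" was
  // shared by all of the 64-bit descriptors above.)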
- EXPECT_EQ( - Hash128TestDescriptor("foo", 430), - "bUMA3As8n9I4vNGhThXlEevxZlyMcbb6TYAlIKJ2f5ponsv99q962rYclQ7u3gfnRdCDQ5JI" - "2LrGUaCycbXrvLFe4SjgRb9RQwCfrnmNQ7VSEwSKMnkGCK3bDbXSrnIh5qLXdtvIZklbJpGH" - "Dqr93BlqF9ubTnOSYkSdx89XvQqflMIW8bjfQp9BPjQejWOeEQspnN1D3sfgVdFhpaQdHYA5" - "pI2XcPlCMFPxvrFuRr7joaDvjNe9IUZaunLPMewuXmC3EL95h52Ju3D7y9RNKhgYxMTrA84B" - "yJrMvyjdm3vlBxet4EN7v2GEyjbGuaZW9UL6lrX6PghJDg7ACfLGdxNbH3qXM4zaiG2RKnL5" - "S3WXKR78RBB5fRFQ8KDIEQjHFvSNsc3GrAEi6W8P2lv8JMTzjBODO2uN4wadVQFT9wpGfV"); - // Note that "35D2v" is common for "Rocks" - EXPECT_EQ( - Hash128TestDescriptor("Rocks", 430), - "b35D2vzvklFVDqJmyLRXyApwGGO3EAT3swhe8XJAN3mY2UVPglzdmydxcba6JI2tSvwO6zSu" - "ANpjSM7tc9G5iMhsa7R8GfyCXRO1TnLg7HvdWNdgGGBirxZR68BgT7TQsYJt6zyEyISeXI1n" - "MXA48Xo7dWfJeYN6Z4KWlqZY7TgFXGbks9AX4ehZNSGtIhdO5i58qlgVX1bEejeOVaCcjC79" - "67DrMfOKds7rUQzjBa77sMPcoPW1vu6ljGJPZH3XkRyDMZ1twxXKkNxN3tE8nR7JHwyqBAxE" - "fTcjbOWrLZ1irWxRSombD8sGDEmclgF11IxqEhe3Rt7gyofO3nExGckKkS9KfRqsCHbiUyva" - "JGkJwUHRXaZnh58b4i1Ei9aQKZjXlvIVDixoZrjcNaH5XJIJlRZce9Z9t82wYapTpckYSg"); - EXPECT_EQ( - Hash128TestDescriptor("RocksDB", 430), - "b35D2vFUst3XDZCRlSrhmYYakmqImV97LbBsV6EZlOEQpUPH1d1sD3xMKAPlA5UErHehg5O7" - "n966fZqhAf3hRc24kGCLfNAWjyUa7vSNOx3IcPoTyVRFZeFlcCtfl7t1QJumHOCpS33EBmBF" - "hvK13QjBbDWYWeHQhJhgV9Mqbx17TIcvUkEnYZxb8IzWNmjVsJG44Z7v52DjGj1ZzS62S2Vv" - "qWcDO7apvH5VHg68E9Wl6nXP21vlmUqEH9GeWRehfWVvY7mUpsAg5drHHQyDSdiMceiUuUxJ" - "XJqHFcDdzbbPk7xDvbLgWCKvH8k3MpQNWOmbSSRDdAP6nGlDjoTToYkcqVREHJzztSWAAq5h" - "GHSUNJ6OxsMHhf8EhXfHtKyUzRmPtjYyeckQcGmrQfFFLidc6cjMDKCdBG6c6HVBrS7H2R"); -} - -TEST(FastRange32Test, Values) { - using ROCKSDB_NAMESPACE::FastRange32; - // Zero range - EXPECT_EQ(FastRange32(0, 0), 0U); - EXPECT_EQ(FastRange32(123, 0), 0U); - EXPECT_EQ(FastRange32(0xffffffff, 0), 0U); - - // One range - EXPECT_EQ(FastRange32(0, 1), 0U); - EXPECT_EQ(FastRange32(123, 1), 0U); - EXPECT_EQ(FastRange32(0xffffffff, 1), 0U); - - // Two range - EXPECT_EQ(FastRange32(0, 2), 0U); - EXPECT_EQ(FastRange32(123, 2), 0U); - EXPECT_EQ(FastRange32(0x7fffffff, 2), 0U); - EXPECT_EQ(FastRange32(0x80000000, 2), 1U); - EXPECT_EQ(FastRange32(0xffffffff, 2), 1U); - - // Seven range - EXPECT_EQ(FastRange32(0, 7), 0U); - EXPECT_EQ(FastRange32(123, 7), 0U); - EXPECT_EQ(FastRange32(613566756, 7), 0U); - EXPECT_EQ(FastRange32(613566757, 7), 1U); - EXPECT_EQ(FastRange32(1227133513, 7), 1U); - EXPECT_EQ(FastRange32(1227133514, 7), 2U); - // etc. 
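  // (Assuming the usual multiply-shift reduction, FastRange32(x, r) ==
  //  uint32_t((uint64_t{x} * r) >> 32), these breakpoints are exactly where
  //  the high 32 bits of the product tick over: 613566756 * 7 = 4294967292 is
  //  still below 2^32, while 613566757 * 7 = 4294967299 = 2^32 + 3.)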
- EXPECT_EQ(FastRange32(0xffffffff, 7), 6U); - - // Big - EXPECT_EQ(FastRange32(1, 0x80000000), 0U); - EXPECT_EQ(FastRange32(2, 0x80000000), 1U); - EXPECT_EQ(FastRange32(4, 0x7fffffff), 1U); - EXPECT_EQ(FastRange32(4, 0x80000000), 2U); - EXPECT_EQ(FastRange32(0xffffffff, 0x7fffffff), 0x7ffffffeU); - EXPECT_EQ(FastRange32(0xffffffff, 0x80000000), 0x7fffffffU); -} - -TEST(FastRange64Test, Values) { - using ROCKSDB_NAMESPACE::FastRange64; - // Zero range - EXPECT_EQ(FastRange64(0, 0), 0U); - EXPECT_EQ(FastRange64(123, 0), 0U); - EXPECT_EQ(FastRange64(0xffffFFFF, 0), 0U); - EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0), 0U); - - // One range - EXPECT_EQ(FastRange64(0, 1), 0U); - EXPECT_EQ(FastRange64(123, 1), 0U); - EXPECT_EQ(FastRange64(0xffffFFFF, 1), 0U); - EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 1), 0U); - - // Two range - EXPECT_EQ(FastRange64(0, 2), 0U); - EXPECT_EQ(FastRange64(123, 2), 0U); - EXPECT_EQ(FastRange64(0xffffFFFF, 2), 0U); - EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 2), 0U); - EXPECT_EQ(FastRange64(0x8000000000000000, 2), 1U); - EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 2), 1U); - - // Seven range - EXPECT_EQ(FastRange64(0, 7), 0U); - EXPECT_EQ(FastRange64(123, 7), 0U); - EXPECT_EQ(FastRange64(0xffffFFFF, 7), 0U); - EXPECT_EQ(FastRange64(2635249153387078802, 7), 0U); - EXPECT_EQ(FastRange64(2635249153387078803, 7), 1U); - EXPECT_EQ(FastRange64(5270498306774157604, 7), 1U); - EXPECT_EQ(FastRange64(5270498306774157605, 7), 2U); - EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 7), 3U); - EXPECT_EQ(FastRange64(0x8000000000000000, 7), 3U); - EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 7), 6U); - - // Big but 32-bit range - EXPECT_EQ(FastRange64(0x100000000, 0x80000000), 0U); - EXPECT_EQ(FastRange64(0x200000000, 0x80000000), 1U); - EXPECT_EQ(FastRange64(0x400000000, 0x7fffFFFF), 1U); - EXPECT_EQ(FastRange64(0x400000000, 0x80000000), 2U); - EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0x7fffFFFF), 0x7fffFFFEU); - EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0x80000000), 0x7fffFFFFU); - - // Big, > 32-bit range -#if SIZE_MAX == UINT64_MAX - EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 0x4200000002), 0x2100000000U); - EXPECT_EQ(FastRange64(0x8000000000000000, 0x4200000002), 0x2100000001U); - - EXPECT_EQ(FastRange64(0x0000000000000000, 420000000002), 0U); - EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 420000000002), 210000000000U); - EXPECT_EQ(FastRange64(0x8000000000000000, 420000000002), 210000000001U); - EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 420000000002), 420000000001U); - - EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0xffffFFFFffffFFFF), - 0xffffFFFFffffFFFEU); -#endif -} - -TEST(FastRangeGenericTest, Values) { - using ROCKSDB_NAMESPACE::FastRangeGeneric; - // Generic (including big and small) - // Note that FastRangeGeneric is also tested indirectly above via - // FastRange32 and FastRange64. 
- EXPECT_EQ( - FastRangeGeneric(uint64_t{0x8000000000000000}, uint64_t{420000000002}), - uint64_t{210000000001}); - EXPECT_EQ(FastRangeGeneric(uint64_t{0x8000000000000000}, uint16_t{12468}), - uint16_t{6234}); - EXPECT_EQ(FastRangeGeneric(uint32_t{0x80000000}, uint16_t{12468}), - uint16_t{6234}); - // Not recommended for typical use because for example this could fail on - // some platforms and pass on others: - // EXPECT_EQ(FastRangeGeneric(static_cast(0x80000000), - // uint16_t{12468}), - // uint16_t{6234}); -} - -// for inspection of disassembly -uint32_t FastRange32(uint32_t hash, uint32_t range) { - return ROCKSDB_NAMESPACE::FastRange32(hash, range); -} - -// for inspection of disassembly -size_t FastRange64(uint64_t hash, size_t range) { - return ROCKSDB_NAMESPACE::FastRange64(hash, range); -} - -// Tests for math.h / math128.h (not worth a separate test binary) -using ROCKSDB_NAMESPACE::BitParity; -using ROCKSDB_NAMESPACE::BitsSetToOne; -using ROCKSDB_NAMESPACE::ConstexprFloorLog2; -using ROCKSDB_NAMESPACE::CountTrailingZeroBits; -using ROCKSDB_NAMESPACE::DecodeFixed128; -using ROCKSDB_NAMESPACE::DecodeFixedGeneric; -using ROCKSDB_NAMESPACE::DownwardInvolution; -using ROCKSDB_NAMESPACE::EncodeFixed128; -using ROCKSDB_NAMESPACE::EncodeFixedGeneric; -using ROCKSDB_NAMESPACE::FloorLog2; -using ROCKSDB_NAMESPACE::Lower64of128; -using ROCKSDB_NAMESPACE::Multiply64to128; -using ROCKSDB_NAMESPACE::Unsigned128; -using ROCKSDB_NAMESPACE::Upper64of128; - -int blah(int x) { return DownwardInvolution(x); } - -template -static void test_BitOps() { - // This complex code is to generalize to 128-bit values. Otherwise - // we could just use = static_cast(0x5555555555555555ULL); - T everyOtherBit = 0; - for (unsigned i = 0; i < sizeof(T); ++i) { - everyOtherBit = (everyOtherBit << 8) | T{0x55}; - } - - // This one built using bit operations, as our 128-bit layer - // might not implement arithmetic such as subtraction. 
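  // (vm1 is maintained as (T{1} << i) - 1 using only shifts and ors: see the
  //  "vm1 = (vm1 << 1) | 1;" update at the bottom of the loop.)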
- T vm1 = 0; // "v minus one" - - for (int i = 0; i < int{8 * sizeof(T)}; ++i) { - T v = T{1} << i; - // If we could directly use arithmetic: - // T vm1 = static_cast(v - 1); - - // FloorLog2 - if (v > 0) { - EXPECT_EQ(FloorLog2(v), i); - EXPECT_EQ(ConstexprFloorLog2(v), i); - } - if (vm1 > 0) { - EXPECT_EQ(FloorLog2(vm1), i - 1); - EXPECT_EQ(ConstexprFloorLog2(vm1), i - 1); - EXPECT_EQ(FloorLog2(everyOtherBit & vm1), (i - 1) & ~1); - EXPECT_EQ(ConstexprFloorLog2(everyOtherBit & vm1), (i - 1) & ~1); - } - - // CountTrailingZeroBits - if (v != 0) { - EXPECT_EQ(CountTrailingZeroBits(v), i); - } - if (vm1 != 0) { - EXPECT_EQ(CountTrailingZeroBits(vm1), 0); - } - if (i < int{8 * sizeof(T)} - 1) { - EXPECT_EQ(CountTrailingZeroBits(~vm1 & everyOtherBit), (i + 1) & ~1); - } - - // BitsSetToOne - EXPECT_EQ(BitsSetToOne(v), 1); - EXPECT_EQ(BitsSetToOne(vm1), i); - EXPECT_EQ(BitsSetToOne(vm1 & everyOtherBit), (i + 1) / 2); - - // BitParity - EXPECT_EQ(BitParity(v), 1); - EXPECT_EQ(BitParity(vm1), i & 1); - EXPECT_EQ(BitParity(vm1 & everyOtherBit), ((i + 1) / 2) & 1); - - // EndianSwapValue - T ev = T{1} << (((sizeof(T) - 1 - (i / 8)) * 8) + i % 8); - EXPECT_EQ(EndianSwapValue(v), ev); - - // ReverseBits - EXPECT_EQ(ReverseBits(v), static_cast(T{1} << (8 * sizeof(T) - 1 - i))); -#ifdef HAVE_UINT128_EXTENSION // Uses multiplication - if (std::is_unsigned::value) { // Technical UB on signed type - T rv = T{1} << (8 * sizeof(T) - 1 - i); - EXPECT_EQ(ReverseBits(vm1), static_cast(rv * ~T{1})); - } -#endif - - // DownwardInvolution - { - T misc = static_cast(/*random*/ 0xc682cd153d0e3279U + - i * /*random*/ 0x9b3972f3bea0baa3U); - if constexpr (sizeof(T) > 8) { - misc = (misc << 64) | (/*random*/ 0x52af031a38ced62dU + - i * /*random*/ 0x936f803d9752ddc3U); - } - T misc_masked = misc & vm1; - EXPECT_LE(misc_masked, vm1); - T di_misc_masked = DownwardInvolution(misc_masked); - EXPECT_LE(di_misc_masked, vm1); - if (misc_masked > 0) { - // Highest-order 1 in same position - EXPECT_EQ(FloorLog2(misc_masked), FloorLog2(di_misc_masked)); - } - // Validate involution property on short value - EXPECT_EQ(DownwardInvolution(di_misc_masked), misc_masked); - - // Validate involution property on large value - T di_misc = DownwardInvolution(misc); - EXPECT_EQ(DownwardInvolution(di_misc), misc); - // Highest-order 1 in same position - if (misc > 0) { - EXPECT_EQ(FloorLog2(misc), FloorLog2(di_misc)); - } - - // Validate distributes over xor. - // static_casts to avoid numerical promotion effects. 
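      // (Distributing over xor is the same as saying DownwardInvolution is
      //  linear over GF(2) -- each output bit is an XOR of a fixed subset of
      //  the input bits -- so f(a ^ b) == f(a) ^ f(b) for any a, b.)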
- EXPECT_EQ(DownwardInvolution(static_cast(misc_masked ^ vm1)), - static_cast(di_misc_masked ^ DownwardInvolution(vm1))); - T misc2 = static_cast(misc >> 1); - EXPECT_EQ(DownwardInvolution(static_cast(misc ^ misc2)), - static_cast(di_misc ^ DownwardInvolution(misc2))); - - // Choose some small number of bits to pull off to test combined - // uniqueness guarantee - int in_bits = i % 7; - unsigned in_mask = (unsigned{1} << in_bits) - 1U; - // IMPLICIT: int out_bits = 8 - in_bits; - std::vector seen(256, false); - for (int j = 0; j < 255; ++j) { - T t_in = misc ^ static_cast(j); - unsigned in = static_cast(t_in); - unsigned out = static_cast(DownwardInvolution(t_in)); - unsigned val = ((out << in_bits) | (in & in_mask)) & 255U; - EXPECT_FALSE(seen[val]); - seen[val] = true; - } - - if (i + 8 < int{8 * sizeof(T)}) { - // Also test manipulating bits in the middle of input is - // bijective in bottom of output - seen = std::vector(256, false); - for (int j = 0; j < 255; ++j) { - T in = misc ^ (static_cast(j) << i); - unsigned val = static_cast(DownwardInvolution(in)) & 255U; - EXPECT_FALSE(seen[val]); - seen[val] = true; - } - } - } - - vm1 = (vm1 << 1) | 1; - } - - EXPECT_EQ(ConstexprFloorLog2(T{1}), 0); - EXPECT_EQ(ConstexprFloorLog2(T{2}), 1); - EXPECT_EQ(ConstexprFloorLog2(T{3}), 1); - EXPECT_EQ(ConstexprFloorLog2(T{42}), 5); -} - -TEST(MathTest, BitOps) { - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); - test_BitOps(); -} - -TEST(MathTest, BitOps128) { test_BitOps(); } - -TEST(MathTest, Math128) { - const Unsigned128 sixteenHexOnes = 0x1111111111111111U; - const Unsigned128 thirtyHexOnes = (sixteenHexOnes << 56) | sixteenHexOnes; - const Unsigned128 sixteenHexTwos = 0x2222222222222222U; - const Unsigned128 thirtyHexTwos = (sixteenHexTwos << 56) | sixteenHexTwos; - - // v will slide from all hex ones to all hex twos - Unsigned128 v = thirtyHexOnes; - for (int i = 0; i <= 30; ++i) { - // Test bitwise operations - EXPECT_EQ(BitsSetToOne(v), 30); - EXPECT_EQ(BitsSetToOne(~v), 128 - 30); - EXPECT_EQ(BitsSetToOne(v & thirtyHexOnes), 30 - i); - EXPECT_EQ(BitsSetToOne(v | thirtyHexOnes), 30 + i); - EXPECT_EQ(BitsSetToOne(v ^ thirtyHexOnes), 2 * i); - EXPECT_EQ(BitsSetToOne(v & thirtyHexTwos), i); - EXPECT_EQ(BitsSetToOne(v | thirtyHexTwos), 60 - i); - EXPECT_EQ(BitsSetToOne(v ^ thirtyHexTwos), 60 - 2 * i); - - // Test comparisons - EXPECT_EQ(v == thirtyHexOnes, i == 0); - EXPECT_EQ(v == thirtyHexTwos, i == 30); - EXPECT_EQ(v > thirtyHexOnes, i > 0); - EXPECT_EQ(v > thirtyHexTwos, false); - EXPECT_EQ(v >= thirtyHexOnes, true); - EXPECT_EQ(v >= thirtyHexTwos, i == 30); - EXPECT_EQ(v < thirtyHexOnes, false); - EXPECT_EQ(v < thirtyHexTwos, i < 30); - EXPECT_EQ(v <= thirtyHexOnes, i == 0); - EXPECT_EQ(v <= thirtyHexTwos, true); - - // Update v, clearing upper-most byte - v = ((v << 12) >> 8) | 0x2; - } - - for (int i = 0; i < 128; ++i) { - // Test shifts - Unsigned128 sl = thirtyHexOnes << i; - Unsigned128 sr = thirtyHexOnes >> i; - EXPECT_EQ(BitsSetToOne(sl), std::min(30, 32 - i / 4)); - EXPECT_EQ(BitsSetToOne(sr), std::max(0, 30 - (i + 3) / 4)); - EXPECT_EQ(BitsSetToOne(sl & sr), i % 2 ? 
0 : std::max(0, 30 - i / 2)); - } - - // Test 64x64->128 multiply - Unsigned128 product = - Multiply64to128(0x1111111111111111U, 0x2222222222222222U); - EXPECT_EQ(Lower64of128(product), 2295594818061633090U); - EXPECT_EQ(Upper64of128(product), 163971058432973792U); -} - -TEST(MathTest, Coding128) { - const char *in = "_1234567890123456"; - // Note: in + 1 is likely unaligned - Unsigned128 decoded = DecodeFixed128(in + 1); - EXPECT_EQ(Lower64of128(decoded), 0x3837363534333231U); - EXPECT_EQ(Upper64of128(decoded), 0x3635343332313039U); - char out[18]; - out[0] = '_'; - EncodeFixed128(out + 1, decoded); - out[17] = '\0'; - EXPECT_EQ(std::string(in), std::string(out)); -} - -TEST(MathTest, CodingGeneric) { - const char *in = "_1234567890123456"; - // Decode - // Note: in + 1 is likely unaligned - Unsigned128 decoded128 = DecodeFixedGeneric(in + 1); - EXPECT_EQ(Lower64of128(decoded128), 0x3837363534333231U); - EXPECT_EQ(Upper64of128(decoded128), 0x3635343332313039U); - - uint64_t decoded64 = DecodeFixedGeneric(in + 1); - EXPECT_EQ(decoded64, 0x3837363534333231U); - - uint32_t decoded32 = DecodeFixedGeneric(in + 1); - EXPECT_EQ(decoded32, 0x34333231U); - - uint16_t decoded16 = DecodeFixedGeneric(in + 1); - EXPECT_EQ(decoded16, 0x3231U); - - // Encode - char out[18]; - out[0] = '_'; - memset(out + 1, '\0', 17); - EncodeFixedGeneric(out + 1, decoded128); - EXPECT_EQ(std::string(in), std::string(out)); - - memset(out + 1, '\0', 9); - EncodeFixedGeneric(out + 1, decoded64); - EXPECT_EQ(std::string("_12345678"), std::string(out)); - - memset(out + 1, '\0', 5); - EncodeFixedGeneric(out + 1, decoded32); - EXPECT_EQ(std::string("_1234"), std::string(out)); - - memset(out + 1, '\0', 3); - EncodeFixedGeneric(out + 1, decoded16); - EXPECT_EQ(std::string("_12"), std::string(out)); -} - -int main(int argc, char **argv) { - fprintf(stderr, "NPHash64 id: %x\n", - static_cast(ROCKSDB_NAMESPACE::GetSliceNPHash64("RocksDB"))); - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - - return RUN_ALL_TESTS(); -} diff --git a/util/heap_test.cc b/util/heap_test.cc deleted file mode 100644 index bbb93324f..000000000 --- a/util/heap_test.cc +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "util/heap.h" - -#include - -#include -#include -#include -#include - -#include "port/stack_trace.h" - -#ifndef GFLAGS -const int64_t FLAGS_iters = 100000; -#else -#include "util/gflags_compat.h" -DEFINE_int64(iters, 100000, "number of pseudo-random operations in each test"); -#endif // GFLAGS - -/* - * Compares the custom heap implementation in util/heap.h against - * std::priority_queue on a pseudo-random sequence of operations. - */ - -namespace ROCKSDB_NAMESPACE { - -using HeapTestValue = uint64_t; -using Params = std::tuple; - -class HeapTest : public ::testing::TestWithParam {}; - -TEST_P(HeapTest, Test) { - // This test performs the same pseudorandom sequence of operations on a - // BinaryHeap and an std::priority_queue, comparing output. The three - // possible operations are insert, replace top and pop. - // - // Insert is chosen slightly more often than the others so that the size of - // the heap slowly grows. 
Once the size heats the MAX_HEAP_SIZE limit, we - // disallow inserting until the heap becomes empty, testing the "draining" - // scenario. - - const auto MAX_HEAP_SIZE = std::get<0>(GetParam()); - const auto MAX_VALUE = std::get<1>(GetParam()); - const auto RNG_SEED = std::get<2>(GetParam()); - - BinaryHeap heap; - std::priority_queue ref; - - std::mt19937 rng(static_cast(RNG_SEED)); - std::uniform_int_distribution value_dist(0, MAX_VALUE); - int ndrains = 0; - bool draining = false; // hit max size, draining until we empty the heap - size_t size = 0; - for (int64_t i = 0; i < FLAGS_iters; ++i) { - if (size == 0) { - draining = false; - } - - if (!draining && (size == 0 || std::bernoulli_distribution(0.4)(rng))) { - // insert - HeapTestValue val = value_dist(rng); - heap.push(val); - ref.push(val); - ++size; - if (size == MAX_HEAP_SIZE) { - draining = true; - ++ndrains; - } - } else if (std::bernoulli_distribution(0.5)(rng)) { - // replace top - HeapTestValue val = value_dist(rng); - heap.replace_top(val); - ref.pop(); - ref.push(val); - } else { - // pop - assert(size > 0); - heap.pop(); - ref.pop(); - --size; - } - - // After every operation, check that the public methods give the same - // results - assert((size == 0) == ref.empty()); - ASSERT_EQ(size == 0, heap.empty()); - if (size > 0) { - ASSERT_EQ(ref.top(), heap.top()); - } - } - - // Probabilities should be set up to occasionally hit the max heap size and - // drain it - assert(ndrains > 0); - - heap.clear(); - ASSERT_TRUE(heap.empty()); -} - -// Basic test, MAX_VALUE = 3*MAX_HEAP_SIZE (occasional duplicates) -INSTANTIATE_TEST_CASE_P(Basic, HeapTest, - ::testing::Values(Params(1000, 3000, - 0x1b575cf05b708945))); -// Mid-size heap with small values (many duplicates) -INSTANTIATE_TEST_CASE_P(SmallValues, HeapTest, - ::testing::Values(Params(100, 10, 0x5ae213f7bd5dccd0))); -// Small heap, large value range (no duplicates) -INSTANTIATE_TEST_CASE_P(SmallHeap, HeapTest, - ::testing::Values(Params(10, ULLONG_MAX, - 0x3e1fa8f4d01707cf))); -// Two-element heap -INSTANTIATE_TEST_CASE_P(TwoElementHeap, HeapTest, - ::testing::Values(Params(2, 5, 0x4b5e13ea988c6abc))); -// One-element heap -INSTANTIATE_TEST_CASE_P(OneElementHeap, HeapTest, - ::testing::Values(Params(1, 3, 0x176a1019ab0b612e))); - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); -#ifdef GFLAGS - GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); -#endif // GFLAGS - return RUN_ALL_TESTS(); -} diff --git a/util/random_test.cc b/util/random_test.cc deleted file mode 100644 index 1aa62c5da..000000000 --- a/util/random_test.cc +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include "util/random.h" - -#include -#include - -#include "test_util/testharness.h" - -using ROCKSDB_NAMESPACE::Random; - -TEST(RandomTest, Uniform) { - const int average = 20; - for (uint32_t seed : {0, 1, 2, 37, 4096}) { - Random r(seed); - for (int range : {1, 2, 8, 12, 100}) { - std::vector counts(range, 0); - - for (int i = 0; i < range * average; ++i) { - ++counts.at(r.Uniform(range)); - } - int max_variance = static_cast(std::sqrt(range) * 2 + 4); - for (int i = 0; i < range; ++i) { - EXPECT_GE(counts[i], std::max(1, average - max_variance)); - EXPECT_LE(counts[i], average + max_variance + 1); - } - } - } -} - -TEST(RandomTest, OneIn) { - Random r(42); - for (int range : {1, 2, 8, 12, 100, 1234}) { - const int average = 100; - int count = 0; - for (int i = 0; i < average * range; ++i) { - if (r.OneIn(range)) { - ++count; - } - } - if (range == 1) { - EXPECT_EQ(count, average); - } else { - int max_variance = static_cast(std::sqrt(average) * 1.5); - EXPECT_GE(count, average - max_variance); - EXPECT_LE(count, average + max_variance); - } - } -} - -TEST(RandomTest, OneInOpt) { - Random r(42); - for (int range : {-12, 0, 1, 2, 8, 12, 100, 1234}) { - const int average = 100; - int count = 0; - for (int i = 0; i < average * range; ++i) { - if (r.OneInOpt(range)) { - ++count; - } - } - if (range < 1) { - EXPECT_EQ(count, 0); - } else if (range == 1) { - EXPECT_EQ(count, average); - } else { - int max_variance = static_cast(std::sqrt(average) * 1.5); - EXPECT_GE(count, average - max_variance); - EXPECT_LE(count, average + max_variance); - } - } -} - -TEST(RandomTest, PercentTrue) { - Random r(42); - for (int pct : {-12, 0, 1, 2, 10, 50, 90, 98, 99, 100, 1234}) { - const int samples = 10000; - - int count = 0; - for (int i = 0; i < samples; ++i) { - if (r.PercentTrue(pct)) { - ++count; - } - } - if (pct <= 0) { - EXPECT_EQ(count, 0); - } else if (pct >= 100) { - EXPECT_EQ(count, samples); - } else { - int est = (count * 100 + (samples / 2)) / samples; - EXPECT_EQ(est, pct); - } - } -} - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - - return RUN_ALL_TESTS(); -} diff --git a/util/rate_limiter_test.cc b/util/rate_limiter_test.cc deleted file mode 100644 index cda134867..000000000 --- a/util/rate_limiter_test.cc +++ /dev/null @@ -1,476 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/rate_limiter.h" - -#include -#include -#include -#include - -#include "db/db_test_util.h" -#include "port/port.h" -#include "rocksdb/system_clock.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "util/random.h" - -namespace ROCKSDB_NAMESPACE { - -// TODO(yhchiang): the rate will not be accurate when we run test in parallel. 
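The fixtures below all construct a GenericRateLimiter from (rate_bytes_per_sec, refill_period_us, fairness) and then reason about GetTotalBytesThrough(), GetSingleBurstBytes() and waiting behaviour. As a rough mental model only -- the ToyRateLimiter here is an invented, single-threaded sketch, not the GenericRateLimiter implementation, which adds per-priority queues, fairness and auto-tuning -- a limiter of this shape behaves like a token bucket refilled once per period:

#include <cassert>
#include <cstdint>

// Invented toy model: hand out rate_bytes_per_sec in refill_period_us sized
// slices; Request() "waits" by advancing a virtual clock until the bytes fit.
class ToyRateLimiter {
 public:
  ToyRateLimiter(int64_t rate_bytes_per_sec, int64_t refill_period_us)
      : period_us_(refill_period_us),
        per_refill_(rate_bytes_per_sec * refill_period_us / 1000000) {}

  // Returns how many microseconds the caller had to wait for the grant.
  int64_t Request(int64_t bytes) {
    int64_t waited_us = 0;
    while (available_ < bytes) {
      available_ += per_refill_;
      waited_us += period_us_;
    }
    available_ -= bytes;
    total_bytes_through_ += bytes;
    return waited_us;
  }

  int64_t GetTotalBytesThrough() const { return total_bytes_through_; }
  int64_t GetSingleBurstBytes() const { return per_refill_; }

 private:
  int64_t period_us_;
  int64_t per_refill_;
  int64_t available_ = 0;
  int64_t total_bytes_through_ = 0;
};

int main() {
  // 200 bytes/sec with a 1 second refill period, mirroring the
  // NewGenericRateLimiter(200, 1000 * 1000, 10) setup used repeatedly below.
  ToyRateLimiter limiter(200, 1000 * 1000);
  assert(limiter.GetSingleBurstBytes() == 200);
  assert(limiter.Request(200) == 1000 * 1000);  // one refill period of waiting
  assert(limiter.GetTotalBytesThrough() == 200);
  return 0;
}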
-class RateLimiterTest : public testing::Test { - protected: - ~RateLimiterTest() override { - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearAllCallBacks(); - } -}; - -TEST_F(RateLimiterTest, OverflowRate) { - GenericRateLimiter limiter(std::numeric_limits::max(), 1000, 10, - RateLimiter::Mode::kWritesOnly, - SystemClock::Default(), false /* auto_tuned */); - ASSERT_GT(limiter.GetSingleBurstBytes(), 1000000000ll); -} - -TEST_F(RateLimiterTest, StartStop) { - std::unique_ptr limiter(NewGenericRateLimiter(100, 100, 10)); -} - -TEST_F(RateLimiterTest, GetTotalBytesThrough) { - std::unique_ptr limiter(NewGenericRateLimiter( - 200 /* rate_bytes_per_sec */, 1000 * 1000 /* refill_period_us */, - 10 /* fairness */)); - for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { - ASSERT_EQ(limiter->GetTotalBytesThrough(static_cast(i)), - 0); - } - - std::int64_t request_byte = 200; - std::int64_t request_byte_sum = 0; - for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { - limiter->Request(request_byte, static_cast(i), - nullptr /* stats */, RateLimiter::OpType::kWrite); - request_byte_sum += request_byte; - } - - for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { - EXPECT_EQ(limiter->GetTotalBytesThrough(static_cast(i)), - request_byte) - << "Failed to track total_bytes_through_ correctly when IOPriority = " - << static_cast(i); - } - EXPECT_EQ(limiter->GetTotalBytesThrough(Env::IO_TOTAL), request_byte_sum) - << "Failed to track total_bytes_through_ correctly when IOPriority = " - "Env::IO_TOTAL"; -} - -TEST_F(RateLimiterTest, GetTotalRequests) { - std::unique_ptr limiter(NewGenericRateLimiter( - 200 /* rate_bytes_per_sec */, 1000 * 1000 /* refill_period_us */, - 10 /* fairness */)); - for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { - ASSERT_EQ(limiter->GetTotalRequests(static_cast(i)), 0); - } - - std::int64_t total_requests_sum = 0; - for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { - limiter->Request(200, static_cast(i), nullptr /* stats */, - RateLimiter::OpType::kWrite); - total_requests_sum += 1; - } - - for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { - EXPECT_EQ(limiter->GetTotalRequests(static_cast(i)), 1) - << "Failed to track total_requests_ correctly when IOPriority = " - << static_cast(i); - } - EXPECT_EQ(limiter->GetTotalRequests(Env::IO_TOTAL), total_requests_sum) - << "Failed to track total_requests_ correctly when IOPriority = " - "Env::IO_TOTAL"; -} - -TEST_F(RateLimiterTest, GetTotalPendingRequests) { - std::unique_ptr limiter(NewGenericRateLimiter( - 200 /* rate_bytes_per_sec */, 1000 * 1000 /* refill_period_us */, - 10 /* fairness */)); - int64_t total_pending_requests = 0; - for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { - ASSERT_OK(limiter->GetTotalPendingRequests( - &total_pending_requests, static_cast(i))); - ASSERT_EQ(total_pending_requests, 0); - } - // This is a variable for making sure the following callback is called - // and the assertions in it are indeed excuted - bool nonzero_pending_requests_verified = false; - SyncPoint::GetInstance()->SetCallBack( - "GenericRateLimiter::Request:PostEnqueueRequest", [&](void* arg) { - port::Mutex* request_mutex = (port::Mutex*)arg; - // We temporarily unlock the mutex so that the following - // GetTotalPendingRequests() can acquire it - request_mutex->Unlock(); - for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { - EXPECT_OK(limiter->GetTotalPendingRequests( - &total_pending_requests, static_cast(i))) - << "Failed to return total pending requests for priority level = " - << 
static_cast(i); - if (i == Env::IO_USER || i == Env::IO_TOTAL) { - EXPECT_EQ(total_pending_requests, 1) - << "Failed to correctly return total pending requests for " - "priority level = " - << static_cast(i); - } else { - EXPECT_EQ(total_pending_requests, 0) - << "Failed to correctly return total pending requests for " - "priority level = " - << static_cast(i); - } - } - // We lock the mutex again so that the request thread can resume running - // with the mutex locked - request_mutex->Lock(); - nonzero_pending_requests_verified = true; - }); - - SyncPoint::GetInstance()->EnableProcessing(); - limiter->Request(200, Env::IO_USER, nullptr /* stats */, - RateLimiter::OpType::kWrite); - ASSERT_EQ(nonzero_pending_requests_verified, true); - for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { - EXPECT_OK(limiter->GetTotalPendingRequests(&total_pending_requests, - static_cast(i))) - << "Failed to return total pending requests for priority level = " - << static_cast(i); - EXPECT_EQ(total_pending_requests, 0) - << "Failed to correctly return total pending requests for priority " - "level = " - << static_cast(i); - } - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "GenericRateLimiter::Request:PostEnqueueRequest"); -} - -TEST_F(RateLimiterTest, Modes) { - for (auto mode : {RateLimiter::Mode::kWritesOnly, - RateLimiter::Mode::kReadsOnly, RateLimiter::Mode::kAllIo}) { - GenericRateLimiter limiter(2000 /* rate_bytes_per_sec */, - 1000 * 1000 /* refill_period_us */, - 10 /* fairness */, mode, SystemClock::Default(), - false /* auto_tuned */); - limiter.Request(1000 /* bytes */, Env::IO_HIGH, nullptr /* stats */, - RateLimiter::OpType::kRead); - if (mode == RateLimiter::Mode::kWritesOnly) { - ASSERT_EQ(0, limiter.GetTotalBytesThrough(Env::IO_HIGH)); - } else { - ASSERT_EQ(1000, limiter.GetTotalBytesThrough(Env::IO_HIGH)); - } - - limiter.Request(1000 /* bytes */, Env::IO_HIGH, nullptr /* stats */, - RateLimiter::OpType::kWrite); - if (mode == RateLimiter::Mode::kAllIo) { - ASSERT_EQ(2000, limiter.GetTotalBytesThrough(Env::IO_HIGH)); - } else { - ASSERT_EQ(1000, limiter.GetTotalBytesThrough(Env::IO_HIGH)); - } - } -} - -TEST_F(RateLimiterTest, GeneratePriorityIterationOrder) { - std::unique_ptr limiter(NewGenericRateLimiter( - 200 /* rate_bytes_per_sec */, 1000 * 1000 /* refill_period_us */, - 10 /* fairness */)); - - bool possible_random_one_in_fairness_results_for_high_mid_pri[4][2] = { - {false, false}, {false, true}, {true, false}, {true, true}}; - std::vector possible_priority_iteration_orders[4] = { - {Env::IO_USER, Env::IO_HIGH, Env::IO_MID, Env::IO_LOW}, - {Env::IO_USER, Env::IO_HIGH, Env::IO_LOW, Env::IO_MID}, - {Env::IO_USER, Env::IO_MID, Env::IO_LOW, Env::IO_HIGH}, - {Env::IO_USER, Env::IO_LOW, Env::IO_MID, Env::IO_HIGH}}; - - for (int i = 0; i < 4; ++i) { - // These are variables for making sure the following callbacks are called - // and the assertion in the last callback is indeed excuted - bool high_pri_iterated_after_mid_low_pri_set = false; - bool mid_pri_itereated_after_low_pri_set = false; - bool pri_iteration_order_verified = false; - SyncPoint::GetInstance()->SetCallBack( - "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" - "PostRandomOneInFairnessForHighPri", - [&](void* arg) { - bool* high_pri_iterated_after_mid_low_pri = (bool*)arg; - *high_pri_iterated_after_mid_low_pri = - possible_random_one_in_fairness_results_for_high_mid_pri[i][0]; - high_pri_iterated_after_mid_low_pri_set = true; - }); - - 
SyncPoint::GetInstance()->SetCallBack( - "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" - "PostRandomOneInFairnessForMidPri", - [&](void* arg) { - bool* mid_pri_itereated_after_low_pri = (bool*)arg; - *mid_pri_itereated_after_low_pri = - possible_random_one_in_fairness_results_for_high_mid_pri[i][1]; - mid_pri_itereated_after_low_pri_set = true; - }); - - SyncPoint::GetInstance()->SetCallBack( - "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" - "PreReturnPriIterationOrder", - [&](void* arg) { - std::vector* pri_iteration_order = - (std::vector*)arg; - EXPECT_EQ(*pri_iteration_order, possible_priority_iteration_orders[i]) - << "Failed to generate priority iteration order correctly when " - "high_pri_iterated_after_mid_low_pri = " - << possible_random_one_in_fairness_results_for_high_mid_pri[i][0] - << ", mid_pri_itereated_after_low_pri = " - << possible_random_one_in_fairness_results_for_high_mid_pri[i][1] - << std::endl; - pri_iteration_order_verified = true; - }); - - SyncPoint::GetInstance()->EnableProcessing(); - limiter->Request(200 /* request max bytes to drain so that refill and order - generation will be triggered every time - GenericRateLimiter::Request() is called */ - , - Env::IO_USER, nullptr /* stats */, - RateLimiter::OpType::kWrite); - ASSERT_EQ(high_pri_iterated_after_mid_low_pri_set, true); - ASSERT_EQ(mid_pri_itereated_after_low_pri_set, true); - ASSERT_EQ(pri_iteration_order_verified, true); - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->ClearCallBack( - "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" - "PreReturnPriIterationOrder"); - SyncPoint::GetInstance()->ClearCallBack( - "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" - "PostRandomOneInFairnessForMidPri"); - SyncPoint::GetInstance()->ClearCallBack( - "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" - "PostRandomOneInFairnessForHighPri"); - } -} - -TEST_F(RateLimiterTest, Rate) { - auto* env = Env::Default(); - struct Arg { - Arg(int32_t _target_rate, int _burst) - : limiter(NewGenericRateLimiter(_target_rate /* rate_bytes_per_sec */, - 100 * 1000 /* refill_period_us */, - 10 /* fairness */)), - request_size(_target_rate / - 10 /* refill period here is 1/10 second */), - burst(_burst) {} - std::unique_ptr limiter; - int32_t request_size; - int burst; - }; - - auto writer = [](void* p) { - const auto& thread_clock = SystemClock::Default(); - auto* arg = static_cast(p); - // Test for 2 seconds - auto until = thread_clock->NowMicros() + 2 * 1000000; - Random r((uint32_t)(thread_clock->NowNanos() % - std::numeric_limits::max())); - while (thread_clock->NowMicros() < until) { - for (int i = 0; i < static_cast(r.Skewed(arg->burst * 2) + 1); ++i) { - arg->limiter->Request(r.Uniform(arg->request_size - 1) + 1, - Env::IO_USER, nullptr /* stats */, - RateLimiter::OpType::kWrite); - } - - for (int i = 0; i < static_cast(r.Skewed(arg->burst) + 1); ++i) { - arg->limiter->Request(r.Uniform(arg->request_size - 1) + 1, - Env::IO_HIGH, nullptr /* stats */, - RateLimiter::OpType::kWrite); - } - - for (int i = 0; i < static_cast(r.Skewed(arg->burst / 2 + 1) + 1); - ++i) { - arg->limiter->Request(r.Uniform(arg->request_size - 1) + 1, Env::IO_MID, - nullptr /* stats */, RateLimiter::OpType::kWrite); - } - - arg->limiter->Request(r.Uniform(arg->request_size - 1) + 1, Env::IO_LOW, - nullptr /* stats */, RateLimiter::OpType::kWrite); - } - }; - - int samples = 0; - int samples_at_minimum = 0; - - for (int i = 1; i <= 16; i *= 2) { - int32_t 
target = i * 1024 * 10; - Arg arg(target, i / 4 + 1); - int64_t old_total_bytes_through = 0; - for (int iter = 1; iter <= 2; ++iter) { - // second iteration changes the target dynamically - if (iter == 2) { - target *= 2; - arg.limiter->SetBytesPerSecond(target); - } - auto start = env->NowMicros(); - for (int t = 0; t < i; ++t) { - env->StartThread(writer, &arg); - } - env->WaitForJoin(); - - auto elapsed = env->NowMicros() - start; - double rate = - (arg.limiter->GetTotalBytesThrough() - old_total_bytes_through) * - 1000000.0 / elapsed; - old_total_bytes_through = arg.limiter->GetTotalBytesThrough(); - fprintf(stderr, - "request size [1 - %" PRIi32 "], limit %" PRIi32 - " KB/sec, actual rate: %lf KB/sec, elapsed %.2lf seconds\n", - arg.request_size - 1, target / 1024, rate / 1024, - elapsed / 1000000.0); - - ++samples; - if (rate / target >= 0.80) { - ++samples_at_minimum; - } - ASSERT_LE(rate / target, 1.25); - } - } - - // This can fail due to slow execution speed, like when using valgrind or in - // heavily loaded CI environments - bool skip_minimum_rate_check = -#if (defined(CIRCLECI) && defined(OS_MACOSX)) || defined(ROCKSDB_VALGRIND_RUN) - true; -#else - getenv("SANDCASTLE"); -#endif - if (skip_minimum_rate_check) { - fprintf(stderr, "Skipped minimum rate check (%d / %d passed)\n", - samples_at_minimum, samples); - } else { - ASSERT_EQ(samples_at_minimum, samples); - } -} - -TEST_F(RateLimiterTest, LimitChangeTest) { - // starvation test when limit changes to a smaller value - int64_t refill_period = 1000 * 1000; - auto* env = Env::Default(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - struct Arg { - Arg(int32_t _request_size, Env::IOPriority _pri, - std::shared_ptr _limiter) - : request_size(_request_size), pri(_pri), limiter(_limiter) {} - int32_t request_size; - Env::IOPriority pri; - std::shared_ptr limiter; - }; - - auto writer = [](void* p) { - auto* arg = static_cast(p); - arg->limiter->Request(arg->request_size, arg->pri, nullptr /* stats */, - RateLimiter::OpType::kWrite); - }; - - for (uint32_t i = 1; i <= 16; i <<= 1) { - int32_t target = i * 1024 * 10; - // refill per second - for (int iter = 0; iter < 2; iter++) { - std::shared_ptr limiter = - std::make_shared( - target, refill_period, 10, RateLimiter::Mode::kWritesOnly, - SystemClock::Default(), false /* auto_tuned */); - // After "GenericRateLimiter::Request:1" the mutex is held until the bytes - // are refilled. This test could be improved to change the limit when lock - // is released in `TimedWait()`. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"GenericRateLimiter::Request", - "RateLimiterTest::LimitChangeTest:changeLimitStart"}, - {"RateLimiterTest::LimitChangeTest:changeLimitEnd", - "GenericRateLimiter::Request:1"}}); - Arg arg(target, Env::IO_HIGH, limiter); - // The idea behind is to start a request first, then before it refills, - // update limit to a different value (2X/0.5X). No starvation should - // be guaranteed under any situation - // TODO(lightmark): more test cases are welcome. 
- env->StartThread(writer, &arg); - int32_t new_limit = (target << 1) >> (iter << 1); - TEST_SYNC_POINT("RateLimiterTest::LimitChangeTest:changeLimitStart"); - arg.limiter->SetBytesPerSecond(new_limit); - TEST_SYNC_POINT("RateLimiterTest::LimitChangeTest:changeLimitEnd"); - env->WaitForJoin(); - fprintf(stderr, - "[COMPLETE] request size %" PRIi32 " KB, new limit %" PRIi32 - "KB/sec, refill period %" PRIi64 " ms\n", - target / 1024, new_limit / 1024, refill_period / 1000); - } - } - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); -} - -TEST_F(RateLimiterTest, AutoTuneIncreaseWhenFull) { - const std::chrono::seconds kTimePerRefill(1); - const int kRefillsPerTune = 100; // needs to match util/rate_limiter.cc - - SpecialEnv special_env(Env::Default(), /*time_elapse_only_sleep*/ true); - - auto stats = CreateDBStatistics(); - std::unique_ptr rate_limiter(new GenericRateLimiter( - 1000 /* rate_bytes_per_sec */, - std::chrono::microseconds(kTimePerRefill).count(), 10 /* fairness */, - RateLimiter::Mode::kWritesOnly, special_env.GetSystemClock(), - true /* auto_tuned */)); - - // Rate limiter uses `CondVar::TimedWait()`, which does not have access to the - // `Env` to advance its time according to the fake wait duration. The - // workaround is to install a callback that advance the `Env`'s mock time. - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "GenericRateLimiter::Request:PostTimedWait", [&](void* arg) { - int64_t time_waited_us = *static_cast(arg); - special_env.SleepForMicroseconds(static_cast(time_waited_us)); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - // verify rate limit increases after a sequence of periods where rate limiter - // is always drained - int64_t orig_bytes_per_sec = rate_limiter->GetSingleBurstBytes(); - rate_limiter->Request(orig_bytes_per_sec, Env::IO_HIGH, stats.get(), - RateLimiter::OpType::kWrite); - while (std::chrono::microseconds(special_env.NowMicros()) <= - kRefillsPerTune * kTimePerRefill) { - rate_limiter->Request(orig_bytes_per_sec, Env::IO_HIGH, stats.get(), - RateLimiter::OpType::kWrite); - } - int64_t new_bytes_per_sec = rate_limiter->GetSingleBurstBytes(); - ASSERT_GT(new_bytes_per_sec, orig_bytes_per_sec); - - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack( - "GenericRateLimiter::Request:PostTimedWait"); - - // decreases after a sequence of periods where rate limiter is not drained - orig_bytes_per_sec = new_bytes_per_sec; - special_env.SleepForMicroseconds(static_cast( - kRefillsPerTune * std::chrono::microseconds(kTimePerRefill).count())); - // make a request so tuner can be triggered - rate_limiter->Request(1 /* bytes */, Env::IO_HIGH, stats.get(), - RateLimiter::OpType::kWrite); - new_bytes_per_sec = rate_limiter->GetSingleBurstBytes(); - ASSERT_LT(new_bytes_per_sec, orig_bytes_per_sec); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/util/repeatable_thread_test.cc b/util/repeatable_thread_test.cc deleted file mode 100644 index 0b3e95464..000000000 --- a/util/repeatable_thread_test.cc +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-
-#include "util/repeatable_thread.h"
-
-#include <atomic>
-#include <memory>
-
-#include "db/db_test_util.h"
-#include "test_util/mock_time_env.h"
-#include "test_util/sync_point.h"
-#include "test_util/testharness.h"
-
-class RepeatableThreadTest : public testing::Test {
- public:
-  RepeatableThreadTest()
-      : mock_clock_(std::make_shared<ROCKSDB_NAMESPACE::MockSystemClock>(
-            ROCKSDB_NAMESPACE::SystemClock::Default())) {}
-
- protected:
-  std::shared_ptr<ROCKSDB_NAMESPACE::MockSystemClock> mock_clock_;
-};
-
-TEST_F(RepeatableThreadTest, TimedTest) {
-  constexpr uint64_t kSecond = 1000000;  // 1s = 1000000us
-  constexpr int kIteration = 3;
-  const auto& clock = ROCKSDB_NAMESPACE::SystemClock::Default();
-  ROCKSDB_NAMESPACE::port::Mutex mutex;
-  ROCKSDB_NAMESPACE::port::CondVar test_cv(&mutex);
-  int count = 0;
-  uint64_t prev_time = clock->NowMicros();
-  ROCKSDB_NAMESPACE::RepeatableThread thread(
-      [&] {
-        ROCKSDB_NAMESPACE::MutexLock l(&mutex);
-        count++;
-        uint64_t now = clock->NowMicros();
-        assert(count == 1 || prev_time + 1 * kSecond <= now);
-        prev_time = now;
-        if (count >= kIteration) {
-          test_cv.SignalAll();
-        }
-      },
-      "rt_test", clock.get(), 1 * kSecond);
-  // Wait for execution finish.
-  {
-    ROCKSDB_NAMESPACE::MutexLock l(&mutex);
-    while (count < kIteration) {
-      test_cv.Wait();
-    }
-  }
-
-  // Test cancel
-  thread.cancel();
-}
-
-TEST_F(RepeatableThreadTest, MockEnvTest) {
-  constexpr uint64_t kSecond = 1000000;  // 1s = 1000000us
-  constexpr int kIteration = 3;
-  mock_clock_->SetCurrentTime(0);  // in seconds
-  std::atomic<int> count{0};
-
-#if defined(OS_MACOSX) && !defined(NDEBUG)
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
-      "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) {
-        // Obtain the current (real) time in seconds and add 1000 extra seconds
-        // to ensure that RepeatableThread::wait invokes TimedWait with a time
-        // greater than (real) current time. This is to prevent the TimedWait
-        // function from returning immediately without sleeping and releasing
-        // the mutex on certain platforms, e.g. OS X. If TimedWait returns
-        // immediately, the mutex will not be released, and
-        // RepeatableThread::TEST_WaitForRun never has a chance to execute the
-        // callback which, in this case, updates the result returned by
-        // mock_clock->NowMicros. Consequently, RepeatableThread::wait cannot
-        // break out of the loop, causing test to hang. The extra 1000 seconds
-        // is a best-effort approach because there seems no reliable and
-        // deterministic way to provide the aforementioned guarantee. By the
-        // time RepeatableThread::wait is called, it is no guarantee that the
-        // delay + mock_clock->NowMicros will be greater than the current real
-        // time. However, 1000 seconds should be sufficient in most cases.
-        uint64_t time_us = *reinterpret_cast<uint64_t*>(arg);
-        if (time_us < mock_clock_->RealNowMicros()) {
-          *reinterpret_cast<uint64_t*>(arg) =
-              mock_clock_->RealNowMicros() + 1000;
-        }
-      });
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
-#endif  // OS_MACOSX && !NDEBUG
-
-  ROCKSDB_NAMESPACE::RepeatableThread thread(
-      [&] { count++; }, "rt_test", mock_clock_.get(), 1 * kSecond, 1 * kSecond);
-  for (int i = 1; i <= kIteration; i++) {
-    // Bump current time
-    thread.TEST_WaitForRun([&] { mock_clock_->SetCurrentTime(i); });
-  }
-  // Test function should be executed exactly kIteration times.
-  ASSERT_EQ(kIteration, count.load());
-
-  // Test cancel
-  thread.cancel();
-}
-
-int main(int argc, char** argv) {
-  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
-  ::testing::InitGoogleTest(&argc, argv);
-
-  return RUN_ALL_TESTS();
-}
diff --git a/util/ribbon_test.cc b/util/ribbon_test.cc
deleted file mode 100644
index 6519df3d5..000000000
--- a/util/ribbon_test.cc
+++ /dev/null
@@ -1,1308 +0,0 @@
-// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-
-#include "rocksdb/system_clock.h"
-#include "test_util/testharness.h"
-#include "util/bloom_impl.h"
-#include "util/coding.h"
-#include "util/hash.h"
-#include "util/ribbon_config.h"
-#include "util/ribbon_impl.h"
-#include "util/stop_watch.h"
-#include "util/string_util.h"
-
-#ifndef GFLAGS
-uint32_t FLAGS_thoroughness = 5;
-uint32_t FLAGS_max_add = 0;
-uint32_t FLAGS_min_check = 4000;
-uint32_t FLAGS_max_check = 100000;
-bool FLAGS_verbose = false;
-
-bool FLAGS_find_occ = false;
-bool FLAGS_find_slot_occ = false;
-double FLAGS_find_next_factor = 1.618;
-uint32_t FLAGS_find_iters = 10000;
-uint32_t FLAGS_find_min_slots = 128;
-uint32_t FLAGS_find_max_slots = 1000000;
-
-bool FLAGS_optimize_homog = false;
-uint32_t FLAGS_optimize_homog_slots = 30000000;
-uint32_t FLAGS_optimize_homog_check = 200000;
-double FLAGS_optimize_homog_granularity = 0.002;
-#else
-#include "util/gflags_compat.h"
-using GFLAGS_NAMESPACE::ParseCommandLineFlags;
-// Using 500 is a good test when you have time to be thorough.
-// Default is for general RocksDB regression test runs.
-DEFINE_uint32(thoroughness, 5, "iterations per configuration");
-DEFINE_uint32(max_add, 0,
-              "Add up to this number of entries to a single filter in "
-              "CompactnessAndBacktrackAndFpRate; 0 == reasonable default");
-DEFINE_uint32(min_check, 4000,
-              "Minimum number of novel entries for testing FP rate");
-DEFINE_uint32(max_check, 10000,
-              "Maximum number of novel entries for testing FP rate");
-DEFINE_bool(verbose, false, "Print extra details");
-
-// Options for FindOccupancy, which is more of a tool than a test.
-DEFINE_bool(find_occ, false, "whether to run the FindOccupancy tool");
-DEFINE_bool(find_slot_occ, false,
-            "whether to show individual slot occupancies with "
-            "FindOccupancy tool");
-DEFINE_double(find_next_factor, 1.618,
-              "factor to next num_slots for FindOccupancy");
-DEFINE_uint32(find_iters, 10000, "number of samples for FindOccupancy");
-DEFINE_uint32(find_min_slots, 128, "number of slots for FindOccupancy");
-DEFINE_uint32(find_max_slots, 1000000, "number of slots for FindOccupancy");
-
-// Options for OptimizeHomogAtScale, which is more of a tool than a test.
-DEFINE_bool(optimize_homog, false, - "whether to run the OptimizeHomogAtScale tool"); -DEFINE_uint32(optimize_homog_slots, 30000000, - "number of slots for OptimizeHomogAtScale"); -DEFINE_uint32(optimize_homog_check, 200000, - "number of queries for checking FP rate in OptimizeHomogAtScale"); -DEFINE_double( - optimize_homog_granularity, 0.002, - "overhead change between FP rate checking in OptimizeHomogAtScale"); - -#endif // GFLAGS - -template -class RibbonTypeParamTest : public ::testing::Test {}; - -class RibbonTest : public ::testing::Test {}; - -namespace { - -// Different ways of generating keys for testing - -// Generate semi-sequential keys -struct StandardKeyGen { - StandardKeyGen(const std::string& prefix, uint64_t id) - : id_(id), str_(prefix) { - ROCKSDB_NAMESPACE::PutFixed64(&str_, /*placeholder*/ 0); - } - - // Prefix (only one required) - StandardKeyGen& operator++() { - ++id_; - return *this; - } - - StandardKeyGen& operator+=(uint64_t i) { - id_ += i; - return *this; - } - - const std::string& operator*() { - // Use multiplication to mix things up a little in the key - ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8], - id_ * uint64_t{0x1500000001}); - return str_; - } - - bool operator==(const StandardKeyGen& other) { - // Same prefix is assumed - return id_ == other.id_; - } - bool operator!=(const StandardKeyGen& other) { - // Same prefix is assumed - return id_ != other.id_; - } - - uint64_t id_; - std::string str_; -}; - -// Generate small sequential keys, that can misbehave with sequential seeds -// as in https://github.com/Cyan4973/xxHash/issues/469. -// These keys are only heuristically unique, but that's OK with 64 bits, -// for testing purposes. -struct SmallKeyGen { - SmallKeyGen(const std::string& prefix, uint64_t id) : id_(id) { - // Hash the prefix for a heuristically unique offset - id_ += ROCKSDB_NAMESPACE::GetSliceHash64(prefix); - ROCKSDB_NAMESPACE::PutFixed64(&str_, id_); - } - - // Prefix (only one required) - SmallKeyGen& operator++() { - ++id_; - return *this; - } - - SmallKeyGen& operator+=(uint64_t i) { - id_ += i; - return *this; - } - - const std::string& operator*() { - ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8], id_); - return str_; - } - - bool operator==(const SmallKeyGen& other) { return id_ == other.id_; } - bool operator!=(const SmallKeyGen& other) { return id_ != other.id_; } - - uint64_t id_; - std::string str_; -}; - -template -struct Hash32KeyGenWrapper : public KeyGen { - Hash32KeyGenWrapper(const std::string& prefix, uint64_t id) - : KeyGen(prefix, id) {} - uint32_t operator*() { - auto& key = *static_cast(*this); - // unseeded - return ROCKSDB_NAMESPACE::GetSliceHash(key); - } -}; - -template -struct Hash64KeyGenWrapper : public KeyGen { - Hash64KeyGenWrapper(const std::string& prefix, uint64_t id) - : KeyGen(prefix, id) {} - uint64_t operator*() { - auto& key = *static_cast(*this); - // unseeded - return ROCKSDB_NAMESPACE::GetSliceHash64(key); - } -}; - -using ROCKSDB_NAMESPACE::ribbon::ConstructionFailureChance; - -const std::vector kFailureOnly50Pct = { - ROCKSDB_NAMESPACE::ribbon::kOneIn2}; - -const std::vector kFailureOnlyRare = { - ROCKSDB_NAMESPACE::ribbon::kOneIn1000}; - -const std::vector kFailureAll = { - ROCKSDB_NAMESPACE::ribbon::kOneIn2, ROCKSDB_NAMESPACE::ribbon::kOneIn20, - ROCKSDB_NAMESPACE::ribbon::kOneIn1000}; - -} // namespace - -using ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate; -using ROCKSDB_NAMESPACE::ribbon::StandardHasher; -using ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter; 
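The key generators defined above behave like forward iterators: the tests always walk a half-open [begin, end) range of generated keys. A self-contained toy sketch of that consumption pattern (a simplified stand-in, not the ribbon test's actual StandardKeyGen) follows:

#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>

// Toy stand-in for StandardKeyGen: prefix + counter, advanced like a forward
// iterator and compared by id only.
struct ToyKeyGen {
  ToyKeyGen(std::string prefix, uint64_t id)
      : prefix_(std::move(prefix)), id_(id) {}
  ToyKeyGen& operator++() {
    ++id_;
    return *this;
  }
  std::string operator*() const { return prefix_ + std::to_string(id_); }
  bool operator!=(const ToyKeyGen& other) const { return id_ != other.id_; }
  std::string prefix_;
  uint64_t id_;
};

int main() {
  // Walk the range exactly the way the banding / FP-rate loops do.
  ToyKeyGen begin("added_", 0);
  ToyKeyGen end("added_", 5);
  for (ToyKeyGen cur = begin; cur != end; ++cur) {
    std::printf("%s\n", (*cur).c_str());
  }
  return 0;
}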
- -struct DefaultTypesAndSettings { - using CoeffRow = ROCKSDB_NAMESPACE::Unsigned128; - using ResultRow = uint8_t; - using Index = uint32_t; - using Hash = uint64_t; - using Seed = uint32_t; - using Key = ROCKSDB_NAMESPACE::Slice; - static constexpr bool kIsFilter = true; - static constexpr bool kHomogeneous = false; - static constexpr bool kFirstCoeffAlwaysOne = true; - static constexpr bool kUseSmash = false; - static constexpr bool kAllowZeroStarts = false; - static Hash HashFn(const Key& key, uint64_t raw_seed) { - // This version 0.7.2 preview of XXH3 (a.k.a. XXPH3) function does - // not pass SmallKeyGen tests below without some seed premixing from - // StandardHasher. See https://github.com/Cyan4973/xxHash/issues/469 - return ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), raw_seed); - } - // For testing - using KeyGen = StandardKeyGen; - static const std::vector& FailureChanceToTest() { - return kFailureAll; - } -}; - -using TypesAndSettings_Coeff128 = DefaultTypesAndSettings; -struct TypesAndSettings_Coeff128Smash : public DefaultTypesAndSettings { - static constexpr bool kUseSmash = true; -}; -struct TypesAndSettings_Coeff64 : public DefaultTypesAndSettings { - using CoeffRow = uint64_t; -}; -struct TypesAndSettings_Coeff64Smash : public TypesAndSettings_Coeff64 { - static constexpr bool kUseSmash = true; -}; -struct TypesAndSettings_Coeff64Smash0 : public TypesAndSettings_Coeff64Smash { - static constexpr bool kFirstCoeffAlwaysOne = false; -}; - -// Homogeneous Ribbon configurations -struct TypesAndSettings_Coeff128_Homog : public DefaultTypesAndSettings { - static constexpr bool kHomogeneous = true; - // Since our best construction success setting still has 1/1000 failure - // rate, the best FP rate we test is 1/256 - using ResultRow = uint8_t; - // Homogeneous only makes sense with sufficient slots for equivalent of - // almost sure construction success - static const std::vector& FailureChanceToTest() { - return kFailureOnlyRare; - } -}; -struct TypesAndSettings_Coeff128Smash_Homog - : public TypesAndSettings_Coeff128_Homog { - // Smash (extra time to save space) + Homog (extra space to save time) - // doesn't make much sense in practice, but we minimally test it - static constexpr bool kUseSmash = true; -}; -struct TypesAndSettings_Coeff64_Homog : public TypesAndSettings_Coeff128_Homog { - using CoeffRow = uint64_t; -}; -struct TypesAndSettings_Coeff64Smash_Homog - : public TypesAndSettings_Coeff64_Homog { - // Smash (extra time to save space) + Homog (extra space to save time) - // doesn't make much sense in practice, but we minimally test it - static constexpr bool kUseSmash = true; -}; - -// Less exhaustive mix of coverage, but still covering the most stressful case -// (only 50% construction success) -struct AbridgedTypesAndSettings : public DefaultTypesAndSettings { - static const std::vector& FailureChanceToTest() { - return kFailureOnly50Pct; - } -}; -struct TypesAndSettings_Result16 : public AbridgedTypesAndSettings { - using ResultRow = uint16_t; -}; -struct TypesAndSettings_Result32 : public AbridgedTypesAndSettings { - using ResultRow = uint32_t; -}; -struct TypesAndSettings_IndexSizeT : public AbridgedTypesAndSettings { - using Index = size_t; -}; -struct TypesAndSettings_Hash32 : public AbridgedTypesAndSettings { - using Hash = uint32_t; - static Hash HashFn(const Key& key, Hash raw_seed) { - // This MurmurHash1 function does not pass tests below without the - // seed premixing from StandardHasher. 
In fact, it needs more than - // just a multiplication mixer on the ordinal seed. - return ROCKSDB_NAMESPACE::Hash(key.data(), key.size(), raw_seed); - } -}; -struct TypesAndSettings_Hash32_Result16 : public AbridgedTypesAndSettings { - using ResultRow = uint16_t; -}; -struct TypesAndSettings_KeyString : public AbridgedTypesAndSettings { - using Key = std::string; -}; -struct TypesAndSettings_Seed8 : public AbridgedTypesAndSettings { - // This is not a generally recommended configuration. With the configured - // hash function, it would fail with SmallKeyGen due to insufficient - // independence among the seeds. - using Seed = uint8_t; -}; -struct TypesAndSettings_NoAlwaysOne : public AbridgedTypesAndSettings { - static constexpr bool kFirstCoeffAlwaysOne = false; -}; -struct TypesAndSettings_AllowZeroStarts : public AbridgedTypesAndSettings { - static constexpr bool kAllowZeroStarts = true; -}; -struct TypesAndSettings_Seed64 : public AbridgedTypesAndSettings { - using Seed = uint64_t; -}; -struct TypesAndSettings_Rehasher - : public StandardRehasherAdapter { - using KeyGen = Hash64KeyGenWrapper; -}; -struct TypesAndSettings_Rehasher_Result16 : public TypesAndSettings_Rehasher { - using ResultRow = uint16_t; -}; -struct TypesAndSettings_Rehasher_Result32 : public TypesAndSettings_Rehasher { - using ResultRow = uint32_t; -}; -struct TypesAndSettings_Rehasher_Seed64 - : public StandardRehasherAdapter { - using KeyGen = Hash64KeyGenWrapper; - // Note: 64-bit seed with Rehasher gives slightly better average reseeds -}; -struct TypesAndSettings_Rehasher32 - : public StandardRehasherAdapter { - using KeyGen = Hash32KeyGenWrapper; -}; -struct TypesAndSettings_Rehasher32_Coeff64 - : public TypesAndSettings_Rehasher32 { - using CoeffRow = uint64_t; -}; -struct TypesAndSettings_SmallKeyGen : public AbridgedTypesAndSettings { - // SmallKeyGen stresses the independence of different hash seeds - using KeyGen = SmallKeyGen; -}; -struct TypesAndSettings_Hash32_SmallKeyGen : public TypesAndSettings_Hash32 { - // SmallKeyGen stresses the independence of different hash seeds - using KeyGen = SmallKeyGen; -}; -struct TypesAndSettings_Coeff32 : public DefaultTypesAndSettings { - using CoeffRow = uint32_t; -}; -struct TypesAndSettings_Coeff32Smash : public TypesAndSettings_Coeff32 { - static constexpr bool kUseSmash = true; -}; -struct TypesAndSettings_Coeff16 : public DefaultTypesAndSettings { - using CoeffRow = uint16_t; -}; -struct TypesAndSettings_Coeff16Smash : public TypesAndSettings_Coeff16 { - static constexpr bool kUseSmash = true; -}; - -using TestTypesAndSettings = ::testing::Types< - TypesAndSettings_Coeff128, TypesAndSettings_Coeff128Smash, - TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash, - TypesAndSettings_Coeff64Smash0, TypesAndSettings_Coeff128_Homog, - TypesAndSettings_Coeff128Smash_Homog, TypesAndSettings_Coeff64_Homog, - TypesAndSettings_Coeff64Smash_Homog, TypesAndSettings_Result16, - TypesAndSettings_Result32, TypesAndSettings_IndexSizeT, - TypesAndSettings_Hash32, TypesAndSettings_Hash32_Result16, - TypesAndSettings_KeyString, TypesAndSettings_Seed8, - TypesAndSettings_NoAlwaysOne, TypesAndSettings_AllowZeroStarts, - TypesAndSettings_Seed64, TypesAndSettings_Rehasher, - TypesAndSettings_Rehasher_Result16, TypesAndSettings_Rehasher_Result32, - TypesAndSettings_Rehasher_Seed64, TypesAndSettings_Rehasher32, - TypesAndSettings_Rehasher32_Coeff64, TypesAndSettings_SmallKeyGen, - TypesAndSettings_Hash32_SmallKeyGen, TypesAndSettings_Coeff32, - TypesAndSettings_Coeff32Smash, 
TypesAndSettings_Coeff16, - TypesAndSettings_Coeff16Smash>; -TYPED_TEST_CASE(RibbonTypeParamTest, TestTypesAndSettings); - -namespace { - -// For testing Poisson-distributed (or similar) statistics, get value for -// `stddevs_allowed` standard deviations above expected mean -// `expected_count`. -// (Poisson approximates Binomial only if probability of a trial being -// in the count is low.) -uint64_t PoissonUpperBound(double expected_count, double stddevs_allowed) { - return static_cast( - expected_count + stddevs_allowed * std::sqrt(expected_count) + 1.0); -} - -uint64_t PoissonLowerBound(double expected_count, double stddevs_allowed) { - return static_cast(std::max( - 0.0, expected_count - stddevs_allowed * std::sqrt(expected_count))); -} - -uint64_t FrequentPoissonUpperBound(double expected_count) { - // Allow up to 5.0 standard deviations for frequently checked statistics - return PoissonUpperBound(expected_count, 5.0); -} - -uint64_t FrequentPoissonLowerBound(double expected_count) { - return PoissonLowerBound(expected_count, 5.0); -} - -uint64_t InfrequentPoissonUpperBound(double expected_count) { - // Allow up to 3 standard deviations for infrequently checked statistics - return PoissonUpperBound(expected_count, 3.0); -} - -uint64_t InfrequentPoissonLowerBound(double expected_count) { - return PoissonLowerBound(expected_count, 3.0); -} - -} // namespace - -TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { - IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); - IMPORT_RIBBON_IMPL_TYPES(TypeParam); - using KeyGen = typename TypeParam::KeyGen; - using ConfigHelper = - ROCKSDB_NAMESPACE::ribbon::BandingConfigHelper; - - if (sizeof(CoeffRow) < 8) { - ROCKSDB_GTEST_BYPASS("Not fully supported"); - return; - } - - const auto log2_thoroughness = - static_cast(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness)); - - // We are going to choose num_to_add using an exponential distribution, - // so that we have good representation of small-to-medium filters. - // Here we just pick some reasonable, practical upper bound based on - // kCoeffBits or option. - const double log_max_add = std::log( - FLAGS_max_add > 0 ? FLAGS_max_add - : static_cast(kCoeffBits * kCoeffBits) * - std::max(FLAGS_thoroughness, uint32_t{32})); - - // This needs to be enough below the minimum number of slots to get a - // reasonable number of samples with the minimum number of slots. 
- const double log_min_add = std::log(0.66 * SimpleSoln::RoundUpNumSlots(1)); - - ASSERT_GT(log_max_add, log_min_add); - - const double diff_log_add = log_max_add - log_min_add; - - for (ConstructionFailureChance cs : TypeParam::FailureChanceToTest()) { - double expected_reseeds; - switch (cs) { - default: - assert(false); - FALLTHROUGH_INTENDED; - case ROCKSDB_NAMESPACE::ribbon::kOneIn2: - fprintf(stderr, "== Failure: 50 percent\n"); - expected_reseeds = 1.0; - break; - case ROCKSDB_NAMESPACE::ribbon::kOneIn20: - fprintf(stderr, "== Failure: 95 percent\n"); - expected_reseeds = 0.053; - break; - case ROCKSDB_NAMESPACE::ribbon::kOneIn1000: - fprintf(stderr, "== Failure: 1/1000\n"); - expected_reseeds = 0.001; - break; - } - - uint64_t total_reseeds = 0; - uint64_t total_singles = 0; - uint64_t total_single_failures = 0; - uint64_t total_batch = 0; - uint64_t total_batch_successes = 0; - uint64_t total_fp_count = 0; - uint64_t total_added = 0; - uint64_t total_expand_trials = 0; - uint64_t total_expand_failures = 0; - double total_expand_overhead = 0.0; - - uint64_t soln_query_nanos = 0; - uint64_t soln_query_count = 0; - uint64_t bloom_query_nanos = 0; - uint64_t isoln_query_nanos = 0; - uint64_t isoln_query_count = 0; - - // Take different samples if you change thoroughness - ROCKSDB_NAMESPACE::Random32 rnd(FLAGS_thoroughness); - - for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) { - // We are going to choose num_to_add using an exponential distribution - // as noted above, but instead of randomly choosing them, we generate - // samples linearly using the golden ratio, which ensures a nice spread - // even for a small number of samples, and starting with the minimum - // number of slots to ensure it is tested. - double log_add = - std::fmod(0.6180339887498948482 * diff_log_add * i, diff_log_add) + - log_min_add; - uint32_t num_to_add = static_cast(std::exp(log_add)); - - // Most of the time, test the Interleaved solution storage, but when - // we do we have to make num_slots a multiple of kCoeffBits. So - // sometimes we want to test without that limitation. - bool test_interleaved = (i % 7) != 6; - - // Compute num_slots, and re-adjust num_to_add to get as close as possible - // to next num_slots, to stress that num_slots in terms of construction - // success. 
Ensure at least one iteration: - Index num_slots = Index{0} - 1; - --num_to_add; - for (;;) { - Index next_num_slots = SimpleSoln::RoundUpNumSlots( - ConfigHelper::GetNumSlots(num_to_add + 1, cs)); - if (test_interleaved) { - next_num_slots = InterleavedSoln::RoundUpNumSlots(next_num_slots); - // assert idempotent - EXPECT_EQ(next_num_slots, - InterleavedSoln::RoundUpNumSlots(next_num_slots)); - } - // assert idempotent with InterleavedSoln::RoundUpNumSlots - EXPECT_EQ(next_num_slots, SimpleSoln::RoundUpNumSlots(next_num_slots)); - - if (next_num_slots > num_slots) { - break; - } - num_slots = next_num_slots; - ++num_to_add; - } - assert(num_slots < Index{0} - 1); - - total_added += num_to_add; - - std::string prefix; - ROCKSDB_NAMESPACE::PutFixed32(&prefix, rnd.Next()); - - // Batch that must be added - std::string added_str = prefix + "added"; - KeyGen keys_begin(added_str, 0); - KeyGen keys_end(added_str, num_to_add); - - // A couple more that will probably be added - KeyGen one_more(prefix + "more", 1); - KeyGen two_more(prefix + "more", 2); - - // Batch that may or may not be added - uint32_t batch_size = - static_cast(2.0 * std::sqrt(num_slots - num_to_add)); - if (batch_size < 10U) { - batch_size = 0; - } - std::string batch_str = prefix + "batch"; - KeyGen batch_begin(batch_str, 0); - KeyGen batch_end(batch_str, batch_size); - - // Batch never (successfully) added, but used for querying FP rate - std::string not_str = prefix + "not"; - KeyGen other_keys_begin(not_str, 0); - KeyGen other_keys_end(not_str, FLAGS_max_check); - - double overhead_ratio = 1.0 * num_slots / num_to_add; - if (FLAGS_verbose) { - fprintf(stderr, "Adding(%s) %u / %u Overhead: %g Batch size: %u\n", - test_interleaved ? "i" : "s", (unsigned)num_to_add, - (unsigned)num_slots, overhead_ratio, (unsigned)batch_size); - } - - // Vary bytes for InterleavedSoln to use number of solution columns - // from 0 to max allowed by ResultRow type (and used by SimpleSoln). - // Specifically include 0 and max, and otherwise skew toward max. - uint32_t max_ibytes = - static_cast(sizeof(ResultRow) * num_slots); - size_t ibytes; - if (i == 0) { - ibytes = 0; - } else if (i == 1) { - ibytes = max_ibytes; - } else { - // Skewed - ibytes = - std::max(rnd.Uniformish(max_ibytes), rnd.Uniformish(max_ibytes)); - } - std::unique_ptr idata(new char[ibytes]); - InterleavedSoln isoln(idata.get(), ibytes); - - SimpleSoln soln; - Hasher hasher; - bool first_single; - bool second_single; - bool batch_success; - { - Banding banding; - // Traditional solve for a fixed set. - ASSERT_TRUE( - banding.ResetAndFindSeedToSolve(num_slots, keys_begin, keys_end)); - - Index occupied_count = banding.GetOccupiedCount(); - Index more_added = 0; - - if (TypeParam::kHomogeneous || overhead_ratio < 1.01 || - batch_size == 0) { - // Homogeneous not compatible with backtracking because add - // doesn't fail. Small overhead ratio too packed to expect more - first_single = false; - second_single = false; - batch_success = false; - } else { - // Now to test backtracking, starting with guaranteed fail. By using - // the keys that will be used to test FP rate, we are then doing an - // extra check that after backtracking there are no remnants (e.g. in - // result side of banding) of these entries. 
- KeyGen other_keys_too_big_end = other_keys_begin; - other_keys_too_big_end += num_to_add; - banding.EnsureBacktrackSize(std::max(num_to_add, batch_size)); - EXPECT_FALSE(banding.AddRangeOrRollBack(other_keys_begin, - other_keys_too_big_end)); - EXPECT_EQ(occupied_count, banding.GetOccupiedCount()); - - // Check that we still have a good chance of adding a couple more - // individually - first_single = banding.Add(*one_more); - second_single = banding.Add(*two_more); - more_added += (first_single ? 1 : 0) + (second_single ? 1 : 0); - total_singles += 2U; - total_single_failures += 2U - more_added; - - // Or as a batch - batch_success = banding.AddRangeOrRollBack(batch_begin, batch_end); - ++total_batch; - if (batch_success) { - more_added += batch_size; - ++total_batch_successes; - } - EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added); - } - - // Also verify that redundant adds are OK (no effect) - ASSERT_TRUE( - banding.AddRange(keys_begin, KeyGen(added_str, num_to_add / 8))); - EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added); - - // Now back-substitution - soln.BackSubstFrom(banding); - if (test_interleaved) { - isoln.BackSubstFrom(banding); - } - - Seed reseeds = banding.GetOrdinalSeed(); - total_reseeds += reseeds; - - EXPECT_LE(reseeds, 8 + log2_thoroughness); - if (reseeds > log2_thoroughness + 1) { - fprintf( - stderr, "%s high reseeds at %u, %u/%u: %u\n", - reseeds > log2_thoroughness + 8 ? "ERROR Extremely" : "Somewhat", - static_cast(i), static_cast(num_to_add), - static_cast(num_slots), static_cast(reseeds)); - } - - if (reseeds > 0) { - // "Expand" test: given a failed construction, how likely is it to - // pass with same seed and more slots. At each step, we increase - // enough to ensure there is at least one shift within each coeff - // block. - ++total_expand_trials; - Index expand_count = 0; - Index ex_slots = num_slots; - banding.SetOrdinalSeed(0); - for (;; ++expand_count) { - ASSERT_LE(expand_count, log2_thoroughness); - ex_slots += ex_slots / kCoeffBits; - if (test_interleaved) { - ex_slots = InterleavedSoln::RoundUpNumSlots(ex_slots); - } - banding.Reset(ex_slots); - bool success = banding.AddRange(keys_begin, keys_end); - if (success) { - break; - } - } - total_expand_failures += expand_count; - total_expand_overhead += 1.0 * (ex_slots - num_slots) / num_slots; - } - - hasher.SetOrdinalSeed(reseeds); - } - // soln and hasher now independent of Banding object - - // Verify keys added - KeyGen cur = keys_begin; - while (cur != keys_end) { - ASSERT_TRUE(soln.FilterQuery(*cur, hasher)); - ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher)); - ++cur; - } - // We (maybe) snuck these in! - if (first_single) { - ASSERT_TRUE(soln.FilterQuery(*one_more, hasher)); - ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*one_more, hasher)); - } - if (second_single) { - ASSERT_TRUE(soln.FilterQuery(*two_more, hasher)); - ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*two_more, hasher)); - } - if (batch_success) { - cur = batch_begin; - while (cur != batch_end) { - ASSERT_TRUE(soln.FilterQuery(*cur, hasher)); - ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher)); - ++cur; - } - } - - // Check FP rate (depends only on number of result bits == solution - // columns) - Index fp_count = 0; - cur = other_keys_begin; - { - ROCKSDB_NAMESPACE::StopWatchNano timer( - ROCKSDB_NAMESPACE::SystemClock::Default().get(), true); - while (cur != other_keys_end) { - bool fp = soln.FilterQuery(*cur, hasher); - fp_count += fp ? 
1 : 0; - ++cur; - } - soln_query_nanos += timer.ElapsedNanos(); - soln_query_count += FLAGS_max_check; - } - { - double expected_fp_count = soln.ExpectedFpRate() * FLAGS_max_check; - // For expected FP rate, also include false positives due to collisions - // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) - double correction = - FLAGS_max_check * ExpectedCollisionFpRate(hasher, num_to_add); - - // NOTE: rare violations expected with kHomogeneous - EXPECT_LE(fp_count, - FrequentPoissonUpperBound(expected_fp_count + correction)); - EXPECT_GE(fp_count, - FrequentPoissonLowerBound(expected_fp_count + correction)); - } - total_fp_count += fp_count; - - // And also check FP rate for isoln - if (test_interleaved) { - Index ifp_count = 0; - cur = other_keys_begin; - ROCKSDB_NAMESPACE::StopWatchNano timer( - ROCKSDB_NAMESPACE::SystemClock::Default().get(), true); - while (cur != other_keys_end) { - ifp_count += isoln.FilterQuery(*cur, hasher) ? 1 : 0; - ++cur; - } - isoln_query_nanos += timer.ElapsedNanos(); - isoln_query_count += FLAGS_max_check; - { - double expected_fp_count = isoln.ExpectedFpRate() * FLAGS_max_check; - // For expected FP rate, also include false positives due to - // collisions in Hash value. (Negligible for 64-bit, can matter for - // 32-bit.) - double correction = - FLAGS_max_check * ExpectedCollisionFpRate(hasher, num_to_add); - - // NOTE: rare violations expected with kHomogeneous - EXPECT_LE(ifp_count, - FrequentPoissonUpperBound(expected_fp_count + correction)); - - // FIXME: why sometimes can we slightly "beat the odds"? - // (0.95 factor should not be needed) - EXPECT_GE(ifp_count, FrequentPoissonLowerBound( - 0.95 * expected_fp_count + correction)); - } - // Since the bits used in isoln are a subset of the bits used in soln, - // it cannot have fewer FPs - EXPECT_GE(ifp_count, fp_count); - } - - // And compare to Bloom time, for fun - if (ibytes >= /* minimum Bloom impl bytes*/ 64) { - Index bfp_count = 0; - cur = other_keys_begin; - ROCKSDB_NAMESPACE::StopWatchNano timer( - ROCKSDB_NAMESPACE::SystemClock::Default().get(), true); - while (cur != other_keys_end) { - uint64_t h = hasher.GetHash(*cur); - uint32_t h1 = ROCKSDB_NAMESPACE::Lower32of64(h); - uint32_t h2 = sizeof(Hash) >= 8 ? ROCKSDB_NAMESPACE::Upper32of64(h) - : h1 * 0x9e3779b9; - bfp_count += - ROCKSDB_NAMESPACE::FastLocalBloomImpl::HashMayMatch( - h1, h2, static_cast(ibytes), 6, idata.get()) - ? 1 - : 0; - ++cur; - } - bloom_query_nanos += timer.ElapsedNanos(); - // ensure bfp_count is used - ASSERT_LT(bfp_count, FLAGS_max_check); - } - } - - // "outside" == key not in original set so either negative or false positive - fprintf(stderr, - "Simple outside query, hot, incl hashing, ns/key: %g\n", - 1.0 * soln_query_nanos / soln_query_count); - fprintf(stderr, - "Interleaved outside query, hot, incl hashing, ns/key: %g\n", - 1.0 * isoln_query_nanos / isoln_query_count); - fprintf(stderr, - "Bloom outside query, hot, incl hashing, ns/key: %g\n", - 1.0 * bloom_query_nanos / soln_query_count); - - if (TypeParam::kHomogeneous) { - EXPECT_EQ(total_reseeds, 0U); - } else { - double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness; - fprintf(stderr, "Average re-seeds: %g\n", average_reseeds); - // Values above were chosen to target around 50% chance of encoding - // success rate (average of 1.0 re-seeds) or slightly better. But 1.15 is - // also close enough. 
- EXPECT_LE(total_reseeds, - InfrequentPoissonUpperBound(1.15 * expected_reseeds * - FLAGS_thoroughness)); - // Would use 0.85 here instead of 0.75, but - // TypesAndSettings_Hash32_SmallKeyGen can "beat the odds" because of - // sequential keys with a small, cheap hash function. We accept that - // there are surely inputs that are somewhat bad for this setup, but - // these somewhat good inputs are probably more likely. - EXPECT_GE(total_reseeds, - InfrequentPoissonLowerBound(0.75 * expected_reseeds * - FLAGS_thoroughness)); - } - - if (total_expand_trials > 0) { - double average_expand_failures = - 1.0 * total_expand_failures / total_expand_trials; - fprintf(stderr, "Average expand failures, and overhead: %g, %g\n", - average_expand_failures, - total_expand_overhead / total_expand_trials); - // Seems to be a generous allowance - EXPECT_LE(total_expand_failures, - InfrequentPoissonUpperBound(1.0 * total_expand_trials)); - } else { - fprintf(stderr, "Average expand failures: N/A\n"); - } - - if (total_singles > 0) { - double single_failure_rate = 1.0 * total_single_failures / total_singles; - fprintf(stderr, "Add'l single, failure rate: %g\n", single_failure_rate); - // A rough bound (one sided) based on nothing in particular - double expected_single_failures = 1.0 * total_singles / - (sizeof(CoeffRow) == 16 ? 128 - : TypeParam::kUseSmash ? 64 - : 32); - EXPECT_LE(total_single_failures, - InfrequentPoissonUpperBound(expected_single_failures)); - } - - if (total_batch > 0) { - // Counting successes here for Poisson to approximate the Binomial - // distribution. - // A rough bound (one sided) based on nothing in particular. - double expected_batch_successes = 1.0 * total_batch / 2; - uint64_t lower_bound = - InfrequentPoissonLowerBound(expected_batch_successes); - fprintf(stderr, "Add'l batch, success rate: %g (>= %g)\n", - 1.0 * total_batch_successes / total_batch, - 1.0 * lower_bound / total_batch); - EXPECT_GE(total_batch_successes, lower_bound); - } - - { - uint64_t total_checked = uint64_t{FLAGS_max_check} * FLAGS_thoroughness; - double expected_total_fp_count = - total_checked * std::pow(0.5, 8U * sizeof(ResultRow)); - // For expected FP rate, also include false positives due to collisions - // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) 
- double average_added = 1.0 * total_added / FLAGS_thoroughness; - expected_total_fp_count += - total_checked * ExpectedCollisionFpRate(Hasher(), average_added); - - uint64_t upper_bound = - InfrequentPoissonUpperBound(expected_total_fp_count); - uint64_t lower_bound = - InfrequentPoissonLowerBound(expected_total_fp_count); - fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n", - 1.0 * total_fp_count / total_checked, - expected_total_fp_count / total_checked, - 1.0 * upper_bound / total_checked, - 1.0 * lower_bound / total_checked); - EXPECT_LE(total_fp_count, upper_bound); - EXPECT_GE(total_fp_count, lower_bound); - } - } -} - -TYPED_TEST(RibbonTypeParamTest, Extremes) { - IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); - IMPORT_RIBBON_IMPL_TYPES(TypeParam); - using KeyGen = typename TypeParam::KeyGen; - - size_t bytes = 128 * 1024; - std::unique_ptr buf(new char[bytes]); - InterleavedSoln isoln(buf.get(), bytes); - SimpleSoln soln; - Hasher hasher; - Banding banding; - - // ######################################## - // Add zero keys to minimal number of slots - KeyGen begin_and_end("foo", 123); - ASSERT_TRUE(banding.ResetAndFindSeedToSolve( - /*slots*/ kCoeffBits, begin_and_end, begin_and_end, /*first seed*/ 0, - /* seed mask*/ 0)); - - soln.BackSubstFrom(banding); - isoln.BackSubstFrom(banding); - - // Because there's plenty of memory, we expect the interleaved solution to - // use maximum supported columns (same as simple solution) - ASSERT_EQ(isoln.GetUpperNumColumns(), 8U * sizeof(ResultRow)); - ASSERT_EQ(isoln.GetUpperStartBlock(), 0U); - - // Somewhat oddly, we expect same FP rate as if we had essentially filled - // up the slots. - KeyGen other_keys_begin("not", 0); - KeyGen other_keys_end("not", FLAGS_max_check); - - Index fp_count = 0; - KeyGen cur = other_keys_begin; - while (cur != other_keys_end) { - bool isoln_query_result = isoln.FilterQuery(*cur, hasher); - bool soln_query_result = soln.FilterQuery(*cur, hasher); - // Solutions are equivalent - ASSERT_EQ(isoln_query_result, soln_query_result); - if (!TypeParam::kHomogeneous) { - // And in fact we only expect an FP when ResultRow is 0 - // (except Homogeneous) - ASSERT_EQ(soln_query_result, hasher.GetResultRowFromHash( - hasher.GetHash(*cur)) == ResultRow{0}); - } - fp_count += soln_query_result ? 
1 : 0; - ++cur; - } - { - ASSERT_EQ(isoln.ExpectedFpRate(), soln.ExpectedFpRate()); - double expected_fp_count = isoln.ExpectedFpRate() * FLAGS_max_check; - EXPECT_LE(fp_count, InfrequentPoissonUpperBound(expected_fp_count)); - if (TypeParam::kHomogeneous) { - // Pseudorandom garbage in Homogeneous filter can "beat the odds" if - // nothing added - } else { - EXPECT_GE(fp_count, InfrequentPoissonLowerBound(expected_fp_count)); - } - } - - // ###################################################### - // Use zero bytes for interleaved solution (key(s) added) - - // Add one key - KeyGen key_begin("added", 0); - KeyGen key_end("added", 1); - ASSERT_TRUE(banding.ResetAndFindSeedToSolve( - /*slots*/ kCoeffBits, key_begin, key_end, /*first seed*/ 0, - /* seed mask*/ 0)); - - InterleavedSoln isoln2(nullptr, /*bytes*/ 0); - - isoln2.BackSubstFrom(banding); - - ASSERT_EQ(isoln2.GetUpperNumColumns(), 0U); - ASSERT_EQ(isoln2.GetUpperStartBlock(), 0U); - - // All queries return true - ASSERT_TRUE(isoln2.FilterQuery(*other_keys_begin, hasher)); - ASSERT_EQ(isoln2.ExpectedFpRate(), 1.0); -} - -TEST(RibbonTest, AllowZeroStarts) { - IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings_AllowZeroStarts); - IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings_AllowZeroStarts); - using KeyGen = StandardKeyGen; - - InterleavedSoln isoln(nullptr, /*bytes*/ 0); - SimpleSoln soln; - Hasher hasher; - Banding banding; - - KeyGen begin("foo", 0); - KeyGen end("foo", 1); - // Can't add 1 entry - ASSERT_FALSE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin, end)); - - KeyGen begin_and_end("foo", 123); - // Can add 0 entries - ASSERT_TRUE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin_and_end, - begin_and_end)); - - Seed reseeds = banding.GetOrdinalSeed(); - ASSERT_EQ(reseeds, 0U); - hasher.SetOrdinalSeed(reseeds); - - // Can construct 0-slot solutions - isoln.BackSubstFrom(banding); - soln.BackSubstFrom(banding); - - // Should always return false - ASSERT_FALSE(isoln.FilterQuery(*begin, hasher)); - ASSERT_FALSE(soln.FilterQuery(*begin, hasher)); - - // And report that in FP rate - ASSERT_EQ(isoln.ExpectedFpRate(), 0.0); - ASSERT_EQ(soln.ExpectedFpRate(), 0.0); -} - -TEST(RibbonTest, RawAndOrdinalSeeds) { - StandardHasher hasher64; - StandardHasher hasher64_32; - StandardHasher hasher32; - StandardHasher hasher8; - - for (uint32_t limit : {0xffU, 0xffffU}) { - std::vector seen(limit + 1); - for (uint32_t i = 0; i < limit; ++i) { - hasher64.SetOrdinalSeed(i); - auto raw64 = hasher64.GetRawSeed(); - hasher32.SetOrdinalSeed(i); - auto raw32 = hasher32.GetRawSeed(); - hasher8.SetOrdinalSeed(static_cast(i)); - auto raw8 = hasher8.GetRawSeed(); - { - hasher64_32.SetOrdinalSeed(i); - auto raw64_32 = hasher64_32.GetRawSeed(); - ASSERT_EQ(raw64_32, raw32); // Same size seed - } - if (i == 0) { - // Documented that ordinal seed 0 == raw seed 0 - ASSERT_EQ(raw64, 0U); - ASSERT_EQ(raw32, 0U); - ASSERT_EQ(raw8, 0U); - } else { - // Extremely likely that upper bits are set - ASSERT_GT(raw64, raw32); - ASSERT_GT(raw32, raw8); - } - // Hashers agree on lower bits - ASSERT_EQ(static_cast(raw64), raw32); - ASSERT_EQ(static_cast(raw32), raw8); - - // The translation is one-to-one for this size prefix - uint32_t v = static_cast(raw32 & limit); - ASSERT_EQ(raw64 & limit, v); - ASSERT_FALSE(seen[v]); - seen[v] = true; - } - } -} - -namespace { - -struct PhsfInputGen { - PhsfInputGen(const std::string& prefix, uint64_t id) : id_(id) { - val_.first = prefix; - ROCKSDB_NAMESPACE::PutFixed64(&val_.first, /*placeholder*/ 0); - } - - // Prefix (only one 
required) - PhsfInputGen& operator++() { - ++id_; - return *this; - } - - const std::pair& operator*() { - // Use multiplication to mix things up a little in the key - ROCKSDB_NAMESPACE::EncodeFixed64(&val_.first[val_.first.size() - 8], - id_ * uint64_t{0x1500000001}); - // Occasionally repeat values etc. - val_.second = static_cast(id_ * 7 / 8); - return val_; - } - - const std::pair* operator->() { return &**this; } - - bool operator==(const PhsfInputGen& other) { - // Same prefix is assumed - return id_ == other.id_; - } - bool operator!=(const PhsfInputGen& other) { - // Same prefix is assumed - return id_ != other.id_; - } - - uint64_t id_; - std::pair val_; -}; - -struct PhsfTypesAndSettings : public DefaultTypesAndSettings { - static constexpr bool kIsFilter = false; -}; -} // namespace - -TEST(RibbonTest, PhsfBasic) { - IMPORT_RIBBON_TYPES_AND_SETTINGS(PhsfTypesAndSettings); - IMPORT_RIBBON_IMPL_TYPES(PhsfTypesAndSettings); - - Index num_slots = 12800; - Index num_to_add = static_cast(num_slots / 1.02); - - PhsfInputGen begin("in", 0); - PhsfInputGen end("in", num_to_add); - - std::unique_ptr idata(new char[/*bytes*/ num_slots]); - InterleavedSoln isoln(idata.get(), /*bytes*/ num_slots); - SimpleSoln soln; - Hasher hasher; - - { - Banding banding; - ASSERT_TRUE(banding.ResetAndFindSeedToSolve(num_slots, begin, end)); - - soln.BackSubstFrom(banding); - isoln.BackSubstFrom(banding); - - hasher.SetOrdinalSeed(banding.GetOrdinalSeed()); - } - - for (PhsfInputGen cur = begin; cur != end; ++cur) { - ASSERT_EQ(cur->second, soln.PhsfQuery(cur->first, hasher)); - ASSERT_EQ(cur->second, isoln.PhsfQuery(cur->first, hasher)); - } -} - -// Not a real test, but a tool used to build APIs in ribbon_config.h -TYPED_TEST(RibbonTypeParamTest, FindOccupancy) { - IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); - IMPORT_RIBBON_IMPL_TYPES(TypeParam); - using KeyGen = typename TypeParam::KeyGen; - - if (!FLAGS_find_occ) { - ROCKSDB_GTEST_BYPASS("Tool disabled during unit test runs"); - return; - } - - KeyGen cur(std::to_string(testing::UnitTest::GetInstance()->random_seed()), - 0); - - Banding banding; - Index num_slots = InterleavedSoln::RoundUpNumSlots(FLAGS_find_min_slots); - Index max_slots = InterleavedSoln::RoundUpNumSlots(FLAGS_find_max_slots); - while (num_slots <= max_slots) { - std::map rem_histogram; - std::map slot_histogram; - if (FLAGS_find_slot_occ) { - for (Index i = 0; i < kCoeffBits; ++i) { - slot_histogram[i] = 0; - slot_histogram[num_slots - 1 - i] = 0; - slot_histogram[num_slots / 2 - kCoeffBits / 2 + i] = 0; - } - } - uint64_t total_added = 0; - for (uint32_t i = 0; i < FLAGS_find_iters; ++i) { - banding.Reset(num_slots); - uint32_t j = 0; - KeyGen end = cur; - end += num_slots + num_slots / 10; - for (; cur != end; ++cur) { - if (banding.Add(*cur)) { - ++j; - } else { - break; - } - } - total_added += j; - for (auto& slot : slot_histogram) { - slot.second += banding.IsOccupied(slot.first); - } - - int32_t bucket = - static_cast(num_slots) - static_cast(j); - rem_histogram[bucket]++; - if (FLAGS_verbose) { - fprintf(stderr, "num_slots: %u i: %u / %u avg_overhead: %g\r", - static_cast(num_slots), static_cast(i), - static_cast(FLAGS_find_iters), - 1.0 * (i + 1) * num_slots / total_added); - } - } - if (FLAGS_verbose) { - fprintf(stderr, "\n"); - } - - uint32_t cumulative = 0; - - double p50_rem = 0; - double p95_rem = 0; - double p99_9_rem = 0; - - for (auto& h : rem_histogram) { - double before = 1.0 * cumulative / FLAGS_find_iters; - double not_after = 1.0 * (cumulative + h.second) / 
FLAGS_find_iters; - if (FLAGS_verbose) { - fprintf(stderr, "overhead: %g before: %g not_after: %g\n", - 1.0 * num_slots / (num_slots - h.first), before, not_after); - } - cumulative += h.second; - if (before < 0.5 && 0.5 <= not_after) { - // fake it with linear interpolation - double portion = (0.5 - before) / (not_after - before); - p50_rem = h.first + portion; - } else if (before < 0.95 && 0.95 <= not_after) { - // fake it with linear interpolation - double portion = (0.95 - before) / (not_after - before); - p95_rem = h.first + portion; - } else if (before < 0.999 && 0.999 <= not_after) { - // fake it with linear interpolation - double portion = (0.999 - before) / (not_after - before); - p99_9_rem = h.first + portion; - } - } - for (auto& slot : slot_histogram) { - fprintf(stderr, "slot[%u] occupied: %g\n", (unsigned)slot.first, - 1.0 * slot.second / FLAGS_find_iters); - } - - double mean_rem = - (1.0 * FLAGS_find_iters * num_slots - total_added) / FLAGS_find_iters; - fprintf( - stderr, - "num_slots: %u iters: %u mean_ovr: %g p50_ovr: %g p95_ovr: %g " - "p99.9_ovr: %g mean_rem: %g p50_rem: %g p95_rem: %g p99.9_rem: %g\n", - static_cast(num_slots), - static_cast(FLAGS_find_iters), - 1.0 * num_slots / (num_slots - mean_rem), - 1.0 * num_slots / (num_slots - p50_rem), - 1.0 * num_slots / (num_slots - p95_rem), - 1.0 * num_slots / (num_slots - p99_9_rem), mean_rem, p50_rem, p95_rem, - p99_9_rem); - - num_slots = std::max( - num_slots + 1, static_cast(num_slots * FLAGS_find_next_factor)); - num_slots = InterleavedSoln::RoundUpNumSlots(num_slots); - } -} - -// Not a real test, but a tool to understand Homogeneous Ribbon -// behavior (TODO: configuration APIs & tests) -TYPED_TEST(RibbonTypeParamTest, OptimizeHomogAtScale) { - IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); - IMPORT_RIBBON_IMPL_TYPES(TypeParam); - using KeyGen = typename TypeParam::KeyGen; - - if (!FLAGS_optimize_homog) { - ROCKSDB_GTEST_BYPASS("Tool disabled during unit test runs"); - return; - } - - if (!TypeParam::kHomogeneous) { - ROCKSDB_GTEST_BYPASS("Only for Homogeneous Ribbon"); - return; - } - - KeyGen cur(std::to_string(testing::UnitTest::GetInstance()->random_seed()), - 0); - - Banding banding; - Index num_slots = SimpleSoln::RoundUpNumSlots(FLAGS_optimize_homog_slots); - banding.Reset(num_slots); - - // This and "band_ovr" is the "allocated overhead", or slots over added. - // It does not take into account FP rates. - double target_overhead = 1.20; - uint32_t num_added = 0; - - do { - do { - (void)banding.Add(*cur); - ++cur; - ++num_added; - } while (1.0 * num_slots / num_added > target_overhead); - - SimpleSoln soln; - soln.BackSubstFrom(banding); - - std::array fp_counts_by_cols; - fp_counts_by_cols.fill(0U); - for (uint32_t i = 0; i < FLAGS_optimize_homog_check; ++i) { - ResultRow r = soln.PhsfQuery(*cur, banding); - ++cur; - for (size_t j = 0; j < fp_counts_by_cols.size(); ++j) { - if ((r & 1) == 1) { - break; - } - fp_counts_by_cols[j]++; - r /= 2; - } - } - fprintf(stderr, "band_ovr: %g ", 1.0 * num_slots / num_added); - for (unsigned j = 0; j < fp_counts_by_cols.size(); ++j) { - double inv_fp_rate = - 1.0 * FLAGS_optimize_homog_check / fp_counts_by_cols[j]; - double equiv_cols = std::log(inv_fp_rate) * 1.4426950409; - // Overhead vs. 
information-theoretic minimum based on observed - // FP rate (subject to sampling error, especially for low FP rates) - double actual_overhead = - 1.0 * (j + 1) * num_slots / (equiv_cols * num_added); - fprintf(stderr, "ovr_%u: %g ", j + 1, actual_overhead); - } - fprintf(stderr, "\n"); - target_overhead -= FLAGS_optimize_homog_granularity; - } while (target_overhead > 1.0); -} - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); -#ifdef GFLAGS - ParseCommandLineFlags(&argc, &argv, true); -#endif // GFLAGS - return RUN_ALL_TESTS(); -} diff --git a/util/slice_test.cc b/util/slice_test.cc deleted file mode 100644 index 010ded3d8..000000000 --- a/util/slice_test.cc +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "rocksdb/slice.h" - -#include - -#include "port/port.h" -#include "port/stack_trace.h" -#include "rocksdb/data_structure.h" -#include "rocksdb/types.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" - -namespace ROCKSDB_NAMESPACE { - -TEST(SliceTest, StringView) { - std::string s = "foo"; - std::string_view sv = s; - ASSERT_EQ(Slice(s), Slice(sv)); - ASSERT_EQ(Slice(s), Slice(std::move(sv))); -} - -// Use this to keep track of the cleanups that were actually performed -void Multiplier(void* arg1, void* arg2) { - int* res = reinterpret_cast(arg1); - int* num = reinterpret_cast(arg2); - *res *= *num; -} - -class PinnableSliceTest : public testing::Test { - public: - void AssertSameData(const std::string& expected, const PinnableSlice& slice) { - std::string got; - got.assign(slice.data(), slice.size()); - ASSERT_EQ(expected, got); - } -}; - -// Test that the external buffer is moved instead of being copied. -TEST_F(PinnableSliceTest, MoveExternalBuffer) { - Slice s("123"); - std::string buf; - PinnableSlice v1(&buf); - v1.PinSelf(s); - - PinnableSlice v2(std::move(v1)); - ASSERT_EQ(buf.data(), v2.data()); - ASSERT_EQ(&buf, v2.GetSelf()); - - PinnableSlice v3; - v3 = std::move(v2); - ASSERT_EQ(buf.data(), v3.data()); - ASSERT_EQ(&buf, v3.GetSelf()); -} - -TEST_F(PinnableSliceTest, Move) { - int n2 = 2; - int res = 1; - const std::string const_str1 = "123"; - const std::string const_str2 = "ABC"; - Slice slice1(const_str1); - Slice slice2(const_str2); - - { - // Test move constructor on a pinned slice. - res = 1; - PinnableSlice v1; - v1.PinSlice(slice1, Multiplier, &res, &n2); - PinnableSlice v2(std::move(v1)); - - // Since v1's Cleanable has been moved to v2, - // no cleanup should happen in Reset. - v1.Reset(); - ASSERT_EQ(1, res); - - AssertSameData(const_str1, v2); - } - // v2 is cleaned up. - ASSERT_EQ(2, res); - - { - // Test move constructor on an unpinned slice. - PinnableSlice v1; - v1.PinSelf(slice1); - PinnableSlice v2(std::move(v1)); - - AssertSameData(const_str1, v2); - } - - { - // Test move assignment from a pinned slice to - // another pinned slice. - res = 1; - PinnableSlice v1; - v1.PinSlice(slice1, Multiplier, &res, &n2); - PinnableSlice v2; - v2.PinSlice(slice2, Multiplier, &res, &n2); - v2 = std::move(v1); - - // v2's Cleanable will be Reset before moving - // anything from v1. - ASSERT_EQ(2, res); - // Since v1's Cleanable has been moved to v2, - // no cleanup should happen in Reset. 
- v1.Reset(); - ASSERT_EQ(2, res); - - AssertSameData(const_str1, v2); - } - // The Cleanable moved from v1 to v2 will be Reset. - ASSERT_EQ(4, res); - - { - // Test move assignment from a pinned slice to - // an unpinned slice. - res = 1; - PinnableSlice v1; - v1.PinSlice(slice1, Multiplier, &res, &n2); - PinnableSlice v2; - v2.PinSelf(slice2); - v2 = std::move(v1); - - // Since v1's Cleanable has been moved to v2, - // no cleanup should happen in Reset. - v1.Reset(); - ASSERT_EQ(1, res); - - AssertSameData(const_str1, v2); - } - // The Cleanable moved from v1 to v2 will be Reset. - ASSERT_EQ(2, res); - - { - // Test move assignment from an upinned slice to - // another unpinned slice. - PinnableSlice v1; - v1.PinSelf(slice1); - PinnableSlice v2; - v2.PinSelf(slice2); - v2 = std::move(v1); - - AssertSameData(const_str1, v2); - } - - { - // Test move assignment from an upinned slice to - // a pinned slice. - res = 1; - PinnableSlice v1; - v1.PinSelf(slice1); - PinnableSlice v2; - v2.PinSlice(slice2, Multiplier, &res, &n2); - v2 = std::move(v1); - - // v2's Cleanable will be Reset before moving - // anything from v1. - ASSERT_EQ(2, res); - - AssertSameData(const_str1, v2); - } - // No Cleanable is moved from v1 to v2, so no more cleanup. - ASSERT_EQ(2, res); -} - -// ***************************************************************** // -// Unit test for SmallEnumSet -class SmallEnumSetTest : public testing::Test { - public: - SmallEnumSetTest() {} - ~SmallEnumSetTest() {} -}; - -TEST_F(SmallEnumSetTest, SmallEnumSetTest1) { - FileTypeSet fs; // based on a legacy enum type - ASSERT_TRUE(fs.empty()); - ASSERT_TRUE(fs.Add(FileType::kIdentityFile)); - ASSERT_FALSE(fs.empty()); - ASSERT_FALSE(fs.Add(FileType::kIdentityFile)); - ASSERT_TRUE(fs.Add(FileType::kInfoLogFile)); - ASSERT_TRUE(fs.Contains(FileType::kIdentityFile)); - ASSERT_FALSE(fs.Contains(FileType::kDBLockFile)); - ASSERT_FALSE(fs.empty()); - ASSERT_FALSE(fs.Remove(FileType::kDBLockFile)); - ASSERT_TRUE(fs.Remove(FileType::kIdentityFile)); - ASSERT_FALSE(fs.empty()); - ASSERT_TRUE(fs.Remove(FileType::kInfoLogFile)); - ASSERT_TRUE(fs.empty()); -} - -namespace { -enum class MyEnumClass { A, B, C }; -} // namespace - -using MyEnumClassSet = SmallEnumSet; - -TEST_F(SmallEnumSetTest, SmallEnumSetTest2) { - MyEnumClassSet s; // based on an enum class type - ASSERT_TRUE(s.Add(MyEnumClass::A)); - ASSERT_TRUE(s.Contains(MyEnumClass::A)); - ASSERT_FALSE(s.Contains(MyEnumClass::B)); - ASSERT_TRUE(s.With(MyEnumClass::B).Contains(MyEnumClass::B)); - ASSERT_TRUE(s.With(MyEnumClass::A).Contains(MyEnumClass::A)); - ASSERT_FALSE(s.Contains(MyEnumClass::B)); - ASSERT_FALSE(s.Without(MyEnumClass::A).Contains(MyEnumClass::A)); - ASSERT_FALSE( - s.With(MyEnumClass::B).Without(MyEnumClass::B).Contains(MyEnumClass::B)); - ASSERT_TRUE( - s.Without(MyEnumClass::B).With(MyEnumClass::B).Contains(MyEnumClass::B)); - ASSERT_TRUE(s.Contains(MyEnumClass::A)); - - const MyEnumClassSet cs = s; - ASSERT_TRUE(cs.Contains(MyEnumClass::A)); - ASSERT_EQ(cs, MyEnumClassSet{MyEnumClass::A}); - ASSERT_EQ(cs.Without(MyEnumClass::A), MyEnumClassSet{}); - ASSERT_EQ(cs, MyEnumClassSet::All().Without(MyEnumClass::B, MyEnumClass::C)); - ASSERT_EQ(cs.With(MyEnumClass::B, MyEnumClass::C), MyEnumClassSet::All()); - ASSERT_EQ( - MyEnumClassSet::All(), - MyEnumClassSet{}.With(MyEnumClass::A, MyEnumClass::B, MyEnumClass::C)); - ASSERT_NE(cs, MyEnumClassSet{MyEnumClass::B}); - ASSERT_NE(cs, MyEnumClassSet::All()); - - int count = 0; - for (MyEnumClass e : cs) { - ASSERT_EQ(e, 
MyEnumClass::A); - ++count; - } - ASSERT_EQ(count, 1); - - count = 0; - for (MyEnumClass e : MyEnumClassSet::All().Without(MyEnumClass::B)) { - ASSERT_NE(e, MyEnumClass::B); - ++count; - } - ASSERT_EQ(count, 2); - - for (MyEnumClass e : MyEnumClassSet{}) { - (void)e; - assert(false); - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/util/slice_transform_test.cc b/util/slice_transform_test.cc deleted file mode 100644 index 64ac8bb1f..000000000 --- a/util/slice_transform_test.cc +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "rocksdb/slice_transform.h" - -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "rocksdb/filter_policy.h" -#include "rocksdb/statistics.h" -#include "rocksdb/table.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -class SliceTransformTest : public testing::Test {}; - -TEST_F(SliceTransformTest, CapPrefixTransform) { - std::string s; - s = "abcdefge"; - - std::unique_ptr transform; - - transform.reset(NewCappedPrefixTransform(6)); - ASSERT_EQ(transform->Transform(s).ToString(), "abcdef"); - ASSERT_TRUE(transform->SameResultWhenAppended("123456")); - ASSERT_TRUE(transform->SameResultWhenAppended("1234567")); - ASSERT_TRUE(!transform->SameResultWhenAppended("12345")); - - transform.reset(NewCappedPrefixTransform(8)); - ASSERT_EQ(transform->Transform(s).ToString(), "abcdefge"); - - transform.reset(NewCappedPrefixTransform(10)); - ASSERT_EQ(transform->Transform(s).ToString(), "abcdefge"); - - transform.reset(NewCappedPrefixTransform(0)); - ASSERT_EQ(transform->Transform(s).ToString(), ""); - - transform.reset(NewCappedPrefixTransform(0)); - ASSERT_EQ(transform->Transform("").ToString(), ""); -} - -class SliceTransformDBTest : public testing::Test { - private: - std::string dbname_; - Env* env_; - DB* db_; - - public: - SliceTransformDBTest() : env_(Env::Default()), db_(nullptr) { - dbname_ = test::PerThreadDBPath("slice_transform_db_test"); - EXPECT_OK(DestroyDB(dbname_, last_options_)); - } - - ~SliceTransformDBTest() override { - delete db_; - EXPECT_OK(DestroyDB(dbname_, last_options_)); - } - - DB* db() { return db_; } - - // Return the current option configuration. 
- Options* GetOptions() { return &last_options_; } - - void DestroyAndReopen() { - // Destroy using last options - Destroy(); - ASSERT_OK(TryReopen()); - } - - void Destroy() { - delete db_; - db_ = nullptr; - ASSERT_OK(DestroyDB(dbname_, last_options_)); - } - - Status TryReopen() { - delete db_; - db_ = nullptr; - last_options_.create_if_missing = true; - - return DB::Open(last_options_, dbname_, &db_); - } - - Options last_options_; -}; - -namespace { -uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) { - return options.statistics->getTickerCount(ticker_type); -} -} // namespace - -TEST_F(SliceTransformDBTest, CapPrefix) { - last_options_.prefix_extractor.reset(NewCappedPrefixTransform(8)); - last_options_.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); - bbto.whole_key_filtering = false; - last_options_.table_factory.reset(NewBlockBasedTableFactory(bbto)); - ASSERT_OK(TryReopen()); - - ReadOptions ro; - FlushOptions fo; - WriteOptions wo; - - ASSERT_OK(db()->Put(wo, "barbarbar", "foo")); - ASSERT_OK(db()->Put(wo, "barbarbar2", "foo2")); - ASSERT_OK(db()->Put(wo, "foo", "bar")); - ASSERT_OK(db()->Put(wo, "foo3", "bar3")); - ASSERT_OK(db()->Flush(fo)); - - std::unique_ptr iter(db()->NewIterator(ro)); - - iter->Seek("foo"); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->value().ToString(), "bar"); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 0U); - - iter->Seek("foo2"); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 1U); - - iter->Seek("barbarbar"); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->value().ToString(), "foo"); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 1U); - - iter->Seek("barfoofoo"); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 2U); - - iter->Seek("foobarbar"); - ASSERT_OK(iter->status()); - ASSERT_TRUE(!iter->Valid()); - ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 3U); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/util/thread_list_test.cc b/util/thread_list_test.cc deleted file mode 100644 index af4e62355..000000000 --- a/util/thread_list_test.cc +++ /dev/null @@ -1,360 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
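For context on the slice-transform tests above: they all exercise one configuration idea, a capped prefix extractor feeding a prefix Bloom filter with whole-key filtering turned off. Below is a minimal sketch of that wiring; it is illustrative only, the function name is invented, and it is not code from the deleted file.

#include <string>

#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"

// Open a DB whose Bloom filter is built on capped 8-byte prefixes, mirroring
// the CapPrefix test configuration. Seeks whose prefix is missing from a
// file's filter can then skip that file's data blocks, which is roughly what
// the BLOOM_FILTER_PREFIX_USEFUL ticker checked above counts.
rocksdb::Status OpenWithCappedPrefixBloom(const std::string& path,
                                          rocksdb::DB** db) {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Keys shorter than 8 bytes are their own prefix; longer keys are truncated.
  options.prefix_extractor.reset(rocksdb::NewCappedPrefixTransform(8));

  rocksdb::BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10 /* bits per key */));
  bbto.whole_key_filtering = false;  // filter prefixes, not whole keys
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbto));

  return rocksdb::DB::Open(options, path, db);
}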
- -#include -#include - -#include "monitoring/thread_status_updater.h" -#include "rocksdb/db.h" -#include "test_util/testharness.h" - -#ifdef ROCKSDB_USING_THREAD_STATUS - -namespace ROCKSDB_NAMESPACE { - -class SimulatedBackgroundTask { - public: - SimulatedBackgroundTask( - const void* db_key, const std::string& db_name, const void* cf_key, - const std::string& cf_name, - const ThreadStatus::OperationType operation_type = - ThreadStatus::OP_UNKNOWN, - const ThreadStatus::StateType state_type = ThreadStatus::STATE_UNKNOWN) - : db_key_(db_key), - db_name_(db_name), - cf_key_(cf_key), - cf_name_(cf_name), - operation_type_(operation_type), - state_type_(state_type), - should_run_(true), - running_count_(0) { - Env::Default()->GetThreadStatusUpdater()->NewColumnFamilyInfo( - db_key_, db_name_, cf_key_, cf_name_); - } - - ~SimulatedBackgroundTask() { - Env::Default()->GetThreadStatusUpdater()->EraseDatabaseInfo(db_key_); - } - - void Run() { - std::unique_lock l(mutex_); - running_count_++; - bg_cv_.notify_all(); - Env::Default()->GetThreadStatusUpdater()->SetColumnFamilyInfoKey(cf_key_); - Env::Default()->GetThreadStatusUpdater()->SetThreadOperation( - operation_type_); - Env::Default()->GetThreadStatusUpdater()->SetThreadState(state_type_); - while (should_run_) { - bg_cv_.wait(l); - } - Env::Default()->GetThreadStatusUpdater()->ClearThreadState(); - Env::Default()->GetThreadStatusUpdater()->ClearThreadOperation(); - Env::Default()->GetThreadStatusUpdater()->SetColumnFamilyInfoKey(nullptr); - running_count_--; - bg_cv_.notify_all(); - } - - void FinishAllTasks() { - std::unique_lock l(mutex_); - should_run_ = false; - bg_cv_.notify_all(); - } - - void WaitUntilScheduled(int job_count) { - std::unique_lock l(mutex_); - while (running_count_ < job_count) { - bg_cv_.wait(l); - } - } - - void WaitUntilDone() { - std::unique_lock l(mutex_); - while (running_count_ > 0) { - bg_cv_.wait(l); - } - } - - static void DoSimulatedTask(void* arg) { - reinterpret_cast(arg)->Run(); - } - - private: - const void* db_key_; - const std::string db_name_; - const void* cf_key_; - const std::string cf_name_; - const ThreadStatus::OperationType operation_type_; - const ThreadStatus::StateType state_type_; - std::mutex mutex_; - std::condition_variable bg_cv_; - bool should_run_; - std::atomic running_count_; -}; - -class ThreadListTest : public testing::Test { - public: - ThreadListTest() {} -}; - -TEST_F(ThreadListTest, GlobalTables) { - // verify the global tables for operations and states are properly indexed. 
- for (int type = 0; type != ThreadStatus::NUM_OP_TYPES; ++type) { - ASSERT_EQ(global_operation_table[type].type, type); - ASSERT_EQ( - global_operation_table[type].name, - ThreadStatus::GetOperationName(ThreadStatus::OperationType(type))); - } - - for (int type = 0; type != ThreadStatus::NUM_STATE_TYPES; ++type) { - ASSERT_EQ(global_state_table[type].type, type); - ASSERT_EQ(global_state_table[type].name, - ThreadStatus::GetStateName(ThreadStatus::StateType(type))); - } - - for (int stage = 0; stage != ThreadStatus::NUM_OP_STAGES; ++stage) { - ASSERT_EQ(global_op_stage_table[stage].stage, stage); - ASSERT_EQ(global_op_stage_table[stage].name, - ThreadStatus::GetOperationStageName( - ThreadStatus::OperationStage(stage))); - } -} - -TEST_F(ThreadListTest, SimpleColumnFamilyInfoTest) { - Env* env = Env::Default(); - const int kHighPriorityThreads = 3; - const int kLowPriorityThreads = 5; - const int kSimulatedHighPriThreads = kHighPriorityThreads - 1; - const int kSimulatedLowPriThreads = kLowPriorityThreads / 3; - const int kDelayMicros = 1000000; - env->SetBackgroundThreads(kHighPriorityThreads, Env::HIGH); - env->SetBackgroundThreads(kLowPriorityThreads, Env::LOW); - // Wait 1 second so that threads start - Env::Default()->SleepForMicroseconds(kDelayMicros); - SimulatedBackgroundTask running_task(reinterpret_cast(1234), "running", - reinterpret_cast(5678), - "pikachu"); - - for (int test = 0; test < kSimulatedHighPriThreads; ++test) { - env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, &running_task, - Env::Priority::HIGH); - } - - for (int test = 0; test < kSimulatedLowPriThreads; ++test) { - env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, &running_task, - Env::Priority::LOW); - } - running_task.WaitUntilScheduled(kSimulatedHighPriThreads + - kSimulatedLowPriThreads); - // We can only reserve limited number of waiting threads - ASSERT_EQ(kHighPriorityThreads - kSimulatedHighPriThreads, - env->ReserveThreads(kHighPriorityThreads, Env::Priority::HIGH)); - ASSERT_EQ(kLowPriorityThreads - kSimulatedLowPriThreads, - env->ReserveThreads(kLowPriorityThreads, Env::Priority::LOW)); - - // Reservation shall not affect the existing thread list - std::vector thread_list; - - // Verify the number of running threads in each pool. 
- ASSERT_OK(env->GetThreadList(&thread_list)); - int running_count[ThreadStatus::NUM_THREAD_TYPES] = {0}; - for (auto thread_status : thread_list) { - if (thread_status.cf_name == "pikachu" && - thread_status.db_name == "running") { - running_count[thread_status.thread_type]++; - } - } - // Cannot reserve more threads - ASSERT_EQ(0, env->ReserveThreads(kHighPriorityThreads, Env::Priority::HIGH)); - ASSERT_EQ(0, env->ReserveThreads(kLowPriorityThreads, Env::Priority::LOW)); - - ASSERT_EQ(running_count[ThreadStatus::HIGH_PRIORITY], - kSimulatedHighPriThreads); - ASSERT_EQ(running_count[ThreadStatus::LOW_PRIORITY], kSimulatedLowPriThreads); - ASSERT_EQ(running_count[ThreadStatus::USER], 0); - - running_task.FinishAllTasks(); - running_task.WaitUntilDone(); - - ASSERT_EQ(kHighPriorityThreads - kSimulatedHighPriThreads, - env->ReleaseThreads(kHighPriorityThreads, Env::Priority::HIGH)); - ASSERT_EQ(kLowPriorityThreads - kSimulatedLowPriThreads, - env->ReleaseThreads(kLowPriorityThreads, Env::Priority::LOW)); - // Verify none of the threads are running - ASSERT_OK(env->GetThreadList(&thread_list)); - - for (int i = 0; i < ThreadStatus::NUM_THREAD_TYPES; ++i) { - running_count[i] = 0; - } - for (auto thread_status : thread_list) { - if (thread_status.cf_name == "pikachu" && - thread_status.db_name == "running") { - running_count[thread_status.thread_type]++; - } - } - - ASSERT_EQ(running_count[ThreadStatus::HIGH_PRIORITY], 0); - ASSERT_EQ(running_count[ThreadStatus::LOW_PRIORITY], 0); - ASSERT_EQ(running_count[ThreadStatus::USER], 0); -} - -namespace { -void UpdateStatusCounts(const std::vector& thread_list, - int operation_counts[], int state_counts[]) { - for (auto thread_status : thread_list) { - operation_counts[thread_status.operation_type]++; - state_counts[thread_status.state_type]++; - } -} - -void VerifyAndResetCounts(const int correct_counts[], int collected_counts[], - int size) { - for (int i = 0; i < size; ++i) { - ASSERT_EQ(collected_counts[i], correct_counts[i]); - collected_counts[i] = 0; - } -} - -void UpdateCount(int operation_counts[], int from_event, int to_event, - int amount) { - operation_counts[from_event] -= amount; - operation_counts[to_event] += amount; -} -} // namespace - -TEST_F(ThreadListTest, SimpleEventTest) { - Env* env = Env::Default(); - - // simulated tasks - const int kFlushWriteTasks = 3; - SimulatedBackgroundTask flush_write_task( - reinterpret_cast(1234), "running", reinterpret_cast(5678), - "pikachu", ThreadStatus::OP_FLUSH); - - const int kCompactionWriteTasks = 4; - SimulatedBackgroundTask compaction_write_task( - reinterpret_cast(1234), "running", reinterpret_cast(5678), - "pikachu", ThreadStatus::OP_COMPACTION); - - const int kCompactionReadTasks = 5; - SimulatedBackgroundTask compaction_read_task( - reinterpret_cast(1234), "running", reinterpret_cast(5678), - "pikachu", ThreadStatus::OP_COMPACTION); - - const int kCompactionWaitTasks = 6; - SimulatedBackgroundTask compaction_wait_task( - reinterpret_cast(1234), "running", reinterpret_cast(5678), - "pikachu", ThreadStatus::OP_COMPACTION); - - // setup right answers - int correct_operation_counts[ThreadStatus::NUM_OP_TYPES] = {0}; - correct_operation_counts[ThreadStatus::OP_FLUSH] = kFlushWriteTasks; - correct_operation_counts[ThreadStatus::OP_COMPACTION] = - kCompactionWriteTasks + kCompactionReadTasks + kCompactionWaitTasks; - - env->SetBackgroundThreads(correct_operation_counts[ThreadStatus::OP_FLUSH], - Env::HIGH); - env->SetBackgroundThreads( - correct_operation_counts[ThreadStatus::OP_COMPACTION], 
Env::LOW); - - // schedule the simulated tasks - for (int t = 0; t < kFlushWriteTasks; ++t) { - env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, &flush_write_task, - Env::Priority::HIGH); - } - flush_write_task.WaitUntilScheduled(kFlushWriteTasks); - - for (int t = 0; t < kCompactionWriteTasks; ++t) { - env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, - &compaction_write_task, Env::Priority::LOW); - } - compaction_write_task.WaitUntilScheduled(kCompactionWriteTasks); - - for (int t = 0; t < kCompactionReadTasks; ++t) { - env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, - &compaction_read_task, Env::Priority::LOW); - } - compaction_read_task.WaitUntilScheduled(kCompactionReadTasks); - - for (int t = 0; t < kCompactionWaitTasks; ++t) { - env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, - &compaction_wait_task, Env::Priority::LOW); - } - compaction_wait_task.WaitUntilScheduled(kCompactionWaitTasks); - - // verify the thread-status - int operation_counts[ThreadStatus::NUM_OP_TYPES] = {0}; - int state_counts[ThreadStatus::NUM_STATE_TYPES] = {0}; - - std::vector thread_list; - ASSERT_OK(env->GetThreadList(&thread_list)); - UpdateStatusCounts(thread_list, operation_counts, state_counts); - VerifyAndResetCounts(correct_operation_counts, operation_counts, - ThreadStatus::NUM_OP_TYPES); - - // terminate compaction-wait tasks and see if the thread-status - // reflects this update - compaction_wait_task.FinishAllTasks(); - compaction_wait_task.WaitUntilDone(); - UpdateCount(correct_operation_counts, ThreadStatus::OP_COMPACTION, - ThreadStatus::OP_UNKNOWN, kCompactionWaitTasks); - - ASSERT_OK(env->GetThreadList(&thread_list)); - UpdateStatusCounts(thread_list, operation_counts, state_counts); - VerifyAndResetCounts(correct_operation_counts, operation_counts, - ThreadStatus::NUM_OP_TYPES); - - // terminate flush-write tasks and see if the thread-status - // reflects this update - flush_write_task.FinishAllTasks(); - flush_write_task.WaitUntilDone(); - UpdateCount(correct_operation_counts, ThreadStatus::OP_FLUSH, - ThreadStatus::OP_UNKNOWN, kFlushWriteTasks); - - ASSERT_OK(env->GetThreadList(&thread_list)); - UpdateStatusCounts(thread_list, operation_counts, state_counts); - VerifyAndResetCounts(correct_operation_counts, operation_counts, - ThreadStatus::NUM_OP_TYPES); - - // terminate compaction-write tasks and see if the thread-status - // reflects this update - compaction_write_task.FinishAllTasks(); - compaction_write_task.WaitUntilDone(); - UpdateCount(correct_operation_counts, ThreadStatus::OP_COMPACTION, - ThreadStatus::OP_UNKNOWN, kCompactionWriteTasks); - - ASSERT_OK(env->GetThreadList(&thread_list)); - UpdateStatusCounts(thread_list, operation_counts, state_counts); - VerifyAndResetCounts(correct_operation_counts, operation_counts, - ThreadStatus::NUM_OP_TYPES); - - // terminate compaction-write tasks and see if the thread-status - // reflects this update - compaction_read_task.FinishAllTasks(); - compaction_read_task.WaitUntilDone(); - UpdateCount(correct_operation_counts, ThreadStatus::OP_COMPACTION, - ThreadStatus::OP_UNKNOWN, kCompactionReadTasks); - - ASSERT_OK(env->GetThreadList(&thread_list)); - UpdateStatusCounts(thread_list, operation_counts, state_counts); - VerifyAndResetCounts(correct_operation_counts, operation_counts, - ThreadStatus::NUM_OP_TYPES); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - 
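For context on what the thread-list assertions above are measuring: the same thread-status information is available to applications through Env::GetThreadList(). The sketch below is a rough illustration, assuming a build with ROCKSDB_USING_THREAD_STATUS enabled; the function name is invented.

#include <cstdio>
#include <vector>

#include "rocksdb/env.h"
#include "rocksdb/thread_status.h"

// Print one line per RocksDB background thread, using the same ThreadStatus
// fields the tests above aggregate (thread_type, db_name, cf_name,
// operation_type).
void DumpThreadStatus() {
  std::vector<rocksdb::ThreadStatus> thread_list;
  rocksdb::Status s = rocksdb::Env::Default()->GetThreadList(&thread_list);
  if (!s.ok()) {
    return;  // e.g. built without thread-status support
  }
  for (const auto& ts : thread_list) {
    std::fprintf(stderr, "type=%d db=%s cf=%s op=%s\n",
                 static_cast<int>(ts.thread_type), ts.db_name.c_str(),
                 ts.cf_name.c_str(),
                 rocksdb::ThreadStatus::GetOperationName(ts.operation_type)
                     .c_str());
  }
}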
-#else - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return 0; -} - -#endif // ROCKSDB_USING_THREAD_STATUS diff --git a/util/thread_local_test.cc b/util/thread_local_test.cc deleted file mode 100644 index 3d12fe83a..000000000 --- a/util/thread_local_test.cc +++ /dev/null @@ -1,578 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "util/thread_local.h" - -#include -#include -#include - -#include "port/port.h" -#include "rocksdb/env.h" -#include "test_util/sync_point.h" -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "util/autovector.h" - -namespace ROCKSDB_NAMESPACE { - -class ThreadLocalTest : public testing::Test { - public: - ThreadLocalTest() : env_(Env::Default()) {} - - Env* env_; -}; - -namespace { - -struct Params { - Params(port::Mutex* m, port::CondVar* c, int* u, int n, - UnrefHandler handler = nullptr) - : mu(m), - cv(c), - unref(u), - total(n), - started(0), - completed(0), - doWrite(false), - tls1(handler), - tls2(nullptr) {} - - port::Mutex* mu; - port::CondVar* cv; - int* unref; - int total; - int started; - int completed; - bool doWrite; - ThreadLocalPtr tls1; - ThreadLocalPtr* tls2; -}; - -class IDChecker : public ThreadLocalPtr { - public: - static uint32_t PeekId() { return TEST_PeekId(); } -}; - -} // anonymous namespace - -// Suppress false positive clang analyzer warnings. -#ifndef __clang_analyzer__ -TEST_F(ThreadLocalTest, UniqueIdTest) { - port::Mutex mu; - port::CondVar cv(&mu); - - uint32_t base_id = IDChecker::PeekId(); - // New ThreadLocal instance bumps id by 1 - { - // Id used 0 - Params p1(&mu, &cv, nullptr, 1u); - ASSERT_EQ(IDChecker::PeekId(), base_id + 1u); - // Id used 1 - Params p2(&mu, &cv, nullptr, 1u); - ASSERT_EQ(IDChecker::PeekId(), base_id + 2u); - // Id used 2 - Params p3(&mu, &cv, nullptr, 1u); - ASSERT_EQ(IDChecker::PeekId(), base_id + 3u); - // Id used 3 - Params p4(&mu, &cv, nullptr, 1u); - ASSERT_EQ(IDChecker::PeekId(), base_id + 4u); - } - // id 3, 2, 1, 0 are in the free queue in order - ASSERT_EQ(IDChecker::PeekId(), base_id + 0u); - - // pick up 0 - Params p1(&mu, &cv, nullptr, 1u); - ASSERT_EQ(IDChecker::PeekId(), base_id + 1u); - // pick up 1 - Params* p2 = new Params(&mu, &cv, nullptr, 1u); - ASSERT_EQ(IDChecker::PeekId(), base_id + 2u); - // pick up 2 - Params p3(&mu, &cv, nullptr, 1u); - ASSERT_EQ(IDChecker::PeekId(), base_id + 3u); - // return up 1 - delete p2; - ASSERT_EQ(IDChecker::PeekId(), base_id + 1u); - // Now we have 3, 1 in queue - // pick up 1 - Params p4(&mu, &cv, nullptr, 1u); - ASSERT_EQ(IDChecker::PeekId(), base_id + 3u); - // pick up 3 - Params p5(&mu, &cv, nullptr, 1u); - // next new id - ASSERT_EQ(IDChecker::PeekId(), base_id + 4u); - // After exit, id sequence in queue: - // 3, 1, 2, 0 -} -#endif // __clang_analyzer__ - -TEST_F(ThreadLocalTest, SequentialReadWriteTest) { - // global id list carries over 3, 1, 2, 0 - uint32_t base_id = IDChecker::PeekId(); - - port::Mutex mu; - port::CondVar cv(&mu); - Params p(&mu, &cv, nullptr, 1); - ThreadLocalPtr tls2; - p.tls2 = &tls2; - - ASSERT_GT(IDChecker::PeekId(), base_id); - base_id = IDChecker::PeekId(); - - auto func = [](Params* ptr) { - Params& params = *ptr; - ASSERT_TRUE(params.tls1.Get() == nullptr); - params.tls1.Reset(reinterpret_cast(1)); - 
ASSERT_TRUE(params.tls1.Get() == reinterpret_cast(1)); - params.tls1.Reset(reinterpret_cast(2)); - ASSERT_TRUE(params.tls1.Get() == reinterpret_cast(2)); - - ASSERT_TRUE(params.tls2->Get() == nullptr); - params.tls2->Reset(reinterpret_cast(1)); - ASSERT_TRUE(params.tls2->Get() == reinterpret_cast(1)); - params.tls2->Reset(reinterpret_cast(2)); - ASSERT_TRUE(params.tls2->Get() == reinterpret_cast(2)); - - params.mu->Lock(); - ++(params.completed); - params.cv->SignalAll(); - params.mu->Unlock(); - }; - - for (int iter = 0; iter < 1024; ++iter) { - ASSERT_EQ(IDChecker::PeekId(), base_id); - // Another new thread, read/write should not see value from previous thread - env_->StartThreadTyped(func, &p); - - mu.Lock(); - while (p.completed != iter + 1) { - cv.Wait(); - } - mu.Unlock(); - ASSERT_EQ(IDChecker::PeekId(), base_id); - } -} - -TEST_F(ThreadLocalTest, ConcurrentReadWriteTest) { - // global id list carries over 3, 1, 2, 0 - uint32_t base_id = IDChecker::PeekId(); - - ThreadLocalPtr tls2; - port::Mutex mu1; - port::CondVar cv1(&mu1); - Params p1(&mu1, &cv1, nullptr, 16); - p1.tls2 = &tls2; - - port::Mutex mu2; - port::CondVar cv2(&mu2); - Params p2(&mu2, &cv2, nullptr, 16); - p2.doWrite = true; - p2.tls2 = &tls2; - - auto func = [](void* ptr) { - auto& p = *static_cast(ptr); - - p.mu->Lock(); - // Size_T switches size along with the ptr size - // we want to cast to. - size_t own = ++(p.started); - p.cv->SignalAll(); - while (p.started != p.total) { - p.cv->Wait(); - } - p.mu->Unlock(); - - // Let write threads write a different value from the read threads - if (p.doWrite) { - own += 8192; - } - - ASSERT_TRUE(p.tls1.Get() == nullptr); - ASSERT_TRUE(p.tls2->Get() == nullptr); - - auto* env = Env::Default(); - auto start = env->NowMicros(); - - p.tls1.Reset(reinterpret_cast(own)); - p.tls2->Reset(reinterpret_cast(own + 1)); - // Loop for 1 second - while (env->NowMicros() - start < 1000 * 1000) { - for (int iter = 0; iter < 100000; ++iter) { - ASSERT_TRUE(p.tls1.Get() == reinterpret_cast(own)); - ASSERT_TRUE(p.tls2->Get() == reinterpret_cast(own + 1)); - if (p.doWrite) { - p.tls1.Reset(reinterpret_cast(own)); - p.tls2->Reset(reinterpret_cast(own + 1)); - } - } - } - - p.mu->Lock(); - ++(p.completed); - p.cv->SignalAll(); - p.mu->Unlock(); - }; - - // Initiate 2 instnaces: one keeps writing and one keeps reading. - // The read instance should not see data from the write instance. - // Each thread local copy of the value are also different from each - // other. 
- for (int th = 0; th < p1.total; ++th) { - env_->StartThreadTyped(func, &p1); - } - for (int th = 0; th < p2.total; ++th) { - env_->StartThreadTyped(func, &p2); - } - - mu1.Lock(); - while (p1.completed != p1.total) { - cv1.Wait(); - } - mu1.Unlock(); - - mu2.Lock(); - while (p2.completed != p2.total) { - cv2.Wait(); - } - mu2.Unlock(); - - ASSERT_EQ(IDChecker::PeekId(), base_id + 3u); -} - -TEST_F(ThreadLocalTest, Unref) { - auto unref = [](void* ptr) { - auto& p = *static_cast(ptr); - p.mu->Lock(); - ++(*p.unref); - p.mu->Unlock(); - }; - - // Case 0: no unref triggered if ThreadLocalPtr is never accessed - auto func0 = [](Params* ptr) { - auto& p = *ptr; - p.mu->Lock(); - ++(p.started); - p.cv->SignalAll(); - while (p.started != p.total) { - p.cv->Wait(); - } - p.mu->Unlock(); - }; - - for (int th = 1; th <= 128; th += th) { - port::Mutex mu; - port::CondVar cv(&mu); - int unref_count = 0; - Params p(&mu, &cv, &unref_count, th, unref); - - for (int i = 0; i < p.total; ++i) { - env_->StartThreadTyped(func0, &p); - } - env_->WaitForJoin(); - ASSERT_EQ(unref_count, 0); - } - - // Case 1: unref triggered by thread exit - auto func1 = [](Params* ptr) { - auto& p = *ptr; - - p.mu->Lock(); - ++(p.started); - p.cv->SignalAll(); - while (p.started != p.total) { - p.cv->Wait(); - } - p.mu->Unlock(); - - ASSERT_TRUE(p.tls1.Get() == nullptr); - ASSERT_TRUE(p.tls2->Get() == nullptr); - - p.tls1.Reset(ptr); - p.tls2->Reset(ptr); - - p.tls1.Reset(ptr); - p.tls2->Reset(ptr); - }; - - for (int th = 1; th <= 128; th += th) { - port::Mutex mu; - port::CondVar cv(&mu); - int unref_count = 0; - ThreadLocalPtr tls2(unref); - Params p(&mu, &cv, &unref_count, th, unref); - p.tls2 = &tls2; - - for (int i = 0; i < p.total; ++i) { - env_->StartThreadTyped(func1, &p); - } - - env_->WaitForJoin(); - - // N threads x 2 ThreadLocal instance cleanup on thread exit - ASSERT_EQ(unref_count, 2 * p.total); - } - - // Case 2: unref triggered by ThreadLocal instance destruction - auto func2 = [](Params* ptr) { - auto& p = *ptr; - - p.mu->Lock(); - ++(p.started); - p.cv->SignalAll(); - while (p.started != p.total) { - p.cv->Wait(); - } - p.mu->Unlock(); - - ASSERT_TRUE(p.tls1.Get() == nullptr); - ASSERT_TRUE(p.tls2->Get() == nullptr); - - p.tls1.Reset(ptr); - p.tls2->Reset(ptr); - - p.tls1.Reset(ptr); - p.tls2->Reset(ptr); - - p.mu->Lock(); - ++(p.completed); - p.cv->SignalAll(); - - // Waiting for instruction to exit thread - while (p.completed != 0) { - p.cv->Wait(); - } - p.mu->Unlock(); - }; - - for (int th = 1; th <= 128; th += th) { - port::Mutex mu; - port::CondVar cv(&mu); - int unref_count = 0; - Params p(&mu, &cv, &unref_count, th, unref); - p.tls2 = new ThreadLocalPtr(unref); - - for (int i = 0; i < p.total; ++i) { - env_->StartThreadTyped(func2, &p); - } - - // Wait for all threads to finish using Params - mu.Lock(); - while (p.completed != p.total) { - cv.Wait(); - } - mu.Unlock(); - - // Now destroy one ThreadLocal instance - delete p.tls2; - p.tls2 = nullptr; - // instance destroy for N threads - ASSERT_EQ(unref_count, p.total); - - // Signal to exit - mu.Lock(); - p.completed = 0; - cv.SignalAll(); - mu.Unlock(); - env_->WaitForJoin(); - // additional N threads exit unref for the left instance - ASSERT_EQ(unref_count, 2 * p.total); - } -} - -TEST_F(ThreadLocalTest, Swap) { - ThreadLocalPtr tls; - tls.Reset(reinterpret_cast(1)); - ASSERT_EQ(reinterpret_cast(tls.Swap(nullptr)), 1); - ASSERT_TRUE(tls.Swap(reinterpret_cast(2)) == nullptr); - ASSERT_EQ(reinterpret_cast(tls.Get()), 2); - 
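// Note on the semantics asserted here: ThreadLocalPtr::Swap() installs the new
// pointer for the calling thread and returns that thread's previous value, so
// after storing 2 above, the next Swap(3) is expected to return 2.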
ASSERT_EQ(reinterpret_cast(tls.Swap(reinterpret_cast(3))), 2); -} - -TEST_F(ThreadLocalTest, Scrape) { - auto unref = [](void* ptr) { - auto& p = *static_cast(ptr); - p.mu->Lock(); - ++(*p.unref); - p.mu->Unlock(); - }; - - auto func = [](void* ptr) { - auto& p = *static_cast(ptr); - - ASSERT_TRUE(p.tls1.Get() == nullptr); - ASSERT_TRUE(p.tls2->Get() == nullptr); - - p.tls1.Reset(ptr); - p.tls2->Reset(ptr); - - p.tls1.Reset(ptr); - p.tls2->Reset(ptr); - - p.mu->Lock(); - ++(p.completed); - p.cv->SignalAll(); - - // Waiting for instruction to exit thread - while (p.completed != 0) { - p.cv->Wait(); - } - p.mu->Unlock(); - }; - - for (int th = 1; th <= 128; th += th) { - port::Mutex mu; - port::CondVar cv(&mu); - int unref_count = 0; - Params p(&mu, &cv, &unref_count, th, unref); - p.tls2 = new ThreadLocalPtr(unref); - - for (int i = 0; i < p.total; ++i) { - env_->StartThreadTyped(func, &p); - } - - // Wait for all threads to finish using Params - mu.Lock(); - while (p.completed != p.total) { - cv.Wait(); - } - mu.Unlock(); - - ASSERT_EQ(unref_count, 0); - - // Scrape all thread local data. No unref at thread - // exit or ThreadLocalPtr destruction - autovector ptrs; - p.tls1.Scrape(&ptrs, nullptr); - p.tls2->Scrape(&ptrs, nullptr); - delete p.tls2; - // Signal to exit - mu.Lock(); - p.completed = 0; - cv.SignalAll(); - mu.Unlock(); - env_->WaitForJoin(); - - ASSERT_EQ(unref_count, 0); - } -} - -TEST_F(ThreadLocalTest, Fold) { - auto unref = [](void* ptr) { - delete static_cast*>(ptr); - }; - static const int kNumThreads = 16; - static const int kItersPerThread = 10; - port::Mutex mu; - port::CondVar cv(&mu); - Params params(&mu, &cv, nullptr, kNumThreads, unref); - auto func = [](void* ptr) { - auto& p = *static_cast(ptr); - ASSERT_TRUE(p.tls1.Get() == nullptr); - p.tls1.Reset(new std::atomic(0)); - - for (int i = 0; i < kItersPerThread; ++i) { - static_cast*>(p.tls1.Get())->fetch_add(1); - } - - p.mu->Lock(); - ++(p.completed); - p.cv->SignalAll(); - - // Waiting for instruction to exit thread - while (p.completed != 0) { - p.cv->Wait(); - } - p.mu->Unlock(); - }; - - for (int th = 0; th < params.total; ++th) { - env_->StartThread(func, ¶ms); - } - - // Wait for all threads to finish using Params - mu.Lock(); - while (params.completed != params.total) { - cv.Wait(); - } - mu.Unlock(); - - // Verify Fold() behavior - int64_t sum = 0; - params.tls1.Fold( - [](void* ptr, void* res) { - auto sum_ptr = static_cast(res); - *sum_ptr += static_cast*>(ptr)->load(); - }, - &sum); - ASSERT_EQ(sum, kNumThreads * kItersPerThread); - - // Signal to exit - mu.Lock(); - params.completed = 0; - cv.SignalAll(); - mu.Unlock(); - env_->WaitForJoin(); -} - -TEST_F(ThreadLocalTest, CompareAndSwap) { - ThreadLocalPtr tls; - ASSERT_TRUE(tls.Swap(reinterpret_cast(1)) == nullptr); - void* expected = reinterpret_cast(1); - // Swap in 2 - ASSERT_TRUE(tls.CompareAndSwap(reinterpret_cast(2), expected)); - expected = reinterpret_cast(100); - // Fail Swap, still 2 - ASSERT_TRUE(!tls.CompareAndSwap(reinterpret_cast(2), expected)); - ASSERT_EQ(expected, reinterpret_cast(2)); - // Swap in 3 - expected = reinterpret_cast(2); - ASSERT_TRUE(tls.CompareAndSwap(reinterpret_cast(3), expected)); - ASSERT_EQ(tls.Get(), reinterpret_cast(3)); -} - -namespace { - -void* AccessThreadLocal(void* /*arg*/) { - TEST_SYNC_POINT("AccessThreadLocal:Start"); - ThreadLocalPtr tlp; - tlp.Reset(new std::string("hello RocksDB")); - TEST_SYNC_POINT("AccessThreadLocal:End"); - return nullptr; -} - -} // namespace - -// The following test is disabled 
as it requires manual steps to run it
-// correctly.
-//
-// Currently we have no way to access SyncPoint w/o an ASAN error when the
-// child thread dies after the main thread dies. So if you manually enable
-// this test and only see an ASAN error on SyncPoint, it means you pass the
-// test.
-TEST_F(ThreadLocalTest, DISABLED_MainThreadDiesFirst) {
-  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
-      {{"AccessThreadLocal:Start", "MainThreadDiesFirst:End"},
-       {"PosixEnv::~PosixEnv():End", "AccessThreadLocal:End"}});
-
-  // Triggers the initialization of singletons.
-  Env::Default();
-
-  try {
-    ROCKSDB_NAMESPACE::port::Thread th(&AccessThreadLocal, nullptr);
-    th.detach();
-    TEST_SYNC_POINT("MainThreadDiesFirst:End");
-  } catch (const std::system_error& ex) {
-    std::cerr << "Start thread: " << ex.code() << std::endl;
-    FAIL();
-  }
-}
-
-}  // namespace ROCKSDB_NAMESPACE
-
-int main(int argc, char** argv) {
-  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
-  ::testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/util/timer_queue_test.cc b/util/timer_queue_test.cc
deleted file mode 100644
index b3c3768ec..000000000
--- a/util/timer_queue_test.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-// Portions Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-
-// borrowed from
-// http://www.crazygaze.com/blog/2016/03/24/portable-c-timer-queue/
-// Timer Queue
-//
-// License
-//
-// The source code in this article is licensed under the CC0 license, so feel
-// free
-// to copy, modify, share, do whatever you want with it.
-// No attribution is required, but I'll be happy if you do.
-// CC0 license
-
-// The person who associated a work with this deed has dedicated the work to the
-// public domain by waiving all of his or her rights to the work worldwide
-// under copyright law, including all related and neighboring rights, to the
-// extent allowed by law. You can copy, modify, distribute and perform the
-// work, even for
-// commercial purposes, all without asking permission. See Other Information
-// below.
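// Usage notes for this borrowed TimerQueue utility (util/timer_queue.h), as
// illustrated by the tests below: add(ms, handler) schedules the handler after
// the given delay and returns an id that can be passed to cancel(). The
// handler receives `aborted` (true when the queue is shutting down or the item
// was cancelled) and returns a {reschedule, next-delay-ms} pair, which is how
// the T3 and T4 callbacks below turn themselves into periodic timers.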
-// - -#include "util/timer_queue.h" - -#include - -namespace Timing { - -using Clock = std::chrono::high_resolution_clock; -double now() { - static auto start = Clock::now(); - return std::chrono::duration(Clock::now() - start) - .count(); -} - -} // namespace Timing - -int main() { - TimerQueue q; - - double tnow = Timing::now(); - - q.add(10000, [tnow](bool aborted) mutable { - printf("T 1: %d, Elapsed %4.2fms\n", aborted, Timing::now() - tnow); - return std::make_pair(false, 0); - }); - q.add(10001, [tnow](bool aborted) mutable { - printf("T 2: %d, Elapsed %4.2fms\n", aborted, Timing::now() - tnow); - return std::make_pair(false, 0); - }); - - q.add(1000, [tnow](bool aborted) mutable { - printf("T 3: %d, Elapsed %4.2fms\n", aborted, Timing::now() - tnow); - return std::make_pair(!aborted, 1000); - }); - - auto id = q.add(2000, [tnow](bool aborted) mutable { - printf("T 4: %d, Elapsed %4.2fms\n", aborted, Timing::now() - tnow); - return std::make_pair(!aborted, 2000); - }); - - (void)id; - // auto ret = q.cancel(id); - // assert(ret == 1); - // q.cancelAll(); - - return 0; -} -////////////////////////////////////////// diff --git a/util/timer_test.cc b/util/timer_test.cc deleted file mode 100644 index 0ebfa9f3d..000000000 --- a/util/timer_test.cc +++ /dev/null @@ -1,402 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "util/timer.h" - -#include "db/db_test_util.h" -#include "rocksdb/file_system.h" -#include "test_util/mock_time_env.h" - -namespace ROCKSDB_NAMESPACE { - -class TimerTest : public testing::Test { - public: - TimerTest() - : mock_clock_(std::make_shared(SystemClock::Default())) { - } - - protected: - std::shared_ptr mock_clock_; - - void SetUp() override { mock_clock_->InstallTimedWaitFixCallback(); } - - const int kUsPerSec = 1000000; -}; - -TEST_F(TimerTest, SingleScheduleOnce) { - const int kInitDelayUs = 1 * kUsPerSec; - Timer timer(mock_clock_.get()); - - int count = 0; - timer.Add([&] { count++; }, "fn_sch_test", kInitDelayUs, 0); - - ASSERT_TRUE(timer.Start()); - - ASSERT_EQ(0, count); - // Wait for execution to finish - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); - ASSERT_EQ(1, count); - - ASSERT_TRUE(timer.Shutdown()); -} - -TEST_F(TimerTest, MultipleScheduleOnce) { - const int kInitDelay1Us = 1 * kUsPerSec; - const int kInitDelay2Us = 3 * kUsPerSec; - Timer timer(mock_clock_.get()); - - int count1 = 0; - timer.Add([&] { count1++; }, "fn_sch_test1", kInitDelay1Us, 0); - - int count2 = 0; - timer.Add([&] { count2++; }, "fn_sch_test2", kInitDelay2Us, 0); - - ASSERT_TRUE(timer.Start()); - ASSERT_EQ(0, count1); - ASSERT_EQ(0, count2); - - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kInitDelay1Us); }); - - ASSERT_EQ(1, count1); - ASSERT_EQ(0, count2); - - timer.TEST_WaitForRun([&] { - mock_clock_->SleepForMicroseconds(kInitDelay2Us - kInitDelay1Us); - }); - - ASSERT_EQ(1, count1); - ASSERT_EQ(1, count2); - - ASSERT_TRUE(timer.Shutdown()); -} - -TEST_F(TimerTest, SingleScheduleRepeatedly) { - const int kIterations = 5; - const int kInitDelayUs = 1 * kUsPerSec; - const int kRepeatUs = 1 * kUsPerSec; - - Timer timer(mock_clock_.get()); - int count = 0; - timer.Add([&] { count++; }, "fn_sch_test", kInitDelayUs, kRepeatUs); - - ASSERT_TRUE(timer.Start()); - ASSERT_EQ(0, count); - - 
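// A note on the pattern used throughout these Timer tests: the Timer is driven
// by a MockSystemClock rather than real time. TEST_WaitForRun() takes a
// callback that advances the mock clock and returns only after the timer
// thread has executed the work that became due, so the assertions below are
// deterministic rather than sleep-based.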
timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); - - ASSERT_EQ(1, count); - - // Wait for execution to finish - for (int i = 1; i < kIterations; i++) { - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kRepeatUs); }); - } - ASSERT_EQ(kIterations, count); - - ASSERT_TRUE(timer.Shutdown()); -} - -TEST_F(TimerTest, MultipleScheduleRepeatedly) { - const int kIterations = 5; - const int kInitDelay1Us = 0 * kUsPerSec; - const int kInitDelay2Us = 1 * kUsPerSec; - const int kInitDelay3Us = 0 * kUsPerSec; - const int kRepeatUs = 2 * kUsPerSec; - const int kLargeRepeatUs = 100 * kUsPerSec; - - Timer timer(mock_clock_.get()); - - int count1 = 0; - timer.Add([&] { count1++; }, "fn_sch_test1", kInitDelay1Us, kRepeatUs); - - int count2 = 0; - timer.Add([&] { count2++; }, "fn_sch_test2", kInitDelay2Us, kRepeatUs); - - // Add a function with relatively large repeat interval - int count3 = 0; - timer.Add([&] { count3++; }, "fn_sch_test3", kInitDelay3Us, kLargeRepeatUs); - - ASSERT_TRUE(timer.Start()); - - ASSERT_EQ(0, count2); - // Wait for execution to finish - for (int i = 1; i < kIterations * (kRepeatUs / kUsPerSec); i++) { - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(1 * kUsPerSec); }); - ASSERT_EQ((i + 2) / (kRepeatUs / kUsPerSec), count1); - ASSERT_EQ((i + 1) / (kRepeatUs / kUsPerSec), count2); - - // large interval function should only run once (the first one). - ASSERT_EQ(1, count3); - } - - timer.Cancel("fn_sch_test1"); - - // Wait for execution to finish - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(1 * kUsPerSec); }); - ASSERT_EQ(kIterations, count1); - ASSERT_EQ(kIterations, count2); - ASSERT_EQ(1, count3); - - timer.Cancel("fn_sch_test2"); - - ASSERT_EQ(kIterations, count1); - ASSERT_EQ(kIterations, count2); - - // execute the long interval one - timer.TEST_WaitForRun([&] { - mock_clock_->SleepForMicroseconds( - kLargeRepeatUs - static_cast(mock_clock_->NowMicros())); - }); - ASSERT_EQ(2, count3); - - ASSERT_TRUE(timer.Shutdown()); -} - -TEST_F(TimerTest, AddAfterStartTest) { - const int kIterations = 5; - const int kInitDelayUs = 1 * kUsPerSec; - const int kRepeatUs = 1 * kUsPerSec; - - // wait timer to run and then add a new job - SyncPoint::GetInstance()->LoadDependency( - {{"Timer::Run::Waiting", "TimerTest:AddAfterStartTest:1"}}); - SyncPoint::GetInstance()->EnableProcessing(); - - Timer timer(mock_clock_.get()); - - ASSERT_TRUE(timer.Start()); - - TEST_SYNC_POINT("TimerTest:AddAfterStartTest:1"); - int count = 0; - timer.Add([&] { count++; }, "fn_sch_test", kInitDelayUs, kRepeatUs); - ASSERT_EQ(0, count); - // Wait for execution to finish - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); - ASSERT_EQ(1, count); - - for (int i = 1; i < kIterations; i++) { - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kRepeatUs); }); - } - ASSERT_EQ(kIterations, count); - - ASSERT_TRUE(timer.Shutdown()); -} - -TEST_F(TimerTest, CancelRunningTask) { - static constexpr char kTestFuncName[] = "test_func"; - const int kRepeatUs = 1 * kUsPerSec; - Timer timer(mock_clock_.get()); - ASSERT_TRUE(timer.Start()); - int* value = new int; - *value = 0; - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"TimerTest::CancelRunningTask:test_func:0", - "TimerTest::CancelRunningTask:BeforeCancel"}, - {"Timer::WaitForTaskCompleteIfNecessary:TaskExecuting", - "TimerTest::CancelRunningTask:test_func:1"}, - }); - 
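// For readers unfamiliar with the sync-point machinery used here:
// LoadDependency() installs happens-before edges between named points, so a
// thread reaching TEST_SYNC_POINT on a successor blocks until some thread has
// passed the corresponding predecessor. The two pairs above ensure that
// Cancel() is issued only after the task body has started, and that the task
// body returns only after Cancel() is already waiting for it to complete.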
SyncPoint::GetInstance()->EnableProcessing(); - timer.Add( - [&]() { - *value = 1; - TEST_SYNC_POINT("TimerTest::CancelRunningTask:test_func:0"); - TEST_SYNC_POINT("TimerTest::CancelRunningTask:test_func:1"); - }, - kTestFuncName, 0, kRepeatUs); - port::Thread control_thr([&]() { - TEST_SYNC_POINT("TimerTest::CancelRunningTask:BeforeCancel"); - timer.Cancel(kTestFuncName); - // Verify that *value has been set to 1. - ASSERT_EQ(1, *value); - delete value; - value = nullptr; - }); - mock_clock_->SleepForMicroseconds(kRepeatUs); - control_thr.join(); - ASSERT_TRUE(timer.Shutdown()); -} - -TEST_F(TimerTest, ShutdownRunningTask) { - const int kRepeatUs = 1 * kUsPerSec; - constexpr char kTestFunc1Name[] = "test_func1"; - constexpr char kTestFunc2Name[] = "test_func2"; - Timer timer(mock_clock_.get()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"TimerTest::ShutdownRunningTest:test_func:0", - "TimerTest::ShutdownRunningTest:BeforeShutdown"}, - {"Timer::WaitForTaskCompleteIfNecessary:TaskExecuting", - "TimerTest::ShutdownRunningTest:test_func:1"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_TRUE(timer.Start()); - - int* value = new int; - *value = 0; - timer.Add( - [&]() { - TEST_SYNC_POINT("TimerTest::ShutdownRunningTest:test_func:0"); - *value = 1; - TEST_SYNC_POINT("TimerTest::ShutdownRunningTest:test_func:1"); - }, - kTestFunc1Name, 0, kRepeatUs); - - timer.Add([&]() { ++(*value); }, kTestFunc2Name, 0, kRepeatUs); - - port::Thread control_thr([&]() { - TEST_SYNC_POINT("TimerTest::ShutdownRunningTest:BeforeShutdown"); - timer.Shutdown(); - }); - mock_clock_->SleepForMicroseconds(kRepeatUs); - control_thr.join(); - delete value; -} - -TEST_F(TimerTest, AddSameFuncName) { - const int kInitDelayUs = 1 * kUsPerSec; - const int kRepeat1Us = 5 * kUsPerSec; - const int kRepeat2Us = 4 * kUsPerSec; - - Timer timer(mock_clock_.get()); - ASSERT_TRUE(timer.Start()); - - int func_counter1 = 0; - ASSERT_TRUE(timer.Add([&] { func_counter1++; }, "duplicated_func", - kInitDelayUs, kRepeat1Us)); - - int func2_counter = 0; - ASSERT_TRUE( - timer.Add([&] { func2_counter++; }, "func2", kInitDelayUs, kRepeat2Us)); - - // New function with the same name should fail to add - int func_counter2 = 0; - ASSERT_FALSE(timer.Add([&] { func_counter2++; }, "duplicated_func", - kInitDelayUs, kRepeat1Us)); - - ASSERT_EQ(0, func_counter1); - ASSERT_EQ(0, func2_counter); - - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); - - ASSERT_EQ(1, func_counter1); - ASSERT_EQ(1, func2_counter); - - timer.TEST_WaitForRun([&] { mock_clock_->SleepForMicroseconds(kRepeat1Us); }); - - ASSERT_EQ(2, func_counter1); - ASSERT_EQ(2, func2_counter); - ASSERT_EQ(0, func_counter2); - - ASSERT_TRUE(timer.Shutdown()); -} - -TEST_F(TimerTest, RepeatIntervalWithFuncRunningTime) { - const int kInitDelayUs = 1 * kUsPerSec; - const int kRepeatUs = 5 * kUsPerSec; - const int kFuncRunningTimeUs = 1 * kUsPerSec; - - Timer timer(mock_clock_.get()); - ASSERT_TRUE(timer.Start()); - - int func_counter = 0; - timer.Add( - [&] { - mock_clock_->SleepForMicroseconds(kFuncRunningTimeUs); - func_counter++; - }, - "func", kInitDelayUs, kRepeatUs); - - ASSERT_EQ(0, func_counter); - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); - ASSERT_EQ(1, func_counter); - ASSERT_EQ(kInitDelayUs + kFuncRunningTimeUs, mock_clock_->NowMicros()); - - // After repeat interval time, the function is not executed, as running - // the function takes 
some time (`kFuncRunningTimeSec`). The repeat interval - // is the time between ending time of the last call and starting time of the - // next call. - uint64_t next_abs_interval_time_us = kInitDelayUs + kRepeatUs; - timer.TEST_WaitForRun([&] { - mock_clock_->SetCurrentTime(next_abs_interval_time_us / kUsPerSec); - }); - ASSERT_EQ(1, func_counter); - - // After the function running time, it's executed again - timer.TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kFuncRunningTimeUs); }); - ASSERT_EQ(2, func_counter); - - ASSERT_TRUE(timer.Shutdown()); -} - -TEST_F(TimerTest, DestroyRunningTimer) { - const int kInitDelayUs = 1 * kUsPerSec; - const int kRepeatUs = 1 * kUsPerSec; - - auto timer_ptr = new Timer(mock_clock_.get()); - - int count = 0; - timer_ptr->Add([&] { count++; }, "fn_sch_test", kInitDelayUs, kRepeatUs); - ASSERT_TRUE(timer_ptr->Start()); - - timer_ptr->TEST_WaitForRun( - [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); - - // delete a running timer should not cause any exception - delete timer_ptr; -} - -TEST_F(TimerTest, DestroyTimerWithRunningFunc) { - const int kRepeatUs = 1 * kUsPerSec; - auto timer_ptr = new Timer(mock_clock_.get()); - - SyncPoint::GetInstance()->DisableProcessing(); - SyncPoint::GetInstance()->LoadDependency({ - {"TimerTest::DestroyTimerWithRunningFunc:test_func:0", - "TimerTest::DestroyTimerWithRunningFunc:BeforeDelete"}, - {"Timer::WaitForTaskCompleteIfNecessary:TaskExecuting", - "TimerTest::DestroyTimerWithRunningFunc:test_func:1"}, - }); - SyncPoint::GetInstance()->EnableProcessing(); - - ASSERT_TRUE(timer_ptr->Start()); - - int count = 0; - timer_ptr->Add( - [&]() { - TEST_SYNC_POINT("TimerTest::DestroyTimerWithRunningFunc:test_func:0"); - count++; - TEST_SYNC_POINT("TimerTest::DestroyTimerWithRunningFunc:test_func:1"); - }, - "fn_running_test", 0, kRepeatUs); - - port::Thread control_thr([&] { - TEST_SYNC_POINT("TimerTest::DestroyTimerWithRunningFunc:BeforeDelete"); - delete timer_ptr; - }); - mock_clock_->SleepForMicroseconds(kRepeatUs); - control_thr.join(); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - - return RUN_ALL_TESTS(); -} diff --git a/util/work_queue_test.cc b/util/work_queue_test.cc deleted file mode 100644 index c23a51279..000000000 --- a/util/work_queue_test.cc +++ /dev/null @@ -1,272 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#include "util/work_queue.h" - -#include - -#include -#include -#include -#include -#include - -#include "port/stack_trace.h" - -namespace ROCKSDB_NAMESPACE { - -// Unit test for work_queue.h. -// -// This file is an excerpt from Facebook's zstd repo at -// https://github.com/facebook/zstd/. The relevant file is -// contrib/pzstd/utils/test/WorkQueueTest.cpp. 
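Before the tests themselves, a compact sketch of the WorkQueue contract they rely on; the function name is invented and this is an illustration rather than code from the tree.

#include <thread>

#include "util/work_queue.h"

// One bounded queue, one consumer: push() blocks while the queue is at its
// size limit and returns false once finish() has been called; pop() blocks for
// the next item and returns false only when the queue is finished and fully
// drained.
void ProducerConsumerSketch() {
  ROCKSDB_NAMESPACE::WorkQueue<int> queue(/*maxSize=*/8);
  std::thread consumer([&queue] {
    int item;
    while (queue.pop(item)) {
      // ... process item ...
    }
  });
  for (int i = 0; i < 100; ++i) {
    queue.push(i);
  }
  queue.finish();  // no more pushes; lets pop() return false after draining
  consumer.join();
}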
- -struct Popper { - WorkQueue* queue; - int* results; - std::mutex* mutex; - - void operator()() { - int result; - while (queue->pop(result)) { - std::lock_guard lock(*mutex); - results[result] = result; - } - } -}; - -TEST(WorkQueue, SingleThreaded) { - WorkQueue queue; - int result; - - queue.push(5); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(5, result); - - queue.push(1); - queue.push(2); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(1, result); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(2, result); - - queue.push(1); - queue.push(2); - queue.finish(); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(1, result); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(2, result); - EXPECT_FALSE(queue.pop(result)); - - queue.waitUntilFinished(); -} - -TEST(WorkQueue, SPSC) { - WorkQueue queue; - const int max = 100; - - for (int i = 0; i < 10; ++i) { - queue.push(i); - } - - std::thread thread([&queue, max] { - int result; - for (int i = 0;; ++i) { - if (!queue.pop(result)) { - EXPECT_EQ(i, max); - break; - } - EXPECT_EQ(i, result); - } - }); - - std::this_thread::yield(); - for (int i = 10; i < max; ++i) { - queue.push(i); - } - queue.finish(); - - thread.join(); -} - -TEST(WorkQueue, SPMC) { - WorkQueue queue; - std::vector results(50, -1); - std::mutex mutex; - std::vector threads; - for (int i = 0; i < 5; ++i) { - threads.emplace_back(Popper{&queue, results.data(), &mutex}); - } - - for (int i = 0; i < 50; ++i) { - queue.push(i); - } - queue.finish(); - - for (auto& thread : threads) { - thread.join(); - } - - for (int i = 0; i < 50; ++i) { - EXPECT_EQ(i, results[i]); - } -} - -TEST(WorkQueue, MPMC) { - WorkQueue queue; - std::vector results(100, -1); - std::mutex mutex; - std::vector popperThreads; - for (int i = 0; i < 4; ++i) { - popperThreads.emplace_back(Popper{&queue, results.data(), &mutex}); - } - - std::vector pusherThreads; - for (int i = 0; i < 2; ++i) { - auto min = i * 50; - auto max = (i + 1) * 50; - pusherThreads.emplace_back([&queue, min, max] { - for (int j = min; j < max; ++j) { - queue.push(j); - } - }); - } - - for (auto& thread : pusherThreads) { - thread.join(); - } - queue.finish(); - - for (auto& thread : popperThreads) { - thread.join(); - } - - for (int i = 0; i < 100; ++i) { - EXPECT_EQ(i, results[i]); - } -} - -TEST(WorkQueue, BoundedSizeWorks) { - WorkQueue queue(1); - int result; - queue.push(5); - queue.pop(result); - queue.push(5); - queue.pop(result); - queue.push(5); - queue.finish(); - queue.pop(result); - EXPECT_EQ(5, result); -} - -TEST(WorkQueue, BoundedSizePushAfterFinish) { - WorkQueue queue(1); - int result; - queue.push(5); - std::thread pusher([&queue] { queue.push(6); }); - // Dirtily try and make sure that pusher has run. - std::this_thread::sleep_for(std::chrono::seconds(1)); - queue.finish(); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(5, result); - EXPECT_FALSE(queue.pop(result)); - - pusher.join(); -} - -TEST(WorkQueue, SetMaxSize) { - WorkQueue queue(2); - int result; - queue.push(5); - queue.push(6); - queue.setMaxSize(1); - std::thread pusher([&queue] { queue.push(7); }); - // Dirtily try and make sure that pusher has run. 
- std::this_thread::sleep_for(std::chrono::seconds(1)); - queue.finish(); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(5, result); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(6, result); - EXPECT_FALSE(queue.pop(result)); - - pusher.join(); -} - -TEST(WorkQueue, BoundedSizeMPMC) { - WorkQueue queue(10); - std::vector results(200, -1); - std::mutex mutex; - std::cerr << "Creating popperThreads" << std::endl; - std::vector popperThreads; - for (int i = 0; i < 4; ++i) { - popperThreads.emplace_back(Popper{&queue, results.data(), &mutex}); - } - - std::cerr << "Creating pusherThreads" << std::endl; - std::vector pusherThreads; - for (int i = 0; i < 2; ++i) { - auto min = i * 100; - auto max = (i + 1) * 100; - pusherThreads.emplace_back([&queue, min, max] { - for (int j = min; j < max; ++j) { - queue.push(j); - } - }); - } - - std::cerr << "Joining pusherThreads" << std::endl; - for (auto& thread : pusherThreads) { - thread.join(); - } - std::cerr << "Finishing queue" << std::endl; - queue.finish(); - - std::cerr << "Joining popperThreads" << std::endl; - for (auto& thread : popperThreads) { - thread.join(); - } - - std::cerr << "Inspecting results" << std::endl; - for (int i = 0; i < 200; ++i) { - EXPECT_EQ(i, results[i]); - } -} - -TEST(WorkQueue, FailedPush) { - WorkQueue queue; - EXPECT_TRUE(queue.push(1)); - queue.finish(); - EXPECT_FALSE(queue.push(1)); -} - -TEST(WorkQueue, FailedPop) { - WorkQueue queue; - int x = 5; - EXPECT_TRUE(queue.push(x)); - queue.finish(); - x = 0; - EXPECT_TRUE(queue.pop(x)); - EXPECT_EQ(5, x); - EXPECT_FALSE(queue.pop(x)); - EXPECT_EQ(5, x); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/utilities/env_mirror_test.cc b/utilities/env_mirror_test.cc deleted file mode 100644 index ad4cc9366..000000000 --- a/utilities/env_mirror_test.cc +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -// Copyright (c) 2015, Red Hat, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - - -#include "rocksdb/utilities/env_mirror.h" - -#include "env/mock_env.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -class EnvMirrorTest : public testing::Test { - public: - Env* default_; - MockEnv *a_, *b_; - EnvMirror* env_; - const EnvOptions soptions_; - - EnvMirrorTest() - : default_(Env::Default()), - a_(new MockEnv(default_)), - b_(new MockEnv(default_)), - env_(new EnvMirror(a_, b_)) {} - ~EnvMirrorTest() { - delete env_; - delete a_; - delete b_; - } -}; - -TEST_F(EnvMirrorTest, Basics) { - uint64_t file_size; - std::unique_ptr writable_file; - std::vector children; - - ASSERT_OK(env_->CreateDir("/dir")); - - // Check that the directory is empty. - ASSERT_EQ(Status::NotFound(), env_->FileExists("/dir/non_existent")); - ASSERT_TRUE(!env_->GetFileSize("/dir/non_existent", &file_size).ok()); - ASSERT_OK(env_->GetChildren("/dir", &children)); - ASSERT_EQ(0U, children.size()); - - // Create a file. - ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file, soptions_)); - writable_file.reset(); - - // Check that the file exists. 
- ASSERT_OK(env_->FileExists("/dir/f")); - ASSERT_OK(a_->FileExists("/dir/f")); - ASSERT_OK(b_->FileExists("/dir/f")); - ASSERT_OK(env_->GetFileSize("/dir/f", &file_size)); - ASSERT_EQ(0U, file_size); - ASSERT_OK(env_->GetChildren("/dir", &children)); - ASSERT_EQ(1U, children.size()); - ASSERT_EQ("f", children[0]); - ASSERT_OK(a_->GetChildren("/dir", &children)); - ASSERT_EQ(1U, children.size()); - ASSERT_EQ("f", children[0]); - ASSERT_OK(b_->GetChildren("/dir", &children)); - ASSERT_EQ(1U, children.size()); - ASSERT_EQ("f", children[0]); - - // Write to the file. - ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file, soptions_)); - ASSERT_OK(writable_file->Append("abc")); - writable_file.reset(); - - // Check for expected size. - ASSERT_OK(env_->GetFileSize("/dir/f", &file_size)); - ASSERT_EQ(3U, file_size); - ASSERT_OK(a_->GetFileSize("/dir/f", &file_size)); - ASSERT_EQ(3U, file_size); - ASSERT_OK(b_->GetFileSize("/dir/f", &file_size)); - ASSERT_EQ(3U, file_size); - - // Check that renaming works. - ASSERT_TRUE(!env_->RenameFile("/dir/non_existent", "/dir/g").ok()); - ASSERT_OK(env_->RenameFile("/dir/f", "/dir/g")); - ASSERT_EQ(Status::NotFound(), env_->FileExists("/dir/f")); - ASSERT_OK(env_->FileExists("/dir/g")); - ASSERT_OK(env_->GetFileSize("/dir/g", &file_size)); - ASSERT_EQ(3U, file_size); - ASSERT_OK(a_->FileExists("/dir/g")); - ASSERT_OK(a_->GetFileSize("/dir/g", &file_size)); - ASSERT_EQ(3U, file_size); - ASSERT_OK(b_->FileExists("/dir/g")); - ASSERT_OK(b_->GetFileSize("/dir/g", &file_size)); - ASSERT_EQ(3U, file_size); - - // Check that opening non-existent file fails. - std::unique_ptr seq_file; - std::unique_ptr rand_file; - ASSERT_TRUE( - !env_->NewSequentialFile("/dir/non_existent", &seq_file, soptions_).ok()); - ASSERT_TRUE(!seq_file); - ASSERT_TRUE( - !env_->NewRandomAccessFile("/dir/non_existent", &rand_file, soptions_) - .ok()); - ASSERT_TRUE(!rand_file); - - // Check that deleting works. - ASSERT_TRUE(!env_->DeleteFile("/dir/non_existent").ok()); - ASSERT_OK(env_->DeleteFile("/dir/g")); - ASSERT_EQ(Status::NotFound(), env_->FileExists("/dir/g")); - ASSERT_OK(env_->GetChildren("/dir", &children)); - ASSERT_EQ(0U, children.size()); - ASSERT_OK(env_->DeleteDir("/dir")); -} - -TEST_F(EnvMirrorTest, ReadWrite) { - std::unique_ptr writable_file; - std::unique_ptr seq_file; - std::unique_ptr rand_file; - Slice result; - char scratch[100]; - - ASSERT_OK(env_->CreateDir("/dir")); - - ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file, soptions_)); - ASSERT_OK(writable_file->Append("hello ")); - ASSERT_OK(writable_file->Append("world")); - writable_file.reset(); - - // Read sequentially. - ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file, soptions_)); - ASSERT_OK(seq_file->Read(5, &result, scratch)); // Read "hello". - ASSERT_EQ(0, result.compare("hello")); - ASSERT_OK(seq_file->Skip(1)); - ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Read "world". - ASSERT_EQ(0, result.compare("world")); - ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Try reading past EOF. - ASSERT_EQ(0U, result.size()); - ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file. - ASSERT_OK(seq_file->Read(1000, &result, scratch)); - ASSERT_EQ(0U, result.size()); - - // Random reads. - ASSERT_OK(env_->NewRandomAccessFile("/dir/f", &rand_file, soptions_)); - ASSERT_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world". - ASSERT_EQ(0, result.compare("world")); - ASSERT_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello". 
- ASSERT_EQ(0, result.compare("hello")); - ASSERT_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d". - ASSERT_EQ(0, result.compare("d")); - - // Too high offset. - ASSERT_TRUE(!rand_file->Read(1000, 5, &result, scratch).ok()); -} - -TEST_F(EnvMirrorTest, Locks) { - FileLock* lock; - - // These are no-ops, but we test they return success. - ASSERT_OK(env_->LockFile("some file", &lock)); - ASSERT_OK(env_->UnlockFile(lock)); -} - -TEST_F(EnvMirrorTest, Misc) { - std::string test_dir; - ASSERT_OK(env_->GetTestDirectory(&test_dir)); - ASSERT_TRUE(!test_dir.empty()); - - std::unique_ptr writable_file; - ASSERT_OK(env_->NewWritableFile("/a/b", &writable_file, soptions_)); - - // These are no-ops, but we test they return success. - ASSERT_OK(writable_file->Sync()); - ASSERT_OK(writable_file->Flush()); - ASSERT_OK(writable_file->Close()); - writable_file.reset(); -} - -TEST_F(EnvMirrorTest, LargeWrite) { - const size_t kWriteSize = 300 * 1024; - char* scratch = new char[kWriteSize * 2]; - - std::string write_data; - for (size_t i = 0; i < kWriteSize; ++i) { - write_data.append(1, static_cast(i)); - } - - std::unique_ptr writable_file; - ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file, soptions_)); - ASSERT_OK(writable_file->Append("foo")); - ASSERT_OK(writable_file->Append(write_data)); - writable_file.reset(); - - std::unique_ptr seq_file; - Slice result; - ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file, soptions_)); - ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". - ASSERT_EQ(0, result.compare("foo")); - - size_t read = 0; - std::string read_data; - while (read < kWriteSize) { - ASSERT_OK(seq_file->Read(kWriteSize - read, &result, scratch)); - read_data.append(result.data(), result.size()); - read += result.size(); - } - ASSERT_TRUE(write_data == read_data); - delete[] scratch; -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/utilities/env_timed_test.cc b/utilities/env_timed_test.cc deleted file mode 100644 index 3099fb74c..000000000 --- a/utilities/env_timed_test.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- - -#include "rocksdb/env.h" -#include "rocksdb/perf_context.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -class TimedEnvTest : public testing::Test {}; - -TEST_F(TimedEnvTest, BasicTest) { - SetPerfLevel(PerfLevel::kEnableTime); - ASSERT_EQ(0, get_perf_context()->env_new_writable_file_nanos); - - std::unique_ptr mem_env(NewMemEnv(Env::Default())); - std::unique_ptr timed_env(NewTimedEnv(mem_env.get())); - std::unique_ptr writable_file; - ASSERT_OK(timed_env->NewWritableFile("f", &writable_file, EnvOptions())); - - ASSERT_GT(get_perf_context()->env_new_writable_file_nanos, 0); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/utilities/object_registry_test.cc b/utilities/object_registry_test.cc deleted file mode 100644 index 4042bc9b9..000000000 --- a/utilities/object_registry_test.cc +++ /dev/null @@ -1,862 +0,0 @@ -// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - - -#include "rocksdb/utilities/object_registry.h" - -#include "rocksdb/convenience.h" -#include "rocksdb/customizable.h" -#include "test_util/testharness.h" - -namespace ROCKSDB_NAMESPACE { - -class ObjRegistryTest : public testing::Test { - public: - static int num_a, num_b; -}; - -int ObjRegistryTest::num_a = 0; -int ObjRegistryTest::num_b = 0; -static FactoryFunc test_reg_a = ObjectLibrary::Default()->AddFactory( - ObjectLibrary::PatternEntry("a", false).AddSeparator("://"), - [](const std::string& /*uri*/, std::unique_ptr* /*env_guard*/, - std::string* /* errmsg */) { - ++ObjRegistryTest::num_a; - return Env::Default(); - }); - -class WrappedEnv : public EnvWrapper { - private: - std::string id_; - - public: - WrappedEnv(Env* t, const std::string& id) : EnvWrapper(t), id_(id) {} - const char* Name() const override { return id_.c_str(); } - std::string GetId() const override { return id_; } -}; -static FactoryFunc test_reg_b = ObjectLibrary::Default()->AddFactory( - ObjectLibrary::PatternEntry("b", false).AddSeparator("://"), - [](const std::string& uri, std::unique_ptr* env_guard, - std::string* /* errmsg */) { - ++ObjRegistryTest::num_b; - // Env::Default() is a singleton so we can't grant ownership directly to - // the caller - we must wrap it first. 
- env_guard->reset(new WrappedEnv(Env::Default(), uri)); - return env_guard->get(); - }); - -TEST_F(ObjRegistryTest, Basics) { - std::string msg; - std::unique_ptr guard; - Env* a_env = nullptr; - - auto registry = ObjectRegistry::NewInstance(); - ASSERT_NOK(registry->NewStaticObject("c://test", &a_env)); - ASSERT_NOK(registry->NewUniqueObject("c://test", &guard)); - ASSERT_EQ(a_env, nullptr); - ASSERT_EQ(guard, nullptr); - ASSERT_EQ(0, num_a); - ASSERT_EQ(0, num_b); - - ASSERT_OK(registry->NewStaticObject("a://test", &a_env)); - ASSERT_NE(a_env, nullptr); - ASSERT_EQ(1, num_a); - ASSERT_EQ(0, num_b); - - ASSERT_OK(registry->NewUniqueObject("b://test", &guard)); - ASSERT_NE(guard, nullptr); - ASSERT_EQ(1, num_a); - ASSERT_EQ(1, num_b); - - Env* b_env = nullptr; - ASSERT_NOK(registry->NewStaticObject("b://test", &b_env)); - ASSERT_EQ(b_env, nullptr); - ASSERT_EQ(1, num_a); - ASSERT_EQ(2, num_b); // Created but rejected as not static - - b_env = a_env; - ASSERT_NOK(registry->NewStaticObject("b://test", &b_env)); - ASSERT_EQ(b_env, a_env); - ASSERT_EQ(1, num_a); - ASSERT_EQ(3, num_b); - - b_env = guard.get(); - ASSERT_NOK(registry->NewUniqueObject("a://test", &guard)); - ASSERT_EQ(guard.get(), b_env); // Unchanged - ASSERT_EQ(2, num_a); // Created one but rejected it as not unique - ASSERT_EQ(3, num_b); -} - -TEST_F(ObjRegistryTest, LocalRegistry) { - Env* env = nullptr; - auto registry = ObjectRegistry::NewInstance(); - std::shared_ptr library = - std::make_shared("local"); - registry->AddLibrary(library); - library->AddFactory( - "test-local", - [](const std::string& /*uri*/, std::unique_ptr* /*guard */, - std::string* /* errmsg */) { return Env::Default(); }); - - ObjectLibrary::Default()->AddFactory( - "test-global", - [](const std::string& /*uri*/, std::unique_ptr* /*guard */, - std::string* /* errmsg */) { return Env::Default(); }); - - ASSERT_NOK( - ObjectRegistry::NewInstance()->NewStaticObject("test-local", &env)); - ASSERT_EQ(env, nullptr); - ASSERT_OK( - ObjectRegistry::NewInstance()->NewStaticObject("test-global", &env)); - ASSERT_NE(env, nullptr); - ASSERT_OK(registry->NewStaticObject("test-local", &env)); - ASSERT_NE(env, nullptr); - ASSERT_OK(registry->NewStaticObject("test-global", &env)); - ASSERT_NE(env, nullptr); -} - -static int RegisterTestUnguarded(ObjectLibrary& library, - const std::string& /*arg*/) { - library.AddFactory( - "unguarded", - [](const std::string& /*uri*/, std::unique_ptr* /*guard */, - std::string* /* errmsg */) { return Env::Default(); }); - library.AddFactory( - "guarded", [](const std::string& uri, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new WrappedEnv(Env::Default(), uri)); - return guard->get(); - }); - return 2; -} - -TEST_F(ObjRegistryTest, CheckShared) { - std::shared_ptr shared; - std::shared_ptr registry = ObjectRegistry::NewInstance(); - registry->AddLibrary("shared", RegisterTestUnguarded, ""); - - ASSERT_OK(registry->NewSharedObject("guarded", &shared)); - ASSERT_NE(shared, nullptr); - shared.reset(); - ASSERT_NOK(registry->NewSharedObject("unguarded", &shared)); - ASSERT_EQ(shared, nullptr); -} - -TEST_F(ObjRegistryTest, CheckStatic) { - Env* env = nullptr; - std::shared_ptr registry = ObjectRegistry::NewInstance(); - registry->AddLibrary("static", RegisterTestUnguarded, ""); - - ASSERT_NOK(registry->NewStaticObject("guarded", &env)); - ASSERT_EQ(env, nullptr); - env = nullptr; - ASSERT_OK(registry->NewStaticObject("unguarded", &env)); - ASSERT_NE(env, nullptr); -} - -TEST_F(ObjRegistryTest, CheckUnique) { - 
std::unique_ptr unique; - std::shared_ptr registry = ObjectRegistry::NewInstance(); - registry->AddLibrary("unique", RegisterTestUnguarded, ""); - - ASSERT_OK(registry->NewUniqueObject("guarded", &unique)); - ASSERT_NE(unique, nullptr); - unique.reset(); - ASSERT_NOK(registry->NewUniqueObject("unguarded", &unique)); - ASSERT_EQ(unique, nullptr); -} - -TEST_F(ObjRegistryTest, FailingFactory) { - std::shared_ptr registry = ObjectRegistry::NewInstance(); - std::shared_ptr library = - std::make_shared("failing"); - registry->AddLibrary(library); - library->AddFactory( - "failing", [](const std::string& /*uri*/, - std::unique_ptr* /*guard */, std::string* errmsg) { - *errmsg = "Bad Factory"; - return nullptr; - }); - std::unique_ptr unique; - std::shared_ptr shared; - Env* pointer = nullptr; - Status s; - s = registry->NewUniqueObject("failing", &unique); - ASSERT_TRUE(s.IsInvalidArgument()); - s = registry->NewSharedObject("failing", &shared); - ASSERT_TRUE(s.IsInvalidArgument()); - s = registry->NewStaticObject("failing", &pointer); - ASSERT_TRUE(s.IsInvalidArgument()); - - s = registry->NewUniqueObject("missing", &unique); - ASSERT_TRUE(s.IsNotSupported()); - s = registry->NewSharedObject("missing", &shared); - ASSERT_TRUE(s.IsNotSupported()); - s = registry->NewStaticObject("missing", &pointer); - ASSERT_TRUE(s.IsNotSupported()); -} - -TEST_F(ObjRegistryTest, TestRegistryParents) { - auto grand = ObjectRegistry::Default(); - auto parent = ObjectRegistry::NewInstance(); // parent with a grandparent - auto uncle = ObjectRegistry::NewInstance(grand); - auto child = ObjectRegistry::NewInstance(parent); - auto cousin = ObjectRegistry::NewInstance(uncle); - - auto library = parent->AddLibrary("parent"); - library->AddFactory( - "parent", [](const std::string& uri, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new WrappedEnv(Env::Default(), uri)); - return guard->get(); - }); - library = cousin->AddLibrary("cousin"); - library->AddFactory( - "cousin", [](const std::string& uri, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new WrappedEnv(Env::Default(), uri)); - return guard->get(); - }); - - Env* env = nullptr; - std::unique_ptr guard; - std::string msg; - - // a:://* is registered in Default, so they should all work - ASSERT_OK(parent->NewStaticObject("a://test", &env)); - ASSERT_OK(child->NewStaticObject("a://test", &env)); - ASSERT_OK(uncle->NewStaticObject("a://test", &env)); - ASSERT_OK(cousin->NewStaticObject("a://test", &env)); - - // The parent env is only registered for parent, not uncle, - // So parent and child should return success and uncle and cousin should fail - ASSERT_OK(parent->NewUniqueObject("parent", &guard)); - ASSERT_OK(child->NewUniqueObject("parent", &guard)); - ASSERT_NOK(uncle->NewUniqueObject("parent", &guard)); - ASSERT_NOK(cousin->NewUniqueObject("parent", &guard)); - - // The cousin is only registered in the cousin, so all of the others should - // fail - ASSERT_OK(cousin->NewUniqueObject("cousin", &guard)); - ASSERT_NOK(parent->NewUniqueObject("cousin", &guard)); - ASSERT_NOK(child->NewUniqueObject("cousin", &guard)); - ASSERT_NOK(uncle->NewUniqueObject("cousin", &guard)); -} - -class MyCustomizable : public Customizable { - public: - static const char* Type() { return "MyCustomizable"; } - MyCustomizable(const char* prefix, const std::string& id) : id_(id) { - name_ = id_.substr(0, strlen(prefix) - 1); - } - const char* Name() const override { return name_.c_str(); } - std::string GetId() const override { return id_; 
} - - private: - std::string id_; - std::string name_; -}; - -TEST_F(ObjRegistryTest, TestFactoryCount) { - std::string msg; - auto grand = ObjectRegistry::Default(); - auto local = ObjectRegistry::NewInstance(); - std::unordered_set grand_types, local_types; - std::vector grand_names, local_names; - - // Check how many types we have on startup. - // Grand should equal local - grand->GetFactoryTypes(&grand_types); - local->GetFactoryTypes(&local_types); - ASSERT_EQ(grand_types, local_types); - size_t grand_count = grand->GetFactoryCount(Env::Type()); - size_t local_count = local->GetFactoryCount(Env::Type()); - - ASSERT_EQ(grand_count, local_count); - grand->GetFactoryNames(Env::Type(), &grand_names); - local->GetFactoryNames(Env::Type(), &local_names); - ASSERT_EQ(grand_names.size(), grand_count); - ASSERT_EQ(local_names.size(), local_count); - ASSERT_EQ(grand_names, local_names); - - // Add an Env to the local registry. - // This will add one factory. - auto library = local->AddLibrary("local"); - library->AddFactory( - "A", [](const std::string& /*uri*/, std::unique_ptr* /*guard */, - std::string* /* errmsg */) { return nullptr; }); - ASSERT_EQ(local_count + 1, local->GetFactoryCount(Env::Type())); - ASSERT_EQ(grand_count, grand->GetFactoryCount(Env::Type())); - local->GetFactoryTypes(&local_types); - local->GetFactoryNames(Env::Type(), &local_names); - ASSERT_EQ(grand_names.size() + 1, local_names.size()); - ASSERT_EQ(local_names.size(), local->GetFactoryCount(Env::Type())); - - if (grand_count == 0) { - // There were no Env when we started. Should have one more type - // than previously - ASSERT_NE(grand_types, local_types); - ASSERT_EQ(grand_types.size() + 1, local_types.size()); - } else { - // There was an Env type when we started. The types should match - ASSERT_EQ(grand_types, local_types); - } - - // Add a MyCustomizable to the registry. This should be a new type - library->AddFactory( - "MY", [](const std::string& /*uri*/, - std::unique_ptr* /*guard */, - std::string* /* errmsg */) { return nullptr; }); - ASSERT_EQ(local_count + 1, local->GetFactoryCount(Env::Type())); - ASSERT_EQ(grand_count, grand->GetFactoryCount(Env::Type())); - ASSERT_EQ(0U, grand->GetFactoryCount(MyCustomizable::Type())); - ASSERT_EQ(1U, local->GetFactoryCount(MyCustomizable::Type())); - - local->GetFactoryNames(MyCustomizable::Type(), &local_names); - ASSERT_EQ(1U, local_names.size()); - ASSERT_EQ(local_names[0], "MY"); - - local->GetFactoryTypes(&local_types); - ASSERT_EQ(grand_count == 0 ? 2 : grand_types.size() + 1, local_types.size()); - - // Add the same name again. We should now have 2 factories. 
- library->AddFactory( - "MY", [](const std::string& /*uri*/, - std::unique_ptr* /*guard */, - std::string* /* errmsg */) { return nullptr; }); - local->GetFactoryNames(MyCustomizable::Type(), &local_names); - ASSERT_EQ(2U, local_names.size()); -} - -TEST_F(ObjRegistryTest, TestManagedObjects) { - auto registry = ObjectRegistry::NewInstance(); - auto m_a1 = std::make_shared("", "A"); - auto m_a2 = std::make_shared("", "A"); - - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_OK(registry->SetManagedObject(m_a1)); - ASSERT_EQ(registry->GetManagedObject("A"), m_a1); - - ASSERT_NOK(registry->SetManagedObject(m_a2)); - ASSERT_OK(registry->SetManagedObject(m_a1)); - m_a1.reset(); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_OK(registry->SetManagedObject(m_a2)); - ASSERT_EQ(registry->GetManagedObject("A"), m_a2); -} - -TEST_F(ObjRegistryTest, TestTwoManagedObjects) { - auto registry = ObjectRegistry::NewInstance(); - auto m_a = std::make_shared("", "A"); - auto m_b = std::make_shared("", "B"); - std::vector> objects; - - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_EQ(registry->GetManagedObject("B"), nullptr); - ASSERT_OK(registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 0U); - ASSERT_OK(registry->SetManagedObject(m_a)); - ASSERT_EQ(registry->GetManagedObject("B"), nullptr); - ASSERT_EQ(registry->GetManagedObject("A"), m_a); - ASSERT_OK(registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 1U); - ASSERT_EQ(objects.front(), m_a); - - ASSERT_OK(registry->SetManagedObject(m_b)); - ASSERT_EQ(registry->GetManagedObject("A"), m_a); - ASSERT_EQ(registry->GetManagedObject("B"), m_b); - ASSERT_OK(registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 2U); - ASSERT_OK(registry->ListManagedObjects("A", &objects)); - ASSERT_EQ(objects.size(), 1U); - ASSERT_EQ(objects.front(), m_a); - ASSERT_OK(registry->ListManagedObjects("B", &objects)); - ASSERT_EQ(objects.size(), 1U); - ASSERT_EQ(objects.front(), m_b); - ASSERT_OK(registry->ListManagedObjects("C", &objects)); - ASSERT_EQ(objects.size(), 0U); - - m_a.reset(); - objects.clear(); - - ASSERT_EQ(registry->GetManagedObject("B"), m_b); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_OK(registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 1U); - ASSERT_EQ(objects.front(), m_b); - - m_b.reset(); - objects.clear(); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_EQ(registry->GetManagedObject("B"), nullptr); -} - -TEST_F(ObjRegistryTest, TestAlternateNames) { - auto registry = ObjectRegistry::NewInstance(); - auto m_a = std::make_shared("", "A"); - auto m_b = std::make_shared("", "B"); - std::vector> objects; - // Test no objects exist - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_EQ(registry->GetManagedObject("B"), nullptr); - ASSERT_EQ(registry->GetManagedObject("TheOne"), nullptr); - ASSERT_OK(registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 0U); - - // Mark "TheOne" to be A - ASSERT_OK(registry->SetManagedObject("TheOne", m_a)); - ASSERT_EQ(registry->GetManagedObject("B"), nullptr); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_EQ(registry->GetManagedObject("TheOne"), m_a); - ASSERT_OK(registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 1U); - ASSERT_EQ(objects.front(), m_a); - - // Try to mark "TheOne" again. 
- ASSERT_NOK(registry->SetManagedObject("TheOne", m_b)); - ASSERT_OK(registry->SetManagedObject("TheOne", m_a)); - - // Add "A" as a managed object. Registered 2x - ASSERT_OK(registry->SetManagedObject(m_a)); - ASSERT_EQ(registry->GetManagedObject("B"), nullptr); - ASSERT_EQ(registry->GetManagedObject("A"), m_a); - ASSERT_EQ(registry->GetManagedObject("TheOne"), m_a); - ASSERT_OK(registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 2U); - - // Delete "A". - m_a.reset(); - objects.clear(); - - ASSERT_EQ(registry->GetManagedObject("TheOne"), nullptr); - ASSERT_OK(registry->SetManagedObject("TheOne", m_b)); - ASSERT_EQ(registry->GetManagedObject("TheOne"), m_b); - ASSERT_OK(registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 1U); - ASSERT_EQ(objects.front(), m_b); - - m_b.reset(); - objects.clear(); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_EQ(registry->GetManagedObject("TheOne"), nullptr); - ASSERT_OK(registry->ListManagedObjects(&objects)); - ASSERT_EQ(objects.size(), 0U); -} - -TEST_F(ObjRegistryTest, TestTwoManagedClasses) { - class MyCustomizable2 : public MyCustomizable { - public: - static const char* Type() { return "MyCustomizable2"; } - MyCustomizable2(const char* prefix, const std::string& id) - : MyCustomizable(prefix, id) {} - }; - - auto registry = ObjectRegistry::NewInstance(); - auto m_a1 = std::make_shared("", "A"); - auto m_a2 = std::make_shared("", "A"); - std::vector> obj1s; - std::vector> obj2s; - - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - - ASSERT_OK(registry->SetManagedObject(m_a1)); - ASSERT_EQ(registry->GetManagedObject("A"), m_a1); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - - ASSERT_OK(registry->SetManagedObject(m_a2)); - ASSERT_EQ(registry->GetManagedObject("A"), m_a2); - ASSERT_OK(registry->ListManagedObjects(&obj1s)); - ASSERT_OK(registry->ListManagedObjects(&obj2s)); - ASSERT_EQ(obj1s.size(), 1U); - ASSERT_EQ(obj2s.size(), 1U); - ASSERT_EQ(obj1s.front(), m_a1); - ASSERT_EQ(obj2s.front(), m_a2); - m_a1.reset(); - obj1s.clear(); - obj2s.clear(); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_EQ(registry->GetManagedObject("A"), m_a2); - - m_a2.reset(); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); -} - -TEST_F(ObjRegistryTest, TestManagedObjectsWithParent) { - auto base = ObjectRegistry::NewInstance(); - auto registry = ObjectRegistry::NewInstance(base); - - auto m_a = std::make_shared("", "A"); - auto m_b = std::make_shared("", "A"); - - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_OK(base->SetManagedObject(m_a)); - ASSERT_EQ(registry->GetManagedObject("A"), m_a); - - ASSERT_NOK(registry->SetManagedObject(m_b)); - ASSERT_OK(registry->SetManagedObject(m_a)); - - m_a.reset(); - ASSERT_EQ(registry->GetManagedObject("A"), nullptr); - ASSERT_OK(registry->SetManagedObject(m_b)); - ASSERT_EQ(registry->GetManagedObject("A"), m_b); -} - -TEST_F(ObjRegistryTest, TestGetOrCreateManagedObject) { - auto registry = ObjectRegistry::NewInstance(); - registry->AddLibrary("test")->AddFactory( - ObjectLibrary::PatternEntry::AsIndividualId("MC"), - [](const std::string& uri, std::unique_ptr* guard, - std::string* /* errmsg */) { - guard->reset(new MyCustomizable("MC", uri)); - return guard->get(); - }); - std::shared_ptr m_a, m_b, obj; - std::vector> objs; - - std::unordered_map opt_map; - - 
ASSERT_EQ(registry->GetManagedObject("MC@A#1"), nullptr); - ASSERT_EQ(registry->GetManagedObject("MC@B#1"), nullptr); - ASSERT_OK(registry->GetOrCreateManagedObject("MC@A#1", &m_a)); - ASSERT_OK(registry->GetOrCreateManagedObject("MC@B#1", &m_b)); - ASSERT_EQ(registry->GetManagedObject("MC@A#1"), m_a); - ASSERT_OK(registry->GetOrCreateManagedObject("MC@A#1", &obj)); - ASSERT_EQ(obj, m_a); - ASSERT_OK(registry->GetOrCreateManagedObject("MC@B#1", &obj)); - ASSERT_EQ(obj, m_b); - ASSERT_OK(registry->ListManagedObjects(&objs)); - ASSERT_EQ(objs.size(), 2U); - - objs.clear(); - m_a.reset(); - obj.reset(); - ASSERT_OK(registry->GetOrCreateManagedObject("MC@A#1", &m_a)); - ASSERT_EQ(1, m_a.use_count()); - ASSERT_OK(registry->GetOrCreateManagedObject("MC@B#1", &obj)); - ASSERT_EQ(2, obj.use_count()); -} - -TEST_F(ObjRegistryTest, RegisterPlugin) { - std::shared_ptr registry = ObjectRegistry::NewInstance(); - std::unique_ptr guard; - Env* env = nullptr; - - ASSERT_NOK(registry->NewObject("unguarded", &env, &guard)); - ASSERT_EQ(registry->RegisterPlugin("Missing", nullptr), -1); - ASSERT_EQ(registry->RegisterPlugin("", RegisterTestUnguarded), -1); - ASSERT_GT(registry->RegisterPlugin("Valid", RegisterTestUnguarded), 0); - ASSERT_OK(registry->NewObject("unguarded", &env, &guard)); - ASSERT_NE(env, nullptr); -} -class PatternEntryTest : public testing::Test {}; - -TEST_F(PatternEntryTest, TestSimpleEntry) { - ObjectLibrary::PatternEntry entry("ABC", true); - - ASSERT_TRUE(entry.Matches("ABC")); - ASSERT_FALSE(entry.Matches("AABC")); - ASSERT_FALSE(entry.Matches("ABCA")); - ASSERT_FALSE(entry.Matches("AABCA")); - ASSERT_FALSE(entry.Matches("AB")); - ASSERT_FALSE(entry.Matches("BC")); - ASSERT_FALSE(entry.Matches("ABD")); - ASSERT_FALSE(entry.Matches("BCA")); -} - -TEST_F(PatternEntryTest, TestPatternEntry) { - // Matches A:+ - ObjectLibrary::PatternEntry entry("A", false); - entry.AddSeparator(":"); - ASSERT_FALSE(entry.Matches("A")); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("AB")); - ASSERT_FALSE(entry.Matches("B")); - ASSERT_FALSE(entry.Matches("A:")); - ASSERT_FALSE(entry.Matches("AA:")); - ASSERT_FALSE(entry.Matches("AA:B")); - ASSERT_FALSE(entry.Matches("AA:BB")); - ASSERT_TRUE(entry.Matches("A:B")); - ASSERT_TRUE(entry.Matches("A:BB")); - - entry.SetOptional(true); // Now matches "A" or "A:+" - ASSERT_TRUE(entry.Matches("A")); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("AB")); - ASSERT_FALSE(entry.Matches("B")); - ASSERT_FALSE(entry.Matches("A:")); - ASSERT_FALSE(entry.Matches("AA:")); - ASSERT_FALSE(entry.Matches("AA:B")); - ASSERT_FALSE(entry.Matches("AA:BB")); - ASSERT_TRUE(entry.Matches("A:B")); - ASSERT_TRUE(entry.Matches("A:BB")); -} - -TEST_F(PatternEntryTest, MatchZeroOrMore) { - // Matches A:* - ObjectLibrary::PatternEntry entry("A", false); - entry.AddSeparator(":", false); - ASSERT_FALSE(entry.Matches("A")); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("AB")); - ASSERT_FALSE(entry.Matches("B")); - ASSERT_TRUE(entry.Matches("A:")); - ASSERT_FALSE(entry.Matches("B:")); - ASSERT_FALSE(entry.Matches("B:A")); - ASSERT_FALSE(entry.Matches("AA:")); - ASSERT_FALSE(entry.Matches("AA:B")); - ASSERT_FALSE(entry.Matches("AA:BB")); - ASSERT_TRUE(entry.Matches("A:B")); - ASSERT_TRUE(entry.Matches("A:BB")); - - entry.SetOptional(true); // Now matches "A" or "A:*" - ASSERT_TRUE(entry.Matches("A")); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("AB")); - ASSERT_FALSE(entry.Matches("B")); - 
ASSERT_TRUE(entry.Matches("A:")); - ASSERT_FALSE(entry.Matches("B:")); - ASSERT_FALSE(entry.Matches("B:A")); - ASSERT_FALSE(entry.Matches("AA:")); - ASSERT_FALSE(entry.Matches("AA:B")); - ASSERT_FALSE(entry.Matches("AA:BB")); - ASSERT_TRUE(entry.Matches("A:B")); - ASSERT_TRUE(entry.Matches("A:BB")); -} - -TEST_F(PatternEntryTest, TestSuffixEntry) { - ObjectLibrary::PatternEntry entry("AA", true); - entry.AddSuffix("BB"); - - ASSERT_TRUE(entry.Matches("AA")); - ASSERT_TRUE(entry.Matches("AABB")); - - ASSERT_FALSE(entry.Matches("A")); - ASSERT_FALSE(entry.Matches("AB")); - ASSERT_FALSE(entry.Matches("B")); - ASSERT_FALSE(entry.Matches("BB")); - ASSERT_FALSE(entry.Matches("ABA")); - ASSERT_FALSE(entry.Matches("BBAA")); - ASSERT_FALSE(entry.Matches("AABBA")); - ASSERT_FALSE(entry.Matches("AABBB")); -} - -TEST_F(PatternEntryTest, TestNumericEntry) { - ObjectLibrary::PatternEntry entry("A", false); - entry.AddNumber(":"); - ASSERT_FALSE(entry.Matches("A")); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("A:")); - ASSERT_FALSE(entry.Matches("AA:")); - ASSERT_TRUE(entry.Matches("A:1")); - ASSERT_TRUE(entry.Matches("A:11")); - ASSERT_FALSE(entry.Matches("AA:1")); - ASSERT_FALSE(entry.Matches("AA:11")); - ASSERT_FALSE(entry.Matches("A:B")); - ASSERT_FALSE(entry.Matches("A:1B")); - ASSERT_FALSE(entry.Matches("A:B1")); - - entry.AddSeparator(":", false); - ASSERT_FALSE(entry.Matches("A")); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("A:")); - ASSERT_FALSE(entry.Matches("AA:")); - ASSERT_TRUE(entry.Matches("A:1:")); - ASSERT_TRUE(entry.Matches("A:11:")); - ASSERT_FALSE(entry.Matches("A:1")); - ASSERT_FALSE(entry.Matches("A:B1:")); - ASSERT_FALSE(entry.Matches("A:1B:")); - ASSERT_FALSE(entry.Matches("A::")); -} - -TEST_F(PatternEntryTest, TestDoubleEntry) { - ObjectLibrary::PatternEntry entry("A", false); - entry.AddNumber(":", false); - ASSERT_FALSE(entry.Matches("A")); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("A:")); - ASSERT_FALSE(entry.Matches("AA:")); - ASSERT_FALSE(entry.Matches("AA:1")); - ASSERT_FALSE(entry.Matches("AA:11")); - ASSERT_FALSE(entry.Matches("A:B")); - ASSERT_FALSE(entry.Matches("A:1B")); - ASSERT_FALSE(entry.Matches("A:B1")); - ASSERT_TRUE(entry.Matches("A:1")); - ASSERT_TRUE(entry.Matches("A:11")); - ASSERT_TRUE(entry.Matches("A:1.1")); - ASSERT_TRUE(entry.Matches("A:11.11")); - ASSERT_TRUE(entry.Matches("A:1.")); - ASSERT_TRUE(entry.Matches("A:.1")); - ASSERT_TRUE(entry.Matches("A:0.1")); - ASSERT_TRUE(entry.Matches("A:1.0")); - ASSERT_TRUE(entry.Matches("A:1.0")); - - ASSERT_FALSE(entry.Matches("A:1.0.")); - ASSERT_FALSE(entry.Matches("A:1.0.2")); - ASSERT_FALSE(entry.Matches("A:.1.0")); - ASSERT_FALSE(entry.Matches("A:..10")); - ASSERT_FALSE(entry.Matches("A:10..")); - ASSERT_FALSE(entry.Matches("A:.")); - - entry.AddSeparator(":", false); - ASSERT_FALSE(entry.Matches("A:1")); - ASSERT_FALSE(entry.Matches("A:1.0")); - - ASSERT_TRUE(entry.Matches("A:11:")); - ASSERT_TRUE(entry.Matches("A:1.1:")); - ASSERT_TRUE(entry.Matches("A:11.11:")); - ASSERT_TRUE(entry.Matches("A:1.:")); - ASSERT_TRUE(entry.Matches("A:.1:")); - ASSERT_TRUE(entry.Matches("A:0.1:")); - ASSERT_TRUE(entry.Matches("A:1.0:")); - ASSERT_TRUE(entry.Matches("A:1.0:")); - - ASSERT_FALSE(entry.Matches("A:1.0.:")); - ASSERT_FALSE(entry.Matches("A:1.0.2:")); - ASSERT_FALSE(entry.Matches("A:.1.0:")); - ASSERT_FALSE(entry.Matches("A:..10:")); - ASSERT_FALSE(entry.Matches("A:10..:")); - ASSERT_FALSE(entry.Matches("A:.:")); - 
ASSERT_FALSE(entry.Matches("A::")); -} - -TEST_F(PatternEntryTest, TestIndividualIdEntry) { - auto entry = ObjectLibrary::PatternEntry::AsIndividualId("AA"); - ASSERT_TRUE(entry.Matches("AA")); - ASSERT_TRUE(entry.Matches("AA@123#456")); - ASSERT_TRUE(entry.Matches("AA@deadbeef#id")); - - ASSERT_FALSE(entry.Matches("A")); - ASSERT_FALSE(entry.Matches("AAA")); - ASSERT_FALSE(entry.Matches("AA@123")); - ASSERT_FALSE(entry.Matches("AA@123#")); - ASSERT_FALSE(entry.Matches("AA@#123")); -} - -TEST_F(PatternEntryTest, TestTwoNameEntry) { - ObjectLibrary::PatternEntry entry("A"); - entry.AnotherName("B"); - ASSERT_TRUE(entry.Matches("A")); - ASSERT_TRUE(entry.Matches("B")); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("BB")); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("BA")); - ASSERT_FALSE(entry.Matches("AB")); -} - -TEST_F(PatternEntryTest, TestTwoPatternEntry) { - ObjectLibrary::PatternEntry entry("AA", false); - entry.AddSeparator(":"); - entry.AddSeparator(":"); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("AA:")); - ASSERT_FALSE(entry.Matches("AA::")); - ASSERT_FALSE(entry.Matches("AA::12")); - ASSERT_TRUE(entry.Matches("AA:1:2")); - ASSERT_TRUE(entry.Matches("AA:1:2:")); - - ObjectLibrary::PatternEntry entry2("AA", false); - entry2.AddSeparator("::"); - entry2.AddSeparator("##"); - ASSERT_FALSE(entry2.Matches("AA")); - ASSERT_FALSE(entry2.Matches("AA:")); - ASSERT_FALSE(entry2.Matches("AA::")); - ASSERT_FALSE(entry2.Matches("AA::#")); - ASSERT_FALSE(entry2.Matches("AA::##")); - ASSERT_FALSE(entry2.Matches("AA##1::2")); - ASSERT_FALSE(entry2.Matches("AA::123##")); - ASSERT_TRUE(entry2.Matches("AA::1##2")); - ASSERT_TRUE(entry2.Matches("AA::12##34:")); - ASSERT_TRUE(entry2.Matches("AA::12::34##56")); - ASSERT_TRUE(entry2.Matches("AA::12##34::56")); -} - -TEST_F(PatternEntryTest, TestTwoNumbersEntry) { - ObjectLibrary::PatternEntry entry("AA", false); - entry.AddNumber(":"); - entry.AddNumber(":"); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("AA:")); - ASSERT_FALSE(entry.Matches("AA::")); - ASSERT_FALSE(entry.Matches("AA::12")); - ASSERT_FALSE(entry.Matches("AA:1:2:")); - ASSERT_TRUE(entry.Matches("AA:1:2")); - ASSERT_TRUE(entry.Matches("AA:12:23456")); - - ObjectLibrary::PatternEntry entry2("AA", false); - entry2.AddNumber(":"); - entry2.AddNumber("#"); - ASSERT_FALSE(entry2.Matches("AA")); - ASSERT_FALSE(entry2.Matches("AA:")); - ASSERT_FALSE(entry2.Matches("AA:#")); - ASSERT_FALSE(entry2.Matches("AA#:")); - ASSERT_FALSE(entry2.Matches("AA:123#")); - ASSERT_FALSE(entry2.Matches("AA:123#B")); - ASSERT_FALSE(entry2.Matches("AA:B#123")); - ASSERT_TRUE(entry2.Matches("AA:1#2")); - ASSERT_FALSE(entry2.Matches("AA:123#23:")); - ASSERT_FALSE(entry2.Matches("AA::12#234")); -} - -TEST_F(PatternEntryTest, TestPatternAndSuffix) { - ObjectLibrary::PatternEntry entry("AA", false); - entry.AddSeparator("::"); - entry.AddSuffix("##"); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("AA::")); - ASSERT_FALSE(entry.Matches("AA::##")); - ASSERT_FALSE(entry.Matches("AB::1##")); - ASSERT_FALSE(entry.Matches("AB::1##2")); - ASSERT_FALSE(entry.Matches("AA##1::")); - ASSERT_TRUE(entry.Matches("AA::1##")); - ASSERT_FALSE(entry.Matches("AA::1###")); - - ObjectLibrary::PatternEntry entry2("AA", false); - entry2.AddSuffix("::"); - entry2.AddSeparator("##"); - ASSERT_FALSE(entry2.Matches("AA")); - ASSERT_FALSE(entry2.Matches("AA::")); - ASSERT_FALSE(entry2.Matches("AA::##")); - 
ASSERT_FALSE(entry2.Matches("AB::1##")); - ASSERT_FALSE(entry2.Matches("AB::1##2")); - ASSERT_TRUE(entry2.Matches("AA::##12")); -} - -TEST_F(PatternEntryTest, TestTwoNamesAndPattern) { - ObjectLibrary::PatternEntry entry("AA", true); - entry.AddSeparator("::"); - entry.AnotherName("BBB"); - ASSERT_TRUE(entry.Matches("AA")); - ASSERT_TRUE(entry.Matches("AA::1")); - ASSERT_TRUE(entry.Matches("BBB")); - ASSERT_TRUE(entry.Matches("BBB::2")); - - ASSERT_FALSE(entry.Matches("AA::")); - ASSERT_FALSE(entry.Matches("AAA::")); - ASSERT_FALSE(entry.Matches("BBB::")); - - entry.SetOptional(false); - ASSERT_FALSE(entry.Matches("AA")); - ASSERT_FALSE(entry.Matches("BBB")); - - ASSERT_FALSE(entry.Matches("AA::")); - ASSERT_FALSE(entry.Matches("AAA::")); - ASSERT_FALSE(entry.Matches("BBB::")); - - ASSERT_TRUE(entry.Matches("AA::1")); - ASSERT_TRUE(entry.Matches("BBB::2")); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - diff --git a/utilities/util_merge_operators_test.cc b/utilities/util_merge_operators_test.cc deleted file mode 100644 index fed6f1a75..000000000 --- a/utilities/util_merge_operators_test.cc +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#include "test_util/testharness.h" -#include "test_util/testutil.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -class UtilMergeOperatorTest : public testing::Test { - public: - UtilMergeOperatorTest() {} - - std::string FullMergeV2(std::string existing_value, - std::vector operands, - std::string key = "") { - std::string result; - Slice result_operand(nullptr, 0); - - Slice existing_value_slice(existing_value); - std::vector operands_slice(operands.begin(), operands.end()); - - const MergeOperator::MergeOperationInput merge_in( - key, &existing_value_slice, operands_slice, nullptr); - MergeOperator::MergeOperationOutput merge_out(result, result_operand); - merge_operator_->FullMergeV2(merge_in, &merge_out); - - if (result_operand.data()) { - result.assign(result_operand.data(), result_operand.size()); - } - return result; - } - - std::string FullMergeV2(std::vector operands, - std::string key = "") { - std::string result; - Slice result_operand(nullptr, 0); - - std::vector operands_slice(operands.begin(), operands.end()); - - const MergeOperator::MergeOperationInput merge_in(key, nullptr, - operands_slice, nullptr); - MergeOperator::MergeOperationOutput merge_out(result, result_operand); - merge_operator_->FullMergeV2(merge_in, &merge_out); - - if (result_operand.data()) { - result.assign(result_operand.data(), result_operand.size()); - } - return result; - } - - std::string PartialMerge(std::string left, std::string right, - std::string key = "") { - std::string result; - - merge_operator_->PartialMerge(key, left, right, &result, nullptr); - return result; - } - - std::string PartialMergeMulti(std::deque operands, - std::string key = "") { - std::string result; - std::deque operands_slice(operands.begin(), operands.end()); - - merge_operator_->PartialMergeMulti(key, operands_slice, &result, nullptr); - return result; - } - - protected: - std::shared_ptr merge_operator_; -}; - -TEST_F(UtilMergeOperatorTest, MaxMergeOperator) { 
- merge_operator_ = MergeOperators::CreateMaxOperator(); - - EXPECT_EQ("B", FullMergeV2("B", {"A"})); - EXPECT_EQ("B", FullMergeV2("A", {"B"})); - EXPECT_EQ("", FullMergeV2({"", "", ""})); - EXPECT_EQ("A", FullMergeV2({"A"})); - EXPECT_EQ("ABC", FullMergeV2({"ABC"})); - EXPECT_EQ("Z", FullMergeV2({"ABC", "Z", "C", "AXX"})); - EXPECT_EQ("ZZZ", FullMergeV2({"ABC", "CC", "Z", "ZZZ"})); - EXPECT_EQ("a", FullMergeV2("a", {"ABC", "CC", "Z", "ZZZ"})); - - EXPECT_EQ("z", PartialMergeMulti({"a", "z", "efqfqwgwew", "aaz", "hhhhh"})); - - EXPECT_EQ("b", PartialMerge("a", "b")); - EXPECT_EQ("z", PartialMerge("z", "azzz")); - EXPECT_EQ("a", PartialMerge("a", "")); -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -}
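For context beyond the unit test, the following hedged sketch shows how the operator under test can be attached to a database through the public Options/Merge API; the database path and key are placeholders, and error handling is elided:

// Sketch: wiring CreateMaxOperator() into a DB so that successive Merge()
// calls keep only the largest operand for a key.
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "utilities/merge_operators.h"

void MaxOperatorSketch() {
  ROCKSDB_NAMESPACE::Options options;
  options.create_if_missing = true;
  // Resolve merges by keeping the byte-wise largest value, as verified by
  // MaxMergeOperator above.
  options.merge_operator =
      ROCKSDB_NAMESPACE::MergeOperators::CreateMaxOperator();

  ROCKSDB_NAMESPACE::DB* db = nullptr;
  ROCKSDB_NAMESPACE::Status s =
      ROCKSDB_NAMESPACE::DB::Open(options, "/tmp/max_operator_example", &db);
  if (!s.ok() || db == nullptr) {
    return;
  }

  ROCKSDB_NAMESPACE::WriteOptions wo;
  s = db->Merge(wo, "key", "ABC");
  s = db->Merge(wo, "key", "Z");
  s = db->Merge(wo, "key", "AXX");

  std::string value;
  s = db->Get(ROCKSDB_NAMESPACE::ReadOptions(), "key", &value);
  // value is now "Z", the maximum of the merged operands.
  delete db;
}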